In [1]:
!pip3 install pandas
!pip3 install numpy
!pip3 install beautifulsoup4
!pip3 install --upgrade pip



In [2]:
import pandas as pd
import requests
import numpy as np 
import time
from bs4 import BeautifulSoup
import string
import csv




In [3]:
base_url = 'http://ufcstats.com/statistics/fighters'
characters = string.ascii_lowercase
fighters = []
seen_ids = set()

for char in characters:
    # Construct URL for each character page
    url = f"{base_url}?char={char}&page=all"
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    
    # Select all rows in the table, skipping the header
    table_rows = soup.select('.b-statistics__table tbody .b-statistics__table-row')[1:]
    
    # Loop over each row to get name and ID
    for row in table_rows:
        cells = row.find_all('td')
        
        if len(cells) >= 9:  # Check if there are enough cells for name and other data
            # Extract first and last names, assuming they're in specific cells
            first_name = cells[0].get_text(strip=True)
            last_name = cells[1].get_text(strip=True)
            fighter_name = f"{first_name} {last_name}"
            
            # Extract the unique fighter ID from the profile link in the first cell
            profile_link = cells[0].find('a', href=True)
            
            if profile_link and '/fighter-details/' in profile_link['href']:
                fighter_id = profile_link['href'].split('/')[-1]
                
                # Ensure each ID is unique
                if fighter_id not in seen_ids:
                    seen_ids.add(fighter_id)
                    
                    # Append the fighter's name and ID to the fighters list
                    fighters.append({
                        'name': fighter_name,
                        'id': fighter_id
                    })

# Print or further process the collected fighter data
print(fighters)

[{'name': 'Tom Aaron', 'id': '93fe7332d16c6ad9'}, {'name': 'Danny Abbadi', 'id': '15df64c02b6b0fde'}, {'name': 'Nariman Abbasov', 'id': '59a9d6dac61c2540'}, {'name': 'David Abbott', 'id': 'b361180739bed4b0'}, {'name': 'Hamdy Abdelwahab', 'id': '3329d692aea4dc28'}, {'name': 'Mansur Abdul-Malik', 'id': '841695e02c99a521'}, {'name': 'Shamil Abdurakhimov', 'id': '2f5cbecbbe18bac4'}, {'name': 'Hiroyuki Abe', 'id': 'c0ed7b208197e8de'}, {'name': 'Daichi Abe', 'id': '5140122c3eecd307'}, {'name': 'Papy Abedi', 'id': 'c9f6385af6df66d7'}, {'name': 'Ricardo Abreu', 'id': 'aa6e591c2a2cdecd'}, {'name': 'Klidson Abreu', 'id': '7279654c7674cd24'}, {'name': 'Cyborg Abreu', 'id': 'f689bd7bbd14b392'}, {'name': 'Daniel Acacio', 'id': '1c5879330d42255f'}, {'name': 'John Adajar', 'id': '989b85f6540c86b1'}, {'name': 'Scott Adams', 'id': '2620f3eb21c79614'}, {'name': 'Juan Adams', 'id': '83b00f7597e5ac83'}, {'name': 'Anthony Adams', 'id': 'a77633a989013265'}, {'name': 'Zarrukh Adashev', 'id': '79cb2a690b9ba5e

In [4]:
fighters_df = pd.DataFrame(fighters)

In [5]:
n_base_url = 'http://ufcstats.com/fighter-details'
fighter_score_df = pd.DataFrame(columns=['fighter', 'score', 'Peak_score'])

# Function to update highest score correctly, considering all past scores
def update_highest_score(fighter):
    # Ensure highest_score reflects the best score achieved so far
    if fighter['score'] > fighter['highest_score']:
        fighter['highest_score'] = fighter['score']


def load_factor_data(filename):
    factor_data = {}
    with open(filename, mode='r') as file:
        reader = csv.DictReader(file)
        for row in reader:
            fighter_name = row['name'].strip()
            factor_data[fighter_name] = float(row['Gfactor'])
    return factor_data

# Load factor data once at the start
factor_data = load_factor_data('static_data.csv')

# Function to get factor for a given opponent's name
def get_factor(opponent_name):
    return factor_data.get(opponent_name, 1) 

# Determines the style of victory multiplier based on the way of victory
def get_style_of_victory(way_of_fight):     
    if way_of_fight in ["KO", "TKO", "SUB"]:
        return 1.1
    elif way_of_fight == "U-DEC":
        return 1.0
    elif way_of_fight == "M-DEC":
        return 0.9
    elif way_of_fight == "S-DEC":
        return 0.8
    elif way_of_fight == "DQ":
        return 0
    else:
        return 1.0  # Default for cases like Draw or No Contest

for fighter in fighters:
    fighter_id = fighter['id']
    id_url = f"{n_base_url}/{fighter_id}"
    response = requests.get(id_url)
    soup = BeautifulSoup(response.text, 'html.parser')
    fighter['score'] = 0  # Reset fighter score for each fighter
    fighter['highest_score'] = -1000

    fight_rows = soup.select('.b-fight-details__table-row')[::-1]

    for row in fight_rows:
        # Extract W_L (Win/Loss/Next/NC/Draw)
        W_L_element = row.select_one('.b-flag__text')
        if W_L_element:
            W_L_text = W_L_element.text.strip().lower()
            if W_L_text == "win":
                W_L = "WIN"
            elif W_L_text == "loss":
                W_L = "LOSS"
            else:
                continue  # Ignore Next, NC, Draw
        else:
            continue  # If no flag is found, skip to the next row

        # Extract the opponent name (second fighter)
        opponent_name = row.select('td.b-fight-details__table-col.l-page_align_left p.b-fight-details__table-text a.b-link.b-link_style_black')[1].text.strip()

        # Extract way_of_fight (KO, TKO, U-DEC, S-DEC, etc.)
        way_of_fight = row.select('td.b-fight-details__table-col.l-page_align_left p.b-fight-details__table-text')[4].text.strip()
        # Clean way_of_fight to only contain relevant data (e.g., "KO/TKO" or "U-DEC")
        if "KO" in way_of_fight or "TKO" in way_of_fight:
            way_of_fight = "KO/TKO"
        elif "SUB" in way_of_fight:
            way_of_fight = "SUB"
        elif "U-DEC" in way_of_fight:
            way_of_fight = "U-DEC"
        elif "M-DEC" in way_of_fight:
            way_of_fight = "M-DEC"
        elif "S-DEC" in way_of_fight:
            way_of_fight = "S-DEC"
        elif "DQ" in way_of_fight:
            way_of_fight = "DQ"
        else:
            way_of_fight = None

        # Calculate Gfactor for opponent
        opponent_gfactor = get_factor(opponent_name)

        # Determine style of victory multiplier
        style_of_victory = get_style_of_victory(way_of_fight)

        # Update fighter score based on win/loss
        if W_L == 'WIN':
            fighter['score'] += 10 * (opponent_gfactor / 100) * style_of_victory
        elif W_L == 'LOSS':
            fighter['score'] -= (10 * style_of_victory)
        else:
            continue  # Continue for cases like Draw, Next, etc.
        update_highest_score(fighter)

    print(f"Fighter: {fighter['name']} , Score: {fighter['score']} , Peak_Score: {fighter['highest_score']}")
    new_row = pd.DataFrame({'fighter': [fighter['name']], 'score': [fighter['score']], 'Peak_score': [fighter['highest_score']]})
    fighter_score_df = pd.concat([fighter_score_df, new_row], ignore_index=True)


# Write to CSV
fighter_score_df.to_csv('fighter_scores.csv', mode='w', header=True, index=False)



Fighter: Tom Aaron , Score: -6.276943090865345 , Peak_Score: 3.723056909134655


  fighter_score_df = pd.concat([fighter_score_df, new_row], ignore_index=True)


Fighter: Danny Abbadi , Score: -19.0 , Peak_Score: -11.0
Fighter: Nariman Abbasov , Score: -10.0 , Peak_Score: -10.0
Fighter: David Abbott , Score: -112.92363879365556 , Peak_Score: 5.4702345790295395
Fighter: Hamdy Abdelwahab , Score: 0 , Peak_Score: -1000
Fighter: Mansur Abdul-Malik , Score: 9.449244914630043 , Peak_Score: 9.449244914630043
Fighter: Shamil Abdurakhimov , Score: -37.16413812733662 , Peak_Score: 2.8358618726633784
Fighter: Hiroyuki Abe , Score: -11.0 , Peak_Score: -11.0
Fighter: Daichi Abe , Score: -15.091927385014134 , Peak_Score: 4.9080726149858664
Fighter: Papy Abedi , Score: -29.310759314088955 , Peak_Score: -11.0
Fighter: Ricardo Abreu , Score: -4.409597468714525 , Peak_Score: 3.590402531285475
Fighter: Klidson Abreu , Score: -14.833276320290256 , Peak_Score: -6.833276320290256
Fighter: Cyborg Abreu , Score: 0 , Peak_Score: -1000
Fighter: Daniel Acacio , Score: -11.952096532790776 , Peak_Score: 9.047903467209224
Fighter: John Adajar , Score: -11.0 , Peak_Score: -1

In [None]:

display(fighter_score_df)

In [None]:
#initial rating edit
#[(wins of opponent - losses of opponent )/(total wins*10)]     +     [ (wins of opponent - losses of opponent)/total losses*10]
#this is to add to toal wins so that to just add strength of competation into initial statistics


#loss effect(not in prime ) edit
#other is i can add age factor for each fight their birth age is given and year of fight , and weight calss is given is given . 
#therefore if (<155 lb) && (year of fight - birth year >35 )   -> loss = 0.8 * loss
#          if (>=170 - <180) && ( >38) - > loss = 0.8 * loss
#          if (205 - 265) && (>40) - > 0.8 *loss


#if not (initial rating edit) then consecutive recursive rating update
#once u get final rating . final rating =secondary rating
#initial rating = initial rating + secondary rating /2
#rerun whole thing
#(just be careful about if >2 recursive runs it might be overfitting and might lead to overflow of rating or over flattening of ratings)




In [2]:
    def update_highest_score(fighter):
    # Ensure highest_score reflects the best score achieved so far
    if fighter['score'] > fighter['highest_score']:
        fighter['highest_score'] = fighter['score']


def load_factor_data(filename):
    factor_data = {}
    with open(filename, mode='r') as file:
        reader = csv.DictReader(file)
        for row in reader:
            fighter_name = row['name'].strip()
            factor_data[fighter_name] = float(row['Gfactor'])
    return factor_data

# Load factor data once at the start
factor_data = load_factor_data('static_data.csv')

# Function to get factor for a given opponent's name
def get_factor(opponent_name):
    return factor_data.get(opponent_name, 1) 

# Determines the style of victory multiplier based on the way of victory
def get_style_of_victory(way_of_fight):     
    if way_of_fight in ["KO", "TKO", "SUB"]:
        return 1.1
    elif way_of_fight == "U-DEC":
        return 1.0
    elif way_of_fight == "M-DEC":
        return 0.9
    elif way_of_fight == "S-DEC":
        return 0.8
    elif way_of_fight == "DQ":
        return 0
    else:
        return 1.0  # Default for cases like Draw or No Contest


fighter_url ="http://www.ufcstats.com/fighter-details/08af939f41b5a57b"
response = requests.get(fighter_url)
soup = BeautifulSoup(response.text, 'html.parser')
fighter['score'] = 0  # Reset fighter score for each fighter
fighter['highest_score'] = -1000
fight_rows = soup.select('.b-fight-details__table-row')[::-1]
for row in fight_rows:
    # Extract W_L (Win/Loss/Next/NC/Draw)
    W_L_element = row.select_one('.b-flag__text')
    if W_L_element:
        W_L_text = W_L_element.text.strip().lower()
        if W_L_text == "win":
            W_L = "WIN"
        elif W_L_text == "loss":
            W_L = "LOSS"
        else:
            continue  # Ignore Next, NC, Draw
    else:
        continue  # If no flag is found, skip to the next row

    # Extract the opponent name (second fighter)
    opponent_name = row.select('td.b-fight-details__table-col.l-page_align_left p.b-fight-details__table-text a.b-link.b-link_style_black')[1].text.strip()

    # Extract way_of_fight (KO, TKO, U-DEC, S-DEC, etc.)
    way_of_fight = row.select('td.b-fight-details__table-col.l-page_align_left p.b-fight-details__table-text')[4].text.strip()
    # Clean way_of_fight to only contain relevant data (e.g., "KO/TKO" or "U-DEC")
    if "KO" in way_of_fight or "TKO" in way_of_fight:
        way_of_fight = "KO/TKO"
    elif "SUB" in way_of_fight:
        way_of_fight = "SUB"
    elif "U-DEC" in way_of_fight:
        way_of_fight = "U-DEC"
    elif "M-DEC" in way_of_fight:
        way_of_fight = "M-DEC"
    elif "S-DEC" in way_of_fight:
        way_of_fight = "S-DEC"
    elif "DQ" in way_of_fight:
        way_of_fight = "DQ"
    else:
        way_of_fight = None

    # Calculate Gfactor for opponent
    opponent_gfactor = get_factor(opponent_name)

    # Determine style of victory multiplier
    style_of_victory = get_style_of_victory(way_of_fight)

    # Update fighter score based on win/loss
    if W_L == 'WIN':
        fighter['score'] += 10 * (opponent_gfactor / 100) * style_of_victory
    elif W_L == 'LOSS':
        fighter['score'] -= (10 * style_of_victory)
    else:
        continue  # Continue for cases like Draw, Next, etc.
    update_highest_score(fighter)

    print(f"Fighter: {fighter['name']} , Score: {fighter['score']} , Peak_Score: {fighter['highest_score']}")


NameError: name 'requests' is not defined