In [None]:
import pandas as pd
from bs4 import BeautifulSoup
import requests 
# Show every column when a DataFrame is displayed (None removes the column limit)
pd.options.display.max_columns = None

In [None]:
# Function to get HTML response
def get_html(url, timeout=30):
    """Download a page and return its body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait on the server before giving up; forwarded
            to ``requests.get``. Without a timeout a stalled connection
            would block the caller indefinitely.

    Returns:
        The response body as a string when the status code is 200,
        otherwise ``None``.

    Raises:
        requests.RequestException: Propagated from ``requests.get`` on
            connection problems or when the timeout expires.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code == 200:
        return response.text
    return None
    
from bs4 import BeautifulSoup
import pandas as pd

def parse_individual_fight(html, card_name, fight_date, fights_location, f1, f2):
    """Parse one fight-details page into a flat dictionary.

    Args:
        html: HTML source of the fight page.
        card_name: Copied unchanged into the result under ``'card_name'``.
        fight_date: Copied unchanged into the result under ``'fight_date'``.
        fights_location: Copied unchanged under ``'fights_location'``.
        f1: Copied unchanged under ``'f1'``; also used to order the stats
            when the page marks no winner.
        f2: Copied unchanged under ``'f2'``.

    Returns:
        A dict holding the passed-in metadata, ``'winner'`` (the name next
        to the green status marker, or ``0`` when no such marker exists),
        ``'weightclass'``, ``'method'``, ``'round'``, ``'end_time'``,
        ``'time_format'``, ``'referee'``, followed by the table stats
        prefixed ``f1_`` and then ``f2_``. When a winner is marked, the
        ``f1_`` stats belong to the winner and the ``f2_`` stats to the
        other fighter.

    Raises:
        AttributeError, IndexError, KeyError: When the page lacks the
            elements looked up below (e.g. no details block, fewer tables
            or cells than expected).
    """
    soup = BeautifulSoup(html, 'html.parser')

    fight_stats = {
        'card_name': card_name,
        'fight_date': fight_date,
        'fights_location': fights_location,
        'f1': f1,
        'f2': f2,
        'winner': _find_winner_name(soup),
    }

    # Get fight details
    details_block = soup.find('div', class_='b-fight-details__fight')
    fight_stats['weightclass'] = details_block.find('i', class_='b-fight-details__fight-title').text.strip()

    # The <i> tags are addressed by position, so this depends on the exact
    # nesting of the details paragraph.
    detail_tags = details_block.find('p', class_='b-fight-details__text').find_all('i')
    fight_stats['method'] = detail_tags[2].get_text(strip=True)
    fight_stats['round'] = detail_tags[3].get_text(strip=True).split(':')[1]
    # Split on the first colon only: the value itself contains a colon
    # (presumably minutes:seconds).
    fight_stats['end_time'] = detail_tags[5].get_text(strip=True).split(':', 1)[1]
    fight_stats['time_format'] = detail_tags[7].get_text(strip=True).split(':')[1].split(' ')[0]
    fight_stats['referee'] = detail_tags[9].get_text(strip=True).split(':')[1]

    # Extract individual fight stats: the first and third table bodies are used.
    tables = soup.find_all('tbody', class_='b-fight-details__table-body')
    totals_table = tables[0]
    sig_strikes_table = tables[2]

    cell_text = _read_table_cells(
        totals_table,
        ['fighter', 'kd', 'sig_str', 'sig_str_%', 'total_str', 'td', 'td_%', 'sub_att', 'rev', 'control'],
    )
    # Shared keys (fighter, sig_str, sig_str_%) are overwritten by the
    # second table but keep their original position in the dict.
    cell_text.update(_read_table_cells(
        sig_strikes_table,
        ['fighter', 'sig_str', 'sig_str_%', 'head', 'body', 'leg', 'distance', 'clinch', 'ground'],
    ))

    # Each cell carries one value per fighter on separate lines.
    split_data = {key: _split_cell_lines(text) for key, text in cell_text.items()}

    fighters = split_data['fighter']
    winner = fight_stats['winner']
    if isinstance(winner, str):
        # The winner's stats go into the f1_ columns.
        first_index = 0 if fighters[0] == winner else 1
    else:
        # No winner marker on the page. Comparing the 0 sentinel with a
        # name can never match, which used to swap the fighters by
        # accident; align with the caller's f1 instead and otherwise keep
        # the order used on the page.
        listed_second = len(fighters) > 1 and fighters[1] == str(f1).strip()
        first_index = 1 if listed_second else 0
    second_index = 1 - first_index

    f1_stats = {'f1_' + key: values[first_index] for key, values in split_data.items()}
    f2_stats = {'f2_' + key: values[second_index] for key, values in split_data.items()}

    return {**fight_stats, **f1_stats, **f2_stats}


def _find_winner_name(soup):
    """Return the name linked after the green status marker, or 0 if there is no marker."""
    win_marker = soup.find('i', class_='b-fight-details__person-status b-fight-details__person-status_style_green')
    if win_marker is None:
        # 0 is the sentinel written to the output when no winner is marked.
        return 0
    return win_marker.find_next('a').text.strip()


def _read_table_cells(table, columns):
    """Map each column name to the stripped text of its <td>; with several rows the last one wins."""
    cells_by_column = {}
    for table_row in table.find_all('tr'):
        cells = table_row.find_all('td')
        for position, column in enumerate(columns):
            cells_by_column[column] = cells[position].text.strip()
    return cells_by_column


def _split_cell_lines(text):
    """Split cell text on newlines and drop blank lines, stripping each remaining value."""
    stripped_lines = (line.strip() for line in text.strip().split("\n"))
    return [line for line in stripped_lines if line]


In [None]:
# Load the event listing CSV into `df`, then preview its last 173 rows
df = pd.read_csv(filepath_or_buffer='ufc_event_data.csv')
df.iloc[-173:]

In [None]:
all_fights = []

# Loop through each fight listed in the DataFrame
for index, row in df.iterrows():
    fight_url = row['fight_url']
    card_name = row['card_name']
    fight_date = row['fight_date']
    fights_location = row['fights_location']
    f1 = row['f1']
    f2 = row['f2']

    # Some old fights do not have fight stats available; parsing raises for
    # those pages, so the error is reported and the fight is skipped.
    try:
        # Fetch HTML content
        html = get_html(fight_url)
        if html:
            # Parse fight details
            fight_details = parse_individual_fight(html, card_name, fight_date, fights_location, f1, f2)
            all_fights.append(fight_details)
        else:
            # get_html returns None for non-200 responses; report the URL
            # instead of dropping the fight without a trace.
            print(f"Skipping fight at URL: {fight_url}. No HTML returned.")
    except Exception as e:
        print(f"Error processing fight at URL: {fight_url}. Error: {e}")

In [None]:
# Turn the collected per-fight dictionaries into a single table
fights_df = pd.DataFrame(data=all_fights)

# Write the table to disk as CSV, leaving out the index column
fights_df.to_csv(path_or_buf='ufc_fight.csv', index=False)