In [2]:
import pandas as pd
from datetime import datetime
import re

In [4]:
# Print all UFC fights for a fighter 

# Define the fighter's name
fighter_name = "Brian Ortega"

# Load the dataset
master_df = pd.read_csv('data/github/master.csv')

# Filter the DataFrame to get all fights of the given fighter in chronological order
fighter_fights = master_df[
    master_df['FIGHTER1'].str.contains(fighter_name, case=False, na=False) | 
    master_df['FIGHTER2'].str.contains(fighter_name, case=False, na=False)
].sort_values(by='DATE')

# Check if there are any fights for the given fighter
if fighter_fights.empty:
    print(f"No fights found for {fighter_name}")
else:
    # Extract detailed information
    fighter_details = fighter_fights[['EVENT', 'BOUT', 'OUTCOME', 'WEIGHTCLASS', 'METHOD', 'ROUND', 'TIME', 'TIME FORMAT', 'REFEREE', 'DETAILS', 'URL', 'DATE', 'LOCATION', 'FIGHTER1', 'FIGHTER2']].copy()
    
    # Extract opponents and outcomes
    opponents_and_outcomes = fighter_fights[['DATE', 'FIGHTER1', 'FIGHTER2', 'OUTCOME', 'WINNING_FIGHTER']].copy()
    opponents_and_outcomes['OPPONENT'] = opponents_and_outcomes.apply(
        lambda row: row['FIGHTER2'] if fighter_name.lower() in row['FIGHTER1'].lower() else row['FIGHTER1'], axis=1
    )
    opponents_and_outcomes['RESULT'] = opponents_and_outcomes.apply(
        lambda row: 'Win' if (fighter_name.lower() in row['WINNING_FIGHTER'].lower()) else 'Loss', axis=1
    )
    
    # Combine details with opponents and outcomes
    detailed_fights = pd.concat([opponents_and_outcomes, fighter_details.drop(columns=['DATE', 'FIGHTER1', 'FIGHTER2', 'OUTCOME'])], axis=1)
    
    # Display the detailed fights
    print(detailed_fights)


            DATE               FIGHTER1        FIGHTER2 OUTCOME  \
4107  2014-07-26       mike de la torre    brian ortega   NC/NC   
3732  2015-06-06         thiago tavares    brian ortega     L/W   
3485  2016-01-02          diego brandao    brian ortega     L/W   
3327  2016-06-04           brian ortega      clay guida     W/L   
2830  2017-07-29           brian ortega  renato moicano     W/L   
2678  2017-12-09            cub swanson    brian ortega     L/W   
2582  2018-03-03          frankie edgar    brian ortega     L/W   
2284  2018-12-08           max holloway    brian ortega     W/L   
1541  2020-10-17           brian ortega  chan sung jung     W/L   
1155  2021-09-25  alexander volkanovski    brian ortega     W/L   
811   2022-07-16           brian ortega  yair rodriguez     L/W   
157   2024-02-24         yair rodriguez    brian ortega     L/W   

            WINNING_FIGHTER               OPPONENT RESULT  \
4107             No Contest       mike de la torre   Loss   
3732  

In [4]:
# Generate single fighter report

# Load the dataset
fighter_info_df = pd.read_csv('./data/fighter_info.csv')

# Fill NaN values in the 'Fighter' column to avoid the ValueError
fighter_info_df['Fighter'] = fighter_info_df['Fighter'].fillna('')

fighter_name = 'Dustin Poirier'

# Filtering for the fighter's data and explicitly create a copy to avoid SettingWithCopyWarning
fighter_data = fighter_info_df[fighter_info_df['Fighter'].str.contains(fighter_name, case=False, na=False)].copy()
fighter_data['Height_cm'] = fighter_data['Height'].apply(lambda height: (int(height.split("'")[0]) * 30.48 + int(height.split("'")[1].replace('"', '')) * 2.54) if pd.notnull(height) else 'N/A')
fighter_data['Age'] = fighter_data['Birth Date'].apply(lambda birth_date_str: datetime.today().year - datetime.strptime(birth_date_str, '%b %d, %Y').year - ((datetime.today().month, datetime.today().day) < (datetime.strptime(birth_date_str, '%b %d, %Y').month, datetime.strptime(birth_date_str, '%b %d, %Y').day)) if pd.notnull(birth_date_str) else 'N/A')

# Iterate through the filtered data and print the report
if not fighter_data.empty:
    for _, row in fighter_data.iterrows():
        print(f"Fighter: {row['Fighter']}")
        print(f"Height: {row['Height_cm']} cm")
        print(f"Reach: {row['REACH'] if pd.notnull(row['REACH']) else 'N/A'}")
        print(f"Nationality: {row['Nationality']}")
        print(f"Wins: {row['Wins']}")
        print(f"Losses: {row['Losses']}")
        print(f"Wins by Decision: {row['Win_Decision']}")
        print(f"Wins by KO: {row['Win_KO']}")
        print(f"Wins by Submission: {row['Win_Sub']}")
        print(f"Losses by Decision: {row['Loss_Decision']}")
        print(f"Losses by KO: {row['Loss_KO']}")
        print(f"Losses by Submission: {row['Loss_Sub']}")
        print(f"Age: {row['Age']}")
else:
    print(f"No data found for {fighter_name}.")


Fighter: dustin poirier
Height: 175.26 cm


KeyError: 'REACH'

In [5]:
### Fighters Methods of Victory (methods and rounds)

fighter_name = 'Dustin Poirier'

fighter_info_df = pd.read_csv('./data/fighter_info.csv')
event_data_sherdog_df = pd.read_csv('./data/event_data_sherdog.csv')

# Fill NaN values to ensure string operations do not result in errors
fighter_info_df['Fighter'] = fighter_info_df['Fighter'].fillna('')
event_data_sherdog_df[['Fighter 1', 'Fighter 2', 'Winning Method']] = event_data_sherdog_df[['Fighter 1', 'Fighter 2', 'Winning Method']].fillna('')

# Filter for fights involving the fighter and where the fighter won by KO or Submission
wins_by_finish = event_data_sherdog_df[((event_data_sherdog_df['Fighter 1'].str.contains(fighter_name, case=False)) | 
                                        (event_data_sherdog_df['Fighter 2'].str.contains(fighter_name, case=False))) & 
                                        (event_data_sherdog_df['Winning Method'].str.contains('KO|Submission'))]

# Group by winning round and method, then count occurrences
wins_by_round_and_method = wins_by_finish.groupby(['Winning Round', 'Winning Method']).size().reset_index(name='Counts')

# Filter the fighter's data
fighter_data = fighter_info_df[fighter_info_df['Fighter'].str.contains(fighter_name, case=False)].copy()

if not fighter_data.empty:
    for _, row in fighter_data.iterrows():
        print(f"Fighter: {row['Fighter']}")
        # Assuming 'Height' conversion and 'Age' calculation are already correctly implemented as in your original script
        # Existing details omitted for brevity
        
        # Display wins by finish per round
        print("\nWins by Finish per Round:")
        for _, win_row in wins_by_round_and_method.iterrows():
            print(f"{win_row['Winning Method']} in Round {win_row['Winning Round']}: {win_row['Counts']}")
else:
    print(f"No data found for {fighter_name}.")


Fighter: dustin poirier

Wins by Finish per Round:
KO (Punches) in Round 1: 4
Submission (Brabo Choke) in Round 1: 1
Submission (Triangle Armbar) in Round 1: 1
TKO (Leg Injury) in Round 1: 1
TKO (Punches) in Round 1: 2
KO (Head Kick) in Round 2: 1
KO (Punches) in Round 2: 2
Submission (Brabo Choke) in Round 2: 1
TKO (Punches) in Round 2: 2
Submission (Body Triangle) in Round 3: 1
Submission (Rear-Naked Choke) in Round 3: 3
TKO (Punches) in Round 4: 1
Technical Submission (Brabo Choke) in Round 4: 1
Submission (Brabo Choke) in Round 5: 1
