In [1]:
import os
import pandas as pd

In [2]:
directory = './data/fighters/'
fighters_files = os.listdir(directory)

columns = ['Fighter', 'Result', 'Opponent', 'Event Date', 'Method/Referee', 'Rounds', 'Time', 'Fight Sequence']
all_fights_data = pd.DataFrame(columns=columns)

processed_files = []  # List to store the names of fighters with three consecutive KO/TKO losses

# Loop through each file in the directory
for file in fighters_files:
    file_path = os.path.join(directory, file)
    if os.path.isfile(file_path) and file_path.endswith('.csv'):
        fight_data = pd.read_csv(file_path)
        
        # Ensure data is in chronological order
        fight_data = fight_data.iloc[::-1].reset_index(drop=True)
        
        loss_count = 0
        last_losses = []

        for index, row in fight_data.iterrows():
            result = str(row["Result"]).lower()
            method = str(row["Method/Referee"]).lower()
            
            if "loss" in result and ("ko" in method or "tko" in method):
                loss_count += 1
                last_losses.append(row.copy())  # Use copy() to ensure we're modifying a copy
                if loss_count == 3:
                    # Add file name to the list when the fighter has three consecutive KO/TKO losses
                    processed_files.append(file)

                    # Collect data for these fights
                    for loss in last_losses:
                        loss['Fighter'] = file
                        loss['Fight Sequence'] = 'Loss 1-3'
                        all_fights_data = pd.concat([all_fights_data, pd.DataFrame([loss])], ignore_index=True)
                    
                    # Check if there is a next fight
                    if index + 1 < len(fight_data):
                        next_fight = fight_data.iloc[index + 1].copy()
                        next_fight['Fighter'] = file
                        next_fight['Fight Sequence'] = 'Next Fight'
                        all_fights_data = pd.concat([all_fights_data, pd.DataFrame([next_fight])], ignore_index=True)
                    else:
                        # Add a row indicating no subsequent fight
                        na_fight = {col: 'NA' for col in columns}
                        na_fight['Fighter'] = file
                        na_fight['Fight Sequence'] = 'Next Fight'
                        all_fights_data = pd.concat([all_fights_data, pd.DataFrame([na_fight])], ignore_index=True)
                    
                    break
            else:
                loss_count = 0
                last_losses = []

# Save the DataFrame to an Excel file
all_fights_data.to_excel('fighters_analysis.xlsx', index=False)

# Print only fighters with three consecutive KO/TKO losses
print(f"Total fighters with three consecutive KO/TKO losses: {len(processed_files)}")
if processed_files:
    print("Fighters with three consecutive KO/TKO losses:")
    print("\n".join(processed_files))
else:
    print("No fighters with three consecutive KO/TKO losses found.")
    

Total fighters with three consecutive KO/TKO losses: 111
Fighters with three consecutive KO/TKO losses:
Ildemar_Alcantara_22223.csv
Joe_Doerksen_390.csv
Jason_Godsey_215.csv
Marlon_Moraes_30936.csv
Bojan_Mihajlovic_12400.csv
Antonio_Silva_12354.csv
Dominick_Reyes_145941.csv
Chuck_Liddell_192.csv
Brad_Kohler_181.csv
Gray_Maynard_15835.csv
Tony_Petarra_221.csv
Walt_Harris_72046.csv
Ian_Freeman_242.csv
Ryan_Madigan_16551.csv
Chris_Daukaus_103685.csv
Ruan_Potts_75842.csv
Ben_Saunders_10339.csv
Matt_Mitrione_49519.csv
Marco_Polo_Reyes_114125.csv
Sabah_Homasi_47068.csv
Nick_Thompson_7124.csv
Hans_Stringer_14825.csv
Jacob_Kilburn_238233.csv
Jason_Von_Flue_549.csv
Andre_Fialho_188171.csv
Isaac_Villanueva_41384.csv
Melvin_Guillard_7431.csv
Eric_Spicely_63723.csv
Justin_Eilers_5936.csv
Felipe_Silva_128357.csv
Dave_Roberts_220.csv
Roger_Huerta_10089.csv
Ken_Shamrock_4.csv
Hermes_Franca_3068.csv
Sherman_Pendergarst_12854.csv
Amilcar_Alves_23841.csv
Shamil_Abdurakhimov_26808.csv
Ross_Pointon_6541.c

In [32]:
!open fighters_analysis.xlsx

# Analysis

In [3]:
# W/L in 4th fight

fighters_data = pd.read_excel('fighters_analysis.xlsx')

# Select only the rows corresponding to the next fight after three consecutive losses
next_fights = fighters_data[fighters_data['Fight Sequence'] == 'Next Fight']

# Count the outcomes for the next fights
outcome_counts = next_fights['Result'].value_counts()

# Print the outcomes in the desired format
print("Outcome of Next Fights:")
print(f"Losses: {outcome_counts.get('loss', 0)} fighters lost their next fight.")
print(f"Wins: {outcome_counts.get('win', 0)} fighters won their next fight.")
print(f"No Contest (NC): {outcome_counts.get('NC', 0)} fight ended in no contest.")


Outcome of Next Fights:
Losses: 38 fighters lost their next fight.
Wins: 38 fighters won their next fight.
No Contest (NC): 1 fight ended in no contest.


In [4]:
# Methods in 4th fight

# Load the data from an Excel file
fighters_data = pd.read_excel('fighters_analysis.xlsx')

# Function to clean method names and remove referee details
def clean_method(data):
    data['Method'] = data['Method/Referee'].str.extract(r'([^)]+)')[0]
    return data

# Analyze next fights after three consecutive losses
def analyze_fights(fighters_data):
    # Select only the rows corresponding to the next fight
    next_fights = fighters_data[fighters_data['Fight Sequence'] == 'Next Fight']
    
    # Clean method names
    next_fights = clean_method(next_fights)
    
    # Analysis for losses
    losses_next_fight = next_fights[next_fights['Result'] == 'loss']
    method_of_victories_losses = losses_next_fight['Method'].value_counts()

    # Analysis for wins
    wins_next_fight = next_fights[next_fights['Result'] == 'win']
    method_of_victories_wins = wins_next_fight['Method'].value_counts()

    # Combine TKO and KO into "KO/TKO" category
    strikes_losses = method_of_victories_losses[method_of_victories_losses.index.str.contains('TKO|KO')].sum()
    strikes_wins = method_of_victories_wins[method_of_victories_wins.index.str.contains('TKO|KO')].sum()

    # Other categories
    categories = ['Submission', 'Decision', 'Other']
    results = {'losses': {}, 'wins': {}}

    for category in categories:
        results['losses'][category] = method_of_victories_losses[method_of_victories_losses.index.str.contains(category)].sum()
        results['wins'][category] = method_of_victories_wins[method_of_victories_wins.index.str.contains(category)].sum()

    # Adding KO/TKO to results
    results['losses']['KO/TKO'] = strikes_losses
    results['wins']['KO/TKO'] = strikes_wins

    return results

# Run the analysis
results = analyze_fights(fighters_data)
print("Analysis of losses:\n", results['losses'])
print("Analysis of wins:\n", results['wins'])


Analysis of losses:
 {'Submission': 6, 'Decision': 6, 'Other': 0, 'KO/TKO': 28}
Analysis of wins:
 {'Submission': 9, 'Decision': 8, 'Other': 0, 'KO/TKO': 22}


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Method'] = data['Method/Referee'].str.extract(r'([^)]+)')[0]


In [6]:
import pandas as pd

# Load the fighter information
fighter_info_df = pd.read_csv('data/fighter_info.csv')

# List of fighter filenames from the user's input
# fighter_filenames = [
#     "Ildemar_Alcantara_22223.csv", "Joe_Doerksen_390.csv", "Jason_Godsey_215.csv", 
#     "Marlon_Moraes_30936.csv", "Bojan_Mihajlovic_12400.csv", "Antonio_Silva_12354.csv", 
#     "Dominick_Reyes_145941.csv", "Chuck_Liddell_192.csv", "Brad_Kohler_181.csv", 
#     "Gray_Maynard_15835.csv", # and others...
# ]

# Extract IDs from filenames
fighter_ids = [int(filename.split('_')[-1].split('.')[0]) for filename in processed_files]

# Filter the fighter_info DataFrame for these IDs
filtered_fighter_info = fighter_info_df[fighter_info_df['Fighter_ID'].isin(fighter_ids)]

# Count occurrences in each weight class
weight_class_counts = filtered_fighter_info['Weight Class'].value_counts()

# Display the weight class counts
print(weight_class_counts)


Weight Class
Heavyweight          31
Light Heavyweight    21
Welterweight         16
Middleweight         12
Lightweight          12
Featherweight         8
Bantamweight          5
Flyweight             2
Super Heavyweight     1
Name: count, dtype: int64
