In [2]:
import pandas as pd
import matplotlib.pyplot as plt


mma_dataframe = pd.read_csv('masterdataframe.csv')


In [3]:
#Constants
TESTSIZE = 0.1  # % of data will be used for testing.
VALIDSPLITSIZE = 0.01  # % of data will be used for validation.
MIN_MATCH_COUNT = 2 # Minimum amount of matches player has to have done in order to be used in data

#Define features
staticFeatures = [
    "date",
    "method",
    "control",
    'fighter',
    'opponent',
    'division',
    "time_format",
    "result"
]
skillFeatures = [
    "ground_att",
    "ground_def",
    "standup_att",
    "standup_def",
    "fighter_score",
    "stamina",
    "fight_ratio",
    "winloseratio",
    "control_ratio"
]

extra_features = [
    "division",
    "control",
    "time",
    "method"
]

fightStr = "fighter"

#Import data
mma_dataframe = pd.read_csv('masterdataframe.csv', parse_dates=True)
mma_fights = mma_dataframe[staticFeatures]
fighters_list = pd.DataFrame(mma_dataframe.drop_duplicates(fightStr)[fightStr])
feature_cols = mma_dataframe.columns

#Prepare datasets
fighters_list["matchcount"] = fighters_list.fighter.map(mma_dataframe.fighter.value_counts())
fighters_list = fighters_list.drop_duplicates(fightStr)
fighters_list = fighters_list.set_index(fightStr)
selected_fighters = pd.DataFrame(fighters_list.index[fighters_list["matchcount"] > MIN_MATCH_COUNT])
mma_dataframe[['minutes', 'seconds']] = mma_dataframe['time'].str.split(':', expand=True)
mma_fights['time'] = pd.to_numeric(mma_dataframe['minutes']) * 60 + pd.to_numeric(mma_dataframe['seconds'])

def fighter_statistics(fighter): 
    # Prepare helper functions
    def col_for_fighter(col): return mma_dataframe[col][forFighter]
    def matchAfter(seconds): return mma_dataframe["total_comp_time"] > seconds
    def matchBefore(seconds): return mma_dataframe["total_comp_time"] <= seconds
    def normalize(score): return max(0, score)
    def minimumOne(value): return max(1, value)

    # Prepare multiuse variables
    forFighter = (mma_dataframe[fightStr] == fighter)                      # For specific fighter
    lost = (mma_dataframe["result"]==0)                                    # Lost match
    won = (mma_dataframe["result"]==1)                                     # Won match
    withSub = (mma_dataframe["method"]=="SUB")                             # By submission
    withKO = (mma_dataframe["method"]=="KO/TKO")                           # By KO
    result = (mma_dataframe["result"])                                     # Match outcome
    num_fights = fighters_list.loc[fighter]["matchcount"]                  # Number of fights
    matchtime_total = col_for_fighter("total_comp_time").sum()             # Total matchtime   
    ground_time = col_for_fighter("control").sum()                         # Control time
    standup_time = matchtime_total - col_for_fighter("control").sum()      # Standup time

    def fight_ratio():
        return ground_time / standup_time

    def total_wins():
        return won[forFighter].sum()

    def control_ratio():
        controls = col_for_fighter("control").mean()
        return (controls / num_fights) / 2

    def standup_att():
        knockdowns = col_for_fighter('knockdowns').sum()
        sig_strikes_accuracy = col_for_fighter('sig_strikes_landed').mean()
        total_strikes_accuracy = col_for_fighter('total_strikes_landed').mean()
        head_strikes_accuracy = col_for_fighter('head_strikes_landed').mean()
        body_strikes_accuracy = col_for_fighter('body_strikes_landed').mean()
        leg_strikes_accuracy = col_for_fighter('leg_strikes_landed').mean()
        distance_strikes_accuracy = col_for_fighter('distance_strikes_landed').mean()
        clinch_strikes_accuracy = col_for_fighter('clinch_strikes_landed').mean()

        return (sig_strikes_accuracy + total_strikes_accuracy + head_strikes_accuracy + body_strikes_accuracy + leg_strikes_accuracy + distance_strikes_accuracy + clinch_strikes_accuracy) / 8 + knockdowns * 0.1

    def stamina():
        win_second_round= result[forFighter & matchAfter(300) & matchBefore(600)].sum()
        win_third_round= result[forFighter & matchAfter(600) & matchBefore(900)].sum()
        win_champ_round= result[forFighter & matchAfter(900)].sum()

        fights_second_round= result[forFighter & matchAfter(300) & matchBefore(600)].count()
        fights_third_round= result[forFighter & matchAfter(600) & matchBefore(900)].count()
        fights_champ_round= result[forFighter & matchAfter(900)].count()

        total_stamina = np.array([
            (0.2 * win_second_round / minimumOne(fights_second_round)),
            (0.6 * win_third_round / minimumOne(fights_third_round)),
            (win_champ_round / minimumOne(fights_champ_round))
        ]).sum()
        return normalize(total_stamina)

    def winloseratio():
        last_three_won = won[forFighter][-20:].sum()
        last_three_lost = lost[forFighter][-20:].sum()
    
        if last_three_lost == 0: 
            return 1 
        else:
            return last_three_won / last_three_lost

    def standup_def():
        sig_strikes_def = col_for_fighter('sig_strikes_def').mean()
        total_strikes_def = col_for_fighter('total_strikes_def').mean()
        head_strikes_def = col_for_fighter('head_strikes_def').mean()
        body_strikes_def = col_for_fighter('body_strikes_def').mean()
        leg_strikes_def = col_for_fighter('leg_strikes_def').mean()
        distance_strikes_def = col_for_fighter('distance_strikes_def').mean()
        clinch_strikes_def = col_for_fighter('clinch_strikes_def').mean()

        return (sig_strikes_def + total_strikes_def + head_strikes_def + body_strikes_def + leg_strikes_def + distance_strikes_def + clinch_strikes_def) / 8

    def ground_att():
        avg_takedowns_accuracy = col_for_fighter('takedowns_landed').mean()
        ground_strikes_accuracy = col_for_fighter('ground_strikes_landed').mean()
        control = col_for_fighter('control').mean()
        sub_attempts = col_for_fighter('avg_sub_attempts').mean()
        ground_skill_score = (control + sub_attempts + avg_takedowns_accuracy + ground_strikes_accuracy) / 4

        return ground_skill_score

    def ground_def():
        takedowns_def = col_for_fighter('takedowns_def').mean()
        ground_strikes_def = col_for_fighter('ground_strikes_def').mean()
        reversals = col_for_fighter('reversals').mean()
        return (takedowns_def + ground_strikes_def + reversals) / 3
    
    def fighter_score():
        LAST_MATCHES = 4 # Use last x matches of fighter
        score = 0
        multiplier = 0.4
        results = col_for_fighter("result")
        for matchResult in results[:LAST_MATCHES:-1]:
            if matchResult == 1: # Won match
                score += multiplier
            else:
                score -= multiplier
            multiplier += 0.4
        return score
    
    return total_wins(), fight_ratio(), ground_att(), ground_def(), standup_att(), standup_def(), fighter_score(), stamina(), winloseratio(), control_ratio()

suffixFighter = "_fighter"
suffixOpponent = "_opponent"
mma_fights = mma_fights.merge(selected_fighters, on=fightStr, how='inner')
mma_fights = mma_fights.merge(selected_fighters, left_on="opponent", right_on="fighter", how="inner", suffixes=(suffixFighter,suffixOpponent))
mma_fights = mma_fights.drop(f'{fightStr}_opponent', axis=1)
mma_fights['fighter'] = mma_fights['fighter_fighter']

#Transform data for dates
#mma_fights['date'] = pd.to_datetime(mma_fights['date'], errors='coerce')
#mma_fights['date'] = mma_fights['date'].astype('int64') / 10 ** 9

#Transform data for fighter names


finalFeatures = skillFeatures
suffixed_features = [skill + suffixFighter for skill in finalFeatures] + [skill + suffixOpponent for skill in finalFeatures]
suffixed_features = suffixed_features + extra_features



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  mma_fights['time'] = pd.to_numeric(mma_dataframe['minutes']) * 60 + pd.to_numeric(mma_dataframe['seconds'])


In [4]:
import pandas as pd
import numpy as np

fighter_stats = {}
for fighter in selected_fighters[fightStr]:
    fighter_stats[fighter] = fighter_statistics(fighter)

fighter_stats_df = pd.DataFrame.from_dict(fighter_stats, orient='index', columns=['total_wins', 'fight_ratio', 'ground_att', 'ground_def', 'standup_att', 'standup_def', 'fighter_score', 'stamina', 'winloseratio', 'control_ratio'])

# Calculate the correlation matrix
correlation_matrix = fighter_stats_df.corr()

x = correlation_matrix.style.background_gradient(cmap='coolwarm').format(precision=2)

In [107]:
x

Unnamed: 0,total_wins,fight_ratio,ground_att,ground_def,standup_att,standup_def,fighter_score,stamina,winloseratio,control_ratio
total_wins,1.0,0.06,0.15,0.26,0.25,0.21,0.7,0.63,0.51,-0.33
fight_ratio,0.06,1.0,0.8,0.02,-0.11,-0.01,0.04,0.16,0.11,0.63
ground_att,0.15,0.8,1.0,-0.05,0.15,-0.13,0.1,0.25,0.16,0.74
ground_def,0.26,0.02,-0.05,1.0,0.16,0.26,0.14,0.24,0.31,-0.15
standup_att,0.25,-0.11,0.15,0.16,1.0,-0.19,0.13,0.37,0.28,0.01
standup_def,0.21,-0.01,-0.13,0.26,-0.19,1.0,0.11,0.15,0.22,-0.2
fighter_score,0.7,0.04,0.1,0.14,0.13,0.11,1.0,0.43,0.31,-0.14
stamina,0.63,0.16,0.25,0.24,0.37,0.15,0.43,1.0,0.6,-0.07
winloseratio,0.51,0.11,0.16,0.31,0.28,0.22,0.31,0.6,1.0,-0.08
control_ratio,-0.33,0.63,0.74,-0.15,0.01,-0.2,-0.14,-0.07,-0.08,1.0


In [5]:
mma_dataframe['ground_defence'] = (mma_dataframe['takedowns_def'] + mma_dataframe['ground_strikes_def'] + mma_dataframe['reversals'])
mma_dataframe['ground_attack'] = (mma_dataframe['takedowns_landed'] + mma_dataframe['sub_attempts'] + mma_dataframe['ground_strikes_landed'] + mma_dataframe['control'])
mma_dataframe['standup_defence'] = (mma_dataframe['sig_strikes_def'] + mma_dataframe['total_strikes_def'] + mma_dataframe['head_strikes_def'] + mma_dataframe['body_strikes_def'] + mma_dataframe['leg_strikes_def'] + mma_dataframe['distance_strikes_def'] + mma_dataframe['clinch_strikes_def'])
mma_dataframe['standup_attack'] = (mma_dataframe['sig_strikes_landed'] + mma_dataframe['total_strikes_landed'] + mma_dataframe['head_strikes_landed'] + mma_dataframe['body_strikes_landed'] + mma_dataframe['leg_strikes_landed'] + mma_dataframe['distance_strikes_landed'] + mma_dataframe['clinch_strikes_landed'])

mma_dataframe['stamina'] = (mma_dataframe['avg_takedowns_attempts_per_min'] + mma_dataframe['control'])

mma_dataframe['advantages'] = (mma_dataframe['height'] + mma_dataframe['reach'] + mma_dataframe['age'])

mma_dataframe['fight_ratio'] = mma_dataframe['control'] / (mma_dataframe['total_comp_time'] - mma_dataframe['control'])

mma_dataframe['total_skills'] = (mma_dataframe['fight_ratio'] + mma_dataframe['knockdowns'] +mma_dataframe['ground_defence'] + mma_dataframe['ground_attack'] + mma_dataframe['standup_defence'] + mma_dataframe['standup_attack']) 


lst = ['ground_defence', 'control', 'fight_ratio', 'stamina', 'ground_attack', 'standup_defence', 'standup_attack', 'total_skills', 'knockdowns', 'result']


chosen_mma_dataframe = mma_dataframe[lst]
corr = chosen_mma_dataframe.corr()

x = corr.style.background_gradient(cmap='coolwarm').format(precision=2)
x

Unnamed: 0,ground_defence,control,fight_ratio,stamina,ground_attack,standup_defence,standup_attack,total_skills,knockdowns,result
ground_defence,1.0,-0.0,0.1,-0.0,0.0,0.31,0.09,0.39,0.21,0.36
control,-0.0,1.0,0.44,1.0,1.0,-0.1,0.25,0.73,-0.09,0.28
fight_ratio,0.1,0.44,1.0,0.44,0.45,0.09,-0.03,0.34,-0.06,0.19
stamina,-0.0,1.0,0.44,1.0,1.0,-0.1,0.25,0.73,-0.09,0.28
ground_attack,0.0,1.0,0.45,1.0,1.0,-0.1,0.25,0.74,-0.08,0.29
standup_defence,0.31,-0.1,0.09,-0.1,-0.1,1.0,-0.14,0.35,0.15,0.27
standup_attack,0.09,0.25,-0.03,0.25,0.25,-0.14,1.0,0.59,0.02,0.23
total_skills,0.39,0.73,0.34,0.73,0.74,0.35,0.59,1.0,0.06,0.49
knockdowns,0.21,-0.09,-0.06,-0.09,-0.08,0.15,0.02,0.06,1.0,0.31
result,0.36,0.28,0.19,0.28,0.29,0.27,0.23,0.49,0.31,1.0
