In [1]:
import pandas as pd

In [2]:
ufcActiveFighters = pd.read_csv("ufcActiveFighters.csv")
ufcFights = pd.read_csv("ufcFights.csv")
ufcAdvancedStats = pd.read_csv("ufcAdvancedStats.csv")

In [28]:
fighterStatsSummary = {}
for index, fighter in ufcActiveFighters.iterrows():
    fighter_id = fighter['fighter_id']
    fighterFights = ufcFights[(ufcFights['fighter1_id'] == fighter_id) | (ufcFights['fighter2_id'] == fighter_id)]
    fightIds = fighterFights['fight_id'].unique()
    fighterStats = ufcAdvancedStats[(ufcAdvancedStats['fighter_id'] == fighter_id) & (ufcAdvancedStats['fight_id'].isin(fightIds))].copy()
    opponentStats = ufcAdvancedStats[(ufcAdvancedStats['fighter_id'] != fighter_id) & (ufcAdvancedStats['fight_id'].isin(fightIds))].copy()
    fighterStats.loc[:, 'sig_str_attempted'] = ((fighterStats['sig_str'] / fighterStats['sig_str_pct']) * 100).round() 
    fighterStats.loc[:, 'takedowns_attempted'] = ((fighterStats['td'] / fighterStats['td_pct']) * 100).round() 
    opponentStats.loc[:, 'sig_str_attempted'] = ((opponentStats['sig_str'] / opponentStats['sig_str_pct']) * 100).round() 
    opponentStats.loc[:, 'takedowns_attempted'] = ((opponentStats['td'] / opponentStats['td_pct']) * 100).round() 

    #Fight data stuff
    totalFights = len(fightIds)
    totalFinishes = 0
    totalFightTimeSecs = 0

    for fight_id in fightIds:
        fightData = ufcFights[ufcFights['fight_id'] == fight_id].iloc[0]
        if fightData['fighter1_id'] == fighter_id and fightData['outcome'] == 'win':
            if fightData['method'] in ['KO/TKO', 'SUB']:
                totalFinishes += 1
        
        roundEnd = fightData['rounds']
        endTime = fightData['time']
        minutes, seconds = map(int, endTime.split(':'))  # Convert to minutes and seconds

        #If no finish
        if (roundEnd == 3 and minutes == 5 and seconds == 0) or (roundEnd == 5 and minutes == 5 and seconds == 0):
            totalFightTimeSecs += (15 * 60 if roundEnd == 3 else 25 * 60)  # 15 minutes = 900 seconds, 25 minutes = 1500 seconds
        else:
            # Fight time is round_end_number - 1 * 5 minutes converted to seconds + endtime seconds
            totalFightTimeSecs += ((roundEnd - 1) * 5 * 60) + (minutes * 60 + seconds) 


    #Sum the values
    totalSigStrikes = fighterStats['sig_str'].sum()
    totalSigStrikesAttempted = fighterStats['sig_str_attempted'].sum()
    totalKnockdowns = fighterStats['kd'].sum()
    totalTakedowns = fighterStats['td'].sum()
    totalTakedownsAttempted = fighterStats['takedowns_attempted'].sum()
    totalControlSecs = fighterStats['ctrl'].sum()
    totalTakedownPercentage = totalTakedowns / totalTakedownsAttempted if totalTakedownsAttempted != 0 else 0
    totalSigStrikePercentage = totalSigStrikes / totalSigStrikesAttempted if totalTakedownsAttempted != 0 else 0

    opponentSigStrikes = opponentStats['sig_str'].sum()
    opponentControlTimeSecs = opponentStats['ctrl'].sum()
    opponentTakedowns = opponentStats['td'].sum()
    opponentTotalTakedown = opponentStats['takedowns_attempted'].sum()
    opponentTakedownPercentage = opponentTakedowns / opponentTotalTakedown if opponentTotalTakedown != 0 else 0
    



    fighterStatsSummary[fighter_id] = {
        'totalSigStrikes': totalSigStrikes,
        'totalSigStrikesAttempted' : totalSigStrikesAttempted,
        'totalKnockdowns' : totalKnockdowns,
        'totalTakedowns' : totalTakedowns,
        'totalTakedownsAttempted' : totalTakedownsAttempted,
        'totalControlSecs' : totalControlSecs,
        'totalTakedownPercentage' : totalTakedownPercentage,
        'totalSigStrikePercentage' : totalSigStrikePercentage,
        'opponentSigStrikes' : opponentSigStrikes,
        'opponentControlTimeSecs' : opponentControlTimeSecs,
        'opponentTakedowns' : opponentTakedowns,
        'opponentTotalTakedown' : opponentTotalTakedown,
        'opponentTakedownPercentage' : opponentTakedownPercentage, 
        'totalFights' : totalFights,
        'totalFinishes' : totalFinishes,
        'totalFightTimeSecs' : totalFightTimeSecs

    }

    
fighterStatsSummaryDf = pd.DataFrame.from_dict(fighterStatsSummary, orient='index')
fighterStatsSummaryDf.to_csv("fighterTotalStatsRaw", index = True)
    

    

In [43]:
ufcFighterTotalStats = pd.read_csv("fighterTotalStatsRaw", dtype={'fighter_id': int})

perMinuteStatsSummary = {}

for index, row in ufcFighterTotalStats.iterrows():
    fighter_id = row['fighter_id'].astype(int)
    totalFightTimeSecs = row['totalFightTimeSecs']
    totalFightTimeMins = totalFightTimeSecs * 1.0 / 60
    totalSigStrikes = row['totalSigStrikes']
    totalSigStrikesAttempted = row['totalSigStrikesAttempted']
    opponentSigStrikes = row['opponentSigStrikes']
    totalKnockdowns = row['totalKnockdowns']
    totalControlSecs = row['totalControlSecs']
    totalTakedowns = row['totalTakedowns']
    totalTakedownsAttempted = row['totalTakedownsAttempted']
    opponentTakedowns = row['opponentTakedowns']
    opponentTotalTakedown = row['opponentTotalTakedown']
    finishes = row['totalFinishes']
    totalFights = row['totalFights']
    opponentControlTimeSecs = row['opponentControlTimeSecs']



    #Per Min Equations
    strikingVolume = totalSigStrikes / totalFightTimeMins
    strikingSuccess = totalSigStrikes / totalSigStrikesAttempted
    strikingDefense = opponentSigStrikes / totalFightTimeMins
    knockdownsPerMin = totalKnockdowns / totalFightTimeMins
    grappingSuccess = (totalControlSecs * 1.0 / totalFightTimeSecs) * (totalTakedowns * 1.0 / totalTakedownsAttempted) #might want to mess with this a bit
    grapplingDefense = opponentTakedowns * 1.0 / opponentTotalTakedown
    finishingAbility = finishes * 1.0 / totalFights
    controlDominance = totalControlSecs / opponentControlTimeSecs if opponentControlTimeSecs != 0 else 2


    

    perMinuteStatsSummary[fighter_id] = {
        'strikingVolume': strikingVolume,
        'strikingSuccess' : strikingSuccess,
        'strikingDefense' : strikingDefense,
        'knockdownsPerMin' : knockdownsPerMin,
        'grappingSuccess' : grappingSuccess,
        'grapplingDefense' : grapplingDefense,
        'finishingAbility' : finishingAbility,
        'controlDominance' : controlDominance

    }

perMinuteStatsSummaryDf = pd.DataFrame.from_dict(perMinuteStatsSummary, orient='index')
perMinuteStatsSummaryDf.to_csv("perMinuteStats.csv")
print(perMinuteStatsSummaryDf)
#ufcFighterTotalStats.head(10)

      strikingVolume  strikingSuccess  strikingDefense  knockdownsPerMin  \
229         3.675909         0.446783         3.473453          0.034149   
268         4.713949         0.468185         4.657420          0.031405   
408         3.353892         0.493410         3.665520          0.011686   
457         3.139741         0.371540         2.627082          0.014175   
581         3.908642         0.531465         2.769676          0.050621   
...              ...              ...              ...               ...   
2573        5.079365         0.363636         4.444444          0.000000   
2574        3.979592         0.406250         4.897959          0.000000   
2575       10.200000         0.386364         8.400000          0.600000   
2576        1.985294         0.428571         2.426471          0.000000   
2577        1.200000         0.321429         4.866667          0.000000   

      grappingSuccess  grapplingDefense  finishingAbility  controlDominance  
229      

  grappingSuccess = (totalControlSecs * 1.0 / totalFightTimeSecs) * (totalTakedowns * 1.0 / totalTakedownsAttempted) #might want to mess with this a bit
  grapplingDefense = opponentTakedowns * 1.0 / opponentTotalTakedown
  strikingSuccess = totalSigStrikes / totalSigStrikesAttempted


In [69]:
def calculateZScore(group):
    return (group - group.mean()) / group.std()

In [None]:
currentFighters = pd.read_csv("ufcActiveFighters.csv")
perMinuteStats = pd.read_csv("perMinuteStats.csv")

currentFighters = currentFighters[['fighter_id', 'weightclass']]
perMinuteStats = pd.merge(perMinuteStats, currentFighters, on='fighter_id', how='left')

numeric_cols = perMinuteStats.select_dtypes(include=['float64', 'int64']).columns
numeric_cols = numeric_cols[numeric_cols != 'fighter_id']  # Exclude 'fighter_id'
#print(numeric_cols)

perMinuteStats[numeric_cols] = perMinuteStats.groupby('weightclass')[numeric_cols].transform(calculateZScore)


# Include the 'fighter_id' column and 'weightclass' for reference
perMinuteStats['fighter_id'] = perMinuteStats['fighter_id']
perMinuteStats['weightclass'] = perMinuteStats['weightclass']

# Optionally, filter out 'weightclass' if you don't need it in the final output
#fighterZScores = perMinuteStats[['fighter_id'] + [col for col in perMinuteStats.columns if col.endswith('_z')]]

# Print the result
#print(perMinuteStats)d
#perMinuteStats.to_csv("kValueStats.csv")
kValues = pd.read_csv("kValueStats.csv")
kValues.head(10)


Unnamed: 0,id,fighter_id,strikingVolume,strikingSuccess,strikingDefense,knockdownsPerMin,grappingSuccess,grapplingDefense,finishingAbility,controlDominance,weightclass
0,0,229,-0.204693,-0.88479,-0.399508,0.090422,-0.210763,-0.871132,-0.182158,-0.293041,Heavyweight
1,1,268,0.377955,0.011135,0.060571,0.47616,0.102934,-0.53262,0.038688,-0.057823,Welterweight
2,2,408,-0.382238,-0.025378,-0.254114,-0.372749,1.215704,-0.881063,-0.00095,-0.133366,Middleweight
3,3,457,-0.450527,-1.055115,-1.023078,-0.126636,0.633426,-0.944237,-0.35539,0.150562,Lightweight
4,4,581,-0.246918,1.16135,-0.866794,1.393832,0.328575,-0.147342,0.78314,-0.354065,Welterweight
5,5,596,0.131647,1.147113,-0.817311,-0.267237,-0.037773,-0.891921,0.629949,0.27791,Light Heavyweight
6,6,607,-0.315045,-0.297854,-0.708218,-0.120001,1.312021,0.241733,0.731157,-0.037277,Lightweight
7,7,612,-0.283694,-0.330192,-0.609121,-0.142464,0.324199,-0.248236,-0.071852,-0.141049,Lightweight
8,8,723,-0.241663,-0.35421,-0.725892,-0.135618,0.488552,-0.667489,-0.112047,-0.064146,Lightweight
9,9,735,0.314879,-1.207788,-0.207769,-0.323904,-0.334764,-0.972479,-0.785797,-0.109956,Middleweight
