In [4]:
import pandas as pd

In [2]:
ufcActiveFighters = pd.read_csv("ufcActiveFighters.csv")
ufcFights = pd.read_csv("ufcFights.csv")
ufcAdvancedStats = pd.read_csv("ufcAdvancedStats.csv")

In [28]:
fighterStatsSummary = {}
for index, fighter in ufcActiveFighters.iterrows():
    fighter_id = fighter['fighter_id']
    fighterFights = ufcFights[(ufcFights['fighter1_id'] == fighter_id) | (ufcFights['fighter2_id'] == fighter_id)]
    fightIds = fighterFights['fight_id'].unique()
    fighterStats = ufcAdvancedStats[(ufcAdvancedStats['fighter_id'] == fighter_id) & (ufcAdvancedStats['fight_id'].isin(fightIds))].copy()
    opponentStats = ufcAdvancedStats[(ufcAdvancedStats['fighter_id'] != fighter_id) & (ufcAdvancedStats['fight_id'].isin(fightIds))].copy()
    fighterStats.loc[:, 'sig_str_attempted'] = ((fighterStats['sig_str'] / fighterStats['sig_str_pct']) * 100).round() 
    fighterStats.loc[:, 'takedowns_attempted'] = ((fighterStats['td'] / fighterStats['td_pct']) * 100).round() 
    opponentStats.loc[:, 'sig_str_attempted'] = ((opponentStats['sig_str'] / opponentStats['sig_str_pct']) * 100).round() 
    opponentStats.loc[:, 'takedowns_attempted'] = ((opponentStats['td'] / opponentStats['td_pct']) * 100).round() 

    #Fight data stuff
    totalFights = len(fightIds)
    totalFinishes = 0
    totalFightTimeSecs = 0

    for fight_id in fightIds:
        fightData = ufcFights[ufcFights['fight_id'] == fight_id].iloc[0]
        if fightData['fighter1_id'] == fighter_id and fightData['outcome'] == 'win':
            if fightData['method'] in ['KO/TKO', 'SUB']:
                totalFinishes += 1
        
        roundEnd = fightData['rounds']
        endTime = fightData['time']
        minutes, seconds = map(int, endTime.split(':'))  # Convert to minutes and seconds

        #If no finish
        if (roundEnd == 3 and minutes == 5 and seconds == 0) or (roundEnd == 5 and minutes == 5 and seconds == 0):
            totalFightTimeSecs += (15 * 60 if roundEnd == 3 else 25 * 60)  # 15 minutes = 900 seconds, 25 minutes = 1500 seconds
        else:
            # Fight time is round_end_number - 1 * 5 minutes converted to seconds + endtime seconds
            totalFightTimeSecs += ((roundEnd - 1) * 5 * 60) + (minutes * 60 + seconds) 


    #Sum the values
    totalSigStrikes = fighterStats['sig_str'].sum()
    totalSigStrikesAttempted = fighterStats['sig_str_attempted'].sum()
    totalKnockdowns = fighterStats['kd'].sum()
    totalTakedowns = fighterStats['td'].sum()
    totalTakedownsAttempted = fighterStats['takedowns_attempted'].sum()
    totalControlSecs = fighterStats['ctrl'].sum()
    totalTakedownPercentage = totalTakedowns / totalTakedownsAttempted if totalTakedownsAttempted != 0 else 0
    totalSigStrikePercentage = totalSigStrikes / totalSigStrikesAttempted if totalTakedownsAttempted != 0 else 0

    opponentSigStrikes = opponentStats['sig_str'].sum()
    opponentControlTimeSecs = opponentStats['ctrl'].sum()
    opponentTakedowns = opponentStats['td'].sum()
    opponentTotalTakedown = opponentStats['takedowns_attempted'].sum()
    opponentTakedownPercentage = opponentTakedowns / opponentTotalTakedown if opponentTotalTakedown != 0 else 0
    



    fighterStatsSummary[fighter_id] = {
        'totalSigStrikes': totalSigStrikes,
        'totalSigStrikesAttempted' : totalSigStrikesAttempted,
        'totalKnockdowns' : totalKnockdowns,
        'totalTakedowns' : totalTakedowns,
        'totalTakedownsAttempted' : totalTakedownsAttempted,
        'totalControlSecs' : totalControlSecs,
        'totalTakedownPercentage' : totalTakedownPercentage,
        'totalSigStrikePercentage' : totalSigStrikePercentage,
        'opponentSigStrikes' : opponentSigStrikes,
        'opponentControlTimeSecs' : opponentControlTimeSecs,
        'opponentTakedowns' : opponentTakedowns,
        'opponentTotalTakedown' : opponentTotalTakedown,
        'opponentTakedownPercentage' : opponentTakedownPercentage, 
        'totalFights' : totalFights,
        'totalFinishes' : totalFinishes,
        'totalFightTimeSecs' : totalFightTimeSecs

    }

    
fighterStatsSummaryDf = pd.DataFrame.from_dict(fighterStatsSummary, orient='index')
fighterStatsSummaryDf.to_csv("fighterTotalStatsRaw", index = True)
    

    

In [43]:
ufcFighterTotalStats = pd.read_csv("fighterTotalStatsRaw", dtype={'fighter_id': int})

perMinuteStatsSummary = {}

for index, row in ufcFighterTotalStats.iterrows():
    fighter_id = row['fighter_id'].astype(int)
    totalFightTimeSecs = row['totalFightTimeSecs']
    totalFightTimeMins = totalFightTimeSecs * 1.0 / 60
    totalSigStrikes = row['totalSigStrikes']
    totalSigStrikesAttempted = row['totalSigStrikesAttempted']
    opponentSigStrikes = row['opponentSigStrikes']
    totalKnockdowns = row['totalKnockdowns']
    totalControlSecs = row['totalControlSecs']
    totalTakedowns = row['totalTakedowns']
    totalTakedownsAttempted = row['totalTakedownsAttempted']
    opponentTakedowns = row['opponentTakedowns']
    opponentTotalTakedown = row['opponentTotalTakedown']
    finishes = row['totalFinishes']
    totalFights = row['totalFights']
    opponentControlTimeSecs = row['opponentControlTimeSecs']



    #Per Min Equations
    strikingVolume = totalSigStrikes / totalFightTimeMins
    strikingSuccess = totalSigStrikes / totalSigStrikesAttempted
    strikingDefense = opponentSigStrikes / totalFightTimeMins
    knockdownsPerMin = totalKnockdowns / totalFightTimeMins
    grappingSuccess = (totalControlSecs * 1.0 / totalFightTimeSecs) * (totalTakedowns * 1.0 / totalTakedownsAttempted) #might want to mess with this a bit
    grapplingDefense = opponentTakedowns * 1.0 / opponentTotalTakedown
    finishingAbility = finishes * 1.0 / totalFights
    controlDominance = totalControlSecs / opponentControlTimeSecs if opponentControlTimeSecs != 0 else 2


    

    perMinuteStatsSummary[fighter_id] = {
        'strikingVolume': strikingVolume,
        'strikingSuccess' : strikingSuccess,
        'strikingDefense' : strikingDefense,
        'knockdownsPerMin' : knockdownsPerMin,
        'grappingSuccess' : grappingSuccess,
        'grapplingDefense' : grapplingDefense,
        'finishingAbility' : finishingAbility,
        'controlDominance' : controlDominance

    }

perMinuteStatsSummaryDf = pd.DataFrame.from_dict(perMinuteStatsSummary, orient='index')
perMinuteStatsSummaryDf.to_csv("perMinuteStats.csv")
print(perMinuteStatsSummaryDf)
#ufcFighterTotalStats.head(10)

      strikingVolume  strikingSuccess  strikingDefense  knockdownsPerMin  \
229         3.675909         0.446783         3.473453          0.034149   
268         4.713949         0.468185         4.657420          0.031405   
408         3.353892         0.493410         3.665520          0.011686   
457         3.139741         0.371540         2.627082          0.014175   
581         3.908642         0.531465         2.769676          0.050621   
...              ...              ...              ...               ...   
2573        5.079365         0.363636         4.444444          0.000000   
2574        3.979592         0.406250         4.897959          0.000000   
2575       10.200000         0.386364         8.400000          0.600000   
2576        1.985294         0.428571         2.426471          0.000000   
2577        1.200000         0.321429         4.866667          0.000000   

      grappingSuccess  grapplingDefense  finishingAbility  controlDominance  
229      

  grappingSuccess = (totalControlSecs * 1.0 / totalFightTimeSecs) * (totalTakedowns * 1.0 / totalTakedownsAttempted) #might want to mess with this a bit
  grapplingDefense = opponentTakedowns * 1.0 / opponentTotalTakedown
  strikingSuccess = totalSigStrikes / totalSigStrikesAttempted


In [5]:
def calculateZScore(group):
    return (group - group.mean()) / group.std()

In [None]:
currentFighters = pd.read_csv("ufcActiveFighters.csv")
perMinuteStats = pd.read_csv("perMinuteStats.csv")

currentFighters = currentFighters[['fighter_id', 'weightclass']]
perMinuteStats = pd.merge(perMinuteStats, currentFighters, on='fighter_id', how='left')

numeric_cols = perMinuteStats.select_dtypes(include=['float64', 'int64']).columns
numeric_cols = numeric_cols[numeric_cols != 'fighter_id']  # Exclude 'fighter_id'
#print(numeric_cols)

perMinuteStats[numeric_cols] = perMinuteStats.groupby('weightclass')[numeric_cols].transform(calculateZScore)


# Include the 'fighter_id' column and 'weightclass' for reference
perMinuteStats['fighter_id'] = perMinuteStats['fighter_id']
perMinuteStats['weightclass'] = perMinuteStats['weightclass']



In [None]:
kValues = pd.read_csv("fighterKValues.csv")

z_score_columns = [
    'strikingVolume', 'strikingSuccess', 'strikingDefense', 
    'knockdownsPerMin', 'grappingSuccess', 'grapplingDefense', 
    'finishingAbility', 'controlDominance'
]

# Define the weights for each stat
weights = {
    'strikingVolume': 0.15,
    'strikingSuccess': 0.075,
    'strikingDefense': 0.15,
    'knockdownsPerMin': 0.075,
    'grappingSuccess': 0.2,
    'grapplingDefense': 0.15,
    'finishingAbility': 0.10,
    'controlDominance': 0.10
}

kValues['z_score_composite'] = sum(kValues[col] * weights[col] for col in z_score_columns)

min_range = 1500
max_range = 2200

min_z_score = kValues['z_score_composite'].min()
max_z_score = kValues['z_score_composite'].max()

currentFighters = pd.read_csv("ufcActiveFighters.csv")
# Scale the values
kValues['scaled_z_score_composite'] = min_range + ((kValues['z_score_composite'] - min_z_score) / (max_z_score - min_z_score)) * (max_range - min_range)
kValues = pd.merge(kValues, currentFighters[['fighter_id', 'name']], on='fighter_id', how='left')
kValues = kValues[['fighter_id', 'name', 'z_score_composite', 'scaled_z_score_composite']]

#NEED TO FIX DOUBLE COMMA BUGS
kValues.to_csv("fighterZScoresAndComposite.csv", index = False)

In [42]:
ufcComposite = pd.read_csv("fighterZScoresAndComposite.csv")
ufcActiveFighters = pd.read_csv("ufcActiveFighters.csv")
ufcActiveFighters = pd.merge(ufcActiveFighters, ufcComposite[['fighter_id', 'scaled_z_score_composite']], on='fighter_id', how='left')
ufcActiveFighters['currentCombinedScore'] = (ufcActiveFighters['currentElo'] * 0.65) + (ufcActiveFighters['scaled_z_score_composite'] * 0.35)


ufcActiveFighters.to_csv("ufcActiveFightersWithComposite.csv", index=False)



In [46]:
ufcScores = pd.read_csv("ufcActiveFightersWithComposite.csv")
sorted_ufcScores = ufcScores.sort_values(by='currentCombinedScore', ascending=False)

# Print the top 10 rows
print(sorted_ufcScores[['fighter_id', 'name', 'currentCombinedScore']].head(50))

     fighter_id                   name  currentCombinedScore
5           596              Jon Jones           1994.912764
97         1438        Islam Makhachev           1988.048148
335        2062           Tom Aspinall           1960.600646
350        2100      Dricus Du Plessis           1937.476520
46         1117           Amanda Nunes           1932.046543
22          881           Stipe Miocic           1905.647884
137        1612  Alexander Volkanovski           1900.913068
101        1455           Kamaru Usman           1900.176226
351        2102           Ilia Topuria           1899.572432
26          929           Max Holloway           1891.067268
425        2215           Alex Pereira           1882.101985
211        1815            Zhang Weili           1879.491416
11          750       Charles Oliveira           1876.369550
15          792         Dustin Poirier           1870.818163
176        1730          Sean O'Malley           1869.551661
442        2235        J