In [31]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import LabelEncoder
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

In [6]:
ufc_data = pd.read_csv('ufc-master.csv')
ufc_data.head()

Unnamed: 0,RedFighter,BlueFighter,RedOdds,BlueOdds,RedExpectedValue,BlueExpectedValue,Date,Location,Country,Winner,...,FinishDetails,FinishRound,FinishRoundTime,TotalFightTimeSecs,RedDecOdds,BlueDecOdds,RSubOdds,BSubOdds,RKOOdds,BKOOdds
0,Neil Magny,Carlos Prates,550.0,-800.0,550.0,12.5,2024-11-09,"Las Vegas, Nevada, USA",USA,Blue,...,Punch,1.0,4:50,290.0,1100.0,800.0,1600.0,800.0,2000.0,-400.0
1,Gerald Meerschaert,Reinier de Ridder,250.0,-310.0,250.0,32.2581,2024-11-09,"Las Vegas, Nevada, USA",USA,Blue,...,Arm Triangle,3.0,1:44,704.0,700.0,200.0,600.0,165.0,1400.0,450.0
2,Gaston Bolanos,Cortavious Romious,205.0,-250.0,205.0,40.0,2024-11-09,"Las Vegas, Nevada, USA",USA,Red,...,,3.0,5:00,900.0,550.0,275.0,3000.0,165.0,380.0,450.0
3,Luana Pinheiro,Gillian Robertson,360.0,-470.0,360.0,21.2766,2024-11-09,"Las Vegas, Nevada, USA",USA,Blue,...,,3.0,5:00,900.0,600.0,250.0,2200.0,110.0,900.0,500.0
4,Mansur Abdul-Malik,Dusko Todorovic,-485.0,370.0,20.6186,370.0,2024-11-09,"Las Vegas, Nevada, USA",USA,Red,...,Punches,1.0,2:44,164.0,600.0,,800.0,,-250.0,


In [10]:
ufc_data.shape

(6489, 118)

In [13]:
print(ufc_data.columns.tolist())

['RedFighter', 'BlueFighter', 'RedOdds', 'BlueOdds', 'RedExpectedValue', 'BlueExpectedValue', 'Date', 'Location', 'Country', 'Winner', 'TitleBout', 'WeightClass', 'Gender', 'NumberOfRounds', 'BlueCurrentLoseStreak', 'BlueCurrentWinStreak', 'BlueDraws', 'BlueAvgSigStrLanded', 'BlueAvgSigStrPct', 'BlueAvgSubAtt', 'BlueAvgTDLanded', 'BlueAvgTDPct', 'BlueLongestWinStreak', 'BlueLosses', 'BlueTotalRoundsFought', 'BlueTotalTitleBouts', 'BlueWinsByDecisionMajority', 'BlueWinsByDecisionSplit', 'BlueWinsByDecisionUnanimous', 'BlueWinsByKO', 'BlueWinsBySubmission', 'BlueWinsByTKODoctorStoppage', 'BlueWins', 'BlueStance', 'BlueHeightCms', 'BlueReachCms', 'BlueWeightLbs', 'RedCurrentLoseStreak', 'RedCurrentWinStreak', 'RedDraws', 'RedAvgSigStrLanded', 'RedAvgSigStrPct', 'RedAvgSubAtt', 'RedAvgTDLanded', 'RedAvgTDPct', 'RedLongestWinStreak', 'RedLosses', 'RedTotalRoundsFought', 'RedTotalTitleBouts', 'RedWinsByDecisionMajority', 'RedWinsByDecisionSplit', 'RedWinsByDecisionUnanimous', 'RedWinsByKO', 

In [14]:
# instead of having winner labeled as red/blue,
# i'll create a column with the name of the winner
ufc_data['WinnerName'] = ufc_data.apply(lambda x: x['BlueFighter'] if x['Winner'] == 'Blue' else x['RedFighter'], axis = 1)
ufc_data.head()

Unnamed: 0,RedFighter,BlueFighter,RedOdds,BlueOdds,RedExpectedValue,BlueExpectedValue,Date,Location,Country,Winner,...,FinishRound,FinishRoundTime,TotalFightTimeSecs,RedDecOdds,BlueDecOdds,RSubOdds,BSubOdds,RKOOdds,BKOOdds,WinnerName
0,Neil Magny,Carlos Prates,550.0,-800.0,550.0,12.5,2024-11-09,"Las Vegas, Nevada, USA",USA,Blue,...,1.0,4:50,290.0,1100.0,800.0,1600.0,800.0,2000.0,-400.0,Carlos Prates
1,Gerald Meerschaert,Reinier de Ridder,250.0,-310.0,250.0,32.2581,2024-11-09,"Las Vegas, Nevada, USA",USA,Blue,...,3.0,1:44,704.0,700.0,200.0,600.0,165.0,1400.0,450.0,Reinier de Ridder
2,Gaston Bolanos,Cortavious Romious,205.0,-250.0,205.0,40.0,2024-11-09,"Las Vegas, Nevada, USA",USA,Red,...,3.0,5:00,900.0,550.0,275.0,3000.0,165.0,380.0,450.0,Gaston Bolanos
3,Luana Pinheiro,Gillian Robertson,360.0,-470.0,360.0,21.2766,2024-11-09,"Las Vegas, Nevada, USA",USA,Blue,...,3.0,5:00,900.0,600.0,250.0,2200.0,110.0,900.0,500.0,Gillian Robertson
4,Mansur Abdul-Malik,Dusko Todorovic,-485.0,370.0,20.6186,370.0,2024-11-09,"Las Vegas, Nevada, USA",USA,Red,...,1.0,2:44,164.0,600.0,,800.0,,-250.0,,Mansur Abdul-Malik


In [15]:
label_enc = LabelEncoder()
ufc_data['RedStance'] = label_enc.fit_transform(ufc_data['RedStance'])
ufc_data['BlueStance'] = label_enc.fit_transform(ufc_data['BlueStance'])
ufc_data['WeightClass'] = label_enc.fit_transform(ufc_data['WeightClass'])

In [23]:
# assigning each fighter to a unique ID
fighters = pd.concat([ufc_data['RedFighter'], ufc_data['BlueFighter']]).unique()
ids = range(1, len(fighters) + 1)

f_id = pd.DataFrame({'ids': ids, 'name': fighters})
id_dict = dict(zip(fighters, ids))
f_id.head()

Unnamed: 0,ids,name
0,1,Neil Magny
1,2,Gerald Meerschaert
2,3,Gaston Bolanos
3,4,Luana Pinheiro
4,5,Mansur Abdul-Malik


In [29]:
# of unique fighters in the dataset
len(fighters)

2100

In [24]:
ufc_data['RedFighter'].replace(id_dict, inplace = True)
ufc_data['BlueFighter'].replace(id_dict, inplace = True)
ufc_data['WinnerName'].replace(id_dict, inplace = True)
ufc_data.head()

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  ufc_data['RedFighter'].replace(id_dict, inplace = True)
  ufc_data['RedFighter'].replace(id_dict, inplace = True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  ufc_data['BlueFighter'].replace(id_dict, inplace = True)
  ufc_data['BlueFighter'].replace(id_dict, inplace = True)
Th

Unnamed: 0,RedFighter,BlueFighter,RedOdds,BlueOdds,RedExpectedValue,BlueExpectedValue,Date,Location,Country,Winner,...,FinishRound,FinishRoundTime,TotalFightTimeSecs,RedDecOdds,BlueDecOdds,RSubOdds,BSubOdds,RKOOdds,BKOOdds,WinnerName
0,1,1656,550.0,-800.0,550.0,12.5,2024-11-09,"Las Vegas, Nevada, USA",USA,Blue,...,1.0,4:50,290.0,1100.0,800.0,1600.0,800.0,2000.0,-400.0,1656
1,2,1657,250.0,-310.0,250.0,32.2581,2024-11-09,"Las Vegas, Nevada, USA",USA,Blue,...,3.0,1:44,704.0,700.0,200.0,600.0,165.0,1400.0,450.0,1657
2,3,1658,205.0,-250.0,205.0,40.0,2024-11-09,"Las Vegas, Nevada, USA",USA,Red,...,3.0,5:00,900.0,550.0,275.0,3000.0,165.0,380.0,450.0,3
3,4,351,360.0,-470.0,360.0,21.2766,2024-11-09,"Las Vegas, Nevada, USA",USA,Blue,...,3.0,5:00,900.0,600.0,250.0,2200.0,110.0,900.0,500.0,351
4,5,730,-485.0,370.0,20.6186,370.0,2024-11-09,"Las Vegas, Nevada, USA",USA,Red,...,1.0,2:44,164.0,600.0,,800.0,,-250.0,,5


In [30]:
def american_to_decimal(odds):
    if odds > 0:
        return odds / 100 + 1
    else:
        return 100 / abs(odds) + 1

In [32]:
# transforming odds using the helper func
ufc_data['red_decimal_odds'] = ufc_data['RedOdds'].apply(american_to_decimal)
ufc_data['blue_decimal_odds'] = ufc_data['BlueOdds'].apply(american_to_decimal)

ufc_data['odds_difference'] = ufc_data['red_decimal_odds'] - ufc_data['blue_decimal_odds']
ufc_data.head()

Unnamed: 0,RedFighter,BlueFighter,RedOdds,BlueOdds,RedExpectedValue,BlueExpectedValue,Date,Location,Country,Winner,...,RedDecOdds,BlueDecOdds,RSubOdds,BSubOdds,RKOOdds,BKOOdds,WinnerName,red_decimal_odds,blue_decimal_odds,odds_difference
0,1,1656,550.0,-800.0,550.0,12.5,2024-11-09,"Las Vegas, Nevada, USA",USA,Blue,...,1100.0,800.0,1600.0,800.0,2000.0,-400.0,1656,6.5,1.125,5.375
1,2,1657,250.0,-310.0,250.0,32.2581,2024-11-09,"Las Vegas, Nevada, USA",USA,Blue,...,700.0,200.0,600.0,165.0,1400.0,450.0,1657,3.5,1.322581,2.177419
2,3,1658,205.0,-250.0,205.0,40.0,2024-11-09,"Las Vegas, Nevada, USA",USA,Red,...,550.0,275.0,3000.0,165.0,380.0,450.0,3,3.05,1.4,1.65
3,4,351,360.0,-470.0,360.0,21.2766,2024-11-09,"Las Vegas, Nevada, USA",USA,Blue,...,600.0,250.0,2200.0,110.0,900.0,500.0,351,4.6,1.212766,3.387234
4,5,730,-485.0,370.0,20.6186,370.0,2024-11-09,"Las Vegas, Nevada, USA",USA,Red,...,600.0,,800.0,,-250.0,,5,1.206186,4.7,-3.493814


## Modeling:

In [251]:
sett = ufc_data[['RedOdds', 'BlueOdds', 'LossDif', 'SigStrDif', 'ReachDif', 'AvgTDDif', 'KODif', 'SubDif', 'WeightClass', 'Winner']].dropna()
X = sett[['RedOdds', 'BlueOdds','LossDif','SigStrDif','ReachDif','AvgTDDif','KODif','SubDif', 'WeightClass']]
Y = sett['Winner']

X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size = 0.1, shuffle = True) 
sett.shape

(6253, 10)

In [252]:
X_train.shape

(5627, 9)

In [253]:
X_test.shape

(626, 9)

In [254]:
mlp = MLPClassifier(hidden_layer_sizes = (32, 16, 16), activation = 'relu', max_iter = 300, random_state = 42)
mlp.fit(X_train, y_train)

y_pred_mlp = mlp.predict(X_test)
accuracy = accuracy_score(y_test, y_pred_mlp)

accuracy

0.6805111821086262

Notes:

- Out of the three classifiers, it seems that Random Forests or a Neural Network work best.

- Attempting to predict using WinnerName as Y gives very low accuracy below 5%

In [255]:
upcoming = pd.read_csv('upcoming.csv')
upcoming['WeightClass'] = label_enc.fit_transform(upcoming['WeightClass'])

# match between Weidman and Anders was cancelled
december_match = upcoming.loc[6, ['RedOdds', 'BlueOdds','LossDif','SigStrDif','ReachDif','AvgTDDif','KODif','SubDif', 'WeightClass']]
upcoming.drop(6, inplace = True)
upcoming

Unnamed: 0,RedFighter,BlueFighter,RedOdds,BlueOdds,RedExpectedValue,BlueExpectedValue,Date,Location,Country,Winner,...,FinishDetails,FinishRound,FinishRoundTime,TotalFightTimeSecs,RedDecOdds,BlueDecOdds,RSubOdds,BSubOdds,RKOOdds,BKOOdds
0,Jon Jones,Stipe Miocic,-625,455,16.0,455.0,2024-11-16,"New York City, New York, USA",USA,,...,,,,,400,1400,200,3500,120,700
1,Charles Oliveira,Michael Chandler,-258,210,38.7597,210.0,2024-11-16,"New York City, New York, USA",USA,,...,,,,,1200,1100,130,900,215,380
2,Bo Nickal,Paul Craig,-1200,750,8.3333,750.0,2024-11-16,"New York City, New York, USA",USA,,...,,,,,700,3000,175,1400,-150,3000
3,Viviane Araujo,Karine Silva,240,-298,240.0,33.557,2024-11-16,"New York City, New York, USA",USA,,...,,,,,300,100,2000,300,2000,750
4,Mauricio Ruffy,James Llontop,-900,600,11.1111,600.0,2024-11-16,"New York City, New York, USA",USA,,...,,,,,180,1200,800,3500,-150,1800
5,Jonathan Martinez,Marcus McGhee,114,-135,114.0,74.0741,2024-11-16,"New York City, New York, USA",USA,,...,,,,,215,250,1400,1400,500,225
7,Jim Miller,Damon Jackson,185,-225,185.0,44.4444,2024-11-16,"New York City, New York, USA",USA,,...,,,,,550,130,900,400,500,500
8,David Onama,Roberto Romero,-1350,800,7.4074,800.0,2024-11-16,"New York City, New York, USA",USA,,...,,,,,275,1400,275,4000,-120,2200
9,Marcin Tybura,Jhonata Diniz,-115,-105,86.9565,95.2381,2024-11-16,"New York City, New York, USA",USA,,...,,,,,350,650,300,2200,450,165
10,Mickey Gall,Ramiz Brahimaj,-166,140,60.241,140.0,2024-11-16,"New York City, New York, USA",USA,,...,,,,,180,300,380,550,450,1100


In [256]:
# manually adding in the Winner for the upcoming dataset according to https://www.ufc.com/event/ufc-309
winners = ['Red', 'Red', 'Red', 'Red', 'Red', 'Blue', 'Red', 'Red', 'Red', 'Blue', 'Blue', 'Blue']
upcoming['Winner'] = winners
upcoming

Unnamed: 0,RedFighter,BlueFighter,RedOdds,BlueOdds,RedExpectedValue,BlueExpectedValue,Date,Location,Country,Winner,...,FinishDetails,FinishRound,FinishRoundTime,TotalFightTimeSecs,RedDecOdds,BlueDecOdds,RSubOdds,BSubOdds,RKOOdds,BKOOdds
0,Jon Jones,Stipe Miocic,-625,455,16.0,455.0,2024-11-16,"New York City, New York, USA",USA,Red,...,,,,,400,1400,200,3500,120,700
1,Charles Oliveira,Michael Chandler,-258,210,38.7597,210.0,2024-11-16,"New York City, New York, USA",USA,Red,...,,,,,1200,1100,130,900,215,380
2,Bo Nickal,Paul Craig,-1200,750,8.3333,750.0,2024-11-16,"New York City, New York, USA",USA,Red,...,,,,,700,3000,175,1400,-150,3000
3,Viviane Araujo,Karine Silva,240,-298,240.0,33.557,2024-11-16,"New York City, New York, USA",USA,Red,...,,,,,300,100,2000,300,2000,750
4,Mauricio Ruffy,James Llontop,-900,600,11.1111,600.0,2024-11-16,"New York City, New York, USA",USA,Red,...,,,,,180,1200,800,3500,-150,1800
5,Jonathan Martinez,Marcus McGhee,114,-135,114.0,74.0741,2024-11-16,"New York City, New York, USA",USA,Blue,...,,,,,215,250,1400,1400,500,225
7,Jim Miller,Damon Jackson,185,-225,185.0,44.4444,2024-11-16,"New York City, New York, USA",USA,Red,...,,,,,550,130,900,400,500,500
8,David Onama,Roberto Romero,-1350,800,7.4074,800.0,2024-11-16,"New York City, New York, USA",USA,Red,...,,,,,275,1400,275,4000,-120,2200
9,Marcin Tybura,Jhonata Diniz,-115,-105,86.9565,95.2381,2024-11-16,"New York City, New York, USA",USA,Red,...,,,,,350,650,300,2200,450,165
10,Mickey Gall,Ramiz Brahimaj,-166,140,60.241,140.0,2024-11-16,"New York City, New York, USA",USA,Blue,...,,,,,180,300,380,550,450,1100


In [257]:
x_upc = upcoming[['RedOdds', 'BlueOdds','LossDif','SigStrDif','ReachDif','AvgTDDif','KODif','SubDif', 'WeightClass']]

upcoming_pred = mlp.predict(x_upc)
mlp_accuracy = accuracy_score(winners, upcoming_pred)

# rfc_pred = rfc.predict(x_upc)
# rfc_accuracy = accuracy_score(winners, rfc_pred)

# dtc_pred = dtc.predict(x_upc)
# dtc_accuracy = accuracy_score(winners, dtc_pred)

print(f"MLP Test Accuracy for recent matches: {mlp_accuracy * 100:.2f}%")
# print(f"RFC Test Accuracy for recent matches: {rfc_accuracy * 100:.2f}%")
# print(f"DTC Test Accuracy for recent matches: {dtc_accuracy * 100:.2f}%")

MLP Test Accuracy for recent matches: 66.67%


In [258]:
upcoming['Predictions'] = upcoming_pred
predictions = upcoming.apply(lambda x: x['BlueFighter'] if x['Predictions'] == 'Blue' else x['RedFighter'], axis=1)
actual = upcoming.apply(lambda x: x['BlueFighter'] if x['Winner'] == 'Blue' else x['RedFighter'], axis=1)

comparison = pd.DataFrame({'model_predictions': predictions, 'actual_results': actual})
comparison

Unnamed: 0,model_predictions,actual_results
0,Jon Jones,Jon Jones
1,Charles Oliveira,Charles Oliveira
2,Bo Nickal,Bo Nickal
3,Karine Silva,Viviane Araujo
4,Mauricio Ruffy,Mauricio Ruffy
5,Marcus McGhee,Marcus McGhee
7,Damon Jackson,Jim Miller
8,David Onama,David Onama
9,Jhonata Diniz,Marcin Tybura
10,Mickey Gall,Ramiz Brahimaj


In [259]:
#prediction for Dec. 7
mlp.predict(december_match.to_numpy().reshape(1, -1))



array(['Blue'], dtype='<U4')

In [263]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

feature_counts = range(1, X.shape[1] + 1)  # Test with 1 to all features
accuracies = []

# Train and evaluate models with increasing number of features
for count in feature_counts:
    # Select the first 'count' features
    X_subset = X.iloc[:, :count]
    
    # Split the dataset
    X_train, X_test, y_train, y_test = train_test_split(X_subset, Y, test_size=0.25, shuffle=True)
    
    # Train the model
    mlp = MLPClassifier(hidden_layer_sizes=(32, 16, 16), activation='relu', max_iter=300, random_state=42)
    mlp.fit(X_train, y_train)
    
    # Evaluate the model
    y_pred = mlp.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    accuracies.append(accuracy)

# Plot the elbow graph
plt.figure(figsize=(8, 6))
plt.plot(feature_counts, accuracies, marker='o')
plt.title("Elbow Graph for Number of Features")
plt.xlabel("Number of Features")
plt.ylabel("Accuracy")
plt.grid(True)
plt.tight_layout()
plt.show()


ModuleNotFoundError: No module named 'matplotlib'

In [264]:
sns.heatmap(ufc_data.isnull(), yticklabels=False)
plt.title("Heatmap for NaN values in our dataset")
plt.show()

NameError: name 'sns' is not defined

In [18]:
import numpy as np
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Use the same code you already have for data preprocessing and model training.
# Assuming `mlp` is your trained MLP model and you've already split the data.

def predict_winner_terminal():
    print("Enter the following features for the fighters:")
    
    try:
        red_odds = float(input("Fighter 1 Odds: "))
        blue_odds = float(input("Fighter 2 Odds: "))
        loss_dif = float(input("Loss Difference(Fighter 1 - Fighter 2): "))
        sig_str_dif = float(input("Significant Strikes Difference(Fighter 1 - Fighter 2): "))
        reach_dif = float(input("Reach Difference(Fighter 1 - Fighter 2): "))
        avg_td_dif = float(input("Average Takedown Difference(Fighter 1 - Fighter 2): "))
        ko_dif = float(input("Knockout Difference(Fighter 1 - Fighter 2): "))
        sub_dif = float(input("Submission Difference(Fighter 1 - Fighter 2): "))
        weight_class = int(input("Weight Class (as an integer): "))

        # Create the feature array
        features = np.array([[red_odds, blue_odds, loss_dif, sig_str_dif, reach_dif, avg_td_dif, ko_dif, sub_dif, weight_class]])
        
        # Predict the winner
        prediction = mlp.predict(features)
        winner = "Fighter 1" if prediction[0] == "Red" else "Fighter 2"

        print(f"The predicted winner is: {winner}")

    except ValueError:
        print("Invalid input. Please enter numeric values where required.")

# Run the function
if __name__ == "__main__":
    predict_winner_terminal()


Enter the following features for the fighters:
The predicted winner is: Fighter 2


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=5a713994-b4e1-47cc-89fe-2b6d51a0ee77' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>