In [1]:
# Initial imports.
import pandas as pd
from pathlib import Path
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

In [2]:
# Read the fighter CSV, discard every row that has a missing value, and preview
# the first ten remaining rows.
# NOTE(review): the preview saved under this cell lists only `Camps` and
# `Results`, while the feature preview further down shows numeric fight
# statistics -- confirm this path is the file the modelling cells were run on.
file_path = Path('../Joeys_Branch/fighters_camps_correlations.csv')
fighter_df = pd.read_csv(file_path).dropna()
fighter_df.head(10)

Unnamed: 0,Camps,Results
0,BMF Ranch,1
1,Demian Maia Jiu-Jitsu,1
2,BMF Ranch,1
3,Demian Maia Jiu-Jitsu,1
4,BMF Ranch,1
5,Miller Brothers MMA,1
6,Demian Maia Jiu-Jitsu,1
7,BMF Ranch,1
8,Tristar Gym,1
9,Miller Brothers MMA,1


In [38]:
# Feature matrix: every column of the cleaned frame except the "Results" label.
X = fighter_df.drop(columns=["Results"])
X.head()

Unnamed: 0,avg_KD,avg_opp_KD,avg_SIG_STR_pct,avg_opp_SIG_STR_pct,avg_TD_pct,avg_opp_TD_pct,avg_SUB_ATT,avg_opp_SUB_ATT,avg_REV,avg_opp_REV,...,win_by_Decision_Split,win_by_Decision_Unanimous,win_by_KO/TKO,win_by_Submission,win_by_TKO_Doctor_Stoppage,Stance,Height,Reach,Weight,Age
0,1.0,0.0,0.5,0.46,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,1,0,0,1.0,170.18,177.8,135.0,27.0
1,1.03125,0.0625,0.576875,0.38125,0.40625,0.11625,0.25,1.1875,0.375,0.25,...,1,0,3,0,0,1.0,182.88,187.96,185.0,28.0
2,0.546875,0.1875,0.538906,0.598594,0.0,0.3125,0.0,0.25,0.0,0.0,...,0,1,3,0,0,2.0,187.96,190.5,264.0,28.0
4,0.0,0.000977,0.403076,0.554961,0.511719,0.62875,0.231445,0.03125,0.03125,0.5,...,0,1,2,2,0,1.0,167.64,172.72,135.0,43.0
5,0.3125,0.015625,0.565156,0.343594,0.337031,0.048906,0.039062,0.0,0.0,0.5,...,1,2,2,2,0,1.0,182.88,190.5,155.0,41.0


In [39]:
# Define the target set as a 1-D NumPy array holding the "Results" labels.
# `Series.to_numpy()` is used instead of `Series.ravel()`: pandas has deprecated
# `Series.ravel()` (a Series is already one-dimensional), and the array produced
# here is the same.
y = fighter_df["Results"].to_numpy()
y[:5]

array([1, 1, 1, 0, 0], dtype=int64)

In [40]:
# Partition features and labels into train/test subsets using scikit-learn's
# default split proportions; the fixed seed makes the partition repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=2,
)

In [41]:
# Create a random forest classifier.
# 5000 trees are expensive to build one at a time, so `n_jobs=-1` lets
# scikit-learn use every available core for fitting and predicting. The fixed
# `random_state` keeps the fitted forest reproducible.
fighter_model = RandomForestClassifier(
    n_estimators=5000,
    random_state=2,
    n_jobs=-1,
)

In [42]:
# Train the forest on the training partition. `fit` returns the estimator, so
# the name is rebound to the now-fitted model.
fighter_model = fighter_model.fit(
    X=X_train,
    y=y_train,
)

In [43]:
# Predict a class label for every row of the held-out test features.
predictions = fighter_model.predict(X=X_test)

In [44]:
# Calculating the confusion matrix.
# The class order is pinned explicitly: without `labels`, scikit-learn infers
# the label set from the data, so a class absent from both `y_test` and
# `predictions` would shrink the matrix and no longer match the axis names below.
class_labels = [0, 1]
cm = confusion_matrix(y_test, predictions, labels=class_labels)

# Create a DataFrame from the confusion matrix; rows are the true classes and
# columns are the predicted classes, both in `class_labels` order.
cm_df = pd.DataFrame(
    cm,
    index=[f"Actual {label}" for label in class_labels],
    columns=[f"Predicted {label}" for label in class_labels],
)

cm_df

Unnamed: 0,Predicted 0,Predicted 1
Actual 0,489,651
Actual 1,405,762


In [45]:
# Accuracy of the predictions against the true test labels.
acc_score = accuracy_score(y_true=y_test, y_pred=predictions)

In [46]:
# Evaluation summary: confusion matrix table, overall accuracy, then the
# per-class precision/recall/F1 report.
report_text = classification_report(y_test, predictions)

print("Confusion Matrix")
display(cm_df)
print(f"Accuracy Score : {acc_score}")
print("Classification Report")
print(report_text)

Confusion Matrix


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,489,651
Actual 1,405,762


Accuracy Score : 0.5422626788036411
Classification Report
              precision    recall  f1-score   support

           0       0.55      0.43      0.48      1140
           1       0.54      0.65      0.59      1167

    accuracy                           0.54      2307
   macro avg       0.54      0.54      0.54      2307
weighted avg       0.54      0.54      0.54      2307



In [47]:
# Read the per-feature importance scores from the fitted random forest and
# show the raw array.
importances = fighter_model.feature_importances_
importances

array([0.01236603, 0.01015545, 0.02124986, 0.02294492, 0.01801961,
       0.01680888, 0.01433204, 0.01320394, 0.00955403, 0.00912015,
       0.01695191, 0.01765954, 0.01666093, 0.01749271, 0.01780963,
       0.01972411, 0.01776846, 0.0193312 , 0.01938868, 0.01693748,
       0.01868144, 0.01509638, 0.01752535, 0.01912817, 0.01737359,
       0.01884463, 0.01864314, 0.01833933, 0.01851956, 0.01806095,
       0.01927487, 0.01906151, 0.01858305, 0.0180005 , 0.01712622,
       0.01752392, 0.01718937, 0.01762495, 0.01836117, 0.01810433,
       0.01819403, 0.01841444, 0.01949836, 0.01891044, 0.01753451,
       0.01718931, 0.02084247, 0.02081354, 0.01863532, 0.01390363,
       0.00436529, 0.00733302, 0.00399925, 0.00801462, 0.00972772,
       0.00866414, 0.        , 0.00066732, 0.00399438, 0.00592122,
       0.00632315, 0.0054544 , 0.00130466, 0.00343518, 0.01233268,
       0.01432717, 0.01068904, 0.02096869])

In [49]:
# Pair each importance score with its feature name and list the pairs from the
# highest score to the lowest.
importance_name_pairs = zip(fighter_model.feature_importances_, X.columns)
sorted(importance_name_pairs, reverse=True)

[(0.02294491672890903, 'avg_opp_SIG_STR_pct'),
 (0.021249857331155506, 'avg_SIG_STR_pct'),
 (0.020968686191259148, 'Age'),
 (0.020842470154125582, 'avg_CTRL_time(seconds)'),
 (0.02081353973707194, 'avg_opp_CTRL_time(seconds)'),
 (0.0197241140351132, 'avg_TOTAL_STR_landed'),
 (0.019498362606852327, 'avg_GROUND_att'),
 (0.019388684893213284, 'avg_TD_att'),
 (0.019331203642084103, 'avg_opp_TOTAL_STR_landed'),
 (0.01927486653698827, 'avg_LEG_att'),
 (0.01912817293441343, 'avg_HEAD_landed'),
 (0.01906151247489095, 'avg_LEG_landed'),
 (0.01891044043481325, 'avg_GROUND_landed'),
 (0.018844627790731764, 'avg_opp_HEAD_landed'),
 (0.0186814358046926, 'avg_opp_TD_att'),
 (0.018643140940186178, 'avg_BODY_att'),
 (0.018635323784724756, 'total_time_fought(seconds)'),
 (0.01858304845092119, 'avg_opp_LEG_att'),
 (0.01851956435411724, 'avg_opp_BODY_att'),
 (0.018414439110380338, 'avg_opp_CLINCH_landed'),
 (0.018361167105530842, 'avg_CLINCH_att'),
 (0.018339329371224764, 'avg_BODY_landed'),
 (0.01819403