In [2]:
# !pip install --upgrade pandas xgboost scikit-learn

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the fighter statistics table from a CSV file given by relative path.
# Later cells read the 'date' and 'outcome' columns and every column whose
# name contains 'precomp' from this frame.
df = pd.read_csv('fighter_stats.csv')

In [89]:
# Sanity check on the loaded frame: number of columns whose name contains
# 'precomp', followed by the number of rows.
print(f"{df.filter(like='precomp').shape[1]} columns")
print(len(df))
# Trailing expression so the notebook renders one example feature column.
df['precomp_recent_avg_distance_absorbed_diff']

1694 columns
10274


0       -0.229556
1       -0.057370
2       -0.133333
3        0.000000
4       -0.415099
           ...   
10269    0.000000
10270    0.000000
10271    0.000000
10272    0.000000
10273    0.000000
Name: precomp_recent_avg_distance_absorbed_diff, Length: 10274, dtype: float64

In [7]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [5]:
# Chronological hold-out split on the 'date' column.
#   * rows dated on or before TRAIN_START_EXCLUSIVE are discarded,
#   * rows dated on or after TEST_START form the test set,
#   * everything in between is the training set.
# NOTE(review): assumes df['date'] holds ISO 'YYYY-MM-DD' strings so that
# lexical order equals chronological order -- TODO confirm.
TRAIN_START_EXCLUSIVE = '2015-12-31'
# The original cutoff was `> '2022-11-31'`, which is not a real calendar date
# and only worked because the comparison is lexical. `>= '2022-12-01'` selects
# the same rows and stays valid if the column is ever parsed to datetimes.
TEST_START = '2022-12-01'

subset = df.loc[df['date'] > TRAIN_START_EXCLUSIVE]

# Build the mask from `subset` itself rather than from `df`, so the selection
# does not depend on index alignment between two different frames.
is_test = subset['date'] >= TEST_START
test_df = subset.loc[is_test]
train_df = subset.loc[~is_test]

# The two parts must partition `subset` exactly.
assert len(train_df) + len(test_df) == len(subset)

# Target is the 'outcome' column; features are all columns containing 'precomp'.
y_test = test_df['outcome']
X_test = test_df.filter(like='precomp')
print(X_test.shape[0])
y_train = train_df['outcome']
X_train = train_df.filter(like='precomp')
print(X_train.shape[0])
# Fraction of the selected rows that ended up in the test set.
print(X_test.shape[0] / (X_train.shape[0] + X_test.shape[0]))

1234
4796
0.20464344941956883


In [46]:
# Baseline linear classifier fitted on the 'precomp' feature columns.
logreg_options = dict(solver='liblinear', max_iter=10000)
model = LogisticRegression(**logreg_options)
# Kept as the cell's last expression so the notebook still shows the fitted estimator.
model.fit(X_train, y_train)

In [48]:
# Score the currently fitted `model` on the held-out rows.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy: {}".format(accuracy))

Accuracy: 0.570316301703163


In [94]:
from sklearn.ensemble import RandomForestClassifier

# Random-forest baseline on the same features.
# The forest is a randomized estimator: without a fixed `random_state` the
# accuracy printed below changes on every run. `n_jobs=-1` only parallelises
# tree building and does not affect the fitted model for a given seed.
model = RandomForestClassifier(random_state=0, n_jobs=-1)
model.fit(X_train, y_train)

# Evaluate on the held-out rows.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

Accuracy: 0.5899513776337115


In [70]:
!pip install hyperopt

Collecting hyperopt
  Downloading hyperopt-0.2.7-py2.py3-none-any.whl.metadata (1.7 kB)
Collecting networkx>=2.2 (from hyperopt)
  Downloading networkx-3.3-py3-none-any.whl.metadata (5.1 kB)
Collecting future (from hyperopt)
  Downloading future-1.0.0-py3-none-any.whl.metadata (4.0 kB)
Collecting cloudpickle (from hyperopt)
  Downloading cloudpickle-3.0.0-py3-none-any.whl.metadata (7.0 kB)
Collecting py4j (from hyperopt)
  Downloading py4j-0.10.9.7-py2.py3-none-any.whl.metadata (1.5 kB)
Downloading hyperopt-0.2.7-py2.py3-none-any.whl (1.6 MB)
   ---------------------------------------- 0.0/1.6 MB ? eta -:--:--
   - -------------------------------------- 0.0/1.6 MB 991.0 kB/s eta 0:00:02
   --------- ------------------------------ 0.4/1.6 MB 4.6 MB/s eta 0:00:01
   ---------------------------------------  1.6/1.6 MB 12.5 MB/s eta 0:00:01
   ---------------------------------------- 1.6/1.6 MB 11.2 MB/s eta 0:00:00
Downloading networkx-3.3-py3-none-any.whl (1.7 MB)
   --------------------

In [3]:
import xgboost as xgb

In [9]:
from hyperopt import hp, STATUS_OK, tpe, Trials, fmin

# Fixed XGBoost settings shared by every hyperopt trial; `objective` copies
# these first and then overlays the sampled hyperparameters.
base_params = dict(
  tree_method='hist',
  objective='binary:logistic',
  verbosity=0,
  n_jobs=-1,
  n_estimators=180,
  seed=0,
)

def objective(space):
  """Hyperopt objective: train one XGBoost classifier and score it.

  Args:
    space: dict of hyperparameter values for this trial. It must contain
      'max_depth', 'reg_alpha' and 'min_child_weight'; any other keys are
      passed to ``xgb.XGBClassifier`` unchanged.

  Returns:
    dict with 'loss' (the negated accuracy, because hyperopt minimises the
    loss) and 'status' set to ``STATUS_OK``.
  """
  # NOTE(review): trials are scored on X_test / y_test, so the search selects
  # hyperparameters on the test set and the accuracy later reported for the
  # tuned model is optimistically biased. Consider scoring on a validation
  # split carved out of the training data instead.

  # Sampled values override the fixed defaults.
  params = {**base_params, **space}
  # These three are sampled with hp.quniform; cast them to int as before.
  for k in ('max_depth', 'reg_alpha', 'min_child_weight'):
    params[k] = int(space[k])

  clf = xgb.XGBClassifier(**params)
  # No eval_set: nothing consumed the per-round metrics (no early stopping),
  # so evaluating train and test data on every boosting round was wasted work.
  clf.fit(X_train, y_train, verbose=False)

  # predict() already returns class labels, so no thresholding is needed.
  pred = clf.predict(X_test)
  accuracy = accuracy_score(y_test, pred)
  print ("SCORE:", accuracy)
  return {'loss': -accuracy, 'status': STATUS_OK }

# Collects the result of every evaluated trial.
trials = Trials()

# Hyperopt search space, one entry per tuned XGBoost parameter.
# The original literal listed 'gamma' and 'learning_rate' twice. In a dict
# literal the last value for a key wins, so the earlier ranges
# (gamma in [1, 9], learning_rate in [0.01, 0.2]) were silently discarded.
# Only the ranges that were actually in effect are kept here.
# NOTE(review): learning_rate and subsample are sampled down to 0, where
# boosting barely learns / almost no rows are drawn. If the narrower
# learning_rate range was the intended one, restore it here.
space = {
  'max_depth': hp.quniform('max_depth', 3, 18, 1),
  'gamma': hp.uniform('gamma', 0, 10),
  'reg_alpha': hp.quniform('reg_alpha', 1, 180, 1),
  'reg_lambda': hp.uniform('reg_lambda', 0, 10),
  'min_child_weight': hp.quniform('min_child_weight', 0, 10, 1),
  'learning_rate': hp.uniform('learning_rate', 0, 1),
  'scale_pos_weight': hp.uniform('scale_pos_weight', 0.1, 1),
  'colsample_bytree': hp.uniform('colsample_bytree', 0.5, 1),
  'colsample_bylevel': hp.uniform('colsample_bylevel', 0.5, 1),
  'colsample_bynode': hp.uniform('colsample_bynode', 0.5, 1),
  'subsample': hp.uniform('subsample', 0, 1),
}

# Run the TPE search for 100 trials, minimising the loss returned by `objective`.
# `rstate` seeds hyperopt's sampler so that a rerun proposes the same sequence
# of candidates; without it every run explores different hyperparameters.
# (hyperopt 0.2.7 expects a numpy Generator here.)
best_hyperparams = fmin(fn = objective,
                        space = space,
                        algo = tpe.suggest,
                        max_evals = 100,
                        trials = trials,
                        rstate = np.random.default_rng(0))

print("The best hyperparameters are : ","\n")
print(best_hyperparams)

SCORE:                                                                                                                                                                                                          
0.5883306320907618                                                                                                                                                                                              
SCORE:                                                                                                                                                                                                          
0.5745542949756888                                                                                                                                                                                              
SCORE:                                                                                                                                                              

In [12]:
# Final XGBoost configuration.
# NOTE(review): the tuned values look like they were pasted from a hyperopt
# run (they were wrapped in np.float64(...)); they are written as plain floats
# here, which are the same numbers. Confirm they match the run you intend to keep.
params = {
    # Fixed settings, mirroring `base_params` used during the search.
    "tree_method": "hist",
    "objective": "binary:logistic",
    "verbosity": 0,
    "n_jobs": -1,
    "n_estimators": 180,
    "seed": 0,  # the search trained every trial with seed 0; state it here too
    # Tuned settings.
    "colsample_bylevel": 0.8497599275226619,
    "colsample_bynode": 0.5008160313472609,
    "colsample_bytree": 0.9725183368881618,
    "gamma": 4.189027961860227,
    "learning_rate": 0.03814040166239305,
    "max_depth": 8,
    "min_child_weight": 0,
    "reg_alpha": 92,
    "reg_lambda": 7.211872931754883,
    "scale_pos_weight": 0.9960300484223034,
    "subsample": 0.6216121777044358,
}

model = xgb.XGBClassifier(**params)
model.fit(X_train, y_train)

# predict() returns class labels, not probabilities, so the former
# `(ypred >= 0.5).astype(int)` step changed nothing.
ypred = model.predict(X_test)
ypred_binary = ypred  # alias kept in case a later cell still uses this name
accuracy = accuracy_score(y_test, ypred)
print(f"XGBoost test accuracy: {accuracy}")

XGBoost test accuracy: 0.6077795786061588


In [13]:
# Rank the training columns by the fitted model's importance scores and
# print the 20 highest-ranked ones as "name: score".
importances = model.feature_importances_
feature_importances = (
    pd.DataFrame({'Feature': X_train.columns, 'Importance': importances})
    .sort_values(by='Importance', ascending=False)
)
top_features = feature_importances.head(20)
for feature_name, importance in zip(top_features['Feature'], top_features['Importance']):
    print(f"{feature_name}: {importance}")

precomp_recent_avg_age_diff_vs_opp: 0.10844708979129791
precomp_recent_avg_head_defended_diff: 0.107951320707798
precomp_recent_avg_total_str_landed_diff_vs_opp: 0.10147129744291306
opponent_precomp_recent_avg_age: 0.09321340918540955
precomp_control_time_diff_peak_vs_opp: 0.09268488734960556
precomp_avg_sig_str_landed_diff_vs_opp: 0.09121507406234741
precomp_recent_avg_sig_str_landed_diff_vs_opp: 0.08885915577411652
precomp_recent_avg_head_landed_diff_vs_opp: 0.08799506723880768
precomp_recent_avg_age_diff: 0.08129054307937622
precomp_avg_age_diff_vs_opp: 0.07683244347572327
precomp_recent_avg_age: 0.07003972679376602
precomp_avg_knockdowns: 0.0
precomp_recent_avg_knockdowns: 0.0
precomp_knockdowns_peak: 0.0
precomp_knockdowns_valley: 0.0
precomp_recent_avg_knockdowns_vs_peak: 0.0
precomp_recent_avg_knockdowns_vs_valley: 0.0
precomp_avg_knockdowns_vs_peak: 0.0
precomp_clinch_landed_peak_vs_opp: 0.0
precomp_clinch_landed_valley_vs_opp: 0.0


In [14]:
# Write the estimator currently bound to `model` to 'model.json' (relative path).
# NOTE(review): `model` is rebound by several cells (logistic regression,
# random forest, XGBoost); this call saves whichever one ran last. The last
# visible assignment is the XGBClassifier -- confirm the execution order
# before relying on the saved file.
model.save_model('model.json')