In [9]:
from sklearn import linear_model
from sklearn import tree
from sklearn import ensemble
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score
import statsmodels.api as sm
import matplotlib.pyplot as plt
import pandas as pd
import os
import joblib
from sklearn.compose import ColumnTransformer
# all the same imports as model_fitting.ipynb

In [10]:
game_data = pd.read_csv("../Data/data/clean_game_data.csv",dtype={"p1_id" : "string","p2_id" : "string","p1_char" : "string", "p2_char" : "string", "stage" : "string", "p1_games_played" : "int32", "p1_games_won" : "int32", "p2_games_played" : "int32", "p2_games_won" : "int32", "p1_won" : "bool"})
game_data = pd.get_dummies(game_data, columns=["p1_char","p2_char","stage"], prefix_sep=".", )

In [11]:
game_train, game_test = train_test_split(game_data, train_size = 0.8, stratify = game_data[["p1_won"]], random_state=2049)
X = game_train.loc[:,game_train.columns != "p1_won"]
y = game_train["p1_won"]

X_test = game_test.loc[:,game_train.columns != "p1_won"]
y_test = game_test["p1_won"]
# We use the same seed so that we get the same testing data as in our model_fitting file. This is important for evaluation.

In [13]:
# Let's load our models in

lm = joblib.load("models/logistic_regression.joblib")
en = joblib.load("models/elastic_net.joblib")
dtc = joblib.load("models/decision_tree.joblib")
rfc = joblib.load("models/random_forest.joblib")
gbc = joblib.load("models/boosted_tree.joblib")
rfc.set_params(random_forest__verbose = 0)
gbc.set_params(boosted_tree__verbose = 0)

In [14]:
def get_metrics(model):
    prediction = model.predict(X_test)
    actual = y_test
    print("Metrics for {model}\n".format(model=model[-1]))
    print("Accuracy: %0.4f" % accuracy_score(prediction,actual))
    print("ROC_AUC: %0.4f" % roc_auc_score(prediction,actual))
    print("\n")

In [15]:
get_metrics(lm)
get_metrics(en)
get_metrics(dtc)
get_metrics(rfc)
get_metrics(gbc)

Metrics for LogisticRegression(penalty=None)

Accuracy: 0.6434
ROC_AUC: 0.6434


Metrics for LogisticRegression(C=0.01, l1_ratio=1.0, penalty='elasticnet', solver='saga')

Accuracy: 0.6432
ROC_AUC: 0.6432


Metrics for DecisionTreeClassifier(max_depth=10, min_samples_leaf=10, random_state=42)

Accuracy: 0.6481
ROC_AUC: 0.6483


Metrics for RandomForestClassifier(min_samples_leaf=3, n_estimators=200, n_jobs=4,
                       random_state=420)

Accuracy: 0.6782
ROC_AUC: 0.6782


Metrics for GradientBoostingClassifier(learning_rate=0.2, max_depth=2, min_samples_leaf=3,
                           n_estimators=500, random_state=21)

Accuracy: 0.6802
ROC_AUC: 0.6802


