# ML Notebook 4 - MLP Classifier

In [22]:
import sys
sys.path.append('../')
import copy
#import config
#import dota2api
import json
import requests
from pandas.io.json import json_normalize 
import joblib

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

pd.options.display.max_columns = 200
pd.options.display.max_rows = 200

import warnings; warnings.simplefilter('ignore')

In [2]:
# Sklearn stuff
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, roc_auc_score

SEED = 17

In [3]:
# Import Training and Test Data  
train_features = pd.read_csv('data2/dpc_train_features.csv', index_col='match_id')
train_targets = pd.read_csv('data2/dpc_train_target.csv', index_col='match_id')
test_features = pd.read_csv('data2/dpc_test_features.csv', index_col='match_id')
test_targets = pd.read_csv('data2/dpc_test_target.csv', index_col='match_id')

In [4]:
# Define our targets and features 

X_remainder = train_features
X_test = test_features

y_remainder = train_targets.radiant_win.values
y_test = test_targets.radiant_win.values

In [5]:
test_targets.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 117 entries, 4870161807 to 5055077558
Data columns (total 2 columns):
 #   Column       Non-Null Count  Dtype
---  ------       --------------  -----
 0   radiant_win  117 non-null    int64
 1   duration     117 non-null    int64
dtypes: int64(2)
memory usage: 2.7 KB


We will try out the MLP classifier once out of the box with the raw data. 

In [6]:
# Split training data 

X_train, X_valid, y_train, y_valid = train_test_split(X_remainder, y_remainder, test_size = 0.1, random_state = SEED) ## see top, SEED = 17

In [7]:
from sklearn.neural_network import MLPClassifier

In [8]:
mlp1 = MLPClassifier()

mlp1.fit(X_train, y_train)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(100,), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=200,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=None, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

In [33]:
y_pred = mlp1.predict(X_valid)
y_pred_proba = mlp1.predict_proba(X_valid)[:, 1]

print(f"Accuracy Score on Train Set: {mlp1.score(X_train, y_train)}")
print(f"Accuracy Score on validation Set: {mlp1.score(X_valid, y_valid)}")
print(f'MLP validation roc_auc score: {roc_auc_score(y_pred, y_valid)}')

Accuracy Score on Train Set: 0.9212765957446809
Accuracy Score on validation Set: 0.4380952380952381
MLP validation roc_auc score: 0.4386792452830188


In [13]:
#sklearn classification report
print(classification_report(y_valid, y_pred))

              precision    recall  f1-score   support

           0       0.50      0.44      0.47        59
           1       0.38      0.43      0.40        46

    accuracy                           0.44       105
   macro avg       0.44      0.44      0.44       105
weighted avg       0.45      0.44      0.44       105



In [14]:
mlp1_final = MLPClassifier()
mlp1_final.fit(X_remainder, y_remainder)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(100,), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=200,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=None, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

In [32]:
mlp1_final.score(X_test, y_test)
y_pred = mlp1_final.predict(X_test)
y_pred_proba = mlp1_final.predict_proba(X_test)[:, 1]

print(f"Accuracy Score on Test Set: {mlp1.score(X_test, y_test)}")
print(f'MLP Test roc_auc score: {roc_auc_score(y_pred, y_test)}')

Accuracy Score on Test Set: 0.46153846153846156
MLP Test roc_auc score: 0.48622167789344767


In [23]:
joblib.dump(mlp1_final, 'mlp1_final.pkl')

['mlp1_final.pkl']

## MLP Model 2

Reload data and encode

In [29]:
# Import Training and Test Data  
train_features = pd.read_csv('data2/dpc_train_features.csv', index_col='match_id')
train_targets = pd.read_csv('data2/dpc_train_target.csv', index_col='match_id')
test_features = pd.read_csv('data2/dpc_test_features.csv', index_col='match_id')
test_targets = pd.read_csv('data2/dpc_test_target.csv', index_col='match_id')

In [30]:
# Combining training and test features for convenience so I can hot encode both at same time
all_features = pd.concat([train_features, test_features])

# Index to split the training and test data sets
idx_split = train_features.shape[0]

In [31]:
for i in range(1,23):
        all_features = pd.get_dummies(all_features, columns = [f'{i}hero_id'])

print(all_features.shape)

(1162, 2088)


In [32]:
# Encoded train/test features

X_remainder = all_features[:idx_split].values
X_test = all_features[idx_split:].values

y_remainder = train_targets.radiant_win.values
y_test = test_targets.radiant_win.values

In [17]:
# Split training data 

X_train, X_valid, y_train, y_valid = train_test_split(X_remainder, y_remainder, test_size = 0.1, random_state = SEED) ## see top, SEED = 17

In [24]:
mlp2 = MLPClassifier()

mlp2.fit(X_train, y_train)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(100,), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=200,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=None, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

In [31]:
y_pred = mlp2.predict(X_valid)
y_pred_proba = mlp2.predict_proba(X_valid)[:, 1]

print(f"Accuracy Score on Train Set: {mlp2.score(X_train, y_train)}")
print(f"Accuracy Score on validation Set: {mlp2.score(X_valid, y_valid)}")
print(f'MLP validation roc_auc score: {roc_auc_score(y_pred, y_valid)}')

Accuracy Score on Train Set: 0.9138297872340425
Accuracy Score on validation Set: 0.49523809523809526
MLP validation roc_auc score: 0.4958272859216256


In [26]:
#sklearn classification report
print(classification_report(y_valid, y_pred))

              precision    recall  f1-score   support

           0       0.56      0.49      0.52        59
           1       0.43      0.50      0.46        46

    accuracy                           0.50       105
   macro avg       0.50      0.50      0.49       105
weighted avg       0.50      0.50      0.50       105



In [27]:
mlp2_final = MLPClassifier()
mlp2_final.fit(X_remainder, y_remainder)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(100,), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=200,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=None, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

In [30]:
mlp2_final.score(X_test, y_test)
y_pred = mlp2_final.predict(X_test)
y_pred_proba = mlp2_final.predict_proba(X_test)[:, 1]

print(f"Accuracy Score on Test Set: {mlp2.score(X_test, y_test)}")
print(f'MLP Regression Test roc_auc score: {roc_auc_score(y_pred, y_test)}')

Accuracy Score on Test Set: 0.5555555555555556
MLP Regression Test roc_auc score: 0.4920400943396226


In [29]:
joblib.dump(mlp2_final, 'mlp_2.pkl')

['mlp_2.pkl']