### Import libraries and dataset, divide it into x and y

In [14]:
import pandas as pd
import numpy as np
import json

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import confusion_matrix

from keras.models import Sequential
from keras.layers import Dense
from keras import metrics

df = pd.read_csv('LOL/Clean_LeagueofLegends.csv')

x_cols = ['blueMiddleChamp', 'blueJungleChamp', 'redMiddleChamp', 'redJungleChamp',
          'rKills_pre15', 'rTowers_pre15', 'rDragons_pre15','rHeralds_pre15', 'golddiff_min15']

x = df[x_cols]
y = df['rResult']

### Encode categorical variables into single columns

In [15]:
x = pd.get_dummies(x,['blueMiddleChamp', 'blueJungleChamp', 'redMiddleChamp', 'redJungleChamp'])

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)

print('x has {} columns and {} rows\n'.format(x.shape[1], x.shape[0]))

print(x.head())
print(y.head())



x has 269 columns and 7620 rows

   rKills_pre15  rTowers_pre15  rDragons_pre15  rHeralds_pre15  \
0             0              1               0               0   
1             3              1               1               0   
2             3              0               0               0   
3             4              0               1               0   
4             1              3               0               0   

   golddiff_min15  blueMiddleChamp_Ahri  blueMiddleChamp_Akali  \
0            -611                     1                      0   
1            1301                     1                      0   
2            3074                     0                      0   
3              -8                     0                      0   
4             177                     0                      0   

   blueMiddleChamp_Anivia  blueMiddleChamp_AurelionSol  blueMiddleChamp_Azir  \
0                       0                            0                     0   
1            

## Logistic model

Variables:

    Red_MID: Red Champion selection for the MID role (string) 
    Red_JUNG: Red Champion selection for the JUNG role (string) 
    Blue_MID: Blue Champion selection for the MID role (string) 
    Blue_JUNG: Blue Champion selection for the JUNG role (string)
    Red_Kills_pre15: number of kills before minute 15 for red (integer)
    Red_Towers_pre15: number of towers destroyed before minute 15 for red (integer)
    Red_dragons_pre15: number of dragons captured by minute 15 (integer)
    Red_herald_pre15: herald captured by minute 15 for red (integer)
    Gold_Diff_MIN_15: Diff in gold (red team - blue team) (integer)
Target:
    rResult: 1 if red team won (boolean)

In [16]:
logit = LogisticRegression()
logit.fit(x_train, y_train)

y_pred = logit.predict(x_test)

logit_accuracy = sum(y_pred==y_test)/len(y_test)

print('Logistic model accuracy: {}'.format(logit_accuracy)) # 0.7355643044619422
print('='*50 + 'Confusion matrix' + '='*50, '\n',confusion_matrix(y_test, y_pred))

Logistic model accuracy: 0.7355643044619422
 [[649 198]
 [205 472]]


## Decision Tree

Variables:

    Red_MID: Red Champion selection for the MID role (string) 
    Red_JUNG: Red Champion selection for the JUNG role (string) 
    Blue_MID: Blue Champion selection for the MID role (string) 
    Blue_JUNG: Blue Champion selection for the JUNG role (string)
    Red_Kills_pre15: number of kills before minute 15 for red (integer)
    Red_Towers_pre15: number of towers destroyed before minute 15 for red (integer)
    Red_dragons_pre15: number of dragons captured by minute 15 (integer)
    Red_herald_pre15: herald captured by minute 15 for red (integer)
    Gold_Diff_MIN_15: Diff in gold (red team - blue team) (integer)
Target:
    rResult: 1 if red team won (boolean)

In [17]:
dTree = DecisionTreeClassifier()

dTree.fit(x_train, y_train)

y_pred = dTree.predict(x_test)

dTree_accuracy = sum(y_pred==y_test)/len(y_test)

print('Decision tree model accuracy: {}'.format(dTree_accuracy)) # 0.6437007874015748
print('='*50 + 'Confusion matrix' + '='*50, '\n', confusion_matrix(y_test, y_pred))

Decision tree model accuracy: 0.636482939632546
 [[545 302]
 [252 425]]


## Random Forest

Variables:

    Red_MID: Red Champion selection for the MID role (string) 
    Red_JUNG: Red Champion selection for the JUNG role (string) 
    Blue_MID: Blue Champion selection for the MID role (string) 
    Blue_JUNG: Blue Champion selection for the JUNG role (string)
    Red_Kills_pre15: number of kills before minute 15 for red (integer)
    Red_Towers_pre15: number of towers destroyed before minute 15 for red (integer)
    Red_dragons_pre15: number of dragons captured by minute 15 (integer)
    Red_herald_pre15: herald captured by minute 15 for red (integer)
    Gold_Diff_MIN_15: Diff in gold (red team - blue team) (integer)
Target:

    rResult: 1 if red team won (boolean)
Number of trees:

    100

In [18]:
rForest = RandomForestClassifier(n_estimators=100)

rForest.fit(x_train, y_train)

y_pred = rForest.predict(x_test)

rForest_accuracy = sum(y_pred==y_test)/len(y_test)

print('Random Forest model accuracy: {}'.format(rForest_accuracy)) # 0.7276902887139107
print('='*50 + 'Confusion matrix' + '='*50, '\n', confusion_matrix(y_test, y_pred))

Random Forest model accuracy: 0.7263779527559056
 [[658 189]
 [228 449]]


## SVM

Variables:

    Red_MID: Red Champion selection for the MID role (string) 
    Red_JUNG: Red Champion selection for the JUNG role (string) 
    Blue_MID: Blue Champion selection for the MID role (string) 
    Blue_JUNG: Blue Champion selection for the JUNG role (string)
    Red_Kills_pre15: number of kills before minute 15 for red (integer)
    Red_Towers_pre15: number of towers destroyed before minute 15 for red (integer)
    Red_dragons_pre15: number of dragons captured by minute 15 (integer)
    Red_herald_pre15: herald captured by minute 15 for red (integer)
    Gold_Diff_MIN_15: Diff in gold (red team - blue team) (integer)
Target:
    
    rResult: 1 if red team won (boolean)

In [19]:
svm = SVC()

svm.fit(x_train, y_train)

y_pred = svm.predict(x_test)

svm_accuracy = sum(y_pred==y_test)/len(y_test)

print('SVM model accuracy: {}'.format(svm_accuracy)) # 0.7368766404199475
print('='*50 + 'Confusion matrix' + '='*50, '\n', confusion_matrix(y_test, y_pred))

SVM model accuracy: 0.7322834645669292
 [[644 203]
 [205 472]]


Neural Network

Variables:

    Red_MID: Red Champion selection for the MID role (string) 
    Red_JUNG: Red Champion selection for the JUNG role (string) 
    Blue_MID: Blue Champion selection for the MID role (string) 
    Blue_JUNG: Blue Champion selection for the JUNG role (string)
    Red_Kills_pre15: number of kills before minute 15 for red (integer)
    Red_Towers_pre15: number of towers destroyed before minute 15 for red (integer)
    Red_dragons_pre15: number of dragons captured by minute 15 (integer)
    Red_herald_pre15: herald captured by minute 15 for red (integer)
    Gold_Diff_MIN_15: Diff in gold (red team - blue team) (integer)
Target:

    rResult: 1 if red team won (boolean)
Architecture:
    
    1 dense layer: 10 relu
    2 dense layer: 50 relu
    3 dense layer: 10 relu
    4 dense layer: 5 relu
    5 dense layer: 1 Sigmoid

In [21]:
scalar = MinMaxScaler()
scalar.fit(x)

x_nn = scalar.transform(x)

x_train, x_test, y_train, y_test = train_test_split(x_nn, y, test_size=0.2, random_state=0)

nn = Sequential()
print('xtrain lenght: {}'.format(len(x_train[0])))
nn.add(Dense(10, input_dim=len(x_train[0]), activation='relu'))
nn.add(Dense(50, activation='relu'))
nn.add(Dense(10, activation='relu'))
nn.add(Dense(5, activation='relu'))
nn.add(Dense(1, activation='sigmoid'))

nn.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
nn.summary()

history = nn.fit(x_train, y_train, epochs=150, batch_size=10) # 0.9826 (500 iterations)

print(history.history.keys())

nn.evaluate(x_test, y_test) # 0.6640

xtrain lenght: 269
Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_10 (Dense)             (None, 10)                2700      
_________________________________________________________________
dense_11 (Dense)             (None, 50)                550       
_________________________________________________________________
dense_12 (Dense)             (None, 10)                510       
_________________________________________________________________
dense_13 (Dense)             (None, 5)                 55        
_________________________________________________________________
dense_14 (Dense)             (None, 1)                 6         
Total params: 3,821
Trainable params: 3,821
Non-trainable params: 0
_________________________________________________________________
Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epo

Epoch 150/150
dict_keys(['loss', 'accuracy'])


[1.6964792013168335, 0.6699475049972534]

## Linear regression

Variables:
    
    Red_MID: Red Champion selection for the MID role (string) 
    Red_JUNG: Red Champion selection for the JUNG role (string) 
    Blue_MID: Blue Champion selection for the MID role (string) 
    Blue_JUNG: Blue Champion selection for the JUNG role (string)
    Red_Kills_pre15: number of kills before minute 15 for red (integer)
    Red_Towers_pre15: number of towers destroyed before minute 15 for red (integer)
    Red_dragons_pre15: number of dragons captured by minute 15 (integer)
    Red_herald_pre15: herald captured by minute 15 for red (integer)
    Gold_Diff_MIN_15: Diff in gold (red team - blue team) (integer)
Target:
    
    final_golddiff: Gold difference at the end of the game

In [22]:
x = df[x_cols]
x = pd.get_dummies(x,['blueMiddleChamp', 'blueJungleChamp', 'redMiddleChamp', 'redJungleChamp'])

y = df['golddiff_final']

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)

linear_model = LinearRegression()
linear_model.fit(x_train, y_train)

y_pred = linear_model.predict(x_test)

print('linear model R^2: {}'.format(linear_model.score(x_test, y_test))) # 0.4371161384443095


linear model R^2: 0.4371161384443145
