# Predicting the Outcome of League of Legends Games
#### Trevor Bushnell and Ben Lombardi

## 0. Import Statements

In [1]:
import importlib

import mysklearn.myutils
importlib.reload(mysklearn.myutils)
import mysklearn.myutils as myutils


import mysklearn.mypytable
importlib.reload(mysklearn.mypytable)
from mysklearn.mypytable import MyPyTable 


import mysklearn.myclassifiers
importlib.reload(mysklearn.myclassifiers)
from mysklearn.myclassifiers import MyKNeighborsClassifier, MyDummyClassifier, MyNaiveBayesClassifier, MyDecisionTreeClassifier, MyRandomForestClassifier

import mysklearn.myevaluation
importlib.reload(mysklearn.myevaluation)
import mysklearn.myevaluation as myevaluation

import copy
import numpy as np

## 1. Introduction


### 1.1: What is League of Legends
League of Legends is one of the most popular MOBA (Multiplayer Online Battle Arena) games to date. In this game, two teams of five champions face off to destroy the other team's base. There are many different champions that you can choose from to build your team, and each champion has its own abilities. As such, there is room for a lot of strategy in League of Legends.

Battles in League of Legends generally take a very long time, ranging anywhere from 30 to 40 minutes on average. Since these battles are so long, we are interested in finding out whether or not we can predict the outcome of a League of Legends game using only data collected from the first few minutes of the game. The results of this analysis will be able to better inform League of Legends players about early win conditions for ranked games. While the dataset that we will be using is 2 years old, the strategies and meta of League of Legends are still fairly similar. Additionally, by identifying which attributes are strong predictors of success, the results of this analysis could help Riot Games (the publisher of League) better balance the game.



### 1.2: Overview of the Dataset
The dataset that we will be using is the [League of Legends Diamond Ranked Games (10 min)](https://www.kaggle.com/datasets/bobbyscience/league-of-legends-diamond-ranked-games-10-min) dataset on Kaggle. This dataset contains various statistics, such as the number of kills on each team, the amount of gold collected by each team, etc. There are roughly 10,000 games in the dataset. Since there are so many games to train on, we randomly selected 3,000 games from the dataset to keep (1,500 for each outcome; this way we can have a perfect 2:1 train/test split for classification tasks). We will use the various attributes in this dataset to help us predict whether **the blue team wins or loses**.


### 1.3: Classification Results

**WILL PUT THE CLASSIFICATION RESULTS HERE ONCE WE HAVE THEM**

## 2. Data Analysis

### 2.1: Information About the Dataset



In [2]:
# Number of games kept for each outcome class in the stratified sample.
SAMPLES_PER_CLASS = 1500

# Load the raw table and pull out the class label column.
df = MyPyTable()
df.load_from_file("high_diamond_ranked_10min.csv")
y = df.get_column("blueWins")

X = copy.deepcopy(df.data)

# Randomize the row indexes together with the labels so the stratified sample
# is drawn in shuffled order. The grouping below relies on y[i] still being
# the label of row X_indexes[i] after this call.
# NOTE(review): the third argument looks like a random seed - confirm in myutils.
X_indexes = list(range(len(X)))
myutils.randomize_in_place(X_indexes, y, 0)

# Group the shuffled row indexes by label. A dict keeps the labels in
# first-seen order, which matches the order the original list-based grouping
# produced, without rescanning the label list for every row.
indexes_by_label = {}
for i in range(len(X)):
    indexes_by_label.setdefault(y[i], []).append(X_indexes[i])
grouped_y = list(indexes_by_label.keys())
grouped_X = list(indexes_by_label.values())

# Fail with a clear message (rather than an IndexError deep inside the
# sampling loop) when a class cannot supply the requested number of rows.
for label, row_indexes in zip(grouped_y, grouped_X):
    if len(row_indexes) < SAMPLES_PER_CLASS:
        raise ValueError(
            "class " + repr(label) + " has only " + str(len(row_indexes))
            + " rows; need " + str(SAMPLES_PER_CLASS) + " per class"
        )

# Take SAMPLES_PER_CLASS rows from every class, interleaving the classes so
# the sample alternates between labels.
X_data = []
y_data = []
for i in range(SAMPLES_PER_CLASS):
    for j in range(len(grouped_X)):
        X_data.append(X[grouped_X[j][i]])
        y_data.append(grouped_y[j])

# Work on copies so X_data keeps the full, untrimmed rows for later cells.
X = copy.deepcopy(X_data)
y = copy.deepcopy(y_data)

# Drop the first two columns of every row.
# NOTE(review): presumably the game id and the blueWins label itself - confirm
# against df.column_names so the label does not leak into the features.
for entry in X:
    del entry[0:2]

# Initialize all of our classifiers
knn_clf = MyKNeighborsClassifier()
nb_clf = MyNaiveBayesClassifier()
dummy_clf = MyDummyClassifier()
tree_clf = MyDecisionTreeClassifier()

In [3]:
# Baseline: report statistics for kNN and the dummy classifier using all of
# the attributes currently in X.
for clf, clf_name in ((knn_clf, "kNN Classifier"), (dummy_clf, "Dummy Classifier")):
    myutils.compute_clf_stats(clf, X, y, clf_name)


Hold-Out Method
kNN Classifier : accuracy =  0.6909090909090909  error =  0.3090909090909091
binary precision:  0.6915322580645161
binary recall:  0.6915322580645161
f1 score:  0.6915322580645161
Confusion Matrix
[[341, 153], [153, 343]]

Hold-Out Method
Dummy Classifier : accuracy =  0.5  error =  0.5
binary precision:  0.0
binary recall:  0.0
f1 score:  0
Confusion Matrix
[[495, 0], [495, 0]]


In [4]:
# Build the list of attribute names that survive the column trimming applied
# to the data rows. The slices are applied in order, so each one is relative
# to the list as left by the previous deletion.
header = list(df.column_names)
del header[:2]       # the two leading columns
del header[-9:]      # the last nine columns
del header[10:15]    # five columns starting at position 10
del header[12:14]    # two more columns starting at position 12
print(header)

['blueWardsPlaced', 'blueWardsDestroyed', 'blueFirstBlood', 'blueKills', 'blueDeaths', 'blueAssists', 'blueEliteMonsters', 'blueDragons', 'blueHeralds', 'blueTowersDestroyed', 'blueGoldDiff', 'blueExperienceDiff', 'redWardsPlaced', 'redWardsDestroyed', 'redFirstBlood', 'redKills', 'redDeaths', 'redAssists', 'redEliteMonsters', 'redDragons', 'redHeralds', 'redTowersDestroyed']


In [5]:
# Start again from the full sampled rows so X_data itself is left untouched.
X = copy.deepcopy(X_data)

# Remove the same column positions that were removed from the header. Each
# slice is relative to the row as left by the previous deletion.
for row in X:
    del row[:2]
    del row[-9:]
    del row[10:15]
    del row[12:14]

# Repeat the kNN / dummy comparison on the reduced attribute set.
for clf, clf_name in ((knn_clf, "kNN Classifier"), (dummy_clf, "Dummy Classifier")):
    myutils.compute_clf_stats(clf, X, y, clf_name)


Hold-Out Method
kNN Classifier : accuracy =  0.6525252525252525  error =  0.3474747474747475
binary precision:  0.6583333333333333
binary recall:  0.6370967741935484
f1 score:  0.6475409836065574
Confusion Matrix
[[316, 180], [164, 330]]

Hold-Out Method
Dummy Classifier : accuracy =  0.5  error =  0.5
binary precision:  0.0
binary recall:  0.0
f1 score:  0
Confusion Matrix
[[495, 0], [495, 0]]


In [6]:
# Discretize the trimmed attributes: every column is replaced by the output of
# myutils.binning (equal-width binning, per the original cell comment).
X = copy.deepcopy(X_data)
y = list(map(str, y))  # class labels as strings

# Same column trimming as before, applied to the fresh copy of the rows.
for row in X:
    del row[:2]
    del row[-9:]
    del row[10:15]
    del row[12:14]

# Bin one column at a time and write the binned values back into the rows.
for col in range(len(X[0])):
    binned_col = myutils.binning([row[col] for row in X])
    for row_number, row in enumerate(X):
        row[col] = binned_col[row_number]

In [7]:
# Random forest, first attempt: hold-out evaluation on the current X and y.
X_train, X_test, y_train, y_test = myevaluation.train_test_split(
    X, y, random_state=0
)

# The trailing positional arguments (5, 3, 3) are passed straight to
# MyRandomForestClassifier.fit.
# NOTE(review): presumably forest hyperparameters - confirm their meaning and
# order against the classifier's fit signature.
forest_clf = MyRandomForestClassifier()
forest_clf.fit(X_train, y_train, 5, 3, 3)
y_pred_clf = forest_clf.predict(X_test)

# Report accuracy and the complementary error rate.
accuracy = myevaluation.accuracy_score(y_test, y_pred_clf)
print("Hold-Out Method")
print("Random Forest : accuracy = ", accuracy, " error = ", 1 - accuracy)

predicting
predicting
predicting
predicting
predicting
fitted
predicting2
0
1
2
predicing 3
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
25

In [8]:
# Random forest, second attempt: same hold-out split, different positional
# arguments to fit (20, 10, 3).
# NOTE(review): the recorded run of this cell ended in a KeyboardInterrupt, so
# no accuracy was produced for this configuration.
X_train, X_test, y_train, y_test = myevaluation.train_test_split(
    X, y, random_state=0
)

forest_clf = MyRandomForestClassifier()
forest_clf.fit(X_train, y_train, 20, 10, 3)
y_pred_clf = forest_clf.predict(X_test)

# Report accuracy and the complementary error rate.
accuracy = myevaluation.accuracy_score(y_test, y_pred_clf)
print("Hold-Out Method")
print("Random Forest : accuracy = ", accuracy, " error = ", 1 - accuracy)

predicting
predicting
predicting
predicting
predicting
predicting
predicting
predicting
predicting
predicting
predicting
predicting
predicting
predicting
predicting
predicting
predicting
predicting
predicting
predicting
fitted
predicting2
0
1
2
3


KeyboardInterrupt: 

In [None]:
# Random forest, third attempt: same hold-out split, different positional
# arguments to fit (12, 3, 5).
# NOTE(review): the recorded run of this cell ended in a KeyboardInterrupt, so
# no accuracy was produced for this configuration.
X_train, X_test, y_train, y_test = myevaluation.train_test_split(
    X, y, random_state=0
)

forest_clf = MyRandomForestClassifier()
forest_clf.fit(X_train, y_train, 12, 3, 5)
y_pred_clf = forest_clf.predict(X_test)

# Report accuracy and the complementary error rate.
accuracy = myevaluation.accuracy_score(y_test, y_pred_clf)
print("Hold-Out Method")
print("Random Forest : accuracy = ", accuracy, " error = ", 1 - accuracy)

predicting
predicting
predicting
predicting
predicting
predicting


KeyboardInterrupt: 