In [6]:
import pandas as pd
import numpy as np
import json
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, PolynomialFeatures, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.feature_selection import VarianceThreshold
from sklearn.metrics import accuracy_score

In [7]:
# Ground-truth results for the training matches; the 'winner' and 'matchId'
# columns of this frame are read by later cells.
y_train = pd.read_csv(filepath_or_buffer = '../data/train_winners.csv')

In [8]:
# Load a single sample timeline (match 23) to explore the JSON structure.
# The context manager closes the file handle as soon as parsing is done
# instead of leaving it open for the lifetime of the kernel.
with open('../data/train_timelines/train_timelines/timeline_{matchIds}.json'.format(matchIds = 23)) as f:
    data = json.load(f)

**Looking into gold and XP**

In [9]:
# Accumulator for the per-match feature rows appended by the loop in the next cell.
df = []

In [10]:
# Build one row per training match holding each team's total gold and XP,
# read from the last frame of that match's timeline.
# The accumulator is reset here so that re-running this cell cannot append a
# second copy of every match.
df = []

for matchId in range(0, 8000):
    # `with` closes each of the 8000 files as soon as it has been parsed.
    with open('../data/train_timelines/train_timelines/timeline_{matchIds}.json'.format(matchIds = matchId)) as f:
        data = json.load(f)

    # Per-participant stats of the final frame, keyed by the strings '1'..'10'.
    last_frame = data['frames'][-1]['participantFrames']

    # Participants 1-5 are summed into the blue columns, 6-10 into the red columns.
    df.append({
        'matchId' : matchId,
        'blueGold' : sum(last_frame[str(p)]['totalGold'] for p in range(1, 6)),
        'blueXP' : sum(last_frame[str(p)]['xp'] for p in range(1, 6)),
        'redGold' : sum(last_frame[str(p)]['totalGold'] for p in range(6, 11)),
        'redXP' : sum(last_frame[str(p)]['xp'] for p in range(6, 11))
    })

In [11]:
# Turn the list of per-match dicts into the training feature table (one row per match).
X_train = pd.DataFrame(df)

**Not using difference in gold/xp, and using team totals.**

In [12]:
# Feature-column groups for the "team totals" experiments below.
gold = ['blueGold', 'redGold']
xp = ['blueXP', 'redXP']
# Gold columns first, then XP columns.
both = gold + xp

**Just gold**

In [13]:
# Logistic regression on the gold totals only; the target is True when team 100 won.
# The score shown is in-sample (training) accuracy.
blue_won = y_train['winner'].eq(100)
logreg = LogisticRegression()
logreg.fit(X_train[gold], blue_won)

accuracy_score(blue_won, logreg.predict(X_train[gold]))

0.705375

**Just xp**

In [14]:
# Logistic regression on the XP totals only; the target is True when team 100 won.
# The score shown is in-sample (training) accuracy.
blue_won = y_train['winner'].eq(100)
logreg = LogisticRegression()
logreg.fit(X_train[xp], blue_won)

accuracy_score(blue_won, logreg.predict(X_train[xp]))

0.672

**Gold/xp - submissionV4**

In [15]:
# Logistic regression on gold and XP totals together; this fitted `logreg`
# is the model used for the first submission further down.
blue_won = y_train['winner'].eq(100)
logreg = LogisticRegression()
logreg.fit(X_train[both], blue_won)

accuracy_score(blue_won, logreg.predict(X_train[both]))

0.70975

In [16]:
# Build the test feature table: one row per test match (ids 8000-9999) with
# each team's total gold and XP taken from the last frame of the timeline.
df = []

for matchId in range(8000, 10000):
    # `with` closes each file as soon as it has been parsed.
    with open('../data/test_timelines/test_timelines/timeline_{matchIds}.json'.format(matchIds = matchId)) as f:
        data = json.load(f)

    # Per-participant stats of the final frame, keyed by the strings '1'..'10'.
    last_frame = data['frames'][-1]['participantFrames']

    # Participants 1-5 are summed into the blue columns, 6-10 into the red columns.
    df.append({
        'matchId' : matchId,
        'blueGold' : sum(last_frame[str(p)]['totalGold'] for p in range(1, 6)),
        'blueXP' : sum(last_frame[str(p)]['xp'] for p in range(1, 6)),
        'redGold' : sum(last_frame[str(p)]['totalGold'] for p in range(6, 11)),
        'redXP' : sum(last_frame[str(p)]['xp'] for p in range(6, 11))
    })

X_test = pd.DataFrame(df)

In [17]:
# Submission frame: matchId plus the predicted winning team id.
# The model predicts a boolean ("team 100 won"); True maps to 100, False to 200.
submissionV4 = X_test[['matchId']].copy()
submissionV4['winner'] = np.where(logreg.predict(X_test[both]), 100, 200)

In [18]:
#submissionV4.to_csv('../submissions/submissionV4.csv', index = False)

**Using difference in gold/xp, and using team totals**

In [19]:
# Blue-minus-red differences as alternative features to the raw team totals.
X_train['goldDifference'] = X_train['blueGold'].sub(X_train['redGold'])
X_train['xpDifference'] = X_train['blueXP'].sub(X_train['redXP'])

# NOTE: these three names are rebound here; from this cell on they refer to the
# difference columns, not to the team-total columns they held earlier.
gold = ['goldDifference']
xp = ['xpDifference']
both = gold + xp

**Just Gold**

In [20]:
# Logistic regression on the gold difference only; in-sample accuracy is displayed.
blue_won = y_train['winner'].eq(100)
logreg = LogisticRegression()
logreg.fit(X_train[gold], blue_won)

accuracy_score(blue_won, logreg.predict(X_train[gold]))

0.7055

**Just xp**

In [21]:
# Logistic regression on the XP difference only; in-sample accuracy is displayed.
blue_won = y_train['winner'].eq(100)
logreg = LogisticRegression()
logreg.fit(X_train[xp], blue_won)

accuracy_score(blue_won, logreg.predict(X_train[xp]))

0.6715

**Gold/xp**

In [22]:
# Logistic regression on gold and XP differences together; in-sample accuracy is displayed.
blue_won = y_train['winner'].eq(100)
logreg = LogisticRegression()
logreg.fit(X_train[both], blue_won)

accuracy_score(blue_won, logreg.predict(X_train[both]))

0.70825

**Adding kills to gold/xp totals**

In [23]:
# Count CHAMPION_KILL events per team for every training match.
# Counts are accumulated per match while reading, so there is exactly one row
# per match (matches without any kill carry explicit zeros) and no per-event
# rows or groupby are needed.
df = []

for matchId in range(0, 8000):
    # `with` closes each file as soon as it has been parsed.
    with open('../data/train_timelines/train_timelines/timeline_{matchIds}.json'.format(matchIds = matchId)) as f:
        data = json.load(f)

    blueKills = 0
    redKills = 0
    for frame in data['frames']:
        for event in frame['events']:
            if event['type'] != 'CHAMPION_KILL':
                continue
            # Killer ids below 6 are credited to blue, everything else to red.
            # NOTE(review): a killerId of 0 (if the data contains kills with no
            # champion killer) would also be credited to blue here — confirm.
            if event['killerId'] < 6:
                blueKills += 1
            else:
                redKills += 1

    df.append({
        'matchId' : matchId,
        'blueKills' : blueKills,
        'redKills' : redKills
    })

killsTrain = pd.DataFrame(df, columns = ['matchId', 'blueKills', 'redKills'])

In [24]:
# Attach the kill counts to the training features; any value left missing by
# the left join is filled with 0.
# NOTE: re-running this cell merges the kill columns a second time.
X_train = pd.merge(X_train, killsTrain, how = 'left', on = ['matchId']).fillna(0)

kills = [
    'blueGold', 'redGold',
    'blueXP', 'redXP',
    'blueKills', 'redKills'
]

**Not using difference in gold/xp/kills, and using team totals.**

In [25]:
# Logistic regression on gold/XP/kill totals; in-sample accuracy is displayed.
blue_won = y_train['winner'].eq(100)
logreg = LogisticRegression()
logreg.fit(X_train[kills], blue_won)

accuracy_score(blue_won, logreg.predict(X_train[kills]))

0.70975

**Adding dragons to gold/xp/kills totals**

In [26]:
# Count dragon kills (ELITE_MONSTER_KILL events with monsterType 'DRAGON') per
# team for every training match.
# Counts are accumulated per match while reading, so there is exactly one row
# per match (matches without a dragon carry explicit zeros) and no per-event
# rows or groupby are needed.
df = []

for matchId in range(0, 8000):
    # `with` closes each file as soon as it has been parsed.
    with open('../data/train_timelines/train_timelines/timeline_{matchIds}.json'.format(matchIds = matchId)) as f:
        data = json.load(f)

    blueDragons = 0
    redDragons = 0
    for frame in data['frames']:
        for event in frame['events']:
            # The type is checked first; 'monsterType' is only read on
            # ELITE_MONSTER_KILL events, exactly as before.
            if event['type'] != 'ELITE_MONSTER_KILL' or event['monsterType'] != 'DRAGON':
                continue
            # Team id 100 is blue; any other killerTeamId is counted for red.
            if event['killerTeamId'] == 100:
                blueDragons += 1
            else:
                redDragons += 1

    df.append({
        'matchId' : matchId,
        'blueDragons' : blueDragons,
        'redDragons' : redDragons
    })

dragonsTrain = pd.DataFrame(df, columns = ['matchId', 'blueDragons', 'redDragons'])

In [27]:
# Attach the dragon counts to the training features; any value left missing by
# the left join is filled with 0.
# NOTE: re-running this cell merges the dragon columns a second time.
X_train = pd.merge(X_train, dragonsTrain, how = 'left', on = ['matchId']).fillna(0)

dragons = [
    'blueGold', 'redGold',
    'blueXP', 'redXP',
    'blueKills', 'redKills',
    'blueDragons', 'redDragons'
]

**Not using difference in gold/xp/kills/dragons, and using team totals.**

In [28]:
# Logistic regression on gold/XP/kill/dragon totals; in-sample accuracy is displayed.
blue_won = y_train['winner'].eq(100)
logreg = LogisticRegression()
logreg.fit(X_train[dragons], blue_won)

accuracy_score(blue_won, logreg.predict(X_train[dragons]))

0.70975

**Trying L2-penalized (ridge) logistic regression on scaled interaction features using gold/xp/kills/dragons - submissionV5**

In [29]:
# Pairwise-interaction features -> drop zero-variance columns -> standardize ->
# logistic regression (default penalty), fitted on gold/XP/kills/dragons.
# Step names are kept as-is because later cells address steps by name.
interaction_steps = [
    ('pf', PolynomialFeatures(interaction_only = True, include_bias = False)),
    ('vt', VarianceThreshold()),
    ('scaler', StandardScaler()),
    ('logistic', LogisticRegression(max_iter = 10000)),
]
pipe = Pipeline(steps = interaction_steps)

pipe.fit(X_train[dragons], y_train['winner'].eq(100))

In [30]:
# In-sample accuracy of the interaction pipeline.
accuracy_score(
    y_train['winner'].eq(100),
    pipe.predict(X_train[dragons])
)

0.71925

In [31]:
# Count CHAMPION_KILL events per team for every test match (ids 8000-9999).
# Counts are accumulated per match while reading, so there is exactly one row
# per match (matches without any kill carry explicit zeros) and no per-event
# rows or groupby are needed.
df = []

for matchId in range(8000, 10000):
    # `with` closes each file as soon as it has been parsed.
    with open('../data/test_timelines/test_timelines/timeline_{matchIds}.json'.format(matchIds = matchId)) as f:
        data = json.load(f)

    blueKills = 0
    redKills = 0
    for frame in data['frames']:
        for event in frame['events']:
            if event['type'] != 'CHAMPION_KILL':
                continue
            # Killer ids below 6 are credited to blue, everything else to red.
            # NOTE(review): a killerId of 0 (if the data contains kills with no
            # champion killer) would also be credited to blue here — confirm.
            if event['killerId'] < 6:
                blueKills += 1
            else:
                redKills += 1

    df.append({
        'matchId' : matchId,
        'blueKills' : blueKills,
        'redKills' : redKills
    })

killsTest = pd.DataFrame(df, columns = ['matchId', 'blueKills', 'redKills'])

In [32]:
# Attach the kill counts to the test features; missing values after the left join become 0.
X_test = pd.merge(X_test, killsTest, how = 'left', on = ['matchId']).fillna(0)

In [33]:
# Count dragon kills (ELITE_MONSTER_KILL events with monsterType 'DRAGON') per
# team for every test match (ids 8000-9999).
# Counts are accumulated per match while reading, so there is exactly one row
# per match (matches without a dragon carry explicit zeros) and no per-event
# rows or groupby are needed.
df = []

for matchId in range(8000, 10000):
    # `with` closes each file as soon as it has been parsed.
    with open('../data/test_timelines/test_timelines/timeline_{matchIds}.json'.format(matchIds = matchId)) as f:
        data = json.load(f)

    blueDragons = 0
    redDragons = 0
    for frame in data['frames']:
        for event in frame['events']:
            # The type is checked first; 'monsterType' is only read on
            # ELITE_MONSTER_KILL events, exactly as before.
            if event['type'] != 'ELITE_MONSTER_KILL' or event['monsterType'] != 'DRAGON':
                continue
            # Team id 100 is blue; any other killerTeamId is counted for red.
            if event['killerTeamId'] == 100:
                blueDragons += 1
            else:
                redDragons += 1

    df.append({
        'matchId' : matchId,
        'blueDragons' : blueDragons,
        'redDragons' : redDragons
    })

dragonsTest = pd.DataFrame(df, columns = ['matchId', 'blueDragons', 'redDragons'])

In [34]:
# Attach the dragon counts to the test features; missing values after the left join become 0.
X_test = pd.merge(X_test, dragonsTest, how = 'left', on = ['matchId']).fillna(0)

In [35]:
# Submission frame: matchId plus the predicted winning team id.
# The pipeline predicts a boolean ("team 100 won"); True maps to 100, False to 200.
submissionV5 = X_test[['matchId']].copy()
submissionV5['winner'] = np.where(pipe.predict(X_test[dragons]), 100, 200)

In [36]:
#submissionV5.to_csv('../submissions/submissionV5.csv', index = False)

**Adding champion points to gold/xp/kills/dragons**

In [37]:
# Lookup tables: champion mastery records and the participants of each
# training / test match. The columns read from them appear in the cells below.
champMastery = pd.read_csv(filepath_or_buffer = '../data/champion_mastery.csv')
participantsTrain = pd.read_csv(filepath_or_buffer = '../data/participants_train.csv')
participantsTest = pd.read_csv(filepath_or_buffer = '../data/participants_test.csv')

In [38]:
# Total champion-mastery points per team for every training match.
# Each participant is joined to their mastery record for the champion played;
# every missing value after the left join is replaced by 0 before summing.
# NOTE(review): assumes at most one mastery row per (summonerId, championId);
# duplicates would inflate the sums — confirm.
participant_mastery = participantsTrain[['matchId', 'teamId', 'summonerId', 'championId']].merge(
    right = champMastery[['summonerId', 'championId', 'championPoints']],
    how = 'left',
    on = ['summonerId', 'championId']
)
participant_mastery = participant_mastery.fillna(0)

champPoints_df = (
    participant_mastery[['matchId', 'teamId', 'championPoints']]
    .groupby(['matchId', 'teamId'])
    .sum()
    .reset_index()
)

# Split the per-team sums into one column per side (team 100 = blue, 200 = red).
is_blue = champPoints_df['teamId'] == 100
is_red = champPoints_df['teamId'] == 200

blueChampPoints = champPoints_df.loc[is_blue, ['matchId', 'championPoints']].rename(
    columns = {'championPoints' : 'blueChampPoints'}
)
redChampPoints = champPoints_df.loc[is_red, ['matchId', 'championPoints']].rename(
    columns = {'championPoints' : 'redChampPoints'}
)

# Left-join blue first, then red, onto the training features.
for team_points in (blueChampPoints, redChampPoints):
    X_train = X_train.merge(right = team_points, how = 'left', on = 'matchId')

**Trying normal/ridge regression using gold/xp/kills/dragons/champion points - submissionV7**

In [39]:
# Feature set: team totals plus champion-mastery points.
champPoints = (
    ['blueGold', 'redGold']
    + ['blueXP', 'redXP']
    + ['blueKills', 'redKills']
    + ['blueDragons', 'redDragons']
    + ['blueChampPoints', 'redChampPoints']
)

In [40]:
# Plain logistic regression on the champion-points feature set; in-sample accuracy is displayed.
blue_won = y_train['winner'].eq(100)
logreg = LogisticRegression()
logreg.fit(X_train[champPoints], blue_won)

accuracy_score(blue_won, logreg.predict(X_train[champPoints]))

0.707375

In [41]:
# Same interaction pipeline as before, now fitted on the champion-points
# feature set; in-sample accuracy is displayed.
interaction_steps = [
    ('pf', PolynomialFeatures(interaction_only = True, include_bias = False)),
    ('vt', VarianceThreshold()),
    ('scaler', StandardScaler()),
    ('logistic', LogisticRegression(max_iter = 10000)),
]
pipe = Pipeline(steps = interaction_steps)

blue_won = y_train['winner'].eq(100)
pipe.fit(X_train[champPoints], blue_won)

accuracy_score(blue_won, pipe.predict(X_train[champPoints]))

0.72125

In [42]:
# Total champion-mastery points per team for every test match (same recipe as
# for the training matches). This rebinds champPoints_df, blueChampPoints and
# redChampPoints to the test-set versions.
# NOTE(review): assumes at most one mastery row per (summonerId, championId);
# duplicates would inflate the sums — confirm.
participant_mastery = participantsTest[['matchId', 'teamId', 'summonerId', 'championId']].merge(
    right = champMastery[['summonerId', 'championId', 'championPoints']],
    how = 'left',
    on = ['summonerId', 'championId']
)
participant_mastery = participant_mastery.fillna(0)

champPoints_df = (
    participant_mastery[['matchId', 'teamId', 'championPoints']]
    .groupby(['matchId', 'teamId'])
    .sum()
    .reset_index()
)

# Split the per-team sums into one column per side (team 100 = blue, 200 = red).
is_blue = champPoints_df['teamId'] == 100
is_red = champPoints_df['teamId'] == 200

blueChampPoints = champPoints_df.loc[is_blue, ['matchId', 'championPoints']].rename(
    columns = {'championPoints' : 'blueChampPoints'}
)
redChampPoints = champPoints_df.loc[is_red, ['matchId', 'championPoints']].rename(
    columns = {'championPoints' : 'redChampPoints'}
)

# Left-join blue first, then red, onto the test features.
for team_points in (blueChampPoints, redChampPoints):
    X_test = X_test.merge(right = team_points, how = 'left', on = 'matchId')

In [43]:
# Submission frame: matchId plus the predicted winning team id.
# The pipeline predicts a boolean ("team 100 won"); True maps to 100, False to 200.
submissionV7 = X_test[['matchId']].copy()
submissionV7['winner'] = np.where(pipe.predict(X_test[champPoints]), 100, 200)

In [44]:
#submissionV7.to_csv('../submissions/submissionV7.csv', index = False)

**Adding summoner level to gold/xp/kills/dragons/champion points**

In [45]:
# Sum of summoner levels per team for every training match.
sumLevel_df = (
    participantsTrain[['matchId', 'teamId', 'summonerLevel']]
    .groupby(['matchId', 'teamId'])
    .sum()
    .reset_index()
)

# Split the per-team sums into one column per side (team 100 = blue, 200 = red).
is_blue = sumLevel_df['teamId'] == 100
is_red = sumLevel_df['teamId'] == 200

blueSumLevel = sumLevel_df.loc[is_blue, ['matchId', 'summonerLevel']].rename(
    columns = {'summonerLevel' : 'blueSumLevel'}
)
redSumLevel = sumLevel_df.loc[is_red, ['matchId', 'summonerLevel']].rename(
    columns = {'summonerLevel' : 'redSumLevel'}
)

# Left-join blue first, then red, onto the training features.
for team_levels in (blueSumLevel, redSumLevel):
    X_train = X_train.merge(right = team_levels, how = 'left', on = 'matchId')

**Trying normal/ridge regression using gold/xp/kills/dragons/champion points/summoner level**

In [46]:
# Feature set: team totals, champion-mastery points and summoner levels.
sumLevel = (
    ['blueGold', 'redGold']
    + ['blueXP', 'redXP']
    + ['blueKills', 'redKills']
    + ['blueDragons', 'redDragons']
    + ['blueChampPoints', 'redChampPoints']
    + ['blueSumLevel', 'redSumLevel']
)

In [47]:
# Plain logistic regression on the summoner-level feature set; in-sample accuracy is displayed.
blue_won = y_train['winner'].eq(100)
logreg = LogisticRegression()
logreg.fit(X_train[sumLevel], blue_won)

accuracy_score(blue_won, logreg.predict(X_train[sumLevel]))

0.70775

In [48]:
# Interaction pipeline fitted on the summoner-level feature set; in-sample accuracy is displayed.
interaction_steps = [
    ('pf', PolynomialFeatures(interaction_only = True, include_bias = False)),
    ('vt', VarianceThreshold()),
    ('scaler', StandardScaler()),
    ('logistic', LogisticRegression(max_iter = 10000)),
]
pipe = Pipeline(steps = interaction_steps)

blue_won = y_train['winner'].eq(100)
pipe.fit(X_train[sumLevel], blue_won)

accuracy_score(blue_won, pipe.predict(X_train[sumLevel]))

0.717875

**Seeing if a simpler model may be better using lasso (L1-penalized) logistic regression**

In [49]:
# L1-penalized logistic regression on the scaled interaction features, so
# that uninformative coefficients can be driven to exactly zero.
# The 'saga' solver is stochastic; a fixed random_state (the same seed value
# the notebook uses for its train/test split) makes the fit, and the grid
# search that clones this pipeline, repeatable.
pipe = Pipeline(
    steps = [
        ('pf', PolynomialFeatures(interaction_only = True, include_bias = False)),
        ('vt', VarianceThreshold()),
        ('scaler', StandardScaler()),
        ('logistic', LogisticRegression(C = .5, penalty = 'l1', solver = 'saga',
                                        max_iter = 10000, random_state = 777))
    ]
)

pipe.fit(X_train[sumLevel], y_train['winner'] == 100)

In [50]:
from sklearn.model_selection import GridSearchCV

In [51]:
# Cross-validated search over the inverse regularization strength C of the
# 'logistic' step, scored by accuracy.
c_grid = {'logistic__C': [1, 0.5, 0.1, 0.05, 0.01]}
gs = GridSearchCV(
    estimator = pipe,
    param_grid = c_grid,
    scoring = 'accuracy'
)

In [52]:
# Run the grid search on the summoner-level feature set; the target is True when team 100 won.
gs.fit(X_train[sumLevel], y_train['winner'] == 100)

In [53]:
# Parameter combination selected by the grid search.
gs.best_params_

{'logistic__C': 0.01}

In [54]:
# Non-zero coefficients of the model selected by the grid search.
# Feature names and coefficients are both taken from gs.best_estimator_, so
# they cannot drift apart if `pipe` is refitted or rebound independently.
best_pipe = gs.best_estimator_

# Names of the columns that reach the classifier (all steps but the last).
features = best_pipe[:-1].get_feature_names_out()

logistic = best_pipe['logistic']

# For a two-class problem scikit-learn stores a single coefficient row and a
# single intercept; they describe the second entry of classes_ (here True,
# i.e. "team 100 won"). Positive values therefore favour a team-100 win.
positive_class = logistic.classes_[1]

coefficients = pd.DataFrame({
    'variable': ['intercept'] + list(features),
    'coefficient': [logistic.intercept_[0]] + list(logistic.coef_[0])
})

coefficients[coefficients['coefficient'] != 0]

Unnamed: 0,variable,coefficient
0,intercept,0.042686
1,blueGold,0.289664
2,redGold,-0.232057
5,blueKills,0.002636
14,blueGold blueXP,0.559987
18,blueGold blueDragons,0.150046
25,redGold redXP,-0.63248
27,redGold redKills,-0.02062
29,redGold redDragons,-0.180214
35,blueXP blueKills,0.047085


In [55]:
# In-sample accuracy of the best estimator found by the grid search.
accuracy_score(
    y_train['winner'].eq(100),
    gs.predict(X_train[sumLevel])
)

0.714625

**Accounting for games that ended prior to the 10 minute mark using gold/xp/kills/dragons as predictors and ridge regression - submissionV8**

In [56]:
# Training matches whose timeline already contains the end of the game: the
# last event of the last frame is GAME_END, and that event carries the winning
# team. The notebook treats these as games that ended before the 10-minute mark.
df = []

for matchId in range(0, 8000):
    # `with` closes each file as soon as it has been parsed.
    with open('../data/train_timelines/train_timelines/timeline_{matchIds}.json'.format(matchIds = matchId)) as f:
        data = json.load(f)

    final_events = data['frames'][-1]['events']
    # Guard against a final frame with no events, which would otherwise raise IndexError.
    if final_events and final_events[-1]['type'] == 'GAME_END':
        df.append({
            'matchId': matchId,
            'winner' : final_events[-1]['winningTeam']
        })

# Explicit columns keep 'matchId' / 'winner' available even if no match qualified.
underTenTrain_df = pd.DataFrame(df, columns = ['matchId', 'winner'])

In [57]:
# Keep only the matches that are NOT in the already-finished set, for both the
# features and the labels.
# NOTE(review): the two frames are filtered independently and later paired by
# position, which assumes X_train and y_train list matches in the same order — confirm.
underTen = underTenTrain_df['matchId']

features_still_running = ~X_train['matchId'].isin(underTen)
labels_still_running = ~y_train['matchId'].isin(underTen)

X_trainOverTen = X_train[features_still_running]
y_trainOverTen = y_train[labels_still_running]

In [58]:
# Interaction pipeline fitted only on matches without a GAME_END event,
# using the gold/XP/kills/dragons feature set.
interaction_steps = [
    ('pf', PolynomialFeatures(interaction_only = True, include_bias = False)),
    ('vt', VarianceThreshold()),
    ('scaler', StandardScaler()),
    ('logistic', LogisticRegression(max_iter = 10000)),
]
pipe = Pipeline(steps = interaction_steps)

pipe.fit(X_trainOverTen[dragons], y_trainOverTen['winner'].eq(100))

In [59]:
# Model predictions for the still-running training matches, expressed as the
# winning team id (True -> 100, False -> 200).
predicitions = X_trainOverTen[['matchId']].copy()
predicitions['winner'] = np.where(pipe.predict(X_trainOverTen[dragons]), 100, 200)

In [60]:
# Training accuracy of the combined rule: model predictions for still-running
# matches plus the winner read directly from the timeline for finished ones.
# Both sides are aligned on matchId explicitly instead of relying on y_train
# happening to be stored in ascending matchId order.
combined = pd.concat([predicitions, underTenTrain_df]).set_index('matchId')['winner']
actual = y_train.set_index('matchId')['winner']

aligned = combined.reindex(actual.index)
# Every labelled match must have exactly one prediction.
assert aligned.notna().all(), 'some matches in y_train have no prediction'

accuracy_score(actual == 100, aligned == 100)

0.72225

In [61]:
# Test matches whose timeline already contains the end of the game: the last
# event of the last frame is GAME_END, and that event carries the winning team.
df = []

for matchId in range(8000, 10000):
    # `with` closes each file as soon as it has been parsed.
    with open('../data/test_timelines/test_timelines/timeline_{matchIds}.json'.format(matchIds = matchId)) as f:
        data = json.load(f)

    final_events = data['frames'][-1]['events']
    # Guard against a final frame with no events, which would otherwise raise IndexError.
    if final_events and final_events[-1]['type'] == 'GAME_END':
        df.append({
            'matchId': matchId,
            'winner' : final_events[-1]['winningTeam']
        })

# Explicit columns keep 'matchId' / 'winner' available even if no match qualified.
underTenTest_df = pd.DataFrame(df, columns = ['matchId', 'winner'])

In [62]:
# Test matches that still need a model prediction (no GAME_END in their timeline).
already_finished = X_test['matchId'].isin(underTenTest_df['matchId'])
X_testOverTen = X_test[~already_finished]

In [63]:
# Model predictions for the still-running test matches, expressed as the
# winning team id (True -> 100, False -> 200).
predicitions = X_testOverTen[['matchId']].copy()
predicitions['winner'] = np.where(pipe.predict(X_testOverTen[dragons]), 100, 200)

In [64]:
# Final submission: model predictions plus the known winners of finished games, ordered by matchId.
submissionV8 = pd.concat(objs = [predicitions, underTenTest_df]).sort_values('matchId')

In [65]:
#submissionV8.to_csv('../submissions/submissionV8.csv', index = False)

**Trying to improve the model using neural networks, while still filtering out games under ten minutes**

In [66]:
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import MinMaxScaler

In [67]:
# Neural network on min-max-scaled features, trained only on matches without a
# GAME_END event.
# MLP training is randomized; a fixed random_state (the same seed value the
# notebook uses for its train/test split) makes the reported accuracy repeatable.
pipe = Pipeline(
    steps = [
        ('scaler', MinMaxScaler()),
        ('nn', MLPClassifier(activation = 'tanh',
                             hidden_layer_sizes = (100, 100, 100),
                             alpha = 1,
                             max_iter = 10000,
                             random_state = 777))
    ]
)

pipe.fit(X_trainOverTen[sumLevel], y_trainOverTen['winner'] == 100)

In [68]:
# Neural-network predictions for the still-running training matches, expressed
# as the winning team id (True -> 100, False -> 200).
predicitions = X_trainOverTen[['matchId']].copy()
predicitions['winner'] = np.where(pipe.predict(X_trainOverTen[sumLevel]), 100, 200)

In [69]:
# Training accuracy of the combined rule: neural-network predictions for
# still-running matches plus the winner read from the timeline for finished ones.
# Both sides are aligned on matchId explicitly instead of relying on y_train
# happening to be stored in ascending matchId order.
combined = pd.concat([predicitions, underTenTrain_df]).set_index('matchId')['winner']
actual = y_train.set_index('matchId')['winner']

aligned = combined.reindex(actual.index)
# Every labelled match must have exactly one prediction.
assert aligned.notna().all(), 'some matches in y_train have no prediction'

accuracy_score(actual == 100, aligned == 100)

0.720375

**Adding champion win percentage and champion score to gold/xp/kills/dragons/summoner level/champion points and filter games out that ended under 10 minutes**

In [70]:
# Per-match table merged into the training features in the next cell.
# NOTE(review): presumably the source of the champion win-percentage / score
# columns used further down — confirm against the file.
patchTrain = pd.read_csv(filepath_or_buffer = '../data/patchTrain.csv')

In [71]:
# Left-join the patch table onto the training features by matchId.
# NOTE: re-running this cell merges the same columns a second time.
X_train = pd.merge(X_train, patchTrain, how = 'left', on = ['matchId'])

In [72]:
# Full feature set: team totals, mastery points, summoner levels, champion win
# percentage and champion score.
champWinPercent = (
    ['blueGold', 'redGold']
    + ['blueXP', 'redXP']
    + ['blueKills', 'redKills']
    + ['blueDragons', 'redDragons']
    + ['blueChampPoints', 'redChampPoints']
    + ['blueSumLevel', 'redSumLevel']
    + ['blueChampWinPercent', 'redChampWinPercent']
    + ['blueChampScore', 'redChampScore']
)

# Drop the matches that already ended; the feature frame keeps only the model
# columns (matchId is no longer part of X_trainOverTen after this cell).
finished_ids = underTenTrain_df['matchId']

X_trainOverTen = X_train.loc[~X_train['matchId'].isin(finished_ids), champWinPercent]

y_trainOverTen = y_train[~y_train['matchId'].isin(finished_ids)]

**Using a train/test split to avoid having to use Kaggle to tell if my model is an improvement**

In [73]:
from sklearn.model_selection import train_test_split

In [74]:
# Hold out part of the labelled data for local evaluation; the fixed seed keeps the split repeatable.
XTrain, XTest, yTrain, yTest = train_test_split(
    X_trainOverTen,
    y_trainOverTen,
    random_state = 777
)

**Now using gradient boosting model**

In [75]:
from sklearn.ensemble import GradientBoostingClassifier

In [96]:
# Gradient-boosted trees on min-max-scaled pairwise-interaction features.
# A fixed random_state (the same seed value as the train/test split) makes the
# fitted trees, and hence the held-out accuracy and the importances, repeatable.
gbr = Pipeline(
    steps = [
        ('pf', PolynomialFeatures(interaction_only = True, include_bias = False)),
        ('scaler', MinMaxScaler()),
        ('gb', GradientBoostingClassifier(n_estimators = 1000, random_state = 777))
    ]
)

gbr.fit(XTrain, yTrain['winner'] == 100)

In [97]:
# Held-out accuracy of the gradient-boosting pipeline.
held_out_blue_won = yTest['winner'].eq(100)
accuracy_score(held_out_blue_won, gbr.predict(XTest))

0.6814928425357873

In [98]:
# Feature importances of the boosted model, labelled with the names produced
# by the interaction step (the scaler in between leaves the columns unchanged).
interaction_names = gbr['pf'].get_feature_names_out()
importance_values = gbr['gb'].feature_importances_

importances = pd.DataFrame({
    'variable': interaction_names,
    'importance': importance_values
})

importances.sort_values(by = 'importance', ascending = False)

Unnamed: 0,variable,importance
17,blueGold blueXP,0.128252
42,redGold redChampWinPercent,0.076364
27,blueGold blueChampWinPercent,0.068976
32,redGold redXP,0.052339
105,redDragons redChampWinPercent,0.015548
...,...,...
5,redKills,0.000078
4,blueKills,0.000014
6,blueDragons,0.000001
91,blueDragons redDragons,0.000000


**Trying a neural network with gold/xp/kills/dragons/summoner level/champion points/champion score/champion win percentage and filtering out games that are under 10 minutes**

In [101]:
# Neural network on min-max-scaled pairwise-interaction features, fitted on
# the training part of the local split.
# MLP training is randomized; a fixed random_state (the same seed value as the
# train/test split) makes the held-out accuracy repeatable.
pipe = Pipeline(
    steps = [
        ('pf', PolynomialFeatures(interaction_only = True, include_bias = False)),
        ('scaler', MinMaxScaler()),
        ('nn', MLPClassifier(activation = 'tanh',
                             hidden_layer_sizes = (100, 100, 100),
                             alpha = 1,
                             max_iter = 10000,
                             random_state = 777))
    ]
)

pipe.fit(XTrain, yTrain['winner'] == 100)

In [102]:
# Held-out accuracy of the neural-network pipeline.
held_out_blue_won = yTest['winner'].eq(100)
accuracy_score(held_out_blue_won, pipe.predict(XTest))

0.6998977505112475