In [1]:
import pandas as pd
import numpy as np
import json
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, PolynomialFeatures, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.feature_selection import VarianceThreshold
from sklearn.metrics import accuracy_score

In [2]:
# Training labels; the `winner` column is compared against 100 (blue side)
# in the model cells below to build the boolean target.
y_train = pd.read_csv('../data/train_winners.csv')

In [3]:
# Load the timeline of match 0 as a sample for exploring the JSON layout.
# The context manager closes the file handle as soon as parsing is done
# instead of leaving it open for the lifetime of the kernel.
with open('../data/train_timelines/train_timelines/timeline_{matchIds}.json'.format(matchIds = 0)) as f:
    data = json.load(f)

In [4]:
# Peek at the `monsterType` field of one event (frame 8, event 40) in the
# sample timeline to see how elite-monster kills are labelled.
data['frames'][8]['events'][40]['monsterType']

'DRAGON'

**Looking into gold and XP**

In [5]:
# Accumulator for per-match feature records (a list of dicts); it is turned
# into a DataFrame once all matches have been read.
df = []

In [6]:
# Build one record per training match with each team's total gold and XP,
# read from the final frame of the match timeline.
# Reset the accumulator here so that re-running this cell does not append
# duplicate rows (the test-set version of this cell does the same).
df = []

for matchId in range(0, 8000):
    path = '../data/train_timelines/train_timelines/timeline_{matchIds}.json'.format(matchIds = matchId)

    # `with` releases the file handle after parsing; a bare open() here
    # would leave one descriptor open per match.
    with open(path) as f:
        data = json.load(f)

    # participantFrames is keyed by the participant id as a string.
    # Ids 1-5 are summed as the blue team and ids 6-10 as the red team.
    final_frame = data['frames'][-1]['participantFrames']

    df.append({
        'matchId' : matchId,
        'blueGold' : sum(final_frame[str(pid)]['totalGold'] for pid in range(1, 6)),
        'blueXP' : sum(final_frame[str(pid)]['xp'] for pid in range(1, 6)),
        'redGold' : sum(final_frame[str(pid)]['totalGold'] for pid in range(6, 11)),
        'redXP' : sum(final_frame[str(pid)]['xp'] for pid in range(6, 11))
    })

In [7]:
# One row per training match: matchId plus the blue/red gold and XP totals.
X_train = pd.DataFrame(df)

**Not using difference in gold/xp, and using team totals.**

In [8]:
# Column groups used to select predictors for the models below.
gold = ['blueGold', 'redGold']
xp = ['blueXP', 'redXP']
# Gold columns first, then XP columns.
both = [*gold, *xp]

**Just gold**

In [9]:
# Fit on the gold columns only; the target is True when blue (team 100) won.
logreg = LogisticRegression()
logreg.fit(X = X_train[gold], y = y_train['winner'].eq(100))

# Accuracy on the same rows the model was trained on.
accuracy_score(
    y_true = y_train['winner'].eq(100),
    y_pred = logreg.predict(X_train[gold])
)

0.705375

**Just xp**

In [10]:
# Fit on the XP columns only; the target is True when blue (team 100) won.
logreg = LogisticRegression()
logreg.fit(X = X_train[xp], y = y_train['winner'].eq(100))

# Accuracy on the same rows the model was trained on.
accuracy_score(
    y_true = y_train['winner'].eq(100),
    y_pred = logreg.predict(X_train[xp])
)

0.672

**Gold/xp - submissionV4**

In [11]:
# Fit on gold and XP columns together; this is the model used for submissionV4.
logreg = LogisticRegression()
logreg.fit(X = X_train[both], y = y_train['winner'].eq(100))

# Accuracy on the same rows the model was trained on.
accuracy_score(
    y_true = y_train['winner'].eq(100),
    y_pred = logreg.predict(X_train[both])
)

0.70975

In [12]:
# Build the same per-match gold/XP totals for the test matches (ids 8000-9999).
df = []

for matchId in range(8000, 10000):
    path = '../data/test_timelines/test_timelines/timeline_{matchIds}.json'.format(matchIds = matchId)

    # `with` releases the file handle after parsing; a bare open() here
    # would leave one descriptor open per match.
    with open(path) as f:
        data = json.load(f)

    # participantFrames is keyed by the participant id as a string.
    # Ids 1-5 are summed as the blue team and ids 6-10 as the red team.
    final_frame = data['frames'][-1]['participantFrames']

    df.append({
        'matchId' : matchId,
        'blueGold' : sum(final_frame[str(pid)]['totalGold'] for pid in range(1, 6)),
        'blueXP' : sum(final_frame[str(pid)]['xp'] for pid in range(1, 6)),
        'redGold' : sum(final_frame[str(pid)]['totalGold'] for pid in range(6, 11)),
        'redXP' : sum(final_frame[str(pid)]['xp'] for pid in range(6, 11))
    })

X_test = pd.DataFrame(df)

In [13]:
# Submission frame: matchId plus the predicted winner, encoded as 100 when
# the model predicts a blue win and 200 otherwise.
submissionV4 = X_test[['matchId']].assign(
    winner = np.where(logreg.predict(X_test[both]), 100, 200)
)

In [14]:
#submissionV4.to_csv('../submissions/submissionV4.csv', index = False)

**Using the blue-minus-red difference in team gold/xp totals instead of the separate team totals**

In [15]:
# Blue-minus-red differences of the team totals.
X_train['goldDifference'] = X_train['blueGold'].sub(X_train['redGold'])
X_train['xpDifference'] = X_train['blueXP'].sub(X_train['redXP'])

# Re-point the feature lists at the difference columns for the cells below.
gold = ['goldDifference']
xp = ['xpDifference']
both = [*gold, *xp]

**Just Gold**

In [16]:
# Fit on the gold difference only; the target is True when blue (team 100) won.
logreg = LogisticRegression()
logreg.fit(X = X_train[gold], y = y_train['winner'].eq(100))

# Accuracy on the same rows the model was trained on.
accuracy_score(
    y_true = y_train['winner'].eq(100),
    y_pred = logreg.predict(X_train[gold])
)

0.7055

**Just xp**

In [17]:
# Fit on the XP difference only; the target is True when blue (team 100) won.
logreg = LogisticRegression()
logreg.fit(X = X_train[xp], y = y_train['winner'].eq(100))

# Accuracy on the same rows the model was trained on.
accuracy_score(
    y_true = y_train['winner'].eq(100),
    y_pred = logreg.predict(X_train[xp])
)

0.6715

**Gold/xp**

In [18]:
# Fit on both difference columns; the target is True when blue (team 100) won.
logreg = LogisticRegression()
logreg.fit(X = X_train[both], y = y_train['winner'].eq(100))

# Accuracy on the same rows the model was trained on.
accuracy_score(
    y_true = y_train['winner'].eq(100),
    y_pred = logreg.predict(X_train[both])
)

0.70825

**Adding kills to gold/xp totals**

In [19]:
# Count champion kills per team for every training match.
# One record is appended per CHAMPION_KILL event and the records are summed
# per match afterwards, so matches without any kill event are absent from
# killsTrain.
df = []

for matchId in range(0, 8000):
    path = '../data/train_timelines/train_timelines/timeline_{matchIds}.json'.format(matchIds = matchId)

    # `with` releases the file handle after parsing; a bare open() here
    # would leave one descriptor open per match.
    with open(path) as f:
        data = json.load(f)

    # Frame order does not matter because the records are summed per match.
    for frame in data['frames']:
        for event in frame['events']:
            if event['type'] != 'CHAMPION_KILL':
                continue

            # Killer ids below 6 are credited to blue, everything else to red.
            # NOTE(review): if the data contains killerId 0 for kills not
            # credited to a champion, those are counted as blue kills here;
            # confirm and keep the train/test cells in sync if this changes.
            blue_kill = event['killerId'] < 6
            df.append({
                'matchId' : matchId,
                'blueKills' : int(blue_kill),
                'redKills' : int(not blue_kill)
            })

killsTrain = pd.DataFrame(df).groupby('matchId').sum().reset_index()

In [20]:
# Attach kill counts; matches missing from killsTrain get 0 for both teams.
X_train = pd.merge(X_train, killsTrain, how = 'left', on = 'matchId').fillna(value = 0)

# Team totals for gold, XP and kills.
kills = [
    column
    for pair in (
        ('blueGold', 'redGold'),
        ('blueXP', 'redXP'),
        ('blueKills', 'redKills'),
    )
    for column in pair
]

**Not using difference in gold/xp/kills, and using team totals.**

In [21]:
# Fit on gold/XP/kill totals; the target is True when blue (team 100) won.
logreg = LogisticRegression()
logreg.fit(X = X_train[kills], y = y_train['winner'].eq(100))

# Accuracy on the same rows the model was trained on.
accuracy_score(
    y_true = y_train['winner'].eq(100),
    y_pred = logreg.predict(X_train[kills])
)

0.70975

**Adding dragons to gold/xp/kills totals**

In [22]:
# Count dragon kills per team for every training match.
# One record is appended per dragon kill and the records are summed per
# match afterwards, so matches without a dragon kill are absent from
# dragonsTrain.
df = []

for matchId in range(0, 8000):
    path = '../data/train_timelines/train_timelines/timeline_{matchIds}.json'.format(matchIds = matchId)

    # `with` releases the file handle after parsing; a bare open() here
    # would leave one descriptor open per match.
    with open(path) as f:
        data = json.load(f)

    # Frame order does not matter because the records are summed per match.
    for frame in data['frames']:
        for event in frame['events']:
            # `monsterType` is only read for ELITE_MONSTER_KILL events
            # (short-circuit evaluation), as in the nested-if form.
            if event['type'] == 'ELITE_MONSTER_KILL' and event['monsterType'] == 'DRAGON':
                # Team id 100 is blue; any other value is credited to red.
                blue_dragon = event['killerTeamId'] == 100
                df.append({
                    'matchId' : matchId,
                    'blueDragons' : int(blue_dragon),
                    'redDragons' : int(not blue_dragon)
                })

dragonsTrain = pd.DataFrame(df).groupby('matchId').sum().reset_index()

In [23]:
# Attach dragon counts; matches missing from dragonsTrain get 0 for both teams.
X_train = pd.merge(X_train, dragonsTrain, how = 'left', on = 'matchId').fillna(value = 0)

# Team totals for gold, XP, kills and dragons.
dragons = [
    column
    for pair in (
        ('blueGold', 'redGold'),
        ('blueXP', 'redXP'),
        ('blueKills', 'redKills'),
        ('blueDragons', 'redDragons'),
    )
    for column in pair
]

**Not using difference in gold/xp/kills/dragons, and using team totals.**

In [24]:
# Fit on gold/XP/kill/dragon totals; the target is True when blue (team 100) won.
logreg = LogisticRegression()
logreg.fit(X = X_train[dragons], y = y_train['winner'].eq(100))

# Accuracy on the same rows the model was trained on.
accuracy_score(
    y_true = y_train['winner'].eq(100),
    y_pred = logreg.predict(X_train[dragons])
)

0.70975

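**Trying the interaction-feature pipeline (pairwise interactions + scaling + logistic regression; called "ridge" in these notes) using gold/xp/kills/dragons - submissionV5**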

In [25]:
# Pipeline: expand the predictors with interaction terms, drop features that
# fail the default variance threshold, standardize, then fit a logistic
# regression with a raised iteration cap.
pipe = Pipeline([
    ('pf', PolynomialFeatures(include_bias = False, interaction_only = True)),
    ('vt', VarianceThreshold()),
    ('scaler', StandardScaler()),
    ('logistic', LogisticRegression(max_iter = 10000)),
])

# The target is True when blue (team 100) won.
pipe.fit(X = X_train[dragons], y = y_train['winner'].eq(100))

In [26]:
# Accuracy of the fitted pipeline on the rows it was trained on.
accuracy_score(
    y_true = y_train['winner'].eq(100),
    y_pred = pipe.predict(X_train[dragons])
)

0.71925

In [27]:
# Count champion kills per team for every test match (ids 8000-9999).
# One record is appended per CHAMPION_KILL event and the records are summed
# per match afterwards, so matches without any kill event are absent from
# killsTest.
df = []

for matchId in range(8000, 10000):
    path = '../data/test_timelines/test_timelines/timeline_{matchIds}.json'.format(matchIds = matchId)

    # `with` releases the file handle after parsing; a bare open() here
    # would leave one descriptor open per match.
    with open(path) as f:
        data = json.load(f)

    # Frame order does not matter because the records are summed per match.
    for frame in data['frames']:
        for event in frame['events']:
            if event['type'] != 'CHAMPION_KILL':
                continue

            # Killer ids below 6 are credited to blue, everything else to red.
            # NOTE(review): if the data contains killerId 0 for kills not
            # credited to a champion, those are counted as blue kills here;
            # confirm and keep the train/test cells in sync if this changes.
            blue_kill = event['killerId'] < 6
            df.append({
                'matchId' : matchId,
                'blueKills' : int(blue_kill),
                'redKills' : int(not blue_kill)
            })

killsTest = pd.DataFrame(df).groupby('matchId').sum().reset_index()

In [28]:
# Attach kill counts; matches missing from killsTest get 0 for both teams.
X_test = pd.merge(X_test, killsTest, how = 'left', on = 'matchId').fillna(value = 0)

In [29]:
# Count dragon kills per team for every test match (ids 8000-9999).
# One record is appended per dragon kill and the records are summed per
# match afterwards, so matches without a dragon kill are absent from
# dragonsTest.
df = []

for matchId in range(8000, 10000):
    path = '../data/test_timelines/test_timelines/timeline_{matchIds}.json'.format(matchIds = matchId)

    # `with` releases the file handle after parsing; a bare open() here
    # would leave one descriptor open per match.
    with open(path) as f:
        data = json.load(f)

    # Frame order does not matter because the records are summed per match.
    for frame in data['frames']:
        for event in frame['events']:
            # `monsterType` is only read for ELITE_MONSTER_KILL events
            # (short-circuit evaluation), as in the nested-if form.
            if event['type'] == 'ELITE_MONSTER_KILL' and event['monsterType'] == 'DRAGON':
                # Team id 100 is blue; any other value is credited to red.
                blue_dragon = event['killerTeamId'] == 100
                df.append({
                    'matchId' : matchId,
                    'blueDragons' : int(blue_dragon),
                    'redDragons' : int(not blue_dragon)
                })

dragonsTest = pd.DataFrame(df).groupby('matchId').sum().reset_index()

In [30]:
# Attach dragon counts; matches missing from dragonsTest get 0 for both teams.
X_test = pd.merge(X_test, dragonsTest, how = 'left', on = 'matchId').fillna(value = 0)

In [31]:
# Submission frame: matchId plus the pipeline's predicted winner, encoded as
# 100 when a blue win is predicted and 200 otherwise.
submissionV5 = X_test[['matchId']].assign(
    winner = np.where(pipe.predict(X_test[dragons]), 100, 200)
)

In [99]:
#submissionV5.to_csv('../submissions/submissionV5.csv', index = False)

**Adding champion points to gold/xp/kills/dragons**

In [32]:
# Mastery table; the cells below use its summonerId, championId and
# championPoints columns.
champMastery = pd.read_csv('../data/champion_mastery.csv')
# Participant tables for the train and test matches; the cells below use
# matchId, teamId, summonerId, championId and summonerLevel.
participantsTrain = pd.read_csv('../data/participants_train.csv')
participantsTest = pd.read_csv('../data/participants_test.csv')

In [47]:
# Total champion-mastery points per (match, team) for the training matches.
# Participants without a matching mastery row get NaN from the left merge,
# which is treated as 0 points before summing.
champPoints_df = (
    participantsTrain[['matchId', 'teamId', 'summonerId', 'championId']]
    .merge(
        right = champMastery[['summonerId', 'championId', 'championPoints']],
        how = 'left',
        on = ['summonerId', 'championId']
    )
    .fillna(0)[['matchId', 'teamId', 'championPoints']]
    .groupby(['matchId', 'teamId']).sum().reset_index()
)

# Select from champPoints_df (not `champPoints`, which is the feature-name
# list defined in a later cell and is undefined here on a fresh kernel).
blueChampPoints = (
    champPoints_df.loc[champPoints_df['teamId'] == 100][['matchId', 'championPoints']]
    .rename(columns = {'championPoints' : 'blueChampPoints'})
)

redChampPoints = (
    champPoints_df.loc[champPoints_df['teamId'] == 200][['matchId', 'championPoints']]
    .rename(columns = {'championPoints' : 'redChampPoints'})
)

# Attach one column per team to the training features.
X_train = X_train.merge(
    right = blueChampPoints,
    how = 'left',
    on = 'matchId'
).merge(
    right = redChampPoints,
    how = 'left',
    on = 'matchId'
)

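**Trying plain logistic regression and the interaction-feature pipeline (pairwise interactions + scaling + logistic regression; called "ridge" in these notes) using gold/xp/kills/dragons/champion points - submissionV7**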

In [120]:
# Feature names: blue/red column pairs for each statistic, flattened in order.
champPoints = [
    column
    for pair in (
        ('blueGold', 'redGold'),
        ('blueXP', 'redXP'),
        ('blueKills', 'redKills'),
        ('blueDragons', 'redDragons'),
        ('blueChampPoints', 'redChampPoints'),
    )
    for column in pair
]

In [75]:
# Fit on the champPoints feature list; the target is True when blue (team 100) won.
logreg = LogisticRegression()
logreg.fit(X = X_train[champPoints], y = y_train['winner'].eq(100))

# Accuracy on the same rows the model was trained on.
accuracy_score(
    y_true = y_train['winner'].eq(100),
    y_pred = logreg.predict(X_train[champPoints])
)

0.707375

In [119]:
# Refit the pipeline on the champPoints feature list (this replaces the
# previous fit of `pipe`).
pipe.fit(X = X_train[champPoints], y = y_train['winner'].eq(100))

# Accuracy on the same rows the pipeline was trained on.
accuracy_score(
    y_true = y_train['winner'].eq(100),
    y_pred = pipe.predict(X_train[champPoints])
)

0.72125

In [116]:
# Total champion-mastery points per (match, team) for the test matches.
# Left merge keeps every participant; missing mastery rows become 0 points.
champPoints_df = participantsTest[['matchId', 'teamId', 'summonerId', 'championId']].merge(
    right = champMastery[['summonerId', 'championId', 'championPoints']],
    on = ['summonerId', 'championId'],
    how = 'left'
)
champPoints_df = champPoints_df.fillna(0)[['matchId', 'teamId', 'championPoints']]
champPoints_df = champPoints_df.groupby(['matchId', 'teamId']).sum().reset_index()

# Split into one frame per team, renaming the value column accordingly.
blueChampPoints = champPoints_df.loc[
    champPoints_df['teamId'].eq(100), ['matchId', 'championPoints']
].rename(columns = {'championPoints' : 'blueChampPoints'})

redChampPoints = champPoints_df.loc[
    champPoints_df['teamId'].eq(200), ['matchId', 'championPoints']
].rename(columns = {'championPoints' : 'redChampPoints'})

# Attach one column per team to the test features.
X_test = X_test.merge(right = blueChampPoints, on = 'matchId', how = 'left')
X_test = X_test.merge(right = redChampPoints, on = 'matchId', how = 'left')

In [121]:
# Submission frame: matchId plus the pipeline's predicted winner, encoded as
# 100 when a blue win is predicted and 200 otherwise.
submissionV7 = X_test[['matchId']].assign(
    winner = np.where(pipe.predict(X_test[champPoints]), 100, 200)
)

In [123]:
#submissionV7.to_csv('../submissions/submissionV7.csv', index = False)

**Adding summoner level to gold/xp/kills/dragons/champion points**

In [81]:
# Sum of summoner levels per (match, team) for the training matches.
sumLevel_df = participantsTrain[['matchId', 'teamId', 'summonerLevel']]
sumLevel_df = sumLevel_df.groupby(['matchId', 'teamId']).sum().reset_index()

# Split into one frame per team, renaming the value column accordingly.
blueSumLevel = sumLevel_df.loc[
    sumLevel_df['teamId'].eq(100), ['matchId', 'summonerLevel']
].rename(columns = {'summonerLevel' : 'blueSumLevel'})

redSumLevel = sumLevel_df.loc[
    sumLevel_df['teamId'].eq(200), ['matchId', 'summonerLevel']
].rename(columns = {'summonerLevel' : 'redSumLevel'})

# Attach one column per team to the training features.
X_train = X_train.merge(right = blueSumLevel, on = 'matchId', how = 'left')
X_train = X_train.merge(right = redSumLevel, on = 'matchId', how = 'left')

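**Trying plain logistic regression and the interaction-feature pipeline (pairwise interactions + scaling + logistic regression; called "ridge" in these notes) using gold/xp/kills/dragons/champion points/summoner level**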

In [96]:
# Feature names: blue/red column pairs for each statistic, flattened in order.
sumLevel = [
    column
    for pair in (
        ('blueGold', 'redGold'),
        ('blueXP', 'redXP'),
        ('blueKills', 'redKills'),
        ('blueDragons', 'redDragons'),
        ('blueChampPoints', 'redChampPoints'),
        ('blueSumLevel', 'redSumLevel'),
    )
    for column in pair
]

In [97]:
# Fit on the sumLevel feature list; the target is True when blue (team 100) won.
logreg = LogisticRegression()
logreg.fit(X = X_train[sumLevel], y = y_train['winner'].eq(100))

# Accuracy on the same rows the model was trained on.
accuracy_score(
    y_true = y_train['winner'].eq(100),
    y_pred = logreg.predict(X_train[sumLevel])
)

0.70775

In [98]:
# Refit the pipeline on the sumLevel feature list (this replaces the
# previous fit of `pipe`).
pipe.fit(X = X_train[sumLevel], y = y_train['winner'].eq(100))

# Accuracy on the same rows the pipeline was trained on.
accuracy_score(
    y_true = y_train['winner'].eq(100),
    y_pred = pipe.predict(X_train[sumLevel])
)

0.717875

**The best model seems to be the interaction-feature pipeline (called "ridge" in these notes) with gold/xp/kills/dragons/champion points as predictors. Now to see if adding explicit blue-times-red interaction terms can help improve the model.**

In [104]:
# Explicit blue-times-red product for each statistic.
X_train['goldInteraction'] = X_train['blueGold'].mul(X_train['redGold'])
X_train['xpInteraction'] = X_train['blueXP'].mul(X_train['redXP'])
X_train['killsInteraction'] = X_train['blueKills'].mul(X_train['redKills'])
X_train['dragonsInteraction'] = X_train['blueDragons'].mul(X_train['redDragons'])
X_train['champPointsInteraction'] = X_train['blueChampPoints'].mul(X_train['redChampPoints'])

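**Trying plain logistic regression and the interaction-feature pipeline (called "ridge" in these notes) using gold/xp/kills/dragons/champion points plus explicit interaction terms (summoner level is not included in this feature list)**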

In [106]:
# Feature names: for each statistic the blue column, the red column and
# their product column, flattened in order.
interactions = [
    column
    for triple in (
        ('blueGold', 'redGold', 'goldInteraction'),
        ('blueXP', 'redXP', 'xpInteraction'),
        ('blueKills', 'redKills', 'killsInteraction'),
        ('blueDragons', 'redDragons', 'dragonsInteraction'),
        ('blueChampPoints', 'redChampPoints', 'champPointsInteraction'),
    )
    for column in triple
]

In [108]:
# Fit on the interactions feature list; the target is True when blue (team 100) won.
logreg = LogisticRegression()
logreg.fit(X = X_train[interactions], y = y_train['winner'].eq(100))

# Accuracy on the same rows the model was trained on.
accuracy_score(
    y_true = y_train['winner'].eq(100),
    y_pred = logreg.predict(X_train[interactions])
)

0.491125

In [109]:
# Refit the pipeline on the interactions feature list (this replaces the
# previous fit of `pipe`).
pipe.fit(X = X_train[interactions], y = y_train['winner'].eq(100))

# Accuracy on the same rows the pipeline was trained on.
accuracy_score(
    y_true = y_train['winner'].eq(100),
    y_pred = pipe.predict(X_train[interactions])
)

0.721