In [60]:
import pandas as pd
import numpy as np
import json
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, PolynomialFeatures, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.feature_selection import VarianceThreshold
from sklearn.metrics import accuracy_score

In [61]:
# Training labels; the modelling cells below read the 'winner' column of this frame.
winners_csv = '../data/train_winners.csv'
y_train = pd.read_csv(winners_csv)

In [62]:
# Load a single training timeline (match 0) to explore its structure.
# The context manager closes the file handle as soon as the JSON is parsed,
# instead of leaving it open for the rest of the session.
with open('../data/train_timelines/train_timelines/timeline_{matchIds}.json'.format(matchIds = 0)) as f:
    data = json.load(f)

In [63]:
# Spot-check one event of the loaded timeline (frame 8, event 40) and show its monsterType.
sample_event = data['frames'][8]['events'][40]
sample_event['monsterType']

'DRAGON'

**Looking into gold and XP**

In [64]:
# Row buffer: the loop in the following cell appends one dict per match to it.
df = list()

In [65]:
# Build one row per training match holding each team's gold and XP totals,
# read from the last frame of the match timeline. Rows are appended to `df`.
blue_ids = ['1', '2', '3', '4', '5']    # participantFrames keys summed into the blue* columns
red_ids = ['6', '7', '8', '9', '10']    # participantFrames keys summed into the red* columns

for matchId in range(0, 8000):
    # `with` closes each timeline file once parsed rather than relying on garbage collection.
    with open('../data/train_timelines/train_timelines/timeline_{matchIds}.json'.format(matchIds = matchId)) as f:
        data = json.load(f)

    # Only the final frame is used for the totals.
    last_frame = data['frames'][-1]['participantFrames']
    df.append({
        'matchId' : matchId,
        'blueGold' : sum(last_frame[pid]['totalGold'] for pid in blue_ids),
        'blueXP' : sum(last_frame[pid]['xp'] for pid in blue_ids),
        'redGold' : sum(last_frame[pid]['totalGold'] for pid in red_ids),
        'redXP' : sum(last_frame[pid]['xp'] for pid in red_ids)
    })

In [66]:
# Turn the collected per-match dicts into the training feature frame (one row per dict).
X_train = pd.DataFrame(data = df)

**Not using difference in gold/xp, and using team totals.**

In [67]:
# Column subsets for the team-total models: gold only, xp only, and both together.
gold = ['blueGold', 'redGold']
xp = ['blueXP', 'redXP']
both = gold + xp    # gold columns first, then the xp columns

**Just gold**

In [68]:
# Fit on the gold columns only and report accuracy on the same training rows.
# NOTE(review): assumes y_train rows are in the same order as X_train (built for matchId 0..7999) — confirm.
blue_won = y_train['winner'] == 100    # True where the recorded winner is 100
logreg = LogisticRegression()
logreg.fit(X_train[gold], blue_won)

accuracy_score(blue_won, logreg.predict(X_train[gold]))

0.705375

**Just xp**

In [69]:
# Fit on the xp columns only and report accuracy on the same training rows.
blue_won = y_train['winner'] == 100    # True where the recorded winner is 100
logreg = LogisticRegression()
logreg.fit(X_train[xp], blue_won)

accuracy_score(blue_won, logreg.predict(X_train[xp]))

0.672

**Gold/xp - submissionV4**

In [70]:
# Fit on gold and xp columns together and report accuracy on the same training rows.
# This fitted `logreg` is the model the submissionV4 cell predicts with.
blue_won = y_train['winner'] == 100    # True where the recorded winner is 100
logreg = LogisticRegression()
logreg.fit(X_train[both], blue_won)

accuracy_score(blue_won, logreg.predict(X_train[both]))

0.70975

In [71]:
# Build the test feature frame: one row per test match (ids 8000..9999) with each
# team's gold and XP totals read from the last frame of the match timeline.
df = []

blue_ids = ['1', '2', '3', '4', '5']    # participantFrames keys summed into the blue* columns
red_ids = ['6', '7', '8', '9', '10']    # participantFrames keys summed into the red* columns

for matchId in range(8000, 10000):
    # `with` closes each timeline file once parsed rather than relying on garbage collection.
    with open('../data/test_timelines/test_timelines/timeline_{matchIds}.json'.format(matchIds = matchId)) as f:
        data = json.load(f)

    # Only the final frame is used for the totals.
    last_frame = data['frames'][-1]['participantFrames']
    df.append({
        'matchId' : matchId,
        'blueGold' : sum(last_frame[pid]['totalGold'] for pid in blue_ids),
        'blueXP' : sum(last_frame[pid]['xp'] for pid in blue_ids),
        'redGold' : sum(last_frame[pid]['totalGold'] for pid in red_ids),
        'redXP' : sum(last_frame[pid]['xp'] for pid in red_ids)
    })

X_test = pd.DataFrame(df)

In [72]:
# Two-column submission frame: matchId plus the predicted winner,
# encoded as 100 where the model predicts True and 200 otherwise.
submissionV4 = (
    X_test[['matchId']]
    .assign(winner = np.where(logreg.predict(X_test[both]), 100, 200))
)

In [73]:
# Export is disabled; uncomment the line below to write the V4 submission CSV.
#submissionV4.to_csv('../submissions/submissionV4.csv', index = False)

**Using the blue-minus-red difference in team gold/xp totals**

In [74]:
# Add blue-minus-red difference columns and point the feature lists at them.
# Note: this rebinds `gold`, `xp` and `both`, so later cells see the difference columns.
X_train = X_train.assign(
    goldDifference = X_train['blueGold'] - X_train['redGold'],
    xpDifference = X_train['blueXP'] - X_train['redXP'],
)

gold = ['goldDifference']
xp = ['xpDifference']
both = gold + xp

**Just Gold**

In [75]:
# Fit on the columns currently listed in `gold` and report accuracy on the training rows.
blue_won = y_train['winner'] == 100    # True where the recorded winner is 100
logreg = LogisticRegression()
logreg.fit(X_train[gold], blue_won)

accuracy_score(blue_won, logreg.predict(X_train[gold]))

0.7055

**Just xp**

In [76]:
# Fit on the columns currently listed in `xp` and report accuracy on the training rows.
blue_won = y_train['winner'] == 100    # True where the recorded winner is 100
logreg = LogisticRegression()
logreg.fit(X_train[xp], blue_won)

accuracy_score(blue_won, logreg.predict(X_train[xp]))

0.6715

**Gold/xp**

In [77]:
# Fit on the columns currently listed in `both` and report accuracy on the training rows.
blue_won = y_train['winner'] == 100    # True where the recorded winner is 100
logreg = LogisticRegression()
logreg.fit(X_train[both], blue_won)

accuracy_score(blue_won, logreg.predict(X_train[both]))

0.70825

**Adding kills to gold/xp totals**

In [78]:
# Count champion kills per team for every training match.
# Each CHAMPION_KILL event adds one row credited to a single team; the rows are
# then summed per match. Matches without any such event get no row in killsTrain.
df = []

for matchId in range(0, 8000):
    # `with` closes each timeline file once parsed rather than relying on garbage collection.
    with open('../data/train_timelines/train_timelines/timeline_{matchIds}.json'.format(matchIds = matchId)) as f:
        data = json.load(f)

    # Frame order does not matter because the rows are summed, so iterate forwards.
    for frame in data['frames']:
        for event in frame['events']:
            if event['type'] != 'CHAMPION_KILL':
                continue
            # NOTE(review): `killerId < 6` also matches any id below 1 (e.g. 0, if the
            # data uses it for kills not made by a player) — confirm that is intended.
            blue_kill = event['killerId'] < 6
            df.append({
                'matchId' : matchId,
                'blueKills' : 1 if blue_kill else 0,
                'redKills' : 0 if blue_kill else 1
            })

killsTrain = pd.DataFrame(df).groupby('matchId').sum().reset_index()

In [79]:
# Attach per-match kill counts to the training features; matches with no row in
# killsTrain get 0. Any kill columns already present are dropped first, so
# re-running this cell does not create blueKills_x / blueKills_y duplicates.
X_train = (
    X_train
    .drop(columns = ['blueKills', 'redKills'], errors = 'ignore')
    .merge(right = killsTrain, on = ['matchId'], how = 'left')
    .fillna(0)
)

kills = ['blueGold', 'redGold', 'blueXP', 'redXP', 'blueKills', 'redKills']

**Not using difference in gold/xp/kills, and using team totals.**

In [80]:
# Fit on gold, xp and kill totals and report accuracy on the same training rows.
blue_won = y_train['winner'] == 100    # True where the recorded winner is 100
logreg = LogisticRegression()
logreg.fit(X_train[kills], blue_won)

accuracy_score(blue_won, logreg.predict(X_train[kills]))

0.70975

**Adding dragons to gold/xp/kills totals**

In [81]:
# Count dragon kills per team for every training match.
# Each ELITE_MONSTER_KILL event whose monsterType is 'DRAGON' adds one row: it is
# credited to blue when killerTeamId is 100 and to red for any other value.
# Rows are summed per match; matches without such an event get no row.
df = []

for matchId in range(0, 8000):
    # `with` closes each timeline file once parsed rather than relying on garbage collection.
    with open('../data/train_timelines/train_timelines/timeline_{matchIds}.json'.format(matchIds = matchId)) as f:
        data = json.load(f)

    # Frame order does not matter because the rows are summed, so iterate forwards.
    for frame in data['frames']:
        for event in frame['events']:
            # monsterType is only read on elite-monster events (short-circuit `or`).
            if event['type'] != 'ELITE_MONSTER_KILL' or event['monsterType'] != 'DRAGON':
                continue
            blue_dragon = event['killerTeamId'] == 100
            df.append({
                'matchId' : matchId,
                'blueDragons' : 1 if blue_dragon else 0,
                'redDragons' : 0 if blue_dragon else 1
            })

dragonsTrain = pd.DataFrame(df).groupby('matchId').sum().reset_index()

In [82]:
# Attach per-match dragon counts to the training features; matches with no row in
# dragonsTrain get 0. Any dragon columns already present are dropped first, so
# re-running this cell does not create blueDragons_x / blueDragons_y duplicates.
X_train = (
    X_train
    .drop(columns = ['blueDragons', 'redDragons'], errors = 'ignore')
    .merge(right = dragonsTrain, on = ['matchId'], how = 'left')
    .fillna(0)
)

dragons = ['blueGold', 'redGold', 'blueXP', 'redXP', 'blueKills', 'redKills', 'blueDragons', 'redDragons']

**Not using difference in gold/xp/kills/dragons, and using team totals.**

In [83]:
# Fit on gold, xp, kill and dragon totals and report accuracy on the same training rows.
blue_won = y_train['winner'] == 100    # True where the recorded winner is 100
logreg = LogisticRegression()
logreg.fit(X_train[dragons], blue_won)

accuracy_score(blue_won, logreg.predict(X_train[dragons]))

0.70975

**Trying regularized logistic regression (scikit-learn's default L2 penalty, not lasso/L1) on scaled interaction features of gold/xp/kills/dragons - submissionV5**

In [84]:
# Pipeline: interaction-only polynomial expansion -> variance filter -> standardisation
# -> logistic regression. No penalty argument is passed to LogisticRegression, so
# scikit-learn's default regularisation applies.
pipeline_steps = [
    ('pf', PolynomialFeatures(interaction_only = True, include_bias = False)),
    ('vt', VarianceThreshold()),
    ('scaler', StandardScaler()),
    ('logistic', LogisticRegression(max_iter = 10000)),
]
pipe = Pipeline(steps = pipeline_steps)

# Fit on the gold/xp/kills/dragons columns; the target is True where the winner is 100.
pipe.fit(X_train[dragons], y_train['winner'] == 100)

In [85]:
# Accuracy of the fitted pipeline on the rows it was trained on.
accuracy_score(y_true = y_train['winner'] == 100, y_pred = pipe.predict(X_train[dragons]))

0.71925

In [86]:
# Count champion kills per team for every test match (ids 8000..9999).
# Each CHAMPION_KILL event adds one row credited to a single team; the rows are
# then summed per match. Matches without any such event get no row in killsTest.
df = []

for matchId in range(8000, 10000):
    # `with` closes each timeline file once parsed rather than relying on garbage collection.
    with open('../data/test_timelines/test_timelines/timeline_{matchIds}.json'.format(matchIds = matchId)) as f:
        data = json.load(f)

    # Frame order does not matter because the rows are summed, so iterate forwards.
    for frame in data['frames']:
        for event in frame['events']:
            if event['type'] != 'CHAMPION_KILL':
                continue
            # NOTE(review): `killerId < 6` also matches any id below 1 (e.g. 0, if the
            # data uses it for kills not made by a player) — confirm that is intended.
            blue_kill = event['killerId'] < 6
            df.append({
                'matchId' : matchId,
                'blueKills' : 1 if blue_kill else 0,
                'redKills' : 0 if blue_kill else 1
            })

killsTest = pd.DataFrame(df).groupby('matchId').sum().reset_index()

In [87]:
# Attach per-match kill counts to the test features; matches with no row in
# killsTest get 0. Any kill columns already present are dropped first, so
# re-running this cell does not create blueKills_x / blueKills_y duplicates.
X_test = (
    X_test
    .drop(columns = ['blueKills', 'redKills'], errors = 'ignore')
    .merge(right = killsTest, on = ['matchId'], how = 'left')
    .fillna(0)
)

Unnamed: 0,matchId,blueGold,blueXP,redGold,redXP,blueKills,redKills
0,8000,17313,18099,16933,17117,7.0,5.0
1,8001,17492,16756,17533,16527,8.0,8.0
2,8002,16206,16640,17480,16518,8.0,10.0
3,8003,17784,16759,18126,17173,10.0,10.0
4,8004,16341,18377,16993,17113,4.0,7.0
...,...,...,...,...,...,...,...
1995,9995,15440,17688,15060,17845,5.0,4.0
1996,9996,19123,18273,17433,18545,8.0,8.0
1997,9997,14719,17211,17129,18888,3.0,8.0
1998,9998,18353,18298,15134,16823,8.0,5.0


In [88]:
# Count dragon kills per team for every test match (ids 8000..9999).
# Each ELITE_MONSTER_KILL event whose monsterType is 'DRAGON' adds one row: it is
# credited to blue when killerTeamId is 100 and to red for any other value.
# Rows are summed per match; matches without such an event get no row.
df = []

for matchId in range(8000, 10000):
    # `with` closes each timeline file once parsed rather than relying on garbage collection.
    with open('../data/test_timelines/test_timelines/timeline_{matchIds}.json'.format(matchIds = matchId)) as f:
        data = json.load(f)

    # Frame order does not matter because the rows are summed, so iterate forwards.
    for frame in data['frames']:
        for event in frame['events']:
            # monsterType is only read on elite-monster events (short-circuit `or`).
            if event['type'] != 'ELITE_MONSTER_KILL' or event['monsterType'] != 'DRAGON':
                continue
            blue_dragon = event['killerTeamId'] == 100
            df.append({
                'matchId' : matchId,
                'blueDragons' : 1 if blue_dragon else 0,
                'redDragons' : 0 if blue_dragon else 1
            })

dragonsTest = pd.DataFrame(df).groupby('matchId').sum().reset_index()

In [89]:
# Attach per-match dragon counts to the test features; matches with no row in
# dragonsTest get 0. Any dragon columns already present are dropped first, so
# re-running this cell does not create blueDragons_x / blueDragons_y duplicates.
X_test = (
    X_test
    .drop(columns = ['blueDragons', 'redDragons'], errors = 'ignore')
    .merge(right = dragonsTest, on = ['matchId'], how = 'left')
    .fillna(0)
)

Unnamed: 0,matchId,blueGold,blueXP,redGold,redXP,blueKills,redKills,blueDragons,redDragons
0,8000,17313,18099,16933,17117,7.0,5.0,0.0,1.0
1,8001,17492,16756,17533,16527,8.0,8.0,0.0,0.0
2,8002,16206,16640,17480,16518,8.0,10.0,0.0,0.0
3,8003,17784,16759,18126,17173,10.0,10.0,0.0,0.0
4,8004,16341,18377,16993,17113,4.0,7.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...
1995,9995,15440,17688,15060,17845,5.0,4.0,1.0,0.0
1996,9996,19123,18273,17433,18545,8.0,8.0,0.0,0.0
1997,9997,14719,17211,17129,18888,3.0,8.0,1.0,0.0
1998,9998,18353,18298,15134,16823,8.0,5.0,1.0,0.0


In [97]:
# Two-column submission frame: matchId plus the pipeline's predicted winner,
# encoded as 100 where the pipeline predicts True and 200 otherwise.
submissionV5 = (
    X_test[['matchId']]
    .assign(winner = np.where(pipe.predict(X_test[dragons]), 100, 200))
)

In [99]:
# Export is disabled; uncomment the line below to write the V5 submission CSV.
#submissionV5.to_csv('../submissions/submissionV5.csv', index = False)