In [497]:
import pandas as pd
import requests
import json
from urllib import parse
import matplotlib as plt
import numpy as np
import os
import time
import math
import scipy.stats as ss
import warnings

warnings.filterwarnings(action='ignore') 

from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
import eli5
from eli5.sklearn import PermutationImportance

In [187]:
# Read the second pair of CSV exports first; their names stay available as globals.
matchresult2 = pd.read_csv('./matchresult2.csv')
timeline2 = pd.read_csv('./timeline2.csv')

# Stack each first file on top of its second file and renumber the rows 0..n-1.
matchresult = pd.concat([pd.read_csv('./matchresult.csv'), matchresult2], ignore_index=True)
timeline = pd.concat([pd.read_csv('./timeline.csv'), timeline2], ignore_index=True)

In [188]:
# Display the combined match-result frame (last expression of the cell is rendered).
matchresult

Unnamed: 0.1,Unnamed: 0,allInPings,assistMePings,assists,baitPings,baronKills,basicPings,bountyLevel,champExperience,champLevel,...,challenges.highestChampionDamage,challenges.shortestTimeToAceFromFirstTakedown,challenges.soloTurretsLategame,challenges.teleportTakedowns,challenges.highestWardKills,challenges.fasterSupportQuestCompletion,challenges.baronBuffGoldAdvantageOverThreshold,challenges.thirdInhibitorDestroyedTime,challenges.earliestElderDragon,challenges.hadAfkTeammate
0,0,0,8,9,0,0,0,0,24459,18,...,,,,,,,,,,
1,1,0,0,8,1,0,0,0,21446,18,...,,,,,,,,,,
2,2,1,3,7,0,0,0,0,24538,18,...,,,,,,,,,,
3,3,0,1,8,0,0,0,0,20109,18,...,,,,,,,,,,
4,4,1,17,15,2,0,0,0,14913,16,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32465,5,0,1,5,0,0,0,0,17883,17,...,,,1.0,,,,,,,
32466,6,0,0,5,0,0,0,0,15320,16,...,,,,,,,,,,
32467,7,0,5,5,0,0,0,0,18469,18,...,,,,,,,,,,
32468,8,0,0,7,0,0,0,0,14986,16,...,,,,,,,,,,


In [189]:
# Start from every column of the combined match-result frame.
columns = matchresult.columns.tolist()

# Columns that are excluded from the working frame built from `columns`.
remove_list = [
    'Unnamed: 0', 'gameEndedInEarlySurrender', 'gameEndedInSurrender',
    'summonerId', 'summonerLevel', 'summonerName',
    'lane', 'nexusKills', 'nexusLost', 'nexusTakedowns', 'profileIcon', 'puuid',
    'riotIdName', 'riotIdTagline', 'role',
    'teamEarlySurrendered', 'allInPings', 'baitPings', 'basicPings',
    'championId', 'champLevel', 'commandPings',
    'dangerPings', 'eligibleForProgression', 'holdPings',
    'challenges.dancedWithRiftHerald', 'challenges.hadOpenNexus', 'challenges.outnumberedNexusKill',
    'challenges.playedChampSelectPosition', 'challenges.poroExplosions', 'challenges.snowballsHit',
    'challenges.takedownsInEnemyFountain',
    'perks.statPerks.defense', 'perks.statPerks.flex', 'perks.statPerks.offense',
    'perks.styles', 'challenges.damagePerMinute',
]

# list.remove raises ValueError if a listed column is not present in the frame.
for unwanted in remove_list:
    columns.remove(unwanted)

In [190]:
# Take an explicit copy of the selected columns so the new columns below are
# added to an independent frame instead of to a selection of `matchresult`
# (adding columns to a selection triggers pandas' SettingWithCopy warning).
df = matchresult[columns].copy()

# Placeholder columns for the per-minute statistics that are filled in later.
# They are created as floats because the values written into them are ratios.
for stat_name in ('GDM', 'KPDM', 'XPDM', 'GPM', 'DPM', 'DTPM'):
    df[stat_name] = 0.0

In [191]:
# Derive the per-minute statistics for every participant row in one vectorized
# pass (replaces a row-by-row loop that wrote through chained indexing, which
# pandas does not guarantee to reach the underlying frame).
#
# Row pairing: the row at offset p inside each consecutive block of 10 rows is
# compared with the row at offset (p + 5) % 10 of the same block. This presumes
# each match occupies 10 consecutive rows with both teams listed in the same
# lane order -- TODO confirm against the data export.
n_rows = len(df)
if n_rows % 10 != 0:
    raise ValueError(f'expected a multiple of 10 participant rows, got {n_rows}')

row_pos = np.arange(n_rows)
opp_pos = (row_pos // 10) * 10 + (row_pos % 10 + 5) % 10  # block start + opposing offset

# challenges.gameLength divided by 60 is used as the per-minute denominator.
gamelength = df['challenges.gameLength'].to_numpy() / 60
gold = df['goldEarned'].to_numpy()
kill_assist = (df['kills'] + df['assists']).to_numpy()
experience = df['champExperience'].to_numpy()

df = df.assign(
    GDM=(gold - gold[opp_pos]) / gamelength,                  # gold difference vs. paired row
    KPDM=(kill_assist - kill_assist[opp_pos]) / gamelength,   # (kills + assists) difference
    XPDM=(experience - experience[opp_pos]) / gamelength,     # experience difference
    GPM=gold / gamelength,
    DPM=df['totalDamageDealtToChampions'].to_numpy() / gamelength,
    DTPM=df['totalDamageTaken'].to_numpy() / gamelength,
)

# Persist the enriched frame; it is read back from this file afterwards.
df.to_csv('./rawdata.csv', index=False)

In [192]:
# Reload the working frame from the CSV written by the previous step.
df = pd.read_csv('./rawdata.csv')

In [193]:
# Replace every missing value with 0 before splitting.
df = df.fillna(0)

# One frame per teamPosition value, each re-indexed from 0.
dfTOP, dfJNG, dfMID, dfBOT, dfSUP = (
    df.loc[df['teamPosition'] == role].reset_index(drop=True)
    for role in ('TOP', 'JUNGLE', 'MIDDLE', 'BOTTOM', 'UTILITY')
)

In [194]:
# For each position frame, keep the champions whose row count is at least
# int(1% of that frame's rows), i.e. champions with a pick rate of 1% or more.
# Order follows value_counts().
champtop, champjng, champmid, champbot, champsup = (
    [
        champion
        for champion, picks in frame.championName.value_counts().items()
        if picks >= int(len(frame) * 0.01)
    ]
    for frame in (dfTOP, dfJNG, dfMID, dfBOT, dfSUP)
)

In [542]:
def getWinRate(position, champion):
    """Fit a per-champion logistic regression and export its validation win probabilities.

    The rows of the position frame whose ``championName`` equals ``champion`` are
    split in stored order (no shuffling): the first 70% are used for training and
    the remaining 30% for validation. The ``win`` column is the target; every
    remaining column after the exclusions below is used as a feature, unscaled.

    Parameters
    ----------
    position : str
        One of 'TOP', 'JUNGLE', 'MIDDLE', 'BOTTOM', 'UTILITY'. Selects the
        module-level frame (dfTOP, dfJNG, dfMID, dfBOT, dfSUP) to read from.
    champion : str
        Value matched against the ``championName`` column.

    Returns
    -------
    pandas.DataFrame
        Validation rows with columns ``winrate`` (second ``predict_proba``
        column times 100) and ``win`` (the recorded outcome).

    Raises
    ------
    ValueError
        If ``position`` is not one of the five supported values.

    Side effects
    ------------
    Writes ``./train/{position}/{champion}.csv`` (training split) and
    ``./winrate_predict/{position}/{champion}.csv`` (returned frame), creating
    the directories when they do not exist yet.
    """
    if position == 'TOP':
        positionData = dfTOP
    elif position == 'JUNGLE':
        positionData = dfJNG
    elif position == 'MIDDLE':
        positionData = dfMID
    elif position == 'BOTTOM':
        positionData = dfBOT
    elif position == 'UTILITY':
        positionData = dfSUP
    else:
        # Previously an unknown position surfaced later as an UnboundLocalError.
        raise ValueError(f'unknown position: {position!r}')

    # Columns removed before modelling (identifiers, items, pings, and raw totals).
    excluded_columns = [
        'participantId', 'championName', 'individualPosition', 'teamPosition',
        'item0', 'item1', 'item2', 'item3', 'item4', 'item5', 'item6', 'challenges.mythicItemUsed',
        'goldEarned', 'challenges.goldPerMinute', 'totalDamageDealtToChampions', 'totalDamageTaken',
        'challenges.abilityUses', 'timePlayed', 'assistMePings', 'itemsPurchased', 'pushPings',
        'visionClearedPings', 'getBackPings', 'onMyWayPings', 'enemyMissingPings',
        'needVisionPings', 'enemyVisionPings', 'challenges.killParticipation',
    ]
    train = positionData[positionData.championName == champion].drop(columns=excluded_columns)

    # Ordered 70/30 split.
    split_at = int(len(train) * 0.7)
    traindata = train[:split_at].reset_index(drop=True)
    valdata = train[split_at:].reset_index(drop=True)

    trainX = traindata.drop('win', axis=1)
    trainY = traindata['win']
    valX = valdata.drop('win', axis=1)
    valY = valdata['win']

    # Features are deliberately left unscaled: the model already seemed to
    # overfit, so standardization was not applied (translated original note).
    modelLR = LogisticRegression(random_state=42, penalty='l2')
    modelLR.fit(trainX, trainY)

    # predict_proba columns are labelled loserate/winrate; this presumes the
    # fitted classes are ordered (loss, win) -- verify via modelLR.classes_.
    winrate = pd.DataFrame(modelLR.predict_proba(valX)*100, columns= ['loserate', 'winrate'])['winrate']
    winrate = pd.DataFrame([winrate, valY]).T

    train_dir = f'./train/{position}'
    predict_dir = f'./winrate_predict/{position}'
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(predict_dir, exist_ok=True)

    traindata.to_csv(f'{train_dir}/{champion}.csv', index=False)   # data used to train the model
    winrate.to_csv(f'{predict_dir}/{champion}.csv', index=False)   # data for deriving the "n-portion" score (translated original note)

    return winrate

In [543]:
# Train and export one model per (position, champion), position by position.
for position_name, champion_names in (
    ('TOP', champtop),
    ('JUNGLE', champjng),
    ('MIDDLE', champmid),
    ('BOTTOM', champbot),
    ('UTILITY', champsup),
):
    for champion_name in champion_names:
        getWinRate(position_name, champion_name)

In [566]:
# Write each per-position champion list to ./championlist/<name>.csv (no index column).
for file_stem, champion_names in (
    ('champtop', champtop),
    ('champjng', champjng),
    ('champmid', champmid),
    ('champbot', champbot),
    ('champsup', champsup),
):
    pd.Series(champion_names).to_csv(f'./championlist/{file_stem}.csv', index=False)

In [539]:
# Load the exported validation predictions for Aphelios (BOTTOM). The following
# cell reads the `winrate` and `win` columns, which exist in the
# ./winrate_predict export but not in the ./train export (features + `win` only).
data2 = pd.read_csv('./winrate_predict/BOTTOM/Aphelios.csv')

In [547]:
# Z-score the predicted win percentages of the rows recorded as losses (win == 0).
winrate = data2
ss.zscore(winrate.loc[winrate['win'] == 0, 'winrate'])

0     -0.150263
4     -0.018531
7     -0.382501
8     -0.441560
9     -0.653636
         ...   
241   -0.203362
242   -0.218400
245   -0.329793
248   -0.148973
249   -0.654228
Name: winrate, Length: 126, dtype: float64

In [None]:
# Z-score the predicted win percentages of the rows recorded as losses (win == 0).
ss.zscore(winrate.loc[winrate['win'] == 0, 'winrate'])

In [488]:
# Single-champion walk-through: rows of the fourth champion in the top-lane list.
excluded_columns = [
    'participantId', 'championName', 'individualPosition', 'teamPosition',
    'item0', 'item1', 'item2', 'item3', 'item4', 'item5', 'item6', 'challenges.mythicItemUsed',
    'goldEarned', 'challenges.goldPerMinute', 'totalDamageDealtToChampions', 'totalDamageTaken',
    'challenges.abilityUses', 'timePlayed', 'assistMePings', 'itemsPurchased', 'pushPings',
    'visionClearedPings', 'getBackPings', 'onMyWayPings', 'enemyMissingPings',
    'needVisionPings', 'enemyVisionPings', 'challenges.killParticipation',
]
train = dfTOP[dfTOP.championName == champtop[3]].drop(columns=excluded_columns)

# Ordered split: first 70% of the rows for training, the rest for validation.
split_at = int(len(train) * 0.7)
traindata = train.iloc[:split_at].reset_index(drop=True)
valdata = train.iloc[split_at:].reset_index(drop=True)

modelLR = LogisticRegression(random_state=42, penalty='l2')
scaler = StandardScaler()  # instantiated but not applied; features stay unscaled

# `win` is the target; every other remaining column is a feature.
trainX = traindata.drop(columns='win')
trainY = traindata['win']

valX = valdata.drop(columns='win')
valY = valdata['win']

In [489]:
# Fit the logistic regression on the unscaled training features.
# Scaled alternative (not used): modelLR.fit(trainXScaled, trainY)
modelLR.fit(trainX, trainY)

LogisticRegression(random_state=42)

In [490]:
# Score the fitted model on the unscaled validation split.
# Scaled alternative (not used): modelLR.score(valXScaled, valY)
modelLR.score(valX, valY)

0.8666666666666667

In [491]:
# Same validation score as the previous cell, re-evaluated.
modelLR.score(valX, valY)

0.8666666666666667

In [562]:
# Class probabilities for the validation rows as percentages; show the column
# labelled 'winrate' (the second predict_proba column).
class_percentages = pd.DataFrame(
    modelLR.predict_proba(valX) * 100,
    columns=['loserate', 'winrate'],
)
class_percentages['winrate']

0      5.479336
1     99.908838
2     99.919299
3      0.030771
4     81.774781
        ...    
70    99.978050
71     0.015002
72     0.389739
73     0.759532
74     0.019872
Name: winrate, Length: 75, dtype: float64

In [492]:
# Pair each validation row's predicted win percentage with its recorded outcome.
class_percentages = pd.DataFrame(
    modelLR.predict_proba(valX) * 100,
    columns=['loserate', 'winrate'],
)
winrate = pd.DataFrame([class_percentages['winrate'], valY]).T

In [534]:
# Display the predicted win percentage next to the recorded outcome.
winrate

Unnamed: 0,winrate,win
0,5.479336,0.0
1,99.908838,1.0
2,99.919299,1.0
3,0.030771,0.0
4,81.774781,0.0
...,...,...
70,99.978050,1.0
71,0.015002,0.0
72,0.389739,0.0
73,0.759532,0.0


In [493]:
# Mean predicted win percentage over the rows recorded as losses (win == 0).
winrate.loc[winrate['win'] == 0, 'winrate'].mean()

12.81816696756537

In [494]:
# Mean predicted win percentage over the rows recorded as wins (win == 1).
winrate.loc[winrate['win'] == 1, 'winrate'].mean()

84.71653641001087

In [499]:
# Z-score the predicted win percentages of the rows recorded as losses (win == 0).
ss.zscore(winrate.loc[winrate['win'] == 0, 'winrate'])

0    -0.260472
3    -0.453855
4     2.447432
6    -0.452527
10   -0.454908
11   -0.387560
14   -0.431193
15   -0.453960
16   -0.444436
17   -0.454519
23   -0.415342
25   -0.398894
26   -0.454870
27   -0.447467
28   -0.454521
30   -0.454874
31   -0.454003
32    3.085349
35   -0.434316
38    2.554099
39   -0.454947
40    1.210214
41   -0.399652
42   -0.454498
44    0.578913
45   -0.314876
47    0.551569
48   -0.453706
50   -0.437025
51   -0.453415
52    2.442802
53   -0.453953
54   -0.454125
55   -0.443193
56   -0.160521
58   -0.431755
59    2.844065
61   -0.454923
63   -0.451112
65   -0.369787
69   -0.441479
71   -0.454414
72   -0.441114
73   -0.427989
74   -0.454242
Name: winrate, dtype: float64

In [496]:
# Permutation importance of every feature on the validation split, scored by F1.
importance_estimator = PermutationImportance(modelLR, scoring="f1", random_state=42)
perm = importance_estimator.fit(valX, valY)

# Show one row per training feature, labelled with the validation column names.
shown_feature_count = len(trainX.columns)
feature_labels = valX.columns.tolist()
eli5.show_weights(perm, top=shown_feature_count, feature_names=feature_labels)

Weight,Feature
0.2248  ± 0.1070,champExperience
0.2106  ± 0.0757,physicalDamageDealt
0.1706  ± 0.1217,totalDamageDealt
0.1316  ± 0.0841,totalHeal
0.0940  ± 0.0567,physicalDamageTaken
0.0808  ± 0.0413,trueDamageTaken
0.0711  ± 0.0154,magicDamageTaken
0.0323  ± 0.0722,goldSpent
0.0285  ± 0.0368,challenges.earliestBaron
0.0254  ± 0.0400,challenges.earliestDragonTakedown
