In [52]:
import numpy as np
import pandas as pd
import pickle

import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns

In [53]:
pd.set_option('display.max_columns', 50)

In [54]:
data = pd.concat([
    pd.read_csv('rankData/Challenger_Ranked_Games.csv'),
    pd.read_csv('rankData/Master_Ranked_Games.csv'),
    pd.read_csv('rankData/GrandMaster_Ranked_Games.csv')
])
data.shape

(199925, 50)

In [55]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 199925 entries, 0 to 65895
Data columns (total 50 columns):
 #   Column                   Non-Null Count   Dtype  
---  ------                   --------------   -----  
 0   gameId                   199925 non-null  int64  
 1   gameDuraton              199925 non-null  int64  
 2   blueWins                 199925 non-null  int64  
 3   blueFirstBlood           199925 non-null  int64  
 4   blueFirstTower           199925 non-null  int64  
 5   blueFirstBaron           199925 non-null  int64  
 6   blueFirstDragon          199925 non-null  int64  
 7   blueFirstInhibitor       199925 non-null  int64  
 8   blueDragonKills          199925 non-null  int64  
 9   blueBaronKills           199925 non-null  int64  
 10  blueTowerKills           199925 non-null  int64  
 11  blueInhibitorKills       199925 non-null  int64  
 12  blueWardPlaced           199925 non-null  int64  
 13  blueWardkills            199925 non-null  int64  
 14  blueK

In [56]:
data = data.drop_duplicates('gameId') # 중복제거

In [57]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 145740 entries, 0 to 65891
Data columns (total 50 columns):
 #   Column                   Non-Null Count   Dtype  
---  ------                   --------------   -----  
 0   gameId                   145740 non-null  int64  
 1   gameDuraton              145740 non-null  int64  
 2   blueWins                 145740 non-null  int64  
 3   blueFirstBlood           145740 non-null  int64  
 4   blueFirstTower           145740 non-null  int64  
 5   blueFirstBaron           145740 non-null  int64  
 6   blueFirstDragon          145740 non-null  int64  
 7   blueFirstInhibitor       145740 non-null  int64  
 8   blueDragonKills          145740 non-null  int64  
 9   blueBaronKills           145740 non-null  int64  
 10  blueTowerKills           145740 non-null  int64  
 11  blueInhibitorKills       145740 non-null  int64  
 12  blueWardPlaced           145740 non-null  int64  
 13  blueWardkills            145740 non-null  int64  
 14  blueK

In [58]:
from sklearn.base import (BaseEstimator, TransformerMixin)


class Deduplicator(TransformerMixin, BaseEstimator): # 데이터 중복제거
    def __init__(self, unique):
        self._unique = unique # 중복제거
    def fit(self, X, y=None):
        return self
    def transform(self, X):
        return X.drop_duplicates(self._unique)
    
    
class Dropper(TransformerMixin, BaseEstimator):
    def __init__(self, columns):
        self._columns = columns
    def fit(self, X, y=None):
        return self
    def transform(self, X):
        return X.drop(self._columns, axis=1)
    

class FieldTransformer(TransformerMixin, BaseEstimator): # blue팀의 점수 - red 팀의 점수. 즉 점수가 높을수록 유리

    label = None

    def fit(self, X, y=None):
        return self

    def transform(self, X):
        inp = pd.DataFrame()

        blue_columns = [x for x in data.columns if x.startswith('blue')]
        red_columns = [x.replace('blue', 'red') for x in blue_columns]
        for b, row in zip(blue_columns, red_columns):
            if row in X.columns:
                name = b.replace('blue', '')
                col = X[b] - X[row]
                inp[name] = col
                
        self.label = inp['Wins']
        return inp.drop('Wins', axis=1)


from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler


pipeline = Pipeline([
    ('dedup', Deduplicator('gameId')), # gameId로 중복제거
    ('fields', FieldTransformer()), # 블루팀 점수 - 레드 팀 점수
    ('drop', Dropper(['WardPlaced', 'Wardkills', 'ChampionDamageDealt', 'JungleMinionKills', 'TotalHeal', 'ObjectDamageDealt'])), # 불필요한 열 제거 (ward)
    ('scaler', StandardScaler()), # 표준화
])


def label(pipeline):
    return pipeline.steps[1][1].label.values

In [59]:
from sklearn.model_selection import train_test_split
train, test = train_test_split(data, shuffle=True, random_state=42)
print(train.shape, test.shape)

(109305, 50) (36435, 50)


In [60]:
from sklearn.linear_model import LogisticRegression

X_train = pipeline.fit_transform(train)
y_train = label(pipeline)

reg = LogisticRegression()
reg.fit(X_train, y_train)

from sklearn.model_selection import cross_val_score
cross_val_score(LogisticRegression(), X_train, y_train)

array([0.99011939, 0.98806093, 0.98934175, 0.98906729, 0.99039385])

In [61]:
def pred(reg, X, y):
    assert(X.shape[0] == y.shape[0])
    y_pred = reg.predict(X)
    good = (y_pred == y).sum()
    total = len(y)
    percentage = good / total
    print(f'{good}/{total} accuracy: {percentage:%}')

# train set accuracy
pred(reg, X_train, y_train)

# test set accuracy
X_test = pipeline.transform(test)
y_test = label(pipeline)
pred(reg, X_test, y_test)

108149/109305 accuracy: 98.942409%
36035/36435 accuracy: 98.902155%


In [118]:
championship = pd.read_csv('rankData/LWC_fianlMatch_v3.csv')

In [119]:
testData = championship
testData

Unnamed: 0,FirstBlood,FirstTower,FirstBaron,FirstDragon,FirstInhibitor,dragonKills,BaronKills,TowerKills,InhibitorKills,championKills,championDeath,championAssist,totalGold,totalMinionKills,totalLevel,avgLevel,killingSpree
0,1,0,0,0,0,-2,0,0,0,1,-1,2,1500,31,2,1,0
1,1,1,0,-1,0,-4,0,2,0,1,-1,-1,3000,41,1,0,1
2,1,1,0,-1,0,-4,0,1,0,-3,3,-4,0,12,-2,0,-3
3,1,1,-1,-1,-1,-4,-1,-3,-1,-10,10,-15,-6600,-17,-8,-2,-10


In [132]:
testpred = pipeline.named_steps['scaler'].transform(testData)
print("Predicted probabilities:\n{}".format(reg.predict_proba(testpred).round(3)*100))

Predicted probabilities:
[[  3.4  96.6]
 [  7.6  92.4]
 [ 62.3  37.7]
 [100.    0. ]]


In [121]:
reg.predict(testpred)

array([ 1,  1, -1, -1], dtype=int64)