# 1. 모듈 import & 데이터 경로 설정

In [761]:
import os
import csv
import json
import torch
import numpy as np
import pandas as pd
from tqdm import tqdm
from pprint import pprint
from collections import defaultdict
from collections import OrderedDict
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from pytorch_tabnet.pretraining import TabNetPretrainer
from pytorch_tabnet.multitask import TabNetMultiTaskClassifier
from pytorch_tabnet.tab_model import TabNetClassifier, TabNetRegressor

CSV_DATA_PATH = "../data/csv/"
JSON_DATA_PATH = "../data/json/"

# 2. 매치 데이터로부터 데이터 프레임 만들기


In [500]:
# Peek at the "info" payload of the first match stored under game version
# "11.24.412.2185" to see which fields a match record carries.
# NOTE(review): in the visible notebook `match_data` is first assigned in the
# section 2.1 cell further down, so this cell would raise NameError on a fresh
# kernel (Restart & Run All) — confirm and move it below the load if so.
match_data["11.24.412.2185"][0]["info"]

{'gameCreation': 1638940791000,
 'gameDuration': 1089,
 'gameEndTimestamp': 1638941970358,
 'gameId': 5613073433,
 'gameMode': 'CLASSIC',
 'gameName': 'teambuilder-match-5613073433',
 'gameStartTimestamp': 1638940880845,
 'gameType': 'MATCHED_GAME',
 'gameVersion': '11.24.412.2185',
 'mapId': 11,
 'participants': [{'assists': 9,
   'baronKills': 0,
   'bountyLevel': 2,
   'champExperience': 9635,
   'champLevel': 12,
   'championId': 164,
   'championName': 'Camille',
   'championTransform': 0,
   'consumablesPurchased': 2,
   'damageDealtToBuildings': 8123,
   'damageDealtToObjectives': 8861,
   'damageDealtToTurrets': 8123,
   'damageSelfMitigated': 10540,
   'deaths': 3,
   'detectorWardsPlaced': 0,
   'doubleKills': 0,
   'dragonKills': 0,
   'firstBloodAssist': False,
   'firstBloodKill': False,
   'firstTowerAssist': False,
   'firstTowerKill': False,
   'gameEndedInEarlySurrender': False,
   'gameEndedInSurrender': False,
   'goldEarned': 9666,
   'goldSpent': 8475,
   'individu

In [504]:
def create_data_frame(match_data):
    """Flatten raw match records into a DataFrame with one row per match.

    Args:
        match_data: Mapping of game-version string to a list of match records.
            From each record this function reads ``record["metadata"]``
            (``matchId``, ``participants``) and ``record["info"]``
            (``gameCreation``, ``gameDuration``, ``participants`` and, when
            present, ``gameEndTimestamp``).

    Returns:
        pandas.DataFrame with the columns ``match_Id``, ``gameCreation``,
        ``participants``, ``champions``, ``game_duration`` (seconds),
        ``gold_difference``, ``damage_difference``, ``kills``, ``deaths``,
        ``assists`` and ``win``. The two ``*_difference`` columns are the sum
        over the first five participants minus the sum over the remaining
        ones; ``win`` is the ``win`` flag of the first participant.

    Raises:
        KeyError: If a record lacks one of the fields listed above.
        IndexError: If a record has an empty participant list.
    """
    matchId = []
    gameCreation = []
    participants = []
    champions = []
    gameDuration = []
    goldDifference = []
    damageDifference = []
    kills = []
    deaths = []
    assists = []
    win = []

    for matches in match_data.values():
        for data in matches:
            metadata = data["metadata"]
            info = data["info"]
            players = info["participants"]

            matchId.append(metadata["matchId"])
            gameCreation.append(info["gameCreation"])
            participants.append(metadata["participants"])

            # Riot's match-v5 docs: gameDuration is in seconds when the record
            # has gameEndTimestamp (patch 11.20+) and in milliseconds when it
            # does not. Checking the field avoids comparing version strings
            # lexicographically, which misorders e.g. "11.9." and "11.20".
            if "gameEndTimestamp" in info:
                gameDuration.append(info["gameDuration"])
            else:
                gameDuration.append(info["gameDuration"] // 1000)

            goldEarned = [player["goldEarned"] for player in players]
            totalDamageDealtToChampions = [
                player["totalDamageDealtToChampions"] for player in players
            ]

            champions.append([player["championName"] for player in players])
            # First five participants minus the rest.
            goldDifference.append(sum(goldEarned[:5]) - sum(goldEarned[5:]))
            damageDifference.append(
                sum(totalDamageDealtToChampions[:5])
                - sum(totalDamageDealtToChampions[5:])
            )
            kills.append([player["kills"] for player in players])
            deaths.append([player["deaths"] for player in players])
            assists.append([player["assists"] for player in players])
            # The match outcome is stored from the first participant's side.
            win.append(players[0]["win"])

    df = pd.DataFrame(
        {
            "match_Id": matchId,
            "gameCreation": gameCreation,
            "participants": participants,
            "champions": champions,
            "game_duration": gameDuration,
            "gold_difference": goldDifference,
            "damage_difference": damageDifference,
            "kills": kills,
            "deaths": deaths,
            "assists": assists,
            "win": win,
        }
    )
    return df

## 2.1. 샘플 티어 데이터로 확인

In [505]:
# Sanity check on a single tier file: parse it, then build the per-match frame.
sample_json_path = os.path.join(JSON_DATA_PATH, "CHALLENGER_I_MatchData.json")
with open(sample_json_path) as json_data:
    match_data = json.load(json_data)
# The file handle is no longer needed once the JSON has been parsed.
df = create_data_frame(match_data)

df

Unnamed: 0,match_Id,gameCreation,participants,champions,game_duration,gold_difference,damage_difference,kills,deaths,assists,win
0,KR_5590645492,1637935948000,[whb6TE8UXO6O47WKckyDiOPwh4hr4WV9yvEy4PgIE81nI...,"[DrMundo, LeeSin, Viktor, Samira, Nautilus, Gw...",1391,6240,-3599,"[7, 11, 0, 7, 1, 4, 3, 5, 8, 1]","[5, 2, 3, 6, 5, 8, 4, 5, 4, 5]","[6, 9, 12, 10, 14, 5, 11, 3, 5, 10]",True
1,KR_5590538011,1637934227000,[whb6TE8UXO6O47WKckyDiOPwh4hr4WV9yvEy4PgIE81nI...,"[Gragas, Talon, TwistedFate, Jinx, Thresh, Vik...",1326,8383,17422,"[7, 12, 4, 10, 3, 4, 4, 4, 5, 3]","[2, 6, 2, 6, 4, 7, 8, 8, 9, 4]","[20, 7, 22, 15, 19, 4, 5, 4, 4, 10]",True
2,KR_5590504186,1637932922000,[_YwdGIudpX226XlBqetTIqnsA70MXQ__C2-fd3n8jPqIC...,"[Renekton, LeeSin, Sylas, Jinx, Thresh, Yone, ...",919,-11695,-16935,"[0, 2, 1, 2, 0, 4, 9, 4, 5, 1]","[5, 4, 4, 5, 5, 0, 1, 1, 1, 2]","[0, 1, 2, 2, 3, 2, 5, 8, 9, 8]",False
3,KR_5582021199,1637503831000,[m5JxEyEcP-m_8V8sBykPbR1rSvvgOj5USTVYZECHVU3Xy...,"[Camille, Kayn, TwistedFate, Jinx, Thresh, Jax...",1195,2653,7347,"[6, 4, 2, 10, 2, 3, 4, 5, 5, 2]","[3, 6, 1, 4, 5, 6, 4, 3, 6, 5]","[8, 7, 16, 8, 15, 4, 6, 2, 5, 8]",True
4,KR_5589724949,1637905009000,[AXvRIebrc0GW3xNeDgctg5jd7Hgi9KXigbPdnwTJ9n-tV...,"[Jayce, XinZhao, Syndra, Jhin, Bard, Irelia, Q...",1904,10857,13140,"[16, 2, 9, 8, 3, 9, 4, 5, 4, 2]","[6, 7, 5, 1, 5, 8, 9, 10, 3, 8]","[9, 20, 11, 18, 21, 6, 7, 4, 8, 16]",True
...,...,...,...,...,...,...,...,...,...,...,...
35648,KR_5309487999,1625667707000,[1j8OqgmOEUwPBmTm5xLQEk74bOcb_ZvT3eBga9aLZduA2...,"[DrMundo, LeeSin, Zoe, Samira, Sett, Camille, ...",1539,5974,7461,"[5, 8, 4, 15, 1, 7, 6, 1, 2, 0]","[4, 5, 3, 1, 4, 7, 9, 3, 9, 5]","[13, 16, 11, 8, 19, 2, 3, 2, 7, 8]",True
35649,KR_5308751360,1625653118000,[FQMniHHRvBBMwiuKsY6LeHU965e6iU84xzaMGPDbbQPDY...,"[Fiora, Nocturne, Akali, Kaisa, Leona, Jayce, ...",1692,-13116,-37340,"[2, 11, 3, 5, 1, 8, 6, 5, 12, 2]","[5, 7, 8, 5, 9, 6, 3, 4, 3, 6]","[3, 4, 4, 5, 7, 9, 11, 13, 14, 22]",False
35650,KR_5308584518,1625646039000,[dy50MifJzfvwcLMh7CSCJuD3UAbQjxnZybRmY9M_wLVa5...,"[Jax, Gwen, Sylas, Aphelios, Thresh, Camille, ...",1149,10878,5624,"[11, 8, 3, 2, 1, 2, 6, 0, 0, 0]","[2, 0, 4, 1, 1, 5, 6, 5, 4, 5]","[2, 7, 5, 6, 12, 0, 2, 4, 1, 4]",True
35651,KR_5307723460,1625588048000,[C8rLwisMLCS0Oc7BNZu9q5OgvtZ6EHbtrWYgcqynkcKNM...,"[Sett, Diana, Yasuo, Aphelios, Thresh, LeeSin,...",1017,6351,3587,"[5, 7, 4, 3, 1, 0, 0, 0, 7, 2]","[1, 1, 0, 4, 3, 6, 2, 4, 3, 5]","[5, 6, 5, 4, 9, 0, 2, 3, 2, 5]",True


## 2.2. 챌린저, 그랜드마스터, 마스터 티어 매치 데이터 합치기

In [506]:
# Build one per-match frame for every tier file found in JSON_DATA_PATH.
# A file is used when its name contains any of the three tier names below.
all_tier_df = []
for fileName in tqdm(os.listdir(JSON_DATA_PATH)):
    if not any(
        tier in fileName for tier in ("CHALLENGER", "GRANDMASTER", "MASTER")
    ):
        continue
    with open(os.path.join(JSON_DATA_PATH, fileName)) as json_data:
        match_data = json.load(json_data)
    df = create_data_frame(match_data)
    all_tier_df.append(df)

100%|██████████| 10/10 [06:28<00:00, 38.81s/it]


In [507]:
# Stack the per-file frames into one frame; ignore_index=True renumbers the
# rows 0..n-1 instead of keeping each file's own row labels.
df_concat = pd.concat(all_tier_df, ignore_index=True)
df_concat

Unnamed: 0,match_Id,gameCreation,participants,champions,game_duration,gold_difference,damage_difference,kills,deaths,assists,win
0,KR_5583030086,1637565009000,[dTsCaFfQB2RlfdF64EMHs119A9YrqgAy-ryaxbXrroCF8...,"[Irelia, Ekko, Zed, Vayne, Aatrox, Sejuani, Ta...",1501,-11660,-24603,"[3, 13, 8, 7, 4, 7, 10, 12, 15, 4]","[7, 8, 9, 10, 14, 3, 6, 9, 6, 11]","[2, 8, 8, 5, 15, 14, 6, 12, 14, 21]",False
1,KR_5582966326,1637563120000,[bECTFZTeOoBlLqBIrXvNiXoO4TmcXlhWRUe7v4cRqf2Od...,"[Renekton, Taliyah, Sylas, Draven, Nautilus, Y...",1399,-878,-12928,"[9, 17, 9, 1, 4, 3, 4, 17, 7, 4]","[6, 7, 7, 8, 7, 11, 11, 5, 7, 6]","[6, 6, 8, 9, 13, 4, 12, 5, 4, 18]",False
2,KR_5582953729,1637561641000,[dTsCaFfQB2RlfdF64EMHs119A9YrqgAy-ryaxbXrroCF8...,"[Kalista, Warwick, Qiyana, Jhin, Karma, Riven,...",1072,-6809,-9867,"[4, 6, 3, 2, 1, 6, 2, 3, 15, 1]","[6, 4, 4, 6, 7, 3, 3, 4, 2, 4]","[1, 6, 3, 2, 10, 3, 2, 5, 6, 16]",False
3,KR_5580827743,1637478980000,[UbdHkDWFUBDL1vhGaxUbjxy_qSkzQr8mJ11TcKfSKcN-a...,"[DrMundo, Talon, Zoe, Ashe, Sett, Jayce, Drave...",1646,9020,11615,"[6, 20, 9, 7, 6, 4, 4, 10, 8, 4]","[5, 7, 3, 10, 5, 10, 11, 8, 8, 12]","[11, 10, 13, 13, 19, 4, 10, 6, 13, 10]",True
4,KR_5580787457,1637475966000,[EcIpJ3g4P7KtQBg5Sa36D1nmfZ3Mn9fooLR4c-_f4gNtH...,"[Jax, Khazix, Katarina, Aphelios, Nami, Gragas...",1463,-10026,-7898,"[1, 6, 7, 2, 1, 3, 12, 7, 8, 2]","[5, 6, 6, 8, 7, 3, 2, 3, 3, 6]","[1, 2, 4, 5, 9, 10, 8, 8, 11, 13]",False
...,...,...,...,...,...,...,...,...,...,...,...
411638,KR_5613046424,1638942544000,[knIki7oWE73rEzzcyXrwZuG8tCJw_AfmquSr-wSji63sb...,"[Irelia, Karthus, Ryze, Samira, Rell, Akshan, ...",925,-13666,-18843,"[1, 1, 0, 4, 1, 9, 3, 6, 8, 1]","[6, 8, 5, 4, 4, 1, 1, 0, 1, 4]","[0, 3, 0, 1, 4, 2, 6, 3, 3, 12]",False
411639,KR_5613052577,1638940314000,[KT4JX77THrfqn_Hqfkmr_LLEEGuGRxmIM6dxbeF0TPM8G...,"[Syndra, Viego, Jayce, Aphelios, Lux, Irelia, ...",1471,-11210,-6252,"[4, 11, 2, 2, 2, 7, 5, 8, 18, 1]","[7, 4, 11, 9, 8, 6, 4, 6, 3, 2]","[3, 3, 2, 2, 11, 3, 11, 3, 5, 22]",False
411640,KR_5612968773,1638937965000,[KT4JX77THrfqn_Hqfkmr_LLEEGuGRxmIM6dxbeF0TPM8G...,"[Tryndamere, Viego, Viktor, Jhin, Karma, Aksha...",938,-14171,-20732,"[1, 2, 0, 3, 0, 2, 5, 12, 4, 3]","[3, 5, 5, 5, 8, 2, 1, 1, 2, 0]","[3, 1, 2, 2, 4, 1, 9, 1, 5, 12]",False
411641,KR_5612903241,1638933858000,[m_mtJn28NppqS6wzcB_0zRRUPh4iV_WtNXpn1eVP9wQq4...,"[Qiyana, Vayne, Ryze, Yuumi, LeeSin, Irelia, R...",1958,-1899,-8456,"[13, 19, 9, 2, 3, 7, 13, 13, 14, 0]","[13, 9, 8, 6, 11, 11, 10, 7, 8, 10]","[9, 9, 11, 34, 15, 18, 7, 10, 14, 31]",True


## 2.3. 중복 매치 데이터 삭제

In [508]:
# The same match can appear in several tier files. Keep one row per match_Id
# (pandas keeps the first occurrence by default), modifying df_concat in place
# and renumbering the remaining rows.
df_concat.drop_duplicates(subset=["match_Id"], inplace=True, ignore_index=True)
df_concat

Unnamed: 0,match_Id,gameCreation,participants,champions,game_duration,gold_difference,damage_difference,kills,deaths,assists,win
0,KR_5583030086,1637565009000,[dTsCaFfQB2RlfdF64EMHs119A9YrqgAy-ryaxbXrroCF8...,"[Irelia, Ekko, Zed, Vayne, Aatrox, Sejuani, Ta...",1501,-11660,-24603,"[3, 13, 8, 7, 4, 7, 10, 12, 15, 4]","[7, 8, 9, 10, 14, 3, 6, 9, 6, 11]","[2, 8, 8, 5, 15, 14, 6, 12, 14, 21]",False
1,KR_5582966326,1637563120000,[bECTFZTeOoBlLqBIrXvNiXoO4TmcXlhWRUe7v4cRqf2Od...,"[Renekton, Taliyah, Sylas, Draven, Nautilus, Y...",1399,-878,-12928,"[9, 17, 9, 1, 4, 3, 4, 17, 7, 4]","[6, 7, 7, 8, 7, 11, 11, 5, 7, 6]","[6, 6, 8, 9, 13, 4, 12, 5, 4, 18]",False
2,KR_5582953729,1637561641000,[dTsCaFfQB2RlfdF64EMHs119A9YrqgAy-ryaxbXrroCF8...,"[Kalista, Warwick, Qiyana, Jhin, Karma, Riven,...",1072,-6809,-9867,"[4, 6, 3, 2, 1, 6, 2, 3, 15, 1]","[6, 4, 4, 6, 7, 3, 3, 4, 2, 4]","[1, 6, 3, 2, 10, 3, 2, 5, 6, 16]",False
3,KR_5580827743,1637478980000,[UbdHkDWFUBDL1vhGaxUbjxy_qSkzQr8mJ11TcKfSKcN-a...,"[DrMundo, Talon, Zoe, Ashe, Sett, Jayce, Drave...",1646,9020,11615,"[6, 20, 9, 7, 6, 4, 4, 10, 8, 4]","[5, 7, 3, 10, 5, 10, 11, 8, 8, 12]","[11, 10, 13, 13, 19, 4, 10, 6, 13, 10]",True
4,KR_5580787457,1637475966000,[EcIpJ3g4P7KtQBg5Sa36D1nmfZ3Mn9fooLR4c-_f4gNtH...,"[Jax, Khazix, Katarina, Aphelios, Nami, Gragas...",1463,-10026,-7898,"[1, 6, 7, 2, 1, 3, 12, 7, 8, 2]","[5, 6, 6, 8, 7, 3, 2, 3, 3, 6]","[1, 2, 4, 5, 9, 10, 8, 8, 11, 13]",False
...,...,...,...,...,...,...,...,...,...,...,...
317233,KR_5613046424,1638942544000,[knIki7oWE73rEzzcyXrwZuG8tCJw_AfmquSr-wSji63sb...,"[Irelia, Karthus, Ryze, Samira, Rell, Akshan, ...",925,-13666,-18843,"[1, 1, 0, 4, 1, 9, 3, 6, 8, 1]","[6, 8, 5, 4, 4, 1, 1, 0, 1, 4]","[0, 3, 0, 1, 4, 2, 6, 3, 3, 12]",False
317234,KR_5613052577,1638940314000,[KT4JX77THrfqn_Hqfkmr_LLEEGuGRxmIM6dxbeF0TPM8G...,"[Syndra, Viego, Jayce, Aphelios, Lux, Irelia, ...",1471,-11210,-6252,"[4, 11, 2, 2, 2, 7, 5, 8, 18, 1]","[7, 4, 11, 9, 8, 6, 4, 6, 3, 2]","[3, 3, 2, 2, 11, 3, 11, 3, 5, 22]",False
317235,KR_5612968773,1638937965000,[KT4JX77THrfqn_Hqfkmr_LLEEGuGRxmIM6dxbeF0TPM8G...,"[Tryndamere, Viego, Viktor, Jhin, Karma, Aksha...",938,-14171,-20732,"[1, 2, 0, 3, 0, 2, 5, 12, 4, 3]","[3, 5, 5, 5, 8, 2, 1, 1, 2, 0]","[3, 1, 2, 2, 4, 1, 9, 1, 5, 12]",False
317236,KR_5612903241,1638933858000,[m_mtJn28NppqS6wzcB_0zRRUPh4iV_WtNXpn1eVP9wQq4...,"[Qiyana, Vayne, Ryze, Yuumi, LeeSin, Irelia, R...",1958,-1899,-8456,"[13, 19, 9, 2, 3, 7, 13, 13, 14, 0]","[13, 9, 8, 6, 11, 11, 10, 7, 8, 10]","[9, 9, 11, 34, 15, 18, 7, 10, 14, 31]",True


In [509]:
# Persist the de-duplicated frame without the row index. As the note in
# section 2.4 says, the list-valued columns come back as strings when this CSV
# is re-read.
df_concat.to_csv(os.path.join(CSV_DATA_PATH, "puuids_and_champions.csv"), index=False)

## 2.4. train, test data 분리

In [510]:
# 주의: 아래와 같이 csv 파일 로드해서 데이터 프레임을 만들 경우, participants와 champions 데이터 형태가 list가 아닌 str로 지정되어서 승률 계산시 느리다.
# 해결: 다시 list 형태로 바꿔준다.
# df_concat = pd.read_csv(os.path.join(CSV_DATA_PATH, "puuids_and_champions.csv"))
# df_concat

In [511]:
# df_concat["participants"] =  list(map(eval, df_concat["participants"]))
# df_concat["champions"] =  list(map(eval, df_concat["champions"]))
# df_concat

In [512]:
# gameCreation으로 정렬 후 승률 계산하고, 이후 train, test 분리한다.
# train_data, test_data = train_test_split(df_concat, test_size=0.01, random_state=42)
# train_data

In [513]:
# test_data

## 2.5. gameCreation 시간 기준으로 정렬 (오래된 순)

In [514]:
# Sort df_concat in place by gameCreation, oldest match first.
# The original row labels are kept (the index is not reset); the feature loop
# in section 3 reads rows by position with .iloc.
df_concat.sort_values(by="gameCreation", inplace=True)
df_concat

Unnamed: 0,match_Id,gameCreation,participants,champions,game_duration,gold_difference,damage_difference,kills,deaths,assists,win
316359,KR_5279540031,1624399450000,[FpEfYav89q4tBPNu0QDbcUwEoQhtXEy3HvEaMsCPuTMJJ...,"[Skarner, LeeSin, Akali, Kaisa, Thresh, Sett, ...",1722,-3428,-9464,"[5, 4, 9, 6, 1, 0, 10, 8, 13, 1]","[5, 6, 8, 6, 7, 6, 5, 2, 4, 8]","[7, 8, 4, 4, 14, 15, 11, 14, 8, 16]",False
213755,KR_5279510057,1624399625000,[s-1oMVpFkFPuLg1dwGfLStyebK8Ng_Eoo-2N1QsZZ2-Lf...,"[Velkoz, JarvanIV, Anivia, Aphelios, TahmKench...",1078,-13002,-11183,"[1, 3, 0, 1, 2, 2, 7, 10, 2, 0]","[7, 3, 4, 3, 4, 1, 2, 0, 2, 2]","[3, 3, 0, 1, 4, 3, 6, 4, 1, 9]",False
212356,KR_5279540133,1624400144000,[u0m40xyFrscY63zkcju-reOdJjnlKS8E5478Y9ROuCUia...,"[Anivia, Khazix, Kassadin, Aphelios, Lulu, Ken...",1210,9749,12061,"[4, 10, 6, 4, 0, 0, 2, 2, 4, 1]","[0, 1, 3, 3, 2, 9, 4, 5, 4, 2]","[5, 3, 3, 0, 6, 2, 4, 2, 2, 4]",True
208647,KR_5279370139,1624400268000,[U-Yg4XJIlfbUNUjbW3Jou_Zj66vIGtUvl8csmSXyhhfN_...,"[Gragas, Olaf, Talon, Kaisa, Yuumi, Sion, XinZ...",1497,-11502,-28346,"[0, 5, 1, 2, 0, 2, 3, 6, 8, 2]","[6, 2, 5, 4, 4, 2, 3, 2, 0, 1]","[0, 0, 4, 0, 2, 9, 11, 9, 8, 14]",False
279589,KR_5279370215,1624400726000,[G-i0KsBLriFdx8mMo296XwVDbd0sALpSeQdfkikxhYy4A...,"[Sion, Karthus, Lucian, KogMaw, Janna, Jayce, ...",1319,12755,18778,"[2, 11, 4, 8, 2, 1, 1, 5, 0, 1]","[1, 1, 3, 3, 0, 4, 7, 5, 7, 4]","[5, 4, 7, 7, 18, 2, 4, 0, 2, 2]",True
...,...,...,...,...,...,...,...,...,...,...,...
317235,KR_5612968773,1638937965000,[KT4JX77THrfqn_Hqfkmr_LLEEGuGRxmIM6dxbeF0TPM8G...,"[Tryndamere, Viego, Viktor, Jhin, Karma, Aksha...",938,-14171,-20732,"[1, 2, 0, 3, 0, 2, 5, 12, 4, 3]","[3, 5, 5, 5, 8, 2, 1, 1, 2, 0]","[3, 1, 2, 2, 4, 1, 9, 1, 5, 12]",False
317234,KR_5613052577,1638940314000,[KT4JX77THrfqn_Hqfkmr_LLEEGuGRxmIM6dxbeF0TPM8G...,"[Syndra, Viego, Jayce, Aphelios, Lux, Irelia, ...",1471,-11210,-6252,"[4, 11, 2, 2, 2, 7, 5, 8, 18, 1]","[7, 4, 11, 9, 8, 6, 4, 6, 3, 2]","[3, 3, 2, 2, 11, 3, 11, 3, 5, 22]",False
317230,KR_5613073433,1638940791000,[VLdfcwRTHFPovaYVdcqtpfjS7qGqRWkn7u5_DHFSdErWr...,"[Camille, Nidalee, Akshan, Caitlyn, Nami, Jayc...",1089,9245,14566,"[6, 6, 5, 7, 2, 1, 4, 5, 2, 2]","[3, 3, 5, 2, 1, 10, 6, 4, 4, 2]","[9, 8, 3, 5, 10, 3, 3, 3, 4, 4]",True
317233,KR_5613046424,1638942544000,[knIki7oWE73rEzzcyXrwZuG8tCJw_AfmquSr-wSji63sb...,"[Irelia, Karthus, Ryze, Samira, Rell, Akshan, ...",925,-13666,-18843,"[1, 1, 0, 4, 1, 9, 3, 6, 8, 1]","[6, 8, 5, 4, 4, 1, 1, 0, 1, 4]","[0, 3, 0, 1, 4, 2, 6, 3, 3, 12]",False


# 3. 플레이어-챔피언 게임 수, 승률, 챔피언 승률, KDA 계산

In [812]:
# Running histories keyed by player (puuid) and champion. The loop below walks
# df_concat in row order and, for every match, reads each feature from these
# histories BEFORE appending that match's own outcome, so a row's features
# never contain its own result.
participants_and_champions_win_loss = defaultdict(dict)  # puuid -> champion -> [0/1, ...]
participants_and_champions_win_rates = []  # one per-participant list per match
participants_and_champions_played = []  # one per-participant list per match
champions_win_loss = defaultdict(list)  # champion -> [0/1, ...] across all players
champions_win_rates = []  # one per-participant list per match
participants_and_champions_kills = defaultdict(dict)  # puuid -> champion -> [kills, ...]
participants_and_champions_deaths = defaultdict(dict)  # puuid -> champion -> [deaths, ...]
participants_and_champions_assists = defaultdict(dict)  # puuid -> champion -> [assists, ...]
participants_and_champions_kda = []  # one per-participant list per match

for i in tqdm(range(len(df_concat))):
    match = df_concat.iloc[i]
    # Per-match accumulators; the helpers below append one entry per participant.
    participants_and_champions_win_rates_per_match = []
    participants_and_champions_played_per_match = []
    champions_win_rates_per_match = []
    participants_and_champions_kda_per_match = []

    def get_win_rates(puuid, champion, team_id):
        """Append one participant's pre-match win statistics, then log the result.

        Appends to the per-match lists the number of earlier games and the win
        rate of ``puuid`` on ``champion`` (both 0 when there is no history) and
        the champion's overall win rate so far (0 when unseen). Afterwards the
        outcome of the current match is added to both histories.

        Args:
            puuid: Player identifier.
            champion: Champion name played in the current match.
            team_id: 0 or 1 (``index // 5`` of the participant).
        """
        # match["win"] is used as-is for team_id 0 and flipped for team_id 1.
        win_or_loss = (int(match["win"] + team_id)) % 2
        try:
            participants_and_champions_played_per_match.append(
                len(participants_and_champions_win_loss[puuid][champion])
            )
            participants_and_champions_win_rates_per_match.append(
                sum(participants_and_champions_win_loss[puuid][champion])
                / len(participants_and_champions_win_loss[puuid][champion])
            )
            participants_and_champions_win_loss[puuid][champion].append(win_or_loss)
        except KeyError:
            # First game of this player on this champion: no history yet.
            # (KeyError is raised while evaluating the first append's argument,
            # so nothing has been appended at this point.)
            participants_and_champions_played_per_match.append(0)
            participants_and_champions_win_rates_per_match.append(0)
            participants_and_champions_win_loss[puuid][champion] = [win_or_loss]
        try:
            champions_win_rates_per_match.append(
                sum(champions_win_loss[champion]) / len(champions_win_loss[champion])
            )
        except ZeroDivisionError:
            # champions_win_loss is a defaultdict(list): an unseen champion
            # yields an empty list, hence a division by zero.
            champions_win_rates_per_match.append(0)
        champions_win_loss[champion].append(win_or_loss)

    def get_kda(index, puuid, champion):
        """Append one participant's pre-match KDA, then log this match's K/D/A.

        The KDA is (total kills + total assists) / total deaths over the
        earlier games of ``puuid`` on ``champion``. It is 0 when there is no
        history, and (kills + assists) * 1.2 when the history has no deaths.

        Args:
            index: Position of the participant within the match (0-9).
            puuid: Player identifier.
            champion: Champion name played in the current match.
        """
        try:
            participants_and_champions_kda_per_match.append(
                (
                    sum(participants_and_champions_kills[puuid][champion])
                    + sum(participants_and_champions_assists[puuid][champion])
                )
                / sum(participants_and_champions_deaths[puuid][champion])
            )
            participants_and_champions_kills[puuid][champion].append(
                match["kills"][index]
            )
            participants_and_champions_deaths[puuid][champion].append(
                match["deaths"][index]
            )
            participants_and_champions_assists[puuid][champion].append(
                match["assists"][index]
            )
        except KeyError:
            # No earlier game of this player on this champion.
            participants_and_champions_kda_per_match.append(0)
            participants_and_champions_kills[puuid][champion] = [match["kills"][index]]
            participants_and_champions_deaths[puuid][champion] = [
                match["deaths"][index]
            ]
            participants_and_champions_assists[puuid][champion] = [
                match["assists"][index]
            ]
        except ZeroDivisionError:
            # History exists but contains no deaths: scale kills + assists by
            # 1.2 instead of dividing by zero.
            participants_and_champions_kda_per_match.append(
                (
                    sum(participants_and_champions_kills[puuid][champion])
                    + sum(participants_and_champions_assists[puuid][champion])
                )
                * 1.2
            )
            participants_and_champions_kills[puuid][champion].append(
                match["kills"][index]
            )
            participants_and_champions_deaths[puuid][champion].append(
                match["deaths"][index]
            )
            participants_and_champions_assists[puuid][champion].append(
                match["assists"][index]
            )

    # Blue team: index // 5 == 0, red team: index // 5 == 1.
    for index, (puuid, champion) in enumerate(
        zip(match["participants"], match["champions"])
    ):
        get_win_rates(puuid, champion, index // 5)
        get_kda(index, puuid, champion)

    participants_and_champions_played.append(
        participants_and_champions_played_per_match
    )
    participants_and_champions_win_rates.append(
        participants_and_champions_win_rates_per_match
    )
    champions_win_rates.append(champions_win_rates_per_match)
    participants_and_champions_kda.append(participants_and_champions_kda_per_match)

# The list columns are in df_concat's row order; "win" is taken straight from
# df_concat, so `data` carries df_concat's row labels.
data = pd.DataFrame(
    {
        "participants_and_champions_played": participants_and_champions_played,
        "participants_and_champions_win_rates": participants_and_champions_win_rates,
        "champions_win_rates": champions_win_rates,
        "kda": participants_and_champions_kda,
        "win": df_concat["win"],
    }
)
data

100%|██████████| 317238/317238 [14:36<00:00, 361.93it/s]


Unnamed: 0,participants_and_champions_played,participants_and_champions_win_rates,champions_win_rates,kda,win
316359,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",False
213755,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",False
212356,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0.0, 0, 0, 0.0, 1.0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",True
208647,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0, 0.0, 0, 0.0, 0, 1.0, 1.0, 0, 1.0, 0.5]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",False
279589,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[1.0, 0, 0, 0, 0, 0, 0, 0, 0.0, 1.0]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",True
...,...,...,...,...,...
317235,"[3, 10, 2, 31, 7, 0, 2, 40, 5, 10]","[0.0, 0.5, 0.0, 0.7096774193548387, 0.42857142...","[0.5153897726580118, 0.49007547748453817, 0.48...","[1.6666666666666667, 2.6545454545454548, 1.75,...",False
317234,"[7, 27, 0, 8, 2, 0, 0, 3, 32, 44]","[0.2857142857142857, 0.3333333333333333, 0, 0....","[0.4937039241723522, 0.49006712781544964, 0.48...","[1.9318181818181819, 2.5714285714285716, 0, 1....",False
317230,"[3, 36, 1, 11, 7, 21, 2, 4, 64, 0]","[0.0, 0.8611111111111112, 1.0, 0.8181818181818...","[0.5151818299709147, 0.5139375640265272, 0.505...","[1.6666666666666667, 6.63, 3.8, 5.153846153846...",True
317233,"[1, 0, 76, 13, 12, 1, 72, 0, 14, 2]","[1.0, 0, 0.4473684210526316, 0.615384615384615...","[0.4843989280245023, 0.5323878358107187, 0.468...","[1.6666666666666667, 0, 2.1220657276995305, 2....",False


In [818]:
# One flat column per (team, role, statistic), named "<team><role><suffix>":
# gp = games played, wr = player-champion win rate, cw = champion win rate,
# kda = player-champion KDA. The nesting order below fixes the column order.
column_dict = {
    f"{team}{role}{suffix}": []
    for team in ("blue", "red")
    for role in ("top", "jungle", "mid", "adc", "support")
    for suffix in ("gp", "wr", "cw", "kda")
}
column_dict["result"] = [int(won) for won in data["win"]]


def separate_columns(index, post_fix, column_name):
    """Spread one list-valued cell of ``data`` over the matching flat columns.

    Takes the list stored at row position ``index`` in ``data[column_name]``
    and appends its values, in order, to the ``column_dict`` entries whose key
    ends with ``post_fix`` (in ``column_dict`` key order).
    """
    row_values = data.iloc[index][column_name]
    suffix_len = len(post_fix)
    target_keys = [name for name in column_dict if name[-suffix_len:] == post_fix]
    for name, value in zip(target_keys, row_values):
        column_dict[name].append(value)


# (suffix, source column) pairs, processed in this order for every row.
suffix_sources = (
    ("gp", "participants_and_champions_played"),
    ("wr", "participants_and_champions_win_rates"),
    ("cw", "champions_win_rates"),
    ("kda", "kda"),
)
for i in tqdm(range(len(data))):
    for post_fix, column_name in suffix_sources:
        separate_columns(i, post_fix, column_name)

data_preprocessed = pd.DataFrame(column_dict)
data_preprocessed


100%|██████████| 317238/317238 [02:49<00:00, 1876.79it/s]


Unnamed: 0,bluetopgp,bluetopwr,bluetopcw,bluetopkda,bluejunglegp,bluejunglewr,bluejunglecw,bluejunglekda,bluemidgp,bluemidwr,...,redmidkda,redadcgp,redadcwr,redadccw,redadckda,redsupportgp,redsupportwr,redsupportcw,redsupportkda,result
0,0,0.000000,0.000000,0.000000,0,0.000000,0.000000,0.000000,0,0.000000,...,0.000000,0,0.000000,0.000000,0.000000,0,0.000000,0.000000,0.000000,0
1,0,0.000000,0.000000,0.000000,0,0.000000,0.000000,0.000000,0,0.000000,...,0.000000,0,0.000000,0.000000,0.000000,0,0.000000,0.000000,0.000000,0
2,0,0.000000,0.000000,0.000000,0,0.000000,0.000000,0.000000,0,0.000000,...,0.000000,0,0.000000,0.000000,0.000000,0,0.000000,0.000000,0.000000,1
3,0,0.000000,0.000000,0.000000,0,0.000000,0.000000,0.000000,0,0.000000,...,0.000000,0,0.000000,1.000000,0.000000,0,0.000000,0.500000,0.000000,0
4,0,0.000000,1.000000,0.000000,0,0.000000,0.000000,0.000000,0,0.000000,...,0.000000,0,0.000000,0.000000,0.000000,0,0.000000,1.000000,0.000000,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
317233,3,0.000000,0.515390,1.666667,10,0.500000,0.490075,2.654545,2,0.000000,...,3.006452,5,0.200000,0.502303,2.100000,10,0.500000,0.519497,2.833333,0
317234,7,0.285714,0.493704,1.931818,27,0.333333,0.490067,2.571429,0,0.000000,...,3.272727,32,0.687500,0.498560,3.457516,44,0.568182,0.462097,4.158537,0
317235,3,0.000000,0.515182,1.666667,36,0.861111,0.513938,6.630000,1,1.000000,...,2.000000,64,0.531250,0.521394,3.102459,0,0.000000,0.482397,0.000000,1
317236,1,1.000000,0.484399,1.666667,0,0.000000,0.532388,0.000000,76,0.447368,...,0.000000,14,0.714286,0.502366,3.255319,2,0.000000,0.482355,2.666667,0


In [819]:
# 같은 이유로 되도록이면 csv 로 저장후 다시 로드해서 쓰기보다, 데이터 프레임 그대로 계속 이어서 써야 할 것 같다.
# train_data.to_csv(os.path.join(CSV_DATA_PATH, "train_data.csv"), index=False)
# test_data.to_csv(os.path.join(CSV_DATA_PATH, "test_data.csv"), index=False)

In [820]:
# for champion, win_loss_data in champions_win_rates.items():
#     champions_win_rates[champion] = sum(win_loss_data) / len(win_loss_data)
# champions_win_rates

In [821]:
# Write the flattened feature table (without the row index) for model training.
# NOTE(review): this is a hard-coded absolute path, unlike the earlier export
# that goes through CSV_DATA_PATH — confirm whether it should be configurable.
data_preprocessed.to_csv("/opt/ml/tabnet/data/dataset.csv", index=False)

In [817]:
# with open(os.path.join(CSV_DATA_PATH, "dataset.csv"), "w") as f:
#     w = csv.writer(f)
#     w.writerow(["champion", "win_rate"])
#     for champion, win_rate in champions_win_rates.items():
#         w.writerow([champion, win_rate])

## 3.1. train, test data 분리, test 는 가장 최신 데이터?

In [739]:
# train_data, test_data = train_test_split(data.iloc[-5000:], test_size=0.1, random_state=42)
# data_length = len(data)
# Use only the last 1000 rows of `data`: the final 50 rows form the test set
# and the 950 rows before them form the training pool.
train_data = data.iloc[-1000:-50]
test_data = data.iloc[-50:]

# 4. 모델별 훈련, 평가 (LogisticRegression, TabNet)

## 4.1. LogisticRegression (데이터 형식: List)

In [740]:
# train_data = pd.read_csv(os.path.join(CSV_DATA_PATH, "train_data.csv"))
# test_data = pd.read_csv(os.path.join(CSV_DATA_PATH, "test_data.csv"))

In [823]:
def _feature_rows(frame):
    """Return one flat feature list per row of ``frame``.

    Every cell of the three source columns holds a per-match list, so adding
    the columns concatenates those lists row by row.
    """
    combined = frame["participants_and_champions_win_rates"]
    for extra_column in ("champions_win_rates", "kda"):
        combined = combined + frame[extra_column]
    return combined.tolist()


X_train = _feature_rows(train_data)
X_test = _feature_rows(test_data)
# Labels: the boolean "win" column as 0/1 integers.
y_train = [int(won) for won in train_data["win"]]
y_test = [int(won) for won in test_data["win"]]

In [824]:
# Hold out 20% of the training pool as a validation split (fixed seed 42).
# X_train / y_train are rebound to the remaining 80%.
X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

In [825]:
# LogisticRegression is already imported in the notebook's import cell, so it
# is not imported again here.
clf = LogisticRegression(max_iter=500)
clf.fit(X_train, y_train)
Y_pred = clf.predict(X_test)
# accuracy_score expects (y_true, y_pred).
accuracy = accuracy_score(y_test, Y_pred)
accuracy

0.6

In [826]:
# Accuracy of the current `clf` on the validation split.
y_pred = clf.predict(X_valid)
# accuracy_score expects (y_true, y_pred).
accuracy = accuracy_score(y_valid, y_pred)
accuracy

0.42105263157894735

In [827]:
# Accuracy of the current `clf` on the held-out test rows.
Y_pred = clf.predict(X_test)
# accuracy_score expects (y_true, y_pred).
accuracy = accuracy_score(y_test, Y_pred)
accuracy

0.6

## 4.2. TabNet (데이터 형식: ndarray)

In [828]:
# Convert every split to a NumPy array before handing it to TabNet.
X_train, X_valid, X_test = (
    np.array(split) for split in (X_train, X_valid, X_test)
)
y_train, y_valid, y_test = (
    np.array(split) for split in (y_train, y_valid, y_test)
)

In [829]:
# Train a TabNet classifier with the library's default constructor settings,
# passing the validation split as the evaluation set. `clf` now refers to this
# model instead of the logistic regression.
clf = TabNetClassifier()
clf.fit(X_train, y_train, eval_set=[(X_valid, y_valid)])
# clf.fit(X_train, y_train, eval_set=[(X_valid, y_valid)], eval_metric=["accuracy"])

Device used : cuda
epoch 0  | loss: 0.89828 | val_0_auc: 0.51038 |  0:00:00s
epoch 1  | loss: 0.78943 | val_0_auc: 0.48662 |  0:00:00s
epoch 2  | loss: 0.73779 | val_0_auc: 0.49706 |  0:00:00s
epoch 3  | loss: 0.73238 | val_0_auc: 0.44877 |  0:00:00s
epoch 4  | loss: 0.72252 | val_0_auc: 0.43912 |  0:00:00s
epoch 5  | loss: 0.69497 | val_0_auc: 0.48596 |  0:00:00s
epoch 6  | loss: 0.69137 | val_0_auc: 0.53158 |  0:00:00s
epoch 7  | loss: 0.69688 | val_0_auc: 0.50494 |  0:00:00s
epoch 8  | loss: 0.69238 | val_0_auc: 0.51149 |  0:00:00s
epoch 9  | loss: 0.68676 | val_0_auc: 0.50239 |  0:00:00s
epoch 10 | loss: 0.68375 | val_0_auc: 0.48407 |  0:00:00s
epoch 11 | loss: 0.68118 | val_0_auc: 0.47308 |  0:00:00s
epoch 12 | loss: 0.68088 | val_0_auc: 0.48396 |  0:00:00s
epoch 13 | loss: 0.67877 | val_0_auc: 0.47408 |  0:00:00s
epoch 14 | loss: 0.67943 | val_0_auc: 0.48485 |  0:00:01s
epoch 15 | loss: 0.6785  | val_0_auc: 0.49462 |  0:00:01s
epoch 16 | loss: 0.67944 | val_0_auc: 0.4894  |  0:00

In [830]:
# Accuracy of the current `clf` on the validation split.
y_pred = clf.predict(X_valid)
# accuracy_score expects (y_true, y_pred).
accuracy = accuracy_score(y_valid, y_pred)
accuracy

0.5263157894736842

In [831]:
# Accuracy of the current `clf` on the held-out test rows.
y_pred = clf.predict(X_test)
# accuracy_score expects (y_true, y_pred).
accuracy = accuracy_score(y_test, y_pred)
accuracy

0.56

In [647]:
# TabNetPretrainer
# Step 1: unsupervised pretraining on the training features only (no labels),
# using Adam with lr=2e-2 and the "entmax" mask type.
unsupervised_model = TabNetPretrainer(
    optimizer_fn=torch.optim.Adam,
    optimizer_params=dict(lr=2e-2),
    mask_type="entmax",  # "sparsemax"
)

# The validation features are passed as the evaluation set.
# NOTE(review): per the pytorch-tabnet docs, pretraining_ratio is the share of
# features masked for reconstruction — confirm 0.8 is intended.
unsupervised_model.fit(
    X_train=X_train,
    eval_set=[X_valid],
    pretraining_ratio=0.8,
)

# Step 2: supervised classifier with the same optimizer settings plus a StepLR
# schedule (step_size=10, gamma=0.9).
clf = TabNetClassifier(
    optimizer_fn=torch.optim.Adam,
    optimizer_params=dict(lr=2e-2),
    scheduler_params={
        "step_size": 10,  # how to use learning rate scheduler
        "gamma": 0.9,
    },
    scheduler_fn=torch.optim.lr_scheduler.StepLR,
    mask_type="sparsemax",  # This will be overwritten if using pretrain model
)

# Fit starting from the pretrained model; AUC is reported on both the training
# and the validation split under the names "train" and "valid".
clf.fit(
    X_train=X_train,
    y_train=y_train,
    eval_set=[(X_train, y_train), (X_valid, y_valid)],
    eval_name=["train", "valid"],
    eval_metric=["auc"],
    from_unsupervised=unsupervised_model,
)


Device used : cuda
epoch 0  | loss: 1695.60178| val_0_unsup_loss: 2973.15845|  0:00:00s
epoch 1  | loss: 559.80868| val_0_unsup_loss: 306.4259|  0:00:00s
epoch 2  | loss: 267.47568| val_0_unsup_loss: 178.96567|  0:00:00s
epoch 3  | loss: 123.61233| val_0_unsup_loss: 69.99332|  0:00:01s
epoch 4  | loss: 64.86641| val_0_unsup_loss: 33.8256 |  0:00:01s
epoch 5  | loss: 33.79939| val_0_unsup_loss: 16.56229|  0:00:01s
epoch 6  | loss: 18.16831| val_0_unsup_loss: 12.78724|  0:00:02s
epoch 7  | loss: 10.81394| val_0_unsup_loss: 5.57351 |  0:00:02s
epoch 8  | loss: 5.8917  | val_0_unsup_loss: 3.36739 |  0:00:02s
epoch 9  | loss: 4.54039 | val_0_unsup_loss: 2.33122 |  0:00:02s
epoch 10 | loss: 3.16816 | val_0_unsup_loss: 2.80791 |  0:00:03s
epoch 11 | loss: 2.48722 | val_0_unsup_loss: 2.03484 |  0:00:03s
epoch 12 | loss: 2.0761  | val_0_unsup_loss: 1.58843 |  0:00:03s
epoch 13 | loss: 1.84242 | val_0_unsup_loss: 1.46743 |  0:00:04s
epoch 14 | loss: 1.68093 | val_0_unsup_loss: 1.35492 |  0:00:04



epoch 0  | loss: 0.71877 | train_auc: 0.49943 | valid_auc: 0.49818 |  0:00:00s
epoch 1  | loss: 0.69531 | train_auc: 0.50254 | valid_auc: 0.47691 |  0:00:00s
epoch 2  | loss: 0.69232 | train_auc: 0.51042 | valid_auc: 0.49073 |  0:00:00s
epoch 3  | loss: 0.69072 | train_auc: 0.51062 | valid_auc: 0.49873 |  0:00:01s
epoch 4  | loss: 0.68875 | train_auc: 0.50604 | valid_auc: 0.49862 |  0:00:01s
epoch 5  | loss: 0.68657 | train_auc: 0.50392 | valid_auc: 0.50205 |  0:00:01s
epoch 6  | loss: 0.68632 | train_auc: 0.50335 | valid_auc: 0.52358 |  0:00:02s
epoch 7  | loss: 0.68547 | train_auc: 0.5154  | valid_auc: 0.51316 |  0:00:02s
epoch 8  | loss: 0.68362 | train_auc: 0.50953 | valid_auc: 0.49577 |  0:00:02s
epoch 9  | loss: 0.68354 | train_auc: 0.51347 | valid_auc: 0.48951 |  0:00:03s
epoch 10 | loss: 0.68119 | train_auc: 0.51588 | valid_auc: 0.48183 |  0:00:03s
epoch 11 | loss: 0.67903 | train_auc: 0.51377 | valid_auc: 0.50157 |  0:00:03s
epoch 12 | loss: 0.67687 | train_auc: 0.5095  | vali

In [648]:
# Accuracy of the current `clf` on the validation split.
Y_pred = clf.predict(X_valid)
# accuracy_score expects (y_true, y_pred).
accuracy = accuracy_score(y_valid, Y_pred)
accuracy

0.52

In [649]:
# Accuracy of the current `clf` on the held-out test rows.
Y_pred = clf.predict(X_test)
# accuracy_score expects (y_true, y_pred).
accuracy = accuracy_score(y_test, Y_pred)
accuracy

0.508