In [1]:
import json
import numpy as np
import pandas as pd
import csv
import time
import category_encoders as ce
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold

### Data Processing

The data will be processed with the following:
- Synergies will undergo ordinal encoding, since there is a clear rank order among the tiers: grey, bronze, silver, gold, and chromatic
- Champion information (carry, carry contested, vertical synergy) will undergo one-hot encoding, since these are true categorical variables with no ordering

In [2]:
# Load the DataFrame stored in the local pickle file.
# NOTE(review): unpickling can execute arbitrary code — only load files from a trusted source.
data = pd.read_pickle("synergy_carry_value_df.pkl")

In [3]:
# Preview 10 randomly chosen rows; no random_state is passed, so the rows shown differ between runs.
data.sample(10)

Unnamed: 0,Set5_Assassin,Set5_Brawler,Set5_Cavalier,Set5_Forgotten,Set5_Ironclad,Set5_Legionnaire,Set5_Nightbringer,Set5_Redeemed,Set5_Sentinel,Set5_Skirmisher,...,Secondary_Carry,Third_Carry_Tier,Third_Carry,Carry_Contested,Value,Diff_from_Match_Max,Last_Round,Diff_from_Match_End,Placement,Top 4
46200,grey,bronze,grey,grey,grey,bronze,silver,grey,grey,grey,...,TFT5_Gwen,1,TFT5_Garen,Contested,60,54,31,9,6,0
43637,grey,grey,grey,grey,gold,bronze,grey,gold,grey,grey,...,TFT5_Garen,1,TFT5_Kayle,Contested,62,29,35,0,2,1
58576,grey,grey,grey,grey,gold,grey,grey,grey,gold,grey,...,TFT5_Lucian,2,TFT5_Rell,Contested,78,17,35,6,4,1
113019,grey,grey,grey,grey,grey,grey,grey,grey,grey,grey,...,,0,,Contested,1,85,10,10,8,0
27020,grey,grey,grey,grey,grey,grey,grey,grey,grey,grey,...,TFT5_Karma,2,TFT5_Gragas,Uncontested,61,59,30,5,7,0
100260,silver,grey,grey,grey,grey,grey,grey,grey,grey,grey,...,TFT5_Viego,1,TFT5_Diana,Contested,54,51,28,12,7,0
78829,grey,bronze,grey,grey,grey,grey,grey,grey,grey,grey,...,TFT5_Soraka,3,TFT5_Khazix,Contested,84,14,37,0,1,1
63662,grey,bronze,grey,grey,grey,grey,grey,grey,grey,grey,...,TFT5_Galio,2,TFT5_Ashe,Contested,75,7,33,5,4,1
89070,grey,grey,grey,bronze,bronze,bronze,grey,bronze,grey,grey,...,TFT5_Draven,2,TFT5_Kayle,Contested,72,29,35,3,3,1
74713,grey,grey,grey,grey,grey,grey,grey,gold,grey,grey,...,TFT5_Velkoz,2,TFT5_Brand,Uncontested,85,0,37,0,2,1


In [4]:
# Read the champion table, skip its first record, and renumber the remaining rows from 0.
# NOTE(review): presumably the first record is not a real champion entry — confirm against champions.json.
champs = pd.read_json("champions.json").iloc[1:].reset_index(drop=True)

# Plain Python list of champion ids, in file order.
champions = champs["championId"].tolist()
champions

['TFT5_Aatrox',
 'TFT5_Akshan',
 'TFT5_Aphelios',
 'TFT5_Ashe',
 'TFT5_Brand',
 'TFT5_Diana',
 'TFT5_Draven',
 'TFT5_Fiddlesticks',
 'TFT5_Galio',
 'TFT5_Garen',
 'TFT5_Gragas',
 'TFT5_Gwen',
 'TFT5_Hecarim',
 'TFT5_Heimerdinger',
 'TFT5_Ivern',
 'TFT5_Irelia',
 'TFT5_Jax',
 'TFT5_Kalista',
 'TFT5_Karma',
 'TFT5_Kayle',
 'TFT5_Kennen',
 'TFT5_Khazix',
 'TFT5_Kled',
 'TFT5_LeeSin',
 'TFT5_Leona',
 'TFT5_Lucian',
 'TFT5_Lulu',
 'TFT5_Lux',
 'TFT5_MissFortune',
 'TFT5_Nautilus',
 'TFT5_Nidalee',
 'TFT5_Nocturne',
 'TFT5_Nunu',
 'TFT5_Olaf',
 'TFT5_Poppy',
 'TFT5_Pyke',
 'TFT5_Rakan',
 'TFT5_Rell',
 'TFT5_Riven',
 'TFT5_Sejuani',
 'TFT5_Senna',
 'TFT5_Sett',
 'TFT5_Soraka',
 'TFT5_Syndra',
 'TFT5_Teemo',
 'TFT5_Thresh',
 'TFT5_Tristana',
 'TFT5_Udyr',
 'TFT5_Varus',
 'TFT5_Vayne',
 'TFT5_Velkoz',
 'TFT5_Viego',
 'TFT5_Vladimir',
 'TFT5_Volibear',
 'TFT5_Yasuo',
 'TFT5_Ziggs',
 'TFT5_Zyra']

In [5]:
# Load the trait definitions. The context manager guarantees the file handle is
# closed once parsing finishes (the handle was previously left open).
with open("traits.json", encoding='UTF-8') as json_file:
    trait = json.load(json_file)

# Trait keys that are excluded from the trait feature columns.
no_synergy_traits = ['Set5_Caretaker','Set5_Cruel','Set5_Inanimate','Set5_Victorious']

# Start from every trait key in file order, then strip the excluded ones.
col = [entry["key"] for entry in trait]
for excluded_key in no_synergy_traits:
    # list.remove raises ValueError if a key is absent, which surfaces a
    # mismatch between traits.json and the exclusion list.
    col.remove(excluded_key)

# Trait columns only; these are the ones that get ordinal-encoded later on.
feature_traits = col.copy()

# Remaining (non-trait) column names, in the order they are appended to `col`.
feature_nontraits = [
    "Number of Chromatic",
    "Number of Gold",
    "Number of Silver",
    "Number of Bronze",
    "Synergy Contest",
    "Vertical Synergies",
    "Primary_Carry",
    "Primary_Carry_Tier",
    "Secondary_Carry",
    "Secondary_Carry_Tier",
    "Third_Carry",
    "Third_Carry_Tier",
    "Carry_Contested",
    "Value",
    "Diff_from_Match_Max",
    "Last_Round",
    "Diff_from_Match_End",
    "Placement",
]

# `col` ends up as: trait columns followed by the non-trait columns.
col.extend(feature_nontraits)

In [6]:
# Index the trait records by their "key" field.
new_trait = {entry["key"]: entry for entry in trait}

# Build one placeholder row per champion: a copy of the first data row in which
# all three carry columns are set to that champion.
carry_columns = ("Primary_Carry", "Secondary_Carry", "Third_Carry")
dummy_data = []
for champion_id in champions:
    placeholder_row = dict(data.iloc[0])
    for carry_column in carry_columns:
        placeholder_row[carry_column] = champion_id
    dummy_data.append(placeholder_row)

pd.DataFrame(dummy_data)

Unnamed: 0,Set5_Assassin,Set5_Brawler,Set5_Cavalier,Set5_Forgotten,Set5_Ironclad,Set5_Legionnaire,Set5_Nightbringer,Set5_Redeemed,Set5_Sentinel,Set5_Skirmisher,...,Secondary_Carry,Third_Carry_Tier,Third_Carry,Carry_Contested,Value,Diff_from_Match_Max,Last_Round,Diff_from_Match_End,Placement,Top 4
0,bronze,grey,bronze,grey,gold,bronze,silver,grey,grey,bronze,...,TFT5_Aatrox,2,TFT5_Aatrox,Uncontested,141,0,40,0,1,1
1,bronze,grey,bronze,grey,gold,bronze,silver,grey,grey,bronze,...,TFT5_Akshan,2,TFT5_Akshan,Uncontested,141,0,40,0,1,1
2,bronze,grey,bronze,grey,gold,bronze,silver,grey,grey,bronze,...,TFT5_Aphelios,2,TFT5_Aphelios,Uncontested,141,0,40,0,1,1
3,bronze,grey,bronze,grey,gold,bronze,silver,grey,grey,bronze,...,TFT5_Ashe,2,TFT5_Ashe,Uncontested,141,0,40,0,1,1
4,bronze,grey,bronze,grey,gold,bronze,silver,grey,grey,bronze,...,TFT5_Brand,2,TFT5_Brand,Uncontested,141,0,40,0,1,1
5,bronze,grey,bronze,grey,gold,bronze,silver,grey,grey,bronze,...,TFT5_Diana,2,TFT5_Diana,Uncontested,141,0,40,0,1,1
6,bronze,grey,bronze,grey,gold,bronze,silver,grey,grey,bronze,...,TFT5_Draven,2,TFT5_Draven,Uncontested,141,0,40,0,1,1
7,bronze,grey,bronze,grey,gold,bronze,silver,grey,grey,bronze,...,TFT5_Fiddlesticks,2,TFT5_Fiddlesticks,Uncontested,141,0,40,0,1,1
8,bronze,grey,bronze,grey,gold,bronze,silver,grey,grey,bronze,...,TFT5_Galio,2,TFT5_Galio,Uncontested,141,0,40,0,1,1
9,bronze,grey,bronze,grey,gold,bronze,silver,grey,grey,bronze,...,TFT5_Garen,2,TFT5_Garen,Uncontested,141,0,40,0,1,1


In [7]:
# Stack the placeholder rows on top of the real data. Index labels are kept
# as-is, so the placeholder labels overlap with the first labels of `data`.
dummy_frame = pd.DataFrame(dummy_data)
data_encoded = pd.concat([dummy_frame, data])
data_encoded

Unnamed: 0,Set5_Assassin,Set5_Brawler,Set5_Cavalier,Set5_Forgotten,Set5_Ironclad,Set5_Legionnaire,Set5_Nightbringer,Set5_Redeemed,Set5_Sentinel,Set5_Skirmisher,...,Secondary_Carry,Third_Carry_Tier,Third_Carry,Carry_Contested,Value,Diff_from_Match_Max,Last_Round,Diff_from_Match_End,Placement,Top 4
0,bronze,grey,bronze,grey,gold,bronze,silver,grey,grey,bronze,...,TFT5_Aatrox,2,TFT5_Aatrox,Uncontested,141,0,40,0,1,1
1,bronze,grey,bronze,grey,gold,bronze,silver,grey,grey,bronze,...,TFT5_Akshan,2,TFT5_Akshan,Uncontested,141,0,40,0,1,1
2,bronze,grey,bronze,grey,gold,bronze,silver,grey,grey,bronze,...,TFT5_Aphelios,2,TFT5_Aphelios,Uncontested,141,0,40,0,1,1
3,bronze,grey,bronze,grey,gold,bronze,silver,grey,grey,bronze,...,TFT5_Ashe,2,TFT5_Ashe,Uncontested,141,0,40,0,1,1
4,bronze,grey,bronze,grey,gold,bronze,silver,grey,grey,bronze,...,TFT5_Brand,2,TFT5_Brand,Uncontested,141,0,40,0,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
148067,grey,grey,bronze,gold,bronze,grey,grey,grey,grey,grey,...,TFT5_Hecarim,2,TFT5_Thresh,Contested,66,35,34,3,4,1
148068,grey,bronze,grey,grey,grey,grey,grey,grey,grey,grey,...,TFT5_Lulu,3,TFT5_Nunu,Contested,101,0,37,0,2,1
148069,grey,bronze,grey,grey,grey,grey,grey,grey,grey,grey,...,TFT5_Garen,2,TFT5_Soraka,Contested,61,40,33,4,5,0
148070,grey,grey,grey,grey,grey,grey,grey,grey,grey,grey,...,TFT5_Garen,1,TFT5_Teemo,Contested,65,36,30,7,7,0


In [8]:
# List the distinct Primary_Carry values in first-appearance order, after the placeholder rows were prepended.
data_encoded["Primary_Carry"].unique()

array(['TFT5_Aatrox', 'TFT5_Akshan', 'TFT5_Aphelios', 'TFT5_Ashe',
       'TFT5_Brand', 'TFT5_Diana', 'TFT5_Draven', 'TFT5_Fiddlesticks',
       'TFT5_Galio', 'TFT5_Garen', 'TFT5_Gragas', 'TFT5_Gwen',
       'TFT5_Hecarim', 'TFT5_Heimerdinger', 'TFT5_Ivern', 'TFT5_Irelia',
       'TFT5_Jax', 'TFT5_Kalista', 'TFT5_Karma', 'TFT5_Kayle',
       'TFT5_Kennen', 'TFT5_Khazix', 'TFT5_Kled', 'TFT5_LeeSin',
       'TFT5_Leona', 'TFT5_Lucian', 'TFT5_Lulu', 'TFT5_Lux',
       'TFT5_MissFortune', 'TFT5_Nautilus', 'TFT5_Nidalee',
       'TFT5_Nocturne', 'TFT5_Nunu', 'TFT5_Olaf', 'TFT5_Poppy',
       'TFT5_Pyke', 'TFT5_Rakan', 'TFT5_Rell', 'TFT5_Riven',
       'TFT5_Sejuani', 'TFT5_Senna', 'TFT5_Sett', 'TFT5_Soraka',
       'TFT5_Syndra', 'TFT5_Teemo', 'TFT5_Thresh', 'TFT5_Tristana',
       'TFT5_Udyr', 'TFT5_Varus', 'TFT5_Vayne', 'TFT5_Velkoz',
       'TFT5_Viego', 'TFT5_Vladimir', 'TFT5_Volibear', 'TFT5_Yasuo',
       'TFT5_Ziggs', 'TFT5_Zyra', 'N/A'], dtype=object)

In [9]:
# Ordinal-encode every trait column using one shared tier ranking.
# `ce` (category_encoders) is already imported in the notebook's first cell, so
# the duplicate mid-notebook import has been removed.
set_rank = {"grey":0,"bronze":1,"silver":2,"gold":3,"chromatic":4}

# One {"col", "mapping"} spec per trait column, passed as the encoder's `mapping`.
ordinal_list_dict = [
    {"col": trait_column, "mapping": set_rank}
    for trait_column in feature_traits
]

encoder = ce.OrdinalEncoder(cols = feature_traits, mapping = ordinal_list_dict)
data_encoded_ordinalized = encoder.fit_transform(data_encoded)

  elif pd.api.types.is_categorical(cols):


In [10]:
# Display the frame after the trait columns were ordinal-encoded.
data_encoded_ordinalized

Unnamed: 0,Set5_Assassin,Set5_Brawler,Set5_Cavalier,Set5_Forgotten,Set5_Ironclad,Set5_Legionnaire,Set5_Nightbringer,Set5_Redeemed,Set5_Sentinel,Set5_Skirmisher,...,Secondary_Carry,Third_Carry_Tier,Third_Carry,Carry_Contested,Value,Diff_from_Match_Max,Last_Round,Diff_from_Match_End,Placement,Top 4
0,1,0,1,0,3,1,2,0,0,1,...,TFT5_Aatrox,2,TFT5_Aatrox,Uncontested,141,0,40,0,1,1
1,1,0,1,0,3,1,2,0,0,1,...,TFT5_Akshan,2,TFT5_Akshan,Uncontested,141,0,40,0,1,1
2,1,0,1,0,3,1,2,0,0,1,...,TFT5_Aphelios,2,TFT5_Aphelios,Uncontested,141,0,40,0,1,1
3,1,0,1,0,3,1,2,0,0,1,...,TFT5_Ashe,2,TFT5_Ashe,Uncontested,141,0,40,0,1,1
4,1,0,1,0,3,1,2,0,0,1,...,TFT5_Brand,2,TFT5_Brand,Uncontested,141,0,40,0,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
148067,0,0,1,3,1,0,0,0,0,0,...,TFT5_Hecarim,2,TFT5_Thresh,Contested,66,35,34,3,4,1
148068,0,1,0,0,0,0,0,0,0,0,...,TFT5_Lulu,3,TFT5_Nunu,Contested,101,0,37,0,2,1
148069,0,1,0,0,0,0,0,0,0,0,...,TFT5_Garen,2,TFT5_Soraka,Contested,61,40,33,4,5,0
148070,0,0,0,0,0,0,0,0,0,0,...,TFT5_Garen,1,TFT5_Teemo,Contested,65,36,30,7,7,0


In [11]:
# One-hot encode the unordered categorical columns.
# Note: this rebinds `encoder`, replacing the ordinal encoder from the previous step.
encoder = ce.OneHotEncoder(cols = ["Synergy Contest", "Vertical Synergies", "Primary_Carry", "Secondary_Carry", "Third_Carry", "Carry_Contested"])

# Drop the placeholder rows that were prepended before encoding. The offset is
# derived from `dummy_data` rather than hard-coded as 57, so it stays correct if
# the champion list changes; `.iloc` makes the positional slice explicit.
data_encoded_final = encoder.fit_transform(data_encoded_ordinalized).iloc[len(dummy_data):]

  elif pd.api.types.is_categorical(cols):


In [12]:
# Display the fully encoded frame with the placeholder rows removed.
data_encoded_final

Unnamed: 0,Set5_Assassin,Set5_Brawler,Set5_Cavalier,Set5_Forgotten,Set5_Ironclad,Set5_Legionnaire,Set5_Nightbringer,Set5_Redeemed,Set5_Sentinel,Set5_Skirmisher,...,Third_Carry_57,Third_Carry_58,Carry_Contested_1,Carry_Contested_2,Value,Diff_from_Match_Max,Last_Round,Diff_from_Match_End,Placement,Top 4
0,1,0,1,0,3,1,2,0,0,1,...,0,0,1,0,141,0,40,0,1,1
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,87,54,33,7,3,1
2,0,0,0,1,1,1,0,1,0,0,...,0,0,0,1,61,80,33,7,4,1
3,0,0,0,0,1,0,0,0,0,0,...,0,0,0,1,100,41,40,0,2,1
4,2,0,0,0,0,0,0,0,0,0,...,0,0,0,1,51,90,27,13,7,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
148067,0,0,1,3,1,0,0,0,0,0,...,0,0,0,1,66,35,34,3,4,1
148068,0,1,0,0,0,0,0,0,0,0,...,0,0,0,1,101,0,37,0,2,1
148069,0,1,0,0,0,0,0,0,0,0,...,0,0,0,1,61,40,33,4,5,0
148070,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,65,36,30,7,7,0


In [13]:
# Write the encoded frame to a pickle file in the working directory.
data_encoded_final.to_pickle("synergy_carry_value_df_encoded.pkl")