In [1]:
import json
import numpy as np
import pandas as pd
import csv
import time

### Extracting suitable data

The data was originally stored seperate files for easier storage and import. The commented out section would be code used if following from "Obtaining Raw Data"

The output is a dataframe containing the processed information of team compositions in the matches. Each match has 8 teams and from each team we extract the data of team synergy and process the data of champions to obtain "carry_heroes" which are the champions that the players have put emphasis on (decided through number of items on champions, tier and cost).

In [None]:
df2 = pd.read_json("FirstDataSet.json")
df3 = pd.read_json("SecondDataSet.json")
df4 = pd.read_json("ThirdDataSet.json")
df = pd.concat([df2,df3,df4],ignore_index=True)

## with open('data.json', 'r') as filename:
##    json_file=json.load(filename)
##    df = pd.read_json(json_file)

In [3]:
## Identifying the patches of the matches
## We want to keep our data to be part of the same patches or in patches that are very similar

patches = set({})
for i in range(len(df)):
    patches.add(df["info"][i]["game_version"])
patches

{'Version 11.10.374.9538 (May 10 2021/13:12:58) [PUBLIC] <Releases/11.10>',
 'Version 11.10.376.4811 (May 18 2021/11:14:02) [PUBLIC] <Releases/11.10>',
 'Version 11.11.377.6311 (May 24 2021/16:15:59) [PUBLIC] <Releases/11.11>',
 'Version 11.12.379.4946 (Jun 03 2021/15:13:06) [PUBLIC] <Releases/11.12>',
 'Version 11.13.382.1241 (Jun 17 2021/15:58:02) [PUBLIC] <Releases/11.13>',
 'Version 11.14.385.9967 (Jul 08 2021/11:30:05) [PUBLIC] <Releases/11.14>',
 'Version 11.15.387.5736 (Jul 15 2021/16:17:02) [PUBLIC] <Releases/11.15>',
 'Version 11.15.388.2387 (Jul 20 2021/14:15:51) [PUBLIC] <Releases/11.15>',
 'Version 11.15.389.2308 (Jul 26 2021/13:45:59) [PUBLIC] <Releases/11.15>',
 'Version 11.16.390.1945 (Jul 30 2021/15:25:18) [PUBLIC] <Releases/11.16>',
 'Version 11.17.393.0607 (Aug 19 2021/17:22:17) [PUBLIC] <Releases/11.17>',
 'Version 11.17.394.4489 (Aug 26 2021/15:09:18) [PUBLIC] <Releases/11.17>',
 'Version 11.18.395.7538 (Sep 02 2021/16:24:52) [PUBLIC] <Releases/11.18>',
 'Version 11

In [4]:
## List the patches we want as a part of our data set

patch_choices = ["Version 11.18.395.7538 (Sep 02 2021/16:24:52) [PUBLIC] <Releases/11.18>"]
Excluding_List = []
for i in range(len(df)):
    if df["info"][i]["game_version"] not in patch_choices:
        Excluding_List.append(i)
len(Excluding_List)

32714

In [5]:
df_patch_11_18 = df.drop(Excluding_List, axis=0).copy().reset_index(drop=True)
df_patch_11_18 = df_patch_11_18.reset_index(drop=True)

In [6]:
test = set({})
for i in range(len(df_patch_11_18)):
    test.add(df_patch_11_18["info"][i]["game_version"])
test


{'Version 11.18.395.7538 (Sep 02 2021/16:24:52) [PUBLIC] <Releases/11.18>'}

In [7]:
json_file = open("traits.json", encoding='UTF-8')
trait = json.load(json_file)
new_trait = {temp["key"]:temp for temp in trait}

In [11]:
team_df.keys()

Index(['Set5_Abomination', 'Set5_Assassin', 'Set5_Brawler', 'Set5_Cavalier',
       'Set5_Cannoneer', 'Set5_Dawnbringer', 'Set5_Draconic', 'Set5_Forgotten',
       'Set5_Hellion', 'Set5_Invoker', 'Set5_Ironclad', 'Set5_Knight',
       'Set5_Legionnaire', 'Set5_Mystic', 'Set5_Nightbringer', 'Set5_Ranger',
       'Set5_Redeemed', 'Set5_Renewer', 'Set5_Revenant', 'Set5_Sentinel',
       'Set5_Skirmisher', 'Set5_Spellweaver', 'Number of Chromatic',
       'Number of Gold', 'Number of Silver', 'Number of Bronze',
       'Synergy Contest', 'Vertical Synergies', 'Primary_Carry',
       'Primary_Carry_Tier', 'Secondary_Carry', 'Secondary_Carry_Tier',
       'Third_Carry', 'Third_Carry_Tier', 'Carry_Contested', 'Value',
       'Diff_from_Match_Max', 'Last_Round', 'Diff_from_Match_End',
       'Placement'],
      dtype='object')

### Processing data

Below is a sequence of functions that would extract and process the specific data to be put into the data set

In [12]:
## Determines the rank (grey, bronze, silver, gold, chromatic) of the trait in question using the information in "traits.json"

def Traits_To_Medal(trait_name = "", num_of_units = 0):
    if num_of_units < new_trait[trait_name]["sets"][0]["min"]:
        return "grey"
    for i in range(len(new_trait[trait_name]["sets"])):
        if num_of_units >= new_trait[trait_name]["sets"][-i-1]["min"]:
            return new_trait[trait_name]["sets"][-i-1]["style"]

In [13]:
## Compute the number of bronze, silver, gold, chromatic given the traits of a team composition

def find_num_of_synergy(traits = {}):
    synergy = {"Number of Chromatic": 0, "Number of Gold": 0, "Number of Silver":0, "Number of Bronze": 0}
    for key in traits:
        if traits[key] != "grey":
            if traits[key] == "bronze":
                synergy["Number of Bronze"] += 1
            elif traits[key] == "silver":
                synergy["Number of Silver"] += 1
            elif traits[key] == "gold":
                synergy["Number of Gold"] += 1
            elif traits[key] == "chromatic":
                synergy["Number of Chromatic"] += 1
    return synergy

In [14]:
## Establishes vertical synergies and weather a team has one

chromatic_vertical_traits = ['Set5_Assassin','Set5_Brawler','Set5_Cannoneer','Set5_Dawnbringer','Set5_Forgotten','Set5_Hellion','Set5_Legionnaire','Set5_Nightbringer','Set5_Ranger','Set5_Redeemed','Set5_Renewer','Set5_Sentinel','Set5_Skirmisher']
gold_vertical_traits = ['Set5_Dawnbringer','Set5_Forgotten','Set5_Hellion','Set5_Legionnaire','Set5_Nightbringer','Set5_Redeemed','Set5_Sentinel','Set5_Skirmisher','Set5_Knight']

def vertical_synergy(traits = {}):
    for key in traits:
        if traits[key] == "gold":
            if key in gold_vertical_traits:
                return True
        elif traits[key] ==  "chromatic":
            if key in chromatic_vertical_traits:
                return True
    return False

In [15]:
## Determine of there is another team in the match contesting for the same team synergy.
## This is determined by if another player has at least a silver synergy to a gold synergy of the current player

def find_synergy_contest(traits = {}, match_traits = [{}]):
    func = {"grey":0, "bronze":1, "silver":2, "gold":3, "chromatic":4}
    
    gold_exists = False
    highest_enemy_rank = 0
    for key in traits:
        if func[traits[key]] > 2:
            gold_exists = True
            for i in range(len(match_traits)):
                if func[match_traits[i][key]] > 1:
                    return "Contested"
    for key in traits:
        if func[traits[key]] == 2 and gold_exists:
            for i in range(len(match_traits)):
                if func[match_traits[i][key]] > 1:
                    return "Light Contested"
        elif func[traits[key]] == 2 and not gold_exists: 
            for i in range(len(match_traits)):
                if func[match_traits[i][key]] > 1:
                    return "Contested"        
    return "Uncontested"

In [16]:
## The means to determine the carry heros. The team is sorted in the following order (Items, Tier, Rarity)
## Item is by descending order, champions with more items will be places ahead
## In each item container, they are then sorted by Tier since obtaining a 3-Tier unit is non-trivial
## Afterward, within those they are then sorted by rarity where higher cost champions are prioritized vs  lower cost heros

def sort_hero_by_rarity(unit_list = []):
    temp = [[],[],[],[],[]]
    for i in range(len(unit_list)):
        if unit_list[i]["rarity"] == 4:
            temp[4].append(unit_list[i])
        elif unit_list[i]["rarity"] == 3:
            temp[3].append(unit_list[i])
        elif unit_list[i]["rarity"] == 2:
            temp[2].append(unit_list[i])
        elif unit_list[i]["rarity"] == 1:
            temp[1].append(unit_list[i])
        else:
            temp[0].append(unit_list[i])
    sorted_list = []
    for x in list(reversed(temp)):
        sorted_list.extend(x)
    return sorted_list

def sort_hero_by_tier(unit_list = []):
    temp = [[],[],[],[]]
    for i in range(len(unit_list)):
        if unit_list[i]["tier"] == 3:
            temp[3].append(unit_list[i])
        elif unit_list[i]["tier"] == 2:
            temp[2].append(unit_list[i])
        elif unit_list[i]["tier"] == 1:
            temp[1].append(unit_list[i])
        else:
            temp[0].append(unit_list[i])
    sorted_list = []
    for x in list(reversed(temp)):
        sorted_list.extend(x)
    return sorted_list

def find_carries(unit_list = []):
    temp = [[],[],[],[]]
    for i in range(len(unit_list)):
        if len(unit_list[i]["items"]) != 0 and (unit_list[i]["items"][0] == 99 or unit_list[i]["items"][0] == 2099):
            temp[1].append(unit_list[i])
        else:
            temp[len(unit_list[i]["items"])].append(unit_list[i])
    sorted_list = []
    for x in list(reversed(temp)):
        sorted_list.extend(x)
    carries = []
    while len(sorted_list) < 3:
        sorted_list.append({"tier":0, "character_id": "N/A"})
    for x in sorted_list[:3]:
        carries.append([x["tier"],x["character_id"]])
    return carries

In [17]:
## Determines if the carry heros of the player is being contested by those in the same match

def carries_contest(unit_list = [], match_units = [{}]):
    units = set({})
    for x in unit_list:
        units.add(x[1])
    match_unit_list = set({})
    for x in match_units:
        for y in x["units"]:
            match_unit_list.add(y["character_id"])
    if units.isdisjoint(match_unit_list):
        return "Uncontested"
    return "Contested"

In [18]:
## Computes the team value of the heros bases on gold cost in game

def team_value(units = [{}]):
    value = []
    for x in units:
        value.append((x["rarity"] + 1)*(3**(x["tier"]-1)))
    return sum(value)

In [19]:
## Determines the max value of the 8 teams in the match

def match_max_value(match_info = [{}]):
    max_value = []
    for x in match_info["participants"]:
        max_value.append(team_value(x["units"]))
    return max(max_value)

In [20]:
## Determines the last round of the match

def match_last_round(match_info = [{}]):
    last_round = []
    for x in match_info["participants"]:
        last_round.append(x["last_round"])
    return max(last_round)

In [21]:
## Will loop through the matches and process each team and store in the list "test_data" to be turned into a dataframe

test_data = []
for k in range(len(df_patch_11_18)):
    temp = []
    match_info = df_patch_11_18.iloc[k]["info"]
    participants = match_info["participants"]
    max_value = match_max_value(match_info)
    last_round = match_last_round(match_info)
    for j in range(len(match_info["participants"])):
        participant_info = match_info["participants"][j]
        temp.append({})
        for i in range(len(participant_info["traits"])):
            temp[j][participant_info["traits"][i]["name"]] = Traits_To_Medal(participant_info["traits"][i]["name"], participant_info["traits"][i]["num_units"])
        other_traits = list(set(new_trait.keys()).difference(set(temp[j].keys())))
        for i in range(len(other_traits)):
            temp[j][other_traits[i]] = "grey"
        for key in no_synergy_traits: 
            del temp[j][key]
    for j in range(len(temp)):
        extra_temp = {}
        other_participants = temp[0:j]
        other_participants.extend(temp[j+1:8])
        extra_temp.update(find_num_of_synergy(temp[j]))
        extra_temp["Synergy Contest"] = find_synergy_contest(temp[j],other_participants)
        extra_temp["Vertical Synergies"] = vertical_synergy(temp[j])
        carries = find_carries(participants[j]["units"])
        extra_temp["Primary_Carry_Tier"] = carries[0][0]
        extra_temp["Primary_Carry"] = carries[0][1]
        extra_temp["Secondary_Carry_Tier"] = carries[1][0]
        extra_temp["Secondary_Carry"] = carries[1][1]
        extra_temp["Third_Carry_Tier"] = carries[2][0]
        extra_temp["Third_Carry"] = carries[2][1]
        other_units = participants[0:j]
        other_units.extend(participants[j+1:8])
        extra_temp["Carry_Contested"] = carries_contest(carries,other_units)
        extra_temp["Value"] = team_value(participants[j]["units"])
        extra_temp["Diff_from_Match_Max"] = max_value - extra_temp["Value"]
        extra_temp["Last_Round"] = participants[j]["last_round"]
        extra_temp["Diff_from_Match_End"] = last_round - extra_temp["Last_Round"]
        extra_temp["Placement"] = participants[j]["placement"]
        temp[j].update(extra_temp)
    if k % 1000 == 0: print(k)
    test_data.extend(temp)

0
1000
2000
3000
4000
5000
6000
7000
8000
9000
10000
11000
12000
13000
14000
15000
16000
17000
18000


In [22]:
## Adding the main feature to be predicted, whether the team composition is in the top 4 or not

df = pd.DataFrame(test_data)
top_4 = []
for i in range(len(df)):
    top_4.append(int(df["Placement"][i] <=4))
df["Top 4"] = top_4

In [23]:
## Saving data into file

df.to_pickle("synergy_carry_value_df.pkl")

In [24]:
df

Unnamed: 0,Set5_Assassin,Set5_Brawler,Set5_Cavalier,Set5_Forgotten,Set5_Ironclad,Set5_Legionnaire,Set5_Nightbringer,Set5_Redeemed,Set5_Sentinel,Set5_Skirmisher,...,Secondary_Carry,Third_Carry_Tier,Third_Carry,Carry_Contested,Value,Diff_from_Match_Max,Last_Round,Diff_from_Match_End,Placement,Top 4
0,bronze,grey,bronze,grey,gold,bronze,silver,grey,grey,bronze,...,TFT5_Yasuo,2,TFT5_Viego,Uncontested,141,0,40,0,1,1
1,grey,grey,grey,grey,grey,grey,grey,grey,grey,grey,...,TFT5_Karma,2,TFT5_Soraka,Contested,87,54,33,7,3,1
2,grey,grey,grey,bronze,bronze,bronze,grey,bronze,grey,grey,...,TFT5_Galio,2,TFT5_Rell,Contested,61,80,33,7,4,1
3,grey,grey,grey,grey,bronze,grey,grey,grey,grey,grey,...,TFT5_Fiddlesticks,2,TFT5_Garen,Contested,100,41,40,0,2,1
4,silver,grey,grey,grey,grey,grey,grey,grey,grey,grey,...,TFT5_Diana,1,TFT5_Ivern,Contested,51,90,27,13,7,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
148067,grey,grey,bronze,gold,bronze,grey,grey,grey,grey,grey,...,TFT5_Hecarim,2,TFT5_Thresh,Contested,66,35,34,3,4,1
148068,grey,bronze,grey,grey,grey,grey,grey,grey,grey,grey,...,TFT5_Lulu,3,TFT5_Nunu,Contested,101,0,37,0,2,1
148069,grey,bronze,grey,grey,grey,grey,grey,grey,grey,grey,...,TFT5_Garen,2,TFT5_Soraka,Contested,61,40,33,4,5,0
148070,grey,grey,grey,grey,grey,grey,grey,grey,grey,grey,...,TFT5_Garen,1,TFT5_Teemo,Contested,65,36,30,7,7,0
