# Necessary libraries

In [1]:
import pandas as pd
import requests
import numpy as np
from API_Key import key, username, password
import psycopg2 as pg2
import json
from datetime import datetime
import os
import plotly.express as px

In [2]:
# Load the whole `matches` table from the local TFT PostgreSQL database.
# The connection is only needed for this single query, so it is closed as
# soon as the DataFrame is in memory (even if the query raises).
conn = pg2.connect(database='TFT', user=username, password=password)
try:
    df = pd.read_sql(sql="select * from matches", con=conn)
finally:
    conn.close()

In [3]:
# Check column dtypes and non-null counts of the freshly loaded table
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7928 entries, 0 to 7927
Data columns (total 13 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   match            7928 non-null   object 
 1   game_datetime    7928 non-null   float64
 2   game_length      7928 non-null   float64
 3   game_version     7928 non-null   object 
 4   gold_left        7928 non-null   int64  
 5   last_round       7928 non-null   int64  
 6   level            7928 non-null   int64  
 7   placement        7928 non-null   int64  
 8   puuid            7928 non-null   object 
 9   time_eliminated  7928 non-null   float64
 10  total_damage     7928 non-null   int64  
 11  traits           7928 non-null   object 
 12  units            7928 non-null   object 
dtypes: float64(3), int64(5), object(5)
memory usage: 805.3+ KB


In [4]:
# Peek at the first rows. game_datetime is a float64 column (see df.info() above)
# and is divided by 1000 before the timestamp conversion in the next cell, i.e. it
# holds epoch milliseconds rather than readable dates.
df.head()

Unnamed: 0,match,game_datetime,game_length,game_version,gold_left,last_round,level,placement,puuid,time_eliminated,total_damage,traits,units
0,NA1_4107203113,1637190000000.0,2028.263306,Version 11.23.409.0111 (Nov 12 2021/12:01:09) ...,0,34,8,3,2-xYFcIJUem__n9qL8bVWDc7uuuvvnp-SuB4VqKmGiPBFw...,1896.383179,77,"[{""name"": ""Set6_Bruiser"", ""num_units"": 2, ""sty...","[{""character_id"": ""TFT6_Vi"", ""items"": [], ""nam..."
1,NA1_4107203113,1637190000000.0,2028.263306,Version 11.23.409.0111 (Nov 12 2021/12:01:09) ...,0,27,7,7,rIB0yQoA9jT06yokBJjP0wQQRbXe2xwZK1dlIrFwi471Im...,1550.396729,37,"[{""name"": ""Set6_Academy"", ""num_units"": 2, ""sty...","[{""character_id"": ""TFT6_Graves"", ""items"": [], ..."
2,NA1_4107203113,1637190000000.0,2028.263306,Version 11.23.409.0111 (Nov 12 2021/12:01:09) ...,1,33,8,5,WiQACHT1c3gcMNkdAVHuRybWz48Dsbu3PbTZGlr28dOcCV...,1842.735352,91,"[{""name"": ""Set6_Academy"", ""num_units"": 1, ""sty...","[{""character_id"": ""TFT6_KogMaw"", ""items"": [12,..."
3,NA1_4107203113,1637190000000.0,2028.263306,Version 11.23.409.0111 (Nov 12 2021/12:01:09) ...,1,30,8,6,FqFDLL5R9eVnnGE5TVK-kd1LiZ8GXeS7Ejy73BSnjddxz5...,1695.539795,41,"[{""name"": ""Set6_Academy"", ""num_units"": 1, ""sty...","[{""character_id"": ""TFT6_Swain"", ""items"": [2193..."
4,NA1_4107203113,1637190000000.0,2028.263306,Version 11.23.409.0111 (Nov 12 2021/12:01:09) ...,0,27,8,8,4uM1ZdIf5gv4G-I9ZvP9zKajQsJurn24wQQP-UivRpLnh4...,1546.322754,53,"[{""name"": ""Set6_Academy"", ""num_units"": 1, ""sty...","[{""character_id"": ""TFT6_Darius"", ""items"": [], ..."


In [5]:
# game_datetime arrives as epoch milliseconds (float). Convert the whole column
# at once to a UTC 'YYYY-MM-DD HH:MM:SS' string.
# This replaces a per-row datetime.utcfromtimestamp call, which is deprecated
# since Python 3.12; the resulting strings have the same format as before.
df['game_datetime'] = (
    pd.to_datetime(df['game_datetime'], unit='ms')
      .dt.strftime('%Y-%m-%d %H:%M:%S')
)

In [6]:
# Keep only matches played on or after 2021-11-04. game_datetime is a
# 'YYYY-MM-DD HH:MM:SS' string at this point, so a lexicographic comparison
# orders rows chronologically.
# NOTE(review): the cutoff presumably marks the start of the set being analysed - confirm.
# .copy() makes the result an independent frame, so the many column assignments
# in later cells do not operate on a filtered view (SettingWithCopyWarning).
df = df[df['game_datetime'] >= "2021-11-04"].copy()

### Champion names were scraped once and cached in a pickle file so the site is not re-scraped on every run
The scraping code is kept below, commented out, for reference.

In [7]:
# # Getting champions from Latest Set
# from bs4 import BeautifulSoup
# import requests
# url = 'https://app.mobalytics.gg/tft/champions'
# soup = BeautifulSoup(requests.get(url).content, 'html.parser')

# champs = []
# for champ in soup.find("div", class_='m-1o47yso').find_all('a'):
#     champs.append(champ['href'])

# # removing excess string in champion name
# for position, champ in enumerate(champs):
#     champs[position] = champ.replace("/tft/champions/", "")

In [8]:
# import pickle 
# # Store data (serialize)
# with open('Set6_Champions.pickle', 'wb') as handle:
#     pickle.dump(champs, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [9]:
# Read the cached champion list back from disk (deserialize).
# Only unpickle files you created yourself: pickle.load can execute arbitrary code.
import pickle

with open('Set6_Champions.pickle', 'rb') as pickle_file:
    champs = unserialized_data = pickle.load(pickle_file)

In [10]:
# Add one column per champion in the set, initialised to 0; the real
# per-row counts are filled in further down.
for champ_name in champs:
    df[champ_name] = 0

In [11]:
# Some scraped names differ from the lower-cased character ids that are used
# as lookup keys when units are counted later, so bring those columns in line.
champ_column_fixes = {
    "dr-mundo": "drmundo",
    "blitz": "blitzcrank",
    "tahm-kench": "tahmkench",
    "twisted-fate": "twistedfate",
    "miss-fortune": "missfortune",
}
df = df.rename(columns=champ_column_fixes)

# Extra counter column that is not part of the scraped champion list.
# NOTE(review): the spelling 'rainingdummy' looks like what the fixed
# `character_id[5:]` slice in a later cell leaves of a training-dummy id -
# confirm against the raw data before "correcting" it.
df['rainingdummy'] = 0

In [13]:
# Columns that come straight from the `matches` table (the 13 listed by
# df.info() above). Every other column present at this point is a per-champion
# counter, so select those by name instead of by a hard-coded position.
MATCH_COLUMNS = [
    'match', 'game_datetime', 'game_length', 'game_version', 'gold_left',
    'last_round', 'level', 'placement', 'puuid', 'time_eliminated',
    'total_damage', 'traits', 'units',
]
champs = df.columns[~df.columns.isin(MATCH_COLUMNS)]
champs

Index(['akali', 'blitzcrank', 'braum', 'caitlyn', 'camille', 'chogath',
       'darius', 'drmundo', 'ekko', 'ezreal', 'fiora', 'galio', 'gangplank',
       'garen', 'graves', 'heimerdinger', 'illaoi', 'janna', 'jayce', 'jhin',
       'jinx', 'kaisa', 'kassadin', 'katarina', 'kogmaw', 'leona', 'lissandra',
       'lulu', 'lux', 'malzahar', 'missfortune', 'orianna', 'poppy', 'quinn',
       'samira', 'seraphine', 'shaco', 'singed', 'sion', 'swain', 'tahmkench',
       'talon', 'taric', 'tristana', 'trundle', 'twistedfate', 'twitch',
       'urgot', 'veigar', 'vex', 'vi', 'viktor', 'warwick', 'yone', 'yuumi',
       'zac', 'ziggs', 'zilean', 'zyra', 'rainingdummy'],
      dtype='object')

In [14]:
# traits and units are stored as JSON text; decode each cell into Python
# objects (lists of dicts) so they can be iterated below.
for json_col in ('traits', 'units'):
    df[json_col] = df[json_col].apply(json.loads)

In [15]:
# For every row keep only the traits whose current tier is at least 1,
# each as a one-entry {trait name: tier_current} dict.
active_traits = [
    [
        {trait['name']: trait['tier_current']}
        for trait in row_traits
        if trait['tier_current'] >= 1
    ]
    for row_traits in df['traits']
]
df['active_traits'] = active_traits

In [16]:
# Per row, list the fielded units by character id with the first five
# characters (e.g. the 'TFT6_' prefix seen in df.head()) stripped.
# NOTE(review): the slice is fixed-width, so an id with a shorter prefix
# would lose a leading letter - confirm all ids share the 5-char prefix.
army = [
    [unit['character_id'][5:] for unit in team]
    for team in df['units']
]
df['army'] = army

In [17]:
# One zero-filled array per champion, with one slot per dataframe row;
# the next cell increments these slots while walking through each army.
unit_counter = {champ: np.zeros(len(df)) for champ in champs}

{'akali': array([0., 0., 0., ..., 0., 0., 0.]),
 'blitzcrank': array([0., 0., 0., ..., 0., 0., 0.]),
 'braum': array([0., 0., 0., ..., 0., 0., 0.]),
 'caitlyn': array([0., 0., 0., ..., 0., 0., 0.]),
 'camille': array([0., 0., 0., ..., 0., 0., 0.]),
 'chogath': array([0., 0., 0., ..., 0., 0., 0.]),
 'darius': array([0., 0., 0., ..., 0., 0., 0.]),
 'drmundo': array([0., 0., 0., ..., 0., 0., 0.]),
 'ekko': array([0., 0., 0., ..., 0., 0., 0.]),
 'ezreal': array([0., 0., 0., ..., 0., 0., 0.]),
 'fiora': array([0., 0., 0., ..., 0., 0., 0.]),
 'galio': array([0., 0., 0., ..., 0., 0., 0.]),
 'gangplank': array([0., 0., 0., ..., 0., 0., 0.]),
 'garen': array([0., 0., 0., ..., 0., 0., 0.]),
 'graves': array([0., 0., 0., ..., 0., 0., 0.]),
 'heimerdinger': array([0., 0., 0., ..., 0., 0., 0.]),
 'illaoi': array([0., 0., 0., ..., 0., 0., 0.]),
 'janna': array([0., 0., 0., ..., 0., 0., 0.]),
 'jayce': array([0., 0., 0., ..., 0., 0., 0.]),
 'jhin': array([0., 0., 0., ..., 0., 0., 0.]),
 'jinx': array

In [18]:
# Walk the rows in order; row_pos is the positional row number and indexes
# the per-champion arrays. Unit names are lower-cased to match the column names.
for row_pos, team in enumerate(df['army']):
    for unit_name in team:
        unit_counter[unit_name.lower()][row_pos] += 1

In [19]:
# Overwrite each champion column with its per-row counts. The arrays are
# assigned by position, so they line up with the rows as iterated above.
for champ_col in champs:
    df[champ_col] = unit_counter[champ_col.lower()]

# Can now look at champion popularity at a match level

In [20]:
# Total copies of each champion fielded per match (summed over the rows of
# that match). Selecting the champion columns *before* summing avoids
# aggregating the unrelated string/list columns, which is wasted work and,
# on recent pandas versions, is no longer silently skipped.
df.groupby('match')[list(champs)].sum()

Unnamed: 0_level_0,akali,blitzcrank,braum,caitlyn,camille,chogath,darius,drmundo,ekko,ezreal,...,vi,viktor,warwick,yone,yuumi,zac,ziggs,zilean,zyra,rainingdummy
match,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
NA1_4090637842,1.0,0.0,2.0,0.0,0.0,3.0,2.0,3.0,0.0,0.0,...,1.0,0.0,1.0,1.0,3.0,1.0,0.0,0.0,2.0,0.0
NA1_4090775924,0.0,1.0,1.0,2.0,0.0,0.0,0.0,3.0,1.0,2.0,...,2.0,1.0,1.0,1.0,2.0,3.0,0.0,1.0,0.0,0.0
NA1_4090828571,0.0,3.0,0.0,0.0,1.0,2.0,2.0,1.0,0.0,0.0,...,3.0,0.0,0.0,1.0,2.0,3.0,1.0,2.0,1.0,0.0
NA1_4090875217,0.0,0.0,0.0,0.0,1.0,2.0,0.0,4.0,1.0,0.0,...,0.0,2.0,1.0,2.0,2.0,3.0,0.0,0.0,0.0,0.0
NA1_4090931175,1.0,0.0,0.0,1.0,1.0,1.0,1.0,4.0,0.0,0.0,...,1.0,1.0,2.0,0.0,0.0,2.0,2.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
NA1_4109638377,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,...,3.0,1.0,1.0,2.0,4.0,0.0,1.0,0.0,0.0,0.0
NA1_4109647577,0.0,3.0,0.0,2.0,0.0,1.0,0.0,1.0,1.0,1.0,...,2.0,0.0,1.0,1.0,1.0,3.0,0.0,3.0,0.0,0.0
NA1_4109652492,0.0,0.0,3.0,0.0,1.0,0.0,0.0,2.0,0.0,0.0,...,1.0,2.0,0.0,1.0,3.0,1.0,0.0,1.0,0.0,0.0
NA1_4109681202,3.0,0.0,5.0,0.0,0.0,1.0,2.0,1.0,1.0,2.0,...,0.0,0.0,1.0,0.0,2.0,0.0,1.0,2.0,2.0,0.0


In [21]:
# Build an {item id: display name} lookup from Community Dragon's TFT data dump.
# NOTE(review): the URL points at "latest", so the ids in this dump may drift
# away from the ids stored in older matches - consider pinning a patch version.
CDRAGON_TFT_URL = "https://raw.communitydragon.org/latest/cdragon/tft/en_us.json"

# A timeout keeps the notebook from hanging on a stalled connection, and
# raise_for_status surfaces HTTP errors instead of failing later on bad JSON.
response = requests.get(CDRAGON_TFT_URL, timeout=30)
response.raise_for_status()
item_dict = response.json()

items = {item['id']: item['name'] for item in item_dict['items']}

In [22]:
# Flatten every item id equipped on any unit in any row into one list
# (duplicates included; they are de-duplicated in the next cell).
# `or ()` skips units whose item list is empty or missing.
all_items = [
    item_id
    for team in df['units']
    for unit in team
    for item_id in (unit['items'] or ())
]

In [23]:
# One zero-filled array per distinct item id, one slot per dataframe row;
# these become the per-item count columns.
set_6_items = {item_id: np.zeros(len(df)) for item_id in set(all_items)}

In [24]:
# Count, for each row, how many copies of every item its units carry.
# row_pos is the positional row number and indexes the per-item arrays.
# (The unused `team_items` list and the hand-maintained counter were dropped.)
for row_pos, team in enumerate(df['units']):
    for unit in team:
        # `or ()` skips units whose item list is empty or missing
        for item_id in (unit['items'] or ()):
            set_6_items[item_id][row_pos] += 1

In [25]:
# Add one column per item, named by its display name from the `items`
# lookup and holding that item's per-row counts.
for item_id, per_row_counts in set_6_items.items():
    df[items[item_id]] = per_row_counts

In [26]:
# Inspect the frame again: the per-item count columns added above now appear on the right
df.head()

Unnamed: 0,match,game_datetime,game_length,game_version,gold_left,last_round,level,placement,puuid,time_eliminated,...,Loot Bag,Clockwork Emblem,Zeke's Herald,Tactician's Crown,Assassin Emblem,tft_item_name_UmbralGlaive,Thief's Gloves,Mercenary Emblem,Death's Defiance,Manazane
0,NA1_4107203113,2021-11-17 23:03:32,2028.263306,Version 11.23.409.0111 (Nov 12 2021/12:01:09) ...,0,34,8,3,2-xYFcIJUem__n9qL8bVWDc7uuuvvnp-SuB4VqKmGiPBFw...,1896.383179,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,NA1_4107203113,2021-11-17 23:03:32,2028.263306,Version 11.23.409.0111 (Nov 12 2021/12:01:09) ...,0,27,7,7,rIB0yQoA9jT06yokBJjP0wQQRbXe2xwZK1dlIrFwi471Im...,1550.396729,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,NA1_4107203113,2021-11-17 23:03:32,2028.263306,Version 11.23.409.0111 (Nov 12 2021/12:01:09) ...,1,33,8,5,WiQACHT1c3gcMNkdAVHuRybWz48Dsbu3PbTZGlr28dOcCV...,1842.735352,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
3,NA1_4107203113,2021-11-17 23:03:32,2028.263306,Version 11.23.409.0111 (Nov 12 2021/12:01:09) ...,1,30,8,6,FqFDLL5R9eVnnGE5TVK-kd1LiZ8GXeS7Ejy73BSnjddxz5...,1695.539795,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,NA1_4107203113,2021-11-17 23:03:32,2028.263306,Version 11.23.409.0111 (Nov 12 2021/12:01:09) ...,0,27,8,8,4uM1ZdIf5gv4G-I9ZvP9zKajQsJurn24wQQP-UivRpLnh4...,1546.322754,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0


In [49]:
# Item count columns are everything to the right of 'army', the last column
# added before the item columns were appended. Anchoring on that column
# replaces a hard-coded position (75) that silently breaks whenever the
# number of champion columns changes.
# Note: this rebinds all_items from the earlier list of item ids to an Index
# of item column names.
all_items = df.columns[df.columns.get_loc('army') + 1:]

In [50]:
# Average, over all matches, of the per-match total of each item / champion;
# keep the 15 highest. Group once and select the relevant columns before
# summing so the unrelated string/list columns are never aggregated.
per_match = df.groupby('match')

top_15_items_mean = (
    per_match[list(all_items)].sum()
    .mean()
    .sort_values(ascending=False)
    .head(15)
)
top_15_champs_mean = (
    per_match[list(champs)].sum()
    .mean()
    .sort_values(ascending=False)
    .head(15)
)

In [51]:
top_15_items_mean

Infinity Edge          4.032967
Jeweled Gauntlet       3.271062
Guinsoo's Rageblade    3.229548
Warmog's Armor         3.155067
Giant Slayer           2.754579
Hand Of Justice        2.619048
Gargoyle Stoneplate    2.504274
Runaan's Hurricane     2.479853
Sunfire Cape           2.413919
Bramble Vest           2.257631
Blue Buff              2.240537
Bloodthirster          2.115995
Spear of Shojin        2.103785
Titan's Resolve        2.054945
Dragon's Claw          1.791209
dtype: float64

In [52]:
top_15_champs_mean

drmundo      2.252747
janna        2.235653
yuumi        2.095238
taric        1.716728
zac          1.709402
leona        1.703297
braum        1.553114
vi           1.493284
urgot        1.466422
vex          1.451770
lux          1.438339
orianna      1.432234
yone         1.395604
jinx         1.336996
tahmkench    1.323565
dtype: float64

In [53]:
# Bar chart of the 15 items with the highest mean count per match.
# The data plotted is top_15_items_mean, so the axis label and title now say
# "item" (they previously said "champion"/"champs", copied from the champion chart).
fig = px.bar(
    x=top_15_items_mean.index,
    y=top_15_items_mean,
    labels={"x": "Item", "y": "Count"},
    title="Average number of items per game",
)
fig.show()

In [54]:
# Bar chart of the 15 champions with the highest mean count per match.
fig2 = px.bar(
    x=top_15_champs_mean.index,
    y=top_15_champs_mean,
    title="Average number of champs per game",
    labels=dict(x="Champion", y="Count"),
)
fig2.show()

In [60]:
def _lowest_placement_corr(columns, n=15):
    """Correlate each given column with `placement`; return the n smallest values.

    Sorting is ascending, so the most negative correlations come first
    (higher counts going together with numerically lower placement values).
    """
    correlations = df[columns].apply(lambda col: col.corr(df['placement']))
    return correlations.sort_values(ascending=True).head(n)


# Despite the "winrates" names, these hold correlation coefficients.
top_15_winrates_champs = _lowest_placement_corr(champs)
top_15_winrates_items = _lowest_placement_corr(all_items)

In [61]:
# Champion vs. placement correlation. The sign is flipped for plotting so the
# most negative correlations show up as the tallest bars.
fig3 = px.bar(
    x=top_15_winrates_champs.index,
    y=-top_15_winrates_champs,
    title="Correlation of placement and champion",
    labels=dict(x="Champion", y="correlation of placement"),
)
fig3.show()

In [62]:
# Item vs. placement correlation. The sign is flipped for plotting so the
# most negative correlations show up as the tallest bars.
fig4 = px.bar(
    x=top_15_winrates_items.index,
    y=-top_15_winrates_items,
    title="Correlation of placement and item",
    labels=dict(x="item", y="correlation of placement"),
)
fig4.show()

In [63]:
# Make sure the output folder exists. exist_ok=True makes this a single,
# race-free call that is safe to re-run.
os.makedirs("images", exist_ok=True)

# Export the four charts as static PNGs.
# NOTE(review): `fig` is built from top_15_items_mean, so
# "total_champ_usage.png" actually contains the item chart - consider
# renaming the file (and updating anything that links to it).
fig.write_image("images/total_champ_usage.png")
fig2.write_image("images/average_champ_usage.png")
fig3.write_image("images/correlation_placement_champion.png")
fig4.write_image("images/correlation_placement_item.png")

In [64]:
# Persist the enriched frame (match columns plus champion and item counts).
# The dataframe index is written as the first, unnamed CSV column.
refined_csv_path = "refined_tft_data.csv"
df.to_csv(refined_csv_path)