In [1]:
import json
import os
import pickle
import re
import sys
import time
from urllib.parse import quote

import numpy as np
import pandas as pd
import requests
from tqdm import tqdm

In [2]:
# Riot API key. Prefer the RIOT_API_KEY environment variable so the secret does not
# have to live in the notebook.
# FIXME(security): the literal fallback is a credential committed to source; it is kept
# only so the notebook behaves as before when the variable is unset. Rotate it and
# remove the fallback.
API_KEY = os.environ.get('RIOT_API_KEY', 'RGAPI-1cc76fd7-3173-42a2-9680-edb53e72a4a7')

# Rate limit: 100 requests per 120 seconds
requests_count = 100
requests_duration = 120

# Seconds to sleep between consecutive requests: the per-request interval plus a
# 0.1 second safety margin, rounded to two decimals
api_fetch_limit = round(1/(requests_count/requests_duration) + .1, 2)

# Platform id used by the fetch cells below
region = 'NA1'

In [3]:
# Riot's Platform Services
platforms = ['BR1','EUN1','EUW1','JP1','KR','LA1','LA2','NA1','OC1','TR1','RU']

# Regional routing value for each platform id.
# 'OC1' is the id listed in `platforms` (and the prefix that get_match_histories
# extracts from match ids); the original table only had 'OCE1', so looking up 'OC1'
# raised KeyError. 'OCE1' is kept as an alias for backward compatibility.
regions = {'NA1': 'americas',
           'BR1': 'americas',
           'LA1': 'americas',
           'LA2': 'americas',
           'OC1': 'americas',
           'OCE1': 'americas',
           'KR': 'asia',
           'JP1': 'asia',
           'EUN1': 'europe',
           'EUW1': 'europe',
           'TR1': 'europe',
           'RU': 'europe'}

# Static reference tables read from the local set3/ directory
champions = pd.read_json('set3/champions.json')
galaxies = pd.read_json('set3/galaxies.json')
traits = pd.read_json('set3/traits.json')
items = pd.read_json('set3/items.json')

# Lookup tables used when parsing matches
# item id -> item name
items_dict = {item_id: item_name for item_id, item_name in zip(items['id'].values, items['name'].values)}
# galaxy key -> galaxy name
galaxies_dict = {galaxy_key: galaxy_name for galaxy_key, galaxy_name in zip(galaxies['key'], galaxies['name'])}
# champion id -> champion name; the empty id (an unused champion slot) maps to an empty name
champions_dict = {champion_id: champion_name for champion_id, champion_name in zip(champions['championId'], champions['name'])}
champions_dict.update({'': ''})

# TFT rank tiers and divisions
divisions = ['IV', 'III', 'II', 'I']
ranks_iron2diamond = ['iron', 'bronze', 'silver', 'gold', 'platinum', 'diamond']
ranks_master2challenger = ['master', 'grandmaster', 'challenger']


def flatten(l):
    """Flattens a list of lists (any iterable of iterables) into a single 1D list."""
    flat = []
    for sublist in l:
        flat.extend(sublist)
    return flat

In [4]:
def get_league_entries(platform, tier, division = None):
    """
    Fetches all league entries using Riot's API that are specified by the given parameters

    Parameters
    ----------
    platform : str
        The client platform (e.g. NA1, EUN1, etc.)
    tier : str
        The rank tier (e.g. iron, bronze, etc.); matched case-insensitively
    division : str
        The division of the desired tier (I, II, III, or IV). Required for the tiers in
        ranks_iron2diamond; not used for the tiers in ranks_master2challenger.

    Returns
    -------
    list or dict
        For tiers in ranks_iron2diamond: a list of dictionaries containing the league
        entries of every page that was fetched successfully.
        For tiers in ranks_master2challenger: the decoded JSON body of the league
        endpoint (the dictionary consumed by parse_master2challenger).

    Raises
    ------
    ValueError
        If the tier is unknown, or a lower tier is requested without a division
    requests.HTTPError
        If the request for an upper tier does not succeed

    """

    tier_key = tier.lower()

    # Handles lower tiers
    if tier_key in ranks_iron2diamond:
        if division is None:
            raise ValueError("A division (I, II, III, or IV) is required for tier '" + tier + "'")

        page = 1
        entries = []
        output_length = 50

        while True:
            url = "https://" + platform.lower() + ".api.riotgames.com/tft/league/v1/entries/" + tier.upper() + "/" + division + "?page=" + str(page) + "&api_key=" + API_KEY
            req = requests.get(url)

            # Decode the body once, and only for successful responses
            page_entries = req.json() if req.status_code == 200 else []

            # A non-empty page means there may be more; keep paging
            if len(page_entries) > 0:
                entries.extend(page_entries)
                sys.stdout.write("\r Fetching entries for " + tier + ' ' + division + ' | Page: ' + str(page))
                sys.stdout.flush()
                page += 1
                time.sleep(api_fetch_limit)
            else:
                # An empty page is the normal end of the listing; a failed request is
                # not, so report it instead of silently returning truncated data
                if req.status_code != 200:
                    sys.stderr.write("\n Request for " + tier + ' ' + division + " page " + str(page) + " failed with HTTP status " + str(req.status_code) + "; returning the entries fetched so far\n")
                sys.stdout.write("\r" + " " * output_length)
                sys.stdout.flush()
                break

        return entries

    # Handles upper tiers
    elif tier_key in ranks_master2challenger:
        url = "https://" + platform.lower() + ".api.riotgames.com/tft/league/v1/" + tier.lower() + "?api_key=" + API_KEY
        req = requests.get(url)
        # Fail here with the HTTP error rather than handing an error payload to the caller
        req.raise_for_status()
        return req.json()

    raise ValueError("Unknown tier '" + tier + "'")
            

In [5]:
def parse_master2challenger(data):
    """
    Reformats a master, grandmaster or challenger league payload so it has the same
    column layout as the iron-to-diamond entries

    Parameters
    ----------
    data : dict
        A league dictionary with the keys 'entries' (a list of per-player dictionaries),
        'tier', 'leagueId' and 'queue'

    Returns
    -------
    dataframe
        Returns a dataframe with one row per entry and the league-level tier, leagueId
        and queue (as 'queueType') repeated on every row. Columns that are absent from
        the entries are filled with NaN, so an empty league yields an empty dataframe
        with the expected columns instead of raising KeyError.

    """

    df = pd.DataFrame(data['entries'])

    # League-level attributes are stored once per league; copy them onto each entry
    df['tier'] = data['tier']
    df['leagueId'] = data['leagueId']
    df['queueType'] = data['queue']

    col_order = ['leagueId', 'queueType', 'tier', 'rank', 'summonerId', 'summonerName', 'leaguePoints', 'wins','losses','veteran','inactive','freshBlood', 'hotStreak']

    # reindex (rather than df[col_order]) tolerates missing columns
    return df.reindex(columns=col_order)

In [6]:
def get_all_league_entries(region):
    """
    Fetches all league entries using Riot's API

    Parameters
    ----------
    region : str
        The platform id passed through to get_league_entries (e.g. NA1)

    Returns
    -------
    dataframe
        Returns a dataframe containing all league entries, lower tiers first, with a
        fresh 0..n-1 index. An empty dataframe is returned if nothing was fetched.

    """

    # Get all lower rank entries
    entries_iron2diamond = []
    for tier in ranks_iron2diamond:
        for division in divisions:
            entries_iron2diamond.extend(get_league_entries(region, tier, division))

    # One frame for the lower ranks plus one per upper tier
    frames = [pd.DataFrame(entries_iron2diamond)]
    for tier in ranks_master2challenger:
        frames.append(parse_master2challenger(get_league_entries(region, tier)))

    # Stack the frames. The original outer merge on all shared columns raised
    # MergeError when the lower-rank frame was empty and re-sorted the rows.
    frames = [frame for frame in frames if not frame.empty]
    if not frames:
        return pd.DataFrame()

    return pd.concat(frames, ignore_index=True)

In [7]:
def get_summoners_info(platform, summoners):
    """
    Fetches the information for all summoners using Riot's API

    Parameters
    ----------
    platform : str
        The client platform (e.g. NA1, EUN1, etc.)
    summoners: list of str
        A list containing all summoner names

    Returns
    -------
    dataframe
        Returns a dataframe with one row per requested summoner, in request order.
        The decoded response is appended whatever its status code, so a failed lookup
        still occupies its row and the rows stay aligned with `summoners`.

    """
    summoner_info = []
    summoner_count = len(summoners)
    for i, summoner in enumerate(summoners):
        # Percent-encode the name: spaces and characters such as '/', '?' or '#' would
        # otherwise corrupt the URL path
        url = "https://" + platform.lower() + ".api.riotgames.com/tft/summoner/v1/summoners/by-name/" + quote(summoner, safe='') + "?api_key=" + API_KEY
        req = requests.get(url)
        sys.stdout.write("\r Fetching summoner info: " + str(i+1) + "/" + str(summoner_count) + " | " + str(round(((i+1)/summoner_count)*100)) + '%')
        sys.stdout.flush()
        summoner_info.append(req.json())
        time.sleep(api_fetch_limit)

    return pd.DataFrame(summoner_info)

In [8]:
def get_match_history_id(region, puuid):
    """
    Fetches up to the last 200 tft match ids for a given player

    Parameters
    ----------
    region : str
        The client region (e.g. americas, asia, etc.)
    puuid: str
        The puuid of a given player

    Returns
    -------
    list
        Returns a list of match ids. An empty list is returned (and a note is written
        to stderr) when the request fails, so that callers extending a list with the
        result never pick up the keys of an error payload.

    """

    url = 'https://' + region + '.api.riotgames.com/tft/match/v1/matches/by-puuid/' + puuid +'/ids?count=200&api_key=' + API_KEY
    req = requests.get(url)

    match_ids = req.json() if req.status_code == 200 else None

    # An error response decodes to a dictionary; only a list is a valid result
    if not isinstance(match_ids, list):
        sys.stderr.write("\n Could not fetch match ids for puuid " + puuid + " (HTTP status " + str(req.status_code) + ")\n")
        return []

    return match_ids

In [9]:
def get_all_match_history_ids(platform, puuids):
    """
    Collects the match history ids of every given player into one list

    Parameters
    ----------
    platform : str
        The client platform (e.g. NA1, EUN1, etc.); translated to its region via `regions`
    puuids: list of str
        The puuids of the players to look up

    Returns
    -------
    list
        returns the match ids of all players, concatenated in player order
        (ids shared by several players appear more than once)

    """

    routing_region = regions[platform]
    total = len(puuids)

    collected_ids = []
    for done, puuid in enumerate(puuids, start=1):
        collected_ids += get_match_history_id(routing_region, puuid)
        # Throttle between requests
        time.sleep(api_fetch_limit)
        percent = round((done/total)*100)
        sys.stdout.write("\r Fetching match history ids: " + str(done) + "/" + str(total) + " | " + str(percent) + '%')
        sys.stdout.flush()

    return collected_ids
        

In [10]:
def get_match_histories(match_history):
    """
    Fetches the match history for each match_id in match_history

    Parameters
    ----------
    match_history : list
        A unique list of all match_ids. The part of each id before the first
        underscore is taken as the platform and must be a key of `regions`.

    Returns
    -------
    list
        returns a list with the decoded response for each match id, in input order.
        Responses are appended whatever their status code.

    """

    matches = []
    n = len(match_history)
    for i, match in enumerate(match_history):
        # Match ids look like '<platform>_<number>'; the platform selects the region
        platform = match.split('_')[0]
        region = regions[platform]

        url = 'https://' + region + '.api.riotgames.com/tft/match/v1/matches/' + match + '?api_key=' + API_KEY
        req = requests.get(url)
        # Report i+1 so the counter ends at n/n and 100%, like the other fetch helpers
        sys.stdout.write("\r Fetching match histories: " + str(i+1) + "/" + str(n) + " | " + str(round(((i+1)/n)*100)) + '%')
        sys.stdout.flush()
        matches.append(req.json())
        time.sleep(api_fetch_limit)

    return matches

In [11]:
def get_base_dict():
    """
    Builds a fresh record holding the default value of every feature of a tft match

    Returns
    -------
    dict
        returns a new dictionary mapping each feature name to its default
        ('' for text features, 0 for numeric ones)

    """

    # Match-level and participant-level features
    record = {
        'match_id': '',
        'date': 0,
        'puuid': '',
        'tft_set_number': '',
        'game_variation': '',
        'game_length': 0,
        'champion_count': 0,
        'gold_left': 0,
        'last_round': 0,
        'level': 0,
        'placement': 0,
        'players_eliminated': 0,
        'time_eliminated': 0,
        'total_damage_to_players': 0,
    }

    # Eleven champion slots: name, three item slots, cost and tier
    for slot in range(1, 12):
        prefix = 'champion_' + str(slot)
        record[prefix] = ''
        record[prefix + '_item1'] = ''
        record[prefix + '_item2'] = ''
        record[prefix + '_item3'] = ''
        record[prefix + '_cost'] = 0
        record[prefix + '_tier'] = 0

    # Four features per known trait key
    for trait_key in traits['key'].values:
        record[trait_key] = 0
        record[trait_key + '_num_units'] = 0
        record[trait_key + '_tier_current'] = 0
        record[trait_key + '_tier_total'] = 0

    return record


In [12]:
def parse_units_traits(units, traits, base_dict):
    """
    Writes a participant's units and traits into their match record

    Parameters
    ----------
    units : list of dict
        The units fielded by a given player in a given match; each unit provides
        'character_id', 'items', 'rarity' and 'tier'
    traits : list of dict
        The traits of a given player in a given match; each trait provides
        'name', 'num_units', 'tier_current' and 'tier_total'
    base_dict: dict
        The match record of the player, updated in place

    Returns
    -------
    dict
        returns base_dict (the same object) with the unit and trait features filled in

    """

    base_dict['champion_count'] = len(units)

    # One group of champion_<n> features per unit, numbered from 1
    for slot, unit in enumerate(units, start=1):
        prefix = 'champion_' + str(slot)
        base_dict[prefix] = unit['character_id']

        # Item ids are translated to names; ids missing from items_dict become ''
        for item_slot, item in enumerate(unit['items'], start=1):
            base_dict[prefix + '_item' + str(item_slot)] = items_dict.get(item, '')

        # Cost is stored as rarity + 1
        base_dict[prefix + '_cost'] = unit['rarity'] + 1
        base_dict[prefix + '_tier'] = unit['tier']

    # Flag each trait the player has and record its counters
    for trait in traits:
        trait_name = trait['name']
        base_dict[trait_name] = 1
        base_dict[trait_name + '_num_units'] = trait['num_units']
        base_dict[trait_name + '_tier_current'] = trait['tier_current']
        base_dict[trait_name + '_tier_total'] = trait['tier_total']

    return base_dict

In [13]:
def parse_match(match):
    """
    Turns the data of a single match into one record per participant

    Parameters
    ----------
    match : dict
        A dictionary containing data for a single match, with the keys
        'metadata' and 'info'


    Returns
    -------
    list
        Returns a list with one dictionary of match data for each participant in the match

    """

    # Participant fields copied into the record under the same name, in this order
    participant_fields = ('gold_left', 'last_round', 'level', 'placement',
                          'players_eliminated', 'time_eliminated', 'total_damage_to_players')

    records = []
    for participant in match['info']['participants']:

        record = get_base_dict()

        # Match identification and match-level details
        record['match_id'] = match['metadata']['match_id']
        record['date'] = match['info']['game_datetime']
        record['puuid'] = participant['puuid']
        for info_field in ('tft_set_number', 'game_variation', 'game_length'):
            record[info_field] = match['info'][info_field]

        # Participant-level results
        for field in participant_fields:
            record[field] = participant[field]

        # Units and traits are expanded into their own features
        records.append(parse_units_traits(participant['units'], participant['traits'], record))

    return records

In [14]:
def parse_matches(matches):
    """
    Parses each match and collects the per-participant records of all matches

    Parameters
    ----------
    matches : list
        A list of matches

    Returns
    -------
    list
        returns a 1D list of dictionaries, one per participant per match. Matches that
        cannot be parsed (for example error payloads stored in place of a match) are
        skipped; the number skipped is written to stderr.

    """

    records = []
    skipped = 0
    for match in matches:
        try:
            records.extend(parse_match(match))
        # Best effort: skip unparsable matches, but let KeyboardInterrupt and
        # SystemExit through (a bare except would swallow them as well)
        except Exception:
            skipped += 1

    if skipped:
        sys.stderr.write("\n Skipped " + str(skipped) + " of " + str(len(matches)) + " matches that could not be parsed\n")

    return records

In [15]:
def build_dataset(matches):
    """
    Given a list of match data, builds a dataframe for all match data for all players of all matches

    The matches are parsed into one record per participant and put into a dataframe.
    Only records with tft_set_number 3 are kept. The date is converted from epoch
    milliseconds, durations are rounded to whole seconds and split into minute/second
    columns, and galaxy keys and champion ids are mapped to their names.

    Parameters
    ----------
    matches : list
        A list of matches

    Returns
    -------
    dataframe
        returns a dataframe containing the match data for all players. An empty
        dataframe is returned when no match could be parsed.

    """

    df = pd.DataFrame(parse_matches(matches))

    # Nothing parsed: there are no columns to transform
    if df.empty:
        return df

    # Keep set 3 only. Filtering first (on an explicit copy) means the column
    # assignments below act on the frame that is returned, not on a slice of it.
    df = df[df['tft_set_number'] == 3].copy()

    # Column order as produced by the parser, before any derived column is added
    cols = list(df.columns)

    df['date'] = pd.to_datetime(df['date'], unit='ms')
    df['game_variation'] = df['game_variation'].map(galaxies_dict)

    # Handle times: whole seconds, plus a minutes/seconds breakdown
    df['game_length'] = df['game_length'].round().astype('int')
    df['game_length_minutes'] = (df['game_length'] / 60).astype('int')
    df['game_length_seconds'] = (df['game_length'] - (df['game_length_minutes'] * 60)).astype('int')

    df['time_eliminated'] = df['time_eliminated'].round().astype('int')
    df['time_eliminated_minutes'] = (df['time_eliminated'] / 60).astype('int')
    df['time_eliminated_seconds'] = (df['time_eliminated'] - (df['time_eliminated_minutes'] * 60)).astype('int')

    # Champion Names
    for i in range(1, 12):
        df['champion_' + str(i)] = df['champion_' + str(i)].map(champions_dict)

    # Place each minutes/seconds pair right after its source column: game_length is
    # the 6th parsed column and time_eliminated the 13th
    col_order = cols[:6] + ['game_length_minutes', 'game_length_seconds'] + cols[6:13] + ['time_eliminated_minutes', 'time_eliminated_seconds'] + cols[13:]

    return df[col_order]

In [16]:
def save_dataframe(dataframe, filename, max_rows=300000):
    """
    Pickles a dataframe into the pickles/ directory, splitting large frames into parts

    Parameters
    ----------
    dataframe : dataframe
        The dataframe to save
    filename : str
        The file name without directory or extension
    max_rows : int
        The row count from which a frame is split. A frame with fewer rows is written
        to pickles/<filename>.p; otherwise it is written as int(rows / max_rows)
        consecutive, near-equal parts named pickles/<filename>_partial_<n>.p
        (n starting at 1).

    """

    rows = dataframe.shape[0]
    splits = int(rows / max_rows)

    os.makedirs("pickles", exist_ok=True)

    if splits > 0:
        # Integer boundaries of consecutive row ranges; unlike np.split this also
        # works when the row count is not an exact multiple of the number of parts
        bounds = [rows * i // splits for i in range(splits + 1)]
        for i in range(splits):
            # Slice the frame that was passed in (the original split the global
            # league_entries here, whatever frame was being saved)
            partial = dataframe.iloc[bounds[i]:bounds[i + 1]]
            with open("pickles/" + filename + "_partial_" + str(i+1) + ".p", "wb") as pickle_file:
                pickle.dump(partial, pickle_file)
    else:
        with open("pickles/" + filename + ".p", "wb") as pickle_file:
            pickle.dump(dataframe, pickle_file)

In [17]:
def load_dataframe(filename):
    """
    Loads a dataframe that was pickled into the pickles/ directory

    Parameters
    ----------
    filename : str
        The file name without directory or extension

    Returns
    -------
    dataframe
        Returns the content of pickles/<filename>.p if that file exists; otherwise the
        consecutive parts pickles/<filename>_partial_1.p, _partial_2.p, ... stacked in
        order with their stored index.

    Raises
    ------
    FileNotFoundError
        If neither the single file nor a first part exists

    """

    # NOTE(security): unpickling executes code embedded in the file; only load
    # pickles that were produced locally.
    pickle_files = os.listdir('pickles')

    if filename + ".p" in pickle_files:
        with open("pickles/" + filename + ".p", "rb") as pickle_file:
            return pickle.load(pickle_file)

    # Read parts 1, 2, 3, ... until the next number is missing
    partials = []
    split = 1
    while filename + "_partial_" + str(split) + ".p" in pickle_files:
        with open("pickles/" + filename + "_partial_" + str(split) + ".p", "rb") as pickle_file:
            partials.append(pickle.load(pickle_file))
        split += 1

    if not partials:
        raise FileNotFoundError("No pickle found for '" + filename + "' in pickles/")

    # Stack the parts. The original outer merge on all columns re-sorted the rows and
    # could multiply or collapse rows that occur in more than one part.
    return pd.concat(partials)
            
        
        

In [18]:
# Get all tft league entries
#league_entries = get_all_league_entries(region)

# Save league_entries object
#save_dataframe(league_entries, 'league_entries')

In [19]:
# Load the league entries from the pickles directory instead of fetching them again
league_entries = load_dataframe('league_entries')

In [20]:
# Keep only the last (most recent) league entry of each summoner name.
# drop_duplicates works on row positions, so it does not depend on the index labels
# being 0..n-1 the way the previous position-based drop did.
league_entries = league_entries.drop_duplicates(subset='summonerName', keep='last')

# Names of the remaining summoners, in the same order as league_entries
summoner_names = league_entries['summonerName'].tolist()

In [21]:
# Fetch summoners' information
#summoner_infos = get_summoners_info('NA1', summoner_names[:20])

# Save summoner_infos object
#save_dataframe(summoner_infos, 'summoner_infos')

In [22]:
# Load the summoner_infos dataframe from the pickles directory instead of fetching it again
summoner_infos = load_dataframe('summoner_infos')

In [23]:
# Update league entries with additional information about each respective summoner.
# The rows are matched by position: summoner_infos must hold the information of the
# first len(summoner_infos) league entries, in the same order.
summoner_columns = ['accountId', 'puuid', 'profileIconId', 'revisionDate', 'summonerLevel']
league_entries_desc = league_entries.iloc[:len(summoner_infos)].copy()
for column in summoner_columns:
    league_entries_desc[column] = summoner_infos[column].values

# Fetch all matches for each puuid respectively and filter out duplicates
puuids = league_entries_desc['puuid'].values
match_history_ids = get_all_match_history_ids(region, puuids)

# Sorted so the order of the unique ids is the same on every run
match_history_ids = sorted(set(match_history_ids))

 Fetching match history ids: 20/20 | 100%

In [24]:
# Fetch a list of match histories with respect to each match history id
#match_histories = get_match_histories(match_history_ids)

# Save match_histories object
#pickle.dump(match_histories, open( "pickles/match_histories.p", "wb" ))

In [25]:
# Load the match_histories object from its pickle instead of fetching it again
# NOTE(security): unpickling executes code embedded in the file; only load pickles
# that were produced locally.
with open('pickles/match_histories.p', "rb") as pickle_file:
    match_histories = pickle.load(pickle_file)

# Work on a shallow copy so the loaded list itself is left untouched
matches = match_histories.copy()

In [27]:
# Build the per-participant dataset from the loaded matches and display it
dataset = build_dataset(matches)
dataset

Unnamed: 0,match_id,date,puuid,tft_set_number,game_variation,game_length,game_length_minutes,game_length_seconds,champion_count,gold_left,...,Valkyrie_tier_current,Valkyrie_tier_total,Vanguard,Vanguard_num_units,Vanguard_tier_current,Vanguard_tier_total,Set3_Void,Set3_Void_num_units,Set3_Void_tier_current,Set3_Void_tier_total
0,NA1_3440412745,2020-05-29 01:17:28.783,UAwXUVEH2tdKM78BkkGgDQQI0QNdYSKyBS1BJjFqHBgMTR...,3,Superdense Galaxy,2108,35,8,1,84,...,0,0,0,0,0,0,0,0,0,0
1,NA1_3440412745,2020-05-29 01:17:28.783,UWsGPsQUMkh-Kczr-H1UFvN8KgGulot8x_mOIN8PVsy3Z-...,3,Superdense Galaxy,2108,35,8,8,1,...,0,1,1,1,0,2,0,0,0,0
2,NA1_3440412745,2020-05-29 01:17:28.783,Lb6bVw0Zn4I_Fhlt3tKT8Wxt_k2mhGuY_Ua4VRV5rz8FPW...,3,Superdense Galaxy,2108,35,8,8,0,...,0,0,0,0,0,0,1,1,0,1
3,NA1_3440412745,2020-05-29 01:17:28.783,3zkwlBn0A88t9XlWuacOVFl6HD8ffltEdDrND1jzA_5LaI...,3,Superdense Galaxy,2108,35,8,7,40,...,0,1,1,1,0,2,1,1,0,1
4,NA1_3440412745,2020-05-29 01:17:28.783,fDyFqG0Ul-Am2BnoXy2D4Ixbo6jvxKwaYxW8LktKRDNsw_...,3,Superdense Galaxy,2108,35,8,8,13,...,0,1,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5571,NA1_3436656640,2020-05-26 05:29:14.936,R-1iaUJzT1DrU15z08EncmLZcAGY1omyhgKKrqAD1U0bW1...,3,Normal Game,1980,33,0,8,3,...,1,1,0,0,0,0,0,0,0,0
5572,NA1_3436656640,2020-05-26 05:29:14.936,ONbXxfirnNxW2VF4KtnwnEA8egGMxaUGeRJsPGMUXJB98h...,3,Normal Game,1980,33,0,8,15,...,0,0,1,1,0,2,0,0,0,0
5573,NA1_3436656640,2020-05-26 05:29:14.936,d2iV49u0481RlFFbaCRajQF_8639uFo58JKfVqm_RP4hEj...,3,Normal Game,1980,33,0,8,1,...,0,0,0,0,0,0,1,1,0,1
5574,NA1_3436656640,2020-05-26 05:29:14.936,3O4_AcQlcmRm9biKAreM1TqjDbW-HseVGYDFK9q95gTZe_...,3,Normal Game,1980,33,0,8,32,...,0,1,1,1,0,2,1,1,0,1
