This Jupyter Notebook, titled 'Exploring_Riot_API', isn’t endorsed by Riot Games and doesn’t reflect the views or opinions of Riot Games
or anyone officially involved in producing or managing League of Legends. League of Legends and Riot Games are
trademarks or registered trademarks of Riot Games, Inc. League of Legends © Riot Games, Inc.

# Data Wrangling and Cleaning

In [2]:
%matplotlib inline
import pandas as pd
import requests
import time
import re
import pickle

In [3]:
# Load the two local stylesheets and inject them into the notebook so the
# default table/notebook rendering is more readable.
from IPython.core.display import HTML

# Context managers close both file handles deterministically; the original
# bare open(...).read() calls left them open until garbage collection.
with open('style-table.css') as table_css_file:
    css = table_css_file.read()
with open('style-notebook.css') as notebook_css_file:
    css = css + notebook_css_file.read()

# Must stay the last expression of the cell so Jupyter renders the <style> tag.
HTML('<style>{}</style>'.format(css))

## Obtaining Data from API

In [4]:
# Riot API key, as a string.
# Go to https://developer.riotgames.com/ to obtain an API key.
#
# The key is read from the RIOT_API_KEY environment variable rather than being
# pasted into the notebook, so a credential is never saved with (or committed
# alongside) the file. It falls back to an empty string when the variable is
# not set; in that case assign api_key manually for the session.
import os

api_key = os.environ.get('RIOT_API_KEY', '')

In [5]:
def get_acct_id(summoner_name, region, key):
    """
    Returns the 'accountId' field of the summoner-v4 by-name lookup for a
    player's in game summoner name, or None when the lookup fails.

    summoner_name: string, in game summoner name
    region: string, host prefix of the API endpoint (e.g. 'na1')
    key: string, API key

    A 429 (rate limit) response is retried once after sleeping 121 seconds.
    Any response that is still not a 200 afterwards yields None instead of
    being parsed as if it were a successful payload.
    """
    url = 'https://' + region + '.api.riotgames.com/lol/summoner/v4/summoners/' +\
          'by-name/' + summoner_name + '?api_key=' + key
    response = requests.get(url)

    #429 response means we hit the API rate limit: wait out the window and retry once
    if response.status_code == 429:
        print(response.status_code)
        print('Hit rate limit...Sleeping...')
        time.sleep(121)
        response = requests.get(url)

    #covers both a plain failure and a retry that failed again
    if response.status_code != 200:
        print(response.status_code)
        print('failed to get account id')
        return None

    #.get() so a response without the rate-limit headers cannot raise KeyError
    print('App Rate Limit ' + response.headers.get('X-App-Rate-Limit', 'unknown'))
    print('App Rate Limit Count ' + response.headers.get('X-App-Rate-Limit-Count', 'unknown'))
    print(response.status_code)
    return response.json().get('accountId')

In [6]:
def get_match_history(acct_id, region, game_type, season,\
                      beg_ind, key):
    """
    Returns one page of match history as json (the match-v4 matchlist
    response), or None when the request fails.

    acct_id: string, account id as returned by get_acct_id()
    region: string, host prefix of the API endpoint (e.g. 'na1')
    game_type: string, queue id; use '420' for 5v5 ranked
    season: string, season id; '11' for latest season
    beg_ind: index of the first match to return (sent as beginIndex)
    key: string, API key

    A 429 (rate limit) response is retried once after sleeping 121 seconds.
    Any response that is still not a 200 afterwards yields None, so callers
    never receive an error payload in place of match data.
    """
    url = 'https://' + region + '.api.riotgames.com/lol/match/v4/matchlists/' +\
          'by-account/' + acct_id +'?queue=' + game_type + '&season=' + season +\
          '&beginIndex=' + str(beg_ind) + '&api_key=' + key

    response = requests.get(url)

    #429 response means we hit the API rate limit: wait out the window and retry once
    if response.status_code == 429:
        print(response.status_code)
        print('Hit rate limit...Sleeping...')
        time.sleep(121)
        response = requests.get(url)

    #covers both a plain failure and a retry that failed again
    if response.status_code != 200:
        print(response.status_code)
        return None

    #.get() so a response without the rate-limit headers cannot raise KeyError
    print('App Rate Limit ' + response.headers.get('X-App-Rate-Limit', 'unknown'))
    print('App Rate Limit Count ' + response.headers.get('X-App-Rate-Limit-Count', 'unknown'))
    print(response.status_code)
    return response.json()

In [7]:
def get_match_history_int(acct_id, region, game_type, season, key, begin):
    """
    Returns match history starting from a given time as json, or None when
    the request fails.

    acct_id: string, account id as returned by get_acct_id()
    region: string, host prefix of the API endpoint (e.g. 'na1')
    game_type: string, queue id; use '420' for 5v5 ranked
    season: string, season id; '11' for latest season
    key: string, API key
    begin: start of the interval, sent as beginTime
           (presumably epoch milliseconds - confirm against the API docs)

    A 429 (rate limit) response is retried once after sleeping 121 seconds.
    Any response that is still not a 200 afterwards yields None, so callers
    never receive an error payload in place of match data.
    """
    url = 'https://' + region + '.api.riotgames.com/lol/match/v4/matchlists/' +\
          'by-account/' + acct_id +'?queue=' + game_type + '&season=' + season +\
          '&beginTime=' + str(begin) + '&api_key=' + key
    response = requests.get(url)

    #429 response means we hit the API rate limit: wait out the window and retry once
    if response.status_code == 429:
        print(response.status_code)
        print('Hit rate limit...Sleeping...')
        time.sleep(121)
        response = requests.get(url)

    #covers both a plain failure and a retry that failed again
    if response.status_code != 200:
        print(response.status_code)
        return None

    #.get() so a response without the rate-limit headers cannot raise KeyError
    print('App Rate Limit ' + response.headers.get('X-App-Rate-Limit', 'unknown'))
    print('App Rate Limit Count ' + response.headers.get('X-App-Rate-Limit-Count', 'unknown'))
    print(response.status_code)
    return response.json()

In [8]:
def get_match_stats(match_id, region, key):
    """
    Returns json of match stats based on match id, or None when the
    request fails.

    match_id: match id (converted with str() when building the URL)
    region: string, host prefix of the API endpoint (e.g. 'na1')
    key: string, API key

    A 429 (rate limit) response is retried once after sleeping 121 seconds.
    Any response that is still not a 200 afterwards yields None, so callers
    never receive an error payload in place of match data.
    """
    url = 'https://' + region +'.api.riotgames.com/lol/match/v4/matches/' +\
           str(match_id) + '?api_key=' + key
    response = requests.get(url)

    #429 response means we hit the API rate limit: wait out the window and retry once
    if response.status_code == 429:
        print(response.status_code)
        print('Hit rate limit...Sleeping...')
        time.sleep(121)
        response = requests.get(url)

    #covers both a plain failure and a retry that failed again
    if response.status_code != 200:
        print(response.status_code)
        print('No game data returned')
        return None

    #.get() so a response without the rate-limit headers cannot raise KeyError
    print('App Rate Limit ' + response.headers.get('X-App-Rate-Limit', 'unknown'))
    print('App Rate Limit Count ' + response.headers.get('X-App-Rate-Limit-Count', 'unknown'))
    print(response.status_code)
    return response.json()

In [9]:
# Sample call: fetch the stats json of one known NA match
x = get_match_stats(match_id=2932547603, region='na1', key=api_key)

App Rate Limit 20:1,100:120
App Rate Limit Count 1:1,1:120
200


## Cleaning the Data

In [10]:
#Champion Information
#Static champion data for patch 8.24.1, fetched over https instead of plain http
champion_data_url = 'https://ddragon.leagueoflegends.com/cdn/8.24.1/data/en_US/champion.json'
champ_response = requests.get(champion_data_url)
#Fail here with a clear HTTP error rather than later while parsing an error page
champ_response.raise_for_status()
champ_json = champ_response.json()

In [11]:
#Champ list: the keys of champ_json['data'], in their original order
champ_list = list(champ_json['data'])

In [12]:
#Champ type: maps each champion name to its 'tags' entry
champ_type = {name: champ_json['data'][name]['tags'] for name in champ_list}

In [13]:
#Maps integer champion ids (the 'key' field) to champ names
#Used later by the cleaning helpers to translate championId values
champ_dict = {int(champ_json['data'][name]['key']): name for name in champ_list}

In [21]:
#Item Information
#Static item data for patch 8.24.1, fetched over https instead of plain http
item_data_url = 'https://ddragon.leagueoflegends.com/cdn/8.24.1/data/en_US/item.json'
item_response = requests.get(item_data_url)
#Fail here with a clear HTTP error rather than later while parsing an error page
item_response.raise_for_status()
item_json = item_response.json()

In [20]:
#An item's name lives at item_json['data'][ITEM ID]['name'], e.g.:
item_json['data']['1001']['name']

#Item list: the item ids (strings) in their original order
item_list = list(item_json['data'])

In [19]:
#Maps integer item ids to item names; used when cleaning the match data
#Some of the item names were missing, went to api chat to find
item_dict = {int(item_id): item_json['data'][item_id]['name'] for item_id in item_list}

#id 0 gets no name
item_dict[0] = None

In [19]:
#Summoner Spell Info
#Static summoner spell data for patch 8.24.1, fetched over https instead of plain http
sumspell_data_url = 'https://ddragon.leagueoflegends.com/cdn/8.24.1/data/en_US/summoner.json'
sumspell_response = requests.get(sumspell_data_url)
#Fail here with a clear HTTP error rather than later while parsing an error page
sumspell_response.raise_for_status()
sumspell_json = sumspell_response.json()

In [20]:
#A spell's id number lives at sumspell_json['data'][SUMSPELLNAME]['key']
#e.g. sumspell_json['data']['SummonerFlash']['key']

#Summoner spell names, in their original order
sumspell_list = list(sumspell_json['data'])

In [21]:
#Maps integer summoner spell ids (the 'key' field) to spell names
sumspell_dict = {int(sumspell_json['data'][name]['key']): name for name in sumspell_list}

#id 14 is listed as 'SummonerDot'; rename it to 'SummonerIgnite' like in game
sumspell_dict[14] = 'SummonerIgnite'

In [90]:
#Smoke test of the API helpers
#NOTE: the API key needs to be updated once expired
match_id = 2932547603

#Look up the account id for a summoner name
account_id = get_acct_id(summoner_name='YaomiKing', region='na1', key=api_key)
print(account_id)

#json data on past 100 matches (queue '420', season '11', starting at index 0)
history = get_match_history(acct_id=account_id, region='na1', game_type='420',
                            season='11', beg_ind=0, key=api_key)

App Rate Limit 20:1,100:120
App Rate Limit Count 1:1,1:120
200
KUuZNj9ltgE1fRB4kPMyoOQaaVv1pd81wDC373Dw6NXQkA
App Rate Limit 20:1,100:120
App Rate Limit Count 2:1,2:120
200


In [26]:
#df = pd.DataFrame(history['matches'])
# Exploring match history as a Pandas DataFrame
#print(df.info())

## Building a Master Data Frame

In [1]:
#List of player names in the first set
#Players are high tier ranked players from which we will draw match data
#NOTE: every entry needs its own trailing comma; adjacent string literals are
#silently concatenated ('trick2g' 'hashinshin' became one bogus name)
players1 = ['scarra', 'gosu', 'trick2g', 'hashinshin', 'loltyler1', 'imaqtpie',
            'nightblue3', 'wingsofdeath', 'chapanya', 'shiphtur', 'doublelift',
            'voyboy', 'yassuo', 'gripex90', 'aphromoo', 'pokimane', 'dyrus', 'TF Blade',
            'pants are dragon', 'lourlo', 'allorim', 'Boxbôx', 'Per Se Bambi', 'horseie', 'Zile',
            'Nicolaj Jensen', 'pokeball', 'valkrin', 'hate', 'likeamaws', 'twinge',
            'sakurafloret', 'Chad jungle', 'mintcola', 'blobheart', 'joonroot', 'dominatelane',
            'vegeta', 'omnidk', 'dr beat', 'voki', 'kaipop', 'oogie', 'sophist sage',
            'takeji', 'zutter', 'haseeb', 'delyze', 'kicking', 'xiao ming',
            'SKT T1 Madlife', 'abibibi', 'Papá Chau', 'data88', 'wildtomgg',
            'bronson', 'd1 police', 'mercury', 'a noob draven', 'yao 17', 'xuegao',
            'treh', 'cdew', 'fakego', 'feng6', 'heisendong', 'pytrigon', 'billbobbilly', 'techxiii',
            'cao cao n1', 'honi', 'oddi3', 'adrian riv3n', 'xj11',
            'yu sang', 'meteos', 'ffwii', 'starwhale', 'iwdominatelol',
            'saskio', 'froogie', 'mancloud2', 'yoshi', 'haebaragi',
            'stuntopolis', 'wonhae', 'nopause', 'the holy slurp',
            '100T Bangg', '100t freeze', 'taproot86', 'bloodwater', 'catria', 'trinn',
            'nasfinest', 'im avi', 'liquid matt', 'dmytro', 'omizu', 'huhi',
            'TL cain', 'waynedwops', 'busioc', 'chaosrain', 'free papachau',
            'chris', '5fire', 'shiba inu', 'tsm bjergsen', '100t fragas', 'c9 sneaky',
            'thorn', 'deftsuo', 'adc abuser', 'dellzor', 'yisus', 'nebula', 'xin xin',
            'zoebestgurl', 'canadaad', 'rawvision', 'll stylish', 'oldhome', 'link',
            'rainbowboom', 'raigarak', 'thiengar', 'skeggsy', 'kyroo1', 'frosfroes',
            'wolfer', 'seouless', 'don tito', 'seanic', 'daddyio', 'aeriya',
            'quantum0002', 'ordinn', 'roja', 'boom u dead', 'rain lover',
            'jimmytheslayer', 'ikingvex', 'sun prince', 'morningst4r', 'zatsmod',
            'dragoonsmash', 'little mix', 'jim j poggers', 'll justice', 'shupian',
            'fwii', 'zaion', 'cile', 'gate', 'aesthetic player', 'duoqueen1',
            'urelectric', 'bigfatlp']

In [22]:
#Define function to get match history 
def _pause_at_call_limit(calls, limit=100, pause_secs=121):
    '''
    Sleeps through the rate-limit window once `calls` has reached `limit`.

    Returns the call count to continue with: 0 after sleeping, otherwise
    `calls` unchanged.
    '''
    if calls >= limit:
        print("API calls at 100, sleeping for 2 mins...")
        time.sleep(pause_secs)
        return 0
    return calls


def get_many_matches(acct_names, key, file_name, add_on=False):
    '''
    Collects the match ids of every player in acct_names (region 'na1',
    queue '420', season '11'), pickles them into the PlayerMatches directory
    and returns the list that was written.

    acct_names: iterable of in game summoner names
    key: string, API key
    file_name: name of the pickle file inside the PlayerMatches directory
    add_on = True -> add game ids to the specified pickled file
    add_on = False (default) -> create new pickled file of game ids

    The ids are stored with pickle.dump() and can be read back with
    pickle.load() on the same file opened in 'rb' mode.

    Players whose account id or first page of match history cannot be
    fetched are skipped and reported at the end. If a later page fails, the
    ids gathered so far for that player are kept and paging stops for them.
    '''
    #local import: the notebook's import cell does not bring in os
    import os

    #match_list keeps first-seen order; seen_ids makes the duplicate check O(1)
    match_list = []
    seen_ids = set()
    bad_player_ids = []
    #count of api calls since the last pause
    calls = 0

    for player in acct_names:
        calls = _pause_at_call_limit(calls)
        acct_id = get_acct_id(player, 'na1', key)
        calls += 1

        #no account id means nothing can be fetched: log player and move on
        if acct_id is None:
            bad_player_ids.append(player)
            continue

        beg_ind = 0
        while True:
            calls = _pause_at_call_limit(calls)
            history = get_match_history(acct_id, 'na1', '420', '11',
                                        beg_ind, key)
            calls += 1

            #None or a payload without 'matches' means the request failed
            if not isinstance(history, dict) or 'matches' not in history:
                if beg_ind == 0:
                    bad_player_ids.append(player)
                else:
                    print('Stopped paging early for player: ' + str(player))
                break

            page = history['matches']
            #an empty page means we are past the last match
            if not page:
                break

            for game in page:
                game_id = game['gameId']
                if game_id not in seen_ids:
                    seen_ids.add(game_id)
                    match_list.append(game_id)
            beg_ind += 100

    #os.path.join keeps the path valid on every platform (the hard-coded
    #backslash only worked on Windows)
    os.makedirs('PlayerMatches', exist_ok=True)
    out_path = os.path.join('PlayerMatches', file_name)

    #merge with the ids already stored in an existing file
    if add_on:
        #NOTE: pickle.load can execute arbitrary code; only load files
        #that this notebook produced itself
        with open(out_path, 'rb') as file:
            old_list = pickle.load(file)
        match_list = list(set(old_list + match_list))

    with open(out_path, 'wb') as file:
        pickle.dump(match_list, file)

    #print out player names for which we could not get match ids for
    if bad_player_ids:
        print('Could not return match ids for player: ' + str(bad_player_ids))

    return match_list
            
    

In [23]:
#add on to players1list.txt
#get_many_matches([], api_key, 'players1list.txt', add_on=True)

In [25]:
def create_row(json_data):
    """
    Flattens one match json into a single dict (one future dataframe row).

    The game id and game duration are copied directly ('unknown' when
    absent); every other column is filled in by the add_* helpers, which
    mutate the dict in place. Returns the dict.
    """
    #NOTE(review): 'game_length_mins' holds the raw 'gameDuration' value,
    #which looks like it may be in seconds - confirm the unit before relying on it
    row_contents = {
        'game_id': json_data.get('gameId', 'unknown'),
        'game_length_mins': json_data.get('gameDuration', 'unknown'),
    }

    #per-player columns
    add_player_name(row_contents, json_data)
    add_champ_pick(row_contents, json_data, champ_dict)
    add_sumspell_pick(row_contents, json_data, sumspell_dict)
    add_stats(row_contents, json_data, item_dict)
    add_team(row_contents, json_data)

    #per-team columns
    add_team_stats(row_contents, json_data)

    return row_contents


### Defining some helper functions to clean data for create_row()

In [27]:
def add_player_name(to_dict, json_data):
    """
    Stores each participant's summoner name in to_dict under 'p<N>_name',
    where N counts from 1 in 'participantIdentities' order. A missing name
    is stored as 'unknown'. Prints the game id once all names are added.
    """
    for slot, identity in enumerate(json_data.get('participantIdentities'), start=1):
        name_key = 'p{}_name'.format(slot)
        to_dict[name_key] = identity['player'].get('summonerName', 'unknown')
    print(json_data['gameId'])

In [28]:
def add_champ_pick(to_dict, json_data, champ_map):
    """
    Stores each participant's champion in to_dict under 'p<N>_champ'.

    The 'championId' is translated through champ_map when it is a known id;
    otherwise the raw id (or 'unknown' when the field is absent) is stored.
    One entry is written per element of 'participantIdentities'.
    """
    slot_count = len(json_data['participantIdentities'])
    for slot in range(1, slot_count + 1):
        champ_id = json_data['participants'][slot - 1].get('championId', 'unknown')
        to_dict['p{}_champ'.format(slot)] = (
            champ_map[champ_id] if champ_id in champ_map else champ_id
        )

In [29]:
def add_sumspell_pick(to_dict, json_data, spell_map):
    """
    Adds each participant's two summoner spell picks to to_dict under
    'p<N>_spell1' and 'p<N>_spell2'.

    to_dict: dict that receives the new entries (mutated in place)
    json_data: match json with 'participantIdentities' and 'participants'
    spell_map: dict mapping summoner spell id -> spell name

    A spell id found in spell_map is stored by name; any other id is stored
    as-is, and an absent field is stored as 'unknown'.
    """
    #Look ids up in the spell_map argument; the previous version ignored it
    #and silently read the global sumspell_dict instead
    for player in range(len(json_data['participantIdentities'])):
        participant = json_data['participants'][player]
        for spell_slot in ('spell1', 'spell2'):
            spell_id = participant.get(spell_slot + 'Id', 'unknown')
            if spell_id in spell_map:
                spell_id = spell_map[spell_id]
            to_dict['p'+str(player+1)+'_'+spell_slot] = spell_id

In [30]:
def add_stats(to_dict, json_data, item_map):
    """
    Copies every entry of each participant's 'stats' into to_dict.

    to_dict: dict that receives the new entries (mutated in place)
    json_data: match json with a 'participants' list
    item_map: dict mapping item id -> item name

    Keys are 'p<N>' followed directly by the stat name (no separator), e.g.
    'p1kills'. Stats whose name starts with 'item' plus a digit have their
    value translated through item_map when the id is known.
    """
    #raw string: '\d' in a plain string literal is an invalid escape sequence
    item_slot = re.compile(r'item\d')
    for slot, participant in enumerate(json_data['participants'], start=1):
        prefix = 'p' + str(slot)
        for stat, value in participant['stats'].items():
            #if item not in item_map it means its not a current active item
            #as of patch, so the raw id is kept
            if item_slot.match(stat) and value in item_map:
                value = item_map[value]
            to_dict[prefix + stat] = value

In [31]:
def add_team(to_dict, json_data):
    """
    Stores each participant's team in to_dict under 'p<N>_team'.

    to_dict: dict that receives the new entries (mutated in place)
    json_data: match json with 'participantIdentities' and 'participants'

    teamId 100 is stored as 'blue', an absent teamId as 'unknown', and any
    other teamId as 'red'.
    """
    #The previous version attached its else to the for loop instead of the
    #if, so only the last player was ever labelled 'red' (even when that
    #player was on team 100) and an empty participant list raised NameError
    for player in range(len(json_data.get('participantIdentities'))):
        team_id = json_data['participants'][player].get('teamId', 'unknown')
        if team_id == 100:
            team = 'blue'
        elif team_id == 'unknown':
            team = 'unknown'
        else:
            team = 'red'
        to_dict['p'+str(player+1)+'_team'] = team

In [32]:
def add_team_stats(to_dict, json_data):
    """
    Copies the team-level stats of every entry in json_data['teams'] into
    to_dict, prefixed with 'blue_team_' for teamId 100 and 'red_team_' for
    any other team. The 'bans' and 'teamId' columns are dropped afterwards.
    """
    for team_stats in json_data['teams']:
        prefix = 'blue_team_' if team_stats['teamId'] == 100 else 'red_team_'
        for stat, value in team_stats.items():
            to_dict[prefix + stat] = value
        #these two are copied by the loop above but not wanted in the row
        del to_dict[prefix + 'bans']
        del to_dict[prefix + 'teamId']


### Putting it all together

In [80]:
def create_master_data(match_list, region, key, out_name=None, csv=False):
    '''
    Downloads the stats of every match id in a pickled list and turns them
    into one row per game. Meant to be run on about 10,000 games at a time.

    match_list: name of the pickled file of match ids (as written by
                get_many_matches()) inside the PlayerMatches directory
    region: string, host prefix of the API endpoint (e.g. 'na1')
    key: string, API key
    out_name: base name (without '.csv') of the output file, used when csv=True
    csv: if True, write PlayerMatches/<out_name>.csv plus a pickled list of
         the failed match ids (failedgames.txt) and return None;
         otherwise return the data as a dataframe

    Games for which no stats are returned are skipped. Games that raise
    while being fetched or flattened are collected as failed games.
    '''
    #local import: the notebook's import cell does not bring in os
    import os

    #os.path.join keeps the paths valid on every platform (the hard-coded
    #backslash only worked on Windows)
    data_dir = 'PlayerMatches'

    #NOTE: pickle.load can execute arbitrary code; only load files that
    #this notebook produced itself
    with open(os.path.join(data_dir, match_list), 'rb') as file:
        matches = pickle.load(file)
        #Uncomment below to test for 200 matches
        #matches = matches[:200]

    match_data = []
    failed_games = []
    loop_count = 0

    #Limit this loop to 100 calls every 2 mins
    for match in matches:
        if loop_count == 100:
            print("wait 2 minutes before next call")
            time.sleep(121)
            loop_count = 0
        loop_count += 1

        #Deliberately broad: one malformed game must not abort a long batch,
        #so the error is printed and the match id recorded instead
        try:
            stats = get_match_stats(match, region, key)

            if stats is None:
                print('Skipping Game...')
                continue

            match_data.append(create_row(stats))
        except Exception as e:
            print(e)
            print('Failed to get data for game: ' + str(match))
            failed_games.append(match)

    frame = pd.DataFrame(match_data)
    if not csv:
        return frame

    frame.to_csv(os.path.join(data_dir, str(out_name) + '.csv'))

    with open(os.path.join(data_dir, 'failedgames.txt'), 'wb') as file:
        pickle.dump(failed_games, file)

    #TODO: handle exception if there is an error and write current data to file
    #and print out last match id so I can start back from there

In [111]:
#create_master_data('matchid_2.txt', 'na1', api_key, out_name='match_data_2', csv=True)

In [None]:
#save list of ids 10,000 at a time using naming convention matchid_#
#from the total list of ids make sublists of 10,000 ids for create_master_data() 

#DO THIS BY:
    
#with open("PlayerMatches\\players1list.txt", "rb") as file:
#    big_list = pickle.load(file)

#FIRST 10000 games
#with open("PlayerMatches\\matchid_1.txt", "wb") as file:
#    sublist = big_list[:10000] 
#    pickle.dump(sublist, file)

#NEXT 10000 games
#with open("PlayerMatches\\matchid_5.txt", "wb") as file:
#    sublist = big_list[40000:50000] 
#    pickle.dump(sublist, file)