# INFO 2950 Final Project - Data Collection Code

# League of Legends Gameplay Data Analysis by Bin Jang

## Data Collection Code

In [14]:
import os
import time

import bs4 # package for html parsing
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests # package for http requests

# API Key (has to be updated every 24 hrs)
# The key is read from the RIOT_API_KEY environment variable so that no
# credential is stored in the notebook itself. Set it before starting Jupyter,
# e.g. `export RIOT_API_KEY=RGAPI-...`.
API_key = os.environ.get("RIOT_API_KEY", "")
if not API_key:
    print("RIOT_API_KEY is not set; requests to the Riot API will be rejected.")

In [5]:
## Create Dataframe for summoners in each league (Challenger)
challenger = "https://kr.api.riotgames.com/lol/league/v4/challengerleagues/by-queue/RANKED_SOLO_5x5?api_key=" + API_key
r = requests.get(challenger, timeout=30)
# Stop here on an expired key or any other HTTP error instead of trying to
# build a DataFrame out of the API's error payload.
r.raise_for_status()
challenger_league = r.json()

# One row per summoner, built directly from the league's 'entries' list.
# The league-level 'queue', 'name' and 'leagueId' fields are not carried over,
# 'rank' is dropped, and the league 'tier' is kept as the first column.
challenger_entries_df = pd.DataFrame(challenger_league['entries'])
challenger_df = challenger_entries_df.drop(columns=['rank'], errors='ignore')
challenger_df.insert(0, 'tier', challenger_league['tier'])

challenger_df.to_csv('challenger_data.csv',index=False,encoding = 'utf-8')

In [7]:
## Add 'accountId' column for each summoner - required to request matchmaking data

# Object-typed placeholder: the ids are strings, and a row keeps None when its
# lookup fails.
challenger_df['accountId'] = None

for i in range(len(challenger_df)):
    try:
        summoner = 'https://kr.api.riotgames.com/lol/summoner/v4/summoners/by-name/' + challenger_df['summonerName'].iloc[i] + '?api_key=' + API_key
        r = requests.get(summoner, timeout=10)

        # Rate limited: wait for the period the API asks for (5 s if it does not
        # say) and retry, but give up after a bounded number of attempts.
        attempts = 0
        while r.status_code == 429 and attempts < 24:
            time.sleep(int(r.headers.get('Retry-After', 5)))
            r = requests.get(summoner, timeout=10)
            attempts += 1

        # Any remaining non-2xx answer (unknown name, expired key, ...) is
        # reported below instead of surfacing as a missing 'accountId' key.
        r.raise_for_status()
        challenger_df.loc[i, 'accountId'] = r.json()['accountId']

    except (requests.RequestException, KeyError, TypeError, ValueError) as err:
        # Best effort: report the failure and move on to the next summoner.
        print("Could not fetch accountId for summoner", i, "-", err)

challenger_df

Unnamed: 0,tier,summonerId,summonerName,leaguePoints,wins,losses,veteran,inactive,freshBlood,hotStreak,accountId
0,CHALLENGER,2lHP_tI7_4u1q7Z_zzQJThCSwV50e35RGcN1wvhFLO_M-C...,살퀸레,673,257,227,False,False,False,False,39IDdOr2Yu7olI-OJJAJgcB27kbsHEbGkmNwf8UZKk7Rmb...
1,CHALLENGER,6gEAOSKIL_eNW2YVVoTbQc77EvEE_Le5P0pKJqQLpnBKZW...,Hidere,758,255,218,True,False,False,False,SncdNlUbmvTkTUOUR1MT1FYY8_g_Fv1LCMFZpGmvJ7qVAN...
2,CHALLENGER,QDFPKhojmB0DaBoUsy9Xxy5NBDmpp5x1V7roqEkOLwi-QjA,래 뚜,771,347,310,True,False,False,False,BFNEElt6E7WFhQUEWB8AXAT9z62yIBZNn2ZzT1pL5-kvl9M
3,CHALLENGER,4HidLdNhpUaK3YaAr83exnctEF-Z26f6e4iF9_UfichOvQ,No longer love,679,245,185,False,False,False,False,WFGFfCXOE2USA_5LxshtAaPcrLtbEFUzSGfn8g2i9FLs
4,CHALLENGER,lcf6cgIYFGZCM3ESmr2hUOIOz9OSHThMP1h7DiXWGYdhsQ,11년째롤하는틀딱,782,164,123,False,False,True,False,xqwoEcIBNNQ_dkDgZU4AOKBclppm3HYC1r46BxRy8bI
...,...,...,...,...,...,...,...,...,...,...,...
295,CHALLENGER,XHiEUguhakoUZ1y0D5toFYjkAd42gFGfYUk3p56C-ybdkI...,long yan jing yi,1034,171,113,True,False,False,False,xgz2NMoSfLIKpIC8lmyY-jclh23CoPgDnw9lT6aKNG_y5z...
296,CHALLENGER,GmxRMOMKFIK_MboEA_OSw7rNE0BSrZCvPRxQi3TlJsDFEA,CCrazy,655,224,190,False,False,True,False,_MXJDBFN69lGiMfstXnLCYamTMPsv5kZfejfu0cw4rIK
297,CHALLENGER,xxw49vKdyCJNlOE-4ZCLUWfeLLsyvdfXskqBjsYVXIcP2A,xiangtaosensei,683,169,129,False,False,False,True,3ZxG8T_9hoBuXPiqh0k_Rg5_QGvjtMMXAJKx4gXX6S4
298,CHALLENGER,GuwkH4AhhoEMH8hIksDV7GPrq-5kwGHzdp8ftOBbNhM25Uo,BRO Yaharong,973,647,599,True,False,False,False,E9lpa-KIGdxJ-fv7GzWpO8lR1JOGXRff4Zv2LW7uiTdazZU


In [None]:
# Add a column that counts the number of unique champions used in the past 20 games

# 0 means "not collected"; such rows are removed in the final cleaning step.
challenger_df['nChamps'] = 0

# for every player in the dataframe
for i in range(len(challenger_df)):
    acc_id = challenger_df.loc[i,'accountId']

    # No account id was found for this player, so there is nothing to request;
    # skipping saves a call against the API rate limit.
    if pd.isna(acc_id) or not acc_id:
        print("Error on summoner", i, "- no accountId")
        continue

    try:
        match_url = "https://kr.api.riotgames.com/lol/match/v4/matchlists/by-account/{}?queue=420&endIndex=20&api_key={}".format(acc_id, API_key)
        r_match = requests.get(match_url, timeout=10)

        # Rate limited: wait for the period the API asks for (5 s if it does not
        # say) and retry, but give up after a bounded number of attempts.
        attempts = 0
        while r_match.status_code == 429 and attempts < 24:
            time.sleep(int(r_match.headers.get('Retry-After', 5)))
            r_match = requests.get(match_url, timeout=10)
            attempts += 1

        r_match.raise_for_status()
        champ_history = r_match.json()["matches"] # Champions used on recent matches

        # The count is defined over exactly 20 games; players with fewer games
        # keep nChamps == 0, as before.
        if len(champ_history) < 20:
            print("Error on summoner", i, "- fewer than 20 matches")
            continue

        # number of distinct champions among the 20 matches
        challenger_df.loc[i,'nChamps'] = len({match['champion'] for match in champ_history[:20]})
    except (requests.RequestException, KeyError, TypeError, ValueError) as err:
        print("Error on summoner", i, "-", err)


In [12]:
# Save the Challenger table, now including the accountId and nChamps columns.
challenger_df.to_csv(
    'challenger_data.csv',
    index=False,
    encoding='utf-8',
)

What's below is the same data collection process, just for a different tier (Grandmaster).

In [None]:
## Create Dataframe for summoners in each league (Grandmaster)
grandmaster = "https://kr.api.riotgames.com/lol/league/v4/grandmasterleagues/by-queue/RANKED_SOLO_5x5?api_key=" + API_key
r = requests.get(grandmaster, timeout=30)
# Stop here on an expired key or any other HTTP error instead of trying to
# build a DataFrame out of the API's error payload.
r.raise_for_status()
grandmaster_league = r.json()

# One row per summoner, built directly from the league's 'entries' list.
# The league-level 'queue', 'name' and 'leagueId' fields are not carried over,
# 'rank' is dropped, and the league 'tier' is kept as the first column.
grandmaster_entries_df = pd.DataFrame(grandmaster_league['entries'])
grandmaster_df = grandmaster_entries_df.drop(columns=['rank'], errors='ignore')
grandmaster_df.insert(0, 'tier', grandmaster_league['tier'])

In [16]:
## Add 'accountId' column for each summoner - required to request matchmaking data

# Object-typed placeholder: the ids are strings, and a row keeps None when its
# lookup fails.
grandmaster_df['accountId'] = None

for i in range(len(grandmaster_df)):
    try:
        summoner = 'https://kr.api.riotgames.com/lol/summoner/v4/summoners/by-name/' + grandmaster_df['summonerName'].iloc[i] + '?api_key=' + API_key
        r = requests.get(summoner, timeout=10)

        # Rate limited: wait for the period the API asks for (5 s if it does not
        # say) and retry, but give up after a bounded number of attempts.
        attempts = 0
        while r.status_code == 429 and attempts < 24:
            time.sleep(int(r.headers.get('Retry-After', 5)))
            r = requests.get(summoner, timeout=10)
            attempts += 1

        # Any remaining non-2xx answer (unknown name, expired key, ...) is
        # reported below instead of surfacing as a missing 'accountId' key.
        r.raise_for_status()
        grandmaster_df.loc[i, 'accountId'] = r.json()['accountId']

    except (requests.RequestException, KeyError, TypeError, ValueError) as err:
        # Best effort: report the failure and move on to the next summoner.
        print("Could not fetch accountId for summoner", i, "-", err)

## Add a column that counts the number of unique champions used in the past 20 games

# 0 means "not collected"; such rows are removed in the final cleaning step.
grandmaster_df['nChamps'] = 0

for i in range(len(grandmaster_df)):
    acc_id = grandmaster_df.loc[i,'accountId']

    # No account id was found for this player, so there is nothing to request;
    # skipping saves a call against the API rate limit.
    if pd.isna(acc_id) or not acc_id:
        print("Error on summoner", i, "- no accountId")
        continue

    try:
        match_url = "https://kr.api.riotgames.com/lol/match/v4/matchlists/by-account/{}?queue=420&endIndex=20&api_key={}".format(acc_id, API_key)
        r_match = requests.get(match_url, timeout=10)

        attempts = 0
        while r_match.status_code == 429 and attempts < 24:
            time.sleep(int(r_match.headers.get('Retry-After', 5)))
            r_match = requests.get(match_url, timeout=10)
            attempts += 1

        r_match.raise_for_status()
        champ_history = r_match.json()["matches"] # Champions used on recent matches

        # The count is defined over exactly 20 games; players with fewer games
        # keep nChamps == 0, as before.
        if len(champ_history) < 20:
            print("Error on summoner", i, "- fewer than 20 matches")
            continue

        # number of distinct champions among the 20 matches
        grandmaster_df.loc[i,'nChamps'] = len({match['champion'] for match in champ_history[:20]})
    except (requests.RequestException, KeyError, TypeError, ValueError) as err:
        print("Error on summoner", i, "-", err)

grandmaster_df.to_csv('grandmaster_data.csv',index=False,encoding = 'utf-8')

In [18]:
## Create Dataframe for summoners in each league (Master)
master = "https://kr.api.riotgames.com/lol/league/v4/masterleagues/by-queue/RANKED_SOLO_5x5?api_key=" + API_key
r = requests.get(master, timeout=30)
# Stop here on an expired key or any other HTTP error instead of trying to
# build a DataFrame out of the API's error payload.
r.raise_for_status()
master_league = r.json()

# One row per summoner, built directly from the league's 'entries' list.
# The league-level 'queue', 'name' and 'leagueId' fields are not carried over,
# 'rank' is dropped, and the league 'tier' is kept as the first column.
master_entries_df = pd.DataFrame(master_league['entries'])
master_df = master_entries_df.drop(columns=['rank'], errors='ignore')
master_df.insert(0, 'tier', master_league['tier'])

## I actually had to stop my data collection here.
# The per-summoner 'accountId' and 'nChamps' lookups that were run for the
# Challenger and Grandmaster tiers are not run for Master, so master_data.csv
# contains only the league-entry columns.

master_df.to_csv('master_data.csv',index=False,encoding = 'utf-8')

I could not collect the 'accountId' and 'nChamps' data for Master tier users because it would have required too many requests (the API limits the rate to 100 requests per 2 minutes).
Executing a single cell already took over an hour for Grandmaster, which had only 700 users, and Master had twice as many users.
So I had to stop my data collection here. master_data.csv does not have accountId or nChamps columns.

In [31]:
# Load the saved Challenger table back from disk and display it.
challenger_df = pd.read_csv(
    'challenger_data.csv',
)
challenger_df

Unnamed: 0,tier,summonerId,summonerName,leaguePoints,wins,losses,veteran,inactive,freshBlood,hotStreak,accountId,nChamps
0,CHALLENGER,2lHP_tI7_4u1q7Z_zzQJThCSwV50e35RGcN1wvhFLO_M-C...,살퀸레,673,257,227,False,False,False,False,39IDdOr2Yu7olI-OJJAJgcB27kbsHEbGkmNwf8UZKk7Rmb...,4
1,CHALLENGER,6gEAOSKIL_eNW2YVVoTbQc77EvEE_Le5P0pKJqQLpnBKZW...,Hidere,758,255,218,True,False,False,False,SncdNlUbmvTkTUOUR1MT1FYY8_g_Fv1LCMFZpGmvJ7qVAN...,12
2,CHALLENGER,QDFPKhojmB0DaBoUsy9Xxy5NBDmpp5x1V7roqEkOLwi-QjA,래 뚜,771,347,310,True,False,False,False,BFNEElt6E7WFhQUEWB8AXAT9z62yIBZNn2ZzT1pL5-kvl9M,6
3,CHALLENGER,4HidLdNhpUaK3YaAr83exnctEF-Z26f6e4iF9_UfichOvQ,No longer love,679,245,185,False,False,False,False,WFGFfCXOE2USA_5LxshtAaPcrLtbEFUzSGfn8g2i9FLs,4
4,CHALLENGER,lcf6cgIYFGZCM3ESmr2hUOIOz9OSHThMP1h7DiXWGYdhsQ,11년째롤하는틀딱,782,164,123,False,False,True,False,xqwoEcIBNNQ_dkDgZU4AOKBclppm3HYC1r46BxRy8bI,11
...,...,...,...,...,...,...,...,...,...,...,...,...
295,CHALLENGER,XHiEUguhakoUZ1y0D5toFYjkAd42gFGfYUk3p56C-ybdkI...,long yan jing yi,1034,171,113,True,False,False,False,xgz2NMoSfLIKpIC8lmyY-jclh23CoPgDnw9lT6aKNG_y5z...,6
296,CHALLENGER,GmxRMOMKFIK_MboEA_OSw7rNE0BSrZCvPRxQi3TlJsDFEA,CCrazy,655,224,190,False,False,True,False,_MXJDBFN69lGiMfstXnLCYamTMPsv5kZfejfu0cw4rIK,10
297,CHALLENGER,xxw49vKdyCJNlOE-4ZCLUWfeLLsyvdfXskqBjsYVXIcP2A,xiangtaosensei,683,169,129,False,False,False,True,3ZxG8T_9hoBuXPiqh0k_Rg5_QGvjtMMXAJKx4gXX6S4,13
298,CHALLENGER,GuwkH4AhhoEMH8hIksDV7GPrq-5kwGHzdp8ftOBbNhM25Uo,BRO Yaharong,973,647,599,True,False,False,False,E9lpa-KIGdxJ-fv7GzWpO8lR1JOGXRff4Zv2LW7uiTdazZU,10


Lastly, I cleaned the data because some rows had no valid accountId or nChamps value (they were left at the placeholder 0). Those players had probably changed their summoner name, so the API could not fetch their data.

In [36]:
# Keep only the players whose champion count is positive (nChamps > 0), then
# overwrite the per-tier CSV files with the remaining rows.
chal_filtered = challenger_df.loc[challenger_df['nChamps'].gt(0)]
gm_filtered = grandmaster_df.loc[grandmaster_df['nChamps'].gt(0)]

for cleaned, csv_name in (
    (chal_filtered, 'challenger_data.csv'),
    (gm_filtered, 'grandmaster_data.csv'),
):
    cleaned.to_csv(csv_name, index=False, encoding='utf-8')