In [None]:
# import the necessary libraries
import json
import os
import time

import requests

## 1.GETTING RAW DATA

<span style ="font-size: 24px;"> 1.1.Getting summonerId</span><br>

<span style ="font-size: 20px;"> 1.1.1.Getting summonerId of ranks below master</span><br>

<span style ="font-size: 16px;">From master onwards, ranks no longer have divisions, which makes their division only I instead of I, II, III, and IV</span><br>
<span style ="font-size: 16px;">Also, the higher the rank, the fewer players it has; some ranks do not even have 200 players.</span><br>
<span style ="font-size: 16px;">Therefore, for these higher ranks, we will use another way to work with them</span><br>

In [None]:
# API key: prefer the RIOT_API_KEY environment variable so the secret does not
# have to live in the notebook. The literal is kept only as a fallback so that
# existing runs keep working.
# NOTE(review): remove the literal and rotate the key before sharing this notebook.
api_key = os.environ.get("RIOT_API_KEY", 'RGAPI-898a3f42-867f-410a-9845-ca10dd463281')

# All the ranks that we want to work with
tiers = ["IRON", "BRONZE", "SILVER", "GOLD", "PLATINUM", "EMERALD", "DIAMOND"]

# Each of these 7 ranks has 4 divisions (I, II, III, IV), so we take 50 summonerIds per division
divisions = ["I", "II", "III", "IV"]

# Create the output folder up front so the save below cannot fail on a fresh checkout
os.makedirs("summoner_ids", exist_ok=True)

for tier in tiers:
    # summonerIds collected for this rank; the target is 200 (4 divisions x 50)
    summoner_ids = []
    for division in divisions:
        url = f"https://vn2.api.riotgames.com/lol/league-exp/v4/entries/RANKED_SOLO_5x5/{tier}/{division}?page=1&api_key={api_key}"
        response = requests.get(url)
        if response.status_code == 200:
            # data is a list of dicts, one per account; we only need the summonerId
            data = response.json()
            # keep the summonerId of the first 50 entries of this division
            summoner_ids += [entry['summonerId'] for entry in data[:50]]
        else:
            # This request fetches league entries, so say so (and say which ones) in the message
            print(f"Failed to retrieve league entries for {tier} {division}. Status Code:", response.status_code)

    # Only save when we really got 200 summonerIds; otherwise report how many we have
    if len(summoner_ids) == 200:
        with open(f"summoner_ids/{tier}_summoner_ids.json", 'w', encoding='utf-8') as file:
            json.dump(summoner_ids, file, ensure_ascii=False, indent=4)
        print(f"got {tier}'s summonerIds succesfully!")
    else:
        print(f"something is wrong! {tier} has ", len(summoner_ids), "summonerIds!")

200


<span style ="font-size: 20px;"> 1.1.2.Getting summonerId of ranks from master onwards</span><br>

<span style ="font-size: 16px;">Now, ranks no longer have divisions, so the division will always be I.</span><br>
<span style ="font-size: 16px;">These higher ranks have a limited number of players. Therefore, if a rank has fewer than 200 players, we will get as many as possible and compensate by increasing the number of matches we get from each player.</span><br>
<span style ="font-size: 16px;">Instead of getting the summonerIds of all these ranks in one run, we will handle each rank separately so that we can check the result.</span><br>

In [None]:
# API key: prefer the RIOT_API_KEY environment variable so the secret does not
# have to live in the notebook. The literal is kept only as a fallback so that
# existing runs keep working.
# NOTE(review): remove the literal and rotate the key before sharing this notebook.
api_key = os.environ.get("RIOT_API_KEY", 'RGAPI-898a3f42-867f-410a-9845-ca10dd463281')

# the rank that we want to work with; this cell is used for 3 ranks: MASTER, GRANDMASTER, CHALLENGER
tier = "CHALLENGER"

# A list to store the summonerIds of the rank
summoner_ids = []

# For these 3 ranks we don't know in advance whether there are more than 200 players.
# So we first take every summonerId the request returns, then:
#   - if there are more than 200, drop the unnecessary ids and save the rest;
#   - if there are fewer than 200, compare the count with the real number shown on
#     the in-game leaderboard and, once they are equal, save it.
# NOTE(review): only page=1 is requested, so "every summonerId" means every entry
# returned on that page.

url = f"https://vn2.api.riotgames.com/lol/league-exp/v4/entries/RANKED_SOLO_5x5/{tier}/I?page=1&api_key={api_key}"
response = requests.get(url)
if response.status_code == 200:
    data = response.json()
    summoner_ids += [entry['summonerId'] for entry in data]
else:
    # This request fetches league entries, so say so in the message
    print(f"Failed to retrieve league entries for {tier}. Status Code:", response.status_code)

# Keep at most 200 ids (slicing is a no-op when there are fewer)
summoner_ids = summoner_ids[:200]
print(len(summoner_ids))

145


In [None]:
# after checking, if nothing is wrong, we save
# Create the output folder first so the save does not fail when it is missing
os.makedirs("summoner_ids", exist_ok=True)
with open(f"summoner_ids/{tier}_summoner_ids.json", 'w', encoding='utf-8') as file:
    json.dump(summoner_ids, file, ensure_ascii=False, indent=4)

<span style ="font-size: 24px;"> 1.2.Getting puuid from summonerId</span><br>

<span style ="font-size: 16px;">Now we have to send 200 requests for each rank. Doing all 10 ranks in one run would take a really long time, and any error would be a disaster.</span><br>
<span style ="font-size: 16px;">Therefore, we will work with each rank separately.</span><br>

In [None]:
# API key: prefer the RIOT_API_KEY environment variable so the secret does not
# have to live in the notebook. The literal is kept only as a fallback so that
# existing runs keep working.
# NOTE(review): remove the literal and rotate the key before sharing this notebook.
api_key = os.environ.get("RIOT_API_KEY", 'RGAPI-898a3f42-867f-410a-9845-ca10dd463281')

# the rank whose players' puuids we want to get
rank = "iron"

# The summonerId files are written with the upper-case tier name
# (e.g. summoner_ids/IRON_summoner_ids.json). Derive the tier from `rank` here so
# the input and output file names both belong to the rank chosen in THIS cell,
# instead of whatever value `tier` was left with by an earlier cell.
tier = rank.upper()

# path of the json file that stores the rank's summonerIds
path = f"summoner_ids/{tier}_summoner_ids.json"

# A list to store puuids
puuids = []

# Get summonerIds from the json file
with open(path, "r") as file:
    summoner_ids = json.load(file)

# check how many summonerIds we loaded
print(len(summoner_ids))

for summoner_id in summoner_ids:
    url = f"https://vn2.api.riotgames.com/lol/summoner/v4/summoners/{summoner_id}?api_key={api_key}"
    retries = 12
    for _ in range(retries):
        response = requests.get(url)
        if response.status_code == 200:
            # the response is a dict with basic information about the account; we need its puuid
            data = response.json()
            puuids.append(data["puuid"])
            break
        elif response.status_code == 429:
            # rate limited: wait, then try the same summonerId again
            time.sleep(6)
        else:
            print("Failed to retrieve account PUUID. Status Code:", response.status_code)
            break
    else:
        # every attempt was rate limited; report it instead of skipping silently
        print(f"Gave up on summonerId {summoner_id}: still rate limited after {retries} attempts")

# check the number of puuids after requesting
print(len(puuids))

# if nothing went wrong, save the puuids; otherwise say why nothing was saved
if len(puuids) == len(summoner_ids):
    os.makedirs("puuids", exist_ok=True)
    with open(f"puuids/{tier}_puuids.json", 'w', encoding='utf-8') as file:
        json.dump(puuids, file, ensure_ascii=False, indent=4)
else:
    print(f"something is wrong! got {len(puuids)} puuids for {len(summoner_ids)} summonerIds, nothing was saved")

200
Rate limit exceeded. Retrying in 12 seconds...
Rate limit exceeded. Retrying in 12 seconds...
Rate limit exceeded. Retrying in 12 seconds...


KeyboardInterrupt: 

<span style ="font-size: 24px;"> 1.3.Getting matchId using puuid</span><br>

<span style ="font-size: 16px;">Now we need to get 2000 matchIds for each rank, and each matchId must be unique.</span><br>
<span style ="font-size: 16px;">We already have the puuids of 200 players for each rank (for most ranks). Ideally, we want to take 10 matchIds from each puuid (each request returns 20 matchIds per puuid).</span><br>
<span style ="font-size: 16px;">However, there are circumstances where this is impossible:</span><br>
<span style ="font-size: 16px;">Ranks like master, grandmaster and challenger have very few players, so the probability of them facing each other in a match is really high (each match has 10 players, and challenger, for example, has only about 150 players).</span><br>
<span style ="font-size: 16px;">Because of that, taking just 10 matches from each puuid won't be enough for these ranks, so a reserve set, reserve_matchids, is created to store the extra unique matchIds of a puuid once 10 have been taken from it.</span><br>
<span style ="font-size: 16px;">After going through all the puuids in the rank, if the number of unique matchIds is less than 2000, we will add matchIds from reserve_matchids.</span><br>
<span style ="font-size: 16px;">Also, players of 2 different ranks can be in the same match, so we need to store all the matchIds we have collected in a set and check whether any new matchId is already in it.</span><br>
<span style ="font-size: 16px;">For the highest rank, challenger, there are even fewer players, so even taking all of the matchIds won't be enough.</span><br>
<span style ="font-size: 16px;">Luckily, these players play a lot, so we just need to wait for them to play more: get the matchIds once, then do it again a few days later until we have 2000 unique matches.</span><br>
<span style ="font-size: 16px;">Similar to the puuids, we have to send 1 request for each player. Doing 9 ranks at the same time is not a wise choice, so we will do it rank by rank.</span><br>

In [None]:
# notebook-wide set holding every matchId collected so far, across all ranks
match_ids = set()

In [None]:
# API key: prefer the RIOT_API_KEY environment variable so the secret does not
# have to live in the notebook. The literal is kept only as a fallback so that
# existing runs keep working.
# NOTE(review): remove the literal and rotate the key before sharing this notebook.
api_key = os.environ.get("RIOT_API_KEY", 'RGAPI-ad1788db-515f-42b6-9f1c-096d76edf3fe')

# the rank we want to work with
# (this must be `tier`: it is the name used to build the file path below)
tier = "IRON"

# matchIds collected for this rank only
matchids = set()

# surplus unique matchIds of each player, used to top up the rank if it ends below 2000
reserve_matchids = set()

# get the puuids of the rank
path = f"puuids/{tier}_puuids.json"
with open(path, "r") as file:
    puuids = json.load(file)

for puuid in puuids:
    url = f"https://sea.api.riotgames.com/lol/match/v5/matches/by-puuid/{puuid}/ids?type=ranked&start=0&count=20&api_key={api_key}"
    retries = 12
    for _ in range(retries):
        response = requests.get(url)
        if response.status_code == 200:
            data = response.json()  # a list of matchIds of this account (the request asks for 20)
            count = 0  # how many matchIds of this puuid have been put into the sets
            for match_id in data:
                # match_ids is the notebook-wide set: skip anything already collected
                if match_id in match_ids:
                    continue
                if count < 10:
                    match_ids.add(match_id)
                    matchids.add(match_id)
                    count += 1
                else:
                    # this puuid already contributed 10 ids: keep ALL the remaining
                    # unseen ones in reserve, not just the first one
                    reserve_matchids.add(match_id)
            break
        elif response.status_code == 429:
            # rate limited: wait, then try the same puuid again
            time.sleep(6)
        else:
            print("Failed to retrieve match ids. Status Code:", response.status_code)
            break
    else:
        # every attempt was rate limited; report it instead of skipping silently
        print(f"Gave up on puuid {puuid}: still rate limited after {retries} attempts")

# If the rank has fewer than 2000 matchIds, top it up from the reserve
if len(matchids) < 2000:
    # A reserved id may have been collected later through another player, so drop
    # everything already in match_ids; sort so the selection is reproducible.
    spare_ids = sorted(reserve_matchids - match_ids)
    ids_to_add = spare_ids[:2000 - len(matchids)]
    match_ids.update(ids_to_add)
    matchids.update(ids_to_add)

# Check the result
print(len(match_ids))
print(len(matchids))

Rate limit exceeded. Retrying in 12 seconds...
Rate limit exceeded. Retrying in 12 seconds...
Rate limit exceeded. Retrying in 12 seconds...
Rate limit exceeded. Retrying in 12 seconds...
Rate limit exceeded. Retrying in 12 seconds...
Rate limit exceeded. Retrying in 12 seconds...
Rate limit exceeded. Retrying in 12 seconds...
Rate limit exceeded. Retrying in 12 seconds...
Rate limit exceeded. Retrying in 12 seconds...
Rate limit exceeded. Retrying in 12 seconds...
2000
2000


In [None]:
# if nothing went wrong, save
# Create the output folder first so the save does not fail when it is missing
os.makedirs("match_ids", exist_ok=True)
# The file is named <rank>_matchids.json with a lower-case rank, which is the name
# the match-data cell opens (f"match_ids/{rank}_matchids.json" with rank = "master").
# matchids is a set, which json cannot serialise, so a sorted list is written instead.
with open(f"match_ids/{tier.lower()}_matchids.json", 'w', encoding='utf-8') as file:
    json.dump(sorted(matchids), file, ensure_ascii=False, indent=4)

<span style ="font-size: 24px;"> 1.4.Getting data of 20000 matches</span><br>

<span style ="font-size: 16px;">Since the limit of the api key is 100 requests every 2 minutes, and each response is a file of about 150kb, getting the 2000 matches of a single rank takes approximately 40 minutes, and all 20000 matches would be around 3gb.</span><br>
<span style ="font-size: 16px;">That's why we get the 2000 matches of one rank at a time: this lets us fetch the data faster using 3 devices, avoids wasting time when a random error occurs, and avoids lag when saving all the data to a file.</span><br>

In [None]:
# API key: prefer the RIOT_API_KEY environment variable so the secret does not
# have to live in the notebook. The literal is kept only as a fallback so that
# existing runs keep working.
# NOTE(review): remove the literal and rotate the key before sharing this notebook.
api_key = os.environ.get("RIOT_API_KEY", "RGAPI-898a3f42-867f-410a-9845-ca10dd463281")

# rank we want to work with
rank = "master"

# A list containing the data of the downloaded matches
match_datas = []

# Create the output folder before downloading: a missing folder would otherwise
# only be noticed at the very end, after all the requests have been made.
os.makedirs("match_datas", exist_ok=True)

# Get the list of matchIds of the rank
# NOTE(review): this rebinds `match_ids` to a list; the matchId-collection cell expects
# `match_ids` to be a set, so re-create the set before running that cell again.
with open(f"match_ids/{rank}_matchids.json", "r") as file:
    match_ids = json.load(file)

for match_id in match_ids:
    url = f"https://sea.api.riotgames.com/lol/match/v5/matches/{match_id}?api_key={api_key}"
    retries = 12
    for _ in range(retries):
        response = requests.get(url)
        if response.status_code == 200:
            match_datas.append(response.json())
            # show progress every 100 matches
            if len(match_datas) % 100 == 0:
                print(len(match_datas))
            break
        elif response.status_code == 429:
            # rate limited: wait, then try the same match again
            time.sleep(6)
        else:
            print(f"Failed to retrieve data of match {match_id}. Status Code:", response.status_code)
            break
    else:
        # every attempt was rate limited; report it instead of dropping the match silently
        print(f"Gave up on match {match_id}: still rate limited after {retries} attempts")

# make missing matches visible before saving
if len(match_datas) != len(match_ids):
    print(f"only {len(match_datas)} of {len(match_ids)} matches were downloaded")

# save to file
with open(f"match_datas/{rank}_match_datas", 'w', encoding='utf-8') as file:
    json.dump(match_datas, file, ensure_ascii=False, indent=4)

## 2. Data cleaning

<span style ="font-size: 24px;"> 2.1. Some information about data of a match</span><br>

<span style ="font-size: 16px;">For each matchId, we will have a json file of about 150kb after requesting the data.</span><br>
<span style ="font-size: 16px;">Since this is just a small project and we only use some basic information, all we need is the information described below, together with some examples:</span><br>

In [None]:
# Data of a single match, saved here in test.json
with open("test.json", 'r') as file:
    match_data = json.load(file)

# match_data is a dict with 2 keys, metadata and info; we only need the value of info
info = match_data["info"]

# info is a dict with 16 keys; we only use participants and teams:
#     - participants is a list of 10 elements representing the 10 players of the match
#     - teams is a list of 2 elements representing the 2 teams (blue/red)
participants = info["participants"]
teams = info['teams']

# Each player is a dict with 132 keys holding the data of that player.
# We only use 4 of them - challenges, championId, teamPosition and win:
#     - challenges is a dict with 128 keys, but we only care about legendaryItemUsed
#         + legendaryItemUsed is a list with the ids of the legendary (big) items of that player
#     - championId is the id of the champion used by the player
#     - teamPosition is the role of the player, one of 5: TOP, JUNGLE, MIDDLE, BOTTOM, UTILITY
#     - win is a bool, True/False for win/lose
player = participants[0]
challenges = player['challenges']
legendaryItemUsed = challenges['legendaryItemUsed']
championId = player['championId']
teamPosition = player['teamPosition']
win = player['win']

# Each team is a dict with 4 keys - bans, objectives, teamId and win:
#     - bans is a list of the champions the team banned from being picked in the match
#       (5 per team in the example)
#         + each element is a dict with 2 keys: championId (id of the banned champion) and
#           pickTurn (the turn at which the champion was banned); we only need championId
#               * for example, Lee Sin banned at turn 1 gives {championId : 64, pickTurn : 1}
#     - objectives are the targets the team took down; it is a dict with 7 keys:
#       baron, champion (enemy champions), dragon, horde (void grubs), inhibitor,
#       riftHerald (Rift Herald) and tower.
#       Each objective is a dict with 2 keys, first and kills:
#         + first is a bool telling whether this team took that target down before the other team
#               * for example, if first of champion is True for the blue team, blue took down a
#                 red player before red took down any blue player,
#                 in other words blue is the first team to get a kill
#         + kills is the number of times the team took that target down
#               * for example, dragon's kills being 2 for the blue team means blue killed 2 dragons
#     - teamId is the id of the team, 100 is blue and 200 is red (not used in this project)
#     - win is a bool telling whether the team won the match
team = teams[0]
# Iterate over the bans that are actually present instead of assuming exactly 5 entries
bans = [ban['championId'] for ban in team['bans']]
objectives = team['objectives']
team_win = team['win']

# Check the stats:
print("player1 basic infos:")
print(championId, teamPosition, legendaryItemUsed, win)
print()
print("team1 basic infos:")
print(bans)
for objective, objective_stats in objectives.items():
    print(objective)
    print(objective_stats)
print(team_win)

player1 basic infos:
64 TOP [6692, 6698, 3053, 3053, 6676] True

team1 basic infos:
[23, 223, 164, 31, 11]
baron
{'first': True, 'kills': 1}
champion
{'first': False, 'kills': 40}
dragon
{'first': False, 'kills': 2}
horde
{'first': True, 'kills': 6}
inhibitor
{'first': True, 'kills': 3}
riftHerald
{'first': False, 'kills': 0}
tower
{'first': False, 'kills': 11}
True
