In [2]:
# import the necessary libraries
import requests
import json
import time
import datetime

## 1.GETTING RAW DATA

<span style ="font-size: 24px;"> 1.1.Getting summonerId</span><br>

<span style ="font-size: 20px;"> 1.1.1.Getting summonerId of ranks below master</span><br>

<span style ="font-size: 16px;">From master onwards, ranks no longer have divisions, which makes their division only I instead of I, II, III, and IV</span><br>
<span style ="font-size: 16px;">Also, the higher the rank, the less the number of players, some doesn't even have enough 200</span><br>
<span style ="font-size: 16px;">Therefore, for these higher ranks, we will use another way to work with them</span><br>

In [None]:
def get_all_low_tier_summonerId(api_key):
    """Get 200 summonerId for each rank of 7 ranks from iron to diamond, and save them

    Args:
        api_key (string): the api key to make requests. Our api key resets after every 24 hours so we can't fix it
    """

    # All the ranks that we want to work with
    tiers = ["IRON", "BRONZE", "SILVER", "GOLD", "PLATINUM", "EMERALD", "DIAMOND"]

    # 7 first ranks each has 4 divison : I, II, III, IV. So we will get 50 summonerId for each divion
    divisions = ["I", "II", "III", "IV"]

    for tier in tiers:
        # A list consists of summonerId of the rank, we will get 200 summonerId for each rank
        summoner_ids=[]
        for division in divisions:
            url = f"https://vn2.api.riotgames.com/lol/league-exp/v4/entries/RANKED_SOLO_5x5/{tier}/{division}?page=1&api_key={api_key}"
            response = requests.get(url)
            if response.status_code == 200:
                data = response.json() # data is a list of dicts, each contains information about an account. We will only get the summonerId
                summoner_ids +=[entry['summonerId'] for entry in data[:50]] #get 50 elements from data, and add their summonerId value to the list
            else:
                print("Failed to retrieve account PUUID. Status Code:", response.status_code)

        # check if we got enough 200 summonerId. If true, save them to a json file
        if len(summoner_ids) == 200:
            with open(f"summoner_ids/{tier}_summoner_ids.json", 'w', encoding='utf-8') as file:
                json.dump(summoner_ids, file, ensure_ascii=False, indent=4)
            print(f"got {tier}'s 200 summonerIds succesfully!")
        else:
            print(f"something is wrong! {tier} has ", len(summoner_ids), "summonerIds!")

200


<span style ="font-size: 20px;"> 1.1.2.Getting summonerId of ranks from master onwards</span><br>

<span style ="font-size: 16px;">Now, ranks no longer have divisions, so the division will always be I.</span><br>
<span style ="font-size: 16px;">These higher ranks have a limited number of players; Therefore, if a rank's number of players is less than 200, we will get as many as possible, and compensate for this by increasing the number of matches we get from each players.</span><br>
<span style ="font-size: 16px;">Instead of getting the summonerIds of each rank with one run, we will do each seperately to check</span><br>

<span style ="font-size: 16px;">In these 3 ranks, we don't know if the number of players is more than 200 or not; Therefore, we will get all the summonerId first.</span><br>
<span style ="font-size: 16px;">After that, we check if the number is more than 200. If it does, remove the unecessary Ids.</span><br>
<span style ="font-size: 16px;">For the ranks that its number of players is less than 200, after getting all of its summonerId, we will check the number with the real number via ingame leaderboard. After making sure they are equal, save it</span><br>

In [None]:
def get_high_tier_summonerId(api_key, tier):
    """Get 200 summonerId of the given tier(rank), and save them

    Args:
        api_key (_tring): the api key to make requests
        tier (string): the tier you want to get its player's sumonnerId

    Returns:
        list: a list of 200 summonerId of the given tier
    """

    # A list to store summonerId of the rank
    summoner_ids=[]

    url = f"https://vn2.api.riotgames.com/lol/league-exp/v4/entries/RANKED_SOLO_5x5/{tier}/I?page=1&api_key={api_key}"
    response = requests.get(url)
    if response.status_code == 200:
        data = response.json() # the data we get is a list of dicts, each dict contains each player's data
        summoner_ids +=[entry['summonerId'] for entry in data] # we only need the player's summonerId
    else:
        print("Failed to retrieve account PUUID. Status Code:", response.status_code)
    if len(summoner_ids) > 200:
        summoner_ids = summoner_ids[:200]
    print("got", len(summoner_ids), f"summonerIds from rank {tier}")    #print out the number of summonerIds to check
    return summoner_ids

145


In [None]:
# after checking, if nothing is wrong, we save
# for example:
master_summoner_ids = get_high_tier_summonerId('RGAPI-898a3f42-867f-410a-9845-ca10dd463281', 'master')
with open(f"summoner_ids/master_summoner_ids.json", 'w', encoding='utf-8') as file:
    json.dump(master_summoner_ids, file, ensure_ascii=False, indent=4)

<span style ="font-size: 24px;"> 1.2.Getting puuid from summonerId</span><br>

<span style ="font-size: 16px;">Now we will have to send 200 requests for each rank, doing all 10 ranks at one run will take a really long time and any error will be a disaster.</span><br>
<span style ="font-size: 16px;">Therefore, we will work with each rank seperately.</span><br>

In [None]:
def get_puuid(api_key, tier):
    """get 200 puuid from the 200 summonerId got from the given tier, and save them

    Args:
        api_key (string): the api key to make requests
        tier (string): the tier you want to get its player's puuid
    """

    # path of the json file that stores the rank's summonerIds
    path= f"summoner_ids/{tier}_summoner_ids.json"

    # A list to store puuids
    puuids = []

    # Get summonerIds from the json file
    with open(path, "r") as file:
        summoner_ids = json.load(file)

    # check its number:
    print(len(summoner_ids))

    for summoner_id in summoner_ids:
        url = f"https://vn2.api.riotgames.com/lol/summoner/v4/summoners/{summoner_id}?api_key={api_key}"
        retries = 12
        for i in range(retries):
            response = requests.get(url)
            if response.status_code == 200:
                data = response.json() # data responded is a dict that contains some basic information of the account, we need its puuid
                puuids.append(data["puuid"])
                break
            elif response.status_code == 429:
                time.sleep(6)
            else:
                print("Failed to retrieve account PUUID. Status Code:", response.status_code)
                break

    # check the number of puuids after requesting
    print("got", len(puuids), f"of rank {tier}")

    # if nothing goes wrong, save the puuids
    if len(puuids) == len(summoner_ids):
        with open(f"puuids/{tier}_puuids.json", 'w', encoding='utf-8') as file:
            json.dump(puuids, file, ensure_ascii=False, indent=4)

200
Rate limit exceeded. Retrying in 12 seconds...
Rate limit exceeded. Retrying in 12 seconds...
Rate limit exceeded. Retrying in 12 seconds...


KeyboardInterrupt: 

<span style ="font-size: 24px;"> 1.3.Getting matchId using puuid</span><br>

<span style ="font-size: 16px;">Now we need to get 2000 matchIds for each rank, each matchId must be unique.</span><br>
<span style ="font-size: 16px;">We've already got 200 players's puuid for each rank (most of it). Idealy, we will want to get 10 matchIds for each puuid (each request will give us 20 for each puuid)</span><br>
<span style ="font-size: 16px;">However, there are circumstances where this is impossible:</span><br>
<span style ="font-size: 16px;">Ranks like master, grandmaster and challenger has very few players, the probality of them facing each other in a match is really high(each match has 10 players, and challenger, for example, has about 150 players)</span><br>
<span style ="font-size: 16px;">Because of that, getting just 10 matches for each puuid won't be enough for these ranks, so a reserve set - reserve_matchids is created to store unique exceeding matchIds of a puuid after getting enough 10 from it.</span><br>
<span style ="font-size: 16px;">After going through all the puuids in the rank, if the number of unique matchIds is less than 2000, we will add matchIds from reserve_matchids in.</span><br>
<span style ="font-size: 16px;">Also, players of 2 different ranks can also be in the same match, so we will need to store all the matchIds we got to a set and check if any new matchId we want to add is already in it.</span><br>
<span style ="font-size: 16px;">For the highest rank - challenger, since it has even less player, even getting all of the matchIds won't be enough.</span><br>
<span style ="font-size: 16px;">Luckily, these players play alot, so we will just need to wait for them to play more, by get the matchIds once, and do it again few days later until we get enough 2000 unique matches.</span><br>
<span style ="font-size: 16px;">Similar to puuId, we will have to send 1 request for each player. Doing 9 ranks at the same time is not a wise choice and so we will do it rank by rank</span><br>

In [None]:
# create a set to store all the matchIds
match_ids = set({})

In [None]:
def get_matchId(api_key,tier):
    """get 2000 unique matchId of the given tier, from the puuid got from that rank

    Args:
        api_key (string): the api key to make requests
        tier (string): the rank you want to get its matchId

    Returns:
        set: a set of 2000 matchId of the given rank
    """

    # create a set to store the rank's matchIds
    matchids = set({})

    # create the reserve set
    reserve_matchids=set({})

    # get the puuids of the rank
    path = f"puuids/{tier}_puuids.json"
    with open(path, "r") as file:
        puuids = json.load(file)

    for puuid in puuids:
        url = f"https://sea.api.riotgames.com/lol/match/v5/matches/by-puuid/{puuid}/ids?type=ranked&start=0&count=20&api_key={api_key}"
        retries = 12
        for i in range(retries):
            response = requests.get(url)
            if response.status_code == 200:
                data = response.json() # data is a list of 20 matchIds - 20 latest matches of that account
                count = 0 # count how many matchIds of this puuid have been put to the set
                for i in range(len(data)):
                    if data[i] not in match_ids: # check if the matchId is already in the set
                        if count < 10:
                            match_ids.add(data[i])
                            matchids.add(data[i])
                            count+=1
                        elif count == 10: # if the puuid has dedicated 10 unique matchIds to the set, add the rest to reserve
                            reserve_matchids.add(data[i])
                            count+=1
                            break
                break
            elif response.status_code == 429:
                time.sleep(6)
            else:
                print("Failed to retrieve account PUUID. Status Code:", response.status_code)
                break

    # Check if the number of matchIds is less than 2000 to compensate for it
    if len(matchids) < 2000:
        ids_to_add = list(reserve_matchids)[:2000-len(matchids)]
        match_ids.update(ids_to_add)
        matchids.update(ids_to_add)

    # Check the result
    print("from", len(match_ids), f"matchIds of rank {tier}")
    print("got", len(matchids), "matchIds")
    return matchids

Rate limit exceeded. Retrying in 12 seconds...
Rate limit exceeded. Retrying in 12 seconds...
Rate limit exceeded. Retrying in 12 seconds...
Rate limit exceeded. Retrying in 12 seconds...
Rate limit exceeded. Retrying in 12 seconds...
Rate limit exceeded. Retrying in 12 seconds...
Rate limit exceeded. Retrying in 12 seconds...
Rate limit exceeded. Retrying in 12 seconds...
Rate limit exceeded. Retrying in 12 seconds...
Rate limit exceeded. Retrying in 12 seconds...
2000
2000


In [None]:
# if nothing go wrong, save
# for example:
api_key = "RGAPI-898a3f42-867f-410a-9845-ca10dd463281"
tier = "master"
master_match_ids = get_matchId(api_key, tier)
with open(f"match_ids/{tier}_puuids.json", 'w', encoding='utf-8') as file:
    json.dump(master_match_ids, file, ensure_ascii=False, indent=4)

<span style ="font-size: 24px;"> 1.4.Getting data of 20000 matches</span><br>

<span style ="font-size: 16px;">Since the limit of the api key is 100 requests/minute, and each response of a request is a 150kb file, getting 2000 matches of a singular rank alone takes approximately 40 minutes, and all 20000 matches would be around 3gb.</span><br>
<span style ="font-size: 16px;">That's why we will get 2000 matches of a rank each time, to get the data faster using 3 devices, avoid wasting time just to receive some random error, and lagging when trying to save all the data to file</span><br>

In [None]:
def get_match_data(api_key, tier):
    """Get the data of each match from 2000 matchId of the given rank

    Args:
        api_key (string): api key to make requests
        tier (string): the rank you want to get its matches's data

    Returns:
        list: a list containing data of 2000 matches
    """

    # A list containing data of 2000 matches
    match_datas = []

    # Get a list of 2000 matchIds of the rank
    with open(f"match_ids/{tier}_matchids.json", "r") as file:
        match_ids = json.load(file)

    for match_id in match_ids:
        url = f"https://sea.api.riotgames.com/lol/match/v5/matches/{match_id}?api_key={api_key}"
        retries = 12
        for i in range(retries):
            response = requests.get(url)
            if response.status_code == 200:
                match_datas.append(response.json())
                # Check for progress
                if len(match_datas)%100==0:
                    print("got the data of", len(match_datas), "matches")
                break
            elif response.status_code == 429:
                time.sleep(6)
            else:
                print("Failed to retrieve account PUUID. Status Code:", response.status_code)
                break
    return match_datas 

In [None]:
# after checking, we save
# for challenger, we save the result of the first run, then add more later
# for example:
api_key = "RGAPI-898a3f42-867f-410a-9845-ca10dd463281"
tier = "iron"
iron_match_datas = get_match_data(api_key, tier)
with open(f"match_datas/{tier}_match_datas", 'w', encoding='utf-8') as file:
        json.dump(iron_match_datas, file, ensure_ascii=False, indent=4)

## 2. Data cleaning

<span style ="font-size: 24px;"> 2.1. Some information about data of a match</span><br>

<span style ="font-size: 16px;">For each matchId, we will have a json file of about 150kb after requesting the data.</span><br>
<span style ="font-size: 16px;">Since this is just a small project and we will only use some basic information, all we need are some information mentioned below, together with some examples:</span><br>

In [21]:
# Data of a match, here saved in test.json
with open("test.json", 'r') as file:
    match_data = json.load(file)

# match_data is a dict with 2 keys metadata and info. we only need the value of info
info = match_data["info"]

"""
info is a dict with 16 keys, we will only go for participants, gameCreation, gameDuraion and teams:
    - participants's value is a 1 list of 10 elements representing 10 players of that match
    - teams's value is a list of 2 elements representing 2 teams blue/red
    - gameCreation's value is a timestamp in miliseconds that tells the time when the match starts
    - gameDuration's value is the time the match lasts, measured in seconds
"""
participants = info["participants"]
teams = info['teams']
gameCreation = info['gameCreation']
gameDuration = info['gameDuration']

"""
Each player has a dict with 132 keys which are data of that player.
We will only get 10 data about championId, teamPosition, item0, item1, item2, item3, item4, item5 and win:
    - championId is the id of the champion used by the player
    - teamPosition is the role that player takes charge of, among 5 roles: TOP, JUNGLE, MIDDLE, BOTTOM, UTILITY
    - iteam1, iteam2,... are the items of that player at the end of the match. We will get data of big items only
    - win is a bool true/false corresponds to win/lose
"""
player = participants[0]
championId = player['championId']
teamPosition = player['teamPosition']
item0 = player["item0"]
win = player['win']

"""
Each team has a dict with 4 keys: bans, objectives, teamId and win:
    - bans is a list with 5 elements representing 5 champions that the team banned from being chose in that match (each team has 5 bans)
        + Each element of bans is a dict with 2 keys: championId(Id of the banned champion) and pickTurn(the turn at which the champion is banned),
          this represent a ban (We only need championId to know which champion is banned)
              *for example: Leesin got banned at turn 1, the dict will be: {championId : 64, pickTurn : 1}
    - objectives are targets(muc tieu) that the team tookdown(killed), it is a dict with 7 keys:
      baron, champion(tuong), dragon(rong), horde(sau hu khong), inhibitor(nha linh), riftHerald(su gia khe nut) and tower(tru).
      Each objective is given by a dict with 2 keys: first and kills:
        + first is a bool true/false that tells whether that team tookdown that target first (before the other team does it)
              *for example: if first of champion of blue team is True, that means blue tookdown a player
               of red team before any of their player is tookdown by red
               in other words, blue team is the first team to get a kill
        + kills is the number of times the team tookdown that target
              *for example: dragon's skills of blue team is 2, that means blue team killed 2 dragons
    - teamId is the id of the team, 100 is blue va 200 is red (we won't use teamId for the project)
    - win is a bool that tells whether the win has won the match
"""
team = teams[0]
bans = list(team['bans'][i]['championId'] for i in range(5))
objectives = team['objectives']
team_win = team['win']

# Check the stats:
print("game start date:", datetime.datetime.fromtimestamp(gameCreation / 1000.0))
print("game duration:", str("{:.2f}".format(gameDuration/60)), "minutes")

print("player1 basic infos:")
print(championId, teamPosition, item0, win)
print()
print("team1 basic infos:")
print(bans)
for objective in objectives:
    print(objective)
    print(objectives[objective])
print(team_win)

game start date: 2024-10-19 16:20:13.153000
game duration: 27.87 minutes
player1 basic infos:
777 TOP 1055 False

team1 basic infos:
[523, 121, 203, 104, 36]
baron
{'first': False, 'kills': 0}
champion
{'first': True, 'kills': 22}
dragon
{'first': True, 'kills': 2}
horde
{'first': True, 'kills': 5}
inhibitor
{'first': False, 'kills': 0}
riftHerald
{'first': True, 'kills': 1}
tower
{'first': True, 'kills': 5}
False


<span style ="font-size: 24px;"> 2.2.Getting clean data</span><br>

<span style ="font-size: 16px;">Now that we know what we need from a match, it's time we get it and reduce the size of our data</span><br>

In [None]:

def clean_data():
    """get the necessary information from the huge chunk of data

    Returns:
        tuple: a tuple with 4 elements: first_objective_info, champion_info, champion_big_items and champion_role
    """
    # create a dict to store information about total win/lose when the team got the objective first
    # This will later be used to calculate the probality of winning when the team is the first team to takedown a specific objective
    first_objective_info = {'baron' : {'win' : 0, 'lose' : 0}, 'champion' : {'win' : 0, 'lose' : 0}, 'dragon' : {'win' : 0, 'lose' : 0},
                            'horde' : {'win' : 0, 'lose' : 0}, 'inhibitor' : {'win' : 0, 'lose' : 0},
                            'riftHerald' : {'win' : 0, 'lose' : 0}, 'tower' : {'win' : 0, 'lose' : 0}}
    
    # create a list to store timestamp
    timestamp_list = []

    # get the list of big items's id and their name
    with open("dragon_data/item_list.json", 'r') as file:
        list_of_big_items = json.load(file)

    # create a dict champion_info to store each champion's id, name, total win, total lose and total ban
    # create a dict champion_big_items to store all the items bought for each champion and their number, later used to get 3 most bought items
    # create a dict champion_role to store each champion's number of times used for each role, later used to get the champions's roles
    with open("dragon_data/champion_list.json", 'r') as file:
        champions = json.load(file) #a dict of all the champions's id and their name
        champion_info = {key : {'name' : champions[key], 'win' : 0, 'lose' : 0, 'ban' : 0} for key in champions}
        champion_big_items = {key : {} for key in champions}
        champion_role = {key : {'TOP' : 0, 'JUNGLE' : 0, 'MIDDLE' : 0, 'BOTTOM' : 0, 'UTILITY' : 0} for key in champions}

    tiers = ['iron', 'bronze', 'silver', 'gold', 'platinum', 'emerald', 'diamond', 'master', 'grandmaster', 'challenger']

    for tier in tiers:
        # load the data of 2000 matches of the rank
        with open(f"matchinfo/{tier}.json", 'r') as file:
            match_datas = json.load(file)

        for each_match in match_datas:
            for i in range(10):
                # get information of the ith player of the match
                player = each_match["info"]["participants"][i]
                championId = str(player['championId']) # the player's champion
                if player['win'] == True:
                    champion_info[championId]['win'] += 1
                else:
                    champion_info[championId]['lose'] += 1
                if player['teamPosition'] != "":
                    champion_role[championId][player['teamPosition']] += 1
                for i in range(5):
                    if str(player[f"item{i}"]) in list_of_big_items:
                        if player[f"item{i}"] not in champion_big_items[championId]:
                            champion_big_items[championId][player[f"item{i}"]] = 1
                        else:
                            champion_big_items[championId][player[f"item{i}"]] += 1

            for i in range(2):
                # get information of the ith team
                team = each_match['info']['teams'][i]
                for j in range(5):
                    championId = str(team['bans'][j]['championId'])
                    if championId in champion_info:
                        champion_info[championId]['ban'] +=1
                for objective in team['objectives']:
                    if team['objectives'][objective]['first'] == True:
                        if team['win'] == True:
                            first_objective_info[objective]['win'] += 1
                        else:
                            first_objective_info[objective]['lose'] += 1

    return first_objective_info, champion_info, champion_big_items, champion_role

In [43]:
# get the cleaned data
first_objective_info, champion_info, champion_big_items, champion_role = clean_data()

In [40]:
first_objective_info

{'baron': {'win': 11213, 'lose': 2472},
 'champion': {'win': 11679, 'lose': 8013},
 'dragon': {'win': 11872, 'lose': 7579},
 'horde': {'win': 10766, 'lose': 8705},
 'inhibitor': {'win': 14616, 'lose': 1387},
 'riftHerald': {'win': 11940, 'lose': 5629},
 'tower': {'win': 13886, 'lose': 5515}}

In [32]:
champion_info

{'266': {'name': 'Aatrox', 'win': 1575, 'lose': 1555, 'ban': 5439},
 '103': {'name': 'Ahri', 'win': 1370, 'lose': 1213, 'ban': 1067},
 '84': {'name': 'Akali', 'win': 783, 'lose': 811, 'ban': 1611},
 '166': {'name': 'Akshan', 'win': 161, 'lose': 140, 'ban': 469},
 '12': {'name': 'Alistar', 'win': 504, 'lose': 496, 'ban': 542},
 '32': {'name': 'Amumu', 'win': 221, 'lose': 225, 'ban': 456},
 '34': {'name': 'Anivia', 'win': 236, 'lose': 210, 'ban': 371},
 '1': {'name': 'Annie', 'win': 139, 'lose': 145, 'ban': 191},
 '523': {'name': 'Aphelios', 'win': 357, 'lose': 389, 'ban': 160},
 '22': {'name': 'Ashe', 'win': 1370, 'lose': 1391, 'ban': 398},
 '136': {'name': 'AurelionSol', 'win': 482, 'lose': 440, 'ban': 448},
 '893': {'name': 'Aurora', 'win': 568, 'lose': 635, 'ban': 5956},
 '268': {'name': 'Azir', 'win': 119, 'lose': 154, 'ban': 107},
 '432': {'name': 'Bard', 'win': 163, 'lose': 163, 'ban': 107},
 '200': {'name': 'Belveth', 'win': 186, 'lose': 212, 'ban': 1182},
 '53': {'name': 'Blitzc

In [41]:
champion_big_items

{'266': {3143: 32,
  3047: 1687,
  6694: 253,
  6692: 1599,
  3156: 288,
  6698: 569,
  3111: 751,
  3814: 75,
  6333: 270,
  6610: 1495,
  3071: 840,
  3110: 5,
  3053: 399,
  6609: 49,
  3161: 427,
  6631: 137,
  3026: 18,
  6699: 221,
  3009: 14,
  6697: 26,
  6701: 59,
  3065: 69,
  3158: 53,
  3078: 2,
  3075: 12,
  3074: 8,
  2501: 5,
  3869: 3,
  3877: 12,
  6676: 4,
  3072: 1,
  3033: 4,
  2504: 3,
  6695: 12,
  3181: 2,
  6665: 9,
  3139: 2,
  3179: 2,
  3013: 1,
  3084: 2,
  3142: 6,
  3742: 1,
  3190: 1,
  3748: 1,
  3865: 1,
  3876: 1,
  6664: 1,
  3006: 1,
  8020: 2,
  3091: 1},
 '103': {6655: 453,
  3158: 505,
  4628: 459,
  6653: 488,
  3118: 1826,
  3020: 1353,
  3866: 3,
  4645: 588,
  3157: 637,
  3165: 79,
  4646: 518,
  3135: 63,
  3100: 149,
  2503: 17,
  3089: 270,
  3111: 236,
  3137: 248,
  3116: 15,
  3047: 17,
  3102: 92,
  3040: 20,
  3871: 34,
  3041: 123,
  3152: 14,
  4629: 16,
  3009: 5,
  3865: 2,
  4005: 1,
  3869: 3,
  3115: 5,
  3143: 2,
  3087: 2,
  

In [42]:
champion_role

{'266': {'TOP': 3008, 'JUNGLE': 8, 'MIDDLE': 78, 'BOTTOM': 7, 'UTILITY': 24},
 '103': {'TOP': 23, 'JUNGLE': 0, 'MIDDLE': 2505, 'BOTTOM': 8, 'UTILITY': 46},
 '84': {'TOP': 501, 'JUNGLE': 1, 'MIDDLE': 1080, 'BOTTOM': 2, 'UTILITY': 8},
 '166': {'TOP': 19, 'JUNGLE': 2, 'MIDDLE': 263, 'BOTTOM': 9, 'UTILITY': 8},
 '12': {'TOP': 13, 'JUNGLE': 2, 'MIDDLE': 6, 'BOTTOM': 3, 'UTILITY': 975},
 '32': {'TOP': 3, 'JUNGLE': 395, 'MIDDLE': 3, 'BOTTOM': 0, 'UTILITY': 44},
 '34': {'TOP': 39, 'JUNGLE': 0, 'MIDDLE': 331, 'BOTTOM': 3, 'UTILITY': 72},
 '1': {'TOP': 9, 'JUNGLE': 0, 'MIDDLE': 232, 'BOTTOM': 2, 'UTILITY': 41},
 '523': {'TOP': 3, 'JUNGLE': 1, 'MIDDLE': 5, 'BOTTOM': 736, 'UTILITY': 1},
 '22': {'TOP': 33, 'JUNGLE': 2, 'MIDDLE': 18, 'BOTTOM': 2438, 'UTILITY': 265},
 '136': {'TOP': 27, 'JUNGLE': 7, 'MIDDLE': 860, 'BOTTOM': 7, 'UTILITY': 20},
 '893': {'TOP': 495, 'JUNGLE': 2, 'MIDDLE': 667, 'BOTTOM': 7, 'UTILITY': 32},
 '268': {'TOP': 6, 'JUNGLE': 0, 'MIDDLE': 263, 'BOTTOM': 1, 'UTILITY': 3},
 '432':

In [44]:
# save the cleaned data
with open(f"matchinfo/first_objective.json", 'w', encoding='utf-8') as file:
        json.dump(first_objective_info, file, ensure_ascii=False, indent=4)
with open(f"matchinfo/champion_info.json", 'w', encoding='utf-8') as file:
        json.dump(champion_info, file, ensure_ascii=False, indent=4)
with open(f"matchinfo/champion_big_items.json", 'w', encoding='utf-8') as file:
        json.dump(champion_big_items, file, ensure_ascii=False, indent=4)
with open(f"matchinfo/champion_role.json", 'w', encoding='utf-8') as file:
        json.dump(champion_role, file, ensure_ascii=False, indent=4)

## 3. Calculate stats

<span style ="font-size: 16px;">After cleaning, we now have all the numbers we need. But numbers alone doesn't mean anything. Now we will need to calculate all the stats to make them somewhat more meaningful.</span><br>

<span style ="font-size: 24px;">3.1 Calculate win rate, ban/pick rate, core items and main roles</span><br>

<span style ="font-size: 16px;"> - Win rate is a champion's total win divided by that champion's total match. Formula: win/(win + lose)</span><br>
<span style ="font-size: 16px;"> - Ban/pick rate of each champion is the sum of its number of banned or picked times divided by 20000(total match). This tells us if that champion is so strong a lot of people doesn's want to face it and have to give it a ban, or pick it when they have a chance to. Formula: (win + lose + ban)/20000</span><br>
<span style ="font-size: 16px;"> - Together, the two stats above tells us if a champion is strong or not. A strong champion will have high win rate as well as high ban/pick rate. Just high win rate alone doesn't mean much (for example, out of 20000 matches, champion A is played in 8 matches, and won 6. That win rate seems great, but saying that champion is strong just based on that won't be right). And just high ban/pick rate with low win rate only means that the champion is popular among players, not quite good enough to be called strong.</span><br>
<span style ="font-size: 16px;"> - For core items of a champion, we will take 3 most bought items for that champion as core items. The function clean_data() above got all the items bought for each champion and their count. Now we only need to get 3 most bought items for each champion.</span><br>
<span style ="font-size: 16px;"> - Champion's main roles is more complicated. Some champions can be used for multiple roles, while some can only be used for only one. And sometimes, players go creative and use their champion for an unsual role. This makes it a bit harder to calculate. To get each champion's main roles, we will calculate each role's ratio in that champion total played match, and take the roles with a ratio of at least 16%  (if a champion can be used at all 5 roles equally, the expected ratio would be 20 - 20 - 20 - 20 - 20, though that is impossible. So the least ratio would be 20% - 4% of spread) as a champion's roles</span><br>

In [2]:
import pandas as pd

In [25]:
with open("matchinfo/champion_info.json", 'r') as file:
    champion_info = json.load(file)
champion_winrate = pd.Series(index = list(champion_info[championId]['name'] for championId in champion_info))
champion_ban_pick_rate = pd.Series(index = list(champion_info[championId]['name'] for championId in champion_info))
for championId in champion_info:
    champion = champion_info[championId]
    champion_winrate[champion['name']] = champion['win'] / (champion['win'] + champion['lose'])
    champion_ban_pick_rate[champion['name']] = (champion['ban'] + champion['win'] + champion['lose']) / 20000

print(champion_winrate.apply(lambda x: f'{x*100:.2f}%'))
print(champion_ban_pick_rate.apply(lambda x: f'{x*100:.2f}%'))
print(champion_ban_pick_rate.max())

Aatrox     50.32%
Ahri       53.04%
Akali      49.12%
Akshan     53.49%
Alistar    50.40%
            ...  
Zeri       48.02%
Ziggs      51.36%
Zilean     50.68%
Zoe        52.90%
Zyra       49.21%
Length: 168, dtype: object
Aatrox     42.84%
Ahri       18.25%
Akali      16.02%
Akshan      3.85%
Alistar     7.71%
            ...  
Zeri        4.95%
Ziggs       7.88%
Zilean      2.04%
Zoe        13.76%
Zyra        4.12%
Length: 168, dtype: object
0.471


In [38]:
with open("matchinfo/champion_big_items.json", 'r') as file:
    champion_big_items = json.load(file)
champion_core_items = pd.Series(index = list(championId for championId in champion_big_items))
for championId in champion_big_items:
    champion_items = champion_big_items[championId]
    champion_core_items[championId] = sorted(champion_items, key = champion_items.get, reverse = True)[:3]
with open("dragon_data/champion_list.json", 'r') as file:
    champion_list = json.load(file)
champion_core_items.index = champion_list.values()
print(champion_core_items)

Aatrox     [3047, 6692, 6610]
Ahri       [3118, 3020, 3157]
Akali      [3020, 4646, 4645]
Akshan     [6676, 6672, 3031]
Alistar    [3190, 3869, 3047]
                  ...        
Zeri       [3087, 3085, 3006]
Ziggs      [6655, 3020, 3040]
Zilean     [3158, 2065, 3869]
Zoe        [3020, 6655, 3100]
Zyra       [3871, 6653, 3020]
Length: 168, dtype: object


  champion_core_items[championId] = sorted(champion_items, key = champion_items.get, reverse = True)[:3]


In [None]:
with open("matchinfo/champion_role.json", 'r') as file:
    champion_role = json.load(file)
champion

{'266': {'name': 'Aatrox', 'win': 1575, 'lose': 1555, 'ban': 5439},
 '103': {'name': 'Ahri', 'win': 1370, 'lose': 1213, 'ban': 1067},
 '84': {'name': 'Akali', 'win': 783, 'lose': 811, 'ban': 1611},
 '166': {'name': 'Akshan', 'win': 161, 'lose': 140, 'ban': 469},
 '12': {'name': 'Alistar', 'win': 504, 'lose': 496, 'ban': 542},
 '32': {'name': 'Amumu', 'win': 221, 'lose': 225, 'ban': 456},
 '34': {'name': 'Anivia', 'win': 236, 'lose': 210, 'ban': 371},
 '1': {'name': 'Annie', 'win': 139, 'lose': 145, 'ban': 191},
 '523': {'name': 'Aphelios', 'win': 357, 'lose': 389, 'ban': 160},
 '22': {'name': 'Ashe', 'win': 1370, 'lose': 1391, 'ban': 398},
 '136': {'name': 'AurelionSol', 'win': 482, 'lose': 440, 'ban': 448},
 '893': {'name': 'Aurora', 'win': 568, 'lose': 635, 'ban': 5956},
 '268': {'name': 'Azir', 'win': 119, 'lose': 154, 'ban': 107},
 '432': {'name': 'Bard', 'win': 163, 'lose': 163, 'ban': 107},
 '200': {'name': 'Belveth', 'win': 186, 'lose': 212, 'ban': 1182},
 '53': {'name': 'Blitzc

In [None]:
datetime.datetime.fromtimestamp(1729329613153/1000.0).

16