# Creating the functions to handle all of the previous mess :D

- Takes a list of matches id as parameter
- Requests the matches to the API (this will be on the main function, thus we can start with the next ones)
- Extracts match information
- Extracts Champions data
- Extracts Players info
- Extracts Players statistics

In [1]:
import time

from itertools import chain

import requests as req
import pandas as pd
import numpy as np

from settings import TOKEN

## Helper functions to keep the code below clean (done after all functions were assembled)

In [2]:
# Create a dataframe, and assign the right values to lane variable
def adjust_lane_names(from_list: list, combat=False) -> pd.DataFrame:
    df = pd.DataFrame(from_list)

    support = df.loc[df["role"] == "DUO_SUPPORT"].index
    middle = df.loc[df["role"] == "DUO"].index
    df.loc[df.index[support], 'lane'] = "SUPPORT"
    df.loc[df.index[middle], 'lane'] = "MIDDLE"
    
    if combat:
        df = (df.assign(
                first_blood=np.where(df["first_blood"], 1, 0),
                first_blood_assist=np.where(df["first_blood_assist"], 1, 0)))
        
    df = df.drop(columns="role")
    
    return df

def update_champions_info() -> dict:
    champions = req.get("https://ddragon.leagueoflegends.com/cdn/10.14.1/data/en_US/champion.json").json()
    champion_keys = champions["data"].keys()
    champions_list = {}
    
    for x in champions["data"]:
        champions_list[int(champions["data"][x]["key"])] = champions["data"][x]["name"]

    return champions_list

## **Lets start with the processing functions**

### Match info

In [3]:
def extract_match_info(raw_matches: list, path: str, start: int, end: int):
    
    # Basic match info
    match_keys = ["gameId", "platformId", "gameCreation", "gameDuration", "gameVersion"]
    matches_list = [{key: match[key] for key in match if key in match_keys} for match in raw_matches]
    
    # Winners
    winners = [team["teamId"] for x in raw_matches for team in x["teams"] if team["win"] == "Win"]
    
    # Merge the lists
    matches_df = pd.DataFrame(matches_list)
    matches_df = matches_df.assign(winner=winners)
    matches_df["winner"] = matches_df["winner"].replace({100:"Blue", 200: "Red"})
    
    # Change gameCreation to DateTime format
    matches_df["gameCreation"] = pd.to_datetime(matches_df["gameCreation"], unit="ms").dt.to_period("D")

    # Change column names to a better format
    matches_df.rename(columns={
        "gameId": "match_id",
        "platformId": "region",
        "gameCreation": "date_created",
        "gameDuration": "match_duration",
        "gameVersion": "patch"
    }, inplace=True)
    
    # Change patch format to a double digit e.g. 10.12
    matches_df["patch"] = matches_df["patch"].str.slice(stop=5)
    
    return matches_df.to_pickle(f"{path}/match_info_{start}-{end}.pkl", protocol=4)

### Champions data

In [4]:
def extract_champions_data(raw_matches: list, champions_list: list, path: str, start: int, end: int):
    
    # Get banned champions for each match
    bans = []
    for i, x in enumerate(raw_matches):
        for y in x["teams"]:
            for z in y["bans"]:
                bans.append({"champion": z["championId"],
                             "match_id": x["gameId"], 
                             "banned": 1})
    bans = pd.DataFrame(bans)
    
    bans = bans.replace({"champion": champions_list})
    
    # Get picked champions for each match
    picks = []
    for x in raw_matches:
        for i, y in enumerate(x["participants"]):
            picks.append({
                "champion": y["championId"],
                "match_id": x["gameId"],
                "region": x["platformId"],
                "picked": 1,
                "lane": y["timeline"]["lane"],
                "role": y["timeline"]["role"],
                "opponent": ([x["championId"] 
                                  for x in x["participants"][5:]] 
                                     if i < 5 
                                     else [x["championId"] 
                                        for x in x["participants"][:5]]),
                "won": y["stats"]["win"],
                "lost": not y["stats"]["win"],
            })
            
    picks = pd.DataFrame(picks)
    
    # Replace True, False with 1, 0 respectively
    picks = picks.assign(won=np.where(picks["won"], 1, 0), lost=np.where(picks["lost"], 1, 0))
    picks = picks.replace({"champion": champions_list, "opponent": champions_list})
    
    # Now we need to explode the dataframe to get each individual matchup
    picks = picks.explode(column="opponent").reset_index()
    picks.drop(columns="index", inplace=True)
    
    picks = adjust_lane_names(picks)
    
    # export bans and picks
    bans.to_pickle(f"{path}/champion_bans_{start}-{end}.pkl", protocol=4)
    picks.to_pickle(f"{path}/champion_picks_{start}-{end}.pkl", protocol=4)

### **Extract players data**

**Extract players information**

In [5]:
def extract_players_info(raw_matches: list, champions_list: list, path: str, start: int, end: int):
    
    # Getting the account info, champion used and role
    participants = []
    for x in raw_matches:
        for i, y in enumerate(x["participants"]):
            participants.append({
                "account_id": x["participantIdentities"][i]["player"]["accountId"],
                "summoner_id": x["participantIdentities"][i]["player"]["summonerId"],
                "region": x["participantIdentities"][i]["player"]["currentPlatformId"],
                "name": x["participantIdentities"][i]["player"]["summonerName"],
                "champion": y["championId"],
                "lane": y["timeline"]["lane"],
                "role": y["timeline"]["role"],
                "won": 1 if y["stats"]["win"] == True else 0,
            })
    # Create a dataframe, and assign the right values to lane variable
    participants_df = adjust_lane_names(participants)
    
    # Now lets create frequency colums for each lane per account_id as a new table
    players_lane = participants_df[["account_id", "lane", "won"]]
    
    # Do the same for champions
    # Replacing the ids of champions with the updated ones
    participants_df = participants_df.replace({"champion": champions_list})
    
    # Creating the DataFrame
    players_champions = participants_df[["account_id", "champion", "won"]]
    
    # Isolate the participants info
    participants_df = participants_df.drop(columns=["lane", "champion", "won"])
    participants_df = participants_df.drop_duplicates()
    
    # export
    players_lane.to_pickle(f"{path}/players_lanes_{start}-{end}.pkl", protocol=4)
    players_champions.to_pickle(f"{path}/players_champions_{start}-{end}.pkl", protocol=4)
    participants_df.to_pickle(f"{path}/players_info_{start}-{end}.pkl", protocol=4)
    

**Extract players statistics**

In [6]:
def extract_players_stats(raw_matches: list, champions_list: list, path: str, start: int, end: int):
    
    # Containers
    laning = []
    combat = []
    flair = []
    objectives = []
    
    for x in raw_matches:
        for i, y in enumerate(x["participants"]):
            
            found = True
            
            try:
                # laning
                laning.append({
                    "account_id": x["participantIdentities"][i]["player"]["accountId"],
                    "region": x["participantIdentities"][i]["player"]["currentPlatformId"],
                    "champion": y["championId"],
                    "lane": y["timeline"]["lane"],
                    "role": y["timeline"]["role"],
                    "xppm_10": y["timeline"]["xpPerMinDeltas"]["0-10"],
                    "cspm_10": y["timeline"]["creepsPerMinDeltas"]["0-10"],
                    "goldpm_10": y["timeline"]["goldPerMinDeltas"]["0-10"],
                    "dmg_takenpm_10": y["timeline"]["damageTakenPerMinDeltas"]["0-10"],
                    "won": 1 if y["stats"]["win"] == True else 0,
                })
            
                # combat
                combat.append({
                    "account_id": x["participantIdentities"][i]["player"]["accountId"],
                    "region": x["participantIdentities"][i]["player"]["currentPlatformId"],
                    "champion": y["championId"],
                    "lane": y["timeline"]["lane"],
                    "role": y["timeline"]["role"],
                    "dmg_total": y["stats"]["totalDamageDealtToChampions"],
                    "healing_total": y["stats"]["totalHeal"],
                    "units_healed": y["stats"]["totalUnitsHealed"],
                    "damage_mitigated": y["stats"]["damageSelfMitigated"],
                    "crowd_control": y["stats"]["totalTimeCrowdControlDealt"],
                    "dmg_taken": y["stats"]["totalDamageTaken"],
                    "first_blood": y["stats"]["firstBloodKill"] if found else None,
                    "first_blood_assist": y["stats"]["firstBloodAssist"],
                    "won": 1 if y["stats"]["win"] == True else 0,
                })
            
                # flair
                flair.append({
                    "account_id": x["participantIdentities"][i]["player"]["accountId"],
                    "region": x["participantIdentities"][i]["player"]["currentPlatformId"],
                    "champion": y["championId"],
                    "lane": y["timeline"]["lane"],
                    "role": y["timeline"]["role"],
                    "killing_sprees": y["stats"]["killingSprees"],
                    "longest_time_alive": y["stats"]["longestTimeSpentLiving"],
                    "double_kills": y["stats"]["doubleKills"],
                    "triple_kills": y["stats"]["tripleKills"],
                    "quadra_kills": y["stats"]["quadraKills"],
                    "penta_kills": y["stats"]["pentaKills"],
                    "won": 1 if y["stats"]["win"] == True else 0,
                })
            
                # objectives
                objectives.append({
                    "account_id": x["participantIdentities"][i]["player"]["accountId"],
                    "region": x["participantIdentities"][i]["player"]["currentPlatformId"],
                    "champion": y["championId"],
                    "lane": y["timeline"]["lane"],
                    "role": y["timeline"]["role"],
                    "dmg_to_objectives": y["stats"]["damageDealtToObjectives"],
                    "dmg_to_turrets": y["stats"]["damageDealtToTurrets"],
                    "total_cs": y["stats"]["totalMinionsKilled"],
                    "jungle_cs": y["stats"]["neutralMinionsKilled"],
                    "jungle_invaded": y["stats"]["neutralMinionsKilledEnemyJungle"],
                    "wards_placed": y["stats"]["wardsPlaced"],
                    "wards_killed": y["stats"]["wardsKilled"],
                    "won": 1 if y["stats"]["win"] == True else 0,
                })
            except:
                continue
    
    # Creating and processing the dataframe
    laning = adjust_lane_names(laning)
    combat = adjust_lane_names(combat, True)
    flair = adjust_lane_names(flair)
    objectives = adjust_lane_names(objectives)
    
    # updating champons names
    laning = laning.replace({"champion": champions_list})
    combat = combat.replace({"champion": champions_list})
    flair = flair.replace({"champion": champions_list})
    objectives = objectives.replace({"champion": champions_list})
    
    #save files
    laning.to_pickle(f"{path}/player_laning_stats_{start}-{end}.pkl", protocol=4)
    combat.to_pickle(f"{path}/player_combat_stats_{start}-{end}.pkl", protocol=4)
    flair.to_pickle(f"{path}/player_flair_stats_{start}-{end}.pkl", protocol=4)
    objectives.to_pickle(f"{path}/player_objective_stats_{start}-{end}.pkl", protocol=4)

# Get the matches and process the data

In [7]:
def get_matches(id_list: list, path: str, token: str, start: int, end: int):

    n_requests = 0
    
    # update champions information
    champions_list = update_champions_info()
    raw_matches = []
    
    for i, id in enumerate(id_list[start:end]):
        # control the number of requests
        if n_requests >= 100:
            print(f"Number of Matches so far: {len(raw_matches)}")
            n_requests = 0
            time.sleep(121)
        
        res = req.get(f"https://euw1.api.riotgames.com/lol/match/v4/matches/{id}?api_key={token}")
        n_requests += 1
        if res.status_code == 200:
            raw_matches.append(res.json())
        else: 
            print(f"Skipping match at index {i}, {res.json()['status']['message']}")
            continue
        
        # store backups
        if i != 0 and i % 100 == 0:
            extract_match_info(raw_matches, path, start, i)
            extract_champions_data(raw_matches, champions_list, path, start, i)
            extract_players_info(raw_matches, champions_list, path, start, i)
            extract_players_stats(raw_matches, champions_list, path, start, i)
            
    #process data and save files
    extract_match_info(raw_matches, path, start, end)
    extract_champions_data(raw_matches, champions_list, path, start, end)
    extract_players_info(raw_matches, champions_list, path, start, end)
    extract_players_stats(raw_matches, champions_list, path, start, end)

In [8]:
path = "../raw_data"
matches_id = pd.read_pickle(f"{path}/matches_id.pkl")["gameId"].to_list()
matches_id[:5]

[4671787510, 4671790912, 4704868250, 4718171384, 4718095374]

In [9]:
get_matches(matches_id, f"{path}/test", TOKEN, 0, 100)

Skipping match at index 59, Gateway timeout


## Check the data

In [10]:
pd.read_pickle(f"../raw_data/test/match_info_0-100.pkl").head()

Unnamed: 0,match_id,region,date_created,match_duration,patch,winner
0,4671787510,EUW1,2020-06-21,1557,10.12,Red
1,4671790912,EUW1,2020-06-21,1716,10.12,Blue
2,4704868250,EUW1,2020-07-13,1235,10.14,Red
3,4718171384,EUW1,2020-07-21,2227,10.14,Red
4,4718095374,EUW1,2020-07-21,1822,10.14,Blue


In [11]:
pd.read_pickle(f"../raw_data/test/players_info_0-100.pkl").head()

Unnamed: 0,account_id,summoner_id,region,name
0,7UTpYZvoj06Si113SIlBe-jyteHrh-XRaYzuYfXWentoKm...,CGDTBnwxRTW_K9mtm5yvruEb4GHhdOKpNl09dBbBRy5-w-Uw,EUW1,mental ƒortress
1,fG8JDk5zVxKmoAMUqBIE8nbgMqzn8zuJrDJFPslxAg,C84ZjVnfQ-pwqcISLYNSop0JLFoR3smMwxdag22Budw,EUW1,NonoCASTER Hi
2,Kc97-m0MqgpSk3DFoY17uq39_Roh9Qvi-xtoEFXPsMhEWPY,fW-WSP0bZEEIxieoBzCNP8EGmo9S-x7AR50mwNKBaVb32Iw,EUW1,eminemorslim
3,WmwA8a6PWVm1SkA3JWpID3CDFAbqxjrsU9f345u3_qR12c...,7KhpW4cmmhIkojRE1Es6NnAZYxkqJsuaGFNbo_LR37Zh8OI-,EUW1,ˆKoichi
4,4NI6_UJFRWXe6swXbvsN9dQl6ORPjOfU1EA7ybGJjuc,dkPzTIROBk6T6Ot7DD4hmd0YN0ogmfji_z-05s6mMZSM,EUW1,Minamas


In [12]:
pd.read_pickle(f"../raw_data/test/champion_bans_0-100.pkl").head()

Unnamed: 0,champion,match_id,banned
0,Fiddlesticks,4671787510,1
1,Aphelios,4671787510,1
2,Zac,4671787510,1
3,Yuumi,4671787510,1
4,Irelia,4671787510,1


In [13]:
pd.read_pickle(f"../raw_data/test/champion_picks_0-100.pkl").head()

Unnamed: 0,champion,match_id,region,picked,lane,opponent,won,lost
0,Wukong,4671787510,EUW1,1,JUNGLE,55,0,1
1,Wukong,4671787510,EUW1,1,JUNGLE,64,0,1
2,Wukong,4671787510,EUW1,1,JUNGLE,51,0,1
3,Wukong,4671787510,EUW1,1,JUNGLE,117,0,1
4,Wukong,4671787510,EUW1,1,JUNGLE,164,0,1


In [14]:
pd.read_pickle(f"../raw_data/test/player_laning_stats_0-100.pkl").head()

Unnamed: 0,account_id,region,champion,lane,xppm_10,cspm_10,goldpm_10,dmg_takenpm_10,won
0,7UTpYZvoj06Si113SIlBe-jyteHrh-XRaYzuYfXWentoKm...,EUW1,Wukong,JUNGLE,267.3,0.5,261.4,654.3,0
1,fG8JDk5zVxKmoAMUqBIE8nbgMqzn8zuJrDJFPslxAg,EUW1,Jinx,BOTTOM,342.6,6.9,345.3,243.3,0
2,Kc97-m0MqgpSk3DFoY17uq39_Roh9Qvi-xtoEFXPsMhEWPY,EUW1,Thresh,SUPPORT,274.1,1.1,289.8,311.0,0
3,WmwA8a6PWVm1SkA3JWpID3CDFAbqxjrsU9f345u3_qR12c...,EUW1,Pyke,MIDDLE,359.4,4.3,341.8,618.7,0
4,4NI6_UJFRWXe6swXbvsN9dQl6ORPjOfU1EA7ybGJjuc,EUW1,Malphite,TOP,384.4,5.6,207.2,335.3,0


In [15]:
pd.read_pickle(f"../raw_data/test/player_combat_stats_0-100.pkl").head()

Unnamed: 0,account_id,region,champion,lane,dmg_total,healing_total,units_healed,damage_mitigated,crowd_control,dmg_taken,first_blood,first_blood_assist,won
0,7UTpYZvoj06Si113SIlBe-jyteHrh-XRaYzuYfXWentoKm...,EUW1,Wukong,JUNGLE,8575,6486,1,14108,72,22451,0,0,0
1,fG8JDk5zVxKmoAMUqBIE8nbgMqzn8zuJrDJFPslxAg,EUW1,Jinx,BOTTOM,15109,365,1,7401,92,16096,0,0,0
2,Kc97-m0MqgpSk3DFoY17uq39_Roh9Qvi-xtoEFXPsMhEWPY,EUW1,Thresh,SUPPORT,7913,774,5,16784,125,16743,0,0,0
3,WmwA8a6PWVm1SkA3JWpID3CDFAbqxjrsU9f345u3_qR12c...,EUW1,Pyke,MIDDLE,15786,4251,1,11752,152,21388,0,0,0
4,4NI6_UJFRWXe6swXbvsN9dQl6ORPjOfU1EA7ybGJjuc,EUW1,Malphite,TOP,12793,979,1,34204,505,21185,0,0,0


In [16]:
pd.read_pickle(f"../raw_data/test/player_objective_stats_0-100.pkl").head()

Unnamed: 0,account_id,region,champion,lane,dmg_to_objectives,dmg_to_turrets,total_cs,jungle_cs,jungle_invaded,wards_placed,wards_killed,won
0,7UTpYZvoj06Si113SIlBe-jyteHrh-XRaYzuYfXWentoKm...,EUW1,Wukong,JUNGLE,4718,119,32,57,4,6,6,0
1,fG8JDk5zVxKmoAMUqBIE8nbgMqzn8zuJrDJFPslxAg,EUW1,Jinx,BOTTOM,4964,4964,190,13,0,6,2,0
2,Kc97-m0MqgpSk3DFoY17uq39_Roh9Qvi-xtoEFXPsMhEWPY,EUW1,Thresh,SUPPORT,342,342,23,0,0,31,2,0
3,WmwA8a6PWVm1SkA3JWpID3CDFAbqxjrsU9f345u3_qR12c...,EUW1,Pyke,MIDDLE,1646,1264,88,0,0,4,3,0
4,4NI6_UJFRWXe6swXbvsN9dQl6ORPjOfU1EA7ybGJjuc,EUW1,Malphite,TOP,1143,1143,150,0,0,9,2,0


In [32]:
stats = pd.read_pickle(f"../raw_data/test/player_flair_stats_0-100.pkl")
stats.loc[stats["won"] == 1].head()

Unnamed: 0,account_id,region,champion,lane,killing_sprees,longest_time_alive,double_kills,triple_kills,quadra_kills,penta_kills,won
5,icd3noXGJbAHZRlDGhehW3atovTc0kEX6kuIaFjBcbHNOIE,EUW1,Katarina,MIDDLE,2,412,1,0,0,0,1
6,UKhAX-yjKWwgMMP5AeXlueeBMis89wckIFX1pS5y_VbFiA,EUW1,Lee Sin,JUNGLE,1,914,0,0,0,0,1
7,JBcZi9En4AAfO5VXXse8nLcVjqftrbxTzTY8y3WgNXQ,EUW1,Caitlyn,BOTTOM,4,480,2,1,0,0,1
8,muUtGzGAchW2lTA4eSnrsgkW3uLKkjgkZ23gStTQs_gNcw,EUW1,Lulu,SUPPORT,0,771,0,0,0,0,1
9,z5_PqojJ5c4gCs0WJAr-kTTtFfJ4uZTsCjGJFp5yWiWkww,EUW1,Camille,TOP,2,542,0,0,0,0,1


In [18]:
pd.read_pickle(f"../raw_data/test/players_champions_0-100.pkl").head()

Unnamed: 0,account_id,champion,won
0,7UTpYZvoj06Si113SIlBe-jyteHrh-XRaYzuYfXWentoKm...,Wukong,0
1,fG8JDk5zVxKmoAMUqBIE8nbgMqzn8zuJrDJFPslxAg,Jinx,0
2,Kc97-m0MqgpSk3DFoY17uq39_Roh9Qvi-xtoEFXPsMhEWPY,Thresh,0
3,WmwA8a6PWVm1SkA3JWpID3CDFAbqxjrsU9f345u3_qR12c...,Pyke,0
4,4NI6_UJFRWXe6swXbvsN9dQl6ORPjOfU1EA7ybGJjuc,Malphite,0


In [19]:
pd.read_pickle(f"../raw_data/test/players_lanes_0-100.pkl").head()

Unnamed: 0,account_id,lane,won
0,7UTpYZvoj06Si113SIlBe-jyteHrh-XRaYzuYfXWentoKm...,JUNGLE,0
1,fG8JDk5zVxKmoAMUqBIE8nbgMqzn8zuJrDJFPslxAg,BOTTOM,0
2,Kc97-m0MqgpSk3DFoY17uq39_Roh9Qvi-xtoEFXPsMhEWPY,SUPPORT,0
3,WmwA8a6PWVm1SkA3JWpID3CDFAbqxjrsU9f345u3_qR12c...,MIDDLE,0
4,4NI6_UJFRWXe6swXbvsN9dQl6ORPjOfU1EA7ybGJjuc,TOP,0
