# Data mining

First of all we need to import every library we're going to use during the data mining process.

In [None]:
import folium
import pandas as pd
import numpy as np
import json
import requests
from bs4 import BeautifulSoup
from lxml import etree
import requests
from IPython.display import display, HTML
import re
import matplotlib.pyplot as plt
import time
import pickle
import os
import urllib.request

## Global parameters

We are going to get all the information we need from the API that Riot Games provides for League of Legends developers. To perform the requests we need an API Key, and we also define here the season, the type of game (ranked queue) and the API limits that apply to us. Riot Games defines a limit of 10 requests every ten seconds and 500 requests every ten minutes, blocking every extra request until the time window is reset.

In [None]:
# Riot API credentials and query settings shared by every request below.
# SECURITY: a key committed to source is readable by anyone with access to
# this file. Prefer supplying it through the RIOT_API_KEY environment
# variable; the literal is kept only as a fallback so existing runs still work.
API_KEY = os.environ.get('RIOT_API_KEY', 'RGAPI-13b30cdc-783c-47f6-85f0-d43855b5e322')
SEASON = '2016'
RANKED_QUEUE = 'RANKED_SOLO_5x5'
# Request-count thresholds at which checkTime pauses. They are strings because
# they are compared directly against the counts parsed from the
# X-Rate-Limit-Count response header, and sit slightly below the official
# limits (10 per ten seconds, 500 per ten minutes) to leave a safety margin.
TEN_SECONDS_MAX = '9'
TEN_MINUTES_MAX = '490'
# Summoners whose aggregated stats show fewer sessions than this are discarded.
MIN_MATCHES = 50

# Module-level accumulators, rebound by the crawling cells further down.
matchlist = []
player_list = []

In order to prevent information losses and make our algorithms more efficient, we have implemented a function that will take the API request limits into account while performing requests and will wait when necessary in order to prevent blocks. This function is also prepared to support multiple API Keys, so it would be possible to switch between the different keys to avoid long waits. However, in our experience this hasn't been necessary.

In [None]:
def checkTime(headers, time_ten_seconds, time_ten_minutes):
    """Pause when a rate-limit window is about to be exhausted.

    Reads the request counters from the ``X-Rate-Limit-Count`` response
    header. The first comma-separated entry is treated as the ten-second
    window and the second as the ten-minute window; only the part before
    the ``:`` of each entry (the count) is used.

    Args:
        headers: Mapping of response headers (anything offering ``.get``).
        time_ten_seconds: Timestamp at which the current ten-second window
            is considered to have started.
        time_ten_minutes: Timestamp at which the current ten-minute window
            is considered to have started.

    Returns:
        The (possibly updated) ``(time_ten_seconds, time_ten_minutes)`` pair.
        When the header is missing or does not contain both windows, the
        timestamps are returned unchanged and no sleep happens.
    """
    rate_counts = headers.get('X-Rate-Limit-Count')
    if not rate_counts:
        # Some responses may carry no rate-limit information; nothing to track.
        return time_ten_seconds, time_ten_minutes
    windows = rate_counts.split(',')
    if len(windows) < 2:
        return time_ten_seconds, time_ten_minutes

    ten_seconds_limit = windows[0].split(':')[0]
    ten_minutes_limit = windows[1].split(':')[0]

    # A count of '1' means this was the first request of a fresh window.
    if ten_seconds_limit == '1':
        time_ten_seconds = time.time()
    if ten_minutes_limit == '1':
        time_ten_minutes = time.time()
    elif ten_minutes_limit == TEN_MINUTES_MAX:
        # Wait out whatever remains of the ten-minute window.
        to_sleep = max(0, 600 - (time.time() - time_ten_minutes))
        print("Ten minutes limit! Sleeping " + str(to_sleep) + " seconds")
        time.sleep(to_sleep)
        time_ten_minutes = time.time()
    elif ten_seconds_limit == TEN_SECONDS_MAX:
        # Wait out whatever remains of the ten-second window (kept silent,
        # since this happens very frequently).
        to_sleep = max(0, 10 - (time.time() - time_ten_seconds))
        time.sleep(to_sleep)
        time_ten_seconds = time.time()
    return time_ten_seconds, time_ten_minutes

We have defined a series of functions that make use of the API features to get the information we need. First of all, the function *getUser* will take a list of summoner names and return their corresponding IDs.

In [None]:
def getUser(summoner_name_list):
    """Look up the summoner ID of every name in ``summoner_name_list``.

    One request is issued per name. The JSON response is indexed by the
    name exactly as it was passed in, and the ``'id'`` field of that entry
    is collected.

    Returns:
        A list of IDs, in the same order as the input names.
    """
    def lookup_id(name):
        # The response is keyed by the requested summoner name.
        url = 'https://euw.api.pvp.net/api/lol/euw/v1.4/summoner/by-name/' + name + '?api_key='+API_KEY
        response = requests.request("GET", url)
        return response.json()[name]['id']

    return [lookup_id(name) for name in summoner_name_list]


The function *getStats* will take a summoner ID and return the ranked game stats of that summoner for the season configured in `SEASON` (2015, 2016, ...).

In [None]:
def getStats(summoner_id):
    """Fetch the ranked stats of one summoner for the configured SEASON.

    Args:
        summoner_id: Summoner ID, as a string or an integer. It is converted
            with ``str`` before being placed in the URL, so the integer IDs
            produced by ``getUser`` can be passed directly.

    Returns:
        The decoded JSON body of the response.
    """
    url = 'https://euw.api.pvp.net/api/lol/euw/v1.3/stats/by-summoner/' + str(summoner_id) + '/ranked?season=SEASON' + SEASON + '&api_key='+API_KEY
    info = requests.request("GET", url)
    return info.json()

In order to work with an acceptable pool of LoL players, we decided to start from one seed player from each of five leagues (Bronze, Silver, Gold, Platinum, Diamond). From those seeds, we will get a list of all the matches each player has played during the given season with the function *getMatchList*.

In [None]:
def getMatchList(summoner_id_list):
    """Collect the IDs of the ranked matches played by the given summoners.

    For each summoner, the match list for RANKED_QUEUE and SEASON is
    requested and the ``'matchId'`` of every entry under ``'matches'`` is
    gathered.

    Args:
        summoner_id_list: Iterable of summoner IDs.

    Returns:
        A list of match IDs without duplicates, in first-seen order.

    Raises:
        KeyError: If a response has no ``'matches'`` entry.
    """
    matchlist = []
    # The set gives constant-time duplicate checks; the list keeps the order.
    seen_matches = set()
    for summoner_id in summoner_id_list:
        url = 'https://euw.api.pvp.net/api/lol/euw/v2.2/matchlist/by-summoner/' + str(summoner_id) +'?rankedQueues=' + RANKED_QUEUE + '&seasons=SEASON' + SEASON + '&api_key='+API_KEY
        summoner_matchlist = requests.request("GET", url).json()
        for match in summoner_matchlist['matches']:
            match_id = match['matchId']
            if match_id not in seen_matches:
                seen_matches.add(match_id)
                matchlist.append(match_id)
    return matchlist

In [None]:
# Seed summoners used as starting points for the crawl. The note next to each
# name records the league the player was in.
list_name_seeds_summoners = [
    "makiaveliko84",     # always Bronze
    "limoneslocos",      # Silver -> Silver
    "skinny1love",       # Gold (2015) -> Platinum (2016)
    "singedairlines",    # Platinum -> Platinum
    "bestluxitaxd",      # Diamond -> Diamond
]

# Translate the seed names into summoner IDs.
list_ids_seeds_summoners = getUser(list_name_seeds_summoners)

Now that we have a large list of matches played during the given season, we get the list of Summoner IDs that have played in them with *getPlayersList*. By doing this, from the five initial players we were able to get a list of 30,000 IDs of Europe West server players.

In [None]:
def getPlayersList(matchlist):
    """Collect the summoner IDs of everyone who took part in the given matches.

    Each match is requested individually. A response is only used when its
    JSON body has exactly 14 top-level entries (the check this notebook uses
    to recognise a complete match payload); other responses are skipped.

    Args:
        matchlist: Iterable of match IDs. Its length is printed up front.

    Returns:
        A list of summoner IDs without duplicates, in first-seen order.
    """
    time_ten_seconds = time.time()
    time_ten_minutes = time.time()
    players_list = []
    # Deduplicate on the summoner ID itself (not on the loop position).
    seen_players = set()
    print(len(matchlist))
    for match_id in matchlist:
        url = 'https://euw.api.pvp.net/api/lol/euw/v2.2/match/' + str(match_id) + '?api_key=' + API_KEY
        info = requests.request("GET", url)
        headers = info.headers
        body = info.json()
        if len(body) == 14:
            for identity in body['participantIdentities']:
                summoner_id = identity['player']['summonerId']
                if summoner_id not in seen_players:
                    seen_players.add(summoner_id)
                    players_list.append(summoner_id)
        # Every request counts towards the limits, so update the rate-limit
        # bookkeeping for any response that reports its counters, not only
        # for the ones whose body was usable.
        if 'X-Rate-Limit-Count' in headers:
            time_ten_seconds, time_ten_minutes = checkTime(headers, time_ten_seconds, time_ten_minutes)

    return players_list

We want to keep all the downloaded information on disk, as we need it in other Notebooks to perform the Data Analysis. For this reason, we are going to store the JSON data we collect in the following Pickle files:

- summoner_ids.pkl: list of Summoner IDs used
- summoner_stats.pkl: stats aggregated and for the most used Champion for each Summoner ID
- summoner_elos.pkl: final position achieved at the end of the season for each Summoner (which league they ended in)
- champion_tags.pkl: passive information of each existing LoL Champion (non-changing information)

In [None]:
# Create each pickle with its initial content unless it already exists, so
# data saved by an earlier run is never overwritten here.
for pickle_path, initial_content in (
    ('./summoner_ids.pkl', {'ids': player_list}),
    ('./summoner_stats.pkl', {}),
    ('./summoner_elos.pkl', {}),
):
    if os.path.isfile(pickle_path):
        continue
    info = initial_content
    with open(pickle_path, 'wb') as f:
        pickle.dump(info, f)

With all the functions defined above, we get the final list of summoners and save it in the pickle.

In [None]:
# Crawl outwards from every seed summoner: fetch the seed's matches, then the
# players of those matches, and merge them into the saved ID list. The loop
# covers all seeds, including the first one.
for i, seed_id in enumerate(list_ids_seeds_summoners):
    matchlist = getMatchList([seed_id])
    player_list = getPlayersList(matchlist)
    print("Loading pickle " + str(i))
    with open('./summoner_ids.pkl', 'rb') as f:
        saved = pickle.load(f)
    # Only add IDs that are not stored yet, so re-running this cell (or two
    # seeds sharing players) does not pile up duplicates in the pickle.
    known_ids = set(saved['ids'])
    for player_id in player_list:
        if player_id not in known_ids:
            known_ids.add(player_id)
            saved['ids'].append(player_id)
    # Persist after every seed so a later failure keeps the earlier results.
    print("Saving player " + str(i))
    with open('./summoner_ids.pkl', 'wb') as f:
        pickle.dump(saved, f)

Now, for every ID recovered, we want its *Stats*: the aggregated stats over all the Champions played, and the stats of the most played Champion of that Summoner. So, we will have the Summoner ID as the key and both JSON objects as the value.

In [None]:
# Load the collected summoner IDs and whatever stats earlier runs already saved.
with open('./summoner_ids.pkl', 'rb') as f:
    ids_json = pickle.load(f)
    ids_list = ids_json['ids']
with open('./summoner_stats.pkl', 'rb') as f:
    stats_json = pickle.load(f)
time_ten_seconds = time.time()
time_ten_minutes = time.time()
counter = 1
for summoner_id in ids_list:
    id_ = str(summoner_id)
    # Already downloaded (by a previous run or as a duplicate ID): skip it.
    if id_ in stats_json:
        counter += 1
        if counter%100 ==0:
            print(str(counter), "players done")
        continue
    # Checkpoint the stats gathered so far every 100 players.
    if counter%100 ==0:
        with open('./summoner_stats.pkl', 'wb') as f:
            pickle.dump(stats_json, f)
        print(str(counter), "players done")

    url = 'https://euw.api.pvp.net/api/lol/euw/v1.3/stats/by-summoner/' + id_ + '/ranked?season=SEASON' + SEASON + '&api_key='+API_KEY
    info = requests.request("GET", url)
    body = info.json()
    headers = info.headers
    # A usable answer has exactly three top-level entries. Anything else is
    # retried after honouring the rate limits, except a 404: repeating the
    # same request cannot make missing stats appear, so retrying it would
    # loop forever.
    while len(body) != 3 and info.status_code != 404:
        time_ten_seconds, time_ten_minutes = checkTime(headers, time_ten_seconds, time_ten_minutes)
        info = requests.request("GET", url)
        body = info.json()
        headers = info.headers

    if len(body) != 3:
        # No stats available for this summoner; move on to the next one.
        counter += 1
        time_ten_seconds, time_ten_minutes = checkTime(headers, time_ten_seconds, time_ten_minutes)
        continue

    # Reset the per-summoner results so nothing leaks over from the previous
    # iteration.
    total_champ = None
    pref_champ = {}
    pref_champ_sessions = 0
    discard = False
    for champ in body['champions']:
        sessions = champ['stats']['totalSessionsPlayed']
        if champ['id'] == 0:
            # The entry with id 0 is stored as the summoner's 'total' stats;
            # summoners with too few sessions overall are dropped.
            if sessions < MIN_MATCHES:
                discard = True
                break
            total_champ = champ
        elif pref_champ_sessions < sessions:
            # Track the champion with the most sessions played.
            pref_champ = champ
            pref_champ_sessions = sessions
    # Store the summoner only when it was not discarded and its total entry
    # was actually present in the response.
    if not discard and total_champ is not None:
        stats_json[id_]={'total': total_champ, 'pref_champ': pref_champ}
    counter += 1
    time_ten_seconds, time_ten_minutes = checkTime(headers, time_ten_seconds, time_ten_minutes)

# Final save: without it, everything gathered after the last multiple-of-100
# checkpoint would be lost.
with open('./summoner_stats.pkl', 'wb') as f:
    pickle.dump(stats_json, f)

We want to know where a player has finished the season (Gold, Silver...) in order to check whether our predictions were correct or not. For that, we need to get the Elo at the end of the season with the *getELO* function.

In [None]:
def getELO(summoner_id):
    """Scrape the league medal of a summoner from its lolking.net profile page.

    The page is parsed with BeautifulSoup. Inside the first ``div`` of class
    ``summoner-name``, the second ``div`` of class ``medal-image`` is read and
    the league name is cut out of its ``style`` attribute: the text after
    ``'medals/'`` minus its last seven characters.

    Args:
        summoner_id: Summoner ID (converted with ``str`` for the URL).

    Returns:
        The extracted league string, or ``''`` when the page has no
        ``summoner-name`` block or fewer than two medal images.
    """
    url = 'http://www.lolking.net/summoner/euw/' +  str(summoner_id)
    # We parse the response to analyze it
    soup_project = BeautifulSoup(requests.request("GET", url).text, 'html.parser')
    name_blocks = soup_project.findAll('div', {'class': 'summoner-name'})
    if not name_blocks:
        # Page without the expected profile header (e.g. an error page):
        # report "unknown" the same way as a profile without medals.
        return ''
    medals = name_blocks[0].findAll('div', {'class': 'medal-image'})
    if len(medals) > 1:
        return medals[1]['style'].split('medals/')[1][:-7]
    return ''

In [None]:
# Scrape the league of every summoner that is not in `elos` yet.
# NOTE(review): `summoner_ids` and `elos` are not defined anywhere in the
# visible part of this file; they are expected to exist already - confirm.
for s, summoner_id in enumerate(summoner_ids):
    elo_key = str(summoner_id)
    if elo_key not in elos:
        elos[elo_key] = getELO(summoner_id)
    if s%100==0 and s!=0:
        # Checkpoint: reopen the file for each save so it always holds exactly
        # one complete pickle, and is not left truncated while scraping runs.
        with open('./summoner_elos.pkl', 'wb') as f:
            pickle.dump(elos, f)
        print(str(s), "players done")
# Final save with the complete mapping.
with open('./summoner_elos.pkl', 'wb') as f:
    pickle.dump(elos, f)

We also need the information that Riot Games provides for each Champion, such as the type of Champion.

In [None]:
def getChampions():
    """Request the champion list from the EUW v1.2 champion endpoint.

    Returns:
        The decoded JSON body of the response.
    """
    champions_url = 'https://euw.api.pvp.net/api/lol/euw/v1.2/champion?api_key='+API_KEY
    return requests.request("GET", champions_url).json()
def getChampionsPassive():
    """Request the static champion data (all fields, indexed by champion ID).

    Returns:
        The decoded JSON body of the response.
    """
    static_data_url = 'https://global.api.pvp.net/api/lol/static-data/euw/v1.2/champion?dataById=true&champData=all&api_key='+API_KEY
    return requests.request("GET", static_data_url).json()

In [None]:
# Download first and open the file afterwards: opening in 'wb' truncates the
# pickle immediately, so a failed request must not happen while it is open.
# The result is bound to `info` because the icon download step further down
# reads `info['data']`.
info = getChampionsPassive()
with open('./champion_tags.pkl', 'wb') as f:
    pickle.dump(info, f)

Finally, we get the icon image for each Champion for future visualization.

In [None]:
# Read the champion data back from the pickle written above, so this step does
# not depend on whatever an earlier cell last left in the `info` variable.
with open('./champion_tags.pkl', 'rb') as f:
    info = pickle.load(f)

# urlretrieve does not create missing directories.
os.makedirs('champ_images', exist_ok=True)

# NOTE(review): the static-data payload presumably reports its Data Dragon
# version under 'version' - confirm. The previously pinned release is kept as
# the fallback when that field is absent.
ddragon_version = info.get('version', '4.2.6')

# Champion IDs are not a contiguous 1..N range, so walk the actual keys of the
# data instead of counting up to the number of champions.
for champ_id, champ in info['data'].items():
    name = champ['image']['full']
    try:
        urllib.request.urlretrieve('http://ddragon.leagueoflegends.com/cdn/' + ddragon_version + '/img/champion/' + name, 'champ_images/' + str(champ_id) + '.png')
    except urllib.error.HTTPError as err:
        # One missing icon should not abort the download of all the others.
        print("Could not download icon " + name + ": " + str(err))