# 02 - Data Cleaning

In [21]:
import json
import glob
import requests
import time
import seaborn as sns
%matplotlib inline
from matplotlib import pyplot as plt
import os

This is the API key that I will use to make requests to Riot's API.

In [22]:
#constants
# Riot API key appended to every request URL built in this notebook.
# Supply it through the RIOT_API_KEY environment variable so the secret does
# not have to live in the notebook; the literal is kept only as a fallback so
# existing runs keep working unchanged.
# NOTE(review): a key stored in a shared notebook should be treated as leaked;
# regenerate it and remove the fallback once the environment variable is set.
myKey = os.environ.get("RIOT_API_KEY", "6aacc2fb-5fb7-4b4e-8250-e5005b85e22b")

I downloaded a random sample of a thousand matches, provided in the form of JSON files. However, this isn't the information I'm looking for; I want the actual player information from the API. The API request requires a player's ID, so I'll extract every summoner ID from these 1,000 matches.

In [23]:
# Accumulators filled in later while walking the match files.
summonerIDs, platformIDs = [], []
# Every entry found directly under the data directory.
matchFiles = glob.glob("/data/andyl7an/*")
matchFiles

['/data/andyl7an/matches10.json',
 '/data/andyl7an/matches9.json',
 '/data/andyl7an/matches8.json',
 '/data/andyl7an/matches6.json',
 '/data/andyl7an/matches4.json',
 '/data/andyl7an/champData',
 '/data/andyl7an/matches2.json',
 '/data/andyl7an/matches1.json',
 '/data/andyl7an/champData27',
 '/data/andyl7an/matches3.json',
 '/data/andyl7an/matches5.json',
 '/data/andyl7an/matches7.json']

In [24]:
# Start from empty lists so that re-running this cell does not append the
# same IDs a second time.
summonerIDs.clear()
platformIDs.clear()

for file in matchFiles:
    # The glob also returns non-match entries (champData, champData27);
    # only the *.json files are parsed as match dumps.
    if not file.endswith('.json'):
        continue
    with open(file, encoding = "ISO-8859-1") as data_file:
        data = json.load(data_file)
    for match in data['matches']:
        # Iterate over this match's own participant list. The previous
        # version took the participant count from the first match in the
        # file and applied it to every match.
        for identity in match['participantIdentities']:
            summonerIDs.append(identity['player']['summonerId'])
            # One platform ID per summoner, so the two lists stay index-aligned.
            platformIDs.append(match['platformId'])



In [25]:
# Sanity check on the number of summoner IDs extracted from the match sample.
assert len(summonerIDs) == 10000

## API Request Functions

This method will help me get the tier of each player, ranging from Bronze to Challenger.

In [26]:
def getRank(id):
    """Return the tier string of a summoner (e.g. "PLATINUM").

    Requests the league-by-summoner endpoint, looks up the list stored under
    the summoner's ID in the JSON response, and returns the 'tier' field of
    the last element of that list.

    Raises KeyError when the response has no entry for the summoner's ID and
    ValueError when the response body is not valid JSON.

    NOTE(review): this reads the *last* league in the list while getDiv reads
    the *first* one; confirm both refer to the same queue.
    """
    endpoint = "https://na.api.pvp.net/api/lol/na/v2.5/league/by-summoner/"
    response = requests.get(endpoint + str(id) + "?api_key=" + myKey)
    leagues = response.json()[str(id)]
    return leagues[-1]['tier']



This function will help me determine whether a player is specialized in a champion. I've defined a specialized player as someone who has more champion points on their most played champion than on their second and third most played champions combined.

In [27]:
def isOTP(id):
    """Return True when a summoner is specialized in a single champion.

    A summoner counts as specialized when the champion points of their top
    champion exceed the points of their second and third champions combined.
    Champions missing from the response count as 0 points, so a summoner with
    fewer than three champions is still classified instead of raising
    IndexError (which the acquisition loop does not catch).

    Raises KeyError when the response is not a list or an entry lacks
    'championPoints', and ValueError when the body is not valid JSON.
    """
    url = "https://na.api.pvp.net/championmastery/location/NA1/player/"
    url = url + str(id)
    url = url + "/topchampions?api_key=" + myKey
    output = requests.get(url).json()

    if not isinstance(output, list):
        # A non-list payload (e.g. an error object) used to fail on output[0]
        # with KeyError; keep that exception so callers still skip the player.
        raise KeyError(0)

    points = [champ['championPoints'] for champ in output[:3]]
    # Pad to three values so absent champions contribute nothing.
    points += [0] * (3 - len(points))
    return points[0] > points[1] + points[2]

    

This will get the division that accompanies the rank. Divisions range from 5 to 1 and are returned by the API as Roman numerals (for example "II"), so a Bronze player could be anywhere between Bronze V and Bronze I; this specifies which.

In [28]:
def getDiv(id):
    """Return the division string of a summoner (e.g. "II").

    Requests the league entry endpoint, takes the first league listed under
    the summoner's ID in the JSON response, and returns the 'division' field
    of that league's first entry.

    Raises KeyError when the response has no entry for the summoner's ID and
    ValueError when the response body is not valid JSON.
    """
    key = str(id)
    endpoint = "https://na.api.pvp.net/api/lol/na/v2.5/league/by-summoner/" + key
    payload = requests.get(endpoint + "/entry?api_key=" + myKey).json()
    firstLeague = payload[key][0]
    return firstLeague['entries'][0]['division']
    
    

In [29]:
# Smoke-test the three request helpers against a single summoner and check
# the values they return for it.
checkedSummoner = 28520860
div = getDiv(checkedSummoner)
tier = getRank(checkedSummoner)
otp = isOTP(checkedSummoner)

assert tier == "PLATINUM"
assert div == "II"
assert otp
# Tier and division are concatenated in this form when written to file.
assert tier + div == "PLATINUMII"

In [30]:
# Empty result lists for ranks and specialization flags.
rankList, OTPList = [], []

This method will return information on every champion the player has played recently, in the form of "Champion Points"

In [None]:

def champUsage(id):
    """Fetch the champion mastery data of a summoner.

    Requests the champion-mastery 'champions' endpoint for the given summoner
    ID and returns the decoded JSON response unchanged.

    Raises ValueError when the response body is not valid JSON.
    """
    endpoint = (
        "https://na.api.pvp.net/championmastery/location/NA1/player/"
        + str(id)
        + "/champions?api_key="
        + myKey
    )
    return requests.get(endpoint).json()

Since the champion information returns a champion ID, I need a function that converts it to a champion name so I know which champion I'm looking at.
    

In [32]:
# Champion names already resolved, keyed by str(champion id). The acquisition
# loops look up the same champions again and again, so each name is requested
# from the static-data endpoint at most once per kernel session.
_champNameCache = {}


def getChampName(id):
    """Return the display name of the champion with the given ID.

    The name is fetched from the static-data champion endpoint on first use
    and served from an in-memory cache afterwards.

    Raises KeyError when the response has no 'name' field and ValueError when
    the body is not valid JSON; failed lookups are not cached, so a later call
    retries the request.
    """
    key = str(id)
    if key not in _champNameCache:
        url = 'https://global.api.pvp.net/api/lol/static-data/na/v1.2/champion/'
        url = url + key
        url = url + '?api_key=' + myKey
        output = requests.get(url).json()
        _champNameCache[key] = output['name']
    return _champNameCache[key]

## Data Acquisition

This will create a file in which each line pairs a player's rank (tier and division) with whether that player is specialized.

In [None]:
# Seconds to wait after every API request (presumably to stay under the API
# rate limit).
requestDelay = 1.5
# Number of summoners for which no row could be written.
skippedSummoners = 0

# Writes one "<tier><division>,<isOTP>" line per summoner to otpRankData.txt.
with open('otpRankData.txt', 'w') as f:
    # Named summonerId rather than id so the builtin id() is not shadowed for
    # the rest of the session.
    for summonerId in summonerIDs:
        try:
            rank = getRank(summonerId)
            time.sleep(requestDelay)
            div = getDiv(summonerId)
            time.sleep(requestDelay)
            otp = isOTP(summonerId)
            time.sleep(requestDelay)
            row = rank + div + ',' + str(otp) + '\n'
        except (KeyError, ValueError, IndexError):
            # Best-effort: a summoner whose data is missing or malformed is
            # skipped. IndexError covers empty league/champion lists, which
            # previously aborted the whole run.
            skippedSummoners += 1
            # The failed request skipped its own sleep above; wait here so the
            # next request is not sent back-to-back.
            time.sleep(requestDelay)
            continue
        f.write(row)

print('Skipped ' + str(skippedSummoners) + ' of ' + str(len(summonerIDs)) + ' summoners')
        

I will then take each player's three most played champions, and create a data set for each champion that shows how often other champions are played by the same player. I will repeat this for every player I have in my sample.

In [None]:

def _appendChampData(allChamps, topIndex, outDir):
    """Append one player's other champions to the file of one of their champions.

    The output file is named after the champion at allChamps[topIndex] and
    lives in outDir. One "<champion name>:<champion points>" line is appended
    for every other champion in allChamps.

    Raises KeyError/IndexError when allChamps is not a list with an entry at
    topIndex or an entry lacks the expected fields.
    """
    name = getChampName(allChamps[topIndex]['championId'])
    name = os.path.join(outDir, name)

    # Open the file once per player instead of once per written line.
    with open(name, 'a') as f:
        for i, champ in enumerate(allChamps):
            if i == topIndex:
                continue
            f.write(getChampName(champ['championId']))
            f.write(':')
            f.write(str(champ['championPoints']))
            f.write('\n')


path = os.path.join(os.getcwd(), 'champData')
# Appending fails if the output directory is missing, so create it up front.
os.makedirs(path, exist_ok=True)

# One pass over all players for each of the three leading positions in the
# player's champion list (index 0, then 1, then 2), in the same order as the
# three separate loops this replaces.
for topIndex in range(3):
    # Named summonerId rather than id so the builtin id() is not shadowed.
    for summonerId in summonerIDs:
        try:
            print('Running: ' + str(summonerId))
            allChamps = champUsage(summonerId)
            time.sleep(1)
            _appendChampData(allChamps, topIndex, path)
        except (KeyError, ValueError, IndexError):
            # Best-effort: skip players whose data is missing or malformed.
            # IndexError covers players with fewer than three champions,
            # which previously aborted the whole run.
            continue

Running: 50352314
Running: 51723537