### League of Legends: Spring 2020 Dataset

This datasets contains all games across differnt professional League of Legends 

In [1]:
#import packages
import pandas as pd
import requests
import json
import pickle
from nltk.stem import PorterStemmer
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize 
from sklearn.preprocessing import MultiLabelBinarizer
import numpy as np
import pdb

In [2]:
#----Read competitve history data set for list of patches ----#
patches = pd.read_csv('../data/2020 spring match data OraclesElixir 2020-05-15.csv')
patches = list(set(patches['patch']))
patches = [str(x) for x in patches]

In [3]:
def reformat_patch_number(patch):
    patch_split = patch.split('.')
    patch_split = [str(int(x)) for x in patch_split]
    patch_output = patch_split[0] + '.' + patch_split[1] + '.1'
    return(patch_output)
    

In [4]:
patches = [reformat_patch_number(x) for x in patches]

In [5]:
#----- In case we need to use API ------#

#One time api key creation
file_name = '/Users/horacefung/Documents/keys/lol_api_key'
#key = {'key':''} #don't show key
#output = open(file_name,'wb')
#pickle.dump(key, output)
#output.close()

input_file = open(file_name,'rb')
api_key = pickle.load(input_file)['key']
input_file.close()

#api_root_url = "https://www.googleapis.com/youtube/v3/videos?"
#url_params = "part=snippet&type=video&chart=mostPopular"

#url = api_root_url + url_params + api_key
#data = requests.get(url).json()

### Champion Data

Retreive and format champion data for each relevant patch

In [6]:
#----- Pull champion list -----#
url = 'http://ddragon.leagueoflegends.com/cdn/' + patches[-1] + '/data/en_US/champion.json'
data = requests.get(url).json()
champion_list = list(data['data'].keys())

In [7]:
def extract_data(champion, champion_name, patch):
    
    #Initialize porter object
    porter = PorterStemmer()
    
    #Split into base stats & spells & classes
    stats = champion['stats']
    passive = champion['passive']
    spells = champion['spells']
    classes = champion['tags']
    
    #Make sure even single class champions have type list
    if type(classes) == 'str':
        classes = [classes]
    
    #Keywords
    hard_cc_list = ['airborne', 'charm', 'flee', 'taunt', 'sleep', 'stun', 'supression', 'suspension', 
                    'stasis', 'pull', 'knock']
    soft_cc_list = ['blind', 'cripple', 'disarm', 'ground', 'knockdown', 'nearsight', 'root', 'silence', 'slow']
    gap_closer_list = ['dash', 'blink', 'leap', 'launch', 'movementspeed', 'teleport']
    vision_list = ['vision', 'sight']
    protection_list = ['shield', 'heal']
    
    #initialize variables
    hard_cc_value = 0
    soft_cc_value = 0
    spells_average_range_value = 0
    gap_closer_value = 0
    protection_value = 0
    
    #Passive
    passive = passive['description']
    passive = passive.lower()
    passive = passive.replace('movement speed', 'movementspeed') #specific logic for move speed
    passive = [porter.stem(x) for x in word_tokenize(passive)]
    hard_cc = len(list(set(passive) & set(hard_cc_list))) #unique key words
    soft_cc = len(list(set(passive) & set(soft_cc_list))) #unique key words
    protection = len(list(set(passive) & set(protection_list)))
    gap_closer = len(list(set(passive) & set(gap_closer_list)))
    
    #Update talley with passive
    hard_cc_value = hard_cc_value + hard_cc
    soft_cc_value = soft_cc_value + soft_cc
    protection_value = protection_value + protection
    gap_closer_value = gap_closer_value + gap_closer
    
    
    #Four spells
    for i in range(4):
        
        #----Extract text information from tooltip
        tooltip = spells[i]['tooltip'].lower()
        tooltip = tooltip.replace('movement speed', 'movementspeed') #specific logic for move speed
        tooltip = [porter.stem(x) for x in word_tokenize(tooltip)]
        hard_cc = len(list(set(tooltip) & set(hard_cc_list))) #unique key words
        soft_cc = len(list(set(tooltip) & set(soft_cc_list))) #unique key words
        protection = len(list(set(tooltip) & set(protection_list)))
        gap_closer = len(list(set(tooltip) & set(gap_closer_list)))
        spells_range = np.mean(spells[i]['range'])
        
        #Update talley
        hard_cc_value = hard_cc_value + hard_cc
        soft_cc_value = soft_cc_value + soft_cc
        protection_value = protection_value + protection
        gap_closer_value = gap_closer_value + gap_closer
        spells_average_range_value = np.mean([spells_average_range_value, spells_range])
        
    #
    
    #-----Setup Dataframe---------#
    dict_temp = {'champion' : champion_name,
                 'patch' : patch,
                 'hard_cc_value' : hard_cc_value, 
                 'soft_cc_value' : soft_cc_value,
                 'spells_average_range_value' : spells_average_range_value,
                 'gap_closer_value' : gap_closer_value,
                 'protection_value' : protection_value,
                 'classes': [classes]} #make this a list
    
    dict_temp = {**stats, **dict_temp}
    
    output_df = pd.DataFrame(dict_temp, index = [0])
    output_df = output_df.set_index(['champion', 'patch'])
    
    
    return(output_df)

In [8]:
#----- Pull Relevant Champion Patch Data ---------#
def data_pull(patches, champion_list):
    
    champion_output = pd.DataFrame()
    loops = len(patches)
    counter = 0
    
    for patch in patches:
        
        for champion in champion_list:
            url = 'http://ddragon.leagueoflegends.com/cdn/{}/data/en_US/champion/{}.json'.format(patch, champion)
            
            try:
                data = requests.get(url).json()
                data = data['data'][champion]
                champion = extract_data(data, champion, patch)
                champion_output = pd.concat([champion_output, champion])
                #print('Added:' + champion)
                #print(champion)
            except:
                continue
                #print('Request failed')
                #do nothing if champion not in this patch, e.g. new releases
        
        counter = counter + 1
        print('Completed patch: ' + patch + ' | ' + str(counter) + '/' + str(loops))
    
    champion_output = champion_output.reset_index()
    
    #One-hot encode champion classes. This function is pretty sweet
    mlb = MultiLabelBinarizer()
    champion_output = champion_output.join(pd.DataFrame(mlb.fit_transform(champion_output.pop('classes')),
                                                        columns=mlb.classes_,
                                                        index=champion_output.index))
    
    
    
    return(champion_output)

In [9]:
champion_dataset = data_pull(patches, champion_list)

Completed patch: 10.4.1 | 1/8
Completed patch: 10.6.1 | 2/8
Completed patch: 10.1.1 | 3/8
Completed patch: 10.2.1 | 4/8
Completed patch: 10.3.1 | 5/8
Completed patch: 10.5.1 | 6/8
Completed patch: 10.7.1 | 7/8
Completed patch: 10.8.1 | 8/8


In [20]:
champion_dataset.head()

Unnamed: 0,champion,patch,armor,armorperlevel,attackdamage,attackdamageperlevel,attackrange,attackspeed,attackspeedperlevel,crit,...,soft_cc_value,spellblock,spellblockperlevel,spells_average_range_value,Assassin,Fighter,Mage,Marksman,Support,Tank
0,Evelynn,10.4.1,37.0,3.5,61.0,3.0,125,0.667,2.1,0,...,1,32.1,1.25,12777.5,1,0,1,0,0,0
1,JarvanIV,10.4.1,34.0,3.6,64.0,3.4,175,0.658,2.5,0,...,1,32.1,1.25,666.25,0,1,0,0,0,1
2,Kalista,10.4.1,23.0,4.0,69.0,4.0,525,0.694,4.0,0,...,1,30.0,0.5,1446.875,0,0,0,1,0,0
3,Zilean,10.4.1,24.0,3.8,51.64,3.0,550,0.625,2.13,0,...,1,30.0,0.5,718.75,0,0,1,0,1,0
4,Jhin,10.4.1,24.0,3.5,59.0,4.7,550,0.625,0.0,0,...,3,30.0,0.5,13096.875,0,0,1,1,0,0


In [13]:
#---- Pickle Patch Data -----#
file_name = '../data/patch_notes.pickle'
#output = open(file_name,'wb')
#pickle.dump(champion_dataset, output)
#output.close()

#input_file = open(file_name,'rb')
#patch_data = pickle.load(input_file)
#input_file.close()

In [21]:
match_data = pd.read_csv('../data/2020 spring match data OraclesElixir 2020-05-15.csv')

In [22]:
columns = ['date','gameid', 'patch','side', 'position', 'player','champion','ban1', 'ban2', 'ban3', 'ban4', 'ban5',
           'gamelength','kills', 'deaths', 'assists', 'damagetochampions','wardsplaced', 'wardskilled', 'totalgold',
           'total cs', 'monsterkills', 'result']

match_data = match_data[columns]
match_data = match_data.rename(columns = {'total cs' : 'total_cs'})
match_data = match_data[match_data['position'] != 'team']
player_data = match_data.sort_values(['player', 'date'], ascending = True)

In [23]:
#player_data = player_data['date', 'gameid','player','kills', 'deaths', 'assists', 'damagetochampions', 'wardsplaced', 
#                          'wardskilled', 'totalgold', 'total_cs','monsterkills']

#player_data = match_data.sort_values(['player', 'date'], ascending = True)
#player_data = player_data.groupby('object').rolling(10)['value'].mean()

In [24]:
def create_player_profile(player_df, window):
    
    #We will use a moving average based on the window.
    #Treat this as time dependent, 
    player_df = player_df[['date', 'gameid','player', 'position', 'side', 'champion', 'patch','kills','deaths',
                           'assists', 'damagetochampions', 'wardsplaced', 'wardskilled', 'totalgold', 
                           'total_cs','monsterkills']]
    
    player_df = player_df.sort_values(['player', 'date'], ascending = True)
    player_df = player_df.reset_index(drop=True) #need to drop index in new pandas version for groupby mean 
    
    #Values fields
    value_fields = ['player','kills', 'deaths', 'assists', 'damagetochampions','wardsplaced', 'wardskilled',
                    'totalgold', 'total_cs','monsterkills']
    player_df2 = player_df[value_fields]
    player_df2= player_df2.groupby(['player']).rolling(window).mean().shift(-window+1).reset_index().fillna(method = 'ffill')
    player_df2 = player_df2.drop('level_1', axis = 1)
    
    #Recombine
    player_df = player_df[['date', 'gameid', 'position', 'side', 'champion', 'patch']]
    player_df = pd.concat([player_df, player_df2], axis = 1)
    #player_df = player_df.set_index(['champion', 'patch'])
    
    player_df['patch'] = player_df['patch'].apply(lambda x : reformat_patch_number(str(x)))
    
    return(player_df)

In [25]:
player_profiles = create_player_profile(player_data, window = 3)

### Combine with champions data to create complete stage-1 dataset, before further engineering

1. Join the player-game data with their respective champion-patch data
2. Create red vs blue split for each game, and build our blue minus red deltas

In [26]:
player_profiles = pd.merge(player_profiles, champion_dataset, how = 'left', 
                           left_on = ['champion', 'patch'], right_on = ['champion', 'patch'])

In [27]:
player_profiles.columns

Index(['date', 'gameid', 'position', 'side', 'champion', 'patch', 'player',
       'assists', 'damagetochampions', 'deaths', 'kills', 'monsterkills',
       'total_cs', 'totalgold', 'wardskilled', 'wardsplaced', 'armor',
       'armorperlevel', 'attackdamage', 'attackdamageperlevel', 'attackrange',
       'attackspeed', 'attackspeedperlevel', 'crit', 'critperlevel',
       'gap_closer_value', 'hard_cc_value', 'hp', 'hpperlevel', 'hpregen',
       'hpregenperlevel', 'movespeed', 'mp', 'mpperlevel', 'mpregen',
       'mpregenperlevel', 'protection_value', 'soft_cc_value', 'spellblock',
       'spellblockperlevel', 'spells_average_range_value', 'Assassin',
       'Fighter', 'Mage', 'Marksman', 'Support', 'Tank'],
      dtype='object')

In [28]:
def head_to_head(df):
    
    #Split into blue and red
    blue = df[df['side'] == 'Blue']
    red = df[df['side'] == 'Red']
    
    blue = blue.drop('side', axis = 1)
    red = red.drop('side', axis = 1)
    
    blue = blue.set_index(['gameid', 'position', 'date'])
    red = red.set_index(['gameid', 'position', 'date'])
    
    #Rename red columns
    original_columns = red.columns
    red_columns = ['red_' + x for x in original_columns]
    red.columns = red_columns
    
    #Merge
    blue = pd.merge(blue, red, how = 'left', left_index = True, right_index = True)
    
    #Create delta columns
    delta_columns = []
    for column in original_columns:
        column_name = 'delta_' + column
        blue[column_name] = blue[column] - blue['red_' + column]
        delta_columns = delta_columns + [column_name]
        
    blue = blue[delta_columns]
    
    #
    
    return(blue)
    
    

In [29]:
player_profiles2 = player_profiles.drop(['champion', 'patch', 'player'], axis = 1)

In [30]:
head_to_head_output = head_to_head(player_profiles2)

In [31]:
head_to_head_output.columns

Index(['delta_assists', 'delta_damagetochampions', 'delta_deaths',
       'delta_kills', 'delta_monsterkills', 'delta_total_cs',
       'delta_totalgold', 'delta_wardskilled', 'delta_wardsplaced',
       'delta_armor', 'delta_armorperlevel', 'delta_attackdamage',
       'delta_attackdamageperlevel', 'delta_attackrange', 'delta_attackspeed',
       'delta_attackspeedperlevel', 'delta_crit', 'delta_critperlevel',
       'delta_gap_closer_value', 'delta_hard_cc_value', 'delta_hp',
       'delta_hpperlevel', 'delta_hpregen', 'delta_hpregenperlevel',
       'delta_movespeed', 'delta_mp', 'delta_mpperlevel', 'delta_mpregen',
       'delta_mpregenperlevel', 'delta_protection_value',
       'delta_soft_cc_value', 'delta_spellblock', 'delta_spellblockperlevel',
       'delta_spells_average_range_value', 'delta_Assassin', 'delta_Fighter',
       'delta_Mage', 'delta_Marksman', 'delta_Support', 'delta_Tank'],
      dtype='object')

In [32]:
head_to_head_output.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,delta_assists,delta_damagetochampions,delta_deaths,delta_kills,delta_monsterkills,delta_total_cs,delta_totalgold,delta_wardskilled,delta_wardsplaced,delta_armor,...,delta_soft_cc_value,delta_spellblock,delta_spellblockperlevel,delta_spells_average_range_value,delta_Assassin,delta_Fighter,delta_Mage,delta_Marksman,delta_Support,delta_Tank
gameid,position,date,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
5664-7271,top,2020-01-17 13:16:51,-3.0,2615.666667,-1.333333,1.666667,31.0,31.333333,535.666667,-1.0,0.666667,1.0,...,-2.0,-0.1,0.0,-870.3125,0.0,0.0,0.0,0.0,0.0,-1.0
5668-7282,top,2020-01-19 13:19:00,0.333333,1070.0,-1.0,1.666667,14.0,18.666667,1232.666667,-2.666667,-3.0,-1.0,...,-1.0,-0.1,0.0,-16748.4375,0.0,0.0,0.0,0.0,0.0,-1.0
5971-7347,top,2020-03-15 10:38:46,-3.333333,6322.0,0.0,-0.666667,15.0,60.333333,1559.333333,0.333333,-4.666667,-2.0,...,-1.0,0.1,0.0,-3486.875,0.0,0.0,0.0,0.0,0.0,1.0
5978-7365,top,2020-03-18 09:00:53,2.0,3548.333333,-0.333333,0.333333,16.0,12.333333,1543.666667,0.333333,-3.0,2.0,...,1.0,-0.1,0.0,3486.875,0.0,0.0,0.0,0.0,0.0,-1.0
5992-7398,top,2020-03-23 06:14:50,3.333333,1348.666667,-0.666667,-0.333333,28.333333,27.333333,2043.333333,-3.666667,7.666667,2.0,...,-1.0,0.0,0.0,15878.125,0.0,0.0,0.0,0.0,0.0,0.0


In [39]:
player_profiles2[(player_profiles2['gameid'] == '5664-7271') &( player_profiles2['position'] == 'top')]

Unnamed: 0,date,gameid,position,side,assists,damagetochampions,deaths,kills,monsterkills,total_cs,...,soft_cc_value,spellblock,spellblockperlevel,spells_average_range_value,Assassin,Fighter,Mage,Marksman,Support,Tank
1,2020-01-17 13:16:51,5664-7271,top,Blue,3.666667,12793.666667,1.333333,3.0,35.0,262.666667,...,1.0,32.0,1.25,3667.1875,0.0,1.0,0.0,0.0,0.0,0.0
3314,2020-01-17 13:16:51,5664-7271,top,Red,6.666667,10178.0,2.666667,1.333333,4.0,231.333333,...,3.0,32.1,1.25,4537.5,0.0,1.0,0.0,0.0,0.0,1.0
