### League of Legends: Spring 2020 Dataset

This datasets contains all games across differnt professional League of Legends 

In [76]:
#import packages
import pandas as pd
import requests
import json
import pickle
from nltk.stem import PorterStemmer
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize 
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.model_selection import train_test_split
import numpy as np
import pdb

In [2]:
#----Read competitve history data set for list of patches ----#
patches = pd.read_csv('../data/2020 spring match data OraclesElixir 2020-05-15.csv')
patches = list(set(patches['patch']))
patches = [str(x) for x in patches]

In [3]:
def reformat_patch_number(patch):
    patch_split = patch.split('.')
    patch_split = [str(int(x)) for x in patch_split]
    patch_output = patch_split[0] + '.' + patch_split[1] + '.1'
    return(patch_output)
    

In [4]:
patches = [reformat_patch_number(x) for x in patches]

In [5]:
#----- In case we need to use API ------#

#One time api key creation
file_name = '/Users/horacefung/Documents/keys/lol_api_key'
#key = {'key':''} #don't show key
#output = open(file_name,'wb')
#pickle.dump(key, output)
#output.close()

input_file = open(file_name,'rb')
api_key = pickle.load(input_file)['key']
input_file.close()

#api_root_url = "https://www.googleapis.com/youtube/v3/videos?"
#url_params = "part=snippet&type=video&chart=mostPopular"

#url = api_root_url + url_params + api_key
#data = requests.get(url).json()

### Champion Data

Retreive and format champion data for each relevant patch

In [6]:
#----- Pull champion list -----#
url = 'http://ddragon.leagueoflegends.com/cdn/' + patches[-1] + '/data/en_US/champion.json'
data = requests.get(url).json()
champion_list = list(data['data'].keys())

In [7]:
def extract_data(champion, champion_name, patch):
    
    #Initialize porter object
    porter = PorterStemmer()
    
    #Split into base stats & spells & classes
    stats = champion['stats']
    passive = champion['passive']
    spells = champion['spells']
    classes = champion['tags']
    
    #Make sure even single class champions have type list
    if type(classes) == 'str':
        classes = [classes]
    
    #Keywords
    hard_cc_list = ['airborne', 'charm', 'flee', 'taunt', 'sleep', 'stun', 'supression', 'suspension', 
                    'stasis', 'pull', 'knock']
    soft_cc_list = ['blind', 'cripple', 'disarm', 'ground', 'knockdown', 'nearsight', 'root', 'silence', 'slow']
    gap_closer_list = ['dash', 'blink', 'leap', 'launch', 'movementspeed', 'teleport']
    vision_list = ['vision', 'sight']
    protection_list = ['shield', 'heal']
    
    #initialize variables
    hard_cc_value = 0
    soft_cc_value = 0
    spells_average_range_value = 0
    gap_closer_value = 0
    protection_value = 0
    
    #Passive
    passive = passive['description']
    passive = passive.lower()
    passive = passive.replace('movement speed', 'movementspeed') #specific logic for move speed
    passive = [porter.stem(x) for x in word_tokenize(passive)]
    hard_cc = len(list(set(passive) & set(hard_cc_list))) #unique key words
    soft_cc = len(list(set(passive) & set(soft_cc_list))) #unique key words
    protection = len(list(set(passive) & set(protection_list)))
    gap_closer = len(list(set(passive) & set(gap_closer_list)))
    
    #Update talley with passive
    hard_cc_value = hard_cc_value + hard_cc
    soft_cc_value = soft_cc_value + soft_cc
    protection_value = protection_value + protection
    gap_closer_value = gap_closer_value + gap_closer
    
    
    #Four spells
    for i in range(4):
        
        #----Extract text information from tooltip
        tooltip = spells[i]['tooltip'].lower()
        tooltip = tooltip.replace('movement speed', 'movementspeed') #specific logic for move speed
        tooltip = [porter.stem(x) for x in word_tokenize(tooltip)]
        hard_cc = len(list(set(tooltip) & set(hard_cc_list))) #unique key words
        soft_cc = len(list(set(tooltip) & set(soft_cc_list))) #unique key words
        protection = len(list(set(tooltip) & set(protection_list)))
        gap_closer = len(list(set(tooltip) & set(gap_closer_list)))
        spells_range = np.mean(spells[i]['range'])
        
        #Update talley
        hard_cc_value = hard_cc_value + hard_cc
        soft_cc_value = soft_cc_value + soft_cc
        protection_value = protection_value + protection
        gap_closer_value = gap_closer_value + gap_closer
        spells_average_range_value = np.mean([spells_average_range_value, spells_range])
        
    #
    
    #-----Setup Dataframe---------#
    dict_temp = {'champion' : champion_name,
                 'patch' : patch,
                 'hard_cc_value' : hard_cc_value, 
                 'soft_cc_value' : soft_cc_value,
                 'spells_average_range_value' : spells_average_range_value,
                 'gap_closer_value' : gap_closer_value,
                 'protection_value' : protection_value,
                 'classes': [classes]} #make this a list
    
    dict_temp = {**stats, **dict_temp}
    
    output_df = pd.DataFrame(dict_temp, index = [0])
    output_df = output_df.set_index(['champion', 'patch'])
    
    
    return(output_df)

In [8]:
#----- Pull Relevant Champion Patch Data ---------#
def data_pull(patches, champion_list):
    
    champion_output = pd.DataFrame()
    loops = len(patches)
    counter = 0
    
    for patch in patches:
        
        for champion in champion_list:
            url = 'http://ddragon.leagueoflegends.com/cdn/{}/data/en_US/champion/{}.json'.format(patch, champion)
            
            try:
                data = requests.get(url).json()
                data = data['data'][champion]
                champion = extract_data(data, champion, patch)
                champion_output = pd.concat([champion_output, champion])
                #print('Added:' + champion)
                #print(champion)
            except:
                continue
                #print('Request failed')
                #do nothing if champion not in this patch, e.g. new releases
        
        counter = counter + 1
        print('Completed patch: ' + patch + ' | ' + str(counter) + '/' + str(loops))
    
    champion_output = champion_output.reset_index()
    
    #One-hot encode champion classes. This function is pretty sweet
    mlb = MultiLabelBinarizer()
    champion_output = champion_output.join(pd.DataFrame(mlb.fit_transform(champion_output.pop('classes')),
                                                        columns=mlb.classes_,
                                                        index=champion_output.index))
    
    
    
    return(champion_output)

In [9]:
champion_dataset = data_pull(patches, champion_list)

Completed patch: 10.4.1 | 1/8
Completed patch: 10.6.1 | 2/8
Completed patch: 10.1.1 | 3/8
Completed patch: 10.2.1 | 4/8
Completed patch: 10.3.1 | 5/8
Completed patch: 10.5.1 | 6/8
Completed patch: 10.7.1 | 7/8
Completed patch: 10.8.1 | 8/8


In [20]:
champion_dataset.head()

Unnamed: 0,champion,patch,armor,armorperlevel,attackdamage,attackdamageperlevel,attackrange,attackspeed,attackspeedperlevel,crit,...,soft_cc_value,spellblock,spellblockperlevel,spells_average_range_value,Assassin,Fighter,Mage,Marksman,Support,Tank
0,Evelynn,10.4.1,37.0,3.5,61.0,3.0,125,0.667,2.1,0,...,1,32.1,1.25,12777.5,1,0,1,0,0,0
1,JarvanIV,10.4.1,34.0,3.6,64.0,3.4,175,0.658,2.5,0,...,1,32.1,1.25,666.25,0,1,0,0,0,1
2,Kalista,10.4.1,23.0,4.0,69.0,4.0,525,0.694,4.0,0,...,1,30.0,0.5,1446.875,0,0,0,1,0,0
3,Zilean,10.4.1,24.0,3.8,51.64,3.0,550,0.625,2.13,0,...,1,30.0,0.5,718.75,0,0,1,0,1,0
4,Jhin,10.4.1,24.0,3.5,59.0,4.7,550,0.625,0.0,0,...,3,30.0,0.5,13096.875,0,0,1,1,0,0


In [13]:
#---- Pickle Patch Data -----#
file_name = '../data/patch_notes.pickle'
#output = open(file_name,'wb')
#pickle.dump(champion_dataset, output)
#output.close()

#input_file = open(file_name,'rb')
#patch_data = pickle.load(input_file)
#input_file.close()

In [21]:
match_data = pd.read_csv('../data/2020 spring match data OraclesElixir 2020-05-15.csv')

In [22]:
columns = ['date','gameid', 'patch','side', 'position', 'player','champion','ban1', 'ban2', 'ban3', 'ban4', 'ban5',
           'gamelength','kills', 'deaths', 'assists', 'damagetochampions','wardsplaced', 'wardskilled', 'totalgold',
           'total cs', 'monsterkills', 'result']

match_data = match_data[columns]
match_data = match_data.rename(columns = {'total cs' : 'total_cs'})
match_data = match_data[match_data['position'] != 'team']
player_data = match_data.sort_values(['player', 'date'], ascending = True)

In [23]:
#player_data = player_data['date', 'gameid','player','kills', 'deaths', 'assists', 'damagetochampions', 'wardsplaced', 
#                          'wardskilled', 'totalgold', 'total_cs','monsterkills']

#player_data = match_data.sort_values(['player', 'date'], ascending = True)
#player_data = player_data.groupby('object').rolling(10)['value'].mean()

In [24]:
def create_player_profile(player_df, window):
    
    #We will use a moving average based on the window.
    #Treat this as time dependent, 
    player_df = player_df[['date', 'gameid','player', 'position', 'side', 'champion', 'patch','kills','deaths',
                           'assists', 'damagetochampions', 'wardsplaced', 'wardskilled', 'totalgold', 
                           'total_cs','monsterkills']]
    
    player_df = player_df.sort_values(['player', 'date'], ascending = True)
    player_df = player_df.reset_index(drop=True) #need to drop index in new pandas version for groupby mean 
    
    #Values fields
    value_fields = ['player','kills', 'deaths', 'assists', 'damagetochampions','wardsplaced', 'wardskilled',
                    'totalgold', 'total_cs','monsterkills']
    player_df2 = player_df[value_fields]
    player_df2= player_df2.groupby(['player']).rolling(window).mean().shift(-window+1).reset_index().fillna(method = 'ffill')
    player_df2 = player_df2.drop('level_1', axis = 1)
    
    #Recombine
    player_df = player_df[['date', 'gameid', 'position', 'side', 'champion', 'patch']]
    player_df = pd.concat([player_df, player_df2], axis = 1)
    #player_df = player_df.set_index(['champion', 'patch'])
    
    player_df['patch'] = player_df['patch'].apply(lambda x : reformat_patch_number(str(x)))
    
    return(player_df)

In [25]:
player_profiles = create_player_profile(player_data, window = 3)

### Combine with champions data to create complete stage-1 dataset, before further engineering

1. Join the player-game data with their respective champion-patch data
2. Create red vs blue split for each game, and build our blue minus red deltas

In [26]:
player_profiles = pd.merge(player_profiles, champion_dataset, how = 'left', 
                           left_on = ['champion', 'patch'], right_on = ['champion', 'patch'])

In [27]:
player_profiles.columns

Index(['date', 'gameid', 'position', 'side', 'champion', 'patch', 'player',
       'assists', 'damagetochampions', 'deaths', 'kills', 'monsterkills',
       'total_cs', 'totalgold', 'wardskilled', 'wardsplaced', 'armor',
       'armorperlevel', 'attackdamage', 'attackdamageperlevel', 'attackrange',
       'attackspeed', 'attackspeedperlevel', 'crit', 'critperlevel',
       'gap_closer_value', 'hard_cc_value', 'hp', 'hpperlevel', 'hpregen',
       'hpregenperlevel', 'movespeed', 'mp', 'mpperlevel', 'mpregen',
       'mpregenperlevel', 'protection_value', 'soft_cc_value', 'spellblock',
       'spellblockperlevel', 'spells_average_range_value', 'Assassin',
       'Fighter', 'Mage', 'Marksman', 'Support', 'Tank'],
      dtype='object')

In [43]:
def head_to_head(df):
    
    #Split into blue and red
    blue = df[df['side'] == 'Blue']
    red = df[df['side'] == 'Red']
    
    blue = blue.drop('side', axis = 1)
    red = red.drop('side', axis = 1)
    
    blue = blue.set_index(['gameid', 'position', 'date'])
    red = red.set_index(['gameid', 'position', 'date'])
    
    #Rename red columns
    original_columns = red.columns
    red_columns = ['red_' + x for x in original_columns]
    red.columns = red_columns
    
    #Merge
    blue = pd.merge(blue, red, how = 'left', left_index = True, right_index = True)
    
    #Create delta columns
    delta_columns = []
    for column in original_columns:
        column_name = 'delta_' + column
        blue[column_name] = blue[column] - blue['red_' + column]
        delta_columns = delta_columns + [column_name]
        
    blue = blue[delta_columns]
    
    #Sum up for now
    blue = blue.reset_index()
    blue = blue.drop(['position', 'date'], axis = 1)
    blue = blue.groupby(['gameid']).sum()
    
    return(blue)
    
    

In [41]:
player_profiles2 = player_profiles.drop(['champion', 'patch', 'player'], axis = 1)

In [44]:
head_to_head_output = head_to_head(player_profiles2)

In [45]:
head_to_head_output.columns

Index(['delta_assists', 'delta_damagetochampions', 'delta_deaths',
       'delta_kills', 'delta_monsterkills', 'delta_total_cs',
       'delta_totalgold', 'delta_wardskilled', 'delta_wardsplaced',
       'delta_armor', 'delta_armorperlevel', 'delta_attackdamage',
       'delta_attackdamageperlevel', 'delta_attackrange', 'delta_attackspeed',
       'delta_attackspeedperlevel', 'delta_crit', 'delta_critperlevel',
       'delta_gap_closer_value', 'delta_hard_cc_value', 'delta_hp',
       'delta_hpperlevel', 'delta_hpregen', 'delta_hpregenperlevel',
       'delta_movespeed', 'delta_mp', 'delta_mpperlevel', 'delta_mpregen',
       'delta_mpregenperlevel', 'delta_protection_value',
       'delta_soft_cc_value', 'delta_spellblock', 'delta_spellblockperlevel',
       'delta_spells_average_range_value', 'delta_Assassin', 'delta_Fighter',
       'delta_Mage', 'delta_Marksman', 'delta_Support', 'delta_Tank'],
      dtype='object')

### Merge back match data to get results

In [74]:
target = match_data[['gameid', 'side', 'result']]
target = target[(target['side'] == 'Blue')].drop('side', axis = 1).drop_duplicates()
final = pd.merge(head_to_head_output, target, how = 'left', left_on = 'gameid', right_on = 'gameid')

In [77]:
x = final[final.columns[~final.columns.isin(['result'])]]
y = final['result']

In [79]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 0)

Unnamed: 0,gameid,delta_assists,delta_damagetochampions,delta_deaths,delta_kills,delta_monsterkills,delta_total_cs,delta_totalgold,delta_wardskilled,delta_wardsplaced,...,delta_soft_cc_value,delta_spellblock,delta_spellblockperlevel,delta_spells_average_range_value,delta_Assassin,delta_Fighter,delta_Mage,delta_Marksman,delta_Support,delta_Tank
1221,ESPORTSTMNT06/1160095,6.666667e+00,-4721.333333,-8.666667,4.000000,5.666667e+00,23.666667,3855.000000,-6.000000e+00,-8.666667,...,1.0,2.0,0.75,-18004.0625,1.0,1.0,-2.0,-1.0,1.0,0.0
644,ESPORTSTMNT02/1292006,-2.166667e+01,4549.000000,5.333333,-9.666667,-1.566667e+01,107.666667,-1640.666667,6.000000e+00,26.333333,...,-5.0,2.1,0.75,-20351.5625,0.0,1.0,0.0,0.0,0.0,-1.0
70,5967-7338,-1.000000e+00,7116.333333,1.333333,0.333333,2.033333e+01,-29.333333,-2051.666667,-1.600000e+01,2.000000,...,1.0,-2.1,-1.25,-2541.8750,-1.0,2.0,0.0,0.0,0.0,-2.0
541,ESPORTSTMNT01/1325066,0.000000e+00,9719.666667,0.666667,-1.333333,2.333333e+01,206.666667,5835.333333,2.233333e+01,37.666667,...,-1.0,-2.1,-0.75,16329.3750,0.0,0.0,1.0,0.0,1.0,-2.0
10,5659-7259,-1.800000e+01,-13699.000000,7.333333,-7.333333,-3.766667e+01,2.666667,-6792.000000,-1.066667e+01,3.000000,...,0.0,0.0,0.00,-2180.6250,-2.0,0.0,0.0,1.0,0.0,1.0
905,ESPORTSTMNT03/1334908,2.200000e+01,17686.333333,-5.333333,1.333333,2.000000e+00,-60.666667,1345.666667,6.666667e+00,7.000000,...,-1.0,0.0,0.00,27329.0625,0.0,0.0,-1.0,0.0,1.0,-1.0
268,6073-7564,-2.333333e+00,-186.333333,-1.666667,-0.333333,7.666667e+00,-25.000000,537.333333,-1.633333e+01,17.333333,...,-1.0,0.1,0.00,9995.3125,0.0,0.0,-1.0,0.0,1.0,0.0
487,ESPORTSTMNT01/1314267,2.200000e+01,17865.000000,0.333333,9.000000,2.666667e+00,-115.000000,2432.000000,-2.000000e+00,-40.333333,...,-4.0,4.1,1.50,13411.5625,0.0,1.0,-2.0,-1.0,1.0,2.0
436,ESPORTSTMNT01/1304412,-2.333333e+00,-21963.000000,-0.666667,-2.666667,-3.100000e+01,-129.000000,-9144.333333,-5.333333e+00,-23.666667,...,0.0,4.1,0.00,15935.9375,0.0,0.0,0.0,0.0,0.0,0.0
64,5964-7332,1.566667e+01,-2896.666667,-1.666667,7.333333,-1.400000e+01,-80.333333,3233.333333,1.666667e+01,9.666667,...,-1.0,-1.9,-1.25,-3175.0000,-1.0,1.0,0.0,0.0,1.0,0.0


In [None]:
#---- Pickle Patch Data -----#
file_name = '../data/x_train.pickle'
output = open(file_name,'wb')
pickle.dump(x_train, output)
output.close()

file_name = '../data/x_train.pickle'
output = open(file_name,'wb')
pickle.dump(x_train, output)
output.close()

