## Data Wrangling: League of Legends Ranked Games

##### Import Module / Libraries

In [42]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sys
import os
from pandas.io.json import json_normalize

##### Loading CSV files into dataframes

In [99]:
champs = 'champs.csv'
champs_df = pd.read_csv(champs)

duration = 'matches.csv'
duration_df = pd.read_csv(duration)

summoner_spell = 'summoner_spell_info.json'
summoner_spell_df = pd.read_json(summoner_spell)

participants = 'participants.csv'
participants_df = pd.read_csv(participants)

stats1 = 'stats1.csv'
stats1_df = pd.read_csv(stats1)

stats2 = 'stats2.csv'
stats2_df = pd.read_csv(stats2, low_memory=False)

bans = 'teambans.csv'
bans_df = pd.read_csv(bans)

macro_stats = 'games.csv'
macro_stats_df = pd.read_csv(macro_stats)

### Prepare Summoner Spell Dataframe

In [84]:
summoner_spell = pd.DataFrame(columns=['data']) # Prepare Dataframe from JSON
for row in summoner_spell_df.data:
    summoner_spell = summoner_spell.append(json_normalize(row))
summoner_spell.reset_index(inplace = True, drop = True)

summoner_spell = summoner_spell[['id','name']] # Select only neccessary columns

summoner_spell.id = summoner_spell.id.astype(int) # Convert id to whole numbers

summoner_spell = summoner_spell.rename(index = str, columns = {"name":'summoner_spell'}) # Rename column

summoner_spell.head(5) # Confirm changes

Unnamed: 0,id,summoner_spell
0,1,Cleanse
1,11,Smite
2,12,Teleport
3,13,Clarity
4,14,Ignite


### Prepare Duration Dataframe

In [85]:
duration_df.head(2)

Unnamed: 0,id,gameid,platformid,queueid,seasonid,duration,creation,version
0,10,3187427022,EUW1,420,8,1909,1495068946860,7.10.187.9675
1,11,3187425281,EUW1,420,8,1693,1495066760778,7.10.187.9675


In [87]:
duration_df.duration = duration_df.duration * .01  # Convert duration to minutes

In [88]:
conditions = [ # Create list of current values
        duration_df.platformid == 'EUN1',
        duration_df.platformid == 'EUW1',
        duration_df.platformid == 'NA1',
        duration_df.platformid == 'TR1']
choices = ['Europe North', 'Europe West', 'North America', 'Turkey'] # Create list of new values

duration = duration_df[['platformid', 'seasonid', 'duration']].copy()

In [90]:
duration.platformid = np.select(conditions, choices) # Map conditions with choices
duration = duration.rename(index = str, columns = {"platformid": "Region","duration":"Duration","seasonid":'Season'}) #Rename columns

In [91]:
duration.head(2) # Confirm changes made

Unnamed: 0,Region,Season,Duration
0,Europe West,8,19.09
1,Europe West,8,16.93


In [92]:
path=r'/Users/Henry/Desktop/Springboard/Capstone Project/EDA' 
matches.to_csv(os.path.join(path,r'duration.csv')) # Export file to EDA folder

### Preparing Participants Dataframe

In [10]:
participants_df.head(2) # Explore dataframe

Unnamed: 0,id,matchid,player,championid,ss1,ss2,role,position
0,9,10,1,19,4,11,NONE,JUNGLE
1,10,10,2,267,3,4,DUO_SUPPORT,BOT


In [80]:
participants_merged = pd.merge(participants_df, # Merge champions dataframe with participants dataframe
                               champs_df,how='inner', left_on='championid'
                               , right_on='id').sort_values(by =['matchid'])

participants_columns = participants_merged[['matchid', # Select columns
                                            'player',
                                            'ss1',
                                            'ss2',
                                            'name']] 

participants = participants_columns.rename(index = str, #Rename columns
                                           columns = {"matchid": "Match ID",
                                                      "player":"Player",
                                                      "name": 'Champion'}) 

participants_merged = pd.merge(participants, # Merge participants dataframe with summoner_spells dataframe
                               summoner_spell,how='inner', left_on='ss1'
                               , right_on='id').sort_values(by =['Match ID'])

participants_merged2 = pd.merge(participants_merged, # Merge participants dataframe with summoner_spells dataframe
                               summoner_spell,how='inner', left_on='ss2'
                               , right_on='id').sort_values(by =['Match ID'])

participants = participants.reset_index() # Reset index

participants = participants_merged2[['Match ID', 'Champion', 'summoner_spell_x', 'summoner_spell_y']] # Select columns

participants = participants.rename(index = str, #Rename columns
                                           columns = {"summoner_spell_x": "SS1",
                                                      "summoner_spell_y":"SS2"}) 



In [82]:
participants.head(10) # Confirm preparation

Unnamed: 0,Match ID,Champion,SS1,SS2
0,10,Warwick,Flash,Smite
981535,10,Viktor,Flash,Exhaust
1155620,10,Galio,Flash,Teleport
199705,10,Draven,Heal,Flash
199706,10,Jinx,Heal,Flash
199707,10,Fiora,Teleport,Flash
199708,10,Nami,Exhaust,Flash
199709,10,Ahri,Ignite,Flash
199710,10,VelKoz,Ignite,Flash
199704,10,Skarner,Smite,Flash


In [93]:
path=r'/Users/Henry/Desktop/Springboard/Capstone Project/EDA' 
participants.to_csv(os.path.join(path,r'participants_SS.csv')) # Export file to EDA folder

### Preparing Team Bans

In [13]:
bans_df.head(2)

Unnamed: 0,matchid,teamid,championid,banturn
0,10,100,11,1
1,10,100,117,3


In [14]:
bans_merged = pd.merge(bans_df, # Merge champions dataframe with bans dataframe
                               champs_df,how='inner', left_on='championid'
                               , right_on='id').sort_values(by =['matchid'])
bans_columns = bans_merged[['matchid', # Select columns
                                            'banturn',
                                            'name']] 
bans = bans_columns.rename(index = str, #Rename columns
                                           columns = {"matchid": "Match ID",
                                                      "banturn":"Ban Turn",
                                                      "name": 'Champion'})

In [15]:
bans = bans.reset_index() # Reset index
bans = bans[['Match ID', 'Ban Turn', 'Champion']] # Reselect columns after resetting index

In [16]:
bans.head(2)

Unnamed: 0,Match ID,Ban Turn,Champion
0,10,1,Master Yi
1,10,6,Fizz


In [29]:
path=r'/Users/Henry/Desktop/Springboard/Capstone Project/EDA' 
bans.to_csv(os.path.join(path,r'bans.csv')) # Export file to EDA folder

### Preparing Micro Stats

In [17]:
stats1_df.head(2) # Explore stats1 dataframe

Unnamed: 0,id,win,item1,item2,item3,item4,item5,item6,trinket,kills,...,neutralminionskilled,ownjunglekills,enemyjunglekills,totcctimedealt,champlvl,pinksbought,wardsbought,wardsplaced,wardskilled,firstblood
0,9,0,3748,2003,3111,3053,1419,1042,3340,6,...,69,42,27,610,13,0,0,10,0,0
1,10,0,2301,3111,3190,3107,0,0,3364,0,...,1,1,0,211,14,1,0,17,3,0


In [18]:
stats2_df.head(2) # Explore stats2 dataframe

Unnamed: 0,id,win,item1,item2,item3,item4,item5,item6,trinket,kills,...,neutralminionskilled,ownjunglekills,enemyjunglekills,totcctimedealt,champlvl,pinksbought,wardsbought,wardsplaced,wardskilled,firstblood
0,1028382,0,1056,3001,1052,3020,1058,1026,3340,7,...,0,0,0,50,12,0,0,6,0,0
1,1028383,0,1041,2003,0,0,0,0,3340,0,...,11,11,0,114,3,0,0,0,0,0


In [19]:
stats = stats1_df.append([stats2_df]) # Union stats dataframes
stats.shape # Confirm union

(1834517, 56)

In [20]:
stats = stats[['id', 'win', 'kills', # Select Columns
               'neutralminionskilled','ownjunglekills','enemyjunglekills',
               'totcctimedealt', 'champlvl', 'pinksbought','wardsbought','wardsplaced','wardskilled','firstblood']]

In [30]:
stats.head(2)

Unnamed: 0,id,win,kills,neutralminionskilled,ownjunglekills,enemyjunglekills,totcctimedealt,champlvl,pinksbought,wardsbought,wardsplaced,wardskilled,firstblood
0,9,0,6,69,42,27,610,13,0,0,10,0,0
1,10,0,0,1,1,0,211,14,1,0,17,3,0


In [94]:
path=r'/Users/Henry/Desktop/Springboard/Capstone Project/EDA' 
stats.to_csv(os.path.join(path,r'micro_stats.csv')) # Export file to EDA folder

### Preparing Macro Stats

In [105]:
macro_stats_df.columns # Explore Dataframe

Index(['gameId', 'creationTime', 'gameDuration', 'seasonId', 'winner',
       'firstBlood', 'firstTower', 'firstInhibitor', 'firstBaron',
       'firstDragon', 'firstRiftHerald', 't1_champ1id', 't1_champ1_sum1',
       't1_champ1_sum2', 't1_champ2id', 't1_champ2_sum1', 't1_champ2_sum2',
       't1_champ3id', 't1_champ3_sum1', 't1_champ3_sum2', 't1_champ4id',
       't1_champ4_sum1', 't1_champ4_sum2', 't1_champ5id', 't1_champ5_sum1',
       't1_champ5_sum2', 't1_towerKills', 't1_inhibitorKills', 't1_baronKills',
       't1_dragonKills', 't1_riftHeraldKills', 't1_ban1', 't1_ban2', 't1_ban3',
       't1_ban4', 't1_ban5', 't2_champ1id', 't2_champ1_sum1', 't2_champ1_sum2',
       't2_champ2id', 't2_champ2_sum1', 't2_champ2_sum2', 't2_champ3id',
       't2_champ3_sum1', 't2_champ3_sum2', 't2_champ4id', 't2_champ4_sum1',
       't2_champ4_sum2', 't2_champ5id', 't2_champ5_sum1', 't2_champ5_sum2',
       't2_towerKills', 't2_inhibitorKills', 't2_baronKills', 't2_dragonKills',
       't2_riftHer

In [106]:
columns = ['t1_towerKills','t1_inhibitorKills','t1_dragonKills','t1_baronKills', # Select potential variables that can predict the outcome of a game
                  't2_towerKills','t2_inhibitorKills','t2_dragonKills','t2_baronKills',
                       'firstBlood', 'firstTower', 'firstInhibitor', 
                           'firstBaron','firstDragon', 'firstRiftHerald',
                              'winner'] 

In [108]:
macro_stats_df = macro_stats_df[columns] # Select columns
macro_stats_df.head() # Confirm changes

Unnamed: 0,t1_towerKills,t1_inhibitorKills,t1_dragonKills,t1_baronKills,t2_towerKills,t2_inhibitorKills,t2_dragonKills,t2_baronKills,firstBlood,firstTower,firstInhibitor,firstBaron,firstDragon,firstRiftHerald,winner
0,11,1,3,2,5,0,1,0,2,1,1,1,1,2,1
1,10,4,2,0,2,0,0,0,1,1,1,0,1,1,1
2,8,1,1,1,2,0,1,0,2,1,1,1,2,0,1
3,9,2,2,1,0,0,0,0,1,1,1,1,1,0,1
4,9,2,3,1,3,0,1,0,2,1,1,1,1,0,1


In [109]:
path=r'/Users/Henry/Desktop/Springboard/Capstone Project/EDA' 
teamstats_df.to_csv(os.path.join(path,r'macro_stats.csv')) # Export file to EDA folder