Topic: Project 3      
Subject: Creating Dataframes for EDA   
Date: 10/25/2017   
Name: Zach Heick  

**Summary**: The objective of this notebook is to build organized dataframes that make creating visualizations easier. The dataframes hold the pick and ban counts for each champion, along with each champion's role and win rate.

In [1]:
import pickle
import pandas as pd
import numpy as np

In [2]:
# Load the two pickled inputs: per-game data and per-champion info.
# NOTE(review): unpickling can execute arbitrary code; only load pickles from a trusted source.
games_df, champ_df = (
    pd.read_pickle(pickle_path)
    for pickle_path in ('EDA_df.pickle', 'champ_info.pickle')
)

## Picks and Bans

In [3]:
# Stack the ten pick columns (five per side, blue first) into one long Series
# of champion names, so a single value_counts() gives picks per champion.
pick_cols = [
    '{}_champ{}id'.format(side, slot)
    for side in ('blue', 'red')
    for slot in range(1, 6)
]
picks = pd.concat([games_df[col] for col in pick_cols], ignore_index=True)

# Name the counts explicitly instead of renaming column 0 afterwards: the name
# value_counts() gives its result differs between pandas versions, and a rename
# keyed on 0 silently does nothing when the column is called something else.
picks_df = picks.value_counts().rename('Picks').to_frame()

# Get the role of each champion.
# Read the value straight from a name -> role lookup rather than parsing the
# printed form of a filtered Series, whose layout depends on how the index is
# rendered. Champions with no match in champ_df get NaN as their role.
role_by_name = champ_df.drop_duplicates(subset='name').set_index('name')['role']
roles = role_by_name.reindex(picks_df.index).tolist()
picks_df['Role'] = roles

In [4]:
# Spot-check five randomly chosen champions.
picks_df.sample(n=5)

Unnamed: 0,Picks,Role
Katarina,3188,assassin
Kindred,1067,marksman
Braum,3507,support
Viktor,1652,mage
Tryndamere,3859,fighter


In [5]:
# Stack the ten ban columns (five per side, blue first) into one long Series
# of champion names, so a single value_counts() gives bans per champion.
ban_cols = [
    '{}_ban{}'.format(side, slot)
    for side in ('blue', 'red')
    for slot in range(1, 6)
]
bans = pd.concat([games_df[col] for col in ban_cols], ignore_index=True)

# Name the counts explicitly instead of renaming column 0 afterwards: the name
# value_counts() gives its result differs between pandas versions, and a rename
# keyed on 0 silently does nothing when the column is called something else.
bans_df = bans.value_counts().rename('Bans').to_frame()

# Get the role of each champion.
# Read the value straight from a name -> role lookup rather than parsing the
# printed form of a filtered Series, whose layout depends on how the index is
# rendered. Champions with no match in champ_df get NaN as their role.
role_by_name = champ_df.drop_duplicates(subset='name').set_index('name')['role']
roles = role_by_name.reindex(bans_df.index).tolist()
bans_df['Role'] = roles

In [6]:
# Spot-check five randomly chosen champions.
bans_df.sample(n=5)

Unnamed: 0,Bans,Role
Akali,4235,assassin
Singed,471,tank
Draven,19087,marksman
Irelia,1076,fighter
Galio,1956,tank


Merge each champion's pick count with its ban count. This is a left join on the champion name, so every picked champion is kept even if it was never banned.

In [7]:
# Index-on-index left join: keep every row of picks_df and attach the matching
# bans_df row. Columns present in both frames get the '_x' / '_y' suffixes.
picks_and_bans_df = picks_df.join(bans_df, how='left', lsuffix='_x', rsuffix='_y')

In [8]:
# Look up each champion's role by name.
# Read the value straight from a name -> role lookup rather than parsing the
# printed form of a filtered Series, whose layout depends on how the index is
# rendered. Champions with no match in champ_df get NaN as their role.
role_by_name = champ_df.drop_duplicates(subset='name').set_index('name')['role']
roles = role_by_name.reindex(picks_and_bans_df.index).tolist()

# Drop extra columns (the suffixed Role copies) and attach a single Role column.
# errors='ignore' keeps the cell re-runnable: on a second run the suffixed
# columns are already gone and Role is simply overwritten.
picks_and_bans_df = (
    picks_and_bans_df
    .drop(['Role_x', 'Role_y'], axis=1, errors='ignore')
    .assign(Role=roles)
)

In [9]:
# Spot-check five randomly chosen champions.
picks_and_bans_df.sample(n=5)

Unnamed: 0,Picks,Bans,Role
Nunu,1210,355,tank
Lulu,6275,12491,support
Nautilus,1965,185,tank
Shaco,2380,1417,assassin
Skarner,544,64,fighter


## Win Rate

For each champion, get their win rate on blue and red side, then average the two win rates.

In [10]:
# Per-champion win rate: the win rate on blue side and the win rate on red side,
# averaged. win_rate ends up with one entry per row of picks_df, in index order.
blue_cols = ['blue_champ{}id'.format(slot) for slot in range(1, 6)]
red_cols = ['red_champ{}id'.format(slot) for slot in range(1, 6)]

# None of these depend on the champion, so build them once outside the loop.
blue_picks = games_df[blue_cols]
red_picks = games_df[red_cols]
blue_won = games_df['winner'] == 1   # winner == 1 is counted as a blue-side win
red_won = games_df['winner'] == 2    # winner == 2 is counted as a red-side win

win_rate = []

for champ in picks_df.index:
    # Exact equality, not str.contains(): contains() does a regex substring
    # search, so a name that is a substring of another name (a champion called
    # 'Vi' would also match 'Viktor') is credited with the other champion's
    # games, and characters such as '.' in a name act as wildcards.
    on_blue = (blue_picks == champ).any(axis=1)
    on_red = (red_picks == champ).any(axis=1)

    # The mean of a boolean Series is the fraction of True values, i.e.
    # wins / games. It is NaN (instead of a ZeroDivisionError) when the
    # champion has no games on that side.
    blue_win_rate = blue_won[on_blue].mean()
    red_win_rate = red_won[on_red].mean()

    # Average the sides that have data; a champion picked on only one side
    # gets that side's rate rather than aborting the whole loop.
    side_rates = [rate for rate in (blue_win_rate, red_win_rate) if not np.isnan(rate)]
    avg_win_rate = sum(side_rates) / len(side_rates) if side_rates else np.nan

    win_rate.append(avg_win_rate)

In [11]:
# Attach the win rates (one per picks_df row, in index order) as a new column.
picks_df = picks_df.assign(Win_rate=win_rate)

In [12]:
# Spot-check five randomly chosen champions, now including Win_rate.
picks_df.sample(n=5)

Unnamed: 0,Picks,Role,Win_rate
Singed,1328,tank,0.535265
Olaf,1476,fighter,0.476366
Annie,3073,mage,0.502903
Twitch,8277,marksman,0.529267
Zyra,2215,mage,0.50154


Pickle the dataframes for visualization.

In [13]:
# Write each finished frame to its own pickle file.
for frame, pickle_path in (
    (picks_df, 'picks.pickle'),
    (bans_df, 'bans.pickle'),
    (picks_and_bans_df, 'picks_and_bans.pickle'),
):
    frame.to_pickle(pickle_path)