# Data Analysis

In [2]:
import pandas as pd
import numpy as np
import pickle

We import the data from the pickle file as JSON:

In [3]:
data_json = pd.read_pickle('summoner_stats.pkl')

## Dataframes generation

We will generate two raw dataframe: one of them will contain the preffered champion statdistics and the other one the stadistics for all champions.

In [4]:
def build_dataframes(json):
    pref_list = []
    total_list = []
    for index in json:
        pref_dict = json[index]['pref_champ']['stats']
        pref_dict['summID'] = index
        pref_dict['champID'] = json[index]['pref_champ']['id']
        pref_list.append(pref_dict)
        total_dict = json[index]['total']['stats']
        total_dict['summID'] = index
        total_list.append(total_dict)
    df_pc = pd.DataFrame(pref_list).set_index('summID')
    df_total = pd.DataFrame(total_list).set_index('summID')
    return df_pc, df_total

In [5]:
df_pc, df_total = build_dataframes(data_json)

In [6]:
def put_elos(json, df):
    list_league, list_division = [], []
    for index in df.index:
        values = json[index]
        if values == '':
            league, division = [np.nan]*2
        else:
            league, division = values.split('_')
        list_league.append(league)
        list_division.append(division)
    
    df['league'] = list_league
    df['division'] = list_division
            
    return df

data_json = pd.read_pickle('summoner_elos.pkl')
df_pc = put_elos( data_json, df_pc)
df_total = put_elos( data_json, df_total)

In [7]:
df_pc = df_pc.dropna(axis=0, subset = ['league'])
df_total = df_total.dropna(axis = 0, subset = ['league'])

df_pc.league.value_counts()

platinum    8157
gold        4604
diamond     4601
silver      1376
bronze       470
master        27
Name: league, dtype: int64

In [8]:
from sklearn import preprocessing
encoLeague, encoDiv = preprocessing.LabelEncoder(), preprocessing.LabelEncoder()
encoLeague.fit(['bronze', 'silver', 'gold', 'platinum', 'diamond', 'master'])
print('League Encoder Classes: ', encoLeague.classes_)
encoDiv.fit(['1', '2', '3', '4', '5'])
print('Division Encoder Classes: ', encoDiv.classes_)

df_pc['league'] = encoLeague.transform(df_pc['league'])
df_total['league'] = encoLeague.transform(df_total['league'])

df_pc['division'] = encoDiv.transform(df_pc['division'])
df_total['division'] = encoDiv.transform(df_total['division'])

League Encoder Classes:  ['bronze' 'diamond' 'gold' 'master' 'platinum' 'silver']
Division Encoder Classes:  ['1' '2' '3' '4' '5']


We inspect the columns for the 'Preferred champion" dataframe. We also set the "Summoner ID" as the index. 

In [9]:
print(df_pc.columns)
df_pc.head()

Index(['champID', 'maxChampionsKilled', 'maxNumDeaths',
       'mostChampionKillsPerSession', 'mostSpellsCast', 'totalAssists',
       'totalChampionKills', 'totalDamageDealt', 'totalDamageTaken',
       'totalDeathsPerSession', 'totalDoubleKills', 'totalFirstBlood',
       'totalGoldEarned', 'totalMagicDamageDealt', 'totalMinionKills',
       'totalPentaKills', 'totalPhysicalDamageDealt', 'totalQuadraKills',
       'totalSessionsLost', 'totalSessionsPlayed', 'totalSessionsWon',
       'totalTripleKills', 'totalTurretsKilled', 'totalUnrealKills', 'league',
       'division'],
      dtype='object')


Unnamed: 0_level_0,champID,maxChampionsKilled,maxNumDeaths,mostChampionKillsPerSession,mostSpellsCast,totalAssists,totalChampionKills,totalDamageDealt,totalDamageTaken,totalDeathsPerSession,...,totalPhysicalDamageDealt,totalQuadraKills,totalSessionsLost,totalSessionsPlayed,totalSessionsWon,totalTripleKills,totalTurretsKilled,totalUnrealKills,league,division
summID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
45179106,157,20,19,20,0,827,775,26097768,2894729,832,...,19193861,0,60,121,61,11,222,0,4,2
27518339,14,12,15,12,0,669,319,9157413,2455608,476,...,5583162,0,18,61,43,1,52,0,4,0
37827325,67,24,18,24,0,1289,1887,32533122,5559731,1681,...,28892582,4,108,221,113,40,325,0,4,1
54158691,79,11,11,11,0,533,218,6093978,1874068,291,...,1079994,0,31,53,22,1,12,0,1,3
380210,13,16,12,16,0,78,93,1601136,306650,77,...,237764,0,7,13,6,2,14,0,2,1


We reorder the columns and drop some of them like the 0 value stats (deprecated data from the Riot API) and unrelevant data ('unreal kills', 'games against IA', etc..)

In [10]:
df_pc = df_pc[['champID', 'totalSessionsPlayed', 'totalSessionsWon', 'totalSessionsLost', 'totalGoldEarned', 
               'totalMinionKills', 'totalTurretsKilled', 'totalPhysicalDamageDealt', 'totalMagicDamageDealt', 
               'totalDamageTaken', 'totalDeathsPerSession', 'maxNumDeaths', 'totalAssists', 'totalChampionKills', 'totalDoubleKills', 
               'totalTripleKills', 'totalQuadraKills', 'totalPentaKills', 'mostChampionKillsPerSession', 'league']]
print(df_pc.dtypes)
df_pc.head()

champID                        int64
totalSessionsPlayed            int64
totalSessionsWon               int64
totalSessionsLost              int64
totalGoldEarned                int64
totalMinionKills               int64
totalTurretsKilled             int64
totalPhysicalDamageDealt       int64
totalMagicDamageDealt          int64
totalDamageTaken               int64
totalDeathsPerSession          int64
maxNumDeaths                   int64
totalAssists                   int64
totalChampionKills             int64
totalDoubleKills               int64
totalTripleKills               int64
totalQuadraKills               int64
totalPentaKills                int64
mostChampionKillsPerSession    int64
league                         int64
dtype: object


Unnamed: 0_level_0,champID,totalSessionsPlayed,totalSessionsWon,totalSessionsLost,totalGoldEarned,totalMinionKills,totalTurretsKilled,totalPhysicalDamageDealt,totalMagicDamageDealt,totalDamageTaken,totalDeathsPerSession,maxNumDeaths,totalAssists,totalChampionKills,totalDoubleKills,totalTripleKills,totalQuadraKills,totalPentaKills,mostChampionKillsPerSession,league
summID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
45179106,157,121,61,60,1654026,28111,222,19193861,6781835,2894729,832,19,827,775,75,11,0,0,20,4
27518339,14,61,43,18,756643,10699,52,5583162,3492404,2455608,476,15,669,319,7,1,0,0,12,4
37827325,67,221,113,108,2818597,41787,325,28892582,546693,5559731,1681,18,1289,1887,267,40,4,2,24,4
54158691,79,53,22,31,549701,2678,12,1079994,3918276,1874068,291,11,533,218,11,1,0,0,11,1
380210,13,13,6,7,153017,2337,14,237764,1361107,306650,77,12,78,93,11,2,0,0,16,2


In [11]:
print(df_pc.columns)
df_pc.head()

Index(['champID', 'totalSessionsPlayed', 'totalSessionsWon',
       'totalSessionsLost', 'totalGoldEarned', 'totalMinionKills',
       'totalTurretsKilled', 'totalPhysicalDamageDealt',
       'totalMagicDamageDealt', 'totalDamageTaken', 'totalDeathsPerSession',
       'maxNumDeaths', 'totalAssists', 'totalChampionKills',
       'totalDoubleKills', 'totalTripleKills', 'totalQuadraKills',
       'totalPentaKills', 'mostChampionKillsPerSession', 'league'],
      dtype='object')


Unnamed: 0_level_0,champID,totalSessionsPlayed,totalSessionsWon,totalSessionsLost,totalGoldEarned,totalMinionKills,totalTurretsKilled,totalPhysicalDamageDealt,totalMagicDamageDealt,totalDamageTaken,totalDeathsPerSession,maxNumDeaths,totalAssists,totalChampionKills,totalDoubleKills,totalTripleKills,totalQuadraKills,totalPentaKills,mostChampionKillsPerSession,league
summID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
45179106,157,121,61,60,1654026,28111,222,19193861,6781835,2894729,832,19,827,775,75,11,0,0,20,4
27518339,14,61,43,18,756643,10699,52,5583162,3492404,2455608,476,15,669,319,7,1,0,0,12,4
37827325,67,221,113,108,2818597,41787,325,28892582,546693,5559731,1681,18,1289,1887,267,40,4,2,24,4
54158691,79,53,22,31,549701,2678,12,1079994,3918276,1874068,291,11,533,218,11,1,0,0,11,1
380210,13,13,6,7,153017,2337,14,237764,1361107,306650,77,12,78,93,11,2,0,0,16,2


Now we perform the same actions for the 'All champions' dataframe.

In [12]:
df_total = df_total[['totalSessionsPlayed', 'totalSessionsWon', 'totalSessionsLost', 'totalGoldEarned', 
               'totalMinionKills', 'totalNeutralMinionsKilled', 'totalTurretsKilled', 'totalPhysicalDamageDealt', 
                'totalMagicDamageDealt', 'totalDamageTaken', 'totalHeal', 'totalDeathsPerSession', 'totalAssists', 
                'totalChampionKills', 'totalDoubleKills', 'totalTripleKills', 'totalQuadraKills', 'totalPentaKills',
                    'league']]
print(df_total.dtypes)
df_total.head()

totalSessionsPlayed          int64
totalSessionsWon             int64
totalSessionsLost            int64
totalGoldEarned              int64
totalMinionKills             int64
totalNeutralMinionsKilled    int64
totalTurretsKilled           int64
totalPhysicalDamageDealt     int64
totalMagicDamageDealt        int64
totalDamageTaken             int64
totalHeal                    int64
totalDeathsPerSession        int64
totalAssists                 int64
totalChampionKills           int64
totalDoubleKills             int64
totalTripleKills             int64
totalQuadraKills             int64
totalPentaKills              int64
league                       int64
dtype: object


Unnamed: 0_level_0,totalSessionsPlayed,totalSessionsWon,totalSessionsLost,totalGoldEarned,totalMinionKills,totalNeutralMinionsKilled,totalTurretsKilled,totalPhysicalDamageDealt,totalMagicDamageDealt,totalDamageTaken,totalHeal,totalDeathsPerSession,totalAssists,totalChampionKills,totalDoubleKills,totalTripleKills,totalQuadraKills,totalPentaKills,league
summID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
45179106,1089,553,536,12878538,158646,17539,1227,93434625,52221094,26529605,3784525,7378,10533,5813,501,63,5,1,4
27518339,862,440,422,9706950,109250,11752,679,53274448,47702698,26750977,5130288,7648,8112,4765,377,35,4,0,4
37827325,1162,580,582,14373663,192930,20136,1347,128882418,38694164,31707279,3754321,9054,8141,8543,971,162,30,5,4
54158691,446,215,231,4938835,48409,15401,377,30262946,23064722,13102568,2175249,2828,3879,2170,182,19,2,0,1
380210,214,100,114,2297091,23942,4436,180,12064973,10003829,5014175,699482,1232,1634,1373,127,15,1,0,2


## Data filtering and standarization by sessions played

We only consider the accounts that have 50 or more games played. 

In [13]:
df_pc = df_pc[df_pc.totalSessionsPlayed >= 50]
df_total = df_total[df_total.totalSessionsPlayed >= 50]

The "total" stadisitics are calculated as the sum of the totalSessionsPlayed so we must calculate the average data for all games.

In [14]:
df_pc['GoldEarned'] = df_pc['totalGoldEarned'] / df_pc['totalSessionsPlayed']
df_pc['MinionKills'] = df_pc['totalMinionKills'] / df_pc['totalSessionsPlayed']             
df_pc['PhysicalDamageDealt'] = df_pc['totalPhysicalDamageDealt'] / df_pc['totalSessionsPlayed']     
df_pc['MagicDamageDealt'] = df_pc['totalMagicDamageDealt'] / df_pc['totalSessionsPlayed']  
df_pc['DamageTaken'] = df_pc['totalDamageTaken'] / df_pc['totalSessionsPlayed']              
df_pc['DoubleKills'] = df_pc['totalDoubleKills'] / df_pc['totalSessionsPlayed']
df_pc['TripleKills'] = df_pc['totalTripleKills'] / df_pc['totalSessionsPlayed']
df_pc['QuadraKills'] = df_pc['totalQuadraKills'] / df_pc['totalSessionsPlayed']
df_pc['PentaKills'] = df_pc['totalPentaKills'] / df_pc['totalSessionsPlayed']
df_pc['TurretsKilled'] = df_pc['totalTurretsKilled'] / df_pc['totalSessionsPlayed']

df_total['GoldEarned'] = df_total['totalGoldEarned'] / df_total['totalSessionsPlayed']
df_total['MinionKills'] = df_total['totalMinionKills'] / df_total['totalSessionsPlayed']             
df_total['PhysicalDamageDealt'] = df_total['totalPhysicalDamageDealt'] / df_total['totalSessionsPlayed']     
df_total['MagicDamageDealt'] = df_total['totalMagicDamageDealt'] / df_total['totalSessionsPlayed']
df_total['DamageTaken'] = df_total['totalDamageTaken'] / df_total['totalSessionsPlayed']
df_total['DoubleKills'] = df_total['totalDoubleKills'] / df_total['totalSessionsPlayed']
df_total['TripleKills'] = df_total['totalTripleKills'] / df_total['totalSessionsPlayed']
df_total['QuadraKills'] = df_total['totalQuadraKills'] / df_total['totalSessionsPlayed']
df_total['PentaKills'] = df_total['totalPentaKills'] / df_total['totalSessionsPlayed']
df_total['TurretsKilled'] = df_total['totalTurretsKilled'] / df_total['totalSessionsPlayed']
df_total['Heal'] = df_total['totalHeal'] / df_total['totalSessionsPlayed']
df_total['NeutralMinionsKilled	'] = df_total['totalNeutralMinionsKilled'] / df_total['totalSessionsPlayed']

### KDA calculation

One important metric in competitive games as League of Legends is the KDA (Kills-Deaths-Assists) ratio. For a given player when can calculate the KDA for the ranked Season 5 as 
\begin{equation}
KDA = \frac{totalChampionKills + totalAssists}{totalDeathsPerSession} 
\end{equation}

In [15]:
df_pc['KDA'] = (df_pc['totalChampionKills'] + df_pc['totalAssists']) / df_pc['totalDeathsPerSession']

In [16]:
df_pc[['KDA', 'totalChampionKills', 'totalAssists', 'totalDeathsPerSession']].head()

Unnamed: 0_level_0,KDA,totalChampionKills,totalAssists,totalDeathsPerSession
summID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
45179106,1.925481,775,827,832
27518339,2.07563,319,669,476
37827325,1.889352,1887,1289,1681
54158691,2.580756,218,533,291
46828075,2.307054,611,501,482


In [17]:
df_total['totalKDA'] = (df_total['totalChampionKills'] + df_total['totalAssists']) / df_total['totalDeathsPerSession']

In [18]:
df_total[['totalKDA', 'totalChampionKills', 'totalAssists', 'totalDeathsPerSession']].head()

Unnamed: 0_level_0,totalKDA,totalChampionKills,totalAssists,totalDeathsPerSession
summID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
45179106,2.215506,5813,10533,7378
27518339,1.683708,4765,8112,7648
37827325,1.842721,8543,8141,9054
54158691,2.138967,2170,3879,2828
380210,2.440747,1373,1634,1232


### Win rate calculation

We calculate the Win Rate percentage per player for both dataframes (it will be the same as they are the same players).

In [19]:
df_pc['WinRate'] = df_pc['totalSessionsWon'] / df_pc['totalSessionsPlayed']

In [20]:
df_pc[['WinRate', 'totalSessionsPlayed', 'totalSessionsWon', 'totalSessionsLost']].head()

Unnamed: 0_level_0,WinRate,totalSessionsPlayed,totalSessionsWon,totalSessionsLost
summID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
45179106,0.504132,121,61,60
27518339,0.704918,61,43,18
37827325,0.511312,221,113,108
54158691,0.415094,53,22,31
46828075,0.492754,69,34,35


In [21]:
df_total['WinRate'] = df_pc['totalSessionsWon'] / df_pc['totalSessionsPlayed']

In [22]:
df_total[['WinRate', 'totalSessionsPlayed', 'totalSessionsWon', 'totalSessionsLost']].head()

Unnamed: 0_level_0,WinRate,totalSessionsPlayed,totalSessionsWon,totalSessionsLost
summID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
45179106,0.504132,1089,553,536
27518339,0.704918,862,440,422
37827325,0.511312,1162,580,582
54158691,0.415094,446,215,231
380210,,214,100,114


### Drop the unnecessary data

In [23]:
df_pc.drop(df_pc[['totalChampionKills', 'totalTurretsKilled', 'totalAssists', 'totalDeathsPerSession', 'totalSessionsPlayed', 'totalSessionsWon', 'totalSessionsLost', 'totalGoldEarned', 'totalMinionKills', 'totalPhysicalDamageDealt', 'totalMagicDamageDealt','totalDamageTaken', 'totalDoubleKills', 'totalTripleKills', 'totalQuadraKills', 'totalPentaKills']], axis=1, inplace=True)

In [24]:
df_total.drop(df_total[['totalHeal' ,'totalNeutralMinionsKilled','totalChampionKills', 'totalTurretsKilled', 'totalAssists', 'totalDeathsPerSession', 'totalSessionsPlayed', 'totalSessionsWon', 'totalSessionsLost', 'totalGoldEarned', 'totalMinionKills', 'totalPhysicalDamageDealt', 'totalMagicDamageDealt','totalDamageTaken', 'totalDoubleKills', 'totalTripleKills', 'totalQuadraKills', 'totalPentaKills']], axis=1, inplace=True)

## Dataframe description 

In [25]:
df_pc.head()

Unnamed: 0_level_0,champID,maxNumDeaths,mostChampionKillsPerSession,league,GoldEarned,MinionKills,PhysicalDamageDealt,MagicDamageDealt,DamageTaken,DoubleKills,TripleKills,QuadraKills,PentaKills,TurretsKilled,KDA,WinRate
summID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
45179106,157,19,20,4,13669.636364,232.322314,158626.950413,56048.22314,23923.380165,0.619835,0.090909,0.0,0.0,1.834711,1.925481,0.504132
27518339,14,15,12,4,12403.983607,175.393443,91527.245902,57252.52459,40255.868852,0.114754,0.016393,0.0,0.0,0.852459,2.07563,0.704918
37827325,67,18,24,4,12753.832579,189.081448,130735.665158,2473.723982,25157.153846,1.208145,0.180995,0.0181,0.00905,1.470588,1.889352,0.511312
54158691,79,11,11,1,10371.716981,50.528302,20377.245283,73929.735849,35359.773585,0.207547,0.018868,0.0,0.0,0.226415,2.580756,0.415094
46828075,131,16,18,2,12132.869565,164.333333,21984.434783,119351.028986,21507.57971,0.826087,0.072464,0.0,0.0,1.130435,2.307054,0.492754


In [26]:
df_total.head()

Unnamed: 0_level_0,league,GoldEarned,MinionKills,PhysicalDamageDealt,MagicDamageDealt,DamageTaken,DoubleKills,TripleKills,QuadraKills,PentaKills,TurretsKilled,Heal,NeutralMinionsKilled,totalKDA,WinRate
summID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
45179106,4,11826.022039,145.680441,85798.553719,47953.254362,24361.437098,0.460055,0.057851,0.004591,0.000918,1.126722,3475.229568,16.105601,2.215506,0.504132
27518339,4,11260.962877,126.740139,61803.303944,55339.556845,31033.616009,0.437355,0.040603,0.00464,0.0,0.787703,5951.610209,13.633411,1.683708,0.704918
37827325,4,12369.761618,166.032702,110914.301205,33299.624785,27286.814974,0.835628,0.139415,0.025818,0.004303,1.159208,3230.913081,17.328744,1.842721,0.511312
54158691,1,11073.621076,108.540359,67854.139013,51714.623318,29377.955157,0.408072,0.042601,0.004484,0.0,0.845291,4877.23991,34.53139,2.138967,0.415094
380210,2,10734.070093,111.878505,56378.378505,46746.864486,23430.724299,0.593458,0.070093,0.004673,0.0,0.841121,3268.607477,20.728972,2.440747,


### Mean, std, min, max and percentiles

In [27]:
df_pc.describe()

Unnamed: 0,champID,maxNumDeaths,mostChampionKillsPerSession,league,GoldEarned,MinionKills,PhysicalDamageDealt,MagicDamageDealt,DamageTaken,DoubleKills,TripleKills,QuadraKills,PentaKills,TurretsKilled,KDA,WinRate
count,13550.0,13550.0,13550.0,13550.0,13550.0,13550.0,13550.0,13550.0,13550.0,13550.0,13550.0,13550.0,13550.0,13550.0,13550.0,13550.0
mean,129.856384,14.077122,19.81786,2.748487,12212.636387,134.334126,85175.497898,42217.726866,24464.986951,0.746201,0.115665,0.017785,0.003163,1.056519,2.948795,0.560401
std,118.390212,2.618882,7.378841,1.438657,1593.731657,74.756716,66950.118745,46943.865823,5643.265365,0.478554,0.10848,0.024911,0.007231,0.630363,0.754146,0.05999
min,1.0,6.0,2.0,0.0,7312.767123,4.178571,1669.872814,32.338235,7345.445525,0.0,0.0,0.0,0.0,0.018519,0.969806,0.096491
25%,55.0,12.0,16.0,1.0,11477.238383,50.856346,14882.809664,8639.013763,20351.690511,0.388456,0.026918,0.0,0.0,0.512195,2.44497,0.522388
50%,90.0,14.0,21.0,4.0,12608.660317,166.298692,91009.029349,20303.598138,23653.957353,0.766931,0.095238,0.009654,0.0,0.979084,2.835881,0.560731
75%,157.0,16.0,25.0,4.0,13324.544627,199.261848,145374.039807,63859.751393,28040.998517,1.077967,0.174603,0.026619,0.001774,1.514494,3.309295,0.597403
max,432.0,37.0,54.0,5.0,17377.960784,272.441176,267872.785714,230219.826923,52609.8875,3.112903,0.961039,0.266881,0.135593,3.586207,9.892157,0.86


In [28]:
df_total.describe()

Unnamed: 0,league,GoldEarned,MinionKills,PhysicalDamageDealt,MagicDamageDealt,DamageTaken,DoubleKills,TripleKills,QuadraKills,PentaKills,TurretsKilled,Heal,NeutralMinionsKilled,totalKDA,WinRate
count,19235.0,19235.0,19235.0,19235.0,19235.0,19235.0,19235.0,19235.0,19235.0,19235.0,19235.0,19235.0,19235.0,19235.0,13550.0
mean,2.776085,11906.865305,131.281016,75996.084143,48493.730015,25134.666053,0.626636,0.089555,0.012615,0.002096,0.94392,4022.698832,20.272242,2.643273,0.560401
std,1.44092,831.825566,34.755858,27348.403472,17687.029839,2993.937,0.243009,0.051633,0.010963,0.003108,0.288106,1271.908756,10.437303,0.4674,0.05999
min,0.0,7953.09009,10.779956,3548.28976,438.50813,11588.1303,0.008715,0.0,0.0,0.0,0.09589,827.818898,0.132653,1.171344,0.096491
25%,1.0,11458.398113,109.161373,58252.421002,36315.655694,23167.437019,0.467175,0.054003,0.005136,0.0,0.755596,3221.605698,13.132965,2.32713,0.522388
50%,2.0,11992.792633,134.816667,76226.251599,46775.587413,25121.62622,0.620523,0.082335,0.010363,0.001112,0.933622,3819.839879,18.555556,2.58714,0.560731
75%,4.0,12450.212873,156.105975,93549.963519,58355.169131,27085.394681,0.775909,0.117038,0.017391,0.003155,1.117294,4568.554042,25.284555,2.895108,0.597403
max,5.0,16127.776423,239.860963,252527.516556,149002.325,40111.350993,2.569106,0.658537,0.178862,0.071429,2.707692,26442.601307,82.748521,7.159737,0.86


### Covariances and Correlation between features

In [29]:
df_pc.cov()

Unnamed: 0,champID,maxNumDeaths,mostChampionKillsPerSession,league,GoldEarned,MinionKills,PhysicalDamageDealt,MagicDamageDealt,DamageTaken,DoubleKills,TripleKills,QuadraKills,PentaKills,TurretsKilled,KDA,WinRate
champID,14016.24,-11.646167,-281.220713,0.97229,-65512.6,-2079.186,-681685.0,-1359271.0,-68084.11,-13.392558,-1.98227,-0.243721,-0.03556,-11.843599,6.280621,-0.82926
maxNumDeaths,-11.64617,6.858544,6.472532,0.074384,741.9534,15.57641,23916.97,-1585.954,2719.027,0.316706,0.059301,0.010334,0.001978,0.177434,-1.201069,-0.01872
mostChampionKillsPerSession,-281.2207,6.472532,54.447301,-0.050907,9469.812,340.7942,267466.1,40409.07,2252.132,2.912944,0.543669,0.094069,0.018097,2.619104,-1.893388,0.042211
league,0.9722899,0.074384,-0.050907,2.069735,21.96313,-1.563786,-310.3945,-887.7009,139.8082,0.002421,0.000885,0.000276,4.4e-05,-0.00633,-0.025588,-0.004018
GoldEarned,-65512.6,741.953355,9469.812241,21.963132,2539981.0,89921.0,72834880.0,10444970.0,732562.5,628.393643,117.204632,20.223895,3.785356,723.94167,-307.89907,17.475435
MinionKills,-2079.186,15.576408,340.794174,-1.563786,89921.0,5588.567,2890368.0,312431.3,-86866.96,24.328132,4.806393,0.848359,0.161359,36.812927,-16.314756,0.445927
PhysicalDamageDealt,-681685.0,23916.97015,267466.143101,-310.394513,72834880.0,2890368.0,4482318000.0,-1792515000.0,39102950.0,19438.564368,3851.973577,713.568292,135.319243,32657.249285,-16463.730228,14.026798
MagicDamageDealt,-1359271.0,-1585.953958,40409.07207,-887.700912,10444970.0,312431.3,-1792515000.0,2203727000.0,-6792027.0,299.367383,-224.539065,-96.737844,-21.032575,-6309.656624,3090.05658,393.28504
DamageTaken,-68084.11,2719.026954,2252.131509,139.808173,732562.5,-86866.96,39102950.0,-6792027.0,31846440.0,-213.75038,-77.175453,-18.02165,-4.023471,-310.239583,-1047.195965,-9.124384
DoubleKills,-13.39256,0.316706,2.912944,0.002421,628.3936,24.32813,19438.56,299.3674,-213.7504,0.229014,0.047363,0.008838,0.001734,0.199024,-0.108934,0.004896


In [30]:
df_pc.corr()

Unnamed: 0,champID,maxNumDeaths,mostChampionKillsPerSession,league,GoldEarned,MinionKills,PhysicalDamageDealt,MagicDamageDealt,DamageTaken,DoubleKills,TripleKills,QuadraKills,PentaKills,TurretsKilled,KDA,WinRate
champID,1.0,-0.037562,-0.321917,0.005709,-0.347211,-0.234924,-0.086004,-0.244575,-0.101906,-0.236383,-0.154346,-0.082639,-0.041537,-0.1587,0.070345,-0.11676
maxNumDeaths,-0.037562,1.0,0.334942,0.019743,0.177765,0.079561,0.136408,-0.0129,0.183979,0.252703,0.208735,0.158403,0.104472,0.107481,-0.60813,-0.119154
mostChampionKillsPerSession,-0.321917,0.334942,1.0,-0.004795,0.805263,0.617808,0.541414,0.116657,0.054085,0.824922,0.679197,0.511757,0.339153,0.563085,-0.340248,0.095359
league,0.005709,0.019743,-0.004795,1.0,0.009579,-0.01454,-0.003223,-0.013144,0.01722,0.003517,0.005673,0.0077,0.004273,-0.00698,-0.023584,-0.046553
GoldEarned,-0.347211,0.177765,0.805263,0.009579,1.0,0.754737,0.68261,0.139609,0.081451,0.823921,0.67792,0.509397,0.328459,0.720605,-0.256176,0.182781
MinionKills,-0.234924,0.079561,0.617808,-0.01454,0.754737,1.0,0.577499,0.089028,-0.205908,0.680029,0.592677,0.45555,0.298491,0.781195,-0.289384,0.099433
PhysicalDamageDealt,-0.086004,0.136408,0.541414,-0.003223,0.68261,0.577499,1.0,-0.570338,0.103497,0.606711,0.530373,0.427849,0.27951,0.773815,-0.326078,0.003492
MagicDamageDealt,-0.244575,-0.0129,0.116657,-0.013144,0.139609,0.089028,-0.570338,1.0,-0.025638,0.013326,-0.044092,-0.082723,-0.061959,-0.213224,0.087283,0.139652
DamageTaken,-0.101906,0.183979,0.054085,0.01722,0.081451,-0.205908,0.103497,-0.025638,1.0,-0.079149,-0.126066,-0.128195,-0.098596,-0.087212,-0.246061,-0.026952
DoubleKills,-0.236383,0.252703,0.824922,0.003517,0.823921,0.680029,0.606711,0.013326,-0.079149,1.0,0.912343,0.741332,0.501127,0.659756,-0.30184,0.170538


In [31]:
df_total.cov()

Unnamed: 0,league,GoldEarned,MinionKills,PhysicalDamageDealt,MagicDamageDealt,DamageTaken,DoubleKills,TripleKills,QuadraKills,PentaKills,TurretsKilled,Heal,NeutralMinionsKilled,totalKDA,WinRate
league,2.07625,26.29453,-0.920936,7.599373,-368.6804,147.7681,0.001417,-1e-05,-3.5e-05,-2.9e-05,-0.0033,-39.26216,-0.284838,-0.00938,-0.004018
GoldEarned,26.294531,691933.8,21871.114598,16808120.0,3597987.0,494498.3,165.218178,30.91684,5.201418,0.958699,166.019777,-137112.9,2680.82552,-55.832975,4.134267
MinionKills,-0.920936,21871.11,1207.969683,619817.3,122313.2,-15799.08,5.500943,1.067148,0.181254,0.033111,7.513303,-11956.38,-70.421174,-1.922474,0.169048
PhysicalDamageDealt,7.599373,16808120.0,619817.329617,747935200.0,-181901700.0,9790563.0,4567.874833,894.67359,157.574655,30.027637,6219.895909,-7887498.0,70417.058877,-2433.58672,35.715665
MagicDamageDealt,-368.680443,3597987.0,122313.160628,-181901700.0,312831000.0,4744405.0,176.133275,-15.401667,-10.904803,-3.535845,-618.281956,3498894.0,30142.083897,-208.55302,105.190991
DamageTaken,147.768077,494498.3,-15799.075461,9790563.0,4744405.0,8963659.0,28.618198,-3.957098,-1.733647,-0.414548,-26.448515,1494882.0,15539.340774,-677.210035,-2.13007
DoubleKills,0.001417,165.2182,5.500943,4567.875,176.1333,28.6182,0.059053,0.011671,0.002058,0.000401,0.04554,-84.36321,0.371424,-0.020975,0.001567
TripleKills,-1e-05,30.91684,1.067148,894.6736,-15.40167,-3.957098,0.011671,0.002666,0.000482,9.6e-05,0.008974,-17.00266,0.045953,-0.00307,0.000355
QuadraKills,-3.5e-05,5.201418,0.181254,157.5747,-10.9048,-1.733647,0.002058,0.000482,0.00012,2.3e-05,0.001569,-3.039772,0.005971,-0.000409,6.2e-05
PentaKills,-2.9e-05,0.9586987,0.033111,30.02764,-3.535845,-0.4145483,0.000401,9.6e-05,2.3e-05,1e-05,0.000302,-0.5794034,0.000878,-5.9e-05,1.2e-05


In [32]:
df_total.corr()

Unnamed: 0,league,GoldEarned,MinionKills,PhysicalDamageDealt,MagicDamageDealt,DamageTaken,DoubleKills,TripleKills,QuadraKills,PentaKills,TurretsKilled,Heal,NeutralMinionsKilled,totalKDA,WinRate
league,1.0,0.021938,-0.018389,0.000193,-0.014466,0.034253,0.004047,-0.000129,-0.002234,-0.006457,-0.00795,-0.021423,-0.01894,-0.013927,-0.046553
GoldEarned,0.021938,1.0,0.756503,0.738848,0.244553,0.198559,0.817341,0.719843,0.570387,0.370811,0.692747,-0.129596,0.308779,-0.143605,0.082933
MinionKills,-0.018389,0.756503,1.0,0.652084,0.198971,-0.151831,0.651309,0.594664,0.475708,0.306514,0.750327,-0.270468,-0.194127,-0.118343,0.078302
PhysicalDamageDealt,0.000193,0.738848,0.652084,1.0,-0.376054,0.119573,0.687322,0.633589,0.525575,0.353258,0.789403,-0.226752,0.246693,-0.190382,0.020744
MagicDamageDealt,-0.014466,0.244553,0.198971,-0.376054,1.0,0.089595,0.040979,-0.016865,-0.05624,-0.064319,-0.121333,0.155532,0.163279,-0.025227,0.094787
DamageTaken,0.034253,0.198559,-0.151831,0.119573,0.089595,1.0,0.039335,-0.025598,-0.05282,-0.044549,-0.030662,0.392562,0.497281,-0.483941,-0.011638
DoubleKills,0.004047,0.817341,0.651309,0.687322,0.040979,0.039335,1.0,0.930132,0.772457,0.530322,0.650455,-0.272945,0.14644,-0.184671,0.106455
TripleKills,-0.000129,0.719843,0.594664,0.633589,-0.016865,-0.025598,0.930132,1.0,0.85193,0.595706,0.603249,-0.258902,0.085271,-0.12722,0.115461
QuadraKills,-0.002234,0.570387,0.475708,0.525575,-0.05624,-0.05282,0.772457,0.85193,1.0,0.689231,0.496754,-0.218004,0.052182,-0.079905,0.09789
PentaKills,-0.006457,0.370811,0.306514,0.353258,-0.064319,-0.044549,0.530322,0.595706,0.689231,1.0,0.337264,-0.146564,0.027063,-0.040519,0.0754


In [33]:
import plotly.plotly as py
import plotly.graph_objs as go

py.sign_in('juanluismoralperez', 'MUMjWF4QwNy8AtZW6WUq')

trace1 = go.Scatter3d(
    x=df_pc.PhysicalDamageDealt,
    y=df_pc.KDA,
    z=df_pc.league,
    mode='markers',
    marker=dict(
        size=12,
        color=df_pc.league,                # set color to an array/list of desired values
        colorscale='Viridis',   # choose a colorscale
        opacity=0.8
    )
)

data = [trace1]
layout = go.Layout(
    margin=dict(
        l=0,
        r=0,
        b=0,
        t=0
    )
)
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='3d-scatter-colorscale')


High five! You successfuly sent some data to your account on plotly. View your plot in your browser at https://plot.ly/~juanluismoralperez/0 or inside your plot.ly account where it is named '3d-scatter-colorscale'


## Data visualization by champion 

In [None]:
%matplotlib inline

import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
sns.set(style="whitegrid", color_codes=True)

In [None]:
fig = plt.figure(num=None, figsize=(10, 6), dpi=80, facecolor='w', edgecolor='k')
ax = fig.add_subplot(1,1,1)
ax.scatter(df_pc['champID'], df_pc['KDA'])
plt.xlabel('ChampID')
plt.ylabel('KDA')
plt.xlim(0, 440)
plt.ylim(0, 10.5)
plt.show()

In [None]:
fig = plt.figure(num=None, figsize=(10, 6), dpi=80, facecolor='w', edgecolor='k')
ax = fig.add_subplot(1,1,1)
ax.scatter(df_pc['champID'], df_pc['WinRate'])
plt.xlabel('ChampID')
plt.ylabel('WinRate(%)')
plt.xlim(0, 440)
plt.ylim(0, 1)
plt.show()

In [None]:
fig = plt.figure(num=None, figsize=(10, 6), dpi=80, facecolor='w', edgecolor='k')
ax = fig.add_subplot(1,1,1)
ax.scatter(df_pc['champID'], df_pc['GoldEarned'])
plt.xlabel('ChampID')
plt.ylabel('Average Gold Earned')
plt.xlim(-1, 440)
plt.ylim(6500, 18000)
plt.show()

In [None]:
fig = plt.figure(num=None, figsize=(10, 6), dpi=80, facecolor='w', edgecolor='k')
ax = fig.add_subplot(1,1,1)
ax.scatter(df_pc['champID'], df_pc['MinionKills'])
plt.xlabel('ChampID')
plt.ylabel('Creep Score (CS)')
plt.xlim(-1, 440)
plt.ylim(-0.05, 300)
plt.show()

In [None]:
fig = plt.figure(num=None, figsize=(18, 6), dpi=80, facecolor='w', edgecolor='k')
ax1 = fig.add_subplot(1,3,1)
ax1.scatter(df_pc['champID'], df_pc['PhysicalDamageDealt'])
ax2 = fig.add_subplot(1,3,2)
ax2.scatter(df_pc['champID'], df_pc['MagicDamageDealt'])
ax3 = fig.add_subplot(1,3,3)
ax3.scatter(df_pc['champID'], df_pc['DamageTaken'])
ax1.set_xlim([-1, 440])
ax1.set_ylim([0, 300000])
ax1.set_xlabel('ChampID')
ax1.set_ylabel('Physical Dmg')
ax2.set_xlim([-1, 440])
ax2.set_ylim([0, 250000])
ax2.set_xlabel('ChampID')
ax2.set_ylabel('Magic Dmg')
ax3.set_xlim([-1, 440])
ax3.set_ylim([0, 55000])
ax3.set_xlabel('ChampID')
ax3.set_ylabel('Dmg taken')
plt.show()

In [None]:
fig = plt.figure(num=None, figsize=(10, 6), dpi=80, facecolor='w', edgecolor='k')
ax1 = fig.add_subplot(2,2,1)
ax1.scatter(df_pc['champID'], df_pc['PentaKills'])
ax2 = fig.add_subplot(2,2,2)
ax2.scatter(df_pc['champID'], df_pc['QuadraKills'])
ax3 = fig.add_subplot(2,2,3)
ax3.scatter(df_pc['champID'], df_pc['TripleKills'])
ax4 = fig.add_subplot(2,2,4)
ax4.scatter(df_pc['champID'], df_pc['DoubleKills'])
ax1.set_xlim([-0.5, 440])
ax1.set_ylim([0, 0.2])
ax1.set_xlabel('ChampID')
ax1.set_ylabel('Pentakills')
ax2.set_xlim([-0.5, 440])
ax2.set_ylim([0, 0.35])
ax2.set_xlabel('ChampID')
ax2.set_ylabel('Quadrakills')
ax3.set_xlim([-0.5, 440])
ax3.set_ylim([0, 1])
ax3.set_xlabel('ChampID')
ax3.set_ylabel('Triplekills')
ax4.set_xlim([-0.5, 440])
ax4.set_ylim([0, 3.5])
ax4.set_xlabel('ChampID')
ax4.set_ylabel('Doublekills')
plt.show()

In [None]:
from pandas.tools.plotting import scatter_matrix
scatter_matrix(df_pc, figsize = (20,20))
plt.show()

## Data visualization by player (all champions played)

In [None]:
scatter_matrix(df_total, figsize = (20,20))
plt.show()

## Export the processed data

In [34]:
df_pc.to_pickle('pc_processed.pkl')

In [36]:
df_total.to_pickle('total_processed.pkl')