In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

%matplotlib inline

## Injury Record

In [2]:
# Load the raw injury records; the path is relative to the notebook's working directory.
InjuryRecord = pd.read_csv('data/InjuryRecord.csv')
# Preview the first rows to check that the columns parsed as expected.
InjuryRecord.head()

Unnamed: 0,PlayerKey,GameID,PlayKey,BodyPart,Surface,DM_M1,DM_M7,DM_M28,DM_M42
0,39873,39873-4,39873-4-32,Knee,Synthetic,1,1,1,1
1,46074,46074-7,46074-7-26,Knee,Natural,1,1,0,0
2,36557,36557-1,36557-1-70,Ankle,Synthetic,1,1,1,1
3,46646,46646-3,46646-3-30,Ankle,Natural,1,0,0,0
4,43532,43532-5,43532-5-69,Ankle,Synthetic,1,1,1,1


In [3]:
# Show each column's dtype (the recorded output has the DM_M* columns as int64 and the key columns as object).
InjuryRecord.dtypes

PlayerKey     int64
GameID       object
PlayKey      object
BodyPart     object
Surface      object
DM_M1         int64
DM_M7         int64
DM_M28        int64
DM_M42        int64
dtype: object

In [4]:
# Non-null counts per column, used to spot missing values
# (in the recorded output PlayKey is the only column with fewer non-null entries than rows).
InjuryRecord.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 105 entries, 0 to 104
Data columns (total 9 columns):
PlayerKey    105 non-null int64
GameID       105 non-null object
PlayKey      77 non-null object
BodyPart     105 non-null object
Surface      105 non-null object
DM_M1        105 non-null int64
DM_M7        105 non-null int64
DM_M28       105 non-null int64
DM_M42       105 non-null int64
dtypes: int64(5), object(4)
memory usage: 5.8+ KB


In [5]:
# Keep only the injury rows that have no PlayKey recorded.
# NOTE(review): despite the "_grouped" name nothing is grouped here, and every
# rollup further down is built from this subset only -- confirm that leaving
# out the rows that do have a PlayKey is intended.
InjuryRecord_grouped = InjuryRecord.loc[
    InjuryRecord['PlayKey'].isna()
]
InjuryRecord_grouped.head()

Unnamed: 0,PlayerKey,GameID,PlayKey,BodyPart,Surface,DM_M1,DM_M7,DM_M28,DM_M42
46,33337,33337-2,,Foot,Natural,1,1,1,1
47,45099,45099-5,,Knee,Natural,1,1,1,1
48,36591,36591-9,,Knee,Natural,1,1,1,1
49,45950,45950-6,,Toes,Synthetic,1,1,0,0
50,39653,39653-4,,Ankle,Synthetic,1,0,0,0


In [6]:
# Per-player totals of the four DM_M* columns.
# The columns are selected explicitly: a bare .sum() relies on pandas silently
# discarding the text columns (GameID, PlayKey, BodyPart, Surface), which
# pandas >= 2.0 no longer does -- it would try to add the strings up instead.
InjuryRecord_by_PlayerKey = (
    InjuryRecord_grouped
    .groupby('PlayerKey')[['DM_M1', 'DM_M7', 'DM_M28', 'DM_M42']]
    .sum()
)
InjuryRecord_by_PlayerKey.head()

Unnamed: 0_level_0,DM_M1,DM_M7,DM_M28,DM_M42
PlayerKey,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
31933,1,0,0,0
33337,1,1,1,1
35648,1,0,0,0
36572,1,0,0,0
36573,1,1,1,1


## Rollup by Player Key

In [7]:
# Total each DM_M* column across all players and add its share of the grand
# total. The label column holds the DM_M* column names, not player IDs, so it
# is called 'Feature' (it was previously mislabelled 'PlayerKey').
InjuryRecord_rollup_features = (
    InjuryRecord_by_PlayerKey
    .sum(axis=0)
    .rename('totals')
    .rename_axis('Feature')
    .reset_index()
)
# 'percentage' is stored as a fraction of the grand total (0-1), not multiplied by 100.
InjuryRecord_rollup_features['percentage'] = (
    InjuryRecord_rollup_features['totals']
    / InjuryRecord_rollup_features['totals'].sum()
)
InjuryRecord_rollup_features

Unnamed: 0,PlayerKey,totals,percentage
0,DM_M1,28,0.509091
1,DM_M7,16,0.290909
2,DM_M28,6,0.109091
3,DM_M42,5,0.090909


In [8]:
# Sum the DM_M* columns row-wise to get one total per player, rank the players
# by that total (largest first), and add each player's share of the grand
# total as a fraction (0-1).
# NOTE(review): the name InjuryRecord_rollup_instances is reassigned by the
# Surface, BodyPart and GameID rollups further down, so it only holds this
# per-player table until those cells run.
InjuryRecord_rollup_instances = (
    InjuryRecord_by_PlayerKey
    .sum(axis=1)
    .rename('totals')
    .rename_axis('PlayerKey')
    .reset_index()
    .sort_values(by='totals', ascending=False)
    .reset_index(drop=True)
    .assign(percentage=lambda ranked: ranked['totals'] / ranked['totals'].sum())
)
InjuryRecord_rollup_instances.head(6)

Unnamed: 0,PlayerKey,totals,percentage
0,33337,4,0.072727
1,36573,4,0.072727
2,36591,4,0.072727
3,45099,4,0.072727
4,39671,4,0.072727
5,38253,3,0.054545


## Rollup by Surface

In [9]:
# Totals of the four DM_M* columns for each Surface value.
# Selecting the DM_M* columns up front replaces the earlier "sum everything,
# then drop PlayerKey" approach, which added up player IDs and depended on
# pandas silently discarding the text columns (no longer the case in
# pandas >= 2.0).
InjuryRecord_by_Surface = (
    InjuryRecord_grouped
    .groupby('Surface')[['DM_M1', 'DM_M7', 'DM_M28', 'DM_M42']]
    .sum()
)
InjuryRecord_by_Surface

Unnamed: 0_level_0,DM_M1,DM_M7,DM_M28,DM_M42
Surface,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Natural,12,8,4,4
Synthetic,16,8,2,1


In [10]:
# Sum the DM_M* columns row-wise to get one total per Surface value, order the
# surfaces by that total (largest first), and add each surface's share of the
# grand total as a fraction (0-1).
# NOTE(review): this reassigns InjuryRecord_rollup_instances, replacing the
# per-player table built earlier under the same name.
InjuryRecord_rollup_instances = (
    InjuryRecord_by_Surface
    .sum(axis=1)
    .rename('totals')
    .rename_axis('Surface')
    .reset_index()
    .sort_values(by='totals', ascending=False)
    .reset_index(drop=True)
    .assign(percentage=lambda ranked: ranked['totals'] / ranked['totals'].sum())
)
InjuryRecord_rollup_instances


Unnamed: 0,Surface,totals,percentage
0,Natural,28,0.509091
1,Synthetic,27,0.490909


## Rollup by BodyPart

In [11]:
# Totals of the four DM_M* columns for each BodyPart value.
# Selecting the DM_M* columns up front replaces the earlier "sum everything,
# then drop PlayerKey" approach, which added up player IDs and depended on
# pandas silently discarding the text columns (no longer the case in
# pandas >= 2.0).
InjuryRecord_by_BodyPart = (
    InjuryRecord_grouped
    .groupby('BodyPart')[['DM_M1', 'DM_M7', 'DM_M28', 'DM_M42']]
    .sum()
)
InjuryRecord_by_BodyPart

Unnamed: 0_level_0,DM_M1,DM_M7,DM_M28,DM_M42
BodyPart,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Ankle,7,3,2,2
Foot,1,1,1,1
Heel,1,1,0,0
Knee,12,6,2,2
Toes,7,5,1,0


In [12]:
# Sum the DM_M* columns row-wise to get one total per BodyPart value, order the
# body parts by that total (largest first), and add each one's share of the
# grand total as a fraction (0-1).
# NOTE(review): this reassigns InjuryRecord_rollup_instances, replacing the
# table built by the previous rollup under the same name.
InjuryRecord_rollup_instances = (
    InjuryRecord_by_BodyPart
    .sum(axis=1)
    .rename('totals')
    .rename_axis('BodyPart')
    .reset_index()
    .sort_values(by='totals', ascending=False)
    .reset_index(drop=True)
    .assign(percentage=lambda ranked: ranked['totals'] / ranked['totals'].sum())
)
InjuryRecord_rollup_instances

Unnamed: 0,BodyPart,totals,percentage
0,Knee,22,0.4
1,Ankle,14,0.254545
2,Toes,13,0.236364
3,Foot,4,0.072727
4,Heel,2,0.036364


## Rollup by GameID

In [14]:
# Totals of the four DM_M* columns for each GameID.
# Selecting the DM_M* columns up front replaces the earlier "sum everything,
# then drop PlayerKey" approach, which added up player IDs and depended on
# pandas silently discarding the text columns (no longer the case in
# pandas >= 2.0).
InjuryRecord_by_GameID = (
    InjuryRecord_grouped
    .groupby('GameID')[['DM_M1', 'DM_M7', 'DM_M28', 'DM_M42']]
    .sum()
)
InjuryRecord_by_GameID

Unnamed: 0_level_0,DM_M1,DM_M7,DM_M28,DM_M42
GameID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
31933-20,1,0,0,0
33337-2,1,1,1,1
35648-12,1,0,0,0
36572-4,1,0,0,0
36573-14,1,1,1,1
36591-9,1,1,1,1
36696-24,1,1,0,0
37068-19,1,1,0,0
38214-12,1,0,0,0
38253-10,1,1,1,0


In [17]:
# Sum the DM_M* columns row-wise to get one total per GameID, rank the games by
# that total (largest first), and add each game's share of the grand total as
# a fraction (0-1).
# NOTE(review): this reassigns InjuryRecord_rollup_instances, replacing the
# table built by the previous rollup under the same name.
InjuryRecord_rollup_instances = (
    InjuryRecord_by_GameID
    .sum(axis=1)
    .rename('totals')
    .rename_axis('GameID')
    .reset_index()
    .sort_values(by='totals', ascending=False)
    .reset_index(drop=True)
    .assign(percentage=lambda ranked: ranked['totals'] / ranked['totals'].sum())
)
InjuryRecord_rollup_instances.head(6)

Unnamed: 0,GameID,totals,percentage
0,33337-2,4,0.072727
1,36573-14,4,0.072727
2,36591-9,4,0.072727
3,45099-5,4,0.072727
4,39671-12,4,0.072727
5,38253-10,3,0.054545
