# Data Cleaning

In [1]:
import datetime as dt

import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth' ,999)

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

np.random.seed(3792)

In [4]:
#Initial Datasets
kickoff_df = pd.read_csv('../data/kickoff_plays.csv')
punt_df = pd.read_csv('../data/punt_plays.csv')

In [5]:
kickoffs_df.head(1)

Unnamed: 0,gameId,playId,playDescription,quarter,possessionTeam,kickerId,returnerId,yardlineSide,yardlineNumber,gameClock,penaltyCodes,penaltyJerseyNumbers,penaltyYards,preSnapHomeScore,preSnapVisitorScore,kickLength,kickReturnYardage,absoluteYardlineNumber,hangTime,kickType,kickDirectionIntended,kickDirectionActual,returnDirectionIntended,returnDirectionActual,missedTackler,assistTackler,tackler,kickoffReturnFormation,specialTeamsSafeties
0,2018090600,677,"M.Bosher kicks 64 yards from ATL 35 to PHI 1. S.Gibson to PHI 31 for 30 yards (D.Kazee, D.Riley).",1,ATL,37267.0,44979,ATL,35,05:01:00,NP,NP,0,0,3,64.0,30.0,75,4.06,D,R,R,C,C,ATL 83,ATL 22,ATL 27,8-0-2,ATL 17; ATL 22


## Kickoff Data Cleaning pt. I

In [7]:
#looking for columns that have no bearing on kickoffs
kick_na_val_df = pd.DataFrame(zip(kickoff_df.columns, list(kickoff_df.isnull().sum())), columns=['Column', 'Null values'])
kick_na_val_df[kick_na_val_df['Null values'] == 7843]

Unnamed: 0,Column,Null values
11,kickBlockerId,7843
20,passResult,7843
25,snapDetail,7843
26,snapTime,7843
27,operationTime,7843
38,gunners,7843
39,puntRushers,7843
41,vises,7843
42,kickContactType,7843


In [8]:
#dropping said columns
kickoff_df.drop(columns=['kickBlockerId', 'passResult', 'snapDetail', 'snapTime', 'operationTime', 'gunners', 
                         'puntRushers', 'vises', 'kickContactType'], inplace=True)
kickoff_df.head(2)

Unnamed: 0,gameId,playId,playDescription,quarter,down,yardsToGo,possessionTeam,specialTeamsPlayType,specialTeamsResult,kickerId,returnerId,yardlineSide,yardlineNumber,gameClock,penaltyCodes,penaltyJerseyNumbers,penaltyYards,preSnapHomeScore,preSnapVisitorScore,kickLength,kickReturnYardage,playResult,absoluteYardlineNumber,hangTime,kickType,kickDirectionIntended,kickDirectionActual,returnDirectionIntended,returnDirectionActual,missedTackler,assistTackler,tackler,kickoffReturnFormation,specialTeamsSafeties
0,2018090600,37,"J.Elliott kicks 65 yards from PHI 35 to end zone, Touchback.",1,0,0,PHI,Kickoff,Touchback,44966.0,,PHI,35,15:00:00,,,,0,0,66.0,,40,45,3.85,D,R,R,,,,,,8-0-2,PHI 23; PHI 27
1,2018090600,677,"M.Bosher kicks 64 yards from ATL 35 to PHI 1. S.Gibson to PHI 31 for 30 yards (D.Kazee, D.Riley).",1,0,0,ATL,Kickoff,Return,37267.0,44979.0,ATL,35,05:01:00,,,,0,3,64.0,30.0,34,75,4.06,D,R,R,C,C,ATL 83,ATL 22,ATL 27,8-0-2,ATL 17; ATL 22


In [9]:
kickoff_df.shape

(7843, 34)

### Condensing Kickoff Data to just kickoffs with a return

In [11]:
#Only interested in the kicks in which the receiving team returned kick
kickoff_df = kickoff_df[kickoff_df['specialTeamsResult'] == 'Return']
kickoff_df.head(1)

Unnamed: 0,gameId,playId,playDescription,quarter,down,yardsToGo,possessionTeam,specialTeamsPlayType,specialTeamsResult,kickerId,returnerId,yardlineSide,yardlineNumber,gameClock,penaltyCodes,penaltyJerseyNumbers,penaltyYards,preSnapHomeScore,preSnapVisitorScore,kickLength,kickReturnYardage,playResult,absoluteYardlineNumber,hangTime,kickType,kickDirectionIntended,kickDirectionActual,returnDirectionIntended,returnDirectionActual,missedTackler,assistTackler,tackler,kickoffReturnFormation,specialTeamsSafeties
1,2018090600,677,"M.Bosher kicks 64 yards from ATL 35 to PHI 1. S.Gibson to PHI 31 for 30 yards (D.Kazee, D.Riley).",1,0,0,ATL,Kickoff,Return,37267.0,44979,ATL,35,05:01:00,,,,0,3,64.0,30.0,34,75,4.06,D,R,R,C,C,ATL 83,ATL 22,ATL 27,8-0-2,ATL 17; ATL 22


In [103]:
#Dropping unneeded columns
kickoff_df.drop(columns=['specialTeamsResult', 'specialTeamsPlayType', 'down', 'yardsToGo', 'playResult'], inplace=True)

In [12]:
# Search description for onside kicks attempts
kickoff_df[kickoff_df['playDescription'].map(lambda x: True if 'onside' in x else False)].index

Int64Index([  50,  167,  294,  347,  376,  638,  689,  710,  730,  741,
            ...
            7291, 7348, 7391, 7445, 7490, 7515, 7626, 7681, 7720, 7842],
           dtype='int64', length=139)

In [13]:
kickoff_df.shape

(2921, 34)

In [14]:
#dropping rows with an onside kick
kickoff_df.drop(kickoff_df[kickoff_df['playDescription'].map(lambda x: True if 'onside' in x else False)].index, inplace=True)
#shape to check inplace worked
kickoff_df.shape

(2782, 34)

### High Null Value Columns

In [16]:
kickoff_df.isna().sum().sort_values(ascending= False)[:15]

penaltyYards               2487
penaltyJerseyNumbers       2487
penaltyCodes               2487
assistTackler              2183
missedTackler              2182
tackler                     238
hangTime                    230
yardlineSide                 30
returnDirectionIntended      27
returnDirectionActual        25
specialTeamsSafeties         18
kickReturnYardage             5
kickType                      0
absoluteYardlineNumber        0
kickLength                    0
dtype: int64

Columns related to penalties and tacklers seems to have the highest number of null values so let's address those first, starting with the penalty columns.

#### Penalty Column Cleaning

In [17]:
kickoff_df[['penaltyYards', 'penaltyJerseyNumbers', 'penaltyCodes']].head()

Unnamed: 0,penaltyYards,penaltyJerseyNumbers,penaltyCodes
1,,,
3,5.0,PHI 29,OH
8,0.0,BAL 37;BUF 30,UNR;FMM
9,,,
12,,,


In [18]:
#Changing all NaN values to NP or No Penalty to represent a play that did not involve a penalty
kickoff_df['penaltyCodes'] = kickoff_df.penaltyCodes.map(lambda penaltyCodes: 'NP' if penaltyCodes is np.nan else penaltyCodes)
kickoff_df[['penaltyCodes']].head()

Unnamed: 0,penaltyCodes
1,NP
3,OH
8,UNR;FMM
9,NP
12,NP


In [19]:
#Changing all NaN values to 0 or 0 Penalty yards to represent a play that did not involve a penalty
kickoff_df['penaltyYards'] = kickoff_df['penaltyYards'].fillna(0).astype(int)
kickoff_df[['penaltyYards']].head()

Unnamed: 0,penaltyYards
1,0
3,5
8,0
9,0
12,0


In [20]:
#Changing all NaN values to NP or No Penalty to represent a play that did not involve a penalty, therefore no jersey numbers were called
kickoff_df['penaltyJerseyNumbers'] = kickoff_df.penaltyJerseyNumbers.map(lambda penaltyJerseyNumbers: 'NP' if penaltyJerseyNumbers is np.nan else penaltyJerseyNumbers)
kickoff_df[['penaltyJerseyNumbers']].head()

Unnamed: 0,penaltyJerseyNumbers
1,NP
3,PHI 29
8,BAL 37;BUF 30
9,NP
12,NP


In [22]:
#Lets recheck our Nulls now
kickoff_df.isna().sum().sort_values(ascending = False)[:10]

assistTackler              2183
missedTackler              2182
tackler                     238
hangTime                    230
yardlineSide                 30
returnDirectionIntended      27
returnDirectionActual        25
specialTeamsSafeties         18
kickReturnYardage             5
playResult                    0
dtype: int64

#### Tackler Column Cleaning

In [66]:
#All kickoffs with a solo tackle
solo_tackle = kickoff_df[(kickoff_df['assistTackler'].isnull()) & (kickoff_df['tackler'].notnull())].index

In [73]:
kickoff_df.loc[solo_tackle, 'assistTackler'] = kickoff_df.loc[solo_tackle, 'assistTackler'].fillna('Solo')
kickoff_df.head(2)

Unnamed: 0,gameId,playId,playDescription,quarter,down,yardsToGo,possessionTeam,specialTeamsPlayType,specialTeamsResult,kickerId,returnerId,yardlineSide,yardlineNumber,gameClock,penaltyCodes,penaltyJerseyNumbers,penaltyYards,preSnapHomeScore,preSnapVisitorScore,kickLength,kickReturnYardage,playResult,absoluteYardlineNumber,hangTime,kickType,kickDirectionIntended,kickDirectionActual,returnDirectionIntended,returnDirectionActual,missedTackler,assistTackler,tackler,kickoffReturnFormation,specialTeamsSafeties
1,2018090600,677,"M.Bosher kicks 64 yards from ATL 35 to PHI 1. S.Gibson to PHI 31 for 30 yards (D.Kazee, D.Riley).",1,0,0,ATL,Kickoff,Return,37267.0,44979,ATL,35,05:01:00,NP,NP,0,0,3,64.0,30.0,34,75,4.06,D,R,R,C,C,ATL 83,ATL 22,ATL 27,8-0-2,ATL 17; ATL 22
3,2018090600,1606,"M.Bosher kicks 68 yards from ATL 35 to PHI -3. S.Gibson to PHI 10 for 13 yards (K.Ishmael). PENALTY on PHI-A.Maddox, Offensive Holding, 5 yards, enforced at PHI 10.",2,0,0,ATL,Kickoff,Return,37267.0,44979,ATL,35,02:08:00,OH,PHI 29,5,3,6,68.0,13.0,60,45,3.63,D,C,C,C,R,,Solo,ATL 36,8-0-2,ATL 14; ATL 17


In [74]:
kickoff_df.isna().sum().sort_values(ascending = False)[:10]

missedTackler              2182
tackler                     238
assistTackler               238
hangTime                    230
yardlineSide                 30
returnDirectionIntended      27
returnDirectionActual        25
specialTeamsSafeties         18
kickReturnYardage             5
playResult                    0
dtype: int64

In [81]:
no_missed = kickoff_df[(kickoff_df['missedTackler'].isna()) & (kickoff_df['tackler'].notnull())].index
no_missed

Int64Index([   3,    8,    9,   12,   15,   21,   22,   23,   25,   32,
            ...
            7801, 7807, 7808, 7815, 7817, 7820, 7824, 7828, 7835, 7839],
           dtype='int64', length=2003)

In [86]:
#Filling Nans for missed tackle column
kickoff_df.loc[no_missed, 'missedTackler'] = kickoff_df.loc[no_missed, 'missedTackler'].fillna('returner_tackled')
kickoff_df.head(2)

Unnamed: 0,gameId,playId,playDescription,quarter,down,yardsToGo,possessionTeam,specialTeamsPlayType,specialTeamsResult,kickerId,returnerId,yardlineSide,yardlineNumber,gameClock,penaltyCodes,penaltyJerseyNumbers,penaltyYards,preSnapHomeScore,preSnapVisitorScore,kickLength,kickReturnYardage,playResult,absoluteYardlineNumber,hangTime,kickType,kickDirectionIntended,kickDirectionActual,returnDirectionIntended,returnDirectionActual,missedTackler,assistTackler,tackler,kickoffReturnFormation,specialTeamsSafeties
1,2018090600,677,"M.Bosher kicks 64 yards from ATL 35 to PHI 1. S.Gibson to PHI 31 for 30 yards (D.Kazee, D.Riley).",1,0,0,ATL,Kickoff,Return,37267.0,44979,ATL,35,05:01:00,NP,NP,0,0,3,64.0,30.0,34,75,4.06,D,R,R,C,C,ATL 83,ATL 22,ATL 27,8-0-2,ATL 17; ATL 22
3,2018090600,1606,"M.Bosher kicks 68 yards from ATL 35 to PHI -3. S.Gibson to PHI 10 for 13 yards (K.Ishmael). PENALTY on PHI-A.Maddox, Offensive Holding, 5 yards, enforced at PHI 10.",2,0,0,ATL,Kickoff,Return,37267.0,44979,ATL,35,02:08:00,OH,PHI 29,5,3,6,68.0,13.0,60,45,3.63,D,C,C,C,R,returner_tackled,Solo,ATL 36,8-0-2,ATL 14; ATL 17


In [155]:
#given that a value in assist tackler implies one player has assisted another in tackling a returner, 
#a lack of value or a null in these two columns likely means the returner stepped out of bounds, a TD return, a penalty, or something else. 
#lets investigate
kickoff_df[(kickoff_df['assistTackler'].isnull() ) & (kickoff_df['tackler'].isnull())].head(1)

Unnamed: 0,gameId,playId,playDescription,quarter,possessionTeam,kickerId,returnerId,yardlineSide,yardlineNumber,gameClock,penaltyCodes,penaltyJerseyNumbers,penaltyYards,preSnapHomeScore,preSnapVisitorScore,kickLength,kickReturnYardage,absoluteYardlineNumber,hangTime,kickType,kickDirectionIntended,kickDirectionActual,returnDirectionIntended,returnDirectionActual,missedTackler,assistTackler,tackler,kickoffReturnFormation,specialTeamsSafeties
20,2018090901,2022,"C.Boswell kicks 65 yards from PIT 35 to CLE 0. J.Peppers to CLE 24 for 24 yards (T.Matakevich). PENALTY on CLE-D.Rice, Illegal Block Above the Waist, 10 yards, enforced at CLE 24.",3,PIT,41953.0,44837,PIT,35,15:00:00,IBW,CLE 37,10,0,7,65.0,24.0,45,3.9,D,C,C,L,L,,,,8-0-1,PIT 20; PIT 28


In [156]:
#Total number of returns without a tackle
kickoff_df[(kickoff_df['assistTackler'].isnull() ) & (kickoff_df['tackler'].isnull())].shape

(231, 29)

In [162]:
#check for plays with a TD
td_return = kickoff_df[kickoff_df['playDescription'].map(lambda x: True if 'TOUCHDOWN' in x else False)].index

In [167]:
#kickoff_df[(kickoff_df['assistTackler'].isnull() ) & (kickoff_df['tackler'].isnull())].loc[td_return]
#All td_returns have NaNs in tackler cols

In [172]:
#Filling NaNs with NT for no tackle
kickoff_df.loc[td_return, 'assistTackler'] = kickoff_df.loc[td_return, 'assistTackler'].fillna('NT')
kickoff_df.loc[td_return, 'tackler'] = kickoff_df.loc[td_return, 'tackler'].fillna('NT')

In [174]:
#59 of these kickoffs without a tackle involved no penalty
kickoff_df[(kickoff_df['assistTackler'].isnull()) & (kickoff_df['tackler'].isnull()) & (kickoff_df['penaltyYards'] == 0) & (kickoff_df['penaltyCodes'] == 'NP')].shape

(42, 29)

In [179]:
kickoff_df.isna().sum().sort_values(ascending = False)[:10]

tackler                    213
assistTackler              213
missedTackler              173
returnDirectionIntended     27
returnDirectionActual       25
specialTeamsSafeties        18
kickLength                   0
kickoffReturnFormation       0
kickDirectionActual          0
kickDirectionIntended        0
dtype: int64

In [186]:
#For times sake I'm going to fill the rest of the NaNs in tackling cols as NT for no tackle
kickoff_df.loc[kickoff_df[kickoff_df['tackler'].isna()].index, 'tackler'] = kickoff_df.loc[kickoff_df[kickoff_df['tackler'].isna()].index, 'tackler'].fillna('NT')

In [188]:
kickoff_df.loc[kickoff_df[kickoff_df['assistTackler'].isna()].index, 'assistTackler'] = kickoff_df.loc[kickoff_df[kickoff_df['assistTackler'].isna()].index, 'assistTackler'].fillna('NT')

In [190]:
kickoff_df.isna().sum().sort_values(ascending = False)[:5]

missedTackler              173
returnDirectionIntended     27
returnDirectionActual       25
specialTeamsSafeties        18
kickLength                   0
dtype: int64

In [195]:
kickoff_df.loc[kickoff_df[kickoff_df['missedTackler'].isna()].index, 'missedTackler'] = kickoff_df.loc[kickoff_df[kickoff_df['missedTackler'].isna()].index, 'missedTackler'].fillna('NT')

#### Addressing All other Null columns

In [105]:
kickoff_df.isna().sum().sort_values(ascending = False)[:10]

tackler                    238
assistTackler              238
hangTime                   230
missedTackler              179
yardlineSide                30
returnDirectionIntended     27
returnDirectionActual       25
specialTeamsSafeties        18
kickReturnYardage            5
kickLength                   0
dtype: int64

In [106]:
#For ease to modeling, given current time constraints I'm going to drop the columns ith 30 or less nulls after looking at them

In [108]:
#Kicks from midfield have no yardline side
midfield = kickoff_df[kickoff_df['yardlineSide'].isna()].index

In [112]:
kickoff_df.loc[midfield, 'yardlineSide'] = kickoff_df.loc[midfield, 'yardlineSide'].fillna('midfield')

In [113]:
kickoff_df.isna().sum().sort_values(ascending = False)[:5]

tackler                    238
assistTackler              238
hangTime                   230
missedTackler              179
returnDirectionIntended     27
returnDirectionActual       25
specialTeamsSafeties        18
kickReturnYardage            5
kickLength                   0
kickoffReturnFormation       0
dtype: int64

In [115]:
#fumbles on kickoffs
fumble_rec = kickoff_df[kickoff_df['kickReturnYardage'].isna()].index

In [120]:
#dropped
kickoff_df.drop(fumble_rec, inplace=True)

In [124]:
kickoff_df.isna().sum().sort_values(ascending = False)[:8]

tackler                    238
assistTackler              238
hangTime                   228
missedTackler              179
returnDirectionIntended     27
returnDirectionActual       25
specialTeamsSafeties        18
kickLength                   0
dtype: int64

In [128]:
kickoff_df.shape

(2777, 29)

In [132]:
#So null hang times fall into a Squib kick (Q) or a Flat kick (k) based on info from the data dictionary (https://www.kaggle.com/c/nfl-big-data-bowl-2022/data)
#Need to double check if any other instances of these occur
kickoff_df[kickoff_df['hangTime'].isna()]['kickType'].value_counts()

Q    121
F    107
Name: kickType, dtype: int64

In [133]:
#It looks like we have one flat has a hang time given
kickoff_df['kickType'].value_counts()

D    2307
P     195
Q     121
F     108
K      46
Name: kickType, dtype: int64

In [137]:
kickoff_df[(kickoff_df['kickType'] == 'F') & (kickoff_df['hangTime'].notna())]

Unnamed: 0,gameId,playId,playDescription,quarter,possessionTeam,kickerId,returnerId,yardlineSide,yardlineNumber,gameClock,penaltyCodes,penaltyJerseyNumbers,penaltyYards,preSnapHomeScore,preSnapVisitorScore,kickLength,kickReturnYardage,absoluteYardlineNumber,hangTime,kickType,kickDirectionIntended,kickDirectionActual,returnDirectionIntended,returnDirectionActual,missedTackler,assistTackler,tackler,kickoffReturnFormation,specialTeamsSafeties
2871,2019091513,2481,M.Bryant kicks 65 yards from ATL 35 to PHI 0. M.Sanders to PHI 29 for 29 yards (I.Oliver).,3,ATL,27091.0,47836,ATL,35,13:23:00,NP,NP,0,17,6,65.0,29.0,75,3.2,F,C,C,L,L,ATL 83,Solo,ATL 26,8-0-2,ATL 26; ATL 33


So then why does only this flat kick have a hang time? I guess that PFF may have categorized this one incorrectly? 

For the time being I'm going to drop this kickoff, but I may come back and attempt to reclassify it at a later date.

In [139]:
kickoff_df.drop(2871, inplace=True)

In [143]:
#I haven't been able to find information to adequately understand what a 'Flat Kick' is so I'm going to seperatate out to Squib and Flat
Q_kicks = kickoff_df[kickoff_df['kickType'] == 'Q']['kickType'].index
Q_kicks

Int64Index([  13,   33,  249,  271,  462,  481,  515,  735,  756,  802,
            ...
            7182, 7197, 7270, 7374, 7425, 7435, 7450, 7548, 7652, 7727],
           dtype='int64', length=121)

In [146]:
kickoff_df.loc[Q_kicks, 'hangTime'] = kickoff_df.loc[Q_kicks, 'hangTime'].fillna(0)
kickoff_df.head(1)

Unnamed: 0,gameId,playId,playDescription,quarter,possessionTeam,kickerId,returnerId,yardlineSide,yardlineNumber,gameClock,penaltyCodes,penaltyJerseyNumbers,penaltyYards,preSnapHomeScore,preSnapVisitorScore,kickLength,kickReturnYardage,absoluteYardlineNumber,hangTime,kickType,kickDirectionIntended,kickDirectionActual,returnDirectionIntended,returnDirectionActual,missedTackler,assistTackler,tackler,kickoffReturnFormation,specialTeamsSafeties
1,2018090600,677,"M.Bosher kicks 64 yards from ATL 35 to PHI 1. S.Gibson to PHI 31 for 30 yards (D.Kazee, D.Riley).",1,ATL,37267.0,44979,ATL,35,05:01:00,NP,NP,0,0,3,64.0,30.0,75,4.06,D,R,R,C,C,ATL 83,ATL 22,ATL 27,8-0-2,ATL 17; ATL 22


In [150]:
kickoff_df.shape

(2776, 29)

In [151]:
#I haven't been able to find information to adequately understand what a 'Flat Kick' is so for the time being I'm going to drop these from the overall dataset
#The type of kickoff kick that is selected should always be assumed to be an intentional decision, given my lack of knowledge here I don't want to include these 
#If I don't know why a flat kick would be used.
#F_kicks = kickoff_df[kickoff_df['kickType'] == 'F']['kickType'].index
kickoff_df.drop(F_kicks, inplace=True)

In [198]:
#Just going to fill the rest of these with 0s for now, may not end up using the player tracking columns immediately, so these may be dropped before modeling.
kickoff_df.isna().sum().sort_values(ascending = False)[:5]

returnDirectionIntended    27
returnDirectionActual      25
specialTeamsSafeties       18
kickLength                  0
kickoffReturnFormation      0
dtype: int64

In [204]:
kickoff_df.loc[kickoff_df[kickoff_df['specialTeamsSafeties'].isna()].index, 'specialTeamsSafeties'] = kickoff_df.loc[kickoff_df[kickoff_df['specialTeamsSafeties'].isna()].index, 'specialTeamsSafeties'].fillna('no safety')

In [212]:
kickoff_df.shape

(2669, 29)

In [214]:
#There's actually a large amount of overlap between the nans in both actual and intended return direction to the point that I'm just going to drop these 27 rows.
kickoff_df.drop(kickoff_df[kickoff_df['returnDirectionIntended'].isna()].index, inplace=True)

In [216]:
#and now we finally have all of the nulls from the kickoff data dropped or filled.
kickoff_df.isna().sum()

gameId                     0
playId                     0
playDescription            0
quarter                    0
possessionTeam             0
kickerId                   0
returnerId                 0
yardlineSide               0
yardlineNumber             0
gameClock                  0
penaltyCodes               0
penaltyJerseyNumbers       0
penaltyYards               0
preSnapHomeScore           0
preSnapVisitorScore        0
kickLength                 0
kickReturnYardage          0
absoluteYardlineNumber     0
hangTime                   0
kickType                   0
kickDirectionIntended      0
kickDirectionActual        0
returnDirectionIntended    0
returnDirectionActual      0
missedTackler              0
assistTackler              0
tackler                    0
kickoffReturnFormation     0
specialTeamsSafeties       0
dtype: int64

In [222]:
kickoff_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2642 entries, 1 to 7839
Data columns (total 29 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   gameId                   2642 non-null   int64  
 1   playId                   2642 non-null   int64  
 2   playDescription          2642 non-null   object 
 3   quarter                  2642 non-null   int64  
 4   possessionTeam           2642 non-null   object 
 5   kickerId                 2642 non-null   float64
 6   returnerId               2642 non-null   object 
 7   yardlineSide             2642 non-null   object 
 8   yardlineNumber           2642 non-null   int64  
 9   gameClock                2642 non-null   object 
 10  penaltyCodes             2642 non-null   object 
 11  penaltyJerseyNumbers     2642 non-null   object 
 12  penaltyYards             2642 non-null   int32  
 13  preSnapHomeScore         2642 non-null   int64  
 14  preSnapVisitorScore     

In [223]:
kickoff_df.to_csv('../data/kickoff_no_nulls.csv')

### Player Data (Grab later or delete)

In [220]:
# #Can get further measurables on returners and others on kick and covarage unit by using the Player DF
# # Will look at this more later, need to build a model first
# player_df[player_df['nflId'] == 44979]

In [218]:
# player_df = pd.read_csv('../data/players.csv')
# player_df.head()

In [219]:
# player_df['Position'].value_counts()