# Data Preprocessing

## Imports

#### Library Imports and Initial Settings

In [1]:
import pandas as pd
import numpy as np

# Lift the column-count limit so displayed DataFrames show every column.
pd.options.display.max_columns = None

#### Data Imports

In [2]:
# Load every raw CSV export from the imports/ folder into its own DataFrame.

# Files 01-04: column names are taken from the second line of each file.
prr, defense, kpr, kp = (
    pd.read_csv(csv_path, header=1)
    for csv_path in (
        'imports/01_passing-rushing-receiving.csv',
        'imports/02_defense.csv',
        'imports/03_kick-punt-returns.csv',
        'imports/04_kicking-punting.csv',
    )
)

# Files 05-08 (advanced stats): read with pandas' default header handling.
advpass, advrush, advrec, advdef = (
    pd.read_csv(csv_path)
    for csv_path in (
        'imports/05_adv-passing.csv',
        'imports/06_adv-rushing.csv',
        'imports/07_adv-receiving.csv',
        'imports/08_adv-defense.csv',
    )
)

# Files 09-10 (snap counts): column names are taken from the second line.
homesnap, awaysnap = (
    pd.read_csv(csv_path, header=1)
    for csv_path in (
        'imports/09_home-snap-counts.csv',
        'imports/10_away-snap-counts.csv',
    )
)

# File 11 (play-by-play): read with pandas' default header handling.
plays = pd.read_csv('imports/11_play-by-play.csv')

## Initial Preprocessing and Column Renaming

#### 01_passing-rushing-receiving.csv

In [3]:
# Keep only the text before the first backslash in each player entry.
players = [entry.partition('\\')[0] for entry in prr['Player']]
prr['Player'] = players

# Carve the combined table into one DataFrame per stat family. Each frame
# keeps only the rows whose count column for that family is positive, and
# the pandas-suffixed duplicates (.1, .2, .3) are renamed where needed.
pas = (
    prr.loc[prr['Att'] > 0,
            ['Player','Tm','Cmp','Att','Yds','TD','Int','Sk','Yds.1','Lng','Rate']]
    .rename(columns={'Yds.1':'SkYds'})
)
rush = (
    prr.loc[prr['Att.1'] > 0,
            ['Player','Tm','Att.1','Yds.2','TD.1','Lng.1']]
    .rename(columns={'Att.1':'Att','Yds.2':'Yds','TD.1':'TD','Lng.1':'Lng'})
)
rec = (
    prr.loc[prr['Tgt'] > 0,
            ['Player','Tm','Tgt','Rec','Yds.3','TD.2','Lng.2']]
    .rename(columns={'Yds.3':'Yds','TD.2':'TD','Lng.2':'Lng'})
)
# The fumble columns already carry their final names, so no rename here.
fumbles = prr.loc[prr['Fmb'] > 0, ['Player','Tm','Fmb','FL']]

#### 02_defense.csv

In [4]:
# Keep only the text before the first backslash in each player entry.
players = [entry.partition('\\')[0] for entry in defense['Player']]
defense['Player'] = players

# airD: rows where either the Int or the PD count is positive.
airD = defense.loc[
    (defense['Int'] > 0) | (defense['PD'] > 0),
    ['Player','Tm','Int','Yds','TD','Lng','PD'],
]

# groundD: every row (no filter), with the suffixed Yds.1/TD.1 columns
# renamed to FYds/FTD (presumably fumble-return stats - confirm against the CSV).
groundD = (
    defense.loc[:, ['Player','Tm','Sk','Comb','Solo','Ast','TFL','QBHits','FR','Yds.1','TD.1','FF']]
    .rename(columns={'Yds.1':'FYds','TD.1':'FTD'})
)

#### 03_kick-punt-returns.csv

In [5]:
# Keep only the text before the first backslash in each player entry.
players = [entry.partition('\\')[0] for entry in kpr['Player']]
kpr['Player'] = players

# kr: rows with a positive Rt count; these columns already have final names.
kr = kpr.loc[kpr['Rt'] > 0, ['Player','Tm','Rt','Yds','Y/Rt','TD','Lng']]

# pr: rows with a positive Ret count; the suffixed columns are renamed so
# pr shares kr's Yds / Y/Rt / TD / Lng labels.
pr = (
    kpr.loc[kpr['Ret'] > 0, ['Player','Tm','Ret','Yds.1','Y/R','TD.1','Lng.1']]
    .rename(columns={'Yds.1':'Yds','Y/R':'Y/Rt','TD.1':'TD','Lng.1':'Lng'})
)

#### 04_kicking-punting.csv

In [6]:
# Keep only the text before the first backslash in each player entry.
players = [entry.partition('\\')[0] for entry in kp['Player']]
kp['Player'] = players

# Two column subsets of the same table, all rows kept:
# kick takes the XP/FG columns, punt takes the Pnt/Yds/Y/P/Lng columns.
kick = kp.loc[:, ['Player','Tm','XPM','XPA','FGM','FGA']]
punt = kp.loc[:, ['Player','Tm','Pnt','Yds','Y/P','Lng']]

#### 05_adv-passing.csv

In [7]:
# Keep only the text before the first backslash in each player entry.
players = [entry.partition('\\')[0] for entry in advpass['Player']]
advpass['Player'] = players

# Narrow the table to the identifier columns plus the advanced passing
# metrics listed below; every other column is dropped.
advpass = advpass.loc[:, [
    'Player','Tm',
    '1D','1D%',
    'IAY','IAY/PA',
    'CAY','CAY/Cmp','CAY/PA',
    'YAC','YAC/Cmp',
    'Drops','Drop%',
    'BadTh','Bad%',
    'Sk','Bltz','Hrry','Hits',
    'Prss','Prss%',
    'Scrm','Yds/Scr',
]]


#### 06_adv-rushing.csv

In [8]:
# Keep only the text before the first backslash in each player entry.
players = [entry.partition('\\')[0] for entry in advrush['Player']]
advrush['Player'] = players

# Narrow the table to the identifier columns plus the advanced rushing
# metrics; every other column is dropped.
advrush = advrush.loc[:, [
    'Player','Tm',
    'YBC','YBC/Att',
    'YAC','YAC/Att',
    'BrkTkl','Att/Br',
]]

#### 07_adv-receiving.csv

In [9]:
# Keep only the text before the first backslash in each player entry.
players = [entry.partition('\\')[0] for entry in advrec['Player']]
advrec['Player'] = players

# Narrow the table to the identifier columns plus the advanced receiving
# metrics; every other column is dropped.
advrec = advrec.loc[:, [
    'Player','Tm',
    '1D',
    'YBC','YBC/R',
    'YAC','YAC/R',
    'ADOT',
    'BrkTkl','Rec/Br',
    'Drop','Drop%',
    'Int','Rat',
]]

#### 08_adv-defense.csv

In [10]:
# Keep only the text before the first backslash in each player entry.
players = [entry.partition('\\')[0] for entry in advdef['Player']]
advdef['Player'] = players

# Narrow the table to the identifier columns plus the advanced defensive
# metrics; every other column is dropped.
advdef = advdef.loc[:, [
    'Player','Tm',
    'Tgt','Cmp','Cmp%',
    'Yds','Yds/Cmp','Yds/Tgt',
    'TD','Rat','DADOT','Air','YAC',
    'Bltz','Hrry','QBKD','Sk','Prss',
    'Comb','MTkl','MTkl%',
]]

#### 09_home-snap-counts.csv

In [15]:
# Cleaning Player Names: keep only the text before the first backslash.
players = [player.split('\\')[0] for player in homesnap['Player']]
homesnap['Player'] = players

# Renaming Columns: give each Num/Pct pair a descriptive prefix.
# 'Pct.1' becomes 'defPct' (it was previously mapped to 'defSnaps', which
# left the frame with two columns labelled 'defSnaps' and no 'defPct').
homesnap = homesnap.rename(columns={'Num':'offSnaps','Pct':'offPct','Num.1':'defSnaps',
                                   'Pct.1':'defPct','Num.2':'stSnaps','Pct.2':'stPct'})

#### 10_away-snap-counts.csv

In [17]:
# Cleaning Player Names: keep only the text before the first backslash.
players = [player.split('\\')[0] for player in awaysnap['Player']]
awaysnap['Player'] = players

# Renaming Columns: give each Num/Pct pair a descriptive prefix.
# 'Pct.1' becomes 'defPct' (it was previously mapped to 'defSnaps', which
# left the frame with two columns labelled 'defSnaps' and no 'defPct').
awaysnap = awaysnap.rename(columns={'Num':'offSnaps','Pct':'offPct','Num.1':'defSnaps',
                                   'Pct.1':'defPct','Num.2':'stSnaps','Pct.2':'stPct'})

#### 11_play-by-play.csv

In [18]:
# No Handling Here Yet

Unnamed: 0,Quarter,Time,Down,ToGo,Location,Detail,MIN,CIN,EPB,EPA
0,,,,,,Bengals won the coin toss and deferred Vikings...,,,,
1,1,15:00,,,CIN 35,Evan McPherson kicks off 65 yards touchback.,0.0,0.0,0.00,0.61
2,1,15:00,1.0,10.0,MIN 25,Penalty on C.J. Ham: False Start 5 yards (acce...,0.0,0.0,0.61,0.28
3,1,15:00,1.0,15.0,MIN 20,Kirk Cousins pass complete short right to Dalv...,0.0,0.0,0.28,0.60
4,1,14:25,2.0,6.0,MIN 29,Penalty on Tyler Conklin: False Start 5 yards ...,0.0,0.0,0.60,-0.07
...,...,...,...,...,...,...,...,...,...,...
189,OT,1:11,3.0,2.0,CIN 47,Joe Mixon up the middle for 1 yard (tackle by ...,24.0,24.0,1.36,-0.19
190,OT,0:40,4.0,1.0,CIN 48,Joe Burrow pass complete short left to C.J. Uz...,24.0,24.0,-0.19,4.24
191,OT,0:31,1.0,10.0,MIN 20,Joe Mixon up the middle for 5 yards (tackle by...,24.0,24.0,4.24,4.28
192,OT,0:06,2.0,6.0,MIN 16,Joe Burrow spiked the ball,24.0,24.0,4.28,3.52


## Creating Summary DataFrames for Export