# Data Preprocessing

## Imports

#### Library Imports and Initial Settings

In [1]:
import pandas as pd
import numpy as np

# Show every column when a DataFrame is displayed (None lifts the column cap).
pd.options.display.max_columns = None

#### Data Imports

In [2]:
# --- Files read with header=1: column names are taken from the second line ---
# 01_passing-rushing-receiving.csv
prr = pd.read_csv(
    'imports/01_passing-rushing-receiving.csv',
    header=1,
)
# 02_defense.csv
defense = pd.read_csv(
    'imports/02_defense.csv',
    header=1,
)
# 03_kick-punt-returns.csv
kpr = pd.read_csv(
    'imports/03_kick-punt-returns.csv',
    header=1,
)
# 04_kicking-punting.csv
kp = pd.read_csv(
    'imports/04_kicking-punting.csv',
    header=1,
)
# 09_home-snap-counts.csv
homesnap = pd.read_csv(
    'imports/09_home-snap-counts.csv',
    header=1,
)
# 10_away-snap-counts.csv
awaysnap = pd.read_csv(
    'imports/10_away-snap-counts.csv',
    header=1,
)

# --- Files read with the default header: column names are on the first line ---
# 05_adv-passing.csv
advpass = pd.read_csv('imports/05_adv-passing.csv')
# 06_adv-rushing.csv
advrush = pd.read_csv('imports/06_adv-rushing.csv')
# 07_adv-receiving.csv
advrec = pd.read_csv('imports/07_adv-receiving.csv')
# 08_adv-defense.csv
advdef = pd.read_csv('imports/08_adv-defense.csv')
# 11_play-by-play.csv
plays = pd.read_csv('imports/11_play-by-play.csv')

## Initial Preprocessing and Column Renaming

#### 01_passing-rushing-receiving.csv

In [3]:
# Cleaning Player Names
# Keep only the text before the first backslash in each Player entry.
# The vectorized .str accessor leaves missing values as NaN instead of raising
# on a non-string entry the way a Python-level .split() call would.
prr['Player'] = prr['Player'].str.split('\\').str[0]

# Pulling DataFrames from 01_passing-rushing-receiving.csv
# Each frame keeps only the rows with a positive count in its own stat group.
# .copy() makes every result an independent frame, so relabelling or editing
# it later can never touch (or warn about) `prr`.
pas = prr.loc[prr['Att'] > 0,
              ['Player','Tm','Cmp','Att','Yds','TD','Int','Sk','Yds.1','Lng','Rate']].copy()
rush = prr.loc[prr['Att.1'] > 0,
               ['Player','Tm','Att.1','Yds.2','TD.1','Lng.1']].copy()
rec = prr.loc[prr['Tgt'] > 0,
              ['Player','Tm','Tgt','Rec','Yds.3','TD.2','Lng.2']].copy()
fumbles = prr.loc[prr['Fmb'] > 0, ['Player','Tm','Fmb','FL']].copy()

# Renaming Columns for New DataFrames
# The labels are given as a FLAT list on purpose: a nested list ([[...]]) is
# interpreted by pandas as a list of level arrays and yields a one-level
# MultiIndex, so the frames would not have ordinary string column labels.
# This replaces the numbered suffixes ('Yds.1', 'Att.1', ...) with plain names
# that are unambiguous within each frame.
pas.columns = ['Player','Tm','Cmp','Att','Yds','TD','Int','Sk','SkYds','Lng','Rate']
rush.columns = ['Player','Tm','Att','Yds','TD','Lng']
rec.columns = ['Player','Tm','Tgt','Rec','Yds','TD','Lng']

#### 02_defense.csv

In [4]:
# Cleaning Player Names
# Keep only the text before the first backslash in each Player entry.
# The vectorized .str accessor leaves missing values as NaN instead of raising.
defense['Player'] = defense['Player'].str.split('\\').str[0]

# Pulling DataFrames from 02_defense.csv
# airD: only rows with at least one interception or one pass defended.
airD = defense.loc[(defense['Int'] > 0) | (defense['PD'] > 0),
                   ['Player','Tm','Int','Yds','TD','Lng','PD']].copy()
# groundD: every row, restricted to the sack/tackle/fumble columns.
# .copy() keeps the relabelling below independent of `defense`.
groundD = defense[['Player','Tm','Sk','Comb','Solo','Ast','TFL','QBHits','FR','Yds.1','TD.1','FF']].copy()

# Renaming Columns for New DataFrames
# Flat list on purpose: a nested list ([[...]]) would be read as a list of
# level arrays and turn the columns into a one-level MultiIndex.
groundD.columns = ['Player','Tm','Sk','Comb','Solo','Ast','TFL','QBHit','FR','FYds','FTD','FF']

#### 03_kick-punt-returns.csv

In [5]:
# Cleaning Player Names
# Keep only the text before the first backslash in each Player entry.
# The vectorized .str accessor leaves missing values as NaN instead of raising.
kpr['Player'] = kpr['Player'].str.split('\\').str[0]

# Pulling DataFrames from 03_kick-punt-returns.csv
# kr: rows with at least one kick return; pr: rows with at least one punt return.
# .copy() makes each result independent of `kpr`.
kr = kpr.loc[kpr['Rt'] > 0, ['Player','Tm','Rt','Yds','Y/Rt','TD','Lng']].copy()
pr = kpr.loc[kpr['Ret'] > 0, ['Player','Tm','Ret','Yds.1','Y/R','TD.1','Lng.1']].copy()

# Renaming Columns for New DataFrames
# Give `pr` the same labels as `kr`. The list must be FLAT: a nested list
# ([[...]]) would produce a one-level MultiIndex, leaving `pr` with a different
# kind of column index than `kr` even though the names look identical.
pr.columns = ['Player','Tm','Rt','Yds','Y/Rt','TD','Lng']

#### 04_kicking-punting.csv

In [6]:
# Strip everything from the first backslash onward in each Player entry.
kp['Player'] = [raw_name.partition('\\')[0] for raw_name in kp['Player']]

# Split 04_kicking-punting.csv into a kicking frame and a punting frame
# (all rows are kept; only the columns differ).
kick = kp[[
    'Player', 'Tm',
    'XPM', 'XPA', 'FGM', 'FGA',
]]
punt = kp[[
    'Player', 'Tm',
    'Pnt', 'Yds', 'Y/P', 'Lng',
]]

#### 05_adv-passing.csv

In [7]:
# Strip everything from the first backslash onward in each Player entry.
advpass['Player'] = [raw_name.partition('\\')[0] for raw_name in advpass['Player']]

# Keep only the advanced-passing columns listed here; every other column
# is dropped as being covered elsewhere.
advpass = advpass[[
    'Player', 'Tm',
    '1D', '1D%',
    'IAY', 'IAY/PA',
    'CAY', 'CAY/Cmp', 'CAY/PA',
    'YAC', 'YAC/Cmp',
    'Drops', 'Drop%',
    'BadTh', 'Bad%',
    'Sk', 'Bltz', 'Hrry', 'Hits',
    'Prss', 'Prss%',
    'Scrm', 'Yds/Scr',
]]


#### 06_adv-rushing.csv

In [8]:
# Strip everything from the first backslash onward in each Player entry.
advrush['Player'] = [raw_name.partition('\\')[0] for raw_name in advrush['Player']]

# Keep only the advanced-rushing columns listed here; every other column
# is dropped as being covered elsewhere.
advrush = advrush[[
    'Player', 'Tm',
    'YBC', 'YBC/Att',
    'YAC', 'YAC/Att',
    'BrkTkl', 'Att/Br',
]]

#### 07_adv-receiving.csv

In [9]:
# Strip everything from the first backslash onward in each Player entry.
advrec['Player'] = [raw_name.partition('\\')[0] for raw_name in advrec['Player']]

# Keep only the advanced-receiving columns listed here; every other column
# is dropped as being covered elsewhere.
advrec = advrec[[
    'Player', 'Tm',
    '1D',
    'YBC', 'YBC/R',
    'YAC', 'YAC/R',
    'ADOT',
    'BrkTkl', 'Rec/Br',
    'Drop', 'Drop%',
    'Int', 'Rat',
]]

#### 08_adv-defense.csv

In [10]:
# Strip everything from the first backslash onward in each Player entry.
advdef['Player'] = [raw_name.partition('\\')[0] for raw_name in advdef['Player']]

# Keep only the advanced-defense columns listed here; every other column
# is dropped as being covered elsewhere.
advdef = advdef[[
    'Player', 'Tm',
    'Tgt', 'Cmp', 'Cmp%',
    'Yds', 'Yds/Cmp', 'Yds/Tgt',
    'TD', 'Rat', 'DADOT', 'Air', 'YAC',
    'Bltz', 'Hrry', 'QBKD', 'Sk', 'Prss',
    'Comb', 'MTkl', 'MTkl%',
]]

#### 09_home-snap-counts.csv

In [11]:
# Cleaning Player Names
# Keep only the text before the first backslash in each Player entry.
# The vectorized .str accessor leaves missing values as NaN instead of raising.
homesnap['Player'] = homesnap['Player'].str.split('\\').str[0]

# Renaming Columns
# Relabel all eight columns positionally (the frame must have exactly eight).
# The labels are a FLAT list on purpose: a nested list ([[...]]) would be read
# as a list of level arrays and turn the columns into a one-level MultiIndex.
homesnap.columns = ['Player','Pos','offSnaps','offPct','defSnaps','defPct','stSnaps','stPct']

#### 10_away-snap-counts.csv

In [12]:
# Cleaning Player Names
# Keep only the text before the first backslash in each Player entry.
# The vectorized .str accessor leaves missing values as NaN instead of raising.
awaysnap['Player'] = awaysnap['Player'].str.split('\\').str[0]

# Renaming Columns
# Relabel all eight columns positionally (the frame must have exactly eight).
# The labels are a FLAT list on purpose: a nested list ([[...]]) would be read
# as a list of level arrays and turn the columns into a one-level MultiIndex.
awaysnap.columns = ['Player','Pos','offSnaps','offPct','defSnaps','defPct','stSnaps','stPct']

#### 11_play-by-play.csv

In [13]:
# TODO: no preprocessing yet -- `plays` is loaded above but left unchanged here.

## Creating Summary DataFrames for Export