# Data Preprocessing

## Imports

#### Library Imports and Initial Settings

In [1]:
import pandas as pd
import numpy as np
import json

pd.set_option('display.max_columns', None)

#### Data Imports

In [2]:
# Load every per-game CSV from the local 'imports' directory in one pass.
# Each entry pairs a file path with the keyword arguments given to pd.read_csv:
# {'header': 1} takes the column names from the second line of the file,
# {} keeps pandas' default header handling.
(prr, defense, kpr, kp,
 advpass, advrush, advrec, advdef,
 homesnap, awaysnap, plays) = (
    pd.read_csv(csv_path, **read_options)
    for csv_path, read_options in [
        ('imports/01_passing-rushing-receiving.csv', {'header': 1}),  # -> prr
        ('imports/02_defense.csv', {'header': 1}),                    # -> defense
        ('imports/03_kick-punt-returns.csv', {'header': 1}),          # -> kpr
        ('imports/04_kicking-punting.csv', {'header': 1}),            # -> kp
        ('imports/05_adv-passing.csv', {}),                           # -> advpass
        ('imports/06_adv-rushing.csv', {}),                           # -> advrush
        ('imports/07_adv-receiving.csv', {}),                         # -> advrec
        ('imports/08_adv-defense.csv', {}),                           # -> advdef
        ('imports/09_home-snap-counts.csv', {'header': 1}),           # -> homesnap
        ('imports/10_away-snap-counts.csv', {'header': 1}),           # -> awaysnap
        ('imports/11_play-by-play.csv', {}),                          # -> plays
    ]
)

## Initial Preprocessing and Column Renaming

#### 01_passing-rushing-receiving.csv

In [3]:
# Keep only the text before the first backslash in each 'Player' value
players = [raw_name.partition('\\')[0] for raw_name in prr['Player']]
prr['Player'] = players

# Split 01_passing-rushing-receiving.csv into one frame per stat family.
# Each frame keeps only the rows with activity in that family and gets the
# pandas-deduplicated column names ('Yds.1', 'Att.1', ...) mapped back to plain ones.
pas = prr.loc[
    prr['Att'] > 0,
    ['Player', 'Tm', 'Cmp', 'Att', 'Yds', 'TD', 'Int', 'Sk', 'Yds.1', 'Lng', 'Rate'],
].rename(columns={'Yds.1': 'SkYds'})

rush = prr.loc[
    prr['Att.1'] > 0,
    ['Player', 'Tm', 'Att.1', 'Yds.2', 'TD.1', 'Lng.1'],
].rename(columns={'Att.1': 'Att', 'Yds.2': 'Yds', 'TD.1': 'TD', 'Lng.1': 'Lng'})

rec = prr.loc[
    prr['Tgt'] > 0,
    ['Player', 'Tm', 'Tgt', 'Rec', 'Yds.3', 'TD.2', 'Lng.2'],
].rename(columns={'Yds.3': 'Yds', 'TD.2': 'TD', 'Lng.2': 'Lng'})

# Fumble columns need no renaming
fumbles = prr.loc[prr['Fmb'] > 0, ['Player', 'Tm', 'Fmb', 'FL']]

#### 02_defense.csv

In [4]:
# Keep only the text before the first backslash in each 'Player' value
players = [raw_name.partition('\\')[0] for raw_name in defense['Player']]
defense['Player'] = players

# airD: interception / pass-defended columns, limited to rows where either count is positive
airD = defense.loc[
    (defense['Int'] > 0) | (defense['PD'] > 0),
    ['Player', 'Tm', 'Int', 'Yds', 'TD', 'Lng', 'PD'],
]

# groundD: sack, tackle and fumble columns for every row, with the
# deduplicated 'Yds.1' / 'TD.1' columns renamed to 'FYds' / 'FTD'
groundD = defense.loc[
    :,
    ['Player', 'Tm', 'Sk', 'Comb', 'Solo', 'Ast', 'TFL', 'QBHits', 'FR', 'Yds.1', 'TD.1', 'FF'],
].rename(columns={'Yds.1': 'FYds', 'TD.1': 'FTD'})

#### 03_kick-punt-returns.csv

In [5]:
# Keep only the text before the first backslash in each 'Player' value
players = [raw_name.partition('\\')[0] for raw_name in kpr['Player']]
kpr['Player'] = players

# kr: rows with at least one return counted in 'Rt'
kr = kpr.loc[kpr['Rt'] > 0, ['Player', 'Tm', 'Rt', 'Yds', 'Y/Rt', 'TD', 'Lng']]

# pr: rows with at least one return counted in 'Ret'; columns are renamed so
# that pr ends up with the same column labels as kr, except the count column ('Ret')
pr = kpr.loc[
    kpr['Ret'] > 0,
    ['Player', 'Tm', 'Ret', 'Yds.1', 'Y/R', 'TD.1', 'Lng.1'],
].rename(columns={'Yds.1': 'Yds', 'Y/R': 'Y/Rt', 'TD.1': 'TD', 'Lng.1': 'Lng'})

#### 04_kicking-punting.csv

In [6]:
# Keep only the text before the first backslash in each 'Player' value
players = [raw_name.partition('\\')[0] for raw_name in kp['Player']]
kp['Player'] = players

# Split 04_kicking-punting.csv by column group; no row filtering is applied here
kick = kp.loc[:, ['Player', 'Tm', 'XPM', 'XPA', 'FGM', 'FGA']]
punt = kp.loc[:, ['Player', 'Tm', 'Pnt', 'Yds', 'Y/P', 'Lng']]

#### 05_adv-passing.csv

In [7]:
# Keep only the text before the first backslash in each 'Player' value
players = [raw_name.partition('\\')[0] for raw_name in advpass['Player']]
advpass['Player'] = players

# Narrow the frame to the advanced passing columns; per the original note,
# the columns left out are already included elsewhere
advpass = advpass.loc[:, [
    'Player', 'Tm',
    '1D', '1D%',
    'IAY', 'IAY/PA',
    'CAY', 'CAY/Cmp', 'CAY/PA',
    'YAC', 'YAC/Cmp',
    'Drops', 'Drop%',
    'BadTh', 'Bad%',
    'Sk', 'Bltz', 'Hrry', 'Hits',
    'Prss', 'Prss%',
    'Scrm', 'Yds/Scr',
]]


#### 06_adv-rushing.csv

In [8]:
# Keep only the text before the first backslash in each 'Player' value
players = [raw_name.partition('\\')[0] for raw_name in advrush['Player']]
advrush['Player'] = players

# Narrow the frame to the advanced rushing columns; per the original note,
# the columns left out are already included elsewhere
advrush = advrush.loc[:, [
    'Player', 'Tm',
    'YBC', 'YBC/Att',
    'YAC', 'YAC/Att',
    'BrkTkl', 'Att/Br',
]]

#### 07_adv-receiving.csv

In [9]:
# Keep only the text before the first backslash in each 'Player' value
players = [raw_name.partition('\\')[0] for raw_name in advrec['Player']]
advrec['Player'] = players

# Narrow the frame to the advanced receiving columns; per the original note,
# the columns left out are already included elsewhere
advrec = advrec.loc[:, [
    'Player', 'Tm', '1D',
    'YBC', 'YBC/R',
    'YAC', 'YAC/R',
    'ADOT',
    'BrkTkl', 'Rec/Br',
    'Drop', 'Drop%',
    'Int', 'Rat',
]]

#### 08_adv-defense.csv

In [10]:
# Keep only the text before the first backslash in each 'Player' value
players = [raw_name.partition('\\')[0] for raw_name in advdef['Player']]
advdef['Player'] = players

# Narrow the frame to the advanced defense columns; per the original note,
# the columns left out are already included elsewhere
advdef = advdef.loc[:, [
    'Player', 'Tm',
    'Tgt', 'Cmp', 'Cmp%',
    'Yds', 'Yds/Cmp', 'Yds/Tgt',
    'TD', 'Rat', 'DADOT', 'Air', 'YAC',
    'Bltz', 'Hrry', 'QBKD', 'Sk', 'Prss',
    'Comb', 'MTkl', 'MTkl%',
]]

#### 09_home-snap-counts.csv

In [11]:
# Cleaning Player Names
# Keep only the text before the first backslash in each 'Player' value
players = [player.split('\\')[0] for player in homesnap['Player']]
homesnap['Player'] = players

# Renaming Columns
# The generic 'Num'/'Pct' pairs (deduplicated by pandas as '.1', '.2') become
# off*/def*/st* names. 'Pct.1' maps to 'defPct': it was previously mapped to
# 'defSnaps', which produced two columns with the same label and made
# homesnap['defSnaps'] return a two-column DataFrame instead of a Series.
homesnap = homesnap.rename(columns={'Num':'offSnaps','Pct':'offPct','Num.1':'defSnaps',
                                   'Pct.1':'defPct','Num.2':'stSnaps','Pct.2':'stPct'})

#### 10_away-snap-counts.csv

In [12]:
# Cleaning Player Names
# Keep only the text before the first backslash in each 'Player' value
players = [player.split('\\')[0] for player in awaysnap['Player']]
awaysnap['Player'] = players

# Renaming Columns
# The generic 'Num'/'Pct' pairs (deduplicated by pandas as '.1', '.2') become
# off*/def*/st* names. 'Pct.1' maps to 'defPct': it was previously mapped to
# 'defSnaps', which produced two columns with the same label and made
# awaysnap['defSnaps'] return a two-column DataFrame instead of a Series.
awaysnap = awaysnap.rename(columns={'Num':'offSnaps','Pct':'offPct','Num.1':'defSnaps',
                                   'Pct.1':'defPct','Num.2':'stSnaps','Pct.2':'stPct'})

#### 11_play-by-play.csv

In [13]:
# No Handling Here Yet

## Creating Dictionaries for Database Upload

## Score

In [23]:
def safetyHandling(plays_df=None, away_abbr=None):
    """Return the points each team scored on safeties as ``(home, away)``.

    A play is counted when its 'EPA' value equals 2.
    NOTE(review): confirm that ``EPA == 2`` is the right marker for a safety
    in the play-by-play export.

    For each counted play, the first space-separated token of 'Location' is
    compared with the away abbreviation: on a match the home team is credited
    with 2 points, otherwise the away team is.

    Parameters
    ----------
    plays_df : pandas.DataFrame, optional
        Play-by-play table with 'Location' and 'EPA' columns. Defaults to the
        notebook-level ``plays`` frame, so existing ``safetyHandling()`` calls
        keep working.
    away_abbr : str, optional
        Away team abbreviation. Defaults to the notebook-level ``awayAbbr``.

    Returns
    -------
    tuple of (int, int)
        ``(homeSafetyPoints, awaySafetyPoints)`` as plain Python ints, so the
        totals remain JSON-serialisable downstream.
    """
    # Fall back to the notebook globals only when the caller passes nothing;
    # the lookup happens at call time, so the globals may be defined after this cell.
    if plays_df is None:
        plays_df = plays
    if away_abbr is None:
        away_abbr = awayAbbr

    safety_locations = plays_df.loc[plays_df['EPA'] == 2, 'Location']

    homeSafetyPoints = 0
    awaySafetyPoints = 0
    for location in safety_locations:
        # 'Location' looks like '<TEAM> <yard line>'; only the team token is compared
        side_of_field = location.split(' ')[0]
        if side_of_field == away_abbr:
            homeSafetyPoints += 2
        else:
            awaySafetyPoints += 2
    return homeSafetyPoints, awaySafetyPoints

In [24]:
# Display the kicking/punting table (the score calculation reads its XPM/XPA/FGM/FGA columns)
kp

Unnamed: 0,Player,Tm,XPM,XPA,FGM,FGA,Pnt,Yds,Y/P,Lng
0,Greg Joseph,MIN,3.0,3.0,1.0,1.0,0,0,,0
1,Jordan Berry,MIN,,,,,8,402,50.3,63
2,Evan McPherson,CIN,3.0,3.0,2.0,2.0,0,0,,0
3,Kevin Huber,CIN,,,,,7,332,47.4,61


In [15]:
# Pulling Home and Away Abbreviations
# The first row of the passing/rushing/receiving table is taken as the away
# team and the last row as the home team. Positional access (.iloc) is used so
# the lookup does not depend on the frame's index labels.
awayAbbr = prr['Tm'].iloc[0]
homeAbbr = prr['Tm'].iloc[-1]


def _kicking_points(kicking, team):
    """Return ``(td_points, fg_points, pat_points)`` for ``team`` from a kicking table.

    ``kicking`` needs the columns 'Tm', 'XPA', 'XPM', 'FGA' and 'FGM'.
    Touchdown points are 6 per extra-point attempt, field-goal points are 3 per
    made field goal, and PAT points are 1 per made extra point.
    NOTE(review): touchdowns are inferred from extra-point attempts, so a
    touchdown without a kicked attempt would not be counted — confirm this is
    acceptable.
    """
    team_rows = kicking.loc[kicking['Tm'] == team]
    tried_xp = team_rows['XPA'] > 0
    tried_fg = team_rows['FGA'] > 0
    # Series.sum() skips NaN, so a blank made-count next to a positive attempt
    # count adds 0 instead of turning the total into NaN (which int() rejects).
    td_points = 6 * int(team_rows.loc[tried_xp, 'XPA'].sum())
    fg_points = 3 * int(team_rows.loc[tried_fg, 'FGM'].sum())
    pat_points = int(team_rows.loc[tried_xp, 'XPM'].sum())
    return td_points, fg_points, pat_points


# Summing Scores
# Away
awayTDs, awayFGs, awayPATs = _kicking_points(kp, awayAbbr)
# Home
homeTDs, homeFGs, homePATs = _kicking_points(kp, homeAbbr)

# Safeties
homeSafetyPoints, awaySafetyPoints = safetyHandling()

awayScore = awayTDs + awayFGs + awayPATs + awaySafetyPoints
homeScore = homeTDs + homeFGs + homePATs + homeSafetyPoints

In [22]:
# Print each team's abbreviation next to its computed score (away first, then home)
print(awayAbbr, awayScore, homeAbbr, homeScore)

MIN 24 CIN 27


In [16]:
# Game-level summary: both team abbreviations and their computed scores
gameMetaData = dict(
    awayTeam=awayAbbr,
    homeTeam=homeAbbr,
    awayScore=awayScore,
    homeScore=homeScore,
)

In [17]:
# Display the passing frame: rows of the passing/rushing/receiving table with Att > 0
pas

Unnamed: 0,Player,Tm,Cmp,Att,Yds,TD,Int,Sk,SkYds,Lng,Rate
0,Kirk Cousins,MIN,36,49,351,2,0,3,26,34,106.8
1,Justin Jefferson,MIN,1,1,11,0,0,0,0,11,112.5
11,Joe Burrow,CIN,20,27,261,2,0,5,44,50,128.8


In [18]:
# Display the advanced passing frame after its columns were narrowed
advpass

Unnamed: 0,Player,Tm,1D,1D%,IAY,IAY/PA,CAY,CAY/Cmp,CAY/PA,YAC,YAC/Cmp,Drops,Drop%,BadTh,Bad%,Sk,Bltz,Hrry,Hits,Prss,Prss%,Scrm,Yds/Scr
0,Kirk Cousins,MIN,15,28.8,297,6.1,176,4.9,3.6,175,4.9,2,4.3%,6,13.0%,3,19,7,5,15,28.8%,0,
1,Justin Jefferson,MIN,1,100.0,11,11.0,11,11.0,11.0,0,0.0,0,0.0%,0,0.0%,0,1,0,1,1,100.0%,0,
2,Joe Burrow,CIN,12,37.5,220,8.1,137,6.9,5.1,124,6.2,0,0.0%,5,19.2%,5,10,3,2,10,31.3%,0,


In [19]:
# Passing


In [20]:
# Skeleton of the per-game dictionary: game metadata first, then the stat
# sections, which all start out empty
flashCard = {'gameMetaData': gameMetaData}
flashCard['offense'] = {section: {} for section in ('passing', 'rushing', 'receiving')}
flashCard['defense'] = {}
flashCard['special_teams'] = {section: {} for section in ('kicking', 'returning')}

In [21]:
# Serialise flashCard to JSON with a 4-space indent and print it for inspection
print(json.dumps(flashCard, indent = 4))

{
    "gameMetaData": {
        "awayTeam": "MIN",
        "homeTeam": "CIN",
        "awayScore": 24,
        "homeScore": 27
    },
    "offense": {
        "passing": {},
        "rushing": {},
        "receiving": {}
    },
    "defense": {},
    "special_teams": {
        "kicking": {},
        "returning": {}
    }
}
