In [1]:
import os

import numpy as np
import pandas as pd

import matplotlib.pylab as plt
import seaborn as sns
# Use seaborn's "whitegrid" style for the plots in this notebook.
sns.set(style="whitegrid")

# Load the watermark extension so the %watermark magic is available below.
%load_ext watermark
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
# Record provenance for this run: interpreter and imported-package versions,
# the last-updated timestamp with time zone, and the git remote of the repo.
%watermark -v -d -t -z -u -r -iv

pandas           0.25.2
matplotlib.pylab 1.17.3
seaborn          0.9.0
numpy            1.17.3
last updated: 2019-12-06 09:56:32 PST 

CPython 3.7.4
IPython 7.9.0
Git repo: git@github.com:clausherther/nfl-analysis.git


In [9]:
# Load every fourth-down play, drop field-goal attempts and normalise the
# offense team column name. rename() is used without inplace=True so the whole
# load is one chain that yields a new frame, rather than mutating the filtered
# result of query() in place.
df = (
    pd.read_csv("data/fourth_downs.csv")
    .query("is_field_goal_attempt == False")
    .rename(columns={"off_team_code": "team_code"})
)
df["game_date"] = pd.to_datetime(df["game_date"])

# We add game weeks, which we'll start on Tuesdays.
# Subtracting a Tuesday-anchored Week offset rolls each date back to the most
# recent Tuesday *before* it; a date that already falls on a Tuesday moves back
# a full seven days, so a Tuesday game is grouped with the preceding week.
df["game_week"] = df["game_date"] - pd.offsets.Week(weekday=1)
df["game_week_code"] = df["game_week"].dt.strftime("%Y%m%d")

# Then we add numbered weeks for the season: a dense rank of the game-week
# start dates within each season (1 = earliest game week present in the data).
df["season_week"] = (
    df.groupby("season")["game_week"]
    .rank(method="dense", ascending=True)
    .astype(int)
)

df["season_code"] = "R" + df["season"].astype(str)

# We add some simple metrics columns so we can sum() more easily later when aggregating
df["fourth_downs"] = 1
df["fourth_down_attempts"] = df["is_fourth_down_attempt"].astype(int)
df["fourth_down_conversions"] = df["is_fourth_down_converted"].astype(int)

# Let's save that for later. `key` is passed by keyword because newer pandas
# releases deprecate passing it positionally.
df.to_hdf("data/fourth_downs_attempts.hd5", key="fourth_downs_attempts")