In [6]:
import os

import numpy as np
import pandas as pd

import matplotlib.pylab as plt
import seaborn as sns
# Apply seaborn's "whitegrid" style to the figures drawn in this notebook.
sns.set(style="whitegrid")

# Load the `watermark` IPython extension so the %watermark magic is available.
%load_ext watermark
# Render matplotlib figures inline, directly below the cell that creates them.
%matplotlib inline

In [10]:
!pip install tables

Collecting tables
  Downloading tables-3.6.1-cp38-cp38-macosx_10_9_x86_64.whl (4.3 MB)
     |████████████████████████████████| 4.3 MB 1.6 MB/s eta 0:00:01
Collecting numexpr>=2.6.2
  Downloading numexpr-2.7.1-cp38-cp38-macosx_10_9_x86_64.whl (101 kB)
     |████████████████████████████████| 101 kB 19.2 MB/s eta 0:00:01
Installing collected packages: numexpr, tables
Successfully installed numexpr-2.7.1 tables-3.6.1


In [11]:
# Record the environment this notebook ran in: Python/IPython versions (-v),
# the "last updated" date, time and timezone (-u -d -t -z), the git
# repository (-r) and the versions of the imported packages (-iv).
%watermark -v -d -t -z -u -r -iv

matplotlib.pylab 1.18.1
numpy            1.18.1
pandas           1.0.1
seaborn          0.10.0
last updated: 2020-02-25 20:44:34 PST 

CPython 3.8.1
IPython 7.12.0
Git repo: git@github.com:clausherther/nfl-analysis.git


In [12]:
# Build the fourth-down plays table used by the rest of the notebook.
#
# The whole transformation is a single method chain that produces a fresh
# frame, so re-running this cell always yields the same `df` and no column is
# ever written onto a filtered slice of another frame.
df = (
    pd.read_csv("data/fourth_downs.csv")
    # Field-goal attempts are excluded from this analysis.
    .query("is_field_goal_attempt == False")
    .rename(columns={"off_team_code": "team_code"})
    .assign(
        game_date=lambda plays: pd.to_datetime(plays["game_date"]),
        # We add game weeks, which we'll start on Tuesdays: every game is
        # mapped to the most recent Tuesday strictly before its date.
        # NOTE: subtracting an anchored offset from a date that already falls
        # on the anchor moves back a full week, so a game played on a Tuesday
        # is placed in the week that started the previous Tuesday.
        game_week=lambda plays: plays["game_date"] - pd.offsets.Week(weekday=1),
        game_week_code=lambda plays: plays["game_week"].dt.strftime("%Y%m%d"),
        # Then we add numbered weeks for the season: a dense rank of the game
        # weeks within each season (1 = first game week present in the data).
        season_week=lambda plays: (
            plays
            .groupby("season")["game_week"]
            .rank(method="dense", ascending=True)
            .astype(int)
        ),
        # presumably "R" marks the regular season -- TODO confirm
        season_code=lambda plays: "R" + plays["season"].astype(str),
        # Simple 0/1 metric columns so we can sum() more easily later when
        # aggregating.
        fourth_downs=1,
        fourth_down_attempts=lambda plays: plays["is_fourth_down_attempt"].astype(int),
        fourth_down_conversions=lambda plays: plays["is_fourth_down_converted"].astype(int),
    )
)

# Let's save that for later. `key` is passed by keyword because newer pandas
# releases deprecate passing it positionally.
df.to_hdf("data/fourth_downs_attempts.hd5", key="fourth_downs_attempts")