In [107]:
import nfl_data_py as nfl
import pandas as pd
import numpy as np

In [119]:
class GameData:
    """Single-game statistics for one team, accumulated from play-by-play rows.

    `game` is a DataFrame holding every play of one game. The columns read here
    are: game_id, home_team, away_team, home_score, away_score, posteam,
    play_type, yards_gained, passing_yards, rushing_yards, field_goal_result,
    interception and fumble_lost.

    After construction, `self.stats` is a flat dict with the keys 'team', 'id',
    'won_game', 'points', 'opp_points' plus every key in `_COUNTER_KEYS`.
    """

    # Every counter is seeded with 0 so that each game reports every key.
    # Without this, a game with (say) no turnovers would have no
    # 'offensive_turnovers' entry at all, show up as NaN once the stats dicts
    # are put in a DataFrame, and be skipped by a later mean().
    _COUNTER_KEYS = (
        'yards',
        'pass_yards',
        'rush_yards',
        'field_goals_attempted',
        'field_goals_made',
        'offensive_turnovers',
        'opp_yards',
        'defensive_turnovers',
    )

    def __init__(self, team, game):
        """Build the stats for `team` from the play-by-play frame `game`."""
        self.team = team
        self.stats = {}
        self.game = game
        # Positional access: does not rely on the caller having reset the index.
        self.id = game['game_id'].iloc[0]

        self.is_home = team == game['home_team'].iloc[0]
        self.stats['team'] = team
        self.stats['id'] = self.id
        self.initialize_stats()

    def initialize_stats(self):
        """Fill in the score-based stats, zero the counters and replay every play."""
        home_score = self.game['home_score'].iloc[0]
        away_score = self.game['away_score'].iloc[0]
        if self.is_home:
            points, opp_points = home_score, away_score
        else:
            points, opp_points = away_score, home_score

        # Strict comparison: a tied game is a win for neither side.
        self.stats['won_game'] = bool(points > opp_points)
        self.stats['points'] = points
        self.stats['opp_points'] = opp_points

        for key in self._COUNTER_KEYS:
            self.stats[key] = 0

        for _, play in self.game.iterrows():
            self.add_stats_from_play(play)

    def add_stats_from_play(self, play):
        """Route one play to the offensive or defensive accumulator.

        Plays whose 'posteam' equals this team count as offense, plays with any
        other non-missing 'posteam' count as defense, and plays with a missing
        'posteam' (None or NaN) are ignored.
        """
        team = play['posteam']

        if pd.isna(team):
            return

        if team == self.team:
            self.add_offensive_stats(play)
        else:
            self.add_defensive_stats(play)

    def _bump(self, key, amount=1):
        """Add `amount` to the counter `key`, creating it at 0 if absent."""
        self.stats[key] = self.stats.get(key, 0) + amount

    def add_offensive_stats(self, play):
        """Accumulate yardage, field goals and turnovers from one of this team's plays."""
        yardage_columns = (
            ('yards', 'yards_gained'),
            ('pass_yards', 'passing_yards'),
            ('rush_yards', 'rushing_yards'),
        )
        for key, column in yardage_columns:
            value = play[column]
            # pd.isna handles both NaN and None; missing yardage adds nothing.
            if not pd.isna(value):
                self._bump(key, value)

        if play['play_type'] == 'field_goal':
            self._bump('field_goals_attempted')
            self._bump('field_goals_made', int(play['field_goal_result'] == 'made'))

        if play['interception'] == 1 or play['fumble_lost'] == 1:
            self._bump('offensive_turnovers')

    def add_defensive_stats(self, play):
        """Accumulate yards allowed and takeaways from one of the opponent's plays."""
        yards_gained = play['yards_gained']

        if not pd.isna(yards_gained):
            self._bump('opp_yards', yards_gained)

        if play['interception'] == 1 or play['fumble_lost'] == 1:
            self._bump('defensive_turnovers')

        # play_type values seen in the data:
        # [None, 'kickoff', 'run', 'pass', 'punt', 'no_play', 'extra_point', 'field_goal', 'qb_kneel', 'qb_spike']

    def __str__(self):
        return str(self.id)

    def __repr__(self):
        return str(self.id)

In [121]:
def generate_stat_df(year):
    """Return per-team averages of the single-game stats for one season.

    Loads the season's schedule and play-by-play frames through nfl_data_py,
    builds a GameData for both teams of every game whose schedule 'game_type'
    is 'REG', and averages each team's per-game stats.

    Parameters
    ----------
    year : int
        Season to load.

    Returns
    -------
    pandas.DataFrame
        Indexed by team, one column per stat. Empty if no game qualified.
    """
    schedule = nfl.import_schedules([year])
    data = nfl.import_pbp_data([year])

    # One set lookup per game instead of re-filtering the schedule each time.
    # A game id absent from the schedule is simply skipped rather than raising.
    regular_season_ids = set(
        schedule.loc[schedule['game_type'] == 'REG', 'game_id']
    )

    game_data_arr = []

    # sort=False keeps games in order of first appearance, like .unique() did.
    for game_id, plays in data.groupby('game_id', sort=False):
        if game_id not in regular_season_ids:
            continue

        # Series.unique() keeps NA, so its length is never 0 for a non-empty
        # game; test for "no usable posteam at all" explicitly instead.
        if plays['posteam'].isna().all():
            # this means the game doesn't have PBP data yet
            continue

        game = plays.reset_index(drop=True)

        home_team = game['home_team'].iloc[0]
        away_team = game['away_team'].iloc[0]

        game_data_arr.append(GameData(home_team, game))
        game_data_arr.append(GameData(away_team, game))

    if not game_data_arr:
        # Nothing to aggregate (e.g. a season without data yet).
        return pd.DataFrame()

    df = pd.DataFrame([game.stats for game in game_data_arr])
    df = df.drop(columns=['id'])

    aggregated = df.groupby('team').mean()
    return aggregated

In [122]:
# Collect one per-team season-average frame for every season from 2000
# through 2023, tagging each frame with the season it describes.
dfs = []
for year in range(2000, 2024):
    print(year)  # progress marker, one line per season
    df = generate_stat_df(year).assign(year=year)
    dfs.append(df)

2000
2000 done.
Downcasting floats.
2001
2001 done.
Downcasting floats.
2002
2002 done.
Downcasting floats.
2003
2003 done.
Downcasting floats.
2004
2004 done.
Downcasting floats.
2005
2005 done.
Downcasting floats.
2006
2006 done.
Downcasting floats.
2007
2007 done.
Downcasting floats.
2008
2008 done.
Downcasting floats.
2009
2009 done.
Downcasting floats.
2010
2010 done.
Downcasting floats.
2011
2011 done.
Downcasting floats.
2012
2012 done.
Downcasting floats.
2013
2013 done.
Downcasting floats.
2014
2014 done.
Downcasting floats.
2015
2015 done.
Downcasting floats.
2016
2016 done.
Downcasting floats.
2017
2017 done.
Downcasting floats.
2018
2018 done.
Downcasting floats.
2019
2019 done.
Downcasting floats.
2020
2020 done.
Downcasting floats.
2021
2021 done.
Downcasting floats.
2022
2022 done.
Downcasting floats.
2023
2023 done.
Downcasting floats.


In [128]:
# Z-score each season's team averages across teams and write one CSV per season.
# The 'year' column is held out of the normalization: it is constant within a
# season, so its standard deviation is 0 and (x - mean) / std would turn it
# into NaN in the exported file.
# `dfs` itself is left un-normalized; only the CSV output is scaled.
for i, df in enumerate(dfs):
    season_stats = df.drop(columns=['year'])
    normalized = (season_stats - season_stats.mean()) / season_stats.std()
    normalized['year'] = df['year']
    # The file name assumes dfs[0] is the 2000 season and seasons are consecutive.
    normalized.to_csv(f"Data/{2000 + i}.csv")

In [129]:
# Stack every season's per-team frame into one long frame; the 'year' column
# tells the seasons apart, so the team index repeats once per season.
# NOTE(review): the frames in `dfs` are not modified by the normalization
# cell (it only writes its result to CSV), so these are the raw averages —
# confirm that is intended.
aggregate_df = pd.concat(dfs)

In [130]:
# Persist the combined frame, then preview it. Only the last expression of a
# cell is displayed, so the preview has to come after the write.
aggregate_df.to_csv("Data/stats.csv")
aggregate_df.head()