In [1]:
import nfl_data_py as nfl
import pandas as pd
import numpy as np

In [90]:
class GameData:
    def __init__(self, team, game):
        self.team = team
        self.stats = {}
        self.game = game
        self.id = game['game_id'][0]

        self.is_home = team == game['home_team'][0]
        self.stats['team'] = team
        self.stats['id'] = self.id
        self.initialize_stats()

    
    def initialize_stats(self):
        winner = self.game['home_team'][0] if self.game['home_score'][0] > self.game['away_score'][0] else self.game['away_team'][0]
        self.stats['won_game'] = winner == self.team
        self.stats['points'] = self.game['home_score'][0] if self.is_home else self.game['away_score'][0]
        self.stats['opp_points'] = self.game['home_score'][0] if not self.is_home else self.game['away_score'][0]
        for index, play in self.game.iterrows():
            self.add_stats_from_play(play)

    def add_stats_from_play(self, play):
        team = play['posteam']
        
        if team == self.team:
            self.add_offensive_stats(play)
        elif team != None:
            self.add_defensive_stats(play)
    
    def add_offensive_stats(self, play):
        yards_gained = play['yards_gained']
        play_type = play['play_type']

        if not np.isnan(yards_gained):
            self.stats['yards'] = self.stats.get('yards', 0) + yards_gained

        if play_type == 'field_goal':
            self.stats['field_goals_attempted'] = self.stats.get('field_goals_attempted', 0) + 1
            made = play['field_goal_result'] == 'made'
            self.stats['field_goals_made'] = self.stats.get('field_goals_made', 0) + made
        
        if play['interception'] == 1 or play['fumble_lost'] == 1:
            self.stats['offensive_turnovers'] = self.stats.get('offensive_turnovers', 0) + 1

    def add_defensive_stats(self, play):
        yards_gained = play['yards_gained']
        play_type = play['play_type']

        if not np.isnan(yards_gained):
            self.stats['opp_yards'] = self.stats.get('opp_yards', 0) + yards_gained

        if play['interception'] == 1 or play['fumble_lost'] == 1:
            self.stats['defensive_turnovers'] = self.stats.get('defensive_turnovers', 0) + 1

        # [None, 'kickoff', 'run', 'pass', 'punt', 'no_play', 'extra_point', 'field_goal', 'qb_kneel', 'qb_spike']
    
    def __str__(self):
        return self.id
    
    def __repr__(self):
        return self.id

In [3]:
def generate_stat_df(year):
    schedule = nfl.import_schedules([year])
    data = nfl.import_pbp_data([year])

    game_data_arr = []

    for game_id in data['game_id'].unique():
        if schedule[schedule['game_id'] == game_id]['game_type'].values[0] == 'REG':
            game = data[data['game_id'] == game_id].reset_index()

            if len(game['posteam'].unique()) == 0:
                # this means the game doesn't have PBP data yet
                continue

            home_team = game['home_team'][0]
            away_team = game['away_team'][0]

            home_game = GameData(home_team, game)
            away_game = GameData(away_team, game)
            game_data_arr.append(home_game)
            game_data_arr.append(away_game)

    df = pd.DataFrame([game.stats for game in game_data_arr])
    df = df.drop(columns=['id'])

    aggregated = df.groupby('team').mean()
    return aggregated

In [None]:
dfs = []
for year in range(2000, 2024):
    print(year)
    df = generate_stat_df(year)
    df['year'] = year
    dfs.append(df)

In [92]:
generate_stat_df(2022)

2022 done.
Downcasting floats.
     index  play_id          game_id old_game_id home_team away_team  \
0        0      1.0  2022_01_BAL_NYJ  2022091107       NYJ       BAL   
1        1     43.0  2022_01_BAL_NYJ  2022091107       NYJ       BAL   
2        2     68.0  2022_01_BAL_NYJ  2022091107       NYJ       BAL   
3        3     89.0  2022_01_BAL_NYJ  2022091107       NYJ       BAL   
4        4    115.0  2022_01_BAL_NYJ  2022091107       NYJ       BAL   
..     ...      ...              ...         ...       ...       ...   
174    174   4135.0  2022_01_BAL_NYJ  2022091107       NYJ       BAL   
175    175   4151.0  2022_01_BAL_NYJ  2022091107       NYJ       BAL   
176    176   4171.0  2022_01_BAL_NYJ  2022091107       NYJ       BAL   
177    177   4200.0  2022_01_BAL_NYJ  2022091107       NYJ       BAL   
178    178   4221.0  2022_01_BAL_NYJ  2022091107       NYJ       BAL   

    season_type  week posteam posteam_type  ...  \
0           REG     1    None         None  ...   
1 

Unnamed: 0_level_0,won_game,points,opp_points,yards,opp_yards,offensive_turnovers,field_goals_attempted,field_goals_made,defensive_turnovers
team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
ARI,0.235294,20.0,26.411765,324.058824,349.0,2.0,2.461538,2.153846,1.615385
ATL,0.411765,21.470588,22.705882,319.0,362.588235,1.357143,2.3125,2.0,1.636364
BAL,0.588235,20.588235,18.529412,339.058824,324.411765,1.692308,2.529412,2.176471,1.846154
BUF,0.8125,28.4375,17.875,397.8125,319.25,1.928571,2.066667,1.8,2.076923
CAR,0.411765,20.411765,22.0,306.352941,350.294118,2.0,2.333333,2.2,1.5
CHI,0.176471,19.176471,27.235294,307.941176,376.352941,1.5625,2.076923,1.923077,1.916667
CIN,0.75,26.125,20.125,360.875,335.8125,1.888889,1.933333,1.6,2.083333
CLE,0.411765,21.235294,22.411765,349.176471,331.352941,1.615385,2.0,1.5,1.538462
DAL,0.705882,27.470588,20.117647,354.941176,330.294118,1.692308,2.461538,2.230769,2.428571
DEN,0.294118,16.882353,21.117647,325.235294,320.117647,1.846154,2.4,1.866667,2.090909
