In [1]:
# Import pandas and numpy packages, read basic stats and games csv's in as dataframes
# Create new columns for later use in prediction model

import pandas as pd
import numpy as np
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

bas_2024_df = pd.read_csv('team_season_stats_by_week_2024.csv')
games_2024_df = pd.read_csv('games_2024.csv')

bas_2024_df['off_plays'] = bas_2024_df['passAttempts'] + bas_2024_df['rushingAttempts']
bas_2024_df['off_plays_per_game'] = bas_2024_df['off_plays']/bas_2024_df['games']
bas_2024_df['pass_attempts_per_game'] = bas_2024_df['passAttempts']/bas_2024_df['games']
bas_2024_df['rush_attempts_per_game'] = bas_2024_df['rushingAttempts']/bas_2024_df['games']
bas_2024_df['off_pace'] = bas_2024_df['possessionTime']/bas_2024_df['off_plays']
bas_2024_df['off_interception_ratio'] = bas_2024_df['interceptions']/bas_2024_df['passAttempts']
bas_2024_df['off_turnover_rate'] = bas_2024_df['turnovers']/bas_2024_df['off_plays']
bas_2024_df['off_pass_rate'] = bas_2024_df['passAttempts']/bas_2024_df['off_plays']
bas_2024_df['off_rush_rate'] = bas_2024_df['rushingAttempts']/bas_2024_df['off_plays']


bas_2024_df = bas_2024_df[['season','through_week','team','games','off_plays','off_plays_per_game','passAttempts','pass_attempts_per_game',
                           'off_pass_rate','rushingAttempts','rush_attempts_per_game', 'off_rush_rate','possessionTime','off_pace','clock_control',
                           'first_downs_per_game','third_down_efficiency','fourth_down_efficiency','penalties','penalties_per_game','yards_per_penalty',
                           'off_interception_ratio','turnovers','passesIntercepted','sacks','tacklesForLoss']]

In [2]:
# Read in advanced metrics dataframe, and reformat


adv_2024_df = pd.read_csv('adv_season_stats_by_week_2024.csv')

adv_2024_df = adv_2024_df[['season','through_week','team','conference','defense_overall_explosiveness','defense_fp_avg_ppa','defense_fp_avg_start',
                           'defense_havoc_dbs','defense_havoc_f7','defense_havoc_total','defense_line_yards','defense_open_field_yards',
                           'defense_passing_downs_explosiveness','defense_passing_downs_ppa','defense_passing_downs_rate','defense_passing_downs_success_rate',
                           'defense_passing_explosiveness','defense_passing_ppa_per_play','defense_passing_rate','defense_passing_success_rate',
                           'defense_points_per_opportunity','defense_power_success','defense_ppa_per_play','defense_rushing_explosiveness','defense_rushing_ppa_per_play',
                           'defense_rushing_rate','defense_rushing_success_rate','defense_second_level_yards','defense_standard_downs_explosiveness',
                           'defense_standard_downs_ppa_per_play','defense_standard_downs_rate','defense_standard_downs_success_rate','defense_stuff_rate',
                           'defense_overall_success_rate','offense_overall_explosiveness','offense_fp_avg_ppa','offense_fp_avg_start',
                           'offense_havoc_dbs','offense_havoc_f7','offense_havoc_total','offense_line_yards','offense_open_field_yards',
                           'offense_passing_downs_explosiveness','offense_passing_downs_ppa','offense_passing_downs_rate','offense_passing_downs_success_rate',
                           'offense_passing_explosiveness','offense_passing_ppa_per_play','offense_passing_rate','offense_passing_success_rate',
                           'offense_points_per_opportunity','offense_power_success','offense_ppa_per_play','offense_rushing_explosiveness','offense_rushing_ppa_per_play',
                           'offense_rushing_rate','offense_rushing_success_rate','offense_second_level_yards','offense_standard_downs_explosiveness',
                           'offense_standard_downs_ppa_per_play','offense_standard_downs_rate','offense_standard_downs_success_rate','offense_stuff_rate',
                           'offense_overall_success_rate']]


In [3]:
# Merge basic and advanced dataframes

merged_2024_df = bas_2024_df.merge(adv_2024_df, left_on=['season','through_week','team'],
                  right_on=['season','through_week','team'],
                  how = 'outer',
                  suffixes=(False, False))


In [4]:
# Create new columns to be used later & fill any null values

merged_2024_df['before_week']=(merged_2024_df['through_week']+1)
merged_2024_df = merged_2024_df.fillna(0)

merged_2024_df = merged_2024_df.sort_values(['season','team','through_week'])

merged_2024_df.loc[(merged_2024_df['games'].shift(1) - merged_2024_df['games']) == 0, 'off_bye_week'] = 1
merged_2024_df.loc[(merged_2024_df['games'].shift(1) - merged_2024_df['games']) != 0, 'off_bye_week'] = 0

merged_2024_df.head()

Unnamed: 0,season,through_week,team,games,off_plays,off_plays_per_game,passAttempts,pass_attempts_per_game,off_pass_rate,rushingAttempts,rush_attempts_per_game,off_rush_rate,possessionTime,off_pace,clock_control,first_downs_per_game,third_down_efficiency,fourth_down_efficiency,penalties,penalties_per_game,yards_per_penalty,off_interception_ratio,turnovers,passesIntercepted,sacks,tacklesForLoss,conference,defense_overall_explosiveness,defense_fp_avg_ppa,defense_fp_avg_start,defense_havoc_dbs,defense_havoc_f7,defense_havoc_total,defense_line_yards,defense_open_field_yards,defense_passing_downs_explosiveness,defense_passing_downs_ppa,defense_passing_downs_rate,defense_passing_downs_success_rate,defense_passing_explosiveness,defense_passing_ppa_per_play,defense_passing_rate,defense_passing_success_rate,defense_points_per_opportunity,defense_power_success,defense_ppa_per_play,defense_rushing_explosiveness,defense_rushing_ppa_per_play,defense_rushing_rate,defense_rushing_success_rate,defense_second_level_yards,defense_standard_downs_explosiveness,defense_standard_downs_ppa_per_play,defense_standard_downs_rate,defense_standard_downs_success_rate,defense_stuff_rate,defense_overall_success_rate,offense_overall_explosiveness,offense_fp_avg_ppa,offense_fp_avg_start,offense_havoc_dbs,offense_havoc_f7,offense_havoc_total,offense_line_yards,offense_open_field_yards,offense_passing_downs_explosiveness,offense_passing_downs_ppa,offense_passing_downs_rate,offense_passing_downs_success_rate,offense_passing_explosiveness,offense_passing_ppa_per_play,offense_passing_rate,offense_passing_success_rate,offense_points_per_opportunity,offense_power_success,offense_ppa_per_play,offense_rushing_explosiveness,offense_rushing_ppa_per_play,offense_rushing_rate,offense_rushing_success_rate,offense_second_level_yards,offense_standard_downs_explosiveness,offense_standard_downs_ppa_per_play,offense_standard_downs_rate,offense_standard_downs_success_rate,offense_stuff_rate,offense_overall_success_rate,before_week,off_bye_week
0,2024,1,Air Force,1.0,69.0,69.0,14.0,14.0,0.202899,55.0,55.0,0.797101,1984.0,28.753623,0.551111,15.0,0.411765,0.666667,3.0,3.0,11.666667,0.0,0.0,0.0,1.0,5.0,Mountain West,0.985932,-1.326,72.0,0.035714,0.160714,0.196429,2.685714,0.821429,2.484054,0.184053,0.357143,0.15,1.42538,-0.041802,0.5,0.214286,2.333333,0.666667,-0.026979,0.722263,-0.012156,0.5,0.357143,0.928571,0.640212,-0.144219,0.642857,0.361111,0.178571,0.285714,0.776619,2.049,60.6,0.028986,0.043478,0.072464,2.656364,0.036364,1.791125,-0.004593,0.275362,0.157895,1.53693,0.056393,0.202899,0.285714,5.25,0.846154,0.040839,0.638381,0.03688,0.797101,0.4,0.418182,0.644292,0.058104,0.724638,0.46,0.145455,0.376812,2,0.0
133,2024,2,Air Force,2.0,135.0,67.5,34.0,17.0,0.251852,101.0,50.5,0.748148,3747.0,27.755556,0.520417,12.5,0.30303,0.5,9.0,4.5,9.666667,0.058824,2.0,1.0,3.0,11.0,Mountain West,1.199004,-1.507,69.3,0.057851,0.123967,0.181818,2.15,0.4,2.045653,-0.03348,0.380165,0.152174,1.664051,0.197515,0.504132,0.344262,2.428571,0.714286,0.001604,0.624533,-0.197573,0.495868,0.283333,0.683333,1.007825,0.023122,0.619835,0.413333,0.25,0.31405,0.825132,1.633,66.1,0.044776,0.067164,0.11194,2.573267,0.079208,1.800527,-0.097824,0.313433,0.095238,1.106202,-0.252499,0.246269,0.242424,4.666667,0.727273,-0.02897,0.76436,0.044064,0.753731,0.366337,0.485149,0.729971,0.002463,0.686567,0.445652,0.158416,0.335821,3,0.0
267,2024,3,Air Force,3.0,199.0,66.333333,42.0,14.0,0.211055,157.0,52.333333,0.788945,5785.0,29.070352,0.535648,12.333333,0.32,0.545455,14.0,4.666667,9.642857,0.071429,5.0,1.0,4.0,14.0,Mountain West,1.02384,-1.755,65.7,0.038462,0.120879,0.159341,2.892857,0.846939,1.81802,0.054607,0.318681,0.206897,1.538672,0.262087,0.461538,0.404762,2.764706,0.615385,0.056044,0.563201,-0.120565,0.538462,0.387755,0.94898,0.865004,0.056716,0.681319,0.483871,0.183673,0.395604,0.927432,1.477,68.1,0.040404,0.075758,0.116162,2.555414,0.210191,1.617323,-0.106477,0.318182,0.111111,1.165134,-0.305171,0.207071,0.219512,3.444444,0.733333,-0.046758,0.886291,0.020726,0.792929,0.33121,0.547771,0.838001,-0.018889,0.681818,0.4,0.165605,0.308081,4,0.0
401,2024,4,Air Force,3.0,199.0,66.333333,42.0,14.0,0.211055,157.0,52.333333,0.788945,5785.0,29.070352,0.535648,12.333333,0.32,0.545455,14.0,4.666667,9.642857,0.071429,5.0,1.0,4.0,14.0,Mountain West,1.02384,-1.755,65.7,0.038462,0.120879,0.159341,2.892857,0.846939,1.81802,0.054607,0.318681,0.206897,1.538672,0.262087,0.461538,0.404762,2.764706,0.615385,0.056044,0.563201,-0.120565,0.538462,0.387755,0.94898,0.865004,0.056716,0.681319,0.483871,0.183673,0.395604,0.927432,1.477,68.1,0.040404,0.075758,0.116162,2.555414,0.210191,1.617323,-0.106477,0.318182,0.111111,1.165134,-0.305171,0.207071,0.219512,3.444444,0.733333,-0.046758,0.886291,0.020726,0.792929,0.33121,0.547771,0.838001,-0.018889,0.681818,0.4,0.165605,0.308081,5,1.0
535,2024,5,Air Force,4.0,258.0,64.5,55.0,13.75,0.213178,203.0,50.75,0.786822,7497.0,29.05814,0.520625,14.0,0.322581,0.461538,18.0,4.5,9.444444,0.054545,5.0,1.0,4.0,19.0,Mountain West,1.129707,-1.668,66.9,0.032653,0.118367,0.15102,2.736429,0.907143,1.833,0.128909,0.310204,0.25,1.612662,0.314579,0.428571,0.419048,3.391304,0.611111,0.105716,0.736188,-0.05093,0.571429,0.385714,0.971429,0.960561,0.095287,0.689796,0.467456,0.192857,0.4,1.008816,1.368,69.6,0.03125,0.089844,0.121094,2.646269,0.437811,1.887064,0.012397,0.320312,0.146341,1.39063,-0.205177,0.214844,0.236364,3.923077,0.764706,0.006562,0.939877,0.0645,0.785156,0.358209,0.681592,0.864446,0.003811,0.679688,0.41954,0.159204,0.332031,6,0.0


In [5]:
# Merge the stats dataframe with games dataframe and add suffix for home/away teams

stats_games_merged_2024_df = games_2024_df.merge(merged_2024_df, left_on=['season','week','home_team'],
                                      right_on=['season','before_week','team']).merge(merged_2024_df, left_on=['season','week','away_team'],
                                                                                      right_on=['season','before_week','team'],
                                                                                      suffixes=('_home','_away'))

In [6]:
# Drop any redundant columns from merge

columns_to_drop = ['through_week_home','team_home','conference_home','before_week_home','through_week_away','team_away','conference_away','before_week_away']

stats_games_merged_2024_df.drop(columns_to_drop, axis=1, inplace=True)

In [7]:
# Calculate anticipated yield

stats_games_merged_2024_df['total_clock_control'] = stats_games_merged_2024_df['clock_control_home'] + stats_games_merged_2024_df['clock_control_away']

stats_games_merged_2024_df['home_clock_control_ratio'] = stats_games_merged_2024_df['clock_control_home'] / stats_games_merged_2024_df['total_clock_control']
stats_games_merged_2024_df['home_estimated_possesion_time'] = stats_games_merged_2024_df['home_clock_control_ratio'] * 3600
stats_games_merged_2024_df['home_estimated_off_plays'] = stats_games_merged_2024_df['home_estimated_possesion_time'] / stats_games_merged_2024_df['off_pace_home']

stats_games_merged_2024_df['away_clock_control_ratio'] = stats_games_merged_2024_df['clock_control_away'] / stats_games_merged_2024_df['total_clock_control']
stats_games_merged_2024_df['away_estimated_possesion_time'] = stats_games_merged_2024_df['away_clock_control_ratio'] * 3600
stats_games_merged_2024_df['away_estimated_off_plays'] = stats_games_merged_2024_df['away_estimated_possesion_time'] / stats_games_merged_2024_df['off_pace_away']

stats_games_merged_2024_df['home_net_ppa'] = stats_games_merged_2024_df['offense_ppa_per_play_home'] + stats_games_merged_2024_df['defense_ppa_per_play_away']

stats_games_merged_2024_df['away_net_ppa'] = stats_games_merged_2024_df['offense_ppa_per_play_away'] + stats_games_merged_2024_df['defense_ppa_per_play_home']

stats_games_merged_2024_df['home_ant_yield'] = stats_games_merged_2024_df['home_net_ppa'] * stats_games_merged_2024_df['home_estimated_off_plays']
stats_games_merged_2024_df['away_ant_yield'] = stats_games_merged_2024_df['away_net_ppa'] * stats_games_merged_2024_df['away_estimated_off_plays']


In [8]:
# Calculate an index for comparing home offense and away defense in critical areas

stats_games_merged_2024_df['home_swing_explosiveness'] = (stats_games_merged_2024_df['offense_overall_explosiveness_home'] + stats_games_merged_2024_df['defense_overall_explosiveness_away'])/2
stats_games_merged_2024_df['home_swing_field_position'] = (stats_games_merged_2024_df['offense_fp_avg_ppa_home'] + stats_games_merged_2024_df['defense_fp_avg_ppa_away'])/2
stats_games_merged_2024_df['home_swing_dbs_havoc'] = (stats_games_merged_2024_df['offense_havoc_dbs_home'] + stats_games_merged_2024_df['defense_havoc_dbs_away'])/2
stats_games_merged_2024_df['home_swing_f7_havoc'] = (stats_games_merged_2024_df['offense_havoc_f7_home'] + stats_games_merged_2024_df['defense_havoc_f7_away'])/2
stats_games_merged_2024_df['home_swing_overall_havoc'] = (stats_games_merged_2024_df['offense_havoc_total_home'] + stats_games_merged_2024_df['defense_havoc_total_away'])/2
stats_games_merged_2024_df['home_swing_line_yards'] = (stats_games_merged_2024_df['offense_line_yards_home'] + stats_games_merged_2024_df['defense_line_yards_away'])/2
stats_games_merged_2024_df['home_swing_open_field_yards'] = (stats_games_merged_2024_df['offense_open_field_yards_home'] + stats_games_merged_2024_df['defense_open_field_yards_away'])/2
stats_games_merged_2024_df['home_swing_passing_downs_explosiveness'] = (stats_games_merged_2024_df['offense_passing_downs_explosiveness_home'] + stats_games_merged_2024_df['defense_open_field_yards_away'])/2
stats_games_merged_2024_df['home_swing_passing_downs_ppa'] = (stats_games_merged_2024_df['offense_passing_downs_ppa_home'] + stats_games_merged_2024_df['defense_passing_downs_ppa_away'])/2
stats_games_merged_2024_df['home_swing_passing_downs_rate'] = stats_games_merged_2024_df['defense_passing_downs_rate_away'] - stats_games_merged_2024_df['offense_passing_downs_rate_home']
stats_games_merged_2024_df['home_swing_passing_downs_success_rate'] = (stats_games_merged_2024_df['offense_passing_downs_success_rate_home'] + stats_games_merged_2024_df['defense_passing_downs_success_rate_away'])/2
stats_games_merged_2024_df['home_swing_passing_explosiveness'] = (stats_games_merged_2024_df['offense_passing_explosiveness_home'] + stats_games_merged_2024_df['defense_passing_explosiveness_away'])/2
stats_games_merged_2024_df['home_swing_passing_ppa'] = (stats_games_merged_2024_df['offense_passing_ppa_per_play_home'] + stats_games_merged_2024_df['defense_passing_ppa_per_play_away'])/2
stats_games_merged_2024_df['home_swing_passing_success_rate'] = (stats_games_merged_2024_df['offense_passing_success_rate_home'] + stats_games_merged_2024_df['defense_passing_success_rate_away'])/2
stats_games_merged_2024_df['home_swing_points_per_opp'] = (stats_games_merged_2024_df['offense_points_per_opportunity_home'] + stats_games_merged_2024_df['defense_points_per_opportunity_away'])/2
stats_games_merged_2024_df['home_swing_power_success'] = (stats_games_merged_2024_df['offense_power_success_home'] + stats_games_merged_2024_df['defense_power_success_away'])/2
stats_games_merged_2024_df['home_swing_rushing_explosiveness'] = (stats_games_merged_2024_df['offense_rushing_explosiveness_home'] + stats_games_merged_2024_df['defense_rushing_explosiveness_away'])/2
stats_games_merged_2024_df['home_swing_rushing_ppa'] = (stats_games_merged_2024_df['offense_rushing_ppa_per_play_home'] + stats_games_merged_2024_df['defense_rushing_ppa_per_play_away'])/2
stats_games_merged_2024_df['home_swing_second_level_yards'] = (stats_games_merged_2024_df['offense_second_level_yards_home'] + stats_games_merged_2024_df['defense_second_level_yards_away'])/2
stats_games_merged_2024_df['home_swing_standard_downs_explosiveness'] = (stats_games_merged_2024_df['offense_standard_downs_explosiveness_home'] + stats_games_merged_2024_df['defense_standard_downs_explosiveness_away'])/2
stats_games_merged_2024_df['home_swing_standard_downs_ppa'] = (stats_games_merged_2024_df['offense_standard_downs_ppa_per_play_home'] + stats_games_merged_2024_df['defense_standard_downs_ppa_per_play_away'])/2
stats_games_merged_2024_df['home_swing_standard_downs_rate'] = stats_games_merged_2024_df['offense_standard_downs_rate_home'] - stats_games_merged_2024_df['defense_standard_downs_rate_away']
stats_games_merged_2024_df['home_swing_standard_downs_success_rate'] = (stats_games_merged_2024_df['offense_standard_downs_success_rate_home'] + stats_games_merged_2024_df['defense_standard_downs_success_rate_away'])/2
stats_games_merged_2024_df['home_swing_stuff_rate'] = (stats_games_merged_2024_df['offense_stuff_rate_home'] + stats_games_merged_2024_df['defense_stuff_rate_away'])/2
stats_games_merged_2024_df['home_swing_overall_success_rate'] = (stats_games_merged_2024_df['offense_overall_success_rate_home'] + stats_games_merged_2024_df['defense_overall_success_rate_away'])/2


In [9]:
# Calculate an index for comparing away offense and home defense in critical areas

stats_games_merged_2024_df['away_swing_explosiveness'] = (stats_games_merged_2024_df['offense_overall_explosiveness_away'] + stats_games_merged_2024_df['defense_overall_explosiveness_home'])/2
stats_games_merged_2024_df['away_swing_field_position'] = (stats_games_merged_2024_df['offense_fp_avg_ppa_away'] + stats_games_merged_2024_df['defense_fp_avg_ppa_home'])/2
stats_games_merged_2024_df['away_swing_dbs_havoc'] = (stats_games_merged_2024_df['offense_havoc_dbs_away'] + stats_games_merged_2024_df['defense_havoc_dbs_home'])/2
stats_games_merged_2024_df['away_swing_f7_havoc'] = (stats_games_merged_2024_df['offense_havoc_f7_away'] + stats_games_merged_2024_df['defense_havoc_f7_home'])/2
stats_games_merged_2024_df['away_swing_overall_havoc'] = (stats_games_merged_2024_df['offense_havoc_total_away'] + stats_games_merged_2024_df['defense_havoc_total_home'])/2
stats_games_merged_2024_df['away_swing_line_yards'] = (stats_games_merged_2024_df['offense_line_yards_away'] + stats_games_merged_2024_df['defense_line_yards_home'])/2
stats_games_merged_2024_df['away_swing_open_field_yards'] = (stats_games_merged_2024_df['offense_open_field_yards_away'] + stats_games_merged_2024_df['defense_open_field_yards_home'])/2
stats_games_merged_2024_df['away_swing_passing_downs_explosiveness'] = (stats_games_merged_2024_df['offense_passing_downs_explosiveness_away'] + stats_games_merged_2024_df['defense_open_field_yards_home'])/2
stats_games_merged_2024_df['away_swing_passing_downs_ppa'] = (stats_games_merged_2024_df['offense_passing_downs_ppa_away'] + stats_games_merged_2024_df['defense_passing_downs_ppa_home'])/2
stats_games_merged_2024_df['away_swing_passing_downs_rate'] = stats_games_merged_2024_df['defense_passing_downs_rate_away'] - stats_games_merged_2024_df['offense_passing_downs_rate_home']
stats_games_merged_2024_df['away_swing_passing_downs_success_rate'] = (stats_games_merged_2024_df['offense_passing_downs_success_rate_away'] + stats_games_merged_2024_df['defense_passing_downs_success_rate_home'])/2
stats_games_merged_2024_df['away_swing_passing_explosiveness'] = (stats_games_merged_2024_df['offense_passing_explosiveness_away'] + stats_games_merged_2024_df['defense_passing_explosiveness_home'])/2
stats_games_merged_2024_df['away_swing_passing_ppa'] = (stats_games_merged_2024_df['offense_passing_ppa_per_play_away'] + stats_games_merged_2024_df['defense_passing_ppa_per_play_home'])/2
stats_games_merged_2024_df['away_swing_passing_success_rate'] = (stats_games_merged_2024_df['offense_passing_success_rate_away'] + stats_games_merged_2024_df['defense_passing_success_rate_home'])/2
stats_games_merged_2024_df['away_swing_points_per_opp'] = (stats_games_merged_2024_df['offense_points_per_opportunity_away'] + stats_games_merged_2024_df['defense_points_per_opportunity_home'])/2
stats_games_merged_2024_df['away_swing_power_success'] = (stats_games_merged_2024_df['offense_power_success_away'] + stats_games_merged_2024_df['defense_power_success_home'])/2
stats_games_merged_2024_df['away_swing_rushing_explosiveness'] = (stats_games_merged_2024_df['offense_rushing_explosiveness_away'] + stats_games_merged_2024_df['defense_rushing_explosiveness_home'])/2
stats_games_merged_2024_df['away_swing_rushing_ppa'] = (stats_games_merged_2024_df['offense_rushing_ppa_per_play_away'] + stats_games_merged_2024_df['defense_rushing_ppa_per_play_home'])/2
stats_games_merged_2024_df['away_swing_second_level_yards'] = (stats_games_merged_2024_df['offense_second_level_yards_away'] + stats_games_merged_2024_df['defense_second_level_yards_home'])/2
stats_games_merged_2024_df['away_swing_standard_downs_explosiveness'] = (stats_games_merged_2024_df['offense_standard_downs_explosiveness_away'] + stats_games_merged_2024_df['defense_standard_downs_explosiveness_home'])/2
stats_games_merged_2024_df['away_swing_standard_downs_ppa'] = (stats_games_merged_2024_df['offense_standard_downs_ppa_per_play_away'] + stats_games_merged_2024_df['defense_standard_downs_ppa_per_play_home'])/2
stats_games_merged_2024_df['away_swing_standard_downs_rate'] = stats_games_merged_2024_df['offense_standard_downs_rate_away'] - stats_games_merged_2024_df['defense_standard_downs_rate_home']
stats_games_merged_2024_df['away_swing_standard_downs_success_rate'] = (stats_games_merged_2024_df['offense_standard_downs_success_rate_away'] + stats_games_merged_2024_df['defense_standard_downs_success_rate_home'])/2
stats_games_merged_2024_df['away_swing_stuff_rate'] = (stats_games_merged_2024_df['offense_stuff_rate_away'] + stats_games_merged_2024_df['defense_stuff_rate_home'])/2
stats_games_merged_2024_df['away_swing_overall_success_rate'] = (stats_games_merged_2024_df['offense_overall_success_rate_away'] + stats_games_merged_2024_df['defense_overall_success_rate_home'])/2

In [10]:
# Split dataframe into two, one for home team offense vs away team defense (a)
# and the other for away team offense vs home team defense (b)

columns_to_keep_a = ['game_id',
 'season',
 'week',
 'start_date',
 'neutral_site',
 'home_team',
 'home_conference',
 'home_points',
 'home_pregame_elo',
 'away_team',
 'away_conference',
 'away_pregame_elo',
 'games_home',
 'clock_control_home',
 'off_bye_week_home',
 'games_away',
 'off_bye_week_away',
 'home_net_ppa',
 'home_ant_yield',
 'home_swing_explosiveness',
 'home_swing_field_position',
 'home_swing_dbs_havoc',
 'home_swing_f7_havoc',
 'home_swing_overall_havoc',
 'home_swing_line_yards',
 'home_swing_open_field_yards',
 'home_swing_passing_downs_explosiveness',
 'home_swing_passing_downs_ppa',
 'home_swing_passing_downs_rate',
 'home_swing_passing_downs_success_rate',
 'home_swing_passing_explosiveness',
 'home_swing_passing_ppa',
 'home_swing_passing_success_rate',
 'home_swing_points_per_opp',
 'home_swing_power_success',
 'home_swing_rushing_explosiveness',
 'home_swing_rushing_ppa',
 'home_swing_second_level_yards',
 'home_swing_standard_downs_explosiveness',
 'home_swing_standard_downs_ppa',
 'home_swing_standard_downs_rate',
 'home_swing_standard_downs_success_rate',
 'home_swing_stuff_rate',
 'home_swing_overall_success_rate']

stats_games_merged_2024_df_a = pd.DataFrame(stats_games_merged_2024_df[columns_to_keep_a])
stats_games_merged_2024_df_a = stats_games_merged_2024_df_a

In [11]:
# Create a home_away column for all rows equal to 1 (home game for the offense)

stats_games_merged_2024_df_a['home_away'] = 1

In [12]:
# Rename columns for later merging with dataframe b

renamed_columns_a = ['game_id',
 'season',
 'week',
 'start_date',
 'neutral_site',
 'team',
 'conference',
 'points',
 'pregame_elo',
 'opponent',
 'opp_conference',
 'opp_pregame_elo',
 'games',
 'clock_control',
 'off_bye_week',
 'opp_games',
 'opp_off_bye_week',
 'net_ppa',
 'ant_yield',
 'swing_explosiveness',
 'swing_field_position',
 'swing_dbs_havoc',
 'swing_f7_havoc',
 'swing_overall_havoc',
 'swing_line_yards',
 'swing_open_field_yards',
 'swing_passing_downs_explosiveness',
 'swing_passing_downs_ppa',
 'swing_passing_downs_rate',
 'swing_passing_downs_success_rate',
 'swing_passing_explosiveness',
 'swing_passing_ppa',
 'swing_passing_success_rate',
 'swing_points_per_opp',
 'swing_power_success',
 'swing_rushing_explosiveness',
 'swing_rushing_ppa',
 'swing_second_level_yards',
 'swing_standard_downs_explosiveness',
 'swing_standard_downs_ppa',
 'swing_standard_downs_rate',
 'swing_standard_downs_success_rate',
 'swing_stuff_rate',
 'swing_overall_success_rate',
 'home_away']

stats_games_merged_2024_df_a = stats_games_merged_2024_df_a.set_axis(renamed_columns_a, axis=1)

In [13]:
# Second half of split, away team offense vs home team defense (b)

columns_to_keep_b = ['game_id',
 'season',
 'week',
 'start_date',
 'neutral_site',
 'home_team',
 'home_conference',
 'home_pregame_elo',
 'away_team',
 'away_conference',
 'away_points',
 'away_pregame_elo',
 'games_home',
 'clock_control_home',
 'off_bye_week_home',
 'games_away',
 'off_bye_week_away',
 'away_net_ppa',
 'away_ant_yield',
 'away_swing_explosiveness',
 'away_swing_field_position',
 'away_swing_dbs_havoc',
 'away_swing_f7_havoc',
 'away_swing_overall_havoc',
 'away_swing_line_yards',
 'away_swing_open_field_yards',
 'away_swing_passing_downs_explosiveness',
 'away_swing_passing_downs_ppa',
 'away_swing_passing_downs_rate',
 'away_swing_passing_downs_success_rate',
 'away_swing_passing_explosiveness',
 'away_swing_passing_ppa',
 'away_swing_passing_success_rate',
 'away_swing_points_per_opp',
 'away_swing_power_success',
 'away_swing_rushing_explosiveness',
 'away_swing_rushing_ppa',
 'away_swing_second_level_yards',
 'away_swing_standard_downs_explosiveness',
 'away_swing_standard_downs_ppa',
 'away_swing_standard_downs_rate',
 'away_swing_standard_downs_success_rate',
 'away_swing_stuff_rate',
 'away_swing_overall_success_rate']

stats_games_merged_2024_df_b = stats_games_merged_2024_df[columns_to_keep_b]

In [14]:
# New column to denote that the offense is away set equal to 0

stats_games_merged_2024_df_b['home_away'] = 0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  stats_games_merged_2024_df_b['home_away'] = 0


In [15]:
# Rename columns for merging purposes

renamed_columns_b = ['game_id',
 'season',
 'week',
 'start_date',
 'neutral_site',
 'opponent',
 'opp_conference',
 'opp_pregame_elo',
 'team',
 'conference',
 'points',
 'pregame_elo',
 'games',
 'clock_control',
 'off_bye_week',
 'opp_games',
 'opp_off_bye_week',
 'net_ppa',
 'ant_yield',
 'swing_explosiveness',
 'swing_field_position',
 'swing_dbs_havoc',
 'swing_f7_havoc',
 'swing_overall_havoc',
 'swing_line_yards',
 'swing_open_field_yards',
 'swing_passing_downs_explosiveness',
 'swing_passing_downs_ppa',
 'swing_passing_downs_rate',
 'swing_passing_downs_success_rate',
 'swing_passing_explosiveness',
 'swing_passing_ppa',
 'swing_passing_success_rate',
 'swing_points_per_opp',
 'swing_power_success',
 'swing_rushing_explosiveness',
 'swing_rushing_ppa',
 'swing_second_level_yards',
 'swing_standard_downs_explosiveness',
 'swing_standard_downs_ppa',
 'swing_standard_downs_rate',
 'swing_standard_downs_success_rate',
 'swing_stuff_rate',
 'swing_overall_success_rate',
 'home_away']

stats_games_merged_2024_df_b = stats_games_merged_2024_df_b.set_axis(renamed_columns_b, axis=1)

In [16]:
# Concatenate dataframe a and b to use as validation set for linear regression model
# Drop any rows with null elo ratings (necessary for predictions)
# Export validation data as csv

validation_df_csv = pd.concat([stats_games_merged_2024_df_a,stats_games_merged_2024_df_b],ignore_index=True)

validation_df_csv.dropna(subset=['pregame_elo','opp_pregame_elo'],inplace=True)

validation_df_csv.to_csv('validation_df.csv', index=False)