In [2]:
# Import pandas and numpy, then read the basic team stats and games CSVs into dataframes.
# Create derived columns for later use in the prediction model.

import pandas as pd
import numpy as np
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# Load the per-week team stats file and the games file.
bas_2024_df = pd.read_csv('team_season_stats_by_week_2024.csv')
games_2024_df = pd.read_csv('games_2024.csv')

# Derive volume, pace and rate features. Each lambda receives the frame as
# built so far, so entries after `off_plays` can read that new column.
bas_2024_df = bas_2024_df.assign(
    off_plays=lambda d: d['passAttempts'] + d['rushingAttempts'],
    off_plays_per_game=lambda d: d['off_plays'] / d['games'],
    pass_attempts_per_game=lambda d: d['passAttempts'] / d['games'],
    rush_attempts_per_game=lambda d: d['rushingAttempts'] / d['games'],
    # Possession time per offensive play (units follow possessionTime).
    off_pace=lambda d: d['possessionTime'] / d['off_plays'],
    off_interception_ratio=lambda d: d['interceptions'] / d['passAttempts'],
    off_turnover_rate=lambda d: d['turnovers'] / d['off_plays'],
    off_pass_rate=lambda d: d['passAttempts'] / d['off_plays'],
    off_rush_rate=lambda d: d['rushingAttempts'] / d['off_plays'],
)

# Keep only the columns used downstream (off_turnover_rate is not retained).
bas_2024_df = bas_2024_df[[
    # keys
    'season', 'through_week', 'team', 'games',
    # play volume and run/pass mix
    'off_plays', 'off_plays_per_game',
    'passAttempts', 'pass_attempts_per_game', 'off_pass_rate',
    'rushingAttempts', 'rush_attempts_per_game', 'off_rush_rate',
    # tempo
    'possessionTime', 'off_pace', 'clock_control',
    # efficiency and discipline
    'first_downs_per_game', 'third_down_efficiency', 'fourth_down_efficiency',
    'penalties', 'penalties_per_game', 'yards_per_penalty',
    # turnovers and defensive counting stats
    'off_interception_ratio', 'turnovers', 'passesIntercepted', 'sacks', 'tacklesForLoss',
]]

In [6]:
# Read in the advanced metrics dataframe and keep only the columns used later

adv_2024_df = pd.read_csv('adv_season_stats_by_week_2024.csv')

# Metric suffixes kept for both sides of the ball. The file carries each one
# as 'defense_<suffix>' and 'offense_<suffix>'; the order below is the order
# in which the columns are selected.
adv_metric_suffixes = [
    'overall_explosiveness', 'fp_avg_ppa', 'fp_avg_start',
    'havoc_dbs', 'havoc_f7', 'havoc_total',
    'line_yards', 'open_field_yards',
    'passing_downs_explosiveness', 'passing_downs_ppa',
    'passing_downs_rate', 'passing_downs_success_rate',
    'passing_explosiveness', 'passing_ppa_per_play',
    'passing_rate', 'passing_success_rate',
    'points_per_opportunity', 'power_success', 'ppa_per_play',
    'rushing_explosiveness', 'rushing_ppa_per_play',
    'rushing_rate', 'rushing_success_rate',
    'second_level_yards',
    'standard_downs_explosiveness', 'standard_downs_ppa_per_play',
    'standard_downs_rate', 'standard_downs_success_rate',
    'stuff_rate', 'overall_success_rate',
]

# Keys first, then every defensive metric, then every offensive metric.
adv_2024_df = adv_2024_df[
    ['season', 'through_week', 'team', 'conference']
    + [f'defense_{suffix}' for suffix in adv_metric_suffixes]
    + [f'offense_{suffix}' for suffix in adv_metric_suffixes]
]


In [8]:
# Merge basic and advanced dataframes

# Outer join on the shared keys, so a team-week present in only one of the two
# sources is still kept (its missing side comes through as nulls).
# suffixes=(False, False) leaves column names untouched; pandas raises a
# ValueError if a non-key column name exists in both frames.
merge_keys = ['season', 'through_week', 'team']
merged_2024_df = pd.merge(
    bas_2024_df,
    adv_2024_df,
    how='outer',
    on=merge_keys,
    suffixes=(False, False),
)


In [10]:
# Create new columns to be used later & fill any null values

# Stats "through week N" are joined to the game played in week N + 1.
merged_2024_df['before_week'] = merged_2024_df['through_week'] + 1

# NOTE(review): this fills every remaining null in the frame with 0, whatever
# the column. A zero-filled off_pace or clock_control becomes a divisor in the
# anticipated-yield step - confirm that is acceptable.
merged_2024_df = merged_2024_df.fillna(0)

merged_2024_df = merged_2024_df.sort_values(['season', 'team', 'through_week'])

# A team is coming off a bye when its games count did not change from its own
# previous weekly row. The diff is taken inside each (season, team) group so
# that a team's first row is never compared with the last row of whichever
# team sorts just before it. The first row of each group has no predecessor
# (NaN diff) and is flagged 0. Stored as float 1.0 / 0.0.
merged_2024_df['off_bye_week'] = (
    merged_2024_df
    .groupby(['season', 'team'])['games']
    .diff()
    .eq(0)
    .astype(float)
)

In [12]:
# Merge the stats dataframe with games dataframe and add suffix for home/away teams

# Step 1: attach the home team's stats as they stood before the game's week.
# Default inner join: games with no matching stats row are dropped.
home_joined_2024_df = games_2024_df.merge(
    merged_2024_df,
    left_on=['season', 'week', 'home_team'],
    right_on=['season', 'before_week', 'team'],
)

# Step 2: attach the away team's stats. Every stats column now exists on both
# sides of the join, so the first copy gets '_home' and the second '_away'.
stats_games_merged_2024_df = home_joined_2024_df.merge(
    merged_2024_df,
    left_on=['season', 'week', 'away_team'],
    right_on=['season', 'before_week', 'team'],
    suffixes=('_home', '_away'),
)

In [14]:
# Drop any redundant columns from merge

# The join keys and conference came along once per side in the double merge.
columns_to_drop = [
    f'{column}_{side}'
    for side in ('home', 'away')
    for column in ('through_week', 'team', 'conference', 'before_week')
]

stats_games_merged_2024_df = stats_games_merged_2024_df.drop(columns=columns_to_drop)

In [16]:
# Calculate anticipated yield

# Each lambda sees the columns created by the entries above it, so the
# keyword order below is also the dependency order and the column order.
stats_games_merged_2024_df = stats_games_merged_2024_df.assign(
    total_clock_control=lambda g: g['clock_control_home'] + g['clock_control_away'],

    # Home side: share of clock control -> estimated possession time -> plays.
    # 3600 is presumably the seconds in a 60-minute game - TODO confirm that
    # off_pace is expressed in seconds per play.
    home_clock_control_ratio=lambda g: g['clock_control_home'] / g['total_clock_control'],
    home_estimated_possesion_time=lambda g: g['home_clock_control_ratio'] * 3600,
    home_estimated_off_plays=lambda g: g['home_estimated_possesion_time'] / g['off_pace_home'],

    # Away side: same calculation.
    away_clock_control_ratio=lambda g: g['clock_control_away'] / g['total_clock_control'],
    away_estimated_possesion_time=lambda g: g['away_clock_control_ratio'] * 3600,
    away_estimated_off_plays=lambda g: g['away_estimated_possesion_time'] / g['off_pace_away'],

    # Net PPA per play: the offense's PPA plus the PPA the opposing defense allows.
    home_net_ppa=lambda g: g['offense_ppa_per_play_home'] + g['defense_ppa_per_play_away'],
    away_net_ppa=lambda g: g['offense_ppa_per_play_away'] + g['defense_ppa_per_play_home'],

    # Anticipated yield: net PPA per play times the estimated number of plays.
    home_ant_yield=lambda g: g['home_net_ppa'] * g['home_estimated_off_plays'],
    away_ant_yield=lambda g: g['away_net_ppa'] * g['away_estimated_off_plays'],
)


In [18]:
# Calculate an index for comparing home offense and away defense in critical areas

# swing column name -> advanced-metric name. The same metric name is read from
# 'offense_<metric>_home' and 'defense_<metric>_away', so an offensive metric
# is always paired with its own defensive counterpart
# (passing_downs_explosiveness used to be paired with defensive open-field yards).
home_swing_metrics = {
    'explosiveness': 'overall_explosiveness',
    'field_position': 'fp_avg_ppa',
    'dbs_havoc': 'havoc_dbs',
    'f7_havoc': 'havoc_f7',
    'overall_havoc': 'havoc_total',
    'line_yards': 'line_yards',
    'open_field_yards': 'open_field_yards',
    'passing_downs_explosiveness': 'passing_downs_explosiveness',
    'passing_downs_ppa': 'passing_downs_ppa',
    'passing_downs_rate': 'passing_downs_rate',
    'passing_downs_success_rate': 'passing_downs_success_rate',
    'passing_explosiveness': 'passing_explosiveness',
    'passing_ppa': 'passing_ppa_per_play',
    'passing_success_rate': 'passing_success_rate',
    'points_per_opp': 'points_per_opportunity',
    'power_success': 'power_success',
    'rushing_explosiveness': 'rushing_explosiveness',
    'rushing_ppa': 'rushing_ppa_per_play',
    'second_level_yards': 'second_level_yards',
    'standard_downs_explosiveness': 'standard_downs_explosiveness',
    'standard_downs_ppa': 'standard_downs_ppa_per_play',
    'standard_downs_rate': 'standard_downs_rate',
    'standard_downs_success_rate': 'standard_downs_success_rate',
    'stuff_rate': 'stuff_rate',
    'overall_success_rate': 'overall_success_rate',
}

for swing_name, metric in home_swing_metrics.items():
    home_offense = stats_games_merged_2024_df[f'offense_{metric}_home']
    away_defense = stats_games_merged_2024_df[f'defense_{metric}_away']

    if swing_name == 'passing_downs_rate':
        # Difference, not average: defense's rate minus offense's rate.
        swing = away_defense - home_offense
    elif swing_name == 'standard_downs_rate':
        # Difference, not average: offense's rate minus defense's rate.
        swing = home_offense - away_defense
    else:
        # Everything else is the midpoint of what the offense produces and
        # what the defense allows.
        swing = (home_offense + away_defense) / 2

    stats_games_merged_2024_df[f'home_swing_{swing_name}'] = swing


In [20]:
# Calculate an index for comparing away offense and home defense in critical areas

# swing column name -> advanced-metric name. The same metric name is read from
# 'offense_<metric>_away' and 'defense_<metric>_home', so an offensive metric
# is always paired with its own defensive counterpart
# (passing_downs_explosiveness used to be paired with defensive open-field yards).
away_swing_metrics = {
    'explosiveness': 'overall_explosiveness',
    'field_position': 'fp_avg_ppa',
    'dbs_havoc': 'havoc_dbs',
    'f7_havoc': 'havoc_f7',
    'overall_havoc': 'havoc_total',
    'line_yards': 'line_yards',
    'open_field_yards': 'open_field_yards',
    'passing_downs_explosiveness': 'passing_downs_explosiveness',
    'passing_downs_ppa': 'passing_downs_ppa',
    'passing_downs_rate': 'passing_downs_rate',
    'passing_downs_success_rate': 'passing_downs_success_rate',
    'passing_explosiveness': 'passing_explosiveness',
    'passing_ppa': 'passing_ppa_per_play',
    'passing_success_rate': 'passing_success_rate',
    'points_per_opp': 'points_per_opportunity',
    'power_success': 'power_success',
    'rushing_explosiveness': 'rushing_explosiveness',
    'rushing_ppa': 'rushing_ppa_per_play',
    'second_level_yards': 'second_level_yards',
    'standard_downs_explosiveness': 'standard_downs_explosiveness',
    'standard_downs_ppa': 'standard_downs_ppa_per_play',
    'standard_downs_rate': 'standard_downs_rate',
    'standard_downs_success_rate': 'standard_downs_success_rate',
    'stuff_rate': 'stuff_rate',
    'overall_success_rate': 'overall_success_rate',
}

for swing_name, metric in away_swing_metrics.items():
    away_offense = stats_games_merged_2024_df[f'offense_{metric}_away']
    home_defense = stats_games_merged_2024_df[f'defense_{metric}_home']

    if swing_name == 'passing_downs_rate':
        # Difference, not average: defense's rate minus offense's rate. This
        # must use the home defense and the away offense; reading the
        # away-defense/home-offense pair would duplicate home_swing_passing_downs_rate.
        swing = home_defense - away_offense
    elif swing_name == 'standard_downs_rate':
        # Difference, not average: offense's rate minus defense's rate.
        swing = away_offense - home_defense
    else:
        # Everything else is the midpoint of what the offense produces and
        # what the defense allows.
        swing = (away_offense + home_defense) / 2

    stats_games_merged_2024_df[f'away_swing_{swing_name}'] = swing

In [24]:
# Split dataframe into two, one for home team offense vs away team defense (a)
# and the other for away team offense vs home team defense (b)

# Frame (a): one row per game from the home offense's point of view.
# The order of this list matters: the later rename of frame (a) is positional.
columns_to_keep_a = [
    # game identifiers
    'id', 'season', 'week', 'start_date', 'neutral_site',
    # home team
    'home_team', 'home_conference', 'home_points', 'home_pregame_elo',
    # away team
    'away_team', 'away_conference', 'away_pregame_elo',
    # season context for both sides
    'games_home', 'clock_control_home', 'off_bye_week_home',
    'games_away', 'off_bye_week_away',
    # home-offense matchup features
    'home_net_ppa', 'home_ant_yield',
    'home_swing_explosiveness', 'home_swing_field_position',
    'home_swing_dbs_havoc', 'home_swing_f7_havoc', 'home_swing_overall_havoc',
    'home_swing_line_yards', 'home_swing_open_field_yards',
    'home_swing_passing_downs_explosiveness', 'home_swing_passing_downs_ppa',
    'home_swing_passing_downs_rate', 'home_swing_passing_downs_success_rate',
    'home_swing_passing_explosiveness', 'home_swing_passing_ppa',
    'home_swing_passing_success_rate',
    'home_swing_points_per_opp', 'home_swing_power_success',
    'home_swing_rushing_explosiveness', 'home_swing_rushing_ppa',
    'home_swing_second_level_yards',
    'home_swing_standard_downs_explosiveness', 'home_swing_standard_downs_ppa',
    'home_swing_standard_downs_rate', 'home_swing_standard_downs_success_rate',
    'home_swing_stuff_rate', 'home_swing_overall_success_rate',
]

# Independent frame, so adding columns to it later does not touch (or warn
# about) the merged source frame.
stats_games_merged_2024_df_a = stats_games_merged_2024_df.loc[:, columns_to_keep_a].copy()

In [26]:
# Flag every row of frame (a) as a home game for the offense (home_away = 1)

stats_games_merged_2024_df_a = stats_games_merged_2024_df_a.assign(home_away=1)

In [28]:
# Rename columns for later merging with dataframe b

# Side-neutral names for frame (a). The rename is positional, so entry i
# replaces column i of the frame and the length must match exactly.
renamed_columns_a = [
    # game identifiers
    'game_id', 'season', 'week', 'start_date', 'neutral_site',
    # home team -> "team"
    'team', 'conference', 'points', 'pregame_elo',
    # away team -> "opponent"
    'opponent', 'opp_conference', 'opp_pregame_elo',
    # season context
    'games', 'clock_control', 'off_bye_week',
    'opp_games', 'opp_off_bye_week',
    # matchup features
    'net_ppa', 'ant_yield',
    'swing_explosiveness', 'swing_field_position',
    'swing_dbs_havoc', 'swing_f7_havoc', 'swing_overall_havoc',
    'swing_line_yards', 'swing_open_field_yards',
    'swing_passing_downs_explosiveness', 'swing_passing_downs_ppa',
    'swing_passing_downs_rate', 'swing_passing_downs_success_rate',
    'swing_passing_explosiveness', 'swing_passing_ppa',
    'swing_passing_success_rate',
    'swing_points_per_opp', 'swing_power_success',
    'swing_rushing_explosiveness', 'swing_rushing_ppa',
    'swing_second_level_yards',
    'swing_standard_downs_explosiveness', 'swing_standard_downs_ppa',
    'swing_standard_downs_rate', 'swing_standard_downs_success_rate',
    'swing_stuff_rate', 'swing_overall_success_rate',
    # venue flag
    'home_away',
]

stats_games_merged_2024_df_a = stats_games_merged_2024_df_a.set_axis(
    renamed_columns_a, axis='columns'
)

In [30]:
# Second half of split, away team offense vs home team defense (b)

# Frame (b): one row per game from the away offense's point of view.
# The order of this list matters: renamed_columns_b is applied positionally.
columns_to_keep_b = [
    # game identifiers
    'id', 'season', 'week', 'start_date', 'neutral_site',
    # home team (renamed to opponent / opp_*)
    'home_team', 'home_conference', 'home_pregame_elo',
    # away team (renamed to team / conference / points / pregame_elo)
    'away_team', 'away_conference', 'away_points', 'away_pregame_elo',
    # Season context. renamed_columns_b labels these five positions
    # games, clock_control, off_bye_week, opp_games, opp_off_bye_week, and
    # "team" in this frame is the away team - so the away team's own values
    # come first and the home team's values fill the opp_* slots.
    'games_away', 'clock_control_away', 'off_bye_week_away',
    'games_home', 'off_bye_week_home',
    # away-offense matchup features
    'away_net_ppa', 'away_ant_yield',
    'away_swing_explosiveness', 'away_swing_field_position',
    'away_swing_dbs_havoc', 'away_swing_f7_havoc', 'away_swing_overall_havoc',
    'away_swing_line_yards', 'away_swing_open_field_yards',
    'away_swing_passing_downs_explosiveness', 'away_swing_passing_downs_ppa',
    'away_swing_passing_downs_rate', 'away_swing_passing_downs_success_rate',
    'away_swing_passing_explosiveness', 'away_swing_passing_ppa',
    'away_swing_passing_success_rate',
    'away_swing_points_per_opp', 'away_swing_power_success',
    'away_swing_rushing_explosiveness', 'away_swing_rushing_ppa',
    'away_swing_second_level_yards',
    'away_swing_standard_downs_explosiveness', 'away_swing_standard_downs_ppa',
    'away_swing_standard_downs_rate', 'away_swing_standard_downs_success_rate',
    'away_swing_stuff_rate', 'away_swing_overall_success_rate',
]

# Take an explicit copy so that adding columns to frame (b) later does not
# raise SettingWithCopyWarning against the merged source frame.
stats_games_merged_2024_df_b = stats_games_merged_2024_df.loc[:, columns_to_keep_b].copy()

In [32]:
# New column to denote that the offense is away, set equal to 0

# assign() returns a new frame rather than writing into what may be a slice
# of the merged frame, so no SettingWithCopyWarning is raised here.
stats_games_merged_2024_df_b = stats_games_merged_2024_df_b.assign(home_away=0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  stats_games_merged_2024_df_b['home_away'] = 0


In [34]:
# Rename columns for merging purposes

# Side-neutral names for frame (b). The rename is positional, so entry i
# replaces column i of the frame and the length must match exactly.
# NOTE(review): the 'games', 'clock_control' and 'off_bye_week' slots receive
# whatever columns_to_keep_b lists at those positions - confirm those are the
# away team's values, since "team" in this frame is the away team.
renamed_columns_b = [
    # game identifiers
    'game_id', 'season', 'week', 'start_date', 'neutral_site',
    # home team -> "opponent"
    'opponent', 'opp_conference', 'opp_pregame_elo',
    # away team -> "team"
    'team', 'conference', 'points', 'pregame_elo',
    # season context
    'games', 'clock_control', 'off_bye_week',
    'opp_games', 'opp_off_bye_week',
    # matchup features
    'net_ppa', 'ant_yield',
    'swing_explosiveness', 'swing_field_position',
    'swing_dbs_havoc', 'swing_f7_havoc', 'swing_overall_havoc',
    'swing_line_yards', 'swing_open_field_yards',
    'swing_passing_downs_explosiveness', 'swing_passing_downs_ppa',
    'swing_passing_downs_rate', 'swing_passing_downs_success_rate',
    'swing_passing_explosiveness', 'swing_passing_ppa',
    'swing_passing_success_rate',
    'swing_points_per_opp', 'swing_power_success',
    'swing_rushing_explosiveness', 'swing_rushing_ppa',
    'swing_second_level_yards',
    'swing_standard_downs_explosiveness', 'swing_standard_downs_ppa',
    'swing_standard_downs_rate', 'swing_standard_downs_success_rate',
    'swing_stuff_rate', 'swing_overall_success_rate',
    # venue flag
    'home_away',
]

stats_games_merged_2024_df_b = stats_games_merged_2024_df_b.set_axis(
    renamed_columns_b, axis='columns'
)

In [36]:
# Stack frames (a) and (b) into the validation set for the linear regression model,
# drop rows missing either Elo rating (both are required for predictions),
# then export the result as a CSV.

validation_df_csv = (
    pd.concat(
        [stats_games_merged_2024_df_a, stats_games_merged_2024_df_b],
        ignore_index=True,  # columns are matched by name; rows get a fresh 0..n-1 index
    )
    .dropna(subset=['pregame_elo', 'opp_pregame_elo'])
)

validation_df_csv.to_csv('validation_df.csv', index=False)