In [1]:
# Import pandas and numpy packages, read basic stats and games csv's in as dataframes
# Create new columns for later use in prediction model

import cfbd
import pandas as pd
import numpy as np
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

configuration = cfbd.Configuration()
configuration.api_key['Authorization'] = 'XXXXXXXX'
configuration.api_key_prefix['Authorization'] = 'Bearer'
api_config = cfbd.ApiClient(configuration)

api_ratings_instance = cfbd.RatingsApi(api_config)
api_games_instance = cfbd.GamesApi(api_config)

In [2]:
# Call in ELO ratings for use in prediction model

ratings_raw = []

ratings_response = api_ratings_instance.get_elo_ratings(year=2024, week=16)
ratings_raw = [*ratings_raw, *ratings_response]

In [3]:
# Format and flatten ELO ratings data

ratings=[]

ratings = [
    dict(
        year=r.year,
        team=r.team,
        elo=r.elo
    ) for r in ratings_raw]

ratings_df = pd.DataFrame.from_records(ratings)

In [4]:
# Call in necessary data to calculate predictions for every possible matchup

bas_2024_df = pd.read_csv('team_season_stats_by_week_2024.csv')

bas_2024_df['off_plays'] = bas_2024_df['passAttempts'] + bas_2024_df['rushingAttempts']
bas_2024_df['off_plays_per_game'] = bas_2024_df['off_plays']/bas_2024_df['games']
bas_2024_df['pass_attempts_per_game'] = bas_2024_df['passAttempts']/bas_2024_df['games']
bas_2024_df['rush_attempts_per_game'] = bas_2024_df['rushingAttempts']/bas_2024_df['games']
bas_2024_df['off_pace'] = bas_2024_df['possessionTime']/bas_2024_df['off_plays']
bas_2024_df['off_interception_ratio'] = bas_2024_df['interceptions']/bas_2024_df['passAttempts']
bas_2024_df['off_turnover_rate'] = bas_2024_df['turnovers']/bas_2024_df['off_plays']
bas_2024_df['off_pass_rate'] = bas_2024_df['passAttempts']/bas_2024_df['off_plays']
bas_2024_df['off_rush_rate'] = bas_2024_df['rushingAttempts']/bas_2024_df['off_plays']


bas_2024_df = bas_2024_df[['season','through_week','team','games','off_plays','off_plays_per_game','passAttempts','pass_attempts_per_game',
                           'off_pass_rate','rushingAttempts','rush_attempts_per_game', 'off_rush_rate','possessionTime','off_pace','clock_control',
                           'first_downs_per_game','third_down_efficiency','fourth_down_efficiency','penalties','penalties_per_game','yards_per_penalty',
                           'off_interception_ratio','turnovers','passesIntercepted','sacks','tacklesForLoss']]

In [5]:
# Read in advanced metrics dataframe, and reformat


adv_2024_df = pd.read_csv('adv_season_stats_by_week_2024.csv')

adv_2024_df = adv_2024_df[['season','through_week','team','conference','defense_overall_explosiveness','defense_fp_avg_ppa','defense_fp_avg_start',
                           'defense_havoc_dbs','defense_havoc_f7','defense_havoc_total','defense_line_yards','defense_open_field_yards',
                           'defense_passing_downs_explosiveness','defense_passing_downs_ppa','defense_passing_downs_rate','defense_passing_downs_success_rate',
                           'defense_passing_explosiveness','defense_passing_ppa_per_play','defense_passing_rate','defense_passing_success_rate',
                           'defense_points_per_opportunity','defense_power_success','defense_ppa_per_play','defense_rushing_explosiveness','defense_rushing_ppa_per_play',
                           'defense_rushing_rate','defense_rushing_success_rate','defense_second_level_yards','defense_standard_downs_explosiveness',
                           'defense_standard_downs_ppa_per_play','defense_standard_downs_rate','defense_standard_downs_success_rate','defense_stuff_rate',
                           'defense_overall_success_rate','offense_overall_explosiveness','offense_fp_avg_ppa','offense_fp_avg_start',
                           'offense_havoc_dbs','offense_havoc_f7','offense_havoc_total','offense_line_yards','offense_open_field_yards',
                           'offense_passing_downs_explosiveness','offense_passing_downs_ppa','offense_passing_downs_rate','offense_passing_downs_success_rate',
                           'offense_passing_explosiveness','offense_passing_ppa_per_play','offense_passing_rate','offense_passing_success_rate',
                           'offense_points_per_opportunity','offense_power_success','offense_ppa_per_play','offense_rushing_explosiveness','offense_rushing_ppa_per_play',
                           'offense_rushing_rate','offense_rushing_success_rate','offense_second_level_yards','offense_standard_downs_explosiveness',
                           'offense_standard_downs_ppa_per_play','offense_standard_downs_rate','offense_standard_downs_success_rate','offense_stuff_rate',
                           'offense_overall_success_rate']]


In [6]:
# Merge basic and advanced dataframes

merged_2024_df = bas_2024_df.merge(adv_2024_df, left_on=['season','through_week','team'],
                  right_on=['season','through_week','team'],
                  how = 'outer',
                  suffixes=(False, False))


In [7]:
# Create new columns to be used later & fill any null values

merged_2024_df['before_week']=(merged_2024_df['through_week']+1)
merged_2024_df = merged_2024_df.fillna(0)

merged_2024_df = merged_2024_df.sort_values(['season','team','through_week'])

merged_2024_df.loc[(merged_2024_df['games'].shift(1) - merged_2024_df['games']) == 0, 'off_bye_week'] = 1
merged_2024_df.loc[(merged_2024_df['games'].shift(1) - merged_2024_df['games']) != 0, 'off_bye_week'] = 0

In [8]:
# Merge the stats dataframe with games dataframe and add suffix for home/away teams
merged_2024_df_a = merged_2024_df.loc[merged_2024_df['through_week']==16]

merged_2024_df_b = merged_2024_df_a.copy()

merged_2024_df_a = merged_2024_df_a.merge(ratings_df,
                                          left_on=['season','team'],
                                          right_on=['year','team'],
                                          how='left')

merged_2024_df_b = merged_2024_df_b.merge(ratings_df,
                                          left_on=['season','team'],
                                          right_on=['year','team'],
                                          how='left')

jmpi_2024_df = merged_2024_df_a.merge(merged_2024_df_b,
                                      how='cross',
                                      suffixes=['_team','_opp'])

jmpi_2024_df = jmpi_2024_df.rename(columns={"team_team": "team","team_opp":"opponent"})

jmpi_2024_df = jmpi_2024_df[jmpi_2024_df['team'] != jmpi_2024_df['opponent']]

jmpi_2024_df = jmpi_2024_df.reset_index()


In [9]:
# Calculate anticipated yield

jmpi_2024_df['total_clock_control'] = jmpi_2024_df['clock_control_team'] + jmpi_2024_df['clock_control_opp']

jmpi_2024_df['team_clock_control_ratio'] = jmpi_2024_df['clock_control_team'] / jmpi_2024_df['total_clock_control']
jmpi_2024_df['team_estimated_possesion_time'] = jmpi_2024_df['team_clock_control_ratio'] * 3600
jmpi_2024_df['team_estimated_off_plays'] = jmpi_2024_df['team_estimated_possesion_time'] / jmpi_2024_df['off_pace_team']

jmpi_2024_df['opp_clock_control_ratio'] = jmpi_2024_df['clock_control_opp'] / jmpi_2024_df['total_clock_control']
jmpi_2024_df['opp_estimated_possesion_time'] = jmpi_2024_df['opp_clock_control_ratio'] * 3600
jmpi_2024_df['opp_estimated_off_plays'] = jmpi_2024_df['opp_estimated_possesion_time'] / jmpi_2024_df['off_pace_opp']

jmpi_2024_df['team_net_ppa'] = jmpi_2024_df['offense_ppa_per_play_team'] + jmpi_2024_df['defense_ppa_per_play_opp']

jmpi_2024_df['opp_net_ppa'] = jmpi_2024_df['offense_ppa_per_play_opp'] + jmpi_2024_df['defense_ppa_per_play_team']

jmpi_2024_df['team_ant_yield'] = jmpi_2024_df['team_net_ppa'] * jmpi_2024_df['team_estimated_off_plays']
jmpi_2024_df['opp_ant_yield'] = jmpi_2024_df['opp_net_ppa'] * jmpi_2024_df['opp_estimated_off_plays']


In [10]:
# Calculate an index for comparing team offense and opp defense in critical areas

jmpi_2024_df['team_swing_explosiveness'] = (jmpi_2024_df['offense_overall_explosiveness_team'] + jmpi_2024_df['defense_overall_explosiveness_opp'])/2
jmpi_2024_df['team_swing_field_position'] = (jmpi_2024_df['offense_fp_avg_ppa_team'] + jmpi_2024_df['defense_fp_avg_ppa_opp'])/2
jmpi_2024_df['team_swing_dbs_havoc'] = (jmpi_2024_df['offense_havoc_dbs_team'] + jmpi_2024_df['defense_havoc_dbs_opp'])/2
jmpi_2024_df['team_swing_f7_havoc'] = (jmpi_2024_df['offense_havoc_f7_team'] + jmpi_2024_df['defense_havoc_f7_opp'])/2
jmpi_2024_df['team_swing_overall_havoc'] = (jmpi_2024_df['offense_havoc_total_team'] + jmpi_2024_df['defense_havoc_total_opp'])/2
jmpi_2024_df['team_swing_line_yards'] = (jmpi_2024_df['offense_line_yards_team'] + jmpi_2024_df['defense_line_yards_opp'])/2
jmpi_2024_df['team_swing_open_field_yards'] = (jmpi_2024_df['offense_open_field_yards_team'] + jmpi_2024_df['defense_open_field_yards_opp'])/2
jmpi_2024_df['team_swing_passing_downs_explosiveness'] = (jmpi_2024_df['offense_passing_downs_explosiveness_team'] + jmpi_2024_df['defense_open_field_yards_opp'])/2
jmpi_2024_df['team_swing_passing_downs_ppa'] = (jmpi_2024_df['offense_passing_downs_ppa_team'] + jmpi_2024_df['defense_passing_downs_ppa_opp'])/2
jmpi_2024_df['team_swing_passing_downs_rate'] = jmpi_2024_df['defense_passing_downs_rate_opp'] - jmpi_2024_df['offense_passing_downs_rate_team']
jmpi_2024_df['team_swing_passing_downs_success_rate'] = (jmpi_2024_df['offense_passing_downs_success_rate_team'] + jmpi_2024_df['defense_passing_downs_success_rate_opp'])/2
jmpi_2024_df['team_swing_passing_explosiveness'] = (jmpi_2024_df['offense_passing_explosiveness_team'] + jmpi_2024_df['defense_passing_explosiveness_opp'])/2
jmpi_2024_df['team_swing_passing_ppa'] = (jmpi_2024_df['offense_passing_ppa_per_play_team'] + jmpi_2024_df['defense_passing_ppa_per_play_opp'])/2
jmpi_2024_df['team_swing_passing_success_rate'] = (jmpi_2024_df['offense_passing_success_rate_team'] + jmpi_2024_df['defense_passing_success_rate_opp'])/2
jmpi_2024_df['team_swing_points_per_opp'] = (jmpi_2024_df['offense_points_per_opportunity_team'] + jmpi_2024_df['defense_points_per_opportunity_opp'])/2
jmpi_2024_df['team_swing_power_success'] = (jmpi_2024_df['offense_power_success_team'] + jmpi_2024_df['defense_power_success_opp'])/2
jmpi_2024_df['team_swing_rushing_explosiveness'] = (jmpi_2024_df['offense_rushing_explosiveness_team'] + jmpi_2024_df['defense_rushing_explosiveness_opp'])/2
jmpi_2024_df['team_swing_rushing_ppa'] = (jmpi_2024_df['offense_rushing_ppa_per_play_team'] + jmpi_2024_df['defense_rushing_ppa_per_play_opp'])/2
jmpi_2024_df['team_swing_second_level_yards'] = (jmpi_2024_df['offense_second_level_yards_team'] + jmpi_2024_df['defense_second_level_yards_opp'])/2
jmpi_2024_df['team_swing_standard_downs_explosiveness'] = (jmpi_2024_df['offense_standard_downs_explosiveness_team'] + jmpi_2024_df['defense_standard_downs_explosiveness_opp'])/2
jmpi_2024_df['team_swing_standard_downs_ppa'] = (jmpi_2024_df['offense_standard_downs_ppa_per_play_team'] + jmpi_2024_df['defense_standard_downs_ppa_per_play_opp'])/2
jmpi_2024_df['team_swing_standard_downs_rate'] = jmpi_2024_df['offense_standard_downs_rate_team'] - jmpi_2024_df['defense_standard_downs_rate_opp']
jmpi_2024_df['team_swing_standard_downs_success_rate'] = (jmpi_2024_df['offense_standard_downs_success_rate_team'] + jmpi_2024_df['defense_standard_downs_success_rate_opp'])/2
jmpi_2024_df['team_swing_stuff_rate'] = (jmpi_2024_df['offense_stuff_rate_team'] + jmpi_2024_df['defense_stuff_rate_opp'])/2
jmpi_2024_df['team_swing_overall_success_rate'] = (jmpi_2024_df['offense_overall_success_rate_team'] + jmpi_2024_df['defense_overall_success_rate_opp'])/2


In [11]:
jmpi_2024_df['opp_swing_explosiveness'] = (jmpi_2024_df['offense_overall_explosiveness_opp'] + jmpi_2024_df['defense_overall_explosiveness_team'])/2
jmpi_2024_df['opp_swing_field_position'] = (jmpi_2024_df['offense_fp_avg_ppa_opp'] + jmpi_2024_df['defense_fp_avg_ppa_team'])/2
jmpi_2024_df['opp_swing_dbs_havoc'] = (jmpi_2024_df['offense_havoc_dbs_opp'] + jmpi_2024_df['defense_havoc_dbs_team'])/2
jmpi_2024_df['opp_swing_f7_havoc'] = (jmpi_2024_df['offense_havoc_f7_opp'] + jmpi_2024_df['defense_havoc_f7_team'])/2
jmpi_2024_df['opp_swing_overall_havoc'] = (jmpi_2024_df['offense_havoc_total_opp'] + jmpi_2024_df['defense_havoc_total_team'])/2
jmpi_2024_df['opp_swing_line_yards'] = (jmpi_2024_df['offense_line_yards_opp'] + jmpi_2024_df['defense_line_yards_team'])/2
jmpi_2024_df['opp_swing_open_field_yards'] = (jmpi_2024_df['offense_open_field_yards_opp'] + jmpi_2024_df['defense_open_field_yards_team'])/2
jmpi_2024_df['opp_swing_passing_downs_explosiveness'] = (jmpi_2024_df['offense_passing_downs_explosiveness_opp'] + jmpi_2024_df['defense_open_field_yards_team'])/2
jmpi_2024_df['opp_swing_passing_downs_ppa'] = (jmpi_2024_df['offense_passing_downs_ppa_opp'] + jmpi_2024_df['defense_passing_downs_ppa_team'])/2
jmpi_2024_df['opp_swing_passing_downs_rate'] = jmpi_2024_df['defense_passing_downs_rate_opp'] - jmpi_2024_df['offense_passing_downs_rate_team']
jmpi_2024_df['opp_swing_passing_downs_success_rate'] = (jmpi_2024_df['offense_passing_downs_success_rate_opp'] + jmpi_2024_df['defense_passing_downs_success_rate_team'])/2
jmpi_2024_df['opp_swing_passing_explosiveness'] = (jmpi_2024_df['offense_passing_explosiveness_opp'] + jmpi_2024_df['defense_passing_explosiveness_team'])/2
jmpi_2024_df['opp_swing_passing_ppa'] = (jmpi_2024_df['offense_passing_ppa_per_play_opp'] + jmpi_2024_df['defense_passing_ppa_per_play_team'])/2
jmpi_2024_df['opp_swing_passing_success_rate'] = (jmpi_2024_df['offense_passing_success_rate_opp'] + jmpi_2024_df['defense_passing_success_rate_team'])/2
jmpi_2024_df['opp_swing_points_per_opp'] = (jmpi_2024_df['offense_points_per_opportunity_opp'] + jmpi_2024_df['defense_points_per_opportunity_team'])/2
jmpi_2024_df['opp_swing_power_success'] = (jmpi_2024_df['offense_power_success_opp'] + jmpi_2024_df['defense_power_success_team'])/2
jmpi_2024_df['opp_swing_rushing_explosiveness'] = (jmpi_2024_df['offense_rushing_explosiveness_opp'] + jmpi_2024_df['defense_rushing_explosiveness_team'])/2
jmpi_2024_df['opp_swing_rushing_ppa'] = (jmpi_2024_df['offense_rushing_ppa_per_play_opp'] + jmpi_2024_df['defense_rushing_ppa_per_play_team'])/2
jmpi_2024_df['opp_swing_second_level_yards'] = (jmpi_2024_df['offense_second_level_yards_opp'] + jmpi_2024_df['defense_second_level_yards_team'])/2
jmpi_2024_df['opp_swing_standard_downs_explosiveness'] = (jmpi_2024_df['offense_standard_downs_explosiveness_opp'] + jmpi_2024_df['defense_standard_downs_explosiveness_team'])/2
jmpi_2024_df['opp_swing_standard_downs_ppa'] = (jmpi_2024_df['offense_standard_downs_ppa_per_play_opp'] + jmpi_2024_df['defense_standard_downs_ppa_per_play_team'])/2
jmpi_2024_df['opp_swing_standard_downs_rate'] = jmpi_2024_df['offense_standard_downs_rate_opp'] - jmpi_2024_df['defense_standard_downs_rate_team']
jmpi_2024_df['opp_swing_standard_downs_success_rate'] = (jmpi_2024_df['offense_standard_downs_success_rate_opp'] + jmpi_2024_df['defense_standard_downs_success_rate_team'])/2
jmpi_2024_df['opp_swing_stuff_rate'] = (jmpi_2024_df['offense_stuff_rate_opp'] + jmpi_2024_df['defense_stuff_rate_team'])/2
jmpi_2024_df['opp_swing_overall_success_rate'] = (jmpi_2024_df['offense_overall_success_rate_opp'] + jmpi_2024_df['defense_overall_success_rate_team'])/2

In [12]:
model_df = pd.read_csv('model_df_final.csv')

model_df['elo_index'] = model_df['pregame_elo'] / model_df['opp_pregame_elo']
jmpi_2024_df['elo_index'] = jmpi_2024_df['elo_team'] / jmpi_2024_df['elo_opp']
jmpi_2024_df['rev_elo_index'] = jmpi_2024_df['elo_opp'] / jmpi_2024_df['elo_team']

model_df['hfa'] = np.where(model_df['neutral_site']==True,0,np.where(model_df['home_away']==1,1,-1))
jmpi_2024_df['hfa'] = 0

In [13]:
# Import necessary sklearn libraries and create linear regression model to predict scoring output

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

X = model_df[['elo_index','hfa','games','off_bye_week','opp_games','opp_off_bye_week','swing_explosiveness','swing_field_position','swing_dbs_havoc','swing_f7_havoc','swing_overall_havoc','swing_line_yards','swing_open_field_yards',
              'swing_passing_downs_explosiveness','swing_passing_downs_ppa','swing_passing_downs_rate','swing_passing_downs_success_rate','swing_passing_explosiveness',
              'swing_passing_ppa','swing_passing_success_rate','swing_points_per_opp','swing_power_success','swing_rushing_explosiveness',
              'swing_rushing_ppa','swing_second_level_yards','swing_standard_downs_explosiveness','swing_standard_downs_ppa','swing_standard_downs_rate',
              'swing_standard_downs_success_rate','swing_stuff_rate','swing_overall_success_rate','ant_yield']]

y = model_df['points']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = LinearRegression()

model.fit(X_train, y_train)

y_pred = model.predict(X_test)

mse_model = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
feature_importance = model.coef_
feat_dict = dict(zip(X,feature_importance))

print(f'Mean Squared Error: {mse_model}')
print(f'R-squared: {r2}')
print(f'Feature Importance: {feat_dict}')

Mean Squared Error: 140.991346493007
R-squared: 0.27104427545405263
Feature Importance: {'elo_index': 22.619171889855902, 'hfa': 1.4701348045728682, 'games': -0.3788924862778259, 'off_bye_week': -0.09224086472407268, 'opp_games': 0.3398826131651959, 'opp_off_bye_week': -0.06598910496416377, 'swing_explosiveness': -1.6858950831913406, 'swing_field_position': -0.20512124782342275, 'swing_dbs_havoc': -9.162215268933927, 'swing_f7_havoc': 8.455741219271987, 'swing_overall_havoc': -0.7064740496619678, 'swing_line_yards': -3.954894275267809, 'swing_open_field_yards': -0.37791118742513313, 'swing_passing_downs_explosiveness': 1.0019533067006314, 'swing_passing_downs_ppa': -5.302149913143255, 'swing_passing_downs_rate': 2.7251330024395077, 'swing_passing_downs_success_rate': -14.644417105890547, 'swing_passing_explosiveness': 3.312192135341503, 'swing_passing_ppa': -11.199145544779936, 'swing_passing_success_rate': 27.060675243699258, 'swing_points_per_opp': -0.07976678067284057, 'swing_power_

In [14]:
jmpi_X = jmpi_2024_df[['elo_index','hfa','games_team','off_bye_week_team','games_opp','off_bye_week_opp','team_swing_explosiveness','team_swing_field_position',
                       'team_swing_dbs_havoc','team_swing_f7_havoc','team_swing_overall_havoc','team_swing_line_yards','team_swing_open_field_yards',
                       'team_swing_passing_downs_explosiveness','team_swing_passing_downs_ppa','team_swing_passing_downs_rate','team_swing_passing_downs_success_rate',
                       'team_swing_passing_explosiveness','team_swing_passing_ppa','team_swing_passing_success_rate','team_swing_points_per_opp',
                       'team_swing_power_success','team_swing_rushing_explosiveness','team_swing_rushing_ppa','team_swing_second_level_yards',
                       'team_swing_standard_downs_explosiveness','team_swing_standard_downs_ppa','team_swing_standard_downs_rate',
                       'team_swing_standard_downs_success_rate','team_swing_stuff_rate','team_swing_overall_success_rate','team_ant_yield']]

jmpi_X.rename(columns = {'games_team':'games',
                         'off_bye_week_team':'off_bye_week',
                         'games_opp':'opp_games',
                         'off_bye_week_opp':'opp_off_bye_week',
                         'team_swing_explosiveness':'swing_explosiveness',
                         'team_swing_field_position':'swing_field_position',
                         'team_swing_dbs_havoc':'swing_dbs_havoc',
                         'team_swing_f7_havoc':'swing_f7_havoc',
                         'team_swing_overall_havoc':'swing_overall_havoc',
                         'team_swing_line_yards':'swing_line_yards',
                         'team_swing_open_field_yards':'swing_open_field_yards',
                         'team_swing_passing_downs_explosiveness':'swing_passing_downs_explosiveness',
                         'team_swing_passing_downs_ppa':'swing_passing_downs_ppa',
                         'team_swing_passing_downs_rate':'swing_passing_downs_rate',
                         'team_swing_passing_downs_success_rate':'swing_passing_downs_success_rate',
                         'team_swing_passing_explosiveness':'swing_passing_explosiveness',
                         'team_swing_passing_ppa':'swing_passing_ppa',
                         'team_swing_passing_success_rate':'swing_passing_success_rate',
                         'team_swing_points_per_opp':'swing_points_per_opp',
                         'team_swing_power_success':'swing_power_success',
                         'team_swing_rushing_explosiveness':'swing_rushing_explosiveness',
                         'team_swing_rushing_ppa':'swing_rushing_ppa',
                         'team_swing_second_level_yards':'swing_second_level_yards',
                         'team_swing_standard_downs_explosiveness':'swing_standard_downs_explosiveness',
                         'team_swing_standard_downs_ppa':'swing_standard_downs_ppa',
                         'team_swing_standard_downs_rate':'swing_standard_downs_rate',
                         'team_swing_standard_downs_success_rate':'swing_standard_downs_success_rate',
                         'team_swing_stuff_rate':'swing_stuff_rate',
                         'team_swing_overall_success_rate':'swing_overall_success_rate',
                         'team_ant_yield':'ant_yield'},
             inplace=True)

jmpi_A = pd.DataFrame()
jmpi_A['team_jay'] = model.predict(jmpi_X)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  jmpi_X.rename(columns = {'games_team':'games',


In [15]:
jmpi_Y = jmpi_2024_df[['rev_elo_index','hfa','games_opp','off_bye_week_opp','games_team','off_bye_week_team','opp_swing_explosiveness','opp_swing_field_position',
                       'opp_swing_dbs_havoc','opp_swing_f7_havoc','opp_swing_overall_havoc','opp_swing_line_yards','opp_swing_open_field_yards',
                       'opp_swing_passing_downs_explosiveness','opp_swing_passing_downs_ppa','opp_swing_passing_downs_rate','opp_swing_passing_downs_success_rate',
                       'opp_swing_passing_explosiveness','opp_swing_passing_ppa','opp_swing_passing_success_rate','opp_swing_points_per_opp',
                       'opp_swing_power_success','opp_swing_rushing_explosiveness','opp_swing_rushing_ppa','opp_swing_second_level_yards',
                       'opp_swing_standard_downs_explosiveness','opp_swing_standard_downs_ppa','opp_swing_standard_downs_rate',
                       'opp_swing_standard_downs_success_rate','opp_swing_stuff_rate','opp_swing_overall_success_rate','opp_ant_yield']]

jmpi_Y.rename(columns = {'games_opp':'games',
                         'rev_elo_index':'elo_index',
                         'off_bye_week_opp':'off_bye_week',
                         'games_team':'opp_games',
                         'off_bye_week_team':'opp_off_bye_week',
                         'opp_swing_explosiveness':'swing_explosiveness',
                         'opp_swing_field_position':'swing_field_position',
                         'opp_swing_dbs_havoc':'swing_dbs_havoc',
                         'opp_swing_f7_havoc':'swing_f7_havoc',
                         'opp_swing_overall_havoc':'swing_overall_havoc',
                         'opp_swing_line_yards':'swing_line_yards',
                         'opp_swing_open_field_yards':'swing_open_field_yards',
                         'opp_swing_passing_downs_explosiveness':'swing_passing_downs_explosiveness',
                         'opp_swing_passing_downs_ppa':'swing_passing_downs_ppa',
                         'opp_swing_passing_downs_rate':'swing_passing_downs_rate',
                         'opp_swing_passing_downs_success_rate':'swing_passing_downs_success_rate',
                         'opp_swing_passing_explosiveness':'swing_passing_explosiveness',
                         'opp_swing_passing_ppa':'swing_passing_ppa',
                         'opp_swing_passing_success_rate':'swing_passing_success_rate',
                         'opp_swing_points_per_opp':'swing_points_per_opp',
                         'opp_swing_power_success':'swing_power_success',
                         'opp_swing_rushing_explosiveness':'swing_rushing_explosiveness',
                         'opp_swing_rushing_ppa':'swing_rushing_ppa',
                         'opp_swing_second_level_yards':'swing_second_level_yards',
                         'opp_swing_standard_downs_explosiveness':'swing_standard_downs_explosiveness',
                         'opp_swing_standard_downs_ppa':'swing_standard_downs_ppa',
                         'opp_swing_standard_downs_rate':'swing_standard_downs_rate',
                         'opp_swing_standard_downs_success_rate':'swing_standard_downs_success_rate',
                         'opp_swing_stuff_rate':'swing_stuff_rate',
                         'opp_swing_overall_success_rate':'swing_overall_success_rate',
                         'opp_ant_yield':'ant_yield'},
             inplace=True)

jmpi_B = pd.DataFrame()
jmpi_B['opp_jay'] = model.predict(jmpi_Y)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  jmpi_Y.rename(columns = {'games_opp':'games',


In [16]:
jmpi_2024_df = jmpi_2024_df[['team','opponent']]

jmpi_2024_df = jmpi_2024_df.merge(jmpi_A,
                                  left_index=True,
                                  right_index=True,
                                  how='left').merge(jmpi_B,
                                  left_index=True,
                                  right_index=True,
                                  how='left')

In [17]:
team_info_df = pd.read_csv('team_info.csv')

jmpi_2024_df = jmpi_2024_df.merge(team_info_df,
                                        left_on = ['team'],
                                        right_on = ['school'],
                                        how='left').merge(team_info_df,
                                        left_on = ['opponent'],
                                        right_on = ['school'],
                                        how='left',
                                        suffixes=['_team','_opp'])

jmpi_2024_df = jmpi_2024_df.drop(jmpi_2024_df[['Unnamed: 0_team','id_team','school_team',
                                              'Unnamed: 0_opp','id_opp','school_opp']],axis=1)


In [18]:
jmpi_2024_df['rounded_team_jay'] = np.where((jmpi_2024_df['team_jay'].round() == jmpi_2024_df['opp_jay'].round()) &
                                                     (jmpi_2024_df['team_jay'] > jmpi_2024_df['opp_jay']),
                                                     jmpi_2024_df['team_jay'].round()+1,
                                                     jmpi_2024_df['team_jay'].round())

jmpi_2024_df['rounded_jay_opp'] = np.where((jmpi_2024_df['team_jay'].round() == jmpi_2024_df['opp_jay'].round()) &
                                                     (jmpi_2024_df['opp_jay'] > jmpi_2024_df['team_jay']),
                                                     jmpi_2024_df['opp_jay'].round()+1,
                                                     jmpi_2024_df['opp_jay'].round())

In [19]:
jmpi_2024_df.to_csv('jmpi_head_to_head.csv',index=False)

In [20]:
jmpi_2024_df.loc[jmpi_2024_df['team_jay']>jmpi_2024_df['opp_jay'],'team_win']=1
jmpi_2024_df.loc[jmpi_2024_df['team_jay']<jmpi_2024_df['opp_jay'],'team_win']=0


In [21]:
jmpi_rank = jmpi_2024_df.groupby('team')['team_win'].sum()
jmpi_rank_df = pd.DataFrame(jmpi_rank)

jmpi_rank_df['jmpi_power'] = jmpi_rank_df['team_win'] / 133

In [22]:
jmpi_rank_df = jmpi_rank_df.sort_values(by=['jmpi_power'],ascending=False)

In [23]:
records = []

records_response = api_games_instance.get_team_records(year=2024)
records = [*records, *records_response]

In [24]:
records = [
    dict(
        team=r.team,
        total_wins=r.total.wins,
        total_games=r.total.games,
    ) for r in records]

In [25]:
records_2024_df = pd.DataFrame.from_records(records)

records_2024_df['win_percentage'] = records_2024_df['total_wins'] / records_2024_df['total_games']
records_2024_df['total_losses'] = records_2024_df['total_games'] - records_2024_df['total_wins']

In [80]:
jmpi_final_rank = jmpi_rank_df.merge(records_2024_df,
                                  left_on=['team'],
                                  right_on=['team'],
                                  how='left')

jmpi_final_rank['jmpi_index'] = (jmpi_final_rank['jmpi_power'] + (jmpi_final_rank['win_percentage']))/2

jmpi_final_rank.sort_values('jmpi_index',ascending=False,inplace=True)



In [82]:
jmpi_final_rank

Unnamed: 0,team,team_win,jmpi_power,total_wins,total_games,win_percentage,total_losses,jmpi_index
3,Oregon,130.0,0.977444,13,13,1.0,0,0.988722
1,Notre Dame,132.0,0.992481,11,12,0.916667,1,0.954574
5,Indiana,128.0,0.962406,11,12,0.916667,1,0.939536
0,Ohio State,133.0,1.0,10,12,0.833333,2,0.916667
2,Texas,131.0,0.984962,11,13,0.846154,2,0.915558
7,Penn State,126.0,0.947368,11,13,0.846154,2,0.896761
9,Georgia,124.0,0.932331,11,13,0.846154,2,0.889242
8,Tennessee,125.0,0.93985,10,12,0.833333,2,0.886591
21,Boise State,112.0,0.842105,12,13,0.923077,1,0.882591
11,SMU,122.0,0.917293,11,13,0.846154,2,0.881724


In [84]:
team_info_df = pd.read_csv('team_info.csv')

jmpi_final_rank = jmpi_final_rank.merge(team_info_df,
                                        left_on = ['team'],
                                        right_on = ['school'],
                                        how='left')

jmpi_final_rank = jmpi_final_rank.drop(jmpi_final_rank[['Unnamed: 0','id','school']],axis=1)

In [86]:
jmpi_final_rank['jmpi_rank'] = jmpi_final_rank['jmpi_index'].rank(ascending=False,method='min')

jmpi_final_rank_df = jmpi_final_rank[['jmpi_rank','team','jmpi_index','conference','logo','color','total_wins','total_losses']]

In [88]:
jmpi_final_rank_df.to_csv('jmpi_final_rank.csv')