In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression, LogisticRegression, Lasso, RidgeCV
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

In [2]:
# Load the five raw 2023 CSV extracts from ../data (paths are relative to the notebook's working directory).
game_results_big10_2023 = pd.read_csv('../data/game_results_big10_2023.csv')
betting_lines_big10_2023 = pd.read_csv('../data/betting_lines_big10_2023.csv')
team_recruiting_rankings_2023 = pd.read_csv('../data/team_recruiting_rankings_2023.csv')
team_talent_composite_ranking_2023 = pd.read_csv('../data/team_talent_composite_ranking_2023.csv')
team_stats_big10_2023 = pd.read_csv('../data/team_stats_big10_2023.csv')

In [3]:
# Derive the two outcome columns: the home team's scoring margin and
# a boolean flag that is True only when that margin is strictly positive.
home_scoring_margin = game_results_big10_2023['Home Points'].sub(game_results_big10_2023['Away Points'])
game_results_big10_2023['Home_Minus_Away_Points'] = home_scoring_margin
game_results_big10_2023['Home_Team_Win'] = home_scoring_margin.gt(0)

In [4]:
# First five games, first twenty columns (the frame is too wide to show at once).
game_results_big10_2023.iloc[:5, :20]

Unnamed: 0,Id,Season,Week,Season Type,Start Date,Start Time Tbd,Completed,Neutral Site,Conference Game,Attendance,Venue Id,Venue,Home Id,Home Team,Home Conference,Home Division,Home Points,Home Line Scores[0],Home Line Scores[1],Home Line Scores[2]
0,401520148,2023,1,regular,2023-09-01T00:00:00.000Z,False,True,False,True,53629.0,3953,TCF Bank Stadium,135,Minnesota,Big Ten,fbs,13,0,3,0
1,401520163,2023,1,regular,2023-09-01T23:00:00.000Z,False,True,False,False,,3936,Spartan Stadium,127,Michigan State,Big Ten,fbs,31,0,10,7
2,401520170,2023,1,regular,2023-09-02T16:00:00.000Z,False,True,False,False,54898.0,3907,Ross-Ade Stadium,2509,Purdue,Big Ten,fbs,35,7,14,7
3,401520162,2023,1,regular,2023-09-02T16:00:00.000Z,False,True,False,False,109480.0,3558,Michigan Stadium,130,Michigan,Big Ten,fbs,30,7,16,7
4,401520157,2023,1,regular,2023-09-02T16:00:00.000Z,False,True,False,False,69250.0,3793,Kinnick Stadium,2294,Iowa,Big Ten,fbs,24,14,3,0


In [5]:
# (rows, columns) of the game-results frame, including the two derived outcome columns.
game_results_big10_2023.shape

(106, 41)

In [6]:
# First five games, columns from position 20 onward (the remainder of the wide frame).
game_results_big10_2023.iloc[:5, 20:]

Unnamed: 0,Home Line Scores[3],Home Post Win Prob,Home Pregame Elo,Home Postgame Elo,Away Id,Away Team,Away Conference,Away Division,Away Points,Away Line Scores[0],...,Away Line Scores[2],Away Line Scores[3],Away Post Win Prob,Away Pregame Elo,Away Postgame Elo,Excitement Index,Highlights,Notes,Home_Minus_Away_Points,Home_Team_Win
0,10,0.047056,1672,1671,158,Nebraska,Big Ten,fbs,10,0,...,7,3,0.952944,1521.0,1522.0,7.656758,,,3,True
1,14,0.989833,1533,1575,2117,Central Michigan,Mid-American,fbs,7,0,...,0,0,0.010167,1370.0,1328.0,6.660548,,,24,True
2,7,0.125611,1473,1479,278,Fresno State,Mountain West,fbs,39,7,...,8,14,0.874389,1677.0,1671.0,6.896309,,,-4,False
3,0,0.998549,1916,1941,151,East Carolina,American Athletic,fbs,3,0,...,0,3,0.001451,1506.0,1481.0,1.172112,,,27,True
4,7,0.890762,1665,1664,328,Utah State,Mountain West,fbs,14,0,...,3,8,0.109238,1395.0,1396.0,4.699575,,,10,True


In [7]:
# Preview the betting lines; a game Id can appear on several rows, one per LineProvider.
betting_lines_big10_2023.head()

Unnamed: 0,Id,HomeTeam,HomeScore,AwayTeam,AwayScore,LineProvider,OverUnder,Spread,FormattedSpread,OpeningSpread,OpeningOverUnder,HomeMoneyline,AwayMoneyline
0,401520340,Michigan State,0,Michigan,49,William Hill (New Jersey),46.5,25.5,Michigan -25.5,,,,
1,401520340,Michigan State,0,Michigan,49,DraftKings,46.0,24.5,Michigan -24.5,19.0,,1700.0,-4500.0
2,401520340,Michigan State,0,Michigan,49,Bovada,47.0,25.5,Michigan -25.5,17.0,47.5,1100.0,-2800.0
3,401520343,Ohio State,20,Penn State,12,William Hill (New Jersey),46.5,-4.0,Ohio State -4,,,,
4,401520343,Ohio State,20,Penn State,12,DraftKings,45.5,-4.0,Ohio State -4,-10.0,,-192.0,160.0


In [8]:
# (rows, columns) of the betting-lines frame.
betting_lines_big10_2023.shape

(315, 13)

In [9]:
# Restrict the betting lines to rows quoted by a single provider (DraftKings).
betting_lines_big10_2023.loc[betting_lines_big10_2023['LineProvider'].eq('DraftKings')]

Unnamed: 0,Id,HomeTeam,HomeScore,AwayTeam,AwayScore,LineProvider,OverUnder,Spread,FormattedSpread,OpeningSpread,OpeningOverUnder,HomeMoneyline,AwayMoneyline
1,401520340,Michigan State,0,Michigan,49,DraftKings,46.0,24.5,Michigan -24.5,19.0,,1700.0,-4500.0
4,401520343,Ohio State,20,Penn State,12,DraftKings,45.5,-4.0,Ohio State -4,-10.0,,-192.0,160.0
7,401520336,Illinois,21,Wisconsin,25,DraftKings,40.5,3.0,Wisconsin -3,5.0,43.0,130.0,-155.0
10,401520301,Illinois,7,Nebraska,20,DraftKings,43.0,-3.5,Illinois -3.5,-3.0,44.5,-155.0,142.0
13,401520338,Iowa,10,Minnesota,12,DraftKings,30.5,-3.5,Iowa -3.5,-5.5,32.5,-166.0,140.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
301,401520392,Nebraska,10,Maryland,13,DraftKings,,1.0,Maryland -1,1.0,,102.0,-122.0
304,401520394,Penn State,15,Michigan,24,DraftKings,,6.5,Michigan -6.5,2.0,,160.0,-192.0
307,401520393,Ohio State,38,Michigan State,3,DraftKings,,-30.5,Ohio State -30.5,-25.5,,-20000.0,3500.0
310,401520194,Iowa State,13,Iowa,20,DraftKings,36.0,3.5,Iowa -3.5,4.0,36.5,150.0,-175.0


In [10]:
# Preview the recruiting rankings (Year, Rank, Team, Points).
team_recruiting_rankings_2023.head()

Unnamed: 0,Year,Rank,Team,Points
0,2023,1,Alabama,328.0
1,2023,2,Georgia,315.68
2,2023,3,Texas,306.31
3,2023,4,Oklahoma,289.03
4,2023,5,Ohio State,288.98


In [11]:
# Preview the talent composite ranking (Year, School, Talent).
team_talent_composite_ranking_2023.head()

Unnamed: 0,Year,School,Talent
0,2023,Alabama,1015.43
1,2023,Georgia,977.87
2,2023,Ohio State,974.79
3,2023,Texas A&M,925.92
4,2023,Clemson,917.88


In [12]:
# Preview the raw team stats, which are in long format: one row per game, school and stat category.
team_stats_big10_2023.head()

Unnamed: 0,Game Id,School,Conference,HomeAway,Points,Stat Category,Stat
0,401520365,Wisconsin,Big Ten,away,14,rushingTDs,0
1,401520365,Wisconsin,Big Ten,away,14,puntReturnYards,29
2,401520365,Wisconsin,Big Ten,away,14,puntReturnTDs,0
3,401520365,Wisconsin,Big Ten,away,14,puntReturns,4
4,401520365,Wisconsin,Big Ten,away,14,passingTDs,2


In [13]:
# Reshape the long stats table to wide form: one row per (Game Id, School, HomeAway)
# and one column per stat category, then flatten the index back into ordinary columns.
team_stats_big10_2023_pivoted = (
    team_stats_big10_2023
    .pivot(index=['Game Id', 'School', 'HomeAway'], columns='Stat Category', values='Stat')
    .reset_index()
)
team_stats_big10_2023_pivoted.iloc[:5, :20]

Stat Category,Game Id,School,HomeAway,completionAttempts,defensiveTDs,firstDowns,fourthDownEff,fumblesLost,fumblesRecovered,interceptionTDs,interceptionYards,interceptions,kickReturnTDs,kickReturnYards,kickReturns,kickingPoints,netPassingYards,passesDeflected,passesIntercepted,passingTDs
0,401520148,Minnesota,home,24-44,0,20,2-2,0,1,0,14,1,0,49,3,7,196,3,3,1
1,401520148,Nebraska,away,11-19,0,18,0-0,1,0,0,0,3,0,78,2,4,114,4,1,1
2,401520155,Illinois,home,18-26,1,21,2-3,0,0,1,48,1,0,77,5,6,206,3,1,2
3,401520155,Toledo,away,20-37,0,27,2-2,0,0,0,0,1,0,61,4,10,230,3,1,2
4,401520156,Indiana,home,9-20,0,8,0-2,0,0,0,11,0,0,38,3,3,82,8,1,0


In [14]:
# First five rows of the pivoted stats, columns from position 20 onward.
team_stats_big10_2023_pivoted.iloc[:5, 20:]

Stat Category,possessionTime,puntReturnTDs,puntReturnYards,puntReturns,qbHurries,rushingAttempts,rushingTDs,rushingYards,sacks,tackles,tacklesForLoss,thirdDownEff,totalFumbles,totalPenaltiesYards,totalYards,turnovers,yardsPerPass,yardsPerRushAttempt
0,30:42,0.0,10.0,1.0,5,25,0,55,3,38,5,8-17,0.0,3-25,251,1,4.5,2.2
1,29:18,0.0,8.0,2.0,3,37,0,181,3,36,5,5-11,1.0,7-55,295,4,6.0,4.9
2,27:03,,,,1,35,1,168,1,50,5,2-8,,10-100,374,1,7.9,4.8
3,32:57,0.0,4.0,1.0,2,42,1,186,2,44,4,7-15,2.0,7-40,416,1,6.2,4.4
4,28:05,0.0,51.0,2.0,0,33,0,71,0,41,5,4-15,,8-75,153,0,4.1,2.2


In [15]:
# List every column of the pivoted frame (the three key columns plus one column per stat category).
team_stats_big10_2023_pivoted.columns

Index(['Game Id', 'School', 'HomeAway', 'completionAttempts', 'defensiveTDs',
       'firstDowns', 'fourthDownEff', 'fumblesLost', 'fumblesRecovered',
       'interceptionTDs', 'interceptionYards', 'interceptions',
       'kickReturnTDs', 'kickReturnYards', 'kickReturns', 'kickingPoints',
       'netPassingYards', 'passesDeflected', 'passesIntercepted', 'passingTDs',
       'possessionTime', 'puntReturnTDs', 'puntReturnYards', 'puntReturns',
       'qbHurries', 'rushingAttempts', 'rushingTDs', 'rushingYards', 'sacks',
       'tackles', 'tacklesForLoss', 'thirdDownEff', 'totalFumbles',
       'totalPenaltiesYards', 'totalYards', 'turnovers', 'yardsPerPass',
       'yardsPerRushAttempt'],
      dtype='object', name='Stat Category')

In [16]:
# Stat categories that hold a single number per team-game and can therefore be cast to float.
team_stat_float_columns = [
    'defensiveTDs', 'firstDowns', 'fumblesLost', 'fumblesRecovered',
    'interceptionTDs', 'interceptionYards', 'interceptions',
    'kickReturnTDs', 'kickReturnYards', 'kickReturns', 'kickingPoints',
    'netPassingYards', 'passesDeflected', 'passesIntercepted', 'passingTDs',
    'puntReturnTDs', 'puntReturnYards', 'puntReturns',
    'qbHurries', 'rushingAttempts', 'rushingTDs', 'rushingYards', 'sacks',
    'tackles', 'tacklesForLoss', 'totalFumbles',
    'totalYards', 'turnovers', 'yardsPerPass',
    'yardsPerRushAttempt',
]
# Columns left as text: the home/away flag and delimiter-separated strings such as '24-44' or '30:42'.
team_stat_nonfloat_stat_columns = [
    'HomeAway',
    'completionAttempts',
    'fourthDownEff',
    'possessionTime',
    'thirdDownEff',
    'totalPenaltiesYards',
]
# Convert all numeric stat columns in a single assignment instead of one column at a time.
team_stats_big10_2023_pivoted[team_stat_float_columns] = (
    team_stats_big10_2023_pivoted[team_stat_float_columns].astype('float64')
)
team_stats_big10_2023_pivoted.dtypes

Stat Category
Game Id                  int64
School                  object
HomeAway                object
completionAttempts      object
defensiveTDs           float64
firstDowns             float64
fourthDownEff           object
fumblesLost            float64
fumblesRecovered       float64
interceptionTDs        float64
interceptionYards      float64
interceptions          float64
kickReturnTDs          float64
kickReturnYards        float64
kickReturns            float64
kickingPoints          float64
netPassingYards        float64
passesDeflected        float64
passesIntercepted      float64
passingTDs             float64
possessionTime          object
puntReturnTDs          float64
puntReturnYards        float64
puntReturns            float64
qbHurries              float64
rushingAttempts        float64
rushingTDs             float64
rushingYards           float64
sacks                  float64
tackles                float64
tacklesForLoss         float64
thirdDownEff            o

In [17]:
#home_team_stats_big10_2023 = team_stats_big10_2023_pivoted[team_stats_big10_2023_pivoted['HomeAway']=='home'].copy()
#.add_prefix('Home_').rename(columns = {'Home_Game Id':'Game Id', 'Home_School':'HomeTeam', 'Home_HomeAway':'HomeAway'}).drop(columns=['HomeAway'])


In [18]:
# Split the pivoted per-team stats into a frame of home rows and a frame of away rows,
# each renumbered from 0.
is_home_row = team_stats_big10_2023_pivoted['HomeAway'] == 'home'
is_away_row = team_stats_big10_2023_pivoted['HomeAway'] == 'away'
home_team_stats_big10_2023 = team_stats_big10_2023_pivoted[is_home_row].reset_index(drop=True).copy()
away_team_stats_big10_2023 = team_stats_big10_2023_pivoted[is_away_row].reset_index(drop=True).copy()

# Home-minus-away differences for every numeric stat.
# The two sides are paired on 'Game Id' rather than on row position, so a game that is
# missing one side cannot shift every later game onto the wrong opponent.
assert home_team_stats_big10_2023['Game Id'].is_unique, "expected exactly one home row per game"
assert away_team_stats_big10_2023['Game Id'].is_unique, "expected exactly one away row per game"
home_stats_by_game = home_team_stats_big10_2023.set_index('Game Id')[team_stat_float_columns]
away_stats_by_game = (
    away_team_stats_big10_2023
    .set_index('Game Id')[team_stat_float_columns]
    .reindex(home_stats_by_game.index)  # same games, same order as the home rows
)
# fill_value=0: a stat reported for only one side is treated as 0 for the other side;
# the difference stays NaN only when both sides are missing.
stat_differences = home_stats_by_game.sub(away_stats_by_game, fill_value=0).reset_index(drop=True)

team_stats_big10_2023_home_minus_away = home_team_stats_big10_2023.copy()
team_stats_big10_2023_home_minus_away[team_stat_float_columns] = stat_differences

# Per-side frames keyed by 'Id'/'Team' for the later per-team season averages.
home_team_stats_big10_2023 = home_team_stats_big10_2023.drop(columns=['HomeAway']).rename(columns={'Game Id': 'Id', 'School': 'Team'})
away_team_stats_big10_2023 = away_team_stats_big10_2023.drop(columns=['HomeAway']).rename(columns={'Game Id': 'Id', 'School': 'Team'})

# Keep only the key columns and the numeric differences, and label the differences explicitly.
#NOTE: team_stat_nonfloat_stat_columns may need to be adjusted if any of those columns are desired to be subtracted later
team_stats_big10_2023_home_minus_away = (
    team_stats_big10_2023_home_minus_away
    .drop(columns=team_stat_nonfloat_stat_columns)
    .add_prefix('Home_Minus_Away_')
    .rename(columns={'Home_Minus_Away_Game Id': 'Id', 'Home_Minus_Away_School': 'HomeTeam'})
)

team_stats_big10_2023_home_minus_away.head()

Stat Category,Id,HomeTeam,Home_Minus_Away_defensiveTDs,Home_Minus_Away_firstDowns,Home_Minus_Away_fumblesLost,Home_Minus_Away_fumblesRecovered,Home_Minus_Away_interceptionTDs,Home_Minus_Away_interceptionYards,Home_Minus_Away_interceptions,Home_Minus_Away_kickReturnTDs,...,Home_Minus_Away_rushingTDs,Home_Minus_Away_rushingYards,Home_Minus_Away_sacks,Home_Minus_Away_tackles,Home_Minus_Away_tacklesForLoss,Home_Minus_Away_totalFumbles,Home_Minus_Away_totalYards,Home_Minus_Away_turnovers,Home_Minus_Away_yardsPerPass,Home_Minus_Away_yardsPerRushAttempt
0,401520148,Minnesota,0.0,2.0,-1.0,1.0,0.0,14.0,-2.0,0.0,...,0.0,-126.0,0.0,2.0,0.0,-1.0,-44.0,-3.0,-1.5,-2.7
1,401520155,Illinois,1.0,-6.0,0.0,0.0,1.0,48.0,0.0,0.0,...,0.0,-18.0,-1.0,6.0,1.0,-2.0,-42.0,0.0,1.7,0.4
2,401520156,Indiana,0.0,-14.0,0.0,0.0,0.0,11.0,-1.0,0.0,...,-2.0,-72.0,-1.0,7.0,-1.0,,-227.0,-1.0,-2.5,-2.4
3,401520157,Iowa,0.0,-2.0,0.0,0.0,0.0,0.0,-1.0,0.0,...,1.0,-28.0,0.0,-3.0,-5.0,2.0,-45.0,-1.0,1.5,-2.4
4,401520161,Maryland,0.0,14.0,0.0,0.0,,,0.0,0.0,...,2.0,18.0,1.0,-4.0,0.0,,173.0,0.0,2.5,0.2


In [19]:
# Preview the home-side stats frame after the rename to 'Id' / 'Team'.
home_team_stats_big10_2023.head()

Stat Category,Id,Team,completionAttempts,defensiveTDs,firstDowns,fourthDownEff,fumblesLost,fumblesRecovered,interceptionTDs,interceptionYards,...,sacks,tackles,tacklesForLoss,thirdDownEff,totalFumbles,totalPenaltiesYards,totalYards,turnovers,yardsPerPass,yardsPerRushAttempt
0,401520148,Minnesota,24-44,0.0,20.0,2-2,0.0,1.0,0.0,14.0,...,3.0,38.0,5.0,8-17,0.0,3-25,251.0,1.0,4.5,2.2
1,401520155,Illinois,18-26,1.0,21.0,2-3,0.0,0.0,1.0,48.0,...,1.0,50.0,5.0,2-8,,10-100,374.0,1.0,7.9,4.8
2,401520156,Indiana,9-20,0.0,8.0,0-2,0.0,0.0,0.0,11.0,...,0.0,41.0,5.0,4-15,,8-75,153.0,0.0,4.1,2.2
3,401520157,Iowa,18-33,0.0,17.0,1-2,0.0,0.0,0.0,0.0,...,1.0,41.0,4.0,6-17,2.0,7-50,284.0,0.0,5.9,2.4
4,401520161,Maryland,26-43,0.0,30.0,2-2,0.0,0.0,,,...,2.0,41.0,3.0,7-14,,6-54,449.0,0.0,6.6,4.6


In [20]:
# Preview the away-side stats frame after the rename to 'Id' / 'Team'.
away_team_stats_big10_2023.head()

Stat Category,Id,Team,completionAttempts,defensiveTDs,firstDowns,fourthDownEff,fumblesLost,fumblesRecovered,interceptionTDs,interceptionYards,...,sacks,tackles,tacklesForLoss,thirdDownEff,totalFumbles,totalPenaltiesYards,totalYards,turnovers,yardsPerPass,yardsPerRushAttempt
0,401520148,Nebraska,11-19,0.0,18.0,0-0,1.0,0.0,0.0,0.0,...,3.0,36.0,5.0,5-11,1.0,7-55,295.0,4.0,6.0,4.9
1,401520155,Toledo,20-37,0.0,27.0,2-2,0.0,0.0,0.0,0.0,...,2.0,44.0,4.0,7-15,2.0,7-40,416.0,1.0,6.2,4.4
2,401520156,Ohio State,21-36,0.0,22.0,2-3,0.0,0.0,,,...,1.0,34.0,6.0,2-12,,5-41,380.0,1.0,6.6,4.6
3,401520157,Utah State,32-48,0.0,19.0,1-3,0.0,0.0,,,...,1.0,44.0,9.0,5-17,,10-91,329.0,1.0,4.4,4.8
4,401520161,Towson,16-31,0.0,16.0,1-3,0.0,0.0,,,...,1.0,45.0,3.0,5-15,,10-90,276.0,0.0,4.1,4.4


In [21]:
# Attach the home-minus-away stat differences to each game result.
# Inner join on the game Id: games without a row in the differences frame are dropped.
team_stats_results_big10_2023 = game_results_big10_2023.merge(
    team_stats_big10_2023_home_minus_away,
    how='inner',
    on='Id',
)
team_stats_results_big10_2023.iloc[:5]

Unnamed: 0,Id,Season,Week,Season Type,Start Date,Start Time Tbd,Completed,Neutral Site,Conference Game,Attendance,...,Home_Minus_Away_rushingTDs,Home_Minus_Away_rushingYards,Home_Minus_Away_sacks,Home_Minus_Away_tackles,Home_Minus_Away_tacklesForLoss,Home_Minus_Away_totalFumbles,Home_Minus_Away_totalYards,Home_Minus_Away_turnovers,Home_Minus_Away_yardsPerPass,Home_Minus_Away_yardsPerRushAttempt
0,401520148,2023,1,regular,2023-09-01T00:00:00.000Z,False,True,False,True,53629.0,...,0.0,-126.0,0.0,2.0,0.0,-1.0,-44.0,-3.0,-1.5,-2.7
1,401520163,2023,1,regular,2023-09-01T23:00:00.000Z,False,True,False,False,,...,2.0,4.0,1.0,-2.0,3.0,,187.0,-1.0,4.7,1.1
2,401520170,2023,1,regular,2023-09-02T16:00:00.000Z,False,True,False,False,54898.0,...,1.0,-7.0,2.0,18.0,0.0,1.0,-124.0,-1.0,0.3,0.5
3,401520162,2023,1,regular,2023-09-02T16:00:00.000Z,False,True,False,False,109480.0,...,1.0,19.0,0.0,-11.0,-1.0,1.0,167.0,-1.0,4.4,-0.1
4,401520157,2023,1,regular,2023-09-02T16:00:00.000Z,False,True,False,False,69250.0,...,1.0,-28.0,0.0,-3.0,-5.0,2.0,-45.0,-1.0,1.5,-2.4


In [22]:
# Keep every column whose dtype is not 'object' and list the resulting column names.
team_stats_results_big10_2023_nonobjects = team_stats_results_big10_2023.select_dtypes(exclude='object')
team_stats_results_big10_2023_nonobjects.columns
# team_stats_results_big10_2023_nonobjects 

Index(['Id', 'Season', 'Week', 'Start Time Tbd', 'Completed', 'Neutral Site',
       'Conference Game', 'Attendance', 'Venue Id', 'Home Id', 'Home Points',
       'Home Line Scores[0]', 'Home Line Scores[1]', 'Home Line Scores[2]',
       'Home Line Scores[3]', 'Home Post Win Prob', 'Home Pregame Elo',
       'Home Postgame Elo', 'Away Id', 'Away Points', 'Away Line Scores[0]',
       'Away Line Scores[1]', 'Away Line Scores[2]', 'Away Line Scores[3]',
       'Away Post Win Prob', 'Away Pregame Elo', 'Away Postgame Elo',
       'Excitement Index', 'Highlights', 'Home_Minus_Away_Points',
       'Home_Team_Win', 'Home_Minus_Away_defensiveTDs',
       'Home_Minus_Away_firstDowns', 'Home_Minus_Away_fumblesLost',
       'Home_Minus_Away_fumblesRecovered', 'Home_Minus_Away_interceptionTDs',
       'Home_Minus_Away_interceptionYards', 'Home_Minus_Away_interceptions',
       'Home_Minus_Away_kickReturnTDs', 'Home_Minus_Away_kickReturnYards',
       'Home_Minus_Away_kickReturns', 'Home_Minus_Aw

In [23]:
# Correlation matrix of the non-object columns, restricted to the block that starts at
# 'Home_Minus_Away_Points' (the outcome columns followed by the home-minus-away stat differences).
# Slicing by label instead of a hard-coded position keeps the view correct if columns are
# added or removed ahead of that block.
first_outcome_column = 'Home_Minus_Away_Points'
non_object_correlations = team_stats_results_big10_2023.select_dtypes(exclude='object').corr()
non_object_correlations.loc[first_outcome_column:, first_outcome_column:]

Unnamed: 0,Home_Minus_Away_Points,Home_Team_Win,Home_Minus_Away_defensiveTDs,Home_Minus_Away_firstDowns,Home_Minus_Away_fumblesLost,Home_Minus_Away_fumblesRecovered,Home_Minus_Away_interceptionTDs,Home_Minus_Away_interceptionYards,Home_Minus_Away_interceptions,Home_Minus_Away_kickReturnTDs,...,Home_Minus_Away_rushingTDs,Home_Minus_Away_rushingYards,Home_Minus_Away_sacks,Home_Minus_Away_tackles,Home_Minus_Away_tacklesForLoss,Home_Minus_Away_totalFumbles,Home_Minus_Away_totalYards,Home_Minus_Away_turnovers,Home_Minus_Away_yardsPerPass,Home_Minus_Away_yardsPerRushAttempt
Home_Minus_Away_Points,1.0,0.783386,0.398849,0.7322,-0.17635,0.211258,0.363222,0.48255,-0.403071,0.03800748,...,0.825617,0.65236,0.47489,-0.443221,0.479144,-0.224889,0.823535,-0.405351,0.700413,0.624321
Home_Team_Win,0.783386,1.0,0.23709,0.491128,-0.245763,0.265656,0.26682,0.396672,-0.40752,-0.0776931,...,0.654095,0.514291,0.34169,-0.341897,0.297533,-0.289957,0.59812,-0.45326,0.566092,0.436135
Home_Minus_Away_defensiveTDs,0.398849,0.23709,1.0,0.11779,0.093011,-0.058004,0.817732,0.520112,-0.269179,0.3504028,...,0.25997,0.115617,0.088469,-0.013278,0.245022,-0.009204,0.179413,-0.135407,0.319377,0.132661
Home_Minus_Away_firstDowns,0.7322,0.491128,0.11779,1.0,0.021003,0.000414,0.130819,0.19378,-0.055036,-0.1243719,...,0.603109,0.611642,0.370063,-0.715941,0.294739,0.065308,0.867038,-0.026323,0.503027,0.506377
Home_Minus_Away_fumblesLost,-0.17635,-0.245763,0.093011,0.021003,1.0,-0.980013,0.093877,0.018461,0.065821,-0.05682554,...,-0.113546,-0.106979,-0.037323,0.050318,-0.032074,0.784693,0.016668,0.691472,0.128027,-0.112474
Home_Minus_Away_fumblesRecovered,0.211258,0.265656,-0.058004,0.000414,-0.980013,1.0,-0.09581,-0.017717,-0.07537,0.05755351,...,0.129715,0.128861,0.063324,-0.040653,0.053222,-0.764805,0.003979,-0.685518,-0.103746,0.133721
Home_Minus_Away_interceptionTDs,0.363222,0.26682,0.817732,0.130819,0.093877,-0.09581,1.0,0.664098,-0.261244,-0.001941034,...,0.213424,0.083663,0.007504,-0.115344,0.125015,-0.069512,0.205087,-0.149019,0.362585,0.089103
Home_Minus_Away_interceptionYards,0.48255,0.396672,0.520112,0.19378,0.018461,-0.017717,0.664098,1.0,-0.551307,-0.07602538,...,0.319346,0.216312,0.124873,-0.267372,0.187064,-0.05426,0.276059,-0.415842,0.310051,0.22431
Home_Minus_Away_interceptions,-0.403071,-0.40752,-0.269179,-0.055036,0.065821,-0.07537,-0.261244,-0.551307,1.0,-0.02961701,...,-0.340977,-0.215274,-0.271828,0.032598,-0.208258,0.098078,-0.190335,0.76635,-0.286386,-0.202398
Home_Minus_Away_kickReturnTDs,0.038007,-0.077693,0.350403,-0.124372,-0.056826,0.057554,-0.001941,-0.076025,-0.029617,1.0,...,0.068311,-0.020049,0.040869,0.168621,0.131858,0.031626,-0.056261,-0.056268,0.050629,0.031528


In [24]:
# Every distinct team that appears as a home or an away side, in first-seen order.
# order='F' walks the two-column array column by column (all home teams, then all away teams),
# so the resulting order does not depend on how pandas happens to lay the array out in memory.
home_and_away_team_names = team_stats_results_big10_2023[['Home Team', 'Away Team']].to_numpy()
teams_list = list(pd.unique(home_and_away_team_names.ravel(order='F')))
teams_list

['Minnesota',
 'Michigan State',
 'Purdue',
 'Michigan',
 'Iowa',
 'Maryland',
 'Wisconsin',
 'Indiana',
 'Penn State',
 'Illinois',
 'Rutgers',
 'Kansas',
 'Ohio State',
 'Colorado',
 'Virginia Tech',
 'Northwestern',
 'Iowa State',
 'Washington State',
 'Duke',
 'North Carolina',
 'Nebraska',
 'Notre Dame',
 'Central Michigan',
 'Fresno State',
 'East Carolina',
 'Utah State',
 'Towson',
 'Buffalo',
 'West Virginia',
 'Toledo',
 'Indiana State',
 'Youngstown State',
 'Delaware',
 'UTEP',
 'UNLV',
 'Richmond',
 'Eastern Michigan',
 'Charlotte',
 'Temple',
 'Virginia',
 'Georgia Southern',
 'Louisville',
 'Western Michigan',
 'Western Kentucky',
 'Washington',
 'Northern Illinois',
 'Bowling Green',
 'Syracuse',
 'Louisiana Tech',
 'Florida Atlantic',
 'Akron',
 'Louisiana',
 'Wagner',
 'Howard',
 'UMass']

In [25]:
# Lower-case, underscore-separated version of each team name.
# (An earlier variant also appended "_shifted_gamestats_2023_rollingavg" to each name.)
shifted_rolling_avg_teamnames = ["_".join(team_name.lower().split(" ")) for team_name in teams_list]
shifted_rolling_avg_teamnames

['minnesota',
 'michigan_state',
 'purdue',
 'michigan',
 'iowa',
 'maryland',
 'wisconsin',
 'indiana',
 'penn_state',
 'illinois',
 'rutgers',
 'kansas',
 'ohio_state',
 'colorado',
 'virginia_tech',
 'northwestern',
 'iowa_state',
 'washington_state',
 'duke',
 'north_carolina',
 'nebraska',
 'notre_dame',
 'central_michigan',
 'fresno_state',
 'east_carolina',
 'utah_state',
 'towson',
 'buffalo',
 'west_virginia',
 'toledo',
 'indiana_state',
 'youngstown_state',
 'delaware',
 'utep',
 'unlv',
 'richmond',
 'eastern_michigan',
 'charlotte',
 'temple',
 'virginia',
 'georgia_southern',
 'louisville',
 'western_michigan',
 'western_kentucky',
 'washington',
 'northern_illinois',
 'bowling_green',
 'syracuse',
 'louisiana_tech',
 'florida_atlantic',
 'akron',
 'louisiana',
 'wagner',
 'howard',
 'umass']

In [26]:
# For every team, build the season-to-date average of each numeric stat as it stood
# BEFORE each game: an expanding mean over the team's games, shifted down by one game.
# The team's first game therefore has no prior data and is all NaN.
shifted_avg_stats_dfs_2023 = {}
for team in teams_list:
    team_games = team_stats_results_big10_2023[
        (team_stats_results_big10_2023['Home Team'] == team)
        | (team_stats_results_big10_2023['Away Team'] == team)
    ]
    team_game_ids = team_games['Id'].tolist()
    # Kick-off time per game Id, used to put the team's games in true chronological order.
    kickoff_by_game_id = pd.to_datetime(team_games.drop_duplicates('Id').set_index('Id')['Start Date'])

    team_home_stats_2023 = home_team_stats_big10_2023[
        home_team_stats_big10_2023['Id'].isin(team_game_ids) & (home_team_stats_big10_2023['Team'] == team)
    ]
    team_away_stats_2023 = away_team_stats_big10_2023[
        away_team_stats_big10_2023['Id'].isin(team_game_ids) & (away_team_stats_big10_2023['Team'] == team)
    ]
    team_allgame_stats_2023 = pd.concat([team_home_stats_2023, team_away_stats_2023], axis=0)
    # Order by kick-off (Id only breaks ties): sorting by Id alone would let a later game
    # leak into an earlier game's "prior" average whenever Ids are not chronological.
    team_allgame_stats_2023 = (
        team_allgame_stats_2023
        .assign(_kickoff=team_allgame_stats_2023['Id'].map(kickoff_by_game_id))
        .sort_values(['_kickoff', 'Id'])
        .drop(columns='_kickoff')
    )

    shifted_team_gamestats_2023_rollingavg = (
        team_allgame_stats_2023
        .set_index('Id')
        .select_dtypes(exclude='object')  # numeric stat columns only
        .expanding()
        .mean()
        .shift(1)  # each row now holds the average of the games played before it
    )
    shifted_avg_stats_dfs_2023[team] = shifted_team_gamestats_2023_rollingavg

# Column names the season averages will take on the game-level frame; one team's frame
# serves as the template for the column set.
home_stats_list = shifted_avg_stats_dfs_2023['Ohio State'].add_prefix('Home_').add_suffix("_season_avg").columns
away_stats_list = shifted_avg_stats_dfs_2023['Ohio State'].add_prefix('Away_').add_suffix("_season_avg").columns
shifted_avg_stats_dfs_2023['Ohio State']

Stat Category,defensiveTDs,firstDowns,fumblesLost,fumblesRecovered,interceptionTDs,interceptionYards,interceptions,kickReturnTDs,kickReturnYards,kickReturns,...,rushingTDs,rushingYards,sacks,tackles,tacklesForLoss,totalFumbles,totalYards,turnovers,yardsPerPass,yardsPerRushAttempt
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
401520156,,,,,,,,,,,...,,,,,,,,,,
401520208,0.0,22.0,0.0,0.0,,,1.0,0.0,23.0,1.0,...,2.0,143.0,1.0,34.0,6.0,,380.0,1.0,6.6,4.6
401520240,0.0,22.5,0.0,0.0,0.0,0.0,0.5,0.0,31.5,1.5,...,2.0,133.0,1.5,35.0,4.5,1.0,431.0,0.5,8.75,4.6
401520307,0.333333,23.0,0.333333,0.666667,0.5,29.0,0.666667,0.0,34.333333,1.666667,...,2.333333,156.666667,1.666667,36.0,5.666667,1.5,474.666667,1.0,10.266667,5.133333
401520323,0.5,22.0,0.25,0.5,0.666667,32.333333,0.5,0.0,31.75,1.5,...,2.0,133.0,1.75,36.0,5.5,1.5,451.5,0.75,10.45,4.325
401520343,0.4,22.4,0.6,0.4,0.666667,32.333333,0.4,0.0,35.2,1.6,...,2.0,136.8,2.0,35.8,6.2,1.666667,458.4,1.0,10.58,4.24
401520360,0.333333,22.333333,0.666667,0.333333,0.666667,32.333333,0.333333,0.0,31.666667,1.5,...,1.833333,127.166667,2.333333,35.0,6.166667,1.5,442.833333,1.0,10.183333,3.85
401520375,0.285714,22.428571,0.714286,0.428571,0.666667,32.333333,0.571429,0.0,31.666667,1.5,...,1.714286,134.857143,2.285714,33.857143,5.857143,1.4,437.714286,1.285714,9.971429,3.9
401520393,0.375,21.5,0.625,0.375,0.75,47.5,0.625,0.0,26.428571,1.428571,...,1.625,135.375,2.125,32.25,5.625,1.4,424.0,1.25,9.6375,4.0125
401520415,0.333333,21.888889,0.555556,0.333333,0.75,47.5,0.555556,0.0,26.428571,1.428571,...,1.666667,140.0,2.111111,32.0,5.666667,1.4,435.777778,1.111111,9.722222,4.177778


In [27]:
#team_stats_results_big10_2023[team_stats_results_big10_2023['Home Team'] == "Ohio State"][home_stats_list]

In [28]:
# Attach each team's prior-games season averages to the game-level frame, as
# 'Home_<stat>_season_avg' columns for the home side and 'Away_<stat>_season_avg' for the away side.
season_avg_columns = list(home_stats_list) + list(away_stats_list)

# Start from a frame without the season-average columns (so re-running the cell is safe),
# then add all of them at once as float NaN placeholders. Adding them in one concat avoids
# fragmenting the frame with dozens of single-column inserts.
results_by_game_id = (
    team_stats_results_big10_2023
    .drop(columns=season_avg_columns, errors='ignore')
    .set_index('Id')
)
season_avg_placeholders = pd.DataFrame(np.nan, index=results_by_game_id.index, columns=season_avg_columns)
results_by_game_id = pd.concat([results_by_game_id, season_avg_placeholders], axis=1)

# Fill the placeholders team by team, once for the games the team hosted and once for the
# games it played away.
for team, team_season_avgs in shifted_avg_stats_dfs_2023.items():
    for side_prefix, team_column in (('Home_', 'Home Team'), ('Away_', 'Away Team')):
        side_game_ids = results_by_game_id.index[results_by_game_id[team_column] == team]
        side_season_avgs = team_season_avgs.add_prefix(side_prefix).add_suffix("_season_avg")
        # reindex (not .loc) so a game Id with no stats row yields NaN instead of a KeyError;
        # update() never overwrites with NaN, so such games simply stay empty.
        results_by_game_id.update(side_season_avgs.reindex(side_game_ids))

team_stats_results_big10_2023 = results_by_game_id.reset_index()
team_stats_results_big10_2023.head().iloc[:,100:]

Unnamed: 0,Home_yardsPerPass_season_avg,Home_yardsPerRushAttempt_season_avg,Away_defensiveTDs_season_avg,Away_firstDowns_season_avg,Away_fumblesLost_season_avg,Away_fumblesRecovered_season_avg,Away_interceptionTDs_season_avg,Away_interceptionYards_season_avg,Away_interceptions_season_avg,Away_kickReturnTDs_season_avg,...,Away_rushingTDs_season_avg,Away_rushingYards_season_avg,Away_sacks_season_avg,Away_tackles_season_avg,Away_tacklesForLoss_season_avg,Away_totalFumbles_season_avg,Away_totalYards_season_avg,Away_turnovers_season_avg,Away_yardsPerPass_season_avg,Away_yardsPerRushAttempt_season_avg
0,,,,,,,,,,,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,


In [29]:
### Note: the commented-out lines below are earlier, abandoned attempts at the body of the `for team_stats in shifted_avg_stats_dfs_2023` loop.
### They are kept only as a debugging reference in case a problem is found in the working version later.

#team_stats_results_big10_2023=team_stats_results_big10_2023.set_index('Id')
#for team_stats in shifted_avg_stats_dfs_2023:
    #team_stats_results_big10_2023[home_stats_list] = np.where(team_stats_results_big10_2023['Home Team'].iloc[list(shifted_avg_stats_dfs_2023[team_stats].index)] == team_stats, shifted_avg_stats_dfs_2023[team_stats], team_stats_results_big10_2023[home_stats_list].iloc[list(shifted_avg_stats_dfs_2023[team_stats].index)])
    #print(shifted_avg_stats_dfs_2023[team_stats].add_prefix('Home_').add_suffix("_season_avg"))
    #print(team_stats_results_big10_2023[team_stats_results_big10_2023['Home Team'] == team_stats][home_stats_list])
    #probably should be filter instead of if
    #df.loc[team_stats_results_big10_2023[team_stats_results_big10_2023['Home Team'] == team_stats].index, :] = shifted_avg_stats_dfs_2023[team_stats].add_prefix('Home_').add_suffix("_season_avg").loc[team_stats_results_big10_2023[team_stats_results_big10_2023['Home Team'] == team_stats].index, :]
    #this_team_home_stats = shifted_avg_stats_dfs_2023[team_stats].add_prefix('Home_').add_suffix("_season_avg")
    #team_stats_results_big10_2023[team_stats_results_big10_2023['Home Team'] == team_stats] = shifted_avg_stats_dfs_2023[team_stats].add_prefix('Home_').add_suffix("_season_avg")
    #for row in team_stats_results_big10_2023.itertuples(index=False):
        #if (team_stats_results_big10_2023['Id'] == team_stats['Id']) & (team_stats_results_big10_2023['Home Team'] == team_stats):
        #if (row['Id'] == team_stats['Id']) & (row['Home Team'] == team_stats):
        #    row[home_stats] = 
##NEED JOIN
    #if (team_stats_results_big10_2023['Id'] == team_stats['Id']) & (team_stats_results_big10_2023['Away Team'] == team_stats):
##NEED ANOTHER JOIN

In [30]:
# Spot-check the last 50 games, from column position 90 onward.
team_stats_results_big10_2023.iloc[-50:, 90:]

Unnamed: 0,Home_qbHurries_season_avg,Home_rushingAttempts_season_avg,Home_rushingTDs_season_avg,Home_rushingYards_season_avg,Home_sacks_season_avg,Home_tackles_season_avg,Home_tacklesForLoss_season_avg,Home_totalFumbles_season_avg,Home_totalYards_season_avg,Home_turnovers_season_avg,...,Away_rushingTDs_season_avg,Away_rushingYards_season_avg,Away_sacks_season_avg,Away_tackles_season_avg,Away_tacklesForLoss_season_avg,Away_totalFumbles_season_avg,Away_totalYards_season_avg,Away_turnovers_season_avg,Away_yardsPerPass_season_avg,Away_yardsPerRushAttempt_season_avg
56,3.75,37.5,3.5,201.75,3.0,42.0,6.75,1.0,427.5,1.5,...,3.0,195.4,3.0,27.4,4.6,0.666667,346.6,0.6,6.9,4.74
57,1.75,30.4,1.2,87.4,1.75,33.5,4.5,1.333333,305.2,1.0,...,,,,,,,,,,
58,3.4,30.4,1.0,107.0,0.6,40.6,4.8,2.5,240.8,1.8,...,2.4,150.2,3.0,43.0,6.6,2.75,399.0,1.6,7.28,4.1
59,4.0,41.0,1.6,193.2,2.25,31.25,4.0,0.75,342.2,1.2,...,2.2,184.0,2.4,26.6,6.2,1.333333,411.4,1.0,9.9,4.94
60,3.0,37.833333,2.5,173.5,2.833333,28.833333,4.5,0.5,334.666667,0.666667,...,1.2,120.0,2.2,28.8,6.2,2.0,371.4,2.4,7.64,3.72
61,3.0,36.5,2.5,185.166667,2.333333,26.833333,6.333333,1.333333,414.833333,0.833333,...,1.4,110.2,1.8,36.4,5.2,1.5,334.2,1.0,6.9,3.04
62,3.166667,37.333333,2.166667,141.166667,2.666667,41.166667,6.333333,2.6,389.666667,1.666667,...,2.0,133.0,1.75,36.0,5.5,1.5,451.5,0.75,10.45,4.325
63,0.833333,31.833333,2.0,148.833333,2.666667,40.166667,5.5,1.0,429.333333,0.833333,...,1.166667,124.333333,1.333333,42.666667,4.333333,1.6,377.333333,2.166667,7.216667,3.733333
64,4.8,46.2,2.6,194.8,4.0,33.0,7.4,1.5,430.4,0.2,...,,,,,,,,,,
65,3.6,39.2,3.0,204.0,2.6,39.0,5.8,1.25,413.6,1.6,...,1.0,119.333333,1.5,40.666667,6.0,2.2,249.166667,1.666667,5.2,3.483333


In [31]:
# Every game in which Minnesota appears as either the home or the away team.
minnesota_games = team_stats_results_big10_2023.loc[
    lambda games: games['Home Team'].eq('Minnesota') | games['Away Team'].eq('Minnesota')
]
# Plain Python list of those game Ids, printed for reference.
minnesota_game_ids = minnesota_games['Id'].to_list()
print(minnesota_game_ids)
minnesota_games

[401520148, 401520204, 401520239, 401520264, 401520284, 401520303, 401520338, 401520354, 401520370, 401520395, 401520415, 401520436]


Unnamed: 0,Id,Season,Week,Season Type,Start Date,Start Time Tbd,Completed,Neutral Site,Conference Game,Attendance,...,Away_rushingTDs_season_avg,Away_rushingYards_season_avg,Away_sacks_season_avg,Away_tackles_season_avg,Away_tacklesForLoss_season_avg,Away_totalFumbles_season_avg,Away_totalYards_season_avg,Away_turnovers_season_avg,Away_yardsPerPass_season_avg,Away_yardsPerRushAttempt_season_avg
0,401520148,2023,1,regular,2023-09-01T00:00:00.000Z,False,True,False,True,53629.0,...,,,,,,,,,,
21,401520204,2023,2,regular,2023-09-09T23:30:00.000Z,False,True,False,False,48101.0,...,,,,,,,,,,
31,401520239,2023,3,regular,2023-09-16T19:30:00.000Z,False,True,False,False,45151.0,...,1.0,175.5,3.5,31.0,5.0,1.0,332.0,1.5,6.15,3.75
44,401520264,2023,4,regular,2023-09-23T23:30:00.000Z,False,True,False,True,20148.0,...,1.0,173.666667,3.0,34.0,5.0,1.0,322.333333,1.666667,5.566667,4.333333
47,401520284,2023,5,regular,2023-09-30T16:00:00.000Z,False,True,False,False,46843.0,...,,,,,,,,,,
59,401520303,2023,6,regular,2023-10-07T23:30:00.000Z,False,True,False,True,52179.0,...,2.2,184.0,2.4,26.6,6.2,1.333333,411.4,1.0,9.9,4.94
69,401520338,2023,8,regular,2023-10-21T19:30:00.000Z,False,True,False,True,69250.0,...,1.333333,180.5,1.8,31.4,3.8,0.75,313.333333,1.333333,6.416667,4.283333
74,401520354,2023,9,regular,2023-10-28T19:30:00.000Z,False,True,False,True,47392.0,...,1.0,108.714286,2.142857,29.428571,5.714286,2.5,326.285714,2.428571,6.771429,3.285714
80,401520370,2023,10,regular,2023-11-04T19:30:00.000Z,False,True,False,True,42906.0,...,1.125,137.5,1.5,42.625,4.25,1.142857,365.5,1.75,6.9375,3.85
88,401520395,2023,11,regular,2023-11-11T20:30:00.000Z,False,True,False,True,59049.0,...,1.111111,165.444444,2.375,32.25,4.625,0.857143,308.777778,1.333333,6.688889,3.877778


In [32]:
# Rows of the home-side stats table that belong to Minnesota in one of its games.
minnesota_home_stats_2023 = home_team_stats_big10_2023.loc[
    lambda stats: stats['Id'].isin(minnesota_game_ids) & stats['Team'].eq('Minnesota')
]
minnesota_home_stats_2023

Stat Category,Id,Team,completionAttempts,defensiveTDs,firstDowns,fourthDownEff,fumblesLost,fumblesRecovered,interceptionTDs,interceptionYards,...,sacks,tackles,tacklesForLoss,thirdDownEff,totalFumbles,totalPenaltiesYards,totalYards,turnovers,yardsPerPass,yardsPerRushAttempt
0,401520148,Minnesota,24-44,0.0,20.0,2-2,0.0,1.0,0.0,14.0,...,3.0,38.0,5.0,8-17,0.0,3-25,251.0,1.0,4.5,2.2
18,401520204,Minnesota,10-15,0.0,28.0,0-1,1.0,0.0,0.0,19.0,...,4.0,24.0,5.0,8-13,2.0,2-17,413.0,2.0,7.8,5.3
48,401520284,Minnesota,12-14,0.0,26.0,3-3,0.0,0.0,0.0,0.0,...,0.0,23.0,1.0,7-13,1.0,1-15,347.0,1.0,10.4,3.9
55,401520303,Minnesota,5-16,0.0,10.0,2-2,0.0,0.0,,,...,0.0,32.0,3.0,3-14,,4-36,169.0,2.0,3.3,3.0
71,401520354,Minnesota,14-22,0.0,24.0,0-0,2.0,1.0,0.0,11.0,...,1.0,26.0,3.0,6-13,2.0,3-35,400.0,3.0,9.1,4.0
80,401520370,Minnesota,11-22,0.0,16.0,1-2,1.0,2.0,0.0,0.0,...,5.0,45.0,9.0,3-12,1.0,2-10,260.0,1.0,7.6,2.7
101,401520436,Minnesota,16-28,0.0,17.0,1-2,1.0,0.0,0.0,52.0,...,0.0,44.0,2.0,5-13,1.0,3-30,286.0,2.0,6.0,3.8


In [33]:
# Rows of the away-side stats table that belong to Minnesota in one of its games.
minnesota_away_stats_2023 = away_team_stats_big10_2023.loc[
    lambda stats: stats['Id'].isin(minnesota_game_ids) & stats['Team'].eq('Minnesota')
]
minnesota_away_stats_2023

Stat Category,Id,Team,completionAttempts,defensiveTDs,firstDowns,fourthDownEff,fumblesLost,fumblesRecovered,interceptionTDs,interceptionYards,...,sacks,tackles,tacklesForLoss,thirdDownEff,totalFumbles,totalPenaltiesYards,totalYards,turnovers,yardsPerPass,yardsPerRushAttempt
33,401520239,Minnesota,11-30,0.0,19.0,1-1,0.0,0.0,0.0,17.0,...,2.0,40.0,5.0,3-12,,3-25,303.0,2.0,4.4,5.5
43,401520264,Minnesota,14-19,,15.0,1-1,0.0,1.0,,,...,,,,3-13,0.0,5-59,397.0,0.0,8.1,5.8
67,401520338,Minnesota,10-25,0.0,12.0,0-0,0.0,2.0,0.0,1.0,...,4.0,30.0,6.0,4-18,0.0,7-58,239.0,0.0,5.0,2.5
88,401520395,Minnesota,18-42,0.0,21.0,2-2,0.0,0.0,,,...,1.0,39.0,1.0,4-14,1.0,2-19,407.0,0.0,7.0,4.3
94,401520415,Minnesota,11-19,0.0,10.0,0-0,1.0,0.0,,,...,1.0,42.0,2.0,3-12,1.0,1-15,159.0,2.0,4.7,2.4


In [34]:
# Stack Minnesota's home and away stat rows into one table ordered by game Id.
# NOTE(review): later cells treat this Id order as game order -- presumably Ids increase
# with kickoff date; confirm against the schedule.
minnesota_allgame_stats_2023 = (
    pd.concat([minnesota_home_stats_2023, minnesota_away_stats_2023])
    .sort_values(by='Id')
)
minnesota_allgame_stats_2023

Stat Category,Id,Team,completionAttempts,defensiveTDs,firstDowns,fourthDownEff,fumblesLost,fumblesRecovered,interceptionTDs,interceptionYards,...,sacks,tackles,tacklesForLoss,thirdDownEff,totalFumbles,totalPenaltiesYards,totalYards,turnovers,yardsPerPass,yardsPerRushAttempt
0,401520148,Minnesota,24-44,0.0,20.0,2-2,0.0,1.0,0.0,14.0,...,3.0,38.0,5.0,8-17,0.0,3-25,251.0,1.0,4.5,2.2
18,401520204,Minnesota,10-15,0.0,28.0,0-1,1.0,0.0,0.0,19.0,...,4.0,24.0,5.0,8-13,2.0,2-17,413.0,2.0,7.8,5.3
33,401520239,Minnesota,11-30,0.0,19.0,1-1,0.0,0.0,0.0,17.0,...,2.0,40.0,5.0,3-12,,3-25,303.0,2.0,4.4,5.5
43,401520264,Minnesota,14-19,,15.0,1-1,0.0,1.0,,,...,,,,3-13,0.0,5-59,397.0,0.0,8.1,5.8
48,401520284,Minnesota,12-14,0.0,26.0,3-3,0.0,0.0,0.0,0.0,...,0.0,23.0,1.0,7-13,1.0,1-15,347.0,1.0,10.4,3.9
55,401520303,Minnesota,5-16,0.0,10.0,2-2,0.0,0.0,,,...,0.0,32.0,3.0,3-14,,4-36,169.0,2.0,3.3,3.0
67,401520338,Minnesota,10-25,0.0,12.0,0-0,0.0,2.0,0.0,1.0,...,4.0,30.0,6.0,4-18,0.0,7-58,239.0,0.0,5.0,2.5
71,401520354,Minnesota,14-22,0.0,24.0,0-0,2.0,1.0,0.0,11.0,...,1.0,26.0,3.0,6-13,2.0,3-35,400.0,3.0,9.1,4.0
80,401520370,Minnesota,11-22,0.0,16.0,1-2,1.0,2.0,0.0,0.0,...,5.0,45.0,9.0,3-12,1.0,2-10,260.0,1.0,7.6,2.7
88,401520395,Minnesota,18-42,0.0,21.0,2-2,0.0,0.0,,,...,1.0,39.0,1.0,4-14,1.0,2-19,407.0,0.0,7.0,4.3


In [35]:
# Cumulative (expanding-window) mean of every non-object column, row by row in the table's
# current order. 'Id' is parked in the index so it is not averaged, then restored as a column.
minnesota_gamestats_2023_rollingavg = (
    minnesota_allgame_stats_2023
    .set_index('Id')
    .select_dtypes(exclude='object')
    .expanding(min_periods=1)
    .mean()
    .reset_index()
)
# Earlier join-based variant, kept for reference:
# minnesota_gamestats_2023_rollingavg = minnesota_allgame_stats_2023[['Id']].join(minnesota_allgame_stats_2023.loc[:, minnesota_allgame_stats_2023.columns != 'Id'].select_dtypes(exclude='object').expanding().mean(), how='inner')
minnesota_gamestats_2023_rollingavg

Stat Category,Id,defensiveTDs,firstDowns,fumblesLost,fumblesRecovered,interceptionTDs,interceptionYards,interceptions,kickReturnTDs,kickReturnYards,...,rushingTDs,rushingYards,sacks,tackles,tacklesForLoss,totalFumbles,totalYards,turnovers,yardsPerPass,yardsPerRushAttempt
0,401520148,0.0,20.0,0.0,1.0,0.0,14.0,1.0,0.0,49.0,...,0.0,55.0,3.0,38.0,5.0,0.0,251.0,1.0,4.5,2.2
1,401520204,0.0,24.0,0.5,0.5,0.0,16.5,1.0,0.0,53.5,...,1.0,175.5,3.5,31.0,5.0,1.0,332.0,1.5,6.15,3.75
2,401520239,0.0,22.333333,0.333333,0.333333,0.0,16.666667,1.333333,0.0,53.5,...,1.0,173.666667,3.0,34.0,5.0,1.0,322.333333,1.666667,5.566667,4.333333
3,401520264,0.0,20.5,0.25,0.5,0.0,16.666667,1.0,0.0,42.333333,...,1.25,191.25,3.0,34.0,5.0,0.666667,341.0,1.25,6.2,4.7
4,401520284,0.0,21.6,0.2,0.4,0.0,12.5,1.0,0.0,32.5,...,1.6,193.2,2.25,31.25,4.0,0.75,342.2,1.2,7.04,4.54
5,401520303,0.0,19.666667,0.166667,0.333333,0.0,12.5,1.166667,0.0,26.8,...,1.333333,180.5,1.8,31.4,3.8,0.75,313.333333,1.333333,6.416667,4.283333
6,401520338,0.0,18.571429,0.142857,0.571429,0.0,10.2,1.0,0.0,25.833333,...,1.142857,170.857143,2.166667,31.166667,4.166667,0.6,302.714286,1.142857,6.214286,4.028571
7,401520354,0.0,19.25,0.375,0.625,0.0,10.333333,1.0,0.0,25.833333,...,1.25,174.5,2.0,30.428571,4.0,0.833333,314.875,1.375,6.575,4.025
8,401520370,0.0,18.888889,0.444444,0.777778,0.0,8.857143,0.888889,0.0,24.857143,...,1.111111,165.444444,2.375,32.25,4.625,0.857143,308.777778,1.333333,6.688889,3.877778
9,401520395,0.0,19.1,0.4,0.7,0.0,8.857143,0.8,0.0,23.375,...,1.0,160.4,2.222222,33.0,4.222222,0.875,318.6,1.2,6.72,3.92


In [36]:
# Push the running averages down one row so each game Id carries the average of the games
# before it; the first game therefore has no values (NaN).
shifted_minnesota_gamestats_2023_rollingavg = (
    minnesota_gamestats_2023_rollingavg
    .set_index('Id')
    .shift(periods=1)
    .reset_index()
)
shifted_minnesota_gamestats_2023_rollingavg

Stat Category,Id,defensiveTDs,firstDowns,fumblesLost,fumblesRecovered,interceptionTDs,interceptionYards,interceptions,kickReturnTDs,kickReturnYards,...,rushingTDs,rushingYards,sacks,tackles,tacklesForLoss,totalFumbles,totalYards,turnovers,yardsPerPass,yardsPerRushAttempt
0,401520148,,,,,,,,,,...,,,,,,,,,,
1,401520204,0.0,20.0,0.0,1.0,0.0,14.0,1.0,0.0,49.0,...,0.0,55.0,3.0,38.0,5.0,0.0,251.0,1.0,4.5,2.2
2,401520239,0.0,24.0,0.5,0.5,0.0,16.5,1.0,0.0,53.5,...,1.0,175.5,3.5,31.0,5.0,1.0,332.0,1.5,6.15,3.75
3,401520264,0.0,22.333333,0.333333,0.333333,0.0,16.666667,1.333333,0.0,53.5,...,1.0,173.666667,3.0,34.0,5.0,1.0,322.333333,1.666667,5.566667,4.333333
4,401520284,0.0,20.5,0.25,0.5,0.0,16.666667,1.0,0.0,42.333333,...,1.25,191.25,3.0,34.0,5.0,0.666667,341.0,1.25,6.2,4.7
5,401520303,0.0,21.6,0.2,0.4,0.0,12.5,1.0,0.0,32.5,...,1.6,193.2,2.25,31.25,4.0,0.75,342.2,1.2,7.04,4.54
6,401520338,0.0,19.666667,0.166667,0.333333,0.0,12.5,1.166667,0.0,26.8,...,1.333333,180.5,1.8,31.4,3.8,0.75,313.333333,1.333333,6.416667,4.283333
7,401520354,0.0,18.571429,0.142857,0.571429,0.0,10.2,1.0,0.0,25.833333,...,1.142857,170.857143,2.166667,31.166667,4.166667,0.6,302.714286,1.142857,6.214286,4.028571
8,401520370,0.0,19.25,0.375,0.625,0.0,10.333333,1.0,0.0,25.833333,...,1.25,174.5,2.0,30.428571,4.0,0.833333,314.875,1.375,6.575,4.025
9,401520395,0.0,18.888889,0.444444,0.777778,0.0,8.857143,0.888889,0.0,24.857143,...,1.111111,165.444444,2.375,32.25,4.625,0.857143,308.777778,1.333333,6.688889,3.877778
