In [1]:
import pandas as pd
from sklearn.linear_model import LinearRegression, LogisticRegression, Lasso, RidgeCV
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

In [2]:
game_results_big10_2023 = pd.read_csv('../data/game_results_big10_2023.csv')
betting_lines_big10_2023 = pd.read_csv('../data/betting_lines_big10_2023.csv')
team_recruiting_rankings_2023 = pd.read_csv('../data/team_recruiting_rankings_2023.csv')
team_talent_composite_ranking_2023 = pd.read_csv('../data/team_talent_composite_ranking_2023.csv')
team_stats_big10_2023 = pd.read_csv('../data/team_stats_big10_2023.csv')

In [3]:
game_results_big10_2023.head().iloc[:,0:20]

Unnamed: 0,Id,Season,Week,Season Type,Start Date,Start Time Tbd,Completed,Neutral Site,Conference Game,Attendance,Venue Id,Venue,Home Id,Home Team,Home Conference,Home Division,Home Points,Home Line Scores[0],Home Line Scores[1],Home Line Scores[2]
0,401520148,2023,1,regular,2023-09-01T00:00:00.000Z,False,True,False,True,53629.0,3953,TCF Bank Stadium,135,Minnesota,Big Ten,fbs,13,0,3,0
1,401520163,2023,1,regular,2023-09-01T23:00:00.000Z,False,True,False,False,,3936,Spartan Stadium,127,Michigan State,Big Ten,fbs,31,0,10,7
2,401520170,2023,1,regular,2023-09-02T16:00:00.000Z,False,True,False,False,54898.0,3907,Ross-Ade Stadium,2509,Purdue,Big Ten,fbs,35,7,14,7
3,401520162,2023,1,regular,2023-09-02T16:00:00.000Z,False,True,False,False,109480.0,3558,Michigan Stadium,130,Michigan,Big Ten,fbs,30,7,16,7
4,401520157,2023,1,regular,2023-09-02T16:00:00.000Z,False,True,False,False,69250.0,3793,Kinnick Stadium,2294,Iowa,Big Ten,fbs,24,14,3,0


In [4]:
game_results_big10_2023.head().iloc[:,20:]

Unnamed: 0,Home Line Scores[3],Home Post Win Prob,Home Pregame Elo,Home Postgame Elo,Away Id,Away Team,Away Conference,Away Division,Away Points,Away Line Scores[0],Away Line Scores[1],Away Line Scores[2],Away Line Scores[3],Away Post Win Prob,Away Pregame Elo,Away Postgame Elo,Excitement Index,Highlights,Notes
0,10,0.047056,1672,1671,158,Nebraska,Big Ten,fbs,10,0,0,7,3,0.952944,1521.0,1522.0,7.656758,,
1,14,0.989833,1533,1575,2117,Central Michigan,Mid-American,fbs,7,0,7,0,0,0.010167,1370.0,1328.0,6.660548,,
2,7,0.125611,1473,1479,278,Fresno State,Mountain West,fbs,39,7,10,8,14,0.874389,1677.0,1671.0,6.896309,,
3,0,0.998549,1916,1941,151,East Carolina,American Athletic,fbs,3,0,0,0,3,0.001451,1506.0,1481.0,1.172112,,
4,7,0.890762,1665,1664,328,Utah State,Mountain West,fbs,14,0,3,3,8,0.109238,1395.0,1396.0,4.699575,,


In [5]:
betting_lines_big10_2023.head()

Unnamed: 0,Id,HomeTeam,HomeScore,AwayTeam,AwayScore,LineProvider,OverUnder,Spread,FormattedSpread,OpeningSpread,OpeningOverUnder,HomeMoneyline,AwayMoneyline
0,401520340,Michigan State,0,Michigan,49,William Hill (New Jersey),46.5,25.5,Michigan -25.5,,,,
1,401520340,Michigan State,0,Michigan,49,DraftKings,46.0,24.5,Michigan -24.5,19.0,,1700.0,-4500.0
2,401520340,Michigan State,0,Michigan,49,Bovada,47.0,25.5,Michigan -25.5,17.0,47.5,1100.0,-2800.0
3,401520343,Ohio State,20,Penn State,12,William Hill (New Jersey),46.5,-4.0,Ohio State -4,,,,
4,401520343,Ohio State,20,Penn State,12,DraftKings,45.5,-4.0,Ohio State -4,-10.0,,-192.0,160.0


In [6]:
betting_lines_big10_2023.shape

(315, 13)

In [7]:
betting_lines_big10_2023[betting_lines_big10_2023['LineProvider']=='DraftKings']

Unnamed: 0,Id,HomeTeam,HomeScore,AwayTeam,AwayScore,LineProvider,OverUnder,Spread,FormattedSpread,OpeningSpread,OpeningOverUnder,HomeMoneyline,AwayMoneyline
1,401520340,Michigan State,0,Michigan,49,DraftKings,46.0,24.5,Michigan -24.5,19.0,,1700.0,-4500.0
4,401520343,Ohio State,20,Penn State,12,DraftKings,45.5,-4.0,Ohio State -4,-10.0,,-192.0,160.0
7,401520336,Illinois,21,Wisconsin,25,DraftKings,40.5,3.0,Wisconsin -3,5.0,43.0,130.0,-155.0
10,401520301,Illinois,7,Nebraska,20,DraftKings,43.0,-3.5,Illinois -3.5,-3.0,44.5,-155.0,142.0
13,401520338,Iowa,10,Minnesota,12,DraftKings,30.5,-3.5,Iowa -3.5,-5.5,32.5,-166.0,140.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
301,401520392,Nebraska,10,Maryland,13,DraftKings,,1.0,Maryland -1,1.0,,102.0,-122.0
304,401520394,Penn State,15,Michigan,24,DraftKings,,6.5,Michigan -6.5,2.0,,160.0,-192.0
307,401520393,Ohio State,38,Michigan State,3,DraftKings,,-30.5,Ohio State -30.5,-25.5,,-20000.0,3500.0
310,401520194,Iowa State,13,Iowa,20,DraftKings,36.0,3.5,Iowa -3.5,4.0,36.5,150.0,-175.0


In [8]:
team_recruiting_rankings_2023.head()

Unnamed: 0,Year,Rank,Team,Points
0,2023,1,Alabama,328.0
1,2023,2,Georgia,315.68
2,2023,3,Texas,306.31
3,2023,4,Oklahoma,289.03
4,2023,5,Ohio State,288.98


In [9]:
team_talent_composite_ranking_2023.head()

Unnamed: 0,Year,School,Talent
0,2023,Alabama,1015.43
1,2023,Georgia,977.87
2,2023,Ohio State,974.79
3,2023,Texas A&M,925.92
4,2023,Clemson,917.88


In [10]:
team_stats_big10_2023.head()

Unnamed: 0,Game Id,School,Conference,HomeAway,Points,Stat Category,Stat
0,401520365,Wisconsin,Big Ten,away,14,rushingTDs,0
1,401520365,Wisconsin,Big Ten,away,14,puntReturnYards,29
2,401520365,Wisconsin,Big Ten,away,14,puntReturnTDs,0
3,401520365,Wisconsin,Big Ten,away,14,puntReturns,4
4,401520365,Wisconsin,Big Ten,away,14,passingTDs,2


In [11]:
team_stats_big10_2023_pivoted = team_stats_big10_2023.pivot(index= ['Game Id', 'School', 'HomeAway'], columns='Stat Category', values='Stat').reset_index()
team_stats_big10_2023_pivoted.head().iloc[:,0:20]

Stat Category,Game Id,School,HomeAway,completionAttempts,defensiveTDs,firstDowns,fourthDownEff,fumblesLost,fumblesRecovered,interceptionTDs,interceptionYards,interceptions,kickReturnTDs,kickReturnYards,kickReturns,kickingPoints,netPassingYards,passesDeflected,passesIntercepted,passingTDs
0,401520148,Minnesota,home,24-44,0,20,2-2,0,1,0,14,1,0,49,3,7,196,3,3,1
1,401520148,Nebraska,away,11-19,0,18,0-0,1,0,0,0,3,0,78,2,4,114,4,1,1
2,401520155,Illinois,home,18-26,1,21,2-3,0,0,1,48,1,0,77,5,6,206,3,1,2
3,401520155,Toledo,away,20-37,0,27,2-2,0,0,0,0,1,0,61,4,10,230,3,1,2
4,401520156,Indiana,home,9-20,0,8,0-2,0,0,0,11,0,0,38,3,3,82,8,1,0


In [12]:
team_stats_big10_2023_pivoted.head().iloc[:,20:]

Stat Category,possessionTime,puntReturnTDs,puntReturnYards,puntReturns,qbHurries,rushingAttempts,rushingTDs,rushingYards,sacks,tackles,tacklesForLoss,thirdDownEff,totalFumbles,totalPenaltiesYards,totalYards,turnovers,yardsPerPass,yardsPerRushAttempt
0,30:42,0.0,10.0,1.0,5,25,0,55,3,38,5,8-17,0.0,3-25,251,1,4.5,2.2
1,29:18,0.0,8.0,2.0,3,37,0,181,3,36,5,5-11,1.0,7-55,295,4,6.0,4.9
2,27:03,,,,1,35,1,168,1,50,5,2-8,,10-100,374,1,7.9,4.8
3,32:57,0.0,4.0,1.0,2,42,1,186,2,44,4,7-15,2.0,7-40,416,1,6.2,4.4
4,28:05,0.0,51.0,2.0,0,33,0,71,0,41,5,4-15,,8-75,153,0,4.1,2.2


In [13]:
team_stats_big10_2023_pivoted.columns

Index(['Game Id', 'School', 'HomeAway', 'completionAttempts', 'defensiveTDs',
       'firstDowns', 'fourthDownEff', 'fumblesLost', 'fumblesRecovered',
       'interceptionTDs', 'interceptionYards', 'interceptions',
       'kickReturnTDs', 'kickReturnYards', 'kickReturns', 'kickingPoints',
       'netPassingYards', 'passesDeflected', 'passesIntercepted', 'passingTDs',
       'possessionTime', 'puntReturnTDs', 'puntReturnYards', 'puntReturns',
       'qbHurries', 'rushingAttempts', 'rushingTDs', 'rushingYards', 'sacks',
       'tackles', 'tacklesForLoss', 'thirdDownEff', 'totalFumbles',
       'totalPenaltiesYards', 'totalYards', 'turnovers', 'yardsPerPass',
       'yardsPerRushAttempt'],
      dtype='object', name='Stat Category')

In [14]:
team_stat_float_columns = ['defensiveTDs',
       'firstDowns', 'fumblesLost', 'fumblesRecovered',
       'interceptionTDs', 'interceptionYards', 'interceptions',
       'kickReturnTDs', 'kickReturnYards', 'kickReturns', 'kickingPoints',
       'netPassingYards', 'passesDeflected', 'passesIntercepted', 'passingTDs',
        'puntReturnTDs', 'puntReturnYards', 'puntReturns',
       'qbHurries', 'rushingAttempts', 'rushingTDs', 'rushingYards', 'sacks',
       'tackles', 'tacklesForLoss', 'totalFumbles',
       'totalYards', 'turnovers', 'yardsPerPass',
       'yardsPerRushAttempt']
for col in team_stat_float_columns:
    team_stats_big10_2023_pivoted[col] = team_stats_big10_2023_pivoted[col].astype('float64')
team_stats_big10_2023_pivoted.dtypes

Stat Category
Game Id                  int64
School                  object
HomeAway                object
completionAttempts      object
defensiveTDs           float64
firstDowns             float64
fourthDownEff           object
fumblesLost            float64
fumblesRecovered       float64
interceptionTDs        float64
interceptionYards      float64
interceptions          float64
kickReturnTDs          float64
kickReturnYards        float64
kickReturns            float64
kickingPoints          float64
netPassingYards        float64
passesDeflected        float64
passesIntercepted      float64
passingTDs             float64
possessionTime          object
puntReturnTDs          float64
puntReturnYards        float64
puntReturns            float64
qbHurries              float64
rushingAttempts        float64
rushingTDs             float64
rushingYards           float64
sacks                  float64
tackles                float64
tacklesForLoss         float64
thirdDownEff            o

In [15]:
#home_team_stats_big10_2023 = team_stats_big10_2023_pivoted[team_stats_big10_2023_pivoted['HomeAway']=='home'].copy()
#.add_prefix('Home_').rename(columns = {'Home_Game Id':'Game Id', 'Home_School':'HomeTeam', 'Home_HomeAway':'HomeAway'}).drop(columns=['HomeAway'])


In [36]:
home_team_stats_big10_2023 = team_stats_big10_2023_pivoted[team_stats_big10_2023_pivoted['HomeAway']=='home'].reset_index().drop(columns='index').copy()
away_team_stats_big10_2023 = team_stats_big10_2023_pivoted[team_stats_big10_2023_pivoted['HomeAway']=='away'].reset_index().drop(columns='index').copy()
#.add_prefix('Away_').rename(columns = {'Away_Game Id':'Game Id', 'Away_School':'AwayTeam', 'Away_HomeAway':'HomeAway'}).drop(columns=['HomeAway'])

team_stats_big10_2023_home_minus_away =  home_team_stats_big10_2023.copy()
for col in team_stat_float_columns:
    team_stats_big10_2023_home_minus_away[col] = home_team_stats_big10_2023[col].subtract(away_team_stats_big10_2023[col], fill_value=0).copy()
#home_team_stats_big10_2023.sub(away_team_stats_big10_2023, fill_value=0)

home_team_stats_big10_2023 = home_team_stats_big10_2023.add_prefix('Home_').rename(columns = {'Home_Game Id':'Game Id', 'Home_School':'HomeTeam', 'Home_HomeAway':'HomeAway'}).drop(columns=['HomeAway']).copy()
away_team_stats_big10_2023 = away_team_stats_big10_2023.add_prefix('Away_').rename(columns = {'Away_Game Id':'Game Id', 'Away_School':'AwayTeam', 'Away_HomeAway':'HomeAway'}).drop(columns=['HomeAway']).copy()
team_stats_big10_2023_home_minus_away = team_stats_big10_2023_home_minus_away.add_prefix('Home_Minus_Away_').rename(columns = {'Home_Minus_Away_Game Id':'Game Id', 'Home_Minus_Away_School':'AwayTeam', 'Home_Minus_Away_HomeAway':'HomeAway'}).drop(columns=['HomeAway']).copy()

team_stats_big10_2023_home_minus_away.head()

Stat Category,Game Id,AwayTeam,Home_Minus_Away_completionAttempts,Home_Minus_Away_defensiveTDs,Home_Minus_Away_firstDowns,Home_Minus_Away_fourthDownEff,Home_Minus_Away_fumblesLost,Home_Minus_Away_fumblesRecovered,Home_Minus_Away_interceptionTDs,Home_Minus_Away_interceptionYards,...,Home_Minus_Away_sacks,Home_Minus_Away_tackles,Home_Minus_Away_tacklesForLoss,Home_Minus_Away_thirdDownEff,Home_Minus_Away_totalFumbles,Home_Minus_Away_totalPenaltiesYards,Home_Minus_Away_totalYards,Home_Minus_Away_turnovers,Home_Minus_Away_yardsPerPass,Home_Minus_Away_yardsPerRushAttempt
0,401520148,Minnesota,24-44,0.0,2.0,2-2,-1.0,1.0,0.0,14.0,...,0.0,2.0,0.0,8-17,-1.0,3-25,-44.0,-3.0,-1.5,-2.7
1,401520155,Illinois,18-26,1.0,-6.0,2-3,0.0,0.0,1.0,48.0,...,-1.0,6.0,1.0,2-8,-2.0,10-100,-42.0,0.0,1.7,0.4
2,401520156,Indiana,9-20,0.0,-14.0,0-2,0.0,0.0,0.0,11.0,...,-1.0,7.0,-1.0,4-15,,8-75,-227.0,-1.0,-2.5,-2.4
3,401520157,Iowa,18-33,0.0,-2.0,1-2,0.0,0.0,0.0,0.0,...,0.0,-3.0,-5.0,6-17,2.0,7-50,-45.0,-1.0,1.5,-2.4
4,401520161,Maryland,26-43,0.0,14.0,2-2,0.0,0.0,,,...,1.0,-4.0,0.0,7-14,,6-54,173.0,0.0,2.5,0.2


In [37]:
home_team_stats_big10_2023.head()

Stat Category,Game Id,HomeTeam,Home_completionAttempts,Home_defensiveTDs,Home_firstDowns,Home_fourthDownEff,Home_fumblesLost,Home_fumblesRecovered,Home_interceptionTDs,Home_interceptionYards,...,Home_sacks,Home_tackles,Home_tacklesForLoss,Home_thirdDownEff,Home_totalFumbles,Home_totalPenaltiesYards,Home_totalYards,Home_turnovers,Home_yardsPerPass,Home_yardsPerRushAttempt
0,401520148,Minnesota,24-44,0.0,20.0,2-2,0.0,1.0,0.0,14.0,...,3.0,38.0,5.0,8-17,0.0,3-25,251.0,1.0,4.5,2.2
1,401520155,Illinois,18-26,1.0,21.0,2-3,0.0,0.0,1.0,48.0,...,1.0,50.0,5.0,2-8,,10-100,374.0,1.0,7.9,4.8
2,401520156,Indiana,9-20,0.0,8.0,0-2,0.0,0.0,0.0,11.0,...,0.0,41.0,5.0,4-15,,8-75,153.0,0.0,4.1,2.2
3,401520157,Iowa,18-33,0.0,17.0,1-2,0.0,0.0,0.0,0.0,...,1.0,41.0,4.0,6-17,2.0,7-50,284.0,0.0,5.9,2.4
4,401520161,Maryland,26-43,0.0,30.0,2-2,0.0,0.0,,,...,2.0,41.0,3.0,7-14,,6-54,449.0,0.0,6.6,4.6


In [38]:
away_team_stats_big10_2023.head()

Stat Category,Game Id,AwayTeam,Away_completionAttempts,Away_defensiveTDs,Away_firstDowns,Away_fourthDownEff,Away_fumblesLost,Away_fumblesRecovered,Away_interceptionTDs,Away_interceptionYards,...,Away_sacks,Away_tackles,Away_tacklesForLoss,Away_thirdDownEff,Away_totalFumbles,Away_totalPenaltiesYards,Away_totalYards,Away_turnovers,Away_yardsPerPass,Away_yardsPerRushAttempt
0,401520148,Nebraska,11-19,0.0,18.0,0-0,1.0,0.0,0.0,0.0,...,3.0,36.0,5.0,5-11,1.0,7-55,295.0,4.0,6.0,4.9
1,401520155,Toledo,20-37,0.0,27.0,2-2,0.0,0.0,0.0,0.0,...,2.0,44.0,4.0,7-15,2.0,7-40,416.0,1.0,6.2,4.4
2,401520156,Ohio State,21-36,0.0,22.0,2-3,0.0,0.0,,,...,1.0,34.0,6.0,2-12,,5-41,380.0,1.0,6.6,4.6
3,401520157,Utah State,32-48,0.0,19.0,1-3,0.0,0.0,,,...,1.0,44.0,9.0,5-17,,10-91,329.0,1.0,4.4,4.8
4,401520161,Towson,16-31,0.0,16.0,1-3,0.0,0.0,,,...,1.0,45.0,3.0,5-15,,10-90,276.0,0.0,4.1,4.4
