In [19]:
# Import packages

import pandas as pd 
import numpy as np 
import os

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_validate
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

import xgboost as xgb

from joblib import dump, load

# Show every column when a wide DataFrame is displayed
pd.options.display.max_columns = None

In [20]:
# Load the three inputs:
#   stats    - aggregated adjusted-value model output; rows with any missing value are discarded
#   schedule - one row per game
#   betting  - betting lines and results, minus the 'Unnamed: 0' column
stats = pd.read_csv('../../pipeline/data/07_adjusted_value_models_aggregated.csv').dropna()
schedule = pd.read_csv('../../data/schedule_final.csv')
betting = pd.read_csv('../../data/betting_data_cleaned_with_returns.csv').drop(columns=['Unnamed: 0'])

In [21]:
# Attach each side's stats to every scheduled game. Both joins are left joins keyed on
# season, week, team and quarterback, so a game with no matching stats row keeps NaN features.
nfl = (
    schedule
    .merge(
        stats.add_suffix('_home'),
        how='left',
        left_on=['season', 'week', 'home', 'home_qb_abv'],
        right_on=['season_home', 'week_home', 'team_full_home', 'qb_home'],
    )
    .merge(
        stats.add_suffix('_away'),
        how='left',
        left_on=['season', 'week', 'away', 'away_qb_abv'],
        right_on=['season_away', 'week_away', 'team_full_away', 'qb_away'],
    )
    # Keep only the game identifiers, scores, quarterbacks and model features
    .loc[:, [
        'date', 'season', 'week', 'season_type', 'home', 'away',
        'home_score', 'away_score', 'home_qb', 'away_qb',
        'passing_value_adjusted_home', 'rushing_value_adjusted_home',
        'pass_def_value_adjusted_home', 'rush_def_value_adjusted_home',
        'special_teams_value_home',
        'passing_value_adjusted_away', 'rushing_value_adjusted_away',
        'pass_def_value_adjusted_away', 'rush_def_value_adjusted_away',
        'special_teams_value_away',
        'total_possession_time_standardized_home', 'total_possession_time_standardized_away',
        'total_plays_standardized_home', 'total_plays_standardized_away',
        'pass_percentage_standardized_home', 'pass_percentage_standardized_away',
    ]]
)
nfl.head()

Unnamed: 0,date,season,week,season_type,home,away,home_score,away_score,home_qb,away_qb,passing_value_adjusted_home,rushing_value_adjusted_home,pass_def_value_adjusted_home,rush_def_value_adjusted_home,special_teams_value_home,passing_value_adjusted_away,rushing_value_adjusted_away,pass_def_value_adjusted_away,rush_def_value_adjusted_away,special_teams_value_away,total_possession_time_standardized_home,total_possession_time_standardized_away,total_plays_standardized_home,total_plays_standardized_away,pass_percentage_standardized_home,pass_percentage_standardized_away
0,2014-09-04,2014,1,REG,Seattle Seahawks,Green Bay Packers,36,16,Russell Wilson,Aaron Rodgers,,,,,,,,,,,,,,,,
1,2014-09-07,2014,1,REG,Baltimore Ravens,Cincinnati Bengals,16,23,Joe Flacco,Andy Dalton,,,,,,,,,,,,,,,,
2,2014-09-07,2014,1,REG,Houston Texans,Washington Football Team,17,6,Ryan Fitzpatrick,Robert Griffin,,,,,,,,,,,,,,,,
3,2014-09-07,2014,1,REG,Chicago Bears,Buffalo Bills,20,23,Jay Cutler,EJ Manuel,,,,,,,,,,,,,,,,
4,2014-09-07,2014,1,REG,Pittsburgh Steelers,Cleveland Browns,30,27,Ben Roethlisberger,Brian Hoyer,,,,,,,,,,,,,,,,


In [22]:
# Attach the betting lines and results to each game. This is an inner join on the
# date and both team names, so games without a matching betting row are dropped.

nfl_cleaned_with_betting = pd.merge(
    nfl,
    betting,
    how='inner',
    left_on=['date', 'home', 'away'],
    right_on=['date', 'home_team', 'away_team'],
)

In [23]:
# Add a binary label for the over hitting, then remove any pushes

nfl_cleaned_with_betting_final = nfl_cleaned_with_betting.copy()

# 1 when the game went over the total, 0 otherwise. A vectorised comparison replaces
# the row-wise apply and also works when the frame is empty.
nfl_cleaned_with_betting_final['over_hits'] = (
    nfl_cleaned_with_betting_final['over_under_result'] == 'over'
).astype(int)

# Pushes are neither an over nor an under, so they are excluded from modeling
nfl_cleaned_with_betting_final = nfl_cleaned_with_betting_final[
    nfl_cleaned_with_betting_final['over_under_result'] != 'push'
]
nfl_cleaned_with_betting_final

Unnamed: 0,date,season,week,season_type,home,away,home_score_x,away_score_x,home_qb,away_qb,passing_value_adjusted_home,rushing_value_adjusted_home,pass_def_value_adjusted_home,rush_def_value_adjusted_home,special_teams_value_home,passing_value_adjusted_away,rushing_value_adjusted_away,pass_def_value_adjusted_away,rush_def_value_adjusted_away,special_teams_value_away,total_possession_time_standardized_home,total_possession_time_standardized_away,total_plays_standardized_home,total_plays_standardized_away,pass_percentage_standardized_home,pass_percentage_standardized_away,home_team,away_team,home_score_y,away_score_y,total_score_actual,home_moneyline,away_moneyline,home_spread,away_spread,total_score_line,over_under_result,home_team_actual_line,away_team_actual_line,spread_cover_result,home_implied_prob,away_implied_prob,game_winner,favorite_return,underdog_return,home_return,away_return,favorite_spread_return,underdog_spread_return,home_spread_return,away_spread_return,over_return,under_return,over_hits
0,2014-09-04,2014,1,REG,Seattle Seahawks,Green Bay Packers,36,16,Russell Wilson,Aaron Rodgers,,,,,,,,,,,,,,,,,Seattle Seahawks,Green Bay Packers,36,16,52,-230.0,205.0,-4.5,4.5,46.5,over,-20,20,home,0.696970,0.327869,home,43.48,-100.0,43.48,-100.0,90.91,-100.00,90.91,-100.00,90.91,-100.00,1
1,2014-09-07,2014,1,REG,Baltimore Ravens,Cincinnati Bengals,16,23,Joe Flacco,Andy Dalton,,,,,,,,,,,,,,,,,Baltimore Ravens,Cincinnati Bengals,16,23,39,-113.0,102.0,-1.0,1.0,43.5,under,7,-7,away,0.530516,0.495050,away,-100.00,102.0,-100.00,102.0,-100.00,90.91,-100.00,90.91,-100.00,90.91,0
2,2014-09-07,2014,1,REG,Houston Texans,Washington Football Team,17,6,Ryan Fitzpatrick,Robert Griffin,,,,,,,,,,,,,,,,,Houston Texans,Washington Football Team,17,6,23,-177.0,159.0,-3.0,3.0,43.5,under,-11,11,home,0.638989,0.386100,home,56.50,-100.0,56.50,-100.0,90.91,-100.00,90.91,-100.00,-100.00,90.91,0
3,2014-09-07,2014,1,REG,Chicago Bears,Buffalo Bills,20,23,Jay Cutler,EJ Manuel,,,,,,,,,,,,,,,,,Chicago Bears,Buffalo Bills,20,23,43,-270.0,239.0,-7.0,7.0,47.0,under,3,-3,away,0.729730,0.294985,away,-100.00,239.0,-100.00,239.0,-100.00,90.91,-100.00,90.91,-100.00,90.91,0
4,2014-09-07,2014,1,REG,Pittsburgh Steelers,Cleveland Browns,30,27,Ben Roethlisberger,Brian Hoyer,,,,,,,,,,,,,,,,,Pittsburgh Steelers,Cleveland Browns,30,27,57,-242.0,216.0,-5.5,5.5,41.5,over,-3,3,away,0.707602,0.316456,home,41.32,-100.0,41.32,-100.0,-100.00,90.91,-100.00,90.91,90.91,-100.00,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2402,2023-01-22,2022,20,POST,Buffalo Bills,Cincinnati Bengals,10,27,Josh Allen,Joe Burrow,-0.151187,0.650933,0.483813,0.589479,-0.215688,0.002339,-0.152032,-0.140611,1.049708,0.204819,-0.153360,0.421317,-0.011858,0.274171,-0.007230,0.889427,Buffalo Bills,Cincinnati Bengals,10,27,37,-260.0,215.0,-6.0,6.0,48.5,under,17,-17,away,0.722222,0.317460,away,-100.00,215.0,-100.00,215.0,-100.00,90.91,-100.00,90.91,-100.00,90.91,0
2403,2023-01-22,2022,20,POST,San Francisco 49ers,Dallas Cowboys,19,12,Brock Purdy,Dak Prescott,1.271880,0.634522,0.303955,0.348498,0.331705,0.563132,-0.743069,-0.104332,0.297954,-0.500316,0.342732,0.301391,-0.247412,0.812579,-0.969892,-0.382391,San Francisco 49ers,Dallas Cowboys,19,12,31,-200.0,170.0,-3.5,3.5,46.5,under,-7,7,home,0.666667,0.370370,home,50.00,-100.0,50.00,-100.0,90.91,-100.00,90.91,-100.00,-100.00,90.91,0
2404,2023-01-29,2022,21,POST,Philadelphia Eagles,San Francisco 49ers,31,7,Jalen Hurts,Brock Purdy,0.131127,1.616761,0.742787,-0.131826,0.329906,,,,,,0.560610,,0.672368,,-0.941733,,Philadelphia Eagles,San Francisco 49ers,31,7,38,-155.0,135.0,-3.0,3.0,45.0,under,-24,24,home,0.607843,0.425532,home,64.52,-100.0,64.52,-100.0,90.91,-100.00,90.91,-100.00,-100.00,90.91,0
2405,2023-01-29,2022,21,POST,Kansas City Chiefs,Cincinnati Bengals,23,20,Patrick Mahomes,Joe Burrow,0.474375,0.512161,0.781146,-0.771093,-0.504759,0.363199,0.164351,-0.080416,0.555210,0.215284,-0.141815,0.625788,-0.449315,0.520941,-0.018159,0.405253,Kansas City Chiefs,Cincinnati Bengals,23,20,43,-130.0,110.0,-2.0,2.0,48.5,under,-3,3,home,0.565217,0.476190,home,76.92,-100.0,76.92,-100.0,90.91,-100.00,90.91,-100.00,-100.00,90.91,0


In [24]:
# Persist the labelled modeling frame; the row index is written as the first column
nfl_cleaned_with_betting_final.to_csv(
    '../../data/adjusted_stats_over_under_modeling_base_df.csv',
    index=True,
)

In [25]:
# Season-based split: seasons through 2020 train, 2021 validates, 2022 tests.
# dropna() discards every game that has a missing value in any column.

train_df = nfl_cleaned_with_betting_final.loc[lambda games: games.season <= 2020].dropna()

val_df = nfl_cleaned_with_betting_final.loc[lambda games: games.season == 2021].dropna()

test_df = nfl_cleaned_with_betting_final.loc[lambda games: games.season == 2022].dropna()

In [26]:
# Model input columns, in the order they are fed to the models
feature_list = [
    # Home team adjusted values
    'passing_value_adjusted_home',
    'rushing_value_adjusted_home',
    'pass_def_value_adjusted_home',
    'rush_def_value_adjusted_home',
    'special_teams_value_home',
    # Away team adjusted values
    'passing_value_adjusted_away',
    'rushing_value_adjusted_away',
    'pass_def_value_adjusted_away',
    'rush_def_value_adjusted_away',
    'special_teams_value_away',
    # Standardized possession time, play count and pass percentage (home, then away)
    'total_possession_time_standardized_home',
    'total_possession_time_standardized_away',
    'total_plays_standardized_home',
    'total_plays_standardized_away',
    'pass_percentage_standardized_home',
    'pass_percentage_standardized_away',
]

In [27]:
# Get features, labels for train, val, and test sets.
# Every split is converted to a numpy array so that models fit on train_x see the
# same input type at predict time.

train_x = train_df[feature_list].to_numpy()
train_y = train_df.over_hits.to_numpy()

val_x = val_df[feature_list].to_numpy()
val_y = val_df.over_hits.to_numpy()

# Previously left as a DataFrame, unlike train_x and val_x
test_x = test_df[feature_list].to_numpy()
test_y = test_df.over_hits.to_numpy()

In [28]:
# Baseline logistic regression using scikit-learn's default settings

model = LogisticRegression()
model.fit(train_x, train_y)

# Hard class predictions for both splits
train_preds = model.predict(train_x)
val_preds = model.predict(val_x)

# Class probabilities for both splits
train_probs = model.predict_proba(train_x)
val_probs = model.predict_proba(val_x)

print('Training Accuracy: {}'.format(np.round(accuracy_score(train_y, train_preds), 3)))
print('Validation Accuracy: {}'.format(np.round(accuracy_score(val_y, val_preds), 3)))

Training Accuracy: 0.537
Validation Accuracy: 0.531


In [29]:
# Logistic Regression, with hyperparameter tuning

def logistic_regression_tuning(train_x, train_y, verbose=True):
    """Grid-search LogisticRegression settings, selecting by validation accuracy.

    Every supported combination of penalty, C and solver is fit on the training
    data and scored on the validation set. The first combination to reach the
    highest validation accuracy is kept.

    NOTE: the validation set is not a parameter. The function reads ``val_x`` and
    ``val_y`` from the enclosing (notebook) scope, so both must be defined before
    this function is called.

    Parameters
    ----------
    train_x : features each candidate model is fit on.
    train_y : labels for ``train_x``.
    verbose : bool, default True
        When True, print the train and validation accuracy of every candidate.

    Returns
    -------
    list or None
        ``[penalty, C, solver]`` for the selected candidate, or ``None`` if no
        candidate was fit.
    """

    # Hyperparameters to tune
    # NOTE(review): newer scikit-learn releases may expect None rather than the
    # string 'none' for an unpenalised model - confirm against the installed version.
    penalty_list = ['none', 'l2', 'l1']
    c_values = [100, 10, 1.0, 0.1, 0.01]
    solvers = ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']

    # Penalties that each solver is skipped for; solvers not listed try every penalty
    skipped_penalties = {
        'newton-cg': ('l1', 'elasticnet'),
        'liblinear': ('none', 'elasticnet'),
        'lbfgs': ('l1', 'elasticnet'),
        'sag': ('l1', 'elasticnet'),
    }

    train_acc_chosen_model = 0
    best_val_acc = 0
    # Initialised up front so the summary below cannot hit an unbound name
    best_params = None

    for penalty in penalty_list:
        for c in c_values:
            for solver in solvers:

                if penalty in skipped_penalties.get(solver, ()):
                    continue

                lr_model = LogisticRegression(penalty=penalty, solver=solver, C=c).fit(train_x, train_y)
                train_acc = accuracy_score(lr_model.predict(train_x), train_y)
                val_acc = accuracy_score(lr_model.predict(val_x), val_y)

                if verbose:
                    print('Train Accuracy: {}, Validation Accuracy:{}'.format(train_acc, val_acc))

                # The first fitted candidate is always recorded; later ones must strictly improve
                if best_params is None or val_acc > best_val_acc:
                    best_val_acc = val_acc
                    train_acc_chosen_model = train_acc
                    best_params = [penalty, c, solver]

    print('Best Model:')
    print('Training Accuracy: {}'.format(train_acc_chosen_model))
    print('Validation Accuracy: {}'.format(best_val_acc))
    print('Best Parameters: {}'.format(best_params))

    return best_params

In [30]:
# Run the logistic regression grid search quietly; only the best-model summary is printed
best_params_lr = logistic_regression_tuning(train_x, train_y, verbose=False)



Best Model:
Training Accuracy: 0.5057840616966581
Validation Accuracy: 0.5346938775510204
Best Parameters: ['l1', 0.01, 'liblinear']


In [36]:
# Save best logistic regression model

best_penalty, best_c, best_solver = best_params_lr

# Refit with the tuned hyperparameters and persist that model. Do not refit with
# default parameters before the dump, or the saved artifact will not correspond
# to best_params_lr.
best_lr_model = LogisticRegression(penalty=best_penalty, C=best_c, solver=best_solver).fit(train_x, train_y)
dump(best_lr_model, 'saved_models/ou_logistic_regression_av.joblib')

['saved_models/ou_logistic_regression_av.joblib']

In [15]:
# Baseline random forest using scikit-learn's default settings

model = RandomForestClassifier()
model.fit(train_x, train_y)

# Hard class predictions for both splits
train_preds = model.predict(train_x)
val_preds = model.predict(val_x)

# Class probabilities for both splits
train_probs = model.predict_proba(train_x)
val_probs = model.predict_proba(val_x)

print('Training Accuracy: {}'.format(np.round(accuracy_score(train_y, train_preds), 3)))
print('Validation Accuracy: {}'.format(np.round(accuracy_score(val_y, val_preds), 3)))

Training Accuracy: 1.0
Validation Accuracy: 0.486


In [16]:
# Random Forest, with hyperparameter tuning

def random_forest_tuning(train_x, train_y, verbose=True, random_state=None):
    """Grid-search RandomForestClassifier settings, selecting by validation accuracy.

    Every combination of n_estimators, criterion and max_depth is fit on the
    training data and scored on the validation set. The first combination to
    reach the highest validation accuracy is kept, together with its fitted model.

    NOTE: the validation set is not a parameter. The function reads ``val_x`` and
    ``val_y`` from the enclosing (notebook) scope, so both must be defined before
    this function is called.

    Parameters
    ----------
    train_x : features each candidate model is fit on.
    train_y : labels for ``train_x``.
    verbose : bool, default True
        When True, print the accuracies and parameters of every candidate.
    random_state : optional, default None
        Passed to every RandomForestClassifier. The default leaves the forests
        unseeded, as before; pass an int to make the search repeatable.

    Returns
    -------
    tuple
        ``(best_params, best_model)`` where ``best_params`` is
        ``[n_estimators, criterion, max_depth]`` and ``best_model`` is the fitted
        classifier. Both are ``None`` if no candidate was fit.
    """

    # Hyperparameters to tune
    estimators_list = [3, 5, 7, 10, 15, 20, 25, 40, 50, 60, 75, 90, 100, 125, 150, 200]
    criterions = ['gini', 'entropy', 'log_loss']
    depths = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, None]

    train_acc_chosen_model = 0
    best_val_acc = 0
    # Initialised up front so the summary and return below cannot hit an unbound name
    best_params = None
    best_model = None

    for n_estimators in estimators_list:
        for criterion in criterions:
            for depth in depths:

                rf_model = RandomForestClassifier(
                    n_estimators=n_estimators, criterion=criterion, max_depth=depth,
                    random_state=random_state).fit(train_x, train_y)
                train_acc = accuracy_score(rf_model.predict(train_x), train_y)
                val_acc = accuracy_score(rf_model.predict(val_x), val_y)

                if verbose:
                    print('Train Accuracy: {}, Validation Accuracy:{}, Params: {}'.format(
                        train_acc, val_acc, [n_estimators, criterion, depth]))

                # The first fitted candidate is always recorded; later ones must strictly improve
                if best_model is None or val_acc > best_val_acc:
                    best_val_acc = val_acc
                    train_acc_chosen_model = train_acc
                    best_params = [n_estimators, criterion, depth]
                    best_model = rf_model

    print('Best Model:')
    print('Training Accuracy: {}'.format(train_acc_chosen_model))
    print('Validation Accuracy: {}'.format(best_val_acc))
    print('Best Parameters: {}'.format(best_params))

    return best_params, best_model

In [17]:
# Run the random forest grid search, printing every candidate's accuracies and parameters
best_params_rf, best_rf_model = random_forest_tuning(train_x, train_y, verbose=True)

Train Accuracy: 0.5179948586118251, Validation Accuracy:0.46122448979591835, Params: [3, 'gini', 1]
Train Accuracy: 0.5591259640102828, Validation Accuracy:0.46122448979591835, Params: [3, 'gini', 2]
Train Accuracy: 0.5739074550128535, Validation Accuracy:0.49387755102040815, Params: [3, 'gini', 3]
Train Accuracy: 0.5944730077120822, Validation Accuracy:0.4816326530612245, Params: [3, 'gini', 4]
Train Accuracy: 0.6561696658097687, Validation Accuracy:0.5102040816326531, Params: [3, 'gini', 5]
Train Accuracy: 0.6227506426735219, Validation Accuracy:0.5142857142857142, Params: [3, 'gini', 6]
Train Accuracy: 0.7146529562982005, Validation Accuracy:0.5020408163265306, Params: [3, 'gini', 7]
Train Accuracy: 0.7268637532133676, Validation Accuracy:0.5265306122448979, Params: [3, 'gini', 8]
Train Accuracy: 0.7390745501285347, Validation Accuracy:0.4897959183673469, Params: [3, 'gini', 9]
Train Accuracy: 0.8020565552699229, Validation Accuracy:0.5755102040816327, Params: [3, 'gini', 10]
Train 

Train Accuracy: 0.6568123393316195, Validation Accuracy:0.5102040816326531, Params: [7, 'entropy', 5]
Train Accuracy: 0.6831619537275064, Validation Accuracy:0.4897959183673469, Params: [7, 'entropy', 6]
Train Accuracy: 0.7320051413881749, Validation Accuracy:0.5142857142857142, Params: [7, 'entropy', 7]
Train Accuracy: 0.7641388174807198, Validation Accuracy:0.5918367346938775, Params: [7, 'entropy', 8]
Train Accuracy: 0.8508997429305912, Validation Accuracy:0.5387755102040817, Params: [7, 'entropy', 9]
Train Accuracy: 0.8502570694087404, Validation Accuracy:0.49387755102040815, Params: [7, 'entropy', 10]
Train Accuracy: 0.9762210796915167, Validation Accuracy:0.49795918367346936, Params: [7, 'entropy', None]
Train Accuracy: 0.5488431876606684, Validation Accuracy:0.4816326530612245, Params: [7, 'log_loss', 1]
Train Accuracy: 0.5809768637532133, Validation Accuracy:0.5061224489795918, Params: [7, 'log_loss', 2]
Train Accuracy: 0.5906169665809768, Validation Accuracy:0.4653061224489795

Train Accuracy: 0.8913881748071979, Validation Accuracy:0.46938775510204084, Params: [15, 'log_loss', 10]
Train Accuracy: 0.9974293059125964, Validation Accuracy:0.46530612244897956, Params: [15, 'log_loss', None]
Train Accuracy: 0.5758354755784062, Validation Accuracy:0.5714285714285714, Params: [20, 'gini', 1]
Train Accuracy: 0.5976863753213367, Validation Accuracy:0.46122448979591835, Params: [20, 'gini', 2]
Train Accuracy: 0.6401028277634961, Validation Accuracy:0.4897959183673469, Params: [20, 'gini', 3]
Train Accuracy: 0.6825192802056556, Validation Accuracy:0.43673469387755104, Params: [20, 'gini', 4]
Train Accuracy: 0.7467866323907455, Validation Accuracy:0.49795918367346936, Params: [20, 'gini', 5]
Train Accuracy: 0.7917737789203085, Validation Accuracy:0.5387755102040817, Params: [20, 'gini', 6]
Train Accuracy: 0.8534704370179949, Validation Accuracy:0.5265306122448979, Params: [20, 'gini', 7]
Train Accuracy: 0.9010282776349614, Validation Accuracy:0.5142857142857142, Params:

Train Accuracy: 0.7017994858611826, Validation Accuracy:0.4816326530612245, Params: [40, 'entropy', 4]
Train Accuracy: 0.756426735218509, Validation Accuracy:0.4775510204081633, Params: [40, 'entropy', 5]
Train Accuracy: 0.8123393316195373, Validation Accuracy:0.5224489795918368, Params: [40, 'entropy', 6]
Train Accuracy: 0.8476863753213367, Validation Accuracy:0.46530612244897956, Params: [40, 'entropy', 7]
Train Accuracy: 0.9003856041131105, Validation Accuracy:0.45714285714285713, Params: [40, 'entropy', 8]
Train Accuracy: 0.9235218508997429, Validation Accuracy:0.49795918367346936, Params: [40, 'entropy', 9]
Train Accuracy: 0.9517994858611826, Validation Accuracy:0.5020408163265306, Params: [40, 'entropy', 10]
Train Accuracy: 1.0, Validation Accuracy:0.5306122448979592, Params: [40, 'entropy', None]
Train Accuracy: 0.589974293059126, Validation Accuracy:0.5020408163265306, Params: [40, 'log_loss', 1]
Train Accuracy: 0.6176092544987146, Validation Accuracy:0.4530612244897959, Params

Train Accuracy: 0.9138817480719794, Validation Accuracy:0.5551020408163265, Params: [60, 'log_loss', 8]
Train Accuracy: 0.9473007712082262, Validation Accuracy:0.5183673469387755, Params: [60, 'log_loss', 9]
Train Accuracy: 0.968508997429306, Validation Accuracy:0.49387755102040815, Params: [60, 'log_loss', 10]
Train Accuracy: 1.0, Validation Accuracy:0.4897959183673469, Params: [60, 'log_loss', None]
Train Accuracy: 0.5655526992287918, Validation Accuracy:0.4816326530612245, Params: [75, 'gini', 1]
Train Accuracy: 0.6291773778920309, Validation Accuracy:0.5020408163265306, Params: [75, 'gini', 2]
Train Accuracy: 0.6709511568123393, Validation Accuracy:0.5061224489795918, Params: [75, 'gini', 3]
Train Accuracy: 0.7467866323907455, Validation Accuracy:0.5183673469387755, Params: [75, 'gini', 4]
Train Accuracy: 0.8194087403598972, Validation Accuracy:0.5142857142857142, Params: [75, 'gini', 5]
Train Accuracy: 0.8676092544987146, Validation Accuracy:0.49795918367346936, Params: [75, 'gini

Train Accuracy: 0.6118251928020566, Validation Accuracy:0.49795918367346936, Params: [100, 'entropy', 2]
Train Accuracy: 0.6741645244215938, Validation Accuracy:0.46530612244897956, Params: [100, 'entropy', 3]
Train Accuracy: 0.7223650385604113, Validation Accuracy:0.49795918367346936, Params: [100, 'entropy', 4]
Train Accuracy: 0.7840616966580977, Validation Accuracy:0.4857142857142857, Params: [100, 'entropy', 5]
Train Accuracy: 0.8341902313624678, Validation Accuracy:0.5142857142857142, Params: [100, 'entropy', 6]
Train Accuracy: 0.9003856041131105, Validation Accuracy:0.49387755102040815, Params: [100, 'entropy', 7]
Train Accuracy: 0.9389460154241646, Validation Accuracy:0.4816326530612245, Params: [100, 'entropy', 8]
Train Accuracy: 0.9704370179948586, Validation Accuracy:0.4897959183673469, Params: [100, 'entropy', 9]
Train Accuracy: 0.9877892030848329, Validation Accuracy:0.49387755102040815, Params: [100, 'entropy', 10]
Train Accuracy: 1.0, Validation Accuracy:0.518367346938775

Train Accuracy: 0.7898457583547558, Validation Accuracy:0.4857142857142857, Params: [150, 'log_loss', 5]
Train Accuracy: 0.852827763496144, Validation Accuracy:0.4816326530612245, Params: [150, 'log_loss', 6]
Train Accuracy: 0.8939588688946015, Validation Accuracy:0.5306122448979592, Params: [150, 'log_loss', 7]
Train Accuracy: 0.9350899742930592, Validation Accuracy:0.49795918367346936, Params: [150, 'log_loss', 8]
Train Accuracy: 0.9736503856041131, Validation Accuracy:0.46122448979591835, Params: [150, 'log_loss', 9]
Train Accuracy: 0.9852185089974294, Validation Accuracy:0.5102040816326531, Params: [150, 'log_loss', 10]
Train Accuracy: 1.0, Validation Accuracy:0.46938775510204084, Params: [150, 'log_loss', None]
Train Accuracy: 0.5848329048843187, Validation Accuracy:0.4857142857142857, Params: [200, 'gini', 1]
Train Accuracy: 0.6317480719794345, Validation Accuracy:0.5061224489795918, Params: [200, 'gini', 2]
Train Accuracy: 0.6921593830334191, Validation Accuracy:0.48163265306122

In [18]:
# Save best random forest model

# Unpack the random forest search result (not best_params_lr, which holds the
# logistic regression penalty, C and solver)
best_n_estimators, best_criterion, best_depth = best_params_rf

# The fitted model returned by the search is saved as-is, without refitting
dump(best_rf_model, 'saved_models/ou_random_forest_av.joblib')

['saved_models/ou_random_forest_av.joblib']