<a href="https://colab.research.google.com/github/chorltonm/fa-cup-upsets/blob/main/notebooks/models/best_ml_models.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Import general Python libraries
import os
import json
import pandas as pd
import numpy as np
import importlib

# Google Cloud libraries
from google.cloud import bigquery
from google.oauth2 import service_account
from google.colab import drive
from google.colab import userdata
import pandas_gbq

# Scikit Learn libraries
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, recall_score, classification_report, confusion_matrix, roc_auc_score, roc_curve
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer

# Other
from matplotlib import pyplot
import seaborn as sns
from xgboost import XGBClassifier

In [3]:
# Mount Google Drive at /content/drive so the project folders can be accessed
drive.mount('/content/drive')

# Change the working directory to the project's python_files folder.
# NOTE(review): presumably this is what allows the later
# `import model_evaluation_functions` to resolve - confirm.
os.chdir('/content/drive/MyDrive/birkbeck_msc-project/python_files')



Mounted at /content/drive


In [16]:
# Import user-defined Python functions.
# The module is reloaded after import so that edits made to
# model_evaluation_functions.py are picked up without restarting the kernel.
import model_evaluation_functions
importlib.reload(model_evaluation_functions)
from model_evaluation_functions import create_model_results_df

In [5]:
# Authentication credentials and keys
# Google Service Account

# Load the JSON key from the service-account file stored on Google Drive.
# A context manager is used so the file handle is closed as soon as the key
# has been parsed, rather than being left open for the life of the kernel.
with open('/content/drive/MyDrive/service_account.json', 'r') as key_file:
    key = json.load(key_file)

# Authenticate using the loaded key
credentials = service_account.Credentials.from_service_account_info(key)

# Set up the BigQuery client with the credentials to project
client = bigquery.Client(credentials=credentials, project='birkbeck-msc-project-422917')

In [63]:
# Feature data for FA Cup

# SQL run against Google BigQuery: selects every column of the
# view_fa_cup_round_3_features view in the preparation_layer dataset.
fa_cup_features_all = """
    SELECT * FROM preparation_layer.view_fa_cup_round_3_features
"""

# Run the query with the authenticated client and materialise the result as a
# pandas DataFrame; this frame is the input to best_model_classifier below.
fa_cup_features_all_df = client.query(fa_cup_features_all).to_dataframe()
display(fa_cup_features_all_df)




Unnamed: 0,season_year,match_id,match_name,match_final_score,arena_stadium_capacity,home_team_league_level,home_win,home_team_round_3_position,home_team_massey,home_team_colley,...,match_weather_data_hours_dew,match_weather_data_hours_precip,match_weather_data_hours_snow,match_weather_data_hours_snowdepth,match_weather_data_hours_windspeed,match_weather_data_hours_pressure,match_weather_data_hours_visibility,match_weather_data_hours_cloudcover,match_weather_data_hours_condition,target_variable
0,09/10,505337,Chelsea vs Watford,5-0,41837,1,1,1,1,1,...,,,,,,,,,,0
1,14/15,1138603,Chelsea vs Watford,3-0,41837,1,1,1,1,1,...,34.70,0.000,0.0,0.00,4.80,1036.70,2.05,95.90,Overcast,0
2,16/17,1345813,Chelsea vs Peterborough United,4-1,41837,1,1,1,1,1,...,47.00,0.000,0.0,0.00,2.10,1031.70,4.20,87.00,Partially cloudy,0
3,20/21,201912,Manchester United vs Watford,1-0,74879,1,1,1,1,1,...,71.20,0.000,,0.00,7.65,1015.50,5.15,67.10,Partially cloudy,0
4,09/10,505338,Manchester United vs Leeds United,0-1,74879,1,0,2,3,7,...,,,,,,,,,,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
345,14/15,1142554,Blyth Spartans vs Birmingham City,2-3,,6,0,64,58,64,...,,,,,,,,,,0
346,17/18,949974,Yeovil Town vs Bradford City,2-0,9527,4,1,64,58,64,...,37.15,0.000,0.0,0.00,14.45,1012.75,9.00,94.25,Overcast Partially cloudy,1
347,18/19,50205,Woking vs Watford,0-2,6064,6,0,64,58,64,...,41.45,0.000,0.0,0.00,6.35,1036.35,15.15,98.65,Overcast,0
348,20/21,200806,Chorley FC vs Derby County,2-0,4100,6,1,64,58,64,...,32.75,0.001,0.0,2.36,6.45,1026.75,6.20,69.50,"Snow, Rain, Partially cloudy Partially cloudy",1


In [7]:
# Function to calculate home advantage
def add_home_advantage(X):
    """Append a home-win rate per league-level pairing to a feature frame.

    For every (home_team_league_level, away_team_league_level) combination
    present in ``X`` the mean of ``home_win`` is computed, rounded to three
    decimals, and joined back onto each row as ``home_win_factor``.

    Parameters
    ----------
    X : pandas.DataFrame
        Must contain the columns ``home_team_league_level``,
        ``away_team_league_level`` and ``home_win``. It is not modified.

    Returns
    -------
    tuple (pandas.DataFrame, str)
        A new frame with the extra ``home_win_factor`` column, keeping the
        row order and index of ``X``, and the name of the added column.
    """
    level_columns = ['home_team_league_level', 'away_team_league_level']

    # Mean home-win rate for each league-level pairing found in X
    result = X.groupby(level_columns)['home_win'].mean().reset_index()
    result.columns = level_columns + ['home_win_factor']
    result['home_win_factor'] = result['home_win_factor'].round(3)

    # merge() returns a frame with a fresh 0..n-1 index. The lookup table has
    # one row per pairing, so a left merge keeps the row count and order of X
    # and the caller's index can be restored. Callers that re-attach columns
    # by index label would otherwise get misaligned rows.
    original_index = X.index
    X = X.merge(result, on=level_columns, how='left')
    X.index = original_index
    return X, 'home_win_factor'

In [73]:
def best_model_classifier(fa_cup_features_all_df, model_classifier, model_name, best_model_ranking_used, home_advt):
    """Fit one classifier on the training seasons and score it on the hold-out seasons.

    Seasons '21/22' and '22/23' form the validation set; every other season
    is used for training. Features are preprocessed inside a scikit-learn
    pipeline (median imputation + min-max scaling for numeric columns,
    most-frequent imputation + one-hot encoding for object columns, min-max
    scaling for the chosen ranking columns) before ``model_classifier`` is fit.

    Parameters
    ----------
    fa_cup_features_all_df : pandas.DataFrame
        Full feature table. Must contain 'season_year', 'match_id',
        'match_name', 'match_final_score' and 'target_variable'.
    model_classifier : estimator
        Unfitted scikit-learn compatible classifier; used as the final
        pipeline step.
    model_name : str
        Label used, together with the ranking name, as the results column name.
    best_model_ranking_used : str
        'no_ranking', or a ranking system name whose
        ``home_team_<name>`` / ``away_team_<name>`` columns are added as features.
    home_advt : str
        'yes' to add the ``home_win_factor`` feature via ``add_home_advantage``.

    Returns
    -------
    tuple (pandas.DataFrame, pandas.DataFrame)
        ``results_df`` - metrics produced by ``create_model_results_df`` with
        the rows whose metric name starts with 'Cross' removed;
        ``all_data_act_pred_df`` - one row per validation match holding the
        actual and predicted target, the model features and the match
        identification columns.
    """
    # Define the ranking systems
    ranking_systems = ['no_ranking','round_3_position', 'massey', 'colley', 'keener', 'trueskill', 'borda_count', 'local_kemeny_optimisation']

    # Split data into training and validation sets
    is_validation = fa_cup_features_all_df['season_year'].isin(['21/22', '22/23'])
    fa_cup_features_train = fa_cup_features_all_df[~is_validation]
    fa_cup_features_val = fa_cup_features_all_df[is_validation]

    # Identification columns are excluded from model training but retained for
    # the validation set so predictions can be analysed per match afterwards.
    analysis_columns = ['season_year', 'match_id', 'match_name','match_final_score']

    fa_cup_features_train = fa_cup_features_train.drop(analysis_columns, axis=1)

    # Positional (0..n-1) index: the validation rows are re-assembled by
    # position further down, independent of whatever index X_val ends up with.
    val_analysis_columns = fa_cup_features_val[analysis_columns].reset_index(drop=True)
    fa_cup_features_val = fa_cup_features_val.drop(analysis_columns, axis=1)

    # Get all columns except the target and ranking columns
    # NOTE(review): 'home_win' is not excluded here, so it is used as a model
    # feature if present - confirm it does not leak the target.
    target_variable = 'target_variable'
    base_features = [col for col in fa_cup_features_train.columns if col != target_variable and not any(f"{team}_{ranking}" in col for team in ['home_team', 'away_team'] for ranking in ranking_systems)]

    # Identify numeric and categorical columns
    numeric_features = fa_cup_features_train[base_features].select_dtypes(include=['int64', 'float64']).columns.tolist()
    categorical_features = fa_cup_features_train[base_features].select_dtypes(include=['object']).columns.tolist()

    # Define preprocessing steps
    numeric_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', MinMaxScaler())
    ])

    categorical_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='most_frequent')),
        ('onehot', OneHotEncoder(handle_unknown='ignore'))
    ])

    ranking_transformer = Pipeline(steps=[
        ('scaler', MinMaxScaler())
    ])

    # Ranking columns for the requested ranking system (none for 'no_ranking')
    if best_model_ranking_used == 'no_ranking':
        ranking_features = []
    else:
        ranking_features = [f'home_team_{best_model_ranking_used}', f'away_team_{best_model_ranking_used}']
    features = base_features + ranking_features

    # Create X_train and y_train
    X_train = fa_cup_features_train[features]
    y_train = fa_cup_features_train[target_variable]

    # Create X_val and y_val
    X_val = fa_cup_features_val[features]
    y_val = fa_cup_features_val[target_variable]

    if home_advt == 'yes':
        # NOTE(review): the validation factor is derived from the validation
        # rows' own 'home_win' values - confirm this is intended.
        X_train, home_advantage_column = add_home_advantage(X_train)
        X_val, _ = add_home_advantage(X_val)
        numeric_features = [home_advantage_column] + numeric_features

    # Create preprocessor. The ranking transformer is keyed on the function's
    # own `best_model_ranking_used` argument; the previous code read an
    # undefined name `ranking_used`, which raised NameError on a fresh kernel
    # or silently picked up a stale notebook global.
    transformers = [
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ]
    if ranking_features:
        transformers.append(('rank', ranking_transformer, ranking_features))
    preprocessor = ColumnTransformer(transformers=transformers)

    # Create pipeline
    pipeline = Pipeline([
        ('preprocessor', preprocessor),
        ('classifier', model_classifier)
    ])

    # Fit the model
    pipeline.fit(X_train, y_train)

    # Make predictions
    y_pred = pipeline.predict(X_val).astype(int)

    # Calculate metrics
    accuracy = accuracy_score(y_val, y_pred)
    recall = recall_score(y_val, y_pred)

    model_name_ranking = f"{model_name} {best_model_ranking_used}"

    results_df, cm_fig, roc_fig = create_model_results_df(y_val, y_pred, accuracy, recall, None, model_name_ranking)
    results_df = results_df.reset_index()
    results_df['metric_id'] = results_df.index + 1
    results_df = results_df[['metric_id', 'metric', model_name_ranking]]
    # Cross-validation rows do not apply to this single train/validation fit
    results_df = results_df[~results_df['metric'].str.startswith('Cross')]

    # Create a DataFrame with actual target and predicted target
    comparison_df = pd.DataFrame({
        'Actual': y_val.reset_index(drop=True),
        'Predicted': y_pred
    })

    # Re-attach the identification columns by row position. All three pieces
    # (comparison_df, validation_df, val_analysis_columns) share a 0..n-1
    # index, so rows line up even when X_val was rebuilt by a merge.
    validation_df = X_val.reset_index(drop=True)
    for col in analysis_columns:
        validation_df[col] = val_analysis_columns[col]

    all_data_act_pred_df = comparison_df.merge(validation_df, left_index=True, right_index=True)

    return results_df, all_data_act_pred_df



In [None]:
# MLP neural network: no home advantage, no ranking
model_name = "MLP Classifier Neural Network"
home_advt = 'no'
best_model_ranking_used =  'no_ranking'

# Best parameters from cross-validation folds evaluation analysis
model_classifier  = MLPClassifier( hidden_layer_sizes=(10,),
                                          activation='tanh',
                                          learning_rate_init = 0.1 ,
                                          max_iter=10000,
                                          random_state=47
                                      )
# Fit on the training seasons and score on the hold-out seasons
results_df, all_data_act_pred_df = best_model_classifier (fa_cup_features_all_df, model_classifier, model_name, best_model_ranking_used, home_advt)

# Keep this run's outputs under model-specific names; the generic names are
# overwritten by the next model cell and the results-merge cell reads results_nnn_df.
results_nnn_df = results_df
all_data_act_pred_nnn_df  = all_data_act_pred_df

display(results_nnn_df)
display(all_data_act_pred_nnn_df)





In [None]:
# MLP neural network: with home advantage, Massey ranking
model_name = "MLP Classifier Neural Network with home advantage"
home_advt = 'yes'
best_model_ranking_used =  'massey'

# Best parameters from cross-validation folds evaluation analysis
model_classifier  = MLPClassifier( hidden_layer_sizes=(20,20),
                                          activation='logistic',
                                          learning_rate_init = 0.1 ,
                                          max_iter=10000,
                                          random_state=47
                                      )
# Fit on the training seasons and score on the hold-out seasons
results_df, all_data_act_pred_df = best_model_classifier (fa_cup_features_all_df, model_classifier, model_name, best_model_ranking_used, home_advt)

# Keep this run's outputs under model-specific names; the generic names are
# overwritten by the next model cell and the results-merge cell reads results_nnhm_df.
results_nnhm_df = results_df
all_data_act_pred_nnhm_df  = all_data_act_pred_df

display(results_nnhm_df)
display(all_data_act_pred_nnhm_df)





In [None]:
# MLP neural network: with home advantage, Keener ranking
model_name = "MLP Classifier Neural Network with home advantage"
home_advt = 'yes'
best_model_ranking_used =  'keener'

# Best parameters from cross-validation folds evaluation analysis
model_classifier  = MLPClassifier( hidden_layer_sizes=(20,20),
                                          activation='relu',
                                          learning_rate_init = 0.01 ,
                                          max_iter=10000,
                                          random_state=47
                                      )
# Fit on the training seasons and score on the hold-out seasons
results_df, all_data_act_pred_df = best_model_classifier (fa_cup_features_all_df, model_classifier, model_name, best_model_ranking_used, home_advt)

# Keep this run's outputs under model-specific names; the generic names are
# overwritten by the next model cell and the results-merge cell reads results_nnhk_df.
results_nnhk_df = results_df
all_data_act_pred_nnhk_df  = all_data_act_pred_df

display(results_nnhk_df)
display(all_data_act_pred_nnhk_df)


In [None]:
# MLP neural network: with home advantage, local Kemeny optimisation ranking
model_name = "MLP Classifier Neural Network with home advantage"
home_advt = 'yes'
best_model_ranking_used =  'local_kemeny_optimisation'

# Best parameters from cross-validation folds evaluation analysis
model_classifier  = MLPClassifier( hidden_layer_sizes=(20,20),
                                          activation='logistic',
                                          learning_rate_init = 0.1 ,
                                          max_iter=10000,
                                          random_state=47
                                      )
# Fit on the training seasons and score on the hold-out seasons
results_df, all_data_act_pred_df = best_model_classifier (fa_cup_features_all_df, model_classifier, model_name, best_model_ranking_used, home_advt)

# Keep this run's outputs under model-specific names; the generic names are
# reused by other cells and the results-merge cell reads results_nnhl_df.
results_nnhl_df = results_df
all_data_act_pred_nnhl_df  = all_data_act_pred_df

display(results_nnhl_df)
display(all_data_act_pred_nnhl_df)

In [84]:
# Combine the four MLP result tables side by side, one column per model,
# matching rows on the metric identifier and name.
merge_keys = ['metric_id', 'metric']
all_results_df = results_nnn_df
for model_results_df in [results_nnhm_df, results_nnhk_df, results_nnhl_df]:
    all_results_df = all_results_df.merge(model_results_df, on=merge_keys, how='outer')

# Normalise column names: lower-case, spaces become underscores and any
# parentheses are stripped.
all_results_df = all_results_df.rename(
    columns=lambda name: name.lower().replace(' ','(_)').replace('(', '').replace(')', '')
)
display(all_results_df)

# Write the combined table to Excel in the current working directory.
# NOTE(review): to_excel() writes the DataFrame index by default, so the
# frame read back below carries an extra unnamed index column - confirm
# that column is wanted in BigQuery.
all_results_df.to_excel("best_ml_model_confusion_matrix_results.xlsx")

# Load data from Excel to Google BigQuery
all_results_from_excel = pd.read_excel("best_ml_model_confusion_matrix_results.xlsx")
load_dataset_name = 'analysis_layer'
load_table_name = 'best_ml_model_confusion_matrix_results'
full_table_name = f"{load_dataset_name}.{load_table_name}"

# Replace any existing table of the same name with the new results
pandas_gbq.to_gbq(
    all_results_from_excel,
    full_table_name,
    project_id='birkbeck-msc-project-422917',
    if_exists='replace',
)

print(f"\nData loaded to BigQuery table: {full_table_name}")

100%|██████████| 1/1 [00:00<00:00, 6990.51it/s]


Data loaded to BigQuery table: analysis_layer.best_ml_model_confusion_matrix_results





In [None]:
# Random Forest Classifier (no home advantage, unweighted)
# NOTE(review): classifer_models_optimisation is not defined in the cells
# visible in this notebook - confirm where it comes from before running.
model_name = "Random Forest Classifier"
home_advt = 'no'
weighted = 'no'
random_state = 47
model_classifier  = RandomForestClassifier()

# Define parameter grid for GridSearchCV.
# The 'classifier__' prefix targets the pipeline step named 'classifier';
# random_state is a single-value entry so every candidate uses the same seed.
param_grid = {
    'classifier__random_state': [random_state],
    'classifier__n_estimators': [50, 100, 200],
    'classifier__min_samples_leaf': [1, 2, 4],
    'classifier__max_features': [ None, 'sqrt']
}

all_results_df, fold_results_df  = classifer_models_optimisation (model_name, home_advt, weighted, model_classifier, param_grid, random_state)

# Keep the outputs under Random-Forest-specific names for the later merge cells
all_results_df_rf = all_results_df
display(all_results_df_rf)

fold_results_df_rf = fold_results_df
display(fold_results_df_rf)

# Change working directory to the project's output_files folder
os.chdir('/content/drive/MyDrive/birkbeck_msc-project/output_files')
# Save overall and per-fold results to Excel
all_results_df_rf.to_excel("all_rf_results.xlsx")
fold_results_df_rf.to_excel("fold_results_rf.xlsx")


In [None]:
# Random Forest Classifier with home advantage (unweighted)
# NOTE(review): classifer_models_optimisation is not defined in the cells
# visible in this notebook - confirm where it comes from before running.
model_name = "Random Forest Classifier with home advantage"
home_advt = 'yes'
weighted = 'no'
random_state = 47
model_classifier  = RandomForestClassifier()

# Define parameter grid for GridSearchCV.
# The 'classifier__' prefix targets the pipeline step named 'classifier';
# random_state is a single-value entry so every candidate uses the same seed.
param_grid = {
    'classifier__random_state': [random_state],
    'classifier__n_estimators': [50, 100, 200],
    'classifier__min_samples_leaf': [1, 2, 4],
    'classifier__max_features': [ None, 'sqrt']
}

all_results_df, fold_results_df  = classifer_models_optimisation (model_name, home_advt, weighted, model_classifier, param_grid, random_state)

# Keep the outputs under model-specific names for the later merge cells
all_results_df_rfh = all_results_df
display(all_results_df_rfh)

fold_results_df_rfh = fold_results_df
display(fold_results_df_rfh)

# Change working directory to the project's output_files folder
os.chdir('/content/drive/MyDrive/birkbeck_msc-project/output_files')
# Save overall and per-fold results to Excel
all_results_df_rfh.to_excel("all_rfh_results.xlsx")
fold_results_df_rfh.to_excel("fold_results_rfh.xlsx")

In [None]:
# XGBoost (no home advantage, unweighted)
# NOTE(review): classifer_models_optimisation is not defined in the cells
# visible in this notebook - confirm where it comes from before running.
model_name = "XG Boost"
home_advt = 'no'
weighted = 'no'
random_state = 47
model_classifier  = XGBClassifier()

# Define parameter grid for GridSearchCV.
# The 'classifier__' prefix targets the pipeline step named 'classifier';
# random_state is a single-value entry so every candidate uses the same seed.
param_grid = {
    'classifier__random_state': [random_state],
    'classifier__max_depth': [3,5],
    'classifier__min_child_weight': [1, 2],
    'classifier__subsample': [ 0.8, 1.0],
    'classifier__colsample_bytree': [ 0.6, 1.0],
    'classifier__learning_rate':  [0.01, 0.1]
}

all_results_df, fold_results_df  = classifer_models_optimisation (model_name, home_advt, weighted, model_classifier, param_grid, random_state)

# Keep the outputs under XGBoost-specific names for the later merge cells
all_results_df_xg = all_results_df
display(all_results_df_xg)

fold_results_df_xg = fold_results_df
display(fold_results_df_xg)

# Change working directory to the project's output_files folder
os.chdir('/content/drive/MyDrive/birkbeck_msc-project/output_files')
# Save overall and per-fold results to Excel
all_results_df_xg.to_excel("all_xg_results.xlsx")
fold_results_df_xg.to_excel("fold_results_xg.xlsx")

In [None]:
# XGBoost with home advantage (unweighted)
# NOTE(review): classifer_models_optimisation is not defined in the cells
# visible in this notebook - confirm where it comes from before running.
model_name = "XG Boost with home advantage"
home_advt = 'yes'
weighted = 'no'
random_state = 47
model_classifier  = XGBClassifier()

# Define parameter grid for GridSearchCV.
# The 'classifier__' prefix targets the pipeline step named 'classifier';
# random_state is a single-value entry so every candidate uses the same seed.
param_grid = {
    'classifier__random_state': [random_state],
    'classifier__max_depth': [3,5],
    'classifier__min_child_weight': [1, 2],
    'classifier__subsample': [ 0.8, 1.0],
    'classifier__colsample_bytree': [ 0.6, 1.0],
    'classifier__learning_rate':  [0.01, 0.1]
}

all_results_df, fold_results_df  = classifer_models_optimisation (model_name, home_advt, weighted, model_classifier, param_grid, random_state)

# Keep the outputs under model-specific names for the later merge cells
all_results_df_xgh = all_results_df
display(all_results_df_xgh)

fold_results_df_xgh = fold_results_df
display(fold_results_df_xgh)

# Change working directory to the project's output_files folder
os.chdir('/content/drive/MyDrive/birkbeck_msc-project/output_files')
# Save overall and per-fold results to Excel
all_results_df_xgh.to_excel("all_xgh_results.xlsx")
fold_results_df_xgh.to_excel("fold_results_xgh.xlsx")

In [None]:
# Combine the result tables of every model family side by side, matching rows
# on the metric identifier and name. Each merge puts the newly added model on
# the left, so the final column order runs from standard logistic regression
# through to XGBoost with home advantage.
# NOTE(review): the *_nn, *_nnh, *_wlr, *_wlrh, *_slr and *_slrh frames are
# not created in the cells visible in this notebook - confirm they exist in
# the kernel before running.
all_results_df = all_results_df_xg.merge(all_results_df_xgh, on=['metric_id', 'metric'], how='outer')
all_results_df = all_results_df_rfh.merge(all_results_df, on=['metric_id', 'metric'], how='outer')
all_results_df = all_results_df_rf.merge(all_results_df, on=['metric_id', 'metric'], how='outer')
all_results_df = all_results_df_nnh.merge(all_results_df, on=['metric_id', 'metric'], how='outer')
all_results_df = all_results_df_nn.merge(all_results_df, on=['metric_id', 'metric'], how='outer')
all_results_df = all_results_df_wlrh.merge(all_results_df, on=['metric_id', 'metric'], how='outer')
all_results_df = all_results_df_wlr.merge(all_results_df, on=['metric_id', 'metric'], how='outer')
all_results_df = all_results_df_slrh.merge(all_results_df, on=['metric_id', 'metric'], how='outer')
all_results_df = all_results_df_slr.merge(all_results_df, on=['metric_id', 'metric'], how='outer')

# Normalise column names: lower-case, spaces become underscores and any
# parentheses are stripped.
all_results_df = all_results_df.rename(columns=lambda x: x.lower().replace(' ','(_)').replace('(', '').replace(')', ''))
display(all_results_df)

# Write the combined table to Excel and read the SAME workbook back.
# Previously a different file ("all_results.xlsx") was read, so whatever that
# file happened to contain was uploaded instead of the table just built.
all_results_workbook = "ml_model_confusion_matrix_results.xlsx"
all_results_df.to_excel(all_results_workbook)

# Load data from Excel to Google BigQuery
all_results_from_excel = pd.read_excel(all_results_workbook)
load_dataset_name = 'analysis_layer'
load_table_name = 'ml_model_confusion_matrix_results'
full_table_name = f"{load_dataset_name}.{load_table_name}"

pandas_gbq.to_gbq(all_results_from_excel, full_table_name,
                  project_id='birkbeck-msc-project-422917',
                  if_exists='replace')

print(f"\nData loaded to BigQuery table: {full_table_name}")


Unnamed: 0,metric_id,metric,standard_logisticregression_with_home_advantage_no_ranking,standard_logisticregression_with_home_advantage_round_3_position,standard_logisticregression_with_home_advantage_massey,standard_logisticregression_with_home_advantage_colley,standard_logisticregression_with_home_advantage_keener,standard_logisticregression_with_home_advantage_trueskill,standard_logisticregression_with_home_advantage_borda_count,standard_logisticregression_with_home_advantage_local_kemeny_optimisation,...,xg_boost_borda_count,xg_boost_local_kemeny_optimisation,xg_boost_with_home_advantage_no_ranking,xg_boost_with_home_advantage_round_3_position,xg_boost_with_home_advantage_massey,xg_boost_with_home_advantage_colley,xg_boost_with_home_advantage_keener,xg_boost_with_home_advantage_trueskill,xg_boost_with_home_advantage_borda_count,xg_boost_with_home_advantage_local_kemeny_optimisation
0,1,Cross-validation Accuracy Mean,0.682,0.702,0.682,0.672,0.656,0.682,0.682,0.682,...,0.856,0.862,0.816,0.816,0.81,0.784,0.78,0.793,0.82,0.807
1,2,Cross-validation Accuracy Standard Deviation,0.03,0.026,0.04,0.045,0.048,0.03,0.043,0.043,...,0.06,0.075,0.039,0.048,0.027,0.028,0.03,0.029,0.075,0.064
2,3,Cross-validation Recall Mean,0.207,0.257,0.219,0.195,0.171,0.22,0.232,0.232,...,0.496,0.508,0.538,0.574,0.499,0.428,0.438,0.453,0.499,0.5
3,4,Cross-validation Recall Standard Deviation,0.049,0.029,0.046,0.021,0.061,0.051,0.046,0.046,...,0.261,0.316,0.178,0.164,0.188,0.206,0.238,0.183,0.264,0.292
4,5,Overall Accuracy,0.682,0.702,0.682,0.672,0.656,0.682,0.682,0.682,...,0.856,0.862,0.816,0.816,0.81,0.784,0.78,0.793,0.82,0.807
5,6,Confusion Matrix - True Negative (Class 0),191.0,193.0,190.0,189.0,186.0,190.0,189.0,189.0,...,220.0,221.0,205.0,202.0,206.0,204.0,202.0,205.0,209.0,205.0
6,7,Confusion Matrix - False Positive (Class 1),32.0,30.0,33.0,34.0,37.0,33.0,34.0,34.0,...,3.0,2.0,18.0,21.0,17.0,19.0,21.0,18.0,14.0,18.0
7,8,Confusion Matrix - False Negative (Class 0),65.0,61.0,64.0,66.0,68.0,64.0,63.0,63.0,...,41.0,40.0,38.0,35.0,41.0,47.0,46.0,45.0,41.0,41.0
8,9,Confusion Matrix - True Positive (Class 1),17.0,21.0,18.0,16.0,14.0,18.0,19.0,19.0,...,41.0,42.0,44.0,47.0,41.0,35.0,36.0,37.0,41.0,41.0
9,10,Precision (Class 0),0.746,0.76,0.748,0.741,0.732,0.748,0.75,0.75,...,0.843,0.847,0.844,0.852,0.834,0.813,0.815,0.82,0.836,0.833


100%|██████████| 1/1 [00:00<00:00, 1402.78it/s]


Data loaded to BigQuery table: analysis_layer.ml_model_confusion_matrix_results





In [None]:
# Load updated fold results to Google BigQuery

# Stack the per-fold results from each model into one long table.
# NOTE(review): fold_results_df_wlr is not in this list although the overall
# results merge includes all_results_df_wlr - confirm whether it was left out
# on purpose.
fold_results_df = pd.concat([fold_results_df_slr, fold_results_df_slrh, fold_results_df_wlrh, fold_results_df_nn, fold_results_df_nnh, fold_results_df_rf, fold_results_df_rfh, fold_results_df_xg, fold_results_df_xgh])

# Normalise column names: lower-case, spaces become underscores and any
# parentheses are stripped.
fold_results_df = fold_results_df.rename(columns=lambda x: x.lower().replace(' ','(_)').replace('(', '').replace(')', ''))
display(fold_results_df)

# Write to Excel and read the SAME workbook back. Previously a different file
# ("all_results_.xlsx") was read, so the table uploaded to BigQuery was not
# the one built above.
fold_results_workbook = "ml_model_fold_results.xlsx"
fold_results_df.to_excel(fold_results_workbook)

# Load fold results data from Excel to Google BigQuery
fold_results_from_excel = pd.read_excel(fold_results_workbook)
load_dataset_name = 'analysis_layer'
load_table_name = 'ml_model_fold_results'
full_table_name = f"{load_dataset_name}.{load_table_name}"

pandas_gbq.to_gbq(fold_results_from_excel, full_table_name,
                  project_id='birkbeck-msc-project-422917',
                  if_exists='replace')

print(f"\nData loaded to BigQuery table: {full_table_name}")


Unnamed: 0,model_name_ranking,fold,best_parameters_gridsearchcv,best_recall_gridsearchcv,validation_accuracy,validation_recall
0,Standard LogisticRegression no_ranking,1,"{'classifier__C': 10.0, 'classifier__penalty':...",0.348485,0.704918,0.187500
1,Standard LogisticRegression no_ranking,2,"{'classifier__C': 1000.0, 'classifier__penalty...",0.409091,0.639344,0.250000
2,Standard LogisticRegression no_ranking,3,"{'classifier__C': 100.0, 'classifier__penalty'...",0.348485,0.639344,0.187500
3,Standard LogisticRegression no_ranking,4,"{'classifier__C': 10.0, 'classifier__penalty':...",0.411255,0.688525,0.117647
4,Standard LogisticRegression no_ranking,5,"{'classifier__C': 1000.0, 'classifier__penalty...",0.383117,0.721311,0.176471
...,...,...,...,...,...,...
35,XG Boost with home advantage local_kemeny_opti...,1,"{'classifier__colsample_bytree': 1.0, 'classif...",0.742424,0.918033,0.687500
36,XG Boost with home advantage local_kemeny_opti...,2,"{'classifier__colsample_bytree': 1.0, 'classif...",0.621212,0.770492,0.812500
37,XG Boost with home advantage local_kemeny_opti...,3,"{'classifier__colsample_bytree': 1.0, 'classif...",0.803030,0.737705,0.000000
38,XG Boost with home advantage local_kemeny_opti...,4,"{'classifier__colsample_bytree': 1.0, 'classif...",0.601732,0.836066,0.647059


100%|██████████| 1/1 [00:00<00:00, 10810.06it/s]


Data loaded to BigQuery table: analysis_layer.ml_model_fold_results



