In [None]:
import os
import warnings

import joblib
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

from AFLPy.AFLData_Client import load_data
from expected_vaep_model.processing.data_preprocessor import ExpVAEPPreprocessor

# Notebook-wide display settings: install a blanket "ignore" warning filter and
# let pandas render every column of wide frames.
# NOTE(review): the blanket filter also hides deprecation/convergence warnings.
warnings.simplefilter('ignore')
pd.set_option('display.max_columns', None)

Load Chains and Expected Scores

In [None]:
# Pull the two input datasets through the AFLPy data client:
# the match chains and the expected-score table that is merged onto them below.
chains = load_data(
    Dataset_Name='AFL_API_Match_Chains',
    ID="AFL",
)
xscore = load_data(
    Dataset_Name="CG_Expected_Score",
    ID="AFL",
)

In [None]:
# (rows, columns) of each input, displayed as a pair for a quick sanity check.
(chains.shape, xscore.shape)

Merge Expected Scores onto Chains

In [None]:
# Peek at the first five chain rows before merging.
chains.head(n=5)

In [None]:
from expected_vaep_model.processing.merge_xscore_to_chains import merge_xscores_to_chains

In [None]:
# Attach the expected-score table to the chains using the project helper.
# NOTE(review): the join keys and join type live in merge_xscores_to_chains and are
# not visible here — confirm row counts are as expected after the merge.
xchains = merge_xscores_to_chains(chains, xscore)

In [None]:
# Peek at the first five merged rows.
xchains.head(n=5)

Preprocess into Expected VAEP Features / Labels

In [None]:
# Build the preprocessor with its default arguments. The same object is
# exported with joblib at the end of the notebook.
xvaep_preproc = ExpVAEPPreprocessor()

In [None]:
# `transform` returns three objects:
#   schema_chains      - later row-selected with the features' index and concatenated
#                        column-wise with features/labels
#   gamestate_features - model inputs for both the scores and concedes models
#   gamestate_labels   - holds the `exp_scores_label` and `exp_concedes_label` targets
# NOTE(review): `transform` is called without a prior `fit` in this notebook — confirm
# the preprocessor needs no fitting.
schema_chains, gamestate_features, gamestate_labels = xvaep_preproc.transform(xchains)

In [None]:
# Convert every object-dtype feature column to bool in place.
# NOTE(review): astype(bool) on object data is truthiness-based (any non-empty string,
# and NaN, become True) — confirm these columns only hold real True/False values.
_object_features = gamestate_features.select_dtypes(include='object')
gamestate_features[_object_features.columns.tolist()] = _object_features.astype(bool)

### Scores

Create Train and Test Set

In [None]:
# Hold out 20% of the game states for testing; the fixed seed keeps the split reproducible.
(
    X_train_scores,
    X_test_scores,
    y_train_scores,
    y_test_scores,
) = train_test_split(
    gamestate_features,
    gamestate_labels['exp_scores_label'],
    random_state=42,
    test_size=0.2,
)

In [None]:
# Mean label in each split, shown side by side to check the two splits look alike.
(y_train_scores.mean(), y_test_scores.mean())

Hyperparameter Tuning

In [None]:
from expected_vaep_model.modelling.hyperparameter_tuning import XGBHyperparameterTuner
from expected_vaep_model.modelling.optuna_xgb_param_grid import OptunaXGBParamGrid

In [None]:
# Tune hyperparameters on the scores training split only; the best parameters are
# read back with get_best_params() in the fitting cell.
# NOTE(review): the name `xgb_tuner` is rebound in the Concedes section, so the scores
# fitting cell must run before that one.
xgb_tuner = XGBHyperparameterTuner(X_train_scores, y_train_scores)
xgb_tuner.tune_hyperparameters()

Fitting Model

In [None]:
from expected_vaep_model.modelling.supermodel import SuperXGBRegressor

In [None]:
# Start from the tuned hyperparameters, then layer the fixed training settings on top.
params = xgb_tuner.get_best_params()
params.update({
    'objective': OptunaXGBParamGrid.error,
    'num_rounds': 100,
    'early_stopping_rounds': OptunaXGBParamGrid.early_stopping_rounds,
    'verbosity': OptunaXGBParamGrid.verbosity,
    'monotone_constraints': {},  # an empty mapping is passed
})

print('Fitting model.')
# The wrapper receives both splits; the test split is handed over alongside
# `early_stopping_rounds` (presumably as the early-stopping eval set — confirm in SuperXGBRegressor).
super_xgb_scores = SuperXGBRegressor(
    X_train=X_train_scores,
    y_train=y_train_scores,
    X_test=X_test_scores,
    y_test=y_test_scores,
    params=params,
)
super_xgb_scores.fit()

Predict

In [None]:
# Predict on both splits and clamp the outputs to the range [0, 6].
# The resulting Series are re-indexed to match the feature frames in the next cell.
train_score_preds = pd.Series(
    data=np.clip(super_xgb_scores.predict(X_train_scores), 0, 6),
    name='exp_scores',
)
test_score_preds = pd.Series(
    data=np.clip(super_xgb_scores.predict(X_test_scores), 0, 6),
    name='exp_scores',
)

In [None]:
# Give the predictions the same row labels as the features they were computed from,
# so the column-wise concat below lines rows up correctly.
train_score_preds = train_score_preds.set_axis(X_train_scores.index)
test_score_preds = test_score_preds.set_axis(X_test_scores.index)

Model Evaluation

In [None]:
import shap

In [None]:
from expected_vaep_model.evaluation.model_evaluation import XGBRegressorEvaluator

In [None]:
# Assemble one wide frame per split: schema rows, features, label and prediction,
# all aligned on the split's index.
train_evals_data = pd.concat(
    [
        schema_chains.loc[X_train_scores.index],
        X_train_scores,
        y_train_scores,
        train_score_preds,
    ],
    axis="columns",
)
test_evals_data = pd.concat(
    [
        schema_chains.loc[X_test_scores.index],
        X_test_scores,
        y_test_scores,
        test_score_preds,
    ],
    axis="columns",
)

In [None]:
# (rows, columns) of the scores test evaluation frame.
test_evals_data.shape

In [None]:
# Force the two mark flags to bool on the frame handed to the evaluator below.
_mark_cols = ['mark_a1', 'mark_a2']
test_evals_data[_mark_cols] = test_evals_data[_mark_cols].astype(bool)

In [None]:
# Evaluator for the scores model on the held-out split:
# compares the `exp_scores_label` column against the `exp_scores` predictions.
scores_test_evals = XGBRegressorEvaluator(
    model=super_xgb_scores.xgb_model,
    data=test_evals_data,
    actual_name="exp_scores_label",
    expected_name="exp_scores",
)

In [None]:
# Summary statistics of label vs. prediction on the test split.
test_evals_data.loc[:, ['exp_scores_label', 'exp_scores']].describe()

In [None]:
# Distribution plot from the scores evaluator (built on the test split above).
scores_test_evals.plot_distribution()

In [None]:
# Top 10 features of the scores model, ranked by the "total_gain" importance type.
scores_test_evals.plot_feature_importance(
    importance_type="total_gain",
    max_num_features=10,
)

In [None]:
# SHAP summary plot for the scores model.
# NOTE(review): `sample` is presumably the number of rows used for SHAP — confirm in
# XGBRegressorEvaluator; the Concedes section passes sample=10000 instead of 1000.
scores_test_evals.plot_shap_summary_plot(sample=1000)

### Concedes

Create Train and Test Set

In [None]:
# Hold out 20% of the game states for the concedes model, using the same
# test_size and random_state as the scores split.
(
    X_train_concedes,
    X_test_concedes,
    y_train_concedes,
    y_test_concedes,
) = train_test_split(
    gamestate_features,
    gamestate_labels['exp_concedes_label'],
    random_state=42,
    test_size=0.2,
)

In [None]:
# Mean label in each split, shown side by side to check the two splits look alike.
(y_train_concedes.mean(), y_test_concedes.mean())

Hyperparameter Tuning

In [None]:
from expected_vaep_model.modelling.hyperparameter_tuning import XGBHyperparameterTuner
from expected_vaep_model.modelling.optuna_xgb_param_grid import OptunaXGBParamGrid

In [None]:
# Tune hyperparameters on the concedes training split only.
# NOTE(review): this rebinds `xgb_tuner`, replacing the scores tuner created earlier;
# re-running the scores fitting cell after this one would pick up these parameters.
xgb_tuner = XGBHyperparameterTuner(X_train_concedes, y_train_concedes)
xgb_tuner.tune_hyperparameters()

Fitting Model

In [None]:
from expected_vaep_model.modelling.supermodel import SuperXGBRegressor

In [None]:
# Start from the tuned hyperparameters, then layer the fixed training settings on top.
params = xgb_tuner.get_best_params()
params.update({
    'objective': OptunaXGBParamGrid.error,
    'num_rounds': 100,
    'early_stopping_rounds': OptunaXGBParamGrid.early_stopping_rounds,
    'verbosity': OptunaXGBParamGrid.verbosity,
    'monotone_constraints': {},  # an empty mapping is passed
})

print('Fitting model.')
# The wrapper receives both splits; the test split is handed over alongside
# `early_stopping_rounds` (presumably as the early-stopping eval set — confirm in SuperXGBRegressor).
super_xgb_concedes = SuperXGBRegressor(
    X_train=X_train_concedes,
    y_train=y_train_concedes,
    X_test=X_test_concedes,
    y_test=y_test_concedes,
    params=params,
)
super_xgb_concedes.fit()

Predict

In [None]:
# Predict on both splits and clamp the outputs to the range [0, 6].
# The resulting Series are re-indexed to match the feature frames in the next cell.
train_concedes_preds = pd.Series(
    data=np.clip(super_xgb_concedes.predict(X_train_concedes), 0, 6),
    name='exp_concedes',
)
test_concedes_preds = pd.Series(
    data=np.clip(super_xgb_concedes.predict(X_test_concedes), 0, 6),
    name='exp_concedes',
)

In [None]:
# Give the predictions the same row labels as the features they were computed from,
# so the column-wise concat below lines rows up correctly.
train_concedes_preds = train_concedes_preds.set_axis(X_train_concedes.index)
test_concedes_preds = test_concedes_preds.set_axis(X_test_concedes.index)

Model Evaluation

In [None]:
import shap

In [None]:
from expected_vaep_model.evaluation.model_evaluation import XGBRegressorEvaluator

In [None]:
# Assemble one wide frame per split: schema rows, features, label and prediction,
# all aligned on the split's index.
train_evals_concedes_data = pd.concat(
    [
        schema_chains.loc[X_train_concedes.index],
        X_train_concedes,
        y_train_concedes,
        train_concedes_preds,
    ],
    axis="columns",
)
test_evals_concedes_data = pd.concat(
    [
        schema_chains.loc[X_test_concedes.index],
        X_test_concedes,
        y_test_concedes,
        test_concedes_preds,
    ],
    axis="columns",
)

In [None]:
# (rows, columns) of the concedes training evaluation frame.
# NOTE(review): the Scores section inspects the *test* frame at this point.
train_evals_concedes_data.shape

In [None]:
# The evaluator below is built on `test_evals_concedes_data`, so that is the frame whose
# mark flags must be boolean (the Scores section likewise casts `test_evals_data`).
# Previously only the training frame was cast, which the evaluator never sees; it is
# still cast here so both concedes evaluation frames share dtypes.
_mark_cols = ['mark_a1', 'mark_a2']
test_evals_concedes_data[_mark_cols] = test_evals_concedes_data[_mark_cols].astype(bool)
train_evals_concedes_data[_mark_cols] = train_evals_concedes_data[_mark_cols].astype(bool)

In [None]:
# Evaluator for the concedes model on the held-out split:
# compares the `exp_concedes_label` column against the `exp_concedes` predictions.
concedes_test_evals = XGBRegressorEvaluator(
    model=super_xgb_concedes.xgb_model,
    data=test_evals_concedes_data,
    actual_name="exp_concedes_label",
    expected_name="exp_concedes",
)

In [None]:
# Summary statistics of label vs. prediction on the test split.
test_evals_concedes_data.loc[:, ['exp_concedes_label', 'exp_concedes']].describe()

In [None]:
# Distribution plot from the concedes evaluator (built on the test split above).
concedes_test_evals.plot_distribution()

In [None]:
# Top 10 features of the concedes model, ranked by the "total_gain" importance type.
concedes_test_evals.plot_feature_importance(
    importance_type="total_gain",
    max_num_features=10,
)

In [None]:
# SHAP summary plot for the concedes model.
# NOTE(review): sample=10000 here vs. sample=1000 in the Scores section — confirm the
# difference is intentional.
concedes_test_evals.plot_shap_summary_plot(sample=10000)

Expected VAEP

In [None]:
# Widen `schema_chains` with the feature and label columns.
# NOTE(review): this rebinds `schema_chains` to itself plus extra columns, so running the
# cell twice appends the feature/label columns a second time — run it once per kernel.
schema_chains = pd.concat(
    [schema_chains, gamestate_features, gamestate_labels],
    axis="columns",
)

In [None]:
# Score every game state (train and test rows alike) with both fitted models,
# clamping each prediction to the range [0, 6].
for _column, _model in (
    ('exp_scores', super_xgb_scores),
    ('exp_concedes', super_xgb_concedes),
):
    schema_chains[_column] = np.clip(_model.predict(gamestate_features), 0, 6)

In [None]:
from expected_vaep_model.processing.calculate_exp_vaep import calculate_exp_vaep_values

In [None]:
# Derive the expected-VAEP values from the scored chains using the project helper.
# The input carries the `exp_scores` / `exp_concedes` columns written in the previous cell.
xvaep_chains = calculate_exp_vaep_values(schema_chains)

In [None]:
# Derive helper columns from the underscore-separated `match_id`:
#   Match_ID - plain copy of `match_id`
#   year     - second field, parsed as int
#   round    - third field, kept as a string
_match_ids = xvaep_chains['match_id']
xvaep_chains['Match_ID'] = _match_ids
xvaep_chains['year'] = _match_ids.apply(lambda match_id: int(match_id.split("_")[1]))
xvaep_chains['round'] = _match_ids.apply(lambda match_id: match_id.split("_")[2])

In [None]:
# Peek at the first five scored rows.
xvaep_chains.head(n=5)

Export Models

In [None]:
# Where the fitted models are written. The default is the original author's local checkout;
# set the EXP_VAEP_MODEL_DIR environment variable to export elsewhere without editing this cell.
model_output_dir = os.environ.get(
    'EXP_VAEP_MODEL_DIR',
    '/Users/ciaran/Documents/Projects/AFL/git-repositories/expected-vaep-model/model_outputs/models',
)
# Make sure the target directory exists before writing into it.
os.makedirs(model_output_dir, exist_ok=True)

# Persist both fitted model wrappers (scores and concedes).
joblib.dump(super_xgb_scores, f'{model_output_dir}/exp_vaep_scores.joblib')
joblib.dump(super_xgb_concedes, f'{model_output_dir}/exp_vaep_concedes.joblib')

Export Preprocessor

In [None]:
# Where the preprocessor is written. The default is the original author's local checkout;
# set the EXP_VAEP_PREPROCESSOR_DIR environment variable to export elsewhere without editing this cell.
preproc_output_dir = os.environ.get(
    'EXP_VAEP_PREPROCESSOR_DIR',
    '/Users/ciaran/Documents/Projects/AFL/git-repositories/expected-vaep-model/model_outputs/preprocessors',
)
# Make sure the target directory exists before writing into it.
os.makedirs(preproc_output_dir, exist_ok=True)
# Persist the preprocessor object that produced the features above.
joblib.dump(xvaep_preproc, f'{preproc_output_dir}/exp_vaep_preprocessor.joblib')

Upload Scored Data

In [None]:
from AFLPy.AFLData_Client import upload_data, load_data

In [None]:
# Upload only the rows whose `year` is 2024.
# NOTE(review): the two cells below upload overlapping data (every year 2021-2024, then the
# whole frame) with overwrite=True — confirm which of the three uploads is actually intended.
_rows_2024 = xvaep_chains[xvaep_chains['year'] == 2024]
upload_data(
    Dataset=_rows_2024,
    Dataset_Name="CG_Expected_VAEP",
    overwrite=True,
    update_if_identical=True,
)

In [None]:
# Upload the scored chains one year at a time, for 2021 through 2024 inclusive.
# NOTE(review): each call passes overwrite=True for the same dataset name — confirm that
# upload_data partitions by year rather than replacing the previous year's upload.
for year in (2021, 2022, 2023, 2024):
    _rows_for_year = xvaep_chains[xvaep_chains['year'] == year]
    upload_data(
        Dataset=_rows_for_year,
        Dataset_Name="CG_Expected_VAEP",
        overwrite=True,
        update_if_identical=True,
    )

In [None]:
# Upload the complete scored frame (all years) in one call.
upload_data(
    Dataset=xvaep_chains,
    Dataset_Name="CG_Expected_VAEP",
    overwrite=True,
    update_if_identical=True,
)

In [None]:
from AFLPy.AFLData_Client import metadata

In [None]:
# Fetch the dataset's metadata and show the earliest `Time_Created` value.
md = metadata(
    Dataset_Name="CG_Expected_VAEP",
    ID="AFL",
)
md['Time_Created'].min()

In [None]:
# Read the uploaded dataset back and show its first five rows as a round-trip check.
xvaep_data = load_data(
    Dataset_Name="CG_Expected_VAEP",
    ID="AFL",
)
xvaep_data.head(n=5)