In [10]:
######## Create, Train, and Predict Models

# Loads train and val datasets
# Instantiates Models
# Trains Models
# Predicts using Models

Import All Required Modules

In [1]:
import sys

# Manually add the project root to sys.path so the project-local imports
# below (modules.*, model_binaries.*) can be resolved.
# The membership check keeps this cell idempotent: re-running it does not
# pile up duplicate entries in sys.path.
_project_root = '/Users/joaquinuriarte/Documents/GitHub/sports-betting/'
if _project_root not in sys.path:
    sys.path.append(_project_root)

# === STEP 0: Imports
from modules.model_manager.trainer.trainer import Trainer
from modules.model_manager.predictor.predictor import Predictor
from modules.model_manager.factories.model_factory import ModelFactory
from modules.model_manager.helpers.configuration_loader import ConfigurationLoader as MMConfigLoader
from modules.model_manager.model_manager import ModelManager
from sklearn.model_selection import KFold
from modules.data_structures.model_dataset import ModelDataset
from model_binaries.utils.binary_utils import save_entity, load_entity, cross_val_train, compute_f1, graph_entity, evaluate_modelV01_predictions

2025-01-30 09:11:03.725629: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# === STEP 1: Dependency Instantiations And Global Variable Declarations

# Single place where the local checkout location is written down; every path
# in this cell is derived from it, so relocating the project means editing
# one line instead of five.
PROJECT_ROOT = '/Users/joaquinuriarte/Documents/GitHub/sports-betting'

## === MODEL MANAGER
# Model configuration YAML files for model_v01.
MODEL_CONFIG_DIR = PROJECT_ROOT + '/configs/model_v01'
yaml_path0 = MODEL_CONFIG_DIR + '/model_v01_000.yaml'
yaml_path1 = MODEL_CONFIG_DIR + '/model_v01_001.yaml'
yaml_path2 = MODEL_CONFIG_DIR + '/model_v01_002.yaml'
yaml_path3 = MODEL_CONFIG_DIR + '/model_v01_003.yaml'


# NOTE(review): yaml_path2 is defined above but is not part of this list —
# confirm whether the omission is intentional.
yaml_paths = [yaml_path0, yaml_path1, yaml_path3]


# Directory handed to the Trainer.
# Presumably where checkpoints / weights are written — TODO confirm.
checkpoint = PROJECT_ROOT + '/models'

# Collaborators that are later injected into ModelManager.
trainer = Trainer(checkpoint)
predictor = Predictor()
model_factory = ModelFactory()
mm_configuration_loader = MMConfigLoader()

Load Train and Validation Datasets into Memory

In [3]:
# Folder containing the dataset files (train.pkl / val.pkl) that are loaded
# in the following cell
train_test_val_folder_path = (
    "/Users/joaquinuriarte/Documents/GitHub/sports-betting/processed_datasets/model_v01"
)

In [4]:
# Training split
train_dataset = load_entity(
    train_test_val_folder_path,
    "train.pkl",
)
# Validation split
validation_dataset = load_entity(
    train_test_val_folder_path,
    "val.pkl",
)

Create & Train Models on the predefined train and val datasets

In [5]:
# === STEP 4: MODEL MANAGER
# Wire the trainer, predictor, factory and config loader into one manager.
model_manager = ModelManager(
    trainer,
    predictor,
    model_factory,
    mm_configuration_loader,
)
# Configuration file(s) to build models from in this run.
yamls = [yaml_path3]

In [6]:
# Instantiate the model(s) described by the selected YAML config(s).
models = model_manager.create_models(yamls)

# train() is given (train, val) dataset pairs. When there is no validation
# dataset, None must be passed in its place; validation_dataset already is
# None in that case, so it is forwarded as-is (the previous
# `x if x is not None else None` conditional was a no-op).
val_dataset = validation_dataset

model_manager.train(models, [(train_dataset, val_dataset)], save_after_training=True)

INFO:root:Training model '43834563003bc611e0b4eb44ff245a3c' for 20 epochs with batch size 32.


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


INFO:root:Model '43834563003bc611e0b4eb44ff245a3c': Finished training.


Model saved successfully at: models/43834563003bc611e0b4eb44ff245a3c/model_weights_43834563003bc611e0b4eb44ff245a3c.pth


Assess Performance

In [8]:
# Run the first trained model on the validation examples, asking for the
# target labels to be returned alongside the predictions.
predictions = models[0].predict(
    validation_dataset.examples,
    return_target_labels=True,
)

In [9]:
# Get all metrics for trained model on val dataset predictions
# NOTE(review): the recorded output of this cell is `KeyError: 'actual_A'`.
# None of the column names passed below is 'actual_A', so that key is
# presumably looked up inside evaluate_modelV01_predictions — confirm the
# helper's expected column names / argument order in
# model_binaries.utils.binary_utils before relying on this cell.
metrics = evaluate_modelV01_predictions(predictions, "final_score_A", "final_score_B", "target_final_score_A", "target_final_score_B")
metrics

KeyError: 'actual_A'

In [None]:
# Plot how the predictions are distributed relative to the labels (50 bins)
graph_entity(
    predictions=predictions,
    bins=50,
)

In [10]:
# Display the predicted scores next to their target labels.
# A bare last-line expression is used so the notebook renders the object
# itself (the recorded output is shown truncated with a `...` row).
predictions

Unnamed: 0,final_score_A,final_score_B,target_final_score_A,target_final_score_B
0,114.103958,108.657242,100.0,90.0
1,98.552681,92.435143,113.0,85.0
2,101.859909,102.382309,92.0,84.0
3,110.537834,102.760582,100.0,89.0
4,112.125984,112.536713,105.0,113.0
...,...,...,...,...
2541,100.233932,99.834160,101.0,112.0
2542,103.362671,98.408669,95.0,98.0
2543,96.118019,96.558937,99.0,100.0
2544,104.148605,97.709427,124.0,105.0


Launch Tensorboard

In [None]:
%tensorboard --logdir=./logs

CROSS VALIDATION ALTERNATIVE

Create and Train Models Using KFold Cross Validation

In [24]:
# === STEP 4: MODEL MANAGER
# Fresh manager for the cross-validation run, built from the same
# trainer / predictor / factory / config-loader instances.
model_manager = ModelManager(
    trainer,
    predictor,
    model_factory,
    mm_configuration_loader,
)
# Configuration file(s) to cross-validate.
yamls = [yaml_path3]

Get Averaged Metrics for KFold

In [None]:
# 5-split cross-validation over the training dataset; the helper's result is
# indexed below by "val_precision" and "val_recall".
avg_metrics = cross_val_train(
    model_manager=model_manager,
    yamls=yamls,
    train_dataset=train_dataset,
    n_splits=5,
)
# F1 computed from the validation precision and recall entries.
f1 = compute_f1(
    avg_metrics["val_precision"],
    avg_metrics["val_recall"],
)
print(avg_metrics)
print(f1)