## Imports

In [None]:
import io

import boto3
import pandas as pd

## Downloading processed features

In [4]:
# Low-level S3 client. No credentials or region are passed here, so whatever
# boto3 resolves from its default configuration is used.
s3 = boto3.client("s3")

In [5]:
# Sanity check: print the names of the buckets returned for this client.
buckets = s3.list_buckets()
print([bucket["Name"] for bucket in buckets["Buckets"]])

['epl-predictions-data-storage-478340992394', 'epl-predictions-mlflow-artifacts-478340992394', 'epl-predictor-tf-state']


In [23]:
# Fetch the processed feature table (a parquet object) from the data-storage bucket.
# The returned response carries the object bytes as a streaming "Body".
file_name = "processed/epl_features.parquet"
response = s3.get_object(Bucket="epl-predictions-data-storage-478340992394", Key=file_name)

In [24]:
# Read the whole S3 object body into memory and parse those bytes as a parquet file.
df = pd.read_parquet(io.BytesIO(response["Body"].read()))

In [25]:
# Preview the first rows of the loaded feature table.
df.head()

Unnamed: 0,match_id,date,hometeam,awayteam,home_wins_last_n,home_draws_last_n,home_losses_last_n,home_goals_scored_last_n,home_goals_conceded_last_n,home_form_points,...,h2h_draws,whd_home_avg,wha_home_avg,whh_home_avg,whd_away_avg,wha_away_avg,whh_away_avg,target_result,target_home_goals,target_away_goals
0,0,2000-08-19,Sunderland,Arsenal,0,0,0,0,0,0,...,0,,,,,,,H,1,0
1,1,2000-08-19,Liverpool,Bradford,0,0,0,0,0,0,...,0,,,,,,,H,1,0
2,2,2000-08-19,Leicester,Aston Villa,0,0,0,0,0,0,...,0,,,,,,,D,0,0
3,3,2000-08-19,Leeds,Everton,0,0,0,0,0,0,...,0,,,,,,,H,2,0
4,4,2000-08-19,Tottenham,Ipswich,0,0,0,0,0,0,...,0,,,,,,,H,3,1


## Model training

In [None]:
import numpy as np
import pandas as pd

import mlflow
from mlflow.entities import ViewType
from mlflow import MlflowClient

from catboost import CatBoostClassifier, Pool
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    f1_score, accuracy_score, precision_score, recall_score, 
    classification_report, roc_auc_score
)

from xgboost import XGBClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import LabelEncoder

In [62]:
# Point MLflow at the tracking server on localhost:5000 and make
# "epl_predictions" the active experiment for every run started below.
mlflow.set_tracking_uri("http://localhost:5000")
mlflow.set_experiment("epl_predictions")

# Client handle used later for run search and model-registry calls.
client = MlflowClient()

### Train-Validation split

In [None]:
# Rows for early matches lack enough history, leaving NaNs in their features;
# replace every NaN in the frame with 0.0 (the original `df` is left untouched).
df_ml = df.fillna(0.0)

# Model inputs: every column except identifiers/metadata and the target_* columns.
feature_cols = [
    col
    for col in df_ml.columns
    if not col.startswith('target_')
    and col not in ['match_id', 'date', 'div', 'season']
]

# Feature matrix and the match-result label.
X = df_ml[feature_cols]
y = df_ml["target_result"]

In [74]:
# Inspect the last five feature rows.
X.tail(5)

Unnamed: 0,hometeam,awayteam,home_wins_last_n,home_draws_last_n,home_losses_last_n,home_goals_scored_last_n,home_goals_conceded_last_n,home_form_points,away_wins_last_n,away_draws_last_n,...,away_form_points,h2h_home_wins,h2h_away_wins,h2h_draws,whd_home_avg,wha_home_avg,whh_home_avg,whd_away_avg,wha_away_avg,whh_away_avg
8735,Ipswich,West Ham,0,1,4,2,12,1,1,2,...,5,0,1,1,3.746861,4.332935,2.368596,3.627224,4.040413,2.499936
8736,Fulham,Man City,2,0,3,7,9,6,4,1,...,13,1,10,3,3.619192,4.165946,2.523772,4.688571,6.252343,3.566526
8737,Bournemouth,Leicester,1,2,2,4,6,5,2,1,...,7,4,0,2,4.006026,4.44013,2.668948,3.710325,4.158901,2.543176
8738,Liverpool,Crystal Palace,2,1,2,11,9,7,2,3,...,9,6,4,1,4.165736,5.263877,3.030688,3.739806,4.391757,2.762035
8739,Wolves,Brentford,2,0,3,6,7,6,4,0,...,12,1,2,0,3.784099,4.423717,2.719272,3.900398,4.026331,2.570469


In [64]:
# Hold out 20% of the rows for validation, stratified on the label so both
# parts keep the same class proportions; the seed makes the split repeatable.
# NOTE(review): rows are shuffled, so validation matches can predate training
# matches — confirm a chronological split is not needed for these features.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [None]:
# Categorical model inputs: object-dtype columns of df_ml that are also features.
categorical_cols = [
    col
    for col in df_ml.select_dtypes(include=['object']).columns
    if col in feature_cols
]

In [75]:
# Show which feature columns were detected as categorical.
categorical_cols

['hometeam', 'awayteam']

### CatBoost model

In [None]:
# Update your pools with categorical features
train_pool = Pool(X_train, label=y_train, cat_features=categorical_cols)
val_pool = Pool(X_val, label=y_val, cat_features=categorical_cols)

In [66]:
# Hyperopt search space for CatBoost: three sampled hyperparameters plus
# settings held constant for every trial.
space = {
    # sampled by hyperopt
    "learning_rate": hp.uniform("learning_rate", 0.01, 0.3),
    "depth": hp.quniform("depth", 3, 10, 1),  # cast to int inside `objective`
    "l2_leaf_reg": hp.uniform("l2_leaf_reg", 1, 10),
    # fixed for all trials
    "iterations": 200,
    "loss_function": "MultiClass",
    "verbose": 0,
    "cat_features": categorical_cols,
}

In [None]:
def objective(params):
    """Hyperopt objective: fit one CatBoost candidate and score it on the validation split.

    Each call opens a nested MLflow run tagged as a CatBoost hyperopt trial,
    fits on ``train_pool`` with early stopping against ``val_pool``, and logs
    the parameters, validation metrics, classification report and the model.

    Args:
        params: Hyperparameter dict sampled by hyperopt. ``depth`` is cast to
            ``int`` in place before the model is built.

    Returns:
        dict: ``loss`` is the negated macro F1 (hyperopt minimises the loss)
        and ``status`` is ``STATUS_OK``.
    """
    params['depth'] = int(params['depth'])  # the sampled depth arrives as a float

    with mlflow.start_run(
        nested=True,
        tags={"model": "catboost", "type": "hyperopt"},
        description="Hyperopt optimization for CatBoost model",
    ):
        model = CatBoostClassifier(**params)
        model.fit(train_pool, eval_set=val_pool, early_stopping_rounds=30)

        y_pred = model.predict(X_val)
        y_pred_proba = model.predict_proba(X_val)

        # Validation metrics, keyed by the name they are logged under.
        val_metrics = {
            "val_f1_macro": f1_score(y_val, y_pred, average='macro'),
            "val_f1_weighted": f1_score(y_val, y_pred, average='weighted'),
            "val_accuracy": accuracy_score(y_val, y_pred),
            "val_precision_macro": precision_score(y_val, y_pred, average='macro'),
            "val_recall_macro": recall_score(y_val, y_pred, average='macro'),
            # one-vs-rest ROC AUC computed from the class probabilities
            "val_roc_auc": roc_auc_score(
                y_val, y_pred_proba, multi_class='ovr', average='macro'
            ),
        }

        mlflow.log_params(params)
        for metric_name, metric_value in val_metrics.items():
            mlflow.log_metric(metric_name, metric_value)

        # Per-class report stored as a JSON artifact next to the model.
        report = classification_report(y_val, y_pred, output_dict=True)
        mlflow.log_dict(report, "classification_report.json")

        mlflow.catboost.log_model(model, "model")

        return {'loss': -val_metrics["val_f1_macro"], 'status': STATUS_OK}

In [None]:
# Parent run grouping the nested per-trial runs opened by `objective`.
with mlflow.start_run(run_name="catboost_hyperopt"):
    # 30 TPE-guided trials; the seeded generator makes the search repeatable.
    best = fmin(
        objective,
        space,
        algo=tpe.suggest,
        max_evals=30,
        trials=Trials(),
        rstate=np.random.default_rng(42),
    )

    print(f"✅ Best parameters: {best}")
    # Record the winning sampled values on the parent run.
    mlflow.log_params(best)

In [79]:
# Start from the values hyperopt selected, then re-apply the fixed settings
# (`best` only contains the sampled hyperparameters).
best_params = dict(best)
best_params.update(
    iterations=200,
    loss_function='MultiClass',
    verbose=0,
    cat_features=categorical_cols,
)

In [139]:
# Train, evaluate and log the final CatBoost model with the best hyperparameters.
best_params['depth'] = int(best_params['depth'])  # hyperopt returns depth as a float

with mlflow.start_run(
    run_name="catboost_final_model",
    # Tagged "final" (not "hyperopt") so that filtering runs on
    # tags.type = 'final' finds this run, like the xgboost_final_model run.
    tags={"model": "catboost", "type": "final"},
    description="Final CatBoost model with best parameters",
):
    model = CatBoostClassifier(**best_params)
    model.fit(train_pool, eval_set=val_pool, early_stopping_rounds=30)

    y_pred = model.predict(X_val)
    y_pred_proba = model.predict_proba(X_val)

    # Validation metrics
    f1_macro = f1_score(y_val, y_pred, average='macro')
    f1_weighted = f1_score(y_val, y_pred, average='weighted')
    accuracy = accuracy_score(y_val, y_pred)
    precision_macro = precision_score(y_val, y_pred, average='macro')
    recall_macro = recall_score(y_val, y_pred, average='macro')

    # One-vs-rest ROC AUC computed from the class probabilities
    roc_auc = roc_auc_score(y_val, y_pred_proba, multi_class='ovr', average='macro')

    # Log parameters and metrics; final runs use un-prefixed metric names
    mlflow.log_params(best_params)
    mlflow.log_metric("f1_macro", f1_macro)
    mlflow.log_metric("f1_weighted", f1_weighted)
    mlflow.log_metric("accuracy", accuracy)
    mlflow.log_metric("precision_macro", precision_macro)
    mlflow.log_metric("recall_macro", recall_macro)
    mlflow.log_metric("roc_auc", roc_auc)

    # Log the per-class report as a JSON artifact
    report = classification_report(y_val, y_pred, output_dict=True)
    mlflow.log_dict(report, "classification_report.json")

    mlflow.catboost.log_model(model, "model")



🏃 View run catboost_final_model at: http://localhost:5000/#/experiments/3/runs/b0829d3f334c4db597b2c28ba01cbb96
🧪 View experiment at: http://localhost:5000/#/experiments/3


In [86]:
# Reload a logged CatBoost model directly from its artifact location in S3.
# NOTE(review): the model id in this URI is hard-coded; presumably it is the
# catboost_final_model artifact — confirm, and update it after retraining.
model = mlflow.catboost.load_model("s3://epl-predictions-mlflow-artifacts-478340992394/3/models/m-27ea07b6f4d2411ebac8c6e96211f79f/artifacts")

Downloading artifacts:   0%|          | 0/5 [00:01<?, ?it/s]

In [90]:
# Spot-check the reloaded model: validation predictions in column 0, with the
# true labels (y_val) passed as the frame's index.
pd.DataFrame(model.predict(X_val), y_val)

Unnamed: 0_level_0,0
target_result,Unnamed: 1_level_1
A,H
H,A
A,A
H,H
D,H
...,...
H,H
A,A
D,A
H,H


### XGBoost

In [117]:
# Encode categorical features for XGBoost
label_encoders = {}
X_train_encoded = X_train.copy()
X_val_encoded = X_val.copy()

for col in categorical_cols:
    le = LabelEncoder()
    X_train_encoded[col] = le.fit_transform(X_train[col].astype(str))
    X_val_encoded[col] = le.transform(X_val[col].astype(str))
    label_encoders[col] = le

In [118]:
# Encode target variable
target_encoder = LabelEncoder()
y_train_encoded = target_encoder.fit_transform(y_train)
y_val_encoded = target_encoder.transform(y_val)

In [119]:
# Hyperopt search space for XGBoost: seven sampled hyperparameters plus
# settings held constant for every trial.
xgb_space = {
    # sampled by hyperopt (the two quniform entries are cast to int in `xgb_objective`)
    "max_depth": hp.quniform("max_depth", 3, 10, 1),
    "learning_rate": hp.uniform("learning_rate", 0.01, 0.3),
    "n_estimators": hp.quniform("n_estimators", 100, 500, 50),
    "subsample": hp.uniform("subsample", 0.6, 1.0),
    "colsample_bytree": hp.uniform("colsample_bytree", 0.6, 1.0),
    "reg_alpha": hp.uniform("reg_alpha", 0, 10),
    "reg_lambda": hp.uniform("reg_lambda", 1, 10),
    # fixed for all trials
    "random_state": 42,
    "objective": "multi:softprob",
    "num_class": 3,
    "eval_metric": "mlogloss",
}

In [120]:
def xgb_objective(params):
    """Hyperopt objective: fit one XGBoost candidate and score it on the validation split.

    Each call opens a nested MLflow run tagged as an XGBoost hyperopt trial,
    trains on the label-encoded features and targets, and logs the parameters,
    validation metrics, classification report, model and encoder classes.

    Args:
        params: Hyperparameter dict sampled by hyperopt. ``max_depth`` and
            ``n_estimators`` are cast to ``int`` in place.

    Returns:
        dict: ``loss`` is the negated macro F1 (hyperopt minimises the loss)
        and ``status`` is ``STATUS_OK``.
    """
    # These two sampled values arrive as floats
    for int_param in ('max_depth', 'n_estimators'):
        params[int_param] = int(params[int_param])

    with mlflow.start_run(
        nested=True,
        tags={"model": "xgboost", "type": "hyperopt"},
        description="Hyperopt optimization for XGBoost model",
    ):
        model = XGBClassifier(**params)
        model.fit(
            X_train_encoded,
            y_train_encoded,
            eval_set=[(X_val_encoded, y_val_encoded)],
            verbose=False,
        )

        y_pred = model.predict(X_val_encoded)
        y_pred_proba = model.predict_proba(X_val_encoded)

        # Label-based metrics are computed on the original string labels;
        # ROC AUC uses the encoded targets together with the probabilities.
        y_pred_original = target_encoder.inverse_transform(y_pred)

        val_metrics = {
            "val_f1_macro": f1_score(y_val, y_pred_original, average='macro'),
            "val_f1_weighted": f1_score(y_val, y_pred_original, average='weighted'),
            "val_accuracy": accuracy_score(y_val, y_pred_original),
            "val_precision_macro": precision_score(y_val, y_pred_original, average='macro'),
            "val_recall_macro": recall_score(y_val, y_pred_original, average='macro'),
            "val_roc_auc": roc_auc_score(
                y_val_encoded, y_pred_proba, multi_class='ovr', average='macro'
            ),
        }

        mlflow.log_params(params)
        for metric_name, metric_value in val_metrics.items():
            mlflow.log_metric(metric_name, metric_value)

        # Per-class report stored as a JSON artifact
        report = classification_report(y_val, y_pred_original, output_dict=True)
        mlflow.log_dict(report, "classification_report.json")

        # Store the model together with the class lists of every encoder, so
        # the integer codes can be mapped back to labels later.
        mlflow.xgboost.log_model(model, "model")
        encoder_classes = {"target_encoder_classes": target_encoder.classes_.tolist()}
        encoder_classes.update(
            {k: v.classes_.tolist() for k, v in label_encoders.items()}
        )
        mlflow.log_dict(encoder_classes, "encoders.json")

        return {'loss': -val_metrics["val_f1_macro"], 'status': STATUS_OK}

In [121]:
# Parent run grouping the nested per-trial runs opened by `xgb_objective`.
with mlflow.start_run(run_name="xgboost_hyperopt"):
    # 10 TPE-guided trials; the seeded generator makes the search repeatable.
    xgb_best = fmin(
        xgb_objective,
        xgb_space,
        algo=tpe.suggest,
        max_evals=10,
        trials=Trials(),
        rstate=np.random.default_rng(42),
    )

    print(f"✅ Best XGBoost parameters: {xgb_best}")
    # Record the winning sampled values on the parent run.
    mlflow.log_params(xgb_best)

  0%|          | 0/10 [00:00<?, ?trial/s, best loss=?]


  self.get_booster().save_model(fname)




🏃 View run enthused-finch-289 at: http://localhost:5000/#/experiments/3/runs/64a645e82b474daf89ec39c849838cbc

🧪 View experiment at: http://localhost:5000/#/experiments/3

 10%|█         | 1/10 [01:29<13:23, 89.30s/trial, best loss: -0.4099215575418566]


  self.get_booster().save_model(fname)




🏃 View run masked-zebra-697 at: http://localhost:5000/#/experiments/3/runs/cf46153d155f47979c090829755d9d29

🧪 View experiment at: http://localhost:5000/#/experiments/3                     

 20%|██        | 2/10 [02:16<08:38, 64.78s/trial, best loss: -0.4099215575418566]


  self.get_booster().save_model(fname)




🏃 View run traveling-goat-834 at: http://localhost:5000/#/experiments/3/runs/cbb202ce91124f6a8583ea1b8a584e74

🧪 View experiment at: http://localhost:5000/#/experiments/3                     

 30%|███       | 3/10 [03:07<06:48, 58.37s/trial, best loss: -0.4184982492240856]


  self.get_booster().save_model(fname)




🏃 View run victorious-rat-262 at: http://localhost:5000/#/experiments/3/runs/8caa91befd4d43d2b7e945d84ae6d08c

🧪 View experiment at: http://localhost:5000/#/experiments/3                     

 40%|████      | 4/10 [03:57<05:29, 54.85s/trial, best loss: -0.4227484238374167]


  self.get_booster().save_model(fname)




🏃 View run incongruous-elk-228 at: http://localhost:5000/#/experiments/3/runs/9d99cd54474d43e680a76e5cb8b0acf1

🧪 View experiment at: http://localhost:5000/#/experiments/3                     

 50%|█████     | 5/10 [04:53<04:37, 55.42s/trial, best loss: -0.4227484238374167]


  self.get_booster().save_model(fname)




🏃 View run serious-robin-334 at: http://localhost:5000/#/experiments/3/runs/7ffeee13593045cd863ac9bc7f959b62

🧪 View experiment at: http://localhost:5000/#/experiments/3                     

 60%|██████    | 6/10 [06:29<04:37, 69.31s/trial, best loss: -0.42945455040672015]


  self.get_booster().save_model(fname)




🏃 View run trusting-stag-421 at: http://localhost:5000/#/experiments/3/runs/42d8730d71ec46c184bf1f18b058b874

🧪 View experiment at: http://localhost:5000/#/experiments/3                      

 70%|███████   | 7/10 [08:15<04:03, 81.15s/trial, best loss: -0.42945455040672015]


  self.get_booster().save_model(fname)




🏃 View run clean-grouse-988 at: http://localhost:5000/#/experiments/3/runs/842764a65a0f4f7190ae7f0eab61a3a7

🧪 View experiment at: http://localhost:5000/#/experiments/3                      

 80%|████████  | 8/10 [09:01<02:19, 69.90s/trial, best loss: -0.42945455040672015]


  self.get_booster().save_model(fname)




🏃 View run defiant-fish-755 at: http://localhost:5000/#/experiments/3/runs/eae10a155c894636a5e08240c78f4425

🧪 View experiment at: http://localhost:5000/#/experiments/3                      

 90%|█████████ | 9/10 [11:22<01:32, 92.37s/trial, best loss: -0.42945455040672015]


  self.get_booster().save_model(fname)




🏃 View run bedecked-conch-945 at: http://localhost:5000/#/experiments/3/runs/d227f18ac9174f90a526add6fb56ea9b

🧪 View experiment at: http://localhost:5000/#/experiments/3                      

100%|██████████| 10/10 [12:35<00:00, 75.56s/trial, best loss: -0.42945455040672015]
✅ Best XGBoost parameters: {'colsample_bytree': np.float64(0.7845735941946608), 'learning_rate': np.float64(0.17329448321462293), 'max_depth': np.float64(9.0), 'n_estimators': np.float64(400.0), 'reg_alpha': np.float64(4.4194339835245335), 'reg_lambda': np.float64(2.4492169775573576), 'subsample': np.float64(0.9018909441059156)}
🏃 View run xgboost_hyperopt at: http://localhost:5000/#/experiments/3/runs/40e867245b5d40d69f352662f969ef07
🧪 View experiment at: http://localhost:5000/#/experiments/3


In [122]:
# Train, evaluate and log the final XGBoost model with the best hyperparameters.
# `xgb_best` only holds the sampled values, so the fixed settings are re-applied.
xgb_best_params = {
    **xgb_best,
    'random_state': 42,
    'objective': 'multi:softprob',
    'num_class': 3,
    'eval_metric': 'mlogloss'
}

# hyperopt returns these two values as floats
xgb_best_params['max_depth'] = int(xgb_best_params['max_depth'])
xgb_best_params['n_estimators'] = int(xgb_best_params['n_estimators'])

with mlflow.start_run(
    run_name="xgboost_final_model",
    tags={"model": "xgboost", "type": "final"},
    description="Final XGBoost model with best parameters"
):
    # Train final model on the label-encoded features and targets
    xgb_model = XGBClassifier(**xgb_best_params)
    xgb_model.fit(
        X_train_encoded, y_train_encoded,
        eval_set=[(X_val_encoded, y_val_encoded)],
        verbose=False
    )

    # Final predictions
    y_pred = xgb_model.predict(X_val_encoded)
    y_pred_proba = xgb_model.predict_proba(X_val_encoded)

    # Convert predictions back to the original string labels
    y_pred_original = target_encoder.inverse_transform(y_pred)

    # Calculate metrics (ROC AUC uses the encoded targets with the probabilities)
    metrics = {
        'accuracy': accuracy_score(y_val, y_pred_original),
        'f1_macro': f1_score(y_val, y_pred_original, average='macro'),
        'f1_weighted': f1_score(y_val, y_pred_original, average='weighted'),
        'precision_macro': precision_score(y_val, y_pred_original, average='macro'),
        'recall_macro': recall_score(y_val, y_pred_original, average='macro'),
        'roc_auc_macro': roc_auc_score(y_val_encoded, y_pred_proba, multi_class='ovr', average='macro')
    }

    # Log the hyperparameters too, as the CatBoost final run does, so the
    # registered model can be traced back to its configuration
    mlflow.log_params(xgb_best_params)

    # Log metrics
    for metric_name, metric_value in metrics.items():
        mlflow.log_metric(metric_name, metric_value)

    # Print results
    print("🎯 Final XGBoost Model Performance:")
    for metric_name, metric_value in metrics.items():
        print(f"   {metric_name}: {metric_value:.4f}")

    # Classification report: printed for the reader and stored as a JSON
    # artifact, matching the other runs in this notebook
    print("\n📊 XGBoost Classification Report:")
    print(classification_report(y_val, y_pred_original))
    mlflow.log_dict(
        classification_report(y_val, y_pred_original, output_dict=True),
        "classification_report.json",
    )

    # Log model and the class lists of every encoder
    mlflow.xgboost.log_model(xgb_model, "model")
    mlflow.log_dict({
        "target_encoder_classes": target_encoder.classes_.tolist(),
        **{k: v.classes_.tolist() for k, v in label_encoders.items()}
    }, "encoders.json")



🎯 Final XGBoost Model Performance:
   accuracy: 0.4994
   f1_macro: 0.4295
   f1_weighted: 0.4748
   precision_macro: 0.4367
   recall_macro: 0.4394
   roc_auc_macro: 0.6285

📊 XGBoost Classification Report:
              precision    recall  f1-score   support

           A       0.47      0.45      0.46       520
           D       0.27      0.15      0.20       426
           H       0.57      0.72      0.63       802

    accuracy                           0.50      1748
   macro avg       0.44      0.44      0.43      1748
weighted avg       0.47      0.50      0.47      1748



  self.get_booster().save_model(fname)


🏃 View run xgboost_final_model at: http://localhost:5000/#/experiments/3/runs/7a754f16e6f5402e8274b08d41641f99
🧪 View experiment at: http://localhost:5000/#/experiments/3


## Model Registry

In [147]:
# Look the experiment up by name instead of hard-coding its server-assigned id.
experiment = client.get_experiment_by_name("epl_predictions")
if experiment is None:
    raise ValueError("MLflow experiment 'epl_predictions' was not found")

# Fetch the active runs tagged as final models, best accuracy first.
# Final runs log the metric as "accuracy" (the "val_" prefix is only used by
# the hyperopt trials), and descending order puts the strongest model first,
# so it receives the lowest version number when the runs are registered.
retrieved_runs = client.search_runs(
    experiment_ids=[experiment.experiment_id],
    filter_string="tags.type = 'final'",
    run_view_type=ViewType.ACTIVE_ONLY,
    order_by=["metrics.accuracy DESC"],
)

In [None]:
# Show each retrieved run's name next to its logged "accuracy" metric.
for run in retrieved_runs:
  print(f"{run.info.run_name}: {run.data.metrics['accuracy']}")

catboost_final_model: 0.5183066361556065
xgboost_final_model: 0.4994279176201373


In [149]:
# Name of the registry entry under which model versions are created below.
REGISTERED_MODEL_NAME = "epl-predictions-model"

In [150]:
from datetime import date

from mlflow.exceptions import MlflowException

# Create the registry entry. The cell is safe to re-run: if the model already
# exists the existing entry is fetched instead; any other error is re-raised.
try:
    registered_model = client.create_registered_model(
        name=REGISTERED_MODEL_NAME,
        tags={
            "creator": "kamal",
            "problem": "epl-predictions",
        },
        description=f"created at {date.today()}",
    )
except MlflowException as exc:
    if exc.error_code != "RESOURCE_ALREADY_EXISTS":
        raise
    registered_model = client.get_registered_model(REGISTERED_MODEL_NAME)

# Display the registry entry as the cell output.
registered_model

<RegisteredModel: aliases={}, creation_timestamp=1754337335142, deployment_job_id='', deployment_job_state='DEPLOYMENT_JOB_CONNECTION_STATE_UNSPECIFIED', description='created at 2025-08-04', last_updated_timestamp=1754337335142, latest_versions=[], name='epl-predictions-model', tags={'creator': 'kamal', 'problem': 'epl-predictions'}>

In [None]:
# Register the model logged by each retrieved run as a new version.
# Versions are numbered in the order of `retrieved_runs`.
for run in retrieved_runs:
    client.create_model_version(
        name=REGISTERED_MODEL_NAME,
        source=f"runs:/{run.info.run_id}/model",
        # Link the version to its source run so the registry can trace lineage
        run_id=run.info.run_id,
        tags={"name": f"{run.info.run_name}"},
        description=f"Moved to registry on {date.today()}",
    )

2025/08/04 22:57:03 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: epl-predictions-model, version 1
2025/08/04 22:57:03 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: epl-predictions-model, version 2


In [152]:
# List every registered version together with the URI its artifacts are downloaded from.
versions = client.search_model_versions(filter_string=f"name = '{REGISTERED_MODEL_NAME}'")
for version in versions:
    download_uri = client.get_model_version_download_uri(
        name=REGISTERED_MODEL_NAME,
        version=version.version,
    )
    print(f"version:{version.version}, URI: {download_uri}")

version:2, URI: runs:/7a754f16e6f5402e8274b08d41641f99/model
version:1, URI: runs:/b0829d3f334c4db597b2c28ba01cbb96/model


In [153]:
# Point the "ready-prod" alias at version 1.
# NOTE(review): the version number is hard-coded; per the listing output above
# version 1 is the catboost_final_model run — confirm after re-registering.
client.set_registered_model_alias(REGISTERED_MODEL_NAME, "ready-prod", "1")

In [155]:
# Give version 2 two aliases: one flagging that it needs more tuning and one marking it as staging.
# NOTE(review): "need-fine-tunning" looks like a misspelling of "need-fine-tuning";
# confirm nothing resolves the alias by its current name before renaming it.
client.set_registered_model_alias(REGISTERED_MODEL_NAME, "need-fine-tunning", "2")
client.set_registered_model_alias(REGISTERED_MODEL_NAME, "staging", "2")