In [1]:
import json
import warnings
from typing import Any, Literal

import numpy as np
import pandas as pd
import polars as pl
from rich.console import Console
from rich.theme import Theme

custom_theme = Theme(
    {
        "white": "#FFFFFF",  # Bright white
        "info": "#00FF00",  # Bright green
        "warning": "#FFD700",  # Bright gold
        "error": "#FF1493",  # Deep pink
        "success": "#00FFFF",  # Cyan
        "highlight": "#FF4500",  # Orange-red
    }
)
console = Console(theme=custom_theme)

# Visualization
# import matplotlib.pyplot as plt

# NumPy settings
np.set_printoptions(precision=4)

# Pandas settings
pd.options.display.max_rows = 1_000
pd.options.display.max_columns = 1_000
pd.options.display.max_colwidth = 600

# Polars settings
pl.Config.set_fmt_str_lengths(1_000)
pl.Config.set_tbl_cols(n=1_000)
pl.Config.set_tbl_rows(n=200)

warnings.filterwarnings("ignore")

# Black code formatter (Optional)
%load_ext lab_black

# auto reload imports
%load_ext autoreload
%autoreload 2

In [2]:
def go_up_from_current_directory(*, go_up: int = 1) -> None:
    """This is used to up a number of directories.

    Params:
    -------
    go_up: int, default=1
        This indicates the number of times to go back up from the current directory.

    Returns:
    --------
    None
    """
    import os
    import sys

    CONST: str = "../"
    NUM: str = CONST * go_up

    # Goto the previous directory
    prev_directory = os.path.join(os.path.dirname(__name__), NUM)
    # Get the 'absolute path' of the previous directory
    abs_path_prev_directory = os.path.abspath(prev_directory)

    # Add the path to the System paths
    sys.path.insert(0, abs_path_prev_directory)
    print(abs_path_prev_directory)


# Demo (Prevents ruff from removing the unused module import)
name: Any
category: Literal["A", "B", "C"]
json.loads('{"name": "Bike Rental Prediction", "category": "A"}')

{'name': 'Bike Rental Prediction', 'category': 'A'}

In [3]:
go_up_from_current_directory(go_up=1)

/Users/mac/Desktop/Projects/Bike-Rental-Prediction


In [4]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

In [None]:
import narwhals as nw
from narwhals.typing import IntoDataFrameT


def split_temporal_data_to_train_val_test(
    data: IntoDataFrameT,
    test_size: float = 0.2,
    val_size: float = 0.2,
    print_shapes: bool = True,
) -> tuple[IntoDataFrameT, IntoDataFrameT, IntoDataFrameT]:
    """Split data into train, validation and test sets while maintaining temporal order.

    Parameters
    ----------
    data : IntoDataFrameT
        The input dataframe to be split.
    test_size : float, default=0.2
        Proportion of the dataset to include in the test split.
    val_size : float, default=0.2
        Proportion of the dataset to include in the validation split.
    print_shapes : bool, default=True
        Whether to print the shapes of the resulting dataframes.

    Returns
    -------
    tuple[IntoDataFrameT, IntoDataFrameT, IntoDataFrameT]
        A tuple containing the train, validation, and test dataframes.
    """
    # Validation
    if not 0.0 <= test_size <= 1.0 or not 0.0 <= val_size <= 1.0:
        raise ValueError("test_size and val_size must be between 0.0 and 1.0")
    if (test_size + val_size) >= 1.0:
        raise ValueError("The sum of test_size and val_size must be less than 1.0")

    # Convert to Narwhals DataFrame
    nw_data: nw.DataFrame = nw.from_native(data)
    num_train: int = int((1 - test_size - val_size) * nw_data.shape[0])
    num_val: int = int(val_size * nw_data.shape[0])

    # Add index
    nw_data = nw_data.with_row_index()
    train_data: nw.DataFrame = nw_data.filter(nw.col("index") <= num_train).drop(
        "index"
    )

    val_data: nw.DataFrame = nw_data.filter(
        (nw.col("index") > num_train) & (nw.col("index") <= (num_train + num_val))
    ).drop("index")
    test_data: nw.DataFrame = nw_data.filter(
        nw.col("index") > (num_train + num_val)
    ).drop("index")

    if print_shapes:
        print(
            f"Train shape: {train_data.shape} | Val shape: {val_data.shape} | Test shape: {test_data.shape}"
        )

    return (train_data.to_native(), val_data.to_native(), test_data.to_native())

In [6]:
df: pl.DataFrame = pl.DataFrame(
    {
        "idx": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
        "role": [
            "engineer",
            "doctor",
            "nurse",
            "engineer",
            "nurse",
            "teacher",
            "doctor",
            "doctor",
            "teacher",
            "engineer",
            "teacher",
            "nurse",
            "teacher",
            "engineer",
            "nurse",
        ],
    }
)

df

idx,role
i64,str
0,"""engineer"""
1,"""doctor"""
2,"""nurse"""
3,"""engineer"""
4,"""nurse"""
5,"""teacher"""
6,"""doctor"""
7,"""doctor"""
8,"""teacher"""
9,"""engineer"""


In [7]:
(train_df, val_df, test_df) = split_temporal_data_to_train_val_test(
    data=df,
    test_size=0.8,
    val_size=0.1,
)
train_df

Train shape: (2, 2) | Val shape: (1, 2) | Test shape: (12, 2)


idx,role
i64,str
0,"""engineer"""
1,"""doctor"""


In [8]:
rng = np.random.default_rng(123)
x = rng.standard_normal(size=(1_000, 10))

X_train, X_test = train_test_split(x, test_size=0.2, random_state=123)
y_train = rng.standard_normal(size=(X_train.shape[0],))
y_test = rng.standard_normal(size=(X_test.shape[0],))

params: dict[str, Any] = {
    "n_estimators": 100,
    "max_depth": 10,
}

rf_reg = RandomForestRegressor(**params)

rf_reg.fit(X_train, y_train)
# rf_reg.score(X_test, y_test)

0,1,2
,n_estimators,100
,criterion,'squared_error'
,max_depth,10
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,1.0
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [9]:
rf_reg.feature_importances_

array([0.1303, 0.1002, 0.0709, 0.1185, 0.0967, 0.1166, 0.0898, 0.1   ,
       0.0883, 0.0888])

In [10]:
from src.config import app_config
from src.exceptions import (
    CustomError,
    MLFlowConnectionError,
)
from src.ml.feature_engineering import FeatureEngineer
from src.ml.trainer import ModelTrainer
from src.ml.utils import split_temporal_data_to_train_val_test



In [None]:
import httpx
import mlflow
import mlflow.sklearn
from mlflow import MlflowClient

port: int = 5001
url: str = f"http://localhost:{port}"


def check_mlflow(url: str, timeout: float = 2.0) -> bool:
    """
    Check MLflow endpoint accessibility and handle common httpx errors.

    Returns True if reachable (2xx), False otherwise.
    """
    try:
        resp = httpx.get(url, timeout=timeout)
        resp.raise_for_status()
        console.print("[success]MLflow is accessible")
        return True

    except httpx.HTTPStatusError as e:
        console.print(
            f"[error]MLflow returned non-2xx status: {e.response.status_code} — {e}"
        )
        return False

    except httpx.RequestError as e:
        # covers ConnectError, ReadTimeout, etc.
        console.print(f"[error]Network/connection error when contacting MLflow: {e}")
        return False

    except (MLFlowConnectionError, CustomError) as e:
        console.print(f"[error]Project-specific MLflow error: {e}")
        return False

    except Exception as e:
        console.print(f"[error]Unexpected error: {e}")
        return False


check_mlflow(url)

True

In [12]:
fp: str = "../../../../Documents/data_dump/bike_data/database.parquet"
data: pl.DataFrame = pl.read_parquet(fp)
console.print(f"Shape: {data.shape}", style="info")
display(data.head())

(train_df, val_df, test_df) = split_temporal_data_to_train_val_test(data=data)

datetime,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
str,i64,i64,i64,i64,i64,i64,i64,i64,f64,f64,f64,f64,i64,i64,i64
"""2011-01-01 00:00:00""",1,0,1,0,0,6,0,1,0.24,0.2879,0.81,0.0,3,13,16
"""2011-01-01 01:00:00""",1,0,1,1,0,6,0,1,0.22,0.2727,0.8,0.0,8,32,40
"""2011-01-01 02:00:00""",1,0,1,2,0,6,0,1,0.22,0.2727,0.8,0.0,5,27,32
"""2011-01-01 03:00:00""",1,0,1,3,0,6,0,1,0.24,0.2879,0.75,0.0,3,10,13
"""2011-01-01 04:00:00""",1,0,1,4,0,6,0,1,0.24,0.2879,0.75,0.0,0,1,1


Shapes -> Train shape: (8342, 16) | Val shape: (2780, 16) | Test shape: (2781, 16)


In [None]:
from src.exp_tracking.model_loader import (
    get_best_run,
    load_best_model,
    load_model_from_run,
    load_registered_model_from_registry,
)

In [None]:
feat_eng = FeatureEngineer()
train_features_df: pl.DataFrame = feat_eng.create_all_features(
    data=train_df, config=app_config.feature_config
)
val_features_df: pl.DataFrame = feat_eng.create_all_features(
    data=val_df, config=app_config.feature_config
)
test_features_df: pl.DataFrame = feat_eng.create_all_features(
    data=test_df, config=app_config.feature_config
)
display(train_features_df.head())

val_features_df.head()

season,mnth,hr,holiday,weekday,workingday,weathersit,temp,hum,is_weekend,sin_hour,cos_hour,sin_weekday,cos_weekday,cnt_lag_0hr,cnt_lag_1hr,cnt_lag_24hr,hr_lag_1hr,hr_lag_24hr,temp_lag_1hr,temp_lag_3hr,cnt_rolling_mean_3hr,cnt_rolling_median_3hr,cnt_rolling_mean_6hr,cnt_rolling_median_6hr,hr_rolling_mean_3hr,hr_rolling_median_3hr,hr_rolling_mean_6hr,hr_rolling_median_6hr,temp_plus_hum,hum_plus_hr,cnt_diff_1hr,cnt_diff_2hr,hr_diff_1hr,hr_diff_24hr,temp_diff_1hr,temp_diff_2hr,temp_diff_24hr,is_high_temp,is_high_hum,is_peak_hour,is_working_hour,is_business_hour,target
i64,i64,i64,i64,i64,i64,i64,f64,f64,i8,f64,f64,f64,f64,i64,i64,i64,i64,i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,i64,i64,i64,i64,f64,f64,f64,i8,i8,i8,i8,i8,i64
1,1,0,0,6,0,1,0.24,0.81,1,0.0,1.0,-0.781831,0.62349,16,16,16,0,0,0.24,0.24,29.333333,32.0,17.166667,14.5,1.0,1.0,2.5,2.5,1.05,0.81,24,16,1,0,-0.02,-0.02,0.22,0,0,0,0,0,40
1,1,1,0,6,0,1,0.22,0.8,1,0.258819,0.965926,-0.781831,0.62349,40,16,16,0,0,0.24,0.24,29.333333,32.0,17.166667,14.5,1.0,1.0,2.5,2.5,1.02,1.8,24,16,1,0,-0.02,-0.02,0.22,0,0,0,0,0,32
1,1,2,0,6,0,1,0.22,0.8,1,0.5,0.866025,-0.781831,0.62349,32,40,16,1,0,0.22,0.24,29.333333,32.0,17.166667,14.5,1.0,1.0,2.5,2.5,1.02,2.8,-8,16,1,0,0.0,-0.02,0.22,0,0,0,0,0,13
1,1,3,0,6,0,1,0.24,0.75,1,0.707107,0.707107,-0.781831,0.62349,13,32,16,2,0,0.22,0.24,28.333333,32.0,17.166667,14.5,2.0,2.0,2.5,2.5,0.99,3.75,-19,-27,1,0,0.02,0.02,0.22,0,0,0,0,0,1
1,1,4,0,6,0,1,0.24,0.75,1,0.866025,0.5,-0.781831,0.62349,1,13,16,3,0,0.24,0.22,15.333333,13.0,17.166667,14.5,3.0,3.0,2.5,2.5,0.99,4.75,-12,-31,1,0,0.0,0.02,0.22,0,0,0,0,0,1


season,mnth,hr,holiday,weekday,workingday,weathersit,temp,hum,is_weekend,sin_hour,cos_hour,sin_weekday,cos_weekday,cnt_lag_0hr,cnt_lag_1hr,cnt_lag_24hr,hr_lag_1hr,hr_lag_24hr,temp_lag_1hr,temp_lag_3hr,cnt_rolling_mean_3hr,cnt_rolling_median_3hr,cnt_rolling_mean_6hr,cnt_rolling_median_6hr,hr_rolling_mean_3hr,hr_rolling_median_3hr,hr_rolling_mean_6hr,hr_rolling_median_6hr,temp_plus_hum,hum_plus_hr,cnt_diff_1hr,cnt_diff_2hr,hr_diff_1hr,hr_diff_24hr,temp_diff_1hr,temp_diff_2hr,temp_diff_24hr,is_high_temp,is_high_hum,is_peak_hour,is_working_hour,is_business_hour,target
i64,i64,i64,i64,i64,i64,i64,f64,f64,i8,f64,f64,f64,f64,i64,i64,i64,i64,i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,i64,i64,i64,i64,f64,f64,f64,i8,i8,i8,i8,i8,i64
4,12,6,0,1,1,1,0.16,0.86,0,1.0,6.1232e-17,0.781831,0.62349,68,68,68,6,6,0.16,0.16,218.666667,191.0,179.0,155.0,7.0,7.0,8.5,8.5,1.02,6.86,123,329,1,0,0.02,0.04,0.2,0,1,0,0,0,191
4,12,7,0,1,1,1,0.18,0.74,0,0.965926,-0.258819,0.781831,0.62349,191,68,68,6,6,0.16,0.16,218.666667,191.0,179.0,155.0,7.0,7.0,8.5,8.5,0.92,7.74,123,329,1,0,0.02,0.04,0.2,0,0,1,0,0,397
4,12,8,0,1,1,1,0.2,0.75,0,0.866025,-0.5,0.781831,0.62349,397,191,68,7,6,0.18,0.16,218.666667,191.0,179.0,155.0,7.0,7.0,8.5,8.5,0.95,8.75,206,329,1,0,0.02,0.04,0.2,0,0,1,0,1,183
4,12,9,0,1,1,1,0.22,0.69,0,0.707107,-0.707107,0.781831,0.62349,183,397,68,8,6,0.2,0.16,257.0,191.0,179.0,155.0,8.0,8.0,8.5,8.5,0.91,9.69,-214,-8,1,0,0.02,0.04,0.2,0,0,1,1,1,108
4,12,10,0,1,1,1,0.24,0.67,0,0.5,-0.866025,0.781831,0.62349,108,183,68,9,6,0.22,0.18,229.333333,183.0,179.0,155.0,9.0,9.0,8.5,8.5,0.91,10.67,-75,-289,1,0,0.02,0.04,0.2,0,0,0,1,1,127


In [15]:
trainer = ModelTrainer(
    train_data=train_features_df,
    val_data=val_features_df,
    test_data=test_features_df,
    target_col="target",
)

2025-10-15 18:46:25 - trainer - [INFO] - Data prepared -> x_train shape: (8342, 43), y_train shape: (8342,) | x_val shape: (2780, 43), y_val shape: (2780,) | x_test shape: (2781, 43), y_test shape: (2781,)
2025-10-15 18:46:25 - mlflow_tracker - [INFO] - Set MLflow tracking URI to: http://localhost:5001
2025-10-15 18:46:25 - mlflow_tracker - [INFO] - Set experiment to: bike rental (ID: 1)
2025-10-15 18:46:25 - mlflow_tracker - [INFO] - Initialized MLFlowTracker with experiment: bike rental


In [43]:
get_best_run(
    experiment_name=app_config.experiment_config.experiment_name,
    client=trainer.mlflow_tracker.client,
)

2025-10-15 20:17:12 - model_loader - [INFO] - Best run: de464b1bf81e43228c7e4d5a392bf4ea with RMSE=55.7


{'run_id': 'de464b1bf81e43228c7e4d5a392bf4ea',
 'metric_value': 55.7,
 'data': <Run: data=<RunData: metrics={'Adjusted_R2': 0.9, 'MAE': 34.49, 'MAPE': 26.67, 'RMSE': 55.7}, params={'max_depth': '10',
  'min_samples_leaf': '1',
  'min_samples_split': '2',
  'n_estimators': '100',
  'random_state': '42'}, tags={'feature_set_version': '1',
  'hardware': 'cpu',
  'mlflow.parentRunId': 'ce42a5473bb64884a319be8fb8303c5b',
  'mlflow.runName': 'run_2025-10-15T20:13:46',
  'mlflow.source.name': '/Users/mac/Desktop/Projects/Bike-Rental-Prediction/.venv/lib/python3.12/site-packages/ipykernel_launcher.py',
  'mlflow.source.type': 'LOCAL',
  'mlflow.user': 'mac',
  'model_family': 'ModelType.RANDOM_FOREST',
  'optimizer_engine': 'None',
  'purpose': 'Model Training',
  'scientist': 'chinedu',
  'team': 'data_science'}>, info=<RunInfo: artifact_uri='s3://mlflow-artifacts/1/de464b1bf81e43228c7e4d5a392bf4ea/artifacts', end_time=1760555649578, experiment_id='1', lifecycle_stage='active', run_id='de464b

In [44]:
best_model = load_best_model(
    experiment_name=app_config.experiment_config.experiment_name,
    client=trainer.mlflow_tracker.client,
)

2025-10-15 20:17:16 - model_loader - [INFO] - Best run: de464b1bf81e43228c7e4d5a392bf4ea with RMSE=55.7
Best Run: {'run_id': 'de464b1bf81e43228c7e4d5a392bf4ea', 'metric_value': 55.7, 'data': <Run: data=<RunData: metrics={'Adjusted_R2': 0.9, 'MAE': 34.49, 'MAPE': 26.67, 'RMSE': 55.7}, params={'max_depth': '10',
 'min_samples_leaf': '1',
 'min_samples_split': '2',
 'n_estimators': '100',
 'random_state': '42'}, tags={'feature_set_version': '1',
 'hardware': 'cpu',
 'mlflow.parentRunId': 'ce42a5473bb64884a319be8fb8303c5b',
 'mlflow.runName': 'run_2025-10-15T20:13:46',
 'mlflow.source.name': '/Users/mac/Desktop/Projects/Bike-Rental-Prediction/.venv/lib/python3.12/site-packages/ipykernel_launcher.py',
 'mlflow.source.type': 'LOCAL',
 'mlflow.user': 'mac',
 'model_family': 'ModelType.RANDOM_FOREST',
 'optimizer_engine': 'None',
 'purpose': 'Model Training',
 'scientist': 'chinedu',
 'team': 'data_science'}>, info=<RunInfo: artifact_uri='s3://mlflow-artifacts/1/de464b1bf81e43228c7e4d5a392bf4e

Downloading artifacts:   0%|          | 0/3 [00:00<?, ?it/s]

2025-10-15 20:17:16 - mlflow_tracker - [INFO] - Detected model file: 'ModelType.RANDOM_FOREST_2025-10-15T20:14:09_model.pkl' with extension 'pkl'
2025-10-15 20:17:16 - mlflow_tracker - [INFO] - ✅ Successfully loaded ModelType.RANDOM_FOREST model from run de464b1bf81e43228c7e4d5a392bf4ea
2025-10-15 20:17:16 - model_loader - [INFO] - ✅ Loaded best ModelType.RANDOM_FOREST model


In [40]:
import mlflow

try:
    mlflow.end_run()
except Exception as e:
    print(e)

In [41]:
# trainer._hyperparameter_tuning_xgboost()
# result = trainer._hyperparameter_tuning_lightgbm()

# trainer.hyperparameter_tuning_all_models()
result = trainer.train_all_models()

2025-10-15 20:13:45 - trainer - [INFO] - 🚀 Training with default hyperparameters
2025-10-15 20:13:46 - mlflow_tracker - [INFO] - Started MLflow run: ce42a5473bb64884a319be8fb8303c5b (name: run_2025-10-15T20:13:45)
2025-10-15 20:13:46 - trainer - [INFO] - Training Random Forest ...
2025-10-15 20:13:46 - mlflow_tracker - [INFO] - Started MLflow run: de464b1bf81e43228c7e4d5a392bf4ea (name: run_2025-10-15T20:13:46)
2025-10-15 20:13:46 - trainer - [INFO] - Starting Random Forest training with TimeSeriesSplit cross-validation.
2025-10-15 20:14:09 - mlflow_tracker - [INFO] - ✅ Successfully logged ModelType.RANDOM_FOREST model and metadata
🏃 View run run_2025-10-15T20:13:46 at: http://localhost:5001/#/experiments/1/runs/de464b1bf81e43228c7e4d5a392bf4ea
🧪 View experiment at: http://localhost:5001/#/experiments/1
2025-10-15 20:14:09 - mlflow_tracker - [INFO] - Ended MLflow run with status: FINISHED
2025-10-15 20:14:09 - trainer - [INFO] - 🚀 Random Forest training completed successfully.
2025-10-

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 20:14:13 - mlflow_s3_utils - [INFO] - Synced visualizations/reports/model_metrics_comparison_2025-10-15T20:14:12.html to S3
2025-10-15 20:14:13 - mlflow_s3_utils - [INFO] - Successfully synced all artifacts for run ce42a5473bb64884a319be8fb8303c5b to S3
2025-10-15 20:14:13 - trainer - [INFO] - ✅ Successfully synced artifacts to S3
2025-10-15 20:14:13 - trainer - [INFO] - Verifying S3 artifact storage...
2025-10-15 20:14:13 - s3_verification - [INFO] - Found 1 artifacts in S3 for run ce42a5473bb64884a319be8fb8303c5b
2025-10-15 20:14:13 - s3_verification - [INFO] - Artifacts: visualizations/reports/model_metrics_comparison_2025-10-15T20:14:12.html...
2025-10-15 20:14:13 - s3_verification - [ERROR] - ❌ S3 artifact verification FAILED
2025-10-15 20:14:13 - s3_verification - [ERROR] -   - Artifact URI: s3://mlflow-artifacts/1/ce42a5473bb64884a319be8fb8303c5b/artifacts
2025-10-15 20:14:13 - s3_verification - [ERROR] -   - Missing artifacts: models/


In [None]:
# console.print(result)

In [None]:
import os

from src.schemas.types import ModelType

client = MlflowClient()
tracking_uri = os.getenv("MLFLOW_TRACKING_URI", "http://localhost:5000")
client.list_artifacts(run_id=result[0].run_id)
lgb_dict: dict[str, Any] = load_model_from_run(
    run_id=result[0].run_id,
    model_name=ModelType.LIGHTGBM,
    tracking_uri=tracking_uri,
)

xgb_dict: dict[str, Any] = load_model_from_run(
    run_id=result[0].run_id,
    model_name=ModelType.XGBOOST,
    tracking_uri=tracking_uri,
)

In [None]:
# Register model
suffix: str = "_best"
trainer.mlflow_tracker.register_model(
    run_id=result[0].run_id,
    model=lgb_dict["model"],
    model_name=lgb_dict["model_name"],
    input_example=val_features_df.drop("target"),
    suffix=suffix,
)

In [None]:
# Set model version alias
model_name: str = f"{lgb_dict['model_name']}{suffix}"
model_version_alias: str = "staging"
version: str = "2"
value: dict[str, Any] = {
    "project": app_config.experiment_config.experiment_name,
    "model_name": model_name,
    "version": version,
    "env": "staging",
}
# Duplicate of step in UI
client.set_registered_model_alias(model_name, model_version_alias, version)

client.set_model_version_tag(model_name, version, key="extra", value=json.dumps(value))

# Get information about the model
model_info = client.get_model_version_by_alias(model_name, model_version_alias)
model_tags = model_info.tags
print(f"Model tags: {model_tags}")

# Get the model version using a model URI
model_uri: str = f"models:/{model_name}@{model_version_alias}"

model = None
loaders: list[tuple[str, Any]] = [
    ("sklearn", getattr(mlflow, "sklearn", None)),
    ("xgboost", getattr(mlflow, "xgboost", None)),
    ("lightgbm", getattr(mlflow, "lightgbm", None)),
]

for name, module in loaders:
    if module is None:
        print(f"MLflow does not expose loader for {name}; skipping.")
        continue

    loader = getattr(module, "load_model", None)
    if loader is None:
        print(f"Loader for {name} does not have load_model; skipping.")
        continue

    try:
        model = loader(model_uri)
        print(f"Successfully loaded model using {name}")
        break
    except Exception as e:
        print(f"Failed to load with {name}: {e}")

if model is None:
    print("Failed to load model after trying sklearn, xgboost and lightgbm.")

In [None]:
model.predict(val_features_df.head(2).drop("target"))

In [None]:
res = {
    "run_id": "26482ea4b7e24eaf8101938e34d9a550",
    "status": "✅ Model registered successfully",
    "datetime": "2025-10-15T20:06:10",
    "model_name": "ModelType.RANDOM_FOREST_staging",
    "model_version": "2",
    "model_version_alias": "staging",
}
model = load_registered_model_from_registry(
    model_name=res["model_name"], model_version_alias=res["model_version_alias"]
)

Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

2025-10-15 21:10:41 - model_loader - [INFO] - ✅ Successfully loaded model ModelType.RANDOM_FOREST_staging using 'sklearn'


In [None]:
{
    "run_id": "26482ea4b7e24eaf8101938e34d9a550",
    "status": "✅ Model registered successfully",
    "datetime": "2025-10-15T20:06:10",
    "model_name": "ModelType.RANDOM_FOREST_staging",
    "model_version": "2",
    "model_version_alias": "staging",
}

In [None]:
# 25
col: str = "windspeed"
train_df[col].describe().to_dicts()

In [None]:
t_min, t_max = -8, 39
t_norm = 0.96
original_temp = t_norm * (t_max - t_min) + t_min
original_temp


def convert_to_original_temp(
    t_norm: float, t_min: float = -8, t_max: float = 39
) -> float:
    """Convert normalized temperature back to original scale (Celsius)."""
    return t_norm * (t_max - t_min) + t_min


def calculate_temp_factor(temp: float) -> int:
    """Calculate temperature factor based on given temperature in celsius."""
    temp = np.round(temp, 2)
    if temp < 0 or temp > 35:
        return 4
    if 0 <= temp <= 9 or 30 <= temp <= 35:
        return 3
    if 21 <= temp <= 29:
        return 2
    if 10 <= temp <= 20:
        return 1
    return 4  # Default case (should not occur)


temp: float = convert_to_original_temp(t_norm=t_norm)
temp_factor: int = calculate_temp_factor(temp=temp)
print(f"Temp: {temp}, Temp Factor: {temp_factor}")

In [None]:
def calculate_hum_factor(hum: float) -> int:
    """Calculate humidity factor based on given humidity (0 to 1)."""
    hum = np.round(hum, 2)
    if hum > 0.8:
        return 4
    if hum < 0.2:
        return 3
    if 0.2 <= hum <= 0.39 or 0.61 <= hum <= 0.8:
        return 2
    if 0.4 <= hum <= 0.6:
        return 1
    return 4


def convert_to_original_windspeed(w_norm: float, w_factor: float = 0.8507) -> float:
    """Convert normalized windspeed back to original scale (km/h)."""
    return w_norm * w_factor


def calculate_windspeed_factor(windspeed: float) -> int:
    """Calculate windspeed factor based on given windspeed (km/h)."""
    windspeed = np.round(windspeed, 2)
    if windspeed > 50:
        return 4
    if 26 <= windspeed <= 50:
        return 3
    if 16 <= windspeed <= 25:
        return 2
    if 0 <= windspeed <= 15:
        return 1
    return 1

In [None]:
from typing import TypedDict


class WeatherDict(TypedDict):
    temp: float
    hum: float
    windspeed: float
    weather_sit: float


def normalize_factor(value: int) -> float:
    min_value, max_value = (1, 4)

    if not min_value <= value <= max_value:
        raise (f"{value} should be between 1 and 4")

    norm_value: float = (value - min_value) / (max_value - min_value)
    return round(norm_value, 2)


def calculate_weather_factor(
    temp: int, hum: int, windspeed: int, weather_sit: int
) -> float:
    """Lower is better."""
    base_factor: float = 0.5
    weights: WeatherDict = WeatherDict(
        weather_sit=0.4, temp=0.15, windspeed=0.15, hum=0.1
    )

    # Temperature
    base_factor += weights["temp"] * temp
    # Humidity
    base_factor += weights["hum"] * hum
    # Windspeed
    base_factor += weights["windspeed"] * windspeed
    # Weather Situation
    base_factor *= weights["weather_sit"] * weather_sit
    # Clip between 0.85 and 1.7
    return round(max(0.85, min(base_factor, 1.7)), 2)


normalize_factor(1)
calculate_weather_factor(temp=3, hum=1, windspeed=1, weather_sit=3)

In [None]:
def calculate_dynamic_elasticity(base_elasticity, utilization_rate, weather_factor=1.0):
    # Controls sensitivity to utilization rate
    # Adjust elasticity based on utilization and weather
    alpha = 0.6

    elasticity = (
        base_elasticity * (1 + alpha * (0.5 - utilization_rate)) * weather_factor
    )
    return round(elasticity, 2)


base_elasticity = -1.2
utilization_rate = 0.2
weather_factor = 1.3
print(
    calculate_dynamic_elasticity(
        base_elasticity=base_elasticity,
        utilization_rate=utilization_rate,
        weather_factor=weather_factor,
    )
)


def calculate_dynamic_elasticity(
    base_elasticity: float, utilization_rate: float, weather_factor=1.0
):
    """Calculate dynamic elasticity based on base elasticity, utilization rate, and weather factor.

    Parameters
    ----------
    base_elasticity : float
        The base elasticity value.
    utilization_rate : float
        The utilization rate, typically between 0 and 1.
    weather_factor : float, optional
        The weather factor, default is 1.0.

    Returns
    -------
    float
        The calculated dynamic elasticity, rounded to 2 decimal places.

    Notes
    -----
    When the weather_factor is high (poor weather), elasticity is high.
    When the utilization rate (demand) is high, elasticity is relatively low.
    """
    # Controls sensitivity to utilization rate
    # Adjust elasticity based on utilization and weather
    alpha = 0.7
    elasticity = base_elasticity * (alpha * weather_factor + (1 - utilization_rate))
    return round(elasticity, 2)


print(
    calculate_dynamic_elasticity(
        base_elasticity=base_elasticity,
        utilization_rate=utilization_rate,
        weather_factor=weather_factor,
    )
)

In [None]:
hr: int = 24
np.sin(hr / 24 * 2 * np.pi)


def sin_hr(hr: int) -> float:
    return np.sin(hr / 24 * 2 * np.pi)


sin_hr(18)

In [None]:
def scale_value(
    value: float,
    min_value: float,
    max_value: float,
    out_min: float = 0.8,
    out_max: float = 1.3,
) -> float:
    """Scale a value from an input range [min_value, max_value] to an output range [out_min, out_max]."""
    # Validate ranges
    if min_value >= max_value:
        raise ValueError("min_value must be less than max_value")
    if out_min >= out_max:
        raise ValueError("out_min must be less than out_max")
    if not min_value <= value <= max_value:
        raise ValueError(f"value {value} must be between {min_value} and {max_value}")

    # Normalize to 0..1 then scale to out_min..out_max
    normalized: float = (value - min_value) / (max_value - min_value)
    scaled_value: float = out_min + (normalized * (out_max - out_min))
    return round(scaled_value, 4)


def calculate_time_factor(time: int) -> float:
    """Calculate time factor based on hour of the day (0-23)."""
    peak_hr: set[int] = {7, 8, 9, 16, 17, 18}
    business_hr: set[int] = {10, 11, 12, 13, 14, 15, 19, 20}

    if not 0 <= time <= 23:
        raise ValueError("Hour must be between 0 and 23")
    if time in peak_hr:
        return 1.5
    if time in business_hr:
        return 1.2
    return 0.85


def calculate_competitor_factor(base_price: float, competitor_price: float) -> float:
    min_value, max_value = (0.85, 1.5)
    alpha: float = 0.4

    if competitor_price < 0:
        raise (f"{competitor_price} cannot be a negative value.")

    pct_change: float = (base_price - competitor_price) / base_price
    result: float = round(1 - (alpha * pct_change), 2)

    return np.clip(result, a_min=min_value, a_max=max_value).item()


def calculate_price_multiplier(
    base_price: float,
    competitor_price: float,
    utilization_rate: float,
    weather_factor: float,
    time: int,
    base_elasticity: float = -1.1,
) -> float:
    """Calculate overall price multiplier based on various factors."""
    # Tunable
    k: float = 0.55
    surge: float = 1 + (k * utilization_rate)

    print(f"Surge: {surge}")
    comp_factor: float = calculate_competitor_factor(base_price, competitor_price)
    print(f"Competitor Factor: {comp_factor}")
    dyn_elasticity: float = calculate_dynamic_elasticity(
        base_elasticity, utilization_rate, weather_factor
    )
    print(f"Dynamic Elasticity: {dyn_elasticity}")
    time_factor: float = calculate_time_factor(time)
    print(f"Time Factor: {time_factor}")

    price_multiplier: float = (
        k * comp_factor * surge * time_factor * (1 / np.abs(dyn_elasticity))
    )

    return round(price_multiplier, 2)


def calculate_price(base_price: float, price_multiplier: float) -> float:
    """Calculate final price based on base price and price multiplier."""
    min_value, max_value = (400, 800)
    currency: str = "NGN"

    if base_price < 0:
        raise ValueError("Base price cannot be negative")
    if price_multiplier <= 0:
        raise ValueError("Price multiplier must be greater than zero")

    final_price: float = base_price * price_multiplier
    final_price = np.clip(round(final_price, 2), min_value, max_value).item()
    print(
        f"\nPrice Breakdown:\n================ \nBase Price: {currency} {base_price} "
        f"\nOther Factors (like surge, etc): {currency} {round(final_price - base_price, 2)}, "
        f"\n\nFinal Price: {currency} {final_price:,}"
    )


# Usage examples:
# map 0.5 from range [0, 1] to default [0.8, 1.3] -> 1.05
# scale_value(0.9, 0, 1)

# calculate_time_factor(17)
# calculate_dynamic_elasticity(
#     base_elasticity=-1.1, utilization_rate=0.55, weather_factor=0.9
# )
# calculate_competitor_factor(base_price=5, competitor_price=4.55)

base_price = 550
competitor_price = 460

price_multiplier: float = calculate_price_multiplier(
    base_price=base_price,
    competitor_price=competitor_price,
    utilization_rate=0.75,
    weather_factor=0.8,
    time=12,
    base_elasticity=-1.1,
)

calculate_price(base_price=base_price, price_multiplier=price_multiplier)

In [None]:
scale_value(1.7, min_value=0.85, max_value=1.7, out_min=0, out_max=0.99)

In [None]:
train_df.sample(5)

In [None]:
{
    "data_shape": {
        "total_rows": 13903,
        "total_columns": 16,
        "number_of_numeric_columns": 15,
        "number_of_categorical_columns": 1,
    },
    "other_info": {
        "data_nulls": {
            "hr": 0,
            "yr": 0,
            "cnt": 0,
            "hum": 0,
            "mnth": 0,
            "temp": 0,
            "atemp": 0,
            "casual": 0,
            "season": 0,
            "holiday": 0,
            "weekday": 0,
            "datetime": 0,
            "windspeed": 0,
            "registered": 0,
            "weathersit": 0,
            "workingday": 0,
        },
        "total_nulls": 0,
        "memory_usage_MB": 1.84,
        "num_duplicated_rows": 0,
        "validation_timestamp": "2025-10-14T12:58:00+00:00",
        "num_unique_numeric_rows": {
            "hr": 24,
            "yr": 2,
            "cnt": 789,
            "hum": 88,
            "mnth": 12,
            "temp": 50,
            "atemp": 65,
            "casual": 303,
            "season": 4,
            "holiday": 2,
            "weekday": 7,
            "windspeed": 30,
            "registered": 701,
            "weathersit": 4,
            "workingday": 2,
        },
        "num_unique_categorical_rows": {"datetime": 13903},
    },
    "data_schema": {
        "numeric": {
            "hr": "Int64",
            "yr": "Int64",
            "cnt": "Int64",
            "hum": "Float64",
            "mnth": "Int64",
            "temp": "Float64",
            "atemp": "Float64",
            "casual": "Int64",
            "season": "Int64",
            "holiday": "Int64",
            "weekday": "Int64",
            "windspeed": "Float64",
            "registered": "Int64",
            "weathersit": "Int64",
            "workingday": "Int64",
        },
        "categorical": {},
    },
    "summary_statistics": {
        "numeric": [
            {
                "max": 4.0,
                "min": 1.0,
                "std": 1.04,
                "mean": 2.26,
                "mode": [2.0],
                "count": 13903,
                "range": 3.0,
                "column": "season",
                "median": 2.0,
                "variance": 1.07,
                "missing_pct": 0.0,
                "unique_values": 4,
                "missing_values": 0,
            },
            {
                "max": 1.0,
                "min": 0.0,
                "std": 0.48,
                "mean": 0.38,
                "mode": [0.0],
                "count": 13903,
                "range": 1.0,
                "column": "yr",
                "median": 0.0,
                "variance": 0.24,
                "missing_pct": 0.0,
                "unique_values": 2,
                "missing_values": 0,
            },
            {
                "max": 12.0,
                "min": 1.0,
                "std": 3.22,
                "mean": 5.65,
                "mode": [7.0, 5.0],
                "count": 13903,
                "range": 11.0,
                "column": "mnth",
                "median": 5.0,
                "variance": 10.36,
                "missing_pct": 0.0,
                "unique_values": 12,
                "missing_values": 0,
            },
            {
                "max": 23.0,
                "min": 0.0,
                "std": 6.91,
                "mean": 11.55,
                "mode": [16.0, 17.0],
                "count": 13903,
                "range": 23.0,
                "column": "hr",
                "median": 12.0,
                "variance": 47.78,
                "missing_pct": 0.0,
                "unique_values": 24,
                "missing_values": 0,
            },
            {
                "max": 1.0,
                "min": 0.0,
                "std": 0.16,
                "mean": 0.03,
                "mode": [0.0],
                "count": 13903,
                "range": 1.0,
                "column": "holiday",
                "median": 0.0,
                "variance": 0.03,
                "missing_pct": 0.0,
                "unique_values": 2,
                "missing_values": 0,
            },
            {
                "max": 6.0,
                "min": 0.0,
                "std": 2.01,
                "mean": 3.0,
                "mode": [6.0],
                "count": 13903,
                "range": 6.0,
                "column": "weekday",
                "median": 3.0,
                "variance": 4.02,
                "missing_pct": 0.0,
                "unique_values": 7,
                "missing_values": 0,
            },
            {
                "max": 1.0,
                "min": 0.0,
                "std": 0.46,
                "mean": 0.68,
                "mode": [1.0],
                "count": 13903,
                "range": 1.0,
                "column": "workingday",
                "median": 1.0,
                "variance": 0.22,
                "missing_pct": 0.0,
                "unique_values": 2,
                "missing_values": 0,
            },
            {
                "max": 4.0,
                "min": 1.0,
                "std": 0.64,
                "mean": 1.42,
                "mode": [1.0],
                "count": 13903,
                "range": 3.0,
                "column": "weathersit",
                "median": 1.0,
                "variance": 0.41,
                "missing_pct": 0.0,
                "unique_values": 4,
                "missing_values": 0,
            },
            {
                "max": 1.0,
                "min": 0.02,
                "std": 0.2,
                "mean": 0.5,
                "mode": [0.62],
                "count": 13903,
                "range": 0.98,
                "column": "temp",
                "median": 0.5,
                "variance": 0.04,
                "missing_pct": 0.0,
                "unique_values": 50,
                "missing_values": 0,
            },
            {
                "max": 1.0,
                "min": 0.0,
                "std": 0.18,
                "mean": 0.48,
                "mode": [0.62],
                "count": 13903,
                "range": 1.0,
                "column": "atemp",
                "median": 0.48,
                "variance": 0.03,
                "missing_pct": 0.0,
                "unique_values": 65,
                "missing_values": 0,
            },
            {
                "max": 1.0,
                "min": 0.0,
                "std": 0.2,
                "mean": 0.62,
                "mode": [0.88],
                "count": 13903,
                "range": 1.0,
                "column": "hum",
                "median": 0.62,
                "variance": 0.04,
                "missing_pct": 0.0,
                "unique_values": 88,
                "missing_values": 0,
            },
            {
                "max": 0.85,
                "min": 0.0,
                "std": 0.12,
                "mean": 0.19,
                "mode": [0.0],
                "count": 13903,
                "range": 0.85,
                "column": "windspeed",
                "median": 0.19,
                "variance": 0.02,
                "missing_pct": 0.0,
                "unique_values": 30,
                "missing_values": 0,
            },
            {
                "max": 367.0,
                "min": 0.0,
                "std": 47.34,
                "mean": 34.04,
                "mode": [0.0],
                "count": 13903,
                "range": 367.0,
                "column": "casual",
                "median": 15.0,
                "variance": 2241.19,
                "missing_pct": 0.0,
                "unique_values": 303,
                "missing_values": 0,
            },
            {
                "max": 796.0,
                "min": 0.0,
                "std": 137.51,
                "mean": 140.6,
                "mode": [4.0],
                "count": 13903,
                "range": 796.0,
                "column": "registered",
                "median": 107.0,
                "variance": 18909.65,
                "missing_pct": 0.0,
                "unique_values": 701,
                "missing_values": 0,
            },
            {
                "max": 957.0,
                "min": 1.0,
                "std": 166.96,
                "mean": 174.64,
                "mode": [5.0],
                "count": 13903,
                "range": 956.0,
                "column": "cnt",
                "median": 130.0,
                "variance": 27874.11,
                "missing_pct": 0.0,
                "unique_values": 789,
                "missing_values": 0,
            },
        ],
        "categorical": [
            {
                "column": "datetime",
                "missing_pct": 0.0,
                "total_count": 13903,
                "value_counts": [
                    {
                        "__data__": ["2011-01-01 00:00:00", 1],
                        "__version__": 1,
                        "__classname__": "builtins.tuple",
                    },
                    {
                        "__data__": ["2011-01-01 01:00:00", 1],
                        "__version__": 1,
                        "__classname__": "builtins.tuple",
                    },
                    {
                        "__data__": ["2011-01-01 02:00:00", 1],
                        "__version__": 1,
                        "__classname__": "builtins.tuple",
                    },
                    {
                        "__data__": ["2011-01-01 03:00:00", 1],
                        "__version__": 1,
                        "__classname__": "builtins.tuple",
                    },
                    {
                        "__data__": ["2011-01-01 04:00:00", 1],
                        "__version__": 1,
                        "__classname__": "builtins.tuple",
                    },
                    {
                        "__data__": ["2011-01-01 05:00:00", 1],
                        "__version__": 1,
                        "__classname__": "builtins.tuple",
                    },
                    {
                        "__data__": ["2011-01-01 06:00:00", 1],
                        "__version__": 1,
                        "__classname__": "builtins.tuple",
                    },
                    {
                        "__data__": ["2011-01-01 07:00:00", 1],
                        "__version__": 1,
                        "__classname__": "builtins.tuple",
                    },
                    {
                        "__data__": ["2011-01-01 08:00:00", 1],
                        "__version__": 1,
                        "__classname__": "builtins.tuple",
                    },
                    {
                        "__data__": ["2011-01-01 09:00:00", 1],
                        "__version__": 1,
                        "__classname__": "builtins.tuple",
                    },
                ],
                "unique_values": 13903,
                "missing_values": 0,
            }
        ],
    },
}