# Lab For Experimentation

In [1]:
import json
import warnings
from typing import Any, Literal

import numpy as np
import pandas as pd
import polars as pl
from rich.console import Console
from rich.theme import Theme

# Named styles for the shared Rich console. Each key can be used either as
# inline markup (e.g. "[info]...") or via `style="info"` in `console.print`.
custom_theme = Theme(
    {
        "white": "#FFFFFF",  # Bright white
        "info": "#00FF00",  # Bright green
        "warning": "#FFD700",  # Bright gold
        "error": "#FF1493",  # Deep pink
        "success": "#00FFFF",  # Cyan
        "highlight": "#FF4500",  # Orange-red
    }
)
# Single console instance reused by every cell below.
console = Console(theme=custom_theme)

# Visualization
# import matplotlib.pyplot as plt

# NumPy settings
# Print floats with 4 digits after the decimal point.
np.set_printoptions(precision=4)

# Pandas settings
# Very high limits so frames are effectively never truncated when displayed.
pd.options.display.max_rows = 1_000
pd.options.display.max_columns = 1_000
pd.options.display.max_colwidth = 600

# Polars settings
# Same idea for Polars: long strings, up to 1_000 columns and 200 rows per table.
pl.Config.set_fmt_str_lengths(1_000)
pl.Config.set_tbl_cols(n=1_000)
pl.Config.set_tbl_rows(n=200)

# NOTE(review): this silences *every* warning for the whole kernel session,
# including deprecation and convergence warnings from the ML libraries.
warnings.filterwarnings("ignore")

# Black code formatter (Optional)
# Requires the `lab_black` extension to be installed in the kernel environment.
%load_ext lab_black

# auto reload imports
# Mode 2 reloads all imported modules before each cell execution, so edits to
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
def go_up_from_current_directory(*, go_up: int = 1) -> None:
    """Put an ancestor of the current working directory at the front of ``sys.path``.

    The resolved absolute path is printed so the notebook shows which
    directory became importable.

    Params:
    -------
    go_up: int, default=1
        Number of directory levels to climb from the current working
        directory. ``0`` selects the working directory itself.

    Returns:
    --------
    None

    Raises:
    -------
    ValueError
        If ``go_up`` is negative.
    """
    import os
    import sys

    if go_up < 0:
        raise ValueError(f"go_up must be >= 0, got {go_up}")

    # Climb `go_up` levels from the working directory. The working directory is
    # used explicitly instead of `os.path.dirname(__name__)`, which is a module
    # name rather than a path and only worked because it evaluates to "".
    target_dir = os.path.abspath(os.path.join(os.getcwd(), *([os.pardir] * go_up)))

    # Re-running the cell must not stack duplicate entries: move an existing
    # entry to the front instead of inserting a second copy.
    if target_dir in sys.path:
        sys.path.remove(target_dir)
    sys.path.insert(0, target_dir)
    print(target_dir)


# Demo (Prevents ruff from removing the unused module import)
# The two bare annotations only reference `Any` and `Literal`; they bind no
# values. The `json.loads` call keeps `json` in use and, as the last expression
# of the cell, its parsed dict is what the cell displays.
name: Any
category: Literal["A", "B", "C"]
json.loads('{"name": "Bike Rental Prediction", "category": "A"}')

{'name': 'Bike Rental Prediction', 'category': 'A'}

In [3]:
# Put the directory one level above the working directory on `sys.path`;
# presumably this is what lets the `src.*` imports further down resolve.
go_up_from_current_directory(go_up=1)

/Users/mac/Desktop/Projects/Bike-Rental-Prediction


In [4]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

In [None]:
import narwhals as nw
from narwhals.typing import IntoDataFrameT


def split_temporal_data_to_train_val_test(
    data: IntoDataFrameT,
    test_size: float = 0.2,
    val_size: float = 0.2,
    print_shapes: bool = True,
) -> tuple[IntoDataFrameT, IntoDataFrameT, IntoDataFrameT]:
    """Split data into train, validation and test sets while maintaining temporal order.

    Rows are never shuffled: the first rows form the train set, the following
    rows the validation set and the last rows the test set. The validation
    and test sets receive ``int(val_size * n_rows)`` and
    ``int(test_size * n_rows)`` rows respectively; the train set receives every
    remaining row, so the three parts always add up to the input exactly and
    the train set is never empty for a non-empty input.

    Parameters
    ----------
    data : IntoDataFrameT
        The input dataframe to be split. It must be an eager dataframe,
        because the row count is read from ``.shape``. Rows are assumed to be
        already sorted chronologically; no sorting is done here.
    test_size : float, default=0.2
        Proportion of the dataset to include in the test split.
    val_size : float, default=0.2
        Proportion of the dataset to include in the validation split.
    print_shapes : bool, default=True
        Whether to print the shapes of the resulting dataframes.

    Returns
    -------
    tuple[IntoDataFrameT, IntoDataFrameT, IntoDataFrameT]
        A tuple containing the train, validation, and test dataframes, in the
        same native dataframe type as ``data``.

    Raises
    ------
    ValueError
        If either size is outside ``[0.0, 1.0]`` or if their sum is not
        strictly less than ``1.0``.
    """
    # Validation
    if not 0.0 <= test_size <= 1.0 or not 0.0 <= val_size <= 1.0:
        raise ValueError("test_size and val_size must be between 0.0 and 1.0")
    if (test_size + val_size) >= 1.0:
        raise ValueError("The sum of test_size and val_size must be less than 1.0")

    # Convert to Narwhals DataFrame
    nw_data: nw.DataFrame = nw.from_native(data)
    num_rows: int = nw_data.shape[0]

    # Size val/test from their own fractions and give train the remainder.
    # Computing train as int((1 - test_size - val_size) * n) is fragile: the
    # float subtraction can land just below the exact value and truncate a row.
    num_val: int = int(val_size * num_rows)
    num_test: int = int(test_size * num_rows)
    num_train: int = num_rows - num_val - num_test
    val_end: int = num_train + num_val

    # Pick a helper column name that cannot clash with an existing column.
    row_idx: str = "index"
    while row_idx in nw_data.columns:
        row_idx = f"_{row_idx}"
    indexed: nw.DataFrame = nw_data.with_row_index(name=row_idx)
    position = nw.col(row_idx)

    # The row index is 0-based, so every boundary is half-open: [start, stop).
    # An inclusive upper bound would hand the train set one extra row.
    train_data: nw.DataFrame = indexed.filter(position < num_train).drop(row_idx)
    val_data: nw.DataFrame = indexed.filter(
        (position >= num_train) & (position < val_end)
    ).drop(row_idx)
    test_data: nw.DataFrame = indexed.filter(position >= val_end).drop(row_idx)

    if print_shapes:
        print(
            f"Train shape: {train_data.shape} | Val shape: {val_data.shape} | Test shape: {test_data.shape}"
        )

    return (train_data.to_native(), val_data.to_native(), test_data.to_native())

In [6]:
# Toy frame used to sanity-check the splitter: 15 consecutive integer ids,
# each paired with a job role.
df: pl.DataFrame = pl.DataFrame(
    {
        "idx": list(range(15)),
        "role": (
            "engineer doctor nurse engineer nurse "
            "teacher doctor doctor teacher engineer "
            "teacher nurse teacher engineer nurse"
        ).split(),
    }
)

df

idx,role
i64,str
0,"""engineer"""
1,"""doctor"""
2,"""nurse"""
3,"""engineer"""
4,"""nurse"""
5,"""teacher"""
6,"""doctor"""
7,"""doctor"""
8,"""teacher"""
9,"""engineer"""


In [7]:
# Exercise the splitter on the toy frame with deliberately lopsided fractions
# (80% test, 10% validation); the printed shapes make the split sizes easy to
# check by hand. The train part is displayed as the cell output.
(train_df, val_df, test_df) = split_temporal_data_to_train_val_test(
    data=df,
    test_size=0.8,
    val_size=0.1,
)
train_df

Train shape: (2, 2) | Val shape: (1, 2) | Test shape: (12, 2)


idx,role
i64,str
0,"""engineer"""
1,"""doctor"""


In [8]:
# One seed drives the synthetic data, the split and the forest itself, so the
# fitted model (and its feature importances) are reproducible after a kernel
# restart. Without `random_state` the forest is re-randomised on every run.
SEED: int = 123

rng = np.random.default_rng(SEED)
x = rng.standard_normal(size=(1_000, 10))

X_train, X_test = train_test_split(x, test_size=0.2, random_state=SEED)
# Targets are drawn independently of the features: this cell only smoke-tests
# the estimator API, it is not meant to learn anything.
y_train = rng.standard_normal(size=(X_train.shape[0],))
y_test = rng.standard_normal(size=(X_test.shape[0],))

params: dict[str, Any] = {
    "n_estimators": 100,
    "max_depth": 10,
    "random_state": SEED,
}

rf_reg = RandomForestRegressor(**params)

# `fit` returns the estimator, which is displayed as the cell output.
rf_reg.fit(X_train, y_train)
# rf_reg.score(X_test, y_test)

0,1,2
,n_estimators,100
,criterion,'squared_error'
,max_depth,10
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,1.0
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [9]:
# Importances of the 10 synthetic features for the forest fitted above. The
# targets were random noise, so no feature is expected to stand out.
rf_reg.feature_importances_

array([0.1155, 0.0949, 0.0804, 0.1105, 0.0938, 0.1175, 0.087 , 0.102 ,
       0.1005, 0.0978])

In [10]:
from src.config import app_config
from src.exceptions import (
    CustomError,
    MLFlowConnectionError,
)
from src.ml.feature_engineering import FeatureEngineer
from src.ml.trainer import ModelTrainer
from src.ml.utils import split_temporal_data_to_train_val_test



In [None]:
import httpx

# Address of the local MLflow server that is probed with `check_mlflow` below.
port: int = 5001
url: str = f"http://localhost:{port}"


def check_mlflow(url: str, timeout: float = 2.0) -> bool:
    """
    Probe an MLflow endpoint with a single GET request and report the outcome.

    A message is printed to the shared console in every case.

    Parameters
    ----------
    url : str
        Address to request.
    timeout : float, default=2.0
        Timeout passed to ``httpx.get``.

    Returns
    -------
    bool
        True when the request completes and ``raise_for_status`` does not
        raise; False when any exception is caught.
    """
    reachable: bool = False
    try:
        httpx.get(url, timeout=timeout).raise_for_status()
        console.print("[success]MLflow is accessible")
        reachable = True
    except httpx.HTTPStatusError as exc:
        # The server answered, but with a status that raise_for_status rejects.
        status_code = exc.response.status_code
        console.print(
            f"[error]MLflow returned non-2xx status: {status_code} — {exc}"
        )
    except httpx.RequestError as exc:
        # Transport-level failures: ConnectError, ReadTimeout, etc.
        console.print(f"[error]Network/connection error when contacting MLflow: {exc}")
    except (MLFlowConnectionError, CustomError) as exc:
        console.print(f"[error]Project-specific MLflow error: {exc}")
    except Exception as exc:
        # Last-resort guard so the probe itself never raises.
        console.print(f"[error]Unexpected error: {exc}")
    return reachable


# Probe the local server; the boolean result is shown as the cell output.
check_mlflow(url)

True

In [12]:
# NOTE(review): this path is relative to the kernel's working directory and
# climbs four levels up into a personal Documents folder, so the cell only
# runs on a machine with the same directory layout.
fp: str = "../../../../Documents/data_dump/bike_data/database.parquet"
data: pl.DataFrame = pl.read_parquet(fp)
console.print(f"Shape: {data.shape}", style="info")

# Default fractions (test_size=0.2, val_size=0.2) are used here.
# NOTE(review): the same function name is both defined in this notebook and
# imported from `src.ml.utils`; which one runs depends on cell execution order.
(train_df, val_df, test_df) = split_temporal_data_to_train_val_test(data=data)

Shapes -> Train shape: (8342, 16) | Val shape: (2780, 16) | Test shape: (2781, 16)


In [None]:
feat_eng = FeatureEngineer()

# Each split goes through the same feature pipeline, one after the other and
# independently of the others, using the project-wide feature config.
train_features_df: pl.DataFrame
val_features_df: pl.DataFrame
test_features_df: pl.DataFrame
train_features_df, val_features_df, test_features_df = (
    feat_eng.create_all_features(data=split_df, config=app_config.feature_config)
    for split_df in (train_df, val_df, test_df)
)

# Show the first train rows explicitly; the validation preview is the cell's
# last expression and is rendered automatically.
display(train_features_df.head())

val_features_df.head()

season,mnth,hr,holiday,weekday,workingday,weathersit,temp,hum,is_weekend,sin_hour,cos_hour,sin_weekday,cos_weekday,cnt_lag_0hr,cnt_lag_1hr,cnt_lag_24hr,hr_lag_1hr,hr_lag_24hr,temp_lag_1hr,temp_lag_3hr,hum_lag_1hr,hum_lag_3hr,temp_rolling_mean_3hr,temp_rolling_median_3hr,temp_rolling_mean_6hr,temp_rolling_median_6hr,hum_rolling_mean_3hr,hum_rolling_median_3hr,hum_rolling_mean_6hr,hum_rolling_median_6hr,temp_plus_hum,hum_plus_hr,cnt_diff_1hr,cnt_diff_2hr,hr_diff_1hr,hr_diff_24hr,temp_diff_1hr,temp_diff_2hr,temp_diff_24hr,hum_diff_1hr,hum_diff_2hr,is_high_temp,is_high_hum,is_peak_hour,is_working_hour,is_business_hour,target
i64,i64,i64,i64,i64,i64,i64,f64,f64,i8,f64,f64,f64,f64,i64,i64,i64,i64,i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,i64,i64,i64,i64,f64,f64,f64,f64,f64,i8,i8,i8,i8,i8,i64
1,1,0,0,6,0,1,0.24,0.81,1,0.0,1.0,-0.781831,0.62349,16,16,16,0,0,0.24,0.24,0.81,0.81,0.226667,0.22,0.233333,0.24,0.803333,0.8,0.776667,0.775,1.05,0.81,24,16,1,0,-0.02,-0.02,0.22,-0.01,-0.01,0,0,0,0,0,40
1,1,1,0,6,0,1,0.22,0.8,1,0.258819,0.965926,-0.781831,0.62349,40,16,16,0,0,0.24,0.24,0.81,0.81,0.226667,0.22,0.233333,0.24,0.803333,0.8,0.776667,0.775,1.02,1.8,24,16,1,0,-0.02,-0.02,0.22,-0.01,-0.01,0,0,0,0,0,32
1,1,2,0,6,0,1,0.22,0.8,1,0.5,0.866025,-0.781831,0.62349,32,40,16,1,0,0.22,0.24,0.8,0.81,0.226667,0.22,0.233333,0.24,0.803333,0.8,0.776667,0.775,1.02,2.8,-8,16,1,0,0.0,-0.02,0.22,0.0,-0.01,0,0,0,0,0,13
1,1,3,0,6,0,1,0.24,0.75,1,0.707107,0.707107,-0.781831,0.62349,13,32,16,2,0,0.22,0.24,0.8,0.81,0.226667,0.22,0.233333,0.24,0.783333,0.8,0.776667,0.775,0.99,3.75,-19,-27,1,0,0.02,0.02,0.22,-0.05,-0.05,0,0,0,0,0,1
1,1,4,0,6,0,1,0.24,0.75,1,0.866025,0.5,-0.781831,0.62349,1,13,16,3,0,0.24,0.22,0.75,0.8,0.233333,0.24,0.233333,0.24,0.766667,0.75,0.776667,0.775,0.99,4.75,-12,-31,1,0,0.0,0.02,0.22,0.0,-0.05,0,0,0,0,0,1


season,mnth,hr,holiday,weekday,workingday,weathersit,temp,hum,is_weekend,sin_hour,cos_hour,sin_weekday,cos_weekday,cnt_lag_0hr,cnt_lag_1hr,cnt_lag_24hr,hr_lag_1hr,hr_lag_24hr,temp_lag_1hr,temp_lag_3hr,hum_lag_1hr,hum_lag_3hr,temp_rolling_mean_3hr,temp_rolling_median_3hr,temp_rolling_mean_6hr,temp_rolling_median_6hr,hum_rolling_mean_3hr,hum_rolling_median_3hr,hum_rolling_mean_6hr,hum_rolling_median_6hr,temp_plus_hum,hum_plus_hr,cnt_diff_1hr,cnt_diff_2hr,hr_diff_1hr,hr_diff_24hr,temp_diff_1hr,temp_diff_2hr,temp_diff_24hr,hum_diff_1hr,hum_diff_2hr,is_high_temp,is_high_hum,is_peak_hour,is_working_hour,is_business_hour,target
i64,i64,i64,i64,i64,i64,i64,f64,f64,i8,f64,f64,f64,f64,i64,i64,i64,i64,i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,i64,i64,i64,i64,f64,f64,f64,f64,f64,i8,i8,i8,i8,i8,i64
4,12,6,0,1,1,1,0.16,0.86,0,1.0,6.1232e-17,0.781831,0.62349,68,68,68,6,6,0.16,0.16,0.86,0.86,0.18,0.18,0.21,0.21,0.783333,0.75,0.726667,0.715,1.02,6.86,123,329,1,0,0.02,0.04,0.2,-0.12,-0.11,0,1,0,0,0,191
4,12,7,0,1,1,1,0.18,0.74,0,0.965926,-0.258819,0.781831,0.62349,191,68,68,6,6,0.16,0.16,0.86,0.86,0.18,0.18,0.21,0.21,0.783333,0.75,0.726667,0.715,0.92,7.74,123,329,1,0,0.02,0.04,0.2,-0.12,-0.11,0,0,1,0,0,397
4,12,8,0,1,1,1,0.2,0.75,0,0.866025,-0.5,0.781831,0.62349,397,191,68,7,6,0.18,0.16,0.74,0.86,0.18,0.18,0.21,0.21,0.783333,0.75,0.726667,0.715,0.95,8.75,206,329,1,0,0.02,0.04,0.2,0.01,-0.11,0,0,1,0,1,183
4,12,9,0,1,1,1,0.22,0.69,0,0.707107,-0.707107,0.781831,0.62349,183,397,68,8,6,0.2,0.16,0.75,0.86,0.2,0.2,0.21,0.21,0.726667,0.74,0.726667,0.715,0.91,9.69,-214,-8,1,0,0.02,0.04,0.2,-0.06,-0.05,0,0,1,1,1,108
4,12,10,0,1,1,1,0.24,0.67,0,0.5,-0.866025,0.781831,0.62349,108,183,68,9,6,0.22,0.18,0.69,0.74,0.22,0.22,0.21,0.21,0.703333,0.69,0.726667,0.715,0.91,10.67,-75,-289,1,0,0.02,0.04,0.2,-0.02,-0.08,0,0,0,1,1,127


In [15]:
# Build the trainer from the three engineered splits, with "target" as the
# label column. Per the log output below, construction also prepares the
# x/y arrays and initialises the MLflow tracker.
trainer = ModelTrainer(
    train_data=train_features_df,
    val_data=val_features_df,
    test_data=test_features_df,
    target_col="target",
)

2025-10-13 19:56:36 - trainer - [INFO] - Data prepared -> x_train shape: (8342, 47), y_train shape: (8342,) | x_val shape: (2780, 47), y_val shape: (2780,) | x_test shape: (2781, 47), y_test shape: (2781,)
2025-10-13 19:56:36 - mlflow_tracker - [INFO] - Set MLflow tracking URI to: http://localhost:5001
2025-10-13 19:56:36 - mlflow_tracker - [INFO] - Set experiment to: bike rental (ID: 1)
2025-10-13 19:56:36 - mlflow_tracker - [INFO] - Initialized MLFlowTracker with experiment: bike rental


In [17]:
# trainer._train_random_forest(params={})
# Run the LightGBM tuning routine and keep its return value for inspection.
# NOTE(review): this calls an underscore-prefixed (private) method directly;
# `hyperparameter_tuning_all_models` looks like the public entry point — confirm.
result = trainer._hyperparameter_tuning_lightgbm()

2025-10-13 19:58:32 - trainer - [INFO] - 🚨 Starting hyperparameter tuning for LightGBM with 5 trials...
2025-10-13 19:58:32 - mlflow_tracker - [INFO] - Started MLflow run: 1379afba8d16467f9e04b1d626682f29 (name: run_2025-10-13T19:58:32)


[I 2025-10-13 19:58:32,673] A new study created in memory with name: no-name-0eebe62c-b587-4571-a6d3-56ac951f23fc


2025-10-13 19:58:32 - mlflow_tracker - [INFO] - Started MLflow run: 50c40263f2cf4b21965880885da62023 (name: run_2025-10-13T19:58:32)
Trial 0: Mean RMSE = 60.49
🏃 View run run_2025-10-13T19:58:32 at: http://localhost:5001/#/experiments/1/runs/50c40263f2cf4b21965880885da62023
🧪 View experiment at: http://localhost:5001/#/experiments/1
2025-10-13 19:58:33 - mlflow_tracker - [INFO] - Ended MLflow run with status: FINISHED


[I 2025-10-13 19:58:33,971] Trial 0 finished with value: 60.49 and parameters: {'reg_lambda': 0.03148911647956861, 'reg_alpha': 6.3512210106407005, 'learning_rate': 0.17524101118128144, 'num_leaves': 188, 'max_depth': 5, 'min_child_samples': 12, 'num_boost_round': 210, 'early_stopping_rounds': 88}. Best is trial 0 with value: 60.49.


2025-10-13 19:58:33 - trainer - [INFO] - Initial trial 0 achieved value: 60.49
2025-10-13 19:58:33 - mlflow_tracker - [INFO] - Started MLflow run: cbb7cc5b0f95487185b2aecc8a4e4bbe (name: run_2025-10-13T19:58:33)
Trial 1: Mean RMSE = 62.99
🏃 View run run_2025-10-13T19:58:33 at: http://localhost:5001/#/experiments/1/runs/cbb7cc5b0f95487185b2aecc8a4e4bbe
🧪 View experiment at: http://localhost:5001/#/experiments/1
2025-10-13 19:58:52 - mlflow_tracker - [INFO] - Ended MLflow run with status: FINISHED


[I 2025-10-13 19:58:52,840] Trial 1 finished with value: 62.99 and parameters: {'reg_lambda': 0.25378155082656645, 'reg_alpha': 0.6796578090758157, 'learning_rate': 0.01083858126934475, 'num_leaves': 292, 'max_depth': 13, 'min_child_samples': 14, 'num_boost_round': 445, 'early_stopping_rounds': 26}. Best is trial 0 with value: 60.49.


2025-10-13 19:58:52 - mlflow_tracker - [INFO] - Started MLflow run: f1c22145061b4bf68b9a6e7a78dba17d (name: run_2025-10-13T19:58:52)
Trial 2: Mean RMSE = 59.69
🏃 View run run_2025-10-13T19:58:52 at: http://localhost:5001/#/experiments/1/runs/f1c22145061b4bf68b9a6e7a78dba17d
🧪 View experiment at: http://localhost:5001/#/experiments/1
2025-10-13 19:58:55 - mlflow_tracker - [INFO] - Ended MLflow run with status: FINISHED


[I 2025-10-13 19:58:55,501] Trial 2 finished with value: 59.69 and parameters: {'reg_lambda': 0.016480446427978974, 'reg_alpha': 0.12561043700013563, 'learning_rate': 0.054182823195332415, 'num_leaves': 101, 'max_depth': 10, 'min_child_samples': 11, 'num_boost_round': 655, 'early_stopping_rounds': 43}. Best is trial 2 with value: 59.69.


2025-10-13 19:58:55 - trainer - [INFO] - Trial 2 achieved value: 59.69 with 1.3225% improvement
2025-10-13 19:58:55 - mlflow_tracker - [INFO] - Started MLflow run: 3a72cde674a1430abc034938719d6b51 (name: run_2025-10-13T19:58:55)
Trial 3: Mean RMSE = 61.44
🏃 View run run_2025-10-13T19:58:55 at: http://localhost:5001/#/experiments/1/runs/3a72cde674a1430abc034938719d6b51
🧪 View experiment at: http://localhost:5001/#/experiments/1
2025-10-13 19:59:02 - mlflow_tracker - [INFO] - Ended MLflow run with status: FINISHED


[I 2025-10-13 19:59:02,702] Trial 3 finished with value: 61.44 and parameters: {'reg_lambda': 0.06672367170464209, 'reg_alpha': 1.3826232179369866, 'learning_rate': 0.021839352923182988, 'num_leaves': 164, 'max_depth': 10, 'min_child_samples': 7, 'num_boost_round': 1254, 'early_stopping_rounds': 25}. Best is trial 2 with value: 59.69.


2025-10-13 19:59:02 - mlflow_tracker - [INFO] - Started MLflow run: e6328928ef524b3dacac0a207a2d56b2 (name: run_2025-10-13T19:59:02)
Trial 4: Mean RMSE = 62.7
🏃 View run run_2025-10-13T19:59:02 at: http://localhost:5001/#/experiments/1/runs/e6328928ef524b3dacac0a207a2d56b2
🧪 View experiment at: http://localhost:5001/#/experiments/1
2025-10-13 19:59:03 - mlflow_tracker - [INFO] - Ended MLflow run with status: FINISHED


[I 2025-10-13 19:59:03,130] Trial 4 finished with value: 62.7 and parameters: {'reg_lambda': 0.0018205657658407262, 'reg_alpha': 6.24513957474307, 'learning_rate': 0.43709904681305034, 'num_leaves': 247, 'max_depth': 6, 'min_child_samples': 9, 'num_boost_round': 1400, 'early_stopping_rounds': 50}. Best is trial 2 with value: 59.69.


Best trial: 2 | Value: 59.69
Training until validation scores don't improve for 43 rounds
Early stopping, best iteration is:
[162]	validation's l2: 3562.34
2025-10-13 19:59:06 - mlflow_tracker - [INFO] - ✅ Successfully logged ModelType.LIGHTGBM model and metadata
🏃 View run run_2025-10-13T19:58:32 at: http://localhost:5001/#/experiments/1/runs/1379afba8d16467f9e04b1d626682f29
🧪 View experiment at: http://localhost:5001/#/experiments/1
2025-10-13 19:59:06 - mlflow_tracker - [INFO] - Ended MLflow run with status: FINISHED


In [18]:
# Pretty-print whatever the tuning call above returned.
console.print(result)

In [None]:
# trainer.hyperparameter_tuning_all_models()

In [None]:
# Run the XGBoost tuning routine. This rebinds `result`, replacing the value
# produced by the LightGBM cell above, so later cells see whichever tuning
# cell was executed last.
result = trainer._hyperparameter_tuning_xgboost()
# result = trainer._hyperparameter_tuning_lightgbm()

result

In [None]:
import mlflow

# End any MLflow run that an earlier cell may have left active; a failure here
# is only printed so the rest of the cell still runs.
try:
    mlflow.end_run()
except Exception as e:
    print(e)

# Fetch the artifact stored under "models" for the run referenced by `result`.
# NOTE(review): `ModelType` is not imported anywhere in the notebook as shown,
# so this raises NameError on a fresh kernel — add the import from the project
# module that defines it (module path not visible here; confirm).
trainer.mlflow_tracker.load_model_artifact(
    run_id=result["run_id"],
    model_name=ModelType.XGBOOST.value,
    artifact_subpath="models",
)

In [None]:
from src.exp_tracking.model_loader import (
    load_best_model,
)

In [None]:
# console.print(result, style="highlight")

# Fetch the best model recorded for the configured experiment
best_model_artifacts = load_best_model(
    experiment_name=app_config.experiment_config.experiment_name
)

# Report the outcome: an empty / missing result is a warning, anything else
# is announced together with the model type found in its metadata.
if not best_model_artifacts:
    console.print("[warning]No best model found!", style="warning")
else:
    best_model_type = best_model_artifacts.get("metadata", {}).get(
        "model_type", "Unknown"
    )
    console.print(
        f"[success]Loaded best model: {best_model_type}",
        style="success",
    )

In [None]:
# Display the loaded best model artifacts
console.print(best_model_artifacts)
best_model_artifacts.keys()

In [None]:
# Preview the input example held by the trainer; it is passed to
# `register_model` in the next cell.
trainer.input_example.head()

In [None]:
# Register the loaded best model through the project's tracker, using the
# trainer's input example. `.get` returns None for a missing key, so absent
# artifacts are passed through as None rather than raising here.
trainer.mlflow_tracker.register_model(
    run_id=best_model_artifacts.get("run_id"),
    model=best_model_artifacts.get("model"),
    model_name=best_model_artifacts.get("model_name"),
    input_example=trainer.input_example,
)

In [None]:
# List the fields available in the best model's metadata.
best_model_artifacts["metadata"].keys()

In [None]:
# Extract model information
model = best_model_artifacts["model"]
model_name = best_model_artifacts["model_name"]
run_id = best_model_artifacts["run_id"]
metadata = best_model_artifacts["metadata"]
input_example = best_model_artifacts.get("input_example")

# Convert the first record of the input example to pandas, if one was stored.
# An explicit None / length check is used instead of bare truthiness because
# `bool(...)` raises for pandas DataFrames and NumPy arrays.
input_example_df = None
if input_example is not None and len(input_example) > 0:
    # Drop the label column when present (presumably so the example carries
    # features only); `errors="ignore"` tolerates examples stored without it.
    input_example_df = pd.DataFrame(input_example[:1]).drop(
        columns=["target"], errors="ignore"
    )

# Prepare tags
tags: dict[str, Any] = {
    "experiment_name": app_config.experiment_config.experiment_name,
    "model_type": metadata.get("model_type", "Unknown"),
    "timestamp": metadata.get("timestamp", "Unknown"),
}

# Log the model using MLflow's native format for model registry
# This creates a proper MLmodel file that can be registered
try:
    # Resolve the flavour BEFORE re-opening the run: an exception raised inside
    # the `with mlflow.start_run(...)` block would end the resumed run in a
    # failed state, which is not wanted for a merely unsupported model name.
    # The order matters and mirrors the original XGBOOST -> LIGHTGBM ->
    # RANDOM_FOREST precedence.
    supported_flavors: tuple[str, ...] = ("XGBOOST", "LIGHTGBM", "RANDOM_FOREST")
    flavor = next((key for key in supported_flavors if key in model_name), None)
    if flavor is None:
        raise ValueError(f"Unsupported model type: {model_name}")

    registered_model_name = f"{model_name}_best"

    with mlflow.start_run(run_id=run_id):
        # Set tags for the run
        mlflow.set_tags(tags)

        # Log with the flavour-specific API; each one names its model argument
        # differently.
        if flavor == "XGBOOST":
            model_info = mlflow.xgboost.log_model(
                xgb_model=model,
                artifact_path="registered_model",
                registered_model_name=registered_model_name,
                input_example=input_example_df,
            )
        elif flavor == "LIGHTGBM":
            model_info = mlflow.lightgbm.log_model(
                lgb_model=model,
                artifact_path="registered_model",
                registered_model_name=registered_model_name,
                input_example=input_example_df,
            )
        else:  # RANDOM_FOREST
            # NOTE(review): only this branch forwards `metadata=tags`; confirm
            # whether the other flavours should do the same.
            model_info = mlflow.sklearn.log_model(
                sk_model=model,
                artifact_path="registered_model",
                registered_model_name=registered_model_name,
                input_example=input_example_df,
                metadata=tags,
            )

except Exception as e:
    # Best-effort cell: report the failure instead of aborting the notebook.
    console.print(f"[error]Error registering model: {e}", style="error")

In [None]:
# Sample split-metadata record: a timestamp, per-split row counts and parquet
# paths. The paths live under /opt/airflow, so this was presumably copied from
# an Airflow task's output — TODO confirm the source. As a bare dict literal,
# running the cell simply echoes it.
{
    "datetime": "2025-10-13T16:32:34",
    "num_val_rows": 1390,
    "num_test_rows": 1390,
    "val_data_path": "/opt/airflow/artifacts/data/val_data.parquet",
    "num_train_rows": 11123,
    "test_data_path": "/opt/airflow/artifacts/data/test_data.parquet",
    "train_data_path": "/opt/airflow/artifacts/data/train_data.parquet",
}