# Training models

In this notebook we take a methodical approach to testing different types of models, tuning each one over a search space with Hyperopt and recording the experiments in MLflow.

## 1.0 Basic imports and setup

- Start by running `mlflow` locally. In the terminal, run:

`mlflow ui --backend-store-uri file:./mlruns --port 5001`

- Make sure you have the environment `ml_kaggle` activated.

In [1]:
# start by importing libraries
import pandas as pd
import numpy as np

import joblib
import tempfile
import json
import os
from dotenv import load_dotenv
load_dotenv()

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor # might import others as needed
from sklearn.metrics import root_mean_squared_error

from hyperopt import fmin, tpe, hp, Trials, STATUS_OK

# local utils
import sys
sys.path.append('.')

from src.utils import predict_and_create_submission

In [2]:
# preparing for MLflow
import mlflow
from mlflow.tracking import MlflowClient
import mlflow.sklearn

# -------------------------------
# SETTINGS
# -------------------------------
# Name of the MLflow experiment that every run in this notebook is logged under.
EXPERIMENT_NAME = "10_predicting_road_accident_risk"
# NOTE(review): SEED is reassigned to 42 in the cross-validation cell further down;
# when the cells run in order, that later value is the one the model factories and
# KFold actually read.
SEED = 1
# NOTE(review): RMSE_THRESHOLD is not referenced by any cell visible in this notebook.
RMSE_THRESHOLD = 0.057          # acceptable RMSE based on dummy model
# NOTE(review): the search loop at the bottom hard-codes its own eval counts and
# does not read MAX_EVALS.
MAX_EVALS = 25                 # number of hyperopt trials
# NOTE(review): the random-forest factory hard-codes n_jobs=-1 and does not read N_JOBS.
N_JOBS = -1                    # RF parallelism

# ---- Tracking URI from your .env ----
# MLFLOW_PORT comes from the environment (load_dotenv() was called in the import
# cell); falls back to "5001" when the variable is unset.
port = os.getenv("MLFLOW_PORT", "5001")

# If notebook runs on HOST use localhost; if inside another container use service name 'mlflow'
candidates = [f"http://localhost:{port}", f"http://mlflow:{port}"]

# Try each candidate in order and keep the first one that answers a cheap API call.
last_err = None
connected_uri = None
for uri in candidates:
    try:
        mlflow.set_tracking_uri(uri)
        client = MlflowClient()

        # Health check compatible across MLflow versions
        if hasattr(client, "search_experiments"):
            _ = client.search_experiments(max_results=1)
        elif hasattr(client, "list_experiments"):
            _ = client.list_experiments()  # older MLflow
        else:
            # Fallback: try fetching the Default experiment (id "0")
            _ = client.get_experiment_by_name("Default") or client.get_experiment("0")

        connected_uri = uri
        break
    except Exception as e:
        # Deliberately broad: any failure just means "this candidate is unreachable".
        # Only the most recent error is kept for the message below.
        last_err = e

# Fail fast with the last connection error instead of letting later cells
# fail on their first logging call.
if not connected_uri:
    raise RuntimeError(f"Could not reach MLflow. Last error: {last_err}")

print("Connected to MLflow at:", connected_uri)

# ---- Create/select your experiment ----
# The tracking URI left set by the loop above is the one that connected
# (the loop breaks right after the successful candidate).
mlflow.set_experiment(EXPERIMENT_NAME)
print("Tracking:", mlflow.get_tracking_uri())


# Smoke test
""" with mlflow.start_run(run_name="smoke-test"):
    mlflow.log_param("ok", True)
    mlflow.log_metric("ping", 1.0)
print("Logged a test run.") """


Connected to MLflow at: http://localhost:5001
Tracking: http://localhost:5001


' with mlflow.start_run(run_name="smoke-test"):\n    mlflow.log_param("ok", True)\n    mlflow.log_metric("ping", 1.0)\nprint("Logged a test run.") '

In [3]:
# load the data
# Paths are relative to the notebook's working directory.
train_data = pd.read_csv('data/train_split.csv')
test_data = pd.read_csv('data/test_split.csv')

# Target column as plain NumPy arrays; y_test is only used for the final
# hold-out evaluation at the bottom of the notebook.
y_train = train_data['accident_risk'].values
y_test = test_data['accident_risk'].values

In [4]:
# select the features we will use
# num_features are passed through to the model unchanged; cat_features are
# one-hot encoded by the preprocessor. Note that 'speed_limit' and
# 'num_reported_accidents' are listed as categorical here, so each distinct
# value becomes its own indicator column.
num_features = ['curvature']
cat_features = ['speed_limit', 'lighting', 'num_reported_accidents', 'weather']

## 2.0 Functions to train the models

In [5]:
def train_model(X, y, cat_features, num_features, model_name, model_config):
    """Fit a preprocessing + estimator pipeline on an 80% split of the data.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature table; must contain every column named in ``cat_features``
        and ``num_features`` (it is indexed with the combined column list).
    y : array-like
        Target values aligned row-for-row with ``X``.
    cat_features : list of str
        Columns to one-hot encode.
    num_features : list of str
        Columns passed through to the estimator unchanged.
    model_name : str
        Name given to the estimator step of the pipeline.
    model_config : estimator
        Unfitted scikit-learn estimator used as the final pipeline step.

    Returns
    -------
    sklearn.pipeline.Pipeline
        Pipeline fitted on the training portion of an 80/20 split.
    """
    # 1. Split the data. Only the training part is used; the 20% hold-out is
    #    discarded here, so the split only controls which rows the model sees.
    X_train, _, y_train, _ = train_test_split(
        X[cat_features + num_features], y, test_size=0.2, random_state=42
    )

    # 2. Define transformers
    categorical_transformer = OneHotEncoder(
        drop="first",            # drop one dummy per category to avoid multicollinearity
        handle_unknown="ignore"  # unseen categories encode as all zeros instead of raising
    )

    # 3. Combine into a ColumnTransformer
    preprocessor = ColumnTransformer(
        transformers=[
            ("num", "passthrough", num_features),
            # Fixed: this previously referenced the undefined name
            # `cat_cat_featurescols`, which raised NameError on every call.
            ("cat", categorical_transformer, cat_features),
        ]
    )

    # 4. Build the pipeline
    model = Pipeline(steps=[
        ("preprocessor", preprocessor),
        (model_name, model_config)
    ])

    # 5. Fit the model
    model.fit(X_train, y_train)

    return model

In [6]:
def make_preprocessor(cat_cols, num_cols):
    """Build the column preprocessor used in front of every model.

    ``num_cols`` are passed through untouched and ``cat_cols`` are one-hot
    encoded (first level dropped, unknown levels ignored). Columns not named
    in either list are dropped, and the combined output is kept sparse.
    """
    transformers = [
        ("num", "passthrough", num_cols),
        (
            "cat",
            OneHotEncoder(drop="first", handle_unknown="ignore", sparse_output=True),
            cat_cols,
        ),
    ]
    return ColumnTransformer(
        transformers=transformers,
        remainder="drop",
        sparse_threshold=1.0,   # keep sparse when possible
    )


In [7]:
from hyperopt import hp
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor

# Registry of model families that can be searched. Each entry provides:
#   "factory": callable taking a parameter dict and returning an unfitted estimator
#   "space":   hyperopt search space (an empty dict means "nothing to tune")
#   "cast":    callable converting a sampled parameter dict into the value types
#              the factory expects (the int(...) casts below are applied to the
#              hp.quniform parameters)
# The factories look up SEED when they are called, not when this dict is defined,
# so they use whatever value SEED holds at search time.
# NOTE(review): hyperopt's fmin reports hp.choice / hp.pchoice selections as option
# indices in its return value; decode with hyperopt.space_eval before handing that
# result to "cast" -- confirm against the hyperopt docs.
MODEL_REGISTRY = {
    "linreg": {
        "factory": lambda p: LinearRegression(),  # no hyperparams
        "space": {}, 
        "cast": lambda p: p
    },
    "ridge": {
        "factory": lambda p: Ridge(alpha=p["alpha"], random_state=SEED),
        "space": {
            # log-uniform over [1e-4, 1e3]
            "alpha": hp.loguniform("alpha", np.log(1e-4), np.log(1e3)),
        },
        "cast": lambda p: p,
    },
    "lasso": {
        "factory": lambda p: Lasso(alpha=p["alpha"], random_state=SEED, max_iter=20000),
        "space": {
            # log-uniform over [1e-4, 1e1]
            "alpha": hp.loguniform("alpha", np.log(1e-4), np.log(1e1)),
        },
        "cast": lambda p: p,
    },
    "elasticnet": {
        "factory": lambda p: ElasticNet(alpha=p["alpha"], l1_ratio=p["l1_ratio"], random_state=SEED, max_iter=20000),
        "space": {
            "alpha": hp.loguniform("alpha", np.log(1e-4), np.log(1e1)),
            "l1_ratio": hp.uniform("l1_ratio", 0.0, 1.0),
        },
        "cast": lambda p: p,
    },
    "rf": {
        "factory": lambda p: RandomForestRegressor(
            n_estimators=p["n_estimators"],
            max_depth=p["max_depth"],
            min_samples_split=p["min_samples_split"],
            min_samples_leaf=p["min_samples_leaf"],
            max_features=p["max_features"],
            random_state=SEED,
            n_jobs=-1,
        ),
        "space": {
            "n_estimators": hp.quniform("n_estimators", 100, 600, 1),
            # 25% of draws use unlimited depth (None); 75% draw a depth from 4..40.
            "max_depth": hp.pchoice("max_depth", [(0.25, None), (0.75, hp.quniform("md_int", 4, 40, 1))]),
            "min_samples_split": hp.quniform("min_samples_split", 2, 20, 1),
            "min_samples_leaf": hp.quniform("min_samples_leaf", 1, 20, 1),
            # Either a named strategy or a fraction of features in [0.3, 1.0].
            "max_features": hp.choice("max_features", ["sqrt", "log2", hp.uniform("max_feat_frac", 0.3, 1.0)]),
        },
        "cast": lambda p: {
            **p,
            "n_estimators": int(p["n_estimators"]),
            "max_depth": None if p["max_depth"] is None else int(p["max_depth"]),
            "min_samples_split": int(p["min_samples_split"]),
            "min_samples_leaf": int(p["min_samples_leaf"]),
            # max_features may be str or float – leave as is
        },
    },
    "gbr": {
        "factory": lambda p: GradientBoostingRegressor(
            n_estimators=p["n_estimators"],
            learning_rate=p["learning_rate"],
            max_depth=p["max_depth"],
            subsample=p["subsample"],
            min_samples_split=p["min_samples_split"],
            min_samples_leaf=p["min_samples_leaf"],
            random_state=SEED,
        ),
        "space": {
            "n_estimators": hp.quniform("n_estimators", 50, 500, 1),
            # log-uniform over [1e-3, 0.5]
            "learning_rate": hp.loguniform("learning_rate", np.log(1e-3), np.log(0.5)),
            "max_depth": hp.quniform("max_depth", 2, 8, 1),
            "subsample": hp.uniform("subsample", 0.5, 1.0),
            "min_samples_split": hp.quniform("min_samples_split", 2, 20, 1),
            "min_samples_leaf": hp.quniform("min_samples_leaf", 1, 10, 1),
        },
        "cast": lambda p: {
            **p,
            "n_estimators": int(p["n_estimators"]),
            "max_depth": int(p["max_depth"]),
            "min_samples_split": int(p["min_samples_split"]),
            "min_samples_leaf": int(p["min_samples_leaf"]),
        },
    },
}


In [8]:
from sklearn.model_selection import KFold, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.metrics import make_scorer, mean_squared_error
import mlflow.sklearn
from hyperopt import fmin, tpe, Trials, STATUS_OK
import numpy as np
import pandas as pd
import json, joblib, tempfile

# NOTE(review): this overrides the SEED = 1 set in the settings cell near the top.
# From here on, the model factories, KFold and the hyperopt rstate all see 42.
SEED = 42
# Number of KFold splits used for every cross-validated score in this notebook.
CV_FOLDS = 5

def rmse(y_true, y_pred):
    """Return the root mean squared error between ``y_true`` and ``y_pred``."""
    mse = mean_squared_error(y_true, y_pred)
    return np.sqrt(mse)

# Scorer for cross-validation: greater_is_better=False makes it return the
# negated RMSE, so callers flip the sign to recover the positive value.
rmse_scorer = make_scorer(rmse, greater_is_better=False)

def make_pipeline(model_key, params, cat_cols, num_cols):
    """Assemble an unfitted preprocessing + estimator pipeline.

    The estimator is built by the ``"factory"`` registered under
    ``model_key`` in ``MODEL_REGISTRY``, called with ``params``. The
    pipeline steps are named ``"preprocessor"`` and ``"model"``.
    """
    steps = [("preprocessor", make_preprocessor(cat_cols, num_cols))]
    steps.append(("model", MODEL_REGISTRY[model_key]["factory"](params)))
    return Pipeline(steps)

def objective(model_key, X, y, cat_cols, num_cols, exp_id=None):
    """Build the hyperopt search space and objective function for one model family.

    Parameters
    ----------
    model_key : str
        Key into ``MODEL_REGISTRY``.
    X, y
        Training features and targets, passed to ``cross_val_score`` and to
        the final ``fit``.
    cat_cols, num_cols : list of str
        Column lists forwarded to ``make_pipeline``.
    exp_id : str or None, default None
        MLflow experiment id the trial runs are created under. ``None`` lets
        MLflow use the currently active experiment.

    Returns
    -------
    (space, _obj)
        ``space`` is the registry's hyperopt space; ``_obj(raw_params)`` runs
        one trial and returns ``{"loss": <mean CV RMSE>, "status": STATUS_OK}``.
    """
    space = MODEL_REGISTRY[model_key]["space"]
    caster = MODEL_REGISTRY[model_key]["cast"]

    def _obj(raw_params):
        # Convert sampled values to the types the estimator factory expects.
        params = caster(dict(raw_params)) if raw_params else {}
        # Fixed: exp_id was accepted but never used, so trial runs ignored the
        # experiment requested by the caller (the no-search path in run_search
        # already passed it). With exp_id=None the behaviour is unchanged.
        with mlflow.start_run(experiment_id=exp_id, run_name=f"{model_key}-hyperopt"):
            mlflow.set_tags({
                "model_key": model_key,
                "stage": "hyperopt",
                "cv_folds": CV_FOLDS,
            })
            mlflow.log_params(params)

            pipe = make_pipeline(model_key, params, cat_cols, num_cols)
            cv = KFold(n_splits=CV_FOLDS, shuffle=True, random_state=SEED)

            # cross_val_score returns negative RMSE because greater_is_better=False
            scores = cross_val_score(pipe, X, y, scoring=rmse_scorer, cv=cv, n_jobs=-1)
            rmse_cv = -scores  # make positive

            mlflow.log_metric("rmse_mean", float(rmse_cv.mean()))
            mlflow.log_metric("rmse_std", float(rmse_cv.std()))
            for i, s in enumerate(rmse_cv, 1):
                mlflow.log_metric(f"rmse_fold_{i}", float(s))

            # Fit once on full training for artifact logging
            pipe.fit(X, y)
            mlflow.sklearn.log_model(pipe, artifact_path="model")

            # Also dump params as artifact for convenience
            with tempfile.TemporaryDirectory() as tmp:
                with open(os.path.join(tmp, "params.json"), "w") as f:
                    json.dump(params, f, indent=2)
                mlflow.log_artifacts(tmp, artifact_path="run_meta")

            # hyperopt minimises "loss", so the mean CV RMSE is returned as-is.
            return {"loss": float(rmse_cv.mean()), "status": STATUS_OK}

    return space, _obj


In [9]:
from hyperopt import Trials

def run_search(model_key, X, y, cat_cols, num_cols, max_evals=30, exp_id=None):
    """Tune one model family with hyperopt and return the refitted best pipeline.

    Parameters
    ----------
    model_key : str
        Key into ``MODEL_REGISTRY``.
    X, y
        Training features and targets.
    cat_cols, num_cols : list of str
        Column lists forwarded to ``make_pipeline``.
    max_evals : int, default 30
        Number of hyperopt trials. Ignored when the model has an empty
        search space.
    exp_id : str or None, default None
        MLflow experiment id to log under; ``None`` uses the active experiment.

    Returns
    -------
    (pipeline, params, trials)
        The pipeline fitted on all of ``X``/``y``, the parameter dict it was
        built with, and the hyperopt ``Trials`` object (``None`` when no
        search was run).
    """
    entry = MODEL_REGISTRY[model_key]
    space = entry["space"]
    cast  = entry["cast"]

    print(f"=== Searching {model_key} with {max_evals} evals ===")

    # --- No hyperparameters: do a single CV run + fit, log to MLflow ---
    if not space:   # handles {} or None
        params = {}
        pipe = make_pipeline(model_key, params, cat_cols, num_cols)

        cv = KFold(n_splits=CV_FOLDS, shuffle=True, random_state=SEED)
        scores = cross_val_score(pipe, X, y, scoring=rmse_scorer, cv=cv, n_jobs=-1)
        rmse_cv = -scores  # scorer returns negated RMSE

        with mlflow.start_run(experiment_id=exp_id, run_name=f"{model_key}-no_search"):
            mlflow.set_tags({"model_key": model_key, "stage": "hyperopt", "cv_folds": CV_FOLDS})
            mlflow.log_metric("rmse_mean", float(rmse_cv.mean()))
            mlflow.log_metric("rmse_std",  float(rmse_cv.std()))
            for i, s in enumerate(rmse_cv, 1):
                mlflow.log_metric(f"rmse_fold_{i}", float(s))
            pipe.fit(X, y)
            mlflow.sklearn.log_model(pipe, artifact_path="model")

        return pipe, params, None

    # --- Regular Hyperopt path ---
    # Imported locally so this fix does not depend on the notebook's import cells.
    from hyperopt import space_eval

    space, obj = objective(model_key, X, y, cat_cols, num_cols, exp_id)

    trials = Trials()
    best_raw = fmin(fn=obj, space=space, algo=tpe.suggest,
                    max_evals=max_evals, trials=trials,
                    rstate=np.random.default_rng(SEED))

    # Fixed: fmin reports hp.choice / hp.pchoice selections as option *indices*
    # (and includes inner labels such as "md_int"), so casting best_raw directly
    # produced e.g. max_depth=1 or max_features=2 for the "rf" space.
    # space_eval maps the assignment back to the actual parameter values.
    best_params = cast(dict(space_eval(space, best_raw)))
    best_pipe = make_pipeline(model_key, best_params, cat_cols, num_cols)
    best_pipe.fit(X, y)

    print("Best params:", best_params)
    return best_pipe, best_params, trials



In [11]:
# Assumes you already did:
# train_data, test_data loaded
# Targets and feature lists are re-declared here with the same values as in
# the earlier cells, so this cell only needs train_data / test_data.
y_train = train_data['accident_risk'].values
y_test  = test_data['accident_risk'].values
num_features = ['curvature']
cat_features = ['speed_limit', 'lighting', 'num_reported_accidents', 'weather']

X_train = train_data[cat_features + num_features]
X_test  = test_data[cat_features + num_features]

# One dict per model family; turned into a comparison table at the end.
results = []

# Full sweep kept for reference; only the gradient-boosting search is active below.
#for key, evals in [("linreg", 0), ("ridge", 40), ("lasso", 40), ("elasticnet", 60), ("rf", 60), ("gbr", 80)]:
for key, evals in [("gbr", 25)]:
    # run_search is called without exp_id, so its runs go to the active MLflow experiment.
    model, best_params, trials = run_search(key, X_train, y_train, cat_features, num_features, max_evals=evals)
    # Score the refitted best pipeline on the hold-out rows from test_split.csv.
    y_pred = model.predict(X_test)
    test_rmse = rmse(y_test, y_pred)

    # Log a final run per model family for the test result
    with mlflow.start_run(run_name=f"{key}-final"):
        mlflow.set_tags({"model_key": key, "stage": "final_test"})
        mlflow.log_params(best_params)
        mlflow.log_metric("test_rmse", float(test_rmse))
        mlflow.sklearn.log_model(model, artifact_path="model")

    results.append({"model": key, "test_rmse": float(test_rmse), "best_params": best_params})

# Last expression of the cell: displays the table sorted by ascending test RMSE.
pd.DataFrame(results).sort_values("test_rmse")


=== Searching gbr with 25 evals ===
  0%|          | 0/25 [00:00<?, ?trial/s, best loss=?]


2025/10/13 20:31:00 INFO mlflow.tracking._tracking_service.client: 🏃 View run gbr-hyperopt at: http://localhost:5001/#/experiments/284535062331422681/runs/0c2ddb36f02640b788950f52d4c9b5a4.

2025/10/13 20:31:00 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5001/#/experiments/284535062331422681.



  4%|▍         | 1/25 [10:01<4:00:47, 601.99s/trial, best loss: 0.10045919045891556]


2025/10/13 20:35:36 INFO mlflow.tracking._tracking_service.client: 🏃 View run gbr-hyperopt at: http://localhost:5001/#/experiments/284535062331422681/runs/46e1e2a0aae8419b93ddd1e1986ac5b7.

2025/10/13 20:35:36 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5001/#/experiments/284535062331422681.



  8%|▊         | 2/25 [14:37<2:37:10, 410.01s/trial, best loss: 0.10045919045891556]


2025/10/13 20:36:43 INFO mlflow.tracking._tracking_service.client: 🏃 View run gbr-hyperopt at: http://localhost:5001/#/experiments/284535062331422681/runs/80b87f4d978a45fba5a053a17ef29181.

2025/10/13 20:36:43 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5001/#/experiments/284535062331422681.



 12%|█▏        | 3/25 [15:44<1:32:49, 253.14s/trial, best loss: 0.0566114062695119] 


2025/10/13 20:37:35 INFO mlflow.tracking._tracking_service.client: 🏃 View run gbr-hyperopt at: http://localhost:5001/#/experiments/284535062331422681/runs/56f40421389b41c78e49c9011fc1e2f9.

2025/10/13 20:37:35 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5001/#/experiments/284535062331422681.



 16%|█▌        | 4/25 [16:36<1:00:51, 173.89s/trial, best loss: 0.0566114062695119]


2025/10/13 20:38:38 INFO mlflow.tracking._tracking_service.client: 🏃 View run gbr-hyperopt at: http://localhost:5001/#/experiments/284535062331422681/runs/30940637dacb4345a478122c56002edd.

2025/10/13 20:38:38 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5001/#/experiments/284535062331422681.



 20%|██        | 5/25 [17:39<44:39, 134.00s/trial, best loss: 0.0566114062695119]  


2025/10/13 20:42:03 INFO mlflow.tracking._tracking_service.client: 🏃 View run gbr-hyperopt at: http://localhost:5001/#/experiments/284535062331422681/runs/7db9b1790da84142a1a551b1634fb9bd.

2025/10/13 20:42:03 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5001/#/experiments/284535062331422681.



 24%|██▍       | 6/25 [21:04<50:00, 157.92s/trial, best loss: 0.0566114062695119]


2025/10/13 20:48:24 INFO mlflow.tracking._tracking_service.client: 🏃 View run gbr-hyperopt at: http://localhost:5001/#/experiments/284535062331422681/runs/0205f839f41f4e42838b7ab35b093785.

2025/10/13 20:48:24 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5001/#/experiments/284535062331422681.



 28%|██▊       | 7/25 [27:25<1:09:19, 231.09s/trial, best loss: 0.056500124139159044]


2025/10/13 20:51:10 INFO mlflow.tracking._tracking_service.client: 🏃 View run gbr-hyperopt at: http://localhost:5001/#/experiments/284535062331422681/runs/ff908cad263a470f9b0558c7819eab89.

2025/10/13 20:51:10 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5001/#/experiments/284535062331422681.



 32%|███▏      | 8/25 [30:11<59:32, 210.14s/trial, best loss: 0.056500124139159044]  


2025/10/13 21:06:28 INFO mlflow.tracking._tracking_service.client: 🏃 View run gbr-hyperopt at: http://localhost:5001/#/experiments/284535062331422681/runs/de68ebdc62e243fca551a701000f5df1.

2025/10/13 21:06:28 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5001/#/experiments/284535062331422681.



 36%|███▌      | 9/25 [45:29<1:55:06, 431.63s/trial, best loss: 0.056500124139159044]


2025/10/13 21:10:47 INFO mlflow.tracking._tracking_service.client: 🏃 View run gbr-hyperopt at: http://localhost:5001/#/experiments/284535062331422681/runs/0c4656cb51be40c3b17be0f89fa83be6.

2025/10/13 21:10:47 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5001/#/experiments/284535062331422681.



 40%|████      | 10/25 [49:48<1:34:34, 378.31s/trial, best loss: 0.05641483309335117]


2025/10/13 21:12:59 INFO mlflow.tracking._tracking_service.client: 🏃 View run gbr-hyperopt at: http://localhost:5001/#/experiments/284535062331422681/runs/f6519170815e45519772bc12b555e9a1.

2025/10/13 21:12:59 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5001/#/experiments/284535062331422681.



 44%|████▍     | 11/25 [52:00<1:10:38, 302.76s/trial, best loss: 0.05641483309335117]


2025/10/13 21:16:21 INFO mlflow.tracking._tracking_service.client: 🏃 View run gbr-hyperopt at: http://localhost:5001/#/experiments/284535062331422681/runs/b022dd9645ed47cca4631d0bef24b097.

2025/10/13 21:16:21 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5001/#/experiments/284535062331422681.



 48%|████▊     | 12/25 [55:23<59:00, 272.37s/trial, best loss: 0.05641483309335117]  


2025/10/13 21:24:20 INFO mlflow.tracking._tracking_service.client: 🏃 View run gbr-hyperopt at: http://localhost:5001/#/experiments/284535062331422681/runs/c75001e425934a45b25609241640cd36.

2025/10/13 21:24:20 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5001/#/experiments/284535062331422681.



 52%|█████▏    | 13/25 [1:03:21<1:06:59, 334.95s/trial, best loss: 0.05638477732136464]


2025/10/13 21:30:23 INFO mlflow.tracking._tracking_service.client: 🏃 View run gbr-hyperopt at: http://localhost:5001/#/experiments/284535062331422681/runs/ba75af668a334e1dadf25ed256be5911.

2025/10/13 21:30:23 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5001/#/experiments/284535062331422681.



 56%|█████▌    | 14/25 [1:09:24<1:02:55, 343.19s/trial, best loss: 0.05638477732136464]


2025/10/13 21:33:41 INFO mlflow.tracking._tracking_service.client: 🏃 View run gbr-hyperopt at: http://localhost:5001/#/experiments/284535062331422681/runs/9d0b9d422c0e43f6ad69f8c0cf555939.

2025/10/13 21:33:41 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5001/#/experiments/284535062331422681.



 60%|██████    | 15/25 [1:12:42<49:55, 299.52s/trial, best loss: 0.05638477732136464]  


2025/10/13 21:51:06 INFO mlflow.tracking._tracking_service.client: 🏃 View run gbr-hyperopt at: http://localhost:5001/#/experiments/284535062331422681/runs/f03c31ece48d457aa2e616283722e0ca.

2025/10/13 21:51:06 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5001/#/experiments/284535062331422681.



 64%|██████▍   | 16/25 [1:30:07<1:18:36, 524.02s/trial, best loss: 0.05638477732136464]


2025/10/13 21:56:08 INFO mlflow.tracking._tracking_service.client: 🏃 View run gbr-hyperopt at: http://localhost:5001/#/experiments/284535062331422681/runs/76e6133d5cb34390bd37a0329e65b30c.

2025/10/13 21:56:08 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5001/#/experiments/284535062331422681.



 68%|██████▊   | 17/25 [1:35:09<1:00:56, 457.08s/trial, best loss: 0.05638477732136464]


2025/10/13 21:56:57 INFO mlflow.tracking._tracking_service.client: 🏃 View run gbr-hyperopt at: http://localhost:5001/#/experiments/284535062331422681/runs/9971b805c155446fa076f2a480cdb288.

2025/10/13 21:56:57 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5001/#/experiments/284535062331422681.



 72%|███████▏  | 18/25 [1:35:58<39:01, 334.48s/trial, best loss: 0.05638477732136464]  


2025/10/13 22:01:08 INFO mlflow.tracking._tracking_service.client: 🏃 View run gbr-hyperopt at: http://localhost:5001/#/experiments/284535062331422681/runs/7ae03a88b76f4d088d9170edeec8b4c1.

2025/10/13 22:01:08 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5001/#/experiments/284535062331422681.



 76%|███████▌  | 19/25 [1:40:09<30:56, 309.43s/trial, best loss: 0.05638477732136464]


2025/10/13 22:04:56 INFO mlflow.tracking._tracking_service.client: 🏃 View run gbr-hyperopt at: http://localhost:5001/#/experiments/284535062331422681/runs/6f70da8eb392434d8007b9c3e900b0fa.

2025/10/13 22:04:56 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5001/#/experiments/284535062331422681.



 80%|████████  | 20/25 [1:43:57<23:44, 284.91s/trial, best loss: 0.05638477732136464]


2025/10/13 22:08:25 INFO mlflow.tracking._tracking_service.client: 🏃 View run gbr-hyperopt at: http://localhost:5001/#/experiments/284535062331422681/runs/5b28c54c49704f4eb2a3a7419616c9b4.

2025/10/13 22:08:25 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5001/#/experiments/284535062331422681.



 84%|████████▍ | 21/25 [1:47:26<17:29, 262.29s/trial, best loss: 0.05638477732136464]


2025/10/13 22:11:35 INFO mlflow.tracking._tracking_service.client: 🏃 View run gbr-hyperopt at: http://localhost:5001/#/experiments/284535062331422681/runs/aacb2e95875245049283a43c1f715425.

2025/10/13 22:11:35 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5001/#/experiments/284535062331422681.



 88%|████████▊ | 22/25 [1:50:36<12:02, 240.68s/trial, best loss: 0.05638477732136464]


2025/10/13 22:15:02 INFO mlflow.tracking._tracking_service.client: 🏃 View run gbr-hyperopt at: http://localhost:5001/#/experiments/284535062331422681/runs/64fac3585d0f4faabbfba4bde2b36772.

2025/10/13 22:15:02 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5001/#/experiments/284535062331422681.



 92%|█████████▏| 23/25 [1:54:03<07:41, 230.52s/trial, best loss: 0.05638477732136464]


2025/10/13 23:40:16 INFO mlflow.tracking._tracking_service.client: 🏃 View run gbr-hyperopt at: http://localhost:5001/#/experiments/284535062331422681/runs/270c73a306684bdcbe74b3df49ce85b5.

2025/10/13 23:40:16 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5001/#/experiments/284535062331422681.



 96%|█████████▌| 24/25 [3:19:17<28:15, 1695.73s/trial, best loss: 0.05638477732136464]


2025/10/14 00:50:28 INFO mlflow.tracking._tracking_service.client: 🏃 View run gbr-hyperopt at: http://localhost:5001/#/experiments/284535062331422681/runs/2b95de56c7054593a6e474b2fd906adf.

2025/10/14 00:50:28 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5001/#/experiments/284535062331422681.



100%|██████████| 25/25 [4:29:29<00:00, 646.79s/trial, best loss: 0.05638477732136464] 
Best params: {'learning_rate': 0.04446902279053312, 'max_depth': 7, 'min_samples_leaf': 3, 'min_samples_split': 13, 'n_estimators': 379, 'subsample': 0.6415804690150524}


2025/10/14 01:45:08 INFO mlflow.tracking._tracking_service.client: 🏃 View run gbr-final at: http://localhost:5001/#/experiments/284535062331422681/runs/2ca470e9791e4cf1aa907d9c0f04f76d.
2025/10/14 01:45:08 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5001/#/experiments/284535062331422681.


Unnamed: 0,model,test_rmse,best_params
0,gbr,0.055806,"{'learning_rate': 0.04446902279053312, 'max_de..."
