# Hyperparameter Tuning with Hyperopt and Logging with MLflow

We’ll use Hyperopt to find the best C and penalty for LogisticRegression, and MLflow to track:

- The parameters tried
- Their corresponding accuracies
- The best result

In [4]:
import mlflow
import mlflow.sklearn
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from hyperopt import fmin, tpe, hp, Trials, STATUS_OK
import numpy as np

# Load dataset with feature names
data = load_iris(as_frame=True)
X = data.data
y = data.target

# Split into train and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the hyperparameter search space
search_space = {
    'C': hp.loguniform('C', np.log(0.001), np.log(100)),  # C from 0.001 to 100, log scale
    'penalty': hp.choice('penalty', ['l1', 'l2']),        # either 'l1' or 'l2'
}

# Start a named MLflow experiment
mlflow.set_experiment("logistic_regression_hyperopt")

# This will store all trial results
trials = Trials()

# Define the objective function for Hyperopt
def objective(params):
    with mlflow.start_run(nested=True):  # nested=True allows logging under a parent run
        try:
            # Set solver that works with both l1 and l2 penalties
            model = LogisticRegression(**params, solver='liblinear', max_iter=200)

            # Fit the model
            model.fit(X_train, y_train)

            # Predict on validation set
            y_pred = model.predict(X_val)

            # Evaluate accuracy
            acc = accuracy_score(y_val, y_pred)

            # Log parameters and metric to MLflow
            mlflow.log_params(params)
            mlflow.log_metric("val_accuracy", acc)

            # Also log the model itself
            mlflow.sklearn.log_model(model, artifact_path="model", input_example=X_train.iloc[[0]])

            # Return negative accuracy because Hyperopt minimizes the objective
            return {'loss': -acc, 'status': STATUS_OK}

        except Exception as e:
            # If something goes wrong, log the error and return a high loss
            mlflow.log_param("error", str(e))
            return {'loss': 999, 'status': STATUS_OK}

# Run the optimization
with mlflow.start_run(run_name="hyperopt_tuning") as parent_run:
    best_params = fmin(
        fn=objective,            # the function to minimize
        space=search_space,      # hyperparameter space
        algo=tpe.suggest,        # optimization algorithm
        max_evals=20,            # number of trials
        trials=trials            # record results
    )

    # Log the best parameters found
    mlflow.log_param("best_params", best_params)
    print("Best parameters found:", best_params)


  0%|          | 0/20 [00:00<?, ?trial/s, best loss=?]




  5%|▌         | 1/20 [00:04<01:21,  4.27s/trial, best loss: -0.36666666666666664]




 10%|█         | 2/20 [00:08<01:18,  4.35s/trial, best loss: -0.36666666666666664]




 15%|█▌        | 3/20 [00:12<01:13,  4.31s/trial, best loss: -1.0]                




 20%|██        | 4/20 [00:17<01:08,  4.28s/trial, best loss: -1.0]




 25%|██▌       | 5/20 [00:21<01:04,  4.27s/trial, best loss: -1.0]




 30%|███       | 6/20 [00:25<01:00,  4.33s/trial, best loss: -1.0]




 35%|███▌      | 7/20 [00:30<00:56,  4.32s/trial, best loss: -1.0]




 40%|████      | 8/20 [00:34<00:51,  4.28s/trial, best loss: -1.0]




 45%|████▌     | 9/20 [00:38<00:46,  4.26s/trial, best loss: -1.0]




 50%|█████     | 10/20 [00:42<00:42,  4.23s/trial, best loss: -1.0]




 55%|█████▌    | 11/20 [00:46<00:37,  4.21s/trial, best loss: -1.0]




 60%|██████    | 12/20 [00:51<00:33,  4.18s/trial, best loss: -1.0]




 65%|██████▌   | 13/20 [00:55<00:29,  4.18s/trial, best loss: -1.0]




 70%|███████   | 14/20 [00:59<00:25,  4.17s/trial, best loss: -1.0]




 75%|███████▌  | 15/20 [01:03<00:21,  4.21s/trial, best loss: -1.0]




 80%|████████  | 16/20 [01:07<00:16,  4.22s/trial, best loss: -1.0]




 85%|████████▌ | 17/20 [01:12<00:12,  4.24s/trial, best loss: -1.0]




 90%|█████████ | 18/20 [01:16<00:08,  4.24s/trial, best loss: -1.0]




 95%|█████████▌| 19/20 [01:20<00:04,  4.25s/trial, best loss: -1.0]




100%|██████████| 20/20 [01:25<00:00,  4.27s/trial, best loss: -1.0]
Best parameters found: {'C': 52.61752886474606, 'penalty': 1}


In [5]:
print("Tracking URI:", mlflow.get_tracking_uri())


Tracking URI: sqlite:///mlflow.db


**Key Points for Beginners**

- mlflow.set_experiment(...): names your experiment so you can find it later.

- mlflow.start_run(nested=True): tracks each trial separately inside one parent run.

- hp.loguniform: chooses C from a log scale — good for wide value ranges.

- hp.choice: selects one option from a list — used here for 'penalty'.

- Negative accuracy: Hyperopt minimizes the objective, so we use -accuracy.