In [1]:
# Standard library
from itertools import product

# Core
import pandas as pd
import numpy as np

# ML & Metrics
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# MLflow
import mlflow
import mlflow.sklearn


In [2]:
# Read the preprocessed diabetes table from the project's data directory
df = pd.read_csv("../data/diabetes.csv")

# "Outcome" is the label column; every remaining column is used as a predictor
y = df["Outcome"]
X = df.drop(columns="Outcome")

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)


In [3]:
# Hyperparameter grid: every C value is paired with every solver (3 x 2 = 6 runs)
C_values = [0.1, 1.0, 10.0]
solvers = ['liblinear', 'lbfgs']
max_iter = 1000  # shared iteration cap, logged with each run so the run record is complete

# One record per run, collected so the configurations can be compared side by side
run_summaries = []

# Train, evaluate and log one MLflow run per (C, solver) combination
for C, solver in product(C_values, solvers):
    # A descriptive run name makes the runs distinguishable in the MLflow UI
    with mlflow.start_run(run_name=f"logreg_C{C}_{solver}"):
        model = LogisticRegression(C=C, solver=solver, max_iter=max_iter)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # NOTE(review): these metrics are computed on the test split; if they are
        # also used to pick the final configuration, the reported scores will be
        # optimistic. Consider a validation split or cross-validation for selection.
        acc = accuracy_score(y_test, y_pred)
        prec = precision_score(y_test, y_pred)
        rec = recall_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred)

        params = {"C": C, "solver": solver, "max_iter": max_iter}
        metrics = {"accuracy": acc, "precision": prec, "recall": rec, "f1_score": f1}

        # Log parameters and metrics (one batched call each)
        mlflow.log_params(params)
        mlflow.log_metrics(metrics)

        # Log the fitted model as an artifact of this run
        mlflow.sklearn.log_model(model, "model")

        run_summaries.append({**params, **metrics})
        print(f"✔️ Run logged with C={C}, solver={solver}")

# Comparison table of all runs, best F1 first; shown as the cell's output
results = pd.DataFrame(run_summaries).sort_values("f1_score", ascending=False)
results




✔️ Run logged with C=0.1, solver=liblinear




✔️ Run logged with C=0.1, solver=lbfgs




✔️ Run logged with C=1.0, solver=liblinear




✔️ Run logged with C=1.0, solver=lbfgs




✔️ Run logged with C=10.0, solver=liblinear




✔️ Run logged with C=10.0, solver=lbfgs


After evaluating six hyperparameter combinations (three values of `C` × two solvers) and tracking each run with MLflow, the best-performing Logistic Regression model was obtained with:
- **C = 10.0**
- **solver = liblinear**

On the held-out test set (20% of the data), this configuration achieved:
- **Accuracy**: 75.97%
- **F1 Score**: 66.67%

Thus, this model was selected as the final version for deployment.