In [14]:
from sklearn.preprocessing import OrdinalEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
import pandas as pd

import mlflow
from mlflow.models import infer_signature

import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.ensemble import RandomForestClassifier

import kagglehub
from kagglehub import KaggleDatasetAdapter


file_path = "wine_quality_classification.csv"


df = kagglehub.load_dataset(
  KaggleDatasetAdapter.PANDAS,
  "sahideseker/wine-quality-classification",
  file_path,
  
)




  df = kagglehub.load_dataset(


## Data Prep

In [15]:
quality_order = ["low", "medium", "high"]  
encoder = OrdinalEncoder(
    categories=[quality_order],
    handle_unknown='use_encoded_value',  
    unknown_value=-1  
)
y_encoded = encoder.fit_transform(df[['quality_label']]).ravel()


X = df.drop(columns="quality_label")
y = y_encoded
X_train, X_test, y_train, y_test = train_test_split(
    X, y, 
    test_size=0.2, 
    random_state=42,
    stratify=y  
)


## Models

In [16]:

params_lr = {
    "solver": "lbfgs",
    "max_iter": 10000,
    "random_state": 8888,

    "class_weight": "balanced",  
    "penalty": "l2",
    "C": 0.1  
}

lr = LogisticRegression(**params_lr)
lr.fit(X_train, y_train)


y_pred_lr = lr.predict(X_test)
accuracy = accuracy_score(y_test, y_pred_lr)



y_proba = lr.predict_proba(X_test)
report_dict_lr = classification_report(y_test, y_pred_lr, output_dict=True)


In [17]:

params_rf = {
    "n_estimators": 30,
    "max_depth": 3
}
rf_clf = RandomForestClassifier(**params_rf)
rf_clf.fit(X_train, y_train)
y_pred_rf = rf_clf.predict(X_test)
report_dict_rf = classification_report(y_test, y_pred_rf, output_dict=True)

# MLflow structure

In [18]:
models = {"LogisticRegression": lr, "RandomForest": rf_clf}
params = {"LogisticRegression": params_lr, "RandomForest": params_rf}
report_dict = {"LogisticRegression": report_dict_lr, "RandomForest": report_dict_rf}
wine_feature_names = list(df.columns)

## Single Experiment with multiple runs

In [None]:
mlflow.set_tracking_uri(uri="http://127.0.0.1:5000")
mlflow.set_experiment("MLflow Wine")


for model_name, model in models.items():
    with mlflow.start_run(run_name=model_name):
        mlflow.log_params(params[model_name])
        
        mlflow.log_metrics({
            "accuracy": report_dict[model_name]["accuracy"],
            "recall_class_0": report_dict[model_name]["0.0"]["recall"],
            "recall_class_1": report_dict[model_name]["1.0"]["recall"],
            "recall_class_2": report_dict[model_name]["2.0"]["recall"],
            "f1-score": report_dict[model_name]["macro avg"]["f1-score"]
        })
        
        mlflow.set_tag("Single Experiment/ Multiple Runs Training info", f"{model_name} model for wine")
        
        signature = infer_signature(X_train, model.predict(X_train))
        
        model_info = mlflow.sklearn.log_model(
            sk_model=model,
            artifact_path=f"{model_name.lower()}_model",
            signature=signature,
            input_example=X_train,
            registered_model_name=f"tracking-wine-{model_name.lower()}"
            )
        
        
        predictions = model.predict(X_test)
        result = pd.DataFrame(X_test, columns=wine_feature_names).drop(columns="quality_label")
        result["actual_class"] = y_test
        result['predicted_class'] = predictions
        result.to_csv(f"{model_name}_predictions.csv")
        mlflow.log_artifact(f"{model_name}_predictions.csv")

2025/04/29 14:33:25 INFO mlflow.tracking.fluent: Experiment with name 'MLflow Wine' does not exist. Creating a new experiment.
Successfully registered model 'tracking-wine'.
2025/04/29 14:33:31 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: tracking-wine, version 1
Created version '1' of model 'tracking-wine'.


🏃 View run inquisitive-fowl-377 at: http://127.0.0.1:5000/#/experiments/340645493566817619/runs/044b07dee4cd4ad59cc194b31d592b07
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/340645493566817619


Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 50.43it/s] 


Unnamed: 0,fixed_acidity,residual_sugar,alcohol,density,actual_class,predicted_class
136,4.8,8.9,12.1,0.9961,1.0,0.0
755,15.3,14.2,13.0,1.0025,0.0,0.0
239,12.7,10.4,10.3,0.9957,1.0,1.0
522,6.3,6.6,12.5,1.0043,1.0,0.0


## Nested Runs

In [None]:
mlflow.set_tracking_uri(uri="http://127.0.0.1:5000")
mlflow.set_experiment("MLflow_Wine_Nested")

with mlflow.start_run(run_name="Wine_Model_Comparison"):
    for model_name, model in models.items():
        with mlflow.start_run(run_name=model_name, nested=True):
            mlflow.log_params(params[model_name])
            mlflow.log_metrics({
                "accuracy": report_dict[model_name]["accuracy"],
                "recall_class_0": report_dict[model_name]["0.0"]["recall"],
                "recall_class_1": report_dict[model_name]["1.0"]["recall"],
                "recall_class_2": report_dict[model_name]["2.0"]["recall"],
                "f1-score": report_dict[model_name]["macro avg"]["f1-score"]
            })
            mlflow.set_tag("Training Info", f"{model_name} model for Wine")
            signature = infer_signature(X_train, model.predict(X_train))
            model_info = mlflow.sklearn.log_model(
                sk_model=model,
                artifact_path=f"{model_name.lower()}_model",
                signature=signature,
                input_example=X_train,
                registered_model_name=f"tracking-wine-{model_name.lower()}"
            )
            predictions = model.predict(X_test)
            result = pd.DataFrame(X_test, columns=wine_feature_names).drop(columns="quality_label", errors="ignore")
            result["actual_class"] = y_test
            result["predicted_class"] = predictions
            result.to_csv(f"{model_name}_predictions.csv")
            mlflow.log_artifact(f"{model_name}_predictions.csv")

## Separate Experiments for Each Model

In [None]:
mlflow.set_tracking_uri(uri="http://127.0.0.1:5000")

for model_name, model in models.items():
    mlflow.set_experiment(f"MLflow_Wine_{model_name}")
    with mlflow.start_run(run_name=f"{model_name}_Run"):
        mlflow.log_params(params[model_name])
        mlflow.log_metrics({
            "accuracy": report_dict[model_name]["accuracy"],
            "recall_class_0": report_dict[model_name]["0.0"]["recall"],
            "recall_class_1": report_dict[model_name]["1.0"]["recall"],
            "recall_class_2": report_dict[model_name]["2.0"]["recall"],
            "f1-score": report_dict[model_name]["macro avg"]["f1-score"]
        })
        mlflow.set_tag("Training Info", f"{model_name} model for Wine")
        signature = infer_signature(X_train, model.predict(X_train))
        model_info = mlflow.sklearn.log_model(
            sk_model=model,
            artifact_path=f"{model_name.lower()}_model",
            signature=signature,
            input_example=X_train,
            registered_model_name=f"tracking-wine-{model_name.lower()}"
        )
        predictions = model.predict(X_test)
        result = pd.DataFrame(X_test, columns=wine_feature_names).drop(columns="quality_label", errors="ignore")
        result["actual_class"] = y_test
        result["predicted_class"] = predictions
        result.to_csv(f"{model_name}_predictions.csv")
        mlflow.log_artifact(f"{model_name}_predictions.csv")