In [1]:
import pandas as pd

import mlflow
from mlflow.models import infer_signature

import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report


In [2]:
# Install dependencies as needed:
# pip install kagglehub[pandas-datasets]
import kagglehub
from kagglehub import KaggleDatasetAdapter

# Name of the file inside the Kaggle dataset that should be loaded
file_path = "wine_quality_classification.csv"

# Load the latest version of the dataset through the pandas adapter.
# `dataset_load` is the current entry point; the older `load_dataset`
# alias is deprecated and emits a DeprecationWarning on every run.
df = kagglehub.dataset_load(
  KaggleDatasetAdapter.PANDAS,
  "sahideseker/wine-quality-classification",
  file_path,
  # Provide any additional arguments like
  # sql_query or pandas_kwargs. See the
  # documentation for more information:
  # https://github.com/Kaggle/kagglehub/blob/main/README.md#kaggledatasetadapterpandas
)



  from .autonotebook import tqdm as notebook_tqdm
  df = kagglehub.load_dataset(


In [3]:
from sklearn.preprocessing import OrdinalEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report


print("Unique quality labels:", df['quality_label'].unique())


# Ordered target classes: the position in this list is the encoded value
# (low -> 0.0, medium -> 1.0, high -> 2.0).
quality_order = ["low", "medium", "high"]

# Fail fast on labels outside `quality_order` (including missing values).
# Encoding them to a sentinel such as -1 would silently add a phantom class
# to the stratified split and to the classifier, and would only surface
# later as a target_names mismatch in classification_report.
unexpected_labels = set(df['quality_label'].unique()) - set(quality_order)
if unexpected_labels:
    raise ValueError(
        f"Unexpected quality_label values: {sorted(unexpected_labels, key=str)}; "
        f"expected only {quality_order}"
    )

# Default handle_unknown='error' keeps the encoder strict as well.
encoder = OrdinalEncoder(categories=[quality_order])
y_encoded = encoder.fit_transform(df[['quality_label']]).ravel()


# Features are every column except the target label.
X = df.drop(columns="quality_label")
y = y_encoded
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42,
    stratify=y  # keep the class proportions equal in train and test
)


# Hyperparameters are kept in a dict so the same values can be logged later.
params = {
    "solver": "lbfgs",
    "max_iter": 10000,
    "random_state": 8888,

    "class_weight": "balanced",  # reweight classes inversely to frequency
    "penalty": "l2",
    "C": 0.1  # inverse regularization strength (smaller = stronger)
}

lr = LogisticRegression(**params)
lr.fit(X_train, y_train)


# Evaluate on the held-out split.
y_pred = lr.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"\nAccuracy: {accuracy:.4f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=quality_order))


y_proba = lr.predict_proba(X_test)
# Dict form of the report for metric logging. No target_names are passed on
# purpose: the per-class keys are the stringified float labels
# ('0.0', '1.0', '2.0'), which is what the MLflow logging cell looks up.
report_dict = classification_report(y_test, y_pred, output_dict=True)


Unique quality labels: ['high' 'medium' 'low']

Accuracy: 0.3800

Classification Report:
              precision    recall  f1-score   support

         low       0.28      0.27      0.27        60
      medium       0.43      0.44      0.43        71
        high       0.41      0.42      0.41        69

    accuracy                           0.38       200
   macro avg       0.37      0.37      0.37       200
weighted avg       0.38      0.38      0.38       200



In [None]:
# Set our tracking server uri for logging
mlflow.set_tracking_uri(uri="http://127.0.0.1:8080")

# Create a new MLflow Experiment
mlflow.set_experiment("MLflow Wine")

# Start an MLflow run
with mlflow.start_run():
    # Log the hyperparameters
    mlflow.log_params(params)

    # Log the evaluation metrics from the classification report.
    # The per-class keys are the stringified float labels produced by the
    # ordinal encoding of the target ('0.0', '1.0', '2.0').
    mlflow.log_metrics(
        {
            "accuracy": report_dict['accuracy'],
            "recall_class_0": report_dict['0.0']['recall'],
            "recall_class_1": report_dict['1.0']['recall'],
            "recall_class_2": report_dict['2.0']['recall'],
            # macro-averaged F1 across the three classes
            "f1-score": report_dict['macro avg']['f1-score'],
        }
    )

    # Set a tag that we can use to remind ourselves what this run was for
    mlflow.set_tag("Training Info", "Basic LR model for Wine")

    # Infer the model signature
    signature = infer_signature(X_train, lr.predict(X_train))

    # Log the model.
    # - The artifact path names the wine model (it previously carried a
    #   leftover "iris_model" name from a different example).
    # - Only a few rows are stored as the input example; passing the whole
    #   training frame would serialize the entire training set into the
    #   model artifact.
    model_info = mlflow.sklearn.log_model(
        sk_model=lr,
        artifact_path="wine_model",
        signature=signature,
        input_example=X_train.head(5),
        registered_model_name="tracking-wine",
    )
    
    
    
    
    
# Load the model back for predictions as a generic Python Function model
loaded_model = mlflow.pyfunc.load_model(model_info.model_uri)

predictions = loaded_model.predict(X_test)

# X_test is already a DataFrame holding only the feature columns, so copy it
# directly instead of rebuilding it from df.columns (which would reintroduce
# the label column as all-NaN only to drop it again). The copy keeps X_test
# itself unmodified when the comparison columns are added.
result = X_test.copy()
result["actual_class"] = y_test
result["predicted_class"] = predictions

# Show the first few rows of actual vs. predicted classes
result.head(4)