# MLflow Experiment - Wine Quality Classification
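This notebook demonstrates MLflow experiment tracking on scikit-learn's built-in Wine dataset (`load_wine`: 178 samples, 13 features, 3 classes) with a `GradientBoostingClassifier`, as an alternative to the dataset and model used in `train_model.py`. Note that, despite the title, the `load_wine` targets are three wine classes rather than quality scores.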

In [13]:
from sklearn.datasets import load_wine
import mlflow
import datetime
import os
import pickle
from joblib import dump
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, classification_report
from sklearn.preprocessing import StandardScaler

## Load and Cache Dataset

In [14]:
# Load the Wine dataset, caching the feature matrix and the labels as two
# pickle files under ./data so later runs can skip the scikit-learn loader.
# The wine_* file names are distinct to avoid conflicts with other cached files.
data_dir = './data'
os.makedirs(data_dir, exist_ok=True)

wine_data_path = os.path.join(data_dir, 'wine_data.pickle')
wine_target_path = os.path.join(data_dir, 'wine_target.pickle')

# Both files must be present; if either is missing the dataset is re-fetched
# and both cache files are rewritten.
if os.path.exists(wine_data_path) and os.path.exists(wine_target_path):
    print("Loading cached Wine dataset...")
    # NOTE(security): pickle.load can execute arbitrary code. Only load cache
    # files that this notebook wrote itself; delete ./data if in doubt.
    # The context managers close each handle as soon as the load finishes.
    with open(wine_data_path, 'rb') as data_file:
        X = pickle.load(data_file)
    with open(wine_target_path, 'rb') as target_file:
        y = pickle.load(target_file)
else:
    print("Fetching Wine dataset for the first time...")
    wine = load_wine()
    X = wine.data
    y = wine.target

    # Closing the handles via `with` guarantees the pickles are fully flushed
    # to disk before the success message is printed.
    with open(wine_data_path, 'wb') as data_file:
        pickle.dump(X, data_file)
    with open(wine_target_path, 'wb') as target_file:
        pickle.dump(y, target_file)
    print("Dataset cached successfully!")

print(f"Dataset shape: {X.shape}")
print(f"Number of classes: {len(set(y))}")

Fetching Wine dataset for the first time...
Dataset cached successfully!
Dataset shape: (178, 13)
Number of classes: 3


## MLflow Experiment Setup and Training

In [15]:
# Point MLflow at a local ./mlruns directory (same location as train_model.py),
# creating the directory first if it does not exist yet.
mlruns_dir = './mlruns'
os.makedirs(mlruns_dir, exist_ok=True)
mlflow.set_tracking_uri(mlruns_dir)

# Every execution of this cell creates a fresh experiment: the dataset name is
# suffixed with a yymmdd_HHMMSS timestamp. The timestamp is also reused later
# when naming the saved model files.
dataset_name = "Wine Quality Dataset"
current_time = datetime.datetime.now().strftime("%y%m%d_%H%M%S")
experiment_name = "_".join([dataset_name, current_time])
experiment_id = mlflow.create_experiment(name=experiment_name)

# Report where the experiment lives, one fact per line.
setup_summary = (
    f"Created experiment: {experiment_name}",
    f"Experiment ID: {experiment_id}",
    f"MLflow tracking URI: {mlflow.get_tracking_uri()}",
    f"Saving to: {os.path.abspath(mlruns_dir)}",
)
print("\n".join(setup_summary))

Created experiment: Wine Quality Dataset_260129_215936
Experiment ID: 808277251728951572
MLflow tracking URI: ./mlruns
Saving to: c:\Users\tajwa\Desktop\MLOPS\MLOps_lab_works\Github_Labs\Lab2\src\mlruns


  return FileStore(store_uri, store_uri)


In [16]:
# Train, evaluate, and persist a gradient boosting classifier inside a single
# MLflow run so that parameters, metrics, and artifacts are recorded together.
# Reads X, y, dataset_name, current_time and experiment_id from earlier cells.
with mlflow.start_run(experiment_id=experiment_id, run_name=dataset_name):

    # Single source of truth for the split and model settings: the same values
    # are passed to scikit-learn and logged to MLflow, so they cannot drift.
    random_seed = 42
    test_fraction = 0.3
    model_params = {
        "n_estimators": 100,
        "learning_rate": 0.1,
        "max_depth": 3,
        "random_state": random_seed,
    }

    # Log dataset, split, and model parameters in one call (keys are unique,
    # so no parameter is logged twice).
    params = {
        "dataset_name": dataset_name,
        "number_of_datapoints": X.shape[0],
        "number_of_features": X.shape[1],
        "number_of_classes": len(set(y)),
        "model_type": "GradientBoostingClassifier",
        "test_size": test_fraction,
        **model_params,
    }

    mlflow.log_params(params)

    # Train-test split, stratified so each class keeps its share in both parts
    X_train, X_test, y_train, y_test = train_test_split(
        X, y,
        test_size=test_fraction,
        random_state=random_seed,
        stratify=y
    )

    print(f"Training set size: {len(X_train)}")
    print(f"Test set size: {len(X_test)}")

    # Scale features: fit on the training split only, then apply the same
    # transform to the test split to avoid leaking test statistics.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Train Gradient Boosting Classifier
    gb_clf = GradientBoostingClassifier(**model_params)

    print("\nTraining Gradient Boosting Classifier...")
    gb_clf.fit(X_train_scaled, y_train)
    print("Training complete!")

    # Predictions on the held-out (scaled) test split
    y_pred = gb_clf.predict(X_test_scaled)

    # Calculate and log metrics
    accuracy = accuracy_score(y_test, y_pred)
    f1_macro = f1_score(y_test, y_pred, average='macro')
    f1_weighted = f1_score(y_test, y_pred, average='weighted')

    mlflow.log_metrics({
        'Accuracy': accuracy,
        'F1_Score_Macro': f1_macro,
        'F1_Score_Weighted': f1_weighted
    })

    print("\nModel Performance:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"F1 Score (Macro): {f1_macro:.4f}")
    print(f"F1 Score (Weighted): {f1_weighted:.4f}")

    # Print classification report
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))

    # Save model (matching train_model.py structure: Lab2/models)
    models_dir = os.path.join('..', 'models')
    os.makedirs(models_dir, exist_ok=True)

    model_filename = os.path.join(models_dir, f'model_{current_time}_gb_model.joblib')
    dump(gb_clf, model_filename)
    print(f"\nModel saved to: {os.path.abspath(model_filename)}")

    # Save scaler as well
    scaler_filename = os.path.join(models_dir, f'scaler_{current_time}.joblib')
    dump(scaler, scaler_filename)
    print(f"Scaler saved to: {os.path.abspath(scaler_filename)}")

    # The classifier was fitted on scaled features, so the scaler is attached
    # to the run too; otherwise the logged model could not be used for
    # inference on raw inputs from the MLflow artifacts alone.
    mlflow.log_artifact(scaler_filename, artifact_path="preprocessing")

    # Log model to MLflow
    mlflow.sklearn.log_model(gb_clf, "gradient_boosting_model")

    print("\n✅ Experiment logged successfully to MLflow!")

Training set size: 124
Test set size: 54

Training Gradient Boosting Classifier...




Training complete!

Model Performance:
Accuracy: 0.9630
F1 Score (Macro): 0.9652
F1 Score (Weighted): 0.9627

Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.89      0.94        18
           1       0.91      1.00      0.95        21
           2       1.00      1.00      1.00        15

    accuracy                           0.96        54
   macro avg       0.97      0.96      0.97        54
weighted avg       0.97      0.96      0.96        54


Model saved to: c:\Users\tajwa\Desktop\MLOPS\MLOps_lab_works\Github_Labs\Lab2\models\model_260129_215936_gb_model.joblib
Scaler saved to: c:\Users\tajwa\Desktop\MLOPS\MLOps_lab_works\Github_Labs\Lab2\models\scaler_260129_215936.joblib

✅ Experiment logged successfully to MLflow!


## View MLflow UI
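This notebook writes its tracking data to `./mlruns`, resolved relative to the directory the notebook runs in (`src/`), which is the same location `train_model.py` uses. To browse the logged runs, start the MLflow UI: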
```bash
# From the src/ directory:
python -m mlflow ui --backend-store-uri ./mlruns

# Or from Lab2/ directory:
python -m mlflow ui --backend-store-uri ./src/mlruns
```
Then open http://localhost:5000 (the MLflow UI's default address) in your browser.