# Wine Quality Prediction Experiment

This notebook demonstrates the training, evaluation, and export of the ElasticNet model for Wine Quality prediction. It integrates with MLflow for tracking and exports the model to ONNX format for Triton Inference Server.

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import mlflow
import mlflow.sklearn
import mlflow.onnx
import sys
import os
import shutil
import json
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType

## Configuration

In [None]:
# Point the MLflow client at the tracking server. MLFLOW_TRACKING_URI wins
# when set; otherwise fall back to the local (Dockerized) server address.
tracking_uri = os.environ.get("MLFLOW_TRACKING_URI", "http://localhost:5001")
mlflow.set_tracking_uri(tracking_uri)
print(f"Logging to MLflow at {tracking_uri}")

# Later cells use paths relative to the project root. The notebook is expected
# to be launched either from that root or from src/model; in the latter case
# step two levels up so those relative paths resolve.
if os.path.split(os.getcwd())[1] != "model":
    print(f"Working directory: {os.getcwd()}")
else:
    os.chdir("../../")
    print(f"Changed working directory to project root: {os.getcwd()}")

## Helper Functions

In [None]:
def eval_metrics(actual, pred):
    """Score predictions against ground truth.

    Args:
        actual: Observed target values.
        pred: Predicted target values, aligned with ``actual``.

    Returns:
        A ``(rmse, mae, r2)`` tuple: root mean squared error, mean absolute
        error, and the R^2 score, in that order.
    """
    mse = mean_squared_error(actual, pred)
    return (
        np.sqrt(mse),
        mean_absolute_error(actual, pred),
        r2_score(actual, pred),
    )

## Load Data

In [None]:
# Location of the dataset, relative to the current working directory.
csv_url = os.path.join("data", "wine_quality.csv")
try:
    data = pd.read_csv(csv_url)
except Exception as e:
    # Report the problem, then re-raise: without `data` every later cell
    # would fail with an unrelated NameError, hiding the real cause.
    print(f"Unable to read file. Error: {e}")
    raise
else:
    print(f"Data loaded successfully. Shape: {data.shape}")

In [None]:
# Split data into train and test partitions (default split proportions).
# The seed is fixed, matching the model's random_state, so that metrics
# logged to MLflow are comparable from one run of the notebook to the next.
train, test = train_test_split(data, random_state=42)

# "target" is the label column; every other column is used as a feature.
train_x = train.drop(["target"], axis=1)
test_x = test.drop(["target"], axis=1)
# Double brackets keep the labels as single-column DataFrames.
train_y = train[["target"]]
test_y = test[["target"]]

## Train and Log

In [None]:
# ElasticNet hyperparameters; both are logged as MLflow params below.
alpha = 0.5
l1_ratio = 0.5

with mlflow.start_run() as run:
    # Fit on the training partition and score on the held-out partition.
    lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
    lr.fit(train_x, train_y)

    predicted_qualities = lr.predict(test_x)
    (rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)

    print(f"ElasticNet model (alpha={alpha}, l1_ratio={l1_ratio}):")
    print(f"  RMSE: {rmse}")
    print(f"  MAE: {mae}")
    print(f"  R2: {r2}")

    # Record hyperparameters and evaluation metrics on the active run.
    mlflow.log_param("alpha", alpha)
    mlflow.log_param("l1_ratio", l1_ratio)
    mlflow.log_metric("rmse", rmse)
    mlflow.log_metric("r2", r2)
    mlflow.log_metric("mae", mae)

    mlflow.sklearn.log_model(lr, "model")

    # Convert to ONNX. The input width is taken from the training frame
    # instead of being hard-coded, so the exported graph always matches the
    # number of feature columns the model was actually fitted on.
    n_features = train_x.shape[1]
    initial_type = [('float_input', FloatTensorType([None, n_features]))]
    onx = convert_sklearn(lr, initial_types=initial_type)

    # Log ONNX model to MLflow
    mlflow.onnx.log_model(onx, "model_onnx")

    # Save run info for CI/CD
    run_id = run.info.run_id
    artifact_uri = mlflow.get_artifact_uri("model")
    print(f"Run ID: {run_id}")
    print(f"Artifact URI: {artifact_uri}")

    with open("run_info.json", "w") as f:
        json.dump({"run_id": run_id, "artifact_uri": artifact_uri}, f)

    # Save model locally for easy access by app (simulating model registry fetch).
    # Any previous export is removed first so the directory starts out empty
    # and the sklearn save below writes into a fresh "sklearn" subdirectory.
    if os.path.exists("models/wine_model"):
        shutil.rmtree("models/wine_model")
    os.makedirs("models/wine_model", exist_ok=True)
    mlflow.sklearn.save_model(lr, "models/wine_model/sklearn")

    # Save ONNX model locally
    with open("models/wine_model/model.onnx", "wb") as f:
        f.write(onx.SerializeToString())

    print("Model saved to models/wine_model (sklearn and onnx)")

    # Update Triton Model Repository: copy the ONNX file into the
    # <repository>/<model name>/<version>/ layout used below (version "1").
    triton_model_path = "model_repository/wine_model/1/model.onnx"
    os.makedirs(os.path.dirname(triton_model_path), exist_ok=True)
    shutil.copy("models/wine_model/model.onnx", triton_model_path)
    print(f"Model deployed to Triton repository at {triton_model_path}")