### Day 12 - MLflow

In [0]:
import mlflow
import mlflow.sklearn
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split


#### Load the data as a Pandas DataFrame from the Gold Medallion Layer

In [0]:
# Load the Gold-layer category performance table.
# `spark` is not created in this notebook; presumably it is the session object
# injected by the notebook runtime -- TODO confirm.
gold_table = spark.table("ecommerce_catalog.gold.category_performance")

# Collect the whole table into a pandas DataFrame for use with scikit-learn.
df = gold_table.toPandas()

# Last expression of the cell: display the available column names.
df.columns

Index(['category_code', 'unique_views', 'unique_carts', 'unique_purchases',
       'total_revenue', 'cart_to_purchase_ratio'],
      dtype='object')

In [0]:
# Discard every row that has a missing value in any column (pandas defaults,
# spelled out explicitly).
# NOTE(review): this also removes rows whose only null is in a column that is
# not used for modelling (category_code, cart_to_purchase_ratio) -- confirm
# that this is intended.
df = df.dropna(axis=0, how="any")

Use three features (`unique_views`, `unique_carts`, `unique_purchases`) to predict the target, `total_revenue`.

In [0]:
# Columns used as model inputs and the column to be predicted.
feature_columns = ["unique_views", "unique_carts", "unique_purchases"]
target_column = "total_revenue"

# X is a DataFrame holding the three predictors; y is the target Series.
X = df[feature_columns]
y = df[target_column]

In [0]:
# Hold out 20% of the rows for evaluation. The fixed random_state makes the
# split reproducible across re-runs of the notebook.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#### Train an elementary linear regression model

In [0]:
# Baseline: ordinary least squares with scikit-learn's default settings.
model = LinearRegression()

# Fit on the training split only; the test split is kept for scoring later.
model.fit(X=X_train, y=y_train)

#### Log Parameters, Metrics, and Model (MLflow)

In [0]:
# Record the baseline model (fitted in the previous cell) as one MLflow run:
# descriptive parameters, the test-set R², and the model artifact itself.
with mlflow.start_run(run_name="gold_linear_regression_v1"):

    # Parameters describing how this run was produced. `test_size` mirrors the
    # value passed to train_test_split above.
    mlflow.log_params(
        {
            "model_type": "LinearRegression",
            "features": "views,carts,purchases",
            "test_size": 0.2,
        }
    )

    # Evaluate: for a regressor, .score() returns the coefficient of
    # determination (R²) on the held-out split.
    r2 = model.score(X_test, y_test)
    mlflow.log_metric("r2_score", r2)

    # Log the fitted model. Supplying a small input example lets MLflow infer
    # and store the model signature instead of logging the model without one.
    mlflow.sklearn.log_model(
        model,
        artifact_path="model",
        input_example=X_train.head(5),
    )

# Printed after the run has been closed; `r2` is still in scope.
print(f"R² Score: {r2:.4f}")




R² Score: 0.6927


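#### Run 2 - Set a couple of hyperparameters explicitly

This run reuses the same data, train/test split, and features as Run 1; only the model's constructor arguments change (`fit_intercept=True`, `positive=True`).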

In [0]:
# Second candidate: the same estimator with two constructor arguments set
# explicitly. `fit_intercept=True` is already scikit-learn's default;
# `positive=True` restricts the fitted coefficients to be non-negative.
# LinearRegression is already imported in the notebook's import cell, so it is
# not re-imported here.
# Note: this rebinds `model`, so the Run 1 estimator is no longer reachable
# under that name.
model = LinearRegression(fit_intercept=True, positive=True)


In [0]:
# Fit and track the explicitly-parameterised model as its own MLflow run.
with mlflow.start_run(run_name="gold_linear_regression_explicit_params"):

    # Log the same descriptive parameters as the first run (the data, split and
    # features are unchanged) so the two runs can be compared side by side,
    # plus the constructor arguments that distinguish this run.
    mlflow.log_param("model_type", "LinearRegression")
    mlflow.log_param("features", "views,carts,purchases")
    mlflow.log_param("test_size", 0.2)
    mlflow.log_param("fit_intercept", True)
    mlflow.log_param("positive", True)

    # Unlike the first run, training happens inside the run context.
    model.fit(X_train, y_train)

    # Test-set R² (coefficient of determination), under the same metric name
    # as the first run.
    r2 = model.score(X_test, y_test)
    mlflow.log_metric("r2_score", r2)

    # Supplying a small input example lets MLflow infer and store the model
    # signature instead of logging the model without one.
    mlflow.sklearn.log_model(
        model,
        artifact_path="model",
        input_example=X_train.head(5),
    )

# Show the result in the notebook as well, mirroring the first run.
print(f"R² Score: {r2:.4f}")


