#### Fit And Register Model


In [0]:
%pip install prophet==1.1.6
%pip install mlflow --upgrade --pre
dbutils.library.restartPython()

In [0]:
from prophet import Prophet
from pyspark.sql.functions import col
from sklearn.metrics import mean_absolute_error
from mlflow.models.signature import infer_signature
import matplotlib.pyplot as plt
import mlflow

In [0]:
# Unity Catalog namespace used to qualify both the input tables and the
# registered model name (<catalog>.<schema>.<object>).
catalog = "dhurley_catalog"
schema = "electricity_load_forecasting"

# silver tables holding the train / test splits
train_data, test_data = "train_data", "test_data"

# unqualified name under which the fitted model is registered
registered_model_name = "prophet_electricity_load_forecast"

In [0]:
def add_regressors_prophet_model(model, df, exclude=("ds", "y")):
    """Register the columns of ``df`` as extra regressors on ``model``.

    Every column name in ``df.columns`` that is not listed in ``exclude`` is
    passed to ``model.add_regressor``, in column order.

    Args:
        model: Object exposing ``add_regressor(name)`` (a Prophet model in
            this notebook).
        df: Frame-like object whose ``columns`` attribute iterates over
            column names.
        exclude: Column names that must not be registered as regressors.
            Defaults to Prophet's timestamp and target columns.

    Returns:
        None. ``model`` is modified in place.
    """
    # Use the frame that was passed in (not a notebook global) and honour the
    # caller's ``exclude`` list; a set gives O(1) membership tests.
    excluded = set(exclude)
    for column_name in df.columns:
        if column_name not in excluded:
            model.add_regressor(column_name)

def create_prophet_model_dataframe(df):
    """Return ``df`` as a pandas DataFrame laid out for Prophet.

    The ``datetime`` column is renamed to ``ds`` and ``load`` to ``y``; they
    are placed first, followed by every remaining column of ``df`` under its
    original name. The selection is then collected with ``toPandas()``.
    """
    # Prophet's required timestamp / target columns, renamed.
    renamed = [col("datetime").alias("ds"), col("load").alias("y")]

    # Everything else is passed through untouched.
    passthrough = [
        col(name) for name in df.columns if name not in ("datetime", "load")
    ]

    selected = df.select(*renamed, *passthrough)
    return selected.toPandas()

In [0]:
# Fit a Prophet model on the training table, evaluate it on the test table,
# and log the metric, the model (registered in Unity Catalog) and a diagnostic
# plot to a single MLflow run.
with mlflow.start_run(run_name = "prophet_model_electricity_load"):

    # read train test data from silver UC tables
    train_df = spark.table(f"{catalog}.{schema}.{train_data}")
    test_df = spark.table(f"{catalog}.{schema}.{test_data}")

    # convert train test data into pandas df expected by Prophet model
    train_df_prophet = create_prophet_model_dataframe(train_df)
    test_df_prophet = create_prophet_model_dataframe(test_df)

    # Prophet.predict() returns its rows ordered by ds, while a Spark table
    # read carries no ordering guarantee. Sort the test frame the same way
    # (stable sort, fresh 0..n-1 index) so y and yhat line up row-for-row
    # when the metric below compares them positionally.
    test_df_prophet = (
        test_df_prophet
        .sort_values("ds", kind="mergesort")
        .reset_index(drop=True)
    )

    # fit Prophet model
    prophetModel = Prophet()
    add_regressors_prophet_model(prophetModel, train_df_prophet)
    prophetModel.fit(train_df_prophet)

    # predict with prophet model
    predictions = prophetModel.predict(test_df_prophet)

    # log performance metrics
    mae = mean_absolute_error(test_df_prophet["y"], predictions["yhat"])
    mlflow.log_metric("MeanAbsoluteError", mae)

    # log model for re-use
    # The signature is inferred from the pandas frame the model actually
    # consumes (ds / y / regressors), so it agrees with input_example; the raw
    # Spark table still has the un-renamed datetime / load columns.
    model_log = mlflow.prophet.log_model(
        pr_model=prophetModel,
        registered_model_name=f"{catalog}.{schema}.{registered_model_name}",
        input_example=train_df_prophet.head(),
        signature=infer_signature(train_df_prophet, predictions)
    )

    # log performance plot as artifact
    plt_df = test_df_prophet[['ds', 'y']].merge(predictions[['ds', 'yhat']], on='ds')

    fig = plt.figure(figsize=(8, 6))
    plt.scatter(plt_df['y'], plt_df['yhat'], alpha=0.6, edgecolors='k', label='Predictions')

    # diagonal reference line: points on it are perfectly predicted
    min_val, max_val = plt_df['y'].min(), plt_df['y'].max()
    plt.plot([min_val, max_val], [min_val, max_val], 'r--', label='Perfect Prediction')

    plt.title("Actual vs Predicted")
    plt.xlabel("Actual (y)")
    plt.ylabel("Predicted (yhat)")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()

    # Log the figure before showing it: some matplotlib backends release or
    # clear the figure once it has been displayed.
    mlflow.log_figure(fig, "actual_vs_predicted.png")
    plt.show()

