### Dependencies

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as matplotlib

import datetime as dt

In [None]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col
from pyspark.sql.functions import unix_timestamp, to_timestamp
from pyspark.sql.functions import col, sum, to_timestamp, window

# Create the Spark session for this notebook, or reuse the one that is already active.
spark = (
    SparkSession.builder
    .appName("EnergyProductionForecast")
    .getOrCreate()
)

In [None]:
from neuralprophet import NeuralProphet, uncertainty_evaluate, set_log_level, set_random_seed

# Only error-level NeuralProphet log messages are shown from here on.
NEURALPROPHET_LOG_LEVEL = "ERROR"
set_log_level(NEURALPROPHET_LOG_LEVEL)

In [None]:
from neuralprophet import set_random_seed

# Fixed seed so that repeated runs of the notebook start from the same random state.
RANDOM_SEED = 0
set_random_seed(RANDOM_SEED)

### Forecast Data Source

Target Variable: GESQ
- Source: WESM CRSS Energy Settlement

Feature Variable: Accuweather Weather Parameters
- Source: Accuweather Historical & Forecast

In [None]:
# Select every column of the `gesq_accuweather` table, ordered by CALENDAR_DATE.
# NOTE(review): the ordering is by calendar date only; if row order within a day
# matters downstream, confirm whether TIME_INTERVAL should be part of the ORDER BY.
query = """
SELECT *
FROM plant_ops.mao_forecast.gesq_accuweather
ORDER BY CALENDAR_DATE
"""

# Run the query through the Spark session created above.
df = spark.sql(query)

# Render the query result in the notebook output.
display(df)

### Feature Engineering

In [None]:
# Convert the Spark result to pandas and discard the two columns
# that are not carried into the forecasting frame.
df_fcst = df.toPandas().drop(columns=["CALENDAR_DATE", "PARTICIPANT_NAME"])

In [None]:
# Rename the timestamp and target columns to `ds` and `y`, the names NeuralProphet works with.
df_fcst = df_fcst.rename(columns={"TIME_INTERVAL": "ds", "GESQ": "y"})

# Round every numeric column to three decimals.
numeric_cols = df_fcst.select_dtypes(include='number').columns
df_fcst[numeric_cols] = df_fcst[numeric_cols].round(decimals=3)

# Keep only the timestamp, the target and the irradiance feature.
# A 'CLOUD_COVER' column was considered here at some point and is currently left out.
model_columns = ['ds', 'y', 'SOLAR_IRRADIANCE']
df_fcst = df_fcst[model_columns]

display(df_fcst)

In [None]:

# Keep the first row for each timestamp.
# - Reassigning instead of `inplace=True` avoids mutating a frame that was produced by
#   column sub-selection (the pattern pandas flags with SettingWithCopyWarning).
# - The index is rebuilt so it stays contiguous (0..n-1) after rows are removed; later
#   cells combine this frame with other frames by index label.
df_fcst = df_fcst.drop_duplicates(subset=['ds']).reset_index(drop=True)

### Forecast Modelling

#### Quantile Regression for Uncertainty Prediction

In [None]:
# Helper model instance; in this cell it is used only for its `split_df` method.
m = NeuralProphet(epochs=10)

# Share of the hourly series that goes into the second (hold-out) partition.
holdout_fraction = 1.0 / 16
train_df, test_df = m.split_df(df_fcst, freq="H", valid_p=holdout_fraction)

# Cell output: the shapes of both partitions.
train_df.shape, test_df.shape

#### Train-Test-Validation

In [None]:
# Quantile-regression parameters.
# NeuralProphet only accepts quantile values strictly between 0 and 1, so the
# confidence level is converted into a symmetric (lower, upper) quantile pair.
confidence_lv = 0.9

# Probability mass left outside the interval on each side.
tail_prob = (1 - confidence_lv) / 2

lower_quantile = round(tail_prob, 2)
upper_quantile = round(confidence_lv + tail_prob, 2)
quantile_list = [lower_quantile, upper_quantile]

In [None]:
# Forecasting model: no trend growth, additive yearly/weekly/daily seasonality,
# an autoregressive network over the last 180 days of hourly values (n_lags) that
# emits 90 days of hourly steps per forecast (n_forecasts).
# NOTE(review): `quantile_list` computed in an earlier cell is not passed to this
# constructor, so no quantile outputs are requested here — confirm whether that is intended.
model = NeuralProphet(
    growth='off',
    seasonality_mode="additive",
    yearly_seasonality=True,
    weekly_seasonality=True,
    daily_seasonality=True,
    loss_func='MAE',
    n_forecasts=90 * 24,
    n_lags=180 * 24,
    ar_layers=[64, 64, 64, 64],
    learning_rate=0.001,
    collect_metrics=['MSE', 'MAE', 'RMSE'],
)

# Lagged regressors as {column name: number of hourly lags}.
# A regressor is only registered when its column is actually present in `df_fcst`:
# the frame is reduced to a fixed column list earlier in the notebook, and registering
# a regressor without a matching data column makes the fit fail.
lagged_regressor_lags = {
    "SOLAR_IRRADIANCE": 30 * 24,
    "CLOUD_COVER_TOTAL": 7 * 24,
}
for regressor_name, regressor_lags in lagged_regressor_lags.items():
    if regressor_name in df_fcst.columns:
        model.add_lagged_regressor(regressor_name, n_lags=regressor_lags)
    else:
        print(f"Skipping lagged regressor {regressor_name!r}: column not found in df_fcst")

# Fit on the full prepared frame at hourly frequency, plotting training progress.
metrics = model.fit(df_fcst, freq="H", progress="plot")

In [None]:
# Number of hourly steps requested beyond the end of the history; the same step
# index is highlighted in the plot below.
# NOTE(review): the model is configured with n_forecasts=90*24 — confirm how
# NeuralProphet treats a `periods` value that differs from n_forecasts when lags are used.
horizon_hours = 15 * 24

# Build the prediction frame: the full history plus the future rows.
future = model.make_future_dataframe(
    df_fcst,
    regressors_df=df_fcst,
    periods=horizon_hours,
    n_historic_predictions=True,
)

# Make sure the irradiance regressor is plain float64 before prediction.
future = future.astype({'SOLAR_IRRADIANCE': 'float64'})

forecast = model.predict(future)

# Negative forecast values are not post-processed in this cell.

# Highlight the chosen step-ahead forecast, then draw and show the figure.
model.highlight_nth_step_ahead_of_each_forecast(horizon_hours)
fig_forecast = model.plot(forecast)
display(fig_forecast)

### Model Metrics

In [None]:
# Show the last five rows of the metrics returned by `model.fit`.
metrics.tail(5)

In [None]:
# Residuals = observed value minus the 1-step-ahead prediction (`yhat1`).
# Both values are read from the same row of `forecast`, so every difference belongs to a
# single timestamp. Subtracting `forecast["yhat1"]` from `df_fcst["y"]` would instead align
# the two frames on index labels, which pairs the wrong timestamps whenever their indexes
# differ (e.g. after duplicate rows were dropped from `df_fcst`).
# Assumes `forecast` carries the observed `y` next to `yhat1`, as in NeuralProphet's
# predict output — TODO confirm for the installed version.
historic_rows = forecast.loc[forecast["y"].notna(), ["ds", "y", "yhat1"]]
df_residuals = pd.DataFrame(
    {
        "ds": historic_rows["ds"],
        # to_numeric guards against `y` being stored with object dtype.
        "residuals": pd.to_numeric(historic_rows["y"], errors="coerce") - historic_rows["yhat1"],
    }
)
fig = df_residuals.plot(x="ds", y="residuals", figsize=(10, 6))

### Final Model

In [None]:
# Store the `config` attribute of the fitted model under the name `final_model`.
# NOTE(review): confirm that the installed NeuralProphet version exposes `model.config`;
# if the intent is to keep the trained model itself, this may need to reference `model` instead.
final_model = model.config