# Libraries

In [0]:
%pip install prophet

In [0]:
%restart_python

In [0]:
import pandas as pd
import numpy as np
from itertools import product

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

from prophet import Prophet

# Customized Functions

In [0]:
# Plots the original time series data and Prophet model predictions, including confidence intervals, for both the full period and a zoomed-in test period.
def plot_prediction_prophet(df, proph_forecast, test_first_date = '2014-01-01', zoom_end_date = None, zoom_days_before = 80):
    """
    Draw two stacked panels comparing the observed series with the Prophet forecast.

    The top panel shows the whole observed series together with the predictions
    from `test_first_date` onwards. The bottom panel shows the same lines, zoomed
    in around the test period, plus the forecast uncertainty band
    (`yhat_lower` .. `yhat_upper`).

    Parameters:
    df (pd.DataFrame): The original time series data grouped by date, with columns 'ds' and 'y'.
    proph_forecast (pd.DataFrame): The Prophet forecast dataframe (columns 'ds', 'yhat', 'yhat_lower', 'yhat_upper' are read).
    test_first_date (str): The first date of the test period (format 'YYYY-MM-DD'). Default is '2014-01-01'.
    zoom_end_date (str or None): Right edge of the zoomed panel. If None (default), the last
        forecasted date is used, so the zoom window always follows the forecast horizon.
    zoom_days_before (int): Number of days of history shown before `test_first_date`
        in the zoomed panel. Default is 80.

    Returns:
    None. The figure is rendered with `plt.show()`.
    """

    # Extract forecasted data for the test period
    y_pred = proph_forecast.loc[proph_forecast.ds >= test_first_date, ['ds', 'yhat', 'yhat_lower', 'yhat_upper']]

    # Derive the zoom window from the arguments instead of a fixed calendar date,
    # so that a different test start or horizon never produces an inverted x-axis.
    zoom_start = pd.to_datetime(test_first_date) - pd.Timedelta(days=zoom_days_before)
    if zoom_end_date is not None:
        zoom_end = pd.to_datetime(zoom_end_date)
    elif not y_pred.empty:
        zoom_end = y_pred.ds.max()
    else:
        # No forecast rows in the test period: fall back to the end of the observed data
        zoom_end = pd.to_datetime(df.ds).max()

    # Both panels show the same two lines; only the x-range of the second one differs
    fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(20, 10))
    for ax_i in ax:
        sns.lineplot(x=df.ds, y=df.y, label='Original Data', ax=ax_i)
        sns.lineplot(x=y_pred.ds, y=y_pred.yhat, label='Predictions', ax=ax_i)

    # Second plot: zoom on test period and forecast with confidence interval
    ax[1].set_xlim(zoom_start, zoom_end)

    # Add confidence interval as a transparent band to the second subplot
    ax[1].fill_between(
        y_pred.ds,
        y_pred.yhat_lower,
        y_pred.yhat_upper,
        color='orange',
        alpha=0.2,
        label='Confidence Interval'
    )

    # Legends are created last so that the confidence band is listed too
    for ax_i in ax:
        ax_i.set_xlabel('Date')
        ax_i.set_ylabel('Unit Sales')
        ax_i.legend()

    plt.tight_layout()
    plt.show()

In [0]:
# Evaluates Prophet model performance on train and test sets using specified metrics (R2, RMSE, MAE).
# Optionally prints the metrics and/or returns them as a flat list.
def model_evaluate_prophet(df, proph_forecast, test_first_date = '2014-01-01', metrics = ['r2', 'rmse', 'mae'], print_metrics = True, return_list = False):
    """
    Compare observed values with Prophet predictions on the train and test periods.

    Observed and predicted values are paired by date ('ds'), not by row position:
    only dates present in both `df` and `proph_forecast` are evaluated, so a
    forecast that extends past the observed data (or skips a date) cannot
    misalign the two series.

    Parameters:
    df (pd.DataFrame): The original time series data grouped by date, with columns 'ds' and 'y'.
    proph_forecast (pd.DataFrame): The Prophet forecast dataframe (columns 'ds' and 'yhat' are read).
    test_first_date (str): The first date of the test period (format 'YYYY-MM-DD'). Default is '2014-01-01'.
    metrics (list): List of metric names to compute. Supported values are 'r2', 'rmse', and 'mae'.
        Metrics are always computed in that fixed order, regardless of the order given here.
    print_metrics (bool): If True, prints a header and the calculated metrics to the console; if False, prints nothing.
    return_list (bool): If True, the function returns a list containing the calculated metrics; if False, it returns None.

    Returns:
    list or None: When `return_list` is True, a flat list with a (train, test) pair
        per requested metric, e.g. [r2_train, r2_test, rmse_train, rmse_test, mae_train, mae_test].
    """

    # Pair actuals and predictions on the date column
    actual = df[['ds', 'y']].assign(ds=pd.to_datetime(df['ds']))
    predicted = proph_forecast[['ds', 'yhat']].assign(ds=pd.to_datetime(proph_forecast['ds']))
    aligned = actual.merge(predicted, on='ds', how='inner')

    # Split the paired values into train and test sets based on the test_first_date
    is_train = aligned.ds < pd.to_datetime(test_first_date)
    y_train = aligned.loc[is_train, 'y']
    y_train_pred = aligned.loc[is_train, 'yhat']

    y_test = aligned.loc[~is_train, 'y']
    y_test_pred = aligned.loc[~is_train, 'yhat']

    # The header row belongs to the printed report, so it is suppressed together with it
    if print_metrics:
        print("", end="\t")
        print("train", end="\t\t\t")
        print("test", end="\n")

    result_metrics = []

    if "r2" in metrics:
        r2_train = r2_score(y_train, y_train_pred)
        r2_test = r2_score(y_test, y_test_pred)

        if print_metrics:
            print("r2", end="\t")
            print(r2_train, end="\t")
            print(r2_test)

        result_metrics.extend([r2_train, r2_test])

    if "rmse" in metrics:
        rmse_train = np.sqrt(mean_squared_error(y_train, y_train_pred))
        rmse_test = np.sqrt(mean_squared_error(y_test, y_test_pred))

        if print_metrics:
            print("rmse", end="\t")
            print(rmse_train, end="\t")
            print(rmse_test)

        result_metrics.extend([rmse_train, rmse_test])

    if "mae" in metrics:
        mae_train = mean_absolute_error(y_train, y_train_pred)
        mae_test = mean_absolute_error(y_test, y_test_pred)

        if print_metrics:
            print("mae", end="\t")
            print(mae_train, end="\t")
            print(mae_test)

        result_metrics.extend([mae_train, mae_test])

    if return_list:
        return result_metrics

# Dataset Importing and Preparation

In [0]:
# Read the sales table from Spark into pandas and parse its 'date' column
df = spark.table("workspace.timeseries.train2").toPandas()
df["date"] = pd.to_datetime(df["date"])

# Same for the holidays table
df_holidays = spark.table("workspace.timeseries.holidays").toPandas()
df_holidays["date"] = pd.to_datetime(df_holidays["date"])

In [0]:
# Quick look at the first rows of the sales table after parsing the dates
df.head(3)

In [0]:
# Shape the data the way Prophet expects it:
#   1. sum unit sales per calendar date
#   2. rename the columns to 'ds' (date) and 'y' (target variable)
# NOTE: `df` is overwritten here, so this cell cannot be re-run without re-loading the data first.
df = (
    df.groupby("date")["unit_sales"]
    .sum()
    .reset_index()
    .rename(columns={"date": "ds", "unit_sales": "y"})
)
df.head(3)

In [0]:
# Train / Test split: everything from 2014-01-01 onwards is held out for testing,
# everything before that date is used for training.
df_test = df.loc[df["ds"] >= "2014-01-01"]
df_train = df.loc[df["ds"] < "2014-01-01"]

Preparing the holidays dataset

In [0]:
# Keep only holidays that apply to the Guayas region (local Guayaquil holidays and
# national Ecuador holidays) and that fall within the train-test period.
# Both ends of the date window are inclusive, so a holiday dated exactly
# 2013-01-01 is kept (the previous strict '>' dropped it).
guayas_holidays = df_holidays[
    df_holidays.locale_name.isin(["Guayaquil", "Ecuador"])
    & df_holidays.date.between('2013-01-01', '2014-03-31')
]

In [0]:
# Prophet expects a holidays frame with the columns 'holiday' and 'ds':
# the holiday 'type' is used as the holiday label and 'date' as the timestamp.
holidays_prophet = (
    guayas_holidays.loc[:, ['type', 'date']]
    .rename(columns={'type': 'holiday', 'date': 'ds'})
    .reset_index(drop=True)
)
holidays_prophet.head(3)

In [0]:
# Check column dtypes and non-null counts of the holidays frame
holidays_prophet.info()

# Prophet

## Basic

In [0]:
# Baseline Prophet model using the built-in weekly and yearly seasonalities.
# The holidays frame is intentionally not passed: adding holidays=holidays_prophet
# made the forecast worse (r2_test: 0.3951, mae_test: 6856.7790).
model_basic = Prophet(
    seasonality_mode='additive',
    yearly_seasonality=True,
    weekly_seasonality=True,
    daily_seasonality=False,
)

# Fit (train) the model on the training split
model_basic.fit(df_train)

In [0]:
# Forecast horizon: number of daily steps to predict beyond the training data
future_days = 90

# Build the frame of dates to predict on; it contains the historical dates
# followed by `future_days` additional daily dates.
future_basic = model_basic.make_future_dataframe(freq='D', periods=future_days)

In [0]:
# Generate predictions for every date in `future_basic` (historical + future dates)
forecast_basic = model_basic.predict(future_basic)

# Display the last predictions (the actual forecast) - uncomment to inspect:
#forecast_basic.tail()

In [0]:
# Model evaluation metrics (R2, RMSE, MAE) on the train and test periods.
# With the default return_list=False the function only prints and returns None,
# so the result is not assigned to a variable.
model_evaluate_prophet(df, forecast_basic)

In [0]:
# Plot observed data vs. the basic model's predictions (full series + zoom on the test period)
plot_prediction_prophet(df, forecast_basic)

In [0]:
# Break the basic forecast down into its components, one panel per component
fig = model_basic.plot_components(forecast_basic, figsize=(15, 10))
fig.suptitle('Basic Model: Forecast Components Breakdown')
fig.tight_layout()
plt.show()

## Advanced

In [0]:
#function to execute advanced prophet forecasting
def advanced_prophet(
    df_train,
    week_forder = 5, #Set it to false to "deactivate" weekly seasonality component
    week_pscale = 10, 
    month_forder = 8, #Set it to false to "deactivate" monthly seasonality component
    month_pscale = 10, 
    year_forder = 20, #Set it to false to "deactivate" yearly seasonality component
    year_pscale = 10,
    future_days = 90,
    seasonalities = ['w', 'm', 'y'] #choose which seasonalities to include (two ways: here or setting forder = False)
    ):
    """
    Fit a Prophet model with custom seasonalities and forecast `future_days` ahead.

    All built-in seasonalities are switched off; weekly, monthly and yearly
    components are added explicitly, each with its own Fourier order and prior scale.
    A component is added only if its key ('w', 'm' or 'y') is listed in
    `seasonalities` AND its Fourier order is not False. A line is printed for
    every component that is activated.

    Parameters:
    df_train (pd.DataFrame): Training data passed to `Prophet.fit`.
    week_forder / month_forder / year_forder: Fourier order of the weekly / monthly / yearly
        component, or False to deactivate that component.
    week_pscale / month_pscale / year_pscale: Prior scale of the corresponding component.
    future_days (int): Number of daily periods to forecast beyond the training data.
    seasonalities (list): Keys of the components to consider ('w', 'm', 'y').

    Returns:
    tuple: (fitted Prophet model, forecast dataframe covering historical + future dates).
    """

    # Automatic seasonalities are disabled so only the custom ones below contribute
    model_advanced = Prophet(
        seasonality_mode='additive',
        yearly_seasonality=False,
        weekly_seasonality=False,
        daily_seasonality=False,
    )

    # One row per candidate component:
    # (key, seasonality name, log message, period in days, Fourier order, prior scale)
    candidate_components = [
        ('w', 'weekly_custom', 'weekly seasonality active', 7, week_forder, week_pscale),
        ('m', 'monthly_custom', 'monthly seasonality active', 30.5, month_forder, month_pscale),
        ('y', 'yearly_custom', 'yearly seasonality active', 365.25, year_forder, year_pscale),
    ]

    for key, component_name, message, period_days, forder, pscale in candidate_components:
        # Skip components that were not requested or were explicitly deactivated
        if key not in seasonalities or forder == False:
            continue

        model_advanced.add_seasonality(
            name=component_name,
            period=period_days,
            fourier_order=forder,
            prior_scale=pscale,
        )
        print(message)

    # Train the model
    model_advanced.fit(df_train)

    # Historical dates plus `future_days` daily dates to predict on
    future_advanced = model_advanced.make_future_dataframe(periods=future_days, freq='D')

    return model_advanced, model_advanced.predict(future_advanced)

### Base (week only)

In [0]:
# Advanced model with only the custom weekly seasonality (Fourier order 3)
model, forecast = advanced_prophet(df_train, week_forder=3, seasonalities=['w'])
print()
model_evaluate_prophet(df, forecast)
print()

# Component breakdown; the title names the model actually plotted
# (it used to say 'Basic Model', copied from the basic section).
fig = model.plot_components(forecast, figsize=(15, 8))
plt.suptitle('Advanced Model (weekly only): Forecast Components Breakdown')
plt.tight_layout()
plt.show()

### Monthly component

In [0]:
# Weekly + monthly custom seasonalities, on top of the best weekly setting (week_forder = 3)
model, forecast = advanced_prophet(df_train, week_forder=3, month_forder=3, seasonalities=['w', 'm'])
print()
model_evaluate_prophet(df, forecast)
print()

# Component breakdown; the title names the model actually plotted
# (it used to say 'Basic Model', copied from the basic section).
fig = model.plot_components(forecast, figsize=(15, 8))
plt.suptitle('Advanced Model (weekly + monthly): Forecast Components Breakdown')
plt.tight_layout()
plt.show()

In [0]:
# Sweep the monthly Fourier order (1..12) while keeping the weekly component at
# Fourier order 3. 'w' must be listed in `seasonalities`, otherwise the
# week_forder argument is ignored and the weekly component is silently dropped.
for mfo in range(1, 13):
    model, forecast = advanced_prophet(df_train, week_forder=3, month_forder=mfo, seasonalities=['w', 'm'])
    print()
    print("month fourier order:", mfo)
    model_evaluate_prophet(df, forecast)
    print()

### Yearly Component

In [0]:
# Weekly + yearly custom seasonalities, on top of the best weekly setting (week_forder = 3).
# The yearly Fourier order is held in a variable defined in this cell, so the
# print below works on a fresh kernel (it used to reference `yfo`, which is
# only created later by the grid-search loop).
year_fourier_order = 10
model, forecast = advanced_prophet(df_train, week_forder=3, year_forder=year_fourier_order, seasonalities=['w', 'y'])
print()
print("yearly fourier order:", year_fourier_order)
model_evaluate_prophet(df, forecast)
print()

# Component breakdown; the title names the model actually plotted
fig = model.plot_components(forecast, figsize=(15, 8))
plt.suptitle('Advanced Model (weekly + yearly): Forecast Components Breakdown')
plt.tight_layout()
plt.show()

### Overall optimization

In [0]:
# Candidate weekly Fourier orders: False (component off) followed by 1..7
week_forder_list = [False, *range(1, 8)]
week_forder_list

In [0]:
# Candidate monthly Fourier orders: False (component off) followed by 1..9
month_forder_list = [False, *range(1, 10)]
month_forder_list

In [0]:
# Candidate yearly Fourier orders: False (component off) followed by the even values 8..22
year_forder_list = [False, *range(8, 24, 2)]
year_forder_list

In [0]:
# Rough runtime estimate for a grid search over the three candidate lists:
# a reference ratio of 20 per 2704 combinations, scaled by the grid size.
# NOTE(review): the unit is presumably minutes and 2704 presumably the size of an
# earlier timed grid - neither is stated here, confirm before relying on it.
# The label is printed to stdout; the value itself is shown as the cell output.
print("estimated time: ", end="")
20 / 2704 * len(week_forder_list) * len(month_forder_list) * len(year_forder_list)

In [0]:
# Grid search over the Fourier orders of the weekly / monthly / yearly components.
# The full grid takes up to ~5 min, so by default only a single smoke-test
# configuration (all components off) is run. Set RUN_FULL_GRID = True to
# reproduce the best results recorded at the bottom of this cell.
RUN_FULL_GRID = False

if RUN_FULL_GRID:
    week_forder_list = [False] + list(range(1, 8))
    month_forder_list = [False] + list(range(1, 10))
    year_forder_list = [False] + list(range(8, 24, 2))
else:
    week_forder_list = [False]
    month_forder_list = [False]
    year_forder_list = [False]

# Maps "wfo:<w>_mfo:<m>_yfo:<y>" -> [r2_train, r2_test, mae_train, mae_test]
model_metrics_dict = {}

for wfo, mfo, yfo in product(week_forder_list, month_forder_list, year_forder_list):
    model, result_df = advanced_prophet(df_train, week_forder = wfo, month_forder = mfo, year_forder = yfo)
    model_metrics = model_evaluate_prophet(df, result_df, metrics=["r2", "mae"], print_metrics=False, return_list=True)
    model_metrics_dict[f"wfo:{wfo}_mfo:{mfo}_yfo:{yfo}"] = model_metrics

# One row per configuration, one column per metric
model_metrics_df = pd.DataFrame.from_dict(
    model_metrics_dict,
    orient='index',
    columns=['r2_train', 'r2_test', 'mae_train', 'mae_test'],
)

#BEST RESULTS           r2_train	r2_test	    mae_train	    mae_test
#wfo:6_mfo:2_yfo:10	    0.821276	0.496001	3085.005903	    5498.647049

In [0]:
# Show the five configurations with the highest test-set R2
model_metrics_df.sort_values(by='r2_test', ascending=False).head(5)

In [0]:
# MODEL AFTER OPTIMIZING FOURIER ORDER
# Model fitting and forecasting with the best Fourier orders found by the grid search
model, forecast = advanced_prophet(df_train, week_forder=6, month_forder=2 , year_forder=10)
print()

# Model evaluation on the train and test periods
model_evaluate_prophet(df, forecast)
print()

# Plot forecast against the observed data
plot_prediction_prophet(df, forecast)
print()

# Plot forecast components; the title names the model actually plotted
# (it used to say 'Basic Model', copied from the basic section).
fig = model.plot_components(forecast, figsize=(15, 10))
plt.suptitle('Optimized Advanced Model: Forecast Components Breakdown')
plt.tight_layout()
plt.show()