# loading libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import os 
import glob
import pathlib
import pandas as pd
import numpy as np
import darts
from darts import TimeSeries
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller
import os
import glob
from tqdm import tqdm
from darts.dataprocessing.transformers.scaler import Scaler
from darts.models.forecasting.prophet_model import Prophet


# Helping Functions


In [None]:
import numpy as np

def calculate_metrics(actual, predicted):
    # Convert inputs to numpy arrays for easier calculations
    actual = np.array(actual)
    predicted = np.array(predicted)
    
    # Calculate individual metrics
    mae = np.mean(np.abs(predicted - actual))
    rmse = np.sqrt(np.mean((predicted - actual) ** 2))
    mape = np.mean(np.abs((predicted - actual) / actual)) * 100
    mse = np.mean((predicted - actual) ** 2)
    
    metrics = {
        'MAE': mae,
        'RMSE': rmse,
        'MAPE': mape,
        'MSE': mse
    }
    
    return metrics


# Selected Time Store_Number % family
## Records
* Store Number : 1 & Family : Automative 
* Store Number : 2 & Family :Automative
* Store Number : 3 & Family :Automative
* Store Number : 7 & Family :Automative
* Store Number : 1 & Family : Seafood

In [None]:
fileName = '2_AUTOMOTIVE'
df = pd.read_csv(f'../ProcessedData/GroupData/{fileName}.csv')
df.head()


In [None]:
df = df[['date','sales']]
df = df.drop_duplicates()
df.head()

## Spliting Data into Training & Testing Data

In [None]:
from darts import TimeSeries
import numpy as np
import matplotlib.pyplot as plt

# Assuming df is your DataFrame containing daily data
series = TimeSeries.from_dataframe(df, "date", "sales", freq='1D', fill_missing_dates=True, fillna_value=0)


split_point = 0.80

train_series, test_series = series.split_after(split_point)

# Set the figure size and style
plt.figure(figsize=(18, 6))
# Plot the training and testing data
train_series.plot(label='Training Data', color='blue', linewidth=1.5, marker='o')
test_series.plot(label='Testing Data', color='orange', linewidth=1.5, marker='o')

# Add title and labels
plt.title('Training and Testing Data')
plt.xlabel('Date')
plt.ylabel('Global DGV NS Visits')

# Add grid lines
plt.grid(True)

# Add legend
plt.legend()

# Display the plot
plt.show()


## FacebookProphet

In [None]:
prophet = Prophet(
    suppress_stdout_stderror=True,
    add_encoders={
        'cyclic': {'future': ['month']},
        'datetime_attribute': {'future': ['hour', 'dayofweek']},
        'position': {'future': ['relative']},
        'custom': {'future': [lambda idx: (idx.year - 1950) / 50]},
        'transformer': Scaler()
    },
    add_seasonalities=dict(
        name='custom_daily',
        seasonal_periods=24,
        fourier_order=6,
        prior_scale=10.0,
        mode='multiplicative'),
        country_holidays='US')
prophet.fit(train_series)

horizan = 30*4

# summary = arima_model.model.summary()
test_series_ = test_series[0:horizan]
plt.figure(figsize=(18, 6))
forcast_arima = prophet.predict(horizan)
prophet.predict(horizan).plot(marker='o',label='predicted')
test_series_.plot(marker='o',label='Actual/Ground truth')
# Add title and labels
plt.title('Ground truth vs predicted')
plt.xlabel('Date')
plt.ylabel('Global DGV NS Visits')
plt.xticks(forcast_arima.time_index, forcast_arima.time_index.strftime('%Y-%m-%d'), rotation=90)

# Add grid lines
plt.grid(True)

# Add legend
plt.legend()

# Display the plot
plt.show()


# Model Evaluation

In [None]:
def model_evaluation(model_name,model_object,test_series,FileName):
    
    result_path = f'../ProcessedData/Results/{model_name}/{FileName}'
    result_plot_path = f'../ProcessedData/Results/{model_name}/{FileName}/{model_name}_Plots'
    os.makedirs(result_path,exist_ok=True)
    os.makedirs(result_plot_path,exist_ok=True)

        # Set your parameters
    window_sizes = [30, 45, 90]
    prediction_horizons = [15, 30,35]
    slide_steps = [5, 10, 15]

    test_series = test_series
    model = model_object

    for window_size in window_sizes:
        for prediction_horizon in prediction_horizons:
            for slide_step in slide_steps:
                print(f'Iteration : Window size : {window_size} Horizan: {prediction_horizon}, Stride : {slide_step}')
                evaluation_df = predict_and_evaluate(window_size, prediction_horizon, slide_step, test_series, model,result_plot_path)
                evaluation_df.to_csv(f'{result_path}/window_size_{window_size}_horizon_{prediction_horizon}_stride_{slide_step}.csv', index=False)
                
                print(f'Window_size_{window_size}_prediction_horizon_{prediction_horizon}_slide_step_{slide_step} - Evaluation completed.')
        #         break
        #     break
        # break

# Facebok Prophet Model Evaluation

In [None]:

def make_plots(input_window,ground_truth,forecast,bypass_information,result_plot_path):

    plt.figure(figsize=(30, 6))
    input_window.plot(label='Input Data', marker='o')
    forecast.plot(label='Predicted', marker='o')
    ground_truth.plot(label='Ground Truth', marker='o')
    
    combined_time_index = input_window.time_index.append(forecast.time_index).append(ground_truth.time_index)
    starting_date_of_input_data = input_window.time_index[0].strftime("%Y-%m-%d")
    ending_date_of_input_data = input_window.time_index[-1].strftime("%Y-%m-%d")
    starting_date_predicted = forecast.time_index[0].strftime("%Y-%m-%d")
    ending_date_of_predicted = forecast.time_index[-1].strftime("%Y-%m-%d")
    
    plt.xticks(combined_time_index, combined_time_index.strftime('%Y-%m-%d'), rotation=90)
    plt.title(f'Results of Input Data from {starting_date_of_input_data} to {ending_date_of_input_data} & Evaluation on from {starting_date_predicted} to {ending_date_of_predicted}', fontsize=16)
    plt.ylabel('Quantity Sold', fontsize=14)
    plt.xlabel('Dates', fontsize=14)
    plt.legend()
    
    plot_filename = f"{result_plot_path}/{bypass_information['window_size']}_{bypass_information['horizon']}_{bypass_information['slide_step']}.png"
    plt.savefig(plot_filename)      
    plt.show()
    plt.close()
# Function to predict and evaluate
def get_ground_truth(window_size, prediction_horizon, slide_step, test_series):
    ground_truth_list = []
    input_window_list = []
    num_predictions = len(test_series) - window_size - prediction_horizon + 1
    for i in range(0, num_predictions, slide_step):
        input_window = test_series[i:i + window_size]
        ground_truth = test_series[i + window_size:i + window_size + prediction_horizon]
        ground_truth_list.append(ground_truth)
        input_window_list.append(input_window)
        
    return ground_truth_list , input_window_list



    
            

model_name = 'prophet'
FileName = fileName
result_path = f'../ProcessedData/Results/{model_name}/{FileName}'
result_plot_path = f'../ProcessedData/Results/{model_name}/{FileName}/{model_name}_Plots'
os.makedirs(result_path,exist_ok=True)
os.makedirs(result_plot_path,exist_ok=True)

window_sizes = [30, 45, 90]
prediction_horizons = [15, 30, 35]
slide_steps = [5, 10, 15]
test_series = test_series

for window_size in window_sizes:
    for prediction_horizon in prediction_horizons:
        for slide_step in slide_steps:
            
            ground_truths,input_windows = get_ground_truth(window_size,prediction_horizon,slide_step,test_series)
            
            predictions =prophet.historical_forecasts(series=test_series ,
                                            num_samples=1, 
                                            train_length=window_size, 
                                            start=None, 
                                            forecast_horizon=prediction_horizon, 
                                            stride=slide_step, 
                                            retrain=True, 
                                            overlap_end=False,
                                            last_points_only=False, 
                                            verbose=False, 
                                            show_warnings=True, 
                                            predict_likelihood_parameters=False, 
                                            enable_optimization=True)
            
            meta_information_evaluation = {
                    'Iterations': [],
                    'MAE': [],
                    'RMSE': [],
                    'MAPE': [],
                    'MSE': [],
                    'input_window_size': [],
                    'horizon': [],
                    'stride': []
                }
                
            stride=0
            for i in range(len(predictions)):
                input_window = input_windows[i]
                ground_truth = ground_truths[i]
                forecast = predictions[i]
                sample = forecast.pd_dataframe().reset_index()
                sample['sales'] = df['sales'].apply(lambda val : 0  if val <0 else val)
                sample = sample[['date','sales']]
                forecast = TimeSeries.from_dataframe(sample,time_col='date',value_cols  = 'sales',freq='1D')

                
                bypass_information = {
                    'slide_step':stride,
                    'window_size':window_size,
                    'horizon':prediction_horizon,            
                }
                make_plots(input_window,ground_truth,forecast,bypass_information,result_plot_path)
                
                actual = ground_truth.values().flatten().tolist()
                predicted = forecast.values().flatten().tolist()
                metrics = calculate_metrics(actual, predicted)
                
                meta_information_evaluation['Iterations'].append(stride)
                meta_information_evaluation['MAE'].append(metrics['MAE'])
                meta_information_evaluation['RMSE'].append(metrics['RMSE'])
                meta_information_evaluation['MAPE'].append(metrics['MAPE'])
                meta_information_evaluation['MSE'].append(metrics['MSE'])
                meta_information_evaluation['input_window_size'].append(window_size)
                meta_information_evaluation['horizon'].append(prediction_horizon)
                meta_information_evaluation['stride'].append(slide_step)
        
                stride += slide_step
                
            evaluation_df = pd.DataFrame.from_dict(meta_information_evaluation)
            evaluation_df.to_csv(f'{result_path}/window_size_{window_size}_horizon_{prediction_horizon}_stride_{slide_step}.csv', index=False)
    #         break
        
    #     break
    # break


In [56]:
import pandas as pd
import glob

def aggregate_evaluation_results(file_pattern):
    eval_dict = {
        'window_size': [],
        'horizan': [],
        'stride': [],
        'AVG_MAE': [],
        'AVG_MSE': [],
        'AVG_RMSE': [],
        'AVG_MAPE': [],
    }
    
    paths = glob.glob(file_pattern)
    
    for path in paths:
        window_size = path.split('/')[-1].split('_')[2]
        horizan = path.split('/')[-1].split('_')[4]
        stride = path.split('/')[-1].split('_')[6].split('.')[0]

        df = pd.read_csv(path)
        eval_dict['window_size'].append(window_size)
        eval_dict['horizan'].append(horizan)
        eval_dict['stride'].append(stride)

        eval_dict['AVG_MAE'].append(df['MAE'].mean())
        eval_dict['AVG_MSE'].append(df['MSE'].mean())
        eval_dict['AVG_RMSE'].append(df['RMSE'].mean())
        eval_dict['AVG_MAPE'].append(df['MAPE'].mean())
    
    eval_df = pd.DataFrame.from_dict(eval_dict)
    eval_df = eval_df.dropna()
    eval_df.sort_values(['window_size', 'horizan', 'stride'], inplace=True, ascending=True)
    
    return eval_df

# Example usage
file_pattern = f"../ProcessedData/Results/prophet/{fileName}/*.csv"
result_df = aggregate_evaluation_results(file_pattern)
result_df


Unnamed: 0,window_size,horizan,stride,AVG_MAE,AVG_MSE,AVG_RMSE,AVG_MAPE
5,30,15,10,2.875,13.858333,3.677508,inf
15,30,15,15,3.013333,15.226667,3.875133,inf
23,30,15,5,2.857778,13.782222,3.675262,inf
14,30,30,10,3.25,18.638889,4.313142,inf
26,30,30,15,3.2,18.1,4.252281,inf
10,30,30,5,3.141667,17.336111,4.153545,inf
16,30,35,10,3.280952,18.347619,4.280677,inf
2,30,35,15,3.285714,18.5,4.300083,inf
3,30,35,5,3.233766,17.81039,4.216783,inf
8,45,15,10,2.744444,13.322222,3.619536,77.139851


'2_AUTOMOTIVE'