# loading libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import glob
import pathlib
import pandas as pd
import numpy as np
import darts
from darts import TimeSeries
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller
import os
import glob
from tqdm import tqdm
from darts.dataprocessing.transformers.scaler import Scaler

# Helping Functions


In [None]:
import numpy as np


def calculate_metrics(actual, predicted):
    # Convert inputs to numpy arrays for easier calculations
    actual = np.array(actual)
    predicted = np.array(predicted)

    # Calculate individual metrics
    mae = np.mean(np.abs(predicted - actual))
    rmse = np.sqrt(np.mean((predicted - actual) ** 2))
    mape = np.mean(np.abs((predicted - actual) / actual)) * 100
    mse = np.mean((predicted - actual) ** 2)

    metrics = {"MAE": mae, "RMSE": rmse, "MAPE": mape, "MSE": mse}

    return metrics

# Data Reading 😶

In [None]:
fileName = "amazon"
df = pd.read_csv(f"../ProcessedData/{fileName}.csv")
df.head()

In [None]:
df = df[["Date", "Close"]]
df = df.drop_duplicates()
df.head()

# stationary Tesst

In [None]:
def adfuller_test(values):
    result = adfuller(values)
    labels = [
        "ADF Test Statistic",
        "p-value",
        "#Lags Used",
        "Number of Observations Used",
    ]
    for value, label in zip(result, labels):
        print(label + " : " + str(value))
    if result[1] <= 0.05:
        print(
            "P value is less than 0.05 that means we can reject the null hypothesis(Ho). Therefore we can conclude that data has no unit root and is stationary"
        )
    else:
        print(
            "Weak evidence against null hypothesis that means time series has a unit root which indicates that it is non-stationary "
        )

In [None]:
adfuller_test(df["Close"])

In [None]:
adfuller_test(df["Close"].diff(1).iloc[1:])

## Spliting Data into Training & Testing Data

In [None]:
from darts import TimeSeries
import numpy as np
import matplotlib.pyplot as plt

# Assuming df is your DataFrame containing daily data
series = TimeSeries.from_dataframe(
    df, "Date", "Close", freq="1D", fill_missing_dates=True, fillna_value=0
)


split_point = 0.80

train_series, test_series = series.split_after(split_point)

# Set the figure size and style
plt.figure(figsize=(18, 6))
# Plot the training and testing data
train_series.plot(label="Training Data", color="blue", linewidth=1.5, marker="o")
test_series.plot(label="Testing Data", color="orange", linewidth=1.5, marker="o")

# Add title and labels
plt.title("Training and Testing Data")
plt.xlabel("Date")
plt.ylabel("Amazon Close Price")

# Add grid lines
plt.grid(True)

# Add legend
plt.legend()

# Display the plot
plt.show()

## Exponential Smoothing Model

In [None]:
from darts.models.forecasting.exponential_smoothing import ExponentialSmoothing
from darts.utils.utils import ModelMode
from darts.utils.utils import SeasonalityMode

expo_smoot = ExponentialSmoothing(
    trend=ModelMode.ADDITIVE,
    damped=True,
    seasonal=SeasonalityMode.ADDITIVE,
    seasonal_periods=4,
    random_state=1999,
    remove_bias=True,
    smoothing_seasonal=0.3,
)

expo_smoot.fit(train_series)

# Print the summary of the trained model
print(expo_smoot.model.summary())

### GridSearch for Exponential Smoothing

In [None]:
# from darts.models.forecasting.exponential_smoothing import ExponentialSmoothing
# from darts.utils.utils import ModelMode, SeasonalityMode
# from darts.metrics import mae
# import numpy as np

# # Define a range of hyperparameters for the grid search
# trend_modes = [ModelMode.ADDITIVE, ]
# damped_values = [True, False]
# seasonality_modes = [SeasonalityMode.ADDITIVE]
# seasonal_periods_values = [4,7,10, 14,17,21, 30]  # You can add more values
# smoothing_seasonal_values = [0.1, 0.3, 0.5]  # You can add more values
# remove_biass =  [True,False]
# smoothing_trends = [ 0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]
# smoothing_levels = [ 0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]
# best_mae = np.inf
# best_params = None

# # Perform grid search
# for trend_mode in trend_modes:
#     for damped in damped_values:
#         for seasonality_mode in seasonality_modes:
#             for seasonal_periods in seasonal_periods_values:
#                 for smoothing_seasonal in smoothing_seasonal_values:
#                     for remove_bias in remove_biass:
#                         for smoothing_trend in smoothing_trends:
#                             for smoothing_level in smoothing_levels:

#                                 expo_smoot = ExponentialSmoothing(
#                                     trend=trend_mode,
#                                     damped=damped,
#                                     seasonal=seasonality_mode,
#                                     seasonal_periods=seasonal_periods,
#                                     random_state=1999,
#                                     remove_bias=remove_bias,
#                                     smoothing_seasonal=smoothing_seasonal,
#                                     smoothing_trend = smoothing_trend,
#                                     smoothing_level = smoothing_level
#                                 )

#                                 expo_smoot.fit(train_series)
#                                 forecast = expo_smoot.predict(len(test_series))
#                                 mae_score = mae(test_series, forecast)

#                                 if mae_score < best_mae:
#                                     best_mae = mae_score
#                                     best_params = {
#                                         'trend': trend_mode,
#                                         'damped': damped,
#                                         'seasonality': seasonality_mode,
#                                         'seasonal_periods': seasonal_periods,
#                                         'smoothing_seasonal': smoothing_seasonal
#                                     }

# print("Best parameters:", best_params)
# print("Best MAE:", best_mae)

In [None]:
def make_plots(
    input_window, ground_truth, forecast, bypass_information, result_plot_path
):
    plt.figure(figsize=(30, 6))
    input_window.plot(label="Input Data", marker="o")
    forecast.plot(label="Predicted", marker="o")
    ground_truth.plot(label="Ground Truth", marker="o")

    combined_time_index = input_window.time_index.append(forecast.time_index).append(
        ground_truth.time_index
    )
    starting_date_of_input_data = input_window.time_index[0].strftime("%Y-%m-%d")
    ending_date_of_input_data = input_window.time_index[-1].strftime("%Y-%m-%d")
    starting_date_predicted = forecast.time_index[0].strftime("%Y-%m-%d")
    ending_date_of_predicted = forecast.time_index[-1].strftime("%Y-%m-%d")

    plt.xticks(
        combined_time_index, combined_time_index.strftime("%Y-%m-%d"), rotation=90
    )
    plt.title(
        f"Results of Input Data from {starting_date_of_input_data} to {ending_date_of_input_data} & Evaluation on from {starting_date_predicted} to {ending_date_of_predicted}",
        fontsize=16,
    )
    plt.ylabel("Amazon Close Price", fontsize=14)
    plt.xlabel("Dates", fontsize=14)
    plt.legend()

    plot_filename = f"{result_plot_path}/{bypass_information['window_size']}_{bypass_information['horizon']}_{bypass_information['slide_step']}.png"
    plt.savefig(plot_filename)
    # plt.show()
    plt.close()


# Function to predict and evaluate
def get_ground_truth(window_size, prediction_horizon, slide_step, test_series):
    ground_truth_list = []
    input_window_list = []
    num_predictions = len(test_series) - window_size - prediction_horizon + 1
    for i in range(0, num_predictions, slide_step):
        input_window = test_series[i : i + window_size]
        ground_truth = test_series[
            i + window_size : i + window_size + prediction_horizon
        ]
        ground_truth_list.append(ground_truth)
        input_window_list.append(input_window)

    return ground_truth_list, input_window_list

In [13]:
model_name = "ExponentionalSmoothing"
FileName = fileName
result_path = f"../ProcessedData/Results/{model_name}/{FileName}"
result_plot_path = (
    f"../ProcessedData/Results/{model_name}/{FileName}/{model_name}_Plots"
)
os.makedirs(result_path, exist_ok=True)
os.makedirs(result_plot_path, exist_ok=True)

window_sizes = [30, 45, 90]
prediction_horizons = [15, 30, 35]
slide_steps = [5, 10, 15]
test_series = test_series

for window_size in window_sizes:
    for prediction_horizon in prediction_horizons:
        for slide_step in slide_steps:
            ground_truths, input_windows = get_ground_truth(
                window_size, prediction_horizon, slide_step, test_series
            )

            predictions = expo_smoot.historical_forecasts(
                series=test_series,
                num_samples=1,
                train_length=window_size,
                start=None,
                forecast_horizon=prediction_horizon,
                stride=slide_step,
                retrain=True,
                overlap_end=False,
                last_points_only=False,
                verbose=False,
                show_warnings=True,
                predict_likelihood_parameters=False,
                enable_optimization=True,
            )

            meta_information_evaluation = {
                "Iterations": [],
                "MAE": [],
                "RMSE": [],
                "MAPE": [],
                "MSE": [],
                "input_window_size": [],
                "horizon": [],
                "stride": [],
            }

            stride = 0
            for i in range(len(predictions)):
                input_window = input_windows[i]
                ground_truth = ground_truths[i]
                forecast = predictions[i]
                sample = forecast.pd_dataframe().reset_index()
                sample["Close"] = df["Close"].apply(lambda val: 0 if val < 0 else val)
                sample = sample[["Date", "Close"]]
                forecast = TimeSeries.from_dataframe(
                    sample, time_col="Date", value_cols="Close", freq="1D"
                )

                bypass_information = {
                    "slide_step": stride,
                    "window_size": window_size,
                    "horizon": prediction_horizon,
                }
                make_plots(
                    input_window,
                    ground_truth,
                    forecast,
                    bypass_information,
                    result_plot_path,
                )

                actual = ground_truth.values().flatten().tolist()
                predicted = forecast.values().flatten().tolist()
                metrics = calculate_metrics(actual, predicted)

                meta_information_evaluation["Iterations"].append(stride)
                meta_information_evaluation["MAE"].append(metrics["MAE"])
                meta_information_evaluation["RMSE"].append(metrics["RMSE"])
                meta_information_evaluation["MAPE"].append(metrics["MAPE"])
                meta_information_evaluation["MSE"].append(metrics["MSE"])
                meta_information_evaluation["input_window_size"].append(window_size)
                meta_information_evaluation["horizon"].append(prediction_horizon)
                meta_information_evaluation["stride"].append(slide_step)

                stride += slide_step

            evaluation_df = pd.DataFrame.from_dict(meta_information_evaluation)
            evaluation_df.to_csv(
                f"{result_path}/window_size_{window_size}_horizon_{prediction_horizon}_stride_{slide_step}.csv",
                index=False,
            )
    #         break

    #     break
    # break

  mape = np.mean(np.abs((predicted - actual) / actual)) * 100
  mape = np.mean(np.abs((predicted - actual) / actual)) * 100
  mape = np.mean(np.abs((predicted - actual) / actual)) * 100
  mape = np.mean(np.abs((predicted - actual) / actual)) * 100
  mape = np.mean(np.abs((predicted - actual) / actual)) * 100
  mape = np.mean(np.abs((predicted - actual) / actual)) * 100
  mape = np.mean(np.abs((predicted - actual) / actual)) * 100
  mape = np.mean(np.abs((predicted - actual) / actual)) * 100
  mape = np.mean(np.abs((predicted - actual) / actual)) * 100
  mape = np.mean(np.abs((predicted - actual) / actual)) * 100
  mape = np.mean(np.abs((predicted - actual) / actual)) * 100
  mape = np.mean(np.abs((predicted - actual) / actual)) * 100
  mape = np.mean(np.abs((predicted - actual) / actual)) * 100
  mape = np.mean(np.abs((predicted - actual) / actual)) * 100
  mape = np.mean(np.abs((predicted - actual) / actual)) * 100
  mape = np.mean(np.abs((predicted - actual) / actual)) * 100
  mape =

# Evaluate Metrics

In [4]:
import pandas as pd
import glob


def aggregate_evaluation_results(file_pattern):
    eval_dict = {
        "window_size": [],
        "horizan": [],
        "stride": [],
        "AVG_MAE": [],
        "AVG_MSE": [],
        "AVG_RMSE": [],
        "AVG_MAPE": [],
    }

    paths = glob.glob(file_pattern)
    for path in paths:
        window_size = path.split("/")[-1].split("_")[2]
        horizan = path.split("/")[-1].split("_")[4]
        stride = path.split("/")[-1].split("_")[6].split(".")[0]

        df = pd.read_csv(path)
        eval_dict["window_size"].append(window_size)
        eval_dict["horizan"].append(horizan)
        eval_dict["stride"].append(stride)

        eval_dict["AVG_MAE"].append(df["MAE"].mean())
        eval_dict["AVG_MSE"].append(df["MSE"].mean())
        eval_dict["AVG_RMSE"].append(df["RMSE"].mean())
        eval_dict["AVG_MAPE"].append(df["MAPE"].mean())

    eval_df = pd.DataFrame.from_dict(eval_dict)
    eval_df = eval_df.dropna()
    eval_df.sort_values(
        ["window_size", "horizan", "stride"], inplace=True, ascending=True
    )

    return eval_df


# Example usage
file_pattern = f"../ProcessedData/Results/ExponentionalSmoothing/amazon/*.csv"
result_df = aggregate_evaluation_results(file_pattern)
result_df = result_df[['window_size','horizan','stride','AVG_MAE','AVG_MSE','AVG_RMSE']]


In [5]:
for name , group in result_df.groupby(['window_size','horizan']):
    print(name)
    display(group)    

('30', '15')


Unnamed: 0,window_size,horizan,stride,AVG_MAE,AVG_MSE,AVG_RMSE
5,30,15,10,59.842883,4793.12174,67.37911
15,30,15,15,59.545134,4764.509922,67.416416
23,30,15,5,60.305498,4846.574748,67.790858


('30', '30')


Unnamed: 0,window_size,horizan,stride,AVG_MAE,AVG_MSE,AVG_RMSE
14,30,30,10,58.321304,4560.017166,65.578206
26,30,30,15,58.111936,4526.850307,65.571819
10,30,30,5,58.020501,4514.722393,65.344875


('30', '35')


Unnamed: 0,window_size,horizan,stride,AVG_MAE,AVG_MSE,AVG_RMSE
16,30,35,10,57.732323,4526.907384,65.331545
2,30,35,15,58.588445,4630.050133,66.25426
3,30,35,5,57.862857,4538.449117,65.462035


('45', '15')


Unnamed: 0,window_size,horizan,stride,AVG_MAE,AVG_MSE,AVG_RMSE
8,45,15,10,59.97326,4789.785481,67.481177
20,45,15,15,58.203567,4566.957222,66.163035
18,45,15,5,59.665416,4749.079036,67.173064


('45', '30')


Unnamed: 0,window_size,horizan,stride,AVG_MAE,AVG_MSE,AVG_RMSE
19,45,30,10,57.041314,4377.434047,64.463793
11,45,30,15,56.851905,4352.16685,64.403775
9,45,30,5,57.465226,4435.439987,64.806858


('45', '35')


Unnamed: 0,window_size,horizan,stride,AVG_MAE,AVG_MSE,AVG_RMSE
4,45,35,10,57.302937,4457.44856,64.941443
7,45,35,15,57.354512,4459.591496,65.114469
13,45,35,5,57.315833,4460.226117,64.941235


('90', '15')


Unnamed: 0,window_size,horizan,stride,AVG_MAE,AVG_MSE,AVG_RMSE
12,90,15,10,57.704061,4503.74682,65.31739
25,90,15,15,58.042118,4528.0394,65.853853
24,90,15,5,58.151304,4559.171493,65.730169


('90', '30')


Unnamed: 0,window_size,horizan,stride,AVG_MAE,AVG_MSE,AVG_RMSE
1,90,30,10,56.36141,4318.503316,63.726816
22,90,30,15,56.801535,4339.297941,64.322076
17,90,30,5,56.06389,4271.532964,63.496228


('90', '35')


Unnamed: 0,window_size,horizan,stride,AVG_MAE,AVG_MSE,AVG_RMSE
0,90,35,10,55.665672,4272.394987,63.410103
6,90,35,15,57.525173,4474.212443,65.260373
21,90,35,5,55.954572,4302.822195,63.684746
