# Loading libraries

In [None]:
import pandas as pd 
import numpy as np
import os
import glob
import darts
from darts import TimeSeries
from statsmodels.tsa.stattools import adfuller
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from sklearn.metrics import r2_score


# Helper Functions


In [None]:

def plot_scatter(df):
    """
    Plot the order series over time and overlay one marker trace per weekday.

    Parameters:
    df (pandas.DataFrame): Must contain the columns 'date', 'order_id' and
        'day_of_week', where day_of_week is 0 for Monday through 6 for Sunday.

    Returns:
    None. The figure is displayed with fig.show().
    """
    # Output folder for plots; created up front so a later export can rely on it.
    os.makedirs('../plots', exist_ok=True)

    # (day_of_week code, legend label) in the order the overlay traces are drawn.
    weekday_traces = [
        (5, '-Saturday-Quantity'),
        (6, '-Sunday-Quantity'),
        (4, '-friday-Quantity'),
        (3, '-Thursday-Quantity'),
        (2, '-Wednesday-Quantity'),
        (1, '-Tuesday-Quantity'),
        (0, '-Monday-Quantity'),
    ]

    fig = make_subplots()

    # Base trace: the full series as a connected line with markers.
    fig.add_trace(
        go.Scatter(x=df['date'], y=df['order_id'], mode='markers+lines', name='Quantity'),
        row=1,
        col=1,
    )

    # One marker-only trace per weekday so each day can be toggled in the legend.
    for day_code, legend_label in weekday_traces:
        day_df = df[df['day_of_week'] == day_code]
        fig.add_trace(
            go.Scatter(x=day_df['date'], y=day_df['order_id'], mode='markers', name=legend_label),
            row=1,
            col=1,
        )

    fig.update_layout(
        title='Scatter Plot of Order ID',
        height=600,
        width=1800,
        font_size=14,
    )
    # The figure has a single subplot, so only the row-1 axis needs a title.
    fig.update_yaxes(title_text='Quantity', row=1, col=1)
    fig.show()
    
def calculate_smape(actual, predicted):
    """
    Calculate Symmetric Mean Absolute Percentage Error (SMAPE) given actual and predicted values.

    Each term is 2 * |a - p| / (|a| + |p|). A pair where both values are zero
    is a perfect prediction and contributes 0 instead of dividing by zero.

    Parameters:
    actual (list or array): The actual (true) values.
    predicted (list or array): The predicted values.

    Returns:
    smape (float): The SMAPE value in percent (0 to 200).

    Raises:
    ValueError: If the inputs are empty or have different shapes.
    """
    actual_arr = np.asarray(actual, dtype=float)
    predicted_arr = np.asarray(predicted, dtype=float)

    if actual_arr.shape != predicted_arr.shape:
        raise ValueError(
            f"actual and predicted must have the same shape, "
            f"got {actual_arr.shape} and {predicted_arr.shape}"
        )
    if actual_arr.size == 0:
        raise ValueError("SMAPE is undefined for empty input")

    numerator = 2.0 * np.abs(actual_arr - predicted_arr)
    denominator = np.abs(actual_arr) + np.abs(predicted_arr)

    # Divide only where the denominator is non-zero; the remaining terms keep
    # the 0 they were initialised with (both values are 0, i.e. no error).
    terms = np.divide(
        numerator,
        denominator,
        out=np.zeros_like(numerator),
        where=denominator != 0,
    )
    return float(terms.mean() * 100)

from sklearn.metrics import median_absolute_error
from sklearn.metrics import mean_squared_log_error 

def calculate_metrics(actual, predicted):
    """
    Compute a set of regression error metrics for one forecast.

    Parameters:
    actual (list or array): The actual (true) values.
    predicted (list or array): The predicted values.

    Returns:
    metrics (dict): Keys 'MAE', 'RMSE', 'MAPE', 'MSE', 'SMAPE', 'R2',
        'MEDEA' (median absolute error) and 'MSLE'.
        'MAPE' is averaged over the points whose actual value is non-zero and
        is NaN when there are none. 'MSLE' is NaN when scikit-learn rejects
        the inputs with a ValueError.
    """
    smape = calculate_smape(actual, predicted)

    # Convert inputs to numpy arrays for easier calculations
    actual = np.asarray(actual, dtype=float)
    predicted = np.asarray(predicted, dtype=float)
    errors = predicted - actual

    r2 = r2_score(actual, predicted)
    medae = median_absolute_error(actual, predicted)

    # Mean Squared Logarithmic Error (MSLE). It is not defined for every input
    # (e.g. negative forecasts); report NaN rather than losing all metrics.
    try:
        msle = mean_squared_log_error(actual, predicted)
    except ValueError:
        msle = float('nan')

    # Calculate individual metrics
    mae = np.mean(np.abs(errors))
    mse = np.mean(errors ** 2)
    rmse = np.sqrt(mse)

    # Percentage error is undefined where the actual value is 0, so those
    # points are excluded instead of turning the mean into inf/NaN.
    nonzero = actual != 0
    if nonzero.any():
        mape = np.mean(np.abs(errors[nonzero] / actual[nonzero])) * 100
    else:
        mape = float('nan')

    metrics = {
        'MAE': mae,
        'RMSE': rmse,
        'MAPE': mape,
        'MSE': mse,
        'SMAPE': smape,
        'R2': r2,
        'MEDEA': medae,  # key spelling kept as-is; callers read this exact key
        'MSLE': msle,
    }

    return metrics



# Loading data

In [None]:
# Make sure the output folder for this iteration exists before writing to it.
os.makedirs('../processedData/Iteration_03', exist_ok=True)

# Read the raw sheet and parse the date column.
fileName = '../RawData/Sales Forecast_NEW2.xlsx'
df = pd.read_excel(fileName, sheet_name='NEW_DATA')
df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d')

# Order chronologically with a fresh 0..n-1 index, then persist the full table.
df = df.sort_values(['date']).reset_index(drop=True)
df.to_csv('../processedData/Iteration_03/sorted_Sales_Forecast_dataset.csv', index=False)

# Keep only the two columns used downstream and persist them as well.
df = df.loc[:, ['date', 'order_id']]
df.to_csv('../processedData/Iteration_03/date_order_sales_forecast_dataset.csv', index=False)


In [None]:
# Preview the first rows of the two-column (date, order_id) frame.
df.head()

In [None]:
# Collapse to one row per date by summing order_id, then persist the result.
# NOTE(review): this sums the order_id values; if order_id is an identifier
# rather than a quantity, a count may be what is intended — confirm.
df = df.groupby('date', as_index=False)['order_id'].sum()
df.to_csv('../processedData/Iteration_03/date_order_id_sales_forecast_cleaned.csv', index=False)

In [None]:
# Derive calendar features from the date column in a single step.
df = df.assign(
    year=df['date'].dt.year,
    day_of_month=df['date'].dt.day,
    month=df['date'].dt.month,
    day_of_week=df['date'].dt.day_of_week,   # 0 = Monday ... 6 = Sunday
    day_name=df['date'].dt.strftime('%A'),   # full weekday name
)


In [None]:
# Show the series with the per-weekday overlay (uses the day_of_week column).
plot_scatter(df)

# Stationarity Test

In [None]:
def adfuller_test(values):
    """
    Run the Augmented Dickey-Fuller test on `values` and print the outcome.

    Prints the first four entries of the adfuller() result with their labels,
    then a verdict based on whether the p-value is at most 0.05.

    Parameters:
    values (array-like): The observations to test.

    Returns:
    None.
    """
    result = adfuller(values)

    names = ('ADF Test Statistic', 'p-value', '#Lags Used', 'Number of Observations Used')
    # zip stops after the four named entries; the rest of `result` is not printed.
    for name, stat in zip(names, result):
        print(f'{name} : {stat!s}')

    rejects_unit_root = result[1] <= 0.05
    verdict = (
        "P value is less than 0.05 that means we can reject the null hypothesis(Ho). Therefore we can conclude that data has no unit root and is stationary"
        if rejects_unit_root
        else "Weak evidence against null hypothesis that means time series has a unit root which indicates that it is non-stationary "
    )
    print(verdict)


# Test the daily order series.
adfuller_test(df['order_id'])

## Splitting Data into Training & Testing Data

In [None]:
from darts import TimeSeries
import numpy as np
import matplotlib.pyplot as plt

# Build a daily series from df; with fill_missing_dates/fillna_value set,
# dates missing from df are added and filled with 0.
series = TimeSeries.from_dataframe(
    df,
    time_col="date",
    value_cols="order_id",
    freq='1D',
    fill_missing_dates=True,
    fillna_value=0,
)

# Split at the 80% mark: the first part is for training, the rest for testing.
split_point = 0.80
train_series, test_series = series.split_after(split_point)

# Draw both parts on one wide figure.
plt.figure(figsize=(18, 6))
train_series.plot(marker='o', linewidth=1.5, color='blue', label='Training Data')
test_series.plot(marker='o', linewidth=1.5, color='orange', label='Testing Data')

# Title, axis labels, grid and legend on the axes that were just drawn on.
split_axes = plt.gca()
split_axes.set_title('Training and Testing Data')
split_axes.set_xlabel('Date')
split_axes.set_ylabel('Sales')
split_axes.grid(True)
split_axes.legend()

plt.show()


## Inspect Seasonality

In [None]:
from darts.utils.statistics import plot_acf, check_seasonality

# Try every candidate period from 2 to 24 on the training data and report
# the ones check_seasonality flags at alpha = 0.05.
for m in range(2, 25):
    is_seasonal, period = check_seasonality(train_series, m=m, alpha=0.05)
    if not is_seasonal:
        continue
    print(f"There is seasonality of order {period}.")

## Auto Correlation plot
The autocorrelation function (ACF) is used to help identify the order of an ARIMA model. The ACF plot shows the correlation between the time series and lagged copies of itself. The lag at which the ACF plot crosses the upper confidence bound for the first time is taken as the order of the **MA** component of the ARIMA model. Similarly, if the ACF plot decays slowly, the time series is strongly autocorrelated, which means that an AR component should be included in the ARIMA model.

In [None]:
from darts.utils.statistics import plot_acf,plot_pacf

# Autocorrelation of the training data for lags up to 100.
# NOTE(review): m=7 presumably marks the weekly lag of the daily series — confirm.
plot_acf(
    train_series,
    m=7,
    max_lag=100,
    fig_size=(10, 5),
    axis=None,
    default_formatting=True,
)

# Label the axes the ACF was drawn on, then display.
acf_axes = plt.gca()
acf_axes.set_xlabel('lags')
acf_axes.set_ylabel('correlation')
acf_axes.set_title('Auto Correlation Plot')
plt.show()

## Partial Auto Correlation plot
The partial autocorrelation function (PACF) is also used to help identify the order of an ARIMA model. The PACF plot shows the correlation between the time series and lagged copies of itself, with the influence of the intermediate lags removed. The lag at which the PACF plot crosses the upper confidence bound for the first time is taken as the order of the **AR** component of the ARIMA model.

In [None]:
from darts.utils.statistics import plot_acf,plot_pacf

# Partial autocorrelation of the training data for lags up to 100.
# NOTE(review): m=7 presumably marks the weekly lag of the daily series — confirm.
plot_pacf(
    train_series,
    m=7,
    max_lag=100,
    fig_size=(10, 5),
    axis=None,
    default_formatting=True,
)

# Label the axes the PACF was drawn on, then display.
pacf_axes = plt.gca()
pacf_axes.set_xlabel('lags')
pacf_axes.set_ylabel('correlation')
pacf_axes.set_title('Partial Auto Correlation Plot')
plt.show()

# ARIMA Model

In [None]:
from darts.dataprocessing.transformers.scaler import Scaler
from darts.models.forecasting.arima import ARIMA

# ARIMA(p=4, d=0, q=2) with no trend term; the encoders generate future
# covariates from the time index.
# NOTE(review): the series is built with freq='1D', so the 'hour' attribute
# is presumably constant across all rows — confirm it is wanted.
arima_model = ARIMA(
    p=4,
    d=0,
    q=2,
    random_state=1999,
    trend='n',
    add_encoders={
        'cyclic': {'future': ['month']},
        'datetime_attribute': {'future': ['hour', 'dayofweek']},
        'position': {'future': ['relative']},
        'custom': {'future': [lambda idx: (idx.year - 1950) / 50]},
    },
)

arima_model.fit(train_series)
# Print explicitly: a bare expression is only displayed when it is the last
# statement of a cell.
print(arima_model.model.summary())

# Forecast horizon in steps of the daily series (about four months).
horizan = 30 * 4

# Ground truth for the same span, and a single forecast reused for both the
# plot and the tick labels.
test_series_ = test_series[0:horizan]
forcast_arima = arima_model.predict(horizan)

plt.figure(figsize=(18, 6))
forcast_arima.plot(marker='o', label='predicted')
test_series_.plot(marker='o', label='Actual/Ground truth')

# Add title and labels
plt.title('Ground truth vs predicted')
plt.xlabel('Date')
plt.ylabel('Order Number')
plt.xticks(forcast_arima.time_index, forcast_arima.time_index.strftime('%Y-%m-%d'), rotation=90)

# Add grid lines
plt.grid(True)

# Add legend
plt.legend()

# Display the plot
plt.show()

# 3-step forecast continuing from the end of the test series (shown as the
# cell output).
arima_model.predict(3, series=test_series)


# Model Evaluation

In [None]:

import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from tqdm import tqdm

# Function to predict and evaluate
def predict_and_evaluate(window_size, prediction_horizon, slide_step, test_series, model, result_plot_path):
    """
    Evaluate `model` on sliding windows taken from `test_series`.

    For each window start i (0, slide_step, 2*slide_step, ...) the model
    forecasts `prediction_horizon` steps from the `window_size` points starting
    at i, and the forecast is scored against the points that follow the window.
    A plot is written for each window via create_plots().

    A window that raises is reported and skipped so the remaining windows are
    still evaluated.

    Parameters:
    window_size (int): Number of points fed to the model per window.
    prediction_horizon (int): Number of steps to forecast per window.
    slide_step (int): Distance between consecutive window starts.
    test_series: Series to slice windows from; must support len(), slicing
        and .values().
    model: Fitted model exposing predict(n=..., series=...).
    result_plot_path (str): Directory passed to create_plots().

    Returns:
    pandas.DataFrame: One row per successfully evaluated window with the
        columns Iterations, MAE, RMSE, MAPE, MSE, SMAPE, R2, MEDEA, MSLE,
        input_window_size, horizon, stride. Empty (same columns) when no
        window fits or every window failed.
    """
    metric_names = ['MAE', 'RMSE', 'MAPE', 'MSE', 'SMAPE', 'R2', 'MEDEA', 'MSLE']
    columns = ['Iterations', *metric_names, 'input_window_size', 'horizon', 'stride']

    # Number of window starts for which both the window and its horizon fit.
    num_predictions = len(test_series) - window_size - prediction_horizon + 1

    # Same for every window, so built once outside the loop.
    bypass_information = {
        'slide_step': slide_step,
        'window_size': window_size,
        'horizon': prediction_horizon,
    }

    rows = []
    for i in tqdm(range(0, num_predictions, slide_step)):
        try:
            input_window = test_series[i:i + window_size]
            ground_truth = test_series[i + window_size:i + window_size + prediction_horizon]
            forecast = model.predict(n=prediction_horizon, series=input_window)

            actual = ground_truth.values().flatten().tolist()
            predicted = forecast.values().flatten().tolist()
            metrics = calculate_metrics(actual, predicted)

            row = {
                'Iterations': i,
                'input_window_size': window_size,
                'horizon': prediction_horizon,
                'stride': slide_step,
            }
            row.update({name: metrics[name] for name in metric_names})
            rows.append(row)

            create_plots(input_window, forecast, ground_truth, result_plot_path, bypass_information)
        except Exception as e:
            # Best effort: report the failing window and keep going.
            print('Error Occurred in fuction predict_and_evaluate():', e, f'(window start {i} skipped)')

    return pd.DataFrame(rows, columns=columns)

# Function to create plots
def create_plots(input_window, forecast, ground_truth, result_plot_path, bypass_information):
    """
    Save a PNG comparing one input window, its forecast and the ground truth.

    The file name includes the first date of the input window so that plots
    of different windows with the same window/horizon/stride settings do not
    overwrite each other.

    Parameters:
    input_window: Series fed to the model; must expose .time_index and .plot().
    forecast: Series predicted by the model (same interface).
    ground_truth: Series of actual values for the forecast span (same interface).
    result_plot_path (str): Existing directory the PNG is written to.
    bypass_information (dict): Must contain 'window_size', 'horizon' and
        'slide_step'; used to build the file name.

    Returns:
    None.
    """
    starting_date_of_input_data = input_window.time_index[0].strftime("%Y-%m-%d")
    ending_date_of_input_data = input_window.time_index[-1].strftime("%Y-%m-%d")
    starting_date_predicted = forecast.time_index[0].strftime("%Y-%m-%d")
    ending_date_of_predicted = forecast.time_index[-1].strftime("%Y-%m-%d")

    # Union gives each timestamp once, even where the forecast and the ground
    # truth cover the same dates.
    combined_time_index = (
        input_window.time_index
        .union(forecast.time_index)
        .union(ground_truth.time_index)
    )

    plot_filename = (
        f"{result_plot_path}/{bypass_information['window_size']}"
        f"_{bypass_information['horizon']}"
        f"_{bypass_information['slide_step']}"
        f"_{starting_date_of_input_data}.png"
    )

    plt.figure(figsize=(30, 6))
    try:
        input_window.plot(label='Input Data', marker='o')
        forecast.plot(label='Predicted', marker='o')
        ground_truth.plot(label='Ground Truth', marker='o')

        plt.xticks(combined_time_index, combined_time_index.strftime('%Y-%m-%d'), rotation=90)
        plt.title(f'Results of Input Data from {starting_date_of_input_data} to {ending_date_of_input_data} & Evaluation on from {starting_date_predicted} to {ending_date_of_predicted}', fontsize=16)
        plt.ylabel('Quantity Sold', fontsize=14)
        plt.xlabel('Dates', fontsize=14)
        plt.legend()

        plt.savefig(plot_filename)
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close()

In [None]:
def model_evaluation(model_name, model_object, test_series, FileName,
                     window_sizes=(30, 45, 90),
                     prediction_horizons=(15, 30, 35),
                     slide_steps=(5, 10, 15)):
    """
    Evaluate a model over a grid of window sizes, horizons and strides.

    For every combination, predict_and_evaluate() is run and its result is
    written to '<result_path>/window_size_<w>_horizon_<h>_stride_<s>.csv'.

    Parameters:
    model_name (str): Used as a folder name under '../ProcessedData/Results'.
    model_object: Fitted model passed through to predict_and_evaluate().
    test_series: Series passed through to predict_and_evaluate().
    FileName (str): Sub-folder name under the model folder.
    window_sizes (iterable of int): Input window sizes to try.
    prediction_horizons (iterable of int): Forecast horizons to try.
    slide_steps (iterable of int): Strides between consecutive windows to try.

    Returns:
    None. Results are written to disk.
    """
    from itertools import product

    # NOTE(review): the data-loading cell writes to '../processedData' (lower-case
    # 'p'); on a case-sensitive filesystem this is a different folder — confirm.
    result_path = f'../ProcessedData/Results/{model_name}/{FileName}'
    result_plot_path = f'{result_path}/{model_name}_Plots'
    os.makedirs(result_path, exist_ok=True)
    os.makedirs(result_plot_path, exist_ok=True)

    # Same iteration order as three nested loops: window size outermost,
    # stride innermost.
    for window_size, prediction_horizon, slide_step in product(
        window_sizes, prediction_horizons, slide_steps
    ):
        print(f'Iteration : Window size : {window_size} Horizan: {prediction_horizon}, Stride : {slide_step}')
        evaluation_df = predict_and_evaluate(
            window_size, prediction_horizon, slide_step, test_series, model_object, result_plot_path
        )
        evaluation_df.to_csv(
            f'{result_path}/window_size_{window_size}_horizon_{prediction_horizon}_stride_{slide_step}.csv',
            index=False,
        )

        print(f'Window_size_{window_size}_prediction_horizon_{prediction_horizon}_slide_step_{slide_step} - Evaluation completed.')

In [None]:

# Run the sliding-window evaluation for the fitted ARIMA model on the test split.
model_name, FileName = 'ARIMA', 'arima_model'
model_object = arima_model
model_evaluation(
    model_name=model_name,
    model_object=model_object,
    test_series=test_series,
    FileName=FileName,
)


In [None]:
# Display the test split for inspection.
test_series