In [None]:
import pandas as pd

# Display configuration: remove pandas' row/column display limits and keep
# wide frames on one line instead of wrapping them across several blocks.
pd.options.display.max_rows = None
pd.options.display.max_columns = None
pd.options.display.expand_frame_repr = False

from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import pointbiserialr
from scipy.stats import chi2_contingency
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from IPython.core.interactiveshell import InteractiveShell
import os
# Make IPython echo the value of every top-level expression statement in a
# cell, not only the last one. Assignments are not echoed.
InteractiveShell.ast_node_interactivity = "all"

In [None]:
# Load the forecasting dataset from a CSV in the kernel's working directory.
# Later cells read its 'Date' and 'Weekly_Sales' columns.
df_clean = pd.read_csv("df_clean_forecasting.csv")

In [None]:
# ARIMA forecasting: collapse the data to one total-sales value per date,
# kept as a single-column frame indexed by 'Date'.
# NOTE(review): 'Date' is used exactly as read from the CSV. groupby sorts its
# keys, so if the column holds strings the order is lexical - confirm that this
# matches chronological order before trusting the positional split below.
weekly_sales = df_clean.groupby('Date')['Weekly_Sales'].sum().to_frame()

# Positional hold-out: the final 52 rows are the test set, everything before
# them is the training set.
holdout_rows = 52
train = weekly_sales.iloc[:-holdout_rows]
test = weekly_sales.iloc[-holdout_rows:]

In [None]:
# Time series plot of the aggregated sales over the whole index.
plt.figure(figsize=(10, 4))
plt.plot(weekly_sales.index, weekly_sales['Weekly_Sales'])

# Label only every (len // 10)-th index entry so the rotated ticks stay readable.
tick_stride = len(weekly_sales) // 10
plt.xticks(ticks=weekly_sales.index[::tick_stride], rotation=45)

plt.xlabel("Date")
plt.ylabel("Sales")
plt.title("Time Series")

plt.show()

In [None]:
# First ACF / PACF plots, drawn on the full aggregated series.
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

max_lag = 40  # number of lags passed to both correlograms

acf = plot_acf(weekly_sales, lags=max_lag)
plt.show()

pacf = plot_pacf(weekly_sales, lags=max_lag)
plt.show()

In [None]:
# Tuning parameters: run the Augmented Dickey-Fuller test on the sales series
# and report its first two outputs (test statistic and p-value).
from statsmodels.tsa.stattools import adfuller

result = adfuller(weekly_sales['Weekly_Sales'])
adf_statistic, p_value = result[0], result[1]

print(f"ADF Statistic: {adf_statistic}")
print(f"p-value: {p_value}")

In [None]:
# ARIMA(0,0,0), fitted on the training split only.
from statsmodels.tsa.arima.model import ARIMA

order_000 = (0, 0, 0)
model_000 = ARIMA(train, order=order_000)
model_000_fit = model_000.fit()

# Forecast one step per row of the test split.
forecast_000 = model_000_fit.forecast(steps=len(test))

print(model_000_fit.summary())

In [None]:
# Plot the ARIMA(0,0,0) forecast against the training and test data.
plt.figure(figsize=(6, 2))
plt.plot(train.index, train, label='Training')
plt.plot(test.index, test, label='Test')
# The forecast is drawn over the test index so it lines up with the held-out rows.
plt.plot(test.index, forecast_000, label='Forecast')

# max(1, ...) keeps the slice step valid when the series has fewer than 10 rows
# (a step of 0 raises ValueError).
tick_step = max(1, len(weekly_sales) // 10)
plt.xticks(ticks=weekly_sales.index[::tick_step], rotation=45)

# Title and axis labels so the figure stands alone, like the other plots in this notebook.
plt.xlabel("Date")
plt.ylabel("Sales")
plt.title("Forecast ARIMA(0,0,0)")
plt.legend(loc='upper left')
plt.show()

In [None]:
# ARIMA(1,0,2), fitted on the training split only.
order_102 = (1, 0, 2)
model_102 = ARIMA(train, order=order_102)
model_102_fit = model_102.fit()

# Forecast one step per row of the test split.
forecast_102 = model_102_fit.forecast(steps=len(test))

print(model_102_fit.summary())

In [None]:
# Plot the ARIMA(1,0,2) forecast against the training and test data.
plt.figure(figsize=(6, 2))
plt.plot(train.index, train, label='Training')
plt.plot(test.index, test, label='Test')
# The forecast is drawn over the test index so it lines up with the held-out rows.
plt.plot(test.index, forecast_102, label='Forecast')

# max(1, ...) keeps the slice step valid when the series has fewer than 10 rows
# (a step of 0 raises ValueError).
tick_step = max(1, len(weekly_sales) // 10)
plt.xticks(ticks=weekly_sales.index[::tick_step], rotation=45)

# Title and axis labels so the figure stands alone, like the other plots in this notebook.
plt.xlabel("Date")
plt.ylabel("Sales")
plt.title("Forecast ARIMA(1,0,2)")
plt.legend(loc='upper left')
plt.show()

In [None]:
# Automatic order selection with pmdarima, on the training split.
import pmdarima as pm

# Non-seasonal candidates, exhaustive (non-stepwise) search.
auto_arima = pm.auto_arima(train, stepwise=False, seasonal=False)
print(auto_arima)

# Seasonal candidates. auto_arima's seasonal period `m` defaults to 1, and with
# m == 1 pmdarima treats the fit as non-seasonal, so seasonal=True alone just
# repeats the search above. m=52 matches the 52-step season used by the
# SARIMAX cell in this notebook.
# NOTE(review): an exhaustive search at m=52 is noticeably slower than the
# non-seasonal one; switch to stepwise=True if the run time is a problem.
seasonal_auto_arima = pm.auto_arima(train, stepwise=False, seasonal=True, m=52)
print(seasonal_auto_arima)

In [None]:
# Plot the residuals of the fitted ARIMA(0,0,0) model.
residuals_000 = model_000_fit.resid

plt.figure(figsize=(6, 2))
plt.plot(residuals_000)
plt.title("Residuals ARIMA(0,0,0)")

# Take the ticks from the residual series itself: the model was fitted on
# `train`, so sampling ticks from the full `weekly_sales` index would add
# labels for dates that have no residual. max(1, ...) guards against a zero
# slice step on very short series.
tick_step = max(1, len(residuals_000) // 8)
plt.xticks(ticks=residuals_000.index[::tick_step], rotation=45)

plt.xlabel("Date")
plt.ylabel("Residuals")
plt.show()

In [None]:
# Plot the residuals of the fitted ARIMA(1,0,2) model and save the figure.
residuals_102 = model_102_fit.resid

plt.figure(figsize=(6, 2))
plt.plot(residuals_102)
plt.title("Residuals ARIMA(1,0,2)")

# Take the ticks from the residual series itself: the model was fitted on
# `train`, so sampling ticks from the full `weekly_sales` index would add
# labels for dates that have no residual. max(1, ...) guards against a zero
# slice step on very short series.
tick_step = max(1, len(residuals_102) // 8)
plt.xticks(ticks=residuals_102.index[::tick_step], rotation=45)

plt.xlabel("Date")
plt.ylabel("Residuals")
# Save before show(): the figure must still be current when it is written to disk.
plt.savefig('residuals102')
plt.show()

In [None]:
# Seasonal ARIMA model, fitted on the training split only.
from statsmodels.tsa.statespace.sarimax import SARIMAX

nonseasonal_terms = (0, 0, 4)
seasonal_terms = (1, 1, 1, 52)  # last entry is the seasonal period
sarima = SARIMAX(train, order=nonseasonal_terms, seasonal_order=seasonal_terms)
sarima_fit = sarima.fit()

# Forecast one step per row of the test split.
sarima_forecast = sarima_fit.forecast(steps=len(test))

In [None]:
# Plot the SARIMA forecast against the training and test data.

plt.figure(figsize=(6, 2))
plt.plot(train.index, train, label="train")
plt.plot(test.index, test, label="test")
# The forecast is drawn over the test index so it lines up with the held-out rows.
plt.plot(test.index, sarima_forecast, label="forecast")

# max(1, ...) keeps the slice step valid when the series has fewer than 8 rows
# (a step of 0 raises ValueError).
tick_step = max(1, len(weekly_sales) // 8)
plt.xticks(ticks=weekly_sales.index[::tick_step], rotation=45)

# Axis labels match the ones used on the time series plot.
plt.xlabel("Date")
plt.ylabel("Sales")
plt.legend(loc="lower left")
plt.title("Weekly Sales Forecast")
plt.show()

In [None]:
# SARIMA metrics: compare the forecast with the held-out test values.
from sklearn.metrics import mean_absolute_error, mean_squared_error

true_values = test['Weekly_Sales']
predicted_values = sarima_forecast

mae = mean_absolute_error(true_values, predicted_values)
# RMSE is the square root of the mean squared error.
squared_error = mean_squared_error(true_values, predicted_values)
rmse = np.sqrt(squared_error)

for metric_name, metric_value in (("MAE", mae), ("RMSE", rmse)):
    print(f"{metric_name}: {metric_value:.2f}")