In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.statespace.sarimax import SARIMAX
import warnings
from sklearn.metrics import mean_squared_error

# Step 1: Import and preprocess the sales data


In [None]:
# Load the sales CSV, convert its 'Date' column to datetimes, and use that
# column as the frame's index so the data can be resampled by time later on.

sales_raw = pd.read_csv('202112 Channel Sales by SKU.csv')
parsed_dates = pd.to_datetime(sales_raw['Date'])
data = sales_raw.assign(Date=parsed_dates).set_index('Date')


In [None]:
# Collapse the records into one row per month by summing every column within
# each month, then replace any values that are still missing with 0.
# NOTE(review): newer pandas releases (2.2+) deprecate the 'M' alias in favour
# of 'ME' -- confirm the pinned pandas version before switching.

monthly_totals = data.resample('M').sum()
monthly_data = monthly_totals.fillna(0)
print(monthly_data.head())



# Step 2: Test for stationarity using the Augmented Dickey-Fuller test

In [None]:
# Augmented Dickey-Fuller test on the monthly 'SKB0011373' series.
# The null hypothesis is that the series has a unit root (is non-stationary).
# A p-value above the significance level means the null cannot be rejected,
# so the series is differenced once; otherwise it is used unchanged.

target_column = 'SKB0011373'
diff_column = f'{target_column}_diff'
adf_significance = 0.05  # conventional 5% significance level

result = adfuller(monthly_data[target_column])
print('ADF Statistic:', result[0])
print('p-value:', result[1])

if result[1] > adf_significance:
    # Assigning into the frame re-aligns on the index, so the first row of the
    # differenced column is always NaN (a .dropna() before the assignment has
    # no effect). Consumers of this column must drop the NaN themselves.
    monthly_data[diff_column] = monthly_data[target_column].diff()
else:
    monthly_data[diff_column] = monthly_data[target_column]



# Step 3: Plot ACF and PACF to determine the values of p and q

In [None]:
# ACF and PACF of the (possibly differenced) series; their shapes are used to
# pick candidate q (from the ACF) and p (from the PACF) for the ARIMA model.

# Drop the leading NaN left by differencing once, and reuse the result for both plots.
diff_series = monthly_data['SKB0011373_diff'].dropna()

# plot_pacf raises ValueError when the requested lags exceed roughly half the
# sample size, so cap the default of 20 lags for short series. Series with at
# least 42 observations still get the full 20 lags.
max_lags = max(1, min(20, len(diff_series) // 2 - 1))

fig, ax = plt.subplots(2, 1, figsize=(12, 8))
plot_acf(diff_series, lags=max_lags, ax=ax[0])
plot_pacf(diff_series, lags=max_lags, ax=ax[1])
plt.show()



# Step 4: Use Grid Search to fine-tune the ARIMA model parameters (p, d, q)

In [None]:
# Grid search over ARIMA orders (p, d, q), each in 0..2, keeping the order whose
# fitted model has the lowest AIC. The outcome is left in `best_order` / `best_aic`.
# NOTE(review): AIC is generally not comparable between models with different
# differencing orders d -- consider fixing d from the stationarity test and
# searching over p and q only. Behaviour is left unchanged here; confirm.

p = d = q = range(0, 3)
best_aic = float("inf")
best_order = None
failed_orders = []  # (order, error) for every combination that could not be fitted

# Suppress fitting warnings only for the duration of the search, so that
# warnings raised by later cells (e.g. the final model fit) remain visible.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    for i in p:
        for j in d:
            for k in q:
                order = (i, j, k)
                try:
                    model = SARIMAX(monthly_data['SKB0011373'], order=order)
                    results = model.fit(disp=False)
                except Exception as exc:
                    # Best-effort search: skip orders that cannot be fitted, but
                    # record them instead of hiding the failure. Unlike a bare
                    # `except:`, this lets KeyboardInterrupt/SystemExit through.
                    failed_orders.append((order, repr(exc)))
                    continue
                # A NaN or infinite AIC indicates a degenerate fit; never select it.
                if np.isfinite(results.aic) and results.aic < best_aic:
                    best_aic = results.aic
                    best_order = order

if failed_orders:
    print(f"Skipped {len(failed_orders)} order(s) that failed to fit: "
          f"{[failed_order for failed_order, _ in failed_orders]}")

if best_order is None:
    # Without this guard the summary print below fails with an opaque TypeError.
    raise RuntimeError(
        "Grid search could not fit any (p, d, q) combination; "
        f"failures: {failed_orders}"
    )

print(f"Best parameters: p={best_order[0]}, d={best_order[1]}, q={best_order[2]} with AIC={best_aic}")



# Step 5: Fit the final ARIMA model (via SARIMAX, with no seasonal terms) using the best parameters

In [None]:
# Refit on the full monthly series with the (p, d, q) order chosen by the grid
# search; `model_fit` holds the fitted results used for forecasting.

sales_series = monthly_data['SKB0011373']
model = SARIMAX(sales_series, order=best_order)
model_fit = model.fit(disp=False)


# Step 6: Forecast the next 12 months of sales

In [None]:
# Forecast 12 steps past the end of the sample and draw the predicted mean and
# its confidence band on top of the historical series.

forecast = model_fit.get_forecast(steps=12)
forecast_ci = forecast.conf_int()

predicted = forecast.predicted_mean
band_first = forecast_ci.iloc[:, 0]   # first confidence-interval column
band_second = forecast_ci.iloc[:, 1]  # second confidence-interval column

forecast_fig, forecast_ax = plt.subplots(figsize=(10, 6))
forecast_ax.plot(monthly_data.index, monthly_data['SKB0011373'], label='Historical Sales Data')
forecast_ax.plot(predicted.index, predicted, label='Forecast', color='red')
forecast_ax.fill_between(forecast_ci.index, band_first, band_second, color='pink', alpha=0.3)
forecast_ax.legend()
plt.show()
