# Retail Sales Forecasting
**Author:** Anusuya M

This notebook covers data loading, EDA, preprocessing, and a time-series forecasting example using ARIMA / SARIMAX (statsmodels).

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_absolute_error

# Read the retail sales CSV and ask pandas to parse 'Order Date' as datetimes,
# since that column is used as the time index for resampling later on.
SALES_CSV = '../data/retail_sales.csv'
df = pd.read_csv(SALES_CSV, parse_dates=['Order Date'])
df.head()


## Aggregate monthly sales and prepare for forecasting

In [None]:
# Collapse the rows of `df` into one total per calendar month.
# Resampling with 'M' bins by month and labels each bin with its month-end date.
# The result uses the ds/y column names: 'ds' is the month stamp, 'y' the summed Sales.
monthly = (
    df.set_index('Order Date')['Sales']
    .resample('M')
    .sum()
    .reset_index()
    .rename(columns={'Order Date': 'ds', 'Sales': 'y'})
)
# Kept from the original cell: guarantees 'ds' is datetime64 for the plotting cell.
monthly['ds'] = pd.to_datetime(monthly['ds'])
monthly.head()


## Train/Test split and SARIMAX example


In [None]:
# Simple SARIMAX example (tunable)
#
# Splits `monthly` chronologically, fits a seasonal ARIMA on the earlier part,
# forecasts the held-out tail, reports MAE and plots forecast vs. actuals.

# Number of most-recent months held out for evaluation.
HOLDOUT_MONTHS = 6
# Non-seasonal (p, d, q) and seasonal (P, D, Q, s) orders. These are starting
# values, not tuned; s=12 assumes a yearly cycle in the monthly series.
ORDER = (1, 1, 1)
SEASONAL_ORDER = (1, 1, 1, 12)

train = monthly.iloc[:-HOLDOUT_MONTHS]
test = monthly.iloc[-HOLDOUT_MONTHS:]

# Differencing with d and D at period s uses up d + D*s observations, so the
# training window must be longer than that for the fit to be meaningful.
# Fail early with a clear message instead of an opaque statsmodels error.
min_train_months = ORDER[1] + SEASONAL_ORDER[1] * SEASONAL_ORDER[3] + 1
if len(train) < min_train_months:
    raise ValueError(
        f"Need at least {min_train_months} training months for "
        f"SARIMAX{ORDER}x{SEASONAL_ORDER}, got {len(train)} "
        f"({len(monthly)} months total, {HOLDOUT_MONTHS} held out)."
    )

# Stationarity / invertibility constraints are relaxed, as in the original example.
model = SARIMAX(
    train['y'],
    order=ORDER,
    seasonal_order=SEASONAL_ORDER,
    enforce_stationarity=False,
    enforce_invertibility=False,
)
res = model.fit(disp=False)

# Forecast exactly as many steps as there are held-out months.
pred = res.get_forecast(steps=len(test))
pred_mean = pred.predicted_mean
print('MAE:', mean_absolute_error(test['y'], pred_mean))

# Plot the full history and overlay the forecast on the hold-out dates.
# `pred_mean` is plotted positionally against test['ds'] (both have len(test) points).
fig, ax = plt.subplots()
ax.plot(monthly['ds'], monthly['y'], label='Actual')
ax.plot(test['ds'], pred_mean, label='Forecast')
ax.set_xlabel('Month')
ax.set_ylabel('Sales')
ax.set_title('Actual vs Forecast (SARIMAX)')
ax.legend()
plt.show()


## Next steps
- Hyperparameter tuning (grid search over the non-seasonal orders p, d, q and the seasonal orders P, D, Q, s)
- Incorporate exogenous variables (promotions, holidays)
- Build the Power BI dashboard using the `data/retail_sales.csv` file
