# Sales Forecasting - Modeling

In [None]:
# 1. Train-Test Split
# Chronological hold-out: rows dated strictly before `split_date` are used for
# training, rows dated on or after it for testing.
# `weekly_df` is expected to come from an earlier cell (not shown here).
split_date = '2023-10-01'
train = weekly_df.loc[lambda frame: frame['Date'] < split_date]
test = weekly_df.loc[lambda frame: frame['Date'] >= split_date]

# Date-indexed target series used by the time-series models below.
train_ts, test_ts = (
    part.set_index('Date')['Total Amount'] for part in (train, test)
)


In [None]:
# 2. ARIMA Model
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np
import matplotlib.pyplot as plt

# Fit a non-seasonal ARIMA with order (p, d, q) = (1, 1, 1) on the training
# series, then forecast one step for every observation in the test series.
arima_model = ARIMA(train_ts, order=(1, 1, 1))
arima_result = arima_model.fit()
arima_forecast = arima_result.forecast(steps=len(test_ts))

# Pair forecasts with the actuals by position: `.values` discards the
# forecast's own index so the test dates are used instead.
arima_df = test_ts.to_frame().assign(Forecast=arima_forecast.values)

# Training history, held-out actuals and the forecast on one chart.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(train_ts.index, train_ts, label='Train')
ax.plot(test_ts.index, test_ts, label='Actual', color='blue')
ax.plot(arima_df.index, arima_df['Forecast'], label='ARIMA Forecast', color='red')
ax.set_title('ARIMA Forecast vs Actual')
ax.set_xlabel('Date')
ax.set_ylabel('Weekly Total Sales')
ax.legend()
fig.tight_layout()
plt.show()


In [None]:
# 3. SARIMA Model
from statsmodels.tsa.statespace.sarimax import SARIMAX

# Seasonal ARIMA: the same (1, 1, 1) non-seasonal order plus a (1, 1, 1)
# seasonal component with period 52 (presumably one year of weekly data).
# Stationarity and invertibility constraints are both switched off.
sarima_model = SARIMAX(
    train_ts,
    order=(1, 1, 1),
    seasonal_order=(1, 1, 1, 52),
    enforce_stationarity=False,
    enforce_invertibility=False,
)
sarima_result = sarima_model.fit()
sarima_forecast = sarima_result.forecast(steps=len(test_ts))

# Attach the forecast to the actuals by position, keeping the test dates.
sarima_df = test_ts.to_frame().assign(Forecast=sarima_forecast.values)

# Training history, held-out actuals and the forecast on one chart.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(train_ts.index, train_ts, label='Train')
ax.plot(test_ts.index, test_ts, label='Actual', color='blue')
ax.plot(sarima_df.index, sarima_df['Forecast'], label='SARIMA Forecast', color='orange')
ax.set(
    title='SARIMA Forecast vs Actual',
    xlabel='Date',
    ylabel='Weekly Total Sales',
)
ax.legend()
fig.tight_layout()
plt.show()


In [None]:
# 4. Prophet Forecasting
from prophet import Prophet

# Prophet requires the time column to be called 'ds' and the target 'y'.
regressor_cols = ['lag_1', 'rolling_3']
prophet_df = weekly_df.rename(columns={'Date': 'ds', 'Total Amount': 'y'})
prophet_df = prophet_df[['ds', 'y'] + regressor_cols]

# Prophet raises ValueError when an extra-regressor column contains NaN, and
# lag/rolling features presumably start with NaN rows (the ML cell drops NaNs
# from the same frame), so those rows are removed before fitting.
# Sorting by date and resetting to a 0..n-1 index keeps the rows positionally
# aligned with the frame returned by make_future_dataframe(), which lists the
# history dates in sorted order under a fresh 0..n-1 index.
# NOTE(review): this assumes one row per date in weekly_df - confirm.
prophet_df = (
    prophet_df.dropna(subset=regressor_cols)
    .sort_values('ds')
    .reset_index(drop=True)
)

# Yearly seasonality only; both regressors must be registered before fit().
model = Prophet(yearly_seasonality=True, weekly_seasonality=False, daily_seasonality=False,
                changepoint_prior_scale=0.05, seasonality_prior_scale=10.0)
for col in regressor_cols:
    model.add_regressor(col)
model.fit(prophet_df)

# History plus 4 additional weekly periods.
# NOTE(review): freq='W' generates Sunday-anchored dates - confirm this
# matches the weekday weekly_df is anchored on.
future = model.make_future_dataframe(periods=4, freq='W')

# Regressors must also be supplied for the future rows. Their real values are
# unknown there, so the last observed value is carried forward.
for col in regressor_cols:
    future[col] = prophet_df[col].reindex(future.index, method='ffill')

forecast = model.predict(future)

fig1 = model.plot(forecast)
plt.title("Prophet Weekly Sales Forecast")
plt.show()


In [None]:
# 5. Random Forest and XGBoost
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor

# Tabular models use engineered columns as predictors; rows with any missing
# value in the selected columns are discarded before splitting.
features = ['lag_1', 'lag_2', 'rolling_3', 'rolling_std_3', 'is_holiday']
ml_df = weekly_df[['Date', 'Total Amount', *features]].dropna()

# Same chronological cut-off as the time-series models.
train_ml = ml_df.loc[lambda frame: frame['Date'] < split_date]
test_ml = ml_df.loc[lambda frame: frame['Date'] >= split_date]

X_train, y_train = train_ml[features], train_ml['Total Amount']
X_test, y_test = test_ml[features], test_ml['Total Amount']

# Random forest: 100 trees, fixed seed for reproducibility.
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_preds = rf_model.fit(X_train, y_train).predict(X_test)

# Gradient-boosted trees: 100 boosting rounds, same seed.
xgb_model = XGBRegressor(n_estimators=100, random_state=42)
xgb_preds = xgb_model.fit(X_train, y_train).predict(X_test)


In [None]:
# 6. Evaluation
def evaluate_forecast(y_true, y_pred, model_name):
    """Print MAE, RMSE and MAPE of a forecast against the actual values.

    Both inputs are converted to flat float arrays before any arithmetic, so
    actual/predicted pairs are always matched by position. This lets the
    function accept lists, NumPy arrays or pandas Series, and prevents a
    Series with a different index from being label-aligned into NaNs.

    Args:
        y_true: Actual values (any 1-D array-like).
        y_pred: Predicted values, same length and order as ``y_true``.
        model_name: Label printed in the report header.

    Raises:
        ValueError: If the inputs are empty, differ in length, or contain
            NaN or infinite values.
    """
    actual = np.asarray(y_true, dtype=float).ravel()
    predicted = np.asarray(y_pred, dtype=float).ravel()

    if actual.size == 0:
        raise ValueError("y_true and y_pred must not be empty")
    if actual.shape != predicted.shape:
        raise ValueError(
            f"y_true and y_pred must have the same length, "
            f"got {actual.size} and {predicted.size}"
        )
    if not (np.isfinite(actual).all() and np.isfinite(predicted).all()):
        raise ValueError("y_true and y_pred must not contain NaN or infinity")

    errors = actual - predicted
    mae = np.mean(np.abs(errors))
    rmse = np.sqrt(np.mean(errors ** 2))

    # Percentage error is undefined where the actual value is zero, so those
    # observations are left out of the MAPE average.
    nonzero = actual != 0
    if nonzero.any():
        mape = np.mean(np.abs(errors[nonzero] / actual[nonzero])) * 100
    else:
        mape = np.nan

    print(f"{model_name} Evaluation:\nMAE: {mae:.2f} | RMSE: {rmse:.2f} | MAPE: {mape:.2f}%\n")

# Report hold-out metrics for each tree-based model against the same target.
for model_label, model_preds in (("Random Forest", rf_preds), ("XGBoost", xgb_preds)):
    evaluate_forecast(y_test, model_preds, model_label)
