In [None]:
import pandas as pd  # imported here so this cell runs on its own, before the main import cell

# Load the Walmart weekly sales dataset.
# A raw string keeps the Windows backslashes literal: sequences such as "\D",
# "\M", "\P" and "\W" are invalid escapes in a normal string literal and raise
# a SyntaxWarning on recent Python versions.
df = pd.read_csv(r"D:\Data Science\ML\Projects\Walmart TIme Series\Walmart (1) - Harshit Trivedi.csv")

In [None]:
import os  # not imported anywhere else in this file

# Directory where the output files will be saved: the CSV exports further
# down use relative file names, so they land in the current working directory.
os.getcwd()

In [None]:
# Install the library required for time-series analysis (pmdarima).
# `pip.install pmdarima` is not valid Python. In a notebook the equivalent is
# `%pip install pmdarima`; invoking pip through the running interpreter works
# in both notebooks and plain scripts and installs into the active environment.
import subprocess
import sys

subprocess.check_call([sys.executable, "-m", "pip", "install", "pmdarima"])

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.statespace.sarimax import SARIMAX
import pmdarima as pm

# Store identifiers to iterate over (one model is fitted per store)
stores = df['Store'].unique()

# Accumulators filled once per store by the loop that follows:
# metric rows, test-window forecast frames, and future-forecast frames
summary_data, forecasts_dict, combined_future_forecast = [], [], []

# Parse the 'Date' column into pandas datetimes, in place on df
# NOTE(review): the date format is inferred by pandas — confirm the file is
# not day-first, otherwise ambiguous dates may be misread.
df['Date'] = pd.to_datetime(df['Date'])

# For every store: fit a seasonal ARIMA on the first 80% of its weekly sales,
# score a dynamic forecast on the remaining 20%, project 12 further weeks,
# record the results in the accumulators, and plot everything.
for store in stores:
    print(f"Processing Store {store}...")

    # Private copy of this store's rows so the shared frame is never mutated
    store_data = df[df['Store'] == store].copy()

    # Index by date (conversion is a no-op if 'Date' is already datetime)
    # NOTE(review): rows are assumed to already be in chronological order
    # within each store — confirm, since the split below is positional.
    store_data['Date'] = pd.to_datetime(store_data['Date'])
    store_data.set_index('Date', inplace=True)

    # Train-Test split (80-20), by position
    train_size = int(len(store_data) * 0.8)
    train_data = store_data.iloc[:train_size]
    test_data = store_data.iloc[train_size:]

    # Fit AutoARIMA to find the best model (seasonality = 52 weeks)
    model = pm.auto_arima(
        train_data['Weekly_Sales'],
        seasonal=True,
        m=52,  # Adjust if different seasonality is needed
        stepwise=True,
        trace=False  # Set to True to see model selection process
    )

    # Extract best orders from AutoARIMA
    order = model.order
    seasonal_order = model.seasonal_order

    # Fit SARIMAX model using the best orders
    sarima_model = SARIMAX(
        train_data['Weekly_Sales'],
        order=order,
        seasonal_order=seasonal_order,
        enforce_stationarity=False,
        enforce_invertibility=False
    ).fit(disp=False)

    # Dynamic forecast over the test window (positions train_size .. n_obs - 1)
    n_obs = len(store_data)
    forecast = sarima_model.predict(start=len(train_data), end=n_obs - 1, dynamic=True)

    # Work with plain values from here on: if the prediction's index does not
    # match test_data.index (e.g. an integer index when no date frequency is
    # available), label alignment in the DataFrame below would give all-NaN.
    predicted_test = np.asarray(forecast)

    # Calculate RMSE on the held-out window
    mse = mean_squared_error(test_data['Weekly_Sales'], predicted_test)
    rmse = np.sqrt(mse)

    # Store summary metrics
    summary_data.append({
        'Store': store,
        'Train Size': len(train_data),
        'Test Size': len(test_data),
        'RMSE': rmse,
        'Best Order': order,
        'Seasonal Order': seasonal_order
    })

    # Store forecasted values next to the actuals, indexed by test dates
    forecasts_dict.append(pd.DataFrame({
        'Actual Sales': test_data['Weekly_Sales'].values,
        'Predicted Sales': predicted_test
    }, index=test_data.index))

    # Forecast the 12 weeks following the last observation.
    # One predict call over positions n_obs .. n_obs + 11 yields the same
    # values as twelve single-step calls, without growing store_data via
    # DataFrame.append (removed in pandas 2.0) or positional Series[0] access.
    # NOTE(review): sarima_model is fitted on the training split only, so these
    # forecasts do not use the test-window observations — consider refitting
    # on the full series before projecting forward.
    horizon = 12
    future_forecast_dynamic = np.asarray(
        sarima_model.predict(start=n_obs, end=n_obs + horizon - 1, dynamic=True)
    ).tolist()

    # Future dates continue the data's own weekly cadence from the last
    # observed date. freq='W' would snap each date to a Sunday instead.
    last_observed_date = store_data.index[-1]
    future_dates_dynamic = pd.date_range(
        start=last_observed_date + pd.Timedelta(weeks=1),
        periods=horizon,
        freq='7D',
        name='Date'
    )
    future_forecast_dynamic_df = pd.DataFrame({
        'Store': [store] * horizon,  # Add store name as a column
        'Predicted Sales': future_forecast_dynamic
    }, index=future_dates_dynamic)

    # Append this store's forecast to the combined future forecast table
    combined_future_forecast.append(future_forecast_dynamic_df)

    # Plot actual vs predicted sales and future forecast
    plt.figure(figsize=(10, 5))
    plt.plot(train_data.index, train_data['Weekly_Sales'], label="Train Data", color='blue')
    plt.plot(test_data.index, test_data['Weekly_Sales'], label="Actual Sales", color='green')
    plt.plot(test_data.index, predicted_test, label="Dynamic Forecast", color='red', linestyle='dashed')
    plt.plot(future_forecast_dynamic_df.index, future_forecast_dynamic_df['Predicted Sales'], label="Future Forecast", color='orange', linestyle='dotted')
    plt.title(f"Store {store} Sales Forecast")
    plt.legend()
    plt.show()

# Convert summary data (one dict per store) to a DataFrame
summary_df = pd.DataFrame(summary_data)

# Stack every store's future forecast into a single DataFrame;
# the index carries the forecast dates
combined_future_forecast_df = pd.concat(combined_future_forecast)

# Save summary and combined future forecast to CSV (current working directory)
summary_df.to_csv('sales_forecasting_summary_dynamic.csv', index=False)
# Keep the index in this file: it holds the forecast dates, and dropping it
# would leave rows that cannot be placed in time.
combined_future_forecast_df.to_csv('combined_future_forecast_dynamic.csv', index_label='Date')

# Preview summary
print(summary_df.head())
