In [1]:
import os
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
# Folder containing train.csv, stores.csv, transactions.csv and oil.csv.
# Set the STORE_SALES_DATA_DIR environment variable to run the notebook on
# another machine; the fallback is the original local download folder.
DATA_DIR = Path(os.environ.get(
    "STORE_SALES_DATA_DIR",
    r"C:\Users\Ayush Bhagirath\Downloads\store-sales-time-series-forecasting",
))

train = pd.read_csv(DATA_DIR / "train.csv")
stores = pd.read_csv(DATA_DIR / "stores.csv")
transactions = pd.read_csv(DATA_DIR / "transactions.csv")
oil = pd.read_csv(DATA_DIR / "oil.csv")

In [3]:
# Convert the 'date' column of every frame that carries one to datetime,
# so the later merges and date comparisons operate on real timestamps.
dated_frames = (train, transactions, oil)
for df in dated_frames:
    df['date'] = df['date'].pipe(pd.to_datetime)

In [4]:
# Keep only the columns the rest of the notebook reads.
train_cols = ['date', 'store_nbr', 'family', 'sales']
transaction_cols = ['date', 'store_nbr', 'transactions']
store_cols = ['store_nbr', 'cluster']

train = train[train_cols]
transactions = transactions[transaction_cols]
stores = stores[store_cols]

# Give the oil series a readable column name.
oil = oil.rename({'dcoilwtico': 'oil_price'}, axis='columns')

In [5]:
# Left-join every auxiliary table onto the sales rows so no sales row is lost:
# transactions per (date, store), store metadata per store, oil price per date.
train = (
    train
    .merge(transactions, how='left', on=['date', 'store_nbr'])
    .merge(stores, how='left', on='store_nbr')
    .merge(oil[['date', 'oil_price']], how='left', on='date')
)

In [6]:
# Sales rows without a matching transactions record are given 0 transactions.
train['transactions'] = train['transactions'].fillna(0)

# Carry the last known oil price forward over dates with no quote.
# The fill is done in explicit chronological order (stable sort) and assigned
# back by index, so the result does not depend on the current row order of
# `train`. Dates before the first available quote remain NaN.
train['oil_price'] = (
    train.sort_values('date', kind='stable')['oil_price'].ffill()
)

In [7]:
# Order rows chronologically within each (store, family) series.
series_order = ['store_nbr', 'family', 'date']
train = train.sort_values(series_order)

In [None]:
# Lagged sales per (store, family) series. The shift counts rows, not calendar
# days — assumes one row per day per series; TODO confirm there are no gaps.
sales_by_series = train.groupby(['store_nbr', 'family'])['sales']
for n_rows in (7, 14, 28):
    train[f'lag_{n_rows}'] = sales_by_series.shift(n_rows)

In [None]:
# Rolling mean of *past* sales per (store, family) series.
# The series is shifted by one row before the window is applied so that the
# current row's `sales` (the prediction target) is never part of its own
# feature; without the shift the model would be trained on leaked targets.
sales_by_series = train.groupby(['store_nbr', 'family'])['sales']
for window in (7, 28):
    train[f'rolling_{window}'] = sales_by_series.transform(
        lambda s, w=window: s.shift(1).rolling(w).mean()
    )

In [None]:
# Model inputs, grouped by kind; the concatenation order is the column order
# the model is trained on.
id_features = ['store_nbr', 'family']
exogenous_features = ['transactions', 'oil_price']
lag_features = ['lag_7', 'lag_14', 'lag_28']
rolling_features = ['rolling_7', 'rolling_28']
features = id_features + exogenous_features + lag_features + rolling_features

# Modelling frame: inputs plus target and date, without any row that has a
# missing value in one of those columns.
model_columns = [*features, 'sales', 'date']
data = train[model_columns].dropna()

In [None]:
# Replace the identifier columns with integer category codes so the
# regressor receives purely numeric inputs.
for id_col in ('store_nbr', 'family'):
    data[id_col] = data[id_col].astype('category').cat.codes

In [None]:
# Chronological hold-out: everything before the split date trains the model,
# everything on or after it is kept for validation.
split_date = '2017-01-01'

before_split = data['date'] < split_date
from_split_on = data['date'] >= split_date

train_df = data.loc[before_split]
valid_df = data.loc[from_split_on]

In [None]:
# Separate model inputs from the target; 'date' is only used for splitting.
target = 'sales'
non_feature_cols = [target, 'date']

X_train, y_train = train_df.drop(columns=non_feature_cols), train_df[target]
X_valid, y_valid = valid_df.drop(columns=non_feature_cols), valid_df[target]

In [None]:
from sklearn.ensemble import RandomForestRegressor

# Train on a reproducible random subset of the training rows. The subset size
# is capped at the number of rows available, because DataFrame.sample raises
# when asked for more rows than exist (without replacement).
sample_size = min(300000, len(train_df))
sample = train_df.sample(sample_size, random_state=42)

X_sample = sample.drop(columns=['sales', 'date'])
y_sample = sample['sales']

model = RandomForestRegressor(
    n_estimators=80,
    max_depth=8,
    n_jobs=-1,
    random_state=42
)

model.fit(X_sample, y_sample)

# Score the held-out period so the forecast quality is visible in the notebook.
if len(X_valid):
    valid_mae = np.abs(model.predict(X_valid) - y_valid).mean()
    print(f"Validation MAE: {valid_mae:,.2f}")

In [None]:
# Most recent date present in the modelling frame.
last_date = data.loc[:, 'date'].max()

In [None]:
# Latest available feature row of every (store, family) series.
latest_rows = (
    valid_df
    .sort_values('date')
    .groupby(['store_nbr', 'family'])
    .tail(1)
    .reset_index(drop=True)
)

# One copy of that row per forecast day; the 30 copies of a series are contiguous.
forecast_df = latest_rows.loc[latest_rows.index.repeat(30)].reset_index(drop=True)

# The same 30 future dates apply to every series, so the date block is tiled
# once per series. (Generating len(forecast_df) consecutive dates would give
# every row a different date and break the per-date aggregation downstream.)
future_dates = pd.date_range(
    start=last_date + pd.Timedelta(days=1),
    periods=30
)
forecast_df['date'] = np.tile(future_dates.to_numpy(), len(latest_rows))

# NOTE(review): lag/rolling/transactions/oil features are frozen at their last
# observed values for all 30 days, so each series gets a flat forecast.

In [None]:
# Select exactly the training feature columns, in training order. Dropping
# only 'sales'/'date' would also pass 'predicted_sales' to the model when this
# cell is run a second time, which breaks the prediction.
X_forecast = forecast_df[features]
forecast_df['predicted_sales'] = model.predict(X_forecast)

In [None]:
# Total predicted sales per forecast date, summed over all rows of that date.
daily_totals = forecast_df.groupby('date')['predicted_sales'].sum().reset_index()

# Keep the first 30 dates in chronological order.
final_forecast = daily_totals.sort_values('date').head(30)

# Persist the forecast, then preview it as the cell output.
final_forecast.to_csv('future_sales_forecast.csv', index=False)
final_forecast.head()

In [None]:
# Line chart of the aggregated forecast, saved to disk and shown inline.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(final_forecast['date'], final_forecast['predicted_sales'])
ax.set_xlabel('Date')
ax.set_ylabel('Total Predicted Sales')
ax.set_title('Next 30-Day Sales Demand Forecast')
fig.tight_layout()
fig.savefig('sales_demand_forecast.png', dpi=300, bbox_inches='tight')
plt.show()

# Automated Sales & Demand Forecasting

## Business Problem
The business needs visibility into future product demand across stores to support inventory planning and purchasing decisions.

## Objective
Build an end-to-end demand forecasting pipeline using historical sales data, store attributes, transactions, and external indicators.

## Approach
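Clean the raw CSVs → merge sales with transactions, store metadata, and oil prices → engineer lag (7/14/28-day) and rolling-mean (7/28-day) sales features → train a random-forest regressor → predict demand for the next 30 days.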

## Insights & Results
Generated a 30-day aggregate demand forecast (exported to `future_sales_forecast.csv` and plotted in `sales_demand_forecast.png`) and identified high-demand stores and product families.

## Business Impact
Supports inventory allocation, procurement planning, and revenue forecasting.

## Project Outcome
Built a production-style sales demand forecasting workflow and delivered business-ready forecasts with supporting insights for planning and decision-making.
