<a href="https://colab.research.google.com/github/eghib22/Store-Sales-Forecasting/blob/main/model_experiment_prophet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')

! mkdir ~/.kaggle
from google.colab import files
files.upload()
!mv "kaggle.json" ~/.kaggle/kaggle.json
!chmod 600 ~/.kaggle/kaggle.json
!ls -l ~/.kaggle/

!kaggle competitions download -c walmart-recruiting-store-sales-forecasting
! unzip walmart-recruiting-store-sales-forecasting
!unzip '*.csv.zip'
!unzip '*.csv.zip'


Mounted at /content/drive


Saving kaggle.json to kaggle.json
total 4
-rw------- 1 root root 71 Jul  6 12:44 kaggle.json
Downloading walmart-recruiting-store-sales-forecasting.zip to /content
  0% 0.00/2.70M [00:00<?, ?B/s]
100% 2.70M/2.70M [00:00<00:00, 766MB/s]
Archive:  walmart-recruiting-store-sales-forecasting.zip
  inflating: features.csv.zip        
  inflating: sampleSubmission.csv.zip  
  inflating: stores.csv              
  inflating: test.csv.zip            
  inflating: train.csv.zip           
Archive:  features.csv.zip
  inflating: features.csv            

Archive:  sampleSubmission.csv.zip
  inflating: sampleSubmission.csv    

Archive:  train.csv.zip
  inflating: train.csv               

Archive:  test.csv.zip
  inflating: test.csv                

4 archives were successfully processed.
Archive:  features.csv.zip
replace features.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
  inflating: features.csv            

Archive:  sampleSubmission.csv.zip
replace sampleSubmission.csv? [y]es, [n]o, [A]l

In [2]:

!pip install wandb
import wandb
wandb.login()
wandb.init(project="Store-Sales-Forecasting", name="prophet-training-run")




<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33magasi22[0m ([33magasi22-free-university-of-tbilisi-[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [3]:
# Install Prophet into the notebook runtime.
!pip install prophet

import warnings
# Silence all Python warnings for the rest of the session (this also hides deprecation notices).
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np
import gc  # used to force garbage collection inside the per-group modeling loop

from prophet import Prophet
from sklearn.metrics import mean_squared_error  # RMSE is taken as the square root of this




In [4]:
# Read the three raw competition tables from the working directory.
train, features, stores = (
    pd.read_csv(csv_path)
    for csv_path in ('train.csv', 'features.csv', 'stores.csv')
)

# Parse the 'Date' column to datetime on the two tables that are joined by date.
for dated_frame in (train, features):
    dated_frame['Date'] = pd.to_datetime(dated_frame['Date'])



In [5]:
# Left-join the features table (keys: Store, Date, IsHoliday) and then the
# stores table (key: Store) onto the training rows, so every training row is kept.
feature_keys = ['Store', 'Date', 'IsHoliday']
df = (
    train
    .merge(features, on=feature_keys, how='left')
    .merge(stores, on='Store', how='left')
)


In [6]:
# Order rows chronologically so the fills below propagate along time.
df = df.sort_values('Date')

# Forward-fill each column, then back-fill whatever is still missing at the start.
# `fillna(method=...)` is deprecated in pandas; `.ffill()` / `.bfill()` are the
# direct replacements and give the same result.
# NOTE(review): rows are ordered by Date only, so a gap is filled from whichever
# row happens to precede it, which may belong to a different store. Confirm
# this is intended, or fill within each store instead.
fill_cols = ['Temperature', 'Fuel_Price', 'CPI', 'Unemployment']
df[fill_cols] = df[fill_cols].ffill().bfill()


In [7]:
def weighted_mae(y_true, y_pred, weights):
    """Return the weighted mean absolute error: sum(w * |y_true - y_pred|) / sum(w).

    Parameters
    ----------
    y_true, y_pred, weights : array-like
        Sequences of equal length (lists, tuples, NumPy arrays or pandas
        Series). They are compared position by position; pandas index labels
        are ignored so differently indexed inputs cannot misalign silently.

    Returns
    -------
    numpy.float64
        The weighted MAE, or NaN when the weights sum to zero (which includes
        empty input) instead of emitting a divide-by-zero warning.
    """
    # Coerce up front so plain Python sequences work and pandas alignment is bypassed.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    weights = np.asarray(weights, dtype=float)

    total_weight = np.sum(weights)
    if total_weight == 0:
        # Undefined metric: report NaN explicitly rather than dividing by zero.
        return np.float64(np.nan)

    return np.sum(weights * np.abs(y_true - y_pred)) / total_weight


In [9]:

import logging

# Raise the log threshold to WARNING for the two loggers named below so their
# lower-severity messages are not emitted.
for noisy_logger in ("cmdstanpy", "prophet"):
    logging.getLogger(noisy_logger).setLevel(logging.WARNING)

In [10]:
import warnings
warnings.filterwarnings("ignore")

# --- Evaluation protocol (previously inline magic numbers) -------------------
VAL_START = '2012-01-01'   # rows strictly before this date are used for training
VAL_END = '2012-07-01'     # validation covers [VAL_START, VAL_END)
MIN_TRAIN_ROWS = 100       # groups with fewer training rows are skipped
MIN_VAL_ROWS = 20          # groups with fewer validation rows are skipped
HOLIDAY_WEIGHT = 5         # error weight for holiday rows; all other rows weigh 1

# Per-group metrics and per-row validation predictions; later cells read both.
results = []
all_preds = []
# (Store, Dept, error message) for every group whose fit/predict/log raised.
failed_groups = []

store_dept_groups = df.groupby(['Store', 'Dept'])
total_groups = len(store_dept_groups)

print(f"--- Starting Prophet Modeling for {total_groups} Store-Department Combinations ---")

# Fit one independent Prophet model per (Store, Dept) series and score it on
# the held-out validation window.
for idx, ((store_id, dept_id), group) in enumerate(store_dept_groups, start=1):
    print(f"\n--- Processing Store: {store_id}, Department: {dept_id} ({idx}/{total_groups}) ---")

    # Prophet expects the timestamp in 'ds' and the target in 'y'.
    g = group.sort_values('Date').copy()
    g['ds'] = g['Date']
    g['y'] = g['Weekly_Sales']

    # Chronological split: train on the past, validate on the following window.
    y_train = g[g['ds'] < VAL_START]
    y_val = g[(g['ds'] >= VAL_START) & (g['ds'] < VAL_END)]

    if len(y_train) < MIN_TRAIN_ROWS or len(y_val) < MIN_VAL_ROWS:
        print(f"  Skipping (Store {store_id}, Dept {dept_id}): Not enough data ({len(y_train)} train, {len(y_val)} val).")
        continue

    # Vectorised holiday weighting for the validation rows only (replaces a
    # row-wise apply over the whole group).
    weights_val = np.where(y_val['IsHoliday'], HOLIDAY_WEIGHT, 1)

    try:
        # NOTE(review): the target column is Weekly_Sales, so the series looks
        # weekly; a weekly seasonal component may not be identifiable from
        # one observation per week -- confirm weekly_seasonality is wanted.
        model = Prophet(
            yearly_seasonality=True,
            weekly_seasonality=True,
            daily_seasonality=False
        )

        model.fit(y_train[['ds', 'y']])

        # Predict exactly on the validation timestamps.
        future = y_val[['ds']].copy()
        forecast = model.predict(future)

        y_pred = forecast['yhat'].values
        y_true = y_val['y'].values

        wmae = weighted_mae(y_true, y_pred, weights_val)
        rmse = np.sqrt(mean_squared_error(y_true, y_pred))

        print(f"   WMAE: {wmae:.2f}")

        results.append({
            'Store': store_id,
            'Dept': dept_id,
            'RMSE': rmse,
            'WMAE': wmae
        })

        all_preds.append(pd.DataFrame({
            'Date': y_val['ds'].values,
            'Store': store_id,
            'Dept': dept_id,
            'y_true': y_true,
            'y_pred': y_pred,
            'weight': weights_val
        }))

        wandb.log({
            'Store': store_id,
            'Dept': dept_id,
            'RMSE': rmse,
            'WMAE': wmae
        })

    except Exception as e:
        # Best-effort: one bad series must not abort the sweep, but record it
        # so the failure is visible after the run.
        print(f"  Failed for Store {store_id}, Dept {dept_id}: {e}")
        failed_groups.append((store_id, dept_id, str(e)))

    finally:
        # Reclaim memory after every attempted fit, including failed ones.
        gc.collect()

print("\n--- Prophet Modeling Complete ---")
print(f"Scored {len(results)} of {total_groups} groups; {len(failed_groups)} raised an error.")


[1;30;43mStreaming output truncated to the last 5000 lines.[0m

--- Processing Store: 22, Department: 77 (1666/3331) ---
  Skipping (Store 22, Dept 77): Not enough data (3 train, 2 val).

--- Processing Store: 22, Department: 78 (1667/3331) ---
  Skipping (Store 22, Dept 78): Not enough data (6 train, 0 val).

--- Processing Store: 22, Department: 79 (1668/3331) ---
   WMAE: 1841.04

--- Processing Store: 22, Department: 80 (1669/3331) ---
   WMAE: 519.88

--- Processing Store: 22, Department: 81 (1670/3331) ---
   WMAE: 1161.31

--- Processing Store: 22, Department: 82 (1671/3331) ---
   WMAE: 2740.20

--- Processing Store: 22, Department: 83 (1672/3331) ---
   WMAE: 312.03

--- Processing Store: 22, Department: 85 (1673/3331) ---
   WMAE: 451.99

--- Processing Store: 22, Department: 87 (1674/3331) ---
   WMAE: 1063.07

--- Processing Store: 22, Department: 90 (1675/3331) ---
   WMAE: 1115.19

--- Processing Store: 22, Department: 91 (1676/3331) ---
   WMAE: 2087.93

--- Processing

In [11]:
# Pool every group's validation predictions and score them as one set, so each
# row counts by its own weight rather than each group counting equally.
if not all_preds:
    # Same exception type pd.concat would raise on an empty list, with a clearer cause.
    raise ValueError("No validation predictions were collected; nothing to score.")

# ignore_index avoids the repeated 0..n-1 labels each per-group frame carries.
all_df = pd.concat(all_preds, ignore_index=True)

# Reuse the notebook's metric helper instead of repeating its formula inline.
overall_wmae = weighted_mae(all_df['y_true'], all_df['y_pred'], all_df['weight'])

print("Overall WMAE:", overall_wmae)

# Per-group metrics table (one row per scored Store/Dept pair).
results_df = pd.DataFrame(results)
print(results_df.head())

wandb.log({'Overall_WMAE': overall_wmae})


Overall WMAE: 1916.375930800765
   Store  Dept         RMSE         WMAE
0      1     1  7593.523894  4774.693544
1      1     2  2949.514949  2379.769672
2      1     3  1389.780780  1227.278611
3      1     4  3300.410919  2598.559989
4      1     5  4799.037605  3460.748384


In [12]:
# Persist the per-group metrics to the mounted Drive, then close the W&B run.
results_csv_path = '/content/drive/MyDrive/prophet_results.csv'
results_df.to_csv(results_csv_path, index=False)

wandb.finish()


0,1
Dept,▁▂▃▄▂▄▂▁▂▄▅▄▂▃▆▃█▂▃▇▁▃▄▂▁▃▂▇▅▁▇▃▃▄▂▅▃▇▁█
Overall_WMAE,▁
RMSE,▁▂▁▆▂▃▁▅▁▇▇▄▂▃▇▄▂▁▁█▂▂▁▁▂▂▁▁▅▂▃▆▄▁▄▄▅▂▂▃
Store,▁▁▁▁▁▁▂▂▂▂▂▂▂▃▃▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▇▇█████
WMAE,▃▂▂▃▂▅▁▄▂▂▂▄▂▁▁▁▃█▂▁▄▁▁▂▂▂▄▁▁▂▁▂▂▁▁▃▄▂▂▂

0,1
Dept,97.0
Overall_WMAE,1916.37593
RMSE,709.82412
Store,45.0
WMAE,486.35695
