# Retail Demand Forecasting - Walmart

This notebook covers:
- Loading the Walmart data
- Performing exploratory data analysis (stores, features, sales)


In [1]:
import sys
from pathlib import Path

# Resolve the parent of the current working directory to an absolute path and
# treat it as the project root, so that local module imports do not fail.
PROJECT_ROOT = Path('..').resolve()

# Only prepend the root when it is not already on the module search path;
# this keeps re-runs of the cell from stacking duplicate entries.
if sys.path.count(str(PROJECT_ROOT)) == 0:
    sys.path.insert(0, str(PROJECT_ROOT))

In [2]:
import pandas as pd
import numpy as np

# Local imports
from src.data import load_walmart_data


In [6]:
# Directory holding the Walmart data, relative to the current working directory.
DATA_DIR = Path('..') / 'data'

# Load the three source tables through the project's loader.
stores, features, sales = load_walmart_data(DATA_DIR)

# Pre-merge integrity checks: each table is expected to have no duplicate rows
# on the listed key columns. Each assertion names the table it checked so a
# failure can be diagnosed straight from the traceback.
assert not sales.duplicated(subset=['Store', 'Date']).any(), \
    "sales contains duplicate (Store, Date) rows"
assert not features.duplicated(subset=['Store', 'Date', 'IsHoliday']).any(), \
    "features contains duplicate (Store, Date, IsHoliday) rows"
assert not stores.duplicated(subset=['Store']).any(), \
    "stores contains duplicate Store rows"

# Merge the sales and features dataframes.
# - how='left' keeps every sales row, whether or not it has a features match.
# - validate='many_to_one' asks pandas to raise if the merge keys are not
#   unique on the features side.
df = sales.merge(
    features,
    on=['Store', 'Date', 'IsHoliday'],
    how='left',
    validate='many_to_one',
)

# Post-merge invariant: a many-to-one left join must neither add nor drop
# sales rows.
assert len(df) == len(sales), "merge changed the number of sales rows"

df.head()

Unnamed: 0,Store,Date,Weekly_Sales,IsHoliday,Temperature,Fuel_Price,MarkDown1,MarkDown2,MarkDown3,MarkDown4,MarkDown5,CPI,Unemployment
0,1,2010-02-05,1643690.9,0,42.31,2.572,,,,,,211.096358,8.106
1,1,2010-02-12,1641957.44,1,38.51,2.548,,,,,,211.24217,8.106
2,1,2010-02-19,1611968.17,0,39.93,2.514,,,,,,211.289143,8.106
3,1,2010-02-26,1409727.59,0,46.63,2.561,,,,,,211.319643,8.106
4,1,2010-03-05,1554806.68,0,46.5,2.625,,,,,,211.350143,8.106
