In [1]:
import pandas as pd

train=pd.read_csv("data/transactions.csv")
train.head()

Unnamed: 0,date,store_nbr,transactions
0,2013-01-01,25,770
1,2013-01-02,1,2111
2,2013-01-02,2,2358
3,2013-01-02,3,3487
4,2013-01-02,4,1922


In [5]:
train.date = pd.to_datetime(train.date)
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3000888 entries, 0 to 3000887
Data columns (total 6 columns):
 #   Column       Dtype         
---  ------       -----         
 0   id           int64         
 1   date         datetime64[ns]
 2   store_nbr    int64         
 3   family       object        
 4   sales        float64       
 5   onpromotion  int64         
dtypes: datetime64[ns](1), float64(1), int64(3), object(1)
memory usage: 137.4+ MB


In [6]:
train=train.sort_values(by="date")
train.head()

Unnamed: 0,id,date,store_nbr,family,sales,onpromotion
0,0,2013-01-01,1,AUTOMOTIVE,0.0,0
1194,1194,2013-01-01,42,CELEBRATION,0.0,0
1193,1193,2013-01-01,42,BREAD/BAKERY,0.0,0
1192,1192,2013-01-01,42,BOOKS,0.0,0
1191,1191,2013-01-01,42,BEVERAGES,0.0,0


In [7]:
import numpy as np

# Time features
train["dayofweek"] = train["date"].dt.dayofweek
train["month"] = train["date"].dt.month
train["day"] = train["date"].dt.day

# Cyclical encoding (helps seasonality)
train["dow_sin"] = np.sin(2*np.pi*train["dayofweek"]/7)
train["dow_cos"] = np.cos(2*np.pi*train["dayofweek"]/7)
train["month_sin"] = np.sin(2*np.pi*train["month"]/12)
train["month_cos"] = np.cos(2*np.pi*train["month"]/12)

# Encode family as integer IDs
family2idx = {f:i for i,f in enumerate(train["family"].unique())}
train["family_id"] = train["family"].map(family2idx).astype(int)

# Store numbers might not start from 0, convert to index
store2idx = {s:i for i,s in enumerate(train["store_nbr"].unique())}
train["store_id"] = train["store_nbr"].map(store2idx).astype(int)

train.head()


Unnamed: 0,id,date,store_nbr,family,sales,onpromotion,dayofweek,month,day,dow_sin,dow_cos,month_sin,month_cos,family_id,store_id
0,0,2013-01-01,1,AUTOMOTIVE,0.0,0,1,1,1,0.781831,0.62349,0.5,0.866025,0,0
1194,1194,2013-01-01,42,CELEBRATION,0.0,0,1,1,1,0.781831,0.62349,0.5,0.866025,1,1
1193,1193,2013-01-01,42,BREAD/BAKERY,0.0,0,1,1,1,0.781831,0.62349,0.5,0.866025,2,1
1192,1192,2013-01-01,42,BOOKS,0.0,0,1,1,1,0.781831,0.62349,0.5,0.866025,3,1
1191,1191,2013-01-01,42,BEVERAGES,0.0,0,1,1,1,0.781831,0.62349,0.5,0.866025,4,1


In [8]:
from sklearn.preprocessing import StandardScaler

num_features = ["sales", "onpromotion", "dow_sin", "dow_cos", "month_sin", "month_cos"]

scaler = StandardScaler()
train[num_features] = scaler.fit_transform(train[num_features])

train.head()


Unnamed: 0,id,date,store_nbr,family,sales,onpromotion,dayofweek,month,day,dow_sin,dow_cos,month_sin,month_cos,family_id,store_id
0,0,2013-01-01,1,AUTOMOTIVE,-0.324661,-0.213012,1,1,1,1.105069,0.880139,0.646018,1.283837,0,0
1194,1194,2013-01-01,42,CELEBRATION,-0.324661,-0.213012,1,1,1,1.105069,0.880139,0.646018,1.283837,1,1
1193,1193,2013-01-01,42,BREAD/BAKERY,-0.324661,-0.213012,1,1,1,1.105069,0.880139,0.646018,1.283837,2,1
1192,1192,2013-01-01,42,BOOKS,-0.324661,-0.213012,1,1,1,1.105069,0.880139,0.646018,1.283837,3,1
1191,1191,2013-01-01,42,BEVERAGES,-0.324661,-0.213012,1,1,1,1.105069,0.880139,0.646018,1.283837,4,1


In [9]:
LOOKBACK = 30 
HORIZON = 1    

X_list, y_list, store_list, family_list = [], [], [], []

for (store_id, family_id), g in train.groupby(["store_id", "family_id"]):
    g = g.sort_values("date")
    values = g[num_features].values  # shape: (T, F)
    
    if len(values) < LOOKBACK + HORIZON:
        continue
    
    for t in range(LOOKBACK, len(values) - HORIZON + 1):
        X_seq = values[t-LOOKBACK:t]                     
        y_target = values[t + HORIZON - 1, 0]
        
        X_list.append(X_seq)
        y_list.append(y_target)
        store_list.append(store_id)
        family_list.append(family_id)

X = np.array(X_list, dtype=np.float32)
y = np.array(y_list, dtype=np.float32)
store_arr = np.array(store_list, dtype=np.int64)
family_arr = np.array(family_list, dtype=np.int64)

X.shape, y.shape


((2947428, 30, 6), (2947428,))

In [10]:
from sklearn.model_selection import train_test_split

idx = np.arange(len(X))

train_idx, test_idx = train_test_split(idx, test_size=0.15, random_state=42, shuffle=True)
train_idx, val_idx  = train_test_split(train_idx, test_size=0.15, random_state=42, shuffle=True)

X_train, y_train = X[train_idx], y[train_idx]
X_val, y_val     = X[val_idx], y[val_idx]
X_test, y_test   = X[test_idx], y[test_idx]

store_train, fam_train = store_arr[train_idx], family_arr[train_idx]
store_val, fam_val     = store_arr[val_idx], family_arr[val_idx]
store_test, fam_test   = store_arr[test_idx], family_arr[test_idx]

len(X_train), len(X_val), len(X_test)

(2129516, 375797, 442115)

In [11]:
import torch
from torch.utils.data import Dataset, DataLoader

class SalesDataset(Dataset):
    def __init__(self, X, y, store_ids, family_ids):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32)
        self.store_ids = torch.tensor(store_ids, dtype=torch.long)
        self.family_ids = torch.tensor(family_ids, dtype=torch.long)
        
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx], self.store_ids[idx], self.family_ids[idx]



In [18]:
BATCH_SIZE = 2048

train_loader = DataLoader(SalesDataset(X_train, y_train, store_train, fam_train), batch_size=BATCH_SIZE, shuffle=True)
val_loader   = DataLoader(SalesDataset(X_val, y_val, store_val, fam_val), batch_size=BATCH_SIZE, shuffle=False)
test_loader  = DataLoader(SalesDataset(X_test, y_test, store_test, fam_test), batch_size=BATCH_SIZE, shuffle=False)


In [13]:
import torch.nn as nn

class SalesGRU(nn.Module):
    def __init__(self, n_stores, n_families, n_num_features, emb_store=8, emb_family=16, hidden=64, layers=1):
        super().__init__()
        self.store_emb = nn.Embedding(n_stores, emb_store)
        self.family_emb = nn.Embedding(n_families, emb_family)
        
        # GRU input size = numerical features + embeddings
        self.gru = nn.GRU(
            input_size=n_num_features + emb_store + emb_family,
            hidden_size=hidden,
            num_layers=layers,
            batch_first=True
        )
        
        self.head = nn.Sequential(
            nn.Linear(hidden, 64),
            nn.ReLU(),
            nn.Linear(64, 1)
        )
        
    def forward(self, x_seq, store_id, family_id):
        # x_seq: (B, T, F)
        B, T, F = x_seq.shape
        
        s_emb = self.store_emb(store_id)      # (B, emb_store)
        f_emb = self.family_emb(family_id)    # (B, emb_family)
        
        # Repeat embeddings across time steps
        s_rep = s_emb.unsqueeze(1).repeat(1, T, 1)  # (B, T, emb_store)
        f_rep = f_emb.unsqueeze(1).repeat(1, T, 1)  # (B, T, emb_family)
        
        x = torch.cat([x_seq, s_rep, f_rep], dim=-1)  # (B, T, F+embs)
        
        out, _ = self.gru(x)           # out: (B, T, hidden)
        last = out[:, -1, :]           # (B, hidden)
        pred = self.head(last).squeeze(-1)  # (B,)
        return pred


In [16]:
device = "cuda" if torch.cuda.is_available() else "cpu"

n_stores = train["store_id"].nunique()
n_families = train["family_id"].nunique()
n_num_features = len(num_features)

model = SalesGRU(n_stores, n_families, n_num_features).to(device)

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [None]:
from tqdm.auto import tqdm

def run_epoch(loader, train_mode=True, epoch=None):
    model.train() if train_mode else model.eval()
    total_loss = 0.0
    n = 0

    mode = "Train" if train_mode else "Val"
    pbar = tqdm(loader, desc=f"Epoch {epoch:02d} {mode}", leave=False) if epoch is not None else loader

    with torch.set_grad_enabled(train_mode):
        for batch_features, batch_targets, batch_store_ids, batch_family_ids in pbar:
            batch_features = batch_features.to(device)
            batch_targets = batch_targets.to(device)
            batch_store_ids = batch_store_ids.to(device)
            batch_family_ids = batch_family_ids.to(device)

            preds = model(batch_features, batch_store_ids, batch_family_ids)
            loss = criterion(preds, batch_targets)

            if train_mode:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            batch_size = len(batch_features)
            total_loss += loss.item() * batch_size
            n += batch_size

            if epoch is not None:
                pbar.set_postfix(mse=f"{loss.item():.5f}")

    return total_loss / n




In [None]:
EPOCHS = 3

for epoch in tqdm(range(1, EPOCHS + 1), desc="Training"):
    train_loss = run_epoch(train_loader, train_mode=True, epoch=epoch)
    val_loss = run_epoch(val_loader, train_mode=False, epoch=epoch)
    print(f"Epoch {epoch:02d} | Train MSE: {train_loss:.5f} | Val MSE: {val_loss:.5f}")

In [None]:
model.eval()
preds_scaled = []
y_scaled = []

with torch.no_grad():
    for batch_features, batch_targets, batch_store_ids, batch_family_ids in test_loader:
        batch_features = batch_features.to(device)
        batch_store_ids = batch_store_ids.to(device)
        batch_family_ids = batch_family_ids.to(device)
        pr = model(batch_features, batch_store_ids, batch_family_ids).cpu().numpy()
        preds_scaled.append(pr)
        y_scaled.append(batch_targets.numpy())

preds_scaled = np.concatenate(preds_scaled)
y_scaled = np.concatenate(y_scaled)

# Inverse transform sales only:
# StandardScaler inverse: x_original = x_scaled * scale + mean
sales_mean = scaler.mean_[0]
sales_std = np.sqrt(scaler.var_[0])

preds = preds_scaled * sales_std + sales_mean
y_true = y_scaled * sales_std + sales_mean

mae = np.mean(np.abs(y_true - preds))
rmse = np.sqrt(np.mean((y_true - preds)**2))
mape = np.mean(np.abs((y_true - preds) / np.clip(np.abs(y_true), 1e-8, None))) * 100

print(f"Test MAE : {mae:.3f}")
print(f"Test RMSE: {rmse:.3f}")
print(f"Test MAPE: {mape:.2f}%")
