In [1]:
import os
import torch
import pandas as pd
import numpy as np
from torch.utils.data import Dataset, DataLoader
from torch import optim
from tqdm import tqdm

from capsnet.model import CapsNet
from capsnet.loss import margin_loss

In [None]:
# ─────────── CONFIG ─────────── #
# Input locations: the train/val CSV splits and the root directory that
# holds both the patch and the resize subdirectories of preprocessed images.
RAW_SPLIT = '../dataset/d_data_split/'
NPY_ROOT = '../dataset/e_preprocessed_img/'

# Metadata tables, one per preprocessing method, stored directly under NPY_ROOT.
PATCH_META = os.path.join(NPY_ROOT, 'patch_metadata.csv')
RESIZE_META = os.path.join(NPY_ROOT, 'resize_metadata.csv')

# Checkpoint directory; created eagerly so later saves cannot fail on a
# missing parent folder.
OUT_ROOT = '../model_output/capsnet'
os.makedirs(OUT_ROOT, exist_ok=True)

# Preprocessing methods to train on, in order.
METHODS = ['patch', 'resize']

# Training hyper-parameters.
NUM_CLASSES = 10
EPOCHS = 10
BATCH_SIZE = 32
LR = 1e-3

# Use the GPU when one is visible to torch, otherwise fall back to the CPU.
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [11]:
# ─────────── DATASET ─────────── #
class NpyImageDataset(Dataset):
    """Map-style dataset that loads one ``.npy`` array per sample on demand.

    Args:
        paths: Sequence of ``.npy`` file paths, one per sample.
        labels: Sequence of labels aligned index-for-index with ``paths``.
            Each label is converted to a ``torch.long`` scalar tensor.
        channels_last: When True, 3-D arrays are treated as (H, W, C) and
            transposed to (C, H, W). The default (False) leaves 3-D arrays
            exactly as stored, which is the original behaviour.

    Raises:
        ValueError: If ``paths`` and ``labels`` have different lengths.
    """

    def __init__(self, paths, labels, channels_last=False):
        # A length mismatch would otherwise surface later as an IndexError
        # (too few labels) or be silently ignored (too many labels).
        if len(paths) != len(labels):
            raise ValueError(
                f"paths and labels must have the same length "
                f"(got {len(paths)} and {len(labels)})"
            )
        self.paths = paths
        self.labels = labels
        self.channels_last = channels_last

    def __len__(self):
        """Return the number of samples."""
        return len(self.paths)

    def __getitem__(self, i):
        """Load sample ``i`` and return ``(image, label)``.

        ``image`` is a float32 tensor; a 2-D array gains a leading axis of
        size 1. ``label`` is a 0-dim ``torch.long`` tensor.
        """
        arr = np.load(self.paths[i])
        if arr.ndim == 2:
            # Single-plane image: add the leading channel axis.
            arr = arr[None, ...]
        elif self.channels_last and arr.ndim == 3:
            # Stored as H x W x C: move channels to the front.
            arr = np.transpose(arr, (2, 0, 1))
        img = torch.from_numpy(arr).float()
        # NOTE(review): a non-integer label is truncated by the long cast —
        # confirm the labels passed in are already integer class indices.
        label = torch.tensor(self.labels[i], dtype=torch.long)
        return img, label

In [12]:
# ─────────── UTILS ─────────── #
def build_dataset(split_csv, meta_df, day, npy_root):
    """Build an ``NpyImageDataset`` for one day from a split CSV and metadata.

    Args:
        split_csv: Path to a split file (train.csv or val.csv). It must have
            an image-name column, either ``image_filename`` or ``filename``,
            and a target column, either ``label`` or ``pm2.5``.
        meta_df: Full patch or resize metadata frame. The columns read here
            are ``day``, ``image_filename`` and ``npy_path``.
        day: Day folder name used to select rows of ``meta_df``.
        npy_root: Directory that each ``npy_path`` value is joined onto.

    Returns:
        NpyImageDataset over the rows present in both the split and the
        day's metadata (inner join).

    Raises:
        KeyError: If the split CSV has neither ``image_filename`` nor
            ``filename``, or if a required column is missing.
    """
    df_split = pd.read_csv(split_csv)
    # Rename only when it cannot collide: with both columns present a rename
    # would create two 'label' columns and break the lookup below.
    if 'pm2.5' in df_split.columns and 'label' not in df_split.columns:
        df_split = df_split.rename(columns={'pm2.5': 'label'})

    # Accept either spelling of the image-name column on the split side.
    if 'image_filename' in df_split.columns:
        split_key = 'image_filename'
    elif 'filename' in df_split.columns:
        split_key = 'filename'
    else:
        raise KeyError(
            f"{split_csv}: expected an 'image_filename' or 'filename' column, "
            f"found {list(df_split.columns)}"
        )

    # `day` is usually a folder-name string while the metadata column may have
    # been parsed as integers; also compare as strings so such rows still
    # match instead of silently producing an empty dataset.
    day_col = meta_df['day']
    day_mask = (day_col == day) | (day_col.astype(str) == str(day))
    df_meta = meta_df[day_mask]

    df = pd.merge(df_split,
                  df_meta,
                  left_on=split_key,
                  right_on='image_filename',
                  how='inner')

    img_paths = [os.path.join(npy_root, p) for p in df['npy_path']]
    labels = df['label'].tolist()
    return NpyImageDataset(img_paths, labels)

In [13]:
# ─────────── MAIN ─────────── #
def _run_epoch(model, loader, one_hot, optimizer=None, desc=None):
    """Run one pass over ``loader`` and return the mean per-batch margin loss.

    Args:
        model: Network to run; switched to train mode when ``optimizer`` is
            given and to eval mode otherwise.
        loader: Non-empty DataLoader yielding ``(imgs, labs)`` batches.
        one_hot: ``(NUM_CLASSES, NUM_CLASSES)`` identity matrix on ``DEVICE``;
            rows are selected by label to form one-hot targets.
        optimizer: When provided, a training step is taken for every batch.
            When None, the pass runs with gradients disabled.
        desc: Progress-bar caption (training passes only).

    Returns:
        Sum of the batch losses divided by the number of batches.
    """
    training = optimizer is not None
    model.train(training)
    batches = tqdm(loader, desc=desc) if training else loader

    total = 0.0
    with torch.set_grad_enabled(training):
        for imgs, labs in batches:
            imgs = imgs.to(DEVICE, non_blocking=True)
            labs = labs.to(DEVICE, non_blocking=True)
            if training:
                optimizer.zero_grad()
            out_caps = model(imgs)               # (batch, n_class, dim) per the original note — TODO confirm
            lengths = out_caps.norm(dim=-1)      # norm over the last axis
            tgt = one_hot.index_select(0, labs)  # one-hot target rows
            loss = margin_loss(tgt, lengths)
            if training:
                loss.backward()
                optimizer.step()
            total += loss.item()
    return total / len(loader)


def main():
    """Train a CapsNet for every (day, method) pair and keep the best epoch.

    For each sub-directory of ``RAW_SPLIT`` that contains both ``train.csv``
    and ``val.csv``, and for each entry of ``METHODS``, a fresh model is
    trained for ``EPOCHS`` epochs. Whenever the validation loss improves, the
    model's ``state_dict`` is written to
    ``OUT_ROOT/<day>/<method>_capsnet.pth``.

    A (day, method) pair whose train or validation dataset is empty is
    skipped with a message; averaging over zero batches would otherwise
    raise ``ZeroDivisionError``.
    """
    # Read both metadata tables once, up front.
    patch_meta = pd.read_csv(PATCH_META)
    resize_meta = pd.read_csv(RESIZE_META)

    # Pinned host memory only helps host-to-GPU copies; skip it on CPU runs.
    pin = DEVICE.type == 'cuda'
    # Built once and reused for every batch's one-hot targets.
    one_hot = torch.eye(NUM_CLASSES, device=DEVICE)

    for day in sorted(os.listdir(RAW_SPLIT)):
        day_dir = os.path.join(RAW_SPLIT, day)
        train_csv = os.path.join(day_dir, 'train.csv')
        val_csv = os.path.join(day_dir, 'val.csv')
        if not (os.path.isfile(train_csv) and os.path.isfile(val_csv)):
            continue

        print(f"\n Day {day}")

        for method in METHODS:
            print(f" Method = {method}")

            # Any method other than 'patch' uses the resize metadata.
            meta_df = patch_meta if method == 'patch' else resize_meta

            train_ds = build_dataset(train_csv, meta_df, day, NPY_ROOT)
            val_ds = build_dataset(val_csv, meta_df, day, NPY_ROOT)
            if len(train_ds) == 0 or len(val_ds) == 0:
                print(f" skipping {day}/{method}: empty split "
                      f"(train={len(train_ds)}, val={len(val_ds)})")
                continue

            train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True,
                                      pin_memory=pin)
            val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False,
                                    pin_memory=pin)

            # Fresh model and optimiser for each (day, method) pair.
            model = CapsNet().to(DEVICE)
            optimz = optim.Adam(model.parameters(), lr=LR)

            best_val_loss = float('inf')
            save_dir = os.path.join(OUT_ROOT, day)
            os.makedirs(save_dir, exist_ok=True)
            save_path = os.path.join(save_dir, f"{method}_capsnet.pth")

            for epoch in range(EPOCHS):
                train_loss = _run_epoch(
                    model, train_loader, one_hot, optimizer=optimz,
                    desc=f"Train [{method}] E{epoch+1}/{EPOCHS}")
                print(f" Epoch {epoch+1}: train_loss={train_loss:.4f}")

                val_loss = _run_epoch(model, val_loader, one_hot)
                print(f"    ✅           val_loss={val_loss:.4f}")

                # Checkpoint only on a strict improvement in validation loss.
                if val_loss < best_val_loss:
                    best_val_loss = val_loss
                    torch.save(model.state_dict(), save_path)
                    print(f" saved best → {save_path}")


In [14]:
# Start the full training sweep when this is executed as the top-level module.
if "__main__" == __name__:
    main()

FileNotFoundError: [Errno 2] No such file or directory: '../dataset/e_preprocessed_img\\patch_metadata.csv'