# 01 — Dataset & Dataloaders (Places2 — Scene Recognition)

This notebook prepares the **Places2 simplified (40 classes)** dataset for training and validation:
- builds a **train/val split (80/20)**, stratified by class,
- applies standard **data augmentations**,
- creates **PyTorch DataLoaders**,
- saves a small `metadata.json` file containing the class name → index mapping (`class_to_idx`).

> **Expected dataset layout**  
> 
> ```text
> /path/to/Places2_simp/
>   airport terminal/
>     img_0001.jpg
>     ...
>   amphitheatre/
>     ...
>   ...
> ```
> Each subfolder is a class with ~1000 images of size 128×128 (RGB). The transforms below upscale each image (shorter side to 256 px) and then crop it to 224×224.

In [None]:
# %pip install torch torchvision torchaudio
# %pip install numpy pandas matplotlib scikit-learn tqdm

import os, json, random, numpy as np, pandas as pd, matplotlib.pyplot as plt
from pathlib import Path
from tqdm import tqdm
import torch
from torch.utils.data import DataLoader, Subset
from torchvision import datasets, transforms, utils as tv_utils

# --- Paths ---------------------------------------------------------------
# Root folder that holds one sub-directory per class.
DATA_ROOT = Path("/path/to/Places2_simp")  # <-- EDIT THIS

# Output folder for files generated by this notebook; created if missing.
OUT_DIR = Path("../data")
OUT_DIR.mkdir(parents=True, exist_ok=True)

# --- Reproducibility and DataLoader settings -----------------------------
SEED = 42
BATCH_TRAIN = 256
BATCH_VAL = 1024
NUM_WORKERS = 4

# Quick visual check that DATA_ROOT was edited to a real location.
print("DATA_ROOT exists:", DATA_ROOT.exists())

# Seed the Python, PyTorch and NumPy generators with the same value.
# np.random.seed goes last on purpose: it returns None, so the cell
# does not display a stray output.
random.seed(SEED)
torch.manual_seed(SEED)
np.random.seed(SEED)

## Transforms & Dataset

In [None]:
# Per-channel normalisation constants shared by both pipelines
# (the values match the commonly used ImageNet statistics).
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]

# Training pipeline: resize, then random crop / flip / rotation for
# augmentation, then tensor conversion and normalisation.
train_tf = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(degrees=20, fill=0),
        transforms.ToTensor(),
        transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
    ]
)

# Validation pipeline: deterministic resize + centre crop, same normalisation.
val_tf = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
    ]
)

# Scan the class folders once; the class name -> index mapping comes from here.
dataset_full = datasets.ImageFolder(root=str(DATA_ROOT), transform=train_tf)
class_to_idx = dataset_full.class_to_idx

# Persist the mapping so it can be read back from OUT_DIR later.
metadata_path = OUT_DIR / "metadata.json"
with open(metadata_path, "w") as f:
    f.write(json.dumps({"class_to_idx": class_to_idx}, indent=2))

# Cell output: (number of images, number of classes).
len(dataset_full), len(class_to_idx)

## Train/Validation split (80/20, stratified by class)

In [None]:
# Class label of every sample, in dataset order. ImageFolder already collected
# these when `dataset_full` was built, so no extra directory scan is needed.
targets = np.asarray(dataset_full.targets)

VAL_FRACTION = 0.2

# Use a generator local to this cell, so re-running the cell always yields the
# same split regardless of how much of the global NumPy RNG has been consumed.
# RandomState is the legacy generator behind np.random.*, so this reproduces
# the stream that np.random.seed(SEED) followed by np.random.shuffle gives.
split_rng = np.random.RandomState(SEED)

train_idx, val_idx = [], []
for cls in np.unique(targets):  # classes in ascending label order
    idxs = np.flatnonzero(targets == cls)  # sample positions of this class
    split_rng.shuffle(idxs)
    n_val = int(VAL_FRACTION * len(idxs))
    # Store plain Python ints so the index lists are easy to serialise.
    val_idx.extend(idxs[:n_val].tolist())
    train_idx.extend(idxs[n_val:].tolist())

# Sanity checks: every sample lands in exactly one of the two subsets.
assert len(train_idx) + len(val_idx) == len(targets), "split does not cover the dataset"
assert set(train_idx).isdisjoint(val_idx), "train and val indices overlap"

# `dataset_full` already applies train_tf; validation needs its own ImageFolder
# because the transform is attached to the dataset object, not to the Subset.
train_ds = Subset(dataset_full, train_idx)
val_ds = Subset(datasets.ImageFolder(root=str(DATA_ROOT), transform=val_tf), val_idx)

train_loader = DataLoader(
    train_ds,
    batch_size=BATCH_TRAIN,
    shuffle=True,
    num_workers=NUM_WORKERS,
    pin_memory=True,
)
val_loader = DataLoader(
    val_ds,
    batch_size=BATCH_VAL,
    shuffle=False,
    num_workers=NUM_WORKERS,
    pin_memory=True,
)

# Cell output: (train size, val size, number of classes).
len(train_ds), len(val_ds), len(class_to_idx)

## Peek at a batch

In [None]:
import torchvision

# Pull a single mini-batch from the training loader.
batch = next(iter(train_loader))
imgs, labels = batch

# Tile the first 32 images into an 8-column grid; normalize=True rescales the
# normalised tensors into a displayable range.
grid = torchvision.utils.make_grid(imgs[:32], nrow=8, padding=2, normalize=True)

# make_grid returns channels-first data; imshow expects channels-last.
fig, ax = plt.subplots(figsize=(10, 8))
ax.imshow(grid.permute(1, 2, 0))
ax.axis("off")
ax.set_title("Augmented training samples")
plt.show()