In [1]:
# Cell 1 ‚Äî Imports + Config
import sys, os, torch, torch.nn as nn, torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np, h5py, json, time, logging
from pathlib import Path
from tqdm.notebook import tqdm as tqdm_nb
from datetime import datetime
import warnings; warnings.filterwarnings("ignore")
from src.config import TrainingConfig

config    = TrainingConfig()
device    = config.DEVICE
LOAD_MODE = config.DATA_MODE   # ‚Üê from .env

Path("logs").mkdir(exist_ok=True)
logging.basicConfig(filename="logs/training.log", level=logging.INFO,
                    format="%(asctime)s - %(message)s")

print("=" * 70)
print("NOTEBOOK 05 ‚Äî PROGRESSIVE TRAINING")
print("=" * 70)
config.print_summary()

# ‚îÄ‚îÄ CPU threading ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
import torch
if not torch.has_mkl or torch.get_num_threads() == 1:
    torch.set_num_threads(os.cpu_count())

try:
    torch.set_num_interop_threads(os.cpu_count() // 2)
except RuntimeError:
    pass  # already set ‚Äî safe to ignore on re-runs

print(f"‚öôÔ∏è  PyTorch threads       : {torch.get_num_threads()}")
print(f"‚öôÔ∏è  DataLoader workers    : 2")


NOTEBOOK 05 ‚Äî PROGRESSIVE TRAINING

üöÄ CIVICPULSE TRAINING CONFIGURATION (Benchmark-Optimized)
Device           : cpu
VRAM Available   : 0.0 GB
Batch Size       : 64
Data Mode        : normal
Patch Size       : 256√ó256 cells
ConvLSTM         : 64 hidden, 2 layers
Learning Rate    : 0.001
Sequence Length  : 4 timesteps

‚öôÔ∏è  PyTorch threads       : 8
‚öôÔ∏è  DataLoader workers    : 2


In [2]:
# Cell 2 ‚Äî Dataset Classes (HDF5 and Normal)
class PopulationDatasetHDF5(Dataset):
    """Lazy HDF5 dataset ‚Äî low RAM."""
    def __init__(self, h5_path, patch_size=64, stride=32, downsample=1):
        self.h5_path    = h5_path
        self.patch_size = patch_size
        self.stride     = stride
        self.downsample = downsample
        with h5py.File(h5_path, "r") as h5:
            shape = h5["population_data"].shape
        self.height  = shape[1] // downsample
        self.width   = shape[2] // downsample
        self.patches = [(y, x)
                        for y in range(0, self.height - patch_size, stride)
                        for x in range(0, self.width  - patch_size, stride)]

    def __len__(self): return len(self.patches)

    def __getitem__(self, idx):
        y, x = self.patches[idx]
        ds, ps = self.downsample, self.patch_size
        with h5py.File(self.h5_path, "r") as h5:
            data = h5["population_data"][:,
                       y*ds:(y+ps)*ds:ds,
                       x*ds:(x+ps)*ds:ds]
        X  = torch.from_numpy(data[:4].copy()).float().unsqueeze(1)
        y_ = torch.from_numpy(data[4].copy()).float().unsqueeze(0)
        return X, y_


class PopulationDatasetNormal(Dataset):
    """Full in-memory dataset."""
    def __init__(self, data_array, patch_size=64, stride=32):
        self.data       = data_array
        self.patch_size = patch_size
        T, H, W = data_array.shape
        self.patches = [(y, x)
                        for y in range(0, H - patch_size, stride)
                        for x in range(0, W - patch_size, stride)]

    def __len__(self): return len(self.patches)

    def __getitem__(self, idx):
        y, x = self.patches[idx]
        ps   = self.patch_size
        data = self.data[:, y:y+ps, x:x+ps]
        X  = torch.from_numpy(data[:4].copy()).float().unsqueeze(1)
        y_ = torch.from_numpy(data[4].copy()).float().unsqueeze(0)
        return X, y_


def make_dataset(h5_path, normal_data, patch_size, stride, downsample):
    """Factory: returns correct dataset based on LOAD_MODE."""
    if LOAD_MODE == "hdf5":
        return PopulationDatasetHDF5(h5_path, patch_size, stride, downsample)
    else:
        return PopulationDatasetNormal(normal_data, patch_size, stride)

In [3]:
# Cell 3 ‚Äî Load Data (mode-aware)
h5_path     = "data/processed/india_sample.h5"
normal_data = None

if LOAD_MODE == "hdf5":
    print("üìÇ HDF5 mode ‚Äî data will be loaded lazily per patch.")
    with h5py.File(h5_path, "r") as h5:
        print(f"  Dataset shape: {h5['population_data'].shape}")
else:
    print("üìÇ Normal mode ‚Äî loading full arrays into RAM...")
    tel  = np.load("data/processed/telangana_population_sequence.npy")
    maha = np.load("data/processed/maharashtra_population_sequence.npy")
    T, H1, W1 = tel.shape
    _,  H2, W2 = maha.shape
    maxH, maxW = max(H1,H2), max(W1,W2)
    tel  = np.pad(tel,  ((0,0),(0,maxH-H1),(0,maxW-W1)))
    maha = np.pad(maha, ((0,0),(0,maxH-H2),(0,maxW-W2)))
    normal_data = np.concatenate([tel, maha], axis=1).astype(np.float32)
    print(f"  Full array loaded: {normal_data.shape}")

üìÇ Normal mode ‚Äî loading full arrays into RAM...
  Full array loaded: (5, 1634, 997)


In [4]:
# Cell 4 ‚Äî Model classes (copy from NB04)

class ConvLSTMCell(nn.Module):
    def __init__(self, in_channels, hidden_channels, kernel_size=3):
        super().__init__()
        self.hidden_channels = hidden_channels
        padding = kernel_size // 2
        self.conv_gates = nn.Conv2d(
            in_channels + hidden_channels, 2 * hidden_channels, kernel_size, padding=padding)
        self.conv_candidate = nn.Conv2d(
            in_channels + hidden_channels, hidden_channels, kernel_size, padding=padding)

    def forward(self, inputs, hidden_state):
        h, c = hidden_state
        combined = torch.cat([inputs, h], dim=1)
        gates = self.conv_gates(combined)
        reset_gate, update_gate = torch.split(gates, self.hidden_channels, dim=1)
        reset_gate  = torch.sigmoid(reset_gate)
        update_gate = torch.sigmoid(update_gate)
        combined_candidate = torch.cat([inputs, reset_gate * h], dim=1)
        candidate = torch.tanh(self.conv_candidate(combined_candidate))
        new_c = (1 - update_gate) * c + update_gate * candidate
        new_h = torch.tanh(new_c) * update_gate + (1 - update_gate) * h
        return new_h, new_c


class ConvLSTMEncoderDecoder(nn.Module):
    def __init__(self, in_channels=1,
                 hidden_channels=None, num_layers=None, kernel_size=3):
        super().__init__()
        # Pull from config if not explicitly passed
        _cfg = TrainingConfig()
        hidden_channels = hidden_channels or _cfg.HIDDEN_CHANNELS
        num_layers      = num_layers      or _cfg.NUM_LAYERS
        self.hidden_channels = hidden_channels
        self.num_layers      = num_layers
        self.encoder_cells = nn.ModuleList([
            ConvLSTMCell(in_channels if i == 0 else hidden_channels,
                         hidden_channels, kernel_size)
            for i in range(num_layers)])
        self.decoder_cells = nn.ModuleList([
            ConvLSTMCell(hidden_channels, hidden_channels, kernel_size)
            for _ in range(num_layers)])
        self.output_conv = nn.Conv2d(hidden_channels, 1, kernel_size=1)

    def forward(self, x):
        B, T, C, H, W = x.shape
        h = [torch.zeros(B, self.hidden_channels, H, W, device=x.device, dtype=x.dtype)
             for _ in range(self.num_layers)]
        c = [torch.zeros(B, self.hidden_channels, H, W, device=x.device, dtype=x.dtype)
             for _ in range(self.num_layers)]
        for t in range(T):
            xt = x[:, t]
            for layer in range(self.num_layers):
                h[layer], c[layer] = self.encoder_cells[layer](
                    xt if layer == 0 else h[layer-1], (h[layer], c[layer]))
        for layer in range(self.num_layers):
            inp = h[layer-1] if layer > 0 else h[0]
            h[layer], c[layer] = self.decoder_cells[layer](inp, (h[layer], c[layer]))
        return self.output_conv(h[-1])   # (B, 1, H, W)

class PopulationLoss(nn.Module):
    def __init__(self, alpha=0.7, beta=0.3):
        super().__init__()
        self.alpha, self.beta = alpha, beta
        self.mse = nn.MSELoss()
        self.mae = nn.L1Loss()

    def forward(self, pred, target):
        mse  = self.mse(pred, target)
        mae  = self.mae(pred, target)
        pc   = torch.clamp(pred,   min=0)
        tc   = torch.clamp(target, min=0)
        mask = tc > 1.0
        rel  = (torch.abs(pc[mask] - tc[mask]) / (tc[mask] + 1e-8)).mean() \
               if mask.sum() > 0 else torch.tensor(0.0, device=pred.device)
        return self.alpha * mse + self.beta * mae + 0.1 * rel

def calculate_r2(pred, target):
    ss_res = ((pred - target) ** 2).sum()
    ss_tot = ((target - target.mean()) ** 2).sum()
    return (1 - ss_res / ss_tot).item()

criterion = PopulationLoss()
print("‚úÖ Model classes and loss function defined")

‚úÖ Model classes and loss function defined


In [5]:
# Cell 5 ‚Äî Training Stages Config
training_stages = {
    "stage1_coarse": dict(
        downsample=4, patch_size=32,
        batch_size=config.BATCH_SIZE,   # ‚Üê from .env benchmark
        epochs=3, lr=1e-3, name="Stage 1 ‚Äî Coarse"),
    "stage2_medium": dict(
        downsample=2, patch_size=64,
        batch_size=config.BATCH_SIZE,   # ‚Üê from .env benchmark
        epochs=5, lr=5e-4, name="Stage 2 ‚Äî Medium"),
    "stage3_fine": dict(
        downsample=1, patch_size=128,
        batch_size=config.BATCH_SIZE,   # ‚Üê from .env benchmark
        epochs=10, lr=1e-4, name="Stage 3 ‚Äî Fine"),
}
for s, cfg in training_stages.items():
    print(f"{cfg['name']}  |  ds={cfg['downsample']}  "
          f"patch={cfg['patch_size']}  bs={cfg['batch_size']}  ep={cfg['epochs']}")

Stage 1 ‚Äî Coarse  |  ds=4  patch=32  bs=64  ep=3
Stage 2 ‚Äî Medium  |  ds=2  patch=64  bs=64  ep=5
Stage 3 ‚Äî Fine  |  ds=1  patch=128  bs=64  ep=10


> # ‚ö†Ô∏è <span style="color:red">WARNING: EPOCHS REDUCED FOR PIPELINE TEST</span>
> <span style="color:red">Epochs are set to **3 / 5 / 10** for local CPU dry run only.<br>
> **Reset to 20 / 50 / 100 before running on GPU.**</span>

In [6]:
# Cell 6 ‚Äî Trainer Class
class Trainer:
    def __init__(self, model, device, checkpoint_dir="models/checkpoints"):
        self.model    = model.to(device)
        self.device   = device
        self.ckpt_dir = Path(checkpoint_dir)
        self.ckpt_dir.mkdir(parents=True, exist_ok=True)

    def save_checkpoint(self, epoch, stage, loss, r2, is_best=False):
        ckpt = dict(epoch=epoch, stage=stage,
                    model_state=self.model.state_dict(), loss=loss, r2=r2)
        torch.save(ckpt, self.ckpt_dir / f"ckpt_{stage}_ep{epoch}.pt")
        if is_best:
            torch.save(ckpt, self.ckpt_dir / "best_model.pt")

In [7]:
# Cell 7 ‚Äî Progressive Training Loop (clean tqdm.notebook bars)
from tqdm.notebook import tqdm as tqdm_nb

model     = ConvLSTMEncoderDecoder().to(device)
trainer   = Trainer(model, device)
criterion = PopulationLoss()
all_history  = {}
TOTAL_STAGES = len(training_stages)

# threading already configured in Cell 1 ‚Äî no duplicate block here
print(f"‚öôÔ∏è  PyTorch threads    : {torch.get_num_threads()}")
print(f"‚öôÔ∏è  DataLoader workers : 2")

for stage_idx, (stage_name, cfg) in enumerate(training_stages.items(), 1):
    print(f"\n{'='*70}")
    print(f"[{stage_idx}/{TOTAL_STAGES}] {cfg['name']}")
    print(f"  patch={cfg['patch_size']}  downsample={cfg['downsample']}  "
          f"batch={cfg['batch_size']}  epochs={cfg['epochs']}  lr={cfg['lr']}")
    print(f"{'='*70}")

    dataset = make_dataset(
        h5_path, normal_data,
        patch_size=cfg["patch_size"],
        stride=cfg["patch_size"] // 2,
        downsample=cfg["downsample"])

    train_sz = int(0.8 * len(dataset))
    val_sz   = len(dataset) - train_sz
    train_set, val_set = torch.utils.data.random_split(dataset, [train_sz, val_sz])

    train_loader = DataLoader(train_set, batch_size=cfg["batch_size"],
                          shuffle=True,  num_workers=0)
    val_loader   = DataLoader(val_set,   batch_size=cfg["batch_size"],
                          shuffle=False, num_workers=0)

    print(f"  {len(dataset)} patches ‚Üí train={train_sz} / val={val_sz}  "
          f"({len(train_loader)} train batches / {len(val_loader)} val batches)")

    optimizer = optim.Adam(model.parameters(), lr=cfg["lr"])
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode="max", factor=0.5, patience=5)

    history = dict(train_loss=[], train_r2=[], val_loss=[], val_r2=[])
    best_r2 = -np.inf
    stage_start = time.time()

    epoch_bar = tqdm_nb(range(1, cfg["epochs"] + 1),
                        desc=f"  {cfg['name']}", unit="ep", leave=True)

    for epoch in epoch_bar:
        # ‚îÄ‚îÄ Train ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
        model.train()
        total_loss = total_r2 = 0
        batch_bar = tqdm_nb(train_loader, desc="  Train", leave=False,
                            unit="batch", mininterval=0.5)
        for X, y in batch_bar:
            X, y = X.to(device), y.to(device)
            out  = model(X)
            loss = criterion(out, y)
            optimizer.zero_grad(); loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            total_loss += loss.item()
            total_r2   += calculate_r2(out.detach(), y.detach())
        n = len(train_loader)
        tr_loss, tr_r2 = total_loss / n, total_r2 / n

        # ‚îÄ‚îÄ Validate ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
        model.eval()
        vl_loss_sum = vl_r2_sum = 0
        val_bar = tqdm_nb(val_loader, desc="  Val  ", leave=False,
                          unit="batch", mininterval=0.5)
        with torch.no_grad():
            for X, y in val_bar:
                X, y = X.to(device), y.to(device)
                out  = model(X)
                vl_loss_sum += criterion(out, y).item()
                vl_r2_sum   += calculate_r2(out, y)
        n = len(val_loader)
        vl_loss, vl_r2 = vl_loss_sum / n, vl_r2_sum / n

        history["train_loss"].append(tr_loss)
        history["train_r2"].append(tr_r2)
        history["val_loss"].append(vl_loss)
        history["val_r2"].append(vl_r2)

        is_best = vl_r2 > best_r2
        if is_best: best_r2 = vl_r2
        trainer.save_checkpoint(epoch, stage_name, vl_loss, vl_r2, is_best=is_best)

        epoch_bar.set_postfix(
            tr_L=f"{tr_loss:.0f}", tr_R2=f"{tr_r2:.3f}",
            vl_L=f"{vl_loss:.0f}", vl_R2=f"{vl_r2:.3f}",
            best=f"{best_r2:.3f}")

        scheduler.step(vl_r2)
        logging.info(f"{stage_name} ep{epoch} tr={tr_loss:.4f}/{tr_r2:.3f} "
                     f"val={vl_loss:.4f}/{vl_r2:.3f}")

    stage_mins = (time.time() - stage_start) / 60
    all_history[stage_name] = history
    print(f"  ‚úÖ {cfg['name']} done in {stage_mins:.1f} min | Best Val R¬≤={best_r2:.3f}")

with open("logs/training_summary.json", "w") as f:
    json.dump({k: {kk: vv[-1] for kk, vv in v.items()}
               for k, v in all_history.items()}, f, indent=2)
print("\n‚úÖ Training summary ‚Üí logs/training_summary.json")
print("‚úÖ Best model      ‚Üí models/checkpoints/best_model.pt")


‚öôÔ∏è  PyTorch threads    : 8
‚öôÔ∏è  DataLoader workers : 2

[1/3] Stage 1 ‚Äî Coarse
  patch=32  downsample=4  batch=64  epochs=3  lr=0.001
  6161 patches ‚Üí train=4928 / val=1233  (77 train batches / 20 val batches)


  Stage 1 ‚Äî Coarse:   0%|          | 0/3 [00:00<?, ?ep/s]

  Train:   0%|          | 0/77 [00:00<?, ?batch/s]

  Val  :   0%|          | 0/20 [00:00<?, ?batch/s]

  Train:   0%|          | 0/77 [00:00<?, ?batch/s]

  Val  :   0%|          | 0/20 [00:00<?, ?batch/s]

  Train:   0%|          | 0/77 [00:00<?, ?batch/s]

  Val  :   0%|          | 0/20 [00:00<?, ?batch/s]

  ‚úÖ Stage 1 ‚Äî Coarse done in 22.8 min | Best Val R¬≤=-0.076

[2/3] Stage 2 ‚Äî Medium
  patch=64  downsample=2  batch=64  epochs=5  lr=0.0005
  1500 patches ‚Üí train=1200 / val=300  (19 train batches / 5 val batches)


  Stage 2 ‚Äî Medium:   0%|          | 0/5 [00:00<?, ?ep/s]

  Train:   0%|          | 0/19 [00:00<?, ?batch/s]

  Val  :   0%|          | 0/5 [00:00<?, ?batch/s]

  Train:   0%|          | 0/19 [00:00<?, ?batch/s]

  Val  :   0%|          | 0/5 [00:00<?, ?batch/s]

  Train:   0%|          | 0/19 [00:00<?, ?batch/s]

  Val  :   0%|          | 0/5 [00:00<?, ?batch/s]

  Train:   0%|          | 0/19 [00:00<?, ?batch/s]

  Val  :   0%|          | 0/5 [00:00<?, ?batch/s]

  Train:   0%|          | 0/19 [00:00<?, ?batch/s]

  Val  :   0%|          | 0/5 [00:00<?, ?batch/s]

  ‚úÖ Stage 2 ‚Äî Medium done in 37.5 min | Best Val R¬≤=-0.040

[3/3] Stage 3 ‚Äî Fine
  patch=128  downsample=1  batch=64  epochs=10  lr=0.0001
  336 patches ‚Üí train=268 / val=68  (5 train batches / 2 val batches)


  Stage 3 ‚Äî Fine:   0%|          | 0/10 [00:00<?, ?ep/s]

  Train:   0%|          | 0/5 [00:00<?, ?batch/s]

  Val  :   0%|          | 0/2 [00:00<?, ?batch/s]

  Train:   0%|          | 0/5 [00:00<?, ?batch/s]

  Val  :   0%|          | 0/2 [00:00<?, ?batch/s]

  Train:   0%|          | 0/5 [00:00<?, ?batch/s]

  Val  :   0%|          | 0/2 [00:00<?, ?batch/s]

  Train:   0%|          | 0/5 [00:00<?, ?batch/s]

  Val  :   0%|          | 0/2 [00:00<?, ?batch/s]

  Train:   0%|          | 0/5 [00:00<?, ?batch/s]

  Val  :   0%|          | 0/2 [00:00<?, ?batch/s]

  Train:   0%|          | 0/5 [00:00<?, ?batch/s]

  Val  :   0%|          | 0/2 [00:00<?, ?batch/s]

  Train:   0%|          | 0/5 [00:00<?, ?batch/s]

  Val  :   0%|          | 0/2 [00:00<?, ?batch/s]

  Train:   0%|          | 0/5 [00:00<?, ?batch/s]

  Val  :   0%|          | 0/2 [00:00<?, ?batch/s]

  Train:   0%|          | 0/5 [00:00<?, ?batch/s]

  Val  :   0%|          | 0/2 [00:00<?, ?batch/s]

  Train:   0%|          | 0/5 [00:00<?, ?batch/s]

  Val  :   0%|          | 0/2 [00:00<?, ?batch/s]

  ‚úÖ Stage 3 ‚Äî Fine done in 96.5 min | Best Val R¬≤=-0.052

‚úÖ Training summary ‚Üí logs/training_summary.json
‚úÖ Best model      ‚Üí models/checkpoints/best_model.pt
