In [3]:
# Cell 1 ‚Äî Imports + Config
import sys, os
import torch
import torch.nn as nn
import numpy as np
import rasterio
import rasterio.transform
import h5py
from pathlib import Path
from tqdm.notebook import tqdm as tqdm_nb
import warnings; warnings.filterwarnings("ignore")
from src.config import TrainingConfig

config    = TrainingConfig()
device    = config.DEVICE
LOAD_MODE = config.DATA_MODE

print("=" * 70)
print("NOTEBOOK 06 ‚Äî INFERENCE & PREDICTIONS")
print("=" * 70)
config.print_summary()


NOTEBOOK 06 ‚Äî INFERENCE & PREDICTIONS

üöÄ CIVICPULSE TRAINING CONFIGURATION (Benchmark-Optimized)
Device           : cpu
VRAM Available   : 0.0 GB
Batch Size       : 64
Data Mode        : normal
Patch Size       : 256√ó256 cells
ConvLSTM         : 64 hidden, 2 layers
Learning Rate    : 0.001
Sequence Length  : 4 timesteps



In [4]:
# Cell 2 ‚Äî Load Model (config-aware)
# [Paste ConvLSTMCell + ConvLSTMEncoderDecoder here ‚Äî same as NB04 Cells 2, 3]
class ConvLSTMCell(nn.Module):
    def __init__(self, in_channels, hidden_channels, kernel_size=3):
        super().__init__()
        self.hidden_channels = hidden_channels
        padding = kernel_size // 2
        self.conv_gates = nn.Conv2d(
            in_channels + hidden_channels, 2 * hidden_channels, kernel_size, padding=padding)
        self.conv_candidate = nn.Conv2d(
            in_channels + hidden_channels, hidden_channels, kernel_size, padding=padding)

    def forward(self, inputs, hidden_state):
        h, c = hidden_state
        combined = torch.cat([inputs, h], dim=1)
        gates = self.conv_gates(combined)
        reset_gate, update_gate = torch.split(gates, self.hidden_channels, dim=1)
        reset_gate  = torch.sigmoid(reset_gate)
        update_gate = torch.sigmoid(update_gate)
        combined_candidate = torch.cat([inputs, reset_gate * h], dim=1)
        candidate = torch.tanh(self.conv_candidate(combined_candidate))
        new_c = (1 - update_gate) * c + update_gate * candidate
        new_h = torch.tanh(new_c) * update_gate + (1 - update_gate) * h
        return new_h, new_c

class ConvLSTMEncoderDecoder(nn.Module):
    def __init__(self, in_channels=1,
                 hidden_channels=None, num_layers=None, kernel_size=3):
        super().__init__()
        # Pull from config if not explicitly passed
        _cfg = TrainingConfig()
        hidden_channels = hidden_channels or _cfg.HIDDEN_CHANNELS
        num_layers      = num_layers      or _cfg.NUM_LAYERS
        self.hidden_channels = hidden_channels
        self.num_layers      = num_layers
        self.encoder_cells = nn.ModuleList([
            ConvLSTMCell(in_channels if i == 0 else hidden_channels,
                         hidden_channels, kernel_size)
            for i in range(num_layers)])
        self.decoder_cells = nn.ModuleList([
            ConvLSTMCell(hidden_channels, hidden_channels, kernel_size)
            for _ in range(num_layers)])
        self.output_conv = nn.Conv2d(hidden_channels, 1, kernel_size=1)

    def forward(self, x):
        B, T, C, H, W = x.shape
        h = [torch.zeros(B, self.hidden_channels, H, W, device=x.device, dtype=x.dtype)
             for _ in range(self.num_layers)]
        c = [torch.zeros(B, self.hidden_channels, H, W, device=x.device, dtype=x.dtype)
             for _ in range(self.num_layers)]
        for t in range(T):
            xt = x[:, t]
            for layer in range(self.num_layers):
                h[layer], c[layer] = self.encoder_cells[layer](
                    xt if layer == 0 else h[layer-1], (h[layer], c[layer]))
        for layer in range(self.num_layers):
            inp = h[layer-1] if layer > 0 else h[0]
            h[layer], c[layer] = self.decoder_cells[layer](inp, (h[layer], c[layer]))
        return self.output_conv(h[-1])   # (B, 1, H, W)

model_path = "models/checkpoints/best_model.pt"
model = ConvLSTMEncoderDecoder().to(device)   # pulls HIDDEN_CHANNELS/NUM_LAYERS from config
if Path(model_path).exists():
    ckpt = torch.load(model_path, map_location=device)
    model.load_state_dict(ckpt["model_state"])
    print(f"‚úÖ Loaded checkpoint  (R¬≤={ckpt.get('r2', 'N/A')})")
else:
    print("‚ö†Ô∏è  No checkpoint found ‚Äî using untrained weights (demo only)")
model.eval()



‚úÖ Loaded checkpoint  (R¬≤=-0.05233561992645264)


ConvLSTMEncoderDecoder(
  (encoder_cells): ModuleList(
    (0): ConvLSTMCell(
      (conv_gates): Conv2d(65, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (conv_candidate): Conv2d(65, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    )
    (1): ConvLSTMCell(
      (conv_gates): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (conv_candidate): Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    )
  )
  (decoder_cells): ModuleList(
    (0-1): 2 x ConvLSTMCell(
      (conv_gates): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (conv_candidate): Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    )
  )
  (output_conv): Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1))
)

In [5]:
# Cell 3 ‚Äî Load Data (mode-aware)
h5_path = "data/processed/india_sample.h5"

if LOAD_MODE == "hdf5":
    print("üìÇ HDF5 mode...")
    with h5py.File(h5_path, "r") as h5:
        data     = h5["population_data"][:]
        metadata = {k: h5.attrs[k] for k in h5.attrs}
else:
    print("üìÇ Normal mode...")
    tel  = np.load("data/processed/telangana_population_sequence.npy")
    maha = np.load("data/processed/maharashtra_population_sequence.npy")
    T, H1, W1 = tel.shape
    _,  H2, W2 = maha.shape
    maxH, maxW = max(H1,H2), max(W1,W2)
    tel  = np.pad(tel,  ((0,0),(0,maxH-H1),(0,maxW-W1)))
    maha = np.pad(maha, ((0,0),(0,maxH-H2),(0,maxW-W2)))
    data     = np.concatenate([tel, maha], axis=1).astype(np.float32)
    metadata = {"years": "2000,2005,2010,2015,2020"}

print(f"  Data shape: {data.shape}  |  {metadata}")

üìÇ Normal mode...
  Data shape: (5, 1634, 997)  |  {'years': '2000,2005,2010,2015,2020'}


In [6]:
# Cell 4 ‚Äî Patch-based Prediction Function
def predict_full(model, data_sequence, patch_size=512, overlap=64, device="cpu"):
    T, H, W    = data_sequence.shape
    output     = np.zeros((H, W), dtype=np.float32)
    count_map  = np.zeros((H, W), dtype=np.float32)
    stride     = patch_size - overlap
    patches    = [(y, x)
                  for y in range(0, H - patch_size + 1, stride)
                  for x in range(0, W - patch_size + 1, stride)]

    with torch.no_grad():
        for y, x in tqdm_nb(patches, desc="Predicting patches"):
            patch = data_sequence[:, y:y+patch_size, x:x+patch_size]
            X     = torch.from_numpy(patch[:4].copy()).float() \
                        .unsqueeze(0).unsqueeze(2).to(device)
            pred  = model(X).squeeze().cpu().numpy()
            output[y:y+patch_size, x:x+patch_size]    += pred
            count_map[y:y+patch_size, x:x+patch_size] += 1

    return output / (count_map + 1e-8)

In [7]:
# Cell 5 ‚Äî Generate 2025 & 2030 Predictions
predictions = {}

print("üîÆ Predicting 2025 (input: 2005‚Äì2020)...")
pred_2025 = predict_full(model, data[1:], device=device)
predictions["2025"] = pred_2025
print(f"  Shape: {pred_2025.shape}  Range: {pred_2025.min():.0f}‚Äì{pred_2025.max():.0f}")

print("\nüîÆ Predicting 2030 (input: 2010‚Äì2025)...")
data_2010_2025 = np.concatenate([data[2:], pred_2025[np.newaxis]], axis=0)
pred_2030 = predict_full(model, data_2010_2025, device=device)
predictions["2030"] = pred_2030
print(f"  Shape: {pred_2030.shape}  Range: {pred_2030.min():.0f}‚Äì{pred_2030.max():.0f}")

üîÆ Predicting 2025 (input: 2005‚Äì2020)...


Predicting patches:   0%|          | 0/6 [00:00<?, ?it/s]

  Shape: (1634, 997)  Range: -2‚Äì18

üîÆ Predicting 2030 (input: 2010‚Äì2025)...


Predicting patches:   0%|          | 0/6 [00:00<?, ?it/s]

  Shape: (1634, 997)  Range: -2‚Äì18


In [8]:
# Cell 6 ‚Äî Save as GeoTIFF
proj_dir = Path("data/projections")
proj_dir.mkdir(parents=True, exist_ok=True)

ref_tifs = list(Path("data/processed").glob("india_pop_clipped_*.tif"))
if ref_tifs:
    with rasterio.open(ref_tifs[0]) as src:
        base_profile = src.profile
else:
    base_profile = dict(
        driver="GTiff", dtype="float32", nodata=None,
        width=pred_2025.shape[1], height=pred_2025.shape[0],
        count=1, crs="EPSG:4326",
        transform=rasterio.transform.Affine(1, 0, 72, 0, -1, 35))

for year, pred in predictions.items():
    out_path = proj_dir / f"population_prediction_{year}.tif"
    base_profile.update(dtype="float32", width=pred.shape[1], height=pred.shape[0], count=1)
    with rasterio.open(out_path, "w", **base_profile) as dst:
        dst.write(pred, 1)
    print(f"  ‚úÖ Saved {out_path}")

  ‚úÖ Saved data\projections\population_prediction_2025.tif
  ‚úÖ Saved data\projections\population_prediction_2030.tif


In [9]:
# Cell 7 ‚Äî Validation vs Actual 2020
actual_2020 = data[4]
pred_2020   = predict_full(model, data[:4], device=device)

mae  = np.abs(pred_2020 - actual_2020).mean()
rmse = np.sqrt(((pred_2020 - actual_2020) ** 2).mean())
ss_res = ((pred_2020 - actual_2020) ** 2).sum()
ss_tot = ((actual_2020 - actual_2020.mean()) ** 2).sum()
r2 = 1 - ss_res / ss_tot

print("=" * 70)
print("PREDICTION VALIDATION (2020 actual vs predicted)")
print("=" * 70)
print(f"  R¬≤  : {r2:.3f}")
print(f"  MAE : {mae:.1f} people/km¬≤")
print(f"  RMSE: {rmse:.1f} people/km¬≤")
print("\n‚úÖ Next: Notebook 07 ‚Äî Gap Analysis")

Predicting patches:   0%|          | 0/6 [00:00<?, ?it/s]

PREDICTION VALIDATION (2020 actual vs predicted)
  R¬≤  : -0.029
  MAE : 182.9 people/km¬≤
  RMSE: 1062.4 people/km¬≤

‚úÖ Next: Notebook 07 ‚Äî Gap Analysis
