# Task A: Independent Feature Extraction (CMOS vs SPC)

Goal for mid-term:
1. Take paired frames from:
   - CMOS_sat_1024x512 (saturated RGB camera)
   - SPC_512x256_train_png (SPC sensor, HDR-like single-channel/mono frames)
2. Run separate feature extractors:
   - CMOS branch (pretrained backbone) → 3-channel feature map
   - SPC branch (dilated conv) → 3-channel feature map
3. Save:
   - Raw CMOS frame
   - Raw SPC frame
   - CMOS feature map
   - SPC feature map

This demonstrates:
- We can learn/produce meaningful feature representations per sensor.
- We are treating CMOS and SPC as two separate input modalities.
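- In the Task A export we are **not** resizing SPC and **not** fusing yet; the final cell of this notebook additionally runs a `SimpleFusion` head on the two feature maps and saves the result as a separate step.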


In [1]:
import torch

# Environment report: torch build, the CUDA version it was compiled against,
# and (only when a CUDA device is usable) the name of device 0.
print(f"Torch version: {torch.__version__}")
print(f"Torch built with CUDA: {torch.version.cuda}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")


Torch version: 2.5.1+cu121
Torch built with CUDA: 12.1
CUDA available: True
GPU: NVIDIA GeForce RTX 4050 Laptop GPU


In [1]:
import torch

# Quick GPU sanity check: report CUDA availability and, if present,
# the name of device 0.
cuda_ok = torch.cuda.is_available()
print(f"CUDA available: {cuda_ok}")
if cuda_ok:
    print(f"Device: {torch.cuda.get_device_name(0)}")


CUDA available: True
Device: NVIDIA GeForce RTX 4050 Laptop GPU


In [2]:
import sys

import torch

# Show which interpreter the kernel is running and which torch build it sees,
# to confirm the notebook is using the intended environment.
print(f"Python executable: {sys.executable}")
print(f"Torch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")


Python executable: c:\Users\anika\AppData\Local\Programs\Python\Python312\python.exe
Torch version: 2.5.1+cu121
CUDA available: True


In [2]:
import importlib

import utils.dataset

# Reload the dataset module so that edits to it are picked up without
# restarting the kernel, then re-bind the class from the reloaded module.
importlib.reload(utils.dataset)
from utils.dataset import HDRPairDataset

# Smoke test: build the dataset, report its size, and inspect the first sample.
ds = HDRPairDataset()
print(f"Total samples: {len(ds)}")

s = ds[0]
print(f"Example: {s['name']}")
print(f"CMOS shape: {s['cmos'].shape}")
print(f"SPC shape: {s['spc'].shape}")


[INFO] Using device: cuda
Total samples: 2100
Example: 9C4A0001-5e832da4cc
CMOS shape: torch.Size([3, 512, 1024])
SPC shape: torch.Size([1, 256, 512])


In [3]:
import torch
from torch.utils.data import DataLoader, Subset
from utils.dataset import HDRPairDataset
from utils.common import DEVICE, OUT_ROOT
from models.feature_extractors import DilatedConvEncoder
from models.feature_extractors import HighResCMOSEncoder
from utils.save_intermediate import tensor_to_pngimg, save_feature_map
from pathlib import Path

print("Device:", DEVICE)

# Load full dataset (CMOS + SPC only)
full_train = HDRPairDataset()

# Number of leading samples exported for the demo; capped at the dataset size
# below so a smaller dataset does not produce out-of-range indices.
N_DEMO_SAMPLES = 5
subset_idx = list(range(min(N_DEMO_SAMPLES, len(full_train))))
subset = Subset(full_train, subset_idx)
# batch_size=1 and shuffle=False: one sample per step, in dataset order,
# so exported files are reproducible across runs.
loader = DataLoader(subset, batch_size=1, shuffle=False)

print("Subset size:", len(subset))

# Feature extractors: one per sensor, moved to DEVICE and put in eval mode
# (they are only used for inference in this notebook).
spc_encoder = DilatedConvEncoder(in_channels=1).to(DEVICE).eval()
cmos_encoder = HighResCMOSEncoder().to(DEVICE).eval()

# Output directory for review artifacts.
# parents=True: also create OUT_ROOT if it does not exist yet, instead of
# raising FileNotFoundError on a fresh checkout.
TASKA_OUT = OUT_ROOT / "taskA_outputs"
TASKA_OUT.mkdir(parents=True, exist_ok=True)
print("Saving Task A outputs to:", TASKA_OUT)



[INFO] Using device: cuda
Device: cuda
Subset size: 5
Saving Task A outputs to: outputs\taskA_outputs


In [4]:
# Inspect the first sample of the full dataset: its name and the tensor
# shape of each sensor's frame.
s0 = full_train[0]
print("name:", s0["name"])
for label, key in (("CMOS shape:", "cmos"), ("SPC shape:", "spc")):
    print(label, s0[key].shape)


name: 9C4A0001-5e832da4cc
CMOS shape: torch.Size([3, 512, 1024])
SPC shape: torch.Size([1, 256, 512])


In [4]:
import torch

# Task A export loop: for every sample in `loader`, run each sensor's frame
# through its own encoder and write four PNGs (CMOS feature map, SPC feature
# map, raw CMOS, raw SPC). Gradients are disabled because this is inference only.
with torch.no_grad():
    for batch in loader:
        # Index 0: the loader was built with batch_size=1.
        name = batch["name"][0]

        # --- CMOS branch: move to DEVICE, encode, save the feature map ---
        cmos = batch["cmos"].to(DEVICE)      # (1,3,512,1024)
        cmos_feat = cmos_encoder(cmos)       # (1,3,512,1024) per original note: after upsample in backbone
        cmos_feat_path = save_feature_map(cmos_feat, name + "_CMOSfeat", mode="cmos")

        # --- SPC branch: same steps, separate encoder, no fusion ---
        spc = batch["spc"].to(DEVICE)        # (1,1,256,512)
        spc_feat = spc_encoder(spc)          # (1,3,256,512)
        spc_feat_path = save_feature_map(spc_feat, name + "_SPCfeat", mode="spc")

        # --- Raw CMOS frame (RGB), written under TASKA_OUT ---
        cmos_raw_path = TASKA_OUT / f"{name}_CMOSraw.png"
        cmos_raw_img = tensor_to_pngimg(batch["cmos"][0].cpu())
        cmos_raw_img.save(cmos_raw_path)

        # --- Raw SPC frame: the single channel is repeated 3x along dim 0
        # so the saved image is RGB-looking ---
        spc_raw_path = TASKA_OUT / f"{name}_SPCraw.png"
        spc_tiled = batch["spc"][0].repeat(3, 1, 1).cpu()
        spc_raw_img = tensor_to_pngimg(spc_tiled)
        spc_raw_img.save(spc_raw_path)

        # Per-sample report of where each artifact was written.
        print(f"[{name}]")
        for label, path in (
            ("  CMOS raw:", cmos_raw_path),
            ("  SPC raw:", spc_raw_path),
            ("  CMOS feat:", cmos_feat_path),
            ("  SPC feat:", spc_feat_path),
        ):
            print(label, path)


[9C4A0001-5e832da4cc]
  CMOS raw: outputs\taskA_outputs\9C4A0001-5e832da4cc_CMOSraw.png
  SPC raw: outputs\taskA_outputs\9C4A0001-5e832da4cc_SPCraw.png
  CMOS feat: outputs\feat_cmos\9C4A0001-5e832da4cc_CMOSfeat_feat.png
  SPC feat: outputs\feat_spc\9C4A0001-5e832da4cc_SPCfeat_feat.png
[9C4A0001-6fbef8172f]
  CMOS raw: outputs\taskA_outputs\9C4A0001-6fbef8172f_CMOSraw.png
  SPC raw: outputs\taskA_outputs\9C4A0001-6fbef8172f_SPCraw.png
  CMOS feat: outputs\feat_cmos\9C4A0001-6fbef8172f_CMOSfeat_feat.png
  SPC feat: outputs\feat_spc\9C4A0001-6fbef8172f_SPCfeat_feat.png
[9C4A0001-7c62497929]
  CMOS raw: outputs\taskA_outputs\9C4A0001-7c62497929_CMOSraw.png
  SPC raw: outputs\taskA_outputs\9C4A0001-7c62497929_SPCraw.png
  CMOS feat: outputs\feat_cmos\9C4A0001-7c62497929_CMOSfeat_feat.png
  SPC feat: outputs\feat_spc\9C4A0001-7c62497929_SPCfeat_feat.png
[9C4A0001-beb39950ec]
  CMOS raw: outputs\taskA_outputs\9C4A0001-beb39950ec_CMOSraw.png
  SPC raw: outputs\taskA_outputs\9C4A0001-beb39950e

In [5]:
import torch
from models.fusion import SimpleFusion
from utils.save_intermediate import _tensor_to_pngimg  # we'll use internal helper
from utils.common import OUT_ROOT
from pathlib import Path

# Where fused feature maps are written.
# parents=True: also create OUT_ROOT if it does not exist yet, instead of
# raising FileNotFoundError when this cell runs on a fresh checkout.
FUSED_OUT = OUT_ROOT / "fused"
FUSED_OUT.mkdir(parents=True, exist_ok=True)

# Fusion head, moved to DEVICE and put in eval mode (inference only).
# Relies on DEVICE, loader, cmos_encoder and spc_encoder from earlier cells.
fusion_head = SimpleFusion().to(DEVICE).eval()

with torch.no_grad():
    for batch in loader:
        # Index 0: the loader was built with batch_size=1.
        name = batch["name"][0]

        # forward both encoders
        cmos = batch["cmos"].to(DEVICE)  # (1,3,512,1024)
        spc  = batch["spc"].to(DEVICE)   # (1,1,256,512)

        cmos_feat = cmos_encoder(cmos)   # (1,3,512,1024)
        spc_feat  = spc_encoder(spc)     # (1,3,256,512)

        # fuse the two per-sensor feature maps
        fused_feat = fusion_head(cmos_feat, spc_feat)  # (1,3,512,1024) per original note

        # Save fused feature map as PNG. Only the single sample being saved is
        # moved to the CPU.
        # NOTE(review): this uses the private helper `_tensor_to_pngimg`, while
        # the Task A cell uses the public `tensor_to_pngimg` — confirm whether
        # they are equivalent and prefer the public one if so.
        fused_img = _tensor_to_pngimg(fused_feat[0].cpu())
        fused_path = FUSED_OUT / f"{name}_FUSEDfeat.png"
        fused_img.save(fused_path)

        print(f"[{name}] fused -> {fused_path}")


[9C4A0001-5e832da4cc] fused -> outputs\fused\9C4A0001-5e832da4cc_FUSEDfeat.png
[9C4A0001-6fbef8172f] fused -> outputs\fused\9C4A0001-6fbef8172f_FUSEDfeat.png
[9C4A0001-7c62497929] fused -> outputs\fused\9C4A0001-7c62497929_FUSEDfeat.png
[9C4A0001-beb39950ec] fused -> outputs\fused\9C4A0001-beb39950ec_FUSEDfeat.png
[9C4A0001-c6c6bf7c76] fused -> outputs\fused\9C4A0001-c6c6bf7c76_FUSEDfeat.png
