In [None]:
import sys
import os

# Put the project root (the parent of the current working directory) at the
# front of sys.path so the `src.*` imports below can be resolved.
import sys
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if sys.path.count(project_root) == 0:
    sys.path.insert(0, project_root)


from src.data_loader import ShanghaiTechDataModule
import torch
from torch.utils.data import DataLoader
from src.models import get_model
from src.train_lightning import LitDensityEstimator

## ResNet and U-Net evaluation

In [5]:
# ====== ShanghaiTech Part A data (shared by every evaluation cell below) ======
# Prefer Apple-Silicon MPS (the device this notebook was written for), but fall
# back to CUDA or CPU so the notebook still runs on machines without MPS.
if torch.backends.mps.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

data_module = ShanghaiTechDataModule(
    data_folder="../data/ShanghaiTech",   # relative to the notebook's working directory
    part="part_A",
    validation_split=0.1,
    sigma=5,
    return_count=False,
    batch_size=1,
    num_workers=4,
    input_size=(384, 384),
    density_map_size=(192, 192),
    device=device
)
data_module.setup()

# NOTE: despite the `val_` prefix, this is the data module's *test* dataloader.
# The name is kept because the evaluation cells below refer to it.
val_loader_A = data_module.test_dataloader()


In [8]:
# Evaluate the `unet_depth3_nf64` Lightning checkpoint on the Part A loader.
# The checkpoint path is relative to the notebook's working directory, like
# the data folder and the VGG weights elsewhere in this notebook.
model = LitDensityEstimator.load_from_checkpoint(
    "../models/checkpoints/unet_depth3_nf64/unet_depth3_nf64.ckpt",
    map_location=device,
)
model.eval()

# Running sums of the absolute / squared count error over all images.
total_mae = 0.0
total_mse = 0.0
num_images = 0

with torch.no_grad():
    for img, gt_density in val_loader_A:
        img        = img.to(device)         # B×3×H×W
        gt_density = gt_density.to(device)  # B×1×H×W

        # The count of an image is the sum of its density map.
        gt_count     = gt_density.sum(dim=(1, 2, 3))  # B
        pred_density = model(img)                     # B×1×H×W
        pred_count   = pred_density.sum(dim=(1, 2, 3))

        count_error = pred_count - gt_count
        total_mae  += count_error.abs().sum().item()
        total_mse  += (count_error ** 2).sum().item()
        # Count images, not batches: the errors above are summed per image,
        # so dividing by len(loader) is only right when batch_size == 1.
        num_images += img.size(0)

N    = num_images
mae  = total_mae / N
rmse = (total_mse / N)**0.5

# Label the result with the checkpoint that was actually evaluated.
print(f"Part A — unet_depth3_nf64: MAE = {mae:.3f}, RMSE = {rmse:.3f}")


Part A — resnet34: MAE = 303.278, RMSE = 395.458


In [5]:
# Evaluate the `unet_depth2_nf64` Lightning checkpoint on the Part A loader.
# The checkpoint path is relative to the notebook's working directory, like
# the data folder and the VGG weights elsewhere in this notebook.
model = LitDensityEstimator.load_from_checkpoint(
    "../models/checkpoints/unet_depth2_nf64/unet_depth2_nf64.ckpt",
    map_location=device,
)
model.eval()

# Running sums of the absolute / squared count error over all images.
total_mae = 0.0
total_mse = 0.0
num_images = 0

with torch.no_grad():
    for img, gt_density in val_loader_A:
        img        = img.to(device)         # B×3×H×W
        gt_density = gt_density.to(device)  # B×1×H×W

        # The count of an image is the sum of its density map.
        gt_count     = gt_density.sum(dim=(1, 2, 3))  # B
        pred_density = model(img)                     # B×1×H×W
        pred_count   = pred_density.sum(dim=(1, 2, 3))

        count_error = pred_count - gt_count
        total_mae  += count_error.abs().sum().item()
        total_mse  += (count_error ** 2).sum().item()
        # Count images, not batches: the errors above are summed per image,
        # so dividing by len(loader) is only right when batch_size == 1.
        num_images += img.size(0)

N    = num_images
mae  = total_mae / N
rmse = (total_mse / N)**0.5

# Label the result with the checkpoint that was actually evaluated.
print(f"Part A — unet_depth2_nf64: MAE = {mae:.3f}, RMSE = {rmse:.3f}")

Part A — resnet34: MAE = 297.075, RMSE = 383.290


In [9]:
import torch.nn as nn

def compute_receptive_field(model: nn.Module):
    """
    Estimate the theoretical receptive field after each spatial layer.

    Walks ``model.named_modules()`` and, for every Conv2d / MaxPool2d /
    AvgPool2d / ConvTranspose2d encountered, updates a running receptive
    field and effective stride ("jump").

    Limitations:
      * Layers are visited in ``named_modules()`` order and are assumed to
        form a single sequential chain; parallel branches and skip
        connections are not modelled.
      * Only the first element of tuple-valued kernel/stride/dilation is
        used, i.e. square kernels are assumed.

    Args:
        model: the module whose layers are inspected.

    Returns:
        A list of dicts, one per matching layer, with keys ``layer``,
        ``type``, ``kernel``, ``stride``, ``dilation``, ``prev_rf``, ``rf``,
        ``prev_jump`` and ``jump``. ``jump`` may be a float when a
        transposed convolution upsamples by a factor that does not divide
        the current jump.
    """
    def _first(value):
        # Layer hyper-parameters are either ints or (h, w) tuples.
        return value[0] if isinstance(value, (tuple, list)) else value

    spatial_layers = (nn.Conv2d, nn.MaxPool2d, nn.AvgPool2d, nn.ConvTranspose2d)

    rf = 1        # receptive field so far
    jump = 1      # effective stride so far
    results = []

    for name, layer in model.named_modules():
        if not isinstance(layer, spatial_layers):
            continue

        k = _first(layer.kernel_size)
        s = _first(layer.stride)
        d = _first(getattr(layer, 'dilation', 1))  # AvgPool2d has no dilation

        prev_rf, prev_jump = rf, jump
        if isinstance(layer, nn.ConvTranspose2d):
            # A transposed conv *upsamples*: one output pixel sees at most
            # floor((k - 1) * d / s) + 1 input pixels, and the distance
            # between neighbouring outputs shrinks by the stride.
            rf = prev_rf + ((k - 1) * d // s) * prev_jump
            jump = prev_jump // s if prev_jump % s == 0 else prev_jump / s
        else:
            rf = prev_rf + (k - 1) * d * prev_jump
            jump = prev_jump * s

        results.append({
            'layer':    name,
            'type':     layer.__class__.__name__,
            'kernel':   k,
            'stride':   s,
            'dilation': d,
            'prev_rf':  prev_rf,
            'rf':       rf,
            'prev_jump': prev_jump,
            'jump':     jump,
        })

    return results


# Show the per-layer receptive-field table for whichever `model` is currently
# bound in the kernel; the bare expression is rendered as the cell output.
compute_receptive_field(model)

[{'layer': 'model.inc.0.double_conv.0',
  'type': 'Conv2d',
  'kernel': 3,
  'stride': 1,
  'dilation': 1,
  'prev_rf': 1,
  'rf': 3,
  'prev_jump': 1,
  'jump': 1},
 {'layer': 'model.inc.0.double_conv.3',
  'type': 'Conv2d',
  'kernel': 3,
  'stride': 1,
  'dilation': 1,
  'prev_rf': 3,
  'rf': 5,
  'prev_jump': 1,
  'jump': 1},
 {'layer': 'model.inc.1',
  'type': 'MaxPool2d',
  'kernel': 2,
  'stride': 2,
  'dilation': 1,
  'prev_rf': 5,
  'rf': 6,
  'prev_jump': 1,
  'jump': 2},
 {'layer': 'model.downs.0.down.0',
  'type': 'MaxPool2d',
  'kernel': 2,
  'stride': 2,
  'dilation': 1,
  'prev_rf': 6,
  'rf': 8,
  'prev_jump': 2,
  'jump': 4},
 {'layer': 'model.downs.0.down.1.double_conv.0',
  'type': 'Conv2d',
  'kernel': 3,
  'stride': 1,
  'dilation': 1,
  'prev_rf': 8,
  'rf': 16,
  'prev_jump': 4,
  'jump': 4},
 {'layer': 'model.downs.0.down.1.double_conv.3',
  'type': 'Conv2d',
  'kernel': 3,
  'stride': 1,
  'dilation': 1,
  'prev_rf': 16,
  'rf': 24,
  'prev_jump': 4,
  'jump': 

In [28]:
# Receptive-field table for the ResNet-50 architecture, built without a
# checkpoint. `get_model` is already imported in the first cell. The U-Net
# checkpoint load that used to precede this line was dead code: its result
# was overwritten immediately.
model = get_model("resnet50")
compute_receptive_field(model)

[{'layer': 'inc.0',
  'type': 'Conv2d',
  'kernel': 7,
  'stride': 2,
  'dilation': 1,
  'prev_rf': 1,
  'rf': 7,
  'prev_jump': 1,
  'jump': 2},
 {'layer': 'down1.0',
  'type': 'MaxPool2d',
  'kernel': 3,
  'stride': 2,
  'dilation': 1,
  'prev_rf': 7,
  'rf': 11,
  'prev_jump': 2,
  'jump': 4},
 {'layer': 'down1.1.0.conv1',
  'type': 'Conv2d',
  'kernel': 1,
  'stride': 1,
  'dilation': 1,
  'prev_rf': 11,
  'rf': 11,
  'prev_jump': 4,
  'jump': 4},
 {'layer': 'down1.1.0.conv2',
  'type': 'Conv2d',
  'kernel': 3,
  'stride': 1,
  'dilation': 1,
  'prev_rf': 11,
  'rf': 19,
  'prev_jump': 4,
  'jump': 4},
 {'layer': 'down1.1.0.conv3',
  'type': 'Conv2d',
  'kernel': 1,
  'stride': 1,
  'dilation': 1,
  'prev_rf': 19,
  'rf': 19,
  'prev_jump': 4,
  'jump': 4},
 {'layer': 'down1.1.0.downsample.0',
  'type': 'Conv2d',
  'kernel': 1,
  'stride': 1,
  'dilation': 1,
  'prev_rf': 19,
  'rf': 19,
  'prev_jump': 4,
  'jump': 4},
 {'layer': 'down1.1.1.conv1',
  'type': 'Conv2d',
  'kernel': 1

In [None]:
# Evaluate the ResNet-50 weights on the Part A loader.
# The weights path is relative to the notebook's working directory, matching
# the VGG weights path used elsewhere in this notebook.
model = get_model("resnet50",
                  cpt="../models/pth/part_A_resnet50.pth",
                  device=device,
                  )
model.eval()

# Running sums of the absolute / squared count error over all images.
total_mae = 0.0
total_mse = 0.0
num_images = 0

with torch.no_grad():
    for img, gt_density in val_loader_A:
        img        = img.to(device)         # B×3×H×W
        gt_density = gt_density.to(device)  # B×1×H×W

        # The count of an image is the sum of its density map.
        gt_count     = gt_density.sum(dim=(1, 2, 3))  # B
        pred_density = model(img)                     # B×1×H×W
        pred_count   = pred_density.sum(dim=(1, 2, 3))

        count_error = pred_count - gt_count
        total_mae  += count_error.abs().sum().item()
        total_mse  += (count_error ** 2).sum().item()
        # Count images, not batches: the errors above are summed per image,
        # so dividing by len(loader) is only right when batch_size == 1.
        num_images += img.size(0)

N    = num_images
mae  = total_mae / N
rmse = (total_mse / N)**0.5

# Label the result with the architecture that was actually evaluated.
print(f"Part A — resnet50: MAE = {mae:.3f}, RMSE = {rmse:.3f}")


## VGG encoder

In [None]:
# Evaluate the VGG19-bn weights on the Part A loader.
# Use the shared `device` object (as the ResNet-50 cell does) so the model and
# the input tensors are guaranteed to live on the same device.
model = get_model("vgg19_bn",
                  cpt="../models/pth/part_A_vgg19.pth",
                  device=device)
model.eval()

# Running sums of the absolute / squared count error over all images.
total_mae = 0.0
total_mse = 0.0
num_images = 0

with torch.no_grad():
    for img, gt_density in val_loader_A:
        img        = img.to(device)         # B×3×H×W
        gt_density = gt_density.to(device)  # B×1×H×W

        # The count of an image is the sum of its density map.
        gt_count     = gt_density.sum(dim=(1, 2, 3))  # B
        pred_density = model(img)                     # B×1×H×W
        pred_count   = pred_density.sum(dim=(1, 2, 3))

        count_error = pred_count - gt_count
        total_mae  += count_error.abs().sum().item()
        total_mse  += (count_error ** 2).sum().item()
        # Count images, not batches: the errors above are summed per image,
        # so dividing by len(loader) is only right when batch_size == 1.
        num_images += img.size(0)

N    = num_images
mae  = total_mae / N
rmse = (total_mse / N)**0.5

print(f"Part A — VGG19bn: MAE = {mae:.3f}, RMSE = {rmse:.3f}")