Imports and configuration

In [7]:
import os
from pathlib import Path
import pandas as pd
import torch
import torch.nn as nn
from torchvision.models import vgg16
import torch.optim as optim
from tqdm.notebook import tqdm
import numpy as np
import random
import cv2
from google.colab import drive
import matplotlib.pyplot as plt # For plotting results

# --- Configuration (Colab GPU run, Part A only) ---
# NOTE: Update this path to where your 'outputs' folder is located in Google Drive.
COLAB_DIR = Path("/content/drive/MyDrive/Colab Notebooks")
OUT_DIR = COLAB_DIR / "outputs"
SAVE_TENSORS_DIR = "tensors"

# --- Path-fix configuration ---
# The manifest stores absolute paths under OLD_ROOT (a Windows desktop path);
# they are rewritten below to live under NEW_ROOT (the Colab/Drive folder).
OLD_ROOT = r"C:\Users\ACEPC\Desktop\DeepVision Crowd Monitoring"
NEW_ROOT = str(COLAB_DIR)
# Manifest columns that hold file paths and therefore need rewriting.
PATH_COLUMNS = ['img_path', 'density_path', 'density_full_path']

# --- Hyperparameters ---
BATCH_SIZE = 10
LEARNING_RATE = 1e-6
NUM_EPOCHS = 50
NUM_WORKERS = 4  # DataLoader worker processes
LOG_BATCH_INTERVAL = 50  # Print the batch loss every N training batches

# --- Reproducibility ---
# Seed every RNG the notebook imports so shuffling and weight initialisation
# repeat across "Restart & Run All" executions.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# --- PyTorch Setup ---
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Using device: {DEVICE}")

# --- Load Manifest File ---
MANIFEST_PATH = OUT_DIR / "preprocess_manifest.csv"
if not MANIFEST_PATH.exists():
    # Include the path that was checked so a wrong COLAB_DIR is easy to spot.
    raise FileNotFoundError(
        f"Manifest not found at {MANIFEST_PATH}. Ensure Drive is mounted and data is uploaded."
    )
manifest_df = pd.read_csv(MANIFEST_PATH)
TENSORS_BASE = OUT_DIR / SAVE_TENSORS_DIR
USE_SAVED_TENSORS = True

# --- Path fixes ---
# 1) swap the Windows root for the Colab root, 2) turn the remaining
# backslashes into forward slashes. Both are literal (non-regex) replacements.
for _col in PATH_COLUMNS:
    manifest_df[_col] = (
        manifest_df[_col]
        .str.replace(OLD_ROOT, NEW_ROOT, regex=False)
        .str.replace('\\', '/', regex=False)
    )

# --- Filter for Part A Only ---
# Keep only rows whose 'part' column is 'part_A'.
manifest_df = manifest_df[manifest_df['part'] == 'part_A'].reset_index(drop=True)

print("✅ Manifest paths successfully cleaned and updated for Colab.")
print(f"Manifest loaded and filtered. Total Part A samples: {len(manifest_df)}.")

Using device: cuda:0
✅ Manifest paths successfully cleaned and updated for Colab.
Manifest loaded and filtered. Total Part A samples: 482.


Dataset and DataLoader Utilities

This cell contains the CrowdDatasetTorch class and the make_dataloader function, which are essential for feeding data into your model.

In [8]:
from torch.utils.data import Dataset, DataLoader

# --- ImageNet statistics (must match the ones used by the preprocessing notebook) ---
IMAGENET_MEAN = np.array([0.485, 0.456, 0.406], dtype=np.float32)
IMAGENET_STD = np.array([0.229, 0.224, 0.225], dtype=np.float32)

class CrowdDatasetTorch(Dataset):
    """Dataset yielding (image, density map) pairs described by a manifest.

    Each manifest row must provide the columns ``img_path``, ``density_path``
    and ``density_full_path``; ``part`` and ``split`` are additionally needed
    when cached tensors are used.

    Args:
        manifest_df: DataFrame with one row per sample.
        use_full_density: If True read ``density_full_path`` instead of
            ``density_path`` when loading from the original files.
        transform: Optional callable taking the tuple ``(image, density)`` and
            returning a new ``(image, density)`` tuple.
        use_saved_tensors: If True (and ``tensors_base`` is given) try to load
            ``<tensors_base>/<part>/<split>/<image stem>.pt`` first.
        tensors_base: Root directory of the cached ``.pt`` files, or None.
    """

    def __init__(self, manifest_df, use_full_density=False, transform=None, use_saved_tensors=True, tensors_base=None):
        self.df = manifest_df.reset_index(drop=True)
        self.use_full_density = use_full_density
        self.transform = transform
        self.use_saved_tensors = use_saved_tensors
        self.tensors_base = Path(tensors_base) if tensors_base is not None else None
        # Shaped (3, 1, 1) so they broadcast over a channel-first image.
        self.mean_t = torch.from_numpy(IMAGENET_MEAN).view(3,1,1)
        self.std_t = torch.from_numpy(IMAGENET_STD).view(3,1,1)

    def __len__(self):
        """Number of rows in the manifest."""
        return len(self.df)

    def _load_image_tensor_from_file(self, img_path):
        """Read an image with OpenCV and return a normalised channel-first float tensor.

        Raises:
            FileNotFoundError: if OpenCV cannot read ``img_path``.
        """
        img_bgr = cv2.imread(str(img_path))
        if img_bgr is None:
            raise FileNotFoundError(f"Image not found: {img_path}")

        # BGR -> RGB, scale to [0, 1], then HWC -> CHW.
        img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
        t = torch.from_numpy(img_rgb).permute(2,0,1).contiguous()

        return (t - self.mean_t) / self.std_t

    def _load_density_tensor(self, den_path):
        """Load a ``.npy`` density map as float32 and add a leading axis."""
        den = np.load(den_path).astype(np.float32)
        return torch.from_numpy(den).unsqueeze(0)

    def _load_from_files(self, row):
        """Load image and density for ``row`` from the original image/.npy files."""
        den_column = 'density_full_path' if self.use_full_density else 'density_path'
        img_t = self._load_image_tensor_from_file(row['img_path'])
        den_t = self._load_density_tensor(row[den_column])
        return img_t, den_t

    def __getitem__(self, idx):
        """Return ``{"image", "density", "img_path"}`` for manifest row ``idx``."""
        row = self.df.iloc[idx]
        img_t = den_t = None

        if self.use_saved_tensors and self.tensors_base is not None:
            base_name = Path(row['img_path']).stem
            pt_path = self.tensors_base / row['part'] / row['split'] / (base_name + ".pt")
            if pt_path.exists():
                # map_location keeps the sample on the CPU even if the cache was
                # written from CUDA tensors; DataLoader workers and pin_memory
                # both expect CPU tensors.
                d = torch.load(str(pt_path), map_location="cpu")
                img_t = d['image']
                den_t = d['density']
            else:
                print(f"Warning: .pt file not found at {pt_path}. Loading from original files.")

        if img_t is None:
            # Either caching is disabled or the cached file is missing.
            img_t, den_t = self._load_from_files(row)

        if self.transform:
            img_t, den_t = self.transform((img_t, den_t))

        return {"image": img_t, "density": den_t, "img_path": str(row['img_path'])}

def make_dataloader(manifest_df, batch_size=8, shuffle=True, num_workers=4, use_full_density=False, use_saved_tensors=True, tensors_base=None, transform=None):
    """Build a DataLoader over a ``CrowdDatasetTorch`` for the given manifest.

    Args:
        manifest_df: DataFrame describing the samples to serve.
        batch_size: Samples per batch.
        shuffle: Whether the loader reshuffles every epoch.
        num_workers: Number of DataLoader worker processes.
        use_full_density: Forwarded to ``CrowdDatasetTorch``.
        use_saved_tensors: Forwarded to ``CrowdDatasetTorch``.
        tensors_base: Forwarded to ``CrowdDatasetTorch``.
        transform: Optional ``(image, density) -> (image, density)`` callable
            forwarded to ``CrowdDatasetTorch`` (default None keeps the previous
            behaviour).

    Returns:
        torch.utils.data.DataLoader yielding the dataset's sample dicts.
    """
    ds = CrowdDatasetTorch(
        manifest_df,
        use_full_density=use_full_density,
        transform=transform,
        use_saved_tensors=use_saved_tensors,
        tensors_base=tensors_base
    )
    return DataLoader(
        ds,
        batch_size=batch_size,
        shuffle=shuffle,
        num_workers=num_workers,
        # Pinned host memory only helps host-to-GPU copies; skip it on CPU-only runtimes.
        pin_memory=torch.cuda.is_available()
    )

Data Split and DataLoader Initialization

This cell splits the manifest into training and testing sets and creates the DataLoaders.

In [10]:

# --- Split Data (Part A samples only) ---
# Rows are routed by their 'split' column: 'train_data' vs 'test_data'.
_is_train = manifest_df['split'] == 'train_data'
_is_test = manifest_df['split'] == 'test_data'
train_df = manifest_df[_is_train]
test_df = manifest_df[_is_test]

# --- Create DataLoaders ---
# Settings shared by both loaders are gathered once.
_shared_loader_kwargs = dict(
    num_workers=NUM_WORKERS,
    use_saved_tensors=USE_SAVED_TENSORS,
    tensors_base=TENSORS_BASE,
)

# Training: shuffled mini-batches of BATCH_SIZE.
train_loader = make_dataloader(train_df, batch_size=BATCH_SIZE, shuffle=True, **_shared_loader_kwargs)

# Evaluation: one image per batch, fixed order.
test_loader = make_dataloader(test_df, batch_size=1, shuffle=False, **_shared_loader_kwargs)

print(f"Training samples (Part A): {len(train_df)}, Batches: {len(train_loader)}")
print(f"Testing samples (Part A): {len(test_df)}, Batches: {len(test_loader)}")

Training samples (Part A): 300, Batches: 30
Testing samples (Part A): 182, Batches: 182


Model Architecture (VGG-based Regressor)

This cell defines the custom Convolutional Neural Network model using a pre-trained VGG-16 backbone.

In [11]:
class CrowdCounterModel(nn.Module):
    """Density-map regressor: truncated VGG-16 feature extractor plus a small conv head.

    The first 23 layers of ``vgg16().features`` are reused and split in two:

    * ``features_frozen``   - layers 0..18, ``requires_grad`` disabled.
    * ``features_unfrozen`` - layers 19..22, left trainable for fine-tuning.

    ``frontend`` is the newly initialised head (512 -> 256 -> 128 -> 1 channels)
    that produces the single-channel density map.

    NOTE(review): in torchvision's VGG-16 layout, indices 19..22 appear to be the
    second and third conv layers (each followed by ReLU) of the *fourth* conv
    block rather than "block 5" - confirm the split matches the intent.

    Args:
        load_weights: If True the backbone starts from torchvision's 'DEFAULT'
            pretrained weights; otherwise it is randomly initialised.
    """

    def __init__(self, load_weights=True):
        super().__init__()

        pretrained = 'DEFAULT' if load_weights else None
        backbone_layers = list(vgg16(weights=pretrained).features.children())

        # Re-wrapping the slices in fresh Sequentials renumbers the second
        # group from 0 (so its parameters are named features_unfrozen.0, .2).
        self.features_frozen = nn.Sequential(*backbone_layers[:19])
        self.features_unfrozen = nn.Sequential(*backbone_layers[19:23])

        # Only the first group is excluded from gradient updates.
        for frozen_param in self.features_frozen.parameters():
            frozen_param.requires_grad = False

        # Regression head mapping 512 backbone channels to one density channel.
        head_layers = [
            nn.Conv2d(512, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 1, kernel_size=1),
        ]
        self.frontend = nn.Sequential(*head_layers)

    def forward(self, x):
        """Run frozen backbone -> trainable backbone -> head and return the density map."""
        backbone_out = self.features_unfrozen(self.features_frozen(x))
        return self.frontend(backbone_out)

# --- Instantiate Model and Setup Optimizer/Loss ---
model = CrowdCounterModel(load_weights=True).to(DEVICE)
criterion = nn.MSELoss()

# --- Parameter groups for fine-tuning ---
# The new head trains at the notebook-wide LEARNING_RATE from the config cell
# (previously re-hard-coded here as 1e-6, so editing the config had no effect).
BASE_LR = LEARNING_RATE
# The trainable pretrained VGG layers use a smaller learning rate.
VGG_LR = 1e-7

# Group 1: parameters of the custom head (BASE_LR)
params_frontend = list(model.frontend.parameters())

# Group 2: parameters of the trainable VGG layers (VGG_LR)
params_vgg_unfrozen = list(model.features_unfrozen.parameters())

# The frozen layers are deliberately left out of the optimizer.
optimizer = optim.Adam([
    {'params': params_frontend, 'lr': BASE_LR},
    {'params': params_vgg_unfrozen, 'lr': VGG_LR}
])

print("✅ Model, Optimizer (with Fine-Tuning LR), and Loss function are set up.")

✅ Model, Optimizer (with Fine-Tuning LR), and Loss function are set up.


Evaluation Function (MAE and RMSE)

This function will run the model on the test data to calculate the standard crowd counting metrics: MAE (Mean Absolute Error) and RMSE (Root Mean Squared Error).

In [12]:
def evaluate_model(model, data_loader, device):
    """Compute count-level MAE and RMSE of ``model`` over ``data_loader``.

    The count of an image is the sum of its density map. Counts are computed
    per sample, so the metrics are correct for any batch size (summing a whole
    batch into one number, as before, was only right for ``batch_size=1``).

    Args:
        model: Network mapping an image batch to a density-map batch.
        data_loader: Iterable of dicts with ``'image'`` and ``'density'`` tensors
            whose first dimension is the batch dimension.
        device: Device the batches are moved to before the forward pass.

    Returns:
        dict with keys ``'mae'`` and ``'rmse'`` (Python floats).

    Raises:
        ValueError: if ``data_loader`` yields no samples.

    Note:
        The model is switched to eval mode and left in it; callers that go on
        training must call ``model.train()`` again.
    """
    model.eval()
    mae_sum = 0.0
    mse_sum = 0.0
    total_samples = 0

    with torch.no_grad():
        for batch in tqdm(data_loader, desc="Evaluating"):
            images = batch['image'].to(device)
            gt_density = batch['density'].to(device)

            pred_density = model(images)

            # Flatten everything but the batch axis and sum -> one count per image.
            n_in_batch = images.size(0)
            pred_counts = pred_density.reshape(n_in_batch, -1).sum(dim=1)
            gt_counts = gt_density.reshape(n_in_batch, -1).sum(dim=1)
            count_errors = pred_counts - gt_counts

            mae_sum += count_errors.abs().sum().item()
            mse_sum += (count_errors ** 2).sum().item()
            total_samples += n_in_batch

    if total_samples == 0:
        raise ValueError("evaluate_model received no samples; cannot compute MAE/RMSE.")

    final_mae = mae_sum / total_samples
    final_rmse = (mse_sum / total_samples) ** 0.5

    return {'mae': final_mae, 'rmse': final_rmse}

Main Training Loop

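This is the core training logic: the model is trained on the training data for each epoch and validated on the test loader after every epoch. The final plot uses `plot_training_results`, which is defined in the "Output Visualization" cell below, so run that cell before this one.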

In [1]:
# --- Main Training Loop: bookkeeping ---
# Where the best checkpoint (lowest validation MAE) is written.
MODEL_SAVE_NAME = "best_crowd_counter_model.pth"
MODEL_SAVE_PATH = OUT_DIR / MODEL_SAVE_NAME

# Print the batch loss every LOG_BATCH_INTERVAL training batches.
# NOTE(review): the recorded run reports 30 training batches per epoch, so an
# interval of 50 never triggers the verbose print - confirm this is intended.
LOG_BATCH_INTERVAL = 50

# Lowest validation MAE seen so far; infinity until a first result exists.
best_mae = float('inf')

# Histories: per-batch loss, per-epoch mean loss, per-epoch validation metrics.
batch_losses, epoch_losses = [], []
epoch_val_mae, epoch_val_rmse = [], []


# --- HELPER FUNCTION: Remap Keys for Fine-Tuning Structure ---
def remap_vgg_keys(state_dict, split_index=19):
    """Rename ``features.<i>.*`` keys to the split frozen/unfrozen layout.

    A checkpoint saved from a model with one continuous ``features`` module is
    converted so it can be loaded into a model whose backbone is split into
    ``features_frozen`` (original indices below ``split_index``, numbering kept)
    and ``features_unfrozen`` (indices from ``split_index`` on, renumbered from
    0, e.g. 19 -> 0 and 21 -> 2 for the default split).

    Only the leading ``features.<i>`` prefix is rewritten, so a matching
    ``.<i>.`` later in the key is left alone. Keys that do not start with
    ``features.`` (head keys, or keys already in the split layout) and keys
    whose second component is not a number are copied unchanged.

    Args:
        state_dict: Mapping of parameter name to value.
        split_index: First original layer index that belongs to
            ``features_unfrozen``.

    Returns:
        A new dict with remapped keys; values are the same objects.
    """
    old_prefix = 'features.'
    new_state_dict = {}
    for k, v in state_dict.items():
        new_key = k
        if k.startswith(old_prefix):
            index_str, sep, remainder = k[len(old_prefix):].partition('.')
            if index_str.isdigit():
                layer_index = int(index_str)
                if layer_index < split_index:
                    # Frozen part keeps the original layer numbering.
                    new_key = f"features_frozen.{layer_index}{sep}{remainder}"
                else:
                    # Trainable part is renumbered relative to the split point.
                    new_key = f"features_unfrozen.{layer_index - split_index}{sep}{remainder}"

        new_state_dict[new_key] = v
    return new_state_dict

# --- Resume logic and baseline MAE ---
# If a checkpoint exists, load it (remapping keys from the older single
# `features` layout when needed) and measure its MAE, so that only a model
# that really beats the checkpoint on disk can overwrite it.
if MODEL_SAVE_PATH.exists():
    try:
        # Load the raw state dict
        old_state_dict = torch.load(str(MODEL_SAVE_PATH), map_location=DEVICE)

        # Remap keys from the old structure to the new split structure
        remapped_state_dict = remap_vgg_keys(old_state_dict)

        # Load the remapped weights into the model
        model.load_state_dict(remapped_state_dict)

    except Exception as e:
        print(f"⚠️ Error loading/remapping previous model weights: {e}. Starting from fresh VGG initialization.")
        best_mae = float('inf')

    else:
        # Baseline = MAE of the loaded checkpoint, measured rather than
        # hard-coded, so it cannot drift out of sync with the file on disk.
        best_mae = evaluate_model(model, test_loader, DEVICE)['mae']

        print(f"✅ Resuming training: Weights remapped and loaded. Baseline MAE set to {best_mae:.2f}.")


print("Starting Training Loop (Validation Enabled)...")
for epoch in range(NUM_EPOCHS):
    # evaluate_model() leaves the model in eval mode, so re-enable training mode each epoch.
    model.train()
    epoch_total_loss = 0

    # --- Training Phase ---
    for i, batch in enumerate(tqdm(train_loader, desc=f"Epoch {epoch+1}/{NUM_EPOCHS} (Train)")):
        images = batch['image'].to(DEVICE)
        gt_density = batch['density'].to(DEVICE)

        # 1. Get prediction
        pred_density = model(images)

        # 2. Calculate Loss (Batch Loss)
        loss = criterion(pred_density, gt_density)

        # 3. Tracking and Verbose Output
        batch_loss_value = loss.item()
        batch_losses.append(batch_loss_value)
        epoch_total_loss += batch_loss_value

        # Verbose print for Batch Loss
        if (i + 1) % LOG_BATCH_INTERVAL == 0:
            tqdm.write(f"VERBOSE BATCH LOSS: {batch_loss_value:.4f}")

        # 4. Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # --- Epoch Summary ---
    avg_epoch_loss = epoch_total_loss / len(train_loader)
    epoch_losses.append(avg_epoch_loss)

    # --- Validation Run ---
    # NOTE(review): the test loader doubles as the validation set, so the
    # "best" checkpoint is selected on the same data it is later reported on.
    eval_metrics = evaluate_model(model, test_loader, DEVICE)
    mae = eval_metrics['mae']
    rmse = eval_metrics['rmse']

    # Store metrics for plotting
    epoch_val_mae.append(mae)
    epoch_val_rmse.append(rmse)

    # --- Save Best Model Logic ---
    save_status = ""
    if mae < best_mae:
        best_mae = mae
        torch.save(model.state_dict(), str(MODEL_SAVE_PATH))
        save_status = f" | *** NEW BEST MAE: {best_mae:.2f} (Model Saved) ***"
    else:
        save_status = f" | No improvement (Best: {best_mae:.2f})"

    # --- One-line epoch summary ---
    print(f"\n--- Epoch {epoch+1} Summary ---"
          f" | TRAIN Loss: {avg_epoch_loss:.4f}"
          f" | VAL MAE: {mae:.2f}"
          f" | VAL RMSE: {rmse:.2f}{save_status}")


# --- FINAL POST-TRAINING VISUALIZATION ---
print("\n" + "="*50)
print(f"ALL TRAINING EPOCHS COMPLETE ({NUM_EPOCHS} Epochs).")
# The plotting helper is defined in a later cell; do not let a missing
# definition raise a NameError after the whole training run has finished.
if 'plot_training_results' in globals():
    plot_training_results(batch_losses, epoch_losses, epoch_val_mae, epoch_val_rmse)
else:
    print("plot_training_results is not defined yet; run the visualization cell, then call "
          "plot_training_results(batch_losses, epoch_losses, epoch_val_mae, epoch_val_rmse).")
print(f"Best saved MAE across all epochs: {best_mae:.2f}")
print("\nTraining completed.")

NameError: name 'OUT_DIR' is not defined

Output Visualization.

In [None]:
# --- New Cell: Visualization Function ---
def plot_training_results(batch_losses, epoch_losses, epoch_val_mae, epoch_val_rmse):
    """Draw, save and show a two-panel summary of a training run.

    Left panel: mean training loss per epoch. Right panel: validation MAE and
    RMSE per epoch. The figure is written to ``OUT_DIR / 'training_results.png'``
    and then displayed.

    Args:
        batch_losses: Per-batch losses. Accepted for interface compatibility but
            not plotted (too dense to be readable).
        epoch_losses: Mean training loss for each epoch.
        epoch_val_mae: Validation MAE for each epoch.
        epoch_val_rmse: Validation RMSE for each epoch.
    """
    # Epochs are numbered from 1 on the x axis.
    epoch_axis = range(1, len(epoch_losses) + 1)

    fig, (loss_ax, metric_ax) = plt.subplots(1, 2, figsize=(15, 5))

    # --- Panel 1: training loss per epoch ---
    loss_ax.plot(epoch_axis, epoch_losses, 'b', label='Epoch Loss (MSE)')
    loss_ax.set_title('Training Loss Over Epochs')
    loss_ax.set_xlabel('Epoch')
    loss_ax.set_ylabel('Loss (MSE)')
    loss_ax.legend()
    loss_ax.grid(True)

    # --- Panel 2: validation metrics per epoch ---
    metric_ax.plot(epoch_axis, epoch_val_mae, 'r', label='Validation MAE')
    metric_ax.plot(epoch_axis, epoch_val_rmse, 'g', label='Validation RMSE')
    metric_ax.set_title('Validation Metrics (MAE & RMSE)')
    metric_ax.set_xlabel('Epoch')
    metric_ax.set_ylabel('Count Error')
    metric_ax.legend()
    metric_ax.grid(True)

    # Save to disk first, then display.
    fig.tight_layout()
    fig.savefig(str(OUT_DIR / 'training_results.png'))
    plt.show()

print("Visualization function defined.")