## Crowd Counter Model
Import needed libraries

In [1]:
import math
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import DataParallel
from torch.optim import AdamW
from torch.utils.data import DataLoader
import torch.distributed as dist
from torch.nn.parallel import DistributedDataParallel
from torch.utils.data.distributed import DistributedSampler
import torchvision
import torchvision.models as models
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from torchvision.transforms import Compose, Resize, Lambda, ToTensor
import warnings
# Suppress the specific UserWarning
warnings.filterwarnings("ignore", message="The default value of the antialias parameter.*", category=UserWarning)
# %env CUDA_VISIBLE_DEVICES=5,6,7
device = torch.device("cuda:10" if torch.cuda.is_available() else torch.device("cpu"))

In [None]:
torch.cuda.

In [2]:
# Define the transformation for the images (you can customize this based on your needs)
test_size = (512,512)
out_size = (64,64)

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize(test_size),
])

# Define a custom dataset class
class CrowdDataset(torch.utils.data.Dataset):
    def __init__(self, root='jhu_crowd_v2.0/', split='train', transform=None):
        self.input_folder = os.path.join(root, split, 'images')
        self.output_folder = os.path.join(root, split, 'den')
        self.input_dataset = datasets.ImageFolder(self.input_folder, transform=transform)
        self.classes = self.input_dataset.classes
        self.indices = list(range(len(self.input_dataset)))

    def __getitem__(self, index):
        # Load input image
        input_data = self.input_dataset[self.indices[index]][0]  # [0] to get the data (image)

        # Load output data from CSV file
        image_name = os.path.basename(self.input_dataset.imgs[self.indices[index]][0])
        csv_path = os.path.join(self.output_folder, f"{image_name.replace('.jpg', '.csv')}")
        df = pd.read_csv(csv_path, header=None)
        original_data = torch.tensor(df.values).float()
        # check if any values are nan
        if torch.isnan(original_data).any():
            print(f"NaN found in {csv_path}")
            # original_data[torch.isnan(original_data)] = 0
        resized_tensor = F.interpolate(original_data.unsqueeze(0).unsqueeze(0), size=out_size, mode='bilinear', align_corners=False)
        # check if any values are nan
        if torch.isnan(resized_tensor).any():
            print(f"NaN found in {csv_path}")
            print("error in interpolation")
            # resized_tensor[torch.isnan(resized_tensor)] = 0
        resized_tensor = resized_tensor.squeeze(0).squeeze(0)
        output_data = resized_tensor * (original_data.sum() / resized_tensor.sum())
        if  resized_tensor.sum() == 0:
            print(f"Zero sum found in {csv_path}")
            print("error in interpolation")
            # output_data[torch.isnan(output_data)] = 0
        # check if any values are nan
        if torch.isnan(output_data).any():
            print(f"NaN found in {csv_path}")
            print("error in regularization")
            output_data[torch.isnan(output_data)] = 0
        return {'input': input_data, 'output': output_data}


    def __len__(self):
        return len(self.indices)


In [3]:
class UrebBlock(nn.Module):
    def __init__(self, in_channels):
        super(UrebBlock, self).__init__()
        self.CB = nn.Sequential(
            nn.Conv2d(in_channels, 32, kernel_size=1),
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, padding=1),  # Add padding to keep the spatial dimensions
            nn.ReLU(),
            nn.Conv2d(32, 1, kernel_size=3, padding=1),   # Add padding to keep the spatial dimensions
        )
        self.DR = nn.Sequential(
            nn.Conv2d(in_channels, 32, kernel_size=1),
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, padding=1),  # Add padding to keep the spatial dimensions
        )
        self.CEB = nn.Sequential(
            nn.Conv2d(33, 32, kernel_size=1),
            nn.ReLU(),
            nn.Conv2d(32, 16, kernel_size=3, padding=1),  # Add padding to keep the spatial dimensions
            nn.ReLU(),
            nn.Conv2d(16, 16, kernel_size=3, padding=1),   # Add padding to keep the spatial dimensions
            nn.ReLU(),
            nn.Conv2d(16, 1, kernel_size=3, padding=1),   # Add padding to keep the spatial dimensions
        )
    def forward(self, x):
        r = self.CB(x)
        cm = self.CEB(torch.cat((r, self.DR(x)), dim=1))
        return cm * r
        

In [4]:
class VGGadjusted(nn.Module):
    def __init__(self):
        super(VGGadjusted, self).__init__()
        self.vgg = models.vgg16(weights=None).features
        self.C3 = self.vgg[:17]
        self.C4 = self.vgg[17:24]
        self.C5 = self.vgg[24:]
        self.C6 = nn.Sequential(
            nn.Conv2d(512, 32, kernel_size=1),
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, padding=1),  # Add padding to keep the spatial dimensions
            nn.ReLU(),
            nn.Conv2d(32, 1, kernel_size=3, padding=1),   # Add padding to keep the spatial dimensions
            nn.Upsample(size=(16, 16), mode='bilinear', align_corners=False)
        )
        self.ureb3 = UrebBlock(256)
        self.ureb4 = UrebBlock(512)
        self.ureb5 = UrebBlock(512)
        self.y5_upsample = nn.Upsample(size=(32, 32), mode='bilinear', align_corners=False)
        self.y4_upsample = nn.Upsample(size=(64, 64), mode='bilinear', align_corners=False)

    def forward(self, x):
        # print('x', torch.isnan(x).any())
        c3 = self.C3(x)
        # print('c3', torch.isnan(c3).any())
        r3 = self.ureb3(c3)
        # print('r3', torch.isnan(r3).any())
        c4 = self.C4(c3)
        # print('c4', torch.isnan(c4).any())
        r4 = self.ureb4(c4)
        # print('r4', torch.isnan(r4).any())
        c5 = self.C5(c4)
        # print('c5', torch.isnan(c5).any())
        r5 = self.ureb5(c5)
        # print('r5', torch.isnan(r5).any())
        y6 = self.C6(c5)
        # print('y6', torch.isnan(y6).any())
        y5 = y6 + r5
        # print('y5', torch.isnan(y5).any())
        y4 = self.y5_upsample(y5) + r4
        # print('y4', torch.isnan(y4).any())
        y3 = self.y4_upsample(y4) + r3
        # print('y3', torch.isnan(y3).any())
        # see if any of these vectors have any nan values
        # for i, j in enumerate([y3, y4, y5, y6, r3, r4, r5]):
        #     print(i, torch.isnan(j).any())
        out = {'y3': y3, 'y4': y4, 'y5': y5, 'y6': y6, 'cm3': r3, 'cm4': r4, 'cm5': r5,}
        return out



In [16]:
class LossFunction(nn.Module):
    def __init__(self):
        super(LossFunction, self).__init__()
        self.lambda_c = -0.001
        self.lambda_d = 10000
        self.cm_keys = ['cm3', 'cm4', 'cm5']
        self.y_keys = ['y3', 'y4', 'y5']

    def _scale_regularize_tensor(self, tensor, out_size):
        # print(tensor.shape)
        # print(tuple(list(out_size)[1:]))
        out_size = tuple(list(out_size)[1:])
        # print(tensor.unsqueeze(0).shape)
        resized = F.interpolate(tensor.unsqueeze(0), size=out_size, mode='bilinear', align_corners=False)
        resized = resized.squeeze(0).squeeze(0)
        scaled_tensor = resized * (tensor.sum() / resized.sum())
        return scaled_tensor

    def forward(self, y_hat, y):
        # print(y_hat.keys())
        # print('cm failure') if any([torch.isnan(y_hat[i]).any().item() for i in self.cm_keys ]) else print('cm success')
        # print('y_hat failure') if any([torch.isnan(y_hat[i]).any().item() for i in self.y_keys ]) else print('y_hat success')
        # print('y failure') if torch.isnan(y).any().item() else print('y success')
        # print(y_hat['cm3'].shape)
        # print(y_hat['cm3'])
        std_dev = 0.1
        loss_C = sum(torch.log(torch.relu(y_hat[cm_key]) + torch.abs(torch.randn_like(y_hat[cm_key]) * std_dev) ).mean() for cm_key in self.cm_keys)
        # print("predicted", (y_hat['cm3'] * y_hat['y3']).shape)
        # print("actual", (y_hat['cm3'] * self._scale_regularize_tensor(y, y_hat['cm3'].squeeze(1).shape).unsqueeze(1)).shape)
        # print(y_hat['cm3'].shape)
        # print(y_hat['y3'].shape)
        # print(self._scale_regularize_tensor(y, y_hat['cm3'].squeeze(1).shape).unsqueeze(1).shape)
        loss_d = sum(F.mse_loss(y_hat[cm_key] * y_hat[y_key], y_hat[cm_key] * self._scale_regularize_tensor(y, y_hat[cm_key].squeeze(1).shape).unsqueeze(1)) for y_key, cm_key in list(zip(self.y_keys, self.cm_keys)))
        # print('loss_C', loss_C)
        # print('loss_d', loss_d)
        
        return self.lambda_d * loss_d + self.lambda_c * loss_C


In [17]:
def training():
    # Initialize distributed training
    # torch.distributed.init_process_group(
    #     backend='nccl',
    #     rank=0,
    #     world_size=3  # Total number of processes (GPUs)
    # )

    # make all the datasets
    train_dataset = CrowdDataset(split='train', transform=transform)
    val_dataset = CrowdDataset(split='val', transform=None)
    test_dataset = CrowdDataset(split='test', transform=None)

    #make a dataloader
    # train_sampler = DistributedSampler(train_dataset)
    train_dataloader = DataLoader(train_dataset, batch_size=8, shuffle=True)
    val_dataloader = DataLoader(val_dataset, batch_size=8, shuffle=False)
    test_dataloader = DataLoader(test_dataset, batch_size=8, shuffle=False)

    model = VGGadjusted()
    optimizer = AdamW(model.parameters(), lr=1e-4)
    criterion = LossFunction()
    epochs = 3
    loop = tqdm(total=len(train_dataloader)*epochs, position=0, leave=False)
    val_loss = []
    train_loss = []


    checkpoint = {
        'epoch': 0,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'loss': 0.0,
    }
    # model = DataParallel(model)
    model = model.to(device)
    model.train()
    for epoch in range(epochs):
        for i, batch in enumerate(train_dataloader):
            optimizer.zero_grad()
            input_data = batch['input'].to(device)
            output_data = batch['output'].to(device)
            output_pred = model(input_data)
            # for key in output_pred.keys():
            #     print('key',torch.isnan(output_pred[key]).any().item())
            loss = criterion(output_pred, output_data)
            loss.backward()
            if i % 10 == 0:
                train_loss.append(loss.item())
                # val_loss.append()
            optimizer.step()
            loop.set_description(f"Epoch [{epoch}/{epochs}]")
            loop.set_postfix(loss=loss.item())
            loop.update(1)

            for name, param in model.named_parameters():
                if torch.isnan(param).any().item():
                    print("MODEL FAILURE")

        checkpoint['epoch'] = epoch
        checkpoint['model_state_dict'] = model.state_dict()
        checkpoint['optimizer_state_dict'] = optimizer.state_dict()
        checkpoint['loss'] = loss.item()
        torch.save(checkpoint, f'checkpoint_{epoch}.pth')
    loop.close()
    # Clean up
    # torch.distributed.destroy_process_group()
    plt.plot(train_loss)
    plt.show()

In [7]:
def validation():
    #evaluate
    val_mse = []
    checkpoint = torch.load('checkpoint_0.pth')
    model = VGGadjusted()
    # model = DataParallel(model)
    model = model.to(device)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()
    val_dataset = CrowdDataset(split='val', transform=transform)
    val_dataloader = DataLoader(val_dataset, batch_size=1, shuffle=False)
    criterion = LossFunction()
    val_loss = []
    for i, batch in enumerate(val_dataloader):
        input_data = batch['input'].to(device)
        output_data = batch['output'].to(device)
        output_pred = model(input_data)
        estimate = output_pred['y3'].sum().item()
        loss = criterion(output_pred, output_data)
        val_mse.append(estimate)
        val_loss.append(loss.item())
    plt.plot(val_loss)
    plt.show()
    ground_truth = pd.read_csv('jhu_crowd_v2.0/val/image_labels.txt', header=None)
    gt = list(ground_truth[1])
    print('MSE: ', sum([(i[1] - i[0])**2 for i in list(zip(gt, val_mse))])/len(gt))
    print('MAE: ', sum([abs(i[1] - i[0]) for i in list(zip(gt, val_mse))])/len(gt))    

In [18]:
training()

Epoch [0/3]:   7%|▋         | 58/852 [01:52<27:34,  2.08s/it, loss=0.0103] 

Zero sum found in jhu_crowd_v2.0/train/den/1495.csv
error in interpolation
NaN found in jhu_crowd_v2.0/train/den/1495.csv
error in regularization


Epoch [0/3]:   8%|▊         | 72/852 [02:19<25:02,  1.93s/it, loss=0.0123] 

Zero sum found in jhu_crowd_v2.0/train/den/4228.csv
error in interpolation
NaN found in jhu_crowd_v2.0/train/den/4228.csv
error in regularization


Epoch [0/3]:  14%|█▍        | 123/852 [03:47<18:54,  1.56s/it, loss=0.0172] 

Zero sum found in jhu_crowd_v2.0/train/den/3089.csv
error in interpolation
NaN found in jhu_crowd_v2.0/train/den/3089.csv
error in regularization


Epoch [0/3]:  16%|█▌        | 133/852 [04:05<22:55,  1.91s/it, loss=0.00912]

Zero sum found in jhu_crowd_v2.0/train/den/1564.csv
error in interpolation
NaN found in jhu_crowd_v2.0/train/den/1564.csv
error in regularization


Epoch [0/3]:  19%|█▉        | 163/852 [04:53<17:50,  1.55s/it, loss=0.00893]

Zero sum found in jhu_crowd_v2.0/train/den/1002.csv
error in interpolation
NaN found in jhu_crowd_v2.0/train/den/1002.csv
error in regularization


Epoch [0/3]:  19%|█▉        | 165/852 [04:56<17:43,  1.55s/it, loss=0.00893]

In [68]:
validation()

torch.Size([64, 64])
torch.Size([64, 64])
torch.Size([64, 64])


ValueError: Input and output must have the same number of spatial dimensions, but got input with spatial dimensions of [64, 64] and output size of (1, 64, 64). Please provide input tensor in (N, C, d1, d2, ...,dK) format and output size in (o1, o2, ...,oK) format.