In [1]:
import sys
import numpy as np
import matplotlib.pyplot as plt

import torch, torchvision
import torchvision.transforms as T
from torch import nn
from torch import optim
from torchvision.models import resnet50
import torch.nn.functional as F
from torch.utils.data.dataloader import default_collate

sys.path.append("./libs/ACDA")
import Conv_DCFD as acda
from crowd import Crowd_sh

### Crowd counting
The paper uses ACDA trained with ImageNet as feature extractor and replace final linear layer with few standard transposed convolutional layers for recovering 

Note: wanted to use ShanghaiTech subset A dataset since it is smaller than UCF-QNRF, but couldn't find it. UCF-QNRF dataset available here:
https://www.crcv.ucf.edu/data/ucf-qnrf/

Using the ShanghaiTech subset A dataset since it is smaller than UCF-QNRF

Download link:
https://www.kaggle.com/datasets/tthien/shanghaitech

Paper that provided download link and the processing of the ShanghaiTech dataset (need to add citation to paper!)
https://github.com/cvlab-stonybrook/DM-Count
@inproceedings{wang2020DMCount,
  title={Distribution Matching for Crowd Counting},
  author={Boyu Wang and Huidong Liu and Dimitris Samaras and Minh Hoai},
  booktitle={Advances in Neural Information Processing Systems},
  year={2020},
}

For the crowd counting need to implement Resnet-18 (Ad-ResNet-s listed in Table A of appendix) but with out the first "max pool" layer and final "average pool" layer.
The Ad-ResNet-s network was very similar to ResNet-18, so the code for a ResNet-18 was used as a starting point and updated with the changes described in Appendix A.4 for crodwd counting.
Reference for how to implement ResNet-18:
https://www.kaggle.com/code/ivankunyankin/resnet18-from-scratch-using-pytorch/notebook

ResNet original paper: https://arxiv.org/pdf/1512.03385.pdf


In [105]:
# base code for ResNet18 came from link below, modified based on Appendix A structure
# https://www.kaggle.com/code/ivankunyankin/resnet18-from-scratch-using-pytorch?scriptVersionId=54460301&cellId=23

class Block(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size=3, kernel_size_acda = None, identity_downsample=None, stride=1, acda_padding=3):
        super(Block, self).__init__()
        self.kernel_size_acda = kernel_size_acda
        if kernel_size is None:
            kernel_size = 3
        if kernel_size_acda:
            self.conv1 = nn.Conv2d(in_channels, in_channels, kernel_size=kernel_size, stride=stride, padding=1)
            self.bn1 = nn.BatchNorm2d(in_channels)
            self.acda_conv = acda.Conv_DCFD(in_channels, in_channels, kernel_size=kernel_size_acda, stride=1, padding=acda_padding)
            self.acda_bn = nn.BatchNorm2d(in_channels)
            self.conv2 = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=1, padding=1)
            self.bn2 = nn.BatchNorm2d(out_channels)
        else:
            self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=1)
            self.bn1 = nn.BatchNorm2d(out_channels)
            self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=kernel_size, stride=1, padding=1)
            self.bn2 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU()
        self.identity_downsample = identity_downsample

    def forward(self, x):
        identity = x
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        if self.kernel_size_acda:
            x = self.acda_conv(x)
            x = self.acda_bn(x)
            x = self.relu(x)
        x = self.conv2(x)
        x = self.bn2(x)
        if self.identity_downsample is not None:
            identity = self.identity_downsample(identity)
        x += identity
        x = self.relu(x)
        return x

class ResNet_18(nn.Module):
    def __init__(self, image_channels, num_classes):
        super(ResNet_18, self).__init__()
        self.in_channels = 64
        self.conv1 = nn.Conv2d(image_channels, 64, kernel_size=7, stride=2, padding=3)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU()
        # self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        
        #resnet layers
        self.layer1 = self.__make_layer(64, 64, stride=1)
        self.layer2 = self.__make_layer(64, 128, stride=2)
        self.layer3 = self.__make_layer(128, 256, stride=2)
        self.layer4 = self.__make_layer(256, 512, stride=2)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512, num_classes)

    def __make_layer(self, in_channels, out_channels, stride, temp_padding=1):
        identity_downsample = None
        if stride != 1:
            identity_downsample = self.identity_downsample(in_channels, out_channels, temp_padding)
            
        return nn.Sequential(
            Block(in_channels, out_channels, identity_downsample=identity_downsample, stride=stride), 
            Block(out_channels, out_channels)
        )

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        # x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        # x = self.avgpool(x)
        # x = x.view(x.shape[0], -1)
        # x = self.fc(x)
        return x 

    def identity_downsample(self, in_channels, out_channels, temp_padding):   
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=2, padding=temp_padding), 
            nn.BatchNorm2d(out_channels)
        )

class ResNet_18_ACDA(nn.Module):
    def __init__(self, image_channels, num_classes):
        super(ResNet_18_ACDA, self).__init__()    
        self.in_channels = 64
        self.conv1 = nn.Conv2d(image_channels, 64, kernel_size=7, stride=2, padding=3)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU()
        # # first max pooling layer removed
        # self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        
        #resnet layers
        self.layer1 = self.__make_layer(64, 64, stride=1)
        self.layer2 = self.__make_layer(64, 128, stride=2)
        self.layer3 = self.__make_layer(128, 256, stride=2, kernel_size=1, kernel_size_acda=7, temp_padding=4)
        self.layer4 = self.__make_layer(256, 512, stride=2, kernel_size=1, kernel_size_acda=5, acda_padding=2, temp_padding=4)
        
        # self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        # self.fc = nn.Linear(512, num_classes)
        
    def __make_layer(self, in_channels, out_channels, stride, kernel_size=None, kernel_size_acda=None, temp_padding=1, acda_padding=3):
        identity_downsample = None
        if stride != 1:
            identity_downsample = self.identity_downsample(in_channels, out_channels, temp_padding)

        return nn.Sequential(
            Block(in_channels, out_channels, identity_downsample=identity_downsample, 
                  stride=stride, kernel_size=kernel_size, kernel_size_acda=kernel_size_acda, acda_padding=acda_padding), 
            Block(out_channels, out_channels)
        )
        
    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        # x = self.maxpool(x)
        
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        
        # YVETTE - last maxpool layer also removed
        # x = self.avgpool(x)
        # x = x.view(x.shape[0], -1)
        # x = self.fc(x)
        return x 
    
    def identity_downsample(self, in_channels, out_channels, temp_padding):
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=2, padding=temp_padding), 
            nn.BatchNorm2d(out_channels)
        )

resNet = ResNet_18(3, 1000)
resNet_acda = ResNet_18_ACDA(3, 1000)

In [106]:
# Train network
mse_loss = nn.MSELoss()

def train(model, optimizer, train_data, num_epochs=2):
    mae_vals = []
    mse_vals = []
    loss_vals = []
    yvette = 0
    for _ in range(num_epochs):  # loop over the dataset multiple times
        running_loss = 0.0
        mae = 0
        mse = 0
        count = 0
        for i, (inputs, points, gt_discrete) in enumerate(train_data):
            # get the inputs
            gd_count = np.array([len(p) for p in points], dtype=np.float32)
            N = inputs.size(0)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs)
            # print(outputs.shape, outputs)
            loss = mse_loss(outputs.sum(1).sum(1).sum(1), torch.from_numpy(gd_count).float())
            loss.backward()
            optimizer.step()

            pred_count = torch.sum(outputs.view(N, -1), dim=1).detach().cpu().numpy()
            pred_err = pred_count - gd_count

            count += N
            mae += np.mean(abs(pred_err)) * N
            mse += np.mean(pred_err * pred_err) * N
            if yvette < 5:
                # print(gd_count.shape)
                # print(outputs.shape)
                # print(outputs.sum(1).shape)
                # print(outputs.sum(1).sum(1).shape)
                # print(outputs.sum(1).sum(1).sum(1).shape)
                # print('gd_count = ', gd_count)
                # print('outputs.sum(1).sum(1).sum(1) = ', outputs.sum(1).sum(1).sum(1))
                print('running_loss = ', running_loss, ' mae_loss.item() = ', loss.item())#, ' pred_err = ', pred_err)
            yvette += 1

            # print statistics
            running_loss += loss.item()
            if i % 10 == 9:    # print every 2000 mini-batches
                # print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
                loss_vals.append(running_loss / 10)
                running_loss = 0.0
        mae_vals.append(mae * 1.0 / count)
        mse_vals.append(mse * 1.0 / count)
    print('yvette = ', yvette)
    # return loss_vals
    return mae_vals, mse_vals


In [None]:
print(resNet_acda)

In [4]:
def train_collate(batch):
    transposed_batch = list(zip(*batch))
    images = torch.stack(transposed_batch[0], 0)
    points = transposed_batch[1]  # the number of points is not fixed, keep it as a list of tensor
    gt_discretes = torch.stack(transposed_batch[2], 0)
    return images, points, gt_discretes

# load the data
train_path = './data/ShanghaiTech/part_A/train_data/'
test_path = './data/ShanghaiTech/part_A/test_data/'

train_crowd = Crowd_sh(train_path, crop_size=112, method='train') # YVETTE - should that be 512 instead...?
test_crowd = Crowd_sh(test_path, crop_size=112, method='val')

# batch_size = 10
batch_size = 64
trainloader = torch.utils.data.DataLoader(train_crowd, batch_size=batch_size,
                                          shuffle=True, num_workers=0,
                                          collate_fn=train_collate,
                                          pin_memory=True)

testloader = torch.utils.data.DataLoader(test_crowd, batch_size=batch_size,
                                         shuffle=False, num_workers=0,
                                         collate_fn=default_collate,
                                         pin_memory=False)

number of img: 300
number of img: 182


In [107]:
resNet_optimizer = optim.SGD(resNet.parameters(), lr=0.001, momentum=0.9)
resNet_acda_optimizer = optim.SGD(resNet_acda.parameters(), lr=0.001, momentum=0.9)

# ResNet
epochs = 2
mae, mse = train(resNet, resNet_optimizer, trainloader, epochs)
# test_results = test(resNet, testloader)

# ResNet + ACDA
mae_acda, mse_acda = train(resNet_acda, resNet_acda_optimizer, trainloader, epochs)
# test_results_acda = test(resNet_acda, testloader)

running_loss =  0.0  mae_loss.item() =  420434272.0
running_loss =  420434272.0  mae_loss.item() =  339971693084672.0
running_loss =  339972113518944.0  mae_loss.item() =  4.500726471545722e+20
running_loss =  4.500729871266857e+20  mae_loss.item() =  4.521984781200864e+19
running_loss =  4.9529283493869434e+20  mae_loss.item() =  647.6591186523438
yvette =  10
running_loss =  0.0  mae_loss.item() =  3010494464.0
running_loss =  3010494464.0  mae_loss.item() =  2.2092478546968576e+16
running_loss =  2.209248155746304e+16  mae_loss.item() =  2.4348648334983803e+23
running_loss =  2.434865054423196e+23  mae_loss.item() =  5340144640.0
running_loss =  2.4348650544232494e+23  mae_loss.item() =  1675.1136474609375
yvette =  10


In [108]:
print(mae, mse)
print(mae_acda, mse_acda)
# plt.plot(running_loss)
# plt.show()
# print(running_loss)

# MAE: 
# [12.770059401194255, 12.075229848225911, 14.851797663370768, 13.911667137145995, 14.230592371622722]
# [494.9467084757487, 456.49455607096354, 854.2065104166667, 478.7979793294271, 801.6170638020833]

[5944657377.882916, 15.54] [1.0566246394757443e+20, 756.1866658528646]
[104960359009.99916, 14.836666730244955] [5.194378782769598e+22, 591.1100008138021]


In [109]:
# Number of parameters in the models
print(resNet.parameters(), resNet_acda.parameters())
print(sum(p.numel() for p in resNet.parameters() if p.requires_grad))
print(sum(p.numel() for p in resNet_acda.parameters() if p.requires_grad))

<generator object Module.parameters at 0x0000029403A8C740> <generator object Module.parameters at 0x0000029403A8C7B0>
13070568
9196956
