# Counting Crowds with Deep Learning
## Proof Of Concept

The notebook will implement various papers for the puprose of crowd counting
* [Dense Scale Networks](https://arxiv.org/pdf/1906.09707.pdf)
* [CSRNet: Dilated Convolutional Neural Networks](https://arxiv.org/pdf/1802.10062.pdf)

The goal - to find the best approach to teach a model to count crowds, based on input images.

## 1. Imports

In [1]:
import os
import cv2
import glob
import random
import numpy as np
import scipy.io
import h5py
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset
from PIL import Image
from tqdm import tqdm
import torch.nn.functional as F
from skimage import exposure, img_as_float
import logging
import warnings
warnings.filterwarnings("ignore")

In [2]:
# reporting module
from ovreport.report import report_to_overwatch

### 1.1 Globals
A number of parameters on top of the notebook

In [3]:
# Dataset Paths
train_path_UCF_QNRF = 'training_dataset/UCF-QNRF_ECCV18/Train_h5/'
test_path_UCF_QNRF = 'training_dataset/UCF-QNRF_ECCV18/Test_h5/'
# TODO: Add More ... add more

# Target Image Size
TARGET_SHAPE = (720, 480)

# Model save paths
BEST_MODEL_SAVE_PATH = 'models/best/'
CKP_MODEL_SAVE_PATH = 'models/checkpoints/'
# Training Details
PRETRAINED_BACKBONE = True
TRAIN_BATCH_SIZE = 1
TEST_BATCH_SIZE = 1
EPOCHS = 50
LEARNING_RATE = 1e-5
WEIGHT_DECAY = 5e-4

### 1.2 Dealing with Data

In [4]:
class RawDataset(Dataset):
    def __init__(self, root, transform, ratio=8, output_shape=False, aug=False):
        self.nsamples = len(root)
        self.aug = aug
        self.output_shape = output_shape
        self.root = root
        self.ratio = ratio
        self.transform = transform
    
    def __augment(image, target, count, seed):
        random.seed(seed)
        
        # apply random crop
        if random.random() < 0.5:
            crop_size = (img.size[0]//2, img.size[1]//2)
        
            if random.random() <=0.44:
                # 4 non-overlapping patches
                dx = int(random.randint(0,1) * crop_size[0])
                dy = int(random.randint(0,1) * crop_size[1])
            else:
                # 5 random patches
                # set seed to ensure for each image the random patches are certain
                # if not set, the crop will be online which means the patches change every time loading, leading to a dynamic training set.
                patch_id = random.randint(0, 4)
                random.seed(index + patch_id * 0.1)
                dx = int(random.random() * crop_size[0])
                random.seed(index + 0.5 + patch_id * 0.1)
                dy = int(random.random() * crop_size[1])
            # crop
            img = img.crop((dx, dy, crop_size[0]+dx, crop_size[1]+dy))
            target = target[dy:crop_size[1]+dy, dx:crop_size[0]+dx]
            count = float(target.sum())
        
        if random.random() > 0.5:
            target = np.fliplr(target)
            image = image.transpose(Image.FLIP_LEFT_RIGHT)
        
        if random.random() > 0.7:
            img = img_as_float(image)
            # gamma_img: np.array(dtype=float64) ranging [0,1]
            if random.random() > 0.5:
                gamma_img = exposure.adjust_gamma(img, 1.5)
            else:
                gamma_img = exposure.adjust_gamma(img, 0.5)
            gamma_img = gamma_img * 255
            gamma_img = np.uint8(gamma_img)
            image = Image.fromarray(gamma_img)
        
        return image, target, count
    
    def __resize_to_target(self, img, target_shape):
        return cv2.resize(img, target_shape, interpolation=cv2.INTER_CUBIC)
    
    def __load_data(self, path, ratio=8, output_shape=None, aug=False, index=None):
        src_h5 = h5py.File(path, 'r')
        img = src_h5['image_array'].value
        output = src_h5['density_map'].value
        count = float(src_h5['count'].value)


        if len(img.shape) < 3:
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)

        if output_shape is not None:
            img = self.__resize_to_target(img, output_shape)
            output = self.__resize_to_target(output, output_shape)

        if aug:
            # TODO: Implement augumentation
            img, output, count = self.__augment(img, output, count, 42)

        if ratio>1:
            output = cv2.resize(output, 
                                (int(output.shape[1]/ratio),int(output.shape[0]/ratio)), 
                                interpolation=cv2.INTER_CUBIC) * (ratio**2)

        output = np.reshape(output, (1, ) + output.shape)

        return img, output, count        
    
    def __getitem__(self, index):
        img, target, count = self.__load_data(self.root[index], output_shape=self.output_shape, aug=self.aug)
        if self.transform:
            img = self.transform(img)
        return img, target, count
    def __len__(self):
        return self.nsamples

In [5]:
def get_loaders(train_path, test_path, output_shape, ratio=8):
    train_img_paths = glob.glob(os.path.join(train_path, '*.h5'))
    test_img_paths = glob.glob(os.path.join(test_path, '*.h5'))
    
    transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
    train_dataset = RawDataset(train_img_paths, transform, ratio=ratio, output_shape=output_shape, aug=False)
    test_dataset = RawDataset(test_img_paths, transform, ratio=1, output_shape=output_shape, aug=False)
    
    train_loader = torch.utils.data.DataLoader(train_dataset, shuffle=True, batch_size=TRAIN_BATCH_SIZE)
    test_loader = torch.utils.data.DataLoader(test_dataset, shuffle=False, batch_size=TEST_BATCH_SIZE)
    
    return train_loader, test_loader

In [6]:
train_loader, test_loader = get_loaders(train_path_UCF_QNRF, test_path_UCF_QNRF, output_shape=TARGET_SHAPE)

### 1.3 Utilities

#### 1.3.1 Model Checkpoints
Save model checkpoints, which are - epoch number, model state and optimizer state. A model checkpoint will be saved only whenever a better MAE is achieved or each 5 epochs, only if the previous loss has been improved.

In [7]:
def save_checkpoint(epoch, model_filename, model_state_dict, optimizer_state_dict, save_path):
    '''
        Saves the model, as well as the optimizer state in the predefined place.
    '''
    checkpoint = {
        'epoch': epoch,
        'state_dict': model_state_dict,
        'optimizer': optimizer_state_dict
    }
    
    torch.save(checkpoint, os.path.join(save_path, model_filename))

In [20]:
def load_checkpoint(checkpoint_filepath):
    '''
        Loads the model from checkpoint_filepath.
    '''    
    return torch.load(checkpoint_filepath)

## 2. Dense Scale Network

### 2.1 The Model

In [9]:
class DDCB(nn.Module):
    '''
        TODO: Docstring
    '''
    def __init__(self, in_planes):
        super(DDCB, self).__init__()
        self.conv1 = nn.Sequential(nn.Conv2d(in_planes, 256, 1), nn.ReLU(True), 
                                   nn.Conv2d(256, 64, 3, padding=1), nn.ReLU(True))
        self.conv2 = nn.Sequential(nn.Conv2d(in_planes+64, 256, 1), nn.ReLU(True), 
                                   nn.Conv2d(256, 64, 3, padding=2, dilation=2), nn.ReLU(True))
        self.conv3 = nn.Sequential(nn.Conv2d(in_planes+128, 256, 1), nn.ReLU(True), 
                                   nn.Conv2d(256, 64, 3, padding=3, dilation=3), nn.ReLU(True))
        self.conv4 = nn.Sequential(nn.Conv2d(in_planes+128, 512, 3, padding=1), nn.ReLU(True))
    def forward(self, x):
        x1_raw = self.conv1(x)
        x1 = torch.cat([x, x1_raw], 1)
        x2_raw = self.conv2(x1)
        x2 = torch.cat([x, x1_raw, x2_raw], 1)
        x3_raw = self.conv3(x2)
        x3 = torch.cat([x, x2_raw, x3_raw], 1)
        output = self.conv4(x3)
        return output

In [10]:
class DenseScaleNet(nn.Module):
    '''
        TODO: Docstring
    '''
    def __init__(self, load_model='', pretrained_backbone=False, trainable_backbone=False):
        super(DenseScaleNet, self).__init__()
        self.load_model = load_model
        self.pretrained_backbone = pretrained_backbone
        self.trainable_backbone = trainable_backbone
        # network
        self.features = self.__get_backbone()
        self.DDCB1 = DDCB(512)
        self.DDCB2 = DDCB(512)
        self.DDCB3 = DDCB(512)
        self.output_layers = nn.Sequential(nn.Conv2d(512, 128, 3, padding=1), 
                                           nn.ReLU(True), 
                                           nn.Conv2d(128, 64, 3, padding=1), 
                                           nn.ReLU(True), 
                                           nn.Conv2d(64, 1, 1))
        self.__initialize_weights()

    def forward(self, x):
        x = self.features(x)
        x1_raw = self.DDCB1(x)
        x1 = x1_raw + x
        x2_raw = self.DDCB2(x1)
        x2 = x2_raw + x1_raw + x
        x3_raw = self.DDCB3(x2)
        x3 = x3_raw + x2_raw + x1_raw + x
        output = self.output_layers(x3)
        return output
    
    def __get_backbone(self):
#         if self.pretrained_backbone:
#             vgg16 = torchvision.models.vgg16(pretrained=True)
#             # only get the leayers we are interested in
#             backbone = vgg16.features[:23] # to match the layers in the paper
#             # make them untrainable if desired
#             if not self.trainable_backbone:
#                 for param in backbone.parameters():
#                     param.requires_grad = False

#             return backbone
#         else:
        self.features_cfg = [64, 64, 'M', 
                             128, 128, 'M', 
                             256, 256, 256, 'M', 
                             512, 512, 512,]
        return self.__make_layers(self.features_cfg)
    
    def __make_layers(self, cfg, in_channels=3, batch_norm=False, dilation=False):
        if dilation:
            d_rate = 2
        else:
            d_rate = 1
        layers = []
        for v in cfg:
            if v == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=d_rate, dilation=d_rate)
                if batch_norm:
                    layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
                else:
                    layers += [conv2d, nn.ReLU(inplace=True)]
                in_channels = v
        return nn.Sequential(*layers)   
    
    def __initialize_weights(self):
        self_dict = self.state_dict()
        pretrained_dict = dict()
        self.__random_initialize_weights()
        if not self.load_model:
            # load vgg16
            vgg16 = torchvision.models.vgg16(pretrained=self.pretrained_backbone)
            # check if the backbone should be trainable or not
            if not self.trainable_backbone:
                for param in vgg16.parameters():
                    param.requires_grad = False
            # copy over the items that match        
            for k, v in vgg16.state_dict().items():
                if k in self_dict and self_dict[k].size() == v.size():
                    pretrained_dict[k] = v
            self_dict.update(pretrained_dict)
            self.load_state_dict(self_dict)
        else:
            self.load_state_dict(self.load_model)
            
    def __random_initialize_weights(self):
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.normal_(module.weight, std=0.01)
                #nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
                if module.bias is not None:
                    nn.init.constant_(module.bias, 0)
            elif isinstance(module, nn.BatchNorm2d):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)

### 2.2. Criterion, Loss

In [11]:
criterion = nn.MSELoss()

In [12]:
def cal_lc_loss(output, target, sizes=(1,2,4)):
    criterion_L1 = nn.L1Loss()
    Lc_loss = None
    for s in sizes:
        pool = nn.AdaptiveAvgPool2d(s)
        est = pool(output)
        gt = pool(target)
        if Lc_loss:
            Lc_loss += criterion_L1(est, gt)
        else:
            Lc_loss = criterion_L1(est, gt)
    return Lc_loss

In [13]:
def calc_loss(output, target):
    Le_Loss = criterion(output, target)
    Lc_Loss = cal_lc_loss(output, target)
    loss = Le_Loss + 1000 * Lc_Loss
    return loss

In [14]:
def val(model, test_loader):
    model.eval()
    mae = 0.0
    mse = 0.0
    with torch.no_grad():
        for img, target, count in test_loader:
            img = img.cuda()
            output = model(img)
            est_count = output.sum().item()
            mae += abs(est_count - count)
            mse += (est_count - count)**2
    mae /= len(test_loader)
    mse /= len(test_loader)
    mse = mse**0.5
    return float(mae), float(mse)

### 2.3 Optimizer

In [15]:
optimizer = torch.optim.Adam(DenseScaleNet('').parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)

### 2.4 Init Model
Or load a pretrained one if exists.

In [18]:
def init_model(**options):
    '''
        Initializes the Model.
        If the model is 
        If new is true it starts with a new model, else loads the one in the Model Save Path.
    '''
    if 'new' in options.keys():
        if options['new'] and options['trainable_backbone']:
            dsn_net = DenseScaleNet('', pretrained_backbone=PRETRAINED_BACKBONE, trainable_backbone=True)
        elif options['new']:
            dsn_net = DenseScaleNet('', pretrained_backbone=PRETRAINED_BACKBONE)
    elif 'model' in options.keys():
        dsn_net = DenseScaleNet(options['model'])
    else:
        dsn_net = DenseScaleNet('')
    
    return dsn_net

In [22]:
checkpoint

OrderedDict([('features.0.weight', tensor([[[[-0.5543,  0.1422,  0.5285],
                        [-0.5843,  0.3556,  0.7649],
                        [-0.6917, -0.0498,  0.4830]],
              
                       [[ 0.1760,  0.0107, -0.0805],
                        [ 0.0440, -0.0699, -0.2597],
                        [ 0.1320, -0.1732, -0.1318]],
              
                       [[ 0.3134, -0.1652, -0.4265],
                        [ 0.4751, -0.0824, -0.4863],
                        [ 0.6317,  0.0190, -0.2770]]],
              
              
                      [[[ 0.2342,  0.1278,  0.1865],
                        [-0.4268, -0.2430,  0.2456],
                        [-0.2492,  0.1421, -0.0068]],
              
                       [[-0.1398, -0.2186,  0.1502],
                        [-0.8408, -0.3520,  0.5628],
                        [-0.2415,  0.5189,  0.5374]],
              
                       [[-0.3134, -0.3702, -0.1312],
                        [-0.4712, -

In [23]:
# dsn_net = init_model(new=True, trainable_backbone=True)
checkpoint = load_checkpoint('models/best/DenseScaleNet_ucf_qnrf_noaug_1e-5.pth')
dsn_net = init_model(model=checkpoint)
dsn_net.cuda()

KeyError: 'new'

### 2.5 Training Loop

In [18]:
def train_model(model, train_loader, test_loader, optimizer):
    '''
        TODO: Docstring
    '''
    best_mae, _  = val(model, test_loader)
    best_loss = 9999.0
    
    for epoch in range(EPOCHS):
        train_loss = 0.0
        model.train()
        for img, target, count in tqdm(train_loader):
            optimizer.zero_grad()
            img = img.cuda()
            target = target.float()
            target = target.cuda()
            output = model(img)

            loss = calc_loss(output, target)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        mae, mse = val(model, test_loader)

        print('Epoch {}/{} Loss:{:.3f}, MAE:{:.2f}, MSE:{:.2f}, Best MAE:{:.2f}'.format(epoch+1, 
                                                                                        EPOCHS, 
                                                                                        train_loss/len(train_loader), 
                                                                                        mae, 
                                                                                        mse, 
                                                                                        best_mae))
        if epoch % 5 == 0 and train_loss < best_loss:
            save_checkpoint(epoch, f'DenseScaleNet_noaug_e{epoch}_{LEARNING_RATE}.pth', 
                            model.state_dict(), 
                            optimizer.state_dict(), 
                            CKP_MODEL_SAVE_PATH)          
        
        if mae < best_mae:
            best_mae = mae
            print(f'New best mae: {best_mae}. Saving model!')
            # report best model
            report_to_overwatch('VM:ML:P', 'Atlas', f'Epoch {epoch} recorded {best_mae}!')
            
            save_checkpoint(epoch, 'DenseScaleNet_noaug_1e-5.pth', 
                            model.state_dict(), 
                            optimizer.state_dict(), 
                            BEST_MODEL_SAVE_PATH)
    
    return model

### 2.6 Training the model

In [19]:
dsn_net = train_model(dsn_net, train_loader, test_loader, optimizer)
report_to_overwatch('VM:ML:P', 'Atlas', 'Training of model done!')

100%|██████████| 1200/1200 [11:33<00:00,  1.73it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Epoch 1/50 Loss:62.306, MAE:690.55, MSE:997.44, Best MAE:616.07


100%|██████████| 1200/1200 [11:33<00:00,  1.73it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Epoch 2/50 Loss:53.285, MAE:639.29, MSE:936.50, Best MAE:616.07


100%|██████████| 1200/1200 [11:33<00:00,  1.73it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Epoch 3/50 Loss:52.816, MAE:635.84, MSE:924.45, Best MAE:616.07


100%|██████████| 1200/1200 [11:33<00:00,  1.73it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Epoch 4/50 Loss:47.746, MAE:665.26, MSE:976.12, Best MAE:616.07


100%|██████████| 1200/1200 [11:34<00:00,  1.73it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Epoch 5/50 Loss:47.864, MAE:639.66, MSE:962.88, Best MAE:616.07


100%|██████████| 1200/1200 [11:34<00:00,  1.73it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Epoch 6/50 Loss:46.285, MAE:661.62, MSE:964.76, Best MAE:616.07


100%|██████████| 1200/1200 [11:33<00:00,  1.73it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Epoch 7/50 Loss:45.838, MAE:641.21, MSE:950.21, Best MAE:616.07


100%|██████████| 1200/1200 [11:33<00:00,  1.73it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Epoch 8/50 Loss:42.542, MAE:633.35, MSE:937.38, Best MAE:616.07


100%|██████████| 1200/1200 [11:38<00:00,  1.72it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Epoch 9/50 Loss:42.627, MAE:652.36, MSE:958.90, Best MAE:616.07


100%|██████████| 1200/1200 [11:38<00:00,  1.72it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Epoch 10/50 Loss:42.040, MAE:654.39, MSE:965.78, Best MAE:616.07


100%|██████████| 1200/1200 [11:37<00:00,  1.72it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Epoch 11/50 Loss:38.730, MAE:628.64, MSE:923.27, Best MAE:616.07


100%|██████████| 1200/1200 [11:37<00:00,  1.72it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Epoch 12/50 Loss:38.917, MAE:632.95, MSE:933.39, Best MAE:616.07


100%|██████████| 1200/1200 [11:32<00:00,  1.73it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Epoch 13/50 Loss:36.967, MAE:642.05, MSE:948.72, Best MAE:616.07


100%|██████████| 1200/1200 [11:33<00:00,  1.73it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Epoch 14/50 Loss:37.222, MAE:663.17, MSE:969.71, Best MAE:616.07


100%|██████████| 1200/1200 [11:33<00:00,  1.73it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Epoch 15/50 Loss:36.643, MAE:659.62, MSE:965.51, Best MAE:616.07


100%|██████████| 1200/1200 [11:33<00:00,  1.73it/s]


Epoch 16/50 Loss:36.022, MAE:608.87, MSE:906.52, Best MAE:616.07
New best mae: 608.8675682587537. Saving model!
200
Report sent


100%|██████████| 1200/1200 [11:32<00:00,  1.73it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Epoch 17/50 Loss:34.729, MAE:655.27, MSE:961.94, Best MAE:608.87


100%|██████████| 1200/1200 [11:32<00:00,  1.73it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Epoch 18/50 Loss:33.848, MAE:613.04, MSE:912.97, Best MAE:608.87


100%|██████████| 1200/1200 [11:33<00:00,  1.73it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Epoch 19/50 Loss:30.892, MAE:617.96, MSE:921.56, Best MAE:608.87


100%|██████████| 1200/1200 [11:32<00:00,  1.73it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Epoch 20/50 Loss:30.738, MAE:636.39, MSE:948.05, Best MAE:608.87


100%|██████████| 1200/1200 [11:32<00:00,  1.73it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Epoch 21/50 Loss:30.513, MAE:651.65, MSE:955.52, Best MAE:608.87


100%|██████████| 1200/1200 [11:33<00:00,  1.73it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Epoch 22/50 Loss:28.778, MAE:648.18, MSE:958.61, Best MAE:608.87


100%|██████████| 1200/1200 [11:33<00:00,  1.73it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Epoch 23/50 Loss:27.144, MAE:638.20, MSE:938.72, Best MAE:608.87


100%|██████████| 1200/1200 [11:34<00:00,  1.73it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Epoch 24/50 Loss:27.776, MAE:616.23, MSE:921.01, Best MAE:608.87


100%|██████████| 1200/1200 [11:33<00:00,  1.73it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Epoch 25/50 Loss:28.633, MAE:621.06, MSE:929.28, Best MAE:608.87


100%|██████████| 1200/1200 [11:32<00:00,  1.73it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Epoch 26/50 Loss:25.560, MAE:639.92, MSE:946.89, Best MAE:608.87


100%|██████████| 1200/1200 [11:32<00:00,  1.73it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Epoch 27/50 Loss:25.675, MAE:622.56, MSE:924.95, Best MAE:608.87


100%|██████████| 1200/1200 [11:32<00:00,  1.73it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Epoch 28/50 Loss:27.155, MAE:638.82, MSE:944.49, Best MAE:608.87


100%|██████████| 1200/1200 [11:32<00:00,  1.73it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Epoch 29/50 Loss:25.156, MAE:615.88, MSE:918.54, Best MAE:608.87


100%|██████████| 1200/1200 [11:32<00:00,  1.73it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Epoch 30/50 Loss:23.737, MAE:620.13, MSE:928.46, Best MAE:608.87


100%|██████████| 1200/1200 [11:32<00:00,  1.73it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Epoch 31/50 Loss:26.401, MAE:636.89, MSE:945.12, Best MAE:608.87


100%|██████████| 1200/1200 [11:33<00:00,  1.73it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Epoch 32/50 Loss:22.609, MAE:631.59, MSE:939.71, Best MAE:608.87


100%|██████████| 1200/1200 [11:33<00:00,  1.73it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Epoch 33/50 Loss:22.297, MAE:628.80, MSE:930.30, Best MAE:608.87


100%|██████████| 1200/1200 [11:33<00:00,  1.73it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Epoch 34/50 Loss:22.721, MAE:610.13, MSE:917.82, Best MAE:608.87


100%|██████████| 1200/1200 [11:33<00:00,  1.73it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Epoch 35/50 Loss:22.267, MAE:658.12, MSE:957.92, Best MAE:608.87


100%|██████████| 1200/1200 [11:33<00:00,  1.73it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Epoch 36/50 Loss:23.938, MAE:628.71, MSE:928.62, Best MAE:608.87


100%|██████████| 1200/1200 [11:33<00:00,  1.73it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Epoch 37/50 Loss:21.140, MAE:625.40, MSE:924.67, Best MAE:608.87


100%|██████████| 1200/1200 [11:33<00:00,  1.73it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Epoch 38/50 Loss:22.362, MAE:623.72, MSE:932.93, Best MAE:608.87


100%|██████████| 1200/1200 [11:33<00:00,  1.73it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Epoch 39/50 Loss:21.622, MAE:612.38, MSE:912.70, Best MAE:608.87


100%|██████████| 1200/1200 [11:32<00:00,  1.73it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Epoch 40/50 Loss:19.721, MAE:624.84, MSE:931.64, Best MAE:608.87


100%|██████████| 1200/1200 [11:33<00:00,  1.73it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Epoch 41/50 Loss:20.606, MAE:627.22, MSE:937.16, Best MAE:608.87


100%|██████████| 1200/1200 [11:32<00:00,  1.73it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Epoch 42/50 Loss:18.594, MAE:652.11, MSE:955.94, Best MAE:608.87


100%|██████████| 1200/1200 [11:33<00:00,  1.73it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Epoch 43/50 Loss:19.694, MAE:612.82, MSE:930.24, Best MAE:608.87


100%|██████████| 1200/1200 [11:32<00:00,  1.73it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Epoch 44/50 Loss:19.347, MAE:631.88, MSE:932.61, Best MAE:608.87


100%|██████████| 1200/1200 [11:33<00:00,  1.73it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Epoch 45/50 Loss:18.580, MAE:610.59, MSE:916.06, Best MAE:608.87


100%|██████████| 1200/1200 [11:33<00:00,  1.73it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Epoch 46/50 Loss:18.316, MAE:648.42, MSE:949.15, Best MAE:608.87


100%|██████████| 1200/1200 [11:33<00:00,  1.73it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Epoch 47/50 Loss:22.082, MAE:614.57, MSE:919.95, Best MAE:608.87


100%|██████████| 1200/1200 [11:33<00:00,  1.73it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Epoch 48/50 Loss:18.250, MAE:629.49, MSE:937.27, Best MAE:608.87


100%|██████████| 1200/1200 [11:32<00:00,  1.73it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Epoch 49/50 Loss:17.555, MAE:612.54, MSE:905.71, Best MAE:608.87


100%|██████████| 1200/1200 [11:32<00:00,  1.73it/s]


Epoch 50/50 Loss:17.996, MAE:615.22, MSE:918.28, Best MAE:608.87
200
Report sent


### 2.7 Testing

In [20]:
img, target, count = next(iter(test_loader))
img = img.cuda()

In [21]:
output = dsn_net(img)

In [22]:
output.sum().item()

5.902300834655762

In [25]:
count

tensor([332.], dtype=torch.float64)

## 3. Congested Scene Recognition Network

### 3.1 The Model

In [None]:
class CSRNet(nn.Module):
    def __init__(self, load_weights=False):
        super(CSRNet, self).__init__()
        self.frontend = [64, 64, 'M', 128, 128,
                              'M', 256, 256, 256, 'M', 512, 512, 512]
        self.backend_feat = [512, 512, 512, 256, 128, 64]
        self.frontend = make_layers(self.frontend_feat)
        self.backend = make_layers(self.backend_feat, in_channels=512, dilation=True)
        self.output_layer = nn.Conv2d(64, 1, kernel_size=1)
        if not load_weights:
            mod = models.vgg16(pretrained=True)
            self.__initialize_weights()
            fsd = collections.OrderedDict()
            # 10 convlution *(weight, bias) = 20 parameters
            for i in range(len(self.frontend.state_dict().items())):
                temp_key = list(self.frontend.state_dict().items())[i][0]
                fsd[temp_key] = list(mod.state_dict().items())[i][1]
            self.frontend.load_state_dict(fsd)

    def forward(self, x):
        x = self.frontend(x)
        x = self.backend(x)
        x = self.output_layer(x)
        x = nn.functional.interpolate(x, scale_factor=8)
        return x

    def __initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.normal_(m.weight, std=0.01)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)


    def __make_layers(self, cfg, in_channels=3, batch_norm=False, dilation=False):
        if dilation:
            d_rate = 2
        else:
            d_rate = 1
        layers = []
        for v in cfg:
            if v == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                conv2d = nn.Conv2d(in_channels, v, kernel_size=3,
                                   padding=d_rate, dilation=d_rate)
                if batch_norm:
                    layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
                else:
                    layers += [conv2d, nn.ReLU(inplace=True)]
                in_channels = v
        return nn.Sequential(*layers)