<a href="https://colab.research.google.com/github/Unrickyelenting/Crowd-counting/blob/main/DTNNet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import h5py
import scipy.io as io
import PIL.Image as Image
import numpy as np
import os
import glob
from matplotlib import pyplot as plt
from scipy.ndimage.filters import gaussian_filter 
import scipy.spatial
import json
from matplotlib import cm as CM
import torch
from torch.utils.data import Dataset
from torchvision import datasets, transforms
import time
import random
from PIL import Image,ImageFilter,ImageDraw
from PIL import ImageStat
import cv2
import torch
import torch.nn as nn
from torch.autograd import Variable
import argparse

In [None]:
class DTNet(nn.Module):
    """Convolutional front-end, dilated back-end with a trailing attention
    layer, and a 1x1 convolution that reduces the result to one channel.

    Args:
        load_weights: when False (the default) the convolution weights are
            initialised by ``_initialize_weights`` and the front-end is then
            overwritten with the leading tensors of torchvision's pretrained
            VGG-16. When True neither step runs, leaving the default PyTorch
            initialisation in place for the caller to overwrite.

    Note:
        ``models`` (``torchvision.models``) must be in scope when the class is
        instantiated with ``load_weights=False``.
    """

    def __init__(self, load_weights=False):
        super(DTNet, self).__init__()

        # Layer specs consumed by make_layers: an int is the output width of a
        # 3x3 conv, 'M' a 2x2 max-pool, 'T' a self-attention layer.
        # The front-end contains three 'M' entries, so its output has 1/8 of
        # the input's spatial resolution.
        self.frontend_feat = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512]
        self.backend_feat = [512, 512, 512, 256, 128, 64, 'T']

        self.frontend = make_layers(self.frontend_feat, in_channels=3, dilation=False)
        self.backend = make_layers(self.backend_feat, in_channels=512, dilation=True)
        self.output_layer = nn.Conv2d(64, 1, kernel_size=1)

        if load_weights:
            return

        vgg = models.vgg16(pretrained=True)
        self._initialize_weights()

        # The front-end's state_dict entries are filled, in order, from the
        # first entries of the VGG-16 state_dict (matched by position, not by
        # key name).
        pretrained = [tensor.data[:] for tensor in vgg.state_dict().values()]
        for dst, src in zip(self.frontend.state_dict().values(), pretrained):
            dst.data[:] = src

    def forward(self, x):
        """Run front-end, back-end and the 1x1 output convolution on ``x``."""
        features = self.backend(self.frontend(x))
        return self.output_layer(features)

    def _initialize_weights(self):
        """Conv2d: weights ~ N(0, 0.01), biases 0. BatchNorm2d: weight 1, bias 0."""
        for module in self.modules():
            if isinstance(module, nn.BatchNorm2d):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)
            elif isinstance(module, nn.Conv2d):
                nn.init.normal_(module.weight, std=0.01)
                if module.bias is not None:
                    nn.init.constant_(module.bias, 0)

                
def make_layers(cfg, in_channels=3, dilation=False):
    """Build an ``nn.Sequential`` from a list of layer specs.

    Args:
        cfg: iterable of specs. ``'M'`` appends a 2x2/stride-2 max-pool,
            ``'T'`` appends an ``MHSA`` layer over the current channel count,
            and any other value ``v`` appends a 3x3 ``Conv2d`` with ``v``
            output channels followed by an in-place ``ReLU``.
        in_channels: channel count of the tensor fed to the first layer.
        dilation: when True the convolutions use dilation 2 (and padding 2);
            otherwise dilation 1 (and padding 1). Padding equals the dilation
            so the 3x3 convolutions keep the spatial size.

    Returns:
        The assembled ``nn.Sequential``.
    """
    rate = 2 if dilation else 1
    channels = in_channels
    modules = []
    for spec in cfg:
        if spec == 'M':
            modules.append(nn.MaxPool2d(kernel_size=2, stride=2))
            continue
        if spec == 'T':
            modules.append(MHSA(channels, heads=1))
            continue
        modules.append(nn.Conv2d(channels, spec, kernel_size=3, padding=rate, dilation=rate))
        modules.append(nn.ReLU(inplace=True))
        # Only convolutions change the channel count seen by later layers.
        channels = spec
    return nn.Sequential(*modules)

class MHSA(nn.Module):
    """Residual multi-head self-attention over the spatial positions of a
    feature map.

    Every spatial location of the ``(N, C, H, W)`` input is treated as one
    token with a ``C``-dimensional embedding; tokens of the same sample attend
    to each other and the result is added back to the input.

    Args:
        n_dims: number of input channels (used as the embedding size).
        heads: number of attention heads; must divide ``n_dims``.
    """

    def __init__(self, n_dims, heads=1):
        super(MHSA, self).__init__()
        self.heads = heads
        self.dim = n_dims

        # Honour the requested head count (it used to be hard-coded to 1).
        self.multihead_attn = nn.MultiheadAttention(embed_dim=n_dims, num_heads=heads)

    def forward(self, x):
        """Apply self-attention with a skip connection.

        Args:
            x: tensor of shape ``(N, C, H, W)``.

        Returns:
            Tensor with the same shape as ``x``.
        """
        n_batch, C, height, width = x.size()

        # nn.MultiheadAttention (default batch_first=False) expects
        # (sequence, batch, embedding), so flatten the spatial grid and move
        # it to the front: (N, C, H, W) -> (N, C, H*W) -> (H*W, N, C).
        tokens = x.reshape(n_batch, C, height * width).permute(2, 0, 1)

        # The attention weights are not used, so skip materialising them.
        attn_output, _ = self.multihead_attn(tokens, tokens, tokens, need_weights=False)

        # Undo the layout change before the residual add:
        # (H*W, N, C) -> (N, C, H*W) -> (N, C, H, W).
        attn_map = attn_output.permute(1, 2, 0).reshape(n_batch, C, height, width)

        # skip connection
        return x + attn_map


In [None]:
from torchvision import models

# import h5py
# import torch
import shutil

def save_net(fname, net):
    """Dump ``net.state_dict()`` to the HDF5 file ``fname``.

    The file is opened in write mode (an existing file is replaced) and one
    dataset is created per state_dict key, holding that tensor's values as a
    NumPy array copied to the CPU.
    """
    with h5py.File(fname, 'w') as h5f:
        state = net.state_dict()
        for key in state:
            h5f.create_dataset(key, data=state[key].cpu().numpy())
def load_net(fname, net):
    """Fill ``net`` in place from the HDF5 file ``fname``.

    For every key of ``net.state_dict()`` the dataset of the same name is read
    from the file and copied into the corresponding tensor.
    """
    with h5py.File(fname, 'r') as h5f:
        for key, tensor in net.state_dict().items():
            stored = np.asarray(h5f[key])
            tensor.copy_(torch.from_numpy(stored))
            
def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    """Serialise ``state`` to ``filename`` with ``torch.save``.

    When ``is_best`` is truthy the file just written is additionally copied to
    ``model_best.pth.tar`` in the current working directory.
    """
    torch.save(state, filename)
    if not is_best:
        return
    shutil.copyfile(filename, 'model_best.pth.tar')

# The input dataset
class listDataset(Dataset):
    """Dataset over a list of image paths; each item is loaded by ``load_data``.

    Args:
        root: sequence of image paths. It is copied, never modified.
        shape: stored on the instance; not used by this class.
        shuffle: when True the (copied) path list is shuffled once at
            construction time; when False the given order is kept.
        transform: optional callable applied to the loaded image.
        train: when True every path is listed four times, so one pass over
            the dataset visits each image four times (``load_data`` is called
            with ``train=True`` on each visit).
        seen, batch_size, num_workers: stored on the instance; not used by
            this class.
    """

    def __init__(self, root, shape=None, shuffle=True, transform=None,  train=False, seen=0, batch_size=1, num_workers=1):
        # Work on a private copy so the caller's list is never reordered.
        lines = list(root)
        if train:
            # Repeat each path four times per epoch.
            lines = lines * 4
        if shuffle:
            random.shuffle(lines)

        self.nSamples = len(lines)
        self.lines = lines
        self.transform = transform
        self.train = train
        self.shape = shape
        self.seen = seen
        self.batch_size = batch_size
        self.num_workers = num_workers

    def __len__(self):
        return self.nSamples

    def __getitem__(self, index):
        """Return ``(img, target)`` for the path at ``index``.

        Raises:
            IndexError: if ``index`` is not smaller than ``len(self)``.
        """
        # Valid indices stop at len(self) - 1; reject anything beyond that
        # before touching the file system.
        if index >= len(self):
            raise IndexError('index range error')

        img_path = self.lines[index]
        img, target = load_data(img_path, self.train)

        if self.transform is not None:
            img = self.transform(img)
        return img, target
  




# data input
def load_data(img_path, train=True):
    """Load an image and its ground-truth map, optionally random-cropping both.

    The ground-truth file is located by replacing ``.jpg`` with ``.csv`` and
    every occurrence of ``img`` with ``den`` in ``img_path``, and is parsed as
    a comma-separated numeric table (presumably a per-pixel density map with
    the same height and width as the image).

    Args:
        img_path: path of the ``.jpg`` image.
        train: when True, a random crop of half the width and half the height
            is taken from both the image and the ground truth.

    Returns:
        ``(img, target)``: the RGB ``PIL.Image`` and the ground truth as a
        NumPy array resized to 1/8 of its height and width.
    """
    gt_path = img_path.replace('.jpg', '.csv').replace('img', 'den')
    img = Image.open(img_path).convert('RGB')
    # Read the ground truth from gt_path (not from the image file itself).
    target = np.genfromtxt(gt_path, delimiter=',')

    if train:
        # Crop window: half the image width and half the image height.
        crop_size = (int(img.size[0] / 2), int(img.size[1] / 2))
        if random.randint(0, 9) < 5:
            # Half of the time pick one of the four fixed quadrants ...
            dx = int(random.randint(0, 1) * img.size[0] * 1. / 2)
            dy = int(random.randint(0, 1) * img.size[1] * 1. / 2)
        else:
            # ... otherwise pick a random offset that keeps the window inside
            # the image.
            dx = int(random.random() * img.size[0] * 1. / 2)
            dy = int(random.random() * img.size[1] * 1. / 2)

        img = img.crop((dx, dy, crop_size[0] + dx, crop_size[1] + dy))
        # The array is indexed [row, column], i.e. [y, x].
        target = target[dy:crop_size[1] + dy, dx:crop_size[0] + dx]

    # Shrink by 8 in each dimension; the factor 64 (= 8 * 8) presumably keeps
    # the sum of the map roughly unchanged after the resize.
    target = cv2.resize(
        target,
        (int(target.shape[1] / 8), int(target.shape[0] / 8)),
        interpolation=cv2.INTER_CUBIC,
    ) * 64

    return img, target


# Referenced as a checkpoint path by the commented-out resume code; the active
# code only stores it under the 'arch' key when a checkpoint is saved.
pre=None    
# Lowest validation MAE seen so far (the training loop keeps the minimum).
best_prec1 = 1e6

# Base learning rate that adjust_learning_rate starts from each epoch.
original_lr = 1e-7
# Learning rate passed to the SGD optimizer and printed by train().
lr = 1e-7
# Batch size handed to the training and validation DataLoaders.
batch_size = 1
# SGD momentum and weight decay.
momentum = 0.95
decay = 5*1e-4
# The training loop iterates over range(start_epoch, epochs).
start_epoch = 0
epochs = 2000
# Epoch boundaries and the per-boundary multipliers used by
# adjust_learning_rate; with every scale equal to 1 the rate never changes.
steps = [-1,1,100,150]
scales= [1,1,1,1]
# Forwarded to listDataset as num_workers (the DataLoader itself is not given it).
workers = 10
# Wall-clock based seed, passed to torch.cuda.manual_seed.
seed = time.time()
# train() prints a progress line every print_freq batches.
print_freq = 30



########################################################################################
# Please change the root below to the part_A_final or part_B_final directory
########################################################################################
# set the root
root = '/content/drive/MyDrive/part_A_final/'

# Training images: every .jpg under <root>/train/img/.
train_root=root+'train/img/'
train_list=glob.glob(pathname=train_root+'*.jpg') #get the training path list

# Validation images: every .jpg under <root>/test/img/.
val_root=root+'test/img/'
val_list=glob.glob(pathname=val_root+'*.jpg')

# Using cuda: expose only GPU 0 to this process and seed its random generator.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
torch.cuda.manual_seed(seed)

# Build the network (with the default load_weights=False the front-end is
# initialised from pretrained VGG-16).
model = DTNet()

# Optional warm start from an earlier checkpoint, kept for reference. It
# copies every tensor whose name (with or without a 'module.' prefix) also
# exists in the checkpoint.
# pre_train='/content/drive/MyDrive/ Model_best_last_layer/model_best_9.17.tar'
# print("=> loading checkpoint '{}'".format(pre_train))
# checkpoint = torch.load(pre_train)
# start_epoch = checkpoint['epoch']
# best_prec1 = checkpoint['best_prec1']
# my_models = model.state_dict()
# pre_models = list(checkpoint['state_dict'].items())
# preweight_dict = {}

# for i in range(len(pre_models)):
#   prelayer_name, pre_weights = pre_models[i]
#   preweight_dict[prelayer_name] = pre_weights

# for layer_name, value in my_models.items():
#   if 'module.'+layer_name in preweight_dict.keys() or layer_name in preweight_dict.keys():
#     try:
#       my_models[layer_name] = preweight_dict['module.'+layer_name]
#     except:
#       my_models[layer_name] = preweight_dict[layer_name]
# model.load_state_dict(my_models)
# print("=> loaded checkpoint '{}' (epoch {})".format(pre_train, checkpoint['epoch']))   
model = model.cuda()

# Summed (not averaged) squared error. reduction='sum' is the supported
# spelling of the deprecated size_average=False.
criterion = nn.MSELoss(reduction='sum').cuda()

optimizer = torch.optim.SGD(model.parameters(), lr,
              momentum=momentum,
              weight_decay=decay)

# if pre:
#     if os.path.isfile(pre):
#         print("=> loading checkpoint '{}'".format(args.pre))
#         checkpoint = torch.load(pre)
#         start_epoch = checkpoint['epoch']
#         best_prec1 = checkpoint['best_prec1']
#         model.load_state_dict(checkpoint['state_dict'])
#         optimizer.load_state_dict(checkpoint['optimizer'])
#         print("=> loaded checkpoint '{}' (epoch {})"
#               .format(pre, checkpoint['epoch']))
#     else:
#         print("=> no checkpoint found at '{}'".format(pre))
            
def train(train_list, model, criterion, optimizer, epoch):
    """Run one training epoch over ``train_list`` on the GPU.

    Args:
        train_list: list of training image paths.
        model: network to optimise; it is switched to training mode.
        criterion: loss applied to ``(output, target)``.
        optimizer: optimizer stepped once per batch.
        epoch: current epoch number, used only for logging.

    Uses the module-level ``batch_size``, ``workers`` and ``print_freq``.
    """
    losses = AverageMeter()
    batch_time = AverageMeter()
    data_time = AverageMeter()

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_loader = torch.utils.data.DataLoader(
        listDataset(train_list,
                    shuffle=True,
                    transform=transforms.Compose([transforms.ToTensor(), normalize]),
                    train=True,
                    batch_size=batch_size,
                    num_workers=workers),
        batch_size=batch_size)

    # Report the rate the optimizer actually uses: adjust_learning_rate
    # writes to the optimizer's param groups, not to the global ``lr``.
    current_lr = optimizer.param_groups[0]['lr']
    print('epoch %d, processed %d samples, lr %.10f' % (epoch, epoch * len(train_loader.dataset), current_lr))

    model.train()
    end = time.time()

    for i, (img, target) in enumerate(train_loader):
        data_time.update(time.time() - end)

        img = img.cuda()
        output = model(img)

        # The loader yields targets as (batch, h, w); insert the channel axis
        # at position 1 so they line up with the (batch, 1, h, w) output for
        # any batch size.
        target = target.type(torch.FloatTensor).unsqueeze(1).cuda()

        loss = criterion(output, target)

        losses.update(loss.item(), img.size(0))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_time.update(time.time() - end)
        end = time.time()

        if i % print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  .format(
                   epoch, i, len(train_loader), batch_time=batch_time,
                   data_time=data_time, loss=losses))
    
def validate(val_list, model, criterion):
    """Evaluate ``model`` on ``val_list`` and return the mean absolute error
    between the summed network output and the summed target.

    Args:
        val_list: list of validation image paths.
        model: network to evaluate; it is switched to eval mode.
        criterion: unused; kept so existing callers keep working.

    Returns:
        The MAE as a Python float, averaged over the number of batches.
    """
    print ('begin test')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    test_loader = torch.utils.data.DataLoader(
        listDataset(val_list,
                    shuffle=False,
                    transform=transforms.Compose([transforms.ToTensor(), normalize]),
                    train=False),
        batch_size=batch_size)

    model.eval()

    # Accumulate plain floats so the value that is returned (and later saved
    # in checkpoints as best_prec1) is not a CUDA tensor.
    mae = 0.0

    with torch.no_grad():
        for img, target in test_loader:
            output = model(img.cuda())
            mae += abs(output.sum().item() - target.sum().item())

    mae = mae/len(test_loader)
    print(' * MAE {mae:.3f} '
              .format(mae=mae))

    return mae
        
def adjust_learning_rate(optimizer, epoch):
    """Set the learning rate of every param group for ``epoch``.

    Starting from the module-level ``original_lr``, the rate is multiplied by
    ``scales[i]`` for each boundary ``steps[i]`` that ``epoch`` has reached,
    walking ``steps`` in order and stopping at the first boundary that is not
    yet reached or is hit exactly. A boundary without a matching entry in
    ``scales`` uses a factor of 1. The module-level ``lr`` is not modified.
    """
    new_lr = original_lr
    for idx, boundary in enumerate(steps):
        if epoch < boundary:
            break
        factor = scales[idx] if idx < len(scales) else 1
        new_lr = new_lr * factor
        if epoch == boundary:
            break

    for group in optimizer.param_groups:
        group['lr'] = new_lr
        
class AverageMeter(object):
    """Tracks the latest value together with a running weighted average.

    Attributes set by ``reset``/``update``: ``val`` (last value), ``sum``
    (weighted total), ``count`` (total weight) and ``avg`` (``sum / count``).
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Zero all four statistics."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as the newest value, weighted by ``n``."""
        self.val = val
        total = self.sum + val * n
        seen = self.count + n
        self.sum, self.count = total, seen
        self.avg = total / seen

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth


  0%|          | 0.00/528M [00:00<?, ?B/s]



Training process

In [None]:
for epoch in range(start_epoch, epochs):
    # Set this epoch's learning rate, then run one training pass followed by
    # one validation pass.
    adjust_learning_rate(optimizer, epoch)
    train(train_list, model, criterion, optimizer, epoch)
    prec1 = validate(val_list, model, criterion)

    # Lower MAE is better; remember whether this epoch set a new best.
    is_best = prec1 < best_prec1
    best_prec1 = min(prec1, best_prec1)
    print(f' * best MAE {best_prec1:.3f} ')

    # A checkpoint is written every epoch; save_checkpoint also copies it to
    # the "best" file when is_best is set.
    checkpoint_state = {
        'epoch': epoch + 1,
        'arch': pre,
        'state_dict': model.state_dict(),
        'best_prec1': best_prec1,
        'optimizer': optimizer.state_dict(),
    }
    save_checkpoint(checkpoint_state, is_best)

epoch 0, processed 0 samples, lr 0.0000001000
Epoch: [0][0/1196]	Time 1.307 (1.307)	Data 0.705 (0.705)	Loss 0.6811 (0.6811)	


KeyboardInterrupt: ignored