In [1]:
%load_ext autoreload
%autoreload 2

from __future__ import absolute_import, print_function

from collections import OrderedDict

import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models
from test_config import cfg
import os
import sys

# Load model, output stage metadata whilst predicting

In [2]:
# Generate root folder
path_lst = os.getcwd().split(os.sep)[:4]
path_lst[0] += os.sep
root_folder = os.path.join(*path_lst)

# C3 and experiment folders
c3_folder = os.path.join(root_folder, 'C-3-Framework')
exp_folder = os.path.join(c3_folder, 'exp')

# Append to sys.path
sys.path.append(c3_folder)

In [3]:
from test_classes import CvTest
from model_files.VGG16_LCM_REG import VGG16_LCM_REG

In [4]:
# Initialise
exp = '202202080823_data=london_model=VGG16_LCM_REG_lr=1e-05'

test = CvTest(
    exp=exp,
    net_object=VGG16_LCM_REG
)

[2022-02-16 09:10:02] === Experiment:202202080823_data=london_model=VGG16_LCM_REG_lr=1e-05 ===
[2022-02-16 09:10:02] model: VGG16_LCM_REG
[2022-02-16 09:10:02] CC: LCM
[2022-02-16 09:10:02] ===PAPER REVIEW VERSION===


## Dissect logs, output arrays to .npy files

In [5]:
# Generate prediction
test.pred(1)

[2022-02-16 09:05:48] x3:torch.Size([1, 512, 128, 128])
[2022-02-16 09:05:48] x4:torch.Size([1, 512, 64, 64])
[2022-02-16 09:05:48] x5:torch.Size([1, 512, 32, 32])
[2022-02-16 09:05:48] multifuse x5:torch.Size([1, 512, 32, 32])
[2022-02-16 09:05:48] multifuse x4:torch.Size([1, 512, 64, 64])
[2022-02-16 09:05:48] multifuse x3:torch.Size([1, 512, 128, 128])
[2022-02-16 09:05:48] count_layer x5_:torch.Size([1, 512, 16, 16])
[2022-02-16 09:05:48] p_layer x5:torch.Size([1, 3, 16, 16])
[2022-02-16 09:05:48] k_layer x5:torch.Size([1, 1, 16, 16])
[2022-02-16 09:05:48] i_layer x5:torch.Size([1, 3, 16, 16])
[2022-02-16 09:05:48] stage1_regress0: torch.Size([1, 16, 16])
[2022-02-16 09:05:48] stage1_regress1_0: torch.Size([1, 16, 16])
[2022-02-16 09:05:48] stage1_regress1_1: torch.Size([1, 16, 16])
[2022-02-16 09:05:48] stage1_regress1_2: torch.Size([1, 16, 16])
[2022-02-16 09:05:48] stage1_regress2: torch.Size([1, 1, 16, 16])
[2022-02-16 09:05:48] stage1_regress3: torch.Size([1, 1, 16, 16])
[2022

[img_raw: (1024, 1024)]
[img: torch.Size([1, 3, 1024, 1024])]
[gt_raw: (1024, 1024)]
[gt: (1, 16, 16)]
[pred_map: (16, 16)]
[pred_map_resize: (128, 128)]

# Notes

In [6]:
data_mode = cfg.DATASET
if data_mode == 'SHHA':
    patch_max = cfg.SHHAPATCHMAX
elif data_mode == 'SHHB':
    patch_max = cfg.SHHBPATCHMAX
elif data_mode == 'QNRF':
    patch_max = cfg.QNRFPATCHMAX
elif data_mode == 'UCF50':
    patch_max = cfg.CC50PATCHMAX
    
elif data_mode == 'london':
    patch_max = 100

In [7]:
stage_num = [3,3,3]
lambda_i = 1
lambda_k = 1
count_range = patch_max
multi_fuse = True
soft_interval = True


### Layer 3

in:1024x1024x3

out: 128x128x512

### Layer 4

in: 128x128x512

out: 64x64x512

### Layer 5

in: 64x64x512

out: 32x32x512

In [9]:
def VGG_make_layers(cfg, in_channels=3, batch_norm=False, dilation=1):
    d_rate = dilation
    layers = []
    for v in cfg:
        if v == 'M':
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        else:
            conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=d_rate, dilation=d_rate)
            if batch_norm:
                layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
            else:
                layers += [conv2d, nn.ReLU(inplace=True)]
            in_channels = v
    return nn.Sequential(*layers)

layer3 = VGG_make_layers([64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512])
layer4 = VGG_make_layers(['M', 512, 512, 512], in_channels=512)
layer5 = VGG_make_layers(['M', 512, 512, 512], in_channels=512)

## Scale aware module

Enhance the multi-scale feature extraction capability of the network. Achieves multi-scale
information enhancement only on a single layer feature map and performs this
operation at different convolutional layers to bring rich information to subsequent regression modules.

SAM first compresses the channel of feature map via 1×1 convolution. Afterwards, the compressed
feature map is processed through dilated convolution with different expansion
ratios of 1, 2, 3 and 4 to perceive multi-scale features. The extracted
multi-scale feature maps are fused via channel-wise concatenation operation and
3×3 convolution. The size of final feature map is consistent with the input one.

In [16]:
class DC_layer(nn.Module):
    """
    Direct counting layer

    """
    def __init__(self, level, fuse=False):
        super(DC_layer, self).__init__()
        self.level = level
        self.conv1x1_d1 = nn.Conv2d(512, 512, kernel_size=1)
        self.conv1x1_d2 = nn.Conv2d(512, 512, kernel_size=1)
        self.conv1x1_d3 = nn.Conv2d(512, 512, kernel_size=1)
        self.conv1x1_d4 = nn.Conv2d(512, 512, kernel_size=1)

        self.conv_d1 = nn.Conv2d(512, 512, kernel_size=3, padding=1, dilation=1)
        self.conv_d2 = nn.Conv2d(512, 512, kernel_size=3, padding=2, dilation=2)
        self.conv_d3 = nn.Conv2d(512, 512, kernel_size=3, padding=3, dilation=3)
        self.conv_d4 = nn.Conv2d(512, 512, kernel_size=3, padding=4, dilation=4)
        
        self.fuse = fuse
        if self.fuse:
            self.fuse = nn.Conv2d(512*2, 512, kernel_size=3, padding=1)
            self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        x1 = self.conv1x1_d1(x)
        x2 = self.conv1x1_d2(x)
        x3 = self.conv1x1_d3(x)
        x4 = self.conv1x1_d4(x)

        x1 = self.conv_d1(x1)
        x2 = self.conv_d2(x2)
        x3 = self.conv_d3(x3)
        x4 = self.conv_d4(x4)

        # x = torch.cat([x1, x2, x3, x4], dim=1)
        # x = self.relu(self.fuse(x))
        x = Maxout(x1, x2, x3, x4)
        return x
    
def Maxout(x1, x2, x3, x4):

    # mask for x1 >= x2
    mask_1 = torch.ge(x1, x2)
    mask_1 = mask_1.float()
    x = mask_1 * x1 + (1-mask_1) * x2
    
    print(x)

    mask_2 = torch.ge(x, x3)
    mask_2 = mask_2.float()
    x = mask_2 * x + (1-mask_2) * x3
    
    print(x)

    mask_3 = torch.ge(x, x4)
    mask_3 = mask_3.float()
    x = mask_3 * x + (1-mask_3) * x4
    return x

fuse_layer5 = DC_layer(level=0)
fuse_layer4 = DC_layer(level=1)
fuse_layer3 = DC_layer(level=2)

### Exploring Maxout function

In [20]:
x1 = torch.Tensor([
    [1,0,0],
    [0,1,0],
    [0,0,0]
])

x2 = torch.Tensor([
    [0,0,0],
    [0,1,1],
    [0,0,0]
])

x3 = torch.Tensor([
    [0,0,0],
    [0,1,2],
    [0,0,0]
])

x4 = torch.Tensor([
    [0,0,0],
    [0,2,2],
    [0,0,0]
])

Maxout(x1, x2, x3, x4)

tensor([[1., 0., 0.],
        [0., 1., 1.],
        [0., 0., 0.]])
tensor([[1., 0., 0.],
        [0., 1., 2.],
        [0., 0., 0.]])


tensor([[1., 0., 0.],
        [0., 2., 2.],
        [0., 0., 0.]])

In [21]:
mask_1 = torch.ge(x1, x2)
mask_1 = mask_1.float()

In [25]:
mask_1, 1-mask_1

(tensor([[1., 1., 1.],
         [1., 1., 0.],
         [1., 1., 1.]]),
 tensor([[0., 0., 0.],
         [0., 0., 1.],
         [0., 0., 0.]]))

## Notes on 1x1 convolution

[Machine learning mastery link](https://machinelearningmastery.com/introduction-to-1x1-convolutions-to-reduce-the-complexity-of-convolutional-neural-networks/)

Channel-wise pooling/feature map pooling/projection layer

Similar to how pooling downscales feature maps by halving the width and height of the feature maps, 1x1 conv layer reduces the depth or number of feature maps.

This will only have a single parameter for each channel, and will result in a single output value. This acts like a single neuron with input from same position in each feature map. Not really a convolution operation, linear weighting or projection of the input.

[Visualisation from deeplearning.ai](https://www.coursera.org/lecture/convolutional-neural-networks/networks-in-networks-and-1x1-convolutions-ZTb8x)

## Count layer

In [None]:
class Count_layer(nn.Module):
    """
    Avg pool, max pool, concat
    1x1 conv/merge concatenated layer
    """
    def __init__(self, inplanes=512, pool=2):
        super(Count_layer, self).__init__()
        self.avgpool_layer = nn.Sequential(
            nn.Conv2d(inplanes, inplanes, kernel_size=1),
            nn.ReLU(inplace=True),
            nn.AvgPool2d((pool, pool), stride=pool),
        )
        self.maxpool_layer = nn.Sequential(
            nn.Conv2d(inplanes, inplanes, kernel_size=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d((pool, pool), stride=pool),
        )
        self.conv1x1= nn.Sequential(
            nn.Conv2d(inplanes*2, inplanes, kernel_size=1),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        x_avg = self.avgpool_layer(x)
        x_max = self.maxpool_layer(x)

        x = torch.cat([x_avg, x_max], dim=1)
        x = self.conv1x1(x)
        return x
    
count_layer5 = Count_layer(pool=2)
count_layer4 = Count_layer(pool=4)
count_layer3 = Count_layer(pool=8)

In [None]:
layer5_k = nn.Sequential(
    nn.Conv2d(512, 1, kernel_size=1),
    nn.Tanh(),
)
layer5_i = nn.Sequential(
    nn.Conv2d(512, self.stage_num[0], kernel_size=1),
    nn.Sigmoid(),
)
layer5_p = nn.Sequential(
    nn.Conv2d(512, self.stage_num[0], kernel_size=1),
    nn.ReLU(),
)

layer4_k = nn.Sequential(
    nn.Conv2d(512, 1, kernel_size=1),
    nn.Tanh(),
)
layer4_i = nn.Sequential(
    nn.Conv2d(512, self.stage_num[0], kernel_size=1),
    nn.Sigmoid(),
)
layer4_p = nn.Sequential(
    nn.Conv2d(512, self.stage_num[0], kernel_size=1),
    nn.ReLU(),
)

layer3_k = nn.Sequential(
    nn.Conv2d(512, 1, kernel_size=1),
    nn.Tanh(),
)
layer3_i = nn.Sequential(
    nn.Conv2d(512, self.stage_num[0], kernel_size=1),
    nn.Sigmoid(),
)
layer3_p = nn.Sequential(
    nn.Conv2d(512, self.stage_num[0], kernel_size=1),
    nn.ReLU(),
)

## Activation functions

#### *Deep Learning for Vision systems, Mohamed Elgendy, pp51-61*

"The purpose of an activation function is to introduce non-linearity into the network. Without it it will perform similary to a single perceptron no matter how many layers we add. 

They are needed to restrict the output value to a finite value.

The composition of 2 linear functions is a linear function so unless you use a non-linear activation function you are not computing any interesting functions no matter how deep you make the network.

When we derrive a linear function we get a constant so it doesn't depend on the input value. This means every time we do backprop the gradient will be the same. This means we're not improving the error

### Heaviside
Produces a binary output, mainly used in binary classification problems to predict class

### Sigmoid/logistic function

$ \frac{1}{1 + e^{-z}} $

Often used in binary classification problems to predict the probability of a class when you have 2 classes. Converts infinite continuous variables into simple probabilities between 0 and 1. 

In example where linear models produce probabilities greater than 1 or less than zero. Using exponents gets rid of the values less than 0, and dividing an equation by itself plus a small value will give us a number smaller than 1.

### Softmax function

$ \sigma_j = \frac{e^{x_j}}{\sum{e^{x_i}}} $

Generalisation of the sigmoid function. Used to obtain classification probabilities when we have more than two classes. Forces the outputs of a network to sum to 1. This also works fine when classifying between 2 classes

### Hyperbolic tangent (tanh)
Shifted version of the sigmoid. Instead of between 0 and 1, between -1 and 1. Almost always works better than sigmoids in hidden layers because it has the effect of centering your data so that the mean of the data is close to zero rather than 0.5, which makes the learning for the next layer a bit easier. 

One downside to tanh and sigmoid is if the value if very large the gradient is almost zero

### ReLU
Piecewise linear function that is zero for all values less than or equal to zero, and a*x for all values greater than zero

[Why relu works](https://towardsdatascience.com/if-rectified-linear-units-are-linear-how-do-they-add-nonlinearity-40247d3e4792) 

![Annulus problem](img/relu.png)

"Whereas tanh, a smooth, curved function, draws a clean envelope around the circle (and linear fails completely), ReLU draws a hexagon, with several pointed corners. In fact, this is what ReLU’s advantage is: it can bend the linear function at a certain point, to a certain degree. Combined with the biases and weights from the previous layer, the ReLU can take the form of a bend at any location at any degree.

These small bends form the building blocks of approximations. Any relationship or function can be roughly estimated by aggregating many ReLU functions together, which occurs when neurons are collapsed and combined in the following layer. This has been mathematically proven, for example, with a sine wave or an exponential function, much like how a lower-degree Taylor series fits a function.

The strength of the ReLU function lies not in itself, but in an entire army of ReLUs. This is why using a few ReLUs in a neural network does not yield satisfactory results; instead, there must be an abundance of ReLU activations to allow the network to construct an entire map of points. In multi-dimensional space, rectified linear units combine to form complex polyhedra along the class boundaries.

Here lies the reason why ReLU works so well: when there are enough of them, they can approximate any function just as well as other activation functions like sigmoid or tanh, much like stacking hundreds of Legos, without the downsides. There are several issues with smooth-curve functions that do not occur with ReLU — one being that computing the derivative, or the rate of change, the driving force behind gradient descent, is much cheaper with ReLU than with any other smooth-curve function.

Another is that sigmoid and other curves have an issue with the vanishing gradient problem; because the derivative of the sigmoid function gradually slopes off for larger absolute values of x."

In [32]:
x = torch.ones((32,32,512)).unsqueeze(0)
x.shape

torch.Size([1, 32, 32, 512])

In [34]:
x[:, 0, :, :].shape

torch.Size([1, 32, 512])

In [43]:
os.path.normpath(os.getcwd()).split(os.sep)

['G:',
 'My Drive',
 'computer_vision',
 'code',
 'paper_reviews',
 'adaptive_mixture_regression']

In [None]:
if load_weights:
    #self._initialize_weights()

    mod = models.vgg16(pretrained=False)
    pretrain_path = './models/Pretrain_model/vgg16-397923af.pth'
    print(f'path: {os.getcwd()}')
    mod.load_state_dict(torch.load(pretrain_path))

    new_state_dict = OrderedDict()
    for key, params in mod.features[0:23].state_dict().items():
        new_state_dict[key] = params
    self.layer3.load_state_dict(new_state_dict)

    new_state_dict = OrderedDict()
    for key, params in mod.features[23:30].state_dict().items():
        key = str(int(key[:2]) - 23) + key[2:]
        new_state_dict[key] = params
    self.layer4.load_state_dict(new_state_dict)

    new_state_dict = OrderedDict()
    for key, params in mod.features[23:30].state_dict().items():
        key = str(int(key[:2]) - 23) + key[2:]
        new_state_dict[key] = params
    self.layer5.load_state_dict(new_state_dict)

In [None]:
class VGG16_LCM_REG(nn.Module):
    def __init__(self, load_weights=False, stage_num=[3,3,3], count_range=patch_max, lambda_i=1., lambda_k=1.):
        super(VGG16_LCM_REG, self).__init__()

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.normal_(m.weight, std=0.01)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        
        x3 = self.layer3(x)
        x4 = self.layer4(x3)
        x5 = self.layer5(x4)

        if self.multi_fuse:
            x5 = self.fuse_layer5(x5)
            x4 = self.fuse_layer4(x4)
            x3 = self.fuse_layer3(x3)

        x5_= self.count_layer5(x5)
        p5 = self.layer5_p(x5_)
        if self.soft_interval:
            k5 = self.layer5_k(x5_)
            i5 = self.layer5_i(x5_)

        x4_ = self.count_layer4(x4)
        p4 = self.layer4_p(x4_)
        if self.soft_interval:
            k4 = self.layer4_k(x4_)
            i4 = self.layer4_i(x4_)

        x3_ = self.count_layer3(x3)
        p3 = self.layer3_p(x3_)
        if self.soft_interval:
            k3 = self.layer3_k(x3_)
            i3 = self.layer3_i(x3_)

        stage1_regress = p5[:, 0, :, :] * 0
        stage2_regress = p4[:, 0, :, :] * 0
        stage3_regress = p3[:, 0, :, :] * 0

        for index in range(self.stage_num[0]):
            if self.soft_interval:
                stage1_regress = stage1_regress + (float(index) + self.lambda_i * i5[:, index, :, :]) * p5[:, index, :, :]
            else:
                stage1_regress = stage1_regress + float(index) * p5[:, index, :, :]
        stage1_regress = torch.unsqueeze(stage1_regress, 1)
        if self.soft_interval:
            stage1_regress = stage1_regress / ( float(self.stage_num[0]) * (1. + self.lambda_k * k5) )
        else:
            stage1_regress = stage1_regress / float(self.stage_num[0])


        for index in range(self.stage_num[1]):
            if self.soft_interval:
                stage2_regress = stage2_regress + (float(index) + self.lambda_i * i4[:, index, :, :]) * p4[:, index, :, :]
            else:
                stage2_regress = stage2_regress + float(index) * p4[:, index, :, :]
        stage2_regress = torch.unsqueeze(stage2_regress, 1)
        if self.soft_interval:
            stage2_regress = stage2_regress / ( (float(self.stage_num[0]) * (1. + self.lambda_k * k5)) *
                                                (float(self.stage_num[1]) * (1. + self.lambda_k * k4)) )
        else:
            stage2_regress = stage2_regress / float( self.stage_num[0] * self.stage_num[1] )


        for index in range(self.stage_num[2]):
            if self.soft_interval:
                stage3_regress = stage3_regress + (float(index) + self.lambda_i * i3[:, index, :, :]) * p3[:, index, :, :]
            else:
                stage3_regress = stage3_regress + float(index) * p3[:, index, :, :]
        stage3_regress = torch.unsqueeze(stage3_regress, 1)
        if self.soft_interval:
            stage3_regress = stage3_regress / ( (float(self.stage_num[0]) * (1. + self.lambda_k * k5)) *
                                                (float(self.stage_num[1]) * (1. + self.lambda_k * k4)) *
                                                (float(self.stage_num[2]) * (1. + self.lambda_k * k3)) )
        else:
            stage3_regress = stage3_regress / float( self.stage_num[0] * self.stage_num[1] * self.stage_num[2] )

        # regress_count = stage1_regress * self.count_range
        # regress_count = (stage1_regress + stage2_regress) * self.count_range
        regress_count = (stage1_regress + stage2_regress + stage3_regress) * self.count_range

        return regress_count

    def VGG_make_layers(self, cfg, in_channels=3, batch_norm=False, dilation=1):
        d_rate = dilation
        layers = []
        for v in cfg:
            if v == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=d_rate, dilation=d_rate)
                if batch_norm:
                    layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
                else:
                    layers += [conv2d, nn.ReLU(inplace=True)]
                in_channels = v
        return nn.Sequential(*layers)


class Count_layer(nn.Module):
    """
    Avg pool, max pool, concat
    1x1 conv/merge concatenated layer
    """
    def __init__(self, inplanes=512, pool=2):
        super(Count_layer, self).__init__()
        self.avgpool_layer = nn.Sequential(
            nn.Conv2d(inplanes, inplanes, kernel_size=1),
            nn.ReLU(inplace=True),
            nn.AvgPool2d((pool, pool), stride=pool),
        )
        self.maxpool_layer = nn.Sequential(
            nn.Conv2d(inplanes, inplanes, kernel_size=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d((pool, pool), stride=pool),
        )
        self.conv1x1= nn.Sequential(
            nn.Conv2d(inplanes*2, inplanes, kernel_size=1),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        x_avg = self.avgpool_layer(x)
        x_max = self.maxpool_layer(x)

        x = torch.cat([x_avg, x_max], dim=1)
        x = self.conv1x1(x)
        return x


class DC_layer(nn.Module):
    """
    Direct counting layer


    """
    def __init__(self, level, fuse=False):
        super(DC_layer, self).__init__()
        self.level = level
        self.conv1x1_d1 = nn.Conv2d(512, 512, kernel_size=1)
        self.conv1x1_d2 = nn.Conv2d(512, 512, kernel_size=1)
        self.conv1x1_d3 = nn.Conv2d(512, 512, kernel_size=1)
        self.conv1x1_d4 = nn.Conv2d(512, 512, kernel_size=1)

        self.conv_d1 = nn.Conv2d(512, 512, kernel_size=3, padding=1, dilation=1)
        self.conv_d2 = nn.Conv2d(512, 512, kernel_size=3, padding=2, dilation=2)
        self.conv_d3 = nn.Conv2d(512, 512, kernel_size=3, padding=3, dilation=3)
        self.conv_d4 = nn.Conv2d(512, 512, kernel_size=3, padding=4, dilation=4)
        
        self.fuse = fuse
        if self.fuse:
            self.fuse = nn.Conv2d(512*2, 512, kernel_size=3, padding=1)
            self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        x1 = self.conv1x1_d1(x)
        x2 = self.conv1x1_d2(x)
        x3 = self.conv1x1_d3(x)
        x4 = self.conv1x1_d4(x)

        x1 = self.conv_d1(x1)
        x2 = self.conv_d2(x2)
        x3 = self.conv_d3(x3)
        x4 = self.conv_d4(x4)

        # x = torch.cat([x1, x2, x3, x4], dim=1)
        # x = self.relu(self.fuse(x))
        x = Maxout(x1, x2, x3, x4)
        return x

def Maxout(x1, x2, x3, x4):
    """

    Args:
        x1:
        x2:
        x3:
        x4:

    Returns:

    """

    # mask for x1 >= x2
    mask_1 = torch.ge(x1, x2)
    mask_1 = mask_1.float()
    x = mask_1 * x1 + (1-mask_1) * x2

    mask_2 = torch.ge(x, x3)
    mask_2 = mask_2.float()
    x = mask_2 * x + (1-mask_2) * x3

    mask_3 = torch.ge(x, x4)
    mask_3 = mask_3.float()
    x = mask_3 * x + (1-mask_3) * x4
    return x


if __name__ == "__main__":
    model = VGG16_LCM_REG(load_weights=False)

    model.eval()
    image = torch.randn(2, 3, 384, 384)
    x5, c = model(image)
    # print(model)
    print("input:", image.shape)
    # print("x5:", x5.shape)
    print("c:", c.shape)
