# Import

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np
from PIL import Image
from torch.utils import data
from torchvision import transforms, utils
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
import torchvision
import random
import torch.optim as opt
from tqdm import tqdm
import time

import os
from torchsummary import summary
import cv2
from sklearn.model_selection import train_test_split

import copy
from collections import defaultdict

import re
import math
import imgviz

from thop import profile
from ptflops import get_model_complexity_info

In [2]:
torch.cuda.is_available()

True

In [3]:
class IS_Dataset_test(Dataset):
  CLASSES = ['background', 'girder', 'net', 'lanyard', 'guardrail']
  def __init__(self, data_list, label_list, classes, cm2lbl, transform=None, transform_toTensor=None, resize_size=None):

    self.data_list = data_list
    self.label_list = label_list
    self.transform = transform
    self.transform_toTensor = transform_toTensor
    self.cm2lbl = cm2lbl
    self.resize_size = resize_size
    # convert str names to class values on masks
    self.class_values = [self.CLASSES.index(cls.lower()) for cls in classes]
    print('Read ' + str(len(self.data_list)) + ' images')
    
  def __getitem__(self, index):
    # read data
    image = cv2.imread(self.data_list[index])
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
#     image = cv2.resize(image, self.resize_size, interpolation=cv2.INTER_AREA)
        
    mask = cv2.imread(self.label_list[index])
#     mask = cv2.resize(mask, self.resize_size, interpolation=cv2.INTER_NEAREST)
    if self.resize_size:
        image = cv2.resize(image, self.resize_size, interpolation=cv2.INTER_AREA)
        mask = cv2.resize(mask, self.resize_size, interpolation=cv2.INTER_NEAREST)
    if image.shape[0] > 1400 or image.shape[1] > 1400 :
        ratio = ratioFunction(image.shape[0], image.shape[1])
        new_size = (960, int(960*ratio))
        image = cv2.resize(image, new_size, interpolation=cv2.INTER_AREA)
        mask = cv2.resize(mask, new_size, interpolation=cv2.INTER_NEAREST)
    if image.shape[0]%16 !=0 or image.shape[1]%16 !=0 :
        new_size = (960, 720)
        image = cv2.resize(image, new_size, interpolation=cv2.INTER_AREA)
        mask = cv2.resize(mask, new_size, interpolation=cv2.INTER_NEAREST)
    print(image.shape)
    # apply data augmentations
    if self.transform:
      # ndarray to PIL image
      image = Image.fromarray(image)
      mask = Image.fromarray(mask)
      # apply the transforms
      image = self.transform(image)
      mask = self.transform(mask)

    mask = image2label(mask, self.cm2lbl)
    mask = mask.squeeze()
    # print(np.unique(mask))
    # print(mask.shape)

    masks = [(mask == v) for v in self.class_values]
    mask = np.stack(masks, axis=-1).astype('float')

    if self.transform_toTensor:
      image = self.transform_toTensor(image)
      mask = self.transform_toTensor(mask)

    return image, mask
    
  def __len__(self):
    return len(self.data_list)

In [22]:
def read_images(root_path): 
  """ 
    Args : directory path
    Return : list with file path
  """

  for root, dirs, files in os.walk(root_path):
    print("path：", root)
    print("directory：", dirs)
    print("file：", files)
    
  org_imgs = [(root + "/" + file_name) for file_name in tqdm(files)]
  org_imgs.sort()
  print("Read ",len(org_imgs), " files")
  return org_imgs

def make_cm2lbl():

  colormap = [[0,0,0], [0,0,128], [0,128,128], [128,0,0], [0,128,0]] # background, grider, net, lanyard, guardrail 

  cm2lbl = np.zeros(256**3) # for each pixel 0~255 , channel = 3(RGB)
  for i,cm in enumerate(colormap):
      cm2lbl[(cm[0]*256+cm[1])*256+cm[2]] = i
      # print(i)
      # print((cm[0]*256+cm[1])*256+cm[2]
  return cm2lbl


def image2label(img, cm2lbl):
  data = np.array(img, dtype='int32')
  idx = (data[:, :, 0] * 256 + data[:, :, 1]) * 256 + data[:, :, 2]
  # print(np.unique(idx))
  # print(cm2lbl[idx])
  result = np.array(cm2lbl[idx], dtype='int64')  
  return result[:,:,None]

# helper function for data visualization
def visualize(**images):
  """PLot images in one row."""
  n = len(images)
  plt.figure(figsize=(16, 5))
  for i, (name, image) in enumerate(images.items()):
    plt.subplot(1, n, i + 1)
    plt.xticks([])
    plt.yticks([])
    plt.title(' '.join(name.split('_')).title())
    plt.imshow(image)
  plt.show()

def print_network(net):
  num_params = 0
  for param in net.parameters():
    num_params += param.numel()
  print(net)
  print('Total number of parameters: %d' % num_params)

In [23]:
def ratioFunction(num1, num2):
    num1 = int(num1) # Now we are good
    num2 = int(num2) # Good, good
    if num1 > num2:
        ratio12 = num2/num1
    else:
        ratio12 = num1/num2
#     print('The ratio of', num1, 'and', num2,'is', ratio12 + '.')
    return ratio12
ratioFunction(288, 216)

0.75

In [6]:
viz_classes = ['guardrail']
classes = ['guardrail']
test_features_folder = '/home/user/Desktop/lanyard_segmentation/guardrail/test_data/features'
test_labels_folder = '/home/user/Desktop/lanyard_segmentation/guardrail/test_data/labels'
orig_features_folder = '/home/user/Desktop/lanyard_segmentation/guardrail/orig_features'
orig_labels_folder = '/home/user/Desktop/lanyard_segmentation/guardrail/orig_labels'

In [7]:
test_input_imgs = read_images(test_features_folder)
test_output_imgs = read_images(test_labels_folder)

transform_toTensor = transforms.Compose([
    transforms.ToTensor()
])
cm2lbl = make_cm2lbl()

test_dataset = IS_Dataset_test(
      test_input_imgs, 
      test_output_imgs, 
#       transform=transform,
      transform_toTensor=transform_toTensor,
      classes=classes,
      cm2lbl=cm2lbl, resize_size=(512,512)
   )
test_dataloader = DataLoader(test_dataset,2, shuffle=True, num_workers=2)


path： /home/user/Desktop/lanyard_segmentation/guardrail/test_data/features
directory： []
file： ['0075.png', '0086.png', '0019.png', '0083.png', '0054.png', '0001.png', '0050.png', '0056.png', '0058.png', '0084.png', '0081.png', '0034.png', '0023.png']


100%|██████████| 13/13 [00:00<00:00, 181753.17it/s]


Read  13  files
path： /home/user/Desktop/lanyard_segmentation/guardrail/test_data/labels
directory： []
file： ['0075.png', '0086.png', '0019.png', '0083.png', '0054.png', '0001.png', '0050.png', '0056.png', '0058.png', '0084.png', '0081.png', '0034.png', '0023.png']


100%|██████████| 13/13 [00:00<00:00, 230067.31it/s]

Read  13  files
Read 13 images





# EDANet

In [3]:
class DownsampleBlock(nn.Module):
  def __init__(self, nc_input, nc_output):
    '''
    Arguments:
    nc_input : Win, number of input channel
    nc_output : Wout, number of output channel
    '''

    super(DownsampleBlock,self).__init__()
    self.nc_input = nc_input
    self.nc_output = nc_output

    if self.nc_input < self.nc_output:
      # Win < Wout
      self.conv = nn.Conv2d(nc_input, nc_output-nc_input, kernel_size=3, stride=2, padding=1)
      self.pool = nn.MaxPool2d(2, stride=2)
    else:
      # Win > Wout
      self.conv = nn.Conv2d(nc_input, nc_output, kernel_size=3, stride=2, padding=1)

    self.batchNorm = nn.BatchNorm2d(nc_output)
    self.relu = nn.ReLU()

  def forward(self, x):
    if self.nc_input < self.nc_output:
      out = torch.cat([self.conv(x), self.pool(x)], 1)
    else:
      out = self.conv(x)
    
    out = self.batchNorm(out)
    out = self.relu(out)
    return out


In [4]:
class EDABlock(nn.Module):
  def __init__(self, nc_input, dilated, k = 40, dropprob = 0.02):
    '''
    Arguments:
    nc_input : number of input channel
    k : growth rate
    dilated : possible dilated convalution
    dropprob : probability, a dropout layer between the last ReLU and the concatenation of each module
    '''
    super(EDABlock,self).__init__()
    self.conv1x1_0 = nn.Conv2d(nc_input, k, kernel_size=1)
    self.batchNorm_0 = nn.BatchNorm2d(k)

    self.conv3x1_1 = nn.Conv2d(k, k, kernel_size=(3,1), padding=(1,0))
    self.conv1x3_1 = nn.Conv2d(k, k, kernel_size=(1,3), padding=(0,1))
    self.batchNorm_1 = nn.BatchNorm2d(k)

    self.conv3x1_2 = nn.Conv2d(k, k, kernel_size=(3,1), stride=1, padding=(dilated,0), dilation=dilated)
    self.conv1x3_2 = nn.Conv2d(k, k, kernel_size=(1,3), stride=1, padding=(0,dilated), dilation=dilated)
    self.batchNorm_2 = nn.BatchNorm2d(k)
    self.dropout = nn.Dropout2d(dropprob)
    self.relu = nn.ReLU()

  def forward(self, x):
    input = x

    output = self.conv1x1_0(x)
    output = self.batchNorm_0(output)
    output = self.relu(output)

    output = self.conv3x1_1(output)
    output = self.conv1x3_1(output)
    output = self.batchNorm_1(output)
    output = self.relu(output)

    output = self.conv3x1_2(output)
    output = self.conv1x3_2(output)
    output = self.batchNorm_2(output)
    output = self.relu(output)

    if (self.dropout.p != 0):
      output = self.dropout(output)

    output = torch.cat((output, input), 1)

    return output


In [5]:
class EDAnet(nn.Module):
  def __init__(self, n_class=1):
    '''
    Arguments:
    nc_input : number of input channel
    k : growth rate
    dilated : possible dilated convalution
    dropprob : probability, a dropout layer between the last ReLU and the concatenation of each module
    '''
    super(EDAnet,self).__init__()
    self.layers = nn.ModuleList()
    self.dilation1 = [1,1,1,2,2]
    self.dilation2 = [2,2,4,4,8,8,16,16]

    # DownsampleBlock1
    self.layers.append(DownsampleBlock(3, 15))

    # DownsampleBlock2
    self.layers.append(DownsampleBlock(15, 60))

    # EDA module 1-1~1-5
    for i in range(len(self.dilation1)):
      self.layers.append(EDABlock(60 + 40 * i, self.dilation1[i]))

    # DownsampleBlock3
    self.layers.append(DownsampleBlock(260, 130))

    # EDA module 2-1~2-8
    for j in range(len(self.dilation2)):
      self.layers.append(EDABlock(130 + 40 * j, self.dilation2[j]))

    # Projection layer
    self.project_layer = nn.Conv2d(450, n_class, kernel_size = 1)

    self.weights_init()
  
  def weights_init(self):
    for index, m in enumerate(self.modules()):
      classname = m.__class__.__name__
      if classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.02)
      elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)

  def forward(self, x):
    output = x

    for layer in self.layers:
      output = layer(output)
      # print(output.shape)

    output = self.project_layer(output)

    # Bilinear interpolation x8
    output = F.interpolate(output,scale_factor = 8,mode = 'bilinear',align_corners=True)

    # # Bilinear interpolation x2 (inference only)
    # if not self.training:
    #   output = F.interpolate(output, scale_factor=2, mode='bilinear',align_corners=True)

    return output


In [6]:
edanet = EDAnet().eval()
device = torch.device("cuda")
edanet.to(device)

# dummy_input = torch.randn(32, 3,512,512, dtype=torch.float).to(device)
# repetitions=100
# total_time = 0
# with torch.no_grad():
#     for rep in range(repetitions):
#         starter, ender = torch.cuda.Event(enable_timing=True),   torch.cuda.Event(enable_timing=True)
#         starter.record()
#         _ = edanet(dummy_input)
#         ender.record()
#         torch.cuda.synchronize()
#         curr_time = starter.elapsed_time(ender)/1000
#         total_time += curr_time
# Throughput =   (repetitions*32)/total_time
# print('Final Throughput:',Throughput)

EDAnet(
  (layers): ModuleList(
    (0): DownsampleBlock(
      (conv): Conv2d(3, 12, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (batchNorm): BatchNorm2d(15, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU()
    )
    (1): DownsampleBlock(
      (conv): Conv2d(15, 45, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (batchNorm): BatchNorm2d(60, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU()
    )
    (2): EDABlock(
      (conv1x1_0): Conv2d(60, 40, kernel_size=(1, 1), stride=(1, 1))
      (batchNorm_0): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3x1_1): Conv2d(40, 40, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0))
      (conv1x3_1): Conv2d(40, 40, kernel_size=(1, 3),

In [7]:
input =torch.randn(1,3,720,960).cuda()

torch.cuda.synchronize()
time_start = time.time()

output = edanet(input)
torch.cuda.synchronize()
time_end = time.time()
infer_time = time_end - time_start
print(infer_time)

0.0172421932220459


In [59]:


flops, params = profile(edanet, inputs=(input, ))
print('FLOPs = ' + str(flops/1000**3) + 'G')
print('Params = ' + str(params/1000**2) + 'M')

[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register count_bn() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[91m[WARN] Cannot find rule for <class '__main__.DownsampleBlock'>. Treat it as zero Macs and zero Params.[00m
[91m[WARN] Cannot find rule for <class 'torch.nn.modules.dropout.Dropout2d'>. Treat it as zero Macs and zero Params.[00m
[91m[WARN] Cannot find rule for <class '__main__.EDABlock'>. Treat it as zero Macs and zero Params.[00m
[91m[WARN] Cannot find rule for <class 'torch.nn.modules.container.ModuleList'>. Treat it as zero Macs and zero Params.[00m
[91m[WARN] Cannot find rule for <class '__main__.EDAnet'>. Treat it as zero Macs and zero Params.[00m
FLOPs = 4.440788992G
Params = 0.681367M


In [62]:
edanet = EDAnet().eval().cuda()

res = []
for id, data in enumerate(test_dataloader):
    inputs, labels = data
    inputs = inputs.cuda()
    labels = labels.cuda()
    torch.cuda.synchronize()
    start = time.time()
    predict= edanet(inputs)
    torch.cuda.synchronize()
    end = time.time()
    res.append(end-start)
time_sum = 0
for i in res:
    time_sum += i
    
print("FPS: %f"%(1.0/(time_sum/len(res))))

macs, params = get_model_complexity_info(edanet, (3, 512, 512), as_strings=True,
                                           print_per_layer_stat=True, verbose=True)
print('{:<30}  {:<8}'.format('Computational complexity: ', macs))
print('{:<30}  {:<8}'.format('Number of parameters: ', params))

(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
FPS: 131.414617
EDAnet(
  0.681 M, 100.000% Params, 4.459 GMac, 100.000% MACs, 
  (layers): ModuleList(
    0.681 M, 99.934% Params, 4.457 GMac, 99.959% MACs, 
    (0): DownsampleBlock(
      0.0 M, 0.054% Params, 0.026 GMac, 0.578% MACs, 
      (conv): Conv2d(0.0 M, 0.049% Params, 0.022 GMac, 0.494% MACs, 3, 12, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (pool): MaxPool2d(0.0 M, 0.000% Params, 0.001 GMac, 0.018% MACs, kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (batchNorm): BatchNorm2d(0.0 M, 0.004% Params, 0.002 GMac, 0.044% MACs, 15, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(0.0 M, 0.000% Params, 0.001 GMac, 0.022% MACs, )
    )
    (1): DownsampleBlock(
      0.006 M, 0.916% Params, 0.104 GMac, 2.337% MACs, 
      (conv): 

In [18]:
edanet = EDAnet().eval().cuda()
summary(edanet,  (3, 512, 512))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 12, 256, 256]             336
         MaxPool2d-2          [-1, 3, 256, 256]               0
       BatchNorm2d-3         [-1, 15, 256, 256]              30
              ReLU-4         [-1, 15, 256, 256]               0
   DownsampleBlock-5         [-1, 15, 256, 256]               0
            Conv2d-6         [-1, 45, 128, 128]           6,120
         MaxPool2d-7         [-1, 15, 128, 128]               0
       BatchNorm2d-8         [-1, 60, 128, 128]             120
              ReLU-9         [-1, 60, 128, 128]               0
  DownsampleBlock-10         [-1, 60, 128, 128]               0
           Conv2d-11         [-1, 40, 128, 128]           2,440
      BatchNorm2d-12         [-1, 40, 128, 128]              80
             ReLU-13         [-1, 40, 128, 128]               0
           Conv2d-14         [-1, 40, 1

# EDANet ghost

In [8]:
class GhostModule(nn.Module):
    def __init__(self, inp, oup, kernel_size=1, ratio=2, dw_size=3, stride=1, relu=True, dilated=None):
        super(GhostModule, self).__init__()
        self.oup = oup
        init_channels = math.ceil(oup / ratio)
        new_channels = init_channels*(ratio-1)
        self.primary_conv = nn.Sequential(
            nn.Conv2d(inp, init_channels, kernel_size, stride, kernel_size//2, bias=False),
            nn.BatchNorm2d(init_channels),
            nn.ReLU(inplace=True) if relu else nn.Sequential(),
        )

        self.cheap_operation = nn.Sequential(
            nn.Conv2d(init_channels, new_channels, dw_size, 1, dw_size//2, groups=init_channels, bias=False),
            nn.BatchNorm2d(new_channels),
            nn.ReLU(inplace=True) if relu else nn.Sequential(),
        )

    def forward(self, x):
        x1 = self.primary_conv(x)
        x2 = self.cheap_operation(x1)
        out = torch.cat([x1,x2], dim=1)
        return out[:,:self.oup,:,:]

In [9]:
class DownsampleBlock(nn.Module):
  def __init__(self, nc_input, nc_output):
    '''
    Arguments:
    nc_input : Win, number of input channel
    nc_output : Wout, number of output channel
    '''

    super(DownsampleBlock,self).__init__()
    self.nc_input = nc_input
    self.nc_output = nc_output

    if self.nc_input < self.nc_output:
      # Win < Wout
      self.conv = GhostModule(nc_input, nc_output-nc_input, kernel_size=3, stride=2, relu=True)
      # self.conv = nn.Conv2d(nc_input, nc_output-nc_input, kernel_size=3, stride=2, padding=1)
      self.pool = nn.MaxPool2d(2, stride=2)
    else:
      # Win > Wout
      self.conv = GhostModule(nc_input, nc_output, kernel_size=3, stride=2, relu=True)
      # self.conv = nn.Conv2d(nc_input, nc_output, kernel_size=3, stride=2, padding=1)

    self.batchNorm = nn.BatchNorm2d(nc_output)
    self.relu = nn.ReLU()

  def forward(self, x):
    if self.nc_input < self.nc_output:
      out = torch.cat([self.conv(x), self.pool(x)], 1)
    else:
      out = self.conv(x)
    
    # out = self.batchNorm(out)
    # out = self.relu(out)
    return out


In [10]:
class EDABlock_ghost(nn.Module):
  def __init__(self, nc_input, dilated, k = 40, dropprob = 0.02):
    '''
    Arguments:
    nc_input : number of input channel
    k : growth rate
    dilated : possible dilated convalution
    dropprob : probability, a dropout layer between the last ReLU and the concatenation of each module
    '''
    # GhostModule(inp, hidden_dim, kernel_size=1, relu=True)
    super(EDABlock_ghost,self).__init__()
    # self.conv1x1_0 = nn.Conv2d(nc_input, k, kernel_size=1)
    self.conv_0 = GhostModule(nc_input, k, kernel_size=1, relu=True)
    self.batchNorm_0 = nn.BatchNorm2d(k)

    # self.conv3x1_1 = nn.Conv2d(k, k, kernel_size=(3,1), padding=(1,0))
    # self.conv1x3_1 = nn.Conv2d(k, k, kernel_size=(1,3), padding=(0,1))
    self.conv_1 = GhostModule(k, k, kernel_size=3, relu=True)
    self.batchNorm_1 = nn.BatchNorm2d(k)

    self.conv3x1_2 = nn.Conv2d(k, k, kernel_size=(3,1), stride=1, padding=(dilated,0), dilation=dilated)
    self.conv1x3_2 = nn.Conv2d(k, k, kernel_size=(1,3), stride=1, padding=(0,dilated), dilation=dilated)
    self.batchNorm_2 = nn.BatchNorm2d(k)
    self.dropout = nn.Dropout2d(dropprob)
    self.relu = nn.ReLU()

  def forward(self, x):
    input = x

    output = self.conv_0(x)
    # output = self.conv1x1_0(x)
    # output = self.batchNorm_0(output)
    # output = self.relu(output)

    output = self.conv_1(output)
    # output = self.conv3x1_1(output)
    # output = self.conv1x3_1(output)
    # output = self.batchNorm_1(output)
    # output = self.relu(output)

    output = self.conv3x1_2(output)
    output = self.conv1x3_2(output)
    output = self.batchNorm_2(output)
    output = self.relu(output)

    if (self.dropout.p != 0):
      output = self.dropout(output)

    output = torch.cat((output, input), 1)

    return output


In [11]:
class EDAnet_ghost(nn.Module):
  def __init__(self, n_class=1):
    '''
    Arguments:
    nc_input : number of input channel
    k : growth rate
    dilated : possible dilated convalution
    dropprob : probability, a dropout layer between the last ReLU and the concatenation of each module
    '''
    super(EDAnet_ghost,self).__init__()
    self.layers = nn.ModuleList()
    self.dilation1 = [1,1,1,2,2]
    self.dilation2 = [2,2,4,4,8,8,16,16]

    # DownsampleBlock1
    self.layers.append(DownsampleBlock(3, 15))

    # DownsampleBlock2
    self.layers.append(DownsampleBlock(15, 60))

    # EDA module 1-1~1-5
    for i in range(len(self.dilation1)):
      self.layers.append(EDABlock_ghost(60 + 40 * i, self.dilation1[i]))

    # DownsampleBlock3
    self.layers.append(DownsampleBlock(260, 130))

    # EDA module 2-1~2-8
    for j in range(len(self.dilation2)):
      self.layers.append(EDABlock_ghost(130 + 40 * j, self.dilation2[j]))

    # Projection layer
    self.project_layer = nn.Conv2d(450, n_class, kernel_size = 1)

    self.weights_init()
  
  def weights_init(self):
    for index, m in enumerate(self.modules()):
      classname = m.__class__.__name__
      if classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.02)
      elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)

  def forward(self, x):
    output = x

    for layer in self.layers:
      output = layer(output)
      # print(output.shape)

    output = self.project_layer(output)

    # Bilinear interpolation x8
    output = F.interpolate(output,scale_factor = 8,mode = 'bilinear',align_corners=True)

    # # Bilinear interpolation x2 (inference only)
    # if not self.training:
    #   output = F.interpolate(output, scale_factor=2, mode='bilinear',align_corners=True)

    return output


In [12]:
edanet_ghost = EDAnet_ghost().eval().cuda()
summary(edanet_ghost,  (3, 512, 512))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [-1, 6, 256, 256]             162
       BatchNorm2d-2          [-1, 6, 256, 256]              12
              ReLU-3          [-1, 6, 256, 256]               0
            Conv2d-4          [-1, 6, 256, 256]              54
       BatchNorm2d-5          [-1, 6, 256, 256]              12
              ReLU-6          [-1, 6, 256, 256]               0
       GhostModule-7         [-1, 12, 256, 256]               0
         MaxPool2d-8          [-1, 3, 256, 256]               0
   DownsampleBlock-9         [-1, 15, 256, 256]               0
           Conv2d-10         [-1, 23, 128, 128]           3,105
      BatchNorm2d-11         [-1, 23, 128, 128]              46
             ReLU-12         [-1, 23, 128, 128]               0
           Conv2d-13         [-1, 23, 128, 128]             207
      BatchNorm2d-14         [-1, 23, 1

In [13]:
input =torch.randn(1,3,720,960).cuda()

torch.cuda.synchronize()
time_start = time.time()

output = edanet_ghost(input)
torch.cuda.synchronize()
time_end = time.time()
infer_time = time_end - time_start
print(infer_time)

0.010745525360107422


In [68]:
res = []
for id, data in enumerate(test_dataloader):
    inputs, labels = data
    inputs = inputs.cuda()
    labels = labels.cuda()
    torch.cuda.synchronize()
    start = time.time()
    predict= edanet_ghost(inputs)
    torch.cuda.synchronize()
    end = time.time()
    res.append(end-start)
time_sum = 0
for i in res:
    time_sum += i
    
print("FPS: %f"%(1.0/(time_sum/len(res))))

macs, params = get_model_complexity_info(edanet_ghost, (3, 512, 512), as_strings=True,
                                           print_per_layer_stat=True, verbose=True)
print('{:<30}  {:<8}'.format('Computational complexity: ', macs))
print('{:<30}  {:<8}'.format('Number of parameters: ', params))

(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
FPS: 102.120053
EDAnet_ghost(
  0.444 M, 100.000% Params, 3.128 GMac, 100.000% MACs, 
  (layers): ModuleList(
    0.444 M, 99.898% Params, 3.127 GMac, 99.941% MACs, 
    (0): DownsampleBlock(
      0.0 M, 0.061% Params, 0.017 GMac, 0.553% MACs, 
      (conv): GhostModule(
        0.0 M, 0.054% Params, 0.017 GMac, 0.528% MACs, 
        (primary_conv): Sequential(
          0.0 M, 0.039% Params, 0.012 GMac, 0.377% MACs, 
          (0): Conv2d(0.0 M, 0.036% Params, 0.011 GMac, 0.339% MACs, 3, 6, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
          (1): BatchNorm2d(0.0 M, 0.003% Params, 0.001 GMac, 0.025% MACs, 6, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU(0.0 M, 0.000% Params, 0.0 GMac, 0.013% MACs, inplace=True)
        )
        (cheap_operation): 

# DeepLabV3Plus-Mobilenet

[[1, 16, 1, 1], [6, 24, 2, 2], [6, 32, 3, 2], [6, 64, 4, 2]]

In [77]:
def conv_bn(inp, oup, stride, BatchNorm):
    return nn.Sequential(
        nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
        BatchNorm(oup),
        nn.ReLU6(inplace=True)
    )


def fixed_padding(inputs, kernel_size, dilation):
    kernel_size_effective = kernel_size + (kernel_size - 1) * (dilation - 1)
    pad_total = kernel_size_effective - 1
    pad_beg = pad_total // 2
    pad_end = pad_total - pad_beg
    padded_inputs = F.pad(inputs, (pad_beg, pad_end, pad_beg, pad_end))
    return padded_inputs


class InvertedResidual(nn.Module):
    def __init__(self, inp, oup, stride, dilation, expand_ratio, BatchNorm):
        super(InvertedResidual, self).__init__()
        self.stride = stride
        assert stride in [1, 2]

        hidden_dim = round(inp * expand_ratio)
        self.use_res_connect = self.stride == 1 and inp == oup
        self.kernel_size = 3
        self.dilation = dilation

        if expand_ratio == 1:
            self.conv = nn.Sequential(
                # dw
                nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 0, dilation, groups=hidden_dim, bias=False),
                BatchNorm(hidden_dim),
                nn.ReLU6(inplace=True),
                # pw-linear
                nn.Conv2d(hidden_dim, oup, 1, 1, 0, 1, 1, bias=False),
                BatchNorm(oup),
            )
        else:
            self.conv = nn.Sequential(
                # pw
                nn.Conv2d(inp, hidden_dim, 1, 1, 0, 1, bias=False),
                BatchNorm(hidden_dim),
                nn.ReLU6(inplace=True),
                # dw
                nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 0, dilation, groups=hidden_dim, bias=False),
                BatchNorm(hidden_dim),
                nn.ReLU6(inplace=True),
                # pw-linear
                nn.Conv2d(hidden_dim, oup, 1, 1, 0, 1, bias=False),
                BatchNorm(oup),
            )

    def forward(self, x):
        x_pad = fixed_padding(x, self.kernel_size, dilation=self.dilation)
        if self.use_res_connect:
            x = x + self.conv(x_pad)
        else:
            x = self.conv(x_pad)
        return x


class MobileNetV2(nn.Module):
    def __init__(self, output_stride=16, BatchNorm=None, width_mult=1., pretrained=False):
        super(MobileNetV2, self).__init__()
        block = InvertedResidual
        input_channel = 32
        current_stride = 1
        rate = 1
        interverted_residual_setting = [
            # t, c, n, s
            [1, 16, 1, 1],
            [6, 24, 2, 2],
            [6, 32, 3, 2],
            [6, 64, 4, 2],
            [6, 96, 3, 1],
            [6, 160, 3, 2],
            [6, 320, 1, 1],
        ]

        # building first layer
        input_channel = int(input_channel * width_mult)
        self.features = [conv_bn(3, input_channel, 2, BatchNorm)]
        current_stride *= 2
        # building inverted residual blocks
        for t, c, n, s in interverted_residual_setting:
            if current_stride == output_stride:
                stride = 1
                dilation = rate
                rate *= s
            else:
                stride = s
                dilation = 1
                current_stride *= s
            output_channel = int(c * width_mult)
            for i in range(n):
                if i == 0:
                    self.features.append(block(input_channel, output_channel, stride, dilation, t, BatchNorm))
                else:
                    self.features.append(block(input_channel, output_channel, 1, dilation, t, BatchNorm))
                input_channel = output_channel
        self.features = nn.Sequential(*self.features)
        self._initialize_weights()

        if pretrained:
            self._load_pretrained_model()
        print(len(self.features))
        self.low_level_features = self.features[0:4]
        self.high_level_features = self.features[4:]
        

    def forward(self, x):
        low_level_feat = self.low_level_features(x)
        print('low_level_feat shape', low_level_feat.shape)
        x = self.high_level_features(low_level_feat)
        print('x1 shape', x.shape)

        return x, low_level_feat

    def _load_pretrained_model(self):
        pretrain_dict = model_zoo.load_url('http://jeff95.me/models/mobilenet_v2-6a65762b.pth')
        model_dict = {}
        state_dict = self.state_dict()
        for k, v in pretrain_dict.items():
            if k in state_dict:
                model_dict[k] = v
        state_dict.update(model_dict)
        self.load_state_dict(state_dict)

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                # m.weight.data.normal_(0, math.sqrt(2. / n))
                torch.nn.init.kaiming_normal_(m.weight)
            # elif isinstance(m, SynchronizedBatchNorm2d):
            #     m.weight.data.fill_(1)
            #     m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()


In [78]:

class ASPP_module(nn.Module):
    def __init__(self, inplanes, planes, rate):
        super(ASPP_module, self).__init__()
        if rate == 1:
            kernel_size = 1
            padding = 0
        else:
            kernel_size = 3
            padding = rate
        self.atrous_convolution = nn.Conv2d(inplanes, planes, kernel_size=kernel_size,
                                            stride=1, padding=padding, dilation=rate, bias=False)
        self.bn = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU()

        self.__init_weight()

    def forward(self, x):
        x = self.atrous_convolution(x)
        x = self.bn(x)

        return self.relu(x)

    def __init_weight(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                # m.weight.data.normal_(0, math.sqrt(2. / n))
                torch.nn.init.kaiming_normal_(m.weight)
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

In [79]:
class DeepLabv3_plus_mobilenet(nn.Module):
    def __init__(self, nInputChannels=3, n_classes=21, os=16, pretrained=False, _print=True):
        if _print:
            print("Constructing DeepLabv3+ model...")
            print("Number of classes: {}".format(n_classes))
            print("Output stride: {}".format(os))
            print("Number of Input Channels: {}".format(nInputChannels))
        super(DeepLabv3_plus_mobilenet, self).__init__()

        # Atrous Conv
        self.efficient_features = MobileNetV2(output_stride=16, BatchNorm=nn.BatchNorm2d)

        # ASPP
        if os == 16:
            rates = [1, 6, 12, 18]
        elif os == 8:
            rates = [1, 12, 24, 36]
        else:
            raise NotImplementedError

        self.aspp1 = ASPP_module(320, 256, rate=rates[0])
        self.aspp2 = ASPP_module(320, 256, rate=rates[1])
        self.aspp3 = ASPP_module(320, 256, rate=rates[2])
        self.aspp4 = ASPP_module(320, 256, rate=rates[3])

        self.relu = nn.ReLU()

        self.global_avg_pool = nn.Sequential(nn.AdaptiveAvgPool2d((1, 1)),
                                             nn.Conv2d(320, 256, 1, stride=1, bias=False),
                                             nn.BatchNorm2d(256),
                                             nn.ReLU())

        self.conv1 = nn.Conv2d(1280, 256, 1, bias=False)
        self.bn1 = nn.BatchNorm2d(256)

        # adopt [1x1, 48] for channel reduction.
        self.conv2 = nn.Conv2d(24, 48, 1, bias=False)
        self.bn2 = nn.BatchNorm2d(48)

        self.last_conv = nn.Sequential(nn.Conv2d(304, 256, kernel_size=3, stride=1, padding=1, bias=False),
                                       nn.BatchNorm2d(256),
                                       nn.ReLU(),
                                       nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=False),
                                       nn.BatchNorm2d(256),
                                       nn.ReLU(),
                                       nn.Conv2d(256, n_classes, kernel_size=1, stride=1))

    def forward(self, input):
        x, low_level_features = self.efficient_features(input)
        x1 = self.aspp1(x)
        x2 = self.aspp2(x)
        x3 = self.aspp3(x)
        x4 = self.aspp4(x)
        x5 = self.global_avg_pool(x)
        x5 = F.upsample(x5, size=x4.size()[2:], mode='bilinear', align_corners=True)

        x = torch.cat((x1, x2, x3, x4, x5), dim=1)

        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = F.upsample(x, size=(int(math.ceil(input.size()[-2]/4)),
                                int(math.ceil(input.size()[-1]/4))), mode='bilinear', align_corners=True)

        low_level_features = self.conv2(low_level_features)
        low_level_features = self.bn2(low_level_features)
        low_level_features = self.relu(low_level_features)


        x = torch.cat((x, low_level_features), dim=1)
        x = self.last_conv(x)
        x = F.upsample(x, size=input.size()[2:], mode='bilinear', align_corners=True)

        return x

    def freeze_bn(self):
        for m in self.modules():
            if isinstance(m, nn.BatchNorm2d):
                m.eval()

    def __init_weight(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                # torch.nn.init.kaiming_normal_(m.weight)
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()





In [80]:
def get_1x_lr_params(model):
    """
    This generator returns all the parameters of the net except for
    the last classification layer. Note that for each batchnorm layer,
    requires_grad is set to False in deeplab_resnet.py, therefore this function does not return
    any batchnorm parameter
    """
    b = [model.xception_features]
    for i in range(len(b)):
        for k in b[i].parameters():
            if k.requires_grad:
                yield k


def get_10x_lr_params(model):
    """
    This generator returns all the parameters for the last layer of the net,
    which does the classification of pixel into classes
    """
    b = [model.aspp1, model.aspp2, model.aspp3, model.aspp4, model.conv1, model.conv2, model.last_conv]
    for j in range(len(b)):
        for k in b[j].parameters():
            if k.requires_grad:
                yield k

In [81]:
efficient_features = MobileNetV2(output_stride=16, BatchNorm=nn.BatchNorm2d)
efficient_features

18


MobileNetV2(
  (features): Sequential(
    (0): Sequential(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), groups=32, bias=False)
        (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU6(inplace=True)
        (3): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (4): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU6(inplace=True)
        (3): Conv2d(96,

In [82]:
summary(efficient_features.cuda(),  (3, 512, 512))

low_level_feat shape torch.Size([2, 24, 128, 128])
x1 shape torch.Size([2, 320, 32, 32])
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 256, 256]             864
            Conv2d-2         [-1, 32, 256, 256]             864
       BatchNorm2d-3         [-1, 32, 256, 256]              64
       BatchNorm2d-4         [-1, 32, 256, 256]              64
             ReLU6-5         [-1, 32, 256, 256]               0
             ReLU6-6         [-1, 32, 256, 256]               0
            Conv2d-7         [-1, 32, 256, 256]             288
            Conv2d-8         [-1, 32, 256, 256]             288
       BatchNorm2d-9         [-1, 32, 256, 256]              64
      BatchNorm2d-10         [-1, 32, 256, 256]              64
            ReLU6-11         [-1, 32, 256, 256]               0
            ReLU6-12         [-1, 32, 256, 256]               0
           Con

In [83]:
input =torch.randn(1,3,512,512).cuda()


output = efficient_features(input)

print(output[0].shape)

low_level_feat shape torch.Size([1, 24, 128, 128])
x1 shape torch.Size([1, 320, 32, 32])
torch.Size([1, 320, 32, 32])


In [18]:
deeplab_mobilenet = DeepLabv3_plus_mobilenet(nInputChannels=3, n_classes=1, os=16, pretrained=False, _print=True).eval().cuda()
summary(deeplab_mobilenet,  (3, 512, 512))

Constructing DeepLabv3+ model...
Number of classes: 1
Output stride: 16
Number of Input Channels: 3
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 256, 256]             864
            Conv2d-2         [-1, 32, 256, 256]             864
       BatchNorm2d-3         [-1, 32, 256, 256]              64
       BatchNorm2d-4         [-1, 32, 256, 256]              64
             ReLU6-5         [-1, 32, 256, 256]               0
             ReLU6-6         [-1, 32, 256, 256]               0
            Conv2d-7         [-1, 32, 256, 256]             288
            Conv2d-8         [-1, 32, 256, 256]             288
       BatchNorm2d-9         [-1, 32, 256, 256]              64
      BatchNorm2d-10         [-1, 32, 256, 256]              64
            ReLU6-11         [-1, 32, 256, 256]               0
            ReLU6-12         [-1, 32, 256, 256]               0
   



In [19]:
input =torch.randn(1,3,720,960).cuda()

torch.cuda.synchronize()
time_start = time.time()

output = deeplab_mobilenet(input)
torch.cuda.synchronize()
time_end = time.time()
infer_time = time_end - time_start
print(infer_time)

0.019766807556152344


In [74]:
res = []
for id, data in enumerate(test_dataloader):
    inputs, labels = data
    inputs = inputs.cuda()
    labels = labels.cuda()
    torch.cuda.synchronize()
    start = time.time()
    predict= deeplab_mobilenet(inputs)
    torch.cuda.synchronize()
    end = time.time()
    res.append(end-start)
time_sum = 0
for i in res:
    time_sum += i
    
print("FPS: %f"%(1.0/(time_sum/len(res))))

macs, params = get_model_complexity_info(deeplab_mobilenet, (3, 512, 512), as_strings=True,
                                           print_per_layer_stat=True, verbose=True)
print('{:<30}  {:<8}'.format('Computational complexity: ', macs))
print('{:<30}  {:<8}'.format('Number of parameters: ', params))

(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
FPS: 66.869966
DeepLabv3_plus_mobilenet(
  7.623 M, 131.178% Params, 29.135 GMac, 100.000% MACs, 
  (efficient_features): MobileNetV2(
    3.623 M, 62.356% Params, 5.256 GMac, 18.040% MACs, 
    (features): Sequential(
      1.812 M, 31.178% Params, 2.628 GMac, 9.020% MACs, 
      (0): Sequential(
        0.001 M, 0.016% Params, 0.063 GMac, 0.216% MACs, 
        (0): Conv2d(0.001 M, 0.015% Params, 0.057 GMac, 0.194% MACs, 3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (1): BatchNorm2d(0.0 M, 0.001% Params, 0.004 GMac, 0.014% MACs, 32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU6(0.0 M, 0.000% Params, 0.002 GMac, 0.007% MACs, inplace=True)
      )
      (1): InvertedResidual(
        0.001 M, 0.015% Params, 0.061 GMac, 0.209% MACs, 
       

# DeepLabV3+ Mobilenet ghost

In [15]:
class GhostModule(nn.Module):
    def __init__(self, inp, oup, kernel_size=1, ratio=2, dw_size=3, stride=1, relu=True, is_relu6=False):
        super(GhostModule, self).__init__()
        self.oup = oup
        init_channels = math.ceil(oup / ratio)
        new_channels = init_channels*(ratio-1)
        if is_relu6:
            self.primary_conv = nn.Sequential(
                nn.Conv2d(inp, init_channels, kernel_size, stride, kernel_size//2, bias=False),
                nn.BatchNorm2d(init_channels),
                nn.ReLU6(inplace=True) if relu else nn.Sequential(),
            )

            self.cheap_operation = nn.Sequential(
                nn.Conv2d(init_channels, new_channels, dw_size, 1, dw_size//2, groups=init_channels, bias=False),
                nn.BatchNorm2d(new_channels),
                nn.ReLU6(inplace=True) if relu else nn.Sequential(),
            )
        else:
            self.primary_conv = nn.Sequential(
                nn.Conv2d(inp, init_channels, kernel_size, stride, kernel_size//2, bias=False),
                nn.BatchNorm2d(init_channels),
                nn.ReLU(inplace=True) if relu else nn.Sequential(),
            )

            self.cheap_operation = nn.Sequential(
                nn.Conv2d(init_channels, new_channels, dw_size, 1, dw_size//2, groups=init_channels, bias=False),
                nn.BatchNorm2d(new_channels),
                nn.ReLU(inplace=True) if relu else nn.Sequential(),
            )            
    def forward(self, x):
        x1 = self.primary_conv(x)
        x2 = self.cheap_operation(x1)
        out = torch.cat([x1,x2], dim=1)
        return out[:,:self.oup,:,:]

In [16]:
def conv_bn(inp, oup, stride, BatchNorm):
    return nn.Sequential(
        GhostModule(inp, oup, kernel_size=3, stride=stride, relu=True, is_relu6=True)
#         nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
#         BatchNorm(oup),
#         nn.ReLU6(inplace=True)
    )


def fixed_padding(inputs, kernel_size, dilation):
    kernel_size_effective = kernel_size + (kernel_size - 1) * (dilation - 1)
    pad_total = kernel_size_effective - 1
    pad_beg = pad_total // 2
    pad_end = pad_total - pad_beg
    padded_inputs = F.pad(inputs, (pad_beg, pad_end, pad_beg, pad_end))
    return padded_inputs


class InvertedResidual(nn.Module):
    def __init__(self, inp, oup, stride, dilation, expand_ratio, BatchNorm):
        super(InvertedResidual, self).__init__()
        self.stride = stride
        assert stride in [1, 2]

        hidden_dim = round(inp * expand_ratio)
        self.use_res_connect = self.stride == 1 and inp == oup
        self.kernel_size = 3
        self.dilation = dilation

        if expand_ratio == 1:
            self.conv = nn.Sequential(
                # dw
                nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 0, dilation, groups=hidden_dim, bias=False),
                BatchNorm(hidden_dim),
                nn.ReLU6(inplace=True),
                # pw-linear
                nn.Conv2d(hidden_dim, oup, 1, 1, 0, 1, 1, bias=False),
                BatchNorm(oup),
            )
        else:
            self.conv = nn.Sequential(
                # pw
                nn.Conv2d(inp, hidden_dim, 1, 1, 0, 1, bias=False),
                BatchNorm(hidden_dim),
                nn.ReLU6(inplace=True),
                # dw
                nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 0, dilation, groups=hidden_dim, bias=False),
                BatchNorm(hidden_dim),
                nn.ReLU6(inplace=True),
                # pw-linear
                nn.Conv2d(hidden_dim, oup, 1, 1, 0, 1, bias=False),
                BatchNorm(oup),
            )

    def forward(self, x):
        x_pad = fixed_padding(x, self.kernel_size, dilation=self.dilation)
        if self.use_res_connect:
            x = x + self.conv(x_pad)
        else:
            x = self.conv(x_pad)
        return x


class MobileNetV2(nn.Module):
    def __init__(self, output_stride=16, BatchNorm=None, width_mult=1., pretrained=False):
        super(MobileNetV2, self).__init__()
        block = InvertedResidual
        input_channel = 32
        current_stride = 1
        rate = 1
        interverted_residual_setting = [
            # t, c, n, s
            [1, 16, 1, 1],
            [6, 24, 2, 2],
            [6, 32, 3, 2],
            [6, 64, 4, 2],
            [6, 96, 3, 1],
            [6, 160, 3, 2],
            [6, 320, 1, 1],
        ]

        # building first layer
        input_channel = int(input_channel * width_mult)
        self.features = [conv_bn(3, input_channel, 2, BatchNorm)]
        current_stride *= 2
        # building inverted residual blocks
        for t, c, n, s in interverted_residual_setting:
            if current_stride == output_stride:
                stride = 1
                dilation = rate
                rate *= s
            else:
                stride = s
                dilation = 1
                current_stride *= s
            output_channel = int(c * width_mult)
            for i in range(n):
                if i == 0:
                    self.features.append(block(input_channel, output_channel, stride, dilation, t, BatchNorm))
                else:
                    self.features.append(block(input_channel, output_channel, 1, dilation, t, BatchNorm))
                input_channel = output_channel
        self.features = nn.Sequential(*self.features)
        self._initialize_weights()

        if pretrained:
            self._load_pretrained_model()

        self.low_level_features = self.features[0:4]
        self.high_level_features = self.features[4:]

    def forward(self, x):
        low_level_feat = self.low_level_features(x)
        x = self.high_level_features(low_level_feat)
        return x, low_level_feat

    def _load_pretrained_model(self):
        pretrain_dict = model_zoo.load_url('http://jeff95.me/models/mobilenet_v2-6a65762b.pth')
        model_dict = {}
        state_dict = self.state_dict()
        for k, v in pretrain_dict.items():
            if k in state_dict:
                model_dict[k] = v
        state_dict.update(model_dict)
        self.load_state_dict(state_dict)

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                # m.weight.data.normal_(0, math.sqrt(2. / n))
                torch.nn.init.kaiming_normal_(m.weight)
            # elif isinstance(m, SynchronizedBatchNorm2d):
            #     m.weight.data.fill_(1)
            #     m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()


In [17]:

class ASPP_module(nn.Module):
    def __init__(self, inplanes, planes, rate):
        super(ASPP_module, self).__init__()
        if rate == 1:
            kernel_size = 1
            padding = 0
        else:
            kernel_size = 3
            padding = rate
        self.atrous_convolution = nn.Conv2d(inplanes, planes, kernel_size=kernel_size,
                                            stride=1, padding=padding, dilation=rate, bias=False)
        self.bn = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU()

        self.__init_weight()

    def forward(self, x):
        x = self.atrous_convolution(x)
        x = self.bn(x)

        return self.relu(x)

    def __init_weight(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                # m.weight.data.normal_(0, math.sqrt(2. / n))
                torch.nn.init.kaiming_normal_(m.weight)
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

In [18]:
class DeepLabv3_plus_mobilenet_ghost(nn.Module):
    def __init__(self, nInputChannels=3, n_classes=21, os=16, pretrained=False, _print=True):
        if _print:
            print("Constructing DeepLabv3+ model...")
            print("Number of classes: {}".format(n_classes))
            print("Output stride: {}".format(os))
            print("Number of Input Channels: {}".format(nInputChannels))
        super(DeepLabv3_plus_mobilenet_ghost, self).__init__()

        # Atrous Conv
        self.efficient_features = MobileNetV2(output_stride=16, BatchNorm=nn.BatchNorm2d)

        # ASPP
        if os == 16:
            rates = [1, 6, 12, 18]
        elif os == 8:
            rates = [1, 12, 24, 36]
        else:
            raise NotImplementedError

        self.aspp1 = ASPP_module(320, 256, rate=rates[0])
        self.aspp2 = ASPP_module(320, 256, rate=rates[1])
        self.aspp3 = ASPP_module(320, 256, rate=rates[2])
        self.aspp4 = ASPP_module(320, 256, rate=rates[3])

        self.relu = nn.ReLU()

        self.global_avg_pool = nn.Sequential(nn.AdaptiveAvgPool2d((1, 1)),
                                             GhostModule(320, 256, kernel_size=1, stride=1, relu=True)
#                                              nn.Conv2d(320, 256, 1, stride=1, bias=False),
#                                              nn.BatchNorm2d(256),
#                                              nn.ReLU()
                                            )

        self.conv1 = nn.Conv2d(1280, 256, 1, bias=False)
        self.bn1 = nn.BatchNorm2d(256)

        # adopt [1x1, 48] for channel reduction.
        self.conv2 = nn.Conv2d(24, 48, 1, bias=False)
        self.bn2 = nn.BatchNorm2d(48)

        self.last_conv = nn.Sequential(GhostModule(304, 256, kernel_size=3, stride=1, relu=True),
                                       GhostModule(256, 256, kernel_size=3, stride=1, relu=True),
#             nn.Conv2d(304, 256, kernel_size=3, stride=1, padding=1, bias=False),
#                                        nn.BatchNorm2d(256),
#                                        nn.ReLU(),
#                                        nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=False),
#                                        nn.BatchNorm2d(256),
#                                        nn.ReLU(),
                                       nn.Conv2d(256, n_classes, kernel_size=1, stride=1))

    def forward(self, input):
        x, low_level_features = self.efficient_features(input)
        x1 = self.aspp1(x)
        x2 = self.aspp2(x)
        x3 = self.aspp3(x)
        x4 = self.aspp4(x)
        x5 = self.global_avg_pool(x)
        x5 = F.upsample(x5, size=x4.size()[2:], mode='bilinear', align_corners=True)

        x = torch.cat((x1, x2, x3, x4, x5), dim=1)

        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = F.upsample(x, size=(int(math.ceil(input.size()[-2]/4)),
                                int(math.ceil(input.size()[-1]/4))), mode='bilinear', align_corners=True)

        low_level_features = self.conv2(low_level_features)
        low_level_features = self.bn2(low_level_features)
        low_level_features = self.relu(low_level_features)


        x = torch.cat((x, low_level_features), dim=1)
        x = self.last_conv(x)
        x = F.upsample(x, size=input.size()[2:], mode='bilinear', align_corners=True)

        return x

    def freeze_bn(self):
        for m in self.modules():
            if isinstance(m, nn.BatchNorm2d):
                m.eval()

    def __init_weight(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                # torch.nn.init.kaiming_normal_(m.weight)
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()





In [20]:
efficient_features = MobileNetV2(output_stride=16, BatchNorm=nn.BatchNorm2d).cuda()
summary(efficient_features,  (3, 512, 512))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 16, 256, 256]             432
            Conv2d-2         [-1, 16, 256, 256]             432
       BatchNorm2d-3         [-1, 16, 256, 256]              32
       BatchNorm2d-4         [-1, 16, 256, 256]              32
             ReLU6-5         [-1, 16, 256, 256]               0
             ReLU6-6         [-1, 16, 256, 256]               0
            Conv2d-7         [-1, 16, 256, 256]             144
            Conv2d-8         [-1, 16, 256, 256]             144
       BatchNorm2d-9         [-1, 16, 256, 256]              32
      BatchNorm2d-10         [-1, 16, 256, 256]              32
            ReLU6-11         [-1, 16, 256, 256]               0
            ReLU6-12         [-1, 16, 256, 256]               0
      GhostModule-13         [-1, 32, 256, 256]               0
      GhostModule-14         [-1, 32, 2

In [21]:
efficient_features

MobileNetV2(
  (features): Sequential(
    (0): Sequential(
      (0): GhostModule(
        (primary_conv): Sequential(
          (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
          (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (cheap_operation): Sequential(
          (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16, bias=False)
          (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
      )
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), groups=32, bias=False)
        (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU6(inplace=True)
        (3): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (4)

In [24]:
deeplab_mobilenet_ghost = DeepLabv3_plus_mobilenet_ghost(nInputChannels=3, n_classes=1, os=16, pretrained=False, _print=True).eval().cuda()
summary(deeplab_mobilenet_ghost,  (3, 512, 512))

Constructing DeepLabv3+ model...
Number of classes: 1
Output stride: 16
Number of Input Channels: 3
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 16, 256, 256]             432
            Conv2d-2         [-1, 16, 256, 256]             432
       BatchNorm2d-3         [-1, 16, 256, 256]              32
       BatchNorm2d-4         [-1, 16, 256, 256]              32
             ReLU6-5         [-1, 16, 256, 256]               0
             ReLU6-6         [-1, 16, 256, 256]               0
            Conv2d-7         [-1, 16, 256, 256]             144
            Conv2d-8         [-1, 16, 256, 256]             144
       BatchNorm2d-9         [-1, 16, 256, 256]              32
      BatchNorm2d-10         [-1, 16, 256, 256]              32
            ReLU6-11         [-1, 16, 256, 256]               0
            ReLU6-12         [-1, 16, 256, 256]               0
   

In [25]:
input =torch.randn(1,3,720,960).cuda()

torch.cuda.synchronize()
time_start = time.time()

output = deeplab_mobilenet_ghost(input)
torch.cuda.synchronize()
time_end = time.time()
infer_time = time_end - time_start
print(infer_time)

0.01770472526550293


In [80]:
# res = []
# for i in tqdm(range(13)):
#     inputs, _ = test_dataset[i]
#     inputs = inputs.cuda().unsqueeze(0)

#     torch.cuda.synchronize()
#     start = time.time()
#     predict= deeplab_mobilenet_ghost(inputs).data
#     torch.cuda.synchronize()
#     end = time.time()
#     res.append(end-start)
# time_sum = 0
# for i in res:
#     time_sum += i
    
# print("FPS: %f"%(1.0/(time_sum/len(res))))

In [81]:
res = []
for id, data in enumerate(test_dataloader):
    inputs, labels = data
    inputs = inputs.cuda()
    labels = labels.cuda()
    torch.cuda.synchronize()
    start = time.time()
    predict= deeplab_mobilenet_ghost(inputs)
    torch.cuda.synchronize()
    end = time.time()
    res.append(end-start)
time_sum = 0
for i in res:
    time_sum += i
    
print("FPS: %f"%(1.0/(time_sum/len(res))))
macs, params = get_model_complexity_info(deeplab_mobilenet_ghost, (3, 512, 512), as_strings=True,
                                           print_per_layer_stat=True, verbose=True)
print('{:<30}  {:<8}'.format('Computational complexity: ', macs))
print('{:<30}  {:<8}'.format('Number of parameters: ', params))

(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
FPS: 75.185217
DeepLabv3_plus_mobilenet_ghost(
  6.939 M, 135.324% Params, 18.565 GMac, 100.000% MACs, 
  (efficient_features): MobileNetV2(
    3.623 M, 70.648% Params, 5.218 GMac, 28.108% MACs, 
    (features): Sequential(
      1.811 M, 35.324% Params, 2.609 GMac, 14.054% MACs, 
      (0): Sequential(
        0.001 M, 0.012% Params, 0.044 GMac, 0.237% MACs, 
        (0): GhostModule(
          0.001 M, 0.012% Params, 0.044 GMac, 0.237% MACs, 
          (primary_conv): Sequential(
            0.0 M, 0.009% Params, 0.031 GMac, 0.169% MACs, 
            (0): Conv2d(0.0 M, 0.008% Params, 0.028 GMac, 0.153% MACs, 3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
            (1): BatchNorm2d(0.0 M, 0.001% Params, 0.002 GMac, 0.011% MACs, 16, eps=1e-05, momentum=0.1, affine=True, track_runni

# Unet++

In [3]:
class conv_block_nested(nn.Module):

    def __init__(self, in_ch, mid_ch, out_ch):
        super(conv_block_nested, self).__init__()
        self.activation = nn.ReLU(inplace=True)
        
        self.conv3x1_1 = nn.Conv2d(in_ch, in_ch, kernel_size=(3,1), padding=(1,0), bias=True)
        self.conv1x3_1 = nn.Conv2d(in_ch, mid_ch, kernel_size=(1,3), padding=(0,1), bias=True)
        
#         self.conv1 = nn.Conv2d(in_ch, mid_ch, kernel_size=3, padding=1, bias=True)
        self.bn1 = nn.BatchNorm2d(mid_ch)
    
        self.conv3x1_2 = nn.Conv2d(mid_ch, mid_ch, kernel_size=(3,1), padding=(1,0), bias=True)
        self.conv1x3_2 = nn.Conv2d(mid_ch, out_ch, kernel_size=(1,3), padding=(0,1), bias=True)
#         self.conv2 = nn.Conv2d(mid_ch, out_ch, kernel_size=3, padding=1, bias=True)
        self.bn2 = nn.BatchNorm2d(out_ch)

    def forward(self, x):
#         x = self.conv1(x)
        x = self.conv3x1_1(x)
        x = self.conv1x3_1(x)
        
        x = self.bn1(x)
        x = self.activation(x)

#         x = self.conv2(x)
        x = self.conv3x1_2(x)
        x = self.conv1x3_2(x)
        x = self.bn2(x)
        output = self.activation(x)

        return output

class Nested_UNet(nn.Module):

    def __init__(self, in_ch=3, out_ch=1):
        super(Nested_UNet, self).__init__()

        n1 = 32
        filters = [n1, n1 * 2, n1 * 4, n1 * 8, n1 * 16]

        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.Up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)

        self.conv0_0 = conv_block_nested(in_ch, filters[0], filters[0])
        self.conv1_0 = conv_block_nested(filters[0], filters[1], filters[1])
        self.conv2_0 = conv_block_nested(filters[1], filters[2], filters[2])
        self.conv3_0 = conv_block_nested(filters[2], filters[3], filters[3])
        self.conv4_0 = conv_block_nested(filters[3], filters[4], filters[4])

        self.conv0_1 = conv_block_nested(filters[0] + filters[1], filters[0], filters[0])
        self.conv1_1 = conv_block_nested(filters[1] + filters[2], filters[1], filters[1])
        self.conv2_1 = conv_block_nested(filters[2] + filters[3], filters[2], filters[2])
        self.conv3_1 = conv_block_nested(filters[3] + filters[4], filters[3], filters[3])

        self.conv0_2 = conv_block_nested(filters[0]*2 + filters[1], filters[0], filters[0])
        self.conv1_2 = conv_block_nested(filters[1]*2 + filters[2], filters[1], filters[1])
        self.conv2_2 = conv_block_nested(filters[2]*2 + filters[3], filters[2], filters[2])

        self.conv0_3 = conv_block_nested(filters[0]*3 + filters[1], filters[0], filters[0])
        self.conv1_3 = conv_block_nested(filters[1]*3 + filters[2], filters[1], filters[1])

        self.conv0_4 = conv_block_nested(filters[0]*4 + filters[1], filters[0], filters[0])

        self.final = nn.Conv2d(filters[0], out_ch, kernel_size=1)

    def forward(self, x):

        x0_0 = self.conv0_0(x)
        x1_0 = self.conv1_0(self.pool(x0_0))
        x0_1 = self.conv0_1(torch.cat([x0_0, self.Up(x1_0)], 1))

        x2_0 = self.conv2_0(self.pool(x1_0))
        x1_1 = self.conv1_1(torch.cat([x1_0, self.Up(x2_0)], 1))
        x0_2 = self.conv0_2(torch.cat([x0_0, x0_1, self.Up(x1_1)], 1))

        x3_0 = self.conv3_0(self.pool(x2_0))
        x2_1 = self.conv2_1(torch.cat([x2_0, self.Up(x3_0)], 1))
        x1_2 = self.conv1_2(torch.cat([x1_0, x1_1, self.Up(x2_1)], 1))
        x0_3 = self.conv0_3(torch.cat([x0_0, x0_1, x0_2, self.Up(x1_2)], 1))

        x4_0 = self.conv4_0(self.pool(x3_0))
        x3_1 = self.conv3_1(torch.cat([x3_0, self.Up(x4_0)], 1))
        x2_2 = self.conv2_2(torch.cat([x2_0, x2_1, self.Up(x3_1)], 1))
        x1_3 = self.conv1_3(torch.cat([x1_0, x1_1, x1_2, self.Up(x2_2)], 1))
        x0_4 = self.conv0_4(torch.cat([x0_0, x0_1, x0_2, x0_3, self.Up(x1_3)], 1))

        output = self.final(x0_4)
        return output

In [4]:
unetplusplus = Nested_UNet().eval().cuda()
summary(unetplusplus,  (3, 512, 512))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [-1, 3, 512, 512]              30
            Conv2d-2         [-1, 32, 512, 512]             320
       BatchNorm2d-3         [-1, 32, 512, 512]              64
              ReLU-4         [-1, 32, 512, 512]               0
            Conv2d-5         [-1, 32, 512, 512]           3,104
            Conv2d-6         [-1, 32, 512, 512]           3,104
       BatchNorm2d-7         [-1, 32, 512, 512]              64
              ReLU-8         [-1, 32, 512, 512]               0
 conv_block_nested-9         [-1, 32, 512, 512]               0
        MaxPool2d-10         [-1, 32, 256, 256]               0
           Conv2d-11         [-1, 32, 256, 256]           3,104
           Conv2d-12         [-1, 64, 256, 256]           6,208
      BatchNorm2d-13         [-1, 64, 256, 256]             128
             ReLU-14         [-1, 64, 2

In [5]:
input =torch.randn(1,3,720,960).cuda()

torch.cuda.synchronize()
time_start = time.time()

output = unetplusplus(input)
torch.cuda.synchronize()
time_end = time.time()
infer_time = time_end - time_start
print(infer_time)

0.16598844528198242


In [10]:

macs, params = get_model_complexity_info(unetplusplus, (3, 512, 512), as_strings=True,
                                           print_per_layer_stat=True, verbose=True)
print('{:<30}  {:<8}'.format('Computational complexity: ', macs))
print('{:<30}  {:<8}'.format('Number of parameters: ', params))

Nested_UNet(
  8.597 M, 100.000% Params, 196.732 GMac, 100.000% MACs, 
  (pool): MaxPool2d(0.0 M, 0.000% Params, 0.016 GMac, 0.008% MACs, kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (Up): Upsample(0.0 M, 0.000% Params, 0.103 GMac, 0.052% MACs, scale_factor=2.0, mode=bilinear)
  (conv0_0): conv_block_nested(
    0.007 M, 0.078% Params, 1.769 GMac, 0.899% MACs, 
    (activation): ReLU(0.0 M, 0.000% Params, 0.017 GMac, 0.009% MACs, inplace=True)
    (conv3x1_1): Conv2d(0.0 M, 0.000% Params, 0.008 GMac, 0.004% MACs, 3, 3, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0))
    (conv1x3_1): Conv2d(0.0 M, 0.004% Params, 0.084 GMac, 0.043% MACs, 3, 32, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1))
    (bn1): BatchNorm2d(0.0 M, 0.001% Params, 0.017 GMac, 0.009% MACs, 32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv3x1_2): Conv2d(0.003 M, 0.036% Params, 0.814 GMac, 0.414% MACs, 32, 32, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0))
   

In [18]:
res = []
for id, data in enumerate(test_dataloader):
    inputs, labels = data
    inputs = inputs.cuda()
    torch.cuda.synchronize()
    start = time.time()
    predict= unetplusplus(inputs)
    torch.cuda.synchronize()
    end = time.time()
    res.append(end-start)
    print('-')
    
time_sum = 0
for i in res:
    time_sum += i
    
print("FPS: %f"%(1.0/(time_sum/len(res))))

(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)


RuntimeError: CUDA out of memory. Tried to allocate 64.00 MiB (GPU 0; 10.75 GiB total capacity; 9.38 GiB already allocated; 51.69 MiB free; 9.41 GiB reserved in total by PyTorch)

In [17]:
time_sum = 0
for i in res:
    time_sum += i
    
print("FPS: %f"%(1.0/(time_sum/len(res))))

ZeroDivisionError: division by zero

In [None]:
res = []
for i in tqdm(range(13)):
    inputs, _ = test_dataset[i]
    inputs = inputs.cuda().unsqueeze(0)

    torch.cuda.synchronize()
    start = time.time()
    predict= unetplusplus(inputs).data
    torch.cuda.synchronize()
    end = time.time()
    res.append(end-start)
time_sum = 0
for i in res:
    time_sum += i
    
print("FPS: %f"%(1.0/(time_sum/len(res))))


# Unet++ Ghost

In [3]:
class GhostModule(nn.Module):
    def __init__(self, inp, oup, kernel_size=1, ratio=2, dw_size=3, stride=1, relu=True, dilated=None):
        super(GhostModule, self).__init__()
        self.oup = oup
        init_channels = math.ceil(oup / ratio)
        new_channels = init_channels*(ratio-1)
        self.primary_conv = nn.Sequential(
            nn.Conv2d(inp, init_channels, kernel_size, stride, kernel_size//2, bias=False),
            nn.BatchNorm2d(init_channels),
            nn.ReLU(inplace=True) if relu else nn.Sequential(),
        )

        self.cheap_operation = nn.Sequential(
            nn.Conv2d(init_channels, new_channels, dw_size, 1, dw_size//2, groups=init_channels, bias=False),
            nn.BatchNorm2d(new_channels),
            nn.ReLU(inplace=True) if relu else nn.Sequential(),
        )

    def forward(self, x):
        x1 = self.primary_conv(x)
        x2 = self.cheap_operation(x1)
        out = torch.cat([x1,x2], dim=1)
        return out[:,:self.oup,:,:]

In [4]:
class conv_block_nested(nn.Module):

    def __init__(self, in_ch, mid_ch, out_ch):
        super(conv_block_nested, self).__init__()
        self.activation = nn.ReLU(inplace=True)
        self.conv1 = GhostModule(in_ch, mid_ch, kernel_size=3, relu=True)
#         self.conv3x1_1 = nn.Conv2d(in_ch, in_ch, kernel_size=(3,1), padding=(1,0), bias=True)
#         self.conv1x3_1 = nn.Conv2d(in_ch, mid_ch, kernel_size=(1,3), padding=(0,1), bias=True)
        
#         self.conv1 = nn.Conv2d(in_ch, mid_ch, kernel_size=3, padding=1, bias=True)
#         self.bn1 = nn.BatchNorm2d(mid_ch)
        
        self.conv2 = GhostModule(mid_ch, out_ch, kernel_size=3, relu=True)
#         self.conv3x1_2 = nn.Conv2d(mid_ch, mid_ch, kernel_size=(3,1), padding=(1,0), bias=True)
#         self.conv1x3_2 = nn.Conv2d(mid_ch, out_ch, kernel_size=(1,3), padding=(0,1), bias=True)
#         self.conv2 = nn.Conv2d(mid_ch, out_ch, kernel_size=3, padding=1, bias=True)
#         self.bn2 = nn.BatchNorm2d(out_ch)

    def forward(self, x):
        x = self.conv1(x)
#         x = self.conv3x1_1(x)
#         x = self.conv1x3_1(x)
        
#         x = self.bn1(x)
#         x = self.activation(x)

        x = self.conv2(x)
#         x = self.conv3x1_2(x)
#         x = self.conv1x3_2(x)
#         x = self.bn2(x)
#         output = self.activation(x)

        return x

class Nested_UNet_ghost(nn.Module):

    def __init__(self, in_ch=3, out_ch=1):
        super(Nested_UNet_ghost, self).__init__()

        n1 = 32
        filters = [n1, n1 * 2, n1 * 4, n1 * 8, n1 * 16]

        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.Up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)

        self.conv0_0 = conv_block_nested(in_ch, filters[0], filters[0])
        self.conv1_0 = conv_block_nested(filters[0], filters[1], filters[1])
        self.conv2_0 = conv_block_nested(filters[1], filters[2], filters[2])
        self.conv3_0 = conv_block_nested(filters[2], filters[3], filters[3])
        self.conv4_0 = conv_block_nested(filters[3], filters[4], filters[4])

        self.conv0_1 = conv_block_nested(filters[0] + filters[1], filters[0], filters[0])
        self.conv1_1 = conv_block_nested(filters[1] + filters[2], filters[1], filters[1])
        self.conv2_1 = conv_block_nested(filters[2] + filters[3], filters[2], filters[2])
        self.conv3_1 = conv_block_nested(filters[3] + filters[4], filters[3], filters[3])

        self.conv0_2 = conv_block_nested(filters[0]*2 + filters[1], filters[0], filters[0])
        self.conv1_2 = conv_block_nested(filters[1]*2 + filters[2], filters[1], filters[1])
        self.conv2_2 = conv_block_nested(filters[2]*2 + filters[3], filters[2], filters[2])

        self.conv0_3 = conv_block_nested(filters[0]*3 + filters[1], filters[0], filters[0])
        self.conv1_3 = conv_block_nested(filters[1]*3 + filters[2], filters[1], filters[1])

        self.conv0_4 = conv_block_nested(filters[0]*4 + filters[1], filters[0], filters[0])

        self.final = nn.Conv2d(filters[0], out_ch, kernel_size=1)

    def forward(self, x):

        x0_0 = self.conv0_0(x)
        x1_0 = self.conv1_0(self.pool(x0_0))
        x0_1 = self.conv0_1(torch.cat([x0_0, self.Up(x1_0)], 1))

        x2_0 = self.conv2_0(self.pool(x1_0))
        x1_1 = self.conv1_1(torch.cat([x1_0, self.Up(x2_0)], 1))
        x0_2 = self.conv0_2(torch.cat([x0_0, x0_1, self.Up(x1_1)], 1))

        x3_0 = self.conv3_0(self.pool(x2_0))
        x2_1 = self.conv2_1(torch.cat([x2_0, self.Up(x3_0)], 1))
        x1_2 = self.conv1_2(torch.cat([x1_0, x1_1, self.Up(x2_1)], 1))
        x0_3 = self.conv0_3(torch.cat([x0_0, x0_1, x0_2, self.Up(x1_2)], 1))

        x4_0 = self.conv4_0(self.pool(x3_0))
        x3_1 = self.conv3_1(torch.cat([x3_0, self.Up(x4_0)], 1))
        x2_2 = self.conv2_2(torch.cat([x2_0, x2_1, self.Up(x3_1)], 1))
        x1_3 = self.conv1_3(torch.cat([x1_0, x1_1, x1_2, self.Up(x2_2)], 1))
        x0_4 = self.conv0_4(torch.cat([x0_0, x0_1, x0_2, x0_3, self.Up(x1_3)], 1))

        output = self.final(x0_4)
        return output

In [5]:
unetplusplus_ghos = Nested_UNet_ghost().eval().cuda()
summary(unetplusplus_ghos,  (3, 512, 512))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 16, 512, 512]             432
       BatchNorm2d-2         [-1, 16, 512, 512]              32
              ReLU-3         [-1, 16, 512, 512]               0
            Conv2d-4         [-1, 16, 512, 512]             144
       BatchNorm2d-5         [-1, 16, 512, 512]              32
              ReLU-6         [-1, 16, 512, 512]               0
       GhostModule-7         [-1, 32, 512, 512]               0
            Conv2d-8         [-1, 16, 512, 512]           4,608
       BatchNorm2d-9         [-1, 16, 512, 512]              32
             ReLU-10         [-1, 16, 512, 512]               0
           Conv2d-11         [-1, 16, 512, 512]             144
      BatchNorm2d-12         [-1, 16, 512, 512]              32
             ReLU-13         [-1, 16, 512, 512]               0
      GhostModule-14         [-1, 32, 5

In [6]:
input =torch.randn(1,3,720,960).cuda()

torch.cuda.synchronize()
time_start = time.time()

output = unetplusplus_ghos(input)
torch.cuda.synchronize()
time_end = time.time()
infer_time = time_end - time_start
print(infer_time)

0.09329056739807129


In [30]:
macs, params = get_model_complexity_info(unetplusplus_ghos, (3, 512, 512), as_strings=True,
                                           print_per_layer_stat=True, verbose=True)
print('{:<30}  {:<8}'.format('Computational complexity: ', macs))
print('{:<30}  {:<8}'.format('Number of parameters: ', params))

Nested_UNet_ghost(
  4.6 M, 100.000% Params, 70.109 GMac, 100.000% MACs, 
  (pool): MaxPool2d(0.0 M, 0.000% Params, 0.016 GMac, 0.022% MACs, kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (Up): Upsample(0.0 M, 0.000% Params, 0.103 GMac, 0.147% MACs, scale_factor=2.0, mode=bilinear)
  (conv0_0): conv_block_nested(
    0.005 M, 0.119% Params, 1.447 GMac, 2.064% MACs, 
    (activation): ReLU(0.0 M, 0.000% Params, 0.0 GMac, 0.000% MACs, inplace=True)
    (conv1): GhostModule(
      0.001 M, 0.014% Params, 0.176 GMac, 0.251% MACs, 
      (primary_conv): Sequential(
        0.0 M, 0.010% Params, 0.126 GMac, 0.179% MACs, 
        (0): Conv2d(0.0 M, 0.009% Params, 0.113 GMac, 0.162% MACs, 3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (1): BatchNorm2d(0.0 M, 0.001% Params, 0.008 GMac, 0.012% MACs, 16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(0.0 M, 0.000% Params, 0.004 GMac, 0.006% MACs, inplace=True)
 

In [35]:
res = []
for id, data in enumerate(test_dataloader):
    inputs, labels = data
    inputs = inputs.cuda()
    labels = labels.cuda()
    torch.cuda.synchronize()
    start = time.time()
    predict= unetplusplus_ghos(inputs)
    torch.cuda.synchronize()
    end = time.time()
    res.append(end-start)
time_sum = 0
for i in res:
    time_sum += i
    
print("FPS: %f"%(1.0/(time_sum/len(res))))

(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
FPS: 28.906437


In [15]:
res = []
for i in tqdm(range(13)):
    inputs, _ = test_dataset[i]
    inputs = inputs.cuda().unsqueeze(0)

    torch.cuda.synchronize()
    start = time.time()
    predict= unetplusplus_ghos(inputs).data
    torch.cuda.synchronize()
    end = time.time()
    res.append(end-start)
time_sum = 0
for i in res:
    time_sum += i
    
print("FPS: %f"%(1.0/(time_sum/len(res))))

  8%|▊         | 1/13 [00:00<00:05,  2.31it/s]

(512, 512, 3)



 15%|█▌        | 2/13 [00:00<00:04,  2.35it/s]

(512, 512, 3)



 23%|██▎       | 3/13 [00:01<00:04,  2.41it/s]

(512, 512, 3)



 31%|███       | 4/13 [00:01<00:03,  2.41it/s]

(512, 512, 3)



 38%|███▊      | 5/13 [00:01<00:03,  2.61it/s]

(512, 512, 3)



 46%|████▌     | 6/13 [00:02<00:02,  2.69it/s]

(512, 512, 3)



 54%|█████▍    | 7/13 [00:02<00:02,  2.76it/s]

(512, 512, 3)



 62%|██████▏   | 8/13 [00:03<00:01,  2.80it/s]

(512, 512, 3)



 69%|██████▉   | 9/13 [00:03<00:01,  2.70it/s]

(512, 512, 3)
(512, 512, 3)
(512, 512, 3)


100%|██████████| 13/13 [00:03<00:00,  3.47it/s]

(512, 512, 3)
(512, 512, 3)
FPS: 29.746869





# DeepLabV3Plus_Xception

In [84]:
class SeparableConv2d(nn.Module):
    def __init__(self, inplanes, planes, kernel_size=3, stride=1, padding=0, dilation=1, bias=False):
        super(SeparableConv2d, self).__init__()

        self.conv1 = nn.Conv2d(inplanes, inplanes, kernel_size, stride, padding, dilation,
                               groups=inplanes, bias=bias)
        self.pointwise = nn.Conv2d(inplanes, planes, 1, 1, 0, 1, 1, bias=bias)

    def forward(self, x):
        x = self.conv1(x)
        x = self.pointwise(x)
        return x


def fixed_padding(inputs, kernel_size, rate):
    kernel_size_effective = kernel_size + (kernel_size - 1) * (rate - 1)
    pad_total = kernel_size_effective - 1
    pad_beg = pad_total // 2
    pad_end = pad_total - pad_beg
    padded_inputs = F.pad(inputs, (pad_beg, pad_end, pad_beg, pad_end))
    return padded_inputs


class SeparableConv2d_same(nn.Module):
    def __init__(self, inplanes, planes, kernel_size=3, stride=1, dilation=1, bias=False):
        super(SeparableConv2d_same, self).__init__()

        self.conv1 = nn.Conv2d(inplanes, inplanes, kernel_size, stride, 0, dilation,
                               groups=inplanes, bias=bias)
        self.pointwise = nn.Conv2d(inplanes, planes, 1, 1, 0, 1, 1, bias=bias)

    def forward(self, x):
        x = fixed_padding(x, self.conv1.kernel_size[0], rate=self.conv1.dilation[0])
        x = self.conv1(x)
        x = self.pointwise(x)
        return x


class Block(nn.Module):
    def __init__(self, inplanes, planes, reps, stride=1, dilation=1, start_with_relu=True, grow_first=True, is_last=False):
        super(Block, self).__init__()

        if planes != inplanes or stride != 1:
            self.skip = nn.Conv2d(inplanes, planes, 1, stride=stride, bias=False)
            self.skipbn = nn.BatchNorm2d(planes)
        else:
            self.skip = None

        self.relu = nn.ReLU(inplace=True)
        rep = []

        filters = inplanes
        if grow_first:
            rep.append(self.relu)
            rep.append(SeparableConv2d_same(inplanes, planes, 3, stride=1, dilation=dilation))
            rep.append(nn.BatchNorm2d(planes))
            filters = planes

        for i in range(reps - 1):
            rep.append(self.relu)
            rep.append(SeparableConv2d_same(filters, filters, 3, stride=1, dilation=dilation))
            rep.append(nn.BatchNorm2d(filters))

        if not grow_first:
            rep.append(self.relu)
            rep.append(SeparableConv2d_same(inplanes, planes, 3, stride=1, dilation=dilation))
            rep.append(nn.BatchNorm2d(planes))

        if not start_with_relu:
            rep = rep[1:]

        if stride != 1:
            rep.append(SeparableConv2d_same(planes, planes, 3, stride=2))

        if stride == 1 and is_last:
            rep.append(SeparableConv2d_same(planes, planes, 3, stride=1))


        self.rep = nn.Sequential(*rep)

    def forward(self, inp):
        x = self.rep(inp)

        if self.skip is not None:
            skip = self.skip(inp)
            skip = self.skipbn(skip)
        else:
            skip = inp

        x += skip

        return x




In [88]:
class Xception(nn.Module):
    """
    Modified Alighed Xception
    """
    def __init__(self, inplanes=3, os=16, pretrained=False):
        super(Xception, self).__init__()

        if os == 16:
            entry_block3_stride = 2
            middle_block_rate = 1
            exit_block_rates = (1, 2)
        elif os == 8:
            entry_block3_stride = 1
            middle_block_rate = 2
            exit_block_rates = (2, 4)
        else:
            raise NotImplementedError


        # Entry flow
        self.conv1 = nn.Conv2d(inplanes, 32, 3, stride=2, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        self.relu = nn.ReLU(inplace=True)

        self.conv2 = nn.Conv2d(32, 64, 3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(64)

        self.block1 = Block(64, 128, reps=2, stride=2, start_with_relu=False)
        self.block2 = Block(128, 256, reps=2, stride=2, start_with_relu=True, grow_first=True)
        self.block3 = Block(256, 728, reps=2, stride=entry_block3_stride, start_with_relu=True, grow_first=True,
                            is_last=True)

        # Middle flow
        self.block4  = Block(728, 728, reps=3, stride=1, dilation=middle_block_rate, start_with_relu=True, grow_first=True)
        self.block5  = Block(728, 728, reps=3, stride=1, dilation=middle_block_rate, start_with_relu=True, grow_first=True)
        self.block6  = Block(728, 728, reps=3, stride=1, dilation=middle_block_rate, start_with_relu=True, grow_first=True)
        self.block7  = Block(728, 728, reps=3, stride=1, dilation=middle_block_rate, start_with_relu=True, grow_first=True)
        self.block8  = Block(728, 728, reps=3, stride=1, dilation=middle_block_rate, start_with_relu=True, grow_first=True)
        self.block9  = Block(728, 728, reps=3, stride=1, dilation=middle_block_rate, start_with_relu=True, grow_first=True)
        self.block10 = Block(728, 728, reps=3, stride=1, dilation=middle_block_rate, start_with_relu=True, grow_first=True)
        self.block11 = Block(728, 728, reps=3, stride=1, dilation=middle_block_rate, start_with_relu=True, grow_first=True)
        self.block12 = Block(728, 728, reps=3, stride=1, dilation=middle_block_rate, start_with_relu=True, grow_first=True)
        self.block13 = Block(728, 728, reps=3, stride=1, dilation=middle_block_rate, start_with_relu=True, grow_first=True)
        self.block14 = Block(728, 728, reps=3, stride=1, dilation=middle_block_rate, start_with_relu=True, grow_first=True)
        self.block15 = Block(728, 728, reps=3, stride=1, dilation=middle_block_rate, start_with_relu=True, grow_first=True)
        self.block16 = Block(728, 728, reps=3, stride=1, dilation=middle_block_rate, start_with_relu=True, grow_first=True)
        self.block17 = Block(728, 728, reps=3, stride=1, dilation=middle_block_rate, start_with_relu=True, grow_first=True)
        self.block18 = Block(728, 728, reps=3, stride=1, dilation=middle_block_rate, start_with_relu=True, grow_first=True)
        self.block19 = Block(728, 728, reps=3, stride=1, dilation=middle_block_rate, start_with_relu=True, grow_first=True)

        # Exit flow
        self.block20 = Block(728, 1024, reps=2, stride=1, dilation=exit_block_rates[0],
                             start_with_relu=True, grow_first=False, is_last=True)

        self.conv3 = SeparableConv2d_same(1024, 1536, 3, stride=1, dilation=exit_block_rates[1])
        self.bn3 = nn.BatchNorm2d(1536)

        self.conv4 = SeparableConv2d_same(1536, 1536, 3, stride=1, dilation=exit_block_rates[1])
        self.bn4 = nn.BatchNorm2d(1536)

        self.conv5 = SeparableConv2d_same(1536, 2048, 3, stride=1, dilation=exit_block_rates[1])
        self.bn5 = nn.BatchNorm2d(2048)

        # Init weights
        self.__init_weight()

        # Load pretrained model
        if pretrained:
            self.__load_xception_pretrained()

    def forward(self, x):
        # Entry flow
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        
        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu(x)

        x = self.block1(x)
        low_level_feat = x
        x = self.block2(x)
        x = self.block3(x)
        print('Entry flow x shape', x.shape)
        # Middle flow
        x = self.block4(x)
        x = self.block5(x)
        x = self.block6(x)
        x = self.block7(x)
        x = self.block8(x)
        x = self.block9(x)
        x = self.block10(x)
        x = self.block11(x)
        x = self.block12(x)
        x = self.block13(x)
        x = self.block14(x)
        x = self.block15(x)
        x = self.block16(x)
        x = self.block17(x)
        x = self.block18(x)
        x = self.block19(x)
        print('Middle flow x shape', x.shape)
        # Exit flow
        x = self.block20(x)
        x = self.conv3(x)
        x = self.bn3(x)
        x = self.relu(x)

        x = self.conv4(x)
        x = self.bn4(x)
        x = self.relu(x)

        x = self.conv5(x)
        x = self.bn5(x)
        x = self.relu(x)
        print('Exit flow x shape', x.shape)
        return x, low_level_feat

    def __init_weight(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                # m.weight.data.normal_(0, math.sqrt(2. / n))
                torch.nn.init.kaiming_normal_(m.weight)
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def __load_xception_pretrained(self):
        pretrain_dict = model_zoo.load_url('http://data.lip6.fr/cadene/pretrainedmodels/xception-b5690688.pth')
        model_dict = {}
        state_dict = self.state_dict()

        for k, v in pretrain_dict.items():
            print(k)
            if k in state_dict:
                if 'pointwise' in k:
                    v = v.unsqueeze(-1).unsqueeze(-1)
                if k.startswith('block12'):
                    model_dict[k.replace('block12', 'block20')] = v
                elif k.startswith('block11'):
                    model_dict[k.replace('block11', 'block12')] = v
                elif k.startswith('conv3'):
                    model_dict[k] = v
                elif k.startswith('bn3'):
                    model_dict[k] = v
                    model_dict[k.replace('bn3', 'bn4')] = v
                elif k.startswith('conv4'):
                    model_dict[k.replace('conv4', 'conv5')] = v
                elif k.startswith('bn4'):
                    model_dict[k.replace('bn4', 'bn5')] = v
                else:
                    model_dict[k] = v
        state_dict.update(model_dict)
        self.load_state_dict(state_dict)

class ASPP_module(nn.Module):
    def __init__(self, inplanes, planes, rate):
        super(ASPP_module, self).__init__()
        if rate == 1:
            kernel_size = 1
            padding = 0
        else:
            kernel_size = 3
            padding = rate
        self.atrous_convolution = nn.Conv2d(inplanes, planes, kernel_size=kernel_size,
                                            stride=1, padding=padding, dilation=rate, bias=False)
        self.bn = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU()

        self.__init_weight()

    def forward(self, x):
        x = self.atrous_convolution(x)
        x = self.bn(x)

        return self.relu(x)

    def __init_weight(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                # m.weight.data.normal_(0, math.sqrt(2. / n))
                torch.nn.init.kaiming_normal_(m.weight)
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()




In [89]:
class DeepLabv3_plus(nn.Module):
    def __init__(self, nInputChannels=3, n_classes=21, os=16, pretrained=False, _print=True):
        if _print:
            print("Constructing DeepLabv3+ model...")
            print("Number of classes: {}".format(n_classes))
            print("Output stride: {}".format(os))
            print("Number of Input Channels: {}".format(nInputChannels))
        super(DeepLabv3_plus, self).__init__()

        # Atrous Conv
        self.xception_features = Xception(nInputChannels, os, pretrained)

        # ASPP
        if os == 16:
            rates = [1, 6, 12, 18]
        elif os == 8:
            rates = [1, 12, 24, 36]
        else:
            raise NotImplementedError

        self.aspp1 = ASPP_module(2048, 256, rate=rates[0])
        self.aspp2 = ASPP_module(2048, 256, rate=rates[1])
        self.aspp3 = ASPP_module(2048, 256, rate=rates[2])
        self.aspp4 = ASPP_module(2048, 256, rate=rates[3])

        self.relu = nn.ReLU()

        self.global_avg_pool = nn.Sequential(nn.AdaptiveAvgPool2d((1, 1)),
                                             nn.Conv2d(2048, 256, 1, stride=1, bias=False),
                                             nn.BatchNorm2d(256),
                                             nn.ReLU())

        self.conv1 = nn.Conv2d(1280, 256, 1, bias=False)
        self.bn1 = nn.BatchNorm2d(256)

        # adopt [1x1, 48] for channel reduction.
        self.conv2 = nn.Conv2d(128, 48, 1, bias=False)
        self.bn2 = nn.BatchNorm2d(48)

        self.last_conv = nn.Sequential(nn.Conv2d(304, 256, kernel_size=3, stride=1, padding=1, bias=False),
                                       nn.BatchNorm2d(256),
                                       nn.ReLU(),
                                       nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=False),
                                       nn.BatchNorm2d(256),
                                       nn.ReLU(),
                                       nn.Conv2d(256, n_classes, kernel_size=1, stride=1))

    def forward(self, input):
        x, low_level_features = self.xception_features(input)
        x1 = self.aspp1(x)
        x2 = self.aspp2(x)
        x3 = self.aspp3(x)
        x4 = self.aspp4(x)
        x5 = self.global_avg_pool(x)
        x5 = F.upsample(x5, size=x4.size()[2:], mode='bilinear', align_corners=True)

        x = torch.cat((x1, x2, x3, x4, x5), dim=1)

        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = F.upsample(x, size=(int(math.ceil(input.size()[-2]/4)),
                                int(math.ceil(input.size()[-1]/4))), mode='bilinear', align_corners=True)

        low_level_features = self.conv2(low_level_features)
        low_level_features = self.bn2(low_level_features)
        low_level_features = self.relu(low_level_features)


        x = torch.cat((x, low_level_features), dim=1)
        x = self.last_conv(x)
        x = F.upsample(x, size=input.size()[2:], mode='bilinear', align_corners=True)

        return x

    def freeze_bn(self):
        for m in self.modules():
            if isinstance(m, nn.BatchNorm2d):
                m.eval()

    def __init_weight(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                # torch.nn.init.kaiming_normal_(m.weight)
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()



In [91]:
xception_features = Xception(3, 16, False).cuda()
input =torch.randn(1,3,512,512).cuda()
output = xception_features(input)


Entry flow x shape torch.Size([1, 728, 32, 32])
Middle flow x shape torch.Size([1, 728, 32, 32])
Exit flow x shape torch.Size([1, 2048, 32, 32])


In [6]:
deeplab_xcep = DeepLabv3_plus(nInputChannels=3, n_classes=1, os=16, pretrained=False, _print=True).eval().cuda()
summary(deeplab_xcep,  (3, 512, 512))

Constructing DeepLabv3+ model...
Number of classes: 1
Output stride: 16
Number of Input Channels: 3
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 256, 256]             864
       BatchNorm2d-2         [-1, 32, 256, 256]              64
              ReLU-3         [-1, 32, 256, 256]               0
            Conv2d-4         [-1, 64, 256, 256]          18,432
       BatchNorm2d-5         [-1, 64, 256, 256]             128
              ReLU-6         [-1, 64, 256, 256]               0
            Conv2d-7         [-1, 64, 256, 256]             576
            Conv2d-8        [-1, 128, 256, 256]           8,192
SeparableConv2d_same-9        [-1, 128, 256, 256]               0
      BatchNorm2d-10        [-1, 128, 256, 256]             256
             ReLU-11        [-1, 128, 256, 256]               0
             ReLU-12        [-1, 128, 256, 256]               0
 



In [7]:
input =torch.randn(1,3,720,960).cuda()

torch.cuda.synchronize()
time_start = time.time()

output = deeplab_xcep(input)
torch.cuda.synchronize()
time_end = time.time()
infer_time = time_end - time_start
print(infer_time)

0.07286524772644043


In [41]:
res = []
for id, data in enumerate(test_dataloader):
    inputs, labels = data
    inputs = inputs.cuda()
    labels = labels.cuda()
    torch.cuda.synchronize()
    start = time.time()
    predict= deeplab_xcep(inputs)
    torch.cuda.synchronize()
    end = time.time()
    res.append(end-start)
time_sum = 0
for i in res:
    time_sum += i
    
print("FPS: %f"%(1.0/(time_sum/len(res))))

(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
(512, 512, 3)
FPS: 32.606476


In [20]:
res = []
for i in tqdm(range(13)):
    inputs, _ = test_dataset[i]
    inputs = inputs.cuda().unsqueeze(0)

    torch.cuda.synchronize()
    start = time.time()
    predict= deeplab_xcep(inputs).data
    torch.cuda.synchronize()
    end = time.time()
    res.append(end-start)
time_sum = 0
for i in res:
    time_sum += i
    
print("FPS: %f"%(1.0/(time_sum/len(res))))

  8%|▊         | 1/13 [00:00<00:05,  2.38it/s]

(512, 512, 3)



 15%|█▌        | 2/13 [00:00<00:04,  2.38it/s]

(512, 512, 3)



 23%|██▎       | 3/13 [00:01<00:04,  2.43it/s]

(512, 512, 3)



 31%|███       | 4/13 [00:01<00:03,  2.43it/s]

(512, 512, 3)



 38%|███▊      | 5/13 [00:01<00:03,  2.63it/s]

(512, 512, 3)



 46%|████▌     | 6/13 [00:02<00:02,  2.71it/s]

(512, 512, 3)



 54%|█████▍    | 7/13 [00:02<00:02,  2.83it/s]

(512, 512, 3)



 62%|██████▏   | 8/13 [00:02<00:01,  2.87it/s]

(512, 512, 3)


 85%|████████▍ | 11/13 [00:03<00:00,  4.48it/s]

(512, 512, 3)
(512, 512, 3)
(512, 512, 3)


100%|██████████| 13/13 [00:03<00:00,  3.55it/s]

(512, 512, 3)
(512, 512, 3)
FPS: 34.158480





In [35]:
macs, params = get_model_complexity_info(deeplab_xcep, (3, 512, 512), as_strings=True,
                                           print_per_layer_stat=True, verbose=True)
print('{:<30}  {:<8}'.format('Computational complexity: ', macs))
print('{:<30}  {:<8}'.format('Number of parameters: ', params))

DeepLabv3_plus(
  54.608 M, 100.000% Params, 82.963 GMac, 100.000% MACs, 
  (xception_features): Xception(
    37.775 M, 69.175% Params, 46.317 GMac, 55.828% MACs, 
    (conv1): Conv2d(0.001 M, 0.002% Params, 0.057 GMac, 0.068% MACs, 3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(0.0 M, 0.000% Params, 0.004 GMac, 0.005% MACs, 32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(0.0 M, 0.000% Params, 0.012 GMac, 0.014% MACs, inplace=True)
    (conv2): Conv2d(0.018 M, 0.034% Params, 1.208 GMac, 1.456% MACs, 32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(0.0 M, 0.000% Params, 0.008 GMac, 0.010% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (block1): Block(
      0.053 M, 0.097% Params, 2.2 GMac, 2.652% MACs, 
      (skip): Conv2d(0.008 M, 0.015% Params, 0.134 GMac, 0.162% MACs, 64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)
 

# DeepLabV3Plus_Xception_ghost

In [8]:
class GhostModule(nn.Module):
    def __init__(self, inp, oup, kernel_size=1, ratio=2, dw_size=3, stride=1, relu=True, dilation=None, batchnorm=True):
        super(GhostModule, self).__init__()
        self.oup = oup
        init_channels = math.ceil(oup / ratio)
        new_channels = init_channels*(ratio-1)
        if dilation:
            self.primary_conv = nn.Sequential(
                nn.Conv2d(inp, init_channels, kernel_size, stride, kernel_size//2, dilation=dilation,bias=False),
                nn.BatchNorm2d(init_channels) if batchnorm else nn.Sequential(),
                nn.ReLU6(inplace=True) if relu else nn.Sequential(),
            )

            self.cheap_operation = nn.Sequential(
                nn.Conv2d(init_channels, new_channels, dw_size, 1, dw_size//2, dilation=dilation, groups=init_channels, bias=False),
                nn.BatchNorm2d(new_channels) if batchnorm else nn.Sequential(),
                nn.ReLU6(inplace=True) if relu else nn.Sequential(),
            )
        else:
            self.primary_conv = nn.Sequential(
                nn.Conv2d(inp, init_channels, kernel_size, stride, kernel_size//2, bias=False),
                nn.BatchNorm2d(init_channels) if batchnorm else nn.Sequential(),
                nn.ReLU(inplace=True) if relu else nn.Sequential(),
            )

            self.cheap_operation = nn.Sequential(
                nn.Conv2d(init_channels, new_channels, dw_size, 1, dw_size//2, groups=init_channels, bias=False),
                nn.BatchNorm2d(new_channels) if batchnorm else nn.Sequential(),
                nn.ReLU(inplace=True) if relu else nn.Sequential(),
            )            
    def forward(self, x):
        x1 = self.primary_conv(x)
        x2 = self.cheap_operation(x1)
        out = torch.cat([x1,x2], dim=1)
        return out[:,:self.oup,:,:]

In [9]:
class SeparableConv2d(nn.Module):
    def __init__(self, inplanes, planes, kernel_size=3, stride=1, padding=0, dilation=1, bias=False):
        super(SeparableConv2d, self).__init__()

        self.conv1 = nn.Conv2d(inplanes, inplanes, kernel_size, stride, padding, dilation,
                               groups=inplanes, bias=bias)
        self.pointwise = nn.Conv2d(inplanes, planes, 1, 1, 0, 1, 1, bias=bias)

    def forward(self, x):
        x = self.conv1(x)
        x = self.pointwise(x)
        return x


def fixed_padding(inputs, kernel_size, rate):
    kernel_size_effective = kernel_size + (kernel_size - 1) * (rate - 1)
    pad_total = kernel_size_effective - 1
    pad_beg = pad_total // 2
    pad_end = pad_total - pad_beg
    padded_inputs = F.pad(inputs, (pad_beg, pad_end, pad_beg, pad_end))
    return padded_inputs


class SeparableConv2d_same(nn.Module):
    def __init__(self, inplanes, planes, kernel_size=3, stride=1, dilation=1, bias=False):
        super(SeparableConv2d_same, self).__init__()
#         self.ghost = GhostModule(inplanes, planes, kernel_size=3, stride=stride, dilation=1, relu=False, batchnorm=False)
        self.conv1 = nn.Conv2d(inplanes, inplanes, kernel_size, stride, 0, dilation,
                               groups=inplanes, bias=bias)
        self.pointwise = nn.Conv2d(inplanes, planes, 1, 1, 0, 1, 1, bias=bias)

    def forward(self, x):
        x = fixed_padding(x, self.conv1.kernel_size[0], rate=self.conv1.dilation[0])
#         x = self.ghost(x)
        x = self.conv1(x)
        x = self.pointwise(x)
        return x


class Block(nn.Module):
    def __init__(self, inplanes, planes, reps, stride=1, dilation=1, start_with_relu=True, grow_first=True, is_last=False):
        super(Block, self).__init__()

        if planes != inplanes or stride != 1:
            self.skip = GhostModule(inplanes, planes, kernel_size=1, stride=stride, relu=False)
#             self.skip = nn.Conv2d(inplanes, planes, 1, stride=stride, bias=False)
#             self.skipbn = nn.BatchNorm2d(planes)
        else:
            self.skip = None

        self.relu = nn.ReLU(inplace=True)
        rep = []

        filters = inplanes
        if grow_first:
            rep.append(self.relu)
#             rep.append(GhostModule(inplanes, planes, kernel_size=3, stride=1, relu=False, dilation=dilation))
            rep.append(SeparableConv2d_same(inplanes, planes, 3, stride=1, dilation=dilation))
            rep.append(nn.BatchNorm2d(planes))
            filters = planes

        for i in range(reps - 1):
            rep.append(self.relu)
#             rep.append(GhostModule(filters, filters, kernel_size=3, stride=1, relu=False, dilation=dilation))
            rep.append(SeparableConv2d_same(filters, filters, 3, stride=1, dilation=dilation))
            rep.append(nn.BatchNorm2d(filters))

        if not grow_first:
            rep.append(self.relu)
#             rep.append(GhostModule(inplanes, planes, kernel_size=3, stride=1, relu=False, dilation=dilation))
            rep.append(SeparableConv2d_same(inplanes, planes, 3, stride=1, dilation=dilation))
            rep.append(nn.BatchNorm2d(planes))

        if not start_with_relu:
            rep = rep[1:]

        if stride != 1:
            rep.append(GhostModule(planes, planes, kernel_size=3, stride=2, relu=False, batchnorm=False))
#             rep.append(SeparableConv2d_same(planes, planes, 3, stride=2))

        if stride == 1 and is_last:
            rep.append(GhostModule(planes, planes, kernel_size=3, stride=1, relu=False, batchnorm=False))
#             rep.append(SeparableConv2d_same(planes, planes, 3, stride=1))


        self.rep = nn.Sequential(*rep)

    def forward(self, inp):
        x = self.rep(inp)

        if self.skip is not None:
            skip = self.skip(inp)
#             skip = self.skipbn(skip)
        else:
            skip = inp

        x += skip

        return x




In [10]:
class Xception(nn.Module):
    """
    Modified Alighed Xception
    """
    def __init__(self, inplanes=3, os=16, pretrained=False):
        super(Xception, self).__init__()

        if os == 16:
            entry_block3_stride = 2
            middle_block_rate = 1
            exit_block_rates = (1, 2)
        elif os == 8:
            entry_block3_stride = 1
            middle_block_rate = 2
            exit_block_rates = (2, 4)
        else:
            raise NotImplementedError


        # Entry flow
        self.conv1 = GhostModule(inplanes, 32, kernel_size=3, stride=2)
#         self.conv1 = nn.Conv2d(inplanes, 32, 3, stride=2, padding=1, bias=False)
#         self.bn1 = nn.BatchNorm2d(32)
        self.relu = nn.ReLU(inplace=True)

        self.conv2 = GhostModule(32, 64, kernel_size=3, stride=1, relu=False)
#         self.conv2 = nn.Conv2d(32, 64, 3, stride=1, padding=1, bias=False)
#         self.bn2 = nn.BatchNorm2d(64)

        self.block1 = Block(64, 128, reps=2, stride=2, start_with_relu=False)
        self.block2 = Block(128, 256, reps=2, stride=2, start_with_relu=True, grow_first=True)
        self.block3 = Block(256, 728, reps=2, stride=entry_block3_stride, start_with_relu=True, grow_first=True,
                            is_last=True)

        # Middle flow
        self.block4  = Block(728, 728, reps=3, stride=1, dilation=middle_block_rate, start_with_relu=True, grow_first=True)
        self.block5  = Block(728, 728, reps=3, stride=1, dilation=middle_block_rate, start_with_relu=True, grow_first=True)
        self.block6  = Block(728, 728, reps=3, stride=1, dilation=middle_block_rate, start_with_relu=True, grow_first=True)
        self.block7  = Block(728, 728, reps=3, stride=1, dilation=middle_block_rate, start_with_relu=True, grow_first=True)
        self.block8  = Block(728, 728, reps=3, stride=1, dilation=middle_block_rate, start_with_relu=True, grow_first=True)
        self.block9  = Block(728, 728, reps=3, stride=1, dilation=middle_block_rate, start_with_relu=True, grow_first=True)
        self.block10 = Block(728, 728, reps=3, stride=1, dilation=middle_block_rate, start_with_relu=True, grow_first=True)
        self.block11 = Block(728, 728, reps=3, stride=1, dilation=middle_block_rate, start_with_relu=True, grow_first=True)
        self.block12 = Block(728, 728, reps=3, stride=1, dilation=middle_block_rate, start_with_relu=True, grow_first=True)
        self.block13 = Block(728, 728, reps=3, stride=1, dilation=middle_block_rate, start_with_relu=True, grow_first=True)
        self.block14 = Block(728, 728, reps=3, stride=1, dilation=middle_block_rate, start_with_relu=True, grow_first=True)
        self.block15 = Block(728, 728, reps=3, stride=1, dilation=middle_block_rate, start_with_relu=True, grow_first=True)
        self.block16 = Block(728, 728, reps=3, stride=1, dilation=middle_block_rate, start_with_relu=True, grow_first=True)
        self.block17 = Block(728, 728, reps=3, stride=1, dilation=middle_block_rate, start_with_relu=True, grow_first=True)
        self.block18 = Block(728, 728, reps=3, stride=1, dilation=middle_block_rate, start_with_relu=True, grow_first=True)
        self.block19 = Block(728, 728, reps=3, stride=1, dilation=middle_block_rate, start_with_relu=True, grow_first=True)

        # Exit flow
        self.block20 = Block(728, 1024, reps=2, stride=1, dilation=exit_block_rates[0],
                             start_with_relu=True, grow_first=False, is_last=True)
        
        
        self.conv3 = SeparableConv2d_same(1024, 1536, 3, stride=1, dilation=exit_block_rates[1])
        self.bn3 = nn.BatchNorm2d(1536)
        
        self.conv4 = SeparableConv2d_same(1536, 1536, 3, stride=1, dilation=exit_block_rates[1])
        self.bn4 = nn.BatchNorm2d(1536)

        self.conv5 = SeparableConv2d_same(1536, 2048, 3, stride=1, dilation=exit_block_rates[1])
        self.bn5 = nn.BatchNorm2d(2048)

        # Init weights
        self.__init_weight()

        # Load pretrained model
        if pretrained:
            self.__load_xception_pretrained()

    def forward(self, x):
        # Entry flow
        x = self.conv1(x)
#         x = self.bn1(x)
#         x = self.relu(x)

        x = self.conv2(x)
#         x = self.bn2(x)
#         x = self.relu(x)

        x = self.block1(x)
        low_level_feat = x
        x = self.block2(x)
        x = self.block3(x)

        # Middle flow
        x = self.block4(x)
        x = self.block5(x)
        x = self.block6(x)
        x = self.block7(x)
        x = self.block8(x)
        x = self.block9(x)
        x = self.block10(x)
        x = self.block11(x)
        x = self.block12(x)
        x = self.block13(x)
        x = self.block14(x)
        x = self.block15(x)
        x = self.block16(x)
        x = self.block17(x)
        x = self.block18(x)
        x = self.block19(x)

        # Exit flow
        x = self.block20(x)
        x = self.conv3(x)
        x = self.bn3(x)
        x = self.relu(x)

        x = self.conv4(x)
        x = self.bn4(x)
        x = self.relu(x)

        x = self.conv5(x)
        x = self.bn5(x)
        x = self.relu(x)

        return x, low_level_feat

    def __init_weight(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                # m.weight.data.normal_(0, math.sqrt(2. / n))
                torch.nn.init.kaiming_normal_(m.weight)
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def __load_xception_pretrained(self):
        pretrain_dict = model_zoo.load_url('http://data.lip6.fr/cadene/pretrainedmodels/xception-b5690688.pth')
        model_dict = {}
        state_dict = self.state_dict()

        for k, v in pretrain_dict.items():
            print(k)
            if k in state_dict:
                if 'pointwise' in k:
                    v = v.unsqueeze(-1).unsqueeze(-1)
                if k.startswith('block12'):
                    model_dict[k.replace('block12', 'block20')] = v
                elif k.startswith('block11'):
                    model_dict[k.replace('block11', 'block12')] = v
                elif k.startswith('conv3'):
                    model_dict[k] = v
                elif k.startswith('bn3'):
                    model_dict[k] = v
                    model_dict[k.replace('bn3', 'bn4')] = v
                elif k.startswith('conv4'):
                    model_dict[k.replace('conv4', 'conv5')] = v
                elif k.startswith('bn4'):
                    model_dict[k.replace('bn4', 'bn5')] = v
                else:
                    model_dict[k] = v
        state_dict.update(model_dict)
        self.load_state_dict(state_dict)

class ASPP_module(nn.Module):
    def __init__(self, inplanes, planes, rate):
        super(ASPP_module, self).__init__()
        if rate == 1:
            kernel_size = 1
            padding = 0
        else:
            kernel_size = 3
            padding = rate
        self.atrous_convolution = nn.Conv2d(inplanes, planes, kernel_size=kernel_size,
                                            stride=1, padding=padding, dilation=rate, bias=False)
#         self.atrous_convolution = GhostModule(inplanes, planes, kernel_size=kernel_size, stride=1, dilation=rate)
        self.bn = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU()

        self.__init_weight()

    def forward(self, x):
        x = self.atrous_convolution(x)
        x = self.bn(x)
        x = self.relu(x)
        return x

    def __init_weight(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                # m.weight.data.normal_(0, math.sqrt(2. / n))
                torch.nn.init.kaiming_normal_(m.weight)
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()




In [11]:
class DeepLabv3_plus_ghost(nn.Module):
    def __init__(self, nInputChannels=3, n_classes=21, os=16, pretrained=False, _print=True):
        if _print:
            print("Constructing DeepLabv3+ model...")
            print("Number of classes: {}".format(n_classes))
            print("Output stride: {}".format(os))
            print("Number of Input Channels: {}".format(nInputChannels))
        super(DeepLabv3_plus_ghost, self).__init__()

        # Atrous Conv
        self.xception_features = Xception(nInputChannels, os, pretrained)

        # ASPP
        if os == 16:
            rates = [1, 6, 12, 18]
        elif os == 8:
            rates = [1, 12, 24, 36]
        else:
            raise NotImplementedError

        self.aspp1 = ASPP_module(2048, 256, rate=rates[0])
        self.aspp2 = ASPP_module(2048, 256, rate=rates[1])
        self.aspp3 = ASPP_module(2048, 256, rate=rates[2])
        self.aspp4 = ASPP_module(2048, 256, rate=rates[3])

        self.relu = nn.ReLU()

        self.global_avg_pool = nn.Sequential(nn.AdaptiveAvgPool2d((1, 1)),
                                             GhostModule(2048, 256, kernel_size=1, stride=1)
#                                              nn.Conv2d(2048, 256, 1, stride=1, bias=False),
#                                              nn.BatchNorm2d(256),
#                                              nn.ReLU()
                                            )
        self.conv1 = GhostModule(1280, 256, kernel_size=1, stride=1)
#         self.conv1 = nn.Conv2d(1280, 256, 1, bias=False)
#         self.bn1 = nn.BatchNorm2d(256)

        # adopt [1x1, 48] for channel reduction.
        self.conv2 = GhostModule(128, 48, kernel_size=1, stride=1)
#         self.conv2 = nn.Conv2d(128, 48, 1, bias=False)
#         self.bn2 = nn.BatchNorm2d(48)

        self.last_conv = nn.Sequential(GhostModule(304, 256, kernel_size=3, stride=1),
                                       GhostModule(256, 256, kernel_size=3, stride=1),
                                       GhostModule(256, n_classes, kernel_size=1, stride=1, relu=False, batchnorm=False)
#                                        nn.Conv2d(304, 256, kernel_size=3, stride=1, padding=1, bias=False),
#                                        nn.BatchNorm2d(256),
#                                        nn.ReLU(),
#                                        nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=False),
#                                        nn.BatchNorm2d(256),
#                                        nn.ReLU(),
#                                        nn.Conv2d(256, n_classes, kernel_size=1, stride=1)
                                      )

    def forward(self, input):
        x, low_level_features = self.xception_features(input)
        x1 = self.aspp1(x)
        x2 = self.aspp2(x)
        x3 = self.aspp3(x)
        x4 = self.aspp4(x)
        x5 = self.global_avg_pool(x)
        x5 = F.upsample(x5, size=x4.size()[2:], mode='bilinear', align_corners=True)

        x = torch.cat((x1, x2, x3, x4, x5), dim=1)

        x = self.conv1(x)
#         x = self.bn1(x)
#         x = self.relu(x)
        x = F.upsample(x, size=(int(math.ceil(input.size()[-2]/4)),
                                int(math.ceil(input.size()[-1]/4))), mode='bilinear', align_corners=True)

        low_level_features = self.conv2(low_level_features)
#         low_level_features = self.bn2(low_level_features)
#         low_level_features = self.relu(low_level_features)


        x = torch.cat((x, low_level_features), dim=1)
        x = self.last_conv(x)
        x = F.upsample(x, size=input.size()[2:], mode='bilinear', align_corners=True)

        return x

    def freeze_bn(self):
        for m in self.modules():
            if isinstance(m, nn.BatchNorm2d):
                m.eval()

    def __init_weight(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                # torch.nn.init.kaiming_normal_(m.weight)
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()



In [12]:
deeplab_xcep_ghost = DeepLabv3_plus_ghost(nInputChannels=3, n_classes=1, os=16, pretrained=False, _print=True).eval().cuda()
summary(deeplab_xcep,  (3, 512, 512))

Constructing DeepLabv3+ model...
Number of classes: 1
Output stride: 16
Number of Input Channels: 3
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 256, 256]             864
       BatchNorm2d-2         [-1, 32, 256, 256]              64
              ReLU-3         [-1, 32, 256, 256]               0
            Conv2d-4         [-1, 64, 256, 256]          18,432
       BatchNorm2d-5         [-1, 64, 256, 256]             128
              ReLU-6         [-1, 64, 256, 256]               0
            Conv2d-7         [-1, 64, 256, 256]             576
            Conv2d-8        [-1, 128, 256, 256]           8,192
SeparableConv2d_same-9        [-1, 128, 256, 256]               0
      BatchNorm2d-10        [-1, 128, 256, 256]             256
             ReLU-11        [-1, 128, 256, 256]               0
             ReLU-12        [-1, 128, 256, 256]               0
 

In [13]:
input =torch.randn(1,3,720,960).cuda()

torch.cuda.synchronize()
time_start = time.time()

output = deeplab_xcep_ghost(input)
torch.cuda.synchronize()
time_end = time.time()
infer_time = time_end - time_start
print(infer_time)

0.07524251937866211


In [26]:
res = []
for i in tqdm(range(13)):
    inputs, _ = test_dataset[i]
    inputs = inputs.cuda().unsqueeze(0)

    torch.cuda.synchronize()
    start = time.time()
    predict= deeplab_xcep_ghost(inputs).data
    torch.cuda.synchronize()
    end = time.time()
    res.append(end-start)
time_sum = 0
for i in res:
    time_sum += i
    
print("FPS: %f"%(1.0/(time_sum/len(res))))

  8%|▊         | 1/13 [00:00<00:05,  2.30it/s]

(512, 512, 3)



 15%|█▌        | 2/13 [00:00<00:04,  2.34it/s]

(512, 512, 3)



 23%|██▎       | 3/13 [00:01<00:04,  2.41it/s]

(512, 512, 3)



 31%|███       | 4/13 [00:01<00:03,  2.42it/s]

(512, 512, 3)



 38%|███▊      | 5/13 [00:01<00:03,  2.62it/s]

(512, 512, 3)



 46%|████▌     | 6/13 [00:02<00:02,  2.69it/s]

(512, 512, 3)



 54%|█████▍    | 7/13 [00:02<00:02,  2.81it/s]

(512, 512, 3)



 62%|██████▏   | 8/13 [00:03<00:01,  2.86it/s]

(512, 512, 3)


 85%|████████▍ | 11/13 [00:03<00:00,  4.39it/s]

(512, 512, 3)
(512, 512, 3)
(512, 512, 3)


100%|██████████| 13/13 [00:03<00:00,  3.52it/s]

(512, 512, 3)
(512, 512, 3)
FPS: 32.179107





In [41]:
macs, params = get_model_complexity_info(deeplab_xcep_ghost, (3, 512, 512), as_strings=True,
                                           print_per_layer_stat=True, verbose=True)
print('{:<30}  {:<8}'.format('Computational complexity: ', macs))
print('{:<30}  {:<8}'.format('Number of parameters: ', params))

DeepLabv3_plus_ghost(
  58.854 M, 100.000% Params, 78.535 GMac, 100.000% MACs, 
  (xception_features): Xception(
    43.091 M, 73.216% Params, 52.634 GMac, 67.020% MACs, 
    (conv1): GhostModule(
      0.001 M, 0.001% Params, 0.044 GMac, 0.056% MACs, 
      (primary_conv): Sequential(
        0.0 M, 0.001% Params, 0.031 GMac, 0.040% MACs, 
        (0): Conv2d(0.0 M, 0.001% Params, 0.028 GMac, 0.036% MACs, 3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (1): BatchNorm2d(0.0 M, 0.000% Params, 0.002 GMac, 0.003% MACs, 16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(0.0 M, 0.000% Params, 0.001 GMac, 0.001% MACs, inplace=True)
      )
      (cheap_operation): Sequential(
        0.0 M, 0.000% Params, 0.013 GMac, 0.016% MACs, 
        (0): Conv2d(0.0 M, 0.000% Params, 0.009 GMac, 0.012% MACs, 16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16, bias=False)
        (1): BatchNorm2d(0.0 M, 0.000% Params, 0.00