In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import logging

import torch
import torch.nn as nn
from collections import OrderedDict

In [2]:
import torch.nn as nn

In [28]:
inp = (torch.rand(1,10,10)*10)

In [29]:
inp = inp.int()

In [30]:
inp

tensor([[[4, 1, 4, 5, 3, 0, 5, 3, 9, 4],
         [3, 8, 5, 9, 9, 3, 0, 2, 4, 8],
         [8, 5, 2, 6, 1, 9, 8, 7, 5, 0],
         [2, 5, 1, 3, 1, 4, 6, 5, 6, 5],
         [9, 0, 8, 6, 6, 6, 5, 1, 9, 8],
         [3, 9, 2, 3, 4, 7, 7, 2, 3, 5],
         [8, 3, 8, 3, 5, 2, 1, 3, 4, 4],
         [2, 7, 6, 5, 8, 5, 2, 5, 0, 7],
         [9, 1, 1, 3, 3, 3, 7, 6, 0, 6],
         [3, 7, 3, 4, 4, 3, 0, 7, 4, 1]]], dtype=torch.int32)

In [31]:
pool = nn.MaxPool2d(2, stride=2, return_indices=True)

In [32]:
unpool = nn.MaxUnpool2d(2, stride=2)

In [33]:
# 4x4 ramp of the values 1..16 as a float tensor, shaped (batch=1, channels=1, H=4, W=4).
# NOTE: the name shadows the builtin `input`; it is kept because later cells read it.
input = torch.arange(1., 17.).reshape(1, 1, 4, 4)

In [34]:
input.shape

torch.Size([1, 1, 4, 4])

In [35]:
inp.shape

torch.Size([1, 10, 10])

In [36]:
output, indices = pool(inp)

In [37]:
output

tensor([[[8, 9, 9, 5, 9],
         [8, 6, 9, 8, 6],
         [9, 8, 7, 7, 9],
         [8, 8, 8, 5, 7],
         [9, 4, 4, 7, 6]]], dtype=torch.int32)

In [38]:
inp

tensor([[[4, 1, 4, 5, 3, 0, 5, 3, 9, 4],
         [3, 8, 5, 9, 9, 3, 0, 2, 4, 8],
         [8, 5, 2, 6, 1, 9, 8, 7, 5, 0],
         [2, 5, 1, 3, 1, 4, 6, 5, 6, 5],
         [9, 0, 8, 6, 6, 6, 5, 1, 9, 8],
         [3, 9, 2, 3, 4, 7, 7, 2, 3, 5],
         [8, 3, 8, 3, 5, 2, 1, 3, 4, 4],
         [2, 7, 6, 5, 8, 5, 2, 5, 0, 7],
         [9, 1, 1, 3, 3, 3, 7, 6, 0, 6],
         [3, 7, 3, 4, 4, 3, 0, 7, 4, 1]]], dtype=torch.int32)

In [39]:
indices

tensor([[[11, 13, 14,  6,  8],
         [20, 23, 25, 26, 38],
         [40, 42, 55, 56, 48],
         [60, 62, 74, 77, 79],
         [80, 93, 94, 86, 89]]])

In [41]:
unpool(output.float(), indices)

tensor([[[0., 0., 0., 0., 0., 0., 5., 0., 9., 0.],
         [0., 8., 0., 9., 9., 0., 0., 0., 0., 0.],
         [8., 0., 0., 6., 0., 9., 8., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 6., 0.],
         [9., 0., 8., 0., 0., 0., 0., 0., 9., 0.],
         [0., 0., 0., 0., 0., 7., 7., 0., 0., 0.],
         [8., 0., 8., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 8., 0., 0., 5., 0., 7.],
         [9., 0., 0., 0., 0., 0., 7., 0., 0., 6.],
         [0., 0., 0., 4., 4., 0., 0., 0., 0., 0.]]])

In [2]:
logger = logging.getLogger(__name__)

In [3]:
class SegNetLite(nn.Module):

    def __init__(self, kernel_sizes=[3, 3, 3, 3], down_filter_sizes=[32, 64, 128, 256],
            up_filter_sizes=[128, 64, 32, 32], conv_paddings=[1, 1, 1, 1],
            pooling_kernel_sizes=[2, 2, 2, 2], pooling_strides=[2, 2, 2, 2], **kwargs):
        """Initialize SegNet Module

        Args:
            kernel_sizes (list of ints): kernel sizes for each convolutional layer in downsample/upsample path.
            down_filter_sizes (list of ints): number of filters (out channels) of each convolutional layer in the downsample path.
            up_filter_sizes (list of ints): number of filters (out channels) of each convolutional layer in the upsample path.
            conv_paddings (list of ints): paddings for each convolutional layer in downsample/upsample path.
            pooling_kernel_sizes (list of ints): kernel sizes for each max-pooling layer and its max-unpooling layer.
            pooling_strides (list of ints): strides for each max-pooling layer and its max-unpooling layer.
        """
        super(SegNetLite, self).__init__()
        self.num_down_layers = len(kernel_sizes)
        self.num_up_layers = len(kernel_sizes)

        input_size = 3 # initial number of input channels
        # Construct downsampling layers.
        # As mentioned in the assignment, blocks of the downsampling path should have the
        # following output dimension (igoring batch dimension):
        # 3 x 64 x 64 (input) -> 32 x 32 x 32 -> 64 x 16 x 16 -> 128 x 8 x 8 -> 256 x 4 x 4
        # each block should consist of: Conv2d->BatchNorm2d->ReLU->MaxPool2d
        layers_conv_down = [
            nn.Conv2d(in_channels=inp_ch, out_channels=fs, kernel_size=ks, padding=pad)
            for inp_ch, fs, ks, pad in zip([input_size]+down_filter_sizes[:-1], down_filter_sizes, kernel_sizes, conv_paddings) 
        ]
        layers_bn_down = [nn.BatchNorm2d(fs) for fs in down_filter_sizes]
        layers_pooling = [nn.MaxPool2d(kernel_size=pks, stride=ps, return_indices=True) for pks, ps in zip(pooling_kernel_sizes, pooling_strides)]

        # Convert Python list to nn.ModuleList, so that PyTorch's autograd
        # package can track gradients and update parameters of these layers
        self.layers_conv_down = nn.ModuleList(layers_conv_down)
        self.layers_bn_down = nn.ModuleList(layers_bn_down)
        self.layers_pooling = nn.ModuleList(layers_pooling)

        # Construct upsampling layers
        # As mentioned in the assignment, blocks of the upsampling path should have the
        # following output dimension (igoring batch dimension):
        # 256 x 4 x 4 (input) -> 128 x 8 x 8 -> 64 x 16 x 16 -> 32 x 32 x 32 -> 32 x 64 x 64
        # each block should consist of: MaxUnpool2d->Conv2d->BatchNorm2d->ReLU
        layers_conv_up = [
            nn.Conv2d(in_channels=inp_ch, out_channels=fs, kernel_size=ks, padding=pad)
            for inp_ch, fs, ks, pad in zip(list(reversed(down_filter_sizes)), up_filter_sizes, kernel_sizes, conv_paddings) 
        ]
        layers_bn_up = [nn.BatchNorm2d(fs) for fs in up_filter_sizes]
        layers_unpooling = [nn.MaxUnpool2d(kernel_size=pks, stride=ps) for pks, ps in zip(pooling_kernel_sizes, pooling_strides)]

        # Convert Python list to nn.ModuleList, so that PyTorch's autograd
        # can track gradients and update parameters of these layers
        self.layers_conv_up = nn.ModuleList(layers_conv_up)
        self.layers_bn_up = nn.ModuleList(layers_bn_up)
        self.layers_unpooling = nn.ModuleList(layers_unpooling)

        self.relu = nn.ReLU(True)

        # Implement a final 1x1 convolution to to get the logits of 11 classes (background + 10 digits)
        self.segconv = nn.Conv2d(in_channels=up_filter_sizes[-1], out_channels=11, kernel_size=1)

    def forward(self, x):
        indices_list = [] # keep track of indices for unmaxpooling
        
        # downsample
        for conv, bn, maxpool in zip(self.layers_conv_down, self.layers_bn_down, self.layers_pooling):
            x = conv(x)
            x = bn(x)
            x = self.relu(x)
            x, indices = maxpool(x)
            indices_list.append(indices)
            
        indices_list = list(reversed(indices_list))
        
        # upsample
        for conv, bn, unmaxpool, indices in zip(self.layers_conv_up, self.layers_bn_up, self.layers_unpooling, indices_list):
            x = unmaxpool(x, indices)
            x = conv(x)
            x = bn(x)
            x = self.relu(x)
            
        # final conv => 11 class segm
        x = self.segconv(x)
        return x



In [109]:
# Module-level scratch replica of the SegNetLite constructor, used to try the
# layers out interactively. It reads kernel_sizes, down_filter_sizes,
# up_filter_sizes, conv_paddings, pooling_kernel_sizes and pooling_strides,
# which must already be defined in the kernel (the hyperparameter cell).
num_down_layers = len(kernel_sizes)
num_up_layers = len(kernel_sizes)

input_size = 3 # initial number of input channels
# Construct downsampling layers.
# As mentioned in the assignment, blocks of the downsampling path should have the
# following output dimension (ignoring batch dimension):
# 3 x 64 x 64 (input) -> 32 x 32 x 32 -> 64 x 16 x 16 -> 128 x 8 x 8 -> 256 x 4 x 4
# each block should consist of: Conv2d->BatchNorm2d->ReLU->MaxPool2d
layers_conv_down = [
    nn.Conv2d(in_channels=inp_ch, out_channels=fs, kernel_size=ks, padding=pad)
    for inp_ch, fs, ks, pad in zip([input_size]+down_filter_sizes[:-1], down_filter_sizes, kernel_sizes, conv_paddings)
]
layers_bn_down = [nn.BatchNorm2d(fs) for fs in down_filter_sizes]
# return_indices=True: the pooling indices are needed later by MaxUnpool2d.
layers_pooling = [nn.MaxPool2d(kernel_size=pks, stride=ps, return_indices=True) for pks, ps in zip(pooling_kernel_sizes, pooling_strides)]

# Convert Python list to nn.ModuleList, the container PyTorch uses to
# register a list of layers as submodules.
layers_conv_down = nn.ModuleList(layers_conv_down)
layers_bn_down = nn.ModuleList(layers_bn_down)
layers_pooling = nn.ModuleList(layers_pooling)

# Construct upsampling layers
# As mentioned in the assignment, blocks of the upsampling path should have the
# following output dimension (ignoring batch dimension):
# 256 x 4 x 4 (input) -> 128 x 8 x 8 -> 64 x 16 x 16 -> 32 x 32 x 32 -> 32 x 64 x 64
# each block should consist of: MaxUnpool2d->Conv2d->BatchNorm2d->ReLU
# Each decoder conv takes the previous decoder block's output channels (the
# first one takes the encoder's last). Deriving them from the reversed encoder
# sizes only works when up_filter_sizes happens to mirror the encoder.
layers_conv_up = [
    nn.Conv2d(in_channels=inp_ch, out_channels=fs, kernel_size=ks, padding=pad)
    for inp_ch, fs, ks, pad in zip(down_filter_sizes[-1:] + up_filter_sizes[:-1], up_filter_sizes, kernel_sizes, conv_paddings)
]
layers_bn_up = [nn.BatchNorm2d(fs) for fs in up_filter_sizes]
# Unpooling undoes the pooling steps last-to-first, so its hyperparameters are reversed.
layers_unpooling = [
    nn.MaxUnpool2d(kernel_size=pks, stride=ps)
    for pks, ps in zip(reversed(pooling_kernel_sizes), reversed(pooling_strides))
]

# Convert Python list to nn.ModuleList, the container PyTorch uses to
# register a list of layers as submodules.
layers_conv_up = nn.ModuleList(layers_conv_up)
layers_bn_up = nn.ModuleList(layers_bn_up)
layers_unpooling = nn.ModuleList(layers_unpooling)

relu = nn.ReLU(True)

# Implement a final 1x1 convolution to get the logits of 11 classes (background + 10 digits)
segconv = nn.Conv2d(in_channels=up_filter_sizes[-1], out_channels=11, kernel_size=1)



torch.Size([16, 3, 64, 64])

In [127]:
x.shape

torch.Size([16, 3, 64, 64])

In [128]:
conv

Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))

In [131]:
conv(x).shape

RuntimeError: Given groups=1, weight of size [256, 128, 3, 3], expected input[16, 3, 64, 64] to have 128 channels, but got 3 channels instead

In [143]:
x = torch.randn(16, 3, 64, 64)
x.shape

torch.Size([16, 3, 64, 64])

In [144]:
# Scratch run of the encoder half on the module-level layer lists.
# NOTE: x is overwritten in place, so re-running this cell without first
# recreating x feeds the previous result back into the first conv.
indices_list = [] # keep track of indices for unmaxpooling
# downsample
for conv, bn, maxpool in zip(layers_conv_down, layers_bn_down, layers_pooling):
    x = conv(x)
    x = bn(x)
    x = relu(x)
    # Unpacking two values requires pooling layers built with return_indices=True.
    x, indices = maxpool(x)
    # Stored in encoder order; the decoder must consume them reversed.
    indices_list.append(indices)


In [146]:
x.shape

torch.Size([16, 256, 4, 4])

In [147]:
layers_conv_up

ModuleList(
  (0): Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (2): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)

In [153]:
# upsample
for conv, bn, unmaxpool, indices in zip(layers_conv_up, layers_bn_up, layers_unpooling, list(reversed(indices_list))):
    x = unmaxpool(x, indices)
    x = conv(x)
    x = bn(x)
    x = relu(x)


In [154]:
x.shape

torch.Size([16, 32, 64, 64])

In [157]:
# final conv => 11 class segm
x = segconv(x)

In [158]:
x.shape

torch.Size([16, 11, 64, 64])

In [93]:
inp = torch.randn(16,32,64,64)

In [94]:
nn.Conv2d(in_channels=up_filter_sizes[-1], out_channels=11, kernel_size=1)(inp).shape

torch.Size([16, 11, 64, 64])

In [4]:
def get_seg_net(**kwargs):
    """Build a SegNetLite model.

    Args:
        **kwargs: forwarded unchanged to the SegNetLite constructor.

    Returns:
        The newly constructed SegNetLite instance.
    """
    return SegNetLite(**kwargs)

In [35]:
X = torch.rand(32, 3,64,64)
X.shape

torch.Size([32, 3, 64, 64])

In [36]:
# Per-layer hyperparameters for the four encoder/decoder blocks; these match
# the default arguments of SegNetLite.__init__.
kernel_sizes = [3] * 4
down_filter_sizes = [32, 64, 128, 256]
up_filter_sizes = [128, 64, 32, 32]
conv_paddings = [1] * 4
pooling_kernel_sizes = [2] * 4
pooling_strides = [2] * 4

In [38]:
input_size

3

In [41]:
xx = nn.Conv2d(in_channels=input_size, out_channels=down_filter_sizes[0], kernel_size=kernel_sizes[0], padding=kernel_sizes[0]//2)(X)
xx.shape

torch.Size([32, 32, 64, 64])

In [103]:
# Round trip through 2x2 max pooling and its unpooling on a random batch,
# to confirm the unpooled tensor gets the input's shape back.
# NOTE: `input` shadows the builtin; the name is kept because later cells read it.
pool = nn.MaxPool2d(kernel_size=2, stride=2, return_indices=True)
unpool = nn.MaxUnpool2d(kernel_size=2, stride=2)
input = torch.randn(16, 3, 64, 64)
output, indices = pool(input)  # indices record where each window's maximum was
qq = unpool(output, indices)   # maxima go back to those positions, zeros elsewhere
qq.shape


In [104]:
output, indices = pool(input)


In [106]:
output.shape

torch.Size([16, 3, 32, 32])

In [107]:
indices

tensor([[[[   1,    2,    4,  ...,   58,   60,  126],
          [ 129,  130,  196,  ...,  186,  188,  254],
          [ 256,  258,  325,  ...,  379,  381,  382],
          ...,
          [3777, 3714, 3717,  ..., 3770, 3836, 3774],
          [3905, 3906, 3908,  ..., 3899, 3900, 3903],
          [4033, 3970, 3973,  ..., 4091, 4029, 4030]],

         [[   0,   67,    4,  ...,   58,   61,   63],
          [ 128,  195,  197,  ...,  250,  189,  254],
          [ 257,  259,  324,  ...,  379,  380,  382],
          ...,
          [3712, 3715, 3716,  ..., 3770, 3772, 3774],
          [3905, 3842, 3908,  ..., 3962, 3901, 3903],
          [3968, 3971, 4036,  ..., 4090, 4028, 4094]],

         [[   0,   66,    5,  ...,  123,  124,  126],
          [ 128,  195,  132,  ...,  186,  252,  254],
          [ 257,  258,  260,  ...,  315,  380,  319],
          ...,
          [3776, 3778, 3717,  ..., 3770, 3836, 3839],
          [3840, 3906, 3845,  ..., 3963, 3901, 3903],
          [4033, 4034, 4037,  ...

In [51]:
nn.BatchNorm2d(32)(xx).shape

torch.Size([32, 32, 64, 64])

NameError: name 'ks' is not defined

In [55]:
nn.MaxPool2d(kernel_size=pooling_kernel_sizes[0], stride=pooling_strides[0])

MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)

In [45]:
layers_bn_down = [nn.BatchNorm2d(fs) for fs in down_filter_sizes]

In [46]:
layers_bn_down

[BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
 BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
 BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
 BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)]

In [48]:
down_filter_sizes[:-1]

[32, 64, 128]

In [49]:
layers_conv_down = [
    nn.Conv2d(in_channels=inp_ch, out_channels=fs, kernel_size=ks, padding=pad)
    for inp_ch, fs, ks, pad in zip([3]+down_filter_sizes[:-1], down_filter_sizes, kernel_sizes, conv_paddings) 
]

In [56]:
layers_conv_down

[Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))]

In [57]:
# return_indices=True makes each pooling layer return (output, indices); the
# downsample loop unpacks both and MaxUnpool2d needs the indices.
layers_pooling = [nn.MaxPool2d(kernel_size=pks, stride=ps, return_indices=True) for pks, ps in zip(pooling_kernel_sizes, pooling_strides)]

In [58]:
layers_pooling

[MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
 MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
 MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
 MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)]

In [59]:
# Scratch construction of the encoder layers from the module-level
# hyperparameter lists (kernel_sizes, down_filter_sizes, conv_paddings,
# pooling_kernel_sizes, pooling_strides must already be defined).
num_down_layers = len(kernel_sizes)
num_up_layers = len(kernel_sizes)

input_size = 3 # initial number of input channels
# Construct downsampling layers.
# As mentioned in the assignment, blocks of the downsampling path should have the
# following output dimension (ignoring batch dimension):
# 3 x 64 x 64 (input) -> 32 x 32 x 32 -> 64 x 16 x 16 -> 128 x 8 x 8 -> 256 x 4 x 4
# each block should consist of: Conv2d->BatchNorm2d->ReLU->MaxPool2d
layers_conv_down = [
    nn.Conv2d(in_channels=inp_ch, out_channels=fs, kernel_size=ks, padding=pad)
    for inp_ch, fs, ks, pad in zip([input_size]+down_filter_sizes[:-1], down_filter_sizes, kernel_sizes, conv_paddings)
]
layers_bn_down = [nn.BatchNorm2d(fs) for fs in down_filter_sizes]
# return_indices=True makes each pooling layer return (output, indices); the
# downsample loop unpacks both and MaxUnpool2d needs the indices.
layers_pooling = [nn.MaxPool2d(kernel_size=pks, stride=ps, return_indices=True) for pks, ps in zip(pooling_kernel_sizes, pooling_strides)]


In [95]:
# Convert Python list to nn.ModuleList, so that PyTorch's autograd
# package can track gradients and update parameters of these layers
# Wrap the three encoder layer lists in nn.ModuleList, the container PyTorch
# uses to register a list of layers as submodules.
layers_conv_down, layers_bn_down, layers_pooling = (
    nn.ModuleList(layers_conv_down),
    nn.ModuleList(layers_bn_down),
    nn.ModuleList(layers_pooling),
)

In [97]:
# A bare expression inside a loop is not displayed by the notebook (only a
# cell's final top-level expression is), so print each layer explicitly.
for c in layers_conv_down:
    print(c)

In [98]:
c

Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))

In [61]:
down_filter_sizes

[32, 64, 128, 256]

In [70]:
up_filter_sizes

[128, 64, 32, 32]

In [72]:
# Decoder convolutions: the first takes the encoder's final channel count and
# each later one takes the previous decoder block's output channels. Deriving
# the inputs from the reversed encoder sizes only works when up_filter_sizes
# happens to mirror the encoder.
layers_conv_up = [
    nn.Conv2d(in_channels=inp_ch, out_channels=fs, kernel_size=ks, padding=pad)
    for inp_ch, fs, ks, pad in zip(down_filter_sizes[-1:] + up_filter_sizes[:-1], up_filter_sizes, kernel_sizes, conv_paddings)
]

In [73]:
layers_conv_up

[Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))]

In [75]:
layers_bn_up = [nn.BatchNorm2d(fs) for fs in up_filter_sizes]

In [76]:
layers_bn_up

[BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
 BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
 BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
 BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)]

In [83]:
ee = torch.randn(32, 256, 4, 4)
ee.shape

torch.Size([32, 256, 4, 4])

In [89]:
layers_unpooling[0](layers_conv_up[0](ee)).shape

TypeError: MaxUnpool2d.forward() missing 1 required positional argument: 'indices'

In [84]:
# Construct upsampling layers
# As mentioned in the assignment, blocks of the upsampling path should have the
# following output dimension (igoring batch dimension):
# 256 x 4 x 4 (input) -> 128 x 8 x 8 -> 64 x 16 x 16 -> 32 x 32 x 32 -> 32 x 64 x 64
# each block should consist of: MaxUnpool2d->Conv2d->BatchNorm2d->ReLU
# Decoder convolutions: the first takes the encoder's final channel count and
# each later one takes the previous decoder block's output channels. Deriving
# the inputs from the reversed encoder sizes only works when up_filter_sizes
# happens to mirror the encoder.
layers_conv_up = [
    nn.Conv2d(in_channels=inp_ch, out_channels=fs, kernel_size=ks, padding=pad)
    for inp_ch, fs, ks, pad in zip(down_filter_sizes[-1:] + up_filter_sizes[:-1], up_filter_sizes, kernel_sizes, conv_paddings)
]
layers_bn_up = [nn.BatchNorm2d(fs) for fs in up_filter_sizes]
# Unpooling undoes the pooling steps last-to-first, so its hyperparameters are reversed.
layers_unpooling = [
    nn.MaxUnpool2d(kernel_size=pks, stride=ps)
    for pks, ps in zip(reversed(pooling_kernel_sizes), reversed(pooling_strides))
]


In [85]:
layers_unpooling

[MaxUnpool2d(kernel_size=(2, 2), stride=(2, 2), padding=(0, 0)),
 MaxUnpool2d(kernel_size=(2, 2), stride=(2, 2), padding=(0, 0)),
 MaxUnpool2d(kernel_size=(2, 2), stride=(2, 2), padding=(0, 0)),
 MaxUnpool2d(kernel_size=(2, 2), stride=(2, 2), padding=(0, 0))]

In [10]:


# Convert Python list to nn.ModuleList, so that PyTorch's autograd
# can track gradients and update parameters of these layers
layers_conv_up, layers_bn_up, layers_unpooling = (
    nn.ModuleList(layers_conv_up),
    nn.ModuleList(layers_bn_up),
    nn.ModuleList(layers_unpooling),
)

# One in-place ReLU instance, reused after every batch-norm layer.
relu = nn.ReLU(inplace=True)