In [1]:
# FCN
# Structure
## Backbone : vgg16
## Stand : Encoder / Decoder

# url : https://github.com/initiativealmendra/DL-architectures/blob/master/FCN.py

In [24]:
# Import library for modeling

import torch
import torch.nn as nn

from torchvision import models
from torchvision.models.vgg import VGG

import warnings
warnings.filterwarnings('ignore')

import gc

# Marker so the cell output confirms the imports finished.
print('Import library')


Import library


In [16]:
# Function for VGG16


class encoder(VGG):
    '''
    VGG16 feature extractor used as the encoder of the FCN.

    The convolutional stack is built by make_layers() and handed to torchvision's
    VGG constructor; the fully connected classifier is removed afterwards because
    only the convolutional feature maps are used.

    Parameters
    ----------
    weights : if truthy, load the torchvision pretrained parameters
        (vgg16, or vgg16_bn when batch_norm is True).
    requires_grad : pass False to freeze every encoder parameter so that the
        encoder is not trained.
    batch_norm : if True, insert a BatchNorm2d layer between every convolution
        and its ReLU.
    show_parms : if True, print the name and size of every remaining parameter
        to inspect the dimensionality.
    '''

    def __init__(self, weights = True, requires_grad = True, batch_norm = False, show_parms =False):

        # torchvision VGG defaults: num_classes = 1000, init_weights = True, dropout = 0.5
        super().__init__(self.make_layers(batch_norm))

        # load_state_dict() copies the pretrained tensors into this model.
        # The pretrained architecture must match the layer layout built above,
        # so the batch-norm variant is used when batch_norm is requested.
        if weights:
            if batch_norm:
                pretrained_model = models.vgg16_bn(pretrained = True)
            else:
                pretrained_model = models.vgg16(pretrained = True)
            self.load_state_dict(pretrained_model.state_dict())

        # Remove the fully connected layers of the VGG model:
        # a fully convolutional network only needs the convolutional features.
        del self.classifier

        # Freeze the encoder. parameters() yields the tensors themselves
        # (named_parameters() yields (name, tensor) tuples, which cannot be frozen directly).
        if not requires_grad:
            for parameter in self.parameters():
                parameter.requires_grad = False

        # Display parameters to see the dimensionality
        if show_parms:
            for name, parm in self.named_parameters():
                print(f'Name : {name}.'
                      f'Parameters : {parm.size()}')

    def make_layers(self, batch_norm):
        '''
        Build the VGG16 convolutional stack as an nn.Sequential.

        Integers in the preset list are the output channels of a 3x3 convolution
        (padding 1) followed by ReLU; 'P' is a 2x2 max pooling with stride 2.
        When batch_norm is True a BatchNorm2d is placed between convolution and ReLU.
        '''
        preset_layer_lst = [
            64, 64, 'P',
            128, 128, 'P',
            256, 256, 256, 'P',
            512, 512, 512, 'P',
            512, 512, 512, 'P'
        ]

        return_layer_lst = list()
        channel_number = 3  # the first convolution takes a 3-channel input

        for layer_ in preset_layer_lst:
            if layer_ == 'P':
                return_layer_lst.append(nn.MaxPool2d(kernel_size=2, stride=2))
                continue

            conv2d = nn.Conv2d(
                in_channels = channel_number,
                out_channels= layer_,
                kernel_size=3, padding=1)

            if batch_norm:
                return_layer_lst += [conv2d, nn.BatchNorm2d(layer_), nn.ReLU(inplace=True)]
            else:
                return_layer_lst += [conv2d, nn.ReLU(inplace=True)]
            channel_number = layer_

        # nn.Sequential runs the modules one after another on a single input.
        return nn.Sequential(*return_layer_lst)

    def forward(self, x):
        '''
        Run x through the convolutional stack and collect the output of every
        max-pooling layer.

        Returns a dict with keys "pool1" ... "pool5". The pooling layers are
        located by type rather than by fixed index, so the result is correct
        both with and without batch normalization.
        '''
        output = {}
        pool_count = 0

        for layer in self.features:
            x = layer(x)
            if isinstance(layer, nn.MaxPool2d):
                pool_count += 1
                output["pool%d"%(pool_count)] = x
        return output

class decoder(nn.Module):
    '''
    Upsampling head of the FCN.

    Takes the 'pool5', 'pool4' and 'pool3' feature maps produced by the encoder,
    upsamples through five stride-2 transposed convolutions (adding pool4 and
    pool3 as skip connections after the first two) and scores every pixel with
    a 1x1 convolution producing n_classes channels.
    '''

    def __init__(self, encoder, n_classes):
        super().__init__()

        self.encoder = encoder
        self.n_classes = n_classes

        self.relu = nn.ReLU(inplace=True)

        # Shared settings of the transposed convolutions (used to upsample the previous layer).
        upsample_cfg = dict(kernel_size=3, stride=2, padding=1, output_padding=1, dilation=1)

        self.deconv1 = nn.ConvTranspose2d(512, 512, **upsample_cfg)
        self.bn1 = nn.BatchNorm2d(512)

        self.deconv2 = nn.ConvTranspose2d(512, 256, **upsample_cfg)
        self.bn2 = nn.BatchNorm2d(256)

        self.deconv3 = nn.ConvTranspose2d(256, 128, **upsample_cfg)
        self.bn3 = nn.BatchNorm2d(128)

        self.deconv4 = nn.ConvTranspose2d(128, 64, **upsample_cfg)
        self.bn4 = nn.BatchNorm2d(64)

        self.deconv5 = nn.ConvTranspose2d(64, 32, **upsample_cfg)
        self.bn5 = nn.BatchNorm2d(32)

        # Score layer: 1x1 convolution mapping the 32 channels to n_classes
        self.score = nn.Conv2d(32, n_classes, kernel_size=1)

    def forward(self, x):
        feature_maps = self.encoder(x)

        # Skip connections (connect the output of one layer to a non-adjacent layer)
        deepest = feature_maps['pool5']
        skip_pool4 = feature_maps['pool4']
        skip_pool3 = feature_maps['pool3']

        # Stage 1: upsample pool5 and merge with pool4
        out = self.deconv1(deepest)
        out = self.relu(out)
        out = self.bn1(out + skip_pool4)

        # Stage 2: upsample and merge with pool3
        out = self.deconv2(out)
        out = self.relu(out)
        out = self.bn2(out + skip_pool3)

        # Stages 3-5: upsampling without skip connections
        remaining_stages = (
            (self.deconv3, self.bn3),
            (self.deconv4, self.bn4),
            (self.deconv5, self.bn5),
        )
        for upsample, normalize in remaining_stages:
            out = normalize(self.relu(upsample(out)))

        return self.score(out)

# Marker so the cell output confirms both classes were defined.
print('Function settled.')

Function settled.


In [18]:
%%time

# set VGG model

fcn = decoder(encoder(), n_classes)

Wall time: 3.02 s


In [27]:
%%time

n_classes = 12
batch_size = 10

# It accepts any image size multiple of 32.
img_width = 32
img_height = 32

output = fcn(torch.randn([batch_size, 3, img_width, img_height]))

print(f'outsize : {output.size()}\n')

outsize : torch.Size([10, 12, 32, 32])

Wall time: 128 ms


In [28]:
%%time

n_classes = 12
batch_size = 10

# It accepts any image size multiple of 32.
# If size is smaller than 32, then evoke error as like below,
# RuntimeError: Given input size: (512x1x1). Calculated output size: (512x0x0). Output size is too small

img_width = 31
img_height = 31

output = fcn(torch.randn([batch_size, 3, img_width, img_height]))
print(output.size())

RuntimeError: Given input size: (512x1x1). Calculated output size: (512x0x0). Output size is too small