In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import torch.optim as optim
from torchvision import models
from torchvision.models.vgg import VGG


import numpy as np 
import time

import numpy as np

In [3]:
class FCN32s(nn.Module):
    """Segmentation head that decodes only the deepest backbone feature map.

    The ``'x5'`` feature map returned by the backbone is passed through five
    transposed convolutions. Each one uses kernel 3, stride 2, padding 1 and
    output_padding 1, which doubles the spatial size. Every transposed
    convolution is followed by ReLU and then batch normalisation. A final
    1x1 convolution maps the 32 remaining channels to ``num_classes`` score
    maps.

    Args:
        model: Backbone module. Calling it must return a mapping with an
            ``'x5'`` entry holding a 4-D tensor with 512 channels (the input
            width of ``deconv1``).
        num_classes: Number of output channels of the final 1x1 convolution.
    """

    def __init__(self, model, num_classes):
        super(FCN32s, self).__init__()
        self.num_classes = num_classes
        self.model = model
        # ReLU has no parameters, so one instance is shared by every stage.
        self.relu = nn.ReLU(inplace=True)
        self.deconv1 = nn.ConvTranspose2d(512, 512, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1)
        self.bn1 = nn.BatchNorm2d(512)
        self.deconv2 = nn.ConvTranspose2d(512, 256, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1)
        self.bn2 = nn.BatchNorm2d(256)
        self.deconv3 = nn.ConvTranspose2d(256, 128, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.deconv4 = nn.ConvTranspose2d(128, 64, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1)
        self.bn4 = nn.BatchNorm2d(64)
        self.deconv5 = nn.ConvTranspose2d(64, 32, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1)
        self.bn5 = nn.BatchNorm2d(32)
        self.classifier = nn.Conv2d(32, num_classes, kernel_size=1)

    def forward(self, x):
        """Return raw per-class score maps for the input batch ``x``.

        The result has ``num_classes`` channels and 32 times the spatial size
        of the backbone's ``'x5'`` feature map. No sigmoid or softmax is
        applied.
        """
        output = self.model(x)
        x5 = output['x5']

        # nn.ReLU / nn.BatchNorm2d are module classes, not functions: they
        # have to be instantiated once in __init__ and then called on tensors.
        score = self.bn1(self.relu(self.deconv1(x5)))
        score = self.bn2(self.relu(self.deconv2(score)))
        score = self.bn3(self.relu(self.deconv3(score)))
        score = self.bn4(self.relu(self.deconv4(score)))
        score = self.bn5(self.relu(self.deconv5(score)))
        score = self.classifier(score)

        return score

In [4]:
class FCN16s(nn.Module):
    """Segmentation head that fuses the two deepest backbone feature maps.

    ``'x5'`` is upsampled by one transposed convolution (kernel 3, stride 2,
    which doubles the spatial size) and passed through ReLU. ``'x4'`` is then
    added element-wise and the sum is batch-normalised. Four further
    transposed-convolution / ReLU / batch-norm stages follow, and a 1x1
    convolution maps the 32 remaining channels to ``num_classes`` score maps.

    Args:
        model: Backbone module. Calling it must return a mapping with
            ``'x5'`` (512 channels) and ``'x4'`` entries. ``'x4'`` must have
            the same shape as the upsampled ``'x5'``, i.e. 512 channels and
            twice its height and width.
        num_classes: Number of output channels of the final 1x1 convolution.
    """

    def __init__(self, model, num_classes):
        super(FCN16s, self).__init__()
        self.num_classes = num_classes
        self.model = model
        # ReLU has no parameters, so one instance is shared by every stage.
        self.relu = nn.ReLU(inplace=True)
        self.deconv1 = nn.ConvTranspose2d(512, 512, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1)
        self.bn1 = nn.BatchNorm2d(512)
        self.deconv2 = nn.ConvTranspose2d(512, 256, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1)
        self.bn2 = nn.BatchNorm2d(256)
        self.deconv3 = nn.ConvTranspose2d(256, 128, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.deconv4 = nn.ConvTranspose2d(128, 64, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1)
        self.bn4 = nn.BatchNorm2d(64)
        self.deconv5 = nn.ConvTranspose2d(64, 32, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1)
        self.bn5 = nn.BatchNorm2d(32)
        self.classifier = nn.Conv2d(32, num_classes, kernel_size=1)

    def forward(self, x):
        """Return raw per-class score maps for the input batch ``x``.

        The result has ``num_classes`` channels and 32 times the spatial size
        of the backbone's ``'x5'`` feature map. No sigmoid or softmax is
        applied.
        """
        output = self.model(x)
        x5 = output['x5']
        x4 = output['x4']

        # nn.ReLU / nn.BatchNorm2d are module classes, not functions: they
        # have to be instantiated once in __init__ and then called on tensors.
        score = self.relu(self.deconv1(x5))
        # Skip connection: normalise the sum of the upsampled x5 and x4.
        score = self.bn1(score + x4)
        score = self.bn2(self.relu(self.deconv2(score)))
        score = self.bn3(self.relu(self.deconv3(score)))
        score = self.bn4(self.relu(self.deconv4(score)))
        score = self.bn5(self.relu(self.deconv5(score)))
        score = self.classifier(score)

        return score

In [5]:
class FCN8s(nn.Module):
    """Segmentation head that fuses the three deepest backbone feature maps.

    ``'x5'`` is upsampled by a transposed convolution (kernel 3, stride 2,
    which doubles the spatial size), passed through ReLU, summed with
    ``'x4'`` and batch-normalised. That fused map is upsampled again, summed
    with ``'x3'`` and batch-normalised. Three further transposed-convolution
    / ReLU / batch-norm stages follow, and a 1x1 convolution maps the 32
    remaining channels to ``num_classes`` score maps.

    Args:
        model: Backbone module. Calling it must return a mapping with
            ``'x5'`` (512 channels), ``'x4'`` and ``'x3'`` entries. ``'x4'``
            must match the output of ``deconv1`` (512 channels, twice the
            size of ``'x5'``). ``'x3'`` must match the output of ``deconv2``
            (256 channels, four times the size of ``'x5'``).
        num_classes: Number of output channels of the final 1x1 convolution.
    """

    def __init__(self, model, num_classes):
        super(FCN8s, self).__init__()
        self.num_classes = num_classes
        self.model = model
        # ReLU has no parameters, so one instance is shared by every stage.
        self.relu = nn.ReLU(inplace=True)
        self.deconv1 = nn.ConvTranspose2d(512, 512, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1)
        self.bn1 = nn.BatchNorm2d(512)
        self.deconv2 = nn.ConvTranspose2d(512, 256, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1)
        self.bn2 = nn.BatchNorm2d(256)
        self.deconv3 = nn.ConvTranspose2d(256, 128, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.deconv4 = nn.ConvTranspose2d(128, 64, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1)
        self.bn4 = nn.BatchNorm2d(64)
        self.deconv5 = nn.ConvTranspose2d(64, 32, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1)
        self.bn5 = nn.BatchNorm2d(32)
        self.classifier = nn.Conv2d(32, num_classes, kernel_size=1)

    def forward(self, x):
        """Return raw per-class score maps for the input batch ``x``.

        The result has ``num_classes`` channels and 32 times the spatial size
        of the backbone's ``'x5'`` feature map. No sigmoid or softmax is
        applied.
        """
        output = self.model(x)
        x5 = output['x5']
        x4 = output['x4']
        x3 = output['x3']

        # nn.ReLU / nn.BatchNorm2d are module classes, not functions: they
        # have to be instantiated once in __init__ and then called on tensors.
        score = self.relu(self.deconv1(x5))
        score = self.bn1(score + x4)
        # Upsample the fused x5/x4 map. Feeding raw x4 here would discard
        # everything computed from x5.
        score = self.relu(self.deconv2(score))
        score = self.bn2(score + x3)
        score = self.bn3(self.relu(self.deconv3(score)))
        score = self.bn4(self.relu(self.deconv4(score)))
        score = self.bn5(self.relu(self.deconv5(score)))
        score = self.classifier(score)

        return score

In [6]:
class FCNs(nn.Module):
    """Segmentation head with a skip connection from every backbone stage.

    Starting from ``'x5'``, each stage applies a transposed convolution
    (kernel 3, stride 2, which doubles the spatial size), ReLU and batch
    normalisation. After each of the first four stages the matching
    shallower backbone feature map (``'x4'``, ``'x3'``, ``'x2'``, ``'x1'``)
    is added element-wise. A final 1x1 convolution maps the 32 remaining
    channels to ``num_classes`` score maps.

    Args:
        model: Backbone module. Calling it must return a mapping with entries
            ``'x1'`` .. ``'x5'``. ``'x5'`` must have 512 channels. ``'x4'``,
            ``'x3'``, ``'x2'`` and ``'x1'`` must match the outputs of
            ``deconv1`` .. ``deconv4``, i.e. 512, 256, 128 and 64 channels,
            each map twice the size of the next deeper one.
        num_classes: Number of output channels of the final 1x1 convolution.
    """

    def __init__(self, model, num_classes):
        super(FCNs, self).__init__()
        self.num_classes = num_classes
        self.model = model
        # ReLU has no parameters, so one instance is shared by every stage.
        self.relu = nn.ReLU(inplace=True)
        self.deconv1 = nn.ConvTranspose2d(512, 512, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1)
        self.bn1 = nn.BatchNorm2d(512)
        self.deconv2 = nn.ConvTranspose2d(512, 256, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1)
        self.bn2 = nn.BatchNorm2d(256)
        self.deconv3 = nn.ConvTranspose2d(256, 128, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.deconv4 = nn.ConvTranspose2d(128, 64, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1)
        self.bn4 = nn.BatchNorm2d(64)
        self.deconv5 = nn.ConvTranspose2d(64, 32, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1)
        self.bn5 = nn.BatchNorm2d(32)
        self.classifier = nn.Conv2d(32, num_classes, kernel_size=1)

    def forward(self, x):
        """Return raw per-class score maps for the input batch ``x``.

        The result has ``num_classes`` channels and 32 times the spatial size
        of the backbone's ``'x5'`` feature map. No sigmoid or softmax is
        applied.
        """
        output = self.model(x)

        x5 = output['x5']
        x4 = output['x4']
        x3 = output['x3']
        x2 = output['x2']
        x1 = output['x1']

        # nn.ReLU / nn.BatchNorm2d are module classes, not functions: they
        # have to be instantiated once in __init__ and then called on tensors.
        # The skip additions are written out-of-place so that no tensor
        # autograd may still need is overwritten.
        score = self.bn1(self.relu(self.deconv1(x5)))
        score = score + x4
        score = self.bn2(self.relu(self.deconv2(score)))
        score = score + x3
        score = self.bn3(self.relu(self.deconv3(score)))
        score = score + x2
        score = self.bn4(self.relu(self.deconv4(score)))
        score = score + x1
        score = self.bn5(self.relu(self.deconv5(score)))
        score = self.classifier(score)

        # Without this return, forward() would hand back None.
        return score


In [7]:
class VGGNet(VGG):
    """VGG feature extractor that returns the activation after each stage.

    The convolutional stack is built by ``make_layers`` from
    ``params['cfg'][model]``. ``params['ranges'][model]`` lists the
    ``[start, stop)`` index spans of ``self.features`` that make up one stage
    each.

    Args:
        pretrained: When true, copy the weights of the torchvision model named
            ``model`` (this may trigger a download).
        model: Key into ``params['cfg']`` / ``params['ranges']`` and, when
            ``pretrained`` is true, the name of the constructor looked up on
            ``torchvision.models``.
        requires_grad: When false, every parameter is frozen.
        remove_fc: When true, delete the ``classifier`` attribute after the
            weights have been loaded. ``forward`` never uses it.
        show_params: When true, print the name and size of every parameter.

    Raises:
        KeyError: If ``model`` has no entry in ``params``.
        AttributeError: If ``pretrained`` is true and ``torchvision.models``
            has no attribute called ``model``.
    """

    def __init__(self, pretrained=True, model='vgg16', requires_grad=True, remove_fc=True, show_params=False):
        super().__init__(make_layers(params['cfg'][model]))
        self.ranges = params['ranges'][model]

        if pretrained:
            # Look the constructor up by name instead of exec()-ing a
            # formatted string, so `model` can never be run as arbitrary code.
            # `pretrained=True` is the keyword the original call used.
            # NOTE(review): newer torchvision releases prefer `weights=`.
            builder = getattr(models, model)
            self.load_state_dict(builder(pretrained=True).state_dict())

        if not requires_grad:
            for param in self.parameters():
                param.requires_grad = False

        if remove_fc:
            # Removed only after load_state_dict, which needs the full set of
            # keys to be present.
            del self.classifier

        if show_params:
            for name, param in self.named_parameters():
                print(name, param.size())

    def forward(self, x):
        """Run ``x`` through the feature stack and collect stage outputs.

        Returns:
            dict: Maps ``'x1'``, ``'x2'``, ... (one key per entry of
            ``self.ranges``, in order) to the tensor produced by the last
            layer of that stage.
        """
        output = {}

        for stage, (start, stop) in enumerate(self.ranges, start=1):
            for layer in range(start, stop):
                x = self.features[layer](x)
            output["x%d" % stage] = x

        return output

In [8]:
def make_layers(cfg, batch_norm=False):
    """Assemble a VGG-style convolutional stack from a layer specification.

    Args:
        cfg: Iterable whose items are either the string ``'M'`` (insert a
            2x2, stride-2 max-pool) or an integer (insert a 3x3, padding-1
            convolution with that many output channels, followed by ReLU).
        batch_norm: When true, place a ``BatchNorm2d`` between every
            convolution and its ReLU.

    Returns:
        nn.Sequential: The layers in the order described by ``cfg``. The first
        convolution takes 3 input channels.
    """
    modules = []
    channels_in = 3
    for entry in cfg:
        if entry == 'M':
            modules.append(nn.MaxPool2d(kernel_size=2, stride=2))
            continue

        modules.append(nn.Conv2d(channels_in, entry, kernel_size=3, padding=1))
        if batch_norm:
            modules.append(nn.BatchNorm2d(entry))
        modules.append(nn.ReLU(inplace=True))
        # The next convolution consumes what this one produced.
        channels_in = entry
    return nn.Sequential(*modules)

In [9]:
# Static per-architecture settings read by VGGNet.
params = {
    # Half-open [start, stop) index spans into the `features` stack.
    # VGGNet.forward emits one feature map per span.
    'ranges': {
        'vgg16': (
            (0, 5),
            (5, 10),
            (10, 17),
            (17, 24),
            (24, 31),
        ),
    },
    # Layer specification handed to make_layers: an integer is the output
    # channel count of a convolution, 'M' is a max-pool.
    'cfg': {
        'vgg16': [
            64, 64, 'M',
            128, 128, 'M',
            256, 256, 256, 'M',
            512, 512, 512, 'M',
            512, 512, 512, 'M',
        ],
    },
}

In [11]:
if __name__ == "__main__":
    # Use the GPU when one is available and fall back to the CPU otherwise,
    # so this cell does not crash on machines without CUDA.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = VGGNet(requires_grad=True)
    fcn = FCNs(model=model, num_classes=2)
    fcn = fcn.to(device)
    # FCNs ends in a plain 1x1 convolution, so it emits unbounded scores.
    # BCEWithLogitsLoss applies the sigmoid itself, whereas BCELoss requires
    # inputs that already lie in [0, 1].
    criterion = nn.BCEWithLogitsLoss().to(device)
    optimizer = optim.SGD(fcn.parameters(), lr=1e-2, momentum=0.7)

## Reference
- [Fully Convolutional Networks for Semantic Segmentation](https://gaussian37.github.io/vision-segmentation-fcn/)