Fully convolutional networks (FCNs) are used for semantic segmentation tasks. We will implement the FCN32, FCN16 and FCN8 variants.
Original paper 

(https://people.eecs.berkeley.edu/~jonlong/long_shelhamer_fcn.pdf) 

and implementation

(https://github.com/zijundeng/pytorch-semantic-segmentation/blob/master/models/fcn32s.py)

In [1]:
import torch
from torch import nn
from torchvision import models

In [2]:
# Build a VGG16 without pretrained weights; it is only used to inspect the layer layout.
vgg16=models.vgg16(pretrained=False)

In [3]:
# Display the fully connected head; Linear layers 0 and 3 are the ones the FCN classes turn into convolutions.
vgg16.classifier

Sequential(
  (0): Linear(in_features=25088, out_features=4096, bias=True)
  (1): ReLU(inplace=True)
  (2): Dropout(p=0.5, inplace=False)
  (3): Linear(in_features=4096, out_features=4096, bias=True)
  (4): ReLU(inplace=True)
  (5): Dropout(p=0.5, inplace=False)
  (6): Linear(in_features=4096, out_features=1000, bias=True)
)

In [4]:
# Display the convolutional feature extractor (conv / ReLU / max-pool stack).
vgg16.features

Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU(inplace=True)
  (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): ReLU(inplace=True)
  (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (6): ReLU(inplace=True)
  (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (8): ReLU(inplace=True)
  (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): ReLU(inplace=True)
  (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (13): ReLU(inplace=True)
  (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (15): ReLU(inplace=True)
  (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (17): Conv2d(256, 512, kernel_si

In [5]:
# Weight shape of a 7x7 conv with 512 inputs and 4096 outputs: 512 * 7 * 7 = 25088 values per
# output channel, the same as in_features of classifier[0], so the Linear weight can be reshaped into it.
nn.Conv2d(512,4096,kernel_size=7).weight.data.shape

torch.Size([4096, 512, 7, 7])

In [6]:
# Weight shape of a 1x1 conv with 4096 inputs and outputs: the element count matches the 4096x4096 Linear layer classifier[3].
nn.Conv2d(4096,4096,kernel_size=1).weight.data.shape

torch.Size([4096, 4096, 1, 1])

In [7]:
class FCN32(nn.Module):
    """FCN-32s: VGG16 made fully convolutional, with a single 32x upsampling step.

    The first two Linear layers of the VGG16 classifier are re-expressed as
    convolutions (fc6 as 7x7, fc7 as 1x1) that reuse the pretrained Linear
    weights. A 1x1 convolution then produces one score map per class and a
    stride-32 transposed convolution upsamples the coarse scores.

    Args:
        num_classes: number of output channels (one per class).
    """

    # Nominal crop offset. For a 224x224 input the upsampled map is 256x256,
    # so 16 is the centre of the 32-pixel margin.
    # The reference Caffe net uses a fixed offset of 19 instead:
    # https://github.com/shelhamer/fcn.berkeleyvision.org/blob/master/pascalcontext-fcn32s/net.py
    _CROP_OFFSET = 16

    def __init__(self, num_classes):
        super().__init__()
        model = models.vgg16(pretrained=True)
        features = model.features
        classifier = model.classifier
        # Pad the first convolution by 100 pixels, as the reference net does:
        # (https://github.com/shelhamer/fcn.berkeleyvision.org/blob/master/pascalcontext-fcn32s/net.py)
        features[0].padding = (100, 100)
        self.features5 = features

        # fc6: Linear(25088, 4096) viewed as a 7x7 convolution over 512 channels.
        fc6 = nn.Conv2d(512, 4096, kernel_size=7)
        fc6.weight.data.copy_(classifier[0].weight.data.view(4096, 512, 7, 7))
        fc6.bias.data.copy_(classifier[0].bias.data)
        # fc7: Linear(4096, 4096) viewed as a 1x1 convolution.
        fc7 = nn.Conv2d(4096, 4096, kernel_size=1)
        fc7.weight.data.copy_(classifier[3].weight.data.view(4096, 4096, 1, 1))
        fc7.bias.data.copy_(classifier[3].bias.data)
        # Per-class scoring layer; it keeps PyTorch's default initialisation.
        scores = nn.Conv2d(4096, num_classes, kernel_size=1)
        # For a (1, 3, 224, 224) input and num_classes=2 this head outputs (1, 2, 7, 7).
        self.scores = nn.Sequential(
            fc6, nn.ReLU(inplace=True), nn.Dropout(),
            fc7, nn.ReLU(inplace=True), nn.Dropout(),
            scores,
        )
        self.upsample = nn.ConvTranspose2d(
            num_classes, num_classes, kernel_size=64, stride=32, bias=False
        )

    @classmethod
    def _crop_start(cls, available, wanted):
        """Return the crop start: the nominal offset, reduced if the window would not fit."""
        return max(0, min(cls._CROP_OFFSET, available - wanted))

    def forward(self, X):
        """Return sigmoid-activated class maps cropped to the spatial size of ``X``.

        Args:
            X: input batch; dimensions 2 and 3 are treated as height and width.

        Returns:
            The upsampled, sigmoid-activated score tensor, cropped to the
            height and width of ``X``.
        """
        X_size = X.size()
        in_h, in_w = X_size[2], X_size[3]
        X = self.features5(X)
        X = self.scores(X)
        X = self.upsample(X)
        X = torch.sigmoid(X)
        # The margin between the upsampled map and the input depends on the
        # input size. A fixed offset of 16 can push the slice past the end of
        # the map and silently return fewer rows/columns than the input, so the
        # offset is clamped to the margin that is actually available.
        top = self._crop_start(X.size(2), in_h)
        left = self._crop_start(X.size(3), in_w)
        return X[:, :, top:top + in_h, left:left + in_w]

In [8]:
# Two-class FCN32 instance (its constructor requests pretrained VGG16 weights).
m=FCN32(2)

In [9]:
# Random batch holding one 3-channel 224x224 image, used for the shape checks below.
sample=torch.randn(1,3,224,224)

In [10]:
# Sanity check: the output keeps the input's 224x224 spatial size, with one channel per class.
m(sample).shape

torch.Size([1, 2, 224, 224])

In [11]:
# Every child module except the last registered one (the transposed-conv upsampler).
[i for i in m.children()][0:-1]

[Sequential(
   (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(100, 100))
   (1): ReLU(inplace=True)
   (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
   (3): ReLU(inplace=True)
   (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
   (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
   (6): ReLU(inplace=True)
   (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
   (8): ReLU(inplace=True)
   (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
   (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
   (11): ReLU(inplace=True)
   (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
   (13): ReLU(inplace=True)
   (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
   (15): ReLU(inplace=True)
   (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
   (17): Con

In [12]:
# Run the model without its upsampler to see the size of the coarse score map (7x7 for a 224x224 input).
nn.Sequential(*[i for i in m.children()][0:-1])(sample).shape

torch.Size([1, 2, 7, 7])

In [13]:
# Disabled scratch code: it is wrapped in a string literal, so nothing inside it is executed.
'''[i for i in FCN32(2).children()][0:2]
m=nn.Sequential(*[i for i in FCN32(2).children()][0:2])
m(sample).shape
vgg16=models.vgg16(pretrained=True)
m_t=vgg16.features
m_t(sample).shape'''

'[i for i in FCN32(2).children()][0:2]\nm=nn.Sequential(*[i for i in FCN32(2).children()][0:2])\nm(sample).shape\nvgg16=models.vgg16(pretrained=True)\nm_t=vgg16.features\nm_t(sample).shape'

In [14]:
# Layers 0-23 of the feature extractor; the output of this slice is the pool4 feature map.
# In FCN16 the pool4 map (26*26 for a 224*224 input once the first conv is padded by 100) goes
# through a 1*1 scoring convolution, while the coarse 7*7 scores are upsampled with kernel size 4,
# stride 2 to 16*16; the pool4 scores are cropped to 16*16 before the two maps are added.
# NOTE(review): this note previously quoted a 30*30 result for the stride-2 upsampling, but the
# comments inside FCN16 give 16*16 ((7 - 1) * 2 + 4) - confirm what the 30*30 figure referred to.
vgg16.features[0:24]

Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU(inplace=True)
  (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): ReLU(inplace=True)
  (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (6): ReLU(inplace=True)
  (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (8): ReLU(inplace=True)
  (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): ReLU(inplace=True)
  (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (13): ReLU(inplace=True)
  (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (15): ReLU(inplace=True)
  (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (17): Conv2d(256, 512, kernel_si

In [15]:
###### FCN16 #########
class FCN16(nn.Module):
    """FCN-16s: FCN with one skip connection from pool4.

    The coarse class scores computed from pool5 are upsampled 2x, added to
    class scores computed from pool4, and the sum is upsampled 16x.

    Args:
        num_classes: number of output channels (one per class).
    """

    # Nominal crop offset. For a 224x224 input the upsampled map is 272x272,
    # so 24 = (272 - 224) / 2 is the centre of the margin.
    _CROP_OFFSET = 24

    def __init__(self, num_classes):
        super().__init__()
        mod = models.vgg16(pretrained=True)
        features = mod.features
        classifier = mod.classifier
        # Pad the first convolution by 100 pixels so the later crops have a margin to cut from.
        features[0].padding = (100, 100)
        self.features4 = nn.Sequential(*features[: 24])  # up to and including pool4
        self.features5 = nn.Sequential(*features[24:])   # remaining layers, ending in pool5

        # Skip-branch scorer; it starts from all-zero weights and bias.
        self.score_pool4 = nn.Conv2d(512, num_classes, kernel_size=1)
        self.score_pool4.weight.data.zero_()
        self.score_pool4.bias.data.zero_()

        # fc6 / fc7: the VGG16 Linear layers reshaped into 7x7 and 1x1 convolutions.
        fc6 = nn.Conv2d(512, 4096, kernel_size=7)
        fc6.weight.data.copy_(classifier[0].weight.data.view(4096, 512, 7, 7))
        fc6.bias.data.copy_(classifier[0].bias.data)
        fc7 = nn.Conv2d(4096, 4096, kernel_size=1)
        fc7.weight.data.copy_(classifier[3].weight.data.view(4096, 4096, 1, 1))
        fc7.bias.data.copy_(classifier[3].bias.data)
        # Final scorer, also zero-initialised.
        score_fr = nn.Conv2d(4096, num_classes, kernel_size=1)
        score_fr.weight.data.zero_()
        score_fr.bias.data.zero_()
        self.score_fr = nn.Sequential(
            fc6, nn.ReLU(inplace=True), nn.Dropout(), fc7, nn.ReLU(inplace=True), nn.Dropout(), score_fr
        )

        self.upscore2 = nn.ConvTranspose2d(num_classes, num_classes, kernel_size=4, stride=2, bias=False)
        self.upscore16 = nn.ConvTranspose2d(num_classes, num_classes, kernel_size=32, stride=16, bias=False)

    @classmethod
    def _crop_start(cls, available, wanted):
        """Return the crop start: the nominal offset, reduced if the window would not fit."""
        return max(0, min(cls._CROP_OFFSET, available - wanted))

    def forward(self, x):
        """Return raw class scores cropped to the spatial size of ``x``.

        Args:
            x: input batch; dimensions 2 and 3 are treated as height and width.

        Returns:
            A contiguous score tensor cropped to the height and width of ``x``.
            No activation is applied.
        """
        x_size = x.size()
        in_h, in_w = x_size[2], x_size[3]
        pool4 = self.features4(x)
        pool5 = self.features5(pool4)

        score_fr = self.score_fr(pool5)
        upscore2 = self.upscore2(score_fr)  # 16*16 for a 224*224 input

        # pool4 is scaled by 0.01 before scoring.
        # NOTE(review): presumably mirrors the scaling in the reference nets - confirm.
        score_pool4 = self.score_pool4(0.01 * pool4)  # 26*26 for a 224*224 input
        # Crop the pool4 scores to the size of the 2x-upsampled scores before adding them.
        fused = score_pool4[:, :, 5: (5 + upscore2.size()[2]), 5: (5 + upscore2.size()[3])] + upscore2
        upscore16 = self.upscore16(fused)  # 272*272 for a 224*224 input

        # The margin around the input depends on the input size. A fixed offset
        # of 24 can push the slice past the end of the map and return fewer
        # rows/columns than the input, so the offset is clamped to the margin
        # that is actually available.
        top = self._crop_start(upscore16.size(2), in_h)
        left = self._crop_start(upscore16.size(3), in_w)
        return upscore16[:, :, top: (top + in_h), left: (left + in_w)].contiguous()

In [16]:
# Two-class FCN16 instance for the shape check below.
fcn16=FCN16(2)

In [17]:
# Sanity check: FCN16 also returns a map with the input's 224x224 spatial size.
fcn16(sample).shape

torch.Size([1, 2, 224, 224])

In [18]:
class FCN8(nn.Module):
    """FCN-8s: FCN with skip connections from both pool4 and pool3.

    Coarse scores from pool5 are upsampled 2x and added to pool4 scores. That
    sum is upsampled 2x again and added to pool3 scores, and the result is
    upsampled 8x and cropped to the input size.

    Args:
        num_classes: number of output channels (one per class).
    """

    def __init__(self, num_classes):
        super().__init__()
        backbone = models.vgg16(pretrained=True)
        head = backbone.classifier
        layers = list(backbone.features.children())
        # 100-pixel padding on the very first convolution leaves room for the crops in forward().
        layers[0].padding = (100, 100)
        self.features3 = nn.Sequential(*layers[:17])     # through pool3
        self.features4 = nn.Sequential(*layers[17:24])   # pool3 -> pool4
        self.features5 = nn.Sequential(*layers[24:])     # pool4 -> pool5

        # Skip-branch scorers, both starting from all-zero parameters.
        self.score_pool3 = nn.Conv2d(256, num_classes, kernel_size=1)
        self.score_pool4 = nn.Conv2d(512, num_classes, kernel_size=1)
        for side_scorer in (self.score_pool3, self.score_pool4):
            side_scorer.weight.data.zero_()
            side_scorer.bias.data.zero_()

        # The two hidden Linear layers of the classifier, recast as 7x7 and 1x1 convolutions.
        conv6 = nn.Conv2d(512, 4096, kernel_size=7)
        conv7 = nn.Conv2d(4096, 4096, kernel_size=1)
        for conv, linear in ((conv6, head[0]), (conv7, head[3])):
            conv.weight.data.copy_(linear.weight.data.view_as(conv.weight.data))
            conv.bias.data.copy_(linear.bias.data)

        final_scorer = nn.Conv2d(4096, num_classes, kernel_size=1)
        final_scorer.weight.data.zero_()
        final_scorer.bias.data.zero_()
        self.score_fr = nn.Sequential(
            conv6,
            nn.ReLU(inplace=True),
            nn.Dropout(),
            conv7,
            nn.ReLU(inplace=True),
            nn.Dropout(),
            final_scorer,
        )

        def make_upsampler(kernel, stride):
            # Bias-free transposed convolution that keeps the class channels.
            return nn.ConvTranspose2d(num_classes, num_classes, kernel_size=kernel, stride=stride, bias=False)

        self.upscore2 = make_upsampler(4, 2)
        self.upscore_pool4 = make_upsampler(4, 2)
        self.upscore8 = make_upsampler(16, 8)

    def forward(self, x):
        """Return raw class scores cropped to the spatial size of ``x``.

        Args:
            x: input batch; dimensions 2 and 3 are treated as height and width.

        Returns:
            A contiguous score tensor; no activation is applied.
        """
        in_h, in_w = x.size()[2], x.size()[3]

        pool3 = self.features3(x)
        pool4 = self.features4(pool3)
        pool5 = self.features5(pool4)

        # Coarsest scores, upsampled 2x.
        coarse = self.upscore2(self.score_fr(pool5))
        c_h, c_w = coarse.size()[2], coarse.size()[3]

        # pool4 scores (input scaled by 0.01), cropped at offset 5 to match the coarse map.
        pool4_scores = self.score_pool4(0.01 * pool4)
        merged4 = self.upscore_pool4(pool4_scores[:, :, 5:5 + c_h, 5:5 + c_w] + coarse)
        m_h, m_w = merged4.size()[2], merged4.size()[3]

        # pool3 scores (input scaled by 0.0001), cropped at offset 9 to match the merged map.
        pool3_scores = self.score_pool3(0.0001 * pool3)
        full = self.upscore8(pool3_scores[:, :, 9:9 + m_h, 9:9 + m_w] + merged4)

        # Cut the input-sized window out of the 8x-upsampled map.
        return full[:, :, 28:28 + in_h, 28:28 + in_w].contiguous()

In [19]:
# Two-class FCN8 instance for the shape check below.
fcn8=FCN8(2)

In [20]:
# Sanity check: FCN8 also returns a map with the input's 224x224 spatial size.
fcn8(sample).shape

torch.Size([1, 2, 224, 224])