In [2]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import random

import torch
import torchvision
import torch.nn as nn
import torchvision.transforms as transforms


from torch.utils.data import Dataset, DataLoader


# Model

In [3]:
class Base(nn.Module):
    def __init__(self):
        super(Base, self).__init__()

        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, padding=1), # (N, 64, 300, 300)
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1), # (N, 64, 300, 300)
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2) # (N, 64, 150, 150)
        )

        self.layer2 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, padding=1), #(N, 128, 150, 150)
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, padding=1), # (N, 128, 150, 150)
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2) # (N, 128, 75, 75)
        )

        self.layer3 = nn.Sequential(
            nn.Conv2d(128, 256, kernel_size=3, padding=1), # (N, 256, 75, 75)
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1), # (N, 256, 75, 75)
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1), # (N, 256, 75, 75)
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True) # (N, 256, 38, 38)
        )

        self.layer4 = nn.Sequential(
            nn.Conv2d(256, 512, kernel_size=3, padding=1), # (N, 512, 38, 38)
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1), # (N, 512, 38, 38)
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1), # (N, 512, 38, 38)
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2) # (N, 512, 19, 19)
        )

        self.layer5 = nn.Sequential(
            nn.Conv2d(512, 512, kernel_size=3, padding=1), # (N, 512, 19, 19)
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1), # (N, 512, 19, 19)
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1), # (N, 512, 19, 19)
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1) # (N, 512, 19, 19)
        )

        self.layer6 = nn.Sequential(
            nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6), # (N, 1024, 19, 19)
            nn.ReLU(inplace=True),
        )
        
        self.layer7 = nn.Sequential(
            nn.Conv2d(1024, 1024, kernel_size=1), # (N, 1024, 19, 19)
            nn.ReLU(inplace=True),
        )

        self.load_pretrained_params()

    def forward(self, input):
        #Base
        out = self.layer1(input)
        out = self.layer2(out)
        out = self.layer3(out)
        conv4_3_feat = out
        
        out = self.layer4(out)
        out = self.layer5(out)
        out = self.layer6(out)
        out = self.layer7(out)
        conv7_feat = out

        return conv4_3_feat, conv7_feat 

    def load_pretrained_params(self):

        state_dict = self.state_dict()
        params_keys = list(state_dict.keys())

        vgg = torchvision.models.vgg16(pretrained=True)
            
        pretrained_state_dict = vgg.state_dict()
        pretrained_params_keys = list(pretrained_state_dict.keys())

        for i, key in enumerate(params_keys[:-4]):
            state_dict[key] = pretrained_state_dict[pretrained_params_keys[i]]


        #Convert fc6, fc7 to convolutional layers
        w_fc6 = pretrained_state_dict['classifier.0.weight'].view(4096, 512, 7, 7)
        b_fc6 = pretrained_state_dict['classifier.0.bias'] # (4096,)

        w_fc7 = pretrained_state_dict['classifier.3.weight'].view(4096, 4096, 1, 1)
        b_fc7 = pretrained_state_dict['classifier.3.bias'] #(4096, )

        # Subsample parameters of fc6, fc7
        w_conv6 = torch.index_select(input=w_fc6, dim=0, index=torch.arange(0, 4096, step=4)) # (1024, 512, 7, 7)
        w_conv6 = torch.index_select(input=w_conv6, dim=2, index=torch.arange(0, 7, step=3)) # (1024, 512, 3, 7)
        w_conv6 = torch.index_select(input=w_conv6, dim=3, index=torch.arange(0, 7, step=3)) #(1024, 512, 3, 3)
        
        b_conv6 = torch.index_select(input=b_fc6, dim=0, index=torch.arange(0, 4096, step=4)) #(1024,)


        w_conv7 = torch.index_select(input=w_fc7, dim=0, index=torch.arange(0, 4096, step=4)) #(1024, 4096, 1, 1)
        w_conv7 = torch.index_select(input=w_conv7, dim=1, index=torch.arange(0, 4096, step=4)) #(1024, 1024, 1, 1)

        b_conv7 = b_conv6 = torch.index_select(input=b_fc6, dim=0, index=torch.arange(0, 4096, step=4)) #(1024,)


        state_dict['layer6.0.weight'] = w_conv6
        state_dict['layer6.0.bias'] = b_conv6
        state_dict['layer7.0.weight'] = w_conv7
        state_dict['layer7.0.bias'] = b_conv7

        self.load_state_dict(state_dict)

        print('Loaded pretrained model VGG to Base.') 

    

In [4]:
class Extras(nn.Module):
    def __init__(self):
        super(Extras, self).__init__()
        self.layer8 = nn.Sequential(
            nn.Conv2d(1024, 256, kernel_size=1, padding=0), #(N, 256, 19, 19)
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 512, kernel_size=3, padding=1, stride=2), # (N, 512, 10, 10)
            nn.ReLU(inplace=True),
        )

        self.layer9 = nn.Sequential(
            nn.Conv2d(512, 128, kernel_size=1, padding=0), # (N, 128, 10, 10)
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1), # (N, 256, 5, 5)
            nn.ReLU(inplace=True),
        )

        self.layer10 = nn.Sequential(
            nn.Conv2d(256, 128, kernel_size=1, padding=0), #(N, 128, 5, 5)
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 256, kernel_size=3, padding=0), # (N, 256, 3, 3)
            nn.ReLU(inplace=True),
        )
        
        self.layer11 = nn.Sequential(
            nn.Conv2d(256, 128, kernel_size=1, padding=0), #(N, 128, 3, 3)
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 256, kernel_size=3, padding=0), #(N, 256, 1, 1)
            nn.ReLU(inplace=True),
        )

        self.init_params()

    def init_params(self):
        for c in self.children():
            if isinstance(c, nn.Conv2d):
                nn.init.xavier_uniform_(c.weight)
                nn.init.constant_(c.bias, 0.)
        
    def forward(self, input):
        #Extras
        out = self.layer8(input)
        conv8_2_feat = out
        out = self.layer9(out)
        conv9_2_feat = out
        out = self.layer10(out)
        conv10_2_feat = out
        conv11_2_feat = self.layer11(out)

        return conv8_2_feat, conv9_2_feat, conv10_2_feat, conv11_2_feat
        
        

In [5]:
class Predict(nn.Module):
    def __init__(self, num_classes):
        super(Predict, self).__init__()
        self.num_classes = num_classes
        num_boxes = {'conv4_3':4, 'conv7':6, 'conv8_2':6, 'conv9_2':6, 'conv10_2':4, 'conv11_2':4} #Number of default boxes for each feature

        #Location
        self.loc_conv4_3 = nn.Conv2d(256, num_boxes['conv4_3']*4, kernel_size=3, padding=1)
        self.loc_conv7 = nn.Conv2d(1024, num_boxes['conv7']*4, kernel_size=3, padding=1)
        self.loc_conv8_2 = nn.Conv2d(512, num_boxes['conv8_2']*4, kernel_size=3, padding=1)
        self.loc_conv9_2 = nn.Conv2d(256, num_boxes['conv9_2']*4, kernel_size=3, padding=1)
        self.loc_conv10_2 = nn.Conv2d(256, num_boxes['conv10_2']*4, kernel_size=3, padding=1)
        self.loc_conv11_2 = nn.Conv2d(256, num_boxes['conv11_2']*4, kernel_size=3, padding=1)

        #Classify
        self.cl_conv4_3 = nn.Conv2d(256, num_boxes['conv4_3']*self.num_classes, kernel_size=3, padding=1)
        self.cl_conv7 = nn.Conv2d(1024, num_boxes['conv7']*self.num_classes, kernel_size=3, padding=1)
        self.cl_conv8_2 = nn.Conv2d(512, num_boxes['conv8_2']*self.num_classes, kernel_size=3, padding=1)
        self.cl_conv9_2 = nn.Conv2d(256, num_boxes['conv9_2']*self.num_classes, kernel_size=3, padding=1)
        self.cl_conv10_2 = nn.Conv2d(256, num_boxes['conv10_2']*self.num_classes, kernel_size=3, padding=1)
        self.cl_conv11_2 = nn.Conv2d(256, num_boxes['conv11_2']*self.num_classes, kernel_size=3, padding=1)

        self.init_params()

    def init_params(self):
        for c in self.children():
            if isinstance(c, nn.Conv2d):
                nn.init.xavier_uniform_(c.weight)
                nn.init.constant_(c.bias, 0.)
    
    
    '''If you just want to reshape tensors, use torch.reshape.
       If you're also concerned about memory usage and want to ensure that the two tensors share the same data, use torch.view.'''
       
    def forward(self, conv4_3_feat, conv7_feat, conv8_2_feat, conv9_2_feat, conv10_2_feat, conv11_2_feat):
        batch_size = conv4_3_feat.shape[0]

        #Location
        loc_conv4_3 = self.loc_conv4_3(conv4_3_feat) # (N, 16, 38, 38)
        loc_conv4_3 = loc_conv4_3.permute(0, 2, 3, 1).contiguous() # (N, 38, 38, 16)
        loc_conv4_3 = loc_conv4_3.view(batch_size, -1, 4) #(N, 5776, 4)
        
        loc_conv7 = self.loc_conv7(conv7_feat) #(N, 24, 19, 19)
        loc_conv7 = loc_conv7.permute(0, 2, 3, 1).contiguous() #(N, 19, 19, 24)
        loc_conv7 = loc_conv7.view(batch_size, -1, 4) #(N, 2166, 4)

        loc_conv8_2 = self.loc_conv8_2(conv8_2_feat)
        loc_conv8_2 = loc_conv8_2.permute(0, 2, 3, 1).contiguous()
        loc_conv8_2 = loc_conv8_2.view(batch_size, -1, 4)
        
        loc_conv9_2 = self.loc_conv9_2(conv9_2_feat)
        loc_conv9_2 = loc_conv9_2.permute(0, 2, 3, 1).contiguous()
        loc_conv9_2 = loc_conv9_2.view(batch_size, -1, 4)

        loc_conv10_2 = self.loc_conv10_2(conv10_2_feat)
        loc_conv10_2 = loc_conv10_2.permute(0, 2, 3, 1).contiguous()
        loc_conv10_2 = loc_conv10_2.view(batch_size, -1, 4)

        loc_conv11_2 = self.loc_conv11_2(conv11_2_feat)
        loc_conv11_2 = loc_conv11_2.permute(0, 2, 3, 1).contiguous()
        loc_conv11_2 = loc_conv11_2.view(batch_size, -1, 4)

        #Classification
        cl_conv4_3 = self.cl_conv4_3(conv4_3_feat)  #(N, classes*4, 38, 38)
        cl_conv4_3 = cl_conv4_3.permute(0, 2, 3, 1).contiguous() #(N, 38, 38, classes*4)
        cl_conv4_3 = cl_conv4_3.view(batch_size, -1, self.num_classes) # (N, 5776, classes)

        cl_conv7 = self.cl_conv7(conv7_feat) #(N, classes*6, 19, 19)
        cl_conv7 = cl_conv7.permute(0, 2, 3, 1).contiguous() # (N, 19, 19, classes*6)
        cl_conv7 = cl_conv7.view(batch_size, -1, self.num_classes) # (N, 2166, classes)

        cl_conv8_2 = self.cl_conv8_2(conv8_2_feat)
        cl_conv8_2 = cl_conv8_2.permute(0, 2, 3, 1).contiguous()
        cl_conv8_2 = cl_conv8_2.view(batch_size, -1, self.num_classes)

        cl_conv9_2 = self.cl_conv9_2(conv9_2_feat)
        cl_conv9_2 = cl_conv9_2.permute(0, 2, 3, 1).contiguous()
        cl_conv9_2 = cl_conv9_2.view(batch_size, -1, self.num_classes)

        cl_conv10_2 = self.cl_conv10_2(conv10_2_feat)
        cl_conv10_2 = cl_conv10_2.permute(0, 2, 3, 1).contiguous()
        cl_conv10_2 = cl_conv10_2.view(batch_size, -1, self.num_classes)

        cl_conv11_2 = self.cl_conv11_2(conv11_2_feat)
        cl_conv11_2 = cl_conv11_2.permute(0, 2, 3, 1).contiguous()
        cl_conv11_2 = cl_conv11_2.view(batch_size, -1, self.num_classes)

        
        locs = torch.cat((loc_conv4_3, loc_conv7, loc_conv8_2, loc_conv9_2, loc_conv10_2, loc_conv11_2), dim=1) # dim: the dimention over which the tensors are concatnated
        classifs = torch.cat((cl_conv4_3, cl_conv7, cl_conv8_2, cl_conv9_2, cl_conv10_2, cl_conv11_2), dim=1) 

        return locs, classifs


In [6]:
class DefaultBoxes():
    def __init__(self, aspect_ratios, features_size):
        self.ratios = aspect_ratios #for each feature
        self.feat_size = feature_size

    def create_dbox(self, m=6, s_max=0.9, s_min=0.2):
        """ Args:
                m: number of feature maps for prediction,
                s_max: max scale for each default bounding box,
                s_min: min scale for each default bounding box,  

            Return: Tensor of default bounding boxes with (cx, cy, w, h)     
        """

        def_boxes = []

        for k in range(m):
            s_k = s_min + (s_max - s_min) * k / (m - 1)
            w = []
            h = []
            for r in self.ratios[k]:
                sqrt_r = np.sqrt(r)

                temp_w = s_k * sqrt_r
                temp_h = s_k / sqrt_r
                w.append(temp_w)
                h.append(temp_h)
                if r == 1:
                    temp_w = np.sqrt(s_k**2 + 1) * sqrt_r
                    temp_h = np.sqrt(s_k**2 + 1) * sqrt_r
                    w.append(temp_w)
                    h.append(temp_h)

            for i in range(1, self.feat_size[k] + 1):
                cy = (i + 0.5) / self.feat_size[k]

                for j in range(1, self.feat_size[k] + 1):
                    cx = (j + 0.5) / self.feat_size[k]
                    
                    for l in range(len(w)):
                        temp = [cx, cy, w[l], h[l]]
                        def_boxes.append(temp)
                        
        def_boxes = torch.tensor(def_boxes)
        def_boxes.clamp_(min=0, max=1)
        return def_boxes


In [6]:
class SSD300(nn.Module):
    def __init__(self, num_classes, aspect_ratios, features_size):
        super(SSD300, self).__init__()

        self.num_classes = num_classes

        self.base = Base()
        self.extras = Extras()
        self.predict = Predict(num_classes)

        dbox = DefaultBoxes(aspect_ratios, features_size)
        self.def_boxes = dbox.create_dbox()


    def forward(self, image):
        conv4_3_feat, conv7_feat = self.base(image) #(N, 512, 38, 38), (N, 1024, 19, 19)

        # L2 Norm 
        norm = conv4_3_feat.pow(2).sum(dim=1, keepdim=True) #(N, 1, 38, 38)
        norm = torch.sqrt(norm)
        conv4_3_feat = conv4_3_feat / norm #(N, 1, 38, 38)
        conv4_3_feat = conv4_3_feat * 20 


        conv8_2_feat, conv9_2_feat, conv10_2_feat, conv11_2_feat = self.extras(conv7_feat)
        locs, classifs = self.predict(conv4_3_feat, conv7_feat, conv8_2_feat, conv9_2_feat, conv10_2_feat, conv11_2_feat)

        return locs, classifs
    

      

In [49]:
def NMS(boxes, scores, overlap=0.45, top_k=200):
    """ boxes: (8732, 4)
        scores: (8732, )
        Giu 200 box co score cao nhat
        Cac box co overlap > 0.45 so voi box co score cao nhat se bi loai

        Return: tensor chua index của các box thỏa mãn
    """

    count = 0
    keep = []
    
    xmin = boxes[:, 0] # (8732, )
    ymin = boxes[:, 1]
    xmax = boxes[:, 2]
    ymax = boxes[:, 3]

    area = (xmax - xmin) * (ymax - ymin)

    _, idx = torch.sort(scores, dim=0)

    idx = idx[-top_k:]
    

    while(idx.numel() > 0):

        i = idx[-1] #Lấy index của box có score cao nhất
        keep.append(i)

        if idx.size(0) == 1:
            break

        idx = idx[:-1]

        temp_xmin = torch.index_select(input=xmin, dim=0, index=idx) # (199,)
        temp_ymin = torch.index_select(input=ymin, dim=0, index=idx)
        temp_xmax = torch.index_select(input=xmax, dim=0, index=idx)
        temp_ymax = torch.index_select(input=ymax, dim=0, index=idx)

        temp_xmin.clamp_(min=xmin[i]) #(199, )
        temp_ymin.clamp_(min=ymin[i])
        temp_xmax.clamp_(max=xmax[i])
        temp_ymax.clamp_(max=ymax[i])
        
        temp_w = temp_xmax - temp_xmin #(199, )
        temp_h = temp_ymax - temp_ymin

        temp_w.clamp_(min=0.) # (199, )
        temp_h.clamp_(min=0.)

        intersect = temp_w * temp_h  #(199, )
        others_area = torch.index_select(input=area, dim=0, index=idx) # (199, )

        union = area[i] + others_area - 2 * intersect

        iou = intersect / union 

        idx = idx[torch.le(iou, overlap)]

    return torch.tensor(keep)


In [55]:
boxes = np.random.randint(1, 300, (8732,4))
boxes = torch.from_numpy(boxes)

scores = torch.rand(8732)

keep = NMS(boxes, scores)

keep.shape

torch.Size([187])

In [20]:
x = torch.rand((1, 3, 300, 300 ))
aspect_ratios = [[1, 2, 0.5], [1, 2, 3, 0.5, 0.333], [1, 2, 3, 0.5, 0.333], [1, 2, 3, 0.5, 0.333], [1, 2, 0.5], [1, 2, 0.5]]
feature_size = [38, 19, 10, 5, 3, 1]

model = SSD300(21, aspect_ratios, feature_size)

locs, classifs = model(x)

Loaded pretrained model VGG to Base.


In [21]:
classifs.shape

torch.Size([1, 8732, 21])

In [9]:
print(sum(p.numel() for p in model.parameters() if p.requires_grad))
print(sum(p.numel() for p in model.parameters()))

26054574
26054574


In [10]:
w = torch.empty(3, 5)
nn.init.uniform_(w, a=1, b=2)

tensor([[1.9320, 1.5141, 1.3847, 1.5824, 1.8143],
        [1.9848, 1.9508, 1.9867, 1.4063, 1.8686],
        [1.8464, 1.7952, 1.2754, 1.9279, 1.8279]])

In [11]:
aspect_ratios = [[1, 2, 0.5], [1, 2, 3, 0.5, 0.333], [1, 2, 3, 0.5, 0.333], [1, 2, 3, 0.5, 0.333], [1, 2, 0.5], [1, 2, 0.5]]
feature_size = [38, 19, 10, 5, 3, 1]


defbox = DefaultBoxes(aspect_ratios, feature_size)

defbox_tensor = defbox.create_dbox()

df = pd.DataFrame(defbox_tensor.numpy())

df.head(15)

Unnamed: 0,0,1,2,3
0,0.039474,0.039474,0.2,0.2
1,0.039474,0.039474,1.0,1.0
2,0.039474,0.039474,0.282843,0.141421
3,0.039474,0.039474,0.141421,0.282843
4,0.065789,0.039474,0.2,0.2
5,0.065789,0.039474,1.0,1.0
6,0.065789,0.039474,0.282843,0.141421
7,0.065789,0.039474,0.141421,0.282843
8,0.092105,0.039474,0.2,0.2
9,0.092105,0.039474,1.0,1.0


In [12]:
ratios = [[1, 2, 0.5], [1, 2, 3, 0.5, 0.333], [1, 2, 3, 0.5, 0.333], [1, 2, 3, 0.5, 0.333], [1, 2, 0.5], [1, 2, 0.5]]
feat_size = [38, 19, 10, 5, 3, 1]

def_box = []
m = 6
s_max = 0.9
s_min = 0.2
for k in range(m):
    s_k = s_min + (s_max - s_min) * k / (m - 1)
    w = []
    h = []
    for r in ratios[k]:
        sqrt_r = np.sqrt(r)

        temp_w = s_k * sqrt_r
        temp_h = s_k / sqrt_r
        w.append(temp_w)
        h.append(temp_h)
        if r == 1:
            temp_w = np.sqrt(s_k**2+1) * sqrt_r
            temp_h = np.sqrt(s_k**2+1) * sqrt_r
            w.append(temp_w)
            h.append(temp_h)
    print('w = ', w)
    print('h = ', h)

    for i in range(1, feat_size[k]+1):
        cy = (i + 0.5) / feat_size[k]
        for j in range(1, feat_size[k]+1):
            cx = (j + 0.5) / feat_size[k]
            for l in range(len(w)):
                temp = [cx, cy, w[l], h[l]]
                def_box.append(temp)
                # print(temp)


w =  [0.2, 1.019803902718557, 0.28284271247461906, 0.14142135623730953]
h =  [0.2, 1.019803902718557, 0.1414213562373095, 0.282842712474619]
w =  [0.33999999999999997, 1.0562196741208714, 0.4808326112068523, 0.5888972745734182, 0.24041630560342614, 0.19620091742904772]
h =  [0.33999999999999997, 1.0562196741208714, 0.2404163056034261, 0.19629909152447275, 0.4808326112068522, 0.5891919442313744]
w =  [0.48, 1.1092339699089637, 0.6788225099390857, 0.831384387633061, 0.33941125496954283, 0.27698953048806735]
h =  [0.48, 1.1092339699089637, 0.3394112549695428, 0.27712812921102037, 0.6788225099390855, 0.831800391856058]
w =  [0.6199999999999999, 1.176605286406618, 0.8768124086713188, 1.0738715006927038, 0.4384062043356594, 0.357778143547087]
h =  [0.6199999999999999, 1.176605286406618, 0.43840620433565936, 0.3579571668975679, 0.8768124086713187, 1.0744088394807416]
w =  [0.76, 1.2560254774486066, 1.0748023074035524, 0.5374011537017762]
h =  [0.76, 1.2560254774486066, 0.5374011537017761, 1.0

In [13]:
def_box = torch.tensor(def_box)

In [14]:
def_box.shape

torch.Size([8732, 4])

In [15]:
sum(p.numel() for p in model.parameters())

26054574

In [22]:
x = torch.rand(2, 3, 4)
x = x.permute(1, 2, 0)
x = x.reshape(3, 2, -1)

print(x)


tensor([[[0.8940, 0.3919, 0.2239, 0.0717],
         [0.7577, 0.1557, 0.2804, 0.1712]],

        [[0.5233, 0.2915, 0.5151, 0.6187],
         [0.1340, 0.7744, 0.2067, 0.6920]],

        [[0.0206, 0.8929, 0.2861, 0.9731],
         [0.7506, 0.6531, 0.0704, 0.1942]]])


In [23]:
x[0, 0] = 1
x

tensor([[[1.0000, 1.0000, 1.0000, 1.0000],
         [0.7577, 0.1557, 0.2804, 0.1712]],

        [[0.5233, 0.2915, 0.5151, 0.6187],
         [0.1340, 0.7744, 0.2067, 0.6920]],

        [[0.0206, 0.8929, 0.2861, 0.9731],
         [0.7506, 0.6531, 0.0704, 0.1942]]])

In [None]:
vgg = torchvision.models.vgg16(pretrained=True)

print(vgg)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [None]:
state_dict = vgg.state_dict()
p_fc6 = state_dict['classifier.0.weight'].view(4096, 512, 7, 7)
index = torch.tensor([0, 1, 2])
p_conv6 = torch.index_select(input=p_fc6, dim=2, index=index)
print(p_conv6.shape)
p_conv6 = torch.index_select(input=p_conv6, dim=3, index=index)

print(p_conv6.shape)


torch.Size([4096, 512, 3, 7])
torch.Size([4096, 512, 3, 3])


In [None]:
p_fc7 = state_dict['classifier.3.weight'].view(4096, 4096, 1, 1)
index = torch.tensor([0, 1, 2])
p_conv7 = torch.index_select(input=p_fc7, dim=0, index=index)
print(p_conv7.shape)
p_conv7 = torch.index_select(input=p_conv7, dim=1, index=index)
print(p_conv7.shape)

torch.Size([3, 4096, 1, 1])
torch.Size([3, 3, 1, 1])


In [None]:
ssd.state_dict()['base.layer6.0.weight'].shape

torch.Size([1024, 512, 3, 3])

In [None]:
vgg = torchvision.models.vgg16(pretrained=True)
for param in vgg.parameters():
    param.requires_grad = False
            
pretrained_state_dict = vgg.state_dict()
pretrained_params_keys = list(pretrained_state_dict.keys())


        #Convert fc6, fc7 to convolutional layers
w_fc6 = pretrained_state_dict['classifier.0.weight'].view(4096, 512, 7, 7)
b_fc6 = pretrained_state_dict['classifier.0.bias'] # (4096,)

w_fc7 = pretrained_state_dict['classifier.3.weight'].view(4096, 4096, 1, 1)
b_fc7 = pretrained_state_dict['classifier.3.bias'] #(4096, )

        # Subsample parameters of fc6, fc7
w_conv6 = torch.index_select(input=w_fc6, dim=0, index=torch.arange(0, 4096, step=4)) # (1024, 512, 7, 7)
w_conv6 = torch.index_select(input=w_conv6, dim=2, index=torch.arange(0, 7, step=3)) # (1024, 512, 3, 7)
w_conv6 = torch.index_select(input=w_conv6, dim=3, index=torch.arange(0, 7, step=3)) #(1024, 512, 3, 3)
b_conv6 = torch.index_select(input=b_fc6, dim=0, index=torch.arange(0, 4096, step=4)) #(1024,)


w_conv7 = torch.index_select(input=w_fc7, dim=0, index=torch.arange(0, 4096, step=4)) #(1024, 4096, 1, 1)
w_conv7 = torch.index_select(input=w_conv7, dim=1, index=torch.arange(0, 4096, step=4)) #(1024, 1024, 1, 1)
b_conv7 = b_conv6 = torch.index_select(input=b_fc6, dim=0, index=torch.arange(0, 4096, step=4)) #(1024,)

In [None]:
w_conv7.requires_grad

False