In [1]:
# Take in a model with linear layer which say works for image of resolution 3x224x224
# Convert it into a spatial model which can now work on any image size

import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models

In [2]:
m1 = models.alexnet(pretrained=True)  # pretrained alexnet model
m1.eval()
for (name, layer) in m1._modules.items():
    #iteration over outer layers
    print((name, layer))

('features', Sequential (
  (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
  (1): ReLU (inplace)
  (2): MaxPool2d (size=(3, 3), stride=(2, 2), dilation=(1, 1))
  (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (4): ReLU (inplace)
  (5): MaxPool2d (size=(3, 3), stride=(2, 2), dilation=(1, 1))
  (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (7): ReLU (inplace)
  (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (9): ReLU (inplace)
  (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): ReLU (inplace)
  (12): MaxPool2d (size=(3, 3), stride=(2, 2), dilation=(1, 1))
))
('classifier', Sequential (
  (0): Dropout (p = 0.5)
  (1): Linear (9216 -> 4096)
  (2): ReLU (inplace)
  (3): Dropout (p = 0.5)
  (4): Linear (4096 -> 4096)
  (5): ReLU (inplace)
  (6): Linear (4096 -> 1000)
))


In [3]:
class ModelDef(nn.Module):

    def __init__(self, num_classes=1000):
        super(ModelDef, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),                                
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Conv2d(256, 4096, kernel_size=6),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Conv2d(4096, 4096, kernel_size=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(4096, num_classes, kernel_size=1)
        )                                                         
                                                                  
    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x

In [4]:
m2 = ModelDef()
m2.eval()
for (name, layer) in m2._modules.items():
    #iteration over outer layers
    print((name, layer))

('features', Sequential (
  (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
  (1): ReLU (inplace)
  (2): MaxPool2d (size=(3, 3), stride=(2, 2), dilation=(1, 1))
  (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (4): ReLU (inplace)
  (5): MaxPool2d (size=(3, 3), stride=(2, 2), dilation=(1, 1))
  (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (7): ReLU (inplace)
  (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (9): ReLU (inplace)
  (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): ReLU (inplace)
  (12): MaxPool2d (size=(3, 3), stride=(2, 2), dilation=(1, 1))
))
('classifier', Sequential (
  (0): Dropout (p = 0.5)
  (1): Conv2d(256, 4096, kernel_size=(6, 6), stride=(1, 1))
  (2): ReLU (inplace)
  (3): Dropout (p = 0.5)
  (4): Conv2d(4096, 4096, kernel_size=(1, 1), stride=(1, 1))
  (5): ReLU (inplace)
  (6): Conv2d(4096, 1000, kernel_size=(1, 1), stride=

In [5]:
print(m2._modules['classifier'][1].weight[1][1])
print(m1._modules['classifier'][1].weight[1][36:72]) # Weights of the models are different at this point

Variable containing:
1.00000e-02 *
 -0.7129 -0.3940  0.5958 -0.7427 -0.9551 -1.0343
  1.0163  1.0155 -0.0219 -0.8092  0.4064 -0.3752
  0.9636  0.7135 -0.9375  0.8450  0.1269  0.6673
  0.8593  1.0271  0.6331  1.0400  0.4612 -0.1184
 -0.5954 -0.3229 -0.4763 -0.1278  0.4813 -0.6584
  0.1439 -1.0080 -0.8007  0.0180 -0.1923 -0.1401
[torch.FloatTensor of size 6x6]

Variable containing:
1.00000e-02 *
 -0.9219
 -0.0723
 -1.0670
 -2.1309
 -0.8962
 -2.0367
 -1.0963
 -1.1400
 -1.0789
 -0.9955
 -0.5506
 -0.5041
 -2.1386
 -0.7569
 -2.2493
 -0.7488
 -1.8975
 -1.0458
 -1.7070
 -0.2994
 -0.1663
 -0.0909
 -0.8320
 -0.3444
 -1.1933
  0.3273
  0.2165
  0.2178
 -0.1997
 -0.5213
 -0.4225
 -0.1865
 -0.8689
 -0.6569
 -0.3979
 -0.3722
[torch.FloatTensor of size 36]



In [6]:
x = Variable(torch.FloatTensor(1, 3, 224, 224))
y1 = m1(x)
y2 = m2(x)
print(y1[0, :5])
print(y2[0, :5, 0, 0])  # Different output values; as expected!!!

Variable containing:
-8.7606e+28
-2.1060e+27
-1.2613e+28
-1.2498e+29
-4.9750e+28
[torch.FloatTensor of size 5]

Variable containing:
-2.6316e+26
-2.3292e+26
-1.7740e+26
 3.5361e+26
-2.8206e+25
[torch.FloatTensor of size 5]



In [7]:
for i, j in zip(m1.modules(), m2.modules()):
    if not list(i.children()):
        if isinstance(i, nn.Linear):  # copy weights of linear layer into conv2d
            j.weight.data = i.weight.data.view(j.weight.size())
            j.bias.data = i.bias.data
        else:
            if len(i.state_dict()) > 0:  # relu and dropout do not have anything in their state_dict
                j.weight.data = i.weight.data
                j.bias.data = i.bias.data

In [8]:
print(m2._modules['classifier'][1].weight[0][1])
print(m1._modules['classifier'][1].weight[0][36:72]) # Weights of both the models are now exactly the same

Variable containing:
1.00000e-02 *
 -0.3969 -0.8168 -0.3132  0.0382 -1.2532 -0.7787
 -0.5779 -0.6933  0.1444  0.3889 -0.5300  0.1078
 -0.0160 -0.5410  0.3189 -0.1015 -0.3006 -0.1682
 -0.8105  0.4949 -0.0498  0.6025 -0.7505 -0.4757
 -0.8852 -0.7535 -0.7075  0.5752  0.2680 -1.7264
  0.3389 -0.7997  0.4491  1.4019  0.5940  0.2137
[torch.FloatTensor of size 6x6]

Variable containing:
1.00000e-02 *
 -0.3969
 -0.8168
 -0.3132
  0.0382
 -1.2532
 -0.7787
 -0.5779
 -0.6933
  0.1444
  0.3889
 -0.5300
  0.1078
 -0.0160
 -0.5410
  0.3189
 -0.1015
 -0.3006
 -0.1682
 -0.8105
  0.4949
 -0.0498
  0.6025
 -0.7505
 -0.4757
 -0.8852
 -0.7535
 -0.7075
  0.5752
  0.2680
 -1.7264
  0.3389
 -0.7997
  0.4491
  1.4019
  0.5940
  0.2137
[torch.FloatTensor of size 36]



In [9]:
y1 = m1(x)
y2 = m2(x)
print(y1[0][:5])
print(y2[0, :5, 0, 0])  # Same output values as expected!!!

Variable containing:
-8.7606e+28
-2.1060e+27
-1.2613e+28
-1.2498e+29
-4.9750e+28
[torch.FloatTensor of size 5]

Variable containing:
-8.7606e+28
-2.1060e+27
-1.2613e+28
-1.2498e+29
-4.9750e+28
[torch.FloatTensor of size 5]



In [10]:
x1 = Variable(torch.FloatTensor(1, 3, 300, 300))
y2 = m2(x1)
print(y2.size())       # Now the network is capable of giving spatial output

torch.Size([1, 1000, 3, 3])
