In [41]:
import torch
from torchvision.models import inception_v3
from torchvision import models
from torch import nn
import torch.utils.model_zoo as model_zoo
import torch.nn.functional as F

In [20]:
model = inception_v3(pretrained=True)
model.Mixed_7c.register_forward_hook(output_hook)


<torch.utils.hooks.RemovableHandle at 0x7ff3a3544e80>

In [27]:
def output_hook(module, input, output):
    global mixed_7c_output
    mixed_7c_output = output

In [43]:
inp_ts = torch.FloatTensor(2,3,299,299).normal_(0,1)
inp_ts.requires_grad = True

In [29]:
model(inp_ts)

InceptionOutputs(logits=tensor([[ 0.0321,  1.2157,  1.3836,  ...,  0.3750,  0.0776, -0.9441],
        [-0.5735, -0.7396, -0.5489,  ..., -0.5914, -0.8045,  0.5662]],
       grad_fn=<AddmmBackward>), aux_logits=tensor([[-1.0451,  1.9003,  0.8260,  ..., -0.2205,  0.0702, -0.1304],
        [ 0.3267, -2.3561, -0.8446,  ..., -0.7612, -0.2905,  0.3184]],
       grad_fn=<AddmmBackward>))

In [31]:
print(mixed_7c_output.shape)

NameError: name 'mixed_7c_output' is not defined

In [39]:
def conv1x1(in_planes, out_planes, bias=False):
    "1x1 convolution with padding"
    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1,
                     padding=0, bias=bias)

In [35]:
class CNN_ENCODER(nn.Module):
    def __init__(self):
        super(CNN_ENCODER, self).__init__()

        self.nef = 256  # define a uniform ranker

        model = models.inception_v3()
        url = 'https://download.pytorch.org/models/inception_v3_google-1a9a5a14.pth'
        model.load_state_dict(model_zoo.load_url(url))
        for param in model.parameters():
            param.requires_grad = False
        print('Load pretrained model from ', url)
        # print(model)

        self.define_module(model)
        self.init_trainable_weights()

    def define_module(self, model):
        self.Conv2d_1a_3x3 = model.Conv2d_1a_3x3
        self.Conv2d_2a_3x3 = model.Conv2d_2a_3x3
        self.Conv2d_2b_3x3 = model.Conv2d_2b_3x3
        self.Conv2d_3b_1x1 = model.Conv2d_3b_1x1
        self.Conv2d_4a_3x3 = model.Conv2d_4a_3x3
        self.Mixed_5b = model.Mixed_5b
        self.Mixed_5c = model.Mixed_5c
        self.Mixed_5d = model.Mixed_5d
        self.Mixed_6a = model.Mixed_6a
        self.Mixed_6b = model.Mixed_6b
        self.Mixed_6c = model.Mixed_6c
        self.Mixed_6d = model.Mixed_6d
        self.Mixed_6e = model.Mixed_6e
        self.Mixed_7a = model.Mixed_7a
        self.Mixed_7b = model.Mixed_7b
        self.Mixed_7c = model.Mixed_7c

        self.emb_features = conv1x1(768, self.nef)
        self.emb_cnn_code = nn.Linear(2048, self.nef)

    def init_trainable_weights(self):
        initrange = 0.1
        self.emb_features.weight.data.uniform_(-initrange, initrange)
        self.emb_cnn_code.weight.data.uniform_(-initrange, initrange)

    def forward(self, x):
        features = None
        # --> fixed-size input: batch x 3 x 299 x 299
        x = nn.Upsample(size=(299, 299), mode='bilinear')(x)
        # 299 x 299 x 3
        x = self.Conv2d_1a_3x3(x)
        # 149 x 149 x 32
        x = self.Conv2d_2a_3x3(x)
        # 147 x 147 x 32
        x = self.Conv2d_2b_3x3(x)
        # 147 x 147 x 64
        x = F.max_pool2d(x, kernel_size=3, stride=2)
        # 73 x 73 x 64
        x = self.Conv2d_3b_1x1(x)
        # 73 x 73 x 80
        x = self.Conv2d_4a_3x3(x)
        # 71 x 71 x 192

        x = F.max_pool2d(x, kernel_size=3, stride=2)
        # 35 x 35 x 192
        x = self.Mixed_5b(x)
        # 35 x 35 x 256
        x = self.Mixed_5c(x)
        # 35 x 35 x 288
        x = self.Mixed_5d(x)
        # 35 x 35 x 288

        x = self.Mixed_6a(x)
        # 17 x 17 x 768
        x = self.Mixed_6b(x)
        # 17 x 17 x 768
        x = self.Mixed_6c(x)
        # 17 x 17 x 768
        x = self.Mixed_6d(x)
        # 17 x 17 x 768
        x = self.Mixed_6e(x)
        # 17 x 17 x 768

        # image region features
        features = x
        # 17 x 17 x 768

        x = self.Mixed_7a(x)
        # 8 x 8 x 1280
        x = self.Mixed_7b(x)
        # 8 x 8 x 2048
        x = self.Mixed_7c(x)
        # 8 x 8 x 2048
        x = F.avg_pool2d(x, kernel_size=8)
        # 1 x 1 x 2048
        # x = F.dropout(x, training=self.training)
        # 1 x 1 x 2048
        x = x.view(x.size(0), -1)
        # 2048

        # global image features
        cnn_code = self.emb_cnn_code(x)
        # 512
        if features is not None:
            features = self.emb_features(features)
        return features, cnn_code

In [42]:
model = CNN_ENCODER()
features, cnn_code = model(inp_ts)

Load pretrained model from  https://download.pytorch.org/models/inception_v3_google-1a9a5a14.pth


tensor([[[[ 9.9121e-01, -1.5410e+00,  1.5733e+00,  ..., -2.2380e+00,
            8.2939e-01,  5.3705e-01],
          [ 9.4397e-01,  1.0273e-01,  3.6461e-01,  ...,  1.1980e+00,
            2.9882e-01, -2.5564e-01],
          [-8.9623e-01,  2.0237e-01,  3.0076e-01,  ..., -6.1049e-01,
            5.8420e-01,  2.2611e+00],
          ...,
          [ 5.3627e-01, -2.1972e-01,  7.8903e-01,  ...,  1.2882e+00,
            2.4788e-01,  9.9391e-02],
          [-4.4042e-01, -6.0449e-01, -8.7504e-01,  ..., -1.0217e+00,
            4.8940e-02,  1.1941e-01],
          [-9.1444e-01, -1.3114e+00, -9.3179e-01,  ..., -6.2491e-01,
           -6.4234e-01,  8.7960e-01]],

         [[ 5.9128e-01, -1.3882e-01,  1.1439e+00,  ..., -1.0924e+00,
           -5.2835e-01,  5.6538e-01],
          [ 1.3652e+00,  6.7491e-01,  9.6551e-01,  ...,  9.8404e-01,
           -1.1610e+00,  4.2983e-02],
          [ 1.1171e+00,  2.3805e-01,  4.2848e-01,  ..., -7.4013e-01,
           -2.5052e+00,  6.3910e-01],
          ...,
     

In [56]:
ts = torch.FloatTensor(1,3,10,10).normal_(0,1)
ts.requires_grad = True

In [77]:
conv = nn.Conv2d(3,5, 3,1,0)
for param in conv.parameters():
    param.requires_grad = False
lin = nn.Linear(320, 1)
for param in lin.parameters():
    param.requires_grad = False


In [84]:
ts_conv = conv(ts)
ts_conv = ts_conv.view(1,-1)
ts_lin = lin(ts_conv).view(-1)
loss = ts_lin**2 + ts_lin*3 + 5
loss.backward()
"""
target = torch.tensor(10)
loss = nn.CrossEntropyLoss()
print(middle.shape, target.shape)
out = loss(middle, target)
"""

'\ntarget = torch.tensor(10)\nloss = nn.CrossEntropyLoss()\nprint(middle.shape, target.shape)\nout = loss(middle, target)\n'

In [89]:
print(ts.grad)

tensor([[[[ 1.7788e-02,  2.3389e-02,  6.3964e-03, -6.3704e-02, -1.5883e-03,
           -2.0264e-02, -1.6841e-02, -4.5848e-02,  1.2867e-02, -1.2457e-02],
          [ 1.4171e-02, -4.2007e-03, -4.3033e-02,  1.6611e-02, -4.4060e-02,
            8.4577e-02, -1.8137e-02,  2.8998e-02, -1.5769e-02, -9.9292e-03],
          [ 5.7802e-03, -9.7899e-02,  8.0100e-02, -8.4161e-02,  3.1718e-02,
           -4.0788e-02,  9.9572e-02, -2.5918e-02,  6.8904e-03, -1.1736e-02],
          [-3.6263e-02,  1.4079e-02, -5.5197e-02,  5.3881e-02,  8.3585e-02,
           -6.9351e-02,  1.1529e-01, -8.1976e-02, -4.2611e-02, -4.6984e-02],
          [-1.6869e-02,  1.1193e-02,  2.2383e-02, -1.2708e-02, -3.6245e-02,
           -4.4933e-02, -1.1283e-01,  4.0536e-02, -8.0812e-02,  3.9190e-02],
          [-1.9321e-02,  5.7967e-02, -5.9340e-02,  3.6058e-02, -4.5971e-02,
           -6.9076e-03,  1.3050e-03,  2.2095e-02,  3.0725e-02, -7.8929e-02],
          [ 1.4468e-02, -5.6043e-02, -7.0652e-02,  5.2487e-02, -1.6587e-02,
      