## nn package

In [1]:
import torch

In [2]:
from torch.autograd import Variable

In [3]:
import torch.nn as nn

In [4]:
import torch.nn.functional as F

In [5]:
class MNISTConvNet(nn.Module):
    
    def __init__(self):
        super(MNISTConvNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, 5)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(10, 20, 5)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, input):
        x = self.pool1(F.relu(self.conv1(input)))
        x = self.pool2(F.relu(self.conv2(x)))
        
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return x

In [6]:
net = MNISTConvNet()
print(net)

MNISTConvNet (
  (conv1): Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1))
  (pool1): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
  (conv2): Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1))
  (pool2): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
  (fc1): Linear (320 -> 50)
  (fc2): Linear (50 -> 10)
)


In [7]:
input = Variable(torch.randn(1, 1, 28, 28))
out = net(input)
print(out.size())

torch.Size([1, 10])


In [8]:
target = Variable(torch.LongTensor([3]))
loss_fn = nn.CrossEntropyLoss()
err = loss_fn(out, target)
err.backward()
print(err)

Variable containing:
 2.3481
[torch.FloatTensor of size 1]



In [9]:
print(net.conv1.weight.grad.size())

torch.Size([10, 1, 5, 5])


In [10]:
print(net.conv1.weight.data.norm())
print(net.conv1.weight.grad.data.norm())

1.8619891819897083
0.12695993617775023


In [11]:
def printnorm(self, input, output):
    print("Inside " + self.__class__.__name__ + ' forward')
    print("")
    print("input: ", type(input))
    print("input[0]: ", type(input[0]))
    print("output: ", type(output))
    print("")
    print("input size:", input[0].size())
    print("output size:", output.data.size())
    print("output norm:", output.data.norm())

net.conv2.register_forward_hook(printnorm)
out = net(input)

Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.autograd.variable.Variable'>
output:  <class 'torch.autograd.variable.Variable'>

input size: torch.Size([1, 10, 12, 12])
output size: torch.Size([1, 20, 8, 8])
output norm: 11.922821988911451


In [12]:
def printgradnorm(self, grad_input, grad_output):
    print("Inside " + self.__class__.__name__ + " backward")
    print("Inside class: " + self.__class__.__name__)
    print("")
    print("grad_input: ", type(grad_input))
    print("grad_input[0]: ", type(grad_input[0]))
    print("grad_output: ", type(grad_output))
    print("grad_output[0]: ", type(grad_output[0]))
    print("")
    print("grad_input size:", grad_input[0].size())
    print("grad_output size:", grad_output[0].size())
    print("grad_input norm:", grad_input[0].data.norm())

In [13]:
net.conv2.register_backward_hook(printgradnorm)

<torch.utils.hooks.RemovableHandle at 0x7f1c9873f8d0>

In [14]:
out = net(input)
err = loss_fn(out, target)
err.backward()

Inside Conv2d forward

input:  <class 'tuple'>
input[0]:  <class 'torch.autograd.variable.Variable'>
output:  <class 'torch.autograd.variable.Variable'>

input size: torch.Size([1, 10, 12, 12])
output size: torch.Size([1, 20, 8, 8])
output norm: 11.922821988911451
Inside Conv2d backward
Inside class: Conv2d

grad_input:  <class 'tuple'>
grad_input[0]:  <class 'torch.autograd.variable.Variable'>
grad_output:  <class 'tuple'>
grad_output[0]:  <class 'torch.autograd.variable.Variable'>

grad_input size: torch.Size([1, 10, 12, 12])
grad_output size: torch.Size([1, 20, 8, 8])
grad_input norm: 0.027417933361079557


In [34]:
class RNN(nn.Module):
    
    def __init__(self, data_size, hidden_size, output_size):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        input_size = data_size + hidden_size
        self.i2h = nn.Linear(input_size, hidden_size)
        self.h2o = nn.Linear(hidden_size, output_size)
    
    def forward(self, data, last_hidden):
        input = torch.cat((data, last_hidden), 1)
        hidden = self.i2h(input)
        output = self.h2o(hidden)
        return hidden, output


In [35]:
rnn = RNN(50, 20, 10)

In [36]:
print(rnn)

RNN (
  (i2h): Linear (70 -> 20)
  (h2o): Linear (20 -> 10)
)


In [37]:
loss_fn = nn.MSELoss()

batch_size = 10
TIMESTEPS = 5

In [38]:
batch = Variable(torch.randn(batch_size, 50))

In [39]:
hidden = Variable(torch.zeros(batch_size, 20))

In [40]:
target = Variable(torch.zeros(batch_size, 10))

In [41]:
loss = 0
# rnn(batch, hidden)

In [42]:
for t in range(TIMESTEPS):
    hidden, output = rnn(batch, hidden)
    loss += loss_fn(output, target)
loss.backward()