# Introduction to torch.nn

- [Linear](#Linear)
- [Convolution](#Convolution)
- [Pooling](#Pooling)
- [RNN](#RNN)


In [4]:
import torch
from torch import nn

# Linear

![](https://upload.wikimedia.org/wikipedia/commons/thumb/1/18/Matrix_multiplication_qtl1.svg/220px-Matrix_multiplication_qtl1.svg.png)

In [5]:
# Fully connected layer: 2 input features -> 5 output features.
linear_layer = nn.Linear(in_features=2, out_features=5)

In [6]:
# Random input vector with 2 elements, matching the layer's 2 input features.
a = torch.randn(2)

In [7]:
# Calling the module applies it to the input; the result has 5 elements (out_features).
linear_layer(a)

tensor([ 0.3644,  0.9429,  1.3355, -0.5222,  0.5814], grad_fn=<AddBackward0>)

In [8]:
# Peek at the module's instance dictionary: the learnable tensors live under
# '_parameters', alongside hooks, buffers and the layer's configuration.
vars(linear_layer)

{'training': True,
 '_parameters': OrderedDict([('weight', Parameter containing:
               tensor([[-0.4207, -0.4730],
                       [ 0.1378, -0.5585],
                       [-0.4909, -0.4753],
                       [-0.4221,  0.6078],
                       [-0.4132, -0.3159]], requires_grad=True)),
              ('bias',
               Parameter containing:
               tensor([-0.2803,  0.3571,  0.6689,  0.0403,  0.1143], requires_grad=True))]),
 '_buffers': OrderedDict(),
 '_non_persistent_buffers_set': set(),
 '_backward_hooks': OrderedDict(),
 '_is_full_backward_hook': None,
 '_forward_hooks': OrderedDict(),
 '_forward_pre_hooks': OrderedDict(),
 '_state_dict_hooks': OrderedDict(),
 '_load_state_dict_pre_hooks': OrderedDict(),
 '_modules': OrderedDict(),
 'in_features': 2,
 'out_features': 5}

In [9]:
# Parameters are exposed as public attributes; use them rather than reaching
# into the private `_parameters` dict. This is the same Parameter object.
linear_layer.weight

Parameter containing:
tensor([[-0.4207, -0.4730],
        [ 0.1378, -0.5585],
        [-0.4909, -0.4753],
        [-0.4221,  0.6078],
        [-0.4132, -0.3159]], requires_grad=True)

In [10]:
# The weight is stored as (out_features, in_features) = (5, 2).
# Accessed through the public attribute instead of the private `_parameters` dict.
linear_layer.weight.shape

torch.Size([5, 2])

In [11]:
# One bias term per output feature: (out_features,) = (5,).
# Accessed through the public attribute instead of the private `_parameters` dict.
linear_layer.bias.shape

torch.Size([5])

In [12]:
# 4-D input whose trailing dimension (size 1) is the feature dimension for the
# nn.Linear(1, 100) layer created next.
x = torch.randn(10, 2, 10, 1)

In [13]:
# Rebinds `linear_layer` to a new layer: 1 input feature -> 100 output features.
linear_layer = nn.Linear(in_features=1, out_features=100)

In [14]:
# nn.Linear is applied along the last dimension of x; all leading dimensions
# are treated as batch dimensions.
y = linear_layer(x)

In [15]:
# Leading dimensions (10, 2, 10) are unchanged; only the last one goes 1 -> 100.
y.shape

torch.Size([10, 2, 10, 100])

# Convolution

![](https://www.researchgate.net/profile/Lukas-Mosser/publication/321719286/figure/fig2/AS:570220119298049@1512962478942/Example-of-a-discrete-convolution-a-and-equivalent-transposed-convolution-operation-b.png)

In [16]:
# Batch of 10 single-channel 24x24 inputs, laid out as (N, C, H, W).
x = torch.randn(10, 1, 24, 24)

In [17]:
# 3x3 convolution mapping 1 input channel to 10 output channels
# (stride and padding are left at their defaults: stride 1, no padding).
conv_layer = nn.Conv2d(in_channels=1, out_channels=10, kernel_size=3)

In [18]:
# Apply the convolution to the whole batch.
y = conv_layer(x)

In [19]:
# 10 output channels; with a 3x3 kernel and no padding each spatial side shrinks 24 -> 22.
y.shape

torch.Size([10, 10, 22, 22])

Add dummy (size-1) batch and channel dimensions with `.unsqueeze`, so that a 2-D tensor gets the `(N, C, H, W)` layout used above.

In [20]:
# A single 24x24 input with no batch or channel dimension.
x = torch.randn(24, 24)

In [21]:
# Prepend two size-1 dimensions: (24, 24) -> (1, 1, 24, 24).
# Note: this rebinds x, so re-running this cell adds two more dimensions each time.
x = x.unsqueeze(0).unsqueeze(0)

In [22]:
# With the dummy batch and channel dimensions in place, the 1 -> 10 channel
# convolution accepts the input: (1, 1, 24, 24) -> (1, 10, 22, 22).
conv_layer(x).shape

torch.Size([1, 10, 22, 22])

Permute dimensions

In [23]:
# A (24, 24, 3) tensor, e.g. an image stored with its 3 channels last.
x = torch.randn(24, 24, 3)

In [24]:
# transpose swaps exactly two dimensions, here 0 and 2: (24, 24, 3) -> (3, 24, 24).
x.transpose(0, 2).shape

torch.Size([3, 24, 24])

or, equivalently, with `.permute`:

In [25]:
# permute takes the new order of all dimensions; (2, 1, 0) is the same swap as transpose(0, 2).
x.permute(2, 1, 0).shape

torch.Size([3, 24, 24])

In [26]:
# Same 1 -> 10 channel, 3x3 convolution as conv_layer, but with stride=2,
# so the kernel moves two positions at a time.
conv_layer2 = nn.Conv2d(in_channels=1, out_channels=10, kernel_size=3, stride=2)

Downsampling: with `stride=2` the convolution roughly halves each spatial dimension.

In [30]:
# Batch of 10 single-channel 24x24 inputs, (N, C, H, W).
x = torch.randn(10, 1, 24, 24)

In [31]:
# Apply the strided convolution to the batch.
y = conv_layer2(x)

In [32]:
# With kernel 3, stride 2 and no padding each spatial side goes 24 -> 11.
y.shape

torch.Size([10, 10, 11, 11])

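The output size can be computed with the formula $W_{out} = \left\lfloor \frac{W_{in} - K + 2P}{S} \right\rfloor + 1$, where $K$ is the kernel size, $P$ the padding and $S$ the stride (assuming dilation 1). For the example above: $\lfloor (24 - 3 + 0) / 2 \rfloor + 1 = 11$.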

In [33]:
# Convolution weights are stored as (out_channels, in_channels, kernel_h, kernel_w).
# Accessed through the public attribute instead of the private `_parameters` dict.
conv_layer2.weight.shape

torch.Size([10, 1, 3, 3])

# Pooling

![](https://production-media.paperswithcode.com/methods/MaxpoolSample2.png)

In [37]:
# Max pooling over 3x3 windows, moving 2 positions at a time.
pooling = nn.MaxPool2d(kernel_size=3, stride=2)

In [38]:
# x is still the (10, 1, 24, 24) batch from the downsampling example.
# Pooling keeps the channel count and reduces each spatial side 24 -> 11.
pooling(x).shape

torch.Size([10, 1, 11, 11])

# RNN

![](https://miro.medium.com/max/1400/0*nukrZzIzKICTAfST)

In [39]:
# Single-layer LSTM: 10 input features per time step, 20 hidden units.
# batch_first=True means inputs and outputs are laid out as (batch, seq, feature).
lstm = nn.LSTM(input_size=10, hidden_size=20, batch_first=True)

In [40]:
# batch=2, seq_len=3, input_size=10
x = torch.randn(2, 3, 10)

In [41]:
# No initial state is passed, so h_0 and c_0 default to zeros.
# The result is a tuple: (output, (h_n, c_n)).
y = lstm(x)

In [42]:
# The LSTM returns a tuple, not a single tensor.
type(y)

tuple

In [43]:
# y[0] holds the hidden state for every time step: (batch=2, seq_len=3, hidden_size=20).
y[0].shape

torch.Size([2, 3, 20])

In [44]:
# y[1] is the (h_n, c_n) pair: index 0 is the final hidden state and index 1
# the final cell state. Their shapes are equal, but each must be read from its own tensor.
print(f"h_n shape {y[1][0].shape}, c_n shape {y[1][1].shape}")

h_n shape torch.Size([1, 2, 20]), c_n shape torch.Size([1, 2, 20])


The initial hidden and cell states, $h_0$ and $c_0$, can also be provided:

In [45]:
# Stacked (2-layer) LSTM. batch_first is left at its default, so tensors are
# laid out as (seq_len, batch, feature).
rnn = nn.LSTM(input_size=10, hidden_size=20, num_layers=2)
x = torch.randn(5, 3, 10)  # seq_len=5, batch=3, input_size=10
# One initial hidden state and one initial cell state per layer:
# (num_layers, batch, hidden_size).
h0, c0 = torch.randn(2, 3, 20), torch.randn(2, 3, 20)
output, (hn, cn) = rnn(x, (h0, c0))