In [1]:
# Dependencies
import torch.nn as nn

%matplotlib inline

In [2]:
"""
Models in pytorch are based on two main classes--
1. `torch.nn.Module`
    Encapsulates models and their components, e.g., neural nets and layers.
2. `torch.nn.Parameter`
    Parameters are subclass of `torch.Tensor` and represent learning weights.
    When a parameter is assigned as an attribute of a model, the parameter object gets registered with that module.
    When a module instance is an attribute of a model, its parameters are also registered as that of the owning class.
"""

'\nModels in pytorch are based on two main classes--\n1. `torch.nn.Module`\n    Encapsulates models and their components, e.g., neural nets and layers.\n2. `torch.nn.Parameter`\n    Parameters are subclass of `torch.Tensor` and represent learning weights.\n    When a parameter is assigned as an attribute of a model, the parameter object gets registered with that module.\n    When a module instance is an attribute of a model, its parameters are also registered as that of the owning class.\n'

In [3]:
input_dim: int = 10
hidden_dim: int = 5
output_dim: int = 2
batch_size: int = 4

In [4]:
class Model(nn.Module):
    r"""A basic example model.
    """

    #
    def __init__(self):
        r"""The initializer.
        """
        super(Model, self).__init__()
        self.layer1 = nn.Linear(in_features=input_dim, out_features=hidden_dim)
        self.act1 = nn.ReLU()
        self.layer2 = nn.Linear(in_features=hidden_dim, out_features=output_dim)
        self.act2 = nn.Softmax()

    #
    def forward(self, x_):
        r"""Implements the forward pass.

        Parameters
        ----------
        x_:
            Input tensor.
            SHAPE: [*<batch_sizes>, input_dim].

        Returns
        -------
        out:
            Output tensor.
            SHAPE: [*<batch_sizes>, output_dim].
        """
        return self.act2(self.layer2(self.act1(self.layer1(x_))))

In [5]:
model = Model()
print('model:\n{}'.format(model))
print('*'*79)
print('model parameters type:\n{}\nlength: {}'.format(
    type(model.parameters()), len([_ for _ in model.parameters()])
))  # Generator instance.
print('*'*79)
print('model parameters instance type:\n{}'.format(
    type([_ for _ in model.parameters()][0])
))  # `torch.nn.Parameter` instance.
print('*'*79)
print('model parameters 0-th instance:\n{}'.format(
    [_ for _ in model.parameters()][0]
))
print('*'*79)
print('model parameters:\n{}'.format([_ for _ in model.parameters()]))
print('*'*79)
print('model named parameters type:\n{}\nlength: {}'.format(
    type(model.named_parameters()),
    len([_ for _ in model.named_parameters()])
))  # Generator instance.
print('*'*79)
print('model named parameters instance type:\n{}\nlength: {}'.format(
    type([_ for _ in model.named_parameters()][0]),
    len([_ for _ in model.named_parameters()][0])
))  # `Tuple`, 2
print('*'*79)
print('model named parameters attributes instance type:\n{}, {}'.format(
    type([_ for _ in model.named_parameters()][0][0]),
    type([_ for _ in model.named_parameters()][0][1])
))  # `str`, `torch.nn.Parameter`
print('*'*79)
for named_parameter in model.named_parameters():
    print('name of the parameter: {}\nvalue:\n{}'.format(named_parameter[0], named_parameter[1]))
    print('*'*39)

model:
Model(
  (layer1): Linear(in_features=10, out_features=5, bias=True)
  (act1): ReLU()
  (layer2): Linear(in_features=5, out_features=2, bias=True)
  (act2): Softmax(dim=None)
)
*******************************************************************************
model parameters type:
<class 'generator'>
length: 4
*******************************************************************************
model parameters instance type:
<class 'torch.nn.parameter.Parameter'>
*******************************************************************************
model parameters 0-th instance:
Parameter containing:
tensor([[-0.1021,  0.1046, -0.1670, -0.1134,  0.2864,  0.3161, -0.2662,  0.2645,
         -0.1393, -0.1636],
        [ 0.0553, -0.1285, -0.0282,  0.3058, -0.1665, -0.1441, -0.0580, -0.1139,
         -0.0469,  0.1676],
        [-0.0769, -0.1688,  0.0229,  0.1075, -0.0738, -0.2767, -0.2210,  0.1862,
          0.0562,  0.2376],
        [ 0.0818, -0.2843,  0.0921,  0.3091,  0.1554, -0.0688,  0.2361,

In [6]:
print('*'*79)
print("model's layer 1 details")
print('*'*79)
print('model1.layer1 parameters:\n{}'.format(
    [_ for _ in model.layer1.parameters()]
))
print('*'*79)
print('model1.layer1 named parameters:\n{}'.format(
    [_ for _ in model.layer1.named_parameters()]
))

*******************************************************************************
model's layer 1 details
*******************************************************************************
model1.layer1 parameters:
[Parameter containing:
tensor([[-0.1021,  0.1046, -0.1670, -0.1134,  0.2864,  0.3161, -0.2662,  0.2645,
         -0.1393, -0.1636],
        [ 0.0553, -0.1285, -0.0282,  0.3058, -0.1665, -0.1441, -0.0580, -0.1139,
         -0.0469,  0.1676],
        [-0.0769, -0.1688,  0.0229,  0.1075, -0.0738, -0.2767, -0.2210,  0.1862,
          0.0562,  0.2376],
        [ 0.0818, -0.2843,  0.0921,  0.3091,  0.1554, -0.0688,  0.2361, -0.3134,
          0.1612, -0.3060],
        [-0.1447, -0.1402, -0.2425, -0.0482,  0.1836,  0.0423, -0.1611, -0.2022,
          0.1788, -0.2048]], requires_grad=True), Parameter containing:
tensor([0.0502, 0.0859, 0.1637, 0.0009, 0.1557], requires_grad=True)]
*******************************************************************************
model1.layer1 named paramete

In [7]:
# Fully connected linear layer.
print('*'*79)
linear = nn.Linear(in_features=10, out_features=2, bias=True)
print('linear layer:\n{}'.format(linear))
print('*'*79)
print('linear layer parameters:\n{}\nlength: {}'.format(
    [_ for _ in linear.parameters()], len([_ for _ in linear.parameters()])
))
print('*'*79)
print('linear layer named parameters:\n{}\nlength: {}'.format(
    [_ for _ in linear.named_parameters()], len([_ for _ in linear.named_parameters()])
))
print('*'*79)
linear_nobias = nn.Linear(in_features=10, out_features=2, bias=False)
print('linear with no bias layer parameters:\n{}\nlength: {}'.format(
    [_ for _ in linear_nobias.parameters()], len([_ for _ in linear_nobias.parameters()])
))
print('*'*79)
print('linear with no bias layer named parameters:\n{}\nlength: {}'.format(
    [_ for _ in linear_nobias.named_parameters()], len([_ for _ in linear_nobias.named_parameters()])
))

*******************************************************************************
linear layer:
Linear(in_features=10, out_features=2, bias=True)
*******************************************************************************
linear layer parameters:
[Parameter containing:
tensor([[-0.3020, -0.0012, -0.0561, -0.0203, -0.2149, -0.2382, -0.0775, -0.0974,
          0.1755, -0.2390],
        [-0.2036, -0.1802, -0.3014, -0.1920,  0.0076,  0.1387, -0.2897,  0.1927,
          0.0420,  0.1477]], requires_grad=True), Parameter containing:
tensor([-0.2250,  0.0702], requires_grad=True)]
length: 2
*******************************************************************************
linear layer named parameters:
[('weight', Parameter containing:
tensor([[-0.3020, -0.0012, -0.0561, -0.0203, -0.2149, -0.2382, -0.0775, -0.0974,
          0.1755, -0.2390],
        [-0.2036, -0.1802, -0.3014, -0.1920,  0.0076,  0.1387, -0.2897,  0.1927,
          0.0420,  0.1477]], requires_grad=True)), ('bias', Parameter con

In [8]:
# Convolutional layer.
print('*'*79)
conv2d = nn.Conv2d(kernel_size=(4, 5), in_channels=3, out_channels=6, bias=True)
print('convolutional layer:\n{}'.format(conv2d))
print('*'*79)
for param in conv2d.parameters():
    print('param:\n{}'.format(param))
print('*'*79)
for param in conv2d.named_parameters():
    print('param name: {}\nparam value:\n{}'.format(param[0], param[1]))
print('*'*79)
print('number of parameters: {} (={})'.format(
    len([_ for _ in conv2d.parameters()]), len([_ for _ in conv2d.named_parameters()])
))
print('*'*79)
print('conv layer weight shape: {}'.format(conv2d.weight.shape))  # [<out_channels>, <in_channels>, *<kernel_shape>]
print('conv layer bias shape: {}'.format(conv2d.bias.shape))  # [<out_channels>, ]

*******************************************************************************
convolutional layer:
Conv2d(3, 6, kernel_size=(4, 5), stride=(1, 1))
*******************************************************************************
param:
Parameter containing:
tensor([[[[ 0.0387,  0.1239,  0.0778, -0.1054, -0.0785],
          [ 0.0686, -0.0441,  0.1030, -0.1146,  0.0502],
          [-0.0848, -0.0677,  0.0899, -0.1166,  0.0716],
          [-0.0855,  0.0031,  0.0800,  0.0978, -0.0279]],

         [[-0.0388,  0.0523,  0.0908,  0.0155, -0.1143],
          [ 0.0673,  0.1113,  0.1271, -0.0353, -0.0797],
          [ 0.1083, -0.0738,  0.0285, -0.1175,  0.0549],
          [ 0.0123, -0.0157, -0.0755,  0.0169, -0.1232]],

         [[ 0.0446,  0.0992,  0.0155,  0.0110, -0.0657],
          [-0.0301,  0.1129,  0.0429, -0.0961,  0.0464],
          [-0.0254,  0.0151, -0.1281, -0.1192, -0.0042],
          [-0.1161, -0.0056, -0.1036, -0.1120, -0.0275]]],


        [[[-0.0276, -0.1274,  0.1200,  0.0728, -0.

In [9]:
# Vanilla recurrent layer.
print('*'*79)
rnn = nn.RNN(input_size=7, hidden_size=3, num_layers=2, bias=True, bidirectional=True)
print('bidirectional rnn layer:\n{}'.format(rnn))
print('*'*79)
for param in rnn.parameters():
    print('param:\n{}'.format(param))
print('*'*79)
for param in rnn.named_parameters():
    print('param name: {}\nparam value:\n{}'.format(param[0], param[1]))
print('*'*79)
print('number of parameters: {} (={})'.format(
    len([_ for _ in rnn.parameters()]), len([_ for _ in rnn.named_parameters()])
))
print('*'*79)
print('*'*79)
rnn = nn.RNN(input_size=7, hidden_size=3, num_layers=2, bias=True, bidirectional=False)
print('vanilla rnn layer:\n{}'.format(rnn))
print('*'*79)
for param in rnn.parameters():
    print('param:\n{}'.format(param))
print('*'*79)
for param in rnn.named_parameters():
    print('param name: {}\nparam value:\n{}'.format(param[0], param[1]))
print('*'*79)
print('number of parameters: {} (={})'.format(
    len([_ for _ in rnn.parameters()]), len([_ for _ in rnn.named_parameters()])
))
print('*'*79)

*******************************************************************************
bidirectional rnn layer:
RNN(7, 3, num_layers=2, bidirectional=True)
*******************************************************************************
param:
Parameter containing:
tensor([[ 0.1433,  0.0512, -0.1738,  0.2987, -0.0241,  0.0783, -0.3431],
        [-0.3102, -0.5424,  0.2054,  0.2688,  0.4827, -0.2915, -0.0637],
        [ 0.0674,  0.3227, -0.1208, -0.0980,  0.0801,  0.5475, -0.0480]],
       requires_grad=True)
param:
Parameter containing:
tensor([[ 0.1513,  0.3481,  0.4531],
        [ 0.2473,  0.3999, -0.5677],
        [ 0.0337, -0.2154,  0.4103]], requires_grad=True)
param:
Parameter containing:
tensor([-0.5614,  0.1699,  0.3528], requires_grad=True)
param:
Parameter containing:
tensor([0.3705, 0.3998, 0.4522], requires_grad=True)
param:
Parameter containing:
tensor([[-0.2418, -0.4999, -0.3313,  0.0535, -0.5116, -0.1570, -0.0919],
        [ 0.0283, -0.5324,  0.3458, -0.0493,  0.1670,  0.4153,  0

In [10]:
# Long Short Term Memory (LSTM) and variants with `bias=False`, `bidirectional=False`
lstm = nn.LSTM(input_size=7, hidden_size=3, num_layers=2, bias=True, bidirectional=True)
print('bidirectional lstm layer:\n{}'.format(lstm))
print('*'*79)
for param in lstm.named_parameters():
    print('param name: {}\nparam value:\n{}'.format(param[0], param[1]))
print('*'*79)
print('number of parameters: {} (={})'.format(
    len([_ for _ in lstm.parameters()]), len([_ for _ in lstm.named_parameters()])
))
print('*'*79)

bidirectional rnn layer:
LSTM(7, 3, num_layers=2, bidirectional=True)
*******************************************************************************
param name: weight_ih_l0
param value:
Parameter containing:
tensor([[ 0.5433,  0.1038, -0.2110, -0.3104, -0.4350, -0.1444, -0.3498],
        [-0.4919, -0.3306,  0.2637,  0.0951, -0.3271,  0.5270,  0.2897],
        [-0.3156,  0.0363, -0.5236,  0.3686, -0.0076,  0.4734,  0.0722],
        [-0.4724,  0.4088, -0.2999, -0.2423, -0.3757, -0.2682, -0.2645],
        [ 0.3096,  0.1195, -0.1634,  0.0052, -0.1144, -0.4067, -0.1990],
        [-0.2592,  0.2956, -0.2253,  0.3354,  0.4368, -0.4077, -0.0589],
        [-0.5396,  0.2206, -0.1965,  0.0478, -0.1008,  0.4866,  0.4834],
        [-0.0953, -0.1377,  0.1541, -0.4141, -0.4252, -0.5530,  0.3502],
        [ 0.2661, -0.3202,  0.3023, -0.0863,  0.0873, -0.3209,  0.5054],
        [ 0.1389, -0.4313,  0.4500,  0.4513,  0.4787, -0.0702,  0.1273],
        [-0.5655,  0.3049,  0.1546,  0.3812, -0.2219,  0.011

In [12]:
# Gated recurrent unit (GRU) and variants with `bias=False`, `bidirectional=False`
gru = nn.GRU(input_size=7, hidden_size=3, num_layers=2, bias=True, bidirectional=True)
print('bidirectional gru layer:\n{}'.format(gru))
print('*'*79)
for param in gru.named_parameters():
    print('param name: {}\nparam value:\n{}'.format(param[0], param[1]))
print('*'*79)
print('number of parameters: {} (={})'.format(
    len([_ for _ in gru.parameters()]), len([_ for _ in gru.named_parameters()])
))
print('*'*79)

bidirectional gru layer:
GRU(7, 3, num_layers=2, bidirectional=True)
*******************************************************************************
param name: weight_ih_l0
param value:
Parameter containing:
tensor([[-0.3999, -0.5689,  0.2804,  0.2479, -0.3600, -0.0287, -0.2918],
        [ 0.4265,  0.3369,  0.5603, -0.5080,  0.3118,  0.5395, -0.0970],
        [ 0.5601, -0.2796,  0.5429,  0.5337, -0.3717,  0.2954, -0.3988],
        [-0.2428, -0.2172,  0.1587,  0.3153, -0.0432, -0.5757, -0.1799],
        [ 0.2090,  0.4545, -0.5349,  0.2925,  0.5290, -0.5542, -0.1370],
        [-0.1399, -0.2855, -0.0012, -0.1134, -0.5016, -0.3951,  0.5477],
        [ 0.1161, -0.0954, -0.0290, -0.5393, -0.0318, -0.2148, -0.5338],
        [ 0.3792,  0.2699, -0.1008, -0.1262, -0.3191,  0.5485,  0.1139],
        [ 0.4190, -0.4209,  0.3007,  0.0476, -0.0526, -0.5619,  0.0685]],
       requires_grad=True)
param name: weight_hh_l0
param value:
Parameter containing:
tensor([[-0.3839,  0.5737,  0.2372],
        [