<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Custom-Layers" data-toc-modified-id="Custom-Layers-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Custom Layers</a></span></li><li><span><a href="#The-Sequential-Block" data-toc-modified-id="The-Sequential-Block-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>The Sequential Block</a></span></li><li><span><a href="#Parameter-Management" data-toc-modified-id="Parameter-Management-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Parameter Management</a></span></li><li><span><a href="#Parameter-Access" data-toc-modified-id="Parameter-Access-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Parameter Access</a></span></li><li><span><a href="#All-Parameters-at-Once" data-toc-modified-id="All-Parameters-at-Once-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>All Parameters at Once</a></span></li><li><span><a href="#Parameter-Initialization" data-toc-modified-id="Parameter-Initialization-6"><span class="toc-item-num">6&nbsp;&nbsp;</span>Parameter Initialization</a></span></li></ul></div>

In [1]:
import torch
from torch import nn
import torch.nn.functional as F

## Custom Layers

In [2]:
class MLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.weight1=nn.Parameter(torch.rand(3,5))
        self.bias1=nn.Parameter(torch.randn(5,))
        self.weight2=nn.Parameter(torch.randn(5,1))
        self.bias2=nn.Parameter(torch.randn(1,))
    def forward(self,inputs):
        h1=F.relu(torch.matmul(inputs,self.weight1.data)+self.bias1.data)
        output=torch.matmul(h1,self.weight2.data)+self.bias2.data
        return output
net=MLP()

In [3]:
#torch.randn-Returns a tensor filled with random numbers from a normal distribution
#x=torch.Tensor(10,3).normal_(0,1)
#x=torch.Tensor(10,3).uniform_(0,1)
# torch.rand-Returns a tensor filled with random numbers from a uniform distribution
x=torch.rand(10,3)
x

tensor([[0.1754, 0.6151, 0.7223],
        [0.0396, 0.1980, 0.6702],
        [0.8456, 0.9100, 0.4330],
        [0.7901, 0.0625, 0.4999],
        [0.5814, 0.6105, 0.0684],
        [0.9576, 0.4503, 0.9600],
        [0.7145, 0.7802, 0.1436],
        [0.1509, 0.2680, 0.8937],
        [0.4680, 0.0214, 0.7032],
        [0.2488, 0.5711, 0.0697]])

In [4]:
net(x)

tensor([[-4.1595],
        [-3.6810],
        [-4.2358],
        [-3.5359],
        [-3.6290],
        [-4.2945],
        [-3.8576],
        [-3.9727],
        [-3.6241],
        [-3.5629]])

In [5]:
class MLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.lr1=nn.Linear(3,5)
        self.relu=nn.ReLU()
        self.lr2=nn.Linear(5,1)
        #  initializing the parameters
        self.lr1.weight.detach().normal_(0.0,0.1)
        self.lr1.bias.detach().zero_()
        self.lr2.weight.detach().normal_(0.0,0.1)
        self.lr2.bias.detach().zero_()

    def forward(self,inputs):
        h1=self.relu(self.lr1(inputs))
        output=self.lr2(h1)
        return output
net=MLP()

In [6]:
net(x)

tensor([[-0.0029],
        [-0.0105],
        [-0.0012],
        [-0.0153],
        [-0.0004],
        [-0.0140],
        [-0.0006],
        [-0.0142],
        [-0.0222],
        [-0.0003]], grad_fn=<AddmmBackward>)

## The Sequential Block

In [7]:
class MySequential(nn.Module):
    def __init__(self, *args):
        super().__init__()
        for block in args:
            self._modules[block]=block
    def forward(self,x):
        for block in self._modules.values():
            x=block(x)
        return x

In [8]:
net=MySequential(
                   nn.Linear(3,5),
                    nn.ReLU(), 
                    nn.Linear(5,1))

In [9]:
x=torch.rand(10,3)

In [10]:
net(x)

tensor([[ 0.0129],
        [ 0.0784],
        [ 0.0917],
        [ 0.0117],
        [-0.0061],
        [-0.0427],
        [ 0.0132],
        [ 0.0742],
        [ 0.0908],
        [ 0.0695]], grad_fn=<AddmmBackward>)

In [11]:
net=nn.Sequential(
                   nn.Linear(3,5),
                    nn.ReLU(), 
                    nn.Linear(5,1))

In [12]:
net(x)

tensor([[0.1962],
        [0.2450],
        [0.1304],
        [0.2098],
        [0.1785],
        [0.2105],
        [0.2206],
        [0.1488],
        [0.2406],
        [0.1012]], grad_fn=<AddmmBackward>)

## Parameter Management

In [13]:
network = nn.Sequential(
 nn.Linear(3,4),nn.ReLU(),nn.Linear(4,2))

In [14]:
network(x)

tensor([[-0.5037,  0.3265],
        [-0.3941,  0.1183],
        [-0.4305,  0.2336],
        [-0.3946,  0.1286],
        [-0.3957,  0.1567],
        [-0.5187,  0.3427],
        [-0.4630,  0.2665],
        [-0.4431,  0.2495],
        [-0.4036,  0.1581],
        [-0.4731,  0.3077]], grad_fn=<AddmmBackward>)

## Parameter Access

In [15]:
print(network[0].bias.data,'\n')
print(network[0].bias)

tensor([-0.2014,  0.2191, -0.3463, -0.4553]) 

Parameter containing:
tensor([-0.2014,  0.2191, -0.3463, -0.4553], requires_grad=True)


In [16]:
print(network[2].weight.data,'\n')
print(network[2].weight)

tensor([[-0.1901, -0.0140, -0.3808, -0.4765],
        [ 0.2652,  0.3377,  0.4329, -0.0679]]) 

Parameter containing:
tensor([[-0.1901, -0.0140, -0.3808, -0.4765],
        [ 0.2652,  0.3377,  0.4329, -0.0679]], requires_grad=True)


## All Parameters at Once

In [17]:
network.state_dict()

OrderedDict([('0.weight', tensor([[ 0.5381, -0.4474, -0.4941],
                      [-0.1267,  0.1290,  0.3772],
                      [-0.2581,  0.4610,  0.5257],
                      [ 0.4608, -0.5509, -0.0230]])),
             ('0.bias', tensor([-0.2014,  0.2191, -0.3463, -0.4553])),
             ('2.weight', tensor([[-0.1901, -0.0140, -0.3808, -0.4765],
                      [ 0.2652,  0.3377,  0.4329, -0.0679]])),
             ('2.bias', tensor([-0.3904,  0.0274]))])

In [18]:
network.named_modules

<bound method Module.named_modules of Sequential(
  (0): Linear(in_features=3, out_features=4, bias=True)
  (1): ReLU()
  (2): Linear(in_features=4, out_features=2, bias=True)
)>

In [19]:
class MLP1(nn.Module):
    def __init__(self,**kwargs):
        super().__init__()
        self.lr1=nn.Linear(3,5)
        self.relu=nn.ReLU()
        self.lr2=nn.Linear(5,1)
    def forward(self,inputs):
        h1=self.relu(self.lr1(x))
        return self.lr2(h1)
    

In [20]:
nett=MLP1()

In [21]:
nett(x)

tensor([[0.2565],
        [0.1754],
        [0.3915],
        [0.2705],
        [0.3569],
        [0.2121],
        [0.2735],
        [0.3721],
        [0.1774],
        [0.4157]], grad_fn=<AddmmBackward>)

In [22]:
nett.lr1.weight.data

tensor([[-0.0887, -0.3614,  0.3325],
        [-0.1560,  0.3222, -0.4743],
        [ 0.3742,  0.4434, -0.0769],
        [-0.0317,  0.0601, -0.0036],
        [-0.2493,  0.2729,  0.0655]])

In [23]:
nett.lr1.weight

Parameter containing:
tensor([[-0.0887, -0.3614,  0.3325],
        [-0.1560,  0.3222, -0.4743],
        [ 0.3742,  0.4434, -0.0769],
        [-0.0317,  0.0601, -0.0036],
        [-0.2493,  0.2729,  0.0655]], requires_grad=True)

In [24]:
nett.lr2.bias.data

tensor([0.4157])

In [25]:
nett.parameters

<bound method Module.parameters of MLP1(
  (lr1): Linear(in_features=3, out_features=5, bias=True)
  (relu): ReLU()
  (lr2): Linear(in_features=5, out_features=1, bias=True)
)>

In [26]:
nett.state_dict()

OrderedDict([('lr1.weight', tensor([[-0.0887, -0.3614,  0.3325],
                      [-0.1560,  0.3222, -0.4743],
                      [ 0.3742,  0.4434, -0.0769],
                      [-0.0317,  0.0601, -0.0036],
                      [-0.2493,  0.2729,  0.0655]])),
             ('lr1.bias',
              tensor([-0.4845,  0.3708, -0.1654, -0.4358, -0.3950])),
             ('lr2.weight',
              tensor([[-0.2624, -0.4453, -0.1863,  0.3561, -0.3224]])),
             ('lr2.bias', tensor([0.4157]))])

##  Parameter Initialization

In [27]:
nett.lr1.weight.data=torch.empty(size=(5,3)).normal_(mean=0,std=0.01)
nett.lr1.bias.data=torch.empty(size=(5,)).normal_(mean=0,std=0.01)

In [28]:
nett.state_dict()

OrderedDict([('lr1.weight', tensor([[-0.0049,  0.0184,  0.0062],
                      [ 0.0100, -0.0077, -0.0080],
                      [ 0.0057, -0.0088,  0.0058],
                      [ 0.0027, -0.0103,  0.0018],
                      [ 0.0097, -0.0044,  0.0090]])),
             ('lr1.bias',
              tensor([-0.0042, -0.0093, -0.0074, -0.0087, -0.0168])),
             ('lr2.weight',
              tensor([[-0.2624, -0.4453, -0.1863,  0.3561, -0.3224]])),
             ('lr2.bias', tensor([0.4157]))])

In [29]:
def xavier(m):
    if type(m)==nn.Linear:
        nn.init.xavier_uniform_(m.weight)
        
network[0].apply(xavier)

Linear(in_features=3, out_features=4, bias=True)