<a href="https://colab.research.google.com/github/dejanbatanjac/pytorch-learning-101/blob/master/PyTorch_nn_Module.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import torch
import torch.nn as nn

class Module0(nn.Module):
    """The smallest possible module: no layers, no parameters."""

    def __init__(self):
        # nn.Module must be initialised before anything else is done with self
        super().__init__()

    def forward(self) -> None:
        """Print a marker line; takes no input and returns nothing (None)."""
        print("Module0:forward")
        
# Pick the GPU when there is one, otherwise stay on the CPU.
# Module0 holds no tensors, so the move itself does nothing here; the
# device-agnostic form is used so the cell does not depend on a CUDA runtime.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# we create a module instance m0
m0 = Module0().to(device)

# we explicitly run the forward method
m0.forward()

# calling the instance itself also ends up in forward (same line is printed)
m0()

print(m0)
    


Module0:forward
Module0:forward
Module0()


In [0]:
# Both lines end up running Module0.forward and print the same text.
# m0() goes through nn.Module.__call__, which additionally runs any
# registered hooks, so calling the instance is the form to prefer.
m0()
m0.forward()

# 10 times
print("10 more times")
# plain loop: the calls are made only for what they print, so there is
# no point in collecting their None results into a list
for i in range(10):
    m0()

print(m0)

Module0:forward
Module0:forward
10 more times
Module0:forward
Module0:forward
Module0:forward
Module0:forward
Module0:forward
Module0:forward
Module0:forward
Module0:forward
Module0:forward
Module0:forward
Module0()


In [0]:
# Counting parameters: take numel() of every parameter tensor,
# then add the sizes up.
# m0 has no layers, so the list is empty and the total is 0.
t = [p.numel() for p in m0.parameters()]
(t, sum(t))

([], 0)

In [0]:
import torch
import torch.nn as nn

# A single linear layer = one matrix transformation (plus a bias)
class Module1(nn.Module):
    """Module that owns exactly one Linear layer mapping 10 features to 100."""

    def __init__(self):
        super().__init__()
        # assigning a layer to an attribute registers it (and its W and b) with the module
        self.l1 = nn.Linear(in_features=10, out_features=100)

    def forward(self) -> None:
        """Print a marker line; the layer is not used, no input is taken, None is returned."""
        print("Module1:forward")
        
# Module1 owns real parameters (its Linear layer), so a bare .cuda() fails
# on a runtime without CUDA. Use the GPU when present, else fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# we create a module instance m1
m1 = Module1().to(device)
print(m1)


Module1(
  (l1): Linear(in_features=10, out_features=100, bias=True)
)


In [0]:
# Sizes of the trainable parameter tensors of m1, and their total.
# Expected: ([1000, 100], 1100)
# i.e. 10*100 = 1000 parameters in W, 100 in b, 1100 in total
t = [param.numel() for param in m1.parameters() if param.requires_grad]
(t, sum(t))

([1000, 100], 1100)

In [0]:
import torch
import torch.nn as nn

# adding a few more layers of different kinds
class Module2(nn.Module):
    """Three Linear layers, each followed by an activation (ReLU, Tanh, LogSoftmax).

    The layers are only registered in __init__; forward() does not use them.
    """

    def __init__(self):
        super().__init__()
        self.l1 = nn.Linear(50 * 50, 100)
        self.l2 = nn.ReLU()
        self.l3 = nn.Linear(100, 100)
        self.l4 = nn.Tanh()
        self.l5 = nn.Linear(100, 10)
        # dim is passed explicitly: omitting it relies on the deprecated
        # implicit-dimension choice (and warns when the layer is applied).
        # -1 is the last axis, i.e. the 10 outputs produced by l5.
        self.l6 = nn.LogSoftmax(dim=-1)

    # we don't return a thing (read: None), and don't take a thing
    def forward(self) -> None:
        """Print a marker line; takes no input and returns None."""
        print("Module2:forward")
        
# Module2 has Linear layers with parameters, so a bare .cuda() fails on a
# runtime without CUDA. Use the GPU when present, else fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# we create a module instance m2
m2 = Module2().to(device)
print(m2)


In [0]:
# Parameter analysis for m2: size of every parameter tensor, then the total.
# Expected: ([250000, 100, 10000, 100, 1000, 10], 261210)
# so the total number of parameters is ~261K
t = [param.numel() for param in m2.parameters()]
(t, sum(t))

([250000, 100, 10000, 100, 1000, 10], 261210)

In [1]:
# Let's rewrite Module2 as Module3 to show that the layers
# listed one by one in Module2.__init__(self) can be expressed
# as a single sequential network (nn.Sequential)

import torch
import torch.nn as nn

# Use the GPU when one is available, otherwise the CPU, so that this cell
# also runs on runtimes without CUDA (a bare .cuda() fails there because
# the Linear layers own parameters).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# The layers are applied in the order they are listed.
sn = nn.Sequential(
    nn.Linear(50 * 50, 100),
    nn.ReLU(),
    nn.Linear(100, 100),
    nn.Tanh(),
    nn.Linear(100, 10),
    # dim is passed explicitly: omitting it relies on the deprecated
    # implicit-dimension choice. -1 is the last axis (the 10 outputs).
    nn.LogSoftmax(dim=-1),
).to(device)
        
# Module3 holds the whole stack of layers as one child: the Sequential `sn`
class Module3(nn.Module):
    """Module whose only child, l1, is the module-level Sequential `sn`."""

    def __init__(self):
        super().__init__()
        # `sn` is the object built above, not a copy: every Module3
        # instance refers to that same Sequential
        self.l1 = sn

    def forward(self) -> None:
        """Print a marker line; takes no input and returns nothing (None)."""
        print("Module3:forward")
        
# Module3 wraps layers with parameters, so a bare .cuda() fails on a
# runtime without CUDA. Use the GPU when present, else fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# we create a module instance m3
m3 = Module3().to(device)
print(m3)

Module3(
  (l1): Sequential(
    (0): Linear(in_features=2500, out_features=100, bias=True)
    (1): ReLU()
    (2): Linear(in_features=100, out_features=100, bias=True)
    (3): Tanh()
    (4): Linear(in_features=100, out_features=10, bias=True)
    (5): LogSoftmax()
  )
)


In [0]:
# Parameter analysis for m3: size of every parameter tensor, then the total.
t = [param.numel() for param in m3.parameters()]
(t, sum(t))

([250000, 100, 10000, 100, 1000, 10], 261210)