# CNN Weights - Learnable Parameters in PyTorch Neural Networks

In [50]:
import torch.nn as nn
import torch

In [51]:
  class Network(nn.Module):
      """CNN skeleton with two convolutional layers and three linear layers.

      Because the class extends ``nn.Module``, printing an instance lists the
      layers assigned in ``__init__``.
      """

      def __init__(self):
          super().__init__()

          # Convolutional layers: 1 -> 6 -> 12 channels, each using a 5x5 kernel.
          kernel = 5
          self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=kernel)
          self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=kernel)

          # Linear layers: 192 -> 120 -> 60 -> 10 features.
          flat_features = 12 * 4 * 4
          self.fc1 = nn.Linear(in_features=flat_features, out_features=120)
          self.fc2 = nn.Linear(in_features=120, out_features=60)
          self.out = nn.Linear(in_features=60, out_features=10)

      def forward(self, t):
          """Placeholder: hand back ``t`` untouched until the forward pass is written."""
          # TODO: implement the forward pass
          return t



Hyperparameters are picked arbitrarily.


Learnable parameters are parameters whose values are learned during the training process.
With learnable parameters, we typically start out with a set of arbitrary values, and these values then get updated in an iterative fashion as the network learns.

In fact, when we say that a network is learning, we specifically mean that the network is learning the appropriate values for the learnable parameters. Appropriate values are values that minimize the loss function.

When it comes to our network, we might be thinking, where are these learnable parameters?

Learnable parameters are the weights inside a network

In [52]:
network = Network()

In [53]:
print(network)  # prints the string representation of the network

Network(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 12, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=192, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=60, bias=True)
  (out): Linear(in_features=60, out_features=10, bias=True)
)


In [None]:
# Watch what happens if we stop extending the nn.Module class and then call print(network).
# The output is the default object representation, e.g. <__main__.Network object at 0x0000017802302FD0>

In [54]:
  class Network:
      """Same layers as the ``nn.Module`` version, but as a plain Python class.

      The class deliberately does NOT extend ``nn.Module``, so printing an
      instance falls back to the default ``<... object at 0x...>`` text.
      """

      def __init__(self):
          # No super().__init__() call: there is no nn.Module base class here.
          kernel = 5
          self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=kernel)
          self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=kernel)

          flat_features = 12 * 4 * 4
          self.fc1 = nn.Linear(in_features=flat_features, out_features=120)
          self.fc2 = nn.Linear(in_features=120, out_features=60)
          self.out = nn.Linear(in_features=60, out_features=10)

      def forward(self, t):
          """Placeholder: hand back ``t`` untouched until the forward pass is written."""
          # TODO: implement the forward pass
          return t


In [55]:
network = Network()
print(network)

<__main__.Network object at 0x000002385093CEB0>


In [None]:
#For this reason, in object oriented programming, we usually want to provide a string representation of our object inside our classes so that we get useful information when the object is printed. This string representation comes from Python’s default base class called object.

In [None]:
#Overriding - we can override existing functionality after we extend a class

In [56]:
 
 class Network(nn.Module):
     """``nn.Module`` network whose printed representation is overridden."""

     def __init__(self):
         super().__init__()

         kernel = 5
         self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=kernel)
         self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=kernel)

         flat_features = 12 * 4 * 4
         self.fc1 = nn.Linear(in_features=flat_features, out_features=120)
         self.fc2 = nn.Linear(in_features=120, out_features=60)
         self.out = nn.Linear(in_features=60, out_features=10)

     def forward(self, t):
         """Placeholder: hand back ``t`` untouched until the forward pass is written."""
         # TODO: implement the forward pass
         return t

     def __repr__(self):
         """Return a custom name instead of the layer listing inherited from ``nn.Module``."""
         # repr = representation; overriding it changes the string shown by print().
         return "lizardnet"

In [57]:
network = Network()

In [58]:
print(network)

lizardnet


In [59]:
class Network(nn.Module):
    """Layer definitions only: two conv layers followed by three linear layers.

    No ``forward`` is defined here; this version is used to inspect the layers
    and their weight tensors.
    """

    def __init__(self):
        super().__init__()

        kernel = 5
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=kernel)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=kernel)

        # fc1 consumes the flattened conv2 output: 12 is conv2's out_channels and
        # 4*4 is the height*width left once the image has passed through both conv
        # layers (assumes a 28x28 input with 2x2 pooling after each conv, which this
        # class does not implement -- TODO confirm).
        flat_features = 12 * 4 * 4
        self.fc1 = nn.Linear(in_features=flat_features, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=60)
        self.out = nn.Linear(in_features=60, out_features=10)


#For the convolutional layers, the kernel_size argument is a Python tuple (5,5) even though we only passed the number 5 in the constructor.
#This is because our filters actually have a height and width, and when we pass a single number, the code inside the layer’s constructor assumes that we want a square filter.

#The stride tells the conv layer how far the filter should slide after each operation in the overall convolution. This tuple says to slide by one unit when moving to the right and also by one unit when moving down.



In [60]:
network = Network()

In [61]:
print(network)

Network(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 12, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=192, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=60, bias=True)
  (out): Linear(in_features=60, out_features=10, bias=True)
)


In [62]:
 network.conv1

Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))

In [24]:
network.conv2

Conv2d(6, 12, kernel_size=(5, 5), stride=(1, 1))

In [25]:
network.fc1

Linear(in_features=192, out_features=120, bias=True)

In [26]:
network.fc2

Linear(in_features=120, out_features=60, bias=True)

In [27]:
network.out

Linear(in_features=60, out_features=10, bias=True)

In [32]:
network

Network(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 12, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=192, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=60, bias=True)
  (out): Linear(in_features=60, out_features=10, bias=True)
)

Accessing weights in the layers

In [65]:
network.conv1.weight

Parameter containing:
tensor([[[[-0.1599,  0.1478,  0.0639,  0.1777,  0.1237],
          [-0.1889,  0.0113,  0.0425,  0.0919,  0.1709],
          [-0.0376,  0.1057, -0.1515,  0.0692,  0.0162],
          [-0.1468, -0.0213, -0.0059,  0.0702, -0.1406],
          [ 0.0336, -0.1803, -0.0336, -0.1287,  0.0335]]],


        [[[-0.0135,  0.0349, -0.1384, -0.0711, -0.0389],
          [ 0.1969, -0.1428, -0.0893, -0.0895, -0.0818],
          [ 0.1853, -0.1297, -0.0136,  0.1117, -0.0759],
          [-0.1582, -0.0573,  0.0216,  0.1188,  0.0395],
          [ 0.0063,  0.1840,  0.0952,  0.1686, -0.1601]]],


        [[[ 0.0955, -0.1780,  0.1662,  0.1536, -0.1353],
          [ 0.0230,  0.1392, -0.0136, -0.0940,  0.1962],
          [ 0.1381, -0.1919,  0.0146,  0.0108, -0.1311],
          [ 0.0095,  0.1662,  0.1009,  0.0139,  0.1424],
          [-0.0072,  0.0633,  0.0392, -0.1529, -0.0486]]],


        [[[-0.1534,  0.1206, -0.1600, -0.1073, -0.1282],
          [ 0.1274,  0.1842, -0.0561, -0.1541, -0.0237

The Parameter class extends the Tensor class, and the weight tensor inside each layer is an instance of the Parameter class.

In [66]:
network.conv1.weight.shape

torch.Size([6, 1, 5, 5])

In [None]:
# In the shape above: 6 - the number of filters
# 1 - accounts for the single input channel
# 5, 5 - account for the height and width of each filter

In [67]:
network.conv2.weight.shape

torch.Size([12, 6, 5, 5])

self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)

This convolves one input channel with 6 filters of size 5*5

All the filters are represented by a weight tensor

In [68]:
network.conv2.weight.shape

torch.Size([12, 6, 5, 5])

12 filters - each filter has a depth that matches the number of input channels, so it is able to slide across all channels in one go

6 input channels - assumed as giving some depth to each of the filters 

1) All filters are rep using single tensor
2) Filters have depth that accounts for color channels(inp channels)


Weight tensor shape -
#
first axis : no of filters 
#
second axis : depth of filters corr to input channels 
#
third and fourth axes : height and width of the filters

In [69]:
network.conv2.weight[0].shape

torch.Size([6, 5, 5])

In [None]:
#the above gives a single filter (h , w = 5 and depth = 6)

# Weight tensor for linear/FC layers 

In [70]:
network

Network(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 12, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=192, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=60, bias=True)
  (out): Linear(in_features=60, out_features=10, bias=True)
)

In [71]:
network.fc1.weight.shape

torch.Size([120, 192])

Rank 2 tensor as weight tensor 
#
Height - the number of desired output features 
# 
Width - the length of the input features 

#
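The weight matrix in an FC layer maps one vector space to another; for example, a 3x4 weight matrix maps a 4-dimensional input vector to a 3-dimensional output vector.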

In [45]:
network.fc2.weight.shape

torch.Size([60, 120])

In [46]:
network.out.weight.shape

torch.Size([10, 60])

In [73]:
# Example input: a 1-D float tensor holding 4 feature values.
in_features = torch.tensor([1,2,3,4], dtype=torch.float32)


In [74]:
# Example 3x4 weight matrix: 3 rows (one per output value), 4 columns (one per input value).
weight_matrix = torch.tensor([[1,2,3,4],[2,3,4,5],[3,4,5,6]], dtype=torch.float32)

In [75]:
# Matrix-vector product: each output element is the dot product of one weight row with the input.
weight_matrix.matmul(in_features)

tensor([30., 40., 50.])

In [None]:
# Access all parameters at once

In [55]:
# Walk over every parameter tensor of the network and show its shape.
for tensor in network.parameters():
    print(tensor.shape)

torch.Size([6, 1, 5, 5])
torch.Size([6])
torch.Size([12, 6, 5, 5])
torch.Size([12])
torch.Size([120, 192])
torch.Size([120])
torch.Size([60, 120])
torch.Size([60])
torch.Size([10, 60])
torch.Size([10])


In [58]:
# Same walk as above, but paired with each parameter's qualified name.
for label, tensor in network.named_parameters():
    print(label, '\t\t', tensor.shape)

conv1.weight 		 torch.Size([6, 1, 5, 5])
conv1.bias 		 torch.Size([6])
conv2.weight 		 torch.Size([12, 6, 5, 5])
conv2.bias 		 torch.Size([12])
fc1.weight 		 torch.Size([120, 192])
fc1.bias 		 torch.Size([120])
fc2.weight 		 torch.Size([60, 120])
fc2.bias 		 torch.Size([60])
out.weight 		 torch.Size([10, 60])
out.bias 		 torch.Size([10])
