# Lab 03 : Vanilla neural networks -- demo

# Creating a two-layer network

In [1]:
import torch
import torch.nn as nn

### In PyTorch, networks are defined as classes

In [2]:
class two_layer_net(nn.Module):
    """Two-layer fully connected network ending in a softmax.

    Computes ``softmax(layer2(relu(layer1(x))))``.

    Args:
        input_size: Number of features of the input.
        hidden_size: Number of units produced by the first linear layer.
        output_size: Number of entries of the output probability vector.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()

        self.layer1 = nn.Linear(input_size, hidden_size, bias=True)
        self.layer2 = nn.Linear(hidden_size, output_size, bias=True)

    def forward(self, x):
        """Return the probability vector(s) computed from ``x``.

        Args:
            x: Tensor whose last dimension has size ``input_size``; either a
                single vector or a batch of vectors.

        Returns:
            Tensor whose last dimension has size ``output_size`` and whose
            entries along that dimension are non-negative and sum to 1.
        """
        x = self.layer1(x)
        x = torch.relu(x)
        x = self.layer2(x)
        # Normalize over the last (feature) dimension. dim=0 only coincides
        # with this for a 1-D input; on a (batch, features) input it would
        # normalize across the batch instead of across the outputs.
        p = torch.softmax(x, dim=-1)

        return p

### Create an instance that takes an input of size 2, transforms it into something of size 5, then into something of size 3
$$
\begin{bmatrix}
\times \\ \times 
\end{bmatrix}
\longrightarrow
\begin{bmatrix}
\times \\ \times \\ \times \\ \times \\ \times
\end{bmatrix}
\longrightarrow
\begin{bmatrix}
\times \\ \times \\ \times
\end{bmatrix}
$$

In [3]:
# 2 input features -> 5 hidden units -> 3 outputs
net = two_layer_net(input_size=2, hidden_size=5, output_size=3)
print(net)

two_layer_net(
  (layer1): Linear(in_features=2, out_features=5, bias=True)
  (layer2): Linear(in_features=5, out_features=3, bias=True)
)


### Now we are going to make an input vector and feed it to the network:

In [4]:
# Build the input vector with the torch.tensor factory; the explicit dtype
# keeps the float32 result that the torch.Tensor constructor produces.
x = torch.tensor([1, 1], dtype=torch.float32)
print(x)

tensor([1., 1.])


In [7]:
# Explicit call to the forward method defined in two_layer_net.
p=net.forward(x) # net(x) can be used instead of net.forward(x)
print(p)

tensor([0.3791, 0.3362, 0.2847], grad_fn=<SoftmaxBackward0>)


### Syntactic sugar for the forward method

In [8]:
# Calling the module itself gives the same probabilities as net.forward(x).
p=net(x)
print(p)

tensor([0.3791, 0.3362, 0.2847], grad_fn=<SoftmaxBackward0>)


### Let's check that the probability vector indeed sums to 1:

In [9]:
# The entries of the softmax output should add up to 1.
print(torch.sum(p))

tensor(1., grad_fn=<SumBackward0>)


### This network is composed of two Linear modules that we have called layer1 and layer2. We can see this when we type:

In [10]:
# Printing the network lists its submodules (layer1, layer2) with their sizes.
print(net)

two_layer_net(
  (layer1): Linear(in_features=2, out_features=5, bias=True)
  (layer2): Linear(in_features=5, out_features=3, bias=True)
)


### We can access the first module as follows:

In [11]:
# layer1 is the nn.Linear module assigned in two_layer_net.__init__.
print(net.layer1)

Linear(in_features=2, out_features=5, bias=True)


### To get the weights and bias of the first layer we do:

In [12]:
# Weight matrix of layer1: 5 rows (hidden units) by 2 columns (input features) for this network.
print(net.layer1.weight)

Parameter containing:
tensor([[-0.0428, -0.1495],
        [-0.4138, -0.4214],
        [ 0.5042,  0.3004],
        [ 0.3277, -0.4135],
        [-0.6347, -0.6852]], requires_grad=True)


In [13]:
# Bias vector of layer1: one entry per hidden unit (5 for this network).
print(net.layer1.bias)

Parameter containing:
tensor([0.0311, 0.4958, 0.2903, 0.2346, 0.6396], requires_grad=True)


### So to change the first row of the weights from layer 1 you would do:

In [16]:
# Note: the weights are part of the computational graph and require gradients,
# so writing into them directly causes an error. Inside torch.no_grad() gradient
# tracking is temporarily disabled, which lets us overwrite the values without
# affecting the computational graph.
with torch.no_grad():
    # Overwrite the whole first row of layer1's weight matrix in one assignment.
    net.layer1.weight[0] = torch.tensor([10., 20.])
    print(net.layer1.weight)

# Uncomment to try setting a weight without torch.no_grad() and see the error:
# net.layer1.weight[0,0]=10

Parameter containing:
tensor([[10.0000, 20.0000],
        [-0.4138, -0.4214],
        [ 0.5042,  0.3004],
        [ 0.3277, -0.4135],
        [-0.6347, -0.6852]], requires_grad=True)


### Now we are going to feed  $x=\begin{bmatrix}1\\1 \end{bmatrix}$ to this modified network:

In [17]:
# Same input as before, now passed through the network whose layer1 weights were modified.
p=net(x)
print(p)

tensor([2.5692e-07, 9.3828e-01, 6.1720e-02], grad_fn=<SoftmaxBackward0>)


### Alternatively, all the parameters of the network can be accessed by net.parameters(). 

In [18]:
# Gather every parameter tensor of the network: weight and bias of layer1,
# followed by weight and bias of layer2.
list_of_param = [param for param in net.parameters()]
print(list_of_param)

[Parameter containing:
tensor([[10.0000, 20.0000],
        [-0.4138, -0.4214],
        [ 0.5042,  0.3004],
        [ 0.3277, -0.4135],
        [-0.6347, -0.6852]], requires_grad=True), Parameter containing:
tensor([0.0311, 0.4958, 0.2903, 0.2346, 0.6396], requires_grad=True), Parameter containing:
tensor([[-0.2714, -0.4105, -0.0572,  0.3984,  0.3101],
        [ 0.2358, -0.1271,  0.0285, -0.3532, -0.0427],
        [ 0.1507,  0.3056, -0.4212,  0.3645,  0.2723]], requires_grad=True), Parameter containing:
tensor([-0.2270, -0.3293, -0.1098], requires_grad=True)]
