<a href="https://colab.research.google.com/github/Vatsa10/pytorch/blob/main/day3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Neural Network**

In [None]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [None]:
# Pick the device used by the rest of the notebook.
# `torch.accelerator` only exists in PyTorch >= 2.6; on older releases touching
# it raises AttributeError, so fall back to the per-backend availability checks.
if hasattr(torch, "accelerator"):
    device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
elif torch.cuda.is_available():
    device = "cuda"
elif getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

Using cpu device


In [None]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier: Flatten -> (Linear + ReLU) x 2 -> Linear.

    Args:
        in_features: number of values per sample once flattened
            (default ``28 * 28``, i.e. one 28x28 image).
        hidden_features: width of both hidden layers (default 512).
        num_classes: number of output logits (default 10).

    Constructed without arguments it builds the same 784-512-512-10
    network (same sub-module names and parameter shapes) as before.
    """

    def __init__(self, in_features=28 * 28, hidden_features=512, num_classes=10):
        super().__init__()
        # Flatten keeps dim 0 (the batch) and merges all remaining dims.
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, num_classes),
        )

    def forward(self, x):
        """Return the raw class scores (logits), one row per sample in ``x``."""
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

In [None]:
# Build the network, move its parameters to the selected device, then show its layers.
model = NeuralNetwork()
model = model.to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [None]:
# Run a single random 28x28 input (batch of size 1) through the model.
X = torch.rand(1, 28, 28, device=device)
logits = model(X)

# Turn the raw scores into probabilities over dim 1, then take the index of
# the largest probability as the predicted class.
pred_probab = nn.Softmax(dim=1)(logits)
y_pred = pred_probab.argmax(dim=1)
print(f"Predicted class: {y_pred}")

Predicted class: tensor([5])


In [None]:
# A mini-batch of three random 28x28 images, used to walk through the layers one by one.
input_image = torch.rand(3, 28, 28)
print(input_image.shape)

torch.Size([3, 28, 28])


In [None]:
# nn.Flatten turns each 2D 28x28 image into a contiguous array of 784 pixel
# values; the batch dimension (dim 0) is kept.
flatten = nn.Flatten()
flat_image = flatten(input_image)
print(flat_image.shape)

torch.Size([3, 784])


In [None]:
# nn.Linear applies a linear transformation to its input using the weights
# and bias it stores: here 784 input features are mapped to 20 outputs.
layer1 = nn.Linear(28 * 28, 20)
hidden1 = layer1(flat_image)
print(hidden1.shape)

torch.Size([3, 20])


In [None]:
# nn.ReLU is a non-linear activation applied between the linear layers.
# The non-linearity lets the network learn complex mappings between its input
# and output. Compare the two printouts: negative entries become 0, the rest
# pass through unchanged.
print(f"Before ReLU: {hidden1}\n\n")
hidden1 = nn.ReLU()(hidden1)
print(f"After ReLU: {hidden1}")

Before ReLU: tensor([[ 0.0186,  0.0264,  0.2013, -0.4350,  0.3261,  0.2451, -0.5106, -0.2354,
         -0.2482, -0.1536, -0.4635,  0.1739,  0.3640,  0.1085,  0.0801,  0.2293,
          0.6897, -0.3096, -0.0235,  0.0882],
        [ 0.3521, -0.0983, -0.0563, -0.2762,  0.3567,  0.1350, -0.6070, -0.1307,
         -0.2070, -0.0341, -0.4157,  0.1981,  0.1114,  0.0379,  0.1457,  0.6690,
          0.4446, -0.1523, -0.3109, -0.2374],
        [ 0.5499, -0.1582,  0.0684, -0.7208,  0.1981, -0.0806, -0.4896, -0.0613,
         -0.2780,  0.1717, -0.4365,  0.2430, -0.0728, -0.1148,  0.2786,  0.3404,
          0.3951, -0.2482, -0.0797,  0.2373]], grad_fn=<AddmmBackward0>)


After ReLU: tensor([[0.0186, 0.0264, 0.2013, 0.0000, 0.3261, 0.2451, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.1739, 0.3640, 0.1085, 0.0801, 0.2293, 0.6897, 0.0000,
         0.0000, 0.0882],
        [0.3521, 0.0000, 0.0000, 0.0000, 0.3567, 0.1350, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.1981, 0.1114, 0.0379, 0.14

In [None]:
# nn.Sequential is an ordered container of modules: the data is passed
# through them in the same order in which they are listed here.
seq_modules = nn.Sequential(
    flatten,
    layer1,
    nn.ReLU(),
    nn.Linear(in_features=20, out_features=10),
)
input_image = torch.rand(3, 28, 28)
logits = seq_modules(input_image)

In [None]:
# The last linear layer of the network returns logits: raw values in (-infinity, infinity).
# Passing the logits through nn.Softmax rescales them to values in [0, 1]
# that represent the model's predicted probability for each class.

softmax = nn.Softmax(dim=1)  # dim=1: the dimension along which the values must sum to 1
pred_probab = softmax(logits)

In [None]:
# Print the model, then for every learnable parameter its name, its size and
# its first two entries along dim 0.
print(f"Model Architecture; {model}\n\n")
for name, param in model.named_parameters():
    print(f"Layer: {name} | Size: {param.size()} | Values: {param[:2]} \n")

Model Architecture; NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


Layer: linear_relu_stack.0.weight | Size: torch.Size([512, 784]) | Values: tensor([[-0.0078, -0.0263,  0.0009,  ...,  0.0233, -0.0163, -0.0221],
        [-0.0130, -0.0234, -0.0248,  ..., -0.0161, -0.0304, -0.0115]],
       grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.0.bias | Size: torch.Size([512]) | Values: tensor([ 0.0352, -0.0288], grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.weight | Size: torch.Size([512, 512]) | Values: tensor([[-0.0028,  0.0108,  0.0024,  ...,  0.0245, -0.0184, -0.0200],
        [-0.0017,  0.0342,  0.0275,  ...,  0.0159, -0.0137, -0.0309]],
       grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.bias | 

# **Automatic Differentiation with torch.autograd**

In [None]:
# Back propagation adjusts the parameters (here w and b) according to the
# gradient of the loss function with respect to each of them.
# Below: a one-layer network z = x.w + b with a binary cross-entropy loss.
import torch

x = torch.ones(5)   # input tensor
y = torch.zeros(3)  # expected output

# requires_grad=True asks autograd to track operations on w and b.
w = torch.randn(5, 3, requires_grad=True)
b = torch.randn(3, requires_grad=True)

z = torch.matmul(x, w) + b
loss = torch.nn.functional.binary_cross_entropy_with_logits(z, y)

In [None]:
# Tensors produced by tracked operations expose the backward function that
# created them through their `grad_fn` attribute.
print(f"Gradient function for z = {z.grad_fn}")
print(f"Gradient function for loss = {loss.grad_fn}")

Gradient function for z = <AddBackward0 object at 0x7fbc42d2e140>
Gradient function for loss = <BinaryCrossEntropyWithLogitsBackward0 object at 0x7fbc42969090>


In [None]:
# To optimize the parameters of the network we need the derivatives of the loss
# function with respect to them: d(loss)/dw and d(loss)/db, for the fixed x and y above.
# Calling loss.backward() computes them; the values are then read from w.grad and b.grad.
# NOTE(review): re-running this cell without rebuilding `loss` is expected to fail,
# since backward() frees the graph by default; confirm before relying on re-runs.

loss.backward()
print(w.grad)
print(b.grad)


tensor([[0.3330, 0.1482, 0.2741],
        [0.3330, 0.1482, 0.2741],
        [0.3330, 0.1482, 0.2741],
        [0.3330, 0.1482, 0.2741],
        [0.3330, 0.1482, 0.2741]])
tensor([0.3330, 0.1482, 0.2741])


In [None]:
# Once the model is trained and we only want to apply it to some input data,
# we only need the forward computation and no gradient tracking.
# Tracking can be switched off by wrapping the computation in torch.no_grad().
z = torch.matmul(x, w)+b
print(z.requires_grad)

with torch.no_grad():
    z = torch.matmul(x, w)+b
print(z.requires_grad)

True
False


In [None]:
# The same result can be obtained with detach(): the tensor it returns
# does not require gradients.
z = torch.matmul(x, w)+b
z_det = z.detach()
print(z_det.requires_grad)

False


In [None]:
# Bare string literal used as a note. Because it is the cell's last
# expression, the notebook echoes it back as the cell output.
'''
There are reasons you might want to disable gradient tracking:
1). To mark some parameters in your neural network as frozen parameters.
2). To speed up computations when you are only doing forward pass, because computations on tensors that do not track gradients would be more efficient.
'''


'\nThere are reasons you might want to disable gradient tracking:\n1). To mark some parameters in your neural network as frozen parameters.\n2). To speed up computations when you are only doing forward pass, because computations on tensors that do not track gradients would be more efficient.\n'