<a href="https://colab.research.google.com/github/Yash-Patil09/Deep_Learning-/blob/main/Basic_NN_Pytorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np


In [3]:
# Section banner: title preceded by two blank lines, then a 40-character rule.
print("\n\n1. LINEAR LAYER (nn.Linear)", "-" * 40, sep="\n")



1. LINEAR LAYER (nn.Linear)
----------------------------------------


**Basic Implementation of a Perceptron**

In [5]:
# nn.Linear applies an affine map: output = input @ weight.T + bias.
# Here three input features are projected onto two output features.
linear = nn.Linear(3, 2)

In [6]:
# Report the layer's configuration straight from the module so the printed
# sizes can never drift from the layer that was actually constructed.
print(f"Linear layer: input_size={linear.in_features}, output_size={linear.out_features}")
# The weight is stored as (out_features, in_features), i.e. (2, 3) here.
print(f"Weight matrix shape: {linear.weight.shape}")
# One bias term per output feature.
print(f"Bias vector shape: {linear.bias.shape}")

Linear layer: input_size=3, output_size=2
Weight matrix shape: torch.Size([2, 3])
Bias vector shape: torch.Size([2])


In [8]:
# One un-batched sample with three features, pushed through the layer.
input_data = torch.tensor([1.0, 2.0, 3.0])
output = linear(input_data)

print(f"Input data:{input_data}")
print(f"Output data:{output}")
print("Formula: output = input @ weight.T + bias")

Input data:tensor([1., 2., 3.])
Output data:tensor([-2.7454,  0.2469], grad_fn=<ViewBackward0>)
Formula: output = input @ weight.T + bias


In [12]:
# Reproduce the layer by hand: project the input through the transposed
# weight matrix, then add the bias.
manual_op = torch.matmul(input_data, linear.weight.T) + linear.bias
print(f"Manual calculation: {manual_op}")

Manual calculation: tensor([-2.7454,  0.2469], grad_fn=<AddBackward0>)


**Activation Functions**

In [13]:
# Section banner: title preceded by two blank lines, then a 40-character rule.
print("\n\n2. ACTIVATION FUNCTIONS", "-" * 40, sep="\n")



2. ACTIVATION FUNCTIONS
----------------------------------------


In [14]:
# Five unit-spaced sample points from -2 to 2 (negative, zero and positive
# values) to feed through each activation function.
x = torch.arange(-2.0, 3.0)
print(f"Input: {x}")

Input: tensor([-2., -1.,  0.,  1.,  2.])


1. ReLU: Returns max(0, x)
2. Sigmoid: Maps to (0, 1)
3. Tanh: Maps to (-1, 1)


In [15]:
# ReLU is the element-wise max(0, x).
relu = nn.ReLU()
relu_out = relu(x)

print("ReLU:", relu_out)
print("ReLU kills negative values, keeps positive ones")

ReLU: tensor([0., 0., 0., 1., 2.])
ReLU kills negative values, keeps positive ones


In [16]:
# Sigmoid squashes every element into the open interval (0, 1).
sigmoid = nn.Sigmoid()
sigmoid_out = sigmoid(x)

print("Sigmoid:", sigmoid_out)
print("Sigmoid maps values to (0, 1)")

Sigmoid: tensor([0.1192, 0.2689, 0.5000, 0.7311, 0.8808])
Sigmoid maps values to (0, 1)


In [17]:
# Tanh squashes every element into the open interval (-1, 1).
tanh = nn.Tanh()
tanh_out = tanh(x)

print("Tanh:", tanh_out)
print("Tanh maps values to (-1, 1)")

Tanh: tensor([-0.9640, -0.7616,  0.0000,  0.7616,  0.9640])
Tanh maps values to (-1, 1)


**LOSS FUNCTIONS**

In [18]:
# Mean squared error: the loss used here for the regression example.
mse_loss = nn.MSELoss()

predictions = torch.tensor([2.5, 1.8, 3.1])
targets = torch.tensor([2.0, 2.0, 3.0])
mse = mse_loss(predictions, targets)  # 0-dim tensor holding the mean

print("MSE Loss Example:")
print("Predictions:", predictions)
print("Targets:", targets)
print(f"MSE: {mse.item():.4f}")
print("MSE = mean((pred - target)^2)")

MSE Loss Example:
Predictions: tensor([2.5000, 1.8000, 3.1000])
Targets: tensor([2., 2., 3.])
MSE: 0.1000
MSE = mean((pred - target)^2)


In [19]:
# Cross entropy: the loss used here for the classification example.
ce_loss = nn.CrossEntropyLoss()

# Raw scores for 2 samples over 3 classes (one row per sample).
logits = torch.tensor(
    [
        [2.0, 1.0, 0.1],  # sample 1: class 0 scores highest
        [0.5, 2.5, 0.3],  # sample 2: class 1 scores highest
    ]
)
# Ground-truth class index for each sample.
labels = torch.tensor([0, 1])
ce = ce_loss(logits, labels)

print("\nCrossEntropy Loss Example:")
print(f"Logits: {logits}")
print(f"True labels: {labels}")
print(f"CE Loss: {ce.item():.4f}")
print("Lower loss = better predictions")


CrossEntropy Loss Example:
Logits: tensor([[2.0000, 1.0000, 0.1000],
        [0.5000, 2.5000, 0.3000]])
True labels: tensor([0, 1])
CE Loss: 0.3185
Lower loss = better predictions


**Multi-Layer Perceptron (MLP)**

In [23]:
# Section banner: title preceded by two blank lines, then a 40-character rule.
print("\n\n5. BUILDING A NEURAL NETWORK", "-" * 40, sep="\n")

class ExplainedNN(nn.Module):
    """Two-layer perceptron (4 -> 8 -> 3) that can narrate its forward pass.

    Args:
        verbose: When True (the default, matching the original behaviour),
            ``forward`` prints the tensor shape after every stage. Pass False
            to silence the trace, e.g. when the network is called repeatedly.
    """

    def __init__(self, verbose: bool = True):
        super().__init__()
        # Layer 1: 4 inputs -> 8 hidden units
        self.layer1 = nn.Linear(4, 8)
        # Layer 2: 8 hidden -> 3 outputs
        self.layer2 = nn.Linear(8, 3)
        # Activation function applied between the two layers
        self.relu = nn.ReLU()
        # Plain Python flag controlling the shape trace in forward()
        self.verbose = verbose

    def _trace(self, label, tensor):
        """Print ``label`` followed by ``tensor``'s shape if tracing is on."""
        if self.verbose:
            print(f"  {label}: {tensor.shape}")

    def forward(self, x):
        """Run layer1 -> ReLU -> layer2 on ``x`` and return the result.

        The last dimension of ``x`` must be 4 (layer1's input size); the last
        dimension of the returned tensor is 3 (layer2's output size).
        """
        self._trace("Input shape", x)

        # First layer + activation
        x = self.layer1(x)
        self._trace("After layer1", x)
        x = self.relu(x)
        self._trace("After ReLU", x)

        # Second layer (output); no activation is applied to the result
        x = self.layer2(x)
        self._trace("Final output", x)
        return x

# Instantiate the network and show its registered sub-modules.
net = ExplainedNN()
print("Network architecture:")
print(net)

# Push a random batch (2 samples x 4 features) through the network.
batch_input = torch.randn(2, 4)
print("\nForward pass:")
output = net(batch_input)
print(f"Final result shape: {output.shape}")



5. BUILDING A NEURAL NETWORK
----------------------------------------
Network architecture:
ExplainedNN(
  (layer1): Linear(in_features=4, out_features=8, bias=True)
  (layer2): Linear(in_features=8, out_features=3, bias=True)
  (relu): ReLU()
)

Forward pass:
  Input shape: torch.Size([2, 4])
  After layer1: torch.Size([2, 8])
  After ReLU: torch.Size([2, 8])
  Final output: torch.Size([2, 3])
Final result shape: torch.Size([2, 3])


**OPTIMIZERS**

In [25]:
# Small 2-input, 1-output linear model for the optimizer demo.
model = nn.Linear(2, 1)
print(model)

# Show the randomly initialised parameters.
print("Model parameters before training:")
print("Weight:", model.weight.data)
print("Bias:", model.bias.data)

Linear(in_features=2, out_features=1, bias=True)
Model parameters before training:
Weight: tensor([[0.1883, 0.3693]])
Bias: tensor([-0.1151])


In [27]:
# Two optimizers over the same parameters, both with a learning rate of 0.01;
# printing them lists each one's hyper-parameters.
sgd = torch.optim.SGD(params=model.parameters(), lr=0.01)
adam = torch.optim.Adam(params=model.parameters(), lr=0.01)

print("\nOptimizers:")
print("SGD: Simple gradient descent:", sgd)
print("Adam: Adaptive learning rate (usually better):", adam)


Optimizers:
SGD: Simple gradient descent: SGD (
Parameter Group 0
    dampening: 0
    differentiable: False
    foreach: None
    fused: None
    lr: 0.01
    maximize: False
    momentum: 0
    nesterov: False
    weight_decay: 0
)
Adam: Adaptive learning rate (usually better): Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 0.01
    maximize: False
    weight_decay: 0
)


**Training**

In [36]:
# Seed the global RNG so the synthetic data, and every random draw made after
# this cell in a top-to-bottom run, is identical on each Restart & Run All.
torch.manual_seed(42)

# 10 samples with 2 features each, drawn from a standard normal distribution.
X = torch.randn(10, 2)
# Noise-free targets from the ground-truth rule y = 2*x1 + 1*x2 + 1,
# reshaped from (10,) to (10, 1) so they line up with a 1-output model.
y = (2 * X[:, 0] + 1 * X[:, 1] + 1).unsqueeze(1)

In [37]:
# Fresh 2-input, 1-output linear model to fit the synthetic data.
model = nn.Linear(2, 1)
# Mean squared error between predictions and targets.
criterion = nn.MSELoss()
# Adam over all of the model's parameters with a learning rate of 0.01.
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01)

In [38]:
print("Training to learn: y = 2*x1 + 1*x2 + 1")
# Loss of the untrained model on the full data set, as a baseline.
initial_loss = criterion(model(X), y)
print(f"Initial loss: {initial_loss.item():.4f}")

Training to learn: y = 2*x1 + 1*x2 + 1
Initial loss: 2.1181


In [39]:
# Three narrated optimisation steps over the full data set.
for step in range(3):
    print(f"\nStep {step + 1}:")

    # 1. Forward pass: predict and measure the error
    predictions = model(X)
    loss = criterion(predictions, y)
    print(f"  1. Forward pass - Loss: {loss.item():.4f}")

    # 2. Clear gradients left over from the previous step
    optimizer.zero_grad()
    print("  2. Zero Grad")

    # 3. Backward pass: compute gradients of the loss w.r.t. the parameters
    loss.backward()
    print("  3. Backward pass - Compute gradients")

    # 4. Update parameters using the freshly computed gradients
    optimizer.step()
    print("  4. Update parameters")
    print(f"     New weight: {model.weight.data}")


Step 1:
  1. Forward pass - Loss: 2.1181
2. Zero Grad
  3. Backward pass - Compute gradients
  4. Update parameters
     New weight: tensor([[ 0.5761, -0.3532]])

Step 2:
  1. Forward pass - Loss: 2.0833
2. Zero Grad
  3. Backward pass - Compute gradients
  4. Update parameters
     New weight: tensor([[ 0.5861, -0.3432]])

Step 3:
  1. Forward pass - Loss: 2.0488
2. Zero Grad
  3. Backward pass - Compute gradients
  4. Update parameters
     New weight: tensor([[ 0.5961, -0.3332]])


**DROPOUT AND BATCH NORMALIZATION**

In [40]:
# Seed the global RNG so the dropout mask shown below is the same on every run.
torch.manual_seed(0)

# Dropout: a freshly built module is in training mode, where each element is
# zeroed with probability p and the survivors are scaled by 1 / (1 - p).
# With p=0.5 the surviving ones therefore show up as 2.
dropout = nn.Dropout(p=0.5)  # 50% chance to zero out
x = torch.ones(1, 5)
dropout_out = dropout(x)
print(f"Before dropout: {x}")
print(f"After dropout: {dropout_out}")
print("Dropout prevents overfitting by randomly turning off neurons")

# Batch Normalization: in training mode, normalizes each feature (column)
# over the batch dimension to mean=0, std=1.
batch_norm = nn.BatchNorm1d(3)
# 3 samples (rows) x 3 features (columns)
x = torch.tensor([[1.0, 2.0, 3.0],
                  [4.0, 5.0, 6.0],
                  [7.0, 8.0, 9.0]])
print(f"\nBefore BatchNorm - mean: {x.mean(dim=0)}")
normalized = batch_norm(x)
print(f"After BatchNorm - mean: {normalized.mean(dim=0)}")
print("BatchNorm helps with training stability")

Before dropout: tensor([[1., 1., 1., 1., 1.]])
After dropout: tensor([[0., 0., 2., 2., 2.]])
Dropout prevents overfitting by randomly turning off neurons

Before BatchNorm - mean: tensor([4., 5., 6.])
After BatchNorm - mean: tensor([0., 0., 0.], grad_fn=<MeanBackward1>)
BatchNorm helps with training stability
