<a href="https://colab.research.google.com/github/UditKandpal/NeuralNetwork_Scratch/blob/main/NeuralNetwork_Scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Neural Network Architecture from basic implementing Linear Equation Prediction --> y=2x

In [6]:
import numpy as np

class Neural_Network:
  def __init__(self,input_size, hidden_size, output_size):
    # W1 SHAPE
    self.W1 = np.random.randn(input_size,hidden_size) * 0.01
    self.b1 = np.zeros((1,hidden_size))

    # W2 SHAPE
    self.W2 = np.random.randn(hidden_size, output_size) * 0.01
    self.b2 = np.zeros((1,output_size))

  # ACTIVATION FUNCTION
  def relu(self, Z):
    return np.maximum(0,Z)

  # RELU DERIVATIVE --> return 1 if Z>0, else 0
  def relu_derivative(self,Z):
    return (Z>0).astype(float)

  # FORWARD PASS
  def forward(self, X):
    # input -> hidden layer
    self.Z1 = np.dot(X, self.W1) + self.b1
    self.A1 = self.relu(self.Z1)

    # hidden -> output layer
    self.Z2 = np.dot(self.A1,self.W2) + self.b2
    y_pred = self.Z2
    return y_pred


  # BACKWARD PASS
  def backward(self, X, y_act, y_pred, learning_rate):
    m = X.shape[0]

    # calculating the gradients
    ## Loss Gradient
    d_loss_output = 2 * (y_pred - y_act) / m

    ## gradients for w2 and b2 ->
    self.dW2 = np.dot(self.A1.T, d_loss_output)
    self.db2 = np.sum(d_loss_output, axis=0, keepdims=True)

    ## Propagating the error to the hidden layer
    d_loss_hidden = np.dot(d_loss_output, self.W2.T)

    ## Applying relu derivative
    d_Z1 = d_loss_hidden * self.relu_derivative(self.Z1)

    ## gradients for W1 and b1
    self.dW1 = np.dot(X.T, d_Z1)
    self.db1 = np.sum(d_Z1 , axis=0, keepdims=True)

  # 6. Update Weights (Gradient Descent)
    self.W1 -= learning_rate * self.dW1
    self.b1 -= learning_rate * self.db1
    self.W2 -= learning_rate * self.dW2
    self.b2 -= learning_rate * self.db2

  def train(self, X, y, epochs, learning_rate):
          loss_history = []
          for i in range(epochs):
              # 1. Forward pass
              y_pred = self.forward(X)

              # 2. Calculate Loss (Mean Squared Error)
              loss = np.mean((y_pred - y)**2)
              loss_history.append(loss)

              # 3. Backward pass (Update weights)
              self.backward(X, y, y_pred, learning_rate)

              if i % 100 == 0:
                  print(f"Epoch {i}, Loss: {loss:.6f}")

          return loss_history


In [7]:
# 1. Create Dummy Data
# Let's try to learn a simple pattern: y = 2x1 - 3x2 (Linear)
# Or even simpler: Input is one number, Output is Input * 2
X = np.array([[1], [2], [3], [4], [5]])  # Inputs
y = np.array([[2], [4], [6], [8], [10]]) # Targets (y = 2x)

# 2. Initialize Network
# Input size 1 -> Hidden Neurons 5 -> Output size 1
nn = Neural_Network(input_size=1, hidden_size=5, output_size=1)

# 3. Train
print("Starting Training...")
history = nn.train(X, y, epochs=1000, learning_rate=0.01)

# 4. Predict on new data
test_val = np.array([[6]]) # We expect 12
prediction = nn.forward(test_val)

print("\n--- Result ---")
print(f"Input: 6")
print(f"Target: 12")
print(f"Prediction: {prediction[0][0]:.4f}")

Starting Training...
Epoch 0, Loss: 44.006524
Epoch 100, Loss: 0.247433
Epoch 200, Loss: 0.024617
Epoch 300, Loss: 0.001663
Epoch 400, Loss: 0.000098
Epoch 500, Loss: 0.000006
Epoch 600, Loss: 0.000000
Epoch 700, Loss: 0.000000
Epoch 800, Loss: 0.000000
Epoch 900, Loss: 0.000000

--- Result ---
Input: 6
Target: 12
Prediction: 12.0000


Implementing the same network using the PyTorch

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim

## Defining the Architecture

model = nn.Sequential(
    nn.Linear(1,5), # Input -> Hidden Layer
    nn.ReLU(),   # Activation Function
    nn.Linear(5,1) # Hidden -> Output Layer
)

## Defining Loss and Optimizer

criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr = 0.01)



In [6]:
X = torch.tensor([[1.0],[2.0],[3.0],[4.0],[5.0]])
y = torch.tensor([[2.0],[4.0],[6.0],[8.0],[10.0]])

for epoch in range(1000):
  y_pred = model(X)

  #loss calculation
  loss = criterion(y_pred, y)

  # MAGIC
  optimizer.zero_grad()
  loss.backward()
  optimizer.step()

  if epoch%10==0:
    print(f"Epoch {epoch}, Loss = {loss.item():.4f} ")

Epoch 0, Loss = 0.0983 
Epoch 10, Loss = 0.0818 
Epoch 20, Loss = 0.0680 
Epoch 30, Loss = 0.0565 
Epoch 40, Loss = 0.0468 
Epoch 50, Loss = 0.0388 
Epoch 60, Loss = 0.0321 
Epoch 70, Loss = 0.0265 
Epoch 80, Loss = 0.0219 
Epoch 90, Loss = 0.0181 
Epoch 100, Loss = 0.0149 
Epoch 110, Loss = 0.0123 
Epoch 120, Loss = 0.0101 
Epoch 130, Loss = 0.0083 
Epoch 140, Loss = 0.0068 
Epoch 150, Loss = 0.0056 
Epoch 160, Loss = 0.0046 
Epoch 170, Loss = 0.0038 
Epoch 180, Loss = 0.0031 
Epoch 190, Loss = 0.0025 
Epoch 200, Loss = 0.0021 
Epoch 210, Loss = 0.0017 
Epoch 220, Loss = 0.0014 
Epoch 230, Loss = 0.0011 
Epoch 240, Loss = 0.0009 
Epoch 250, Loss = 0.0008 
Epoch 260, Loss = 0.0006 
Epoch 270, Loss = 0.0005 
Epoch 280, Loss = 0.0004 
Epoch 290, Loss = 0.0003 
Epoch 300, Loss = 0.0003 
Epoch 310, Loss = 0.0002 
Epoch 320, Loss = 0.0002 
Epoch 330, Loss = 0.0001 
Epoch 340, Loss = 0.0001 
Epoch 350, Loss = 0.0001 
Epoch 360, Loss = 0.0001 
Epoch 370, Loss = 0.0001 
Epoch 380, Loss = 0.000