# Backpropagation

## We have to consider the following steps

## How to start now?

## Prepare your dataset

In [1]:
# import torch
# import torchvision
# import torchvision.transforms as transforms
# import numpy as np

# def load_mnist_data_for_network(root_path='./data', batch_size=4):
#     transform = transforms.Compose([
#         transforms.ToTensor(),
#         transforms.Normalize((0.5,), (0.5,))
#     ])

#     trainset = torchvision.datasets.MNIST(root=root_path, train=True, download=True, transform=transform)
#     trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)

#     testset = torchvision.datasets.MNIST(root=root_path, train=False, download=True, transform=transform)
#     testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)

#     # Convert PyTorch DataLoader to NumPy arrays
#     train_images, train_labels = [], []
#     for data in trainloader:
#         inputs, labels = data
#         # Reshape each image to (728,)
#         train_images.append(inputs.numpy().reshape(-1, 28*28))
#         train_labels.append(labels.numpy())

#     test_images, test_labels = [], []
#     for data in testloader:
#         inputs, labels = data
#         # Reshape each image to (728,)
#         test_images.append(inputs.numpy().reshape(-1, 28*28))
#         test_labels.append(labels.numpy())

#     # Concatenate batches into NumPy arrays
#     train_images = np.concatenate(train_images, axis=0)
#     train_labels = np.concatenate(train_labels, axis=0)
#     test_images = np.concatenate(test_images, axis=0)
#     test_labels = np.concatenate(test_labels, axis=0)

#     return (train_images, train_labels), (test_images, test_labels)

# train_data, test_data = load_mnist_data_for_network()
# print(train_data[0].shape)  # Shape of the train images (num_samples, 728)
# print(train_data[1].shape)  # Shape of the train labels (num_samples,)

import pandas as pd


TrainData = pd.read_csv("./data/mnist_train.csv")
TestData = pd.read_csv("./data/mnist_test.csv")

X_train = TrainData.iloc[:,:-1]
X_test = TestData.iloc[:,:-1]

y_train = TrainData.iloc[:,-1]
y_test = TestData.iloc[:,-1]

print(X_train.shape)
print(y_train.shape)


(60000, 784)
(60000,)


## Building your neural network

In [2]:
# Your code
import numpy as np
def xavier_init(shape = tuple):
    if len(shape) == 1:
        number_of_in, number_of_out = shape[0], shape[0]
    elif len(shape) == 2:
        number_of_in, number_of_out = shape[0], shape[1]
    else:
        raise ValueError("Xavier initialization supports only 1D and 2D shapes.")

    scaler= np.sqrt(2.0 / (number_of_in + number_of_out))

    return np.random.normal(loc=0.0, scale=scaler, size=shape)

def bias(shape = tuple):
    bias = np.random.normal(loc=0.0, size = shape[1] )
    return bias


W1 = xavier_init((784,32))
W2 = xavier_init((32,10))
B1 = bias((784,32))
B2 = bias((32,10))

print(W1.shape, W2.shape, B1.shape , B2.shape)

(784, 32) (32, 10) (32,) (10,)


## Implement your loss function(s)

In [3]:
import numpy as np
  
#LOSS FUNC \ cross_entropy
def Loss_derivative(target_output , output):
    return (output - target_output) / (output * (1-output))

def Loss(target_output , output): #cross_entropy
    return ((-target_output * np.log(output)) - ((1-target_output) * np.log(1-output)))

#activation function
def sigmoid(input): 
     return (1 / (1+np.exp(-1 * input)))

#activision funct derivative
def sigmoid_deriv(x):
    return sigmoid(x)*(1-sigmoid(x))

## Implement the training loop

In [4]:
# Your code

# Consider the following steps:
# 1) Loop through your training data
#   1. 1) Choose number of epochs (How often do you want to loop through your complete dataset?)
# 2) Forward the data through your network
# 3) Calculate the loss
# 4) Perform backpropagation with SGD and update the weights
#   4. 1) Choose a learning rate to update your weights
# Repeat 1, 2, 3, 4 until the training converges or maximum epochs are reached

import random

# input_data = train_data[0][1]
# targets = train_data[1][1]


learning_rate = 0.01


#forward pass

epoch = 100
for i in range(epoch):

    random_point = random.randint(0,len(X_train))
    
    input_data = X_train.loc[random_point,:]
    targets = y_train.loc[random_point]
    # Forward pass for the first layer
    Z1 = np.dot(W1.T, input_data) + B1  # Linear transformation
    A1 = sigmoid(Z1)  # Sigmoid activation
    #forward pass output layer
    Z2 = np.dot(W2.T, A1) + B2  # Linear transformation
    Y = sigmoid(Z2)  # Sigmoid activation



    # Calculate the cross-entropy loss
    loss = Loss(targets, Y)

    # Backpropagation

    # Calculate the derivative of the loss with respect to the output
    dL_dY = Loss_derivative(targets, Y)

    # Calculate the derivative of the output layer
    dY_dZ2 = sigmoid_deriv(Z2)
    dZ2_dW2 = A1

    # gradients for weights and biases in output layer
    dL_dW2 = np.outer(dZ2_dW2, dL_dY * dY_dZ2)
    dL_dB2 = dL_dY * dY_dZ2

    # derivative of the hidden layer
    dZ2_dA1 = W2
    dA1_dZ1 = sigmoid_deriv(Z1)
    dZ1_dW1 = input_data

    # Calculate the gradients for weights and biases in the hidden layer @ is matrix multip.
    dL_dW1 = np.outer(dZ1_dW1, (dL_dY * dY_dZ2) @ dZ2_dA1.T * dA1_dZ1)
    dL_dB1 = (dL_dY * dY_dZ2) @ dZ2_dA1.T * dA1_dZ1

    # Update the weights and biases in the output layer
    W2 -= learning_rate * dL_dW2
    B2 -= learning_rate * dL_dB2

    # Update the weights and biases in the hidden layer
    W1 -= learning_rate * dL_dW1
    B1 -= learning_rate * dL_dB1

    print(f"Iteration {i}: \n Loss: {np.mean(loss)}")
    #print(f"Iteration {i}: \n Loss: {loss}")
    

Iteration 0: 
 Loss: 0.6690064090259646
Iteration 1: 
 Loss: 0.6789447483949365
Iteration 2: 
 Loss: 0.6454990767215273
Iteration 3: 
 Loss: 0.6879373794835713
Iteration 4: 
 Loss: 0.5648131957944088
Iteration 5: 
 Loss: 0.6285465501042136
Iteration 6: 
 Loss: 0.5400467076852211
Iteration 7: 
 Loss: 0.5010356794739252
Iteration 8: 
 Loss: 0.4129859332801156
Iteration 9: 
 Loss: 0.4474753718023613
Iteration 10: 
 Loss: 0.42367393610716847
Iteration 11: 
 Loss: 0.5154237183226378
Iteration 12: 
 Loss: 0.3538823097671428
Iteration 13: 
 Loss: 0.41945949416628536
Iteration 14: 
 Loss: 0.3422127671683622
Iteration 15: 
 Loss: 0.3446476364165724
Iteration 16: 
 Loss: 0.29975076130816986
Iteration 17: 
 Loss: 0.27177668440704783
Iteration 18: 
 Loss: 0.32949663317586453
Iteration 19: 
 Loss: 0.28020705982051
Iteration 20: 
 Loss: 0.31141068876629463
Iteration 21: 
 Loss: 0.23281571217784586
Iteration 22: 
 Loss: 0.2615301208045351
Iteration 23: 
 Loss: 0.21664123694496668
Iteration 24: 
 Loss

  return (1 / (1+np.exp(-1 * input)))


Iteration 50: 
 Loss: 0.10343280842836686
Iteration 51: 
 Loss: 0.16312653301209795
Iteration 52: 
 Loss: 0.12039698078057168
Iteration 53: 
 Loss: 0.12083603217567496
Iteration 54: 
 Loss: 0.0922438003242287
Iteration 55: 
 Loss: 0.09167341526817245
Iteration 56: 
 Loss: 0.08624220083999423
Iteration 57: 
 Loss: 0.08486003683183231
Iteration 58: 
 Loss: 0.08633015475827727
Iteration 59: 
 Loss: 0.0985278185608047
Iteration 60: 
 Loss: 0.08067786475728375
Iteration 61: 
 Loss: 0.0794345664069609
Iteration 62: 
 Loss: 0.07822100873382701
Iteration 63: 
 Loss: 0.0770523290994118
Iteration 64: 
 Loss: 0.07854259604019202
Iteration 65: 
 Loss: 0.07483552545741701
Iteration 66: 
 Loss: 0.09032807509247515
Iteration 67: 
 Loss: 0.08714320110444596
Iteration 68: 
 Loss: 0.07141001363386584
Iteration 69: 
 Loss: 0.08620376195793641
Iteration 70: 
 Loss: 0.08480208641419354
Iteration 71: 
 Loss: 0.09063917691503148
Iteration 72: 
 Loss: 0.07242259983764861
Iteration 73: 
 Loss: 0.06625805605260