**Experiment 9: Implementing a Neural Network and Backpropagation from Scratch**

Name : Amishi Gupta

Roll No. 23/CS/048

In [1]:
#Import all necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score
from sklearn.neural_network import MLPClassifier

In [2]:
# Load the breast cancer dataset and separate features from labels
data = load_breast_cancer()
X, y = data.data, data.target

# Inspect the data: array shapes, feature names and class names
print(f"X (Features) Shape: {X.shape}")
print(f"y (Target) Shape: {y.shape}")
print("\nFeature Names:")
print(data.feature_names)
print(f"\nTarget Classes: {data.target_names}")

# Hold out 30% of the samples for validation (fixed seed for repeatability)
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.3, random_state=42
)

print(f"\nTraining samples: {X_train.shape[0]}")
print(f"Validation samples: {X_val.shape[0]}")

# Standardize features: the scaler statistics come from the training split
# only, and the same fitted scaler is then applied to the validation split.
scaler = StandardScaler()
X_train_scaled = scaler.fit(X_train).transform(X_train)
X_val_scaled = scaler.transform(X_val)

print("\nData preprocessing complete")

X (Features) Shape: (569, 30)
y (Target) Shape: (569,)

Feature Names:
['mean radius' 'mean texture' 'mean perimeter' 'mean area'
 'mean smoothness' 'mean compactness' 'mean concavity'
 'mean concave points' 'mean symmetry' 'mean fractal dimension'
 'radius error' 'texture error' 'perimeter error' 'area error'
 'smoothness error' 'compactness error' 'concavity error'
 'concave points error' 'symmetry error' 'fractal dimension error'
 'worst radius' 'worst texture' 'worst perimeter' 'worst area'
 'worst smoothness' 'worst compactness' 'worst concavity'
 'worst concave points' 'worst symmetry' 'worst fractal dimension']

Target Classes: ['malignant' 'benign']

Training samples: 398
Validation samples: 171

Data preprocessing complete


In [3]:
#Activation Functions
def sigmoid(Z):
    """Element-wise logistic sigmoid, 1 / (1 + exp(-Z)), without overflow.

    The textbook form evaluates exp(-Z), which overflows for large negative
    Z. Both branches below only ever evaluate exp(-|Z|), which lies in
    [0, 1], so no overflow can occur for any finite input.

    Parameters
    ----------
    Z : scalar or array-like
        Pre-activation values.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as Z and values in [0, 1].
    """
    Z = np.asarray(Z)
    decay = np.exp(-np.abs(Z))
    # Z >= 0: 1 / (1 + exp(-Z));  Z < 0: exp(Z) / (1 + exp(Z)) -- same value,
    # written so the exponent is never positive.
    return np.where(Z >= 0, 1 / (1 + decay), decay / (1 + decay))

def relu(Z):
    """Rectified linear unit: element-wise maximum of 0 and Z."""
    floor = 0
    return np.maximum(floor, Z)

#Activation Derivatives
def sigmoid_derivative(A):
    """Derivative of the sigmoid expressed through its output A: A * (1 - A).

    Note that the argument is the sigmoid *activation*, not the
    pre-activation value.
    """
    complement = 1 - A
    return A * complement

def relu_derivative(Z):
    """Derivative of ReLU with respect to Z: 1 where Z > 0, otherwise 0."""
    # Multiplying the boolean mask by 1 converts True/False into 1/0.
    is_active = Z > 0
    return is_active * 1

#Loss Functions
def compute_bce_loss(Y, Y_hat):
    """Mean binary cross-entropy between labels Y and predictions Y_hat.

    The number of samples is read from Y.shape[1], so Y must be 2-D.
    A small epsilon is added inside each logarithm so that predictions of
    exactly 0 or 1 do not produce log(0).
    """
    eps = 1e-15
    n_samples = Y.shape[1]
    positive_term = Y * np.log(Y_hat + eps)
    negative_term = (1 - Y) * np.log(1 - Y_hat + eps)
    loss = -1 / n_samples * np.sum(positive_term + negative_term)
    return np.squeeze(loss)

def compute_mse_loss(Y, Y_hat):
    """Mean squared error between labels Y and predictions Y_hat.

    The number of samples is read from Y.shape[1], so Y must be 2-D.
    """
    n_samples = Y.shape[1]
    squared_error = (Y_hat - Y) ** 2
    loss = 1 / n_samples * np.sum(squared_error)
    return np.squeeze(loss)

# Confirmation message so that running this definitions-only cell shows visible output
print("Utility functions defined")

Utility functions defined


In [4]:
class MyANNClassifier:
    """Feed-forward binary classifier trained with full-batch gradient descent.

    Every hidden layer uses ReLU and the single output layer uses a sigmoid.
    Internally samples are stored column-wise: activations have shape
    (units, n_samples).

    Parameters
    ----------
    layer_dims : sequence of int
        Units per layer, input layer first, output layer last
        (e.g. ``[30, 10, 1]``). Needs at least two entries.
    learning_rate : float, default 0.01
        Step size of the gradient-descent update.
    n_iterations : int, default 1000
        Number of full-batch gradient-descent steps performed by ``fit``.
    loss : {'bce', 'mse'}, default 'bce'
        Training loss: binary cross-entropy or mean squared error.
    random_state : int or None, default 42
        Seed of the private random generator used for weight initialization.
        The default reproduces the weights of a global ``np.random.seed(42)``
        without touching NumPy's global random state.

    Attributes
    ----------
    parameters_ : dict
        ``'W<l>'`` / ``'b<l>'`` arrays for layers ``l = 1..L``.
    grads_ : dict
        ``'dW<l>'`` / ``'db<l>'`` gradients from the latest backward pass.
    costs_ : list
        Training cost recorded every 100 iterations of the latest ``fit``.

    Raises
    ------
    ValueError
        If ``layer_dims`` has fewer than two entries or ``loss`` is not one
        of the supported names.
    """

    _SUPPORTED_LOSSES = ('bce', 'mse')

    def __init__(self, layer_dims, learning_rate=0.01, n_iterations=1000, loss='bce', random_state=42):
        if len(layer_dims) < 2:
            raise ValueError("layer_dims must contain at least an input and an output layer")
        # Fail early: an unknown loss would otherwise only surface in the
        # middle of training as an unbound local variable.
        if loss not in self._SUPPORTED_LOSSES:
            raise ValueError(f"Unsupported loss '{loss}'; expected one of {self._SUPPORTED_LOSSES}")

        self.layer_dims = layer_dims
        self.learning_rate = learning_rate
        self.n_iterations = n_iterations
        self.loss = loss
        self.random_state = random_state
        self.parameters_ = {}
        self.costs_ = []
        self.grads_ = {}

    def _initialize_parameters(self):
        """Create small random weights and zero biases for every layer."""
        # A private generator keeps initialization reproducible without
        # reseeding NumPy's global random state as a side effect.
        rng = np.random.RandomState(self.random_state)
        # Start from an empty dict so no stale layers survive a refit after
        # layer_dims was changed.
        self.parameters_ = {}
        for l in range(1, len(self.layer_dims)):
            fan_out, fan_in = self.layer_dims[l], self.layer_dims[l - 1]
            self.parameters_['W' + str(l)] = rng.randn(fan_out, fan_in) * 0.01
            self.parameters_['b' + str(l)] = np.zeros((fan_out, 1))

    def _compute_cost(self, Y, Y_hat):
        """Evaluate the configured loss for labels Y and predictions Y_hat."""
        if self.loss == 'bce':
            return compute_bce_loss(Y, Y_hat)
        if self.loss == 'mse':
            return compute_mse_loss(Y, Y_hat)
        # Reachable only if self.loss was reassigned after construction.
        raise ValueError(f"Unsupported loss '{self.loss}'; expected one of {self._SUPPORTED_LOSSES}")

    def _forward_propagation(self, X):
        """Run the network on X of shape (n_features, n_samples).

        Returns
        -------
        A_L : numpy.ndarray
            Sigmoid output of the last layer.
        cache : list
            One ``((A_prev, W, b), Z)`` tuple per layer, in layer order,
            consumed by ``_backward_propagation``.
        """
        cache = []
        A = X
        L = len(self.parameters_) // 2  # each layer contributes one W and one b

        # Hidden layers: linear step followed by ReLU
        for l in range(1, L):
            A_prev = A
            W = self.parameters_['W' + str(l)]
            b = self.parameters_['b' + str(l)]
            Z = np.dot(W, A_prev) + b
            A = relu(Z)
            cache.append(((A_prev, W, b), Z))

        # Output layer: linear step followed by sigmoid
        W = self.parameters_['W' + str(L)]
        b = self.parameters_['b' + str(L)]
        Z = np.dot(W, A) + b
        A_L = sigmoid(Z)
        cache.append(((A, W, b), Z))

        return A_L, cache

    def _backward_propagation(self, Y, Y_hat, cache):
        """Fill ``self.grads_`` with dW/db of every layer for the current batch.

        Parameters
        ----------
        Y : numpy.ndarray
            True labels, shape (1, n_samples).
        Y_hat : numpy.ndarray
            Output of ``_forward_propagation``.
        cache : list
            Per-layer cache returned by ``_forward_propagation``.
        """
        self.grads_ = {}
        L = len(self.parameters_) // 2
        m = Y.shape[1]
        Y = Y.reshape(Y_hat.shape)

        if self.loss == 'bce':
            # For sigmoid + cross-entropy, dL/dZ simplifies to (Y_hat - Y).
            # Forming dA * sigmoid'(Z) instead multiplies a huge value by 0
            # once the sigmoid saturates, wiping out the gradient exactly
            # when the prediction is most wrong.
            dZ_L = Y_hat - Y
        elif self.loss == 'mse':
            dZ_L = 2 * (Y_hat - Y) * sigmoid_derivative(Y_hat)
        else:
            raise ValueError(f"Unsupported loss '{self.loss}'; expected one of {self._SUPPORTED_LOSSES}")

        (A_prev_L, W_L, _), _ = cache[L - 1]
        self.grads_['dW' + str(L)] = (1/m) * np.dot(dZ_L, A_prev_L.T)
        self.grads_['db' + str(L)] = (1/m) * np.sum(dZ_L, axis=1, keepdims=True)
        dA_prev = np.dot(W_L.T, dZ_L)

        # Hidden layers, last to first; cache[l] belongs to layer l + 1
        for l in reversed(range(L - 1)):
            (A_prev, W, _), Z = cache[l]
            dZ = dA_prev * relu_derivative(Z)

            self.grads_['dW' + str(l + 1)] = (1/m) * np.dot(dZ, A_prev.T)
            self.grads_['db' + str(l + 1)] = (1/m) * np.sum(dZ, axis=1, keepdims=True)
            dA_prev = np.dot(W.T, dZ)

    def _update_parameters(self):
        """Apply one gradient-descent step using the gradients in ``self.grads_``."""
        L = len(self.parameters_) // 2

        for l in range(1, L + 1):
            self.parameters_['W' + str(l)] = self.parameters_['W' + str(l)] - self.learning_rate * self.grads_['dW' + str(l)]
            self.parameters_['b' + str(l)] = self.parameters_['b' + str(l)] - self.learning_rate * self.grads_['db' + str(l)]

    def fit(self, X, y):
        """Train the network on X (n_samples, n_features) and binary labels y.

        Parameters are re-initialized on every call. The cost is appended to
        ``costs_`` every 100 iterations and printed every 1000 iterations.

        Raises
        ------
        ValueError
            If the feature count does not match ``layer_dims[0]`` or X and y
            disagree on the number of samples.
        """
        # Samples become columns; np.asarray also accepts lists / pandas objects
        X_fit = np.asarray(X).T
        y_fit = np.asarray(y).reshape(1, -1)

        if X_fit.shape[0] != self.layer_dims[0]:
            raise ValueError(f"Input feature count ({X_fit.shape[0]}) does not match layer_dims[0] ({self.layer_dims[0]})")
        if y_fit.shape[1] != X_fit.shape[1]:
            raise ValueError(f"X has {X_fit.shape[1]} samples but y has {y_fit.shape[1]}")

        #Initialize parameters
        self._initialize_parameters()
        self.costs_ = [] #Reset costs

        print(f"Starting training for {self.n_iterations} iterations with {self.loss.upper()} loss...")

        # Defined up front so the final report also works with n_iterations=0
        cost = float('nan')

        #Gradient Descent Loop
        for i in range(self.n_iterations):
            Y_hat, cache = self._forward_propagation(X_fit)
            cost = self._compute_cost(y_fit, Y_hat)
            self._backward_propagation(y_fit, Y_hat, cache)
            self._update_parameters()

            #Store cost every 100 iterations
            if i % 100 == 0:
                self.costs_.append(cost)
                if i % 1000 == 0:
                    print(f"Cost after iteration {i}: {cost:.6f}")

        print(f"Training complete. Final cost: {cost:.6f}")

    def predict(self, X):
        """Return hard 0/1 predictions for X (n_samples, n_features).

        A sample is labelled 1 when its predicted probability exceeds 0.5.
        The result is a flat 1-D integer array.
        """
        X_pred = np.asarray(X).T

        Y_hat, _ = self._forward_propagation(X_pred)

        #Convert probabilities to binary predictions
        predictions = (Y_hat > 0.5).astype(int)

        return predictions.flatten()

# Confirmation message so that running this definitions-only cell shows visible output
print("MyANNClassifier class defined")

MyANNClassifier class defined


In [5]:
n_features = X_train_scaled.shape[1]


def train_and_evaluate(layer_dims, loss, training_banner, evaluation_banner,
                       learning_rate=0.001, n_iterations=5000):
    """Train one MyANNClassifier on the training split and report on validation.

    Parameters
    ----------
    layer_dims : list of int
        Layer sizes passed to MyANNClassifier.
    loss : str
        Loss name passed to MyANNClassifier ('bce' or 'mse').
    training_banner, evaluation_banner : str
        Headings printed before training and before the report.
    learning_rate, n_iterations
        Optimizer settings shared by all runs in this cell.

    Returns
    -------
    (model, y_pred)
        The fitted classifier and its predictions on X_val_scaled.
    """
    print(training_banner)
    model = MyANNClassifier(layer_dims, learning_rate=learning_rate,
                            n_iterations=n_iterations, loss=loss)
    model.fit(X_train_scaled, y_train)

    y_pred = model.predict(X_val_scaled)
    print(evaluation_banner)
    # zero_division=0 reports 0.00 for a class that is never predicted
    # (the same value the default yields) without emitting a warning each time.
    print(classification_report(y_val, y_pred, zero_division=0))
    return model, y_pred


# Model 1: one hidden layer, binary cross-entropy
layer_dims_1 = [n_features, 10, 1]
model_1, y_pred_1 = train_and_evaluate(
    layer_dims_1, 'bce',
    "\nTraining Model 1: [30, 10, 1] with BCE Loss",
    "\nModel 1 Evaluation (BCE, 1 Hidden Layer)")

# Model 2: same architecture, mean squared error
layer_dims_2 = [n_features, 10, 1]
model_2, y_pred_2 = train_and_evaluate(
    layer_dims_2, 'mse',
    "\nTraining Model 2: [30, 10, 1] with MSE Loss",
    "\nModel 2 Evaluation (MSE, 1 Hidden Layer)")

# Model 3: two hidden layers, binary cross-entropy
layer_dims_3 = [n_features, 10, 5, 1]
model_3, y_pred_3 = train_and_evaluate(
    layer_dims_3, 'bce',
    "\nTraining Model 3: [30, 10, 5, 1] with BCE Loss",
    "\nModel 3 Evaluation (BCE, 2 Hidden Layers)")


Training Model 1: [30, 10, 1] with BCE Loss
Starting training for 5000 iterations with BCE loss...
Cost after iteration 0: 0.693180
Cost after iteration 1000: 0.680122
Cost after iteration 2000: 0.670488
Cost after iteration 3000: 0.657429
Cost after iteration 4000: 0.621066
Training complete. Final cost: 0.519979

Model 1 Evaluation (BCE, 1 Hidden Layer)
              precision    recall  f1-score   support

           0       1.00      0.65      0.79        63
           1       0.83      1.00      0.91       108

    accuracy                           0.87       171
   macro avg       0.92      0.83      0.85       171
weighted avg       0.89      0.87      0.86       171


Training Model 2: [30, 10, 1] with MSE Loss
Starting training for 5000 iterations with MSE loss...
Cost after iteration 0: 0.250016
Cost after iteration 1000: 0.246399
Cost after iteration 2000: 0.243501
Cost after iteration 3000: 0.241054
Cost after iteration 4000: 0.238751
Training complete. Final cost: 0.2361

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Cost after iteration 1000: 0.680719
Cost after iteration 2000: 0.673173
Cost after iteration 3000: 0.668575
Cost after iteration 4000: 0.665762
Training complete. Final cost: 0.664037

Model 3 Evaluation (BCE, 2 Hidden Layers)
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        63
           1       0.63      1.00      0.77       108

    accuracy                           0.63       171
   macro avg       0.32      0.50      0.39       171
weighted avg       0.40      0.63      0.49       171



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [6]:
print("\nTraining Model 4: sklearn.MLPClassifier")

# Reference model from scikit-learn: one hidden layer of 10 ReLU units,
# trained with the Adam solver and a fixed seed.
mlp_settings = {
    'hidden_layer_sizes': (10,),
    'activation': 'relu',
    'solver': 'adam',
    'max_iter': 1000,
    'learning_rate_init': 0.001,
    'random_state': 42,
}
model_4 = MLPClassifier(**mlp_settings)
model_4.fit(X_train_scaled, y_train)

# Evaluate on the validation split
y_pred_4 = model_4.predict(X_val_scaled)
print("\nModel 4 Evaluation (sklearn.MLPClassifier)")
print(classification_report(y_val, y_pred_4))


Training Model 4: sklearn.MLPClassifier

Model 4 Evaluation (sklearn.MLPClassifier)
              precision    recall  f1-score   support

           0       0.98      0.98      0.98        63
           1       0.99      0.99      0.99       108

    accuracy                           0.99       171
   macro avg       0.99      0.99      0.99       171
weighted avg       0.99      0.99      0.99       171

