Eva Saini 1RVU22CSE053

In [7]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Step1: Load and preprocess the data.

In [8]:
import h5py
import numpy as np
import matplotlib.pyplot as plt

train_file_path = '/content/drive/My Drive/Colab Notebooks/Tr.h5'
test_file_path = '/content/drive/My Drive/Colab Notebooks/Te.h5'

# Load the dataset
def load_data():
   train_dataset = h5py.File(train_file_path, "r")
   test_dataset = h5py.File(test_file_path, "r")

    # Extract images and labels
   train_X = np.array(train_dataset["images"][:])  # training set features
   train_Y = np.array(train_dataset["labels"][:])  # training set labels
   test_X = np.array(test_dataset["images"][:])    # test set features
   test_Y = np.array(test_dataset["labels"][:])

   return train_X, train_Y, test_X, test_Y

# Preprocessing the data (flattening and normalizing)
def preprocess_data(train_X, test_X):
    train_X_flatten = train_X.reshape(train_X.shape[0], -1).T  # Flatten images
    test_X_flatten = test_X.reshape(test_X.shape[0], -1).T

    # Normalize pixel values to [0, 1]
    train_X_norm = train_X_flatten / 255.0
    test_X_norm = test_X_flatten / 255.0

    return train_X_norm, test_X_norm

# One-hot encode labels for multi-class classification
def one_hot_encode(labels, num_classes):
    return np.eye(num_classes)[labels.reshape(-1)].T

train_X, train_Y, test_X, test_Y = load_data()
train_X, test_X = preprocess_data(train_X, test_X)
train_Y_one_hot = one_hot_encode(train_Y, 5)
test_Y_one_hot = one_hot_encode(test_Y, 5)


Step 2: Implement the softmax activation function.

In [9]:
def softmax(Z):
    expZ = np.exp(Z - np.max(Z, axis=0, keepdims=True))
    return expZ / np.sum(expZ, axis=0, keepdims=True)

Step 3: Define the cost function for multi-class classification.

In [10]:
def compute_cost(AL, Y):
    m = Y.shape[1]
    cost = -np.sum(Y * np.log(AL)) / m
    cost = np.squeeze(cost)  # To make sure cost is a scalar
    return cost


Step 4:Implement the gradient descent algorithm to update the model parameters.

In [11]:
def initialize_parameters(layer_dims):
    np.random.seed(1)
    parameters = {}
    L = len(layer_dims)  # Number of layers

    for l in range(1, L):
        parameters['W' + str(l)] = np.random.randn(layer_dims[l], layer_dims[l - 1]) * 0.01
        parameters['b' + str(l)] = np.zeros((layer_dims[l], 1))

    return parameters


In [12]:
def linear_forward(A, W, b):
    Z = np.dot(W, A) + b
    return Z

def forward_propagation(X, parameters):
    caches = []
    A = X
    L = len(parameters) // 2  # Number of layers

    for l in range(1, L):
        A_prev = A
        W = parameters['W' + str(l)]
        b = parameters['b' + str(l)]
        Z = linear_forward(A_prev, W, b)
        A = np.maximum(0, Z)  # ReLU activation
        caches.append((A_prev, W, b, Z))

    # Final layer (Softmax activation)
    W_L = parameters['W' + str(L)]
    b_L = parameters['b' + str(L)]
    ZL = linear_forward(A, W_L, b_L)
    AL = softmax(ZL)

    caches.append((A, W_L, b_L, ZL))
    return AL, caches


In [13]:
def linear_backward(dZ, cache):
    A_prev, W, b, Z = cache
    m = A_prev.shape[1]

    dW = np.dot(dZ, A_prev.T) / m
    db = np.sum(dZ, axis=1, keepdims=True) / m
    dA_prev = np.dot(W.T, dZ)

    return dA_prev, dW, db

def backward_propagation(AL, Y, caches):
    grads = {}
    L = len(caches)
    m = AL.shape[1]

    # Initial gradient (for Softmax)
    dZL = AL - Y
    current_cache = caches[L - 1]
    grads["dA" + str(L - 1)], grads["dW" + str(L)], grads["db" + str(L)] = linear_backward(dZL, current_cache)

    for l in reversed(range(L - 1)):
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = linear_backward(grads["dA" + str(l + 1)], current_cache)
        grads["dA" + str(l)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp

    return grads


In [14]:
def update_parameters(parameters, grads, learning_rate):
    L = len(parameters) // 2  # Number of layers

    for l in range(1, L + 1):
        parameters["W" + str(l)] -= learning_rate * grads["dW" + str(l)]
        parameters["b" + str(l)] -= learning_rate * grads["db" + str(l)]

    return parameters


Step 5: Test the model on the test set and evaluate its performance.

In [15]:
def model(X, Y, layer_dims, learning_rate=0.0075, num_iterations=3000):
    np.random.seed(1)
    costs = []
    parameters = initialize_parameters(layer_dims)

    for i in range(0, num_iterations):
        AL, caches = forward_propagation(X, parameters)
        cost = compute_cost(AL, Y)
        grads = backward_propagation(AL, Y, caches)
        parameters = update_parameters(parameters, grads, learning_rate)

        if i % 100 == 0:
            costs.append(cost)
            print(f"Cost after iteration {i}: {cost}")

    return parameters


In [18]:
def predict(X, parameters):
    AL, _ = forward_propagation(X, parameters)
    return np.argmax(AL, axis=0)

# Evaluate on the test set
def evaluate(test_X, test_Y, parameters):
    predictions = predict(test_X, parameters)
    accuracy = np.mean(predictions == np.argmax(test_Y, axis=0))
    print(f"Test Accuracy: {accuracy * 100}%")

# Updated layer dimensions
parameters = model(train_X, train_Y_one_hot, layer_dims=[49152, 20, 7, 5], num_iterations=2500)
evaluate(test_X, test_Y_one_hot, parameters)


Cost after iteration 0: 1.6093863775118022
Cost after iteration 100: 1.6093653349174797
Cost after iteration 200: 1.6093399737891814
Cost after iteration 300: 1.6092981584337163
Cost after iteration 400: 1.6092316177357309
Cost after iteration 500: 1.6091209622689981
Cost after iteration 600: 1.608928251370577
Cost after iteration 700: 1.6085769747318805
Cost after iteration 800: 1.6078870659265754
Cost after iteration 900: 1.6063252638690704
Cost after iteration 1000: 1.6021459807370393
Cost after iteration 1100: 1.5893718178581249
Cost after iteration 1200: 1.5649568358787882
Cost after iteration 1300: 1.5519777552492608
Cost after iteration 1400: 1.5298897239217712
Cost after iteration 1500: 1.4264337948610597
Cost after iteration 1600: 1.3484281277406864
Cost after iteration 1700: 1.2931289615132975
Cost after iteration 1800: 1.2448921706040106
Cost after iteration 1900: 1.1870354733613868
Cost after iteration 2000: 1.1298179162970359
Cost after iteration 2100: 1.0647812067414812
C