Step 1: Load and Preprocess the Data

In [1]:
# Mount Google Drive at /content/drive so the dataset files stored on the
# drive can be opened by the cells below.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [5]:
import h5py
import numpy as np

# Paths to the HDF5 datasets on the mounted drive.
# Absolute paths are used (the drive is mounted at /content/drive) so that
# loading does not depend on the notebook's current working directory.
train_path = "/content/drive/My Drive/Train.h5"
test_path = "/content/drive/My Drive/Test.h5"

def inspect_h5_file(file_path):
    """Print the top-level keys stored in an HDF5 file.

    Args:
        file_path: Path of the HDF5 file to open (read-only).

    The file is closed again as soon as the listing has been printed.
    """
    with h5py.File(file_path, "r") as h5_file:
        print(f"Inspecting {file_path}:")
        top_level_keys = h5_file.keys()
        for entry_name in top_level_keys:
            print(entry_name)

# List the top-level keys of the training and test files to confirm which
# datasets are available before loading them.
inspect_h5_file(train_path)
inspect_h5_file(test_path)

Inspecting ../content/drive/My Drive/Train.h5:
images
labels
Inspecting ../content/drive/My Drive/Test.h5:
images
labels


In [7]:
def load_data(train_path, test_path):
    """Load images and labels from the training and test HDF5 files.

    Both files are read from their "images" and "labels" datasets and are
    closed before returning, so no file handles are left open.

    Args:
        train_path: Path to the training HDF5 file.
        test_path: Path to the test HDF5 file.

    Returns:
        Tuple ``(train_set_x_orig, train_set_y_orig, test_set_x_orig,
        test_set_y_orig)`` of NumPy arrays holding in-memory copies of the
        datasets.
    """
    # The context manager guarantees the files are closed even if a dataset
    # is missing or the read fails; the arrays are fully materialised inside
    # the block, so they remain valid after the files are closed.
    with h5py.File(train_path, "r") as train_dataset, \
            h5py.File(test_path, "r") as test_dataset:
        train_set_x_orig = np.array(train_dataset["images"][:])
        train_set_y_orig = np.array(train_dataset["labels"][:])

        test_set_x_orig = np.array(test_dataset["images"][:])
        test_set_y_orig = np.array(test_dataset["labels"][:])

    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig

# Load the raw image and label arrays for both splits.
train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig = load_data(train_path, test_path)

# Report the split sizes and the per-image shape.
print(f"Number of training examples: {train_set_x_orig.shape[0]}")
print(f"Number of test examples: {test_set_x_orig.shape[0]}")
print(f"Each image is of size: {train_set_x_orig.shape[1:]}")

# Flatten each image into one column (result is features x examples) and
# scale by 1/255.
# NOTE(review): the 255.0 divisor assumes 8-bit pixel values — confirm.
train_set_x_flatten = train_set_x_orig.reshape(train_set_x_orig.shape[0], -1).T / 255.0
test_set_x_flatten = test_set_x_orig.reshape(test_set_x_orig.shape[0], -1).T / 255.0

# Convert labels to one-hot columns (result is classes x examples).
# The class count is taken from the largest training label, so labels are
# assumed to be non-negative integers starting at 0; a test label larger
# than every training label would be out of range for the identity lookup.
num_classes = np.max(train_set_y_orig) + 1
train_set_y = np.eye(num_classes)[train_set_y_orig].T
test_set_y = np.eye(num_classes)[test_set_y_orig].T

# Show the final shapes that the model code below consumes.
print(f"Shape of training data: {train_set_x_flatten.shape}")
print(f"Shape of training labels: {train_set_y.shape}")
print(f"Shape of test data: {test_set_x_flatten.shape}")
print(f"Shape of test labels: {test_set_y.shape}")

Number of training examples: 2626
Number of test examples: 120
Each image is of size: (128, 128, 3)
Shape of training data: (49152, 2626)
Shape of training labels: (5, 2626)
Shape of test data: (49152, 120)
Shape of test labels: (5, 120)


Step 2: Implement the Softmax Activation Function

In [8]:
def softmax(Z):
    """Apply the softmax function down each column of ``Z``.

    Args:
        Z: Array of scores; normalisation runs along axis 0.

    Returns:
        Array of the same shape as ``Z`` whose entries along axis 0 sum to 1.
    """
    # Subtracting the per-column maximum leaves the result unchanged but
    # keeps the exponentials from overflowing.
    column_max = np.max(Z, axis=0, keepdims=True)
    shifted_scores = Z - column_max
    weights = np.exp(shifted_scores)
    column_totals = weights.sum(axis=0, keepdims=True)
    return weights / column_totals


Step 3: Define the Cost Function for Multi-Class Classification

In [9]:
def compute_cost(Y, Y_hat):
    """Compute the mean categorical cross-entropy cost.

    Args:
        Y: One-hot targets, one column per example.
        Y_hat: Predicted class probabilities with the same shape as ``Y``.

    Returns:
        The cross-entropy summed over all entries and divided by the number
        of columns of ``Y``.
    """
    m = Y.shape[1]
    # Floor the probabilities before the log: a probability that underflows
    # to exactly 0 would give log(0) = -inf, and 0 * -inf is NaN, which would
    # poison the whole sum. Values at or above the floor are left untouched.
    eps = 1e-15
    safe_Y_hat = np.maximum(Y_hat, eps)
    cost = -1/m * np.sum(Y * np.log(safe_Y_hat))
    return cost


Step 4: Implement the Gradient Descent Algorithm

In [10]:
def initialize_parameters(dim, num_classes, seed=None):
    """Create the initial weight matrix and bias vector.

    Args:
        dim: Number of input features.
        num_classes: Number of output classes.
        seed: Optional integer seed. When given, the weights are drawn from a
            private ``RandomState(seed)`` so repeated calls return identical
            values. When ``None`` (default) the global NumPy random state is
            used, exactly as before.

    Returns:
        Tuple ``(W, b)`` where ``W`` has shape ``(num_classes, dim)`` and
        holds small Gaussian values (standard normal scaled by 0.01), and
        ``b`` is a zero array of shape ``(num_classes, 1)``.
    """
    if seed is None:
        draws = np.random.randn(num_classes, dim)
    else:
        # A private generator gives reproducible weights without disturbing
        # the global random state used elsewhere.
        draws = np.random.RandomState(seed).randn(num_classes, dim)
    W = draws * 0.01
    b = np.zeros((num_classes, 1))
    return W, b

def forward_propagation(X, W, b):
    """Compute the model's softmax output for the inputs ``X``.

    Args:
        X: Input matrix, multiplied on the left by ``W``.
        W: Weight matrix.
        b: Bias added to the product ``W X``.

    Returns:
        ``softmax(W X + b)``.
    """
    linear_scores = np.dot(W, X) + b
    return softmax(linear_scores)

def backward_propagation(X, Y, A):
    """Compute the gradients of the cost with respect to ``W`` and ``b``.

    Args:
        X: Input matrix; its second dimension is the number of examples.
        Y: Target matrix with the same shape as ``A``.
        A: Model output for ``X``.

    Returns:
        Tuple ``(dW, db)``: the example-averaged product of the output error
        with ``X`` transposed, and the example-averaged output error kept as
        a column.
    """
    num_examples = X.shape[1]
    averaging_factor = 1 / num_examples
    output_error = A - Y
    grad_W = averaging_factor * np.dot(output_error, X.T)
    grad_b = averaging_factor * np.sum(output_error, axis=1, keepdims=True)
    return grad_W, grad_b

def gradient_descent(X, Y, W, b, learning_rate, num_iterations):
    """Train ``W`` and ``b`` with full-batch gradient descent.

    Args:
        X: Input matrix passed to the forward and backward passes.
        Y: Target matrix passed to the cost and backward pass.
        W: Initial weight matrix.
        b: Initial bias.
        learning_rate: Step size applied to each gradient.
        num_iterations: Number of update steps to run.

    Returns:
        Tuple ``(W, b)`` after the final update. The cost measured before the
        update is printed on every 100th iteration, starting at iteration 0.
    """
    for step in range(num_iterations):
        # Evaluate the model and its cost with the current parameters.
        probabilities = forward_propagation(X, W, b)
        cost = compute_cost(Y, probabilities)

        grad_W, grad_b = backward_propagation(X, Y, probabilities)

        # Rebind instead of updating in place so the arrays passed in by the
        # caller are never modified.
        W = W - learning_rate * grad_W
        b = b - learning_rate * grad_b

        is_logging_step = step % 100 == 0
        if is_logging_step:
            print(f"Cost after iteration {step}: {cost}")

    return W, b


Step 5: Test the Model and Evaluate Its Performance

In [11]:
def predict(X, W, b):
    """Return the index of the highest-scoring class for each column of ``X``.

    Args:
        X: Input matrix passed to the forward pass.
        W: Weight matrix.
        b: Bias.

    Returns:
        Array of arg-max indices taken along axis 0 of the model output.
    """
    class_probabilities = forward_propagation(X, W, b)
    return np.argmax(class_probabilities, axis=0)

def accuracy(predictions, labels):
    """Return the percentage of predictions that equal their labels.

    Args:
        predictions: Predicted class indices (array or sequence).
        labels: True class indices (array or sequence) with the same number
            of elements as ``predictions``.

    Returns:
        Share of matching elements, as a percentage between 0 and 100.

    Raises:
        ValueError: If the two inputs hold a different number of elements.
    """
    # Flatten both sides so the comparison is always element-by-element:
    # plain lists would otherwise compare as a single bool, and a (m,) vs
    # (m, 1) pair would broadcast into an (m, m) matrix and skew the mean.
    predictions = np.asarray(predictions).ravel()
    labels = np.asarray(labels).ravel()
    if predictions.size != labels.size:
        raise ValueError(
            f"predictions and labels must have the same number of elements, "
            f"got {predictions.size} and {labels.size}"
        )
    return np.mean(predictions == labels) * 100

# Initialize parameters: one weight row per class, one column per input feature.
# NOTE(review): the weights come from NumPy's random generator and no seed is
# set in the visible cells, so results will differ between runs.
dim = train_set_x_flatten.shape[0]
W, b = initialize_parameters(dim, num_classes)

# Train the model with full-batch gradient descent.
# NOTE(review): in the recorded output the cost rises from about 2.0 at
# iteration 0 to about 10 and then oscillates instead of decreasing, which
# suggests this learning rate may be too large for these inputs — consider
# trying a smaller value and re-checking the cost log.
learning_rate = 0.01
num_iterations = 1000
W, b = gradient_descent(train_set_x_flatten, train_set_y, W, b, learning_rate, num_iterations)

# Make predictions on the test set and recover integer labels from the
# one-hot test targets so both sides are class indices.
test_predictions = predict(test_set_x_flatten, W, b)
test_labels = np.argmax(test_set_y, axis=0)

# Evaluate accuracy as a percentage of matching test examples.
test_accuracy = accuracy(test_predictions, test_labels)
print(f"Test accuracy: {test_accuracy}%")


Cost after iteration 0: 2.003216846469892
Cost after iteration 100: 10.093959022192664
Cost after iteration 200: 14.807001810690807
Cost after iteration 300: 9.227468477681176
Cost after iteration 400: 10.282358025499647
Cost after iteration 500: 11.804511658595276
Cost after iteration 600: 10.091224340709891
Cost after iteration 700: 7.937442895515193
Cost after iteration 800: 10.287739863600018
Cost after iteration 900: 4.343463055757876
Test accuracy: 36.666666666666664%
