# Dataset
* Use the Stanford Dogs dataset (over 20k images of dogs) and the Stanford Cars dataset, but only use 4096 images from each.
* Resize each image to 64x64 RGB and flatten it to a vector of length 12288 (64*64*3).
* Copy the resized images of each class to a NumPy array of shape (12288, 4096), one image per column.
* Any prediction accuracy above 50% is good!!!

In [3]:
import kagglehub

# Download the latest version of the Stanford Dogs dataset from Kaggle.
# The returned value is used later as the root directory scanned for images.
path_dogs_dataset = kagglehub.dataset_download("jessicali9530/stanford-dogs-dataset")

# Download the latest version of the Stanford Cars dataset from Kaggle.
# Cars serve as the negative class (label 0) when the dataset is assembled.
path_cars_dataset = kagglehub.dataset_download("jessicali9530/stanford-cars-dataset")

In [4]:
# Store images and targets to hdf5 format

import numpy as np
from PIL import Image
import glob
import os
import h5py

SHAPE_IMAGES = (64, 64)

def generate_dataset(directory, shape=(64, 64), num_img=4096):
    """Load up to ``num_img`` JPEG images under ``directory`` as flat column vectors.

    Args:
        directory: Root folder searched recursively for files ending in
            ``.jpg`` or ``.jpeg`` (case-insensitive).
        shape: ``(width, height)`` every image is resized to.
        num_img: Maximum number of images to load.

    Returns:
        An array of shape ``(shape[0] * shape[1] * 3, k)`` holding one
        flattened RGB image per column, with ``k <= num_img``. When no image
        is found the result is an empty ``uint8`` array of shape
        ``(shape[0] * shape[1] * 3, 0)`` so callers can still read ``.shape[1]``.
    """
    image_paths = glob.glob(os.path.join(directory, '**', '*.*'), recursive=True)
    image_paths = [p for p in image_paths if p.lower().endswith(('.jpeg', '.jpg'))]
    images = []
    for path in image_paths:
        if len(images) >= num_img:
            break
        # Close the underlying file as soon as the pixels have been copied;
        # otherwise thousands of file handles stay open until garbage collection.
        with Image.open(path) as img:
            resized = img.convert('RGB').resize(shape)
        images.append(np.array(resized).flatten())  # 1-D vector of W*H*3 values

    if not images:
        # np.array([]).T would be 1-D; keep the documented 2-D layout instead.
        return np.empty((shape[0] * shape[1] * 3, 0), dtype=np.uint8)

    return np.array(images).T  # One image per column

# Load both classes: dogs are the positive class (1), cars the negative class (0).
Xdogs = generate_dataset(path_dogs_dataset, shape=SHAPE_IMAGES)
Xcars = generate_dataset(path_cars_dataset, shape=SHAPE_IMAGES)
Ydogs = np.ones((1, Xdogs.shape[1]), dtype='int8')
Ycars = np.zeros((1, Xcars.shape[1]), dtype='int8')

# Samples are stored column-wise, so both classes are joined along axis 1.
# Dividing by 255 rescales the pixel values to the range 0.0 .. 1.0.
X = np.hstack((Xdogs, Xcars)) / 255.
Y = np.hstack((Ydogs, Ycars))

# Apply one common random column order to images and labels so they stay paired.
shuffled_indices = np.random.permutation(X.shape[1])
X, Y = X[:, shuffled_indices], Y[:, shuffled_indices]

# First 80% of the shuffled columns form the training set, the rest the test set.
split_index = int(0.8 * X.shape[1])
Xtrain, Xtest = np.split(X, [split_index], axis=1)
Ytrain, Ytest = np.split(Y, [split_index], axis=1)

# Persist the four arrays; mode 'w' creates the file or overwrites an existing one.
with h5py.File('dogs.h5', 'w') as hf:
    hf.create_dataset('Xtrain', data=Xtrain)
    hf.create_dataset('Xtest', data=Xtest)
    hf.create_dataset('Ytrain', data=Ytrain)
    hf.create_dataset('Ytest', data=Ytest)

# Training
* A single node (logistic regression) made of a weight vector W of shape (12288, 1) and a scalar bias b
* Initialize W and b to 0

In [5]:
import matplotlib.pyplot as plt
import h5py
import numpy as np

def load_dataset(shape_img=(64, 64), show_samples=False):
    """Read the train/test split stored in ``dogs.h5`` and optionally preview it.

    Args:
        shape_img: ``(rows, cols)`` used to un-flatten a column back into an
            RGB image for the preview.
        show_samples: When true, plot the first samples of the training set
            (top row) and of the test set (bottom row) with their labels.

    Returns:
        Tuple ``(Xtrain, Xtest, Ytrain, Ytest)`` of arrays fully read into
        memory. Images are indexed as ``X[:, i]`` and labels as ``Y[0, i]``.
    """
    with h5py.File('dogs.h5', 'r') as hf:
        Xtrain = hf['Xtrain'][:]
        Xtest = hf['Xtest'][:]
        Ytrain = hf['Ytrain'][:]
        Ytest = hf['Ytest'][:]

    if show_samples:
        # Preview at most 10 samples per split; small datasets show fewer
        # instead of failing with an IndexError.
        num_shown = min(10, Xtrain.shape[1], Xtest.shape[1])
        if num_shown > 0:
            # squeeze=False keeps `axes` 2-D even when only one column is drawn.
            _, axes = plt.subplots(2, num_shown, figsize=(20, 4), squeeze=False)
            for row, (X, Y) in enumerate(((Xtrain, Ytrain), (Xtest, Ytest))):
                for i in range(num_shown):
                    ax = axes[row, i]
                    ax.imshow(X[:, i].reshape(shape_img[0], shape_img[1], 3))
                    ax.axis('off')  # Hide the axis
                    ax.set_title("Dog" if Y[0, i] == 1 else "Car", fontsize=8)

            plt.show()

    return Xtrain, Xtest, Ytrain, Ytest

def init_parameters(n):
    """Create zero-initialized parameters for a model with ``n`` input features.

    Args:
        n: Number of input features.

    Returns:
        Tuple ``(W, b)``: ``W`` is a float array of zeros with shape ``(n, 1)``
        and ``b`` is the float ``0.0``.
    """
    return np.zeros((n, 1), dtype=np.float64), 0.0

def linear_combination(X, W, b):
    """Return the affine pre-activation ``W.T . X + b``.

    Args:
        X: Input array with one sample per column.
        W: Weight array; its transpose is multiplied with ``X``.
        b: Bias added to every entry of the product.

    Returns:
        The array ``np.dot(W.T, X) + b``.
    """
    return W.T.dot(X) + b

def sigmoid(Z):
    """Compute the logistic sigmoid ``1 / (1 + exp(-Z))`` element-wise.

    The naive formula overflows in ``exp(-Z)`` for large negative ``Z``.
    Here the exponential is only ever taken of a non-positive number, so it
    stays within ``(0, 1]`` and cannot overflow.

    Args:
        Z: Scalar or array of pre-activations.

    Returns:
        Sigmoid of ``Z`` with the same shape as ``Z``.
    """
    exp_neg_abs = np.exp(-np.abs(Z))  # always in (0, 1]
    # For Z >= 0 this is the textbook formula; for Z < 0 the equivalent
    # exp(Z) / (1 + exp(Z)) is used.
    A = np.where(Z >= 0, 1.0 / (1.0 + exp_neg_abs), exp_neg_abs / (1.0 + exp_neg_abs))

    # np.where turns scalar input into a 0-d array; `[()]` converts that back
    # to a NumPy scalar and leaves n-d arrays unchanged.
    return A[()]

def compute_cost(Y, A):
    """Return the mean binary cross-entropy between labels and probabilities.

    Args:
        Y: Array of 0/1 labels, shape ``(1, m)``.
        A: Array of predicted probabilities, shape ``(1, m)``.

    Returns:
        The scalar cost ``-sum(Y*log(A) + (1-Y)*log(1-A)) / m``.
    """
    # Keep probabilities strictly inside (0, 1): a saturated sigmoid returns
    # exactly 0.0 or 1.0, and log(0) would turn the cost into inf or nan.
    eps = np.finfo(float).eps
    A = np.clip(np.asarray(A, dtype=float), eps, 1.0 - eps)

    J = -np.sum(Y * np.log(A) + (1.0 - Y) * np.log(1.0 - A)) / A.shape[1]

    return J

def _accuracy_percent(Ypred, Ytrue):
    """Return the percentage of 0/1 predictions that equal the 0/1 labels."""
    return 100 - np.mean(np.abs(Ypred - Ytrue)) * 100

def compute_accuracy(Xtrain, Xtest, W, b, Ytrain_true=None, Ytest_true=None):
    """Compute the classification accuracy on the training and test sets.

    Args:
        Xtrain: Training inputs, one sample per column.
        Xtest: Test inputs, one sample per column.
        W: Weight array passed to ``predict``.
        b: Bias passed to ``predict``.
        Ytrain_true: Ground-truth 0/1 labels for ``Xtrain``. When omitted, the
            module-level ``Ytrain`` is used (the original behavior).
        Ytest_true: Ground-truth 0/1 labels for ``Xtest``. When omitted, the
            module-level ``Ytest`` is used (the original behavior).

    Returns:
        Tuple ``(acc_train, acc_test)`` of accuracies in percent.
    """
    # Fall back to the notebook-level labels so existing four-argument calls
    # keep working; pass the labels explicitly to avoid the hidden dependency.
    if Ytrain_true is None:
        Ytrain_true = Ytrain
    if Ytest_true is None:
        Ytest_true = Ytest

    acc_train = _accuracy_percent(predict(Xtrain, W, b), Ytrain_true)
    acc_test = _accuracy_percent(predict(Xtest, W, b), Ytest_true)

    return acc_train, acc_test

def predict(X, W, b, labels=None, shape_img=(64, 64), show_samples=False, Y=None):
    """Classify every column of ``X`` with the logistic model ``(W, b)``.

    Args:
        X: 2-D input array with one flattened image per column.
        W: Weight array of the model.
        b: Bias of the model.
        labels: Optional sequence mapping a class index (0 or 1) to a display
            name; only used for plot titles. Raw indices are shown when omitted.
        shape_img: ``(rows, cols)`` used to un-flatten a column for plotting.
        show_samples: When true, plot the first predictions with their
            ground truth.
        Y: Ground-truth labels for ``X``, shape ``(1, m)``; only used for plot
            titles. When omitted, the module-level ``Ytest`` is used (the
            original behavior).

    Returns:
        Integer array of shape ``(1, m)`` with 1 where the predicted
        probability exceeds 0.5 and 0 elsewhere.
    """
    _, m = X.shape

    # Forward propagation
    A = sigmoid(linear_combination(X, W, b))  # A = (1, m)

    # Threshold the probabilities into hard 0/1 decisions
    Ypred = np.where(A > 0.5, 1, 0)

    if show_samples:
        if Y is None:
            Y = Ytest  # backward-compatible default: notebook-level test labels

        # Plot at most 10 predictions; fewer when fewer samples are available
        num_shown = min(10, m, Y.shape[1])
        if num_shown > 0:
            # squeeze=False keeps `axes` 2-D even when a single image is drawn
            _, axes = plt.subplots(1, num_shown, figsize=(20, 4), squeeze=False)
            for i in range(num_shown):
                ax = axes[0, i]
                ax.imshow(X[:, i].reshape(shape_img[0], shape_img[1], 3))
                ax.axis('off')  # Hide the axis
                gt, pred = int(Y[0, i]), int(Ypred[0, i])
                if labels is not None:
                    gt, pred = labels[gt], labels[pred]
                ax.set_title(f'GT: {gt}\nPred: {pred}')

            plt.show()

    return Ypred

def plot_performance(performance_metrics, lr=0.01):
    """Plot the recorded cost curve and the train/test accuracy curves.

    Args:
        performance_metrics: Sequence of entries indexed as
            ``entry[0]`` = cost, ``entry[1]`` = train accuracy,
            ``entry[2]`` = test accuracy, in recording order.
        lr: Learning rate shown in the plot titles.
    """
    # Figure 1: cost versus recording step
    cost_curve = [entry[0] for entry in performance_metrics]
    plt.plot(cost_curve)
    plt.xlabel('iterations (per hundreds)')
    plt.ylabel('cost')
    plt.title(f'Cost Function over Iteration\nLearning rate = {lr}')
    plt.show()

    # Figure 2: train and test accuracy versus recording step
    train_curve = [entry[1] for entry in performance_metrics]
    test_curve = [entry[2] for entry in performance_metrics]
    fig, ax_train = plt.subplots()

    # Train accuracy uses the left y-axis (blue)
    ax_train.plot(train_curve, 'b-', label='Train Accuracy')
    ax_train.set_ylabel('Train Accuracy [%]', color='b')
    ax_train.set_xlabel('iterations (per hundreds)')
    ax_train.tick_params(axis='y', labelcolor='b')

    # Logarithmic x-axis
    ax_train.set_xscale('log')

    # Test accuracy uses a second y-axis on the right (red) sharing the x-axis
    ax_test = ax_train.twinx()
    ax_test.plot(test_curve, 'r-', label='Test Accuracy')
    ax_test.set_ylabel('Test Accuracy [%]', color='r')
    ax_test.tick_params(axis='y', labelcolor='r')

    # Title, layout and display
    plt.title(f'Accuracy\nLearning rate = {lr}')
    fig.tight_layout()
    plt.show()

In [None]:
# Hyper-parameters of the gradient-descent run
LEARNING_RATE = 0.005
NUM_ITER = 1000
LABELS = ['Car', 'Dog']  # Display name for class index 0 and 1

Xtrain, Xtest, Ytrain, Ytest = load_dataset(show_samples=True)
n, m = Xtrain.shape  # n features per sample, m training samples

W, b = init_parameters(n)  # W = (n, 1)

# NOTE(review): metrics are sampled every 10 iterations below, while
# plot_performance labels its x-axis 'iterations (per hundreds)' - confirm
# which of the two is intended.
performance_metrics = []
for i in range(NUM_ITER):
    # Forward pass: probabilities for the whole training set
    Z = linear_combination(Xtrain, W, b)  # Z = (1, m)
    A = sigmoid(Z)  # A = (1, m)

    # Record and report cost and accuracy every 10th iteration
    if not i % 10:
        J = compute_cost(Ytrain, A)
        acc_train, acc_test = compute_accuracy(Xtrain, Xtest, W, b)
        performance_metrics.append((J, acc_train, acc_test))
        print(
            f'Iteration #{i}\n'
            f'  Cost: {J:.2f}\n'
            f'  Accuracy: train = {acc_train:.1f}%, test = {acc_test:.1f}%\n'
        )

    # Backward pass: gradients of the cost, then one gradient-descent step
    dW = Xtrain.dot((A - Ytrain).T) / m  # dW = (n, 1)
    db = (A - Ytrain).sum() / m
    W -= LEARNING_RATE * dW
    b -= LEARNING_RATE * db

plot_performance(performance_metrics, LEARNING_RATE)

# Final learned parameters
print(W, b, sep='\n')