In [1]:
import numpy as np
import copy
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow import keras 
import cupy as cp
import os
import cv2
from PIL import Image

In [2]:
def plot_images(images, labels):
    fig, axes = plt.subplots(1, len(images), figsize=(10, 10))
    for i, (image, label) in enumerate(zip(images, labels)):
        img_2d = image.reshape((28, 28))  # Reshape image to 2D matrix
        axes[i].imshow(img_2d, cmap='gray')
        axes[i].set_title(label)
        axes[i].axis('off')
    plt.show()

In [3]:
def load_dataset():
    data_dir = r'C:\Users\ebi19\OneDrive\Documents\Jypyter_apps\Multiple_neural_network_layer_model\Lions and Cheetahs'
    classes = ['Lions', 'Cheetahs']
    X_train, X_test, y_train, y_test = [], [], [], []
    test_split = 0.2
    for index, label in enumerate(classes):
        path = os.path.join(data_dir, label)
        files = os.listdir(path)
        for i, file in enumerate(files):
            image_path = os.path.join(path, file)
            image = Image.open(image_path).convert('RGB')
            image = image.resize((64, 64))
            image = cp.asarray(image, dtype=cp.float32) / 255.
            if i < int(len(files) * (1 - test_split)):
                X_train.append(cp.ndarray.flatten(image))
                y_train.append(index)
            else:
                X_test.append(cp.ndarray.flatten(image))
                y_test.append(index)
    X_train = cp.vstack(X_train)
    X_train = X_train.T
    X_test = cp.vstack(X_test)
    X_test = X_test.T
    y_train = cp.asarray(y_train, dtype=cp.int32)
    y_test = cp.asarray(y_test, dtype=cp.int32)
    y_test = y_test.reshape(y_test.shape[-1],1).T
    y_train = y_train.reshape(y_train.shape[-1],1).T
    return X_train, y_train, X_test, y_test

In [4]:
def layer_size(X,Y):
    
    n_x = cp.shape(X)
    n_x = n_x[0]
    n_h = 256 # size of hidden layer 1
    n_z = 128 # size of hidden layer 2
    n_y = cp.shape(Y)
    n_y = n_y[0]

    return n_x,n_h,n_z,n_y

In [5]:

def relu(x):
    return cp.maximum(0, x)

In [6]:
def softmax(x):
    """Compute softmax values for each row of x."""
    e_x = cp.exp(x - cp.max(x))
    return e_x / e_x.sum(axis=0)

In [7]:
def initialize_parameters(n_x,n_h,n_z,n_y):

    w1 = cp.random.randn(n_h,n_x) * 0.001
    b1 = cp.zeros((n_h , 1))
    w2 = cp.random.randn(n_z,n_h) * 0.001
    b2 = cp.zeros((n_z,1))
    w3 = cp.random.randn(n_y,n_z) * 0.001
    b3 = cp.zeros((n_y,1))

    parameters = { "w1" : w1 , "b1" : b1 , "w2" : w2, "b2" : b2, "w3" : w3, "b3" : b3}

    return parameters

In [8]:
def forward_prop(X,parameters):

    w1 = parameters["w1"]
    w2 = parameters["w2"]
    w3 = parameters["w3"]
    b1 = parameters["b1"]
    b2 = parameters["b2"]
    b3 = parameters["b3"]
    
    
    z1 = cp.dot(w1,X) + b1
    a1 = relu(z1)
    z2 = cp.dot(w2,a1) + b2
    a2 = relu(z2)
    z3 = cp.dot(w3,a2) + b3
    a3 = relu(z3)

    caches = {"z1" : z1, "a1" : a1, "z2": z2, "a2" : a2, "z3" : z3, "a3" : a3}

    return a3,caches

In [9]:
def cost_function(a3, Y,parameters):
    m = Y.shape[0]
    w1 = parameters["w1"]
    w2 = parameters["w2"]
    w3 = parameters["w3"]
    epsilon = 1e-7
    cost = -1/m * cp.sum(Y*cp.log(a3+epsilon) + (1-Y)*cp.log(1-a3+epsilon)) 
    return cost


In [10]:
def backward_prop(parameters,caches,X,Y):

    m = Y.shape[0]
    
    w1 = parameters["w1"]
    w2 = parameters["w2"]
    w3 = parameters["w3"]
    a1 = caches["a1"]
    a2 = caches["a2"]
    a3 = caches["a3"]
    z1 = caches["z1"]
    z2 = caches["z2"]
    z3 = caches["z3"]

    dz3  = a3 - Y
    dw3 = cp.dot(dz3,a2.T)/m 
    db3 = cp.sum(dz3,axis = 1, keepdims=True)/m
    da2 = cp.dot(w3.T,dz3)
    dz2 = cp.multiply(da2,a2)
    dw2 = cp.dot(dz2,a1.T)/m 
    db2 = cp.sum(dz2,axis = 1, keepdims=True)/m
    da1 = cp.dot(w2.T,dz2)
    dz1 = cp.multiply(da1,a1)
    dw1 = cp.dot(dz1,X.T)/m 
    db1 = cp.sum(dz1, axis=1, keepdims= True)/m

    backprop = { "dz3" : dz3, "dw3" : dw3, "db3" : db3, "da2" : da2, "dz2" : dz2, "dw2" : dw2, "db2" : db2, "da1" : da1, "dz1" : dz1, "dw1" : dw1, "db1" : db1}
    return backprop


In [11]:
def update_parameters(backprop, learning_rate ,parameters):

    w1 = copy.deepcopy(parameters["w1"])
    w2 = copy.deepcopy(parameters["w2"])
    w3 = copy.deepcopy(parameters["w3"])
    b1 = copy.deepcopy(parameters["b1"])
    b2 = copy.deepcopy(parameters["b2"])
    b3 = copy.deepcopy(parameters["b3"])

    dw1 = backprop["dw1"]
    dw2 = backprop["dw2"]
    dw3 = backprop["dw3"]
    db1 = backprop["db1"]
    db2 = backprop["db2"]
    db3 = backprop["db3"]

    #updating the parameters

    w1 = w1 - (learning_rate * dw1)
    w2 = w2 - (learning_rate * dw2)
    w3 = w3 - (learning_rate * dw3)

    b1 = b1 - (learning_rate * db1)
    b2 = b2 - (learning_rate * db2)
    b3 = b3 - (learning_rate * db3)

    #storing updated parameters in the dictionary

    parameters = {"w1" : w1, "w2": w2, "w3" : w3, "b1" : b1, "b2" : b2, "b3" : b3}

    return parameters    

In [12]:
def gradient_descent(X,Y,iterations,learning_rate):

    n_x,n_h,n_z,n_y = layer_size(X,Y)
    parameters = initialize_parameters(n_x,n_h,n_z,n_y)

    for i in range(0,iterations):
        a3,caches = forward_prop(X,parameters)
        cost = cost_function(a3,Y,parameters)
        backprop = backward_prop(parameters,caches,X,Y)
        parameters = update_parameters(backprop, learning_rate ,parameters)

        #print("updated parameters = ",parameters)
        if i % 100 == 0:
           print(f"Cost after iteration {i}: {cost}")
        
    return parameters

In [13]:
def predict(X, parameters):
    # Implement forward propagation to get predictions
    m = X.shape[1]
    Y_prediction = np.zeros((1, m))
    A, _ = forward_prop(X, parameters)
    for i in range(A.shape[1]):
        if A[0, i] <= 0.5:
            Y_prediction[0, i] = 0
        else:
            Y_prediction[0, i] = 1
    return Y_prediction 

In [14]:
def forward_prop_test(X,parameters):

    w1 = parameters["w1"]
    w2 = parameters["w2"]
    w3 = parameters["w3"]
    b1 = parameters["b1"]
    b2 = parameters["b2"]
    b3 = parameters["b3"]
    b3 = b3[:,:10000] #depends on the shape of Y_test
    
    
    z1 = cp.dot(w1,X) + b1
    a1 = relu(z1)
    z2 = cp.dot(w2,a1) + b2
    a2 = relu(z2)
    z3 = cp.dot(w3,a2) + b3
    a3 = relu(z3)

    caches = {"z1" : z1, "a1" : a1, "z2": z2, "a2" : a2, "z3" : z3, "a3" : a3}

    return a3,caches

In [15]:
def accuracy(Y_prediction, Y):
    # Convert Y_prediction and Y to arrays if they are not already
    Y_prediction = cp.array(Y_prediction)
    Y = cp.array(Y)

    # Calculate the number of examples
    m = Y.shape[1]

    # Calculate the number of correctly predicted examples
    correct = cp.sum(Y_prediction == Y)

    # Calculate the accuracy as a percentage
    acc = correct / m * 100

    return acc


In [16]:
X, Y, X_test, Y_test = load_dataset()

# Train the model
n_x, n_h, n_z, n_y = layer_size(X, Y)
parameters = gradient_descent(X, Y, iterations=1000000, learning_rate=0.00021)
print("\n Parameters are ",parameters)

# Make predictions
train_predictions = predict(X, parameters)
test_predictions = predict(X_test, parameters)

train_accuracy = accuracy(train_predictions, Y)
test_accuracy = accuracy(test_predictions, Y_test)

# Print accuracies
print("Train Accuracy:", train_accuracy)
print("Test Accuracy:", test_accuracy)

Cost after iteration 0: 1187.4943395068394
Cost after iteration 100: 110.98941095109879
Cost after iteration 200: 110.90344332825634
Cost after iteration 300: 110.9033206706277
Cost after iteration 400: 110.9032902249026
Cost after iteration 500: 110.90325987378102
Cost after iteration 600: 110.90322951750542
Cost after iteration 700: 110.90319915551191
Cost after iteration 800: 110.90316878711302
Cost after iteration 900: 110.90313841128079
Cost after iteration 1000: 110.90310802723208
Cost after iteration 1100: 110.90307763422119
Cost after iteration 1200: 110.9030472314869
Cost after iteration 1300: 110.90301681819965
Cost after iteration 1400: 110.90298639364975
Cost after iteration 1500: 110.90295595720536
Cost after iteration 1600: 110.90292550808219
Cost after iteration 1700: 110.90289504552885
Cost after iteration 1800: 110.90286456879215
Cost after iteration 1900: 110.9028340771176
Cost after iteration 2000: 110.9028035697495
Cost after iteration 2100: 110.90277304591973
Cost 