In [28]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import cifar10
import matplotlib.pyplot as plt

In [35]:
# Load and prepare data
(X_train, y_train), (X_test, y_test) = cifar10.load_data()

# Cat label is 3
cat_train = X_train[y_train.flatten() == 3][:200]
cat_test = X_test[y_test.flatten() == 3][:50]

# Not-cat label is 0
not_cat_train = X_train[y_train.flatten() == 0][:200]
not_cat_test = X_test[y_test.flatten() == 0][:50]

# Create cat and not-cat labels
y_cat_train = np.ones((len(cat_train), 1))
y_cat_test = np.ones((len(cat_test), 1))
y_not_cat_train = np.zeros((len(not_cat_train), 1))
y_not_cat_test = np.zeros((len(not_cat_test), 1))

# Flatten image data
cat_train_flat = cat_train.reshape(len(cat_train), -1).T
cat_test_flat = cat_test.reshape(len(cat_test), -1).T
not_cat_train_flat = not_cat_train.reshape(len(not_cat_train), -1).T
not_cat_test_flat = not_cat_test.reshape(len(not_cat_test), -1).T

# Combine cat and not-cat data
X_train = np.hstack([cat_train_flat, not_cat_train_flat]) / 255
X_test = np.hstack([cat_test_flat, not_cat_test_flat]) / 255

# Combine cat and not-cat labels
y_train = np.vstack([y_cat_train, y_not_cat_train]).T
y_test = np.vstack([y_cat_test, y_not_cat_test]).T

In [63]:
np.random.seed(3687)
# Sigmoid activation function and its derivative
def sigmoid(z):
    return 1 / (1 + np.exp(-z))


def sigmoid_derivative(z):
    return sigmoid(z) * (1 - sigmoid(z))

def ReLU(x):
    return np.maximum(0, x)

def ReLU_derivative(x):
    return np.where(x > 0, 1, 0)

def LeakyReLU(x, alpha=0.01):
    return np.where(x > 0, x, alpha * x)

def LeakyReLU_derivative(x, alpha=0.01):
    return np.where(x > 0, 1, alpha)

# Initialize weights and biases
def initialize_parameters(input_size, hidden_size1, hidden_size2, output_size):
    # np.random.seed(42)  # For reproducibility
    W1 = np.random.randn(hidden_size1, input_size) * 0.01
    b1 = np.zeros((hidden_size1, 1))
    W2 = np.random.randn(hidden_size2, hidden_size1) * 0.01
    b2 = np.zeros((hidden_size2, 1))
    W3 = np.random.randn(output_size, hidden_size2) * 0.01
    b3 = np.zeros((output_size, 1))
    return W1, b1, W2, b2, W3, b3


# Forward propagation
def forward_propagation(X, W1, b1, W2, b2, W3, b3, active):
    Z1 = np.dot(W1, X) + b1
    A1 = active(Z1)
    Z2 = np.dot(W2, A1) + b2
    A2 = active(Z2)
    Z3 = np.dot(W3, A2) + b3
    A3 = active(Z3)
    return Z1, A1, Z2, A2, Z3, A3


# Compute cost
def compute_cost(A3, Y):
    m = Y.shape[1]
    cost = -(1 / m) * np.sum(Y * np.log(A3) + (1 - Y) * np.log(1 - A3))
    return cost


# Backward propagation
def backward_propagation(X, Y, Z1, A1, Z2, A2, Z3, A3, W1, W2, W3, active_derivative):
    m = X.shape[1]
    dZ3 = A3 - Y
    dW3 = (1 / m) * np.dot(dZ3, A2.T)
    db3 = (1 / m) * np.sum(dZ3, axis=1, keepdims=True)

    dA2 = np.dot(W3.T, dZ3)
    dZ2 = dA2 * active_derivative(Z2)
    dW2 = (1 / m) * np.dot(dZ2, A1.T)
    db2 = (1 / m) * np.sum(dZ2, axis=1, keepdims=True)

    dA1 = np.dot(W2.T, dZ2)
    dZ1 = dA1 * active_derivative(Z1)
    dW1 = (1 / m) * np.dot(dZ1, X.T)
    db1 = (1 / m) * np.sum(dZ1, axis=1, keepdims=True)

    return dW1, db1, dW2, db2, dW3, db3


# Update parameters
def update_parameters(W1, b1, W2, b2, W3, b3, dW1, db1, dW2, db2, dW3, db3, learning_rate, delay, iteration,num_iters):
    W1 = W1 - learning_rate * dW1
    b1 = b1 - learning_rate * db1
    W2 = W2 - learning_rate * dW2
    b2 = b2 - learning_rate * db2
    W3 = W3 - learning_rate * dW3
    b3 = b3 - learning_rate * db3
    if iteration % 100 == 0:
        learning_rate = learning_rate - learning_rate * delay * iteration / num_iters
    return W1, b1, W2, b2, W3, b3


# Neural network model
def neural_network(X_train, Y_train, input_size, hidden_size1, hidden_size2, output_size, num_iterations,
                   learning_rate):
    W1, b1, W2, b2, W3, b3 = initialize_parameters(input_size, hidden_size1, hidden_size2, output_size)

    for i in range(num_iterations):
        Z1, A1, Z2, A2, Z3, A3 = forward_propagation(X_train, W1, b1, W2, b2, W3, b3, sigmoid)
        cost = compute_cost(A3, Y_train)

        dW1, db1, dW2, db2, dW3, db3 = backward_propagation(X_train, Y_train, Z1, A1, Z2, A2, Z3, A3, W1, W2, W3, sigmoid_derivative)
        W1, b1, W2, b2, W3, b3 = update_parameters(W1, b1, W2, b2, W3, b3, dW1, db1, dW2, db2, dW3, db3, learning_rate,delay,i,num_iterations)

        if i % 100 == 0:
            print(f"Cost after iteration {i}: {cost}")

    return W1, b1, W2, b2, W3, b3


# Prediction
def predict(X, W1, b1, W2, b2, W3, b3):
    _, _, _, _, _, A3 = forward_propagation(X, W1, b1, W2, b2, W3, b3, sigmoid)
    predictions = (A3 > 0.5).astype(int)
    return predictions

In [62]:
# Train neural network
input_size = X_train.shape[0]
hidden_size1 = 38
hidden_size2 = 19
output_size = 1
num_iterations = 16000
learning_rate = 0.52
delay = 1


W1, b1, W2, b2, W3, b3 = neural_network(X_train, y_train, input_size, hidden_size1, hidden_size2, output_size,
                                        num_iterations, learning_rate)

# Make predictions
train_predictions = predict(X_train, W1, b1, W2, b2, W3, b3)
test_predictions = predict(X_test, W1, b1, W2, b2, W3, b3)

# Calculate accuracy
train_accuracy = np.mean(train_predictions == y_train)
test_accuracy = np.mean(test_predictions == y_test)

print(f"Train accuracy: {train_accuracy * 100:.2f}%")
print(f"Test accuracy: {test_accuracy * 100:.2f}%")

Cost after iteration 0: 0.6936167203972181
Cost after iteration 100: 0.6931389355013087
Cost after iteration 200: 0.6931135340281063
Cost after iteration 300: 0.6929948349644882
Cost after iteration 400: 0.6917750944552219
Cost after iteration 500: 0.5741419669712304
Cost after iteration 600: 0.47046957710980536
Cost after iteration 700: 0.42997557091570293
Cost after iteration 800: 0.3426597298723258
Cost after iteration 900: 0.30746405549139794
Cost after iteration 1000: 0.2425757850740263
Cost after iteration 1100: 0.2040464248354128
Cost after iteration 1200: 0.13308052949863164
Cost after iteration 1300: 0.3339817642701554
Cost after iteration 1400: 0.2070946953998528
Cost after iteration 1500: 0.1613179438702295
Cost after iteration 1600: 0.44066219260053174
Cost after iteration 1700: 0.10556499157923653
Cost after iteration 1800: 0.036224823703766375
Cost after iteration 1900: 0.019556176728426446
Cost after iteration 2000: 0.01213692963596086
Cost after iteration 2100: 0.008738