In [1]:
import numpy as np
import torch
from torchvision import transforms
from PIL import Image

In [2]:
# Read the picture, shrink it to 64x64 pixels and force three colour channels.
img = Image.open("cat_10.jpg")
img = img.resize((64, 64))
img = img.convert("RGB")

# Flatten every channel value into a single column, then divide by 255.
x = np.array(img).reshape(-1, 1)
x = x / 255.0    # 64 * 64 * 3 = 12288 rows, one column
print("Input vector shape:", x.shape)

# Target for this single example, stored as a 1x1 array.
y = np.array([[1]])

Input vector shape: (12288, 1)


In [3]:
def sigmoid(z):
    """Logistic sigmoid ``1 / (1 + exp(-z))``, evaluated element-wise without overflow.

    Parameters
    ----------
    z : scalar or array-like
        Pre-activation value(s).

    Returns
    -------
    NumPy scalar or ndarray
        Sigmoid of ``z`` with the same shape as ``z``.
    """
    z = np.asarray(z)
    # exp(-|z|) always lies in (0, 1], so it cannot overflow the way exp(-z)
    # does for very negative z.
    decay = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z) -- the same function algebraically.
    out = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () turns a 0-d result back into a scalar and leaves arrays as they are.
    return out[()]

def sigmoid_derivative(a):
    """Return ``a * (1 - a)``.

    ``a`` is expected to already be a sigmoid *output* (an activation), not the
    pre-activation value: the function does not apply the sigmoid itself.
    """
    complement = 1 - a
    return a * complement

In [4]:
# --- Step 3: Initialize network parameters ---
# Layer widths: two hidden layers followed by a single output unit.
n_h1, n_h2, n_out = 3, 2, 1
n_input = x.shape[0]  # 12288 for 64x64x3 image

# Seed NumPy's global generator so later np.random draws are repeatable.
np.random.seed(0)



In [5]:
def init_params(n_input, n_h1, n_h2, n_out):
    """Create the starting weights and biases of the three-layer network.

    Each weight matrix is drawn from ``np.random.randn`` (NumPy's global
    generator) and scaled by 0.01; each bias is a zero column vector.
    Matrices are drawn in the order W1, W2, W3.

    Parameters
    ----------
    n_input, n_h1, n_h2, n_out : int
        Sizes of the input, first hidden, second hidden and output layers.

    Returns
    -------
    tuple
        ``(W1, b1, W2, b2, W3, b3)`` where ``Wk`` has shape
        ``(units_out, units_in)`` and ``bk`` has shape ``(units_out, 1)``.
    """
    # (rows, cols) of every weight matrix, listed in draw order.
    layer_shapes = ((n_h1, n_input), (n_h2, n_h1), (n_out, n_h2))

    params = []
    for units_out, units_in in layer_shapes:
        params.append(np.random.randn(units_out, units_in) * 0.01)
        params.append(np.zeros((units_out, 1)))
    return tuple(params)

def sigmoid(z):
    """Logistic sigmoid ``1 / (1 + exp(-z))``, evaluated element-wise without overflow.

    Note: ``sigmoid`` is also defined in an earlier cell of this notebook; once
    this cell has run, this later definition is the one in effect.

    Parameters
    ----------
    z : scalar or array-like
        Pre-activation value(s).

    Returns
    -------
    NumPy scalar or ndarray
        Sigmoid of ``z`` with the same shape as ``z``.
    """
    z = np.asarray(z)
    # exp(-|z|) always lies in (0, 1], so it cannot overflow the way exp(-z)
    # does for very negative z.
    decay = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z) -- the same function algebraically.
    out = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () turns a 0-d result back into a scalar and leaves arrays as they are.
    return out[()]

def sigmoid_derivative(a):
    """Return ``a * (1 - a)``.

    ``a`` is expected to already be a sigmoid *output* (an activation), not the
    pre-activation value: the function does not apply the sigmoid itself.

    Note: this function is also defined in an earlier cell of this notebook;
    once this cell has run, this later definition is the one in effect.
    """
    complement = 1 - a
    return a * complement

def forward_prop(W1, b1, W2, b2, W3, b3, X):
    """Run the input through the three sigmoid layers.

    Each layer computes ``Z = W . A_prev + b`` followed by ``A = sigmoid(Z)``,
    starting from ``A_prev = X``.

    Parameters
    ----------
    W1, b1, W2, b2, W3, b3 : ndarray
        Weights and biases of layers 1 to 3.
    X : ndarray
        Network input; its first dimension must match the columns of ``W1``.

    Returns
    -------
    tuple
        ``(Z1, A1, Z2, A2, Z3, A3)``: the pre-activation and activation of
        every layer, in layer order.
    """
    trace = []
    activation = X
    for weights, bias in ((W1, b1), (W2, b2), (W3, b3)):
        pre_activation = np.dot(weights, activation) + bias
        activation = sigmoid(pre_activation)
        trace += [pre_activation, activation]
    return tuple(trace)

def backprop(Z1, A1, Z2, A2, Z3, A3, X, y, W1, W2, W3):
    """Compute the gradients of all weights and biases for one pass.

    The output-layer error is taken as ``A3 - y`` (the form that arises for a
    sigmoid output trained with cross-entropy loss; the loss itself is not
    computed anywhere in this function). Errors are then pushed back through
    ``W3`` and ``W2``, each time multiplied by the sigmoid derivative of the
    layer's activation.

    Parameters
    ----------
    Z1, Z2, Z3 : ndarray
        Pre-activations from the forward pass. Accepted but not used here.
    A1, A2, A3 : ndarray
        Activations from the forward pass.
    X : ndarray
        Network input.
    y : ndarray
        Targets; ``y.shape[1]`` is used as the number of examples.
    W1, W2, W3 : ndarray
        Layer weights. ``W1`` is accepted but not used here.

    Returns
    -------
    tuple
        ``(dW1, db1, dW2, db2, dW3, db3)``.
    """
    scale = 1 / y.shape[1]  # average over the examples stored as columns of y

    def layer_grads(delta, layer_input):
        # Weight and bias gradient of one layer, given its error term and its input.
        weight_grad = scale * np.dot(delta, layer_input.T)
        bias_grad = scale * delta.sum(axis=1, keepdims=True)
        return weight_grad, bias_grad

    # Output layer.
    delta3 = A3 - y
    dW3, db3 = layer_grads(delta3, A2)

    # Second hidden layer.
    delta2 = np.dot(W3.T, delta3) * sigmoid_derivative(A2)
    dW2, db2 = layer_grads(delta2, A1)

    # First hidden layer.
    delta1 = np.dot(W2.T, delta2) * sigmoid_derivative(A1)
    dW1, db1 = layer_grads(delta1, X)

    return dW1, db1, dW2, db2, dW3, db3
    

def update_params(dW1, db1, dW2, db2, dW3, db3, alpha, W1, b1, W2, b2, W3, b3):
    """Apply one gradient-descent step and return the updated parameters.

    Every parameter ``p`` with gradient ``dp`` becomes ``p - alpha * dp``.
    The arrays passed in are left untouched: new arrays are returned instead
    of updating the caller's arrays in place, so the caller must use the
    return value.

    Parameters
    ----------
    dW1, db1, dW2, db2, dW3, db3 : ndarray
        Gradients of the corresponding parameters.
    alpha : float
        Learning rate (step size).
    W1, b1, W2, b2, W3, b3 : ndarray
        Current parameters.

    Returns
    -------
    tuple
        ``(W1, b1, W2, b2, W3, b3)`` after the step.
    """
    # Plain subtraction (not ``-=``) rebinds the local names to fresh arrays,
    # so the caller's arrays are never modified.
    W1 = W1 - alpha * dW1
    b1 = b1 - alpha * db1
    W2 = W2 - alpha * dW2
    b2 = b2 - alpha * db2
    W3 = W3 - alpha * dW3
    b3 = b3 - alpha * db3

    return W1, b1, W2, b2, W3, b3

def get_predition(A3, threshold=0.5):
    """Turn output-layer activations into class predictions, one per column.

    With a single output row (one sigmoid unit per example) an argmax over
    axis 0 would always be 0, so that case is thresholded instead: the
    prediction is 1 where the activation exceeds ``threshold`` and 0
    otherwise. With several output rows the prediction is the index of the
    largest activation in each column.

    The misspelled name is kept so existing callers keep working.

    Parameters
    ----------
    A3 : array-like
        Output activations, one column per example.
    threshold : float, optional
        Decision boundary for the single-row case. Defaults to 0.5.

    Returns
    -------
    ndarray
        Integer predictions of shape ``(n_examples,)`` for 2-D input.
    """
    A3 = np.asarray(A3)
    if A3.ndim == 2 and A3.shape[0] == 1:
        # np.intp matches the integer dtype that np.argmax returns.
        return (A3[0] > threshold).astype(np.intp)
    return np.argmax(A3, 0)

def gradient_descent(X, y, iterations, alpha, n_input, n_h1, n_h2, n_out):
    """Train the three-layer network with plain gradient descent.

    Parameters are created by ``init_params`` (which draws from NumPy's global
    random generator), then each iteration runs a forward pass, a backward
    pass and one parameter update. The iteration number is printed every 100
    iterations.

    Parameters
    ----------
    X : ndarray
        Network input.
    y : ndarray
        Targets.
    iterations : int
        Number of update steps to run.
    alpha : float
        Learning rate.
    n_input, n_h1, n_h2, n_out : int
        Layer sizes handed to ``init_params``.

    Returns
    -------
    tuple
        ``(W1, b1, W2, b2, W3, b3)`` after the last update.
    """
    params = init_params(n_input, n_h1, n_h2, n_out)

    for step in range(iterations):
        cache = forward_prop(*params, X)

        # backprop only needs the weight matrices, not the biases.
        W1, _, W2, _, W3, _ = params
        grads = backprop(*cache, X, y, W1, W2, W3)

        params = update_params(*grads, alpha, *params)

        if not step % 100:
            print("iteration: ", step)

    return tuple(params)


In [7]:
# Train for 10000 iterations with learning rate 0.01, then show every learned parameter.
trained = gradient_descent(x, y, 10000, 0.01, n_input, n_h1, n_h2, n_out)
W1, b1, W2, b2, W3, b3 = trained

for label, value in zip(("W1", "b1", "W2", "b2", "W3", "b3"), trained):
    print(f"{label}: {value}")

iteration:  0
iteration:  100
iteration:  200
iteration:  300
iteration:  400
iteration:  500
iteration:  600
iteration:  700
iteration:  800
iteration:  900
iteration:  1000
iteration:  1100
iteration:  1200
iteration:  1300
iteration:  1400
iteration:  1500
iteration:  1600
iteration:  1700
iteration:  1800
iteration:  1900
iteration:  2000
iteration:  2100
iteration:  2200
iteration:  2300
iteration:  2400
iteration:  2500
iteration:  2600
iteration:  2700
iteration:  2800
iteration:  2900
iteration:  3000
iteration:  3100
iteration:  3200
iteration:  3300
iteration:  3400
iteration:  3500
iteration:  3600
iteration:  3700
iteration:  3800
iteration:  3900
iteration:  4000
iteration:  4100
iteration:  4200
iteration:  4300
iteration:  4400
iteration:  4500
iteration:  4600
iteration:  4700
iteration:  4800
iteration:  4900
iteration:  5000
iteration:  5100
iteration:  5200
iteration:  5300
iteration:  5400
iteration:  5500
iteration:  5600
iteration:  5700
iteration:  5800
iteration