In [2]:
import numpy as np
import pandas as pd
import os
from PIL import Image


In [3]:
def load_to_pd_frame(directory, image_size=(150, 150)):
    """Load a folder-per-class image dataset into a flat pandas DataFrame.

    Every immediate sub-directory of ``directory`` is treated as one class.
    Sub-directories are visited in sorted name order and numbered 0, 1, 2, ...
    so a given folder always maps to the same integer label. Each image is
    converted to greyscale, resized to ``image_size``, divided by 255 and
    flattened into one row.

    Parameters
    ----------
    directory : str
        Root folder containing one sub-directory per class.
    image_size : tuple of int, optional
        ``(width, height)`` handed to ``Image.resize``. Defaults to (150, 150).

    Returns
    -------
    pandas.DataFrame
        Column ``label`` followed by ``pixel_0`` ... ``pixel_{width*height-1}``,
        one row per image that could be loaded. Files that fail to load are
        reported on stdout and skipped.
    """
    num_pixels = image_size[0] * image_size[1]
    labels = []
    pixel_rows = []

    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # folder -> label mapping stable between runs and machines.
    class_dirs = sorted(
        name for name in os.listdir(directory)
        if os.path.isdir(os.path.join(directory, name))
    )

    for label, sub_dir in enumerate(class_dirs):
        sub_path = os.path.join(directory, sub_dir)
        for image in sorted(os.listdir(sub_path)):
            image_path = os.path.join(sub_path, image)
            try:
                # The context manager closes the underlying file handle.
                with Image.open(image_path) as img:
                    grey = img.convert('L').resize(image_size)
                    pixels = (np.array(grey) / 255).flatten()
            except Exception as e:
                # Still best-effort, but say which file was dropped and why.
                print(f"Could not load image {image_path}: {e}, skipping")
                continue

            if np.isnan(pixels).any():
                print(f"NaN found in image {image_path}, skipping")
                continue

            labels.append(label)
            # Keep each row as a NumPy array rather than a Python list of
            # floats to limit memory use on large datasets.
            pixel_rows.append(pixels)

    pixel_columns = [f'pixel_{i}' for i in range(num_pixels)]
    if pixel_rows:
        pixel_matrix = np.vstack(pixel_rows)
    else:
        pixel_matrix = np.empty((0, num_pixels))

    df = pd.DataFrame(pixel_matrix, columns=pixel_columns)
    df.insert(0, 'label', np.asarray(labels, dtype=int))
    return df

In [4]:
# Build the training table from the class folders under seg_train:
# one row per image, label in the first column, flattened pixels after it.
data = load_to_pd_frame('archive-2/seg_train/seg_train')

In [5]:
# Seed NumPy's global RNG so the shuffle below, and therefore the dev/train
# split, is the same on every Restart & Run All.
np.random.seed(42)

# Convert the table to a plain array: column 0 is the label, the rest are pixels.
data = np.array(data)
np.random.shuffle(data)  # shuffles the rows (samples) in place
m, n = data.shape  # m = number of samples, n = 1 label column + pixel columns

# The first 1000 shuffled rows form the dev set; transposing puts one sample per column.
data_dev = data[0:1000].T
y_dev = data_dev[0]
x_dev = data_dev[1:n]

# The remaining rows form the training set, in the same (features, samples) layout.
data_train = data[1000:m].T
y_train = data_train[0]
x_train = data_train[1:n]

data.shape

(14034, 22501)

In [6]:
class Hidden_Layer:
    """Parameter container for one fully connected layer.

    ``weights`` has shape (n_neurons, n_features) and ``biases`` has shape
    (n_neurons, 1); both are created in ``__init__``.
    """

    def __init__(self, n_neurons, n_features):
        weight_shape = (n_neurons, n_features)
        bias_shape = (n_neurons, 1)
        # Standard-normal draws scaled by 0.01; biases start at zero.
        self.weights = 0.01 * np.random.randn(*weight_shape)
        self.biases = np.zeros(bias_shape)

# Network architecture: input features -> 512 -> 256 -> 6 output units.
# The input width is the number of rows of x_train.
Layer1 = Hidden_Layer(512,x_train.shape[0])
Layer2 = Hidden_Layer(256,512)
Layer3 = Hidden_Layer(6,256)

def init_parameters(Layer1, Layer2, Layer3):
    """Unpack the weights and biases of three layers into one flat tuple.

    Returns ``(W1, b1, W2, b2, W3, b3)``. The arrays are the layers' own
    objects, not copies.
    """
    unpacked = []
    for layer in (Layer1, Layer2, Layer3):
        unpacked.extend((layer.weights, layer.biases))
    return tuple(unpacked)

def Activation_ReLU(Z):
    """Rectified linear unit: element-wise maximum of ``Z`` and 0."""
    floor = 0
    rectified = np.maximum(Z, floor)
    return rectified

def Activation_Softmax(Z):
    """Column-wise softmax: each column of the result sums to 1.

    The maximum of every column is subtracted before exponentiating so that
    ``np.exp`` is never given a large positive argument.
    """
    column_peaks = np.max(Z, axis=0, keepdims=True)
    exponentials = np.exp(Z - column_peaks)
    column_totals = np.sum(exponentials, axis=0, keepdims=True)
    return exponentials / column_totals

def forward_propagation(W1, b1, W2, b2, W3, b3, X):
    """Run the three-layer forward pass: ReLU, ReLU, then softmax.

    Each stage computes ``W.dot(input) + b`` and applies its activation; the
    activated output feeds the next stage.

    Returns ``(Z1, A1, Z2, A2, Z3, A3)``: the pre-activation and activation of
    every layer, in order.
    """
    stages = (
        (W1, b1, Activation_ReLU),
        (W2, b2, Activation_ReLU),
        (W3, b3, Activation_Softmax),
    )

    cache = []
    signal = X
    for weights, biases, activation in stages:
        pre_activation = weights.dot(signal) + biases
        signal = activation(pre_activation)
        cache.append(pre_activation)
        cache.append(signal)

    return tuple(cache)

def deriv_ReLU(Z):
    """ReLU derivative as a boolean mask: True where ``Z`` is strictly positive."""
    is_active = Z > 0
    return is_active

def one_hot_encode(Y, num_classes=None):
    """One-hot encode integer class labels, one column per sample.

    Parameters
    ----------
    Y : array-like
        1-D class labels; values are cast to ``int``.
    num_classes : int, optional
        Number of rows in the result. When omitted it is inferred as
        ``Y.max() + 1``, which is only correct if the highest class actually
        appears in ``Y``. Pass it explicitly for mini-batches, where a class
        may be absent.

    Returns
    -------
    numpy.ndarray
        Array of shape (num_classes, Y.size) holding a single 1 per column.

    Raises
    ------
    ValueError
        If a label is negative or not smaller than ``num_classes``.
    """
    Y = np.asarray(Y).astype(int)

    if num_classes is None:
        # An empty label array has no maximum; treat it as zero classes.
        num_classes = int(Y.max()) + 1 if Y.size else 0

    # Negative labels would otherwise wrap around silently via NumPy indexing.
    if Y.size and (Y.min() < 0 or Y.max() >= num_classes):
        raise ValueError(
            f"labels must lie in [0, {num_classes}), got range "
            f"[{Y.min()}, {Y.max()}]"
        )

    one_hot_y = np.zeros((Y.size, num_classes))
    one_hot_y[np.arange(Y.size), Y] = 1
    return one_hot_y.T

def backward_propagation(Z1, A1, Z2, A2, Z3, A3, W1, W2, W3, X, Y):
    """Back-propagate through the three-layer network for one batch.

    The output error is ``A3 - one_hot(Y)``; it is pushed back through ``W3``
    and ``W2``, masked by the ReLU derivative of ``Z2`` and ``Z1``.

    Parameters
    ----------
    Z1, A1, Z2, A2, Z3, A3 : numpy.ndarray
        Values returned by the forward pass for this batch. ``A3`` is read as
        (number of classes, batch size).
    W1, W2, W3 : numpy.ndarray
        Layer weights (``W1`` is accepted for signature symmetry but not used).
    X : numpy.ndarray
        Batch inputs, one sample per column.
    Y : numpy.ndarray
        Class labels for the batch, one per column of ``X``.

    Returns
    -------
    tuple
        ``(dW1, db1, dW2, db2, dW3, db3)``. Each ``dW`` matches the shape of
        its weight matrix and each ``db`` is a column vector with one entry
        per neuron.
    """
    # Take both sizes from A3 rather than from module-level state, so the
    # averaging factor is always the size of the batch being processed.
    num_classes, batch_size = A3.shape

    # Build the targets with exactly A3's shape; inferring the class count
    # from the labels would fail for a batch that lacks the highest class.
    one_hot_y = np.zeros((num_classes, batch_size))
    one_hot_y[Y.astype(int), np.arange(batch_size)] = 1

    scale = 1 / batch_size

    dZ3 = A3 - one_hot_y
    dW3 = scale * dZ3.dot(A2.T)
    # Sum over the sample axis only, so every neuron keeps its own bias gradient.
    db3 = scale * np.sum(dZ3, axis=1, keepdims=True)

    dZ2 = W3.T.dot(dZ3) * deriv_ReLU(Z2)
    dW2 = scale * dZ2.dot(A1.T)
    db2 = scale * np.sum(dZ2, axis=1, keepdims=True)

    dZ1 = W2.T.dot(dZ2) * deriv_ReLU(Z1)
    dW1 = scale * dZ1.dot(X.T)
    db1 = scale * np.sum(dZ1, axis=1, keepdims=True)

    return dW1, db1, dW2, db2, dW3, db3

def update_parameters(W1, b1, W2, b2, W3, b3, dW1, db1, dW2, db2, dW3, db3, alpha):
    """Apply one gradient-descent step with element-wise gradient clipping.

    Every gradient is clipped to ``[-1.0, 1.0]`` and then subtracted from its
    parameter after scaling by ``alpha``.

    Returns the updated ``(W1, b1, W2, b2, W3, b3)``.
    """
    max_grad = 1.0
    parameters = (W1, b1, W2, b2, W3, b3)
    gradients = (dW1, db1, dW2, db2, dW3, db3)

    return tuple(
        param - alpha * np.clip(grad, -max_grad, max_grad)
        for param, grad in zip(parameters, gradients)
    )



In [9]:
def get_predictions(A3):
    """Return, for each column of ``A3``, the row index holding its largest value."""
    predicted_classes = np.argmax(A3, axis=0)
    return predicted_classes

def get_accuracy(predictions, Y):
    """Return the fraction of entries in ``predictions`` that equal ``Y``.

    Parameters
    ----------
    predictions : numpy.ndarray
        Predicted class indices.
    Y : numpy.ndarray
        True labels, compared element-wise with ``predictions``.

    Returns
    -------
    float
        Number of matching entries divided by ``Y.size``.
    """
    # No debug printing here: callers report the returned value themselves.
    correct = np.sum(predictions == Y)
    return correct / Y.size

def gradient_descent(X, Y, alpha, iterations, batch_size=64):
    """Train the three-layer network with mini-batch gradient descent.

    Starting parameters come from the module-level ``Layer1``, ``Layer2`` and
    ``Layer3`` via ``init_parameters``. On every iteration the samples are
    visited once in a fresh random order, in chunks of ``batch_size``.
    Every 10th iteration the accuracy on the full ``(X, Y)`` is printed.

    Parameters
    ----------
    X : numpy.ndarray
        Inputs with one sample per column.
    Y : numpy.ndarray
        Labels, one per column of ``X``.
    alpha : float
        Learning rate passed to ``update_parameters``.
    iterations : int
        Number of passes over the data.
    batch_size : int, optional
        Samples per mini-batch (the last batch may be smaller). Defaults to 64.

    Returns
    -------
    tuple
        The trained ``(W1, b1, W2, b2, W3, b3)``.

    Raises
    ------
    ValueError
        If ``batch_size`` is not positive.
    """
    if batch_size <= 0:
        raise ValueError(f"batch_size must be positive, got {batch_size}")

    W1, b1, W2, b2, W3, b3 = init_parameters(Layer1, Layer2, Layer3)
    m = X.shape[1]  # number of training samples

    for i in range(iterations):
        permutation = np.random.permutation(m)

        for j in range(0, m, batch_size):
            # Index each batch through the permutation instead of first
            # materialising a shuffled copy of the whole dataset.
            batch_idx = permutation[j:j + batch_size]
            X_batch = X[:, batch_idx]
            Y_batch = Y[batch_idx]

            Z1, A1, Z2, A2, Z3, A3 = forward_propagation(W1, b1, W2, b2, W3, b3, X_batch)
            dW1, db1, dW2, db2, dW3, db3 = backward_propagation(Z1, A1, Z2, A2, Z3, A3, W1, W2, W3, X_batch, Y_batch)
            W1, b1, W2, b2, W3, b3 = update_parameters(W1, b1, W2, b2, W3, b3, dW1, db1, dW2, db2, dW3, db3, alpha)

        # Periodic progress report on the full set passed in.
        if i % 10 == 0:
            Z1, A1, Z2, A2, Z3, A3 = forward_propagation(W1, b1, W2, b2, W3, b3, X)
            predictions = get_predictions(A3)
            acc = get_accuracy(predictions, Y)
            print(f"Iteration {i}, Accuracy: {acc:.4f}")

    return W1, b1, W2, b2, W3, b3

In [None]:
# Train on the training split: learning rate 0.10, 500 iterations,
# mini-batch size left at the function's default.
W1, b1, W2, b2, W3, b3 = gradient_descent(x_train, y_train, 0.10, 500)