In [1]:
import numpy as np
import pandas as pd
import math
import sklearn.metrics

In [2]:
def standardize_rows(arr):
    row_means = arr.mean(axis=1, keepdims=True)  # Compute the mean of each row
    row_stds = arr.std(axis=1, keepdims=True)    # Compute the std of each row

    row_stds[row_stds == 0] = 1

    standardized_arr = (arr - row_means) / row_stds  # Standardize each row
    return standardized_arr

In [3]:
train = np.load('./data/fashion_train.npy')



train_test = standardize_rows(train[:, : -1])
y = train[:,-1]

y_matrix = np.eye(5)[y]

print(train_test.shape, y.shape)

(10000, 784) (10000,)


In [None]:



def input_layer(X_train, y_train):
    n_feartures = X_train.shape[1]
    output_nodes = len(set(y_train))
    len_of_X_train = len(X_train)
    return n_feartures, output_nodes, len_of_X_train


def hidden_layer(X, n_feartures, nunbers_of_nodes_hidden, len_of_X_train):
    #first hidden layer

    W = np.random.randn(n_feartures, nunbers_of_nodes_hidden)

    b = np.random.randn(len_of_X_train, nunbers_of_nodes_hidden)

    z = X@W+b

    a = np.maximum(0, z)

    return a


#output layer
def output_layer(a, output_nodes, len_of_X_train):

    W = np.random.randn(a.shape[1], output_nodes)

    b = np.random.randn(len_of_X_train, output_nodes)

    z = a@W+b

    softmax = np.exp(z - np.max(z))/np.sum(np.exp(z - np.max(z)))
    preds = np.argmax(softmax, axis=1)

    return print(f'pred: {np.argmax(softmax, axis=1)}, shape: {softmax.shape}'), preds


In [6]:
X = train_test

n_feartures, output_nodes, len_of_X_train = input_layer(X, y)

a = hidden_layer(X, n_feartures, nunbers_of_nodes_hidden=40, len_of_X_train=len_of_X_train)

preds = output_layer(a, output_nodes, len_of_X_train)

preds

pred: [0 0 0 ... 0 0 0], shape: (10000, 5)


(None, array([0, 0, 0, ..., 0, 0, 0], dtype=int64))

In [181]:
def sigmoid(x):
    return np.exp(x) / (np.exp(x) + 1)

def derivative_sigmoid(x):
    sig = sigmoid(x)
    return sig * (1 - sig)

def softmax(z):
    return np.exp(z - np.max(z))/np.sum(np.exp(z - np.max(z)))

def softmax_derivative(z):
    s = np.exp(z) / np.sum(np.exp(z))
    s = s.reshape(-1, 1)
    jacobian = np.diagflat(s) - np.dot(s, s.T)
    return jacobian

def relu(x, alpha=0.01):
    return np.maximum(alpha * x, x)

def derivative_relu(x):
    return np.where(x > 0, 1, 0)

def cross_entropy(y, a, epsilon=1e-12):
    a = np.clip(a, epsilon, 1. - epsilon)
    return -np.sum(y * np.log(a))

def cross_entropy_derivative_a(y, a):
    return -1/a * np.sum(y)

def cross_entropy_derivative_softmax(y, a):
    return a-y

def binary_cross_entropy(y, a, epsilon=1e-12):
    a = np.clip(a, epsilon, 1. - epsilon)  # Avoid log(0) errors
    return -np.mean(y * np.log(a) + (1 - y) * np.log(1 - a))


In [183]:
derivative_dict = {
    softmax:softmax_derivative,
    cross_entropy:cross_entropy_derivative_a,
    sigmoid:derivative_sigmoid,
    relu:derivative_relu
}

In [None]:
class Layer:
    def __init__(self,activation_function,shape):
        self.activation_function = activation_function
        self.W = np.random.randn(shape[0], shape[1]) * np.sqrt(2 / shape[0])
        #self.b = np.random((1, shape[1]))
        self.b = np.zeros((1, shape[1]))

    def feed(self, X):
        self.X = X

    def activate(self):
        #self.Z = self.X.dot(self.W) + self.b
        self.Z = self.X.dot(self.W) + self.b
        self.Z = (self.Z - np.mean(self.Z, axis=0)) / np.std(self.Z, axis=0)
        self.a = self.activation_function(self.Z)
        return self.a

    def adjust_weights_output(self, alpha, y_matrix):
        dL_dZ = self.a - y_matrix  # Direct gradient for cross-entropy with softmax
        dL_w = np.dot(dL_dZ.T, self.X).T

        self.W = self.W - alpha * dL_w
        self.b = self.b - alpha * dL_dZ.mean(axis=0, keepdims=True)
        self.dL_dZ = dL_dZ # error signal  ??? gotta research a bit
    
    def adjust_weights(self,alpha,next_layer):
        da_dZ = derivative_dict[self.activation_function](self.Z)
        dL_dZ = np.dot(next_layer.dL_dZ, next_layer.W.T) * da_dZ
        dL_w = np.dot(dL_dZ.T, self.X).T

        self.W = self.W - alpha * dL_w
        self.b = self.b - alpha * dL_dZ.mean(axis=0, keepdims=True)
        self.dL_dZ = dL_dZ


class NeuralNet:
    def __init__(self,X,layer_info):
        self.X = X
        self.layer_nums = layer_info
        self.layers = []

        for i in range(len(layer_info)-1):
            l = Layer(layer_info[i]["activation_function"],(layer_info[i]["size"],layer_info[i+1]["size"]))
            self.layers.append(l)
    


#forward
input_layer = Layer(relu,(784,20))
h1 = Layer(relu,(20,10))
output_layer = Layer(softmax,(10,5))
input_layer.feed(train_test)

for i in range(1000):
    input_layer.activate()
    h1.feed(input_layer.a)
    h1.activate()
    output_layer.feed(h1.a)
    output_layer.activate()

    output_layer.adjust_weights_output(0.0001,y_matrix)
    h1.adjust_weights(0.0001,output_layer)
    input_layer.adjust_weights(0.0001,h1)

preds=output_layer.a.argmax(axis=1)
sklearn.metrics.accuracy_score(preds,y)

0.2328

In [180]:
preds=output_layer.a.argmax(axis=1)
sklearn.metrics.accuracy_score(preds,y)

0.2496

In [184]:
output_layer.a

array([[6.97709696e-01, 1.34624998e-06, 9.99999997e-01, 8.87216426e-01,
        1.87055661e-02],
       [9.99999318e-01, 2.14961487e-07, 9.99999866e-01, 6.53627756e-01,
        1.07191168e-01],
       [7.37926362e-01, 1.72396161e-07, 1.00000000e+00, 9.14274671e-01,
        1.04642128e-02],
       ...,
       [1.00000000e+00, 2.89932063e-13, 1.00000000e+00, 8.41873535e-01,
        9.45439227e-03],
       [8.93434140e-01, 1.55613148e-07, 1.00000000e+00, 9.04027772e-01,
        1.23144157e-02],
       [9.99999927e-01, 3.56340735e-15, 1.00000000e+00, 9.61700721e-01,
        7.72121239e-04]])

0.2076