In [2]:
import numpy as np

In [5]:
def standardize_rows(arr):
    """Scale every row of a 2-D array to zero mean and unit standard deviation.

    Rows whose standard deviation is exactly 0 (constant rows) are divided by
    1 instead, so they come out as all zeros rather than NaN/inf.

    Parameters
    ----------
    arr : numpy.ndarray
        Array with at least two axes; statistics are taken along axis 1.

    Returns
    -------
    numpy.ndarray
        New array of the same shape as ``arr``; ``arr`` itself is not modified.
    """
    centered = arr - arr.mean(axis=1, keepdims=True)
    spread = arr.std(axis=1, keepdims=True)
    # Swap zero spreads for 1 so the division below is a no-op on constant rows.
    divisor = np.where(spread == 0, 1, spread)
    return centered / divisor

In [33]:
class NeuralNetwork:
    """Fully connected classifier with sigmoid hidden layers and a softmax output.

    Training is plain full-batch gradient descent on the cross-entropy loss.

    Attributes
    ----------
    layers : int
        Number of weight layers (``len(layer_sizes) - 1``).
    learning_rate : float
        Step size used by :meth:`backward`.
    weights, biases : list of numpy.ndarray
        ``weights[i]`` has shape ``(layer_sizes[i], layer_sizes[i + 1])`` and
        ``biases[i]`` has shape ``(1, layer_sizes[i + 1])``.
    a : list of numpy.ndarray
        Activations cached by the most recent :meth:`forward` call;
        ``a[0]`` is the input and ``a[-1]`` the softmax output.
    """

    def __init__(self, layer_sizes, learning_rate=0.01, seed=None):
        """Create the network and randomly initialise its parameters.

        Parameters
        ----------
        layer_sizes : sequence of int
            Unit counts per layer, input first and output last. Must have at
            least two entries.
        learning_rate : float, optional
            Gradient-descent step size.
        seed : int or None, optional
            When given, weights are drawn from a private
            ``np.random.RandomState(seed)`` so the initialisation is
            reproducible. When ``None`` (default) the global ``np.random``
            state is used.

        Raises
        ------
        ValueError
            If ``layer_sizes`` has fewer than two entries.
        """
        if len(layer_sizes) < 2:
            raise ValueError(
                "layer_sizes must contain at least an input size and an output size"
            )

        self.layers = len(layer_sizes) - 1
        self.learning_rate = learning_rate
        self.weights = []
        self.biases = []

        # Both np.random and RandomState expose randn, so one code path serves
        # the seeded and the unseeded case.
        rng = np.random if seed is None else np.random.RandomState(seed)

        # Standard-normal weights and zero biases for each layer
        for i in range(self.layers):
            self.weights.append(rng.randn(layer_sizes[i], layer_sizes[i + 1]))
            self.biases.append(np.zeros((1, layer_sizes[i + 1])))

    def sigmoid(self, z):
        """Element-wise logistic function ``1 / (1 + exp(-z))``.

        The argument is clipped to ``[-500, 500]`` before exponentiation so
        that strongly negative pre-activations cannot overflow ``np.exp``.
        Inside that range the result is unchanged.
        """
        return 1 / (1 + np.exp(-np.clip(z, -500, 500)))

    def sigmoid_derivative(self, z):
        """Derivative of the sigmoid expressed through its *output*.

        Despite the parameter name, ``z`` must be the sigmoid activation
        ``s = sigmoid(pre_activation)``; the result is ``s * (1 - s)``.
        :meth:`backward` calls it with the cached activations.
        """
        return z * (1 - z)

    def softmax(self, z):
        """Row-wise softmax; the row maximum is subtracted first for stability."""
        exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
        return exp_z / np.sum(exp_z, axis=1, keepdims=True)

    def forward(self, X):
        """Run a forward pass and cache every layer's activation in ``self.a``.

        Returns the softmax output, one row of class probabilities per row of
        ``X``. Any previously cached activations are replaced.
        """
        self.a = [X]
        # Hidden layers: affine transform followed by sigmoid
        for i in range(self.layers - 1):
            z = np.dot(self.a[-1], self.weights[i]) + self.biases[i]
            self.a.append(self.sigmoid(z))
        # Output layer: affine transform followed by softmax
        z = np.dot(self.a[-1], self.weights[-1]) + self.biases[-1]
        self.a.append(self.softmax(z))
        return self.a[-1]

    def backward(self, X, y):
        """Backpropagate the cross-entropy loss and apply one gradient step.

        Must be called after :meth:`forward` on the same ``X``, because it
        reads the activations cached in ``self.a``.

        Parameters
        ----------
        X : numpy.ndarray
            Input batch that was passed to :meth:`forward`.
        y : array-like
            Class indices, one per row of ``X``. Values are cast to ``int``,
            so float-typed labels are accepted.
        """
        labels = np.asarray(y).astype(int)
        m = labels.shape[0]
        y_one_hot = np.eye(self.a[-1].shape[1])[labels]

        # Softmax + cross-entropy: gradient w.r.t. the output logits
        dz = self.a[-1] - y_one_hot
        gradients_w = []
        gradients_b = []

        # Walk from the last layer down to layer 1; a[i] is the input of layer i
        for i in range(self.layers - 1, 0, -1):
            dW = np.dot(self.a[i].T, dz) / m
            db = np.sum(dz, axis=0, keepdims=True) / m
            gradients_w.insert(0, dW)
            gradients_b.insert(0, db)

            # Push the error through weights[i] and the sigmoid that produced a[i]
            dz = np.dot(dz, self.weights[i].T) * self.sigmoid_derivative(self.a[i])

        # First layer: its input is the raw batch
        dW = np.dot(X.T, dz) / m
        db = np.sum(dz, axis=0, keepdims=True) / m
        gradients_w.insert(0, dW)
        gradients_b.insert(0, db)

        # Gradient-descent update
        for i in range(self.layers):
            self.weights[i] -= self.learning_rate * gradients_w[i]
            self.biases[i] -= self.learning_rate * gradients_b[i]

    def cross_entropy_loss(self, probs, y):
        """Mean negative log-probability assigned to the true classes.

        Probabilities are floored at ``1e-12`` before the logarithm so that a
        true-class probability that underflowed to 0 yields a large finite
        loss instead of ``inf``.

        Parameters
        ----------
        probs : numpy.ndarray
            Class probabilities, one row per sample.
        y : array-like
            Class indices, one per row of ``probs`` (cast to ``int``).
        """
        labels = np.asarray(y).astype(int)
        true_class_probs = probs[np.arange(labels.size), labels]
        return -np.mean(np.log(np.clip(true_class_probs, 1e-12, None)))

    def train(self, X, y, epochs=100, log_every=10):
        """Run ``epochs`` full-batch gradient-descent steps.

        Parameters
        ----------
        X : numpy.ndarray
            Training inputs, one sample per row.
        y : array-like
            Class indices, one per row of ``X``.
        epochs : int, optional
            Number of forward/backward passes over the whole batch.
        log_every : int or None, optional
            Print the loss every ``log_every`` epochs (default 10). Pass ``0``
            or ``None`` to disable logging.
        """
        for epoch in range(epochs):
            output = self.forward(X)
            self.backward(X, y)
            if log_every and epoch % log_every == 0:
                # The loss is that of the pass made before this epoch's update.
                loss = self.cross_entropy_loss(output, y)
                print(f"Epoch {epoch}, Loss: {loss:.4f}")

    def predict(self, X):
        """Return the most probable class index for each row of ``X``."""
        output = self.forward(X)
        return np.argmax(output, axis=1)

    def accuracy(self, predictions, labels):
        """Fraction of positions where ``predictions`` equals ``labels``."""
        return np.mean(predictions == labels)

In [36]:
# Load training data: every column except the last is used as a feature,
# the last column is used as the class label
train = np.load("data/fashion_train.npy")
X_train = train[:, :-1]
y_train = train[:, -1]

# Standardize the data (each row, i.e. each sample, to zero mean / unit std)
X_train = standardize_rows(X_train)

# Seed NumPy's global RNG before building the network: the initial weights are
# drawn from it, so without a seed every Restart & Run All gives different
# losses and a different test accuracy.
np.random.seed(42)

# Initialize neural network with multiple layers:
# input width -> 128 -> 64 -> 32 -> one output unit per distinct label
layer_sizes = [X_train.shape[1], 128, 64, 32, len(np.unique(y_train))]
nn = NeuralNetwork(layer_sizes, learning_rate=0.02)

# Train the network
nn.train(X_train, y_train, epochs=3000)

Epoch 0, Loss: 10.2574
Epoch 10, Loss: 6.9561
Epoch 20, Loss: 4.0802
Epoch 30, Loss: 2.4878
Epoch 40, Loss: 1.9265
Epoch 50, Loss: 1.7190
Epoch 60, Loss: 1.6321
Epoch 70, Loss: 1.5853
Epoch 80, Loss: 1.5523
Epoch 90, Loss: 1.5248
Epoch 100, Loss: 1.5001
Epoch 110, Loss: 1.4773
Epoch 120, Loss: 1.4560
Epoch 130, Loss: 1.4359
Epoch 140, Loss: 1.4169
Epoch 150, Loss: 1.3990
Epoch 160, Loss: 1.3820
Epoch 170, Loss: 1.3658
Epoch 180, Loss: 1.3503
Epoch 190, Loss: 1.3356
Epoch 200, Loss: 1.3214
Epoch 210, Loss: 1.3079
Epoch 220, Loss: 1.2949
Epoch 230, Loss: 1.2824
Epoch 240, Loss: 1.2703
Epoch 250, Loss: 1.2587
Epoch 260, Loss: 1.2474
Epoch 270, Loss: 1.2365
Epoch 280, Loss: 1.2259
Epoch 290, Loss: 1.2157
Epoch 300, Loss: 1.2058
Epoch 310, Loss: 1.1961
Epoch 320, Loss: 1.1867
Epoch 330, Loss: 1.1776
Epoch 340, Loss: 1.1687
Epoch 350, Loss: 1.1600
Epoch 360, Loss: 1.1516
Epoch 370, Loss: 1.1433
Epoch 380, Loss: 1.1353
Epoch 390, Loss: 1.1274
Epoch 400, Loss: 1.1198
Epoch 410, Loss: 1.1123
Ep

In [37]:


# Evaluate on the held-out split, which is read and preprocessed the same way
# as the training file: last column is the label, the rest are features.
test = np.load("data/fashion_test.npy")
X_test, y_test = test[:, :-1], test[:, -1]
X_test = standardize_rows(X_test)

# Predict a class per test row and report the share of exact matches
predictions = nn.predict(X_test)
accuracy = nn.accuracy(predictions, y_test)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

Test Accuracy: 71.42%
