In [2]:
import numpy as np

In [5]:
def standardize_rows(arr):
    row_means = arr.mean(axis=1, keepdims=True)  # Compute the mean of each row
    row_stds = arr.std(axis=1, keepdims=True)    # Compute the std of each row
    row_stds[row_stds == 0] = 1
    standardized_arr = (arr - row_means) / row_stds  # Standardize each row
    return standardized_arr

In [38]:
import numpy as np

class NeuralNetwork:
    def __init__(self, layer_sizes, learning_rate=0.001, dropout_rate=0.5):
        # Initialize parameters
        self.layers = len(layer_sizes) - 1
        self.learning_rate = learning_rate
        self.dropout_rate = dropout_rate
        self.weights = []
        self.biases = []

        # He initialization for weights
        for i in range(self.layers):
            self.weights.append(np.random.randn(layer_sizes[i], layer_sizes[i + 1]) * np.sqrt(2 / layer_sizes[i]))
            self.biases.append(np.zeros((1, layer_sizes[i + 1])))

    def relu(self, z):
        return np.maximum(0, z)

    def relu_derivative(self, z):
        return (z > 0).astype(float)

    def softmax(self, z):
        exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
        return exp_z / np.sum(exp_z, axis=1, keepdims=True)

    def forward(self, X, training=True):
        self.a = [X]
        for i in range(self.layers - 1):
            z = np.dot(self.a[-1], self.weights[i]) + self.biases[i]
            a = self.relu(z)

            # Apply dropout only during training
            if training:
                dropout_mask = np.random.rand(*a.shape) > self.dropout_rate
                a *= dropout_mask
                a /= (1 - self.dropout_rate)

            self.a.append(a)

        z = np.dot(self.a[-1], self.weights[-1]) + self.biases[-1]
        self.a.append(self.softmax(z))
        return self.a[-1]


    def backward(self, X, y):
        m = y.shape[0]
        y_one_hot = np.eye(self.a[-1].shape[1])[y.astype(int)]

        # Compute gradients for output layer
        dz = self.a[-1] - y_one_hot
        gradients_w = []
        gradients_b = []

        # Backpropagation through layers
        for i in range(self.layers - 1, 0, -1):
            dW = np.dot(self.a[i].T, dz) / m
            db = np.sum(dz, axis=0, keepdims=True) / m
            gradients_w.insert(0, dW)
            gradients_b.insert(0, db)

            dz = np.dot(dz, self.weights[i].T) * self.relu_derivative(self.a[i])

        # Compute gradients for first layer
        dW = np.dot(X.T, dz) / m
        db = np.sum(dz, axis=0, keepdims=True) / m
        gradients_w.insert(0, dW)
        gradients_b.insert(0, db)

        # Update weights and biases using gradients
        for i in range(self.layers):
            self.weights[i] -= self.learning_rate * gradients_w[i]
            self.biases[i] -= self.learning_rate * gradients_b[i]

    def train(self, X, y, epochs=100, batch_size=64):
        for epoch in range(epochs):
            # Mini-batch gradient descent
            indices = np.arange(X.shape[0])
            np.random.shuffle(indices)
            for i in range(0, X.shape[0], batch_size):
                X_batch = X[indices[i:i + batch_size]]
                y_batch = y[indices[i:i + batch_size]]
                output = self.forward(X_batch)
                self.backward(X_batch, y_batch)

            if epoch % 10 == 0:
                loss = -np.mean(np.log(output[range(y_batch.size), y_batch.astype(int)]))
                print(f"Epoch {epoch}, Loss: {loss:.4f}")

    def predict(self, X):
        output = self.forward(X, training=False)
        return np.argmax(output, axis=1)

    def accuracy(self, predictions, labels):
        return np.mean(predictions == labels)


In [67]:
# Load training data
train = np.load("data/fashion_train.npy")
X_train = train[:, :-1]
y_train = train[:, -1]

# Standardize the data
X_train = standardize_rows(X_train)

# Load test data
test = np.load("data/fashion_test.npy")
X_test = test[:, :-1]
y_test = test[:, -1]
X_test = standardize_rows(X_test)

In [68]:
layer_options = [[784, 128, 5], [784, 256, 128, 5], [784, 512, 256, 128, 5]]
dropout_rates = [0.2, 0.3, 0.5]

for layers in layer_options:
    for dropout in dropout_rates:
        nn = NeuralNetwork(layers, learning_rate=0.001, dropout_rate=dropout)
        nn.train(X_train, y_train, epochs=100)
        accuracy = nn.accuracy(nn.predict(X_test), y_test)
        print(f"Layers: {layers}, Dropout: {dropout}, Accuracy: {accuracy}")

layers: [784, 128, 5], dropout: 0.2
Epoch 0, Loss: 1.2920
Epoch 10, Loss: 0.5831
Epoch 20, Loss: 0.4275
Epoch 30, Loss: 0.3334
Epoch 40, Loss: 0.4194
Epoch 50, Loss: 0.5225
Epoch 60, Loss: 0.2807
Epoch 70, Loss: 0.8202
Epoch 80, Loss: 0.6499
Epoch 90, Loss: 0.4382
Layers: [784, 128, 5], Dropout: 0.2, Accuracy: 0.8358
layers: [784, 128, 5], dropout: 0.3
Epoch 0, Loss: 0.8200
Epoch 10, Loss: 0.4906
Epoch 20, Loss: 0.4412
Epoch 30, Loss: 0.2982
Epoch 40, Loss: 0.1987
Epoch 50, Loss: 0.5179
Epoch 60, Loss: 0.4995
Epoch 70, Loss: 0.4942
Epoch 80, Loss: 0.3499
Epoch 90, Loss: 0.4869
Layers: [784, 128, 5], Dropout: 0.3, Accuracy: 0.834
layers: [784, 128, 5], dropout: 0.5
Epoch 0, Loss: 1.7099
Epoch 10, Loss: 0.7919
Epoch 20, Loss: 0.7298
Epoch 30, Loss: 0.3157
Epoch 40, Loss: 0.5039
Epoch 50, Loss: 0.3266
Epoch 60, Loss: 0.3879
Epoch 70, Loss: 0.3028
Epoch 80, Loss: 0.1298
Epoch 90, Loss: 0.2467
Layers: [784, 128, 5], Dropout: 0.5, Accuracy: 0.8158
layers: [784, 256, 128, 5], dropout: 0.2
Epo

In [66]:
# Test the network
predictions = nn.predict(X_test)
accuracy = nn.accuracy(predictions, y_test)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

Test Accuracy: 80.10%
