In [49]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.special import softmax

In [50]:
# Run configuration constants.

# Weights & Biases destination; only referenced by the wandb.init(...) call,
# which is currently commented out in the following cell.
WANDB_PROJECT = "myprojectname"
WANDB_ENTITY = "myname"
# Dataset name. NOTE(review): the visible loading cell calls
# fashion_mnist.load_data() directly and does not read this value.
DATASET = "fashion_mnist"
# Presumably training-loop settings; their consumer is not in the visible
# part of the notebook -- TODO confirm.
EPOCHS = 1
BATCH_SIZE = 4
# Read by loss() and loss_derivative(); they accept "cross_entropy" or "mse"
# and raise on anything else.
LOSS = "cross_entropy"
# Presumably optimizer selection and hyperparameters; the optimizer code is
# not in the visible part of the notebook -- TODO confirm how each is used.
OPTIMIZER = "sgd"
LEARNING_RATE = 0.1
MOMENTUM = 0.5
BETA = 0.5
BETA1 = 0.5
BETA2 = 0.5
EPSILON = 1e-6
WEIGHT_DECAY = 0.0
# Presumably passed to FFNeuralNetwork as weight_init. That class only
# special-cases the exact string "Xavier"; any other value leaves the
# standard-normal draws unscaled.
WEIGHT_INIT = "random"
# Presumably the hidden_layers and neurons arguments of FFNeuralNetwork
# -- TODO confirm against the cell that constructs the network.
NUM_LAYERS = 1
HIDDEN_SIZE = 4
# Presumably passed to FFNeuralNetwork as activation_function, whose
# activation() matches the case-sensitive names "sigmoid", "tanh" and "ReLU".
ACTIVATION = "sigmoid"

In [51]:
# import wandb

# wandb.init(project=WANDB_PROJECT, entity=WANDB_ENTITY)

### Question 1
Download the fashion-MNIST dataset and plot 1 sample image for each class as shown in the grid using wandb.

In [52]:
from keras.datasets import fashion_mnist

# Load Fashion-MNIST through the Keras dataset helper and unpack it into
# train/test inputs (x_*) and labels (y_*). No rescaling or reshaping is done
# in this cell, so downstream code receives the arrays exactly as returned.
# NOTE(review): per the Keras docs these should be 28x28 uint8 grayscale images
# with integer labels 0-9 -- confirm before feeding them to the network.
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

In [53]:
# # plot one image from each class
# for i in range(10):
#     plt.subplot(2, 5, i+1)
#     plt.imshow(x_train[y_train == i][0], cmap='gray')
#     plt.axis('off')

In [54]:
class FFNeuralNetwork():
    """Fully connected feed-forward classifier with a softmax output layer.

    Topology is ``input -> hidden_layers x [neurons units] -> output``; every
    hidden layer shares the same activation function. Construction does not
    allocate parameters: call ``initialize_weights()`` and
    ``initiate_biases()`` before ``output_matrix()``.

    Args:
        neurons: number of units in each hidden layer.
        hidden_layers: number of hidden layers (0 gives a single affine layer
            followed by softmax).
        input_size: number of input features per sample.
        output_size: number of output classes.
        activation_function: "sigmoid", "tanh" or "ReLU" (case-sensitive).
        weight_init: "Xavier" scales each weight matrix by 1/sqrt(fan_in);
            any other value keeps the raw standard-normal draws.
    """

    def __init__(self, neurons, hidden_layers, input_size, output_size, activation_function, weight_init):
        self.neurons, self.hidden_layers = neurons, hidden_layers
        # One entry per layer; filled by initialize_weights / initiate_biases.
        self.weights, self.biases = [], []
        self.input_size, self.output_size = input_size, output_size
        self.activation_function = activation_function
        self.weight_init = weight_init
        # Caches of the most recent forward pass (see output_matrix).
        self.pre_activation, self.post_activation = [], []
        self.output_activation_function = "softmax"

    def _layer_sizes(self):
        """Return the unit count of every layer, input and output included."""
        return [self.input_size] + [self.neurons] * self.hidden_layers + [self.output_size]

    def initialize_weights(self):
        """(Re)draw all weight matrices from a standard normal distribution.

        Matrices are shaped (fan_in, fan_out) and stored in layer order. Any
        previously stored weights are discarded, so calling this more than
        once never leaves a mix of old and new parameters behind.
        """
        sizes = self._layer_sizes()
        self.weights = [
            np.random.randn(fan_in, fan_out)
            for fan_in, fan_out in zip(sizes[:-1], sizes[1:])
        ]

        if self.weight_init == "Xavier":
            # Scale by 1/sqrt(fan_in) to keep pre-activation variance roughly
            # independent of the layer width.
            self.weights = [w / np.sqrt(w.shape[0]) for w in self.weights]

    def initiate_biases(self):
        """(Re)draw one standard-normal bias vector per layer, in layer order."""
        self.biases = [np.random.randn(size) for size in self._layer_sizes()[1:]]

    def activation(self, x):
        """Apply the configured hidden-layer activation element-wise.

        Args:
            x: array of shape (batch_size, neurons).

        Raises:
            ValueError: if ``activation_function`` is not a supported name.
        """
        if self.activation_function == "sigmoid":
            # For very negative x, np.exp(-x) overflows to inf (NumPy emits a
            # RuntimeWarning); the quotient still evaluates to 0.0.
            return 1 / (1 + np.exp(-x))
        elif self.activation_function == "tanh":
            return np.tanh(x)
        elif self.activation_function == "ReLU":
            return np.maximum(0, x)
        else:
            raise ValueError("Invalid activation function")

    def output_activation(self, x):
        """Apply the output activation (row-wise softmax) to the logits ``x``.

        Raises:
            ValueError: if ``output_activation_function`` is not "softmax".
        """
        if self.output_activation_function == "softmax":
            return softmax(x, axis=1)
        else:
            raise ValueError("Invalid output activation function")

    def output_matrix(self, x):
        """Run a forward pass and return the class probabilities.

        Args:
            x: array of shape (batch_size, input_size).

        Returns:
            Array of shape (batch_size, output_size) whose rows sum to 1.

        Side effects:
            Resets ``pre_activation`` / ``post_activation``. Entry 0 of both
            is ``x``; entry k >= 1 holds the affine output / activated output
            of layer k, the last one being the logits / softmax output.
        """
        self.pre_activation = [x]
        self.post_activation = [x]

        for i in range(self.hidden_layers):
            # Each layer consumes the ACTIVATED output of the previous one;
            # feeding the pre-activation would silently drop the non-linearity.
            z = np.dot(self.post_activation[i], self.weights[i]) + self.biases[i]
            self.pre_activation.append(z)
            self.post_activation.append(self.activation(z))

        logits = np.dot(self.post_activation[-1], self.weights[-1]) + self.biases[-1]
        self.pre_activation.append(logits)
        self.post_activation.append(self.output_activation(logits))
        return self.post_activation[-1]

In [56]:
def loss(y, y_pred, loss_name=None):
    """Return the batch-averaged loss between targets and predictions.

    Args:
        y: target matrix of shape (batch_size, output_size).
        y_pred: prediction matrix of shape (batch_size, output_size).
        loss_name: "cross_entropy" or "mse". When omitted, the module-level
            ``LOSS`` setting is used.

    Returns:
        The loss summed over all entries and divided by ``y.shape[0]``.

    Raises:
        ValueError: if the loss name is not recognised.
    """
    name = LOSS if loss_name is None else loss_name
    if name == "cross_entropy":
        # A predicted probability of exactly 0 makes log() return -inf, and
        # 0 * -inf is NaN even for non-target classes. Flooring the
        # probabilities keeps the loss finite without affecting normal values.
        floor = 1e-12
        return -np.sum(y * np.log(np.maximum(y_pred, floor))) / y.shape[0]
    elif name == "mse":
        return np.sum((y - y_pred)**2) / y.shape[0]
    else:
        raise ValueError("Invalid loss function")

def loss_derivative(y, y_pred, loss_name=None):
    """Return the element-wise derivative of the loss with respect to y_pred.

    The result is NOT divided by the batch size (unlike ``loss``, which
    averages over ``y.shape[0]``), so any batch averaging is left to the
    caller.

    Args:
        y: target matrix of shape (batch_size, output_size).
        y_pred: prediction matrix of shape (batch_size, output_size).
        loss_name: "cross_entropy" or "mse". When omitted, the module-level
            ``LOSS`` setting is used.

    Returns:
        Array with the same shape as ``y_pred``.

    Raises:
        ValueError: if the loss name is not recognised.
    """
    name = LOSS if loss_name is None else loss_name
    if name == "cross_entropy":
        # Dividing by a probability of exactly 0 gives inf (or NaN for 0/0);
        # flooring the denominator keeps the gradient finite.
        floor = 1e-12
        return -y / np.maximum(y_pred, floor)
    elif name == "mse":
        return 2 * (y_pred - y)
    else:
        raise ValueError("Invalid loss function")