In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

In [2]:
# Read the feature matrix and the label matrix from two header-less CSV
# files, converting each DataFrame to a NumPy array.
x_train, y_train = (
    np.array(pd.read_csv(csv_name, header=None))
    for csv_name in ('train_data.csv', 'train_labels.csv')
)

## Data Processing

In [3]:
# Hold out the first quarter of the samples (integer division) as the
# validation set; everything after that stays in the training set.
# NOTE: this cell rebinds x_train / y_train, so running it a second time
# without reloading the data shrinks the training set again.
len_valid = len(x_train) // 4
x_valid, x_train = x_train[:len_valid], x_train[len_valid:]
y_valid, y_train = y_train[:len_valid], y_train[len_valid:]

## Build the model

In [10]:
class MLP:
    """Multi-layer perceptron with one sigmoid hidden layer and a sigmoid output layer.

    Training is plain stochastic gradient descent: the weights are updated
    after every single sample, using the gradient of a squared-error loss.

    Attributes:
        w1: hidden-layer weights, shape (input_size, hidden_size).
        b1: hidden-layer bias, shape (1, hidden_size).
        w2: output-layer weights, shape (hidden_size, output_size).
        b2: output-layer bias, shape (1, output_size).
    """

    def __init__(self, input_size, hidden_size, output_size, seed=None):
        """Initialise all weights and biases from a standard normal distribution.

        Args:
            input_size: number of input features.
            hidden_size: number of hidden units.
            output_size: number of output units.
            seed: optional integer seed. When given, a private RandomState is
                used so the initialisation is reproducible; when None (the
                default) NumPy's global random stream is used.
        """
        rng = np.random if seed is None else np.random.RandomState(seed)
        self.w1 = rng.randn(input_size, hidden_size)
        self.b1 = rng.randn(1, hidden_size)
        self.w2 = rng.randn(hidden_size, output_size)
        self.b2 = rng.randn(1, output_size)

    def sigmoid(self, x):
        """Element-wise logistic function 1 / (1 + exp(-x))."""
        return 1.0 / (1.0 + np.exp(-x))

    def sigmoid_derivative(self, y):
        """Derivative of the sigmoid expressed through its output y = sigmoid(x)."""
        return y * (1.0 - y)

    def loss_fn(self, y_pred, y):
        """Sum of squared errors between target and prediction (summed, not averaged)."""
        loss = np.sum((y - y_pred) ** 2)
        return loss

    def _forward(self, X):
        """Run the forward pass; return (hidden activations, output activations)."""
        hidden = self.sigmoid(X @ self.w1 + self.b1)
        output = self.sigmoid(hidden @ self.w2 + self.b2)
        return hidden, output

    def predict(self, X):
        """Return the network's output activations for X.

        Args:
            X: a single sample of shape (input_size,) or a batch of shape
                (n_samples, input_size).

        Returns:
            Array of shape (n_samples, output_size); a single sample yields
            one row. Use np.argmax(..., axis=1) to obtain class indices.
        """
        X = np.atleast_2d(np.asarray(X))
        return self._forward(X)[1]

    def fit(self, X, Y, epochs, lr):
        """Train with per-sample gradient descent and print per-epoch metrics.

        Args:
            X: iterable of input samples, each of shape (input_size,).
            Y: iterable of target vectors, each of shape (output_size,).
            epochs: number of passes over (X, Y).
            lr: learning rate.
        """
        for epoch in range(epochs):
            losses = []
            acc_history = []
            for x, y in zip(X, Y):
                # Forward pass: h_z is (1, hidden_size), y_pred is (1, output_size).
                h_z, y_pred = self._forward(x)

                # Metrics are recorded before this sample's weight update.
                losses.append(self.loss_fn(y_pred, y))
                # Arg-max agreement is computed inline so the class does not
                # rely on a module-level helper being defined first.
                acc_history.append(int(np.argmax(y_pred) == np.argmax(y)))

                # Backward pass, output layer. The learning rate is folded
                # into the bias step, which is then reused for the weights.
                delta2 = ((y_pred - y) * self.sigmoid_derivative(y_pred)).reshape(1, -1)
                grad_b2 = lr * delta2
                grad_w2 = h_z.T @ grad_b2

                # Backward pass, hidden layer. Uses w2 from before the update.
                grad_b1 = (lr * (delta2 @ self.w2.T) * self.sigmoid_derivative(h_z)).reshape(1, -1)
                grad_w1 = x.T.reshape(-1, 1) @ grad_b1

                # Update weights in place.
                self.w1 -= grad_w1
                self.b1 -= grad_b1
                self.w2 -= grad_w2
                self.b2 -= grad_b2

            epoch_loss = np.array(losses).mean()
            epoch_acc = np.array(acc_history).mean()
            # NOTE: despite the "val_" labels, these are averages over the
            # (X, Y) passed to fit, not over a separate validation set.
            print("Epoch [{}], val_loss: {:.4f}, val_acc: {:.4f}".format(epoch, epoch_loss, epoch_acc))
              
            

In [11]:
def point_acc(y_pred, y):
    """Return 1 if the prediction and the target peak at the same index, else 0."""
    predicted_class = np.argmax(y_pred)
    target_class = np.argmax(y)
    return int(predicted_class == target_class)

In [12]:
# Seed NumPy's global generator before building the model: MLP draws its
# initial weights with np.random.randn, so without a seed every kernel
# restart trains a different network and the log below is not reproducible.
SEED = 42
np.random.seed(SEED)

# Hyperparameters for this run.
HIDDEN_SIZE = 64
EPOCHS = 10
LEARNING_RATE = 0.01

# Input width comes from the feature columns, output width from the label columns.
mlp = MLP(
    input_size=x_train.shape[1],
    hidden_size=HIDDEN_SIZE,
    output_size=y_train.shape[1],
)
mlp.fit(x_train, y_train, epochs=EPOCHS, lr=LEARNING_RATE)

Epoch [0], val_loss: 0.6166, val_acc: 0.5740
Epoch [1], val_loss: 0.3864, val_acc: 0.7077
Epoch [2], val_loss: 0.3474, val_acc: 0.7262
Epoch [3], val_loss: 0.3265, val_acc: 0.7494
Epoch [4], val_loss: 0.2231, val_acc: 0.8891
Epoch [5], val_loss: 0.1241, val_acc: 0.9381
Epoch [6], val_loss: 0.1012, val_acc: 0.9468
Epoch [7], val_loss: 0.0896, val_acc: 0.9520
Epoch [8], val_loss: 0.0819, val_acc: 0.9559
Epoch [9], val_loss: 0.0763, val_acc: 0.9587


In [None]:
# Scratch cell: binds `a` to an empty 1-D float64 array. Nothing else in the
# visible notebook reads `a`.
a = np.zeros(0)