A two-layer fully connected autoencoder trained on the MNIST handwritten digits dataset, implemented using NumPy only (torchvision is used solely to download and read the dataset).

In [15]:
import numpy as np
import matplotlib.pyplot as plt
import torchvision

Loading dataset

In [16]:
def load_dataset():
    """Load the MNIST training split as a 2D array of flattened images.

    The dataset is obtained through torchvision (stored under './', with
    download=True), and each image is flattened into one row of the result.

    Returns:
        np.ndarray of shape (len(mnist), 784) with the default dtype of
        np.zeros (float64), holding the raw, unscaled pixel values.
    """
    mnist = torchvision.datasets.MNIST('./', train=True, download=True)

    num_images = len(mnist)
    X = np.zeros((num_images, 784))
    for idx in range(num_images):
        # Element 0 of each dataset item is the image; the rest is ignored.
        image = mnist[idx][0]
        X[idx] = np.asarray(image).ravel()
    return X

# Rescale the raw pixel values by 1/255 (presumably mapping 8-bit
# intensities into [0, 1], matching the sigmoid output range of the decoder).
X = load_dataset() / 255.0

In [None]:
# Sanity check: display the first training example as a 28x28 image.
first_image = X[0].reshape(28, 28)
plt.imshow(first_image)
plt.show()

Randomly initialize layer 1 and layer 2 weights as 2D numpy arrays with zero mean and 0.01 standard deviation. For simplicity, biases are omitted.

In [18]:
INPUT_SIZE  = X.shape[1]
HIDDEN_SIZE = 64

# Fixed seed so that restarting the kernel and re-running the notebook
# reproduces the same initial weights instead of a fresh random draw.
SEED = 0
rng = np.random.default_rng(SEED)

# w1 maps the input to the hidden representation, w2 maps it back to the
# input dimension. Both are drawn from a normal distribution with zero mean
# and 0.01 standard deviation; there are no bias terms.
w1 = rng.normal(0, 0.01, (INPUT_SIZE, HIDDEN_SIZE))
w2 = rng.normal(0, 0.01, (HIDDEN_SIZE, INPUT_SIZE))

ReLU and sigmoid activation functions, each with a forward pass and a backward (gradient) pass.
The mean squared error metric is computed from the given outputs and targets.

In [19]:
def relu(x):
    """Rectified linear unit: element-wise maximum of 0 and x.

    Args:
        x: array (or scalar) of pre-activations.

    Returns:
        Values of x with every negative entry replaced by 0.
    """
    rectified = np.maximum(0, x)
    return rectified

def sigmoid(x):
    """Logistic sigmoid 1 / (1 + exp(-x)), evaluated without overflow.

    The direct formula computes exp(-x), which overflows to inf (and emits
    a RuntimeWarning) for large negative x. Here the exponential is only
    ever taken of a non-positive number, so it stays in [0, 1].

    Args:
        x: array (or scalar) of pre-activations.

    Returns:
        Element-wise sigmoid of x, with values in [0, 1].
    """
    x = np.asarray(x)
    # exp(-|x|) never exceeds 1, so it cannot overflow.
    z = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + exp(-x))  (identical to the direct formula)
    # x <  0: exp(x) / (1 + exp(x))  (algebraically the same function)
    out = np.where(x >= 0, 1 / (1 + z), z / (1 + z))
    # Indexing with () turns a 0-d result back into a scalar and leaves
    # arrays of higher rank unchanged.
    return out[()]

def relu_backward(D_output, output):
    """Backward pass of ReLU.

    Args:
        D_output: gradient with respect to the ReLU output.
        output: the ReLU output from the forward pass.

    Returns:
        D_output where output is strictly positive, and 0 elsewhere
        (implemented as a multiplication by the boolean mask).
    """
    active = output > 0
    return D_output * active

def sigmoid_backward(D_output, output):
    """Backward pass of the sigmoid.

    Uses the identity sigmoid'(h) = s * (1 - s), where s is the sigmoid
    output, so only the forward output is needed.

    Args:
        D_output: gradient with respect to the sigmoid output.
        output: the sigmoid output from the forward pass.

    Returns:
        D_output * output * (1 - output), element-wise.
    """
    scaled = D_output * output
    return scaled * (1 - output)

def mean_squared_error(outputs, targets):
    """Mean of the squared element-wise differences between two arrays.

    Args:
        outputs: predicted values.
        targets: reference values, broadcastable against outputs.

    Returns:
        A single scalar: the average of (outputs - targets) ** 2 taken over
        every element.
    """
    residuals = outputs - targets
    return np.mean(np.square(residuals))

Hyperparameters

In [20]:
# Mini-batch size, number of passes over the data, and gradient step size.
BATCH_SIZE = 1024
NUM_EPOCHS = 10
LEARNING_RATE = 1e-4

# Only full batches are counted: the floor division drops any trailing
# examples that do not fill a complete batch.
NUM_TRAINING_INSTANCES = len(X)
ITERATIONS_PER_EPOCH = NUM_TRAINING_INSTANCES // BATCH_SIZE

Training

In [None]:
for epoch in range(NUM_EPOCHS):
    for n in range(ITERATIONS_PER_EPOCH):

        # Select the n-th contiguous mini-batch. A slice returns a view of X,
        # which avoids the copy that indexing with a range object makes on
        # every iteration. X_batch is only read below, never modified.
        batch_indices = slice(n * BATCH_SIZE, (n + 1) * BATCH_SIZE)
        X_batch = X[batch_indices]

        # Forward pass, layer 1: linear projection followed by ReLU.
        h1 = np.dot(X_batch, w1)
        o1 = relu(h1)

        # Forward pass, layer 2: linear projection followed by sigmoid.
        h2 = np.dot(o1, w2)
        o2 = sigmoid(h2)

        # Autoencoder objective: the target is the input batch itself.
        loss = mean_squared_error(o2, X_batch)

        print('Epoch %d, Iteration %d -- Loss: %.6f' % (epoch + 1, epoch * ITERATIONS_PER_EPOCH + n, loss))

        # Backward pass. The gradient below omits the 1 / o2.size factor of
        # the mean in the loss, i.e. it is the gradient of the summed squared
        # error; that constant scale is effectively folded into LEARNING_RATE.
        D_o2 = 2 * (o2 - X_batch)

        D_h2 = sigmoid_backward(D_o2, o2)

        # Propagate through the second linear layer: gradient for its input
        # (o1) and for its weights (w2).
        D_o1 = np.dot(D_h2, w2.T)
        D_w2 = np.dot(o1.T, D_h2)

        D_h1 = relu_backward(D_o1, o1)

        D_w1 = np.dot(X_batch.T, D_h1)

        # Plain gradient-descent step, applied in place to both weight matrices.
        w1 -= D_w1 * LEARNING_RATE
        w2 -= D_w2 * LEARNING_RATE

        # Every 10th iteration, show the first image of the batch next to its
        # reconstruction (o2 was computed before this iteration's update).
        if n % 10 == 0:
            plt.figure()
            plt.title('Original %d dimensional image' % INPUT_SIZE)
            plt.imshow(X_batch[0].reshape(28, 28)); plt.show()

            plt.figure()
            plt.title('Reconstruction from %d dimensional representation' % HIDDEN_SIZE)
            plt.imshow(o2[0].reshape(28, 28)); plt.show()