In [None]:
# load required packages

import os
import numpy as np
import matplotlib.pyplot as plt

# print arrays with two decimals and in fixed-point (no scientific) notation
np.set_printoptions(precision=2, suppress=True)

In [None]:
def arr_stg(arr, row_sep=" ", col_sep="\n", fmt="%3d"):
    """Format a 2-D array as a string.

    Parameters
    ----------
    arr : 2-D array
        Must expose a two-element ``shape`` and support ``arr[i, j]``.
    row_sep : str
        Separator placed between the entries of one row.
    col_sep : str
        Separator placed between consecutive rows.
    fmt : str
        ``%``-style format applied to every entry.

    Returns
    -------
    str
        The formatted rows joined by ``col_sep``.
    """
    n_rows, n_cols = arr.shape
    lines = []
    for r in range(n_rows):
        cells = [fmt % arr[r, c] for c in range(n_cols)]
        lines.append(row_sep.join(cells))
    return col_sep.join(lines)

def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``, evaluated without overflow.

    Parameters
    ----------
    x : scalar or array
        Input value(s); the function is applied element-wise.

    Returns
    -------
    Same shape as ``x``, with values in ``[0, 1]``.
    """
    # 1 / (1 + exp(-x)) == exp(-log(exp(0) + exp(-x))).  np.logaddexp evaluates
    # that logarithm without forming exp(-x), so a large negative x gives 0.0
    # instead of an overflow warning.
    return np.exp(-np.logaddexp(0, -x))

def sigmoid_prime(x):
    """Derivative of ``sigmoid`` at ``x``: ``sigmoid(x) * (1 - sigmoid(x))``."""
    s = sigmoid(x)
    return s * (1 - s)

# activation used by every layer of `nn`, and its derivative
sigma = sigmoid
sigma_prime = sigmoid_prime

class nn():
    """Fully connected feed-forward network trained with gradient descent.

    Every layer computes ``sigma(a_prev.dot(W) + b)`` with the module-level
    activation ``sigma``; ``backpropagation`` uses its derivative
    ``sigma_prime``.  Both names are resolved when a method runs.

    Attributes
    ----------
    n_layers : int
        Number of weight layers, i.e. ``len(sizes) - 1``.
    weights, biases : list
        Indexed by layer 1 ~ n_layers; entry 0 (the input layer) is ``None``.
        ``weights[i]`` has shape ``(sizes[i-1], sizes[i])`` and ``biases[i]``
        has shape ``(sizes[i],)``.
    """

    def __init__(self, sizes):
        """Create a network whose parameters are drawn from a standard normal.

        Parameters
        ----------
        sizes : sequence of int
            Layer widths, input layer first (e.g. ``[784, 36, 10]``).
        """
        self.n_layers = len(sizes) - 1
        # input layer, layer 1 ~ n
        self.weights = [None] + [np.random.randn(sizes[i-1], sizes[i]) for i in range(1, self.n_layers + 1)]
        self.biases = [None] + [np.random.randn(sizes[i]) for i in range(1, self.n_layers + 1)]

    def feedforward(self, X, return_all=False):
        """Propagate ``X`` through the network.

        Parameters
        ----------
        X : array of shape (N, sizes[0]) or (sizes[0],)
            A batch of samples or a single sample.
        return_all : bool
            If true, return every layer's activation and pre-activation.

        Returns
        -------
        The output activation, or ``(acts, zs)`` when ``return_all`` is true.
        ``acts[0]`` is ``X`` itself and ``zs[0]`` is ``None``.
        """
        acts = [X]
        zs = [None]
        for i in range(1, self.n_layers + 1):
            z = acts[i-1].dot(self.weights[i]) + self.biases[i]
            zs.append(z)
            acts.append(sigma(z))
        if return_all:
            return acts, zs
        return acts[-1]

    def backpropagation(self, X, Y, alpha=0.1, return_all=False):
        """Run one gradient-descent step on the batch ``(X, Y)``.

        The loss is the squared error ``((output - Y) ** 2).sum()``.  Each
        parameter moves by ``alpha / N`` times the gradient of that loss,
        where ``N`` is the number of samples in the batch.

        Parameters
        ----------
        X : array of shape (N, sizes[0]) or (sizes[0],)
            Inputs; a single 1-D sample is treated as a batch of one.
        Y : array of shape (N, sizes[-1]) or (sizes[-1],)
            Targets matching ``X``.
        alpha : float
            Learning rate.
        return_all : bool
            If true, also return the per-sample gradients.

        Returns
        -------
        ``None``, or ``(grad_Ws, grad_bs)`` when ``return_all`` is true.  Both
        are lists indexed like ``weights`` (entry 0 is ``None``);
        ``grad_Ws[i]`` has shape ``(N, sizes[i-1], sizes[i])`` and
        ``grad_bs[i]`` has shape ``(N, sizes[i])``.  They are evaluated at the
        parameters as they were before this step's update.
        """
        if np.ndim(X) == 1:
            # a single sample: promote to a batch of one
            X, Y = np.atleast_2d(X), np.atleast_2d(Y)
        N = X.shape[0]
        acts, zs = self.feedforward(X, return_all=True)

        # deltas[i] is the gradient of the loss w.r.t. zs[i], one row per sample
        deltas = [None] * (self.n_layers + 1)
        deltas[-1] = 2 * (acts[-1] - Y) * sigma_prime(zs[-1])
        for i in range(self.n_layers - 1, 0, -1):
            # (N, dim_i) = (N, dim_{i+1}).dot(dim_{i+1}, dim_i) * (N, dim_i)
            # The dot product gives the gradient w.r.t. the activation a_i;
            # the chain rule through a_i = sigma(z_i) adds sigma_prime(z_i).
            deltas[i] = deltas[i + 1].dot(self.weights[i + 1].T) * sigma_prime(zs[i])

        grad_Ws = None
        if return_all:
            # Per-sample weight gradients are only materialised on request:
            # (N, dim_{i-1}, dim_i) = (N, dim_{i-1}, 1) * (N, 1, dim_i)
            grad_Ws = [None] * (self.n_layers + 1)
            for i in range(1, self.n_layers + 1):
                grad_Ws[i] = acts[i-1][:, :, np.newaxis] * deltas[i][:, np.newaxis, :]

        # update; all deltas were computed above, from the pre-update weights
        step = alpha / N
        for i in range(1, self.n_layers + 1):
            # acts[i-1].T.dot(deltas[i]) is the per-sample gradient summed over
            # the batch, without building the (N, dim_{i-1}, dim_i) tensor
            self.weights[i] -= step * acts[i-1].T.dot(deltas[i])
            self.biases[i] -= step * deltas[i].sum(axis=0)

        if return_all:
            # the bias gradient of a layer equals its delta
            return grad_Ws, deltas

In [None]:
### load NSYSU-digits

# `import urllib` alone does not load the `request` submodule, so import it explicitly
import urllib.request
import numpy as np

base = r"https://github.com/SageLabTW/auto-grading/raw/master/nsysu-digits/"
for c in ['X', 'y']:
    filename = "nsysu-digits-%s.csv"%c
    # download each file only when it is not already in the working directory
    if not os.path.exists(filename):
        print(filename, 'not found --- will download')
        urllib.request.urlretrieve(base + c + ".csv", filename)

Xsys = np.genfromtxt('nsysu-digits-X.csv', dtype=int, delimiter=',') ### flattened already
ysys = np.genfromtxt('nsysu-digits-y.csv', dtype=int, delimiter=',')
# one-hot encode the labels by picking rows of the 10x10 identity matrix
ysysone = np.eye(10)[ysys]
print(Xsys.shape, ysys.shape)

In [None]:
# load MNIST (long)
# Only needed by the later MNIST cell, which reads mnist['data'] and mnist['target'].

from sklearn.datasets import fetch_openml
mnist = fetch_openml('mnist_784')

In [None]:
# check input
# Sample 1 is printed twice: first as one comma-separated row of raw values,
# then reshaped to a 28x28 grid with no separator between entries.

print(arr_stg(Xsys[1:2,:], row_sep=",", fmt="%d"))
print(arr_stg(Xsys[1].reshape(28,28), row_sep=""))

In [None]:
# check output: the label of sample 1, then its one-hot encoding

print(ysys[1], ysysone[1], sep="\n")

In [None]:
# test: a small 3-4-2 network and ten identical all-ones samples

digits_nn = nn([3, 4, 2])
X, Y = np.ones((10, 3)), np.ones((10, 2))

In [None]:
# target of sample 0, for comparison with the outputs printed below
print(Y[0])
for epoch in range(10):
    # network output for sample 0 before this epoch's updates
    print(epoch, digits_nn.feedforward(X[0]))
    # 100 full-batch gradient steps per epoch, using the default alpha
    for _ in range(100):
        digits_nn.backpropagation(X, Y)

In [None]:
# NSYSU-digits: 784 inputs, one hidden layer of 36 units, 10 outputs

digits_nn = nn([784, 36, 10])
X, Y = Xsys, ysysone
print(X.shape, Y.shape)

In [None]:
# MNIST (long)
# Alternative to the NSYSU-digits setup: this cell rebinds digits_nn, X and Y.

digits_nn = nn([784,36,10])
# `.values` is used on both fields, so they are presumably pandas objects —
# TODO confirm for the installed scikit-learn version
X = mnist['data'].values.astype(float)
# one-hot encode the integer labels by picking rows of the 10x10 identity matrix
Y = np.eye(10)[mnist['target'].values.astype(int)]
print(X.shape, Y.shape)

In [None]:
# stochastic gradient descent

N = X.shape[0]
print(Y[0])
# the targets do not change between epochs, so decode the one-hot labels once
labels = np.argmax(Y, axis=1)
for epoch in range(10):
    # evaluate on the whole data set before this epoch's updates
    pred_Y = digits_nn.feedforward(X)
    preds = np.argmax(pred_Y, axis=1)
    # fraction of correct predictions, as a scalar
    accuracy = np.mean(preds == labels)
    print(epoch, digits_nn.feedforward(X[0]), "acc =", accuracy)
    for _ in range(1000):
        # mini-batch of 10 sample indices drawn at random (with replacement)
        inds = np.random.choice(N, 10)
        digits_nn.backpropagation(X[inds], Y[inds], alpha=0.03)

In [None]:
# most important feature
# For every output n, show the incoming weights of the hidden unit that has
# the largest weight into that output, drawn as a 28x28 image.

fig, axs = plt.subplots(2, 5, figsize=(15, 6))
Ws = digits_nn.weights

for n in range(10):
    i, j = divmod(n, 5)
    ax = axs[i, j]
    max_ind = np.argmax(Ws[2][:, n])  # out of 0 ~ 35
    max_feature = Ws[1][:, max_ind]
    ax.set_title(n)
    ax.axis("off")
    ax.imshow(max_feature.reshape(28, 28))