In [None]:
# Standard scientific Python imports
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

# Load the digits dataset; later cells read its `.images` and `.target` arrays.
digits = datasets.load_digits()

In [None]:
# Inspect dataset: draw the first ROW x COL digit images, then dump one sample.
SAMPLE = 0
(ROW, COL) = (5, 5)
# np.shape reports (rows, columns), i.e. (height, width) for a 2-D image.
(HEIGHT, WIDTH) = np.shape(digits.images[SAMPLE])

plt.figure(figsize=(5,5))
# Only the first ROW*COL images are needed, so slice the array directly
# instead of pairing every image with its label first.
for index, image in enumerate(digits.images[:ROW*COL]):
    plt.subplot(ROW, COL, index + 1)
    plt.xticks([])
    plt.yticks([])
    plt.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest')
plt.show()

print(f'{digits.target[SAMPLE]} -> {WIDTH} x {HEIGHT}\n {digits.images[SAMPLE]}')

In [None]:
# One-hot encode the digit labels: every target becomes a row of indicator values.
labels = digits.target.reshape(-1, 1)  # one column, one row per sample
enc = OneHotEncoder()
onehotlabels = enc.fit_transform(labels).toarray()
print(f'{digits.target[SAMPLE]} -> {onehotlabels[SAMPLE]}')

In [None]:
# Flatten each 2-D image into a single feature vector of WIDTH*HEIGHT values;
# the one-hot labels are used as they are.
wrapper_images = [image.reshape(WIDTH * HEIGHT) for image in digits.images]
wrapper_labels = onehotlabels

print(f'images -> {np.shape(wrapper_images)}, labels -> {np.shape(wrapper_labels)}')

In [None]:
# Hold out 33% of the samples for evaluation; the fixed random_state makes the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    wrapper_images,
    wrapper_labels,
    test_size=0.33,
    random_state=42,
)
# Transpose every part so that samples run along the columns: (features x numbers).
X_train, X_test, y_train, y_test = (
    np.array(part).T for part in (X_train, X_test, y_train, y_test)
)

In [None]:
class NeuralNetwork:
    """Fully connected feed-forward network with sigmoid units.

    The network is trained with full-batch gradient descent. Data is laid out
    column-wise: inputs are ``(features, samples)`` arrays and targets are
    ``(outputs, samples)`` arrays. Each weight matrix has shape
    ``(next_layer, previous_layer + 1)``; its first column multiplies a
    constant-one bias row that ``feedforward`` prepends to the layer input.
    """

    def __init__(self, neurons, seed=None):
        """Create the network and draw its initial weights from a standard normal.

        Args:
            neurons: Layer sizes from input to output, e.g. ``[64, 64, 64, 10]``.
            seed: Optional seed for a private ``np.random.RandomState``. With
                the default ``None`` the weights come from NumPy's global
                random state, so ``np.random.seed`` still controls them.

        Raises:
            ValueError: If fewer than two layer sizes are given.
        """
        if len(neurons) < 2:
            raise ValueError("neurons must list at least an input and an output layer size")
        self._layers = len(neurons)
        self._neurons = neurons
        rng = np.random if seed is None else np.random.RandomState(seed)
        # One matrix per pair of consecutive layers; the "+ 1" column is the bias.
        self._weights = [rng.randn(nex, pre + 1) for pre, nex in zip(neurons[:-1], neurons[1:])]

    def _sigmoid(self, x):
        """Return the element-wise logistic function ``1 / (1 + exp(-x))``.

        The value is computed from ``exp(-|x|)``, which never exceeds 1, so
        inputs of large magnitude cannot overflow.
        """
        x = np.asarray(x, dtype=float)
        e = np.exp(-np.abs(x))
        # For x >= 0 this is 1 / (1 + exp(-x)); for x < 0 the algebraically
        # equal form exp(x) / (1 + exp(x)) is used.
        return np.where(x >= 0, 1.0 / (1.0 + e), e / (1.0 + e))

    def _sigmoid_prime(self, x):
        """Return the derivative of the sigmoid at ``x``: ``s(x) * (1 - s(x))``."""
        s = self._sigmoid(x)
        return s * (1 - s)

    def feedforward(self, a):
        """Propagate a batch through every layer.

        Args:
            a: Input array of shape ``(features, samples)``.

        Returns:
            A tuple ``(output, z_s, a_s)``. ``output`` is the activation of the
            last layer. ``z_s`` holds the pre-activation of every layer.
            ``a_s`` holds the bias-augmented input of every layer followed by
            the final output (which has no bias row).
        """
        z_s = []
        a_s = []
        for w in self._weights:
            # Prepend a row of ones so the first weight column acts as the bias.
            a = np.r_[np.ones((1, a.shape[1])), a]
            a_s.append(a)
            z = w.dot(a)
            a = self._sigmoid(z)
            z_s.append(z)
        a_s.append(a)
        return (a, z_s, a_s)

    def backprop(self, y_hat, y, z_s, a_s):
        """Compute the weight gradients for one batch.

        Args:
            y_hat: Network output. It is not read here (the output is taken
                from ``a_s[-1]``); the parameter is kept so existing callers
                keep working.
            y: Targets of shape ``(outputs, samples)``.
            z_s: Pre-activations as returned by ``feedforward``.
            a_s: Activations as returned by ``feedforward``.

        Returns:
            A list of arrays, one per weight matrix and of the same shape.
            The gradients are summed over the samples of the batch, not
            averaged.
        """
        delta_weights = [np.zeros(w.shape) for w in self._weights]
        # Output layer: the error term is simply (output - target).
        delta = a_s[-1] - y
        delta_weights[-1] = np.dot(delta, a_s[-2].T)
        # Hidden layers, walking backwards from the one before the output.
        for L in range(2, self._layers):
            # [1:] drops the bias row, which has no incoming error to propagate.
            delta = self._weights[-L+1].T.dot(delta)[1:] * self._sigmoid_prime(z_s[-L])
            delta_weights[-L] = np.dot(delta, a_s[-L-1].T)
        return delta_weights

    def fit(self, x, y, iterations = 1000, learning_rate=0.001):
        """Train with batch gradient descent, updating the weights in place.

        Args:
            x: Training inputs of shape ``(features, samples)``.
            y: Training targets of shape ``(outputs, samples)``.
            iterations: Number of full-batch update steps.
            learning_rate: Step size applied to the summed batch gradient.
        """
        for _ in range(iterations):
            (y_hat, z_s, a_s) = self.feedforward(x)
            delta_weights = self.backprop(y_hat, y, z_s, a_s)
            # Update weights of each layer
            self._weights = [w - learning_rate * dw for w, dw in zip(self._weights, delta_weights)]

    def predict(self, x):
        """Return the output-layer activations for inputs ``x`` of shape ``(features, samples)``."""
        return self.feedforward(x)[0]

    def score(self, y_pred, y_test):
        """Return the percentage of samples whose arg-max prediction matches the label.

        Args:
            y_pred: Predictions of shape ``(outputs, samples)``.
            y_test: One-hot targets of the same shape.

        Raises:
            ValueError: If the two arrays hold a different number of samples.
            ZeroDivisionError: If there are no samples.
        """
        y_pred = np.asarray(y_pred)
        y_test = np.asarray(y_test)
        if y_pred.shape[1] != y_test.shape[1]:
            raise ValueError("y_pred and y_test must have the same number of samples")
        hits = np.argmax(y_pred, axis=0) == np.argmax(y_test, axis=0)
        return int(np.count_nonzero(hits)) / y_pred.shape[1] * 100

    def get_cost(self, y_hat, y, eps=0.0001):
        """Return the cross-entropy cost averaged over the samples.

        Args:
            y_hat: Predictions of shape ``(outputs, samples)``.
            y: Targets of the same shape.
            eps: Small constant added inside both logarithms so that
                predictions of exactly 0 or 1 do not produce ``log(0)``.
        """
        return -1/y.shape[1] * (y * np.log(y_hat+eps) + (1-y) * np.log(1-y_hat+eps)).sum()

In [None]:
# Train model
# NeuralNetwork draws its initial weights from NumPy's global random state, so
# seed it here to get the same weights on every Restart & Run All.
np.random.seed(42)
# Input and output widths come from the data; both hidden layers keep 64 units.
neurons = [X_train.shape[0], 64, 64, y_train.shape[0]]
nn = NeuralNetwork(neurons)
nn.fit(X_train, y_train, iterations=5000)

In [None]:
# Evaluate on the held-out split: run the test inputs through the trained network.
y_pred = nn.predict(X_test)
# Percentage of test samples whose arg-max prediction equals the arg-max of the
# one-hot label; as the last expression it is shown as the cell output.
nn.score(y_pred, y_test)