In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
from sklearn import datasets
from sklearn.cross_validation import train_test_split

In [3]:
dataset = datasets.fetch_mldata('MNIST Original')
trainX, testX, trainY, testY = train_test_split(dataset.data / 255.0, dataset.target.astype('int0'), test_size=0.3)

In [4]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    tmp = sigmoid(x)
    return tmp * (1 - tmp)

In [5]:
def append_bias(arr):
    return np.concatenate((np.array([-1]), arr))

In [6]:
class NeuralNetwork:
    def __init__(self, layers):
        self.num_layers = len(layers)
        self.layers = layers
        
    def train(self, X_, y, max_iter=10000, learning_rate=1):
        n = len(y)
        X = X_.copy()
        self.weights = [np.random.rand(self.layers[i]+1,self.layers[i+1])/n - 1/(2*n) for i in range(self.num_layers-1)]
        
        self.losses = []
        self.α = learning_rate
        
        for _ in range(max_iter):
            ind = np.random.randint(0, n)            
            self.forward(X[ind])
            self.backward(X[ind], y[ind])
    
    def forward(self, X):
        self.result = [append_bias(X)]
        self.state = [self.result[-1].copy()]
        
        for w in self.weights:
            self.result.append(append_bias(np.sum(self.state[-1] * w.T, axis=1)))
            self.state.append(append_bias(sigmoid(self.result[-1][1:])))
            
        return self.state[-1][1:]
        
    def backward(self, X, y):
        ϵ = self.state[-1][1:].copy()
        ϵ[y] -= 1
        self.losses.append(np.sum(np.square(ϵ)) / 2)
        
        for i in range(self.num_layers-2, -1, -1):
            tmp = self.weights[i][1:].copy()
            self.weights[i] = self.weights[i] - self.α * ϵ * append_bias(sigmoid_derivative(
                                                            self.result[i][1:]))[:,np.newaxis] * self.state[i][:,np.newaxis]
            ϵ = np.sum(ϵ * sigmoid_derivative(self.result[i][1:])[:,np.newaxis] * tmp, axis=1)
        
    def predict(self, X):
        res = []
        for x in X:
            res.append(np.argmax(self.forward(x)))
            
        return np.array(res)

In [8]:
nn = NeuralNetwork([trainX.shape[1], 10])

In [9]:
np.random.seed(1)
nn.train(trainX, trainY, max_iter=2000)

In [10]:
(nn.predict(testX) == testY).sum() / len(testY)

0.65195238095238095

In [12]:
res = []

for H in [15, 50, 100, 200]:
    nn = NeuralNetwork([trainX.shape[1], H, 10])
    nn.train(trainX, trainY, max_iter=2000)
    res.append((nn.predict(testX) == testY).sum() / len(testY))
    
res

[0.098000000000000004,
 0.10871428571428571,
 0.80785714285714283,
 0.79652380952380952]

In [18]:
res = []

for n in range(5):
    nn = NeuralNetwork([trainX.shape[1]] + [15]*n + [10])
    nn.train(trainX, trainY, max_iter=2000)
    res.append((nn.predict(testX) == testY).sum() / len(testY))
    
res

[0.66152380952380951,
 0.098000000000000004,
 0.098000000000000004,
 0.10295238095238095,
 0.098000000000000004]