In [374]:
%matplotlib inline
import matplotlib.pyplot as plt
import time
import numpy as np

import sys
sys.path.insert(0,'/home/henning/git_repos/ummon3/examples')
import load_mnist

# Default plot configuration: large figures with a grid.
plt.rcParams['figure.figsize'] = 15, 7.5
plt.rcParams['axes.grid'] = True
# Select the gray colormap through rcParams instead of plt.gray(): plt.gray()
# also looks up the current image, which implicitly creates an empty figure
# that this cell would then render as a stray "<Figure ... with 0 Axes>".
plt.rcParams['image.cmap'] = 'gray'
# Print small floats in fixed-point notation instead of scientific notation.
np.set_printoptions(suppress=True)

<Figure size 1080x540 with 0 Axes>

In [375]:
# Read the train / validation / test split for all ten digit classes.
x_train, y_train, x_valid, y_valid, x_test, y_test = load_mnist.read(list(range(10)), path="")

# Convert the image arrays to float64 and scale them by 1/255.
x_train, x_valid, x_test = [
    (1.0 / 255.0) * images.astype('float64')
    for images in (x_train, x_valid, x_test)
]

# Convert the label arrays to float64 as well.
y_train, y_valid, y_test = [
    targets.astype('float64')
    for targets in (y_train, y_valid, y_test)
]

MNIST, Classes:     10
Training Images:    (50000, 784)
Validation Images:  (10000, 784)
Test Images:        (10000, 784)


In [376]:
# Network parameters

# Number of hidden neurons.
no_hidden = 36
# Three-layer MLP with 784 input, no_hidden hidden and 10 output neurons.
sizes = [784, no_hidden, 10]
# Number of layers.
num_layers = len(sizes)

# Mini-batch size.
mbs = 10
# Learning rate.
eta = 0.1
# Number of training epochs.
epochs = 50

In [377]:
# Seed NumPy's global generator so that the random initialisation below (and
# later np.random calls made after this cell) is reproducible when the
# notebook is restarted and run top to bottom.
np.random.seed(42)

# One (n_out, 1) bias column per non-input layer, drawn from a standard normal.
biases = [np.random.randn(n_out, 1) for n_out in sizes[1:]]
# One (n_out, n_in) weight matrix per pair of consecutive layers, standard normal.
weights = [np.random.randn(n_out, n_in) for n_in, n_out in zip(sizes[:-1], sizes[1:])]

In [378]:
def sigmoid(z):
    """Logistic sigmoid 1 / (1 + exp(-z)), evaluated element-wise.

    The value is computed from exp(-|z|), which is never larger than 1, so the
    exponential cannot overflow (and emit a RuntimeWarning) for large negative
    inputs the way a direct exp(-z) does.

    Args:
        z: scalar or array-like of pre-activation values.

    Returns:
        A NumPy scalar for scalar input, otherwise an array shaped like `z`.
    """
    z = np.asarray(z)
    decay = np.exp(-np.abs(z))
    # For z >= 0: 1 / (1 + exp(-z)); for z < 0 the algebraically equal
    # exp(z) / (1 + exp(z)). Both branches only ever see decay <= 1.
    result = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () turns a 0-d result back into a NumPy scalar (what the
    # plain arithmetic expression yields for scalar input); n-d arrays pass through.
    return result[()]

def sigmoid_derived(z):
    """Derivative of the logistic sigmoid, s(z) * (1 - s(z)), element-wise.

    Args:
        z: scalar or array of pre-activation values.

    Returns:
        The derivative evaluated at `z`, with the same shape as sigmoid(z).
    """
    # Evaluate the sigmoid once and reuse it for both factors.
    s = sigmoid(z)
    return s * (1 - s)

def mse_derived(output_activations, y):
    """Return output_activations - y.

    This is the derivative of 0.5 * (output_activations - y) ** 2 with respect
    to output_activations.
    """
    residual = output_activations - y
    return residual

def feedforward(a, weights, biases):
    """Propagate the input `a` through all layers, keeping intermediate values.

    Args:
        a: input activation; it is multiplied from the left by weights[0].
        weights: list of weight matrices, one per layer transition.
        biases: list of bias arrays, paired with `weights` in order.

    Returns:
        (activations, zs): `activations` starts with `a` followed by the
        sigmoid output of every layer; `zs` holds the weighted inputs
        (w . activation + b) of every layer.
    """
    activations = [a]
    zs = []
    for w, b in zip(weights, biases):
        # Weighted input of this layer, built from the previous layer's output.
        zs.append(np.dot(w, activations[-1]) + b)
        activations.append(sigmoid(zs[-1]))
    return activations, zs

def backpropagation(x, y, weights, biases):
    """Compute the cost gradient for one sample via backpropagation.

    Works for any number of layers (one weight matrix or more); for the
    two-weight-layer network it yields the same values as handling the output
    and hidden layer explicitly.

    Args:
        x: network input, passed unchanged to feedforward (update_batch
            supplies a column vector).
        y: target output, combined with the output activation by mse_derived.
        weights: list of weight matrices, one per layer transition.
        biases: list of bias arrays, one per non-input layer.

    Returns:
        (nabla_w, nabla_b): lists with one gradient array per entry of
        `weights` and `biases` respectively.
    """
    activations, zs = feedforward(x, weights, biases)

    nabla_w = [np.zeros(w.shape) for w in weights]
    nabla_b = [np.zeros(b.shape) for b in biases]

    # Output layer: cost derivative times the activation derivative.
    delta = mse_derived(activations[-1], y) * sigmoid_derived(zs[-1])
    nabla_b[-1] = delta
    nabla_w[-1] = np.dot(delta, activations[-2].T)

    # Walk backwards through the remaining layers. `layer` counts from the end:
    # layer=2 is the last hidden layer, whose error is obtained by pushing the
    # following layer's error back through that layer's weights.
    for layer in range(2, len(weights) + 1):
        delta = np.dot(weights[-layer + 1].T, delta) * sigmoid_derived(zs[-layer])
        nabla_b[-layer] = delta
        nabla_w[-layer] = np.dot(delta, activations[-layer - 1].T)

    return nabla_w, nabla_b
    
    
def update_batch(xbatch, ybatch, weights, biases, eta):
    """Apply one gradient-descent step computed from a mini-batch.

    The summed per-sample gradients are averaged over the number of samples
    actually present in the batch, so a short final batch is scaled correctly
    and the function does not depend on any global batch-size variable.

    Args:
        xbatch: iterable of input samples (one row per sample).
        ybatch: iterable of targets, paired with `xbatch` in order.
        weights: list of current weight matrices.
        biases: list of current bias arrays.
        eta: learning rate.

    Returns:
        (new_weights, new_biases): updated parameter lists; the inputs are not
        modified. For an empty batch the parameters are returned unchanged.
    """
    batch_nabla_w = [np.zeros(w.shape) for w in weights]
    batch_nabla_b = [np.zeros(b.shape) for b in biases]

    n_samples = 0
    for x, y in zip(xbatch, ybatch):
        # Turn the row sample / target into column vectors for backpropagation.
        x = np.array([x]).T
        y = np.array([y]).T

        nabla_w, nabla_b = backpropagation(x, y, weights, biases)

        batch_nabla_w = [bnw + nw for bnw, nw in zip(batch_nabla_w, nabla_w)]
        batch_nabla_b = [bnb + nb for bnb, nb in zip(batch_nabla_b, nabla_b)]
        n_samples += 1

    if n_samples == 0:
        # Nothing to learn from; avoid dividing by zero below.
        return list(weights), list(biases)

    step = eta / n_samples
    new_weights = [w - step * bnw for bnw, w in zip(batch_nabla_w, weights)]
    new_biases = [b - step * bnb for bnb, b in zip(batch_nabla_b, biases)]

    return new_weights, new_biases

def evaluate(x, y, weights, biases):
    """Count the samples whose predicted class matches the labelled class.

    For each (sample, target) pair the network output is computed with
    feedforward; the sample counts as correct when the argmax of the output
    equals the argmax of the target.

    Returns:
        Number of correctly classified samples (an int, not a ratio).
    """
    hits = 0
    for sample, target in zip(x, y):
        # Row sample / target -> column vectors, as feedforward is fed elsewhere.
        column = np.array([sample]).T
        wanted = np.array([target]).T
        outputs = feedforward(column, weights, biases)[0][-1]
        if np.argmax(wanted) == np.argmax(outputs):
            hits += 1
    return hits

In [379]:
# Train with mini-batch gradient descent and record, per epoch, the number of
# correctly classified validation samples.
start = time.time()
accuracys = np.zeros(epochs)
for j in range(epochs):
    # Visit the training samples in a fresh random order each epoch. Indexing
    # through the permutation (instead of overwriting x_train / y_train) keeps
    # the loaded data intact, so re-running this cell starts from the same arrays.
    order = np.random.permutation(len(x_train))

    for i in range(0, len(x_train), mbs):
        batch_idx = order[i:i + mbs]
        xbatch = x_train[batch_idx]
        ybatch = y_train[batch_idx]
        weights, biases = update_batch(xbatch, ybatch, weights, biases, eta)

    accuracys[j] = evaluate(x_valid, y_valid, weights, biases)

    # Change relative to the previous epoch (epoch 0 is compared with itself).
    previous = accuracys[max(j - 1, 0)]
    improvement = accuracys[j] - previous
    # The relative change is undefined when the previous score is 0.
    improvement_percent = (improvement / previous) * 100 if previous else float("nan")
    # The score was measured on the validation set, so report its size.
    print("Epoch {}: {} / {}, improvement: {:.2f}% / {}".format(j, accuracys[j], len(x_valid), improvement_percent, improvement))
stop = time.time()
print("Elapsed time: {}".format(stop - start))

  This is separate from the ipykernel package so we can avoid doing imports until


KeyboardInterrupt: 

Error in callback <function flush_figures at 0x7fd4205b70d0> (for post_execute):


KeyboardInterrupt: 

In [None]:
# Report the number of correctly classified test samples next to the test-set size.
print(
    "Test accuracy: {0} / {1}".format(
        evaluate(x_test, y_test, weights, biases),
        len(x_test),
    )
)

In [None]:
def plot_gallery(data, labels, shape, interpolation='nearest'):
    """Draw every row of `data` as an image, side by side in a single row.

    Args:
        data: 2-D array; each row is reshaped to `shape` and shown as an image.
        labels: indexable; labels[i] becomes the title of the i-th image.
        shape: target shape passed to reshape for each row.
        interpolation: forwarded to plt.imshow.
    """
    n_images = data.shape[0]
    for idx, row in enumerate(data):
        # Subplot positions are 1-based.
        plt.subplot(1, n_images, idx + 1)
        plt.imshow(row.reshape(shape), interpolation=interpolation)
        plt.title(labels[idx])
        # Hide the tick marks on both axes.
        plt.xticks(())
        plt.yticks(())

In [None]:
# Fraction of correctly classified validation samples after each epoch.
plt.plot(accuracys / len(x_valid))
# Label the figure so it can be read on its own.
plt.xlabel("Epoch")
plt.ylabel("Validation accuracy")
plt.title("Validation accuracy per epoch");

In [None]:
# Show the incoming weights of the first 15 hidden neurons as 28x28 images,
# each titled with the neuron's index.
labels = range(no_hidden)
plot_gallery(
    weights[0][:15, :],
    labels,
    shape=(28, 28),
)