In [39]:
import os
import gzip
import fetch
import fetch.math as fm
from fetch.math import NDArrayDouble



import random

import numpy as np

In [40]:
DATA_URL = 'http://yann.lecun.com/exdb/mnist/'

In [41]:
def load_data(one_hot=True, reshape=None, training_size=50000, validation_size=10000):
    
    print("loading training image data")
    x_tr = load_images('train-images-idx3-ubyte.gz', training_size, validation_size)
    print("loading training labels")
    y_tr = load_labels('train-labels-idx1-ubyte.gz', training_size, validation_size)

    print("loading test image data")
    x_te = load_images('t10k-images-idx3-ubyte.gz', training_size, validation_size)
    print("loading test labels")
    y_te = load_labels('t10k-labels-idx1-ubyte.gz', training_size, validation_size)

    if one_hot:
        y_tr_onehot = NDArrayDouble.Zeros([y_tr.shape()[0], 10])
        y_te_onehot = NDArrayDouble.Zeros([y_te.shape()[0], 10])
        
        for i in range(len(y_tr)):
            y_tr_onehot[i, int(y_tr[i])] = 1
        for i in range(len(y_te)):
            y_te_onehot[i, int(y_te[i])] = 1
            
        
    if reshape:
        x_tr, x_te = [x.reshape(*reshape) for x in (x_tr, x_te)]
    
    return x_tr, y_tr_onehot, x_te, y_te_onehot
    
def load_images(filename, training_size, validation_size):
    download(filename)
    with gzip.open(filename, 'rb') as f:
        data = np.frombuffer(f.read(), np.uint8, offset=16)
        data = data.reshape(-1, 28 * 28) / 256
        nd_data = NDArrayDouble([training_size, 28*28])
        nd_data.FromNumpy(data[:training_size, :])
    return nd_data

def load_labels(filename, training_size, validation_size):
    download(filename)
    with gzip.open(filename, 'rb') as f:
        data = np.frombuffer(f.read(), np.uint8, offset=8)
        nd_data = NDArrayDouble(training_size)
        nd_data.FromNumpy(data[:training_size])
    return nd_data

def download(filename):
    if not os.path.exists(filename):
        from urllib.request import urlretrieve
        print("Downloading %s" % filename)
        urlretrieve(DATA_URL + filename, filename)

In [42]:
def sigmoid(x):
    z = x * -1.0
    z.Exp()
    return NDArrayDouble.__truediv__(float(1.0), (z + 1))

def d_sigmoid(y):
    y = y * (1 - y)
    return y

In [43]:
# feed forward pass of a network
# take X input and the network (defined as a list of weights)
# no biases?
def feed_forward(X, weights):
    a = [X]
    for w in weights:
        temp = a[-1].dot(w)
        temp2 = sigmoid(temp)
        a.append(temp2)
    return a

In [50]:
# get the gradients of the network
def grads(X, Y, weights):

    # define container
#     grads = NDArrayDouble(weights_size)
    grads = np.empty_like(weights)
    
    # run a forward pass to get delta
    a = feed_forward(X, weights)
    delta = a[-1] - Y # cross-entropy

    # calculate grads
    grads[-1] = np.dot(a[-2].transpose([1, 0]), delta)
    for i in range(len(a)-2, 0, -1):
        delta = np.dot(delta, weights[i].transpose([1, 0])) * d_sigmoid(a[i])
        grads[i-1] = np.dot(a[i-1].transpose([1, 0]), delta)
        
    return grads / len(X)

In [51]:
def make_new_layer(in_size, out_size):
    
    numer = out_size
    denom = np.sqrt(out_size)

    layer = NDArrayDouble([in_size, out_size])

    for i in range(layer.size()):
        layer[i] = (random.uniform(0, 1.0) * numer) / denom
    
    return layer

def get_initial_weights(net):
    
    weights = [make_new_layer(28*28, net[0])]
    if len(net) > 2:
        for i in range(len(net) - 2):
            weights.append(make_new_layer(net[i], net[i+1]))
    weights.append(make_new_layer(net[-2], net[-1]))

    return weights

In [52]:
def train(n_epochs, batch_size, alpha, weights, X_tr, Y_tr, X_te, Y_te):
    
    # epochs
    for i in range(num_epochs):

        # training batches
        for j in range(0, X_tr.shape()[0] - batch_size, batch_size):

            # get current batch
            X, Y = X_tr[j:j+batch_size, :], Y_tr[j:j+batch_size, :]

            # update weights
            weights -= alpha * grads(X, Y, weights)

        # get current prediction
        
#         cur_pred = feed_forward(X_te, weights)[-1]
#         print(cur_pred.shape())
#         max_pred = cur_pred.Max()
        
        
        
        
        
#         prediction = np.argmax(feed_forward(X_te, weights)[-1], axis=1)
#         print("epoch: ", i, ": ", "accuracy: ", np.mean(prediction == np.argmax(Y_te, axis=1)))
    
    return

In [35]:
# load the data
X_tr, Y_tr, X_te, Y_te = load_data(one_hot=True)

loading training image data
loading training labels
loading test image data
loading test labels


In [53]:
# initialise network
outputs = 10
network_architecture = [100, 10, outputs]
weights = get_initial_weights(network_architecture)

In [54]:
# training constants
num_epochs = 30
batch_size = 50
alpha = 0.2

In [55]:
train(num_epochs, batch_size, alpha, weights, X_tr, Y_tr, X_te, Y_te)

ValueError: shapes (500,) and (100,) not aligned: 500 (dim 0) != 100 (dim 0)

In [None]:
print(X_tr.shape())
print(Y_tr.shape())

In [21]:
z = NDArrayDouble.Ones(100)

TypeError: Ones(): incompatible function arguments. The following argument types are supported:
    1. (arg0: List[int]) -> fetch.math.NDArrayDouble

Invoked with: 100

In [17]:
z / 2

<fetch.math.NDArrayDouble at 0x7f5c418920a0>

In [18]:
float(2) / z

TypeError: unsupported operand type(s) for /: 'float' and 'fetch.math.NDArrayDouble'

In [20]:
z = NDArrayDouble.__truediv__(float(2.0),z)

In [23]:
z = x.dot(y)

In [24]:
z.shape()

[20, 20]

In [25]:
z = x.Exp()

TypeError: unsupported operand type(s) for *=: 'NoneType' and 'int'