In [1]:
import numpy as np, sys
from keras.datasets import mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()
images, labels = (x_train[0:1000].reshape(1000, 28*28) / 255,
                  y_train[0:1000])

one_hot_labels = np.zeros((len(labels), 10))
for i,l in enumerate(labels):
    one_hot_labels[i][l] = 1
    
labels = one_hot_labels

test_images = x_test.reshape(len(x_test), 28*28) / 255
test_labels = np.zeros((len(y_test), 10))
for i,l in enumerate(y_test):
    test_labels[i][l] = 1

def tanh(x):
    return np.tanh(x)

def tanh2deriv(output):
    return 1 - (output ** 2)

def softmax(x):
    temp = np.exp(x)
    return temp / np.sum(temp, axis=1, keepdims=True)

np.random.seed(1)
alpha, iterations = (2, 300)
pixels_per_image, num_labels = (784, 10)
batch_size = 128

input_rows = 28
input_cols = 28

kernel_rows = 3
kernel_cols = 3
num_kernels = 16

hidden_size = ((input_rows - kernel_rows) * (input_cols - kernel_cols)) * num_kernels

kernels = \
    0.02 * np.random.random((kernel_rows*kernel_cols, num_kernels)) - 0.01
weights_1_2 = 0.2 * np.random.random((hidden_size, num_labels)) - 0.1

def get_image_section(layer, row_from, row_to, col_from, col_to):
    section = layer[:, row_from:row_to, col_from:col_to]
    return section.reshape(-1, 1, row_to-row_from, col_to-col_from)

for j in range(iterations):
    train_acc = 0
    for i in range(int(len(images) / batch_size)):
        batch_start, batch_end = ((i * batch_size), ((i+1) * batch_size))
        layer_0 = images[batch_start:batch_end]
        layer_0 = layer_0.reshape(layer_0.shape[0], 28, 28)
        layer_0.shape
        
        sects = list()
        for row_start in range(layer_0.shape[1] - kernel_rows):
            for col_start in range(layer_0.shape[2] - kernel_cols):
                sect = get_image_section(layer_0,
                                         row_start, row_start + kernel_rows,
                                         col_start, col_start + kernel_cols)
                sects.append(sect)
                
        expanded_input = np.concatenate(sects, axis=1)
        es = expanded_input.shape
        flattened_input = expanded_input.reshape(es[0] * es[1], -1)
        
        kernel_output = flattened_input.dot(kernels)
        layer_1 = tanh(kernel_output.reshape(es[0], -1))
        dropout_mask = np.random.randint(2, size=layer_1.shape)
        layer_1 *= dropout_mask * 2
        layer_2 = softmax(np.dot(layer_1,weights_1_2))
        
        for k in range(batch_size):
            labelset = labels[batch_start+k:batch_start+k+1]
            _inc = int(np.argmax(layer_2[k:k+1]) == np.argmax(labelset))
            train_acc += _inc
 
        layer_2_delta = \
            (labels[batch_start:batch_end] - layer_2) / batch_size
        layer_1_delta = \
            layer_2_delta.dot(weights_1_2.T) * tanh2deriv(layer_1)
        layer_1_delta *= dropout_mask
        weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
        lld_reshape = layer_1_delta.reshape(kernel_output.shape)
        k_update = flattened_input.T.dot(lld_reshape)
        kernels -= alpha * k_update
        
    test_acc = 0
    
    for i in range(len(test_images)):
        layer_0 = test_images[i:i+1]
        layer_0 = layer_0.reshape(layer_0.shape[0], 28, 28)
        layer_0.shape
        
        sects = list()
        for row_start in range(layer_0.shape[1] - kernel_rows):
            for col_start in range(layer_0.shape[2] - kernel_cols):
                sect = get_image_section(layer_0,
                                          row_start, row_start + kernel_rows,
                                          col_start, col_start + kernel_cols)
                sects.append(sect)
                
        expanded_input = np.concatenate(sects, axis=1)
        es = expanded_input.shape
        flattened_input = expanded_input.reshape(es[0]*es[1], -1)
            
        kernel_output = flattened_input.dot(kernels)
        layer_1 = tanh(kernel_output.reshape(es[0], -1))
        layer_2 = np.dot(layer_1, weights_1_2)
            
        test_acc += int(np.argmax(layer_2) == np.argmax(test_labels[i:i+1]))
        
    if(j % 1 == 0):
        sys.stdout.write(
            "\n" + \
            "I:" + str(j) + \
            " Train-Acc:" + str(train_acc/float(len(images))) + \
            " Test-Acc:" + str(test_acc/float(len(test_images))))

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])



I:0 Train-Acc:0.097 Test-Acc:0.098
I:1 Train-Acc:0.087 Test-Acc:0.098
I:2 Train-Acc:0.087 Test-Acc:0.098
I:3 Train-Acc:0.087 Test-Acc:0.098
I:4 Train-Acc:0.087 Test-Acc:0.098
I:5 Train-Acc:0.087 Test-Acc:0.098
I:6 Train-Acc:0.087 Test-Acc:0.098
I:7 Train-Acc:0.087 Test-Acc:0.098
I:8 Train-Acc:0.087 Test-Acc:0.098
I:9 Train-Acc:0.087 Test-Acc:0.098
I:10 Train-Acc:0.087 Test-Acc:0.098
I:11 Train-Acc:0.087 Test-Acc:0.098
I:12 Train-Acc:0.087 Test-Acc:0.098
I:13 Train-Acc:0.087 Test-Acc:0.098
I:14 Train-Acc:0.087 Test-Acc:0.098
I:15 Train-Acc:0.087 Test-Acc:0.098
I:16 Train-Acc:0.087 Test-Acc:0.098
I:17 Train-Acc:0.087 Test-Acc:0.098
I:18 Train-Acc:0.087 Test-Acc:0.098
I:19 Train-Acc:0.087 Test-Acc:0.098
I:20 Train-Acc:0.087 Test-Acc:0.098
I:21 Train-Acc:0.087 Test-Acc:0.098
I:22 Train-Acc:0.087 Test-Acc:0.098
I:23 Train-Acc:0.087 Test-Acc:0.098
I:24 Train-Acc:0.087 Test-Acc:0.098
I:25 Train-Acc:0.087 Test-Acc:0.098
I:26 Train-Acc:0.087 Test-Acc:0.098
I:27 Train-Acc:0.087 Test-Acc:0.098
I

I:224 Train-Acc:0.087 Test-Acc:0.098
I:225 Train-Acc:0.087 Test-Acc:0.098
I:226 Train-Acc:0.087 Test-Acc:0.098
I:227 Train-Acc:0.087 Test-Acc:0.098
I:228 Train-Acc:0.087 Test-Acc:0.098
I:229 Train-Acc:0.087 Test-Acc:0.098
I:230 Train-Acc:0.087 Test-Acc:0.098
I:231 Train-Acc:0.087 Test-Acc:0.098
I:232 Train-Acc:0.087 Test-Acc:0.098
I:233 Train-Acc:0.087 Test-Acc:0.098
I:234 Train-Acc:0.087 Test-Acc:0.098
I:235 Train-Acc:0.087 Test-Acc:0.098
I:236 Train-Acc:0.087 Test-Acc:0.098
I:237 Train-Acc:0.087 Test-Acc:0.098
I:238 Train-Acc:0.087 Test-Acc:0.098
I:239 Train-Acc:0.087 Test-Acc:0.098
I:240 Train-Acc:0.087 Test-Acc:0.098
I:241 Train-Acc:0.087 Test-Acc:0.098
I:242 Train-Acc:0.087 Test-Acc:0.098
I:243 Train-Acc:0.087 Test-Acc:0.098
I:244 Train-Acc:0.087 Test-Acc:0.098
I:245 Train-Acc:0.087 Test-Acc:0.098
I:246 Train-Acc:0.087 Test-Acc:0.098
I:247 Train-Acc:0.087 Test-Acc:0.098
I:248 Train-Acc:0.087 Test-Acc:0.098
I:249 Train-Acc:0.087 Test-Acc:0.098
I:250 Train-Acc:0.087 Test-Acc:0.098
I