In [1]:
import numpy as np 
import matplotlib.pyplot as plt
from scipy import ndimage
from keras.datasets import mnist
from tqdm import tqdm_notebook

Using TensorFlow backend.


In [2]:
def pool_forward(x,mode="max"):
    """Apply non-overlapping 2x2 pooling (stride 2) to a channels-last batch.

    Parameters
    ----------
    x : ndarray of shape (m, height, width, channels); height and width must be even.
    mode : "max" or "average".

    Returns
    -------
    out : ndarray of shape (m, height//2, width//2, channels) with the pooled values.
    mask : ndarray shaped like ``x`` holding the local derivative of the pooling:
        1 where an input equals its window maximum (ties all get 1) for "max",
        0.25 everywhere for "average". ``pool_backward`` multiplies by it.

    Raises
    ------
    ValueError : if a spatial dimension is odd or ``mode`` is not recognised.
    """
    m, height, width, channels = x.shape
    if height % 2 or width % 2:
        raise ValueError("pool_forward needs even spatial dimensions, got %dx%d" % (height, width))

    # View every 2x2 window as axes 2 (row inside window) and 4 (column inside window).
    x_patches = x.reshape(m, height//2, 2, width//2, 2, channels)
    if mode=="max":
        out = x_patches.max(axis=(2,4))
        upsampled = np.repeat(np.repeat(out,2,axis=1),2,axis=2)
        mask = np.isclose(x,upsampled).astype(int)
    elif mode=="average":
        # Reduce the two window axes only; the previous axes (3 then 4) averaged
        # away a spatial axis and the channel axis instead.
        out = x_patches.mean(axis=(2,4))
        mask = np.ones_like(x)*0.25
    else:
        raise ValueError("Unknown pooling mode: %r" % (mode,))
    return out,mask

In [3]:
def pool_backward(dx, mask):
    """Push the pooled-layer gradient back through a 2x2 pooling step.

    ``dx`` is repeated twice along axes 1 and 2 so that every input position
    receives the gradient of the window it belongs to, and the result is
    weighted element-wise by ``mask`` (as returned by ``pool_forward``).
    """
    spread_rows = np.repeat(dx, 2, axis=1)
    spread = np.repeat(spread_rows, 2, axis=2)
    return mask*spread

In [4]:
def relu(x, deriv=False):
    """Rectified linear unit.

    Returns ``x`` with non-positive entries multiplied by zero, or, when
    ``deriv`` is true, the boolean array ``x > 0`` used as the derivative.
    """
    positive = x>0
    if not deriv:
        return np.multiply(x, positive)
    return positive

In [5]:
def conv_forward(x,w,b,padding="same"):
    """Stride-1 2-D cross-correlation of a channels-last batch with a filter bank.

    Computes ``y[i,p,q,k] = sum_{a,c,d} x_padded[i,p+a,q+c,d] * w[a,c,d,k] + b[...,k]``,
    which is what the earlier flip-then-``ndimage.convolve`` implementation produced
    for odd kernels.

    Parameters
    ----------
    x : ndarray (m, height, width, n_c) - input activations.
    w : ndarray (f, f, n_c, k) - square kernels.
    b : ndarray broadcastable to the output, e.g. (1, 1, 1, k).
    padding : "same" zero-pads by (f-1)//2 on every side; any other value means
        "valid" (no padding).

    Returns
    -------
    ndarray (m, height+2*pad-f+1, width+2*pad-f+1, k).

    Raises
    ------
    ValueError : if the kernel is larger than the (padded) input.
    """
    f = w.shape[0]
    pad = (f-1)//2 if padding=="same" else 0
    x_padded = np.pad(x,((0,0),(pad,pad),(pad,pad),(0,0)),'constant', constant_values = 0)

    # Height and width are sized independently, so non-square inputs work.
    out_h = x_padded.shape[1]-f+1
    out_w = x_padded.shape[2]-f+1
    if out_h<1 or out_w<1:
        raise ValueError("Kernel of size %d does not fit the padded input" % f)

    y = np.zeros((x.shape[0],out_h,out_w,w.shape[3]))
    # One tensordot per kernel tap: the shifted input window is contracted with
    # the (n_c, k) weight slice over the channel axis. This replaces the
    # per-example/per-channel ndimage loop, whose crop degenerated to the empty
    # slice [0:-0] for 1x1 kernels.
    for i in range(f):
        for j in range(f):
            window = x_padded[:,i:i+out_h,j:j+out_w,:]
            y += np.tensordot(window,w[i,j,:,:],axes=([3],[0]))
    return y + b

In [6]:
def fc_forward(x,w,b):
    """Fully connected layer followed by ReLU: ``relu(w.x + b)``.

    The product ``w.dot(x)`` treats each column of ``x`` as one example.
    """
    pre_activation = np.dot(w, x) + b
    return relu(pre_activation)


In [7]:
def fc_backward(dA,a,x,w,b):
    """Backward pass of the ReLU fully connected layer.

    Parameters
    ----------
    dA : gradient with respect to the layer output, one column per example.
    a : the layer's activation, used to evaluate the ReLU derivative.
    x : the layer's input.
    w : the layer's weight matrix.
    b : unused; kept so the signature mirrors the forward pass.

    Returns
    -------
    (dx, dW, db) where dW and db are averaged over the ``dA.shape[1]`` examples
    and dx is left unscaled for the layer below.
    """
    batch = dA.shape[1]
    scale = 1/batch
    grad_z = dA*relu(a,deriv=True)
    grad_w = scale*np.dot(grad_z, x.T)
    grad_b = scale*grad_z.sum(axis=1,keepdims=True)
    grad_x = w.T.dot(grad_z)
    return grad_x, grad_w, grad_b

In [8]:
def softmax_forward(x,w,b):
    """Affine layer followed by a column-wise softmax.

    The per-column maximum is subtracted from the logits before exponentiating,
    and 1e-8 is added to every returned value (so a column sums to slightly
    more than one and no entry is exactly zero).
    """
    logits = np.dot(w, x) + b
    shifted = logits - np.max(logits,axis=0,keepdims=True)
    exps = np.exp(shifted)
    probs = exps/np.sum(exps,axis=0,keepdims=True)
    return probs+1e-8

In [9]:
def softmax_backward(y_hat, y, w, b, x):
    """Backward pass of the softmax output layer.

    Uses ``y_hat - y`` as the gradient with respect to the logits. Returns
    (dx, dW, db): dW and db are averaged over the ``y.shape[1]`` examples, dx is
    the unscaled gradient with respect to the layer input. ``b`` is not used.
    """
    batch = y.shape[1]
    scale = 1/batch
    err = y_hat - y
    grad_w = scale*np.dot(err, x.T)
    grad_b = scale*err.sum(axis=1,keepdims=True)
    grad_x = w.T.dot(err)
    return grad_x, grad_w, grad_b

In [10]:
def conv_backward(dZ,x,w,padding="same"):
    """Backward pass of the stride-1 cross-correlation computed by ``conv_forward``.

    Parameters
    ----------
    dZ : ndarray (m, out_h, out_w, k) - gradient with respect to the layer output.
    x : ndarray (m, height, width, n_c) - the input that was fed forward.
    w : ndarray (f, f, n_c, k) - the layer's square kernels.
    padding : "same" or anything else for "valid"; must match the forward pass.

    Returns
    -------
    dx : ndarray shaped like ``x`` - gradient with respect to the input (not averaged).
    dW : ndarray shaped like ``w`` - gradient with respect to the kernels, averaged over m.
    db : ndarray (1, 1, 1, k) - gradient with respect to the bias, averaged over m.

    Raises
    ------
    ValueError : if ``dZ``'s spatial size does not match the forward output size.
    """
    m = x.shape[0]
    f = w.shape[0]
    pad = (f-1)//2 if padding=="same" else 0
    db = (1/m)*np.sum(dZ, axis=(0,1,2), keepdims=True)

    x_padded = np.pad(x,((0,0),(pad,pad),(pad,pad),(0,0)),'constant', constant_values = 0)
    out_h = x_padded.shape[1]-f+1
    out_w = x_padded.shape[2]-f+1
    if dZ.shape[1]!=out_h or dZ.shape[2]!=out_w:
        raise ValueError("dZ spatial shape %s does not match expected (%d, %d)"
                         % (dZ.shape[1:3], out_h, out_w))

    dW = np.zeros_like(w)
    dx_padded = np.zeros_like(x_padded, dtype=float)
    # Every kernel tap (i, j) pairs the output grid with one shifted input window:
    #   dW[i,j]  = mean over examples of window^T . dZ   (contract batch + space)
    #   d window += dZ . w[i,j]^T                         (contract output channels)
    # tensordot performs these contractions without the (m, h, w, n_c, k)
    # broadcast temporary, and the tap loop also handles 1x1 kernels and
    # non-square inputs, which the earlier ndimage-based crop did not.
    for i in range(f):
        for j in range(f):
            window = x_padded[:,i:i+out_h,j:j+out_w,:]
            dW[i,j,:,:] = (1/m)*np.tensordot(window,dZ,axes=([0,1,2],[0,1,2]))
            dx_padded[:,i:i+out_h,j:j+out_w,:] += np.tensordot(dZ,w[i,j,:,:],axes=([3],[1]))

    # Discard the gradient that fell on the zero padding.
    dx = dx_padded[:,pad:dx_padded.shape[1]-pad,pad:dx_padded.shape[2]-pad,:]
    return dx,dW,db

Next, we define the model:


In [11]:
def init_conv_parameters(f, n_c, k):
    """Draw initial parameters for a convolutional layer.

    Returns ``(W, b)``: ``W`` has shape (f, f, n_c, k) with entries
    ``0.015 + 0.05 * N(0, 1)``, and ``b`` has shape (1, 1, 1, k) with entries
    drawn uniformly from [0, 1).
    """
    kernel_shape = (f,f,n_c,k)
    weights = 0.015+0.05*np.random.normal(size=kernel_shape)
    biases = np.random.rand(1,1,1,k)
    return weights, biases
                                                                      
def init_fc_parameters(n_x,n_y):
    """Draw initial parameters for a fully connected layer.

    Returns ``(W, b)``: ``W`` has shape (n_y, n_x) with entries
    ``0.0001 * N(0, 1)``, and ``b`` has shape (n_y, 1) with entries drawn
    uniformly from [0, 1).
    """
    weights = 0.0001*np.random.normal(size=(n_y,n_x))
    biases = np.random.rand(n_y,1)
    return weights, biases

In [12]:
def initialise_parameters():
    """Build the parameter dictionary for the whole network.

    Layers are initialised in network order (four conv layers, one hidden fully
    connected layer, the softmax layer); each contributes a ``W_*`` and a
    ``b_*`` entry, inserted in that order.
    """
    # (weight key, bias key, initialiser, initialiser arguments)
    layer_specs = (
        ("W_conv1", "b_conv1", init_conv_parameters, (5, 1, 16)),
        ("W_conv2", "b_conv2", init_conv_parameters, (5, 16, 16)),
        ("W_conv3", "b_conv3", init_conv_parameters, (3, 16, 32)),
        ("W_conv4", "b_conv4", init_conv_parameters, (3, 32,64)),
        ("W_fc1", "b_fc1", init_fc_parameters, (3136,512)),
        ("W_softmax", "b_softmax", init_fc_parameters, (512,10)),
    )

    parameters = {}
    for weight_key, bias_key, initialiser, args in layer_specs:
        parameters[weight_key], parameters[bias_key] = initialiser(*args)
    return parameters


In [13]:
def forward_prop(X,parameters):
    """Run the full forward pass.

    Architecture: conv-relu, conv-relu, pool, conv-relu, conv-relu, pool,
    flatten, fully connected (relu), softmax.

    Returns ``(y_hat, cache)`` where ``y_hat`` is the softmax output and ``cache``
    stores every intermediate needed by ``backprop`` (pre-activations ``z_*``,
    activations ``a_*`` and the pooling masks).
    """
    cache = {}

    # --- first conv stage -------------------------------------------------
    z1 = conv_forward(X,parameters["W_conv1"], parameters["b_conv1"])
    cache["z_conv1"] = z1
    cache["a_conv1"] = relu(z1)

    z2 = conv_forward(cache["a_conv1"],parameters["W_conv2"], parameters["b_conv2"])
    cache["z_conv2"] = z2
    cache["a_conv2"] = relu(z2)

    pooled1, mask1 = pool_forward(cache["a_conv2"])
    cache["z_pool1"] = pooled1
    cache["mask_pool1"] = mask1

    # --- second conv stage ------------------------------------------------
    z3 = conv_forward(pooled1,parameters["W_conv3"], parameters["b_conv3"])
    cache["z_conv3"] = z3
    cache["a_conv3"] = relu(z3)

    z4 = conv_forward(cache["a_conv3"],parameters["W_conv4"], parameters["b_conv4"] )
    cache["z_conv4"] = z4
    cache["a_conv4"] = relu(z4)

    pooled2, mask2 = pool_forward(cache["a_conv4"])
    cache["z_pool2"] = pooled2
    cache["mask_pool2"] = mask2

    # --- dense head: one column per example -------------------------------
    cache["a_flatten"] = pooled2.reshape((pooled2.shape[0],-1)).T
    cache["a_fc1"] = fc_forward( cache["a_flatten"],parameters["W_fc1"],parameters["b_fc1"])

    y_hat = softmax_forward(cache["a_fc1"],parameters["W_softmax"],parameters["b_softmax"])
    return y_hat,cache

In [14]:
def backprop(X,Y,Y_pred,parameters,cache,lambd):
    """Backpropagate through the network built by ``forward_prop``.

    Walks the layers in reverse order and returns a dictionary with a
    ``d<param>`` entry for every parameter, plus the intermediate gradients
    (``da_conv*``, ``dz_pool*``, ``dx``) that the training loop visualises.
    Every weight gradient finally receives the L2 term ``(lambd/m) * W`` with
    ``m = X.shape[0]``.
    """
    grads = {}

    # Dense head.
    delta, grads["dW_softmax"],grads["db_softmax"] = softmax_backward(
        Y_pred, Y, parameters["W_softmax"],parameters["b_softmax"],cache["a_fc1"])
    delta, grads["dW_fc1"],grads["db_fc1"] = fc_backward(
        delta,cache["a_fc1"],cache["a_flatten"],parameters["W_fc1"],parameters["b_fc1"])

    # Undo the flatten, then the second pooling layer.
    delta = delta.T.reshape(cache["z_pool2"].shape)
    grads["dz_pool2"] = delta
    delta = pool_backward(delta, cache["mask_pool2"])

    # conv4 -> conv3.
    delta = delta*relu(cache["z_conv4"],deriv=True)
    grads["da_conv4"] = delta
    delta, grads["dW_conv4"],grads["db_conv4"] = conv_backward(delta,cache["a_conv3"],parameters["W_conv4"])

    delta = delta*relu(cache["z_conv3"],deriv=True)
    grads["da_conv3"] = delta
    delta, grads["dW_conv3"],grads["db_conv3"] = conv_backward(delta,cache["z_pool1"],parameters["W_conv3"])

    # First pooling layer.
    grads["dz_pool1"] = delta
    delta = pool_backward(delta, cache["mask_pool1"])

    # conv2 -> conv1.
    delta = delta*relu(cache["z_conv2"],deriv=True)
    grads["da_conv2"] = delta
    delta, grads["dW_conv2"],grads["db_conv2"] = conv_backward(delta,cache["a_conv1"],parameters["W_conv2"])

    delta = delta*relu(cache["z_conv1"],deriv=True)
    grads["da_conv1"] = delta
    grads["dx"], grads["dW_conv1"],grads["db_conv1"] = conv_backward(delta,X,parameters["W_conv1"])

    # L2 regularisation: "dW_xxx" pairs with parameter "W_xxx" (drop the leading "d").
    for name, grad in list(grads.items()):
        if "W" not in name:
            continue
        grads[name] = grad + (lambd/X.shape[0])*parameters[name[1:]]
    return grads

In [15]:
def loss_function(y_pred,y,parameters,lambd):
    """Cross-entropy cost averaged over the batch, plus an L2 penalty.

    ``y`` and ``y_pred`` hold one example per column. The penalty is
    ``lambd / (2*m)`` times the sum of squares of every parameter whose key
    contains "W".
    """
    m = y.shape[1]
    cross_entropy = (-1/m)*np.sum(y*np.log(y_pred))

    squared_weights = sum(
        np.sum(np.square(value))
        for name, value in parameters.items()
        if "W" in name
    )
    return cross_entropy + (lambd/(2*m))*squared_weights

In [16]:
def accuracy(y_pred,y):
    """Fraction of columns whose arg-max class in ``y_pred`` matches that of ``y``."""
    predicted_class = y_pred.argmax(axis=0)
    true_class = y.argmax(axis=0)
    hits = (predicted_class == true_class).astype(int)
    return np.mean(hits)

In [17]:
def backprop_checker(parameters,grads, x, y, lambd=0):
    """Spot-check analytic gradients against central finite differences.

    For every parameter array (visited in reverse insertion order) a few random
    entries are perturbed by +/- epsilon, the loss is re-evaluated with a full
    forward pass, and the averaged numerical gradient is compared with the
    averaged entry of ``grads["d" + name]``.

    Parameters
    ----------
    parameters : dict of parameter arrays; entries are perturbed in place and
        restored to their exact original value afterwards.
    grads : dict produced by ``backprop`` for the same ``x``/``y``.
    x, y : the minibatch the gradients were computed on.
    lambd : L2 coefficient used when evaluating the loss. It defaults to 0, so
        pass the value that was given to ``backprop`` if ``grads`` contains a
        regularisation term.

    Raises
    ------
    AssertionError : if any parameter's relative error exceeds the (deliberately
        loose) threshold, i.e. backprop is massively out.
    """
    epsilon = 1e-7
    rel_threshold = 10
    num_sample = 5
    flag = True
    print("Checking gradients...")
    for param in reversed(list(parameters.keys())):
        print("Checking: " + param)
        dims = parameters[param].shape

        num_grad = 0
        backprop_grad = 0

        for _ in range(num_sample): #sample num_sample random entries
            idx = tuple(np.random.randint(0,dim) for dim in dims)

            # Remember the exact value: undoing the perturbation arithmetically
            # (+eps, -2*eps, +eps) leaves rounding residue in the live weights.
            original_value = parameters[param][idx]
            try:
                parameters[param][idx] = original_value + epsilon
                y_pred_plus = forward_prop(x,parameters)[0]
                J_plus = loss_function(y_pred_plus,y,parameters,lambd=lambd)

                parameters[param][idx] = original_value - epsilon
                y_pred_minus = forward_prop(x,parameters)[0]
                J_minus = loss_function(y_pred_minus,y,parameters,lambd=lambd)
            finally:
                parameters[param][idx] = original_value

            num_grad += (J_plus-J_minus)/(2*epsilon)
            backprop_grad += grads["d"+param][idx]

        num_grad/=num_sample
        backprop_grad/=num_sample

        # Guard the division: a zero numerical gradient used to give NaN/inf,
        # and NaN compares False against the threshold, hiding the mismatch.
        abs_diff = abs(num_grad-backprop_grad)
        if num_grad==0:
            rel_error = 0.0 if abs_diff==0 else float("inf")
        else:
            rel_error = abs_diff/abs(num_grad)

        if rel_error>rel_threshold:
            print("Numerical grad:" + str(num_grad))
            print("Backprop grad:" + str(backprop_grad))
            print("Relative error: " + str(rel_error))
            flag = False
    print("Gradient check complete")
    assert flag, "backprop gradients disagree with numerical gradients" #if not the backprop is massively out

In [18]:
%matplotlib notebook
def _refresh_curve(line, values):
    """Replace the data of an existing line artist and rescale its axes to fit."""
    line.set_data(np.arange(len(values)), values)
    line.axes.relim()
    line.axes.autoscale_view()


def _draw_training_diagnostics(ax, X_batch, Y_batch, y_pred, cache, grads, parameters):
    """Redraw the 3x8 diagnostic grid for the first example of the minibatch.

    Row 0 shows forward activations, row 1 the matching gradients and row 2
    slices of the conv kernels followed by slices of their gradients.
    """
    for panel in ax.ravel():
        panel.clear()  # drop the previous snapshot instead of stacking images

    ax[0,0].imshow(X_batch[0,:,:,0], cmap="gray")
    ax[0,1].imshow(cache["a_conv1"][0,:,:,0],cmap="gray")
    ax[0,2].imshow(cache["a_conv2"][0,:,:,0],cmap="gray")
    ax[0,3].imshow(cache["z_pool1"][0,:,:,0],cmap="gray")
    ax[0,4].imshow(cache["a_conv3"][0,:,:,0],cmap="gray")
    ax[0,5].imshow(cache["a_conv4"][0,:,:,0],cmap="gray")
    ax[0,6].imshow(cache["z_pool2"][0,:,:,0],cmap="gray")
    ax[0,7].imshow(np.expand_dims(y_pred[:,0], axis=1),cmap="gray")

    ax[1,0].imshow(grads["dx"][0,:,:,0],cmap="gray")
    ax[1,1].imshow(grads["da_conv1"][0,:,:,0],cmap="gray")
    ax[1,2].imshow(grads["da_conv2"][0,:,:,0],cmap="gray")
    ax[1,3].imshow(grads["dz_pool1"][0,:,:,0],cmap="gray")
    ax[1,4].imshow(grads["da_conv3"][0,:,:,0],cmap="gray")
    ax[1,5].imshow(grads["da_conv4"][0,:,:,0],cmap="gray")
    ax[1,6].imshow(grads["dz_pool2"][0,:,:,0],cmap="gray")
    ax[1,7].imshow(np.expand_dims((y_pred-Y_batch)[:,0], axis=1),cmap="gray")

    ax[2,0].imshow(parameters["W_conv1"][:,:,0,0], cmap="gray")
    ax[2,1].imshow(parameters["W_conv2"][:,:,0,0],cmap="gray")
    ax[2,2].imshow(parameters["W_conv3"][:,:,0,0],cmap="gray")
    ax[2,3].imshow(parameters["W_conv4"][:,:,0,0],cmap="gray")
    ax[2,4].imshow(grads["dW_conv1"][:,:,0,0],cmap="gray")
    ax[2,5].imshow(grads["dW_conv2"][:,:,1,1],cmap="gray")
    ax[2,6].imshow(grads["dW_conv3"][:,:,2,2],cmap="gray")
    ax[2,7].imshow(grads["dW_conv4"][:,:,3,3],cmap="gray")


def train_model(X_train, Y_train, X_dev, Y_dev,num_epochs,batch_size,lambd,learning_rate,parameters = None ):
    """Train the network with minibatch gradient descent plus momentum (beta = 0.9).

    Parameters
    ----------
    X_train : training images, indexed by example along axis 0.
    Y_train : one-hot training labels, one column per example.
    X_dev, Y_dev : held-out set evaluated every 10 minibatches.
    num_epochs : number of passes over the training set.
    batch_size : minibatch size; a trailing partial batch is skipped.
    lambd : L2 regularisation coefficient.
    learning_rate : step size applied to the momentum term.
    parameters : optional parameter dictionary to continue training from. When
        omitted, fresh parameters are drawn on every call. A supplied dictionary
        is updated in place, so progress survives an interrupted run.

    Returns
    -------
    The trained parameter dictionary.

    Side effects: prints progress, and live-updates one figure with the training
    curves and one figure with layer diagnostics.
    """
    # A default of initialise_parameters() in the signature is evaluated once,
    # when the function is defined, and that single dictionary would then be
    # mutated by every call; build fresh parameters per call instead.
    if parameters is None:
        parameters = initialise_parameters()

    train_costs = []
    train_evals = []
    dev_evals = []
    fig, (ax1, ax2,ax3) = plt.subplots(1,3,figsize=(10, 3))

    ax1.set_xlabel('Number of iterations')
    ax1.set_ylabel('Error')
    ax1.set_title('Training Set Error')

    ax2.set_xlabel('Number of iterations')
    ax2.set_ylabel('Accuracy')
    ax2.set_title('Training Set Accuracy')

    ax3.set_xlabel('Number of iterations')
    ax3.set_ylabel('Accuracy')
    ax3.set_title('Dev Set Accuracy')

    # One line artist per curve, refreshed in place. Calling ax.plot(history)
    # on every step would add a new, ever longer line each iteration.
    cost_line, = ax1.plot([], [])
    train_acc_line, = ax2.plot([], [])
    dev_acc_line, = ax3.plot([], [])

    plt.tight_layout()
    plt.ion()

    fig.show()
    fig.canvas.draw()

    # The diagnostics figure is created on first use and then reused, rather
    # than opening a new 24-axes figure every few minibatches and never closing it.
    diag_fig, diag_ax = None, None

    momentum = {}
    beta = 0.9
    for param in parameters:
        momentum[param] = np.zeros_like(parameters[param])

    num_batches = X_train.shape[0]//batch_size
    for epoch in tqdm_notebook(range (num_epochs), total=num_epochs,desc="Number of Epochs"):
        print("Training the model, epoch: " + str(epoch+1))
        #cycle through the entire training set in batches
        for i in tqdm_notebook(range(0,num_batches), total =num_batches, desc = "Minibatch number"):

            #get the next minibatch to train on
            X_train_minibatch = X_train[i*batch_size:(i+1)*batch_size]
            Y_train_minibatch = Y_train[:,i*batch_size:(i+1)*batch_size]

            #perform one cycle of forward and backward propagation to get the partial derivatives w.r.t. the weights
            #and biases. Calculate the cost - used to monitor training
            y_pred, cache = forward_prop(X_train_minibatch,parameters)
            minibatch_cost = loss_function(y_pred,Y_train_minibatch,parameters,lambd)
            minibatch_grads = backprop(X_train_minibatch,Y_train_minibatch,y_pred,parameters, cache,lambd)

            if i%5==0:
                if diag_fig is None:
                    diag_fig, diag_ax = plt.subplots(3,8)
                _draw_training_diagnostics(diag_ax, X_train_minibatch, Y_train_minibatch,
                                           y_pred, cache, minibatch_grads, parameters)
                diag_fig.canvas.draw()

            #check activations aren't blowing up or dead relu neurons
            for key in cache.keys():
                if "a_" in key:
                    print(key + ": " + str(np.max(cache[key])))
                    if np.max(cache[key])==0:
                        print("DEAD RELU")
                        backprop_checker(parameters,minibatch_grads, X_train_minibatch, Y_train_minibatch)

            #update the parameters using gradient descent with momentum
            for param in parameters.keys():
                momentum[param] = beta *  momentum[param] + minibatch_grads["d"+param]
                parameters[param] = parameters[param] - learning_rate* momentum[param]

            train_costs.append(minibatch_cost)
            _refresh_curve(cost_line, train_costs)
            print("Training set error: "+ str(minibatch_cost))

            train_eval_metric = accuracy(y_pred,Y_train_minibatch)
            train_evals.append(train_eval_metric)
            _refresh_curve(train_acc_line, train_evals)
            fig.canvas.draw()

            #periodically output an update on the current cost and performance on the dev set for visualisation
            if(i%10 == 0):
                print("Training set accuracy: "+ str(train_eval_metric))
                y_dev_pred,_ = forward_prop(X_dev,parameters)
                dev_eval_metric = accuracy(y_dev_pred,Y_dev)
                dev_evals.append(dev_eval_metric)
                print("Accuracy on dev set: "+ str(dev_eval_metric))
                _refresh_curve(dev_acc_line, dev_evals)
                fig.canvas.draw()
    print("Training complete!")
    #return the trained parameters
    return parameters

In [19]:
# Load MNIST through keras.datasets and unpack it into (inputs, labels) pairs
# for the training split and the test split.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [20]:
def process_input(X,Y):
    """Prepare raw images and integer labels for the network.

    ``X`` is reshaped to (N, 28, 28, 1) and divided by 255; ``Y`` is flattened
    and one-hot encoded over 10 classes, giving a (10, N) matrix with one column
    per example. Returns ``(X, Y)`` in that processed form.
    """
    num_examples = X.shape[0]
    images = X.reshape((num_examples, 28,28,1))/255 #normalise input features
    labels = Y.reshape(-1)
    one_hot = np.eye(10)[labels].T
    return images,one_hot
# Convert both splits to scaled (N, 28, 28, 1) images and (10, N) one-hot labels.
# NOTE: this rebinds x_train/y_train/x_test/y_test to the processed arrays, so the
# cell is not idempotent - re-running it without reloading the raw data would pass
# already-processed arrays through process_input again.
x_train, y_train = process_input(x_train, y_train)
x_test , y_test = process_input(x_test , y_test)

In [21]:
# Train for a single epoch with minibatches of 64 and no L2 penalty (lambd=0).
# The first 100 test examples are passed as the dev set evaluated during training.
parameters =train_model(x_train,y_train,x_test[:100],y_test[:,:100],
                        num_epochs=1,batch_size=64,lambd=0,learning_rate=0.003)

<IPython.core.display.Javascript object>

HBox(children=(IntProgress(value=0, description='Number of Epochs', max=1), HTML(value='')))

Training the model, epoch: 1


HBox(children=(IntProgress(value=0, description='Minibatch number', max=937), HTML(value='')))

<IPython.core.display.Javascript object>

a_conv1: 1.5264523568606254
a_conv2: 7.062214236698033
a_conv3: 22.217439884923106
a_conv4: 80.48714930595845
a_flatten: 80.48714930595845
a_fc1: 1.495010126949493
Training set error: 2.352177700535207
Training set accuracy: 0.078125
Accuracy on dev set: 0.15
a_conv1: 1.5154564607942342
a_conv2: 6.965739565720116
a_conv3: 21.81114619148496
a_conv4: 78.86130273828127
a_flatten: 78.86130273828127
a_fc1: 1.7538327080229337
Training set error: 2.3102568768423772
a_conv1: 1.5277931645941756
a_conv2: 6.9920649199312255
a_conv3: 21.793064591061285
a_conv4: 78.74841497373501
a_flatten: 78.74841497373501
a_fc1: 2.7749173876718034
Training set error: 2.324755245441794
a_conv1: 1.5255403047621574
a_conv2: 6.899228196168803
a_conv3: 21.532195792955854
a_conv4: 77.52398595583217
a_flatten: 77.52398595583217
a_fc1: 3.8811398778348036
Training set error: 2.2981315400933306
a_conv1: 1.4612764544919679
a_conv2: 7.277332469597993
a_conv3: 23.346366920702952
a_conv4: 84.16428700411814
a_flatten: 84.16428

<IPython.core.display.Javascript object>

a_conv1: 1.5494955191119248
a_conv2: 6.87106656250573
a_conv3: 21.888033544023376
a_conv4: 78.46938542719877
a_flatten: 78.46938542719877
a_fc1: 1.0286968624376136
Training set error: 2.330125718204711
a_conv1: 1.4705259165114828
a_conv2: 7.034847869047684
a_conv3: 21.99986343796584
a_conv4: 79.74951844372649
a_flatten: 79.74951844372649
a_fc1: 0.5807724313568389
Training set error: 2.3162450742428984
a_conv1: 1.5023189365220087
a_conv2: 6.83524209185539
a_conv3: 21.13177369466511
a_conv4: 76.29754083040548
a_flatten: 76.29754083040548
a_fc1: 0.5666398295813428
Training set error: 2.3382629750237065
a_conv1: 1.5111505387102186
a_conv2: 6.950531603429742
a_conv3: 22.066992696228738
a_conv4: 79.68901177726035
a_flatten: 79.68901177726035
a_fc1: 0.5381440035722211
Training set error: 2.339079099216636
a_conv1: 1.5129518280362124
a_conv2: 6.955691101249456
a_conv3: 21.76596280888652
a_conv4: 78.9318689259974
a_flatten: 78.9318689259974
a_fc1: 0.9290804994104553
Training set error: 2.283120

<IPython.core.display.Javascript object>

a_conv1: 1.482760407267839
a_conv2: 7.083232606107581
a_conv3: 22.364640449898587
a_conv4: 81.33216473535383
a_flatten: 81.33216473535383
a_fc1: 1.8825630146832788
Training set error: 2.3204683769597385
Training set accuracy: 0.109375
Accuracy on dev set: 0.15
a_conv1: 1.5160262178298611
a_conv2: 7.195172224213715
a_conv3: 23.062312161157426
a_conv4: 83.6079028923469
a_flatten: 83.6079028923469
a_fc1: 4.102527083204288
Training set error: 2.3481249899794294
a_conv1: 1.50424594595207
a_conv2: 6.926369772844908
a_conv3: 21.7244595323493
a_conv4: 79.40852201186327
a_flatten: 79.40852201186327
a_fc1: 7.186769583641727
Training set error: 2.282485822027306
a_conv1: 1.4886424158670666
a_conv2: 6.895864360832486
a_conv3: 21.76622816975753
a_conv4: 78.58820608079604
a_flatten: 78.58820608079604
a_fc1: 10.363205186251095
Training set error: 2.3623791597966184
a_conv1: 1.4890415034657236
a_conv2: 7.157135688746483
a_conv3: 22.523653407298365
a_conv4: 81.17955813021052
a_flatten: 81.1795581302105

<IPython.core.display.Javascript object>

a_conv1: 1.5295213211321022
a_conv2: 7.125134340807977
a_conv3: 22.127823279691416
a_conv4: 79.38868203867092
a_flatten: 79.38868203867092
a_fc1: 14.968797218839152
Training set error: 2.295503974550552
a_conv1: 1.5158720028747485
a_conv2: 7.15489562149646
a_conv3: 22.691166075347496
a_conv4: 81.76189860254739
a_flatten: 81.76189860254739
a_fc1: 21.72972791890211
Training set error: 2.327804331732712
a_conv1: 1.4940738552282722
a_conv2: 7.029634632824503
a_conv3: 22.216818936196077
a_conv4: 79.97431079031637
a_flatten: 79.97431079031637
a_fc1: 15.834241113945716
Training set error: 2.341224358011472
a_conv1: 1.5051034754113073
a_conv2: 7.24626173593761
a_conv3: 23.07866510314318
a_conv4: 83.74573750801062
a_flatten: 83.74573750801062
a_fc1: 0.16210017847425895
Training set error: 2.3179430599603457
a_conv1: 1.531989027828427
a_conv2: 7.255264675178904
a_conv3: 23.28646330692257
a_conv4: 84.93029780820153
a_flatten: 84.93029780820153
a_fc1: 11.633292684269309
Training set error: 2.35468

<IPython.core.display.Javascript object>

a_conv1: 1.5263347459424068
a_conv2: 7.224156084794404
a_conv3: 23.030419278451138
a_conv4: 82.85478314989757
a_flatten: 82.85478314989757
a_fc1: 46.56505354069449
Training set error: 2.334735111737441
Training set accuracy: 0.0625
Accuracy on dev set: 0.15
a_conv1: 1.5371458919592809
a_conv2: 7.210003572018697
a_conv3: 22.72677127622863
a_conv4: 81.75526019433617
a_flatten: 81.75526019433617
a_fc1: 31.9636396221766
Training set error: 2.3518262418220024
a_conv1: 1.5063030314164383
a_conv2: 7.230616158324077
a_conv3: 22.891042824545035
a_conv4: 82.28274640399147
a_flatten: 82.28274640399147
a_fc1: 0.00018847210572490308
Training set error: 2.347039942435332
a_conv1: 1.5221173517929363
a_conv2: 7.050658014089231
a_conv3: 22.42123780945404
a_conv4: 80.61287878371722
a_flatten: 80.61287878371722
a_fc1: -0.0
Training set error: 2.327609532274445
a_conv1: 1.5329435656672457
a_conv2: 7.02706543233467
a_conv3: 22.392119083052233
a_conv4: 80.50097222984523
a_flatten: 80.50097222984523
a_fc1: -

<IPython.core.display.Javascript object>

a_conv1: 1.5180118065012742
a_conv2: 7.1907179938753725
a_conv3: 23.0348593873795
a_conv4: 83.33898613890416
a_flatten: 83.33898613890416
a_fc1: -0.0
Training set error: 2.3163746693957954
a_conv1: 1.5048454460342582
a_conv2: 6.898133817630646
a_conv3: 21.65885322465202
a_conv4: 78.07964665042115
a_flatten: 78.07964665042115
a_fc1: -0.0
Training set error: 2.358712741415828
a_conv1: 1.4705149314281223
a_conv2: 6.852518461575385
a_conv3: 21.76868470711336
a_conv4: 77.35738767682953
a_flatten: 77.35738767682953
a_fc1: -0.0
Training set error: 2.314868560477169
a_conv1: 1.5025661937051265
a_conv2: 7.039823587202617
a_conv3: 22.358278044847182
a_conv4: 80.26506158862601
a_flatten: 80.26506158862601
a_fc1: -0.0
Training set error: 2.319705926885929
a_conv1: 1.5080725655357283
a_conv2: 7.157022167866157
a_conv3: 22.764285668053486
a_conv4: 81.78626824030074
a_flatten: 81.78626824030074
a_fc1: -0.0
Training set error: 2.302519472193614


KeyboardInterrupt: 

In [None]:
# Keras CNN on MNIST: a library-based model trained on the same dataset as the
# NumPy network above.
# NOTE: this cell reloads MNIST and rebinds x_train/y_train/x_test/y_test and
# batch_size at module level, replacing the arrays prepared by earlier cells.
from __future__ import print_function
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K

# Training configuration.
batch_size = 128
num_classes = 10
epochs = 12

# input image dimensions
img_rows, img_cols = 28, 28

# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Add the single channel axis on whichever side the backend's image data
# format expects it.
if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

# Cast to float32 and divide by 255.
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# Model: two 3x3 conv layers (32 then 64 filters), 2x2 max pooling, dropout,
# a 128-unit dense layer, dropout, and a softmax output over num_classes.
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3),
                 activation='relu',
                 input_shape=input_shape))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

# Categorical cross-entropy loss, Adadelta optimiser, accuracy as the metric.
model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])

# Fit on the training split, validating on the test split, then report the
# loss and accuracy on the test split.
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(x_test, y_test))
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])