In [1]:
from keras.datasets import mnist 
from keras.utils import np_utils 
from keras.models import Sequential 
from keras.layers import Dense, Activation
from matplotlib import pyplot as plt # import pyplot alias plt for plotting 
from sklearn.metrics import confusion_matrix 
import seaborn as sn
from keras.layers import Activation, BatchNormalization, Convolution2D, MaxPooling2D, Flatten, Conv2D
import numpy as np

Using TensorFlow backend.


# Load dataset, preprocessing

In [2]:
# Fetch MNIST; Keras hands it back already divided into a train and a test split
train_split, test_split = mnist.load_data()
X_train, y_train = train_split
X_test, y_test = test_split

In [3]:
input_dim = (28, 28, 1)  # height, width, channels

# Add the trailing channel axis, cast to float32 and divide by 255.
# reshape(-1, ...) infers the number of samples instead of hard-coding
# 60000 / 10000, so the cell also works on a subset of the data.
# NOTE: this cell overwrites X_train / X_test, so running it twice scales the
# data twice; re-run the loading cell first if you need to execute it again.
X_train = X_train.reshape(-1, *input_dim).astype('float32') / 255
X_test = X_test.reshape(-1, *input_dim).astype('float32') / 255

In [4]:
# Sanity check: shapes of the images and labels of both splits
print(*(arr.shape for arr in (X_train, X_test, y_train, y_test)))

(60000, 28, 28, 1) (10000, 28, 28, 1) (60000,) (10000,)


In [5]:
# Number of target classes; output_dim is kept as an alias of the same value
nb_classes = 10
output_dim = nb_classes

In [6]:
# One-hot encode the integer labels of both splits
Y_train, Y_test = (
    np_utils.to_categorical(labels, nb_classes) for labels in (y_train, y_test)
)

# CNN model in Keras


In [7]:
# Fully-convolutional digit classifier for 28x28x1 inputs.
#
# Kernel sizes are written as explicit tuples. The Keras 1 form
# `Convolution2D(n, 3, 3)` is only translated by the legacy shim of standalone
# Keras 2 (with a deprecation warning); under the plain Conv2D signature the
# third positional argument is `strides`, which would change the architecture.
# Shape comments assume the layer defaults (stride 1, 'valid' padding).
model = Sequential()

model.add(Conv2D(8, (3, 3), activation='relu', input_shape=(28, 28, 1)))  # -> 26x26x8
model.add(BatchNormalization())

model.add(Conv2D(16, (3, 3), activation='relu'))  # -> 24x24x16
model.add(BatchNormalization())

model.add(MaxPooling2D(pool_size=(2, 2)))  # -> 12x12x16

# 1x1 convolution: mixes channels only, reducing 16 -> 10
model.add(Conv2D(10, (1, 1), activation='relu'))  # -> 12x12x10

model.add(Conv2D(32, (3, 3), activation='relu'))  # -> 10x10x32
model.add(BatchNormalization())

model.add(MaxPooling2D(pool_size=(2, 2)))  # -> 5x5x32
model.add(Conv2D(10, (1, 1), activation='relu'))  # -> 5x5x10

# A 5x5 kernel over a 5x5 map collapses it to one value per class (no activation here)
model.add(Conv2D(10, (5, 5)))  # -> 1x1x10

model.add(Flatten())  # 1x1x10 -> vector of 10 class scores
model.add(Activation('softmax'))  # softmax turns the scores into per-class confidences

  This is separate from the ipykernel package so we can avoid doing imports until
  
  del sys.path[0]


In [None]:
import tensorflow as tf

# Ask TensorFlow whether a GPU is usable; both arguments are left at their defaults
tf.test.is_gpu_available()

In [9]:
# Print the layer-by-layer summary of the network.
# NOTE(review): the recorded output below is a "model has not yet been built"
# error - presumably this cell was executed out of order; run it after the
# model-definition cell.
model.summary()

ValueError: This model has not yet been built. Build the model first by calling build() or calling fit() with some data. Or specify input_shape or batch_input_shape in the first layer for automatic build. 

# Model Training

In [8]:
# Training hyper-parameters
batch_size = 128
nb_epoch = 100

model.compile(optimizer='sgd', loss='categorical_crossentropy', metrics=['accuracy'])

# NOTE(review): the test split is also used as validation data here, so the
# validation curves are not an independent estimate.
history = model.fit(
    X_train, Y_train,
    batch_size=batch_size,
    epochs=nb_epoch,
    verbose=1,
    validation_data=(X_test, Y_test),
)

# evaluate() returns [loss, accuracy] for the single metric compiled above;
# `score` keeps that list because later cells read it.
score = model.evaluate(X_test, Y_test, verbose=0)
test_loss, test_accuracy = score
# score[0] is the cross-entropy loss, not an error rate, so label it as such
print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)

InternalError: cudaGetDevice() failed. Status: cudaGetErrorString symbol not found.

In [None]:
# The accuracy metric is stored under 'acc' by older Keras releases and under
# 'accuracy' by newer ones; use whichever key this History object contains.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'

# Label each curve directly instead of passing a dict to legend(), which only
# worked because iterating a dict yields its keys.
plt.plot(history.history[acc_key], 'r', label='training accuracy')
plt.plot(history.history['val_' + acc_key], 'b', label='validation accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Accuracy per epoch')
plt.legend()
plt.show()

In [None]:
# Label each curve directly instead of passing a dict to legend(), which only
# worked because iterating a dict yields its keys.
plt.plot(history.history['loss'], 'r', label='training loss')
plt.plot(history.history['val_loss'], 'b', label='validation loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Loss per epoch')
plt.legend()
plt.show()

# Inference 

In [None]:
# Sequential.predict_classes is deprecated and absent from newer Keras releases;
# the argmax over the softmax outputs yields the same class indices.
predictions = np.argmax(model.predict(X_test), axis=-1)

In [None]:
# Peek at the first ten predicted class indices
predictions[:10]

In [None]:
# Position of the largest entry in the first one-hot row, i.e. its class index
Y_test[0].argmax()

In [None]:
# Collapse the one-hot rows back into integer class labels
y_test = Y_test.argmax(axis=1)

In [None]:
# Shape of the one-hot encoded test labels
np.shape(Y_test)

In [None]:
# Shape of the integer test labels
np.shape(y_test)

# Misclassified Images

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Positions in the test set where the predicted class differs from the label
misclassifiedIndexes = [
    position
    for position, (label, predict) in enumerate(zip(y_test, predictions))
    if label != predict
]

In [None]:
# Show the first five misclassified digits side by side
plt.figure(figsize=(20, 4))
for slot, wrong_idx in enumerate(misclassifiedIndexes[:5], start=1):
    plt.subplot(1, 5, slot)
    digit = np.reshape(X_test[wrong_idx], (28, 28))
    plt.imshow(digit, cmap=plt.cm.gray)
    caption = 'Predicted: {}, Actual: {}'.format(predictions[wrong_idx], y_test[wrong_idx])
    plt.title(caption, fontsize=15)

# Confusion Matrix

In [None]:
# A confusion matrix summarises how the classifier's predictions compare to the labels
cm = confusion_matrix(y_test, predictions)
# Divide every row by its total so the entries are fractions instead of counts
row_totals = cm.sum(axis=1, keepdims=True)
cm = cm.astype('float') / row_totals


In [None]:
# Plot the normalised confusion matrix as an annotated heatmap
plt.figure(figsize=(8, 8))
# seaborn is imported as `sn` in the import cell; `sns` was undefined (NameError)
sn.heatmap(cm, annot=True, fmt=".3f", linewidths=.5, square=True, cmap='Blues_r');
plt.ylabel('Actual label');
plt.xlabel('Predicted label');
# `score` is the [loss, accuracy] list from model.evaluate; report the accuracy only
all_sample_title = 'Overall Accuracy Score: {0}'.format(score[1])
plt.title(all_sample_title, size=15);
plt.savefig('toy_Digits_ConfusionSeabornCodementor.png')
plt.show();

In [None]:
 # Shape of layer 0's first weight array after selecting index 0 on its third axis
 np.shape(model.layers[0].get_weights()[0][:, :, 0, :])

In [None]:
# Shape of the first weight array held by model.layers[2]
np.shape(model.layers[2].get_weights()[0])

# Visualize Kernels

In [None]:
# Weight array is indexed as [x, y, channel, filter]; keep channel 0 only
weight_conv2d_1 = model.layers[2].get_weights()[0][:, :, 0, :]
col_size = 3
row_size = 3
fig, ax = plt.subplots(row_size, col_size, figsize=(12, 8))
# Walk the grid row by row, drawing one filter per cell
for filter_index, cell in enumerate(ax.flat):
    cell.imshow(weight_conv2d_1[:, :, filter_index], cmap="gray")

# Test a random image

In [None]:
from keras.models import Model

# Pick one training sample and show it together with its label
img_no = 12002
test_img = X_train[img_no]
plt.imshow(test_img.reshape(28, 28), cmap='gray')
plt.title("Value:{}".format(y_train[img_no]))

# Build a model that exposes the output of every layer, then run the sample through it
layer_outputs = []
for layer in model.layers:
    layer_outputs.append(layer.output)
activation_model = Model(inputs=model.input, outputs=layer_outputs)
single_batch = test_img.reshape(1, 28, 28, 1)
activations = activation_model.predict(single_batch)

def display_activation(activations, col_size, row_size, act_index):
    """Draw the channels of one activation as a grid of grayscale images.

    Parameters
    ----------
    activations : sequence of arrays
        ``activations[act_index]`` is read as ``[0, :, :, channel]``, so it
        must be a 4-D array whose last axis enumerates the channels.
    col_size, row_size : int
        Number of columns and rows of the subplot grid.
    act_index : int
        Which entry of ``activations`` to display.

    Channels are drawn in row-major order starting at channel 0. When the
    grid has more cells than the activation has channels, the spare cells are
    hidden instead of raising ``IndexError``.
    """
    activation = activations[act_index]
    n_channels = activation.shape[-1]
    # squeeze=False keeps `ax` two-dimensional even for a single row or
    # column, so ax[row][col] is always valid.
    fig, ax = plt.subplots(row_size, col_size, figsize=(15, 12), squeeze=False)

    activation_index = 0
    for row in range(row_size):
        for col in range(col_size):
            cell = ax[row][col]
            if activation_index < n_channels:
                cell.imshow(activation[0, :, :, activation_index], cmap='gray')
            else:
                # No channel left for this cell: leave it blank
                cell.axis('off')
            activation_index += 1
      
# Alternative view: display_activation(activations, 4, 8, 6)
# Draw a grid of 2 columns by 5 rows from activations[8]
display_activation(activations, col_size=2, row_size=5, act_index=8)