In [1]:
# general data and misc libraries
import pandas as pd

# tensorflow/keras for cnn training
from tensorflow.python.keras.utils import to_categorical
from tensorflow.python.keras.losses import categorical_crossentropy
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense, Flatten, Conv2D, MaxPool2D, Dropout
from tensorflow.python.keras.preprocessing.image import ImageDataGenerator

from sklearn.model_selection import train_test_split

In [2]:
# Image geometry and number of target classes used throughout the notebook.
img_rows, img_cols = 28, 28
num_classes = 10

print('Loading data...')
df = pd.read_csv('train.csv')
print('Shape of data file:', df.shape)

# Every column except 'label' is treated as pixel data; each flattened row is
# folded back into a (rows, cols, 1) single-channel image.
pixels = df.drop('label', axis=1).values
images = pixels.reshape(len(df), img_rows, img_cols, 1)

# Scale by the largest pixel value present so the maximum becomes 1
# (values lie in [0, 1] assuming non-negative pixel data).
X = images / images.max()
print('Shape of network input:', X.shape)

# True class of each image, kept as a plain array ...
labels = df['label'].values

# ... and as one-hot vectors of length num_classes for the softmax output.
y = to_categorical(labels, num_classes)
print('Shape of label vectors:', y.shape)
print('First label vector:', y[0])

Loading data...
Shape of data file: (42000, 785)
Shape of network input: (42000, 28, 28, 1)
Shape of label vectors: (42000, 10)
First label vector: [ 0.  1.  0.  0.  0.  0.  0.  0.  0.  0.]


In [3]:
# Hold out 20% of the images for validation. The seed is fixed so that the
# split (and therefore the reported validation metrics) is reproducible
# across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)
print('Train shape: X =',X_train.shape,', y =',y_train.shape)
print('Test shape: X =',X_test.shape,', y =',y_test.shape)

# Random augmentation (small rotations, zooms and shifts) is applied to the
# TRAINING images only, to make the network more robust to such variations.
data_generator = ImageDataGenerator(
    rotation_range=10,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
)

# The held-out images must be evaluated as they are: feeding them through the
# augmenting generator would randomly distort them on every pass and make the
# validation loss/accuracy noisy and unrepresentative. A generator with no
# transformations is used purely to batch the data.
validation_generator = ImageDataGenerator()

batch_size = 100
flow_train = data_generator.flow(X_train, y_train, batch_size=batch_size)
# shuffle=False keeps the validation batches in a fixed order.
flow_test = validation_generator.flow(X_test, y_test,
                                      batch_size=batch_size, shuffle=False)

Train shape: X = (33600, 28, 28, 1) , y = (33600, 10)
Test shape: X = (8400, 28, 28, 1) , y = (8400, 10)


In [4]:
# Values shared by several layers below.
input_shape = (img_rows, img_cols, 1)
drop_rate = 0.25

# Architecture: four ReLU convolutions (5x5, 5x5, 3x3, 3x3), each followed by
# dropout, then a single 2x2 max-pool, and a dense classifier head ending in a
# softmax over num_classes.
layers = [
    Conv2D(32, kernel_size=(5, 5), activation='relu', name='Conv1',
           input_shape=input_shape),
    Dropout(rate=drop_rate),
    Conv2D(32, kernel_size=(5, 5), activation='relu', name='Conv2'),
    Dropout(rate=drop_rate),
    Conv2D(64, kernel_size=(3, 3), activation='relu', name='Conv3'),
    Dropout(rate=drop_rate),
    Conv2D(64, kernel_size=(3, 3), activation='relu', name='Conv4'),
    Dropout(rate=drop_rate),
    MaxPool2D(pool_size=(2, 2), strides=2, name='Pool1'),
    Flatten(name='Flatten'),
    Dense(256, activation='relu', name='Dense1'),
    Dropout(rate=drop_rate),
    Dense(64, activation='relu', name='Dense2'),
    Dense(num_classes, activation='softmax', name='Output'),
]

n_layers = len(layers)

# Build the model; Sequential adds the layers in the order given.
model = Sequential(layers)

# Names of the layers, in order (Dropout layers, which were not given an
# explicit name above, get whatever name Keras assigned them).
layer_names = [layer.name for layer in model.layers[:n_layers]]

In [5]:
# Training configuration: Adam optimiser minimising categorical cross-entropy
# (labels are one-hot vectors), with accuracy reported alongside the loss.
model.compile(
    optimizer='adam',
    loss=categorical_crossentropy,
    metrics=['accuracy'],
)

# Train from the batch generator; after each epoch the model is evaluated on
# batches drawn from flow_test. The returned object is kept in `history`.
n_epochs = 30
history = model.fit_generator(
    flow_train,
    epochs=n_epochs,
    validation_data=flow_test,
)

Epoch 1/30
  3/112 [..............................] - ETA: 6:03 - loss: 2.2917 - acc: 0.1356

KeyboardInterrupt: 