## Exercise to use Keras
- More exercise on 1-hidden layer MPL
- Conv2d

In [1]:
import keras

Using TensorFlow backend.


In [2]:
keras.backend.image_data_format()

'channels_last'

In [3]:
keras.backend.image_dim_ordering()

'tf'

In [4]:
model = keras.models.Sequential()
model.add(keras.layers.Dense(100,input_dim=784,activation='relu'))
model.add(keras.layers.Dense(10,activation='softmax'))
model.compile(optimizer='sgd',loss='categorical_crossentropy',metrics=['accuracy'])

In [5]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 100)               78500     
_________________________________________________________________
dense_2 (Dense)              (None, 10)                1010      
Total params: 79,510.0
Trainable params: 79,510
Non-trainable params: 0.0
_________________________________________________________________


In [6]:
785*100+101*10

79510

In [7]:
import pandas as pd
trainDF = pd.read_csv('../input/train.csv')
testDF = pd.read_csv('../input/test.csv')

In [8]:
trainDF.shape, testDF.shape

((42000, 785), (28000, 784))

In [9]:
train_X = trainDF.loc[:,'pixel0':'pixel783'].values

In [10]:
train_Y = keras.utils.to_categorical(trainDF.loc[:,'label'].values)

In [11]:
history = model.fit(train_X, train_Y, validation_split=.3, epochs=10, batch_size= 200, verbose=1)

Train on 29399 samples, validate on 12601 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [12]:
history.history

{'acc': [0.39457124307036479,
  0.53617469922568251,
  0.56665192667880571,
  0.64002176851126413,
  0.65202898182241753,
  0.69070376597957917,
  0.723528011369723,
  0.73590938655441041,
  0.73726997350672441,
  0.77016224970302272],
 'loss': [9.700819457387059,
  7.4294177086694964,
  6.9470535864849969,
  5.7596116966810182,
  5.5725403111493277,
  4.9375409739346496,
  4.4138686928385278,
  4.2133558765999046,
  4.1967304639700478,
  3.6501347462884981],
 'val_acc': [0.52337115714024218,
  0.55400364815777292,
  0.63145782406221695,
  0.65201174388867633,
  0.64034600095534722,
  0.70645186874792887,
  0.72303785614414484,
  0.72700579614263139,
  0.71065788486251547,
  0.8011268904148402],
 'val_loss': [7.6329461070395928,
  7.1576667278874968,
  5.8980572629888783,
  5.5798950576751958,
  5.7627974099161587,
  4.687109406604983,
  4.3980774712575803,
  4.3507278555301001,
  4.6115454940244929,
  3.1376211541709935]}

In [14]:
test_X = testDF.values

In [15]:
train_X.shape, train_Y.shape, test_X.shape

((42000, 784), (42000, 10), (28000, 784))

In [16]:
conv_model = keras.models.Sequential()

In [17]:
conv_model.add(keras.layers.Conv2D(filters=32,kernel_size=(3,3),input_shape=(28,28,1),padding='same',activation='relu'))

In [18]:
conv_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 28, 28, 32)        320       
Total params: 320.0
Trainable params: 320
Non-trainable params: 0.0
_________________________________________________________________


In [19]:
3*3*32 + 32

320

In [20]:
conv_model.add(keras.layers.MaxPool2D(pool_size=(4,4)))

In [21]:
conv_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 28, 28, 32)        320       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 7, 7, 32)          0         
Total params: 320.0
Trainable params: 320.0
Non-trainable params: 0.0
_________________________________________________________________


In [22]:
conv_model.add(keras.layers.Flatten())
conv_model.add(keras.layers.Dense(10,activation='softmax'))

In [23]:
conv_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 28, 28, 32)        320       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 7, 7, 32)          0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 1568)              0         
_________________________________________________________________
dense_3 (Dense)              (None, 10)                15690     
Total params: 16,010.0
Trainable params: 16,010.0
Non-trainable params: 0.0
_________________________________________________________________


In [24]:
train_X.min(), train_X.max()

(0, 255)

In [25]:
train_X.dtype

dtype('int64')

In [26]:
train_X = train_X.reshape(train_X.shape[0],28,28,1).astype('float32')

In [27]:
train_X = (train_X - train_X.min())/(train_X.max() - train_X.min())

In [28]:
conv_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [29]:
history = conv_model.fit(train_X, train_Y, epochs=5, batch_size=64, validation_split=.4, verbose=1)

Train on 25200 samples, validate on 16800 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


With such a simple convolutional architecture, we achieved validation accuracy 96.8% in only 5 epochs.
<pre>
Layer (type)                 Output Shape              Param #   
=================================================================
conv2d_3 (Conv2D)            (None, 28, 28, 32)        320       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 7, 7, 32)          0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 1568)              0         
_________________________________________________________________
dense_3 (Dense)              (None, 10)                15690     
=================================================================
Total params: 16,010
Trainable params: 16,010
Non-trainable params: 0
</pre>

<pre>
conv_model = keras.models.Sequential()
conv_model.add(keras.layers.Conv2D(filters=32,kernel_size=(3,3),input_shape=(28,28,1),padding='same',activation='relu'))
conv_model.add(keras.layers.MaxPool2D(pool_size=(4,4)))
conv_model.add(keras.layers.Flatten())
conv_model.add(keras.layers.Dense(10,activation='softmax'))
conv_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
history = conv_model.fit(train_X, train_Y, epochs=5, batch_size=64, validation_split=.4, verbose=1)
</pre>