In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import keras
from keras.utils import np_utils
from keras.layers import Dense, Activation,Flatten, Convolution2D, Dropout, MaxPooling2D
from keras.models import Sequential

import datetime

Using TensorFlow backend.


In [3]:
ds = pd.read_csv('../../mnist_train.csv')
data = ds.values[:12000]
print data.shape

(12000, 785)


In [4]:
## model 1 -> (0-4) and model 2 -> (5-9)
mask = data[:,0] < 5
print data[:, 0].shape
print mask.shape
print np.unique(mask)
print mask.sum()

data_01 = []
data_02 = []

for ix in range(data.shape[0]):
    if mask[ix] == True:
        data_01.append(data[ix])
    else:
        data_02.append(data[ix])
        
data_01 = np.asarray(data_01)
data_02 = np.asarray(data_02)
print "-------------"
print data_01.shape
print data_02.shape

(12000,)
(12000,)
[False  True]
6145
-------------
(6145, 785)
(5855, 785)


In [5]:
### data_01 for the first network
split = int(0.85*data_01.shape[0])

x_tr = data_01[:split, 1:]/255.0
x_tes = data_01[split:, 1:]/255.0

y_tr = np_utils.to_categorical(data_01[:split,0], nb_classes=5)
y_tes = np_utils.to_categorical(data_01[split:, 0], nb_classes=5)

x_tr = x_tr.reshape((-1, 28, 28, 1))
x_tes = x_tes.reshape((-1, 28, 28, 1))

print x_tr.shape, x_tes.shape
print y_tr.shape, y_tes.shape

(5223, 28, 28, 1) (922, 28, 28, 1)
(5223, 5) (922, 5)


In [6]:
model = Sequential()

model.add(Convolution2D(64, 3, 3, input_shape=(28,28,1), activation='relu'))
model.add(Convolution2D(32, 3, 3, activation='relu'))

model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Convolution2D(16, 3, 3, activation='relu'))
model.add(Convolution2D(8, 3, 3, activation='relu'))

model.add(Flatten())

model.add(Dense(100))
model.add(Dropout(0.5))
model.add(Activation('relu'))

model.add(Dense(5))
model.add(Activation('softmax'))

model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
convolution2d_1 (Convolution2D)  (None, 26, 26, 64)    640         convolution2d_input_1[0][0]      
____________________________________________________________________________________________________
convolution2d_2 (Convolution2D)  (None, 24, 24, 32)    18464       convolution2d_1[0][0]            
____________________________________________________________________________________________________
maxpooling2d_1 (MaxPooling2D)    (None, 12, 12, 32)    0           convolution2d_2[0][0]            
____________________________________________________________________________________________________
convolution2d_3 (Convolution2D)  (None, 10, 10, 16)    4624        maxpooling2d_1[0][0]             
___________________________________________________________________________________________

In [7]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [8]:
start = datetime.datetime.now()
ans = start - datetime.datetime.now()
print ans

-1 day, 23:59:59.999935


In [22]:
hist = model.fit(x_tr, y_tr, batch_size=16, nb_epoch=5, verbose=2,validation_data=(x_tes, y_tes))

Train on 5223 samples, validate on 922 samples
Epoch 1/5
20s - loss: 0.1315 - acc: 0.9629 - val_loss: 0.1267 - val_acc: 0.9620
Epoch 2/5
20s - loss: 0.0866 - acc: 0.9740 - val_loss: 0.1108 - val_acc: 0.9685
Epoch 3/5
21s - loss: 0.0627 - acc: 0.9797 - val_loss: 0.0826 - val_acc: 0.9740
Epoch 4/5
20s - loss: 0.0466 - acc: 0.9851 - val_loss: 0.0880 - val_acc: 0.9740
Epoch 5/5
20s - loss: 0.0469 - acc: 0.9849 - val_loss: 0.0632 - val_acc: 0.9816


In [9]:
trans_model = Sequential(model.layers[:-4])

for ix in trans_model.layers:
    ix.trainable = False

trans_model.add(Dense(5))
trans_model.add(Activation('softmax'))

trans_model.summary()
trans_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
convolution2d_1 (Convolution2D)  (None, 26, 26, 64)    640         convolution2d_input_1[0][0]      
____________________________________________________________________________________________________
convolution2d_2 (Convolution2D)  (None, 24, 24, 32)    18464       convolution2d_1[0][0]            
                                                                   convolution2d_1[0][0]            
____________________________________________________________________________________________________
maxpooling2d_1 (MaxPooling2D)    (None, 12, 12, 32)    0           convolution2d_2[0][0]            
                                                                   convolution2d_2[1][0]            
___________________________________________________________________________________________

In [11]:
split = int(0.85 * data_02.shape[0])

X_tr = data_02[:split, 1:]/255.0
X_te = data_02[split:, 1:]/255.0

X_tr = X_tr.reshape((-1, 28, 28, 1))
X_te = X_te.reshape((-1, 28, 28, 1))

y_tr = np_utils.to_categorical(data_02[:split, 0]-5, nb_classes=5)
y_te = np_utils.to_categorical(data_02[split:, 0]-5, nb_classes=5)

print X_tr.shape, X_te.shape
print y_tr.shape, y_te.shape

(4976, 28, 28, 1) (879, 28, 28, 1)
(4976, 5) (879, 5)


In [None]:
start = datetime.datetime.now()
hist = trans_model.fit(X_tr, y_tr,
                epochs=10,
                shuffle=True,
                batch_size=128,
                validation_data=(X_te, y_te))

print datetime.datetime.now() - start