In [26]:
import pandas as pd
import numpy as np
from keras.models import *
from keras.layers import *
from keras.callbacks import EarlyStopping
from keras.layers.normalization import BatchNormalization
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import History, ModelCheckpoint
import collections
import sys
import pickle
import os
# Expose only GPU index 0 to CUDA-backed libraries started from this process.
# NOTE(review): this runs after the keras imports above; it presumably still
# takes effect because no GPU session has been created yet -- confirm.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

In [27]:
# Read the training and test tables from the CSV files under data/.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in ('data/train.csv', 'data/test.csv')
)

In [28]:
def normalize_(x):
    """Return ``x`` divided by 255.0.

    Works on anything that supports true division by a float (scalars,
    NumPy arrays). The input is not modified; a new value is returned.
    """
    max_intensity = 255.0
    return x / max_intensity

In [29]:
# Each 'feature' entry is a whitespace-separated string; split it into tokens
# and let NumPy convert every row to float32.
X = np.array(
    [pixels.split() for pixels in train['feature'].tolist()],
    dtype=np.float32,
)

In [30]:
# Pull the 'label' column out of the training table as a plain Python list.
y = train['label'].tolist()

In [31]:
## One-hot encoder
# Load a previously pickled encoder object from encoder.pkl.
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only load encoder.pkl if it comes from a trusted source.
with open('encoder.pkl', 'rb') as f:
    encoder = pickle.load(f)
    
# NOTE(review): `y` is rebound to the encoded labels, so this cell is not
# idempotent -- re-running it passes already-encoded labels to the encoder.
# Re-run the cell that builds `y` from train['label'] first.
y = encoder.transform(y)



In [42]:
def shuffle_split_data(X, y, percent, seed=None):
    """Randomly partition samples into a training part and a validation part.

    One uniform random number is drawn per row of ``X``; rows whose draw is
    strictly below the ``percent``-th percentile of all draws go to the
    training part, the rest go to the validation part.

    Parameters
    ----------
    X, y : array-like
        Samples and their targets. Both are indexed with the same boolean
        mask, so they must support NumPy boolean-mask indexing and have the
        same length along the first axis.
    percent : float
        Percentile in [0, 100] used as the threshold; roughly this share of
        the rows ends up in the training part.
    seed : int or None, optional
        When given, a dedicated ``RandomState(seed)`` is used so the split is
        reproducible. When ``None`` (the default) NumPy's global generator is
        used, exactly as before this parameter existed.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_val, y_val)``.
    """
    # Keep using the module-level generator by default so existing callers
    # (and any earlier np.random.seed call) see unchanged behavior.
    rng = np.random if seed is None else np.random.RandomState(seed)

    rand = rng.rand(X.shape[0])
    split = rand < np.percentile(rand, percent)
    X_train = X[split]
    y_train = y[split]
    X_val = X[~split]
    y_val = y[~split]

    # Report the requested percentile and the resulting subset sizes.
    print('Percentage: ', str(percent))
    print(str(len(X_train)), ' ', str(len(y_train)),
          ' ', str(len(X_val)), ' ', str(len(y_val)))

    return X_train, y_train, X_val, y_val

In [49]:
# Split at the 90th percentile: about 90% of rows for training, the rest for validation.
X_train, y_train, X_val, y_val = shuffle_split_data(X, y, 90)

Perentage:  90
25838   25838   2871   2871


In [57]:
# Reshape every flat pixel row into a 48x48 single-channel image and divide by 255.
# NOTE: the same names are rebound, so re-running this cell normalizes a second time;
# re-run the split cell first if this cell has to be executed again.
X_train, X_val = [
    normalize_(flat.reshape(-1, 48, 48, 1))
    for flat in (X_train, X_val)
]

In [58]:
# Augmentation applied on the fly to training batches: random rotation,
# horizontal/vertical shifts, zoom, shear and horizontal flips.
datagen = ImageDataGenerator(
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=[0.8, 1.2],
    shear_range=0.2,
    horizontal_flip=True,
)

In [61]:
# Training hyper-parameters: mini-batch size and maximum number of epochs.
batch_size, epochs = 64, 100
# Shape of one input image as fed to the first layer: 48x48 pixels, 1 channel.
input_shape = (48, 48, 1)

In [63]:
## Adding model

def _add_conv_block(net, filters, kernel_size, drop_rate, **conv_kwargs):
    """Append Conv2D -> LeakyReLU -> BatchNormalization -> MaxPooling2D -> Dropout to ``net``.

    ``conv_kwargs`` is forwarded to ``Conv2D``; it is used to pass
    ``input_shape`` for the first layer of the model only.
    """
    net.add(Conv2D(filters, kernel_size=kernel_size, padding='same',
                   kernel_initializer='glorot_normal', **conv_kwargs))
    net.add(LeakyReLU(1./20))
    net.add(BatchNormalization())
    net.add(MaxPooling2D(pool_size=(2,2), padding='same'))
    net.add(Dropout(drop_rate))


def _add_dense_block(net, units, drop_rate):
    """Append Dense(relu) -> BatchNormalization -> Dropout to ``net``."""
    net.add(Dense(units, activation='relu', kernel_initializer='glorot_normal'))
    net.add(BatchNormalization())
    net.add(Dropout(drop_rate))


model = Sequential()

# CNN 1-4. Only the first layer of a Sequential model needs input_shape; the
# later layers infer their input from the previous layer, so it is not repeated.
_add_conv_block(model, 64, (5,5), 0.25, input_shape=input_shape)
_add_conv_block(model, 128, (3,3), 0.3)
_add_conv_block(model, 512, (3,3), 0.35)
_add_conv_block(model, 512, (3,3), 0.35)

# Flatten
model.add(Flatten())

# FC
_add_dense_block(model, 512, 0.5)
_add_dense_block(model, 512, 0.5)
model.add(Dense(7, activation='softmax', kernel_initializer='glorot_normal'))

model.compile(loss='categorical_crossentropy', optimizer='adam',
              metrics=['accuracy'])

# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_5 (Conv2D)            (None, 48, 48, 64)        1664      
_________________________________________________________________
leaky_re_lu_5 (LeakyReLU)    (None, 48, 48, 64)        0         
_________________________________________________________________
batch_normalization_5 (Batch (None, 48, 48, 64)        256       
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 24, 24, 64)        0         
_________________________________________________________________
dropout_5 (Dropout)          (None, 24, 24, 64)        0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 24, 24, 128)       73856     
_________________________________________________________________
leaky_re_lu_6 (LeakyReLU)    (None, 24, 24, 128)       0         
__________

In [66]:
# Training callbacks: metric history, early stopping on validation accuracy,
# and checkpointing of the best model seen so far.
hist = History()
early_stop = EarlyStopping(monitor='val_acc', patience=7, verbose=1)

# The checkpoint files are written into models/, so make sure it exists.
os.makedirs('models', exist_ok=True)

# Keras fills the filename template with `epoch` plus the logged metrics.
# The former `{epochs:05d}` placeholder is not a key Keras provides and
# raised KeyError the first time a checkpoint was about to be written.
check_save = ModelCheckpoint('models/model-{epoch:05d}-{val_acc:.5f}.h5',
                             monitor='val_acc', save_best_only=True)

In [69]:
# Train on augmented batches. steps_per_epoch is five times the number of
# batches in the training set, so each epoch draws about five augmented passes.
# early_stop is now passed as well: it was constructed with the other
# callbacks but never handed to training, so early stopping never ran.
model.fit_generator(
    datagen.flow(X_train, y_train, batch_size=batch_size),
    steps_per_epoch=5 * len(X_train) // batch_size,
    validation_data=(X_val, y_val),
    epochs=epochs,
    callbacks=[check_save, early_stop, hist],
    workers=10,
)

Epoch 1/100
  40/2018 [..............................] - ETA: 1:46:04 - loss: 2.9636 - acc: 0.1586

KeyboardInterrupt: 

In [None]:
# Write the model in its current state to an HDF5 file under models/.
# NOTE(review): this is the model as training left it, which is not necessarily
# the best checkpoint written by ModelCheckpoint -- confirm which one is wanted.
model.save('models/model_01.h5')