In [1]:
import pandas as pd
import numpy as np
import pyreadr
import import_ipynb

In [2]:
train = pyreadr.read_r('train_df.RData')['dat_train']
test = pyreadr.read_r('test_df.RData')['dat_test']

In [3]:
X_train, y_train = train.loc[:,:'feature6006'], train.loc[:,'emotion_idx']
X_test, y_test = test.loc[:,:'feature6006'], test.loc[:,'emotion_idx']

In [4]:
### Step 0: Provide directories for training images. Training images and Training fiducial points will be in different subfolders. 
train_dir = '../data/train_set/' # This will be modified for different data sets.
train_image_dir = train_dir + 'images/'
train_pt_dir = train_dir + 'points/'
train_label_path = train_dir + 'label.csv'

In [5]:
### Step 2: import data and train-test split 
from sklearn.model_selection import train_test_split
info = pd.read_csv(train_label_path)
train_idx_py, test_idx_py = train_test_split(range(len(info)), test_size=0.2, random_state = 0)
train_idx_r = [i+1 for i in train_idx_py]
test_idx_r = [i+1 for i in test_idx_py]

In [6]:
train_idx_py, val_idx_py = train_test_split(train_idx_py, test_size=0.02, random_state = 0)

In [7]:
import feature_cnn
train_generator = feature_cnn.dat_generator(train_idx_py, True, train_image_dir, info)
test_generator = feature_cnn.dat_generator(test_idx_py, False, train_image_dir, info)
val_generator = feature_cnn.dat_generator(val_idx_py, False, train_image_dir, info)

importing Jupyter notebook from feature_cnn.ipynb
Found 1960 validated image filenames belonging to 22 classes.
Found 500 validated image filenames belonging to 22 classes.
Found 40 validated image filenames belonging to 22 classes.


In [8]:
# from keras_preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

In [9]:
def train_cnn(train_generator, val_generator):
    from keras import Sequential
    from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
    from keras.layers import Conv2D, Activation, MaxPooling2D, Dense, Flatten, Dropout
    model_ckpnt = ModelCheckpoint("best.h5",verbose = 1, save_best_only = True)
    early_stop = EarlyStopping(patience = 10, verbose = 1, monitor = 'val_loss')
    reduceLR = ReduceLROnPlateau(patience = 5, verbose = 1, factor = 0.05)
    
    model = Sequential()
    model.add(Conv2D(32, (3, 3), input_shape=(150, 150, 1)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Conv2D(32, (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Conv2D(64, (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Flatten())
#     model.add(GlobalAveragePooling2D())
    model.add(Dense(64))
    model.add(Activation('relu'))
    model.add(Dropout(0.2))
    model.add(Dense(22))
    model.add(Activation('softmax'))
    model.summary()
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    batch_size = 16
    model.fit_generator(
            train_generator,
            validation_data=val_generator,
            steps_per_epoch=len(train_generator),
            callbacks=[model_ckpnt, early_stop, reduceLR],
            epochs=100)
#     model.save_weights('first_try.h5')
    return model

In [10]:
model = train_cnn(train_generator, val_generator)

Using TensorFlow backend.


Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 148, 148, 32)      320       
_________________________________________________________________
activation_1 (Activation)    (None, 148, 148, 32)      0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 74, 74, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 72, 72, 32)        9248      
_________________________________________________________________
activation_2 (Activation)    (None, 72, 72, 32)        0         
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 36, 36, 32)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 34, 34, 64)       

In [27]:
from sklearn.externals import joblib

# save the model to disk
filename = 'cnn_train.sav'
joblib.dump(model, filename)

['cnn_train.sav']

In [11]:
# model.load_weights('best.h5')
preds = model.predict(test_generator, verbose = 1)



In [26]:
y_pred = preds.argmax(1)

In [13]:
sum(y_pred == info.loc[test_idx_py, 'emotion_idx'].reset_index(drop=True))

22

In [25]:
info.loc[train_idx_py,'emotion_idx'].value_counts()

3     111
11    102
12    101
17     99
14     98
8      96
2      93
20     92
7      91
18     90
4      89
1      89
19     89
13     84
5      84
21     83
6      81
16     81
22     81
10     78
9      78
15     70
Name: emotion_idx, dtype: int64