In [1]:
from __future__ import print_function
from __future__ import absolute_import

import warnings
import numpy as np
import tensorflow as tf

from tensorflow.keras.preprocessing.image import ImageDataGenerator

from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Lambda
from tensorflow.keras.layers import Reshape

from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import AveragePooling2D
from tensorflow.keras.layers import GlobalAveragePooling2D
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Dense

from tensorflow.keras.layers import Concatenate, concatenate
from tensorflow.keras.layers import Add, add
from tensorflow.keras.layers import Multiply, multiply

from tensorflow.keras import backend as K

tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [2]:
class SEResNeXt(object):
    def __init__(self, size=96, num_classes=10, depth=64, reduction_ratio=4, num_split=8, num_block=3):
        self.depth = depth  # number of channels
        self.ratio = reduction_ratio  # ratio of channel reduction in SE module
        self.num_split = num_split  # number of splitting trees for ResNeXt (so called cardinality)
        self.num_block = num_block  # number of residual blocks
        if K.image_data_format() == 'channels_first':
            self.channel_axis = 1
        else:
            self.channel_axis = 3
        self.model = self.build_model(Input(shape=(size,size,1)), num_classes)

    def conv_bn(self, x, filters, kernel_size, stride, padding='same'):
        '''
        Combination of Conv and BN layers since these always appear together.
        '''
        x = Conv2D(filters=filters, kernel_size=[kernel_size, kernel_size],
                   strides=[stride, stride], padding=padding)(x)
        x = BatchNormalization()(x)
        
        return x
    
    def activation(self, x, func='relu'):
        '''
        Activation layer.
        '''
        return Activation(func)(x)
    
    def channel_zeropad(self, x):
        '''
        Zero-padding for channle dimensions.
        Note that padded channles are added like (Batch, H, W, 2/x + x + 2/x).
        '''
        shape = list(x.shape)
        y = K.zeros_like(x)
        
        if self.channel_axis == 3:
            y = y[:, :, :, :shape[self.channel_axis] // 2]
        else:
            y = y[:, :shape[self.channel_axis] // 2, :, :]
        
        return concatenate([y, x, y], self.channel_axis)
    
    def channel_zeropad_output(self, input_shape):
        '''
        Function for setting a channel dimension for zero padding.
        '''
        shape = list(input_shape)
        shape[self.channel_axis] *= 2

        return tuple(shape)
    
    def initial_layer(self, inputs):
        '''
        Initial layers includes {conv, BN, relu}.
        '''
        x = self.conv_bn(inputs, self.depth, 3, 1)
        x = self.activation(x)
        
        return x
    
    def transform_layer(self, x, stride):
        '''
        Transform layer has 2 {conv, BN, relu}.
        '''
        x = self.conv_bn(x, self.depth, 1, 1)
        x = self.activation(x)
        
        x = self.conv_bn(x, self.depth, 3, stride)
        x = self.activation(x)
        
        return x
        
    def split_layer(self, x, stride):
        '''
        Parallel operation of transform layers for ResNeXt structure.
        '''
        splitted_branches = list()
        for i in range(self.num_split):
            branch = self.transform_layer(x, stride)
            splitted_branches.append(branch)
        
        return concatenate(splitted_branches, axis=self.channel_axis)
    
    def squeeze_excitation_layer(self, x, out_dim):
        '''
        SE module performs inter-channel weighting.
        '''
        squeeze = GlobalAveragePooling2D()(x)
        
        excitation = Dense(units=out_dim // self.ratio)(squeeze)
        excitation = self.activation(excitation)
        excitation = Dense(units=out_dim)(excitation)
        excitation = self.activation(excitation, 'sigmoid')
        excitation = Reshape((1,1,out_dim))(excitation)
        
        scale = multiply([x,excitation])
        
        return scale
    
    def residual_layer(self, x, out_dim):
        '''
        Residual block.
        '''
        for i in range(self.num_block):
            input_dim = int(np.shape(x)[-1])
            
            if input_dim * 2 == out_dim:
                flag = True
                stride = 2
            else:
                flag = False
                stride = 1
            
            subway_x = self.split_layer(x, stride)
            subway_x = self.conv_bn(subway_x, out_dim, 1, 1)
            subway_x = self.squeeze_excitation_layer(subway_x, out_dim)
            
            if flag:
                pad_x = AveragePooling2D(pool_size=(2,2), strides=(2,2), padding='same')(x)
                pad_x = Lambda(self.channel_zeropad, output_shape=self.channel_zeropad_output)(pad_x)
            else:
                pad_x = x
            
            x = self.activation(add([pad_x, subway_x]))
                
        return x
    
    def build_model(self, inputs, num_classes):
        '''
        Build a SENet model.
        '''
        x = self.initial_layer(inputs)
        
        x = self.residual_layer(x, out_dim=64)
        x = self.residual_layer(x, out_dim=128)
        x = self.residual_layer(x, out_dim=256)
        
        x = GlobalAveragePooling2D()(x)
        x = Dense(units=num_classes, activation='softmax')(x)
        
        return Model(inputs, x)

In [3]:
img_rows,img_cols = 48,48
batch_size = 16

In [4]:
train_data_dir = './dataset/train/'
validation_data_dir = './dataset/validation/'

In [5]:
train_datagen = ImageDataGenerator(rescale=1./255, rotation_range=30, shear_range=0.3,zoom_range=0.4, width_shift_range=0.4, height_shift_range=0.4, horizontal_flip=True, fill_mode='nearest')
validation_datagen = ImageDataGenerator(rescale=1./255)

In [6]:
train_generator = train_datagen.flow_from_directory(train_data_dir, color_mode='grayscale', target_size=(img_rows,img_cols), batch_size=batch_size,class_mode='categorical', shuffle=True)
validatiion_generator = validation_datagen.flow_from_directory(validation_data_dir, color_mode='grayscale', target_size=(img_rows,img_cols), batch_size=batch_size, class_mode='categorical', shuffle=True)

Found 24282 images belonging to 5 classes.
Found 5937 images belonging to 5 classes.


In [7]:
nb_train_samples = train_generator.n
nb_validation_samples = validatiion_generator.n
num_classes = train_generator.num_classes
# class_weights = [ {idx:(1/val)*nb_train_samples} for idx,val in enumerate(np.bincount(train_generator.classes))]
class_weights = dict(list(enumerate(nb_train_samples/num_classes*np.bincount(train_generator.classes))))
print(class_weights)

{0: 19391605.2, 1: 34791249.599999994, 2: 24194584.799999997, 3: 23980903.2, 4: 15564761.999999998}


In [8]:
np.bincount(train_generator.classes)

array([3993, 7164, 4982, 4938, 3205], dtype=int64)

In [9]:
sEResNeXt = SEResNeXt(size=48,num_classes=num_classes)
model = sEResNeXt.model
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 48, 48, 1)]  0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 48, 48, 64)   640         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 48, 48, 64)   256         conv2d[0][0]                     
__________________________________________________________________________________________________
activation (Activation)         (None, 48, 48, 64)   0           batch_normalization[0][0]        
______________________________________________________________________________________________

In [10]:
from tensorflow.keras.optimizers import RMSprop,SGD,Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

In [11]:
checkpoint = ModelCheckpoint('Emotion_little_vgg.h5', monitor='val_accuracy', mode='max', save_best_only=True, verbose=1)
earlystop = EarlyStopping(monitor='val_accuracy', min_delta=0, patience=5,verbose=1,restore_best_weights=True)
redure_lr = ReduceLROnPlateau(monitor='val_accuracy', factor=0.2,patience=3,verbose=1,min_delta=0.0001)
callbacks = [earlystop, checkpoint, redure_lr]

In [12]:
model.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=0.001), metrics=['accuracy'])

In [13]:
epochs = 50

In [None]:
fit = model.fit(train_generator,steps_per_epoch=nb_train_samples//batch_size,epochs=epochs,callbacks=callbacks,validation_data=validatiion_generator,validation_steps=nb_validation_samples//batch_size)

Epoch 1/50

Epoch 00001: val_accuracy improved from -inf to 0.31014, saving model to Emotion_little_vgg.h5




Epoch 2/50

Epoch 00002: val_accuracy improved from 0.31014 to 0.31166, saving model to Emotion_little_vgg.h5
Epoch 3/50

Epoch 00003: val_accuracy improved from 0.31166 to 0.37989, saving model to Emotion_little_vgg.h5
Epoch 4/50

Epoch 00004: val_accuracy improved from 0.37989 to 0.43093, saving model to Emotion_little_vgg.h5
Epoch 5/50

Epoch 00005: val_accuracy improved from 0.43093 to 0.52443, saving model to Emotion_little_vgg.h5
Epoch 6/50

Epoch 00006: val_accuracy improved from 0.52443 to 0.53925, saving model to Emotion_little_vgg.h5
Epoch 7/50

Epoch 00007: val_accuracy did not improve from 0.53925
Epoch 8/50

Epoch 00008: val_accuracy improved from 0.53925 to 0.58524, saving model to Emotion_little_vgg.h5
Epoch 9/50

Epoch 00009: val_accuracy improved from 0.58524 to 0.59046, saving model to Emotion_little_vgg.h5
Epoch 10/50

Epoch 00010: val_accuracy did not improve from 0.59046
Epoch 11/50

Epoch 00011: val_accuracy improved from 0.59046 to 0.63982, saving model to Emotio