#### If using AMD GPU, switch backend to PlaidML library:

In [1]:
import os

# Point Keras at the PlaidML backend through its environment variable.
# NOTE(review): presumably this has to run before the first `import keras`
# to take effect - confirm against the backend banner printed on import.
os.environ.update({'KERAS_BACKEND': 'plaidml.keras.backend'})

In [1]:
import numpy as np
import tensorflow as tf
from keras.models import Sequential
from keras.utils import Sequence
from keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D, ZeroPadding2D, BatchNormalization
from tensorflow.keras.callbacks import TensorBoard
from keras_preprocessing.image import ImageDataGenerator
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint
import time
import pickle
from keras.models import load_model

Using TensorFlow backend.


#### Define Experiments

In [5]:
# Hyper-parameter grid: the training loop builds one model per combination of these lists.
conv_layers = [5]      # conv blocks per model (first block: 1 Conv2D; each later block: 2 Conv2D + pooling)
layer_sizes = [32]     # Conv2D filters in the first block (doubled for every later block)
dense_layers = [2]     # hidden Dense layers placed before the sigmoid output layer

#### Load input data

In [7]:
# Load the pre-split (train, test) DataFrames from disk.
# The context manager guarantees the file handle is closed, even if
# unpickling raises.
# SECURITY: pickle.load can execute arbitrary code - only load pickle files
# that you created yourself or otherwise trust.
with open('../Dataset/df_1pct.pickle', 'rb') as pickle_in:
    df_train, df_test = pickle.load(pickle_in)

In [8]:
# The Keras ImageDataGenerator uses string type data label
df_train['gender'] = df_train.gender.astype(str)
df_test['gender'] = df_test.gender.astype(str)

In [9]:
# Sanity check: (rows, columns) of the training and test frames.
print(df_train.shape, df_test.shape)

(4297, 10) (226, 10)


In [10]:
# Inspect the column dtypes; `gender` is object because it was cast to str above.
df_train.dtypes

path                object
id                  uint16
name                object
dob         datetime64[ns]
gender              object
score1             float64
score2             float64
pic_date    datetime64[ns]
region              object
age                float64
dtype: object

We will use a generator to feed images to the model. X is the path to each image, and y is the gender label.

In [11]:
# Side length (pixels) of the square images the generators produce and the model consumes.
image_reshape_size = 100
# Root folder that the relative paths in the `path` column are resolved against.
input_image_root_dir = '../Dataset/imdb_crop/' # Don't forget the ending slash

In [12]:
from keras import backend as K

# Mini-batch size used by the training and validation generators.
batch_size = 64

# (height, width, channels): square images with a single grayscale channel.
inputShape = (image_reshape_size,) * 2 + (1,)

# Channel axis goes last, matching the layout of inputShape.
K.set_image_data_format('channels_last')

In [13]:
# Peek at the first three training rows to confirm the `path` and `gender` columns.
df_train.head(3)

Unnamed: 0,path,id,name,dob,gender,score1,score2,pic_date,region,age
351096,67/nm0755267_rm3707735808_1980-4-8_2001.jpg,10591,Katee Sackhoff,1980-04-08,0,3.704906,3.597096,2001-01-01,"[159.42317678741446, 51.63402111024036, 209.90...",20.73417
436852,25/nm0090225_rm1239005952_1964-10-25_2007.jpg,13481,Michael Boatman,1964-10-25,1,2.629246,1.647017,2007-01-01,"[80.61172611750719, 114.87418016786741, 131.33...",42.185671
115652,18/nm0001518_rm2982264064_1961-10-26_2011.jpg,5456,Dylan McDermott,1961-10-26,1,2.793912,1.419481,2011-01-01,"[446.31, 70.47, 570.72, 194.88000000000002]",49.183761


#### Set up input image generator using flow_from_dataframe

In [14]:
# One generator config for all splits: scale pixels to [0, 1] and reserve
# 10% of the frame passed in for the "validation" subset.
datagen = ImageDataGenerator(rescale=1./255, validation_split=0.1)

# Arguments shared by every generator (where the images live, how to load them).
common_flow_args = dict(
    directory=input_image_root_dir,
    x_col="path",
    color_mode="grayscale",
    target_size=(image_reshape_size, image_reshape_size),
)

# Extra arguments shared by the two labelled subsets drawn from df_train.
labelled_flow_args = dict(
    common_flow_args,
    dataframe=df_train,
    y_col="gender",
    class_mode="binary",
    batch_size=batch_size,
    seed=1,
    shuffle=True,
)

train_generator = datagen.flow_from_dataframe(subset="training", **labelled_flow_args)

val_generator = datagen.flow_from_dataframe(subset="validation", **labelled_flow_args)

# Test images: no labels, one image per batch, fixed order so that
# predictions line up with the rows of df_test.
test_generator = datagen.flow_from_dataframe(
    dataframe=df_test,
    y_col=None,
    class_mode=None,
    batch_size=1,
    shuffle=False,
    **common_flow_args
)

Found 3868 validated image filenames belonging to 2 classes.
Found 429 validated image filenames belonging to 2 classes.
Found 226 validated image filenames.


#### (1) Run training experiments

In [23]:
# Build (and optionally train) one CNN per combination of the experiment grid.
# Each model is: 1 conv block -> (conv_layer - 1) double-conv blocks whose
# filter count doubles each time -> Flatten -> dense_layer hidden Dense
# layers -> single sigmoid output.
for dense_layer in dense_layers:
    for layer_size in layer_sizes:
        for conv_layer in conv_layers:

            NAME = 'BN-{}-conv-{}-node-{}-dens-{}'.format(conv_layer, layer_size, dense_layer, int(time.time()))  # model name with timestamp
            print(NAME)

            tensorboard = TensorBoard(log_dir='logs/{}'.format(NAME))
            checkpoint = ModelCheckpoint('weights/{}'.format(NAME), monitor='val_loss', verbose=0, save_best_only=True, save_weights_only=False, mode='auto', period=1)
            callbacks = [tensorboard, checkpoint]

            model = Sequential()

            # Track the width in a per-model local. The loop variable
            # `layer_size` must stay untouched: mutating it would make the
            # next `conv_layer` iteration start from an inflated width while
            # NAME still reported the original one.
            n_filters = layer_size

            # first layer
            model.add(Conv2D(n_filters, (3,3), padding="same", activation="relu", input_shape=inputShape))
            model.add(BatchNormalization())
            model.add(MaxPooling2D(pool_size=(3,3)))

            # sets up additional # of conv layers; filters double in every block
            for _ in range(conv_layer - 1):
                n_filters *= 2
                model.add(Conv2D(n_filters, (3,3), padding="same", activation="relu"))
                model.add(BatchNormalization())
                model.add(Conv2D(n_filters, (3,3), padding="same", activation="relu"))
                model.add(BatchNormalization())
                model.add(MaxPooling2D(pool_size=(2,2)))
                model.add(Dropout(0.35))

            model.add(Flatten())

            # hidden dense layers are 4x as wide as the last conv block's filter count
            dense_units = n_filters * 4

            # sets up # of dense layers
            for _ in range(dense_layer):
                model.add(Dense(dense_units, activation='relu'))
                model.add(BatchNormalization())
                model.add(Dropout(0.6))

            # output layer
            model.add(Dense(1))
            model.add(Activation('sigmoid'))

            opt = Adam(lr=0.0005)
            model.compile(loss='binary_crossentropy',
                          optimizer=opt,
                          metrics=['accuracy'])

            # Optional for resuming, load weights
            # NOTE: this path is hard-coded to one earlier run; the file must
            # exist and match the architecture built above.
            model.load_weights('weights/BN-5-conv-32-node-2-dens-1554663813')

#             model.fit_generator(generator=train_generator,
#                                 steps_per_epoch=(train_generator.n // train_generator.batch_size),
#                                 callbacks = callbacks,
#                                 validation_data=val_generator,
#                                 validation_steps=(val_generator.n // val_generator.batch_size),
#                                 epochs=5,
#                                 use_multiprocessing=False,
#                                 workers=4)
            
#             filepath = NAME + '.h5'
#             model.save(filepath)

BN-5-conv-32-node-2-dens-1554704495


In [24]:
# Save the model left over from the last iteration of the experiment loop;
# the Evaluation section reloads this file.
model.save('CNN-final-model.h5')

#### Load model, resume training

In [None]:
# Saved model (.h5) to resume training from; replaces any `model` currently in memory.
inputFile = 'BN-5-conv-32-node-2-dens-1553895953-3.h5'
model = load_model(inputFile)

In [None]:
# Run name for the resumed training: used for the TensorBoard log folder and the saved .h5 file.
saveAs = 'BN-5-conv-32-node-2-dens-1553895953-4Lab'

In [None]:
from keras.callbacks import ModelCheckpoint

# The run name drives both the TensorBoard log folder and the output file.
NAME = saveAs
print(NAME)

# Only TensorBoard logging is attached for the resumed run (no checkpointing).
tensorboard = TensorBoard(log_dir='logs/{}'.format(NAME))
callbacks = [tensorboard]

# Re-compile the loaded model with a fresh Adam optimizer.
opt = Adam(lr=0.001)
model.compile(optimizer=opt,
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Whole batches per epoch; a trailing partial batch is not counted.
train_steps = train_generator.n // train_generator.batch_size
val_steps = val_generator.n // val_generator.batch_size

model.fit_generator(generator=train_generator,
                    steps_per_epoch=train_steps,
                    validation_data=val_generator,
                    validation_steps=val_steps,
                    epochs=5,
                    callbacks=callbacks,
                    workers=8,
                    use_multiprocessing=False)

# Save the further-trained model next to the notebook.
filepath = NAME + '.h5'
model.save(filepath)

#### Evaluation

In [15]:
# Reload the saved final model so evaluation does not depend on whichever `model` is in memory.
model = load_model('CNN-final-model.h5')

In [16]:
from sklearn.metrics import confusion_matrix

# Rewind the generator so predictions start from the first test image.
test_generator.reset()

# Number of batches needed to walk the test set once.
n_test_steps = test_generator.n // test_generator.batch_size

pred = model.predict_generator(test_generator,
                               steps=n_test_steps,
                               verbose=1)




In [17]:
# Ground-truth labels: gender was cast to str for the generators, so convert back to int.
y_true = df_test.gender.astype(int)
# Threshold the sigmoid outputs at 0.5 to get hard 0/1 predictions.
# Relies on `pred` being in the same row order as df_test (test_generator uses shuffle=False).
y_pred = [1 if x>=0.5 else 0 for x in pred]
cm = confusion_matrix(y_true, y_pred)

In [18]:
# Display the confusion matrix (rows: true label, columns: predicted label).
cm

array([[ 64,  26],
       [ 23, 113]])

In [19]:
# Unpack the 2x2 confusion matrix row by row:
# row 0 = actual class 0 -> (TN, FP); row 1 = actual class 1 -> (FN, TP).
(TN, FP), (FN, TP) = cm

Specificity

In [20]:
# Specificity = TN / (TN + FP): share of actual negatives predicted as negative.
TN/(TN+FP)

0.7111111111111111

Precision

In [21]:
# Precision = TP / (TP + FP): share of predicted positives that are actually positive.
TP/(TP+FP)

0.8129496402877698

Recall

In [22]:
# Recall (sensitivity) = TP / (TP + FN): share of actual positives predicted as positive.
# The denominator must include TP; TP/FN is an odds-style ratio that can exceed 1.
TP/(TP+FN)

0.8308823529411765

Accuracy

In [23]:
# Accuracy = correct predictions / all predictions.
(TP+TN)/(TN+TP+FN+FP)

0.7831858407079646

#### Live Demo with webcam