In [4]:
import numpy as np
import matplotlib.pyplot as plt
import os
import cv2
import tensorflow as tf
from keras.utils import Sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.callbacks import TensorBoard
from keras_preprocessing.image import ImageDataGenerator

import time
import pickle

#### If using AMD GPU, switch backend to PlaidML library:

In [5]:
import os
import sys
import warnings

# Multi-backend Keras reads KERAS_BACKEND exactly once, when the `keras`
# package is first imported. If `keras` is already loaded (for example through
# an earlier `from keras.utils import ...`), assigning the variable here has no
# effect and training silently keeps running on the previous backend.
if 'keras' in sys.modules:
    warnings.warn(
        "keras was imported before KERAS_BACKEND was set; the PlaidML backend "
        "will NOT be used. Restart the kernel and run this cell before any "
        "other cell that imports keras.",
        RuntimeWarning,
    )
os.environ['KERAS_BACKEND']='plaidml.keras.backend'

# When using plaidml, the libraries are imported from keras instead of tensorflow.
# These names intentionally shadow the tensorflow.keras versions imported earlier.
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D

#### Define Experiments

In [6]:
# Hyperparameter grid: the training cell builds and fits one model for every
# combination of the values listed here.
conv_layers = [3,5]      # total number of Conv2D + MaxPooling2D stages in a model
layer_sizes = [100]  # width shared by every layer: Conv2D filter count AND Dense unit count
dense_layers = [2]     # number of hidden Dense layers placed before the sigmoid output

#### Load input data

In [7]:
# Load the pre-split (train, test) DataFrames.
# NOTE: pickle.load can execute arbitrary code embedded in the file, so only
# load pickles that you produced yourself or obtained from a trusted source.
# The `with` block guarantees the file handle is closed even if loading fails.
with open('../Dataset/df_5pct.pickle', 'rb') as pickle_in:
    df_train, df_test = pickle.load(pickle_in)

In [8]:
# Keras' ImageDataGenerator expects class labels to be strings, so the gender
# column of both splits is cast to str before the generators are built.
df_train['gender'] = df_train['gender'].astype(str)
df_test['gender'] = df_test['gender'].astype(str)

In [9]:
# Show (rows, columns) for the train split followed by the test split.
print(*(frame.shape for frame in (df_train, df_test)))

(21482, 10) (1131, 10)


In [10]:
# Inspect the column dtypes; `gender` should be `object` (string) after the cast above.
df_train.dtypes

path                object
id                  uint16
name                object
dob         datetime64[ns]
gender              object
score1             float64
score2             float64
pic_date    datetime64[ns]
region              object
age                float64
dtype: object

We will use a generator to feed images to the model: X is the path to each image, and y is the gender label.

In [11]:
# Side length in pixels: every image is resized to a square of this size
# (used as the generators' target_size and as the model's input height/width).
image_reshape_size = 100
# Directory that the relative file names in the `path` column are resolved against.
input_image_root_dir = '../Dataset/imdb_crop/' # Don't forget the ending slash

In [13]:
# Model input shape: (height, width, channels) with a single channel,
# matching the grayscale images produced by the generators.
inputShape = (image_reshape_size, image_reshape_size, 1)
# Intended mini-batch size (number of images per training step).
batch_size = 32

In [14]:
# Preview a few rows to sanity-check the `path` and `gender` columns that feed the generators.
df_train.head(3)

Unnamed: 0,path,id,name,dob,gender,score1,score2,pic_date,region,age
2390,93/nm0000093_rm433293312_1963-12-18_2009.jpg,2336,Brad Pitt,1963-12-18,1,2.988206,,2009-01-01,"[67.73055822774081, 81.165869873289, 188.09436...",45.041308
425680,51/nm0584951_rm169062400_1986-8-29_2009.jpg,11601,Lea Michele,1986-08-29,0,-inf,,2009-01-01,"[1, 1, 452, 653]",22.344059
27752,96/nm0000196_rm4278383104_1963-5-25_2015.jpg,13908,Mike Myers,1963-05-25,1,4.893829,2.025698,2015-01-01,"[318.12685538350325, 423.486473844671, 632.157...",51.606809


#### Set up input image generator using flow_from_dataframe

In [15]:
# One ImageDataGenerator serves all three iterators: pixel values are rescaled
# to [0, 1] and 25% of df_train is reserved for the "validation" subset.
datagen = ImageDataGenerator(rescale=1./255, validation_split=0.25)

# Arguments shared by every iterator, kept in one place so they cannot drift apart.
flow_kwargs = dict(directory=input_image_root_dir,
                   x_col="path",
                   color_mode="grayscale",
                   target_size=(image_reshape_size, image_reshape_size))

# Training and validation iterators read from the same DataFrame and the same
# split definition; they use the configured `batch_size` rather than a literal
# so that changing the configuration cell actually takes effect.
train_generator = datagen.flow_from_dataframe(dataframe=df_train,
                                              y_col="gender",
                                              subset="training",
                                              class_mode="binary",
                                              batch_size=batch_size,
                                              seed=1,
                                              shuffle=True,
                                              **flow_kwargs)

val_generator = datagen.flow_from_dataframe(dataframe=df_train,
                                            y_col="gender",
                                            subset="validation",
                                            class_mode="binary",
                                            batch_size=batch_size,
                                            seed=1,
                                            shuffle=True,
                                            **flow_kwargs)

# Test iterator: yields images only (no labels), one at a time and in
# DataFrame order, so predictions line up row-for-row with df_test.
test_generator = datagen.flow_from_dataframe(dataframe=df_test,
                                             y_col=None,
                                             class_mode=None,
                                             batch_size=1,
                                             shuffle=False,
                                             **flow_kwargs)

Found 16112 validated image filenames belonging to 2 classes.
Found 5370 validated image filenames belonging to 2 classes.
Found 1131 validated image filenames.


#### Run all training experiments

In [None]:
def build_model(conv_layer, layer_size, dense_layer, input_shape):
    """Build and compile a binary-classification CNN.

    Args:
        conv_layer: total number of Conv2D -> ReLU -> MaxPooling2D stages.
        layer_size: number of filters in each Conv2D layer and number of
            units in each hidden Dense layer.
        dense_layer: number of hidden Dense -> ReLU layers after flattening.
        input_shape: shape of one input image, passed to the first Conv2D.

    Returns:
        A compiled Sequential model ending in a single sigmoid unit.
    """
    model = Sequential()

    # first conv stage carries the input shape
    model.add(Conv2D(layer_size, (3,3), input_shape=input_shape))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2,2)))

    # remaining conv stages (conv_layer counts the first one too)
    for _ in range(conv_layer - 1):
        model.add(Conv2D(layer_size, (3,3)))
        model.add(Activation('relu'))
        model.add(MaxPooling2D(pool_size=(2,2)))

    model.add(Flatten())

    # hidden dense layers
    for _ in range(dense_layer):
        model.add(Dense(layer_size))
        model.add(Activation('relu'))

    # output layer: one sigmoid unit for the binary gender label
    model.add(Dense(1))
    model.add(Activation('sigmoid'))

    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model


# Train one model per hyperparameter combination. After the loops finish,
# `model` refers to the LAST model trained; earlier ones are only kept as
# TensorBoard logs.
for dense_layer in dense_layers:
    for layer_size in layer_sizes:
        for conv_layer in conv_layers:

            NAME = 'gender-{}-conv-{}-node-{}-dens-{}'.format(conv_layer, layer_size, dense_layer, int(time.time()))  # model name with timestamp
            print(NAME)

            tensorboard = TensorBoard(log_dir='logs/{}'.format(NAME))
            callbacks = [tensorboard]

            model = build_model(conv_layer, layer_size, dense_layer, inputShape)

            # validation_steps must be derived from the VALIDATION generator's
            # batch size. Dividing by the test generator's batch size (1) would
            # run one step per validation image, i.e. batch_size times too many
            # steps, cycling repeatedly over the validation set every epoch.
            model.fit_generator(generator=train_generator,
                                steps_per_epoch=(train_generator.n // train_generator.batch_size),
                                callbacks = callbacks,
                                validation_data=val_generator,
                                validation_steps=(val_generator.n // val_generator.batch_size),
                                epochs=10,
                                use_multiprocessing=False,
                                workers=1)

gender-3-conv-100-node-2-dens-1552880812
Epoch 1/10

To view the tensorboard, use command:
tensorboard --logdir=logs/

#### Evaluation

In [None]:
# Predict with the model left over from the last training iteration.
# reset() rewinds the iterator so predictions start at the first test row.
test_generator.reset()
pred=model.predict_generator(test_generator,
                            steps=test_generator.n//test_generator.batch_size,
                            verbose=1)
# The network ends in a single sigmoid unit, so `pred` has one column holding
# the probability of the positive class. argmax over axis 1 of a one-column
# array is always 0, so the class must be obtained by thresholding instead.
# The result is a 1-D integer array of class indices (0 or 1); see
# train_generator.class_indices for the index -> label mapping.
pred_class = (pred > 0.5).astype(int).ravel()