In [3]:
import numpy as np
import matplotlib.pyplot as plt
import os
import cv2
import tensorflow as tf
from keras.utils import Sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D
import pickle

#### If using AMD GPU, switch backend to PlaidML library:

In [4]:
import os
# Select the PlaidML Keras backend (intended for AMD GPUs, per the heading above).
# NOTE(review): Keras reads KERAS_BACKEND when it is first imported, and the first
# cell already runs `from keras.utils import Sequence` - confirm this assignment
# still takes effect, or move it ahead of that import.
os.environ['KERAS_BACKEND']='plaidml.keras.backend'

# When using plaidml, the libraries are imported from keras instead of tensorflow
# These imports rebind Sequential and the layer classes that the first cell
# imported from tensorflow.keras.
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D

#### Define Experiments

In [5]:
# Hyper-parameter grid: one model is trained for every combination of the values below.
dense_layers = [0, 1, 2]     # how many hidden Dense layers follow the conv stack
layer_sizes = [32, 64, 128]  # filters per Conv2D layer / units per hidden Dense layer
conv_layers = [1, 2, 3]      # how many Conv2D blocks (conv + relu + max-pool) to stack

#### Load input data

In [6]:
# Load the pre-split (train, test) DataFrames from the pickled dataset
# (presumably a 1% sample, judging by the file name).
# The `with` block closes the file handle even if unpickling fails.
# NOTE: pickle.load can execute arbitrary code - only load files you trust.
with open('../Dataset/df_1pct.pickle', 'rb') as pickle_in:
    df_train, df_test = pickle.load(pickle_in)

In [7]:
# Sanity check: (rows, columns) of the train and test frames.
print(df_train.shape, df_test.shape)

(4377, 10) (230, 10)


In [8]:
# Preview the first three training rows to inspect the available columns.
df_train.head(3)

Unnamed: 0,path,id,name,dob,gender,score1,score2,pic_date,region,age
135334,91/nm0002091_rm2595784448_1940-10-19_2005.jpg,13567,Michael Gambon,1940-10-19,1.0,-inf,,2005-01-01,"[1, 1, 1362, 2048]",64.203919
345035,22/nm1551922_rm1669175040_1982-9-19_2008.jpg,3949,Columbus Short,1982-09-19,1.0,-inf,,2008-01-01,"[1, 1, 400, 600]",25.284571
260587,40/nm1524440_rm3044393728_1981-1-26_2011.jpg,3925,Colin O'Donoghue,1981-01-26,1.0,5.34239,4.71086,2011-01-01,"[275.2165024827699, 349.717003159889, 496.6700...",29.930799


We will use a generator to feed the model with images: X holds the paths to the images, and y holds the gender labels.

In [9]:
# Inputs are image paths (the generator decodes them batch by batch);
# targets are the gender labels from the same rows.
X_train, y_train = df_train['path'], df_train['gender']
X_test, y_test = df_test['path'], df_test['gender']

In [10]:
# Side length, in pixels, that every image is resized to before it is fed to the model.
image_reshape_size = 100

# Image locations. The defaults are the original machine-specific volume paths;
# set IMDB_INPUT_IMAGE_ROOT / IMDB_OUTPUT_IMAGE_ROOT to run the notebook on another
# machine without editing this cell.
input_image_root_dir = os.environ.get('IMDB_INPUT_IMAGE_ROOT', '/Volumes/ExFAT_500GB/IMDB Data/imdb')
output_image_root_dir = os.environ.get('IMDB_OUTPUT_IMAGE_ROOT', '/Volumes/ExFAT_500GB/resized_grayscale')

In [11]:
# Training settings shared by every experiment.
batch_size, epochs = 32, 10

# Square, single-channel input shape handed to the first Conv2D layer.
inputShape = (image_reshape_size, image_reshape_size, 1)

#### Set up input data generator

In [12]:
class DataGenerator(Sequence):
    """Keras ``Sequence`` that loads image batches from disk on demand.

    Each image is read with OpenCV as single-channel grayscale, resized to
    ``image_reshape_size`` x ``image_reshape_size`` and given a trailing channel
    axis, so a batch has shape ``(n, size, size, 1)`` and matches ``inputShape``.
    """

    def __init__(self, image_filenames, labels, batch_size):
        """Store the data source.

        Args:
            image_filenames: sliceable collection of image paths.
            labels: sliceable collection of labels, aligned with ``image_filenames``.
            batch_size: maximum number of samples per batch.
        """
        self.image_filenames, self.labels = image_filenames, labels
        self.batch_size = batch_size

    def __len__(self):
        """Return the number of batches per epoch (the last one may be partial)."""
        # len() requires a Python int; np.ceil alone returns a numpy float.
        return int(np.ceil(len(self.image_filenames) / float(self.batch_size)))

    def __getitem__(self, idx):
        """Generate one batch of data.

        Args:
            idx: zero-based batch index.

        Returns:
            Tuple ``(images, labels)`` of numpy arrays for batch ``idx``.

        Raises:
            OSError: if an image in the batch cannot be read or decoded.
        """
        start = idx * self.batch_size
        stop = start + self.batch_size
        batch_x = self.image_filenames[start:stop]
        batch_y = self.labels[start:stop]

        images = np.array([self._load_image(file_name) for file_name in batch_x])
        return images, np.array(batch_y)

    @staticmethod
    def _load_image(file_name):
        """Read one image as grayscale, resize it, and append the channel axis."""
        # cv2.imread defaults to 3-channel BGR, which cannot match the 1-channel
        # model input; request grayscale explicitly.
        image = cv2.imread(file_name, cv2.IMREAD_GRAYSCALE)
        if image is None:
            # imread signals a missing/undecodable file by returning None.
            raise OSError('Could not read image: {}'.format(file_name))
        resized = cv2.resize(image, (image_reshape_size, image_reshape_size))
        # (H, W) -> (H, W, 1)
        return resized[..., np.newaxis]

#### Set up tensorboard

In [13]:
from tensorflow.keras.callbacks import TensorBoard
import time
def setupTensorBoard(conv_layer, layer_size, dense_layer):
    """Build a TensorBoard callback for one experiment configuration.

    The log directory is ``logs/<run name>``, where the run name encodes the
    number of conv layers, the layer size, the number of dense layers and the
    current Unix timestamp (so repeated runs do not share a directory).
    """
    timestamp = int(time.time())
    run_name = 'gender-{}-conv-{}-node-{}-dens-{}'.format(conv_layer, layer_size, dense_layer, timestamp)
    log_dir = 'logs/{}'.format(run_name)
    return TensorBoard(log_dir=log_dir)

#### Run all training experiments

In [None]:
# Sample counts used to derive how many full batches make up one epoch.
num_training_samples = len(X_train)
num_validation_samples = len(X_test)

# Train one CNN for every (dense_layer, layer_size, conv_layer) combination.
for dense_layer in dense_layers:
    for layer_size in layer_sizes:
        for conv_layer in conv_layers:

            NAME = "{}-conv-{}-nodes-{}-dense-{}".format(conv_layer, layer_size, dense_layer, int(time.time()))
            print(NAME)

            # Per-experiment TensorBoard callback (writes to its own log directory).
            tensorboard = setupTensorBoard(conv_layer, layer_size, dense_layer)

            model = Sequential()

            # first layer
            model.add(Conv2D(layer_size, (3,3), input_shape=inputShape))
            model.add(Activation('relu'))
            model.add(MaxPooling2D(pool_size=(2,2)))

            # sets up additional # of conv layers
            for _ in range(conv_layer - 1):
                model.add(Conv2D(layer_size, (3,3)))
                model.add(Activation('relu'))
                model.add(MaxPooling2D(pool_size=(2,2)))

            model.add(Flatten())

            # sets up # of dense layers
            for _ in range(dense_layer):
                model.add(Dense(layer_size))
                model.add(Activation('relu'))

            # output layer: single sigmoid unit for the binary gender label
            model.add(Dense(1))
            model.add(Activation('sigmoid'))

            model.compile(loss='binary_crossentropy',
                          optimizer='adam',
                          metrics=['accuracy'])

            # Not using generator for training:
            # model.fit(X, y, batch_size=batch_size, epochs=10, validation_split=0.3, callbacks=[tensorboard])

            # Using generator for training:
            train_generator = DataGenerator(X_train, y_train, batch_size)
            test_generator = DataGenerator(X_test, y_test, batch_size)

            # Only full batches are counted per epoch; max(1, ...) guards against
            # a dataset smaller than one batch producing zero steps.
            model.fit_generator(generator=train_generator,
                    steps_per_epoch=max(1, num_training_samples // batch_size),
                    epochs=epochs,
                    verbose=1,
                    validation_data=test_generator,
                    validation_steps=max(1, num_validation_samples // batch_size),
                    callbacks=[tensorboard],  # without this nothing is logged
                    use_multiprocessing=True,
                    workers=16,
                    max_queue_size=32)