In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import cv2
import tensorflow as tf
from keras.utils import Sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.callbacks import TensorBoard
from keras_preprocessing.image import ImageDataGenerator

import time
import pickle

Using TensorFlow backend.


#### If using an AMD GPU, switch the Keras backend to the PlaidML library:

In [2]:
import os
# Ask Keras to use PlaidML as its backend (intended for AMD GPUs).
# NOTE(review): the first cell already runs `from keras.utils import Sequence` and its
# output reads "Using TensorFlow backend.", so keras has been imported before this
# variable is set. Presumably KERAS_BACKEND must be set before the first keras import
# to take effect -- confirm, and move this cell above the imports if so.
os.environ['KERAS_BACKEND']='plaidml.keras.backend'

# When using plaidml, the libraries are imported from keras instead of tensorflow
# These imports rebind Sequential and the layer classes that the first cell imported
# from tensorflow.keras; TensorBoard is still the tensorflow.keras one.
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D

#### Define Experiments

In [3]:
# Hyper-parameter grid: one model is trained for every combination of the values below.
dense_layers = [3]             # how many hidden Dense layers follow the conv stack
layer_sizes = [32, 64, 128]    # width shared by Conv2D filters and Dense units
conv_layers = [5]              # how many Conv2D + MaxPooling2D stages are stacked

#### Load input data

In [4]:
# Load the pre-split (train, test) dataframes.
# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only load this file if it comes from a trusted source.
# The context manager closes the file handle once loading is done.
with open('../Dataset/df_10pct.pickle', 'rb') as pickle_in:
    df_train, df_test = pickle.load(pickle_in)

In [5]:
# flow_from_dataframe with class_mode="binary" is fed string labels,
# so the gender column of both frames is converted to str.
for frame in (df_train, df_test):
    frame['gender'] = frame['gender'].astype(str)

In [6]:
# Sanity check: (rows, columns) of the training and test frames.
print(df_train.shape, df_test.shape)

(42965, 10) (2261, 10)


In [7]:
# Inspect column types; `gender` should now be object (str) after the cast above.
df_train.dtypes

path                object
id                  uint16
name                object
dob         datetime64[ns]
gender              object
score1             float64
score2             float64
pic_date    datetime64[ns]
region              object
age                float64
dtype: object

We will use a generator to feed the model with images: X is the path to each image, and y is the gender label.

In [8]:
# Side length, in pixels, of the square that images are resized to.
image_reshape_size = 100

# Folder that the dataframe's relative image paths are resolved against.
# Don't forget the ending slash.
input_image_root_dir = '../Dataset/imdb_crop/'

In [9]:
# Number of images per training / validation batch.
batch_size = 32

# (height, width, channels) expected by the first Conv2D layer; one channel = grayscale.
inputShape = (image_reshape_size,) * 2 + (1,)

In [10]:
# Preview a few rows; `path` and `gender` are the columns handed to the generators below.
df_train.head(3)

Unnamed: 0,path,id,name,dob,gender,score1,score2,pic_date,region,age
120097,12/nm0001612_rm2398793472_1969-8-19_2012.jpg,13149,Matthew Perry,1969-08-19,1,0.799562,,2012-01-01,"[397.6, 52.0, 454.4, 108.8]",42.369111
110887,35/nm0001435_rm3857046784_1963-7-30_1994.jpg,11966,Lisa Kudrow,1963-07-30,0,0.774393,,1994-01-01,"[1132.544, 313.344, 1277.952, 458.752]",30.426361
59955,59/nm0000459_rm3615721728_1960-8-16_2008.jpg,18968,Timothy Hutton,1960-08-16,1,2.119279,,2008-01-01,"[282.03335656952333, 55.67480128185821, 342.96...",47.376743


#### Set up input image generator using flow_from_dataframe

In [11]:
# Multiply pixel values by 1/255 and reserve 10% of df_train as a validation subset.
datagen = ImageDataGenerator(rescale=1./255, validation_split=0.1)

# Settings shared by all three generators, so every split is read and resized identically.
common_flow_args = dict(directory=input_image_root_dir,
                        x_col="path",
                        color_mode="grayscale",
                        target_size=(image_reshape_size, image_reshape_size))

# Labelled settings shared by the training and validation subsets of df_train.
# batch_size comes from the configuration cell instead of a repeated literal.
labelled_flow_args = dict(dataframe=df_train,
                          y_col="gender",
                          class_mode="binary",
                          batch_size=batch_size,
                          seed=1,
                          shuffle=True,
                          **common_flow_args)

train_generator = datagen.flow_from_dataframe(subset="training", **labelled_flow_args)

val_generator = datagen.flow_from_dataframe(subset="validation", **labelled_flow_args)

# Test images are yielded one at a time, unlabelled and unshuffled.
test_generator = datagen.flow_from_dataframe(dataframe=df_test,
                                             y_col=None,
                                             class_mode=None,
                                             batch_size=1,
                                             shuffle=False,
                                             **common_flow_args)

Found 38669 validated image filenames belonging to 2 classes.
Found 4296 validated image filenames belonging to 2 classes.
Found 2261 validated image filenames.


#### Run all training experiments

In [13]:
def build_gender_cnn(conv_layer, layer_size, dense_layer, input_shape):
    """Build and compile one CNN for binary gender classification.

    Args:
        conv_layer: total number of Conv2D -> ReLU -> MaxPooling2D stages.
        layer_size: number of filters in every Conv2D layer and units in every
            hidden Dense layer.
        dense_layer: number of hidden Dense -> ReLU layers after flattening.
        input_shape: shape passed as ``input_shape`` to the first Conv2D layer.

    Returns:
        A compiled Sequential model ending in a single sigmoid unit.
    """
    model = Sequential()

    # First conv stage: the only one that declares the input shape.
    model.add(Conv2D(layer_size, (3,3), input_shape=input_shape))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2,2)))

    # Remaining conv stages (conv_layer counts the first stage too).
    for _ in range(conv_layer - 1):
        model.add(Conv2D(layer_size, (3,3)))
        model.add(Activation('relu'))
        model.add(MaxPooling2D(pool_size=(2,2)))

    model.add(Flatten())

    # Hidden fully connected layers.
    for _ in range(dense_layer):
        model.add(Dense(layer_size))
        model.add(Activation('relu'))

    # Output layer: one sigmoid unit for the binary label.
    model.add(Dense(1))
    model.add(Activation('sigmoid'))

    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model


# Whole batches per epoch; these do not change between experiments, so compute them once.
# Floor division means a trailing partial batch is not counted.
train_steps = train_generator.n // train_generator.batch_size
val_steps = val_generator.n // val_generator.batch_size

# Train one model per (dense_layer, layer_size, conv_layer) combination.
# `model` is rebound on every iteration, so only the last trained model remains
# available to later cells.
for dense_layer in dense_layers:
    for layer_size in layer_sizes:
        for conv_layer in conv_layers:

            NAME = 'gender-{}-conv-{}-node-{}-dens-{}'.format(conv_layer, layer_size, dense_layer, int(time.time()))  # model name with timestamp
            print(NAME)

            # Each run logs to its own sub-directory so runs can be compared in TensorBoard.
            tensorboard = TensorBoard(log_dir='logs/{}'.format(NAME))
            callbacks = [tensorboard]

            model = build_gender_cnn(conv_layer, layer_size, dense_layer, inputShape)

            model.fit_generator(generator=train_generator,
                                steps_per_epoch=train_steps,
                                callbacks=callbacks,
                                validation_data=val_generator,
                                validation_steps=val_steps,
                                epochs=10,
                                use_multiprocessing=True,
                                workers=2)

gender-2-conv-32-node-2-dens-1552975562
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
gender-3-conv-32-node-2-dens-1552978617
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
gender-5-conv-32-node-2-dens-1552981798
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
gender-2-conv-64-node-2-dens-1552985014
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
gender-3-conv-64-node-2-dens-1552991364
Epoch 1/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
gender-5-conv-64-node-2-dens-1552998269
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
gender-2-conv-128-node-2-dens-1553005355
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/1

Process ForkPoolWorker-265:
Process ForkPoolWorker-266:
Traceback (most recent call last):
  File "/Users/potatorun/anaconda3/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/Users/potatorun/anaconda3/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/potatorun/anaconda3/lib/python3.6/multiprocessing/pool.py", line 108, in worker
    task = get()
  File "/Users/potatorun/anaconda3/lib/python3.6/multiprocessing/queues.py", line 335, in get
    res = self._reader.recv_bytes()
  File "/Users/potatorun/anaconda3/lib/python3.6/multiprocessing/connection.py", line 216, in recv_bytes
    buf = self._recv_bytes(maxlength)
  File "/Users/potatorun/anaconda3/lib/python3.6/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)
  File "/Users/potatorun/anaconda3/lib/python3.6/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)
K

KeyboardInterrupt: 

To view the training logs in TensorBoard, run the following command:
tensorboard --logdir=logs/

#### Evaluation

In [None]:
# Rewind the (unshuffled) test generator so prediction starts from its first image.
test_generator.reset()

# `model` is whichever model the training loop built last.
pred=model.predict_generator(test_generator,
                            steps=test_generator.n//test_generator.batch_size,
                            verbose=1)

# The network ends in a single sigmoid unit, so every row of `pred` holds one value.
# argmax over axis 1 of such an array is always 0, so threshold at 0.5 instead
# to obtain the predicted class index (0 or 1) for each image.
pred_class = (pred.ravel() > 0.5).astype(int)