Usage of VGG19 for gender identification

    In this section, we reuse the same training, validation, and test sets as before, change only the model from VGG16 to VGG19, and evaluate its performance.

In [23]:
# libraries
import pandas as pd
import numpy as np
import seaborn as sns
import os

from matplotlib import pyplot as plt

from sklearn.model_selection import train_test_split

#### Modeling


In [1]:
from tensorflow.keras.applications.vgg19 import VGG19
from tensorflow.keras.applications.vgg19 import preprocess_input
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D, BatchNormalization


from tensorflow.keras import models
from tensorflow.keras import layers

In [2]:
# Number of output classes predicted by the classification head.
num_classes = 2

# Pretrained VGG19 convolutional base: ImageNet weights, no classifier head,
# global average pooling applied to the final feature map.
vgg = VGG19(
    weights='imagenet',
    include_top=False,
    pooling='avg',
    input_shape=(178, 218, 3),
)
vgg.summary()

# Freeze every layer of the base except its last five, which stay trainable
# so they can be fine-tuned.
for layer in vgg.layers[:-5]:
    layer.trainable = False

# Print each base layer with its trainable flag to verify the freeze.
for layer in vgg.layers:
    print(layer, layer.trainable)

# Empty Sequential container; the base and the new head are added later.
model = models.Sequential()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg19/vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "vgg19"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 178, 218, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 178, 218, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 178, 218, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 89, 109, 64)       0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 89, 109, 128)      73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 89, 109, 128)     

In [3]:
# Stack the VGG19 base and the new classification head onto the model, in order:
# base -> Dense(128, relu) -> BatchNormalization -> Dense(num_classes, sigmoid).
# NOTE(review): a `num_classes`-unit sigmoid output scores each class
# independently; softmax + categorical_crossentropy is the usual choice for
# mutually exclusive classes - confirm this is intentional.
head_layers = (
    vgg,
    layers.Dense(128, activation='relu'),
    layers.BatchNormalization(),
    layers.Dense(num_classes, activation='sigmoid'),
)
for stacked_layer in head_layers:
    model.add(stacked_layer)

model.summary()

# Compile with Adam and binary cross-entropy, tracking accuracy.
# NOTE(review): with a binary_crossentropy loss Keras may resolve 'accuracy' to
# an element-wise binary accuracy rather than a per-sample one - verify.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg19 (Functional)           (None, 512)               20024384  
_________________________________________________________________
dense (Dense)                (None, 128)               65664     
_________________________________________________________________
batch_normalization (BatchNo (None, 128)               512       
_________________________________________________________________
dense_1 (Dense)              (None, 2)                 258       
Total params: 20,090,818
Trainable params: 7,145,602
Non-trainable params: 12,945,216
_________________________________________________________________


In [6]:
import h5py  # not used directly; presumably imported so a missing HDF5 backend fails here rather than at the first checkpoint
# Use the public Keras namespace, matching the one the model itself is built from.
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Stop training once the validation loss has failed to improve for 2 consecutive epochs.
es=EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=2)

# Keep only the weights of the epoch with the lowest validation loss.
checkpoint_path = './CNN/Gender ID/VGG19/best_model_2_vgg19.h5'
# Create the target folder up front so the first save cannot fail on a missing
# directory after a full epoch of training.
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)
mc= ModelCheckpoint(checkpoint_path, monitor='val_loss', mode='min', verbose=1, save_best_only=True)

# Callbacks handed to the training call.
cb_list=[es,mc]

In [8]:
# Preprocessing must match the pretrained base used by this notebook (VGG19),
# and imports come from the public Keras namespace the model is built from.
from tensorflow.keras.applications.vgg19 import preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator


# Shared generator: applies the VGG19 input preprocessing to every image.
data_generator = ImageDataGenerator(preprocessing_function=preprocess_input)

# NOTE(review): Keras interprets target_size as (height, width); confirm that
# (178, 218) is not a transposed (width, height) of the source images.
# It has to stay in sync with the model's input_shape.
train_generator = data_generator.flow_from_directory(
        './data/Celeb_sets/train/',
        target_size=(178, 218),
        batch_size=12,
        class_mode='categorical')


validation_generator = data_generator.flow_from_directory(
        './data/Celeb_sets/valid/',
        target_size=(178, 218),
        batch_size=12,
        class_mode='categorical')


# `fit` accepts generators directly (`fit_generator` is deprecated).
# len(generator) is the number of batches per pass over the directory, so the
# step counts follow the data instead of being hard-coded.
model.fit(
        train_generator,
        epochs=5,
        steps_per_epoch=len(train_generator),
        validation_data=validation_generator,
        validation_steps=len(validation_generator),
        callbacks=cb_list)

Found 32000 images belonging to 2 classes.
Found 7998 images belonging to 2 classes.
Epoch 1/5

Epoch 00001: val_loss improved from inf to 0.11711, saving model to ./CNN/Gender ID/VGG19/best_model_2_vgg19.h5
Epoch 2/5

Epoch 00002: val_loss improved from 0.11711 to 0.07182, saving model to ./CNN/Gender ID/VGG19/best_model_2_vgg19.h5
Epoch 3/5

Epoch 00003: val_loss did not improve from 0.07182
Epoch 4/5

Epoch 00004: val_loss did not improve from 0.07182
Epoch 00004: early stopping


<tensorflow.python.keras.callbacks.History at 0x7f872077b590>

In [15]:
import os

# Display the kernel's current working directory (relative paths resolve against it).
os.getcwd()

'/Users/vickyyounang/Documents/PHD/winter2021/deep_learning/project_&_topic/Project/code/CNN/Gender ID/VGG19'

#### Testing the model

In [16]:
# Project root. It can be overridden with the GENDER_ID_ROOT environment
# variable so the notebook is not tied to a single machine; the default keeps
# the original location.
root = os.path.join(
    os.environ.get(
        'GENDER_ID_ROOT',
        '/Users/vickyyounang/Documents/PHD/winter2021/deep_learning/project_&_topic/Project/code/'),
    '')  # joining with '' guarantees the trailing separator that `root + ...` callers rely on

# Test-set directory, relative to `root`.
test_path=   './data/Celeb_sets/test/'


In [31]:

# Load the best checkpoint written during training and run it on the test set.
from tensorflow.keras.models import load_model

# Resolve the checkpoint from the project root so loading does not depend on
# the kernel's current working directory.
model_path = os.path.join(root, 'CNN', 'Gender ID', 'VGG19', 'best_model_2_vgg19.h5')
saved_model = load_model(model_path)

# Test images: one per batch and unshuffled, so predictions stay aligned with
# test_generator.filenames.
test_generator = data_generator.flow_from_directory(
        os.path.join(root, test_path),
        target_size=(178, 218),
        batch_size=1,
        class_mode='categorical',
        shuffle=False)

# Obtain the activations of the last dense layer for every test image.
# `predict` accepts generators directly (`predict_generator` is deprecated);
# len(test_generator) covers each image exactly once.
test_generator.reset()
pred=saved_model.predict(test_generator, verbose=1, steps=len(test_generator))

# The predicted class is the output unit with the highest activation.
predicted_class_indices=np.argmax(pred,axis=1)

Found 1000 images belonging to 2 classes.


In [32]:
# Translate predicted class indices into class names and score them against
# the true class of every test image.

# class_indices maps class name -> index; invert it to look names up by index.
labels = {index: name for name, index in test_generator.class_indices.items()}
predictions = [labels[k] for k in predicted_class_indices]

# The true class of each image is the first folder of its relative path
# (e.g. 'female/img_1.jpg' -> 'female'); normalising the path first keeps this
# independent of the platform's separator.
filenames = test_generator.filenames
filenz = [os.path.normpath(name).split(os.sep)[0] for name in filenames]

# Predictions must line up one-to-one with the files they were made for.
assert len(predictions) == len(filenames), "predictions and filenames differ in length"

# Per-image correctness flags, overall and per true class.
match = [truth == guess for truth, guess in zip(filenz, predictions)]
match_ml = [hit for truth, hit in zip(filenz, match) if truth == 'male']
match_fm = [hit for truth, hit in zip(filenz, match) if truth == 'female']

# Accuracies are computed over the actual sample counts (not assumed set
# sizes); an empty group reports NaN instead of a misleading value.
print('total accuracy = ', match.count(True)/len(match) if match else float('nan'))
print('male accuracy = ', match_ml.count(True)/len(match_ml) if match_ml else float('nan'))
print('female accuracy = ', match_fm.count(True)/len(match_fm) if match_fm else float('nan'))

total accuracy =  0.977
male accuracy =  0.968
female accuracy =  0.986


In [27]:
# Persist the test-set results as CSV (one row per test image, no index column).
# NOTE(review): the "Filename" column is filled from `filenz`, which elsewhere in
# this notebook holds the class folder name rather than the file name - confirm
# that this is the intended content.
results = pd.DataFrame(
    data={
        "Filename": filenz,
        "Predictions": predictions,
    }
)

results.to_csv("GenderID_VGG19_test_results.csv", index=False)


In [33]:
# Display the kernel's current working directory.
os.getcwd()

'/Users/vickyyounang/Documents/PHD/winter2021/deep_learning/project_&_topic/Project/code/CNN/Gender ID/VGG19'

In [34]:
# predict for pictures of children
# One image per batch and unshuffled, so predictions stay aligned with
# test_generator.filenames. os.path.join works whether or not `root` ends
# with a path separator.
test_generator = data_generator.flow_from_directory(
        os.path.join(root, 'data/Celeb_sets/test-me'),
        target_size=(178, 218),
        batch_size=1,
        class_mode='categorical',
        shuffle=False)


# Obtain the activations of the last dense layer for every image.
# `predict` accepts generators directly (`predict_generator` is deprecated);
# len(test_generator) covers each image exactly once.
test_generator.reset()
pred=saved_model.predict(test_generator, verbose=1, steps=len(test_generator))

# The predicted class is the output unit with the highest activation.
predicted_class_indices=np.argmax(pred,axis=1)




Found 10 images belonging to 2 classes.






In [35]:
# Translate predicted class indices into class names and score them against
# the true class of every image in the current test_generator.

# class_indices maps class name -> index; invert it to look names up by index.
labels = {index: name for name, index in test_generator.class_indices.items()}
predictions = [labels[k] for k in predicted_class_indices]

# The true class of each image is the first folder of its relative path;
# normalising the path first keeps this independent of the platform's separator.
filenames = test_generator.filenames

print(filenames)

filenz = [os.path.normpath(name).split(os.sep)[0] for name in filenames]

print(predictions)

# Predictions must line up one-to-one with the files they were made for.
assert len(predictions) == len(filenames), "predictions and filenames differ in length"

# Per-image correctness flags.
match = [truth == guess for truth, guess in zip(filenz, predictions)]

# Overall accuracy, displayed as the cell's result.
match.count(True)/len(filenames)

['female/img_1.jpg', 'female/img_2.jpg', 'female/img_3.jpg', 'female/img_4.jpg', 'female/img_5.jpg', 'male/img_1.jpg', 'male/img_2.jpg', 'male/img_3.jpg', 'male/img_4.jpg', 'male/img_5.jpg']
['female', 'female', 'female', 'female', 'female', 'female', 'male', 'male', 'male', 'male']


0.9

In [36]:
# Show the per-image correctness flags so misclassified images can be located by position.
print(match)

[True, True, True, True, True, False, True, True, True, True]
