# Human Face Gender Classification

In [39]:
import os

import cv2
import numpy as np
import pandas as pd
import sklearn
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import shuffle
from tensorflow.keras import layers

In [None]:
Load the data into data frames.

In [3]:
# Read the train/validation label tables and keep only the label-based column
# slice from 'file' through 'race' (both endpoints inclusive).
face_train = pd.read_csv('fairface_label_train.csv').loc[:, 'file':'race']
face_test = pd.read_csv('fairface_label_val.csv').loc[:, 'file':'race']

In [35]:
# Crop the first detected frontal face out of every training image, save it as a
# 224x224 JPEG under TrainClean/, and record the crop path with its gender label.
# Images in which no face is detected are dropped from the cleaned data set.

# cv2.imwrite returns False without raising when the target folder is missing,
# so make sure the output folder exists before the loop starts.
os.makedirs('TrainClean', exist_ok=True)

# Building the Haar cascade is loop-invariant: load the detector once.
faceCascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_default.xml")

train_rows = []          # one dict per saved crop; turned into a DataFrame once at the end
train_unreadable = []    # source paths that cv2.imread could not decode

for i, (imagePath, gender) in enumerate(zip(face_train['file'], face_train['gender'])):
    image = cv2.imread(imagePath)
    if image is None:
        # cv2.imread signals a missing/unreadable file by returning None
        train_unreadable.append(imagePath)
        continue

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    faces = faceCascade.detectMultiScale(
        gray,
        scaleFactor=1.03,
        minNeighbors=12,
        minSize=(120, 120)
    )
    if len(faces) == 0:
        continue

    # Only the first detection is used, even when several faces are reported.
    x, y, w, h = faces[0]
    roi_color = image[y:y + h, x:x + w]
    cleanPath = 'TrainClean/' + str(i+1) + '.jpg'

    # Record the row only when the crop was actually written to disk.
    if cv2.imwrite(cleanPath, cv2.resize(src=roi_color, dsize=(224, 224))):
        train_rows.append({'file': cleanPath, 'gender': gender})

if train_unreadable:
    print(f"Skipped {len(train_unreadable)} unreadable training image(s).")

# Build the frame in one step: DataFrame.append was removed in pandas 2.0 and
# copying the frame on every row made the loop quadratic.
face_train_clean = pd.DataFrame(train_rows, columns=['file', 'gender'])
face_train_clean.to_csv('clean_training_data.csv')

In [44]:
# Crop the first detected frontal face out of every test image, save it as a
# 224x224 JPEG under TestClean/, and record the crop path with its gender label.
# Images in which no face is detected are dropped from the cleaned data set.

# cv2.imwrite returns False without raising when the target folder is missing,
# so make sure the output folder exists before the loop starts.
os.makedirs('TestClean', exist_ok=True)

# Building the Haar cascade is loop-invariant: load the detector once.
faceCascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_default.xml")

test_rows = []          # one dict per saved crop; turned into a DataFrame once at the end
test_unreadable = []    # source paths that cv2.imread could not decode

for i, (imagePath, gender) in enumerate(zip(face_test['file'], face_test['gender'])):
    image = cv2.imread(imagePath)
    if image is None:
        # cv2.imread signals a missing/unreadable file by returning None
        test_unreadable.append(imagePath)
        continue

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    faces = faceCascade.detectMultiScale(
        gray,
        scaleFactor=1.03,
        minNeighbors=12,
        minSize=(120, 120)
    )
    if len(faces) == 0:
        continue

    # Only the first detection is used, even when several faces are reported.
    x, y, w, h = faces[0]
    roi_color = image[y:y + h, x:x + w]
    cleanPath = 'TestClean/' + str(i+1) + '.jpg'

    # Record the row only when the crop was actually written to disk.
    if cv2.imwrite(cleanPath, cv2.resize(src=roi_color, dsize=(224, 224))):
        test_rows.append({'file': cleanPath, 'gender': gender})

if test_unreadable:
    print(f"Skipped {len(test_unreadable)} unreadable test image(s).")

# Build the frame in one step: DataFrame.append was removed in pandas 2.0 and
# copying the frame on every row made the loop quadratic.
face_test_clean = pd.DataFrame(test_rows, columns=['file', 'gender'])
face_test_clean.to_csv('clean_test_data.csv')

In [4]:
face_train_clean = pd.read_csv('clean_training_data.csv')
face_test_clean = pd.read_csv('clean_test_data.csv')


def load_image_stack(paths):
    """Read every image listed in `paths` into a single uint8 array.

    Parameters
    ----------
    paths : sequence of str
        Image file paths; each image must decode to shape (224, 224, 3).

    Returns
    -------
    numpy.ndarray
        Array of shape (len(paths), 224, 224, 3) and dtype uint8.

    Raises
    ------
    FileNotFoundError
        If cv2.imread cannot read one of the files (it returns None then).
    """
    stack = np.zeros((len(paths), 224, 224, 3), dtype='uint8')
    for idx, path in enumerate(paths):
        pixels = cv2.imread(path)
        if pixels is None:
            raise FileNotFoundError("Could not read image: " + str(path))
        stack[idx] = pixels
    return stack


# Load x and y training and testing data
train_images = load_image_stack(face_train_clean['file'])
test_images = load_image_stack(face_test_clean['file'])

# map the labels to numbers
# The encoder is fitted on the training labels only and then reused for the
# test labels, so both sets are guaranteed to share the same class -> integer
# mapping (refitting on the test labels could silently produce a different one).
le = LabelEncoder()
train_labels = le.fit_transform(np.array(face_train_clean['gender']))
test_labels = le.transform(np.array(face_test_clean['gender']))


In [None]:
# Convolutional network for binary gender classification on 224x224x3 face crops.
# NOTE(review): train_images is passed to fit() as raw uint8 pixels and the model
# contains no rescaling step — confirm whether inputs should be normalised.
model = tf.keras.models.Sequential()

# First stage: strided 7x7 convolution, which also declares the input shape.
model.add(layers.Conv2D(filters=32,
                        input_shape=(224, 224, 3),
                        kernel_size=(7, 7),
                        strides=(2, 2),
                        activation='relu',
                        padding='same'))
model.add(layers.MaxPooling2D(pool_size=(2, 2), padding="same"))
model.add(layers.Dropout(0.25))

# Remaining convolutional stages as (filters, kernel_size); each one is followed
# by 2x2 max-pooling and 25% dropout, exactly like the first stage.
for n_filters, kernel in [(64, (5, 5)), (96, (5, 5)), (128, (5, 5)), (64, (3, 3))]:
    model.add(layers.Conv2D(filters=n_filters,
                            kernel_size=kernel,
                            activation='relu',
                            padding='same'))
    model.add(layers.MaxPooling2D(pool_size=(2, 2), padding="same"))
    model.add(layers.Dropout(0.25))

# Classifier head: three shrinking dense layers with 20% dropout, then a single
# sigmoid unit that outputs the probability of the class encoded as 1.
model.add(layers.Flatten())
for n_units in (1024, 512, 256):
    model.add(layers.Dense(n_units, activation='relu'))
    model.add(layers.Dropout(0.2))
model.add(layers.Dense(1, activation='sigmoid'))

# `metrics` is documented as a list; a bare string is rejected by newer Keras releases.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

model.summary()

# Stop on the held-out validation loss (validation_split below provides it);
# monitoring the training loss cannot detect over-fitting.
es = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=2)
# Full-model checkpoint written every 1000 batches, named after the current epoch.
chp = tf.keras.callbacks.ModelCheckpoint(filepath='checkpoints/weights.{epoch:02d}.hdf5', save_weights_only=False, save_freq=1000)
model.fit(train_images, train_labels, epochs=60, batch_size=128, validation_split=0.3, callbacks=[es, chp])


In [16]:
# NOTE(review): `reconstructed_model` is not defined in any visible cell — presumably
# a saved checkpoint reloaded in a cell that was removed. Confirm and restore that
# cell so the notebook survives Restart & Run All.

# Threshold the sigmoid outputs at 0.5 by rounding, then cast to integer class ids.
predictions = np.round(reconstructed_model.predict(test_images)).astype(int)

# Compare the single prediction column against the 1-D label vector. The labels
# are deliberately left 1-D: the former `test_labels.reshape(...)` call discarded
# its result, and comparing against a column vector would broadcast to an n x n matrix.
correctPredictions = np.sum(predictions[:, 0] == test_labels)

accuracy = (correctPredictions / len(predictions))*100
print("Gender classification model is " + str(accuracy) + "% accurate.")

Gender classification model is 88.0393227744402% accurate.


In [41]:
# Confusion matrix of the test-set predictions: rows are the true classes,
# columns are the predicted classes.
matrix = sklearn.metrics.confusion_matrix(y_true=test_labels, y_pred=predictions)
matrix

array([[2547,  345],
       [ 312, 2289]], dtype=int64)

In [40]:
import os

# Export this notebook to HTML through the nbconvert command-line tool; the
# cell's displayed value is the exit status returned by os.system.
nbconvert_command = 'jupyter nbconvert --to html GenderRecognision.ipynb'
os.system(nbconvert_command)

0