In [30]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import glob
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


In [31]:
df = pd.read_csv('merged_feret_train.csv')
generated_df = pd.read_csv('feret_generated.csv')
df = pd.concat([df, generated_df], ignore_index=True)

DATA_DIR = "./colored/"
TRAIN_TEST_SPLIT = 0.7
IM_WIDTH = 100
IM_HEIGHT = 100
ID_GENDER_MAP = {0: 'male', 1: 'female'}
GENDER_ID_MAP = dict((g, i) for i, g in ID_GENDER_MAP.items())
ID_RACE_MAP = {0: 'white', 1: 'black', 2: 'asian', 3: 'hispanic', 4: 'middle eastern'}
RACE_ID_MAP = dict((r, i) for i, r in ID_RACE_MAP.items())

ID_GENDER_MAP, GENDER_ID_MAP, ID_RACE_MAP, RACE_ID_MAP



({0: 'male', 1: 'female'},
 {'male': 0, 'female': 1},
 {0: 'white', 1: 'black', 2: 'asian', 3: 'hispanic', 4: 'middle eastern'},
 {'white': 0, 'black': 1, 'asian': 2, 'hispanic': 3, 'middle eastern': 4})

In [32]:
test_df = pd.read_csv('merged_feret_test.csv')

In [33]:
p = np.random.permutation(len(df))
train_up_to = int(len(df))
train_idx = p[:train_up_to]

# split train_idx further into training and validation set
train_up_to = int(train_up_to * 0.7)
train_idx, valid_idx = train_idx[:train_up_to], train_idx[train_up_to:]

df['gender_id'] = df['gender'].map(lambda gender: gender)
df['race_id'] = df['race'].map(lambda race: race)

len(train_idx), len(valid_idx)

(661, 284)

In [34]:
root_path = "./colored/"
generation_path = "./feret_all/"
df['file'] = df['filename'].apply(lambda x: os.path.join(root_path if "final" in x else generation_path, x))
test_df['file'] = test_df['filename'].apply(lambda x: os.path.join(root_path, x))
df

Unnamed: 0,filename,gender,race,is_generated,gender_id,race_id,file
0,00823_940307_fa_a_converted_final.png,1,4,,1,4,./colored/00823_940307_fa_a_converted_final.png
1,00855_940307_fa_converted_final.png,0,4,,0,4,./colored/00855_940307_fa_converted_final.png
2,00869_940307_fa_converted_final.png,0,4,,0,4,./colored/00869_940307_fa_converted_final.png
3,00912_960530_fa_converted_final.png,0,4,,0,4,./colored/00912_960530_fa_converted_final.png
4,00926_960627_fa_converted_final.png,0,4,,0,4,./colored/00926_960627_fa_converted_final.png
...,...,...,...,...,...,...,...
940,1_4_20231227211914296737.png,1,4,True,1,4,./feret_all/1_4_20231227211914296737.png
941,1_4_20231227211945054664.png,1,4,True,1,4,./feret_all/1_4_20231227211945054664.png
942,1_4_20231227212007592997.png,1,4,True,1,4,./feret_all/1_4_20231227212007592997.png
943,1_4_20231227212143839029.png,1,4,True,1,4,./feret_all/1_4_20231227212143839029.png


In [35]:
from keras.utils import to_categorical
from PIL import Image

def get_data_generator(df, indices, for_training, batch_size=16):
    images, races, genders = [], [], []
    while True:
        for i in indices:
            r = df.iloc[i]
            file, race, gender = r['file'], r['race_id'], r['gender_id']
            im = Image.open(file)
            im = im.resize((IM_WIDTH, IM_HEIGHT))
            im = np.array(im)
            images.append(im)
            races.append(to_categorical(race, len(RACE_ID_MAP)))
            genders.append(to_categorical(gender, 2))
            if len(images) >= batch_size:
                yield np.array(images), [np.array(races), np.array(genders)]
                images, races, genders = [], [], []
        if not for_training:
            break

In [36]:
from keras.layers import Input, Dense, BatchNormalization, Conv2D, MaxPool2D, GlobalMaxPool2D, Dropout
from keras.optimizers import SGD
from keras.models import Model

def conv_block(inp, filters=32, bn=True, pool=True):
    _ = Conv2D(filters=filters, kernel_size=3, activation='relu')(inp)
    if bn:
        _ = BatchNormalization()(_)
    if pool:
        _ = MaxPool2D()(_)
    return _

input_layer = Input(shape=(IM_HEIGHT, IM_WIDTH, 3))
_ = conv_block(input_layer, filters=32, bn=False, pool=False)
_ = conv_block(_, filters=32*2)
_ = conv_block(_, filters=32*3)
_ = conv_block(_, filters=32*4)
_ = conv_block(_, filters=32*5)
_ = conv_block(_, filters=32*6)
bottleneck = GlobalMaxPool2D()(_)


# for race prediction
_ = Dense(units=1024, activation='relu')(bottleneck)
race_output = Dense(units=len(RACE_ID_MAP), activation='softmax', name='race_output')(_)

# for gender prediction
_ = Dense(units=256, activation='relu')(bottleneck)
gender_output = Dense(units=len(GENDER_ID_MAP), activation='softmax', name='gender_output')(_)

model = Model(inputs=input_layer, outputs=[race_output, gender_output])
model.compile(optimizer='rmsprop', 
              loss={'race_output': 'categorical_crossentropy', 'gender_output': 'categorical_crossentropy'},
              loss_weights={'race_output': 1.5, 'gender_output': 1.},
              metrics={'race_output': 'accuracy', 'gender_output': 'accuracy'})

In [None]:
from keras.callbacks import ModelCheckpoint

batch_size = 64
valid_batch_size = 64
train_gen = get_data_generator(df, train_idx, for_training=True, batch_size=batch_size)
valid_gen = get_data_generator(df, valid_idx, for_training=True, batch_size=valid_batch_size)

callbacks = [
    ModelCheckpoint("./model_checkpoint", monitor='val_loss')
]

history = model.fit_generator(train_gen,
                    steps_per_epoch=len(train_idx)//batch_size,
                    epochs=16,
                    callbacks=callbacks,
                    validation_data=valid_gen,
                    validation_steps=len(valid_idx)//valid_batch_size)



  history = model.fit_generator(train_gen,


Epoch 1/16


INFO:tensorflow:Assets written to: ./model_checkpoint/assets


Epoch 2/16


INFO:tensorflow:Assets written to: ./model_checkpoint/assets


Epoch 3/16


INFO:tensorflow:Assets written to: ./model_checkpoint/assets


Epoch 4/16


INFO:tensorflow:Assets written to: ./model_checkpoint/assets


Epoch 5/16


INFO:tensorflow:Assets written to: ./model_checkpoint/assets


Epoch 6/16


INFO:tensorflow:Assets written to: ./model_checkpoint/assets


Epoch 7/16


INFO:tensorflow:Assets written to: ./model_checkpoint/assets


Epoch 8/16


INFO:tensorflow:Assets written to: ./model_checkpoint/assets


Epoch 9/16


INFO:tensorflow:Assets written to: ./model_checkpoint/assets


Epoch 10/16


INFO:tensorflow:Assets written to: ./model_checkpoint/assets


Epoch 11/16


INFO:tensorflow:Assets written to: ./model_checkpoint/assets


Epoch 12/16


INFO:tensorflow:Assets written to: ./model_checkpoint/assets


Epoch 13/16


INFO:tensorflow:Assets written to: ./model_checkpoint/assets


Epoch 14/16


INFO:tensorflow:Assets written to: ./model_checkpoint/assets


Epoch 15/16

In [None]:
def  plot_train_history(history):
    fig, axes = plt.subplots(1, 3, figsize=(20, 5))
    axes[0].plot(history.history['race_output_accuracy'], label='Race Train accuracy')
    axes[0].plot(history.history['val_race_output_accuracy'], label='Race Val accuracy')
    axes[0].set_xlabel('Epochs')
    axes[0].legend()
    
    axes[1].plot(history.history['gender_output_accuracy'], label='Gender Train accuracy')
    axes[1].plot(history.history['val_gender_output_accuracy'], label='Gener Val accuracy')
    axes[1].set_xlabel('Epochs')
    axes[1].legend()


    axes[2].plot(history.history['loss'], label='Training loss')
    axes[2].plot(history.history['val_loss'], label='Validation loss')
    axes[2].set_xlabel('Epochs')
    axes[2].legend()

plot_train_history(history)



In [None]:
p_t = np.random.permutation(len(test_df))
test_index = p_t[:]
len(test_index)
test_df['gender_id'] = test_df['gender'].map(lambda gender: gender)
test_df['race_id'] = test_df['race'].map(lambda race: race)

In [None]:
test_gen = get_data_generator(test_df, test_index, for_training=False, batch_size=128)
dict(zip(model.metrics_names, model.evaluate_generator(test_gen, steps=len(test_index)//128)))

In [None]:
test_gen = get_data_generator(test_df, test_index, for_training=False, batch_size=128)
x_test, (race_true, gender_true)= next(test_gen)
race_pred, gender_pred = model.predict_on_batch(x_test)

In [None]:
race_pred

In [None]:
race_true, gender_true = race_true.argmax(axis=-1), gender_true.argmax(axis=-1)
race_pred, gender_pred = race_pred.argmax(axis=-1), gender_pred.argmax(axis=-1)

In [None]:
from sklearn.metrics import classification_report
print("Classification report for race")
print(classification_report(race_true, race_pred))

print("\nClassification report for gender")
print(classification_report(gender_true, gender_pred))