In [97]:
import os
import cv2
import h5py
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

race_map_rev = {
    0: 'White', 
    1: 'Black', 
    2: 'East Asian', 
    3: 'Southeast Asian', 
    4: 'Indian', 
    5: 'Latino_Hispanic', 
    6: 'Middle Eastern'
}
gender_map_rev = {
    0: 'Male', 
    1: 'Female'
}

In [98]:
def create_tf_dataset_partial(hdf5_path, batch_size=50, shuffle=True, max_samples=None):
    def generator():
        with h5py.File(hdf5_path, 'r') as h5f:
            images = h5f['images']
            races = h5f['races']
            genders = h5f['genders']
            total_samples = len(images)
            if max_samples is not None:
                total_samples = min(total_samples, max_samples)
            for i in range(total_samples):
                yield images[i], (races[i], genders[i])

    output_types = (tf.uint8, (tf.int32, tf.int32))
    output_shapes = ((224, 224, 3), ((), ()))
    
    dataset = tf.data.Dataset.from_generator(
        generator,
        output_types=output_types,
        output_shapes=output_shapes
    )

    if shuffle:
        dataset = dataset.shuffle(buffer_size=1000)
    
    dataset = dataset.map(
        lambda img, lbl: (tf.cast(img, tf.float32)/255.0, lbl),
        num_parallel_calls=tf.data.AUTOTUNE
    )
    dataset = dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)
    return dataset



In [99]:
'''
Load Datasets
'''

# Paths to HDF5 files
train_hdf5 = '../data/train.h5'
val_hdf5   = '../data/val.h5'
test_hdf5  = '../data/test.h5'

batch_size = 16  # Smaller batch size for limited resources

# Create smaller training dataset
train_ds = create_tf_dataset_partial(
    hdf5_path=train_hdf5,
    batch_size=batch_size,
    shuffle=True,
    max_samples=5000  # Use only 20k images from possibly 50k or 100k
)

val_ds = create_tf_dataset_partial(
    hdf5_path=val_hdf5,
    batch_size=batch_size,
    shuffle=False,
    max_samples=5000   # Example partial validation set
)

test_ds = create_tf_dataset_partial(
    hdf5_path=test_hdf5,
    batch_size=batch_size,
    shuffle=False,
    max_samples=None   # Use full or partial test if desired
)


In [100]:
'''
Model Architecture
'''
## 5. Define the Model Architecture (Pre-trained ResNet50)

from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D

def build_model():
    # Load pre-trained ResNet50 without the top layers
    base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    base_model.trainable = False  # Freeze the base model

    # Add custom layers
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(1024, activation='relu')(x)
    x = Dense(512, activation='relu')(x)

    # Race classification head
    race_output = Dense(7, activation='softmax', name='race_output')(x)

    # Gender classification head
    gender_output = Dense(2, activation='softmax', name='gender_output')(x)

    # Define the model with two outputs
    model = Model(inputs=base_model.input, outputs=[race_output, gender_output])

    return model, base_model

# Build the model and get the base_model
model, base_model = build_model()
model.summary()


In [101]:
for layer in model.layers:
    print(layer.name)


input_layer_14
conv1_pad
conv1_conv
conv1_bn
conv1_relu
pool1_pad
pool1_pool
conv2_block1_1_conv
conv2_block1_1_bn
conv2_block1_1_relu
conv2_block1_2_conv
conv2_block1_2_bn
conv2_block1_2_relu
conv2_block1_0_conv
conv2_block1_3_conv
conv2_block1_0_bn
conv2_block1_3_bn
conv2_block1_add
conv2_block1_out
conv2_block2_1_conv
conv2_block2_1_bn
conv2_block2_1_relu
conv2_block2_2_conv
conv2_block2_2_bn
conv2_block2_2_relu
conv2_block2_3_conv
conv2_block2_3_bn
conv2_block2_add
conv2_block2_out
conv2_block3_1_conv
conv2_block3_1_bn
conv2_block3_1_relu
conv2_block3_2_conv
conv2_block3_2_bn
conv2_block3_2_relu
conv2_block3_3_conv
conv2_block3_3_bn
conv2_block3_add
conv2_block3_out
conv3_block1_1_conv
conv3_block1_1_bn
conv3_block1_1_relu
conv3_block1_2_conv
conv3_block1_2_bn
conv3_block1_2_relu
conv3_block1_0_conv
conv3_block1_3_conv
conv3_block1_0_bn
conv3_block1_3_bn
conv3_block1_add
conv3_block1_out
conv3_block2_1_conv
conv3_block2_1_bn
conv3_block2_1_relu
conv3_block2_2_conv
conv3_block2_2_bn

In [102]:
# Compile the model
model.compile(
    optimizer=optimizers.Adam(learning_rate=1e-4),
    loss={
        'race_output': 'sparse_categorical_crossentropy',
        'gender_output': 'sparse_categorical_crossentropy'
    },
    metrics={
        'race_output': 'accuracy',
        'gender_output': 'accuracy'
    }
)


In [103]:
# Define callbacks
checkpoint_filepath = '../models/best_model.keras'

# Define callbacks with explicit mode
checkpoint = ModelCheckpoint(
    filepath=checkpoint_filepath,
    monitor='val_race_output_accuracy',
    save_best_only=True,
    verbose=1,
    mode='max'  # Explicitly maximize accuracy
)

early_stop = EarlyStopping(
    monitor='val_race_output_accuracy',
    patience=8,
    restore_best_weights=True,
    verbose=1,
    mode='max'  # Explicitly maximize accuracy
)

callbacks = [checkpoint, early_stop]

In [104]:
epochs = 10
steps_per_epoch = 5000 // 100  # 50
validation_steps = 1000 // 100  # 10

history = model.fit(
    train_ds,
    epochs=epochs,
    steps_per_epoch=steps_per_epoch,
    validation_data=val_ds,
    validation_steps=validation_steps,
    callbacks=callbacks
)


Epoch 1/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 514ms/step - gender_output_accuracy: 0.5372 - gender_output_loss: 0.7025 - loss: 2.6917 - race_output_accuracy: 0.1804 - race_output_loss: 1.9892
Epoch 1: val_race_output_accuracy improved from -inf to 0.14375, saving model to ../models/best_model.keras
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 647ms/step - gender_output_accuracy: 0.5367 - gender_output_loss: 0.7026 - loss: 2.6913 - race_output_accuracy: 0.1803 - race_output_loss: 1.9887 - val_gender_output_accuracy: 0.5000 - val_gender_output_loss: 0.7196 - val_loss: 2.6470 - val_race_output_accuracy: 0.1437 - val_race_output_loss: 1.9274
Epoch 2/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 553ms/step - gender_output_accuracy: 0.5102 - gender_output_loss: 0.6972 - loss: 2.6586 - race_output_accuracy: 0.1548 - race_output_loss: 1.9614
Epoch 2: val_race_output_accuracy improved from 0.14375 to 0.21250, saving model to 

  self.gen.throw(typ, value, traceback)



Epoch 7: val_race_output_accuracy did not improve from 0.23750
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 326ms/step - gender_output_accuracy: 0.5367 - gender_output_loss: 0.6589 - loss: 2.6838 - race_output_accuracy: 0.1327 - race_output_loss: 1.8832 - val_gender_output_accuracy: 0.5250 - val_gender_output_loss: 0.6929 - val_loss: 2.6310 - val_race_output_accuracy: 0.1375 - val_race_output_loss: 1.9382
Epoch 8/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 701ms/step - gender_output_accuracy: 0.4692 - gender_output_loss: 0.7030 - loss: 2.6420 - race_output_accuracy: 0.1801 - race_output_loss: 1.9390
Epoch 8: val_race_output_accuracy did not improve from 0.23750
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 847ms/step - gender_output_accuracy: 0.4702 - gender_output_loss: 0.7029 - loss: 2.6419 - race_output_accuracy: 0.1801 - race_output_loss: 1.9391 - val_gender_output_accuracy: 0.5063 - val_gender_output_loss: 0.7213 - val

In [106]:
## 9. (Optional) Fine-Tuning

# Unfreeze some layers of the base model for fine-tuning
base_model.trainable = True

# Fine-tune from this layer onwards
fine_tune_at = len(base_model.layers) - 20 # Adjust based on the model.summary()

for layer in base_model.layers[:fine_tune_at]:
    layer.trainable = False

# Recompile the model with a lower learning rate
model.compile(
    optimizer=optimizers.Adam(learning_rate=1e-5),
    loss={
        'race_output': 'sparse_categorical_crossentropy',
        'gender_output': 'sparse_categorical_crossentropy'
    },
    metrics={
        'race_output': 'accuracy',
        'gender_output': 'accuracy'
    }
)

# Continue training
fine_tune_epochs = 10
steps_per_epoch = 5000 // 100  # 50
validation_steps = 1000 // 100  # 10
total_epochs = epochs + fine_tune_epochs

history_fine = model.fit(
    train_ds,
    epochs=total_epochs,
    steps_per_epoch=steps_per_epoch,
    initial_epoch=history.epoch[-1],
    validation_data=val_ds,
    validation_steps=validation_steps,
    callbacks=callbacks
)

Epoch 10/20
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 779ms/step - gender_output_accuracy: 0.6709 - gender_output_loss: 0.6405 - loss: 2.4789 - race_output_accuracy: 0.2848 - race_output_loss: 1.8383
Epoch 10: val_race_output_accuracy did not improve from 0.23750
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 930ms/step - gender_output_accuracy: 0.6708 - gender_output_loss: 0.6405 - loss: 2.4789 - race_output_accuracy: 0.2844 - race_output_loss: 1.8385 - val_gender_output_accuracy: 0.5000 - val_gender_output_loss: 0.7007 - val_loss: 2.6271 - val_race_output_accuracy: 0.1813 - val_race_output_loss: 1.9264
Epoch 11/20
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 857ms/step - gender_output_accuracy: 0.6554 - gender_output_loss: 0.6269 - loss: 2.4698 - race_output_accuracy: 0.2452 - race_output_loss: 1.8429
Epoch 11: val_race_output_accuracy did not improve from 0.23750
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[

2024-12-30 13:06:08.191380: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
  self.gen.throw(typ, value, traceback)



Epoch 16: val_race_output_accuracy did not improve from 0.28125
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 359ms/step - gender_output_accuracy: 0.6566 - gender_output_loss: 0.5812 - loss: 2.3971 - race_output_accuracy: 0.2947 - race_output_loss: 1.6904 - val_gender_output_accuracy: 0.6125 - val_gender_output_loss: 0.6716 - val_loss: 2.4739 - val_race_output_accuracy: 0.2750 - val_race_output_loss: 1.8023
Epoch 17/20
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 936ms/step - gender_output_accuracy: 0.6626 - gender_output_loss: 0.6122 - loss: 2.3595 - race_output_accuracy: 0.3162 - race_output_loss: 1.7473
Epoch 17: val_race_output_accuracy improved from 0.28125 to 0.32500, saving model to ../models/best_model.keras
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 1s/step - gender_output_accuracy: 0.6631 - gender_output_loss: 0.6119 - loss: 2.3590 - race_output_accuracy: 0.3164 - race_output_loss: 1.7471 - val_gender_output_accurac