In [1]:
import os
import cv2
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Input
from tensorflow.keras.utils import to_categorical
from google.colab import drive
# Mount Google Drive at /content/drive. This relies on the google.colab
# package imported above, so the cell only works inside a Colab runtime.
drive.mount('/content/drive')


In [2]:
# Directory holding the UTKFace image files; given as a relative path, so it is
# resolved against the notebook's current working directory.
# NOTE(review): Drive is mounted at /content/drive, but this path does not point
# under that mount — confirm the data really lives at this location.
dataset_path = 'archive/UTKFace'

def load_images_labels(dataset_path):
    """Load every readable image in a directory together with its labels.

    Labels are parsed from the file name, which is expected to look like
    ``<age>_<gender>_<race>_<rest>``: the name is split on ``_`` and the first
    three parts are converted to integers. Files whose name has fewer than
    four parts, or whose first three parts are not integers, are reported and
    skipped. Files that OpenCV cannot decode are reported and skipped as well.

    Args:
        dataset_path: Path of the directory to scan (not recursive).

    Returns:
        A 4-tuple of NumPy arrays ``(images, age_labels, gender_labels,
        race_labels)``. Each image is resized to 128x128 and kept in the
        channel order returned by ``cv2.imread``. All four arrays have the
        same length and share the same ordering.
    """
    images = []
    age_labels = []
    gender_labels = []
    race_labels = []
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order stable across runs and machines, so any seeded shuffle or split
    # applied to the result is reproducible.
    for file_name in sorted(os.listdir(dataset_path)):
        parts = file_name.split('_')
        if len(parts) < 4:
            print(f'Skipping file: {file_name}')
            continue
        try:
            age = int(parts[0])
            gender = int(parts[1])
            race = int(parts[2])
        except ValueError:
            # A stray file with enough underscores but non-numeric labels
            # should be skipped like other malformed names, not abort the load.
            print(f'Skipping file: {file_name}')
            continue
        img_path = os.path.join(dataset_path, file_name)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals an unreadable/undecodable file by returning None.
            print(f'Unable to read image: {file_name}')
            continue
        images.append(cv2.resize(img, (128, 128)))
        age_labels.append(age)
        gender_labels.append(gender)
        race_labels.append(race)
    return np.array(images), np.array(age_labels), np.array(gender_labels), np.array(race_labels)


In [3]:
# Read the whole dataset into memory, then report how many labels of each kind
# were collected (the three counts should match).
images, age_labels, gender_labels, race_labels = load_images_labels(dataset_path)
print(
    f'Number of age_labels: {len(age_labels)}, '
    f'Number of gender_labels: {len(gender_labels)}, '
    f'Number of race_labels: {len(race_labels)}'
)

Skipping file: 39_1_20170116174525125.jpg.chip.jpg
Skipping file: 61_1_20170109150557335.jpg.chip.jpg
Skipping file: 61_1_20170109142408075.jpg.chip.jpg
Number of age_labels: 23705, Number of gender_labels: 23705, Number of race_labels: 23705


In [4]:
# Scale pixel values to the [0, 1] range. Casting to float32 first avoids
# materialising a float64 copy of the full image array (dividing an integer
# array by 255.0 promotes to float64, doubling the memory footprint).
images = images.astype('float32') / 255.0

# One-hot encode the race labels for the 5-way softmax head.
race_labels = to_categorical(race_labels, num_classes=5)

# Single split call so images and all three label arrays stay aligned.
X_train, X_test, y_train_age, y_test_age, y_train_gender, y_test_gender, y_train_race, y_test_race = train_test_split(
    images, age_labels, gender_labels, race_labels, test_size=0.2, random_state=42)

# Shared convolutional trunk: four Conv2D + MaxPooling2D stages.
input_layer = Input(shape=(128, 128, 3))
conv_1 = Conv2D(32, kernel_size=(3, 3), activation='relu')(input_layer)
max_1 = MaxPooling2D(pool_size=(2, 2))(conv_1)
conv_2 = Conv2D(64, kernel_size=(3, 3), activation='relu')(max_1)
max_2 = MaxPooling2D(pool_size=(2, 2))(conv_2)
conv_3 = Conv2D(128, kernel_size=(3, 3), activation='relu')(max_2)
max_3 = MaxPooling2D(pool_size=(2, 2))(conv_3)
conv_4 = Conv2D(256, kernel_size=(3, 3), activation='relu')(max_3)
max_4 = MaxPooling2D(pool_size=(2, 2))(conv_4)

flatten = Flatten()(max_4)

# Each head below feeds its output layer from the Dropout layer. Wiring the
# output to the Dense layer directly would leave Dropout disconnected from the
# graph and therefore without any effect.

# Fully connected layers for age prediction, dropout is added to try to prevent overfitting
dense_age = Dense(64, activation='relu')(flatten)
dropout_age = Dropout(0.3)(dense_age)
output_age = Dense(1, activation='linear', name='age_output')(dropout_age)

# Fully connected layers for gender prediction, dropout is added to try to prevent overfitting
dense_gender = Dense(64, activation='relu')(flatten)
dropout_gender = Dropout(0.3)(dense_gender)
output_gender = Dense(1, activation='sigmoid', name='gender_output')(dropout_gender)

# Fully connected layers for race prediction, dropout is added to try to prevent overfitting
dense_race = Dense(64, activation='relu')(flatten)
dropout_race = Dropout(0.3)(dense_race)
output_race = Dense(5, activation='softmax', name='race_output')(dropout_race)

model = Model(inputs=input_layer, outputs=[output_age, output_gender, output_race])

# Losses and metrics are keyed by output-layer name: regression for age,
# binary classification for gender, 5-way classification for race.
model.compile(
    optimizer='adam',
    loss={
        'age_output': 'mean_squared_error',
        'gender_output': 'binary_crossentropy',
        'race_output': 'categorical_crossentropy',
    },
    metrics={
        'age_output': 'mae',
        'gender_output': 'accuracy',
        'race_output': 'accuracy',
    },
)

history = model.fit(
    X_train,
    {'age_output': y_train_age, 'gender_output': y_train_gender, 'race_output': y_train_race},
    validation_data=(
        X_test,
        {'age_output': y_test_age, 'gender_output': y_test_gender, 'race_output': y_test_race},
    ),
    epochs=50,
    batch_size=32,
)

Epoch 1/50
[1m507/593[0m [32m━━━━━━━━━━━━━━━━━[0m[37m━━━[0m [1m32s[0m 374ms/step - age_output_mae: 16.6111 - gender_output_accuracy: 0.5585 - loss: 473.9209 - race_output_accuracy: 0.3970

KeyboardInterrupt: 

In [None]:
# Evaluate on the held-out split. The target dict must use the model's actual
# output-layer names ('race_output', not 'ethnicity_output') and the variables
# produced by the train/test split (y_test_race).
# return_dict=True makes the lookup independent of how many scalars evaluate()
# returns for a multi-output model; the keys match the metric names shown in
# the training progress log.
eval_results = model.evaluate(
    X_test,
    {'age_output': y_test_age, 'gender_output': y_test_gender, 'race_output': y_test_race},
    return_dict=True,
)
loss = eval_results['loss']
age_mae = eval_results['age_output_mae']
gender_accuracy = eval_results['gender_output_accuracy']
ethnicity_accuracy = eval_results['race_output_accuracy']
print(f'Test Loss: {loss}')
print(f'Age MAE: {age_mae}')
print(f'Gender Accuracy: {gender_accuracy}')
print(f'Ethnicity Accuracy: {ethnicity_accuracy}')