<a href="https://colab.research.google.com/github/atharva-ketkar1/DemographicPredictor/blob/main/main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [15]:
import os
import cv2
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Input
from tensorflow.keras.utils import to_categorical
from google.colab import drive
# Attach Google Drive to the Colab filesystem at this mount point
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [24]:
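# One-time setup: uncomment to extract the dataset archive from Google Drive into the working directory.
# The cells below read images from the 'UTKFace' folder, which this archive is expected to provide.
#!unzip '/content/drive/MyDrive/Demographic Predictor Files/UTKFace.zip';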

In [17]:
def data_generator(file_names, dataset_path, batch_size):
    """Endlessly yield batches of face images with age, gender and race labels.

    Labels are parsed from each file name, which is expected to start with
    ``<age>_<gender>_<race>_``. A file is skipped (never raised on) when:

    * its name has fewer than four ``_``-separated parts,
    * any of the three label fields is not an integer,
    * the race label is outside ``0..4`` (the one-hot encoding has 5 classes),
    * OpenCV cannot read the image.

    Every image is resized to 128x128 and divided by 255. Samples left over at
    the end of a pass that do not fill a whole batch are discarded, and the
    next pass starts again from the beginning of ``file_names``.

    Args:
        file_names: Sequence of file names located in ``dataset_path``. It is
            iterated again on every pass, so it must be re-iterable.
        dataset_path: Directory that contains the files.
        batch_size: Number of samples per yielded batch (must be >= 1).

    Yields:
        Tuple ``(images, labels)``. ``images`` is an array holding
        ``batch_size`` resized images. ``labels`` is a dict with the keys
        ``'age_output'`` and ``'gender_output'`` (arrays of ints) and
        ``'race_output'`` (one-hot encoded over 5 classes).

    Raises:
        ValueError: When iteration starts with ``batch_size`` < 1, or when a
            complete pass over ``file_names`` does not produce a single full
            batch (the generator would otherwise loop forever without
            yielding anything).
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be at least 1, got {batch_size}")

    num_race_classes = 5

    while True:
        batch_images = []
        batch_age_labels = []
        batch_gender_labels = []
        batch_race_labels = []
        batches_this_pass = 0

        for file_name in file_names:
            parts = file_name.split('_')
            if len(parts) < 4:
                continue

            # Stray files whose leading fields are not numbers must not abort training.
            try:
                age = int(parts[0])
                gender = int(parts[1])
                race = int(parts[2])
            except ValueError:
                continue

            # An out-of-range class index would crash the one-hot encoding
            # (or silently wrap around when negative).
            if not 0 <= race < num_race_classes:
                continue

            img_path = os.path.join(dataset_path, file_name)
            img = cv2.imread(img_path)
            if img is not None:
                img = cv2.resize(img, (128, 128)) / 255.0
                batch_images.append(img)
                batch_age_labels.append(age)
                batch_gender_labels.append(gender)
                batch_race_labels.append(race)

            if len(batch_images) == batch_size:
                batches_this_pass += 1
                yield np.array(batch_images), {
                    'age_output': np.array(batch_age_labels),
                    'gender_output': np.array(batch_gender_labels),
                    'race_output': to_categorical(batch_race_labels, num_classes=num_race_classes)
                }
                batch_images = []
                batch_age_labels = []
                batch_gender_labels = []
                batch_race_labels = []

        # Without this guard the outer loop would spin forever when no batch
        # can ever be completed (e.g. empty or entirely unusable file list).
        if batches_this_pass == 0:
            raise ValueError(
                f"No full batch of {batch_size} usable images could be built from "
                f"the given file names in {dataset_path!r}"
            )

In [18]:
# Directory holding the extracted images (relative to the working directory)
dataset_path = 'UTKFace'
# os.listdir returns entries in arbitrary order, so sort them first; otherwise
# the seeded split below could still differ between runs or machines.
file_names = sorted(os.listdir(dataset_path))

# Split the file names into training and testing sets (80% / 20%, fixed seed)
train_files, test_files = train_test_split(file_names, test_size=0.2, random_state=42)

# Set the batch size
batch_size = 32

# Create data generators for training and testing
train_generator = data_generator(train_files, dataset_path, batch_size)
test_generator = data_generator(test_files, dataset_path, batch_size)

In [19]:
# Shared convolutional trunk: four Conv2D (3x3, ReLU) + MaxPooling2D (2x2) stages
# applied to 128x128 3-channel inputs, with 32, 64, 128 and 256 filters.
input_layer = Input(shape=(128, 128, 3))
features = input_layer
for n_filters in (32, 64, 128, 256):
    features = Conv2D(n_filters, kernel_size=(3, 3), activation='relu')(features)
    features = MaxPooling2D(pool_size=(2, 2))(features)

flatten = Flatten()(features)


def _prediction_head(shared_features, units, activation, name):
    """Build one task head: Dense(64, ReLU) -> Dropout(0.3) -> named output layer.

    Dropout is added to try to prevent overfitting.
    """
    hidden = Dense(64, activation='relu')(shared_features)
    hidden = Dropout(0.3)(hidden)
    return Dense(units, activation=activation, name=name)(hidden)


# One head per task, all fed by the same flattened features:
# a single linear unit for age, a single sigmoid unit for gender,
# and a 5-way softmax for race.
output_age = _prediction_head(flatten, 1, 'linear', 'age_output')
output_gender = _prediction_head(flatten, 1, 'sigmoid', 'gender_output')
output_race = _prediction_head(flatten, 5, 'softmax', 'race_output')

# Assemble the three-output model
model = Model(inputs=input_layer, outputs=[output_age, output_gender, output_race])

# Each output gets its own loss and its own reported metric, keyed by layer name
model.compile(
    optimizer='adam',
    loss={
        'age_output': 'mean_squared_error',
        'gender_output': 'binary_crossentropy',
        'race_output': 'categorical_crossentropy',
    },
    metrics={
        'age_output': 'mae',
        'gender_output': 'accuracy',
        'race_output': 'accuracy',
    },
)

# Number of whole batches in each split (floor division, so a trailing
# partial batch is not counted)
steps_per_epoch, validation_steps = (
    len(split) // batch_size for split in (train_files, test_files)
)

# Train all three heads jointly; the test generator doubles as validation data
fit_kwargs = {
    'steps_per_epoch': steps_per_epoch,
    'validation_data': test_generator,
    'validation_steps': validation_steps,
    'epochs': 50,
}
history = model.fit(train_generator, **fit_kwargs)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [20]:
# Evaluate on the test generator. Results are requested as a dict keyed by
# metric name instead of unpacking a positional list: the length and order of
# the positional result are not stable across Keras versions, which makes a
# fixed 7-way unpack brittle.
test_results = model.evaluate(test_generator, steps=validation_steps, return_dict=True)

# Keys follow Keras' "<output layer name>_<metric name>" convention.
test_loss = test_results['loss']
# Per-output losses are looked up leniently because not every Keras version
# reports them separately; they are None when absent.
test_age_loss = test_results.get('age_output_loss')
test_gender_loss = test_results.get('gender_output_loss')
test_race_loss = test_results.get('race_output_loss')
test_age_mae = test_results['age_output_mae']
test_gender_accuracy = test_results['gender_output_accuracy']
test_race_accuracy = test_results['race_output_accuracy']

print(f"Test Age MAE: {test_age_mae}")
print(f"Test Gender Accuracy: {test_gender_accuracy}")
print(f"Test Race Accuracy: {test_race_accuracy}")


Test Age MAE: 7.339992046356201
Test Gender Accuracy: 0.8676097989082336
Test Race Accuracy: 0.712837815284729


In [21]:
# Persist the trained model to an .h5 file in the working directory
h5_model_path = 'demographic_predictor_model.h5'
model.save(h5_model_path)


  saving_api.save_model(


In [22]:
# Persist the trained model to a .keras file in the working directory
keras_model_path = 'demographic_predictor_model.keras'
model.save(keras_model_path)
