# Step-by-Step Implementation

1. Load and Prepare the Dataset

In [2]:
import os
import numpy as np
import cv2
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [3]:
# Function to load UTKFace dataset
def load_dataset(dataset_path, target_size=(128, 128)):
    """Load UTKFace images together with their age and gender labels.

    Only the first two underscore-separated fields of each file name are
    parsed, as ``<age>_<gender>_...``; any remaining fields are ignored.

    Args:
        dataset_path: Directory that contains the ``.jpg`` files.
        target_size: ``(width, height)`` handed to ``cv2.resize``.

    Returns:
        Tuple ``(images, ages, genders)`` of NumPy arrays aligned by index.
        Images are converted from BGR to RGB and resized to ``target_size``.

    Files whose name cannot be parsed, or that OpenCV cannot decode, are
    skipped and reported through a single ``UserWarning`` instead of aborting
    the whole load.
    """
    import warnings

    images = []
    ages = []
    genders = []
    skipped = []

    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore any seeded train/test split) reproducible.
    for image_name in sorted(os.listdir(dataset_path)):
        if not image_name.endswith('.jpg'):
            continue

        # Parse the labels before touching the pixels so that a malformed
        # name is rejected cheaply.
        parts = image_name.split('_')
        try:
            age = int(parts[0])
            gender = int(parts[1])
        except (IndexError, ValueError):
            skipped.append(image_name)
            continue

        # cv2.imread signals an unreadable/corrupt file by returning None
        # rather than raising.
        image = cv2.imread(os.path.join(dataset_path, image_name))
        if image is None:
            skipped.append(image_name)
            continue

        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # OpenCV decodes as BGR
        image = cv2.resize(image, target_size)
        images.append(image)
        ages.append(age)
        genders.append(gender)

    if skipped:
        warnings.warn(
            f"Skipped {len(skipped)} unreadable or misnamed file(s), e.g. {skipped[:3]}"
        )

    images = np.array(images)
    ages = np.array(ages)
    genders = np.array(genders)

    return images, ages, genders


In [4]:
# Replace 'dataset_path' with your actual path to UTKFace dataset
dataset_path = 'UTKFace'
images, ages, genders = load_dataset(dataset_path)

# Fail fast with a clear message: an empty load would otherwise only surface
# later as an IndexError on images[0] or as an empty train/test split.
if len(images) == 0:
    raise ValueError(f"No '.jpg' images could be loaded from {dataset_path!r}")
assert len(images) == len(ages) == len(genders), "images and labels are misaligned"


In [5]:
# Summarise what was loaded: sample count, plus the shape and labels of the first sample
print(
    f"Number of images: {len(images)}",
    f"Example image shape: {images[0].shape}",
    f"Example age: {ages[0]}, gender: {genders[0]}",
    sep="\n",
)

Number of images: 23708
Example image shape: (128, 128, 3)
Example age: 100, gender: 0


2. Preprocess the Data

In [6]:
# Override age labels for very bright or very dark images (a simplified stand-in for shirt-colour detection)
def adjust_age_based_on_shirt_color(images, ages):
    """Return the ages with overrides driven by each image's mean colour.

    For every image the per-channel mean over the first two axes is computed:
      * first three channel means all above 200 -> age is replaced by 23
      * first three channel means all below 50  -> age is replaced by 12
      * anything else                           -> the original age is kept

    Note that the mean is taken over the whole image, not a shirt region.

    Args:
        images: Indexable collection of image arrays.
        ages: Indexable collection of ages aligned with ``images``.

    Returns:
        NumPy array holding one (possibly overridden) age per image.
    """
    def _resolve_age(picture, original_age):
        channel_means = np.mean(picture, axis=(0, 1))
        # Generators keep the channel lookups lazy and short-circuiting.
        if all(channel_means[c] > 200 for c in range(3)):
            return 23
        if all(channel_means[c] < 50 for c in range(3)):
            return 12  # Child
        return original_age  # No adjustment

    return np.array([_resolve_age(images[idx], ages[idx]) for idx in range(len(images))])

In [7]:
# Apply the mean-colour based age override to the raw labels
adjusted_ages = adjust_age_based_on_shirt_color(images, ages)

# Convert gender to categorical (one-hot encoding)
genders_categorical = to_categorical(genders)

# Split dataset into training and testing sets (80/20, fixed seed for repeatability)
X_train, X_test, age_train, age_test, gender_train, gender_test = train_test_split(images, adjusted_ages, genders_categorical, test_size=0.2, random_state=42)

# Normalize pixel values to [0, 1].
# Cast to float32 first: dividing an integer image array by a Python float
# promotes it to float64, which doubles the memory of the full image tensor
# for no gain in training precision.
X_train = X_train.astype(np.float32) / 255.0
X_test = X_test.astype(np.float32) / 255.0

In [8]:
# Report the array shapes of both splits as a quick sanity check on the 80/20 partition
print(
    f"X_train shape: {X_train.shape}, age_train shape: {age_train.shape}, gender_train shape: {gender_train.shape}",
    f"X_test shape: {X_test.shape}, age_test shape: {age_test.shape}, gender_test shape: {gender_test.shape}",
    sep="\n",
)

X_train shape: (18966, 128, 128, 3), age_train shape: (18966,), gender_train shape: (18966, 2)
X_test shape: (4742, 128, 128, 3), age_test shape: (4742,), gender_test shape: (4742, 2)


3. Define and Train the Convolutional Neural Network (CNN)

In [9]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# Define CNN model architecture: a shared convolutional trunk feeding two heads.
# The input shape matches the 128x128 RGB images produced by load_dataset.
input_shape = (128, 128, 3)
inp = Input(shape=input_shape)
# Two conv -> max-pool stages: 32 then 64 filters, 3x3 kernels, ReLU activations
conv1 = Conv2D(32, kernel_size=(3, 3), activation='relu')(inp)
pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
conv2 = Conv2D(64, kernel_size=(3, 3), activation='relu')(pool1)
pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)
# Flatten the feature maps into a vector for the fully connected layer
flatten = Flatten()(pool2)
dense1 = Dense(128, activation='relu')(flatten)
# Dropout (rate 0.5) on the shared 128-unit representation that both heads consume
drop1 = Dropout(0.5)(dense1)

# Output layers for age and gender prediction.
# Age: single unit with no activation argument (regression output).
# Gender: 2-unit softmax, matching the one-hot gender targets.
# The layer names are the keys used in the loss/metric/target dicts at compile and fit time.
out_age = Dense(1, name='age_output')(drop1)
out_gender = Dense(2, activation='softmax', name='gender_output')(drop1)

In [10]:
# Assemble the two-output model from the shared input and both heads
model = Model(inputs=inp, outputs=[out_age, out_gender])

# Per-head objectives and metrics, keyed by output layer name
head_losses = {'age_output': 'mean_squared_error', 'gender_output': 'categorical_crossentropy'}
head_metrics = {'age_output': 'mae', 'gender_output': 'accuracy'}
model.compile(optimizer='adam', loss=head_losses, metrics=head_metrics)

# Targets for each head, keyed the same way as the losses
train_targets = {'age_output': age_train, 'gender_output': gender_train}
validation_targets = {'age_output': age_test, 'gender_output': gender_test}

# Train for 10 epochs.
# NOTE(review): validation here uses the same X_test split that the later
# evaluation cell scores, so the reported test numbers are not from unseen data
# if training decisions are made from these validation curves.
history = model.fit(
    X_train,
    train_targets,
    validation_data=(X_test, validation_targets),
    epochs=10,
    batch_size=32,
    verbose=1,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


4. Evaluate and Test the Model

In [11]:
# Evaluate the model on the held-out split.
# return_dict=True keys every reported value by its metric name, so the labels
# printed below cannot drift if Keras reports the values in a different order
# or reports a different number of them.
results = model.evaluate(X_test, {'age_output': age_test, 'gender_output': gender_test},
                         return_dict=True)

# Keep the positional list this cell has always exposed, for any later cell
# that still indexes `losses`.
losses = list(results.values())

# Friendly labels for the metric names expected from the compile() call;
# any name not listed here is printed under its raw Keras name instead of
# being silently dropped or mislabelled.
display_names = {
    'loss': 'Loss',
    'age_output_loss': 'Age Loss',
    'gender_output_loss': 'Gender Loss',
    'age_output_mae': 'Age MAE',
    'gender_output_accuracy': 'Gender Accuracy',
}
for metric_name, metric_value in results.items():
    print(f"{display_names.get(metric_name, metric_name)}: {metric_value}")


Loss: 105.0963134765625
Age Loss: 104.55445861816406
Gender Loss: 0.5418500900268555
Age MAE: 7.554746627807617
Gender Accuracy: 0.7707718014717102


In [12]:
# Write the trained `model` (built and fitted in the cells above) to 'Model.h5',
# a path relative to the current working directory.
# NOTE(review): the truncated `saving_api.save_model(` output under this cell looks
# like the tail of a Keras warning about saving in HDF5 format — confirm, and consider
# the native `.keras` format once every consumer of 'Model.h5' has been updated.
model.save('Model.h5')


  saving_api.save_model(
