<a href="https://colab.research.google.com/github/UsmanShafeeq/Advanced-Machine-Learning-Project/blob/main/Gender_and_Age_Detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Step 1: Enable GPU (Runtime → Change runtime type → GPU) and install TensorFlow


In [2]:
!pip install tensorflow

Collecting tensorflow
  Downloading tensorflow-2.20.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.5 kB)
Collecting astunparse>=1.6.0 (from tensorflow)
  Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=24.3.25 (from tensorflow)
  Downloading flatbuffers-25.12.19-py2.py3-none-any.whl.metadata (1.0 kB)
Collecting google_pasta>=0.1.1 (from tensorflow)
  Downloading google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)
Collecting libclang>=13.0.0 (from tensorflow)
  Downloading libclang-18.1.1-py2.py3-none-manylinux2010_x86_64.whl.metadata (5.2 kB)
Collecting tensorboard~=2.20.0 (from tensorflow)
  Downloading tensorboard-2.20.0-py3-none-any.whl.metadata (1.8 kB)
Collecting wheel<1.0,>=0.23.0 (from astunparse>=1.6.0->tensorflow)
  Downloading wheel-0.45.1-py3-none-any.whl.metadata (2.3 kB)
Collecting tensorboard-data-server<0.8.0,>=0.7.0 (from tensorboard~=2.20.0->tensorflow)
  Downloading tensorboard_data_server-0.7.

In [3]:
import tensorflow as tf

# Ask TensorFlow for the first GPU device and report whether one is attached.
device_name = tf.test.gpu_device_name()
gpu_found = device_name == '/device:GPU:0'
print(f'GPU device found: {device_name}' if gpu_found else 'GPU not found, using CPU.')




GPU not found, using CPU.


Step 2: Mount Google Drive

In [4]:
# Mount Google Drive at /content/drive; the dataset path used later in this
# notebook lives under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


Step 3: Install Required Libraries

In [5]:
!pip install tensorflow keras matplotlib seaborn scikit-learn opencv-python tqdm shap lime


Collecting opencv-python
  Downloading opencv_python-4.12.0.88-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (19 kB)
Collecting shap
  Downloading shap-0.50.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl.metadata (25 kB)
Collecting lime
  Downloading lime-0.2.0.1.tar.gz (275 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m275.7/275.7 kB[0m [31m10.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting slicer==0.0.8 (from shap)
  Downloading slicer-0.0.8-py3-none-any.whl.metadata (4.0 kB)
Downloading opencv_python-4.12.0.88-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (67.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.0/67.0 MB[0m [31m20.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading shap-0.50.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

Step 4: Import Libraries

In [6]:
import os
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, mean_absolute_error, mean_squared_error, classification_report
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.applications import VGG16, ResNet50, EfficientNetB0
from tensorflow.keras.optimizers import Adam


Step 5: Load and Preprocess Dataset

In [None]:
# Dataset path (UTKFace or IMDB-WIKI)
dataset_path = '/content/drive/MyDrive/UTKFace/'  # change accordingly

images, ages, genders = [], [], []
skipped = 0

for file in tqdm(os.listdir(dataset_path)):
    # Labels are encoded in the file name as "<age>_<gender>_...". Parse both
    # BEFORE touching any list so a bad name can never leave images/ages/genders
    # with different lengths.
    try:
        age, gender, _ = file.split('_')[:3]
        age, gender = int(age), int(gender)
    except ValueError:
        skipped += 1
        continue

    img = cv2.imread(os.path.join(dataset_path, file))
    if img is None:
        # cv2.imread signals an unreadable / non-image file by returning None.
        skipped += 1
        continue

    # OpenCV decodes to BGR; convert to RGB so matplotlib shows true colours and
    # the ImageNet-pretrained backbones receive the channel order they expect.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (128,128))

    images.append(img)
    ages.append(age)
    genders.append(gender)

print(f'Loaded {len(images)} images, skipped {skipped} files.')
if not images:
    raise RuntimeError(f'No labelled images could be loaded from {dataset_path}')

# Normalize images
images = np.array(images, dtype='float32')/255.0
ages = np.array(ages, dtype='float32')
genders = np.array(genders, dtype='int')

# Train-test split
X_train, X_test, y_age_train, y_age_test, y_gender_train, y_gender_test = train_test_split(
    images, ages, genders, test_size=0.2, random_state=42
)


  1%|          | 240/23716 [02:31<117:51:02, 18.07s/it]

Step 6: Build Multi-Task CNN Model

In [None]:
def build_multitask_model(base_model_name='EfficientNetB0', input_shape=(128,128,3)):
    """Build and compile a two-headed CNN that predicts gender and age.

    Args:
        base_model_name: 'VGG16', 'ResNet50' or 'EfficientNetB0'. Any other
            value falls back to EfficientNetB0 (with a warning).
        input_shape: shape of one input image. The model takes images scaled
            to [0, 1], as produced by the loading cell (assumed RGB channel
            order - confirm against the loader).

    Returns:
        A compiled Keras ``Model`` with outputs ``[gender_output, age_output]``:
        a sigmoid unit trained with binary cross-entropy and a linear unit
        trained with MSE.
    """
    import warnings
    from tensorflow.keras.applications import resnet50, vgg16
    from tensorflow.keras.layers import Rescaling

    inputs = Input(shape=input_shape)
    # The ImageNet weights were trained on 0-255 pixels, so undo the notebook's
    # /255 scaling before applying each backbone's own preprocessing.
    pixels = Rescaling(255.0)(inputs)

    if base_model_name == 'VGG16':
        base_model = VGG16(include_top=False, input_tensor=vgg16.preprocess_input(pixels), weights='imagenet')
    elif base_model_name == 'ResNet50':
        base_model = ResNet50(include_top=False, input_tensor=resnet50.preprocess_input(pixels), weights='imagenet')
    else:
        if base_model_name != 'EfficientNetB0':
            # Keep the historical fallback, but do not let a typo pass unnoticed.
            warnings.warn(
                f"Unknown base_model_name {base_model_name!r}; falling back to EfficientNetB0."
            )
        # Keras EfficientNet normalises internally and expects raw 0-255 pixels.
        base_model = EfficientNetB0(include_top=False, input_tensor=pixels, weights='imagenet')

    # Shared trunk feeding both task heads.
    x = GlobalAveragePooling2D()(base_model.output)
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.5)(x)

    # Gender classification output
    gender_output = Dense(1, activation='sigmoid', name='gender_output')(x)
    # Age regression output
    age_output = Dense(1, activation='linear', name='age_output')(x)

    model = Model(inputs=inputs, outputs=[gender_output, age_output])
    # NOTE(review): the two losses are summed unweighted; age MSE (years squared)
    # is likely much larger than the cross-entropy - consider loss_weights.
    model.compile(
        optimizer=Adam(1e-4),
        loss={'gender_output':'binary_crossentropy','age_output':'mse'},
        metrics={'gender_output':'accuracy','age_output':'mae'}
    )
    return model


Step 7: Train Multiple Models

In [None]:
models_to_train = ['VGG16','ResNet50','EfficientNetB0']
results_list = []

# NOTE: the test split is also used as validation data, so the val_* curves and
# the final metrics below are computed on the same samples.
for model_name in models_to_train:
    print(f'\nTraining {model_name}...')
    # Drop the Keras global state left by the previous backbone so memory does
    # not pile up while several large models are built in one loop.
    tf.keras.backend.clear_session()
    model = build_multitask_model(model_name)
    history = model.fit(
        X_train, {'gender_output': y_gender_train, 'age_output': y_age_train},
        validation_data=(X_test, {'gender_output': y_gender_test, 'age_output': y_age_test}),
        epochs=15, batch_size=32, verbose=1
    )

    # Predictions (each head returns an (N, 1) column)
    pred_gender, pred_age = model.predict(X_test)
    pred_gender_labels = (pred_gender > 0.5).astype(int)

    # Metrics - flatten the columns so they match the 1-D ground-truth arrays.
    gender_pred_flat = pred_gender_labels.ravel()
    age_pred_flat = pred_age.ravel()
    gender_acc = accuracy_score(y_gender_test, gender_pred_flat)*100
    gender_f1 = f1_score(y_gender_test, gender_pred_flat)
    age_mae = mean_absolute_error(y_age_test, age_pred_flat)
    age_rmse = np.sqrt(mean_squared_error(y_age_test, age_pred_flat))

    results_list.append([model_name, gender_acc, gender_f1, age_mae, age_rmse])

    # Plot Training History
    fig, (ax_acc, ax_mae) = plt.subplots(1, 2, figsize=(14,5))

    # Gender Accuracy
    ax_acc.plot(history.history['gender_output_accuracy'], label='Train Acc')
    ax_acc.plot(history.history['val_gender_output_accuracy'], label='Val Acc')
    ax_acc.set_title(f'{model_name} Gender Accuracy')
    ax_acc.set_xlabel('Epoch')
    ax_acc.set_ylabel('Accuracy')
    ax_acc.legend()

    # Age MAE
    ax_mae.plot(history.history['age_output_mae'], label='Train MAE')
    ax_mae.plot(history.history['val_age_output_mae'], label='Val MAE')
    ax_mae.set_title(f'{model_name} Age MAE')
    ax_mae.set_xlabel('Epoch')
    ax_mae.set_ylabel('MAE')
    ax_mae.legend()
    plt.show()


Step 8: Result Table

In [None]:
# One row per trained backbone, in the order the training loop recorded them.
result_columns = [
    'Model',
    'Gender Accuracy (%)',
    'Gender F1-Score',
    'Age MAE',
    'Age RMSE',
]
results_df = pd.DataFrame(results_list, columns=result_columns)
results_df


Step 9: Visualize Predictions

In [None]:
# Show the first test images with true vs. predicted labels. `model` is whichever
# backbone the training loop built last.
n_show = min(12, len(X_test))  # do not index past a small test set
pred_gender, pred_age = model.predict(X_test[:n_show])
pred_gender_labels = (pred_gender>0.5).astype(int)

plt.figure(figsize=(15,8))
for i in range(n_show):
    plt.subplot(3,4,i+1)
    plt.imshow(X_test[i])
    # pred_age has shape (N, 1): index the scalar explicitly, because int() on a
    # 1-element array is deprecated in recent NumPy.
    plt.title(f'True Age: {y_age_test[i]}, Pred Age: {int(pred_age[i, 0])}\nTrue Gender: {y_gender_test[i]}, Pred: {pred_gender_labels[i][0]}')
    plt.axis('off')
plt.tight_layout()
plt.show()


Step 10: Compare Models (Graph)

In [None]:
# Side-by-side bar charts comparing the trained backbones.
fig, (ax_acc, ax_mae) = plt.subplots(1, 2, figsize=(12,5))

sns.barplot(x='Model', y='Gender Accuracy (%)', data=results_df, ax=ax_acc)
ax_acc.set_title('Gender Accuracy Comparison')
ax_acc.set_ylim(0,100)

sns.barplot(x='Model', y='Age MAE', data=results_df, ax=ax_mae)
ax_mae.set_title('Age MAE Comparison')
# Keep the usual 0-10 window, but widen it when a model's MAE exceeds 10 so its
# bar is not silently clipped at the top of the axis.
mae_upper = max(10, results_df['Age MAE'].max() * 1.1)
ax_mae.set_ylim(0, mae_upper)

fig.tight_layout()
plt.show()


Step 11: Grad-CAM for Explainability

In [None]:
import tensorflow.keras.backend as K

def get_gradcam_heatmap(model, img_array, layer_name='conv5_block3_out'): # For ResNet50
    """Compute a Grad-CAM heatmap for the model's first output (the gender head).

    Args:
        model: Keras model whose first output is the score to explain.
        img_array: batch holding the image to explain; only the first image's
            feature map is used for the heatmap.
        layer_name: name of the convolutional layer to visualise. The default is
            a ResNet50 layer; if the model has no layer with this name, the last
            layer with a 4-D output is used instead and a warning is issued.

    Returns:
        2-D float numpy array with the spatial size of the chosen layer,
        clipped at 0 and scaled to a maximum of 1 (all zeros when no location
        contributes positively).
    """
    import warnings

    try:
        target_layer = model.get_layer(layer_name)
    except ValueError:
        # The requested layer only exists in some backbones; fall back to the
        # last feature map that still has spatial dimensions.
        spatial_layers = [
            layer for layer in model.layers
            if len(getattr(layer.output, 'shape', ())) == 4
        ]
        if not spatial_layers:
            raise
        target_layer = spatial_layers[-1]
        warnings.warn(
            f"Layer {layer_name!r} not found; using {target_layer.name!r} for Grad-CAM."
        )

    # Pass model.inputs directly: wrapping it in another list nests the inputs.
    grad_model = Model(model.inputs, [target_layer.output, model.outputs[0]])
    with tf.GradientTape() as tape:
        conv_outputs, predictions = grad_model(img_array)
        loss = predictions[:,0]
    grads = tape.gradient(loss, conv_outputs)

    # Channel importance = gradient averaged over batch and spatial positions.
    pooled_grads = tf.reduce_mean(grads, axis=(0,1,2))
    # Weight every channel of the first image's feature map and average them.
    heatmap = tf.reduce_mean(conv_outputs[0] * pooled_grads, axis=-1).numpy()
    heatmap = np.maximum(heatmap,0)

    peak = np.max(heatmap)
    if peak > 0:  # avoid 0/0 -> NaN when nothing contributes positively
        heatmap /= peak
    return heatmap

# Example Grad-CAM
img = np.expand_dims(X_test[0], axis=0)
heatmap = get_gradcam_heatmap(model, img)
base_image = plt.imshow(X_test[0])
# The heatmap only has the conv layer's spatial size. Draw it over the photo's
# extent; otherwise the second imshow rescales the axes to the heatmap grid and
# the overlay no longer lines up with the face.
plt.imshow(heatmap, cmap='jet', alpha=0.5,
           extent=base_image.get_extent(), interpolation='bilinear')
plt.title('Grad-CAM Overlay')
plt.axis('off')
plt.show()


Step 12: SHAP Explainability

In [None]:
import shap
# Gradient-based SHAP: the first 100 training images serve as the background
# set, and attributions are computed for the first 5 test images.
shap_background = X_train[:100]
shap_samples = X_test[:5]

explainer = shap.GradientExplainer(model, shap_background)
shap_values = explainer.shap_values(shap_samples)
shap.image_plot(shap_values, shap_samples)
