In [None]:
# Imports
import os as os
import cv2 as cv
import numpy as np
from matplotlib import pyplot as plt
from scipy import ndimage
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import random
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization, LeakyReLU
from keras.models import Sequential
from keras.callbacks import ModelCheckpoint
import numpy as np
from keras.models import load_model
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error
from tensorflow.keras.preprocessing.image import ImageDataGenerator

Functions to extract the age from a file name and to map an age to its age group

In [None]:
def extract_age(filename):
    """Return the age encoded at the start of a UTKFace-style file name.

    The file name is expected to begin with the age followed by an
    underscore, e.g. ``25_0_1_20170116.jpg`` -> ``25``.

    Args:
        filename: Bare file name or full path. Both ``\\`` and ``/`` are
            accepted as path separators.

    Returns:
        The age as an ``int``.

    Raises:
        ValueError: If the file name does not start with an integer.
    """
    # Isolate the bare file name *before* splitting on '_' so that underscores
    # in directory names cannot be mistaken for the age separator.
    basename = filename.replace('\\', '/').rsplit('/', 1)[-1]
    return int(basename.split('_', 1)[0])

In [None]:
def check_which_group_age(age):
    """Map an age to the index of its age group.

    Groups (inclusive bounds): 0 -> 1-2, 1 -> 3-10, 2 -> 11-20, 3 -> 21-30,
    4 -> 31-45, 5 -> 46-60. Any age that falls in none of these ranges
    is assigned to group 6.
    """
    age_ranges = ((1, 2), (3, 10), (11, 20), (21, 30), (31, 45), (46, 60))
    for group_index, (lowest, highest) in enumerate(age_ranges):
        if lowest <= age <= highest:
            return group_index
    # Catch-all group for everything outside the ranges above.
    return len(age_ranges)

Create dataset

In [None]:
def preprocess_image(filelist, max_images_per_group=50):
    """Build an age-group-balanced image/age dataset from a list of file paths.

    The files are grouped by age group, each group is shuffled and truncated
    to at most ``max_images_per_group`` files, and every kept image is
    overlaid with its thickened Canny edges, resized and scaled to [0, 1].

    Args:
        filelist: Iterable of image paths whose file names start with the age
            (see ``extract_age``).
        max_images_per_group: Maximum number of files kept per age group.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays. ``images`` holds the
        float32 images (200x200, channels in OpenCV's BGR order) and
        ``labels`` the corresponding ages, clamped to at most 80.

    Notes:
        The size of each age group (after truncation) is printed, one number
        per line. Files that OpenCV cannot read are reported and skipped, so
        the returned arrays may be shorter than the printed counts suggest.
    """
    max_age = 80              # ages above this value are clamped to it
    target_size = (200, 200)  # width, height of the returned images

    # Group (path, age) pairs by age group; the age is parsed and clamped once.
    group_to_samples = {}
    for filepath in filelist:
        age = min(extract_age(filepath), max_age)
        group_to_samples.setdefault(check_which_group_age(age), []).append((filepath, age))

    # Shuffle each group, keep at most max_images_per_group files and report
    # the resulting group size.
    for group, samples in group_to_samples.items():
        random.shuffle(samples)
        group_to_samples[group] = samples[:max_images_per_group]
        print(len(group_to_samples[group]))

    # The dilation kernel is the same for every image, so build it once.
    dilation_kernel = np.ones((3, 3), np.uint8)

    images = []
    labels = []
    for samples in group_to_samples.values():
        for filepath, age in samples:
            img_org = cv.imread(filepath)
            if img_org is None:
                # cv.imread signals an unreadable/missing file by returning None.
                print(f"Skipping unreadable image: {filepath}")
                continue

            # cv.imread returns channels in BGR order, so the grayscale
            # conversion must use the BGR code to weight the channels correctly.
            gray = cv.cvtColor(img_org, cv.COLOR_BGR2GRAY)

            # Detect edges and thicken them.
            edges = cv.Canny(gray, 100, 200)
            thick_edges = cv.dilate(edges, dilation_kernel, iterations=2)

            # Blend the 3-channel edge map into the original image.
            edges_bgr = cv.cvtColor(thick_edges, cv.COLOR_GRAY2BGR)
            img_combined = cv.addWeighted(img_org, 0.7, edges_bgr, 0.3, 0)
            img_combined = cv.resize(img_combined, target_size)

            images.append(img_combined.astype(np.float32) / 255.0)
            labels.append(age)

    return np.array(images), np.array(labels)

# Load and preprocess images.
# The dataset location can be overridden through the UTKFACE_DIR environment
# variable; the original hard-coded path remains the default.
dirIn = os.environ.get('UTKFACE_DIR', r'C:\Users\Bartus\Desktop\UTKFace')

# os.listdir() returns entries in arbitrary order and preprocess_image shuffles
# with the global `random` generator, so sort the listing and seed the
# generator to select the same images on every run.
random.seed(42)
all_files = [os.path.join(dirIn, filename) for filename in sorted(os.listdir(dirIn))]

X, Y = preprocess_image(all_files, 1600)
print(len(X), len(Y))


Split the data into train, validation, and test sets

In [None]:
# Keep 90 % of the samples for training and hold out the remaining 10 %.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, Y, random_state=42, test_size=0.1
)
# Halve the held-out part: 5 % of all samples for validation, 5 % for testing.
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, random_state=42, test_size=0.5
)

Data augmentation

In [None]:
# Augmentation settings for the training images: small random rotations and
# horizontal/vertical shifts.
augmentation_options = {
    'rotation_range': 10,
    'width_shift_range': 0.05,
    'height_shift_range': 0.05,
    'fill_mode': 'nearest',
    'cval': 0,
}
datagen = ImageDataGenerator(**augmentation_options)

Define and compile the CNN model

In [None]:
# Feature extractor: five Conv2D + MaxPooling2D stages, doubling the number of
# filters at every stage.
model = Sequential()
for stage, n_filters in enumerate((16, 32, 64, 128, 256)):
    # Only the very first layer declares the input shape.
    first_layer_kwargs = {'input_shape': (200, 200, 3)} if stage == 0 else {}
    model.add(Conv2D(n_filters, (3, 3), activation='relu', padding='same', **first_layer_kwargs))
    model.add(MaxPooling2D(2, 2))

# Regression head: two dense layers with dropout, then a single linear output
# (the predicted age).
model.add(Flatten())
for n_units, dropout_rate in ((512, 0.45), (256, 0.35)):
    model.add(Dense(n_units, activation='relu'))
    model.add(Dropout(dropout_rate))
model.add(Dense(1, activation='linear'))

# Train with MSE as the loss and report MAE alongside it.
model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mae'])

# Print the layer overview.
model.summary()


Train model

In [None]:
batch_size = 32
model_checkpoint_path = "best_model13.h5"

# Save the model only when the validation loss improves.
model_checkpoint_callback = ModelCheckpoint(
    filepath=model_checkpoint_path,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)

# Augmented training batches; the validation data is passed to fit() as-is.
train_batches = datagen.flow(X_train, y_train, batch_size=batch_size)

history = model.fit(
    train_batches,
    epochs=40,
    steps_per_epoch=len(X_train) // batch_size,
    validation_data=(X_val, y_val),
    callbacks=[model_checkpoint_callback],
    verbose=1,
)