In [1]:
import numpy as np
import pandas as pd
import os
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
import matplotlib.pyplot as plt

# Load data
# The CSV sits in a sibling "Data" folder one level above the working
# directory; the path is anchored at the current working directory.
current_dir = os.getcwd()
relative_path = os.path.join(*('..', 'Data', 'archive (22)', 'age_gender.csv'))
file_path = os.path.join(current_dir, relative_path)
data = pd.read_csv(file_path)

# Define new age bins with fewer categories.
# pd.cut is called with its defaults, so every interval is open on the left
# and closed on the right: (0, 20], (20, 40], ..., (80, 100].
age_bins = [0, 20, 40, 60, 80, 100]

# Build the integer class label in one pass:
#   1. discard rows without an age,
#   2. map each age to the index of its interval (labels=False -> 0..4),
#   3. discard rows whose age fell outside every interval (pd.cut gives NaN),
#   4. store the label as an integer now that no NaN remains.
data = (
    data
    .dropna(subset=['age'])
    .assign(age_group=lambda frame: pd.cut(frame['age'], bins=age_bins, labels=False))
    .dropna(subset=['age_group'])
    .astype({'age_group': int})
)

# Preprocess pixel values and reshape
def _pixels_to_image(pixel_text):
    """Turn one whitespace-separated pixel string into a (48, 48, 1) float32 array."""
    flat_values = np.array(pixel_text.split(), dtype='float32')
    return flat_values.reshape(48, 48, 1)


data['pixels'] = data['pixels'].apply(_pixels_to_image)

# Stack the per-row images into a single array (one leading sample axis)
# and pull out the matching age-group labels.
X = np.stack(data['pixels'].tolist())
y = data['age_group'].to_numpy()

# Normalize pixel values (divide by 255.0)
X = np.divide(X, 255.0)

# Split data into training and testing sets:
# 20% of the samples are held out, and the fixed seed makes the shuffle repeatable.
split_arrays = train_test_split(X, y, random_state=42, test_size=0.2)
X_train, X_test, y_train, y_test = split_arrays

# Data augmentation with reduced parameters
# Random, label-preserving perturbations applied on the fly to each training batch.
datagen = ImageDataGenerator(
    rotation_range=10,  # Reduce rotation range
    width_shift_range=0.1,  # Reduce width shift range
    height_shift_range=0.1,  # Reduce height shift range
    horizontal_flip=True,
    fill_mode='nearest'  # pixels exposed by a shift/rotation copy the nearest edge value
)

# NOTE: no datagen.fit(X_train) call here. fit() only computes dataset statistics
# for featurewise_center, featurewise_std_normalization and zca_whitening; none of
# those options is enabled, so the call would just copy the training set for nothing.

# Enable mixed precision BEFORE any layer is created.
# A Keras layer adopts the global dtype policy at construction time, so setting
# the policy after the model is built leaves this model in float32 and only
# affects layers created afterwards (e.g. when the cell is re-run in the same
# kernel, which would silently change how the model trains).
tf.keras.mixed_precision.set_global_policy('mixed_float16')

# One output unit per age interval (N bin edges define N - 1 intervals).
num_classes = len(age_bins) - 1

# Model architecture
# Three conv -> max-pool -> dropout stages with 32/64/128 filters, followed by a
# dense classifier head.
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(48, 48, 1)),
    MaxPooling2D((2, 2)),
    Dropout(0.25),

    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Dropout(0.25),

    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Dropout(0.25),

    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    # Keep the output layer in float32: a float16 softmax is numerically
    # unstable, and the loss should be computed from float32 probabilities.
    Dense(num_classes, activation='softmax', dtype='float32')
])

# Compile model
# Labels are integer class indices, hence the sparse categorical cross-entropy.
model.compile(optimizer=Adam(learning_rate=0.001),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Train model
# Training batches come from the augmenting generator; the held-out split is
# passed as validation data and is scored without augmentation.
train_batches = datagen.flow(X_train, y_train, batch_size=64)  # Increase batch size if possible
history = model.fit(
    train_batches,
    validation_data=(X_test, y_test),
    epochs=50,
)

# Evaluate model
# predict() returns one row of per-class scores for each test image; the
# column index of the largest score is taken as the predicted age group.
y_pred = model.predict(X_test)
y_pred_classes = y_pred.argmax(axis=1)

test_accuracy = accuracy_score(y_test, y_pred_classes)
test_report = classification_report(y_test, y_pred_classes)

print("Accuracy:", test_accuracy)
print("Classification Report:\n", test_report)

# Plot training & validation curves side by side:
# accuracy in the left panel, loss in the right panel.
fig, (accuracy_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 6))

# (axes, training key, validation key, panel title, y-axis label)
panels = (
    (accuracy_ax, 'accuracy', 'val_accuracy', 'Model accuracy', 'Accuracy'),
    (loss_ax, 'loss', 'val_loss', 'Model loss', 'Loss'),
)

for ax, train_key, val_key, panel_title, y_label in panels:
    ax.plot(history.history[train_key])  # per-epoch value on the training batches
    ax.plot(history.history[val_key])    # per-epoch value on the validation data
    ax.set_title(panel_title)
    ax.set_ylabel(y_label)
    ax.set_xlabel('Epoch')
    ax.legend(['Train', 'Test'], loc='upper left')

plt.show()





In [2]:
# model.save("age_detector_task5.model", save_format="h5")