In [2]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import to_categorical
from keras.models import Model
from keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# Load dataset: build the CSV location from the kernel's working directory
# (the file sits under ../Data relative to it) and read it into a DataFrame.
current_dir = os.getcwd()
relative_path = os.path.join('..', 'Data', 'archive (22)', 'age_gender.csv')
file_path = os.path.join(current_dir, relative_path)
data = pd.read_csv(file_path)

# Update ethnicity to nationality mapping
def map_ethnicity_to_nationality(ethnicity):
    """Translate a numeric ethnicity code into its text label.

    Args:
        ethnicity: Code to translate. It is compared with ``==`` against the
            known codes 3, 0, 1, 2 and 4, in that order.

    Returns:
        'Indian', 'White', 'Black', 'Asian' or 'Others' for the codes
        3, 0, 1, 2 and 4 respectively; 'Others' for anything else.
    """
    # Ordered (code, label) pairs; the first code that compares equal wins.
    code_labels = (
        (3, 'Indian'),
        (0, 'White'),
        (1, 'Black'),
        (2, 'Asian'),
        (4, 'Others'),
    )
    for code, label in code_labels:
        if ethnicity == code:
            return label
    # Unknown codes share the catch-all label.
    return 'Others'

# Attach a readable label next to every numeric ethnicity code.
data['nationality'] = data['ethnicity'].apply(map_ethnicity_to_nationality)

# Filter age: keep rows with 10 <= age <= 60.
# The explicit .copy() makes the filtered frame independent of the original,
# so the column assignment below writes to a real DataFrame instead of a
# slice (which would raise pandas' SettingWithCopyWarning).
data = data[(data['age'] >= 10) & (data['age'] <= 60)].copy()

# Convert pixels to numpy arrays: each cell is a space-separated string of
# values that is parsed and reshaped to a 48x48 single-channel image.
data['pixels'] = data['pixels'].apply(lambda x: np.fromstring(x, sep=' ').reshape(48, 48, 1))

# One-hot encode ethnicity into 5 classes (codes 0..4).
ethnicity_labels = to_categorical(data['ethnicity'], num_classes=5)

# Split the data: 80% train / 20% test, with a fixed seed for a repeatable split.
train_data, test_data, train_labels, test_labels = train_test_split(
    np.stack(data['pixels'].values), ethnicity_labels, test_size=0.2, random_state=42)

# Data generator for image augmentation (training only): rescales pixels to
# [0, 1] and applies random shear, zoom and horizontal flips.
datagen = ImageDataGenerator(rescale=1./255, shear_range=0.2, zoom_range=0.2, horizontal_flip=True)

# Validation images must only be rescaled. Feeding them through the augmenting
# generator would randomly distort the held-out set, making validation
# accuracy/loss noisy and not comparable between epochs.
validation_datagen = ImageDataGenerator(rescale=1./255)

# Prepare data generators
train_generator = datagen.flow(train_data, train_labels, batch_size=32)
# shuffle=False keeps the evaluation order fixed; it does not affect metrics.
validation_generator = validation_datagen.flow(
    test_data, test_labels, batch_size=32, shuffle=False)

# Network input: one 48x48 single-channel image per sample.
input_image = Input(shape=(48, 48, 1))

# Shared feature extractor: three conv -> max-pool stages with 32, 64 and 128
# filters, each using a 3x3 kernel, ReLU activation and 2x2 pooling.
x = input_image
for filters in (32, 64, 128):
    x = Conv2D(filters, (3, 3), activation='relu')(x)
    x = MaxPooling2D((2, 2))(x)

# Collapse the feature maps into a vector and pass it through a dense layer;
# dropout (rate 0.5) is applied to that layer's output.
x = Flatten()(x)
x = Dense(128, activation='relu')(x)
shared_output = Dropout(0.5)(x)

# Ethnicity head: softmax over the 5 classes.
ethnicity_output = Dense(5, activation='softmax', name='ethnicity')(shared_output)

# Wire input and output into a trainable model.
model = Model(inputs=input_image, outputs=ethnicity_output)

# Configure training: Adam optimizer, categorical cross-entropy loss,
# accuracy reported as the metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Fit for 50 epochs on the training generator; the validation generator is
# evaluated alongside and its metrics are recorded in the returned history.
history = model.fit(
    x=train_generator,
    validation_data=validation_generator,
    epochs=50,
)

# Training curves side by side: accuracy on the left, loss on the right.
fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# (train key, validation key, panel title, y-axis label) for each panel.
panels = (
    ('accuracy', 'val_accuracy', 'Model accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Model loss', 'Loss'),
)
for ax, (train_key, val_key, title, ylabel) in zip(axes, panels):
    ax.plot(history.history[train_key])
    ax.plot(history.history[val_key])
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    ax.set_xlabel('Epoch')
    ax.legend(['Train', 'Validation'], loc='upper left')

plt.show()

# Prediction function for ethnicity
def predict_ethnicity(image):
    """Return the predicted ethnicity class index for each input image.

    The input is handed to the module-level ``model`` unchanged: no rescaling
    or reshaping happens here, so the caller is responsible for any
    preprocessing the model expects.

    Args:
        image: Batch of images in whatever form ``model.predict`` accepts.

    Returns:
        Array with, for every row of the prediction output, the index of the
        highest-scoring class.
    """
    class_scores = model.predict(image)
    return np.argmax(class_scores, axis=1)

# Test prediction on a real held-out sample rather than random noise, so the
# check is reproducible and exercises realistic input.
# The training generator scales pixels by 1/255 and predict_ethnicity() passes
# its input to the model unchanged, so the same scaling is applied here.
# Slicing with [:1] keeps the leading batch dimension.
test_image = test_data[:1] * (1. / 255)
ethnicity_prediction = predict_ethnicity(test_image)
print(ethnicity_prediction)


In [2]:
# model.save("ethinicity_detector_task5.model", save_format="h5")

  saving_api.save_model(
