In [None]:
# https://www.kaggle.com/code/yflau17/age-gender-prediction-by-cnn

import os, shutil
from matplotlib.image import imread
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import random
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from PIL import Image

from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras import Model, Input
from keras import optimizers
from keras.layers import Conv2D, Activation, MaxPooling2D, Dense, Flatten, Dropout, BatchNormalization
from keras import callbacks
from tensorflow import keras
import tensorflow as tf
from keras.applications.vgg16 import VGG16
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input

from tensorflow.keras.utils import plot_model

In [None]:
import sys
# caution: path[0] is reserved for script path (or '' in REPL)
# Put the local dataset-analysis folder on the import path so that the
# age_groups module below can be found.
sys.path.insert(1, 'C:/0_thesis/0_dataset-analysis')

# NOTE(review): the star import hides which names age_groups contributes to
# this notebook; prefer importing the needed names explicitly once confirmed.
from age_groups import *

In [None]:
# Pepper validation set: image directory, and the label CSV that sits next to
# it under the same name with a ".csv" suffix.
pepper_val = "C:/0_thesis/dataset/pepper-validation-data"
pepper_val_csv = pepper_val + ".csv"

In [None]:
# UTKFACE wild: image directory, label CSV and model results folder.
# When the notebook is run top to bottom, the UTKFACE cell that follows
# reassigns these same three names.
ds_path, csv_path, results_folder = (
    'C:/0_thesis/dataset/utkface-wild-pepper/',
    'C:/0_thesis/dataset/utkface-wild-pepper.csv',
    "C:/0_thesis/2_model/gender/23wild",
)

In [None]:
# UTKFACE: image directory, label CSV and model results folder.
# Running this cell replaces the "wild" settings assigned in the previous cell.
ds_path, csv_path, results_folder = (
    'C:/0_thesis/dataset/utkface-pepper/',
    'C:/0_thesis/dataset/utkface-pepper.csv',
    "C:/0_thesis/2_model/gender/23",
)

In [None]:
# Settings shared by every data generator in this notebook.
img_size = 256            # images are resized to img_size x img_size
batch_size = 64 # !!

# DataFrame columns holding the image file name and the label to predict.
x_col, y_col = 'filename', 'gender'

In [None]:
# VAL ON SAME VAL OF TRAINING
# Reload the validation split stored next to the model and feed it, unshuffled,
# through a rescaling generator. Labels are cast to str before being handed to
# flow_from_dataframe with class_mode="binary".
validation_data = pd.read_csv(results_folder+"/validation_data.csv")
validation_data = validation_data.astype({y_col: str})

val_datagen = ImageDataGenerator(rescale=1./255)

val_generator = val_datagen.flow_from_dataframe(
    dataframe=validation_data,
    directory=ds_path,
    x_col=x_col,
    y_col=y_col,
    target_size=(img_size, img_size),
    class_mode="binary",
    batch_size=batch_size,
    shuffle=False,  # keep row order so predictions line up with validation_data
)

In [None]:
# Print the column/dtype/non-null summary of the current validation frame.
validation_data.info()

In [None]:
# Number of validation samples per gender class.
# The label column was cast to str for the Keras generator, but np.bincount
# only accepts non-negative integers, so convert back to int for counting.
counts = np.bincount(validation_data[y_col].astype(int))
print(counts)

In [None]:
# Balanced class weights for the labels of the current validation frame.
# The original cell read `df`, which is only created further down in the
# "Create model" section, so it failed on a fresh kernel; at this point
# `validation_data` is the frame that is actually loaded.
# NOTE(review): confirm that the validation labels are the intended input here.
from sklearn.utils.class_weight import compute_class_weight
class_weights = compute_class_weight(class_weight="balanced", classes=np.unique(validation_data[y_col]), y=validation_data[y_col])
dict(enumerate(class_weights))

## ON PEPPER PHOTOS

In [None]:
# VAL ON PEPPER PHOTOS
# Load the Pepper label CSV, encode gender as male -> 0 / female -> 1, then
# cast the label column to str before building the binary-mode generator.
gender_mapper = {'male': 0, 'female': 1}
validation_data = (
    pd.read_csv(pepper_val_csv)
    .replace({"gender": gender_mapper})
    .astype({y_col: str})
)

val_datagen = ImageDataGenerator(rescale=1./255)

val_generator = val_datagen.flow_from_dataframe(
    dataframe=validation_data,
    directory=pepper_val,
    x_col=x_col,
    y_col=y_col,
    target_size=(img_size, img_size),
    class_mode="binary",
    batch_size=batch_size,
    shuffle=False,  # keep row order so predictions line up with validation_data
)

## Create model

In [None]:
# Load the labelled dataset, encode gender as male -> 0 / female -> 1 and cast
# the label column to str before it is passed to the binary-mode generators.
df = pd.read_csv(csv_path)
n_tot_images = df.shape[0]
df = df.rename(columns={'Unnamed: 0': 'original-index'})
gender_mapper = {'male': 0, 'female': 1}
df = df.replace({"gender": gender_mapper})
df[y_col] = df[y_col].astype(str)

# Fixed seed so the 70/30 split (and therefore every validation metric below)
# is the same after Restart & Run All.
split_seed = 42
training_data, validation_data = train_test_split(df, test_size=0.3, random_state=split_seed)
# NOTE(review): this split is not written to disk here, while an earlier cell
# reads results_folder + "/validation_data.csv" -- confirm where that file is
# produced.

n_train = len(training_data)
n_val = len(validation_data)

print('No. of training image:', n_train)
print('No. of validation image:', n_val)

# Both generators only rescale pixel values to [0, 1]; no augmentation.
train_datagen = ImageDataGenerator(rescale=1./255)
val_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_dataframe(training_data,
                                                    directory = ds_path,
                                                    x_col = x_col,
                                                    y_col = y_col,
                                                    target_size = (img_size, img_size),
                                                    class_mode="binary",
                                                    batch_size = batch_size)
# shuffle=False keeps the prediction order aligned with validation_data rows.
val_generator = val_datagen.flow_from_dataframe(validation_data,
                                                directory = ds_path,
                                                x_col = x_col,
                                                y_col = y_col,
                                                target_size = (img_size, img_size),
                                                class_mode="binary", shuffle=False,
                                                batch_size = batch_size)


In [None]:
# Class counts for the bias initialisation (0 = male, 1 = female).
# df['gender'] holds string labels at this point (cast for the generators), and
# np.bincount rejects non-integer input, so convert back to int first.
n_male, n_female = np.bincount(df['gender'].astype(int))

# log(positives / negatives): with this bias a sigmoid output starts at the
# female base rate n_female / (n_male + n_female).
initial_bias = np.log([n_female/n_male])
print("Init bias: ", initial_bias)
output_bias = tf.keras.initializers.Constant(initial_bias)


In [None]:
# Single-convolution binary classifier.
# The input shape follows img_size so the network always matches the
# target_size the data generators resize images to (previously a hard-coded 256).
inputs = Input(shape=(img_size, img_size, 3))

gender_model = Conv2D(256, (3, 3), activation = 'relu')(inputs)
gender_model = MaxPooling2D((2, 2))(gender_model)
gender_model = Dropout(0.5)(gender_model)
gender_model = Flatten()(gender_model)
gender_model = Dense(128, activation = 'relu')(gender_model)
gender_model = Dense(64, activation = 'relu')(gender_model)
gender_model = Dense(32, activation = 'relu')(gender_model)
# Sigmoid head; its bias starts from output_bias computed in the cell above.
gender_model = Dense(1, activation = 'sigmoid', name='gender_output', bias_initializer=output_bias)(gender_model)

model = Model(inputs=inputs, outputs=gender_model)

model.summary()
#plot_model(model, to_file="model.jpg", show_shapes=True)

In [None]:
# Balanced class weights for the label the model is actually trained on.
# The original computed them from the 'age-group' column, which produces
# weights for age classes rather than for the gender target passed to fit().
from sklearn.utils.class_weight import compute_class_weight
class_weights = compute_class_weight(class_weight="balanced", classes=np.unique(training_data[y_col]), y=training_data[y_col])
# np.unique returns the labels sorted, so position i is the weight of the i-th
# sorted label. Presumably this matches train_generator.class_indices -- verify.
class_weights = dict(enumerate(class_weights))

In [None]:
# TRAIN
epochs = 3 # !!

opt = keras.optimizers.Adam(learning_rate=0.001)
#opt = keras.optimizers.RMSprop(learning_rate=0.0001, decay=1e-6)
#opt = keras.optimizers.SGD(learning_rate=1e-4, momentum=0.9)

# Note: with epochs <= patience the stopping criterion can never be reached.
earlystopping = callbacks.EarlyStopping(monitor ="val_loss",
                                        mode ="min", patience = 5,
                                        restore_best_weights = True)

model.compile(loss={'gender_output':'binary_crossentropy'},
              optimizer=opt,
              metrics={'gender_output':'accuracy'})

# The generators already yield batches of batch_size, so batch_size must not be
# passed to fit() again. validation_steps is left unset so every validation
# batch is used, including the final partial one that n_val // batch_size
# would skip.
history = model.fit(train_generator,
                    steps_per_epoch = n_train // batch_size,
                    epochs = epochs,
                    validation_data=val_generator,
                    callbacks = [earlystopping],
                    class_weight=class_weights)



In [None]:

# PLOTTING
# Two stacked panels: loss on top, accuracy below, train vs. validation.
fig, (loss_ax, acc_ax) = plt.subplots(2, 1, figsize=(15,10))
fig.subplots_adjust(wspace=0.5, hspace=1)

loss_ax.plot(history.history['loss'], label='train loss')
loss_ax.plot(history.history['val_loss'], label='val loss')
loss_ax.set_title('Loss')
loss_ax.set_xlabel('epoch')
loss_ax.legend()
loss_ax.grid(True)

acc_ax.plot(history.history['accuracy'], label='train accuracy')
acc_ax.plot(history.history['val_accuracy'], label='val accuracy')
acc_ax.set_title('Gender accuracy')
acc_ax.set_xlabel('epoch')
acc_ax.legend()
acc_ax.grid(True)

# `id_process` is not defined anywhere in this notebook; store the figure with
# the other run artifacts in results_folder instead.
fig.savefig(results_folder+"/metrics.jpg")

## Validate model

In [None]:
# Load the model saved under results_folder. This rebinds `model`, replacing
# any model built or trained in the cells above.
model = keras.models.load_model(results_folder+"/model")

In [None]:
# Print the layer-by-layer summary of the model currently bound to `model`.
model.summary()

In [None]:
# Display the configuration of the model's last layer.
model.layers[-1].get_config()

In [None]:
# Display the configuration of the whole model.
model.get_config()

In [None]:
# Evaluate on whichever validation generator was built last (stored split,
# Pepper photos, or the fresh train/validation split).
model.evaluate(val_generator)

In [None]:
# Predict over the validation generator. Every val_generator in this notebook
# is created with shuffle=False, which the later cells rely on to pair
# predictions with validation_data rows by position.
prediction = model.predict(val_generator)

In [None]:
# Turn the model outputs into hard integer labels by rounding, and bring the
# true labels back to int so both sides share a dtype for the metric cells.
y_pred = np.round(prediction).astype(int)
validation_data[y_col] = validation_data[y_col].astype(int)

In [None]:
# Accuracy / precision / recall / F1 of the rounded predictions against the
# true labels of the current validation frame.
from sklearn.metrics import recall_score, precision_score, accuracy_score, f1_score

y_true = validation_data[y_col]
precision = precision_score(y_true, y_pred)
recall = recall_score(y_true, y_pred)
accuracy = accuracy_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred)

for label, value in (
    ("Accuracy: ", accuracy),
    ("Precision: ", precision),
    ("Recall: ", recall),
    ("F1: ", f1),
):
    print(label, value)

In [None]:
# Confusion matrix of true vs. predicted labels, drawn with tick labels 0 and 1.
cm = confusion_matrix(y_true=validation_data[y_col], y_pred=y_pred)
disp = ConfusionMatrixDisplay(cm, display_labels=[0, 1])
disp.plot()
#plt.savefig(results_folder+"/cm.jpg")

In [None]:
# Print some examples for PREDICTION
# Up to nine random validation images in a 3x3 grid, titled with the actual and
# the predicted gender (1 = Female, anything else = Male).

plt.figure(figsize=(10,10))

# Read images from the directory the active generator was built on, so the grid
# matches validation_data whether it currently holds a UTKFace split or the
# Pepper photos (the path used to be hard-coded to pepper_val).
image_dir = val_generator.directory

# Never ask for more samples than there are rows.
n_examples = min(9, len(validation_data.index))
indices = random.sample(range(len(validation_data.index)), n_examples)
flat_pred = np.ravel(y_pred)  # works for both (n,) and (n, 1) prediction arrays

for j, i in enumerate(indices):
    sample = validation_data.iloc[i]

    # int() keeps the comparison correct even if labels are still strings.
    actual_gender = "Female" if int(sample[y_col])==1 else "Male"
    pred_gender = "Female" if flat_pred[i]==1 else "Male"

    plt.subplot(3,3,j+1)
    plt.axis('off')
    plt.title('Actual: %s\nPred: %s' % (actual_gender, pred_gender))
    # Context manager closes the image file once it has been drawn.
    with Image.open(os.path.join(image_dir, sample[x_col])) as img:
        plt.imshow(img)

plt.savefig(results_folder+"/example_pepper.jpg")
plt.show()