In [None]:
# https://www.kaggle.com/code/yflau17/age-gender-prediction-by-cnn

import os, shutil
from matplotlib.image import imread
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import random
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from PIL import Image

from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras import Model, Input
from keras import optimizers
from keras.layers import Conv2D, Activation, MaxPooling2D, Dense, Flatten, Dropout, BatchNormalization
from keras import callbacks
from tensorflow import keras
import tensorflow as tf
from keras.applications.vgg16 import VGG16
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input

from tensorflow.keras.utils import plot_model

In [None]:
import sys
# caution: path[0] is reserved for script path (or '' in REPL)
# Only add the helper folder once, so re-running this cell does not keep
# growing sys.path with duplicate entries.
if 'C:/0_thesis/0_dataset-analysis' not in sys.path:
    sys.path.insert(1, 'C:/0_thesis/0_dataset-analysis')

# Explicit import instead of a wildcard: AgeGroups is the only name from
# age_groups that the cells of this notebook reference.
from age_groups import AgeGroups

In [None]:
# Folder with the Pepper validation photos; its CSV sits next to it under the
# same name with a ".csv" suffix.
pepper_val = "C:/0_thesis/dataset/pepper-validation-data"
pepper_val_csv = pepper_val + ".csv"

In [None]:
# UTKFACE wild
# NOTE: the "UTKFACE" cell assigns the same three names, so whichever of the
# two cells is executed last decides which dataset the notebook works on.
ds_path = 'C:/0_thesis/dataset/utkface-wild-pepper/'
csv_path = ds_path.rstrip('/') + '.csv'  # CSV shares the folder's name
results_folder = "C:/0_thesis/2_model/age/28wild"

In [None]:
# UTKFACE
# NOTE: the "UTKFACE wild" cell assigns the same three names, so whichever of
# the two cells is executed last decides which dataset the notebook works on.
ds_path = 'C:/0_thesis/dataset/utkface-pepper/'
csv_path = ds_path.rstrip('/') + '.csv'  # CSV shares the folder's name
results_folder = "C:/0_thesis/2_model/age/28"

In [None]:
# Settings shared by the data generators below.
batch_size = 64 # !!
img_size = 256  # generators resize every image to img_size x img_size

# Dataframe columns holding the image file name and the target value.
x_col, y_col = 'filename', 'age'


In [None]:
# VAL ON SAME VAL OF TRAINING
# Reload the validation split that was written to results_folder and build a
# generator over it.
validation_data = pd.read_csv(f"{results_folder}/validation_data.csv")

val_datagen = ImageDataGenerator(rescale=1./255)

# class_mode="raw" passes the y_col values through as targets;
# shuffle=False keeps the batches in dataframe row order.
val_generator = val_datagen.flow_from_dataframe(
    dataframe=validation_data,
    directory=ds_path,
    x_col=x_col,
    y_col=y_col,
    target_size=(img_size, img_size),
    class_mode="raw",
    shuffle=False,
    batch_size=batch_size,
)

#### Trying to balance the validation set across age groups

In [None]:
# Rows per age group: counts[i] is how many validation rows have group i.
counts = np.bincount(validation_data['age-group'].to_numpy())
print(counts)

In [None]:
from sklearn.utils.class_weight import compute_class_weight

# Preview the "balanced" class weights of the frame this section is balancing.
# The cell used to read `df`, which is only created further down in the
# "Create model" section, so it failed with NameError on a fresh kernel.
age_group_labels = np.unique(validation_data["age-group"])
class_weights = compute_class_weight(class_weight="balanced",
                                     classes=age_group_labels,
                                     y=validation_data["age-group"])
# Key the weights by the actual group label rather than by position.
dict(zip(age_group_labels.tolist(), class_weights))

In [None]:
def get_n_rows(n, df):
    """Return ``n`` randomly chosen rows of ``df``, sampled without replacement.

    Rows are picked by position with the ``random`` module, so seed it with
    ``random.seed`` for a reproducible result.

    Args:
        n: Number of rows wanted.
        df: DataFrame to sample from.

    Returns:
        A DataFrame with the sampled rows, in sampling order. When ``df`` has
        fewer than ``n`` rows, all of its rows are returned (in random order)
        instead of raising.

    Raises:
        ValueError: If ``n`` is negative.
    """
    n_available = len(df.index)
    # random.sample refuses to draw more items than the population holds;
    # cap the request so small groups are returned whole instead of crashing.
    n_picked = min(n, n_available)
    rows = random.sample(range(n_available), n_picked)
    return df.iloc[rows]

# Build a more balanced validation frame: age groups 0-7 are down-sampled to
# n rows each, the remaining groups (8 and above) are kept whole.
n = 347

# Compare group ids as strings so the selection works both when 'age-group'
# holds integers (which np.bincount needs to produce `counts`) and when it
# holds strings; comparing an integer column to '0', '1', ... matches nothing.
group_ids = validation_data['age-group'].astype(str)

# Collect the per-group pieces and concatenate once instead of growing the
# frame with a concat per iteration.
group_frames = []
for i in range(max(8, len(counts))):
    val_group = validation_data[group_ids == str(i)]
    group_frames.append(get_n_rows(n, val_group) if i < 8 else val_group)

val_data = pd.concat(group_frames, axis=0)

val_data

In [None]:
# Use the balanced sample as the validation frame from here on.
# This only rebinds the dataframe name: a val_generator created before this
# cell still reads from the frame it was built with.
validation_data = val_data

In [None]:
# Display the current validation dataframe.
validation_data 

## ON PEPPER PHOTOS

In [None]:
# VAL ON PEPPER PHOTOS
validation_data = pd.read_csv(pepper_val_csv)
# Encode the gender column numerically.
gender_mapper = {'male': 0, 'female': 1}
validation_data = validation_data.replace({"gender": gender_mapper})

#validation_data[y_col]=validation_data[y_col].astype(str)

val_datagen = ImageDataGenerator(rescale=1./255)

# class_mode="raw" yields the y_col values as targets, like the generator
# built from the saved training split. The previous value, "input", makes the
# generator return the images themselves as targets, so model.evaluate()
# could not score predictions against y_col.
# shuffle=False keeps batches in dataframe row order so predictions can be
# matched back to rows by position.
val_generator = val_datagen.flow_from_dataframe(validation_data,
                                                directory = pepper_val,
                                                x_col = x_col,
                                                y_col = y_col,
                                                target_size = (img_size, img_size),
                                                class_mode="raw",
                                                shuffle=False,
                                                batch_size = batch_size)

## Create model

In [None]:
# Load the dataset index and prepare the train/validation generators.
df = pd.read_csv(csv_path)
n_tot_images = df.shape[0]
# Non-inplace rename so re-running the cell always starts from the fresh frame.
df = df.rename(columns={'Unnamed: 0': 'original-index'})

gender_mapper = {'male': 0, 'female': 1}
df = df.replace({"gender": gender_mapper})

# class_mode="sparse" below treats the labels as class names, so keep them as strings.
df["age-group"] = df["age-group"].astype(str)

# Split in training and validation set.
# Fixed seed: the split is written to disk and reused for later validation,
# so it has to be the same on every run.
training_data, validation_data = train_test_split(df, test_size=0.3, random_state=42)
validation_data.to_csv(results_folder+"/validation_data.csv") # save it to work locally

n_train = len(training_data)
n_val = len(validation_data)

print('No. of training image:', n_train)
print('No. of validation image:', n_val)

# Weights for imbalanced training set
from sklearn.utils.class_weight import compute_class_weight
class_weights = compute_class_weight(class_weight="balanced", classes=np.unique(training_data["age-group"]), y=training_data["age-group"])
# Keys are positions in np.unique's sorted label list (labels are strings
# here, so the order is lexicographic: '0', '1', '10', '11', '2', ...).
# NOTE(review): assumes flow_from_dataframe numbers its classes in that same
# sorted order - confirm against train_generator.class_indices.
class_weights = dict(enumerate(class_weights))

# Set train and val data generator

train_datagen = ImageDataGenerator(rescale=1./255)
val_datagen = ImageDataGenerator(rescale=1./255)

batch_size = 64 # !!

img_size = 256
x_col = 'filename'
y_col = 'age-group'

# Training batches are shuffled (generator default); each target is the
# integer index of the image's age-group class.
train_generator = train_datagen.flow_from_dataframe(training_data,
                                                    directory = ds_path,
                                                    x_col = x_col,
                                                    y_col = y_col,
                                                    target_size = (img_size, img_size),
                                                    class_mode="sparse",
                                                    batch_size = batch_size)

# shuffle=False keeps validation batches in dataframe row order.
val_generator = val_datagen.flow_from_dataframe(validation_data,
                                                directory = ds_path,
                                                x_col = x_col,
                                                y_col = y_col,
                                                target_size = (img_size, img_size),
                                                class_mode="sparse",
                                                shuffle=False,
                                                batch_size = batch_size) # class_mode = 'multi_output',


In [None]:
# Small CNN that regresses a single value per image, then trains it.
inputs = Input(shape=(256, 256, 3))

age_model = Conv2D(32, (3, 3), activation = 'relu')(inputs)
age_model = MaxPooling2D((2, 2))(age_model)
age_model = Conv2D(64, (3, 3), activation = 'relu')(age_model)
age_model = MaxPooling2D((2, 2))(age_model)
age_model = Conv2D(128, (3, 3), activation = 'relu')(age_model)
age_model = MaxPooling2D((2, 2))(age_model)
age_model = Dropout(0.5)(age_model)

age_model = Flatten()(age_model)
age_model = Dense(256, activation = 'relu')(age_model)

# One linear output unit: this head is a regressor, not a 12-way classifier.
age_model = Dense(1, name='age_output')(age_model)

model = Model(inputs=inputs, outputs=age_model)

#model.summary()

#plot_model(model, to_file="model.jpg", show_shapes=True)

# TRAIN
epochs = 25 # !!

opt = keras.optimizers.Adam(learning_rate=0.001)

# Stop after 5 epochs without val_loss improvement and roll back to the best weights.
earlystopping = callbacks.EarlyStopping(monitor ="val_loss",
                                        mode ="min", patience = 5,
                                        restore_best_weights = True)

model.compile(loss={'age_output':'mse'},
            optimizer=opt,
            metrics={'age_output':'mae'}) #tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# batch_size is not passed to fit(): the generators already yield batches and
# Keras documents that it must not be specified for generator inputs.
# NOTE(review): when train_generator is built with class_mode="sparse" on
# 'age-group', this regression is fitted on class indices, not on ages -
# confirm that is intended.
history = model.fit(train_generator,
                    steps_per_epoch = n_train // batch_size,
                    epochs = epochs,
                    validation_data = val_generator,
                    validation_steps = n_val // batch_size,
                    callbacks = [earlystopping],
                    verbose = 1,
                    class_weight = class_weights)


In [None]:
# Persist the current model under the results folder.
model.save(f"{results_folder}/model")

In [None]:
# Write the validation frame next to the model so it can be reloaded later
# (save it to work locally).
validation_data.to_csv(f"{results_folder}/validation_data.csv")

In [None]:
# Create model
# CNN classifier: three conv/pool/dropout stages followed by a dense stack and
# a 12-way output (one unit per age group).
inputs = Input(shape=(256, 256, 3))
age_model = Conv2D(256, (3, 3), activation = 'relu')(inputs)
age_model = MaxPooling2D((2, 2))(age_model)
age_model = Dropout(0.25)(age_model)
age_model = Conv2D(128, (3, 3), activation = 'relu')(age_model)
age_model = MaxPooling2D((2, 2))(age_model)
age_model = Dropout(0.25)(age_model)
age_model = Conv2D(64, (3, 3), activation = 'relu')(age_model)
age_model = MaxPooling2D((2, 2))(age_model)
age_model = Dropout(0.25)(age_model)
age_model = Flatten()(age_model)
age_model = Dense(256, activation = 'relu')(age_model)
age_model = Dense(128, activation = 'relu')(age_model)
age_model = Dense(64, activation = 'relu')(age_model)
age_model = Dense(32, activation = 'relu')(age_model)

# 12 is the total number of age-groups.
# softmax instead of sigmoid: every image belongs to exactly one age group, so
# the 12 outputs should form a single probability distribution.
age_model = Dense(12, activation = 'softmax', name='age_output')(age_model)

model = Model(inputs=inputs, outputs=age_model)
model.summary()

#plot_model(model, to_file=results_folder+"/model.jpg", show_shapes=True)

In [None]:
# Disabled experiment: an earlier Sequential CNN kept as a bare string literal.
# None of the code inside the string runs; the cell only evaluates (and
# echoes) the text itself.
'''model = Sequential()
model.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=(256,256,3)))
model.add(Activation('relu'))

model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(64, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(5, activation='sigmoid'))'''

In [None]:
# Transfer learning: frozen ImageNet VGG16 convolutional base with a new
# 12-way age-group head on top.
vgg_base = VGG16(input_shape=[img_size,img_size,3],weights='imagenet', include_top=False)
# Freeze the pretrained layers so only the new head is trained.
for layer in vgg_base.layers:
    layer.trainable = False
x = Flatten()(vgg_base.output)
# softmax instead of sigmoid: every image belongs to exactly one age group, so
# the 12 outputs should form a single probability distribution.
age_model = Dense(12, activation = 'softmax', name='age_output')(x)

# NOTE(review): the data generators rescale pixels by 1/255 and the imported
# VGG16 preprocess_input is not applied - confirm that is intended.
model = Model(inputs=vgg_base.input, outputs=age_model)

model.summary()

In [None]:
from tensorflow.keras.applications.resnet50 import ResNet50

# Transfer learning: frozen ImageNet ResNet50 base (global average pooled)
# with a new 12-way age-group head on top.
base_model = ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=None,
    pooling='avg'
)
# Freeze the pretrained layers so only the new head is trained.
for layer in base_model.layers:
    layer.trainable = False

# softmax instead of sigmoid: every image belongs to exactly one age group, so
# the 12 outputs should form a single probability distribution.
age_model = Dense(12, activation='softmax', name='age_output')(base_model.output)
model = Model(inputs=base_model.input, outputs=age_model)

model.summary()

In [None]:
# TRAIN
# Compile and fit whichever 12-way classifier is currently bound to `model`.
epochs = 20 # !!
opt = keras.optimizers.Adam(learning_rate=0.001)
#opt = keras.optimizers.RMSprop(learning_rate=0.0001, decay=1e-6)
# Stop after 5 epochs without val_loss improvement and roll back to the best weights.
earlystopping = callbacks.EarlyStopping(monitor ="val_loss",
                                        mode ="min", patience = 5,
                                        restore_best_weights = True)

# The train/val generators use class_mode="sparse", i.e. one integer class
# index per image. That label format needs the sparse categorical loss and
# metric; binary_crossentropy / categorical_accuracy expect one label column
# per class and would compare the index against all 12 outputs.
model.compile(loss={'age_output':'sparse_categorical_crossentropy'},
            optimizer=opt,
            metrics={'age_output':'sparse_categorical_accuracy'}) # !! optimizer="adam", categorical_crossentropy

# batch_size is not passed to fit(): the generators already yield batches and
# Keras documents that it must not be specified for generator inputs.
history = model.fit(train_generator,
                    steps_per_epoch = n_train // batch_size,
                    epochs = epochs,
                    validation_data=val_generator,
                    validation_steps = n_val // batch_size,
                    callbacks = [earlystopping])

print(history)
#model.save("model_"+str(id_process))

In [None]:

# PLOTTING
# Two stacked panels: loss on top, the tracked metric below.
fig, (loss_ax, metric_ax) = plt.subplots(2, 1, figsize=(15, 10))
fig.subplots_adjust(wspace=0.5, hspace=1)

loss_ax.plot(history.history['loss'], label='train loss')
loss_ax.plot(history.history['val_loss'], label='val loss')
loss_ax.set_title('Loss')
loss_ax.legend()
loss_ax.grid(True)
loss_ax.set_xlabel('epoch')

# Use whichever training metric this run recorded instead of a hard-coded
# 'mae' key: the regression run logs 'mae', the classification run logs an
# accuracy metric, and a fixed key raises KeyError for the other one.
metric = next((key for key in history.history
               if key != 'loss' and not key.startswith('val_')), None)
if metric is not None:
    # Label the curves with the metric actually plotted (they used to be
    # called "accuracy" while showing MAE).
    metric_label = 'MAE' if metric == 'mae' else metric
    metric_ax.plot(history.history[metric], label='train ' + metric_label)
    metric_ax.plot(history.history['val_' + metric], label='val ' + metric_label)
    metric_ax.set_title('Age ' + metric_label)
    metric_ax.legend()
metric_ax.grid(True)
metric_ax.set_xlabel('epoch')

fig.savefig(results_folder+"/metrics.jpg")

## Validate model

In [None]:
# Reload the model previously saved under the results folder.
model = keras.models.load_model(f"{results_folder}/model")

In [None]:
# Show the configuration of the model's last layer.
model.layers[-1].get_config()

In [None]:
# Evaluate the model on whatever val_generator currently points to.
model.evaluate(val_generator)

In [None]:
# Run the model over val_generator and keep its raw outputs.
prediction = model.predict(val_generator)

In [None]:
# Round the raw outputs to whole numbers.
y_pred = np.round(prediction)

In [None]:
# Cast both the predictions and the y_col ground-truth column to integers.
y_pred = y_pred.astype(int)
validation_data[y_col]=validation_data[y_col].astype(int)

In [None]:
# from age to age-group
# NOTE(review): each `pred` is one row of y_pred, not necessarily a scalar -
# confirm getGroupFromAge accepts that.
y_pred_groups = [AgeGroups().getGroupFromAge(pred) for pred in y_pred]

In [None]:
from sklearn.metrics import recall_score, precision_score, accuracy_score, f1_score

# AGE-GROUP
# Score predicted age groups against the 'age-group' column.
# NOTE(review): micro-averaged precision/recall/F1 on single-label multiclass
# data usually coincide with accuracy - consider average='macro' if per-group
# behaviour matters.
precision, recall, f1 = (
    score(validation_data["age-group"], y_pred_groups, average='micro')
    for score in (precision_score, recall_score, f1_score)
)
accuracy = accuracy_score(validation_data["age-group"], y_pred_groups)

for label, value in (("Accuracy", accuracy),
                     ("Precision", precision),
                     ("Recall", recall),
                     ("F1", f1)):
    print(label + " on age-group: ", value)

In [None]:
# Confusion matrix of true vs. predicted age groups, labelled with the names
# returned by AgeGroups().getLabels().
cm = confusion_matrix(y_true=validation_data["age-group"], y_pred=y_pred_groups)
disp = ConfusionMatrixDisplay(cm, display_labels=AgeGroups().getLabels())
disp.plot()
#plt.savefig(results_folder+"/cm_age-groups.jpg")

In [None]:
# Print some examples for PREDICTION
# Show a 3x3 grid of random validation photos with actual and predicted value.

plt.figure(figsize=(10,10))

# Never ask for more examples than there are rows: random.sample raises
# ValueError when the sample is larger than the population.
n_examples = min(9, len(validation_data.index))
indices = random.sample(range(len(validation_data.index)), n_examples)

for j, i in enumerate(indices):
    sample = validation_data.iloc[i]

    # y_pred[i] is matched to row i by position; this relies on the
    # predictions coming from a generator built on this same frame with
    # shuffle=False.
    actual_age = sample[y_col]
    # Drop the singleton dimension so the title reads "23" instead of "[23]".
    pred_age = np.squeeze(y_pred[i])

    plt.subplot(3,3,j+1)
    plt.axis('off')
    plt.title('Actual: %s\nPred: %s' % (actual_age, pred_age))
    #plt.imshow(Image.open(ds_path+"/"+sample.filename))
    # Close each image file once it has been drawn instead of leaking the handle.
    with Image.open(pepper_val+"/"+sample.filename) as img:
        plt.imshow(img)

plt.savefig(results_folder+"/example_pepper.jpg")
plt.show()