# Facial Attributes Recognition Model based on MobileNetV2
This code creates a model for facial attribute recognition, which will then be used as a pretrained model for a facial recognition model. 
This Conv2D model uses MobileNetV2 because it is lightweight and fast to train. The approach was reviewed and found in the article **"A Real-time Face Recognition Based on MobileNetV2 Model"** by **Vafaa Sukkar** and **Ergun Ercelebi**. It can be found at https://ineseg.org/wp-content/uploads/2023/08/4.pdf

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import models, layers, regularizers, mixed_precision
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Activation, Dropout
from tensorflow.keras.optimizers import Adam, SGD, RMSprop
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
import os 

import optuna
import wandb
import gc

In [None]:
# List the GPUs visible to TensorFlow and report whether any was found.
gpus = tf.config.list_physical_devices("GPU")

if gpus:
    print("TensorFlow is using the GPU \n", gpus)
else:
    print("No GPU detected.")

# Enable on-demand GPU memory allocation for every detected device.
try:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as err:
    # Memory growth has to be configured before the GPUs are initialized;
    # re-running this cell in an already-initialized kernel raises here.
    # Report it instead of aborting the notebook run.
    print("Could not enable GPU memory growth:", err)

In [None]:
# Start from a clean slate: run the garbage collector and clear the Keras
# backend session before any model is built.
gc.collect()
tf.keras.backend.clear_session()

In [None]:
from wandb.integration.keras import WandbMetricsLogger

# Request wandb's "core" feature (presumably the newer backend — confirm
# against the installed wandb version), then log in to Weights & Biases.
wandb.require("core")
wandb.login()

In [None]:
def load_image(path, label):
    """Load one image from disk and pair it with its label.

    The file at `path` is read, decoded as a 3-channel JPEG and resized to
    160x160. `label` is passed through unchanged.

    Returns:
        A `(image, label)` tuple.
    """
    raw_bytes = tf.io.read_file(path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize(decoded, (160, 160))
    return resized, label

def make_dataset(paths, labels, batch_size=32, shuffle=False, shuffle_buffer=None):
    """Build a batched, prefetched `tf.data.Dataset` of (image, label) pairs.

    Args:
        paths: Sequence of image file paths, one per sample.
        labels: Sequence of labels aligned with `paths`.
        batch_size: Number of samples per batch.
        shuffle: If True, shuffle the (path, label) pairs before the images
            are loaded.
        shuffle_buffer: Size of the shuffle buffer. When None (default) the
            number of samples is used, so the whole set is shuffled instead
            of only a fixed-size window of it.

    Returns:
        A dataset whose elements are batches of the pairs produced by
        `load_image`.
    """
    ds = tf.data.Dataset.from_tensor_slices((paths, labels))
    if shuffle:
        if shuffle_buffer is None:
            # Shuffling runs before `load_image`, so the buffer only holds
            # paths and labels (not decoded images); a full-size buffer is
            # therefore cheap and gives a uniform shuffle.
            shuffle_buffer = max(len(paths), 1)
        ds = ds.shuffle(shuffle_buffer)
    ds = ds.map(load_image, num_parallel_calls=tf.data.AUTOTUNE)
    ds = ds.batch(batch_size)
    ds = ds.prefetch(tf.data.AUTOTUNE)
    return ds

In [None]:
# Load the CelebA attribute table (the first line of the file is skipped).
# The attribute values come as -1/1 and must be converted to 0/1:

ds = pd.read_csv(
    "/tf/Face-Recognition/CelebA/list_attr_celeba.txt",
    skiprows=1,
    sep=r"\s+",
)
ds.iloc[:, 0:40] = ds.iloc[:, 0:40].eq(1).astype("int32")
ds.head()
# ds.describe()
# (ds == -1).sum()

In [None]:
# NOTE(review): unlike the other CelebA paths in this notebook, this directory
# sits at the filesystem root — confirm it is the intended location.
img_dir = "/img_align_celeba"

# The index holds the image file names; join each one onto the image folder.
ds["image_path"] = [os.path.join(img_dir, file_name) for file_name in ds.index]

# Move the file names out of the index into a regular column named "image".
ds = ds.reset_index().rename(columns={"index": "image"})
ds.head()

In [None]:
# Read the evaluation split file: two whitespace-separated columns, the image
# file name and its partition code.
df_split = pd.read_csv(
    "/tf/Face-Recognition/CelebA/Eval/list_eval_partition.txt",
    names=["image", "partition"],
    sep=r"\s+",
)

# Attach the partition code to each attribute row, matching on the file name.
ds = pd.merge(ds, df_split, on="image")
ds.head()

In [None]:
# Sanity check: does the first generated image path exist on disk?
os.path.exists(ds["image_path"].iloc[0])

In [None]:
# Split the table by partition code: 0 -> train, 1 -> validation, 2 -> test.
df_train, df_val, df_test = (
    ds[ds["partition"] == code] for code in (0, 1, 2)
)

In [None]:
# Build the input pipelines for the full splits. Labels are taken by position
# from columns 1..40; only the training split is shuffled.
train_ds = make_dataset(
    paths=df_train["image_path"].to_numpy(),
    labels=df_train.iloc[:, 1:41].to_numpy(),
    shuffle=True,
)

val_ds = make_dataset(
    paths=df_val["image_path"].to_numpy(),
    labels=df_val.iloc[:, 1:41].to_numpy(),
)

test_ds = make_dataset(
    paths=df_test["image_path"].to_numpy(),
    labels=df_test.iloc[:, 1:41].to_numpy(),
)

In [None]:
# Draw a reproducible 15% sample of every split (fixed seed).
df_train_small, df_val_small, df_test_small = (
    frame.sample(frac=0.15, random_state=5)
    for frame in (df_train, df_val, df_test)
)

# Same pipelines as for the full splits, built on the reduced frames;
# only the training subset is shuffled.
train_ds_small = make_dataset(
    paths=df_train_small["image_path"].to_numpy(),
    labels=df_train_small.iloc[:, 1:41].to_numpy(),
    shuffle=True,
)

val_ds_small = make_dataset(
    paths=df_val_small["image_path"].to_numpy(),
    labels=df_val_small.iloc[:, 1:41].to_numpy(),
)

test_ds_small = make_dataset(
    paths=df_test_small["image_path"].to_numpy(),
    labels=df_test_small.iloc[:, 1:41].to_numpy(),
)

In [None]:
# MobileNetV2 backbone with ImageNet weights and no classification top,
# configured for 160x160 3-channel inputs.
MobileNetV2 = keras.applications.MobileNetV2(
    include_top=False,
    input_shape=(160, 160, 3),
    weights="imagenet",
)

In [None]:
# Freeze the backbone: mark the MobileNetV2 model as non-trainable.
MobileNetV2.trainable = False

In [None]:
# Learning rate shared by the optimizer and the wandb run configuration.
lr = 1e-4
optimizer = RMSprop(learning_rate=lr)

In [None]:
# Input -> MobileNetV2 preprocessing -> backbone -> pooled feature vector.
inputs = keras.Input(shape=(160, 160, 3))
x = keras.applications.mobilenet_v2.preprocess_input(inputs)
x = MobileNetV2(x, training=False)  # backbone is called in inference mode
x = layers.GlobalAveragePooling2D()(x)

# Dense head: three (Dense -> activation -> Dropout) stages of shrinking width.
for units, activation_layer, drop_rate in (
    (256, layers.LeakyReLU, 0.2),
    (128, layers.LeakyReLU, 0.15),
    (64, layers.ReLU, 0.1),
):
    x = layers.Dense(units)(x)
    x = activation_layer()(x)
    x = layers.Dropout(drop_rate)(x)

# One sigmoid unit per attribute (40 in total); the output dtype is pinned
# to float32.
outputs = layers.Dense(40, activation="sigmoid", dtype="float32")(x)
model = keras.Model(inputs, outputs)

model.compile(
    optimizer=optimizer,
    loss="binary_crossentropy",
    metrics=[tf.keras.metrics.BinaryAccuracy(), tf.keras.metrics.AUC()],
)

model.summary()

In [None]:
# Stop when validation binary accuracy has not improved for 10 epochs and
# restore the best weights seen.
early_stopping = EarlyStopping(
    monitor="val_binary_accuracy",
    patience=10,
    restore_best_weights=True,
)

# Multiply the learning rate by 0.1 after 7 epochs without validation-loss
# improvement.
lr_reduction = ReduceLROnPlateau(
    monitor="val_loss",
    factor=0.1,
    patience=7,
)

In [None]:
# Hyper-parameters recorded with the wandb run.
run_config = {
    "activation": "leaky_relu, relu",
    "n_layers": 3,
    "learning_rate": lr,
    "optimizer": "RMSProp",
}

# Open the tracking run for this trial.
wandb.init(
    project="Conv2D-MobileNetV2-Based-Trials-Exp-Series1.0",
    name="Trial_1_FullSet",
    reinit=True,
    config=run_config,
)

In [None]:
# Callbacks used during training: wandb metric logging (log_freq=5),
# early stopping and learning-rate reduction.
training_callbacks = [
    WandbMetricsLogger(log_freq=5),
    early_stopping,
    lr_reduction,
]

# Train on the full training split for up to 200 epochs, validating on the
# validation split.
history = model.fit(
    train_ds,
    epochs=200,
    validation_data=val_ds,
    callbacks=training_callbacks,
    verbose=1,
)

In [None]:
# Teardown after training: clear the Keras backend session, close the wandb
# run and run the garbage collector.
tf.keras.backend.clear_session()
wandb.finish()
gc.collect()

### Multi-label (CelebA) considerations

CelebA is **multi-label**: each image has 40 independent binary attributes. Important implications:

- **BinaryAccuracy:** When using a single BinaryAccuracy() metric in model.compile, it computes accuracy element-wise over the whole output tensor. Concretely:

    - If batch size = 32 and 40 labels, each update_state compares 32×40 = 1280 elements and returns fraction correct out of 1280.

    - This is typically more informative than “exact match accuracy” (which requires all 40 to be correct per image and is often near zero).

- **AUC for multi-label:** AUC can be evaluated:

    - **Per-class AUC:** Computes AUC separately for each of the 40 labels (which is recommended). This reveals which attributes the model predicts well and which it doesn’t.

    - **Micro AUC:** Treats every label-instance pair as separate samples (flatten all labels and predictions) and computes a single AUC across them (equivalent to element-wise pooling). This weights attributes by frequency.

    - **Macro AUC:** Averages per-class AUC equally (gives each attribute equal weight regardless of prevalence).