<div style="width: 100%; clear: both;">
<div style="float: left; width: 50%;">
<img src="http://www.uoc.edu/portal/_resources/common/imatges/marca_UOC/UOC_Masterbrand.jpg" align="left">
</div>
<div style="float: right; width: 50%;">
<p style="margin: 0; padding-top: 22px; text-align:right;">M2.880 - TFM - Área 3 aula 1</p>
<p style="margin: 0; text-align:right;">2022-1 · Máster universitario en Ciencia de datos (Data science)</p>
<p style="margin: 0; text-align:right; padding-bottom: 100px;">Estudios de Informática, Multimedia y Telecomunicación</p>
</div>
</div>
<div style="width:100%;">&nbsp;</div>


# Predicción de Edad y Género a partir de la imagen de una persona

## Sección 0. Preparación del entorno.

#### Importación de librerías

In [None]:
import os
import numpy as np 
import pandas as pd
import tensorflow as tf
import _pickle as pickle
from datetime import timedelta
import matplotlib.pyplot as plt
from timeit import default_timer
from sklearn.metrics import accuracy_score as ACC
from sklearn.metrics import mean_squared_error as MSE
from sklearn.metrics import mean_absolute_error as MAE

#### TPU configuration

In [None]:
# Build the gRPC address of the TPU from the COLAB_TPU_ADDR environment
# variable (os.environ[...] raises KeyError when the variable is not set).
TF_MASTER = 'grpc://{}'.format(os.environ['COLAB_TPU_ADDR'])
resolver = tf.distribute.cluster_resolver.TPUClusterResolver(TF_MASTER)
# Connect to the TPU cluster and initialize it before creating the strategy.
tf.config.experimental_connect_to_cluster(resolver)
tf.tpu.experimental.initialize_tpu_system(resolver)
# `strategy` is a module-level object; EffNetB0() creates and compiles models
# inside `strategy.scope()`.
strategy = tf.distribute.TPUStrategy(resolver)

print("Number of replicas:", strategy.num_replicas_in_sync)

Number of replicas: 8


#### Montar unidad de disco de Google Drive

In [None]:
# Colab-specific module used to mount Google Drive.
from google.colab import drive

# The age scaler pickle and MODELS_PATH used below live under this mount point.
drive.mount('/content/drive')

Mounted at /content/drive


#### Funciones de normalización

In [None]:
# Load max_age and min_age, the bounds used by the age (de)normalization
# functions defined in the next cell.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
pickle_file = '/content/drive/MyDrive/TFM/age_scaler.pickle'
with open(pickle_file, 'rb') as f:
  age_scaler = pickle.load(f)
# The loaded object is indexed by the 'max_age' and 'min_age' keys.
max_age, min_age = age_scaler['max_age'], age_scaler['min_age']

In [None]:
def no_norm(data):
  """Identity normalization: returns `data` unchanged."""
  return data

def no_denorm(data):
  """Identity denormalization (counterpart of no_norm): returns `data` unchanged."""
  return data

def norm_01(data):
  """Min-max scaling: min_age is mapped to 0 and max_age is mapped to 1."""
  age_range = max_age - min_age
  shifted = data - min_age
  return shifted / age_range

def denorm_01(data):
  """Inverse of norm_01: maps 0 back to min_age and 1 back to max_age."""
  age_range = max_age - min_age
  rescaled = data * age_range
  return rescaled + min_age

def norm_11(data):
  """Scaling to [-1, 1]: min_age is mapped to -1 and max_age is mapped to 1."""
  unit_scaled = norm_01(data)
  return 2 * unit_scaled - 1

def denorm_11(data):
  """Inverse of norm_11: maps -1 back to min_age and 1 back to max_age."""
  unit_scaled = (data + 1)/2
  return denorm_01(unit_scaled)

#### Definición de parámetros y rutas

In [None]:
# IMAGES
# *_OG: size the decoded images are reshaped to; *_ENB0: size they are resized
# to before being fed to the EfficientNetB0-based model.
IMG_WIDTH_OG = IMG_HEIGHT_OG = 200
IMG_SIZE_OG = [IMG_HEIGHT_OG, IMG_WIDTH_OG]
IMG_SHAPE_OG = [*IMG_SIZE_OG, 3]
IMG_WIDTH_ENB0 = IMG_HEIGHT_ENB0 = 224
IMG_SIZE_ENB0 = [IMG_HEIGHT_ENB0, IMG_WIDTH_ENB0]
IMG_SHAPE_ENB0 = [*IMG_SIZE_ENB0, 3]

# HYPERPARAMETERS
BATCH_SIZE = 256
EPOCHS = 100
VERBOSE = 0
PATIENCE = 10 # Default EarlyStopping patience (see create_callbacks)
DELTA = 5e-4 # Default EarlyStopping min_delta (see create_callbacks)
AUTOTUNE = tf.data.AUTOTUNE
lr_TL = 1e-3 # Learning rate for training top model
lr_FT = 1e-5 # Learning rate for fine-tuning all layers

# FILE PATHS AND FILE NAMES
GCS_BUCKET = 'info-bucket-gcs-tfm'
MODELS_PATH = '/content/drive/MyDrive/TFM/models/TPU/' # TPU folder
TRAIN_FILE = 'train.pickle'
GENDER_TEST_FILE = 'test_gender.pickle'
AGE_TEST_FILE = 'test_age.pickle'
MODEL_FILE = 'model.h5'

# DATA DEFINITIONS
tasks = ['gender', 'age', 'mtl']
task_error = 'task must be in [\'gender\', \'age\', \'mtl\']'
# Per gender activation: number of output neurons and the validation metric
# monitored by early stopping.
dict_gender = {
    'sigmoid': {
        'neurons': 1,
        'monitor': 'val_binary_accuracy'
    },
    'softmax': {
        'neurons': 2,
        'monitor': 'val_categorical_accuracy'
    }
}
# 'activ': for every age activation, the normalizations that can be paired with
# it, each stored as a (normalize, denormalize) pair of functions.
# 'loss': age loss objects indexed by short name.
# The '' entries are placeholders selected when the corresponding argument of
# EffNetB0 is left at its '' default (e.g. no age settings for the gender task).
dict_age = {
    'activ': {
        'relu': {
            'norm_01': (norm_01, denorm_01),
            'no_norm': (no_norm, no_denorm)
        },
        'linear': {
            'norm_01': (norm_01, denorm_01),
            'no_norm': (no_norm, no_denorm)
        },
        'tanh': {
            'norm_11': (norm_11, denorm_11)
        },
        '': {
            '': [None, None]
        }
    },
    'loss': {
        'MSE': tf.keras.losses.MeanSquaredError(),
        'MAE': tf.keras.losses.MeanAbsoluteError(),
        '': None
    }
}

## Sección 1. Implementación de funciones

#### Generar dataset

In [None]:
def get_dataset(filenames, task, activ_gender='', f_norm='', train=False):
  """
    Builds a batched, prefetched tf.data.Dataset from TFRecord files.

    Every record holds a JPEG image and two int64 values (gender, age). The
    image is decoded, reshaped to IMG_SHAPE_OG and resized to IMG_SIZE_ENB0.
    The label returned with it depends on `task`: the gender, the age passed
    through `f_norm`, or a dict with both keyed by output layer name. No
    shuffling is applied by this function.

    Parameters:
      filenames (list): a list which contains all files to be processed
      task (str): 'gender', 'age' or 'mtl' determines task to be performed
      activ_gender (str): 'sigmoid' or 'softmax'; with 'softmax' the gender
        label is one-hot encoded with depth 2
      f_norm (function): callable applied to the age label (norm_01, norm_11
        or no_norm); it is called for the 'age' and 'mtl' tasks, so the ''
        default is only usable for the 'gender' task
      train (bool): if True data augmentation is applied
    Return:
      tf.data.Dataset: data batched and prefetched to be used
  """
  assert task in tasks, task_error

  def parse_tfrecord(example):
    """Decode one serialized example into an (image, label) pair"""
    schema = {
        "image": tf.io.FixedLenFeature([], tf.string),
        "outputs": tf.io.FixedLenFeature([2], tf.int64) # gender and age
    }
    record = tf.io.parse_single_example(example, schema)

    picture = tf.image.decode_jpeg(record['image'], channels=3)
    picture = tf.reshape(picture, IMG_SHAPE_OG) # stored images are 200x200x3
    picture = tf.image.resize(picture, IMG_SIZE_ENB0) # model expects 224x224x3
    gender, age = record['outputs'][0], record['outputs'][1]

    # Softmax heads need a one-hot encoded gender label
    if task in ('mtl', 'gender') and activ_gender == 'softmax':
      gender = tf.one_hot(gender, 2)

    if task == 'gender':
      return picture, gender
    if task == 'age':
      return picture, f_norm(age)
    return picture, {'gender_output': gender, 'age_output': f_norm(age)}

  def data_augment(image, outputs):
    """Randomly flip the image horizontally and change its saturation"""
    flipped = tf.image.random_flip_left_right(image)
    return tf.image.random_saturation(flipped, 0, 2), outputs

  # Read the TFRecord files and parse every record
  records = tf.data.TFRecordDataset(filenames, num_parallel_reads=AUTOTUNE)
  dataset = records.map(parse_tfrecord, num_parallel_calls=AUTOTUNE)

  if train:
    dataset = dataset.map(data_augment, num_parallel_calls=AUTOTUNE)

  # Batch and prefetch the next batch while training (autotuned buffer size)
  batched = dataset.batch(BATCH_SIZE)
  return batched.prefetch(AUTOTUNE)

In [None]:
def get_datasets(fold, task, activ_gender, f_norm, msg=True,
                 images_per_file=2155):
  """
    Gets fold from GCS and returns train, validation and test datasets and
    their number of batches.

    TFRecord files are looked up in `gs://{GCS_BUCKET}/{fold}/` with the
    prefixes `train`, `valid` and `test`. The number of images of each split
    is not read from the files: it is estimated as `images_per_file` times the
    number of files found.

    Parameters:
      fold (str): indicates fold to be used for loading datasets
      task (str): 'gender', 'age' or 'mtl' determines task to be performed
      activ_gender (str): 'sigmoid' or 'softmax' as activation function
      f_norm (function): norm_01, norm_11 or no_norm, applied to age labels
      msg (bool): information about function run should be displayed
      images_per_file (int): number of images assumed in every TFRecord file
    Return:
      tuples: 0 - Three tf.data.Dataset ordered as train, validation and test
              1 - Three integers with number of batches in each tf.data.Dataset
  """
  assert task in tasks, task_error # Check if task is OK
  splits = ('train', 'valid', 'test')

  # Get TFRecord files matching the GCS pattern of every split
  files = {
      split: tf.io.gfile.glob(f'gs://{GCS_BUCKET}/{fold}/{split}*.tfrecord')
      for split in splits
  }
  # Estimate number of instances and number of batches of every split
  n_images = {split: images_per_file * len(files[split]) for split in splits}
  steps = {
      split: np.ceil(n_images[split]/BATCH_SIZE).astype(np.int64)
      for split in splits
  }
  # Build every dataset from ITS OWN files (the test dataset used to be built
  # from the validation files). Only the train split is augmented.
  datasets = {
      split: get_dataset(files[split], task, activ_gender, f_norm,
                         train=(split == 'train'))
      for split in splits
  }
  # Print summary
  if msg:
    for split in splits:
      print(f'\t{split.capitalize()} images: {n_images[split]}, '
            f'Files: {len(files[split])},',
            f'Steps per epoch: {steps[split]}')

  return (tuple(datasets[split] for split in splits),
          tuple(steps[split] for split in splits))

#### Crear modelo

In [None]:
def create_ENB0_model(task, activ_gender, activ_age, msg=True): 
  """
    Builds the model named `task`_model on top of a frozen EfficientNetB0.

    EfficientNetB0 (ImageNet weights, no top) is used as shared feature
    extractor: it is set as non-trainable and called with training=False.
    One head per task (global average pooling, batch normalization, dropout
    and a dense output layer) is attached to it.

    Parameters:
      task (str): 'gender', 'age' or 'mtl' determines task to be performed
      activ_gender (str): 'sigmoid' or 'softmax' as activation function
      activ_age (str): 'relu', 'linear' or 'tanh' as activation function
      msg (bool): information about function run should be displayed

    Returns:
      tf.keras.Model: model created
  """
  assert task in tasks, task_error # Check if task is OK

  # Input layer
  inputs = tf.keras.layers.Input(shape=IMG_SHAPE_ENB0, name='input_layer')

  # Shared model
  backbone = tf.keras.applications.EfficientNetB0(
      include_top=False, weights='imagenet')
  backbone.trainable = False # freezing pre-trained model
  features = backbone(inputs, training=False) # BN layer in inference mode

  def build_head(branch, units, activation):
    """Attach pooling, normalization, dropout and output layers of a branch"""
    x = tf.keras.layers.GlobalAveragePooling2D(
        name=f'avg_pool_{branch}')(features)
    x = tf.keras.layers.BatchNormalization(name=f'norm_layer_{branch}')(x)
    x = tf.keras.layers.Dropout(rate=0.2, name=f'dropout_layer_{branch}')(x)
    return tf.keras.layers.Dense(
        units, activation=activation, name=f'{branch}_output')(x)

  heads, summary = [], []

  if task in ('mtl', 'gender'):
    # Gender branch
    heads.append(build_head(
        'gender', dict_gender[activ_gender]['neurons'], activ_gender))
    summary.append(f'\tGender output activation function: {activ_gender}')

  if task in ('mtl', 'age'):
    # Age branch
    heads.append(build_head('age', 1, activ_age))
    summary.append(f'\tAge output activation function: {activ_age}')

  # Create model
  model = tf.keras.models.Model(
      inputs=inputs, outputs=heads, name=task+'_model')

  if msg:
    print('\n'.join(summary))

  return model

#### Compilar modelo y callbacks

In [None]:
def compile_model(model, lr, task, activ_gender, age_loss, gamma, msg=True):
  """
    Compiles a model with Adam optimizer and the losses and metrics required
    by task. Gamma is only used for multi-task learning.

    Parameters:
      model (tf.keras.Model): model to be compiled
      lr (float): learning rate to be used during training
      task (str): 'gender', 'age' or 'mtl' determines task to be performed
      activ_gender (str): 'softmax' selects categorical cross-entropy and
        accuracy; any other value selects the binary versions
      age_loss (str): key of dict_age['loss'] ('MSE' or 'MAE') for the age loss
      gamma (float): weight of the gender loss; the age loss gets 1 - gamma
      msg (bool): information about function run should be displayed

    Returns:
      tf.keras.Model: model compiled
  """
  assert task in tasks, task_error # Check if task is OK
  losses, metrics, loss_weights = {}, {}, None
  report = [f'\tLearning rate: {str(lr)}']

  if task in ('mtl', 'gender'):
    # Gender branch
    if activ_gender == 'softmax':
      gender_loss = tf.keras.losses.CategoricalCrossentropy()
      gender_metric = tf.keras.metrics.CategoricalAccuracy()
    else:
      gender_loss = tf.keras.losses.BinaryCrossentropy()
      gender_metric = tf.keras.metrics.BinaryAccuracy()
    losses['gender_output'] = gender_loss
    metrics['gender_output'] = gender_metric
    report.append(f'\tGender loss: {gender_loss.name}, '
                  f'Gender metric: {gender_metric.name}')

  if task in ('mtl', 'age'):
    # Age branch
    chosen_loss = dict_age['loss'][age_loss]
    age_metrics = [tf.keras.metrics.MeanSquaredError(),
                   tf.keras.metrics.MeanAbsoluteError()]
    losses['age_output'] = chosen_loss
    metrics['age_output'] = age_metrics
    report.append(f'\tAge loss: {chosen_loss.name}, '
                  f'Age metrics: {[m.name for m in age_metrics]}')

  if task == 'mtl':
    # Both task losses are combined as a weighted sum
    loss_weights = {'gender_output': gamma, 'age_output': 1 - gamma}
    report.append(f'\tGamma value: {str(gamma)}')

  # Compile model
  optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
  model.compile(optimizer=optimizer, loss=losses, loss_weights=loss_weights,
                metrics=metrics)

  if msg:
    print('\n'.join(report))

  return model

In [None]:
def create_callbacks(task, activ_gender='', patience=PATIENCE, delta=DELTA):
  """
    Creates the EarlyStopping callback used for training.

    The monitored validation metric depends on task: the accuracy registered
    in dict_gender for the 'gender' task and 'val_loss' otherwise. Weights of
    the best epoch are restored when training stops.

    Parameters:
      task (str): 'gender', 'age' or 'mtl' determines task to be performed
      activ_gender (str): 'sigmoid' or 'softmax' as activation function
      patience (int): epochs without improvement before model stops training
      delta (float): minimum improvement to be taken into account

    Returns:
      list: a list with callbacks created
  """
  assert task in tasks, task_error # Check if task is OK

  watched = dict_gender[activ_gender]['monitor'] if task == 'gender' \
      else 'val_loss'

  early_stopping = tf.keras.callbacks.EarlyStopping(
      monitor=watched,
      min_delta=delta,
      patience=patience, # Consecutive allowed epochs without improvement
      restore_best_weights=True, # return model with best weights
      verbose=1 # To see in what epochs training stops
  )
  return [early_stopping]

#### Entrenamiento

In [None]:
def train_model(model, task, train_ds, valid_ds, callbacks, save_dir=None,
                msg=True):
  """
    Train model with callbacks and optionally save results and model on disk.

    The model is fitted for up to EPOCHS epochs. Learning rate, training time
    and the Keras history are gathered in a dictionary. If `save_dir` is
    given, that dictionary is pickled to TRAIN_FILE and the model is saved to
    MODEL_FILE inside MODELS_PATH/save_dir (created when missing; existing
    files are overwritten).

    Parameters:
      model (tf.keras.Model): model to be trained
      task (str): 'gender', 'age' or 'mtl' determines task to be performed
      train_ds (tf.data.Dataset): train dataset
      valid_ds (tf.data.Dataset): validation dataset
      callbacks (list): list of callbacks to be used during training
      save_dir (str): directory where model and results should be saved
      msg (bool): information about function run should be displayed

    Returns:
      tf.keras.Model: model trained
  """
  def print_validation_metrics(H, task):
    """
      Prints validation metrics of the best epoch found in the history.

      NOTE: the best epoch is the plain argmax/argmin over the history, so it
      can differ from the epoch restored by EarlyStopping, which only counts
      improvements larger than its min_delta.
    """
    keys, msg = list(H.history.keys()), f'\t' # Initializing variables
    if task in ['gender', 'mtl']:
      if task == 'gender':
        metric = keys[-1] # BinaryAccuracy or CategoricalAccuracy
        b_epoch = np.argmax(H.history[metric], axis=0) + 1 # Max accuracy
      else:
        metric = keys[-3] # BinaryAccuracy or CategoricalAccuracy
        b_epoch = np.argmin(H.history['val_loss'], axis=0) + 1 # Min loss
      val_acc = H.history[metric][b_epoch - 1]
      msg = msg + f'Best epoch: {b_epoch}, Gender accuracy: {val_acc:.2%}'
    if task in ['age', 'mtl']:
      if task == 'age':
        out_name = 'val'
        b_epoch = np.argmin(H.history['val_loss'], axis=0) + 1
        msg = msg + f'Best epoch: {b_epoch}, '
      else:
        out_name = 'val_age_output'
        msg = msg + f', '
      val_mse = H.history[f'{out_name}_mean_squared_error'][b_epoch-1]
      val_mae = H.history[f'{out_name}_mean_absolute_error'][b_epoch-1]
      msg = msg + f'Age MSE: {val_mse:.6f}, Age MAE: {val_mae:.6f}'
    print(msg)

  assert task in tasks, task_error # Check if task is OK
  # Training
  t_start = default_timer()
  H = model.fit(
      train_ds,
      validation_data=valid_ds,
      epochs=EPOCHS,
      verbose=VERBOSE,
      callbacks=callbacks
  )
  elapsed_time = default_timer() - t_start

  # Store results
  lr = '%f' % (model.optimizer.lr.numpy())
  time_str = str(timedelta(seconds=np.round(elapsed_time)))
  results = {
      'learning_rate': lr,
      'training_time': time_str,
      'history': H.history
  }
  if msg:
    print_validation_metrics(H, task)
    print(f'\tTraining time: {time_str}')

  if save_dir:
    save_path = os.path.join(MODELS_PATH, save_dir)
    os.makedirs(save_path, exist_ok=True)
    # Save results
    if msg:
      print('Saving results...')
    results_file = os.path.join(save_path, TRAIN_FILE)
    with open(results_file, 'wb') as f: # 'wb' replaces any previous file
      pickle.dump(results, f)
    if msg:
      # The message used to start with '\train', i.e. TAB + 'rain file'
      print(f'\tTrain file: {results_file}')
    # Save model
    if msg:
      print('Saving model...')
    model_file = os.path.join(save_path, MODEL_FILE)
    model.save(model_file)
    if msg:
      print(f'\tModel file: {model_file}')

  return model

#### Predicción

In [None]:
def test_model(model, test_ds, test_steps, task, activ_gender, f_denorm,
               save_dir=None, msg=True):
  """
    Test model over test set and save results denormalized on disk.

    Predictions are computed over the whole `test_ds` and compared with its
    labels. Gender values are converted to class indices. Printed age metrics
    are computed on the values as the model sees them (normalized scale),
    while the ages saved on disk are denormalized with `f_denorm` and rounded
    to the nearest integer.

    Parameters:
      model (tf.keras.Model): model to be tested
      test_ds (tf.data.Dataset): test dataset (must be finite)
      test_steps (int): number of batches in test_ds. Kept for backward
        compatibility: labels are read until test_ds is exhausted so that
        they always match the predictions
      task (str): 'gender', 'age' or 'mtl' determines task to be performed
      activ_gender (str): 'sigmoid' or 'softmax' as activation function
      f_denorm (function): denorm_01, denorm_11 or no_denorm can be used
      save_dir (str): directory where real and predicted values should be saved
      msg (bool): information about function run should be displayed
  """
  def print_test_metrics(task, real_g, pred_g, real_a, pred_a):
    """Print metrics over test set according to task"""
    msg = f'\t' # Initializing variables
    if task in ['gender', 'mtl']:
      acc = ACC(real_g, pred_g)
      msg = msg + f'Gender accuracy: {acc:.4%}'
    if task in ['age', 'mtl']:
      mse = MSE(real_a, pred_a)
      mae = MAE(real_a, pred_a)
      msg = msg + f', ' if task == 'mtl' else msg
      msg = msg + f'Age MSE: {mse:.6f}, Age MAE: {mae:.6f}'
    print(msg)

  def save_results(file_path, real, pred):
    """Pickle real and predicted values, replacing any previous file"""
    with open(file_path, 'wb') as f:
      pickle.dump({'real': real, 'pred': pred}, f)

  assert task in tasks, task_error # Check if task is OK
  # Initializing variables
  y_real, real_gender, pred_gender, real_age, pred_age = [], [], [], [], []

  y_pred = model.predict(test_ds, verbose=0) # Predictions
  labels = test_ds.map(lambda img, lab: lab) # Gets only labels from Dataset

  # Real labels: read every batch, exactly as model.predict did above
  for batch in labels.as_numpy_iterator():
    if task == 'mtl': # Two targets
      real_gender.extend(np.array(batch['gender_output']))
      real_age.extend(np.array(batch['age_output']))
    else: # One target
      y_real.extend(np.array(batch))

  # Assign real and predicted values to variables accordingly
  if task == 'mtl':
    pred_gender, pred_age = y_pred[0], y_pred[1]
  elif task == 'gender':
    real_gender, pred_gender = y_real, y_pred
  else:
    real_age, pred_age = y_real, y_pred

  # Preparing data to compute metrics and to store real and predicted values
  if task in ['gender', 'mtl']:
    if activ_gender == 'softmax': # Gets higher position in array to set class
      real_gender = np.argmax(real_gender, axis=1)
      pred_gender = np.argmax(pred_gender, axis=1)
    else: # Round value to 0 or 1 to set class (activ_gender == 'sigmoid')
      real_gender = np.array(real_gender)
      pred_gender = np.round(pred_gender).flatten().astype(np.int64)
  if task in ['age', 'mtl']: # Age values are stored in original range
    real_age, pred_age = np.array(real_age), np.array(pred_age).flatten()
    # Round to nearest integer: a plain astype(int) truncates, so floating
    # point error of the norm/denorm round trip could turn 25 into 24
    denorm_real_age = np.rint(f_denorm(real_age)).astype(np.int64)
    denorm_pred_age = np.rint(f_denorm(pred_age)).astype(np.int64)

  # Compute and print test metrics
  if msg:
    print_test_metrics(task, real_gender, pred_gender, real_age, pred_age)

  # Save results
  if save_dir:
    results_dir = os.path.join(MODELS_PATH, save_dir)
    os.makedirs(results_dir, exist_ok=True) # May not exist if nothing trained
    if len(real_gender):
      gender_file = os.path.join(results_dir, GENDER_TEST_FILE)
      save_results(gender_file, real_gender, pred_gender)
      if msg:
        print(f'\tGender results file: {gender_file}')
    if len(real_age):
      age_file = os.path.join(results_dir, AGE_TEST_FILE)
      save_results(age_file, denorm_real_age, denorm_pred_age)
      if msg:
        print(f'\tAge results file: {age_file}')

#### Función EfficientNetB0

In [None]:
def EffNetB0(task, folds, activ_gender='', activ_age='', norm='', age_loss='',
             gamma=0.5, test=True, fineTuning=True, save=True, log=True):
  """
    Creates, compiles, trains, tests and saves models and results over different
    activation functions and normalizations depending on task.

    Performs the full process from creating to testing a model for every
    combination of fold, gender activation, age activation, normalization,
    age loss and gamma. Can perform only the TL phase or the TL & FT phases.
    It's possible to save models and results.

    Parameters:
      task (str): 'gender', 'age' or 'mtl' determines task to be performed
      folds (str/list): fold or folds used for training and testing models
      activ_gender (str/list): 'sigmoid' and/or 'softmax' as activation function
      activ_age (str/list): 'relu', 'linear' and/or 'tanh' as activation function
      norm (str): key of the normalization to use. If '' and task is 'gender'
        or 'age', every normalization registered in dict_age for the age
        activation is tried
      age_loss (str/list): 'MSE' and/or 'MAE' as age loss function
      gamma (float/list): used to weight the individual losses for each task
      test (bool): if True models will be tested
      fineTuning (bool): if True FT phase will be done
      save (bool): if True models and results will be stored
      log (bool): information about process run should be displayed
  """
  assert task in tasks, task_error # Check if task is OK
  TL_dir, FT_dir, reload_dataset = None, None, False # Initializing variables
  # Adapt args to list type for loops
  folds = [folds] if isinstance(folds, str) else folds
  activ_gender = [activ_gender] if isinstance(
      activ_gender, str) else activ_gender
  activ_age = [activ_age] if isinstance(activ_age, str) else activ_age
  age_loss = [age_loss] if isinstance(age_loss, str) else age_loss
  gamma = [gamma] if isinstance(gamma, (float, int)) else gamma
  
  if log:
    head_msg = f'Model: {task}, Batch_size: {BATCH_SIZE}, Epochs: {EPOCHS}, '\
        f'Early stopping: Patience {PATIENCE}, Delta {DELTA}\nFolds: {folds}'
    if task in ['gender', 'mtl']:
      head_msg = head_msg + f'\nGender activation function: {activ_gender}'
    if task in ['age', 'mtl']:
      head_msg = head_msg + f'\nAge activation function: {activ_age}'\
          f'\nAge loss function: {age_loss}'
    if task == 'mtl':
      head_msg = head_msg + f'\nGamma values: {gamma}'
    print(head_msg)

  # `reload_dataset` is raised whenever something that changes the data
  # pipeline varies (fold, gender activation, normalization) and lowered once
  # the datasets are rebuilt, so they are reused across age losses and gammas.
  for fold in folds:
    reload_dataset = True
    for ag in activ_gender:
      if ag:
        reload_dataset =True
      for aa in activ_age:
        # Normalizations to try for this age activation
        if norm == '' and task in ['gender', 'age']:
          my_norm = list(dict_age['activ'][aa].keys())
        else:
          my_norm = [norm]
        for n in my_norm:
          reload_dataset = True
          # (normalize, denormalize) pair; [None, None] for the '' placeholder
          f_norm, f_denorm = dict_age['activ'][aa][n]
          for al in age_loss:
            for g in gamma:

              # Set directory for save models and results
              if save:
                TL_dir = f'efficientNetB0/{task}/{fold}/TL/'
                FT_dir = f'efficientNetB0/{task}/{fold}/FT/'
                # NOTE: the local name `dir` shadows the Python builtin
                if task == 'mtl':
                  dir = f'{ag}-{aa}-{n}-{al}-gamma_{str(g)}'
                elif task == 'gender':
                  dir = f'{ag}'
                else:
                  dir = f'{aa}-{n}-{al}'
                TL_dir, FT_dir = TL_dir + dir, FT_dir + dir

              print('\n'*2+'*'*45, f'BASE MODEL: EfficientNetB0',
                    f'- FOLD: {fold}', '*'*45, '\n') if log else None

              # Load data
              if reload_dataset:
                print('Creating datasets...') if log else None
                datasets, steps = get_datasets(fold, task, ag, f_norm, log)
                train_ds, valid_ds, test_ds = datasets
                train_steps, valid_steps, test_steps = steps
                reload_dataset = False
            
              # Transfer learning
              print('\n', '\t'*6, '  TRAIN TOP MODEL LAYERS\n') if log else None
              with strategy.scope(): # Create and compile model
                print('Creating model...') if log else None
                model = create_ENB0_model(task, ag, aa, log)
                print(f'\tAge normalization: {n}') if log and n else None
                print('Compiling model...') if log else None
                model = compile_model(model, lr_TL, task, ag, al, g, log)

              # Train model
              print('Training model...') if log else None
              # The same callbacks list is reused for the fine-tuning phase
              callbacks = create_callbacks(task, ag)
              model = train_model(
                  model=model,
                  task=task,
                  train_ds=train_ds,
                  valid_ds=valid_ds,
                  callbacks=callbacks,
                  save_dir=TL_dir,
                  msg=log
              )

              # Test model
              if test:
                print('Testing model...') if log else None
                test_model(model, test_ds, test_steps, task, ag,
                           f_denorm, save_dir=TL_dir, msg=log)

              if fineTuning:
                # FINE TUNING
                print('\n', '\t'*6, 'FINE-TUNING ALL LAYERS\n') if log else None
                with strategy.scope(): # Unfreeze and compile model
                  print('Unfreezing and compiling model...') if log else None
                  model.trainable = True # Unfreeze whole model
                  # Recompile with the (lower) fine-tuning learning rate
                  model = compile_model(model, lr_FT, task, ag, al, g, log)

                # Train model
                print('Training model...') if log else None
                model = train_model(
                    model=model,
                    task=task,
                    train_ds=train_ds,
                    valid_ds=valid_ds,
                    callbacks=callbacks,
                    save_dir=FT_dir,
                    msg=log
                )

                # Test model
                if test:
                  print('Testing model...') if log else None
                  test_model(model, test_ds, test_steps, task, ag,
                             f_denorm, save_dir=FT_dir, msg=log)

## Sección 2. Implementación de modelos independientes

### Sección 2.1. Modelo para predicción del género: clasificación binaria

In [None]:
# Gender activations to compare: every key registered in dict_gender.
activ_gender = list(dict_gender.keys())
print(activ_gender)

['sigmoid', 'softmax']


In [None]:
# Train (TL + FT), test and save one gender model per activation on fold0.
EffNetB0(
    task='gender',
    folds='fold0',
    activ_gender=activ_gender,
    test=True,
    fineTuning=True,
    save=True,
    log=True
)

Model: gender, Batch_size: 256, Epochs: 100, Early stopping: Patience 10, Delta 0.0005
Folds: ['fold0']
Gender activation function: ['sigmoid', 'softmax']


********************************************* BASE MODEL: EfficientNetB0 - FOLD: fold0 ********************************************* 

Creating datasets...
	Train images: 15085, Files: 7, Steps per epoch: 59
	Valid images: 4310, Files: 2, Steps per epoch: 17
	Test images: 4310, Files: 2, Steps per epoch: 17

 						   TRAIN TOP MODEL LAYERS

Creating model...
	Gender output activation function: sigmoid
Compiling model...
	Learning rate: 0.001
	Gender loss: binary_crossentropy, Gender metric: binary_accuracy
Training model...
Restoring model weights from the end of the best epoch: 15.
Epoch 25: early stopping
	Best epoch: 15, Gender accuracy: 88.75%
	Training time: 0:04:56
Saving results...
	rain file: /content/drive/MyDrive/TFM/models/TPU/efficientNetB0/gender/fold0/TL/sigmoid/train.pickle
Saving model...
	Model file: /content/driv

### Sección 2.2. Modelo para predicción de la edad: regresión

In [None]:
# filter(None, ...) drops the '' placeholder keys of dict_age.
activ_age = list(filter(None, dict_age['activ'].keys()))
loss_age = list(filter(None, dict_age['loss'].keys()))
print(f'Activation funtion: {activ_age}\nLoss funtions: {loss_age}')

Activation funtion: ['relu', 'linear', 'tanh']
Loss funtions: ['MSE', 'MAE']


In [None]:
# `norm` is left at its '' default, so every normalization registered in
# dict_age for each activation is tried, combined with every age loss.
EffNetB0(
    task='age',
    folds='fold0',
    activ_age=activ_age,
    age_loss=loss_age,
    test=True,
    fineTuning=True,
    save=True,
    log=True
)

Model: age, Batch_size: 256, Epochs: 100, Early stopping: Patience 10, Delta 0.0005
Folds: ['fold0']
Age activation function: ['relu', 'linear', 'tanh']
Age loss function: ['MSE', 'MAE']


********************************************* BASE MODEL: EfficientNetB0 - FOLD: fold0 ********************************************* 

Creating datasets...
	Train images: 15085, Files: 7, Steps per epoch: 59
	Valid images: 4310, Files: 2, Steps per epoch: 17
	Test images: 4310, Files: 2, Steps per epoch: 17

 						   TRAIN TOP MODEL LAYERS

Creating model...
	Age output activation function: relu
	Age normalization: norm_01
Compiling model...
	Learning rate: 0.001
	Age loss: mean_squared_error, Age metrics: ['mean_squared_error', 'mean_absolute_error']
Training model...
Restoring model weights from the end of the best epoch: 21.
Epoch 31: early stopping
	Best epoch: 24, Age MSE: 0.010742, Age MAE: 0.077636
	Training time: 0:05:49
Saving results...
	rain file: /content/drive/MyDrive/TFM/models/TPU/eff

## Sección 3. Multi-Task Learning

### Sección 3.1. Optimización de gamma

In [None]:
# Candidate gamma values (weight of the gender loss): 0.11 to 0.19, step 0.01.
gamma = [x / 100 for x in range(11, 20)]
print(gamma)

[0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19]


In [None]:
# One multi-task model per gamma value on fold0, with fixed activations,
# normalization and age loss.
EffNetB0(
    task='mtl',
    folds='fold0',
    activ_gender='sigmoid',
    activ_age='relu',
    norm='norm_01',
    age_loss='MAE',
    gamma=gamma,
    test=True,
    fineTuning=True,
    save=True,
    log=True
)

Model: mtl, Batch_size: 256, Epochs: 100, Early stopping: Patience 10, Delta 0.0005
Folds: ['fold0']
Gender activation function: ['sigmoid']
Age activation function: ['relu']
Age loss function: ['MAE']
Gamma values: [0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19]


********************************************* BASE MODEL: EfficientNetB0 - FOLD: fold0 ********************************************* 

Creating datasets...
	Train images: 15085, Files: 7, Steps per epoch: 59
	Valid images: 4310, Files: 2, Steps per epoch: 17
	Test images: 4310, Files: 2, Steps per epoch: 17

 						   TRAIN TOP MODEL LAYERS

Creating model...
	Gender output activation function: sigmoid
	Age output activation function: relu
	Age normalization: norm_01
Compiling model...
	Learning rate: 0.001
	Gender loss: binary_crossentropy, Gender metric: binary_accuracy
	Age loss: mean_absolute_error, Age metrics: ['mean_squared_error', 'mean_absolute_error']
	Gamma value: 0.11
Training model...
Restoring model weig

### Sección 3.2. Validación cruzada

In [None]:
# Fold names are the last path component of every top-level bucket entry.
folds = [f.split('/')[-1] for f in tf.io.gfile.glob(f'gs://{GCS_BUCKET}/*')]
# list.remove raises ValueError if 'fold0' is not among the listed entries.
folds.remove('fold0') # Model already trained
print(folds)

['fold1', 'fold10', 'fold2', 'fold3', 'fold4', 'fold5', 'fold6', 'fold7', 'fold8', 'fold9']


In [None]:
# Cross-validation: same multi-task configuration trained on every other fold.
# NOTE(review): gamma=0.03 is not among the values generated in section 3.1
# (0.11 to 0.19) - confirm it is the intended optimum.
EffNetB0(
    task='mtl',
    folds=folds,
    activ_gender='sigmoid',
    activ_age='relu',
    norm='norm_01',
    age_loss='MAE',
    gamma=0.03,
    test=True,
    fineTuning=True,
    save=True,
    log=True
)

Model: mtl, Batch_size: 256, Epochs: 100, Early stopping: Patience 10, Delta 0.0005
Folds: ['fold1', 'fold10', 'fold2', 'fold3', 'fold4', 'fold5', 'fold6', 'fold7', 'fold8', 'fold9']
Gender activation function: ['sigmoid']
Age activation function: ['relu']
Age loss function: ['MAE']
Gamma values: [0.03]


********************************************* BASE MODEL: EfficientNetB0 - FOLD: fold1 ********************************************* 

Creating datasets...
	Train images: 15085, Files: 7, Steps per epoch: 59
	Valid images: 4310, Files: 2, Steps per epoch: 17
	Test images: 4310, Files: 2, Steps per epoch: 17

 						   TRAIN TOP MODEL LAYERS

Creating model...
	Gender output activation function: sigmoid
	Age output activation function: relu
	Age normalization: norm_01
Compiling model...
	Learning rate: 0.001
	Gender loss: binary_crossentropy, Gender metric: binary_accuracy
	Age loss: mean_absolute_error, Age metrics: ['mean_squared_error', 'mean_absolute_error']
	Gamma value: 0.03
Trai