# Ajustes iniciales

## Conexión a google drive

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
%cd ./drive/MyDrive/Colab Notebooks/

[Errno 2] No such file or directory: './drive/MyDrive/Colab Notebooks/'
/content/drive/MyDrive/Colab Notebooks


In [None]:
%pwd

'/content/drive/My Drive/Colab Notebooks'

## Importar modulos *_utils


In [None]:
import sys
sys.path.append('/content/drive/My Drive/Colab Notebooks/')

from my_utils import eval_utils
from my_utils import nn_utils
from my_utils import dataset_utils
import numpy as np

# Local Utils

# EVALUATE TRAINED MODELS (VALIDATION SET)

## Trained Models Evaluations

### evaluateOnVal()

In [None]:
import pickle
from pandas import DataFrame

def evaluateOnVal(task, search_results_path, architecture):
  """Evaluate the best configurations of a search on their validation folds.

  For each pondering method ('A': global mean, 'B': per-fold analysis) the
  search results are ranked by the matching `val_acc_<method>` column, the
  5 best `conf_ID`s are evaluated with `evalModelsOnValidation` (7 folds
  each) and the merged metrics are pickled to
  `./Results/final/<architecture>-<1|2>_<task>_VALIDATION.df`.

  Args:
    task: task name forwarded to `loadTrainData` and `evalModelsOnValidation`.
    search_results_path: path of a pickled DataFrame exposing the columns
      `conf_ID`, `val_acc_A` and `val_acc_B`.
    architecture: architecture name, used to load the data and to build the
      architecture labels / output file names.

  Returns:
    Tuple `(method_A_results, method_B_results)` of merged DataFrames.
  """
  # Local import: the notebook only imports `DataFrame` from pandas.
  from pandas import concat

  ## Load the k-fold data (the message says "test", but the folds of the
  ## training set are what is loaded and evaluated here)
  print('Loading test data...')
  X_train, Y_train = loadTrainData(task, architecture)
  print()

  ## Load search results
  # NOTE: pickle.load can execute arbitrary code; only open trusted files.
  with open(search_results_path, 'rb') as file_handler:
    search_results = pickle.load(file_handler)

  merged_results = []

  # (pondering method, numeric suffix used in the architecture label)
  methods = (('A', 1), ('B', 2))

  for position, (eval_method, arch_suffix) in enumerate(methods):
    if position > 0:
      print()

    print(f'Evaluating method {eval_method} models')

    # The ranking is applied on top of the previous one (as the original
    # in-place sorts did) so that ties are broken in the same way.
    search_results = search_results.sort_values(by=f'val_acc_{eval_method}',
                                                ascending=False)
    config_ids = search_results.conf_ID[:5].to_list()

    arch_label = f'{architecture}-{arch_suffix}'

    # Collect the per-configuration frames and concatenate once:
    # DataFrame.append was removed in pandas 2.0.
    config_frames = [evalModelsOnValidation(X_train, Y_train,
                                            task,
                                            config_id,
                                            eval_method,
                                            arch_label, 7)
                     for config_id in config_ids]

    if config_frames:
      method_results = concat(config_frames, ignore_index=True)
    else:
      method_results = DataFrame()

    results_file = f'./Results/final/{arch_label}_{task}_VALIDATION.df'

    with open(results_file, 'wb') as file_handler:
      pickle.dump(method_results, file_handler)

    print(f'\nResults saved to: {results_file}')

    merged_results.append(method_results)

  return merged_results[0], merged_results[1]


In [None]:
def loadTrainData(task, architecture):
  """Load the FT3-encoded training data, split in 7 folds, for one task.

  The 'SNN' architecture receives the 'SINGLE-VEC' encoding format; every
  other architecture receives 'EMB-SEQ'.

  Returns:
    The `(X_train, Y_train)` pair produced by
    `dataset_utils.loadEncodedTrainData`.
  """
  encoding_format = 'SINGLE-VEC' if architecture == 'SNN' else 'EMB-SEQ'

  features, labels = dataset_utils.loadEncodedTrainData(
      embedding_type='FT3',
      encoding_format=encoding_format,
      labels_to_return=[task],
      n_folds=7)

  return features, labels

### evalModelsOnValidation()

In [None]:

from pandas import DataFrame
from  tensorflow.keras.utils import to_categorical

def evalModelsOnValidation(X_train, Y_train, task, config_ID, 
                                          eval_method, arch_label, n_folds):
  """Evaluate the per-fold classifiers of one configuration.

  For every fold index in `range(n_folds)` the weights file
  `F<fold>_<eval_method>.hdf5` of `config_ID` is loaded, predictions are made
  on `X_train[fold]` and scored against the rows of `Y_train` whose `kfold`
  column equals that fold.

  Returns:
    DataFrame with one row per fold: `conf_id`, `model_type` ('F'),
    `architecture` and the metrics returned by `evaluatePredictions`.
  """
  fold_records = []

  for fold in range(n_folds):
    # fold-specific classifier
    model = loadPretrainedModel(config_ID, f'F{fold}_{eval_method}.hdf5')

    # inputs and gold labels of the fold
    fold_inputs = X_train[fold]
    fold_labels = Y_train.loc[Y_train['kfold'] == fold, :]

    # class probabilities -> class predictions -> metrics
    probabilities = model.predict(fold_inputs)
    predictions = getClassesPredictions(probabilities, task)
    metrics = evaluatePredictions(task, fold_labels[task], predictions)

    fold_records.append({'conf_id': config_ID,
                         'model_type': 'F',
                         'architecture': arch_label,
                         **metrics})

  # one progress mark per evaluated configuration
  print('*', end='')

  return DataFrame(fold_records)

### loadPretrainedModel()

In [None]:
#*************************     loadPretrainedModel()    ************************  
from keras.models import model_from_json

def loadPretrainedModel(config_ID, weights_file):
  """Rebuild a model from its JSON description and load trained weights.

  Args:
    config_ID: configuration identifier; selects
      `./models_json_files/<config_ID>.json` and the
      `./trained_models/<config_ID>/` weights folder.
    weights_file: file name of the weights inside that folder.

  Returns:
    The model created by `model_from_json` with the weights loaded.
  """
  # load model configuration from json file; the context manager closes the
  # handle even when reading fails
  with open(f'./models_json_files/{config_ID}.json', 'r') as json_file:
    model_config = json_file.read()

  trained_model = model_from_json(model_config)

  # load pretrained weights into the model
  trained_model.load_weights(f'./trained_models/{config_ID}/{weights_file}')

  return trained_model


### getClassesPredictions()

In [None]:
import numpy as np

def class_pred(true_prob):
  """Return 1 when `true_prob` is at least 0.5, otherwise 0."""
  return 1 if true_prob>=0.5 else 0

def getClassesPredictions(classes_probs, task):
  """Turn predicted probabilities into a column vector of class labels.

  For the 'HTA' task the label of each row is the index of its largest
  value; for any other task each row is mapped through `class_pred`.
  The result is always reshaped to `(-1, 1)`.
  """
  if task != 'HTA':
    thresholded = np.apply_along_axis(class_pred, 1, classes_probs)
    return thresholded.reshape(-1,1)

  best_classes = [row.argmax() for row in classes_probs]
  return np.array(best_classes).reshape(-1,1)

### evaluatePredictions()

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def evaluatePredictions(task, val_labels, pred_labels, verbose=False):
  """Score predicted labels against the gold labels of a task.

  For task 'HTA' both label sets are split into HS, TR and AG labels with
  `dataset_utils.getLabelsPerTask`; accuracy and macro precision, recall and
  F1 are computed per sub-task, plus `F1_multi` (mean of the three F1
  scores) and `EMR` (exact match ratio from `computeEMR`).
  For tasks 'HS', 'TR' and 'AG' accuracy and macro precision, recall and F1
  are computed directly.

  Args:
    task: 'HTA', 'HS', 'TR' or 'AG'.
    val_labels: gold labels.
    pred_labels: predicted labels.
    verbose: when True, print a short summary of the scores.

  Returns:
    Dict of metric name -> value; None when `task` is none of the above.
  """

  if task=='HTA':

    # get the corresponding predicted and val (target) labels for each task
    pred_HS_labels, pred_TR_labels, pred_AG_labels = dataset_utils.getLabelsPerTask(pred_labels)
    val_HS_labels, val_TR_labels, val_AG_labels = dataset_utils.getLabelsPerTask(val_labels)

    # compute the different metrics
    HS_acc = accuracy_score(val_HS_labels, pred_HS_labels)
    # precision/recall must compare gold vs. predicted labels (comparing the
    # gold labels with themselves always yields 1.0)
    HS_prec = precision_score(val_HS_labels, pred_HS_labels, average="macro")
    HS_recall = recall_score(val_HS_labels, pred_HS_labels, average="macro")
    HS_f1 = f1_score(val_HS_labels, pred_HS_labels, average="macro")

    AG_acc = accuracy_score(val_AG_labels, pred_AG_labels)
    AG_prec = precision_score(val_AG_labels, pred_AG_labels, average="macro")
    AG_recall = recall_score(val_AG_labels, pred_AG_labels, average="macro")
    AG_f1 = f1_score(val_AG_labels, pred_AG_labels, average="macro")

    TR_acc = accuracy_score(val_TR_labels, pred_TR_labels)
    TR_prec = precision_score(val_TR_labels, pred_TR_labels, average="macro")
    TR_recall = recall_score(val_TR_labels, pred_TR_labels, average="macro")
    TR_f1 = f1_score(val_TR_labels, pred_TR_labels, average="macro")

    F1_multi = (HS_f1+ AG_f1 + TR_f1)/3

    # an instance counts as an exact match only if its three labels agree
    EMR = computeEMR(list(zip(val_HS_labels, val_TR_labels, val_AG_labels)),
                    list(zip(pred_HS_labels, pred_TR_labels, pred_AG_labels)))

    results_dict = {'HS_acc':HS_acc,
                    'HS_prec':HS_prec,
                    'HS_recall':HS_recall,
                    'HS_f1':HS_f1,
                    'AG_acc':AG_acc,
                    'AG_prec':AG_prec,
                    'AG_recall':AG_recall,
                    'AG_f1':AG_f1,
                    'TR_acc':TR_acc,
                    'TR_prec':TR_prec,
                    'TR_recall':TR_recall,
                    'TR_f1':TR_f1,
                    'F1_multi':F1_multi,
                    'EMR':EMR}

    if verbose:
      print('EMR = ', EMR)
      print('F1_multi = ', F1_multi)
      print()

    return results_dict

  if task in ['HS', 'TR', 'AG']:
    # compute the different metrics
    acc = accuracy_score(val_labels, pred_labels)
    prec = precision_score(val_labels, pred_labels, average="macro")
    recall = recall_score(val_labels, pred_labels, average="macro")
    f1_macro = f1_score(val_labels, pred_labels, average="macro")

    results_dict = {'acc':acc,
                    'prec':prec,
                    'recall':recall,
                    'f1-macro':f1_macro}

    if verbose:
      print('Acc = ', acc)
      print('F1_macro = ', f1_macro)
      print()

    return results_dict

def computeEMR(test_labels, pred_labels):
  """Exact match ratio: share of gold labels equal to their prediction.

  Pairs are formed positionally with `zip`; the count of equal pairs is
  divided by `len(test_labels)`.
  """
  n_instances = len(test_labels)
  n_matches = sum(1 for gold, pred in zip(test_labels, pred_labels)
                  if gold == pred)

  return n_matches/n_instances

def compute_metrics(target, predicted):
    """Compute accuracy and macro-averaged precision, recall and F1.

    Args:
        target: gold labels.
        predicted: predicted labels.

    Returns:
        Dict with the keys 'acc', 'prec', 'recall' and 'f1'.
    """
    from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

    accuracy = accuracy_score(target, predicted)
    precision = precision_score(target, predicted, average="macro")
    recall = recall_score(target, predicted, average="macro")
    # score the function's own arguments (the previous body referenced
    # names that are not defined in this scope)
    f1 = f1_score(target, predicted, average="macro")

    # only the metrics computed above are reported; the former per-class
    # entries pointed at variables that were never computed
    results = {'acc': accuracy,
               'prec': precision,
               'recall': recall,
               'f1': f1}

    return results

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def evaluatePredictions(task, val_labels, pred_labels, verbose=False):
  """Score predicted labels against the gold labels of a task.

  'HTA': labels are split into HS, TR and AG labels with
  `dataset_utils.getLabelsPerTask`; each sub-task is scored with
  `compute_metrics`, and `F1_multi` (mean of the three F1 scores) and `EMR`
  (from `computeEMR`) are added.
  'HS', 'TR', 'AG': the labels are scored directly with `compute_metrics`.
  Any other task yields None.

  When `verbose` is True a short summary is printed.
  """
  if task=='HTA':
    # per-sub-task views of the predicted and gold (val) labels
    pred_HS, pred_TR, pred_AG = dataset_utils.getLabelsPerTask(pred_labels)
    gold_HS, gold_TR, gold_AG = dataset_utils.getLabelsPerTask(val_labels)

    results_dict = {}

    # ACC, PREC, RECALL and F1 per sub-task, stored as they are unpacked
    (results_dict['HS_acc'], results_dict['HS_prec'],
     results_dict['HS_recall'], results_dict['HS_f1']) = compute_metrics(gold_HS, pred_HS)
    (results_dict['AG_acc'], results_dict['AG_prec'],
     results_dict['AG_recall'], results_dict['AG_f1']) = compute_metrics(gold_AG, pred_AG)
    (results_dict['TR_acc'], results_dict['TR_prec'],
     results_dict['TR_recall'], results_dict['TR_f1']) = compute_metrics(gold_TR, pred_TR)

    F1_multi = (results_dict['HS_f1']+ results_dict['AG_f1'] + results_dict['TR_f1'])/3

    gold_triples = list(zip(gold_HS, gold_TR, gold_AG))
    pred_triples = list(zip(pred_HS, pred_TR, pred_AG))
    EMR = computeEMR(gold_triples, pred_triples)

    results_dict['F1_multi'] = F1_multi
    results_dict['EMR'] = EMR

    if verbose:
      print('EMR = ', EMR)
      print('F1_multi = ', F1_multi)
      print()

    return results_dict

  if task not in ('HS', 'TR', 'AG'):
    return None

  # single-task case: ACC, PREC, RECALL and F1 on the labels as given
  acc, prec, recall, f1_macro = compute_metrics(val_labels, pred_labels)

  if verbose:
    print('Acc = ', acc)
    print('F1_macro = ', f1_macro)
    print()

  return {'acc':acc,
          'prec':prec,
          'recall':recall,
          'f1-macro':f1_macro}

def compute_metrics(target, predicted):
    """Return `(accuracy, precision, recall, f1)` for the given labels.

    Precision, recall and F1 are macro-averaged.
    """
    return (accuracy_score(target, predicted),
            precision_score(target, predicted, average="macro"),
            recall_score(target, predicted, average="macro"),
            f1_score(target, predicted, average="macro"))

def computeEMR(test_labels, pred_labels):
  """Fraction of positions where the gold label equals the predicted one.

  The denominator is `len(test_labels)`; pairs are taken with `zip`.
  """
  total = len(test_labels)
  hits = 0
  for pair in zip(test_labels, pred_labels):
    if pair[0] == pair[1]:
      hits += 1

  return hits/total

### labels_utils

In [None]:
#**************************     getLabelsPerTask()    **************************
def getLabelsPerTask(HTA_labels):
    """Split HTA labels into separate HS, TR and AG label columns.

    Every label is converted with `mapTo3DimsFormat`; the first, second and
    third components are gathered into three arrays reshaped to `(-1, 1)`.

    Returns:
        Tuple `(HS_labels, TR_labels, AG_labels)`.
    """
    triples = [mapTo3DimsFormat(label) for label in HTA_labels]

    def component(position):
        # column vector holding one component of every triple
        return np.array([triple[position] for triple in triples]).reshape(-1,1)

    return (component(0), component(1), component(2))

#**************************     mapTo3DimsFormat()    ************************** 
def mapTo3DimsFormat(AB_label):
  '''
  Map a label in the five-classes format to its 3-dims labeling.

    0 -> (0,0,0)  [HS = 0, TR = 0, AG = 0]
    1 -> (1,0,0)  [HS = 1, TR = 0, AG = 0]
    2 -> (1,0,1)  [HS = 1, TR = 0, AG = 1]
    3 -> (1,1,0)  [HS = 1, TR = 1, AG = 0]
    4 -> (1,1,1)  [HS = 1, TR = 1, AG = 1]

  input:
  AB_label - label in the five-classes format

  output:
  (H,T,A)  - ints tuple, labeling in 3 dims format; None when the label
             matches none of the five classes

  '''
  conversions = ((0, (0,0,0)),
                 (1, (1,0,0)),
                 (2, (1,0,1)),
                 (3, (1,1,0)),
                 (4, (1,1,1)))

  # compared one by one with == (not looked up by hash)
  for five_class_label, three_dims_label in conversions:
    if AB_label == five_class_label:
      return three_dims_label


# EVALUATE TRAINED MODELS ON VALIDATION DATA

# SNN (COMPLETE)

## HS

In [None]:
# Validation-fold evaluation of the best SNN configurations for the HS task
results_A, results_B = evaluateOnVal(
    task='HS',
    search_results_path='./Results/SNN/HS/experiments_4.df',
    architecture='SNN',
)

Loading test data...
FastText 3 - Spanish Unannotated Corpora
Encoding Format: SINGLE-VEC

Process complete
5000 train instances retrieved

encodings_dim = (300,)

Evaluating method A models
*****
Results saved to: ./Results/final/SNN-1_HS_VALIDATION.df

Evaluating method B models
*****
Results saved to: ./Results/final/SNN-2_HS_VALIDATION.df


## AG

In [None]:
# Validation-fold evaluation of the best SNN configurations for the AG task
results_A, results_B = evaluateOnVal(
    task='AG',
    search_results_path='./Results/SNN/AG/experiments_4.df',
    architecture='SNN',
)

Loading test data...
FastText 3 - Spanish Unannotated Corpora
Encoding Format: SINGLE-VEC

Process complete
5000 train instances retrieved

encodings_dim = (300,)

Evaluating method A models
*****
Results saved to: ./Results/final/SNN-1_AG_VALIDATION.df

Evaluating method B models
*****
Results saved to: ./Results/final/SNN-2_AG_VALIDATION.df


## TR

In [None]:
# Validation-fold evaluation of the best SNN configurations for the TR task
results_A, results_B = evaluateOnVal(
    task='TR',
    search_results_path='./Results/SNN/TR/experiments_4.df',
    architecture='SNN',
)

Loading test data...
FastText 3 - Spanish Unannotated Corpora
Encoding Format: SINGLE-VEC

Process complete
5000 train instances retrieved

encodings_dim = (300,)

Evaluating method A models
*****
Results saved to: ./Results/final/SNN-1_TR_VALIDATION.df

Evaluating method B models
*****
Results saved to: ./Results/final/SNN-2_TR_VALIDATION.df


## HTA

In [None]:
# Validation-fold evaluation of the best SNN configurations for the HTA task
results_A, results_B = evaluateOnVal(
    task='HTA',
    search_results_path='./Results/SNN/HTA/experiments_4f.df',
    architecture='SNN',
)

Loading test data...
FastText 3 - Spanish Unannotated Corpora
Encoding Format: SINGLE-VEC

Process complete
5000 train instances retrieved

encodings_dim = (300,)

Evaluating method A models
*****
Results saved to: ./Results/final/SNN-1_HTA_VALIDATION.df

Evaluating method B models
*****
Results saved to: ./Results/final/SNN-2_HTA_VALIDATION.df


# CNN (COMPLETE)

## HS

In [None]:
# Validation-fold evaluation of the best CNN configurations for the HS task
results_A, results_B = evaluateOnVal(
    task='HS',
    search_results_path='./Results/CNN/HS/experiments_7f.df',
    architecture='CNN',
)

Loading test data...
FastText 3 - Spanish Unannotated Corpora
Encoding Format: EMB-SEQ

Process complete
5000 train instances retrieved

encodings_dim = (55, 300)

Evaluating method A models
*****
Results saved to: ./Results/final/CNN-1_HS_VALIDATION.df

Evaluating method B models
*****
Results saved to: ./Results/final/CNN-2_HS_VALIDATION.df


## AG

In [None]:
# Validation-fold evaluation of the best CNN configurations for the AG task
results_A, results_B = evaluateOnVal(
    task='AG',
    search_results_path='./Results/CNN/AG/experiments_6f.df',
    architecture='CNN',
)

Loading test data...
FastText 3 - Spanish Unannotated Corpora
Encoding Format: EMB-SEQ

Process complete
5000 train instances retrieved

encodings_dim = (55, 300)

Evaluating method A models
*****
Results saved to: ./Results/final/CNN-1_AG_VALIDATION.df

Evaluating method B models
*****
Results saved to: ./Results/final/CNN-2_AG_VALIDATION.df


## TR

In [None]:
# Validation-fold evaluation of the best CNN configurations for the TR task
results_A, results_B = evaluateOnVal(
    task='TR',
    search_results_path='./Results/CNN/TR/experiments_6f.df',
    architecture='CNN',
)

Loading test data...
FastText 3 - Spanish Unannotated Corpora
Encoding Format: EMB-SEQ

Process complete
5000 train instances retrieved

encodings_dim = (55, 300)

Evaluating method A models
*****
Results saved to: ./Results/final/CNN-1_TR_VALIDATION.df

Evaluating method B models
*****
Results saved to: ./Results/final/CNN-2_TR_VALIDATION.df


## HTA

In [None]:
# Validation-fold evaluation of the best CNN configurations for the HTA task
results_A, results_B = evaluateOnVal(
    task='HTA',
    search_results_path='./Results/CNN/HTA/experiments_6f.df',
    architecture='CNN',
)

Loading test data...
FastText 3 - Spanish Unannotated Corpora
Encoding Format: EMB-SEQ

Process complete
5000 train instances retrieved

encodings_dim = (55, 300)

Evaluating method A models
*****
Results saved to: ./Results/final/CNN-1_HTA_VALIDATION.df

Evaluating method B models
*****
Results saved to: ./Results/final/CNN-2_HTA_VALIDATION.df


# BiLSTM (COMPLETE)

## HS

In [None]:
# Validation-fold evaluation of the best BiLSTM configurations for the HS task
results_A, results_B = evaluateOnVal(
    task='HS',
    search_results_path='./Results/BiLSTM/HS/experiments_4f.df',
    architecture='BiLSTM',
)

Loading test data...
FastText 3 - Spanish Unannotated Corpora
Encoding Format: EMB-SEQ

Process complete
5000 train instances retrieved

encodings_dim = (55, 300)

Evaluating method A models
*****
Results saved to: ./Results/final/BiLSTM-1_HS_VALIDATION.df

Evaluating method B models
*****
Results saved to: ./Results/final/BiLSTM-2_HS_VALIDATION.df


## AG

In [None]:
# Validation-fold evaluation of the best BiLSTM configurations for the AG task
results_A, results_B = evaluateOnVal(
    task='AG',
    search_results_path='./Results/BiLSTM/AG/experiments_4f.df',
    architecture='BiLSTM',
)

Loading test data...
FastText 3 - Spanish Unannotated Corpora
Encoding Format: EMB-SEQ

Process complete
5000 train instances retrieved

encodings_dim = (55, 300)

Evaluating method A models
*****
Results saved to: ./Results/final/BiLSTM-1_AG_VALIDATION.df

Evaluating method B models
*****
Results saved to: ./Results/final/BiLSTM-2_AG_VALIDATION.df


## TR

In [None]:
# Validation-fold evaluation of the best BiLSTM configurations for the TR task
results_A, results_B = evaluateOnVal(
    task='TR',
    search_results_path='./Results/BiLSTM/TR/experiments_4f.df',
    architecture='BiLSTM',
)

Loading test data...
FastText 3 - Spanish Unannotated Corpora
Encoding Format: EMB-SEQ

Process complete
5000 train instances retrieved

encodings_dim = (55, 300)

Evaluating method A models
*****
Results saved to: ./Results/final/BiLSTM-1_TR_VALIDATION.df

Evaluating method B models
*****
Results saved to: ./Results/final/BiLSTM-2_TR_VALIDATION.df


## HTA

In [None]:
# Validation-fold evaluation of the best BiLSTM configurations for the HTA task
results_A, results_B = evaluateOnVal(
    task='HTA',
    search_results_path='./Results/BiLSTM/HTA/experiments_4f.df',
    architecture='BiLSTM',
)

Loading test data...
FastText 3 - Spanish Unannotated Corpora
Encoding Format: EMB-SEQ

Process complete
5000 train instances retrieved

encodings_dim = (55, 300)

Evaluating method A models
*****
Results saved to: ./Results/final/BiLSTM-1_HTA_VALIDATION.df

Evaluating method B models
*****
Results saved to: ./Results/final/BiLSTM-2_HTA_VALIDATION.df


# ConvLSTM (COMPLETE)

## HS

In [None]:
# Validation-fold evaluation of the best ConvLSTM configurations for the HS task
results_A, results_B = evaluateOnVal(
    task='HS',
    search_results_path='./Results/ConvLSTM/HS/experiments_5f.df',
    architecture='ConvLSTM',
)

Loading test data...
FastText 3 - Spanish Unannotated Corpora
Encoding Format: EMB-SEQ

Process complete
5000 train instances retrieved

encodings_dim = (55, 300)

Evaluating method A models
*****
Results saved to: ./Results/final/ConvLSTM-1_HS_VALIDATION.df

Evaluating method B models
*****
Results saved to: ./Results/final/ConvLSTM-2_HS_VALIDATION.df


## AG

In [None]:
# Validation-fold evaluation of the best ConvLSTM configurations for the AG task
results_A, results_B = evaluateOnVal(
    task='AG',
    search_results_path='./Results/ConvLSTM/AG/experiments_5f.df',
    architecture='ConvLSTM',
)

Loading test data...
FastText 3 - Spanish Unannotated Corpora
Encoding Format: EMB-SEQ

Process complete
5000 train instances retrieved

encodings_dim = (55, 300)

Evaluating method A models
*****
Results saved to: ./Results/final/ConvLSTM-1_AG_VALIDATION.df

Evaluating method B models
*****
Results saved to: ./Results/final/ConvLSTM-2_AG_VALIDATION.df


## TR

In [None]:
# Validation-fold evaluation of the best ConvLSTM configurations for the TR task
results_A, results_B = evaluateOnVal(
    task='TR',
    search_results_path='./Results/ConvLSTM/TR/experiments_5f.df',
    architecture='ConvLSTM',
)

Loading test data...
FastText 3 - Spanish Unannotated Corpora
Encoding Format: EMB-SEQ

Process complete
5000 train instances retrieved

encodings_dim = (55, 300)

Evaluating method A models
*****
Results saved to: ./Results/final/ConvLSTM-1_TR_VALIDATION.df

Evaluating method B models
*****
Results saved to: ./Results/final/ConvLSTM-2_TR_VALIDATION.df


## HTA

In [None]:
# Validation-fold evaluation of the best ConvLSTM configurations for the HTA task
results_A, results_B = evaluateOnVal(
    task='HTA',
    search_results_path='./Results/ConvLSTM/HTA/experiments_5f.df',
    architecture='ConvLSTM',
)

Loading test data...
FastText 3 - Spanish Unannotated Corpora
Encoding Format: EMB-SEQ

Process complete
5000 train instances retrieved

encodings_dim = (55, 300)

Evaluating method A models
*****
Results saved to: ./Results/final/ConvLSTM-1_HTA_VALIDATION.df

Evaluating method B models
*****
Results saved to: ./Results/final/ConvLSTM-2_HTA_VALIDATION.df
