# Ajustes iniciales

## Conexión a Google Drive

In [1]:
# Mount Google Drive at /content/drive so later cells can read and write files stored there.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Move into the Colab Notebooks folder: later cells use paths relative to it
# (e.g. './Results/final/...', './models_json_files/...').
# NOTE(review): the path is relative to the current directory, so re-running this
# cell after it already succeeded presumably fails — confirm.
%cd ./drive/MyDrive/Colab Notebooks/

/content/drive/MyDrive/Colab Notebooks


In [3]:
# Display the current working directory to confirm the directory change above.
%pwd

'/content/drive/MyDrive/Colab Notebooks'

## Importar módulos `*_utils`


In [4]:
import sys
sys.path.append('/content/drive/My Drive/Colab Notebooks/')

from my_utils import eval_utils
from my_utils import nn_utils
from my_utils import dataset_utils
from my_utils import results_utils
import numpy as np

# Main functions

## automaticMixEnsembleEvaluation()

In [34]:
import pickle
from pandas import DataFrame

def automaticMixEnsembleEvaluation(task):
  '''
  Evaluate the mixed-architecture ensembles of `task` with evaluation
  methods 1 and 2, pickling the outcome of each method to disk.

  The test data is loaded once (with the 'CNN' architecture argument) and
  reused for both methods. The outcomes are written to
  ./Results/final/ensembles/MIX-ARCH-1-<task>.df and
  ./Results/final/ensembles/MIX-ARCH-2-<task>.df.

  input:
  task   - task identifier forwarded to the loading / evaluation helpers

  output:
  (results_1, results_2) - the values returned by
                           evaluateMixClassifiersEnsemble() for
                           evaluation methods 1 and 2
  '''
  def _evaluate_and_store(test_inputs, test_targets, method, destination):
    # Pick the configurations for this evaluation method, evaluate the
    # ensemble built from them and pickle the outcome to `destination`.
    selected_ids = getConfigsInfo(task, evalMethod=method)

    outcome = evaluateMixClassifiersEnsemble(test_inputs, test_targets, task,
                                             selected_ids, evalMethod=method,
                                             verbose=False)

    with open(destination, 'wb') as sink:
      pickle.dump(outcome, sink)

    return outcome

  print('Loading test data...')
  X_test, Y_test = loadTestData(task, 'CNN')
  print()

  print('Evaluating mix architecture ensembles (EVAL-METHOD 1)')
  print()
  results_1 = _evaluate_and_store(
      X_test, Y_test, 1, f'./Results/final/ensembles/MIX-ARCH-1-{task}.df')

  print('Evaluating mix architecture ensembles (EVAL-METHOD 2)')
  results_2 = _evaluate_and_store(
      X_test, Y_test, 2, f'./Results/final/ensembles/MIX-ARCH-2-{task}.df')

  print()

  return results_1, results_2

### evaluateMixClassifiersEnsemble()

In [32]:

from pandas import DataFrame
from  tensorflow.keras.utils import to_categorical

def evaluateMixClassifiersEnsemble(X_test, Y_test, task, config_ids, evalMethod, 
                                verbose=False):  
  '''
  Build two ensembles from the pretrained models listed in `config_ids` and
  evaluate both on the test data.

    MEAN-PROB - the predicted probabilities of all models are averaged and
                the average is turned into class predictions
    VOTING    - every model votes for its predicted class and the class with
                most votes wins

  input:
  X_test      - test samples, passed unchanged to each model's predict()
  Y_test      - container of target labels; the labels used are Y_test[task]
  task        - 'HTA' (5 classes) or any other task id (treated as binary)
  config_ids  - ids of the model configurations that form the ensemble
  evalMethod  - 1 loads 'global_model_A.hdf5', any other value loads
                'global_model_B.hdf5'
  verbose     - if True, print a header before each ensemble evaluation

  output:
  DataFrame with two rows (MEAN-PROB first, VOTING second) holding
  'model_type', 'n_classifiers', 'task' and the metrics returned by
  evaluatePredictions().

  raises:
  ValueError  - if `config_ids` is empty
  '''
  n_classifiers = len(config_ids)
  if n_classifiers == 0:
    # Without this guard the mean below divides by zero and the metrics
    # are computed on meaningless predictions.
    raise ValueError('config_ids is empty: at least one classifier is required')

  # Size the accumulators from the labels that are actually evaluated.
  # len(Y_test) is only the number of samples when Y_test is row-indexed;
  # Y_test[task] is what evaluatePredictions() receives below.
  target_labels = Y_test[task]
  n_samples = len(target_labels)

  if task=='HTA':
    n_classes = 5
    n_prob_columns = n_classes
  else:
    n_classes = 2
    # assumes binary models output a single probability column — this is
    # the shape the original accumulator was given; TODO confirm
    n_prob_columns = 1

  # classes_probs_sum feeds the MEAN-PROB ensemble,
  # classes_votes_sum feeds the VOTING ensemble
  classes_probs_sum = np.zeros((n_samples, n_prob_columns))
  classes_votes_sum = np.zeros((n_samples, n_classes))

  # the weights file only depends on the evaluation method
  eval_method = 'A' if evalMethod==1 else 'B'
  weights_file = f'global_model_{eval_method}.hdf5'

  # process the samples with the different classifiers
  for config_ID in config_ids:
    trained_model = loadPretrainedModel(config_ID, weights_file)

    # make predictions on X_test samples
    classes_probs = trained_model.predict(X_test)

    # turn the classes_probs into classes predictions
    labels_predictions_array = getClassesPredictions(classes_probs, task)

    classes_probs_sum += classes_probs
    classes_votes_sum += to_categorical(labels_predictions_array, num_classes=n_classes)

  def _evaluation_record(model_type, labels_predictions_array):
    # One result row: ensemble identification plus its metrics.
    evaluation = evaluatePredictions(task, target_labels, labels_predictions_array)
    return {'model_type': model_type,
            'n_classifiers': n_classifiers,
            'task': task,
            **evaluation}

  evaluations_record = list()

  # EVALUATE THE ENSEMBLES
  if verbose:
    print('\nEvaluating MEAN PROBABILITIES ENSEMBLE')

  # turn the averaged probabilities into classes predictions
  mean_prob_labels = getClassesPredictions(classes_probs_sum/n_classifiers, task)
  evaluations_record.append(_evaluation_record('MEAN-PROB', mean_prob_labels))

  if verbose:
    print('\nEvaluating MAJORITY VOTING ENSEMBLE')

  # Row-wise argmax of the vote counts. argmax returns the first maximal
  # index, so a tie is resolved in favour of the lowest class index.
  voting_labels = classes_votes_sum.argmax(axis=1).reshape(-1,1)
  evaluations_record.append(_evaluation_record('VOTING', voting_labels))

  return DataFrame(evaluations_record)

## utils

### loadTestData()

In [22]:
def loadTestData(task, architecture):
  '''
  Load the encoded test set for `task` through
  dataset_utils.loadEncodedTestData().

  The encoding format requested is 'SINGLE-VEC' when `architecture` is
  'SNN' and 'EMB-SEQ' for every other architecture; the embedding type is
  always 'FT3' and only the labels of `task` are requested.

  output:
  (X_test, Y_test) - the pair returned by loadEncodedTestData()
  '''
  encoding_format = 'SINGLE-VEC' if architecture == 'SNN' else 'EMB-SEQ'

  samples, labels = dataset_utils.loadEncodedTestData(
      embedding_type='FT3',
      encoding_format=encoding_format,
      labels_to_return=[task])

  return samples, labels

### getConfigsInfo()

In [23]:
def getConfigsInfo(task, evalMethod):
  '''
  Collect, in a single flat list, the configuration ids returned by
  getBestModelsInfo() for the 'CNN', 'BiLSTM' and 'ConvLSTM' architectures
  (in that order).
  '''
  return [config_id
          for architecture in ['CNN','BiLSTM','ConvLSTM']
          for config_id in getBestModelsInfo(task, architecture, evalMethod)]

def getBestModelsInfo(task, architecture, evalMethod, type_list=('G',), n_best=2):
  '''
  Return the configuration ids of the first `n_best` stored result rows whose
  model_type is listed in `type_list`.

  The results are read from
  ./Results/final/<architecture>-<evalMethod>_<task>_TEST.df through
  results_utils.mergeDataFrames().

  input:
  task          - task identifier, used in the results file name
  architecture  - architecture identifier, used in the results file name
  evalMethod    - evaluation method, used in the results file name
  type_list     - accepted values of the 'model_type' column
  n_best        - number of rows to keep (default 2)

  output:
  list with the 'conf_id' value of each selected row

  NOTE(review): rows are taken in the order in which they are stored, no
  sorting is done here. The previous version computed a sorting metric
  ('EMR' for HTA, 'f1-macro' otherwise) but never used it; confirm that the
  stored frames are already ranked, otherwise the ids returned are not the
  best ones.
  '''
  results_list = [f'./Results/final/{architecture}-{evalMethod}_{task}_TEST.df']

  results_df = results_utils.mergeDataFrames(results_list, '.delete_this.df') 

  # keep only the requested model types, then the first n_best rows
  type_mask = results_df.model_type.isin(list(type_list))
  best_models_info = results_df.loc[type_mask].iloc[:n_best]

  return best_models_info.conf_id.to_list()

### loadPretrainedModel()

In [24]:
#*************************     loadPretrainedModel()    ************************  
from keras.models import model_from_json

def loadPretrainedModel(config_ID, weights_file):
  '''
  Rebuild a model from its JSON description and load pretrained weights
  into it.

  input:
  config_ID     - configuration id; the architecture is read from
                  ./models_json_files/<config_ID>.json
  weights_file  - name of the weights file inside
                  ./trained_models/<config_ID>/

  output:
  the model returned by model_from_json() with the weights loaded
  '''
  # load model configuration from json file; the context manager closes the
  # file even if reading fails
  with open(f'./models_json_files/{config_ID}.json', 'r') as json_file:
    model_config = json_file.read()

  trained_model = model_from_json(model_config)

  # load pretrained weights into the model
  trained_model.load_weights(f'./trained_models/{config_ID}/{weights_file}')

  return trained_model


### getClassesPredictions()

In [25]:
import numpy as np

def class_pred(true_prob):
  '''
  Threshold a probability at 0.5: return 1 when `true_prob` is greater than
  or equal to 0.5, otherwise 0.
  '''
  return 1 if true_prob>=0.5 else 0

def getClassesPredictions(classes_probs, task):
  '''
  Turn predicted probabilities into class labels.

  For task 'HTA' the label of each row is the index of its largest
  probability. For any other task each row must hold a single probability,
  which is mapped to 1 when it is >= 0.5 and to 0 otherwise.

  input:
  classes_probs - 2-D array-like with one row per sample
  task          - task identifier ('HTA' selects the multi-class rule)

  output:
  integer array of shape (n_samples, 1) with the predicted labels

  raises:
  ValueError - if a non-empty input does not have the expected shape
  '''
  probs = np.asarray(classes_probs)

  if probs.size == 0:
    # no samples: nothing to predict
    return np.zeros((0,1), dtype=int)

  if task=='HTA':
    if probs.ndim != 2:
      raise ValueError('classes_probs must be 2-D (n_samples, n_classes)')
    return probs.argmax(axis=1).reshape(-1,1)

  if probs.ndim != 2 or probs.shape[1] != 1:
    # more than one column would silently produce several labels per sample
    raise ValueError('binary tasks expect classes_probs of shape (n_samples, 1)')

  return (probs >= 0.5).astype(int).reshape(-1,1)

### evaluatePredictions()

In [26]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def evaluatePredictions(task, val_labels, pred_labels, verbose=False):
  '''
  Compare predicted labels against target labels and return the metrics.

  For task 'HTA' both label sets are split with
  dataset_utils.getLabelsPerTask() into their HS, TR and AG components;
  accuracy and macro-averaged precision, recall and F1 are computed for each
  component, together with 'F1_multi' (mean of the three F1 values) and
  'EMR' (computeEMR() over the per-sample (HS, TR, AG) triples).

  For tasks 'HS', 'TR' and 'AG' accuracy and macro-averaged precision,
  recall and F1 are computed directly on the given labels.

  input:
  task         - 'HTA', 'HS', 'TR' or 'AG'
  val_labels   - target labels
  pred_labels  - predicted labels
  verbose      - if True, print the main metrics

  output:
  dict mapping metric name to value

  raises:
  ValueError - if `task` is not one of the supported tasks

  Note: a later cell of this notebook defines evaluatePredictions() again;
  when both cells are run in order, the later definition is the one in use.
  '''
  def _scores(target, predicted):
    # accuracy plus macro-averaged precision, recall and F1 of one label set
    return (accuracy_score(target, predicted),
            precision_score(target, predicted, average="macro"),
            recall_score(target, predicted, average="macro"),
            f1_score(target, predicted, average="macro"))

  if task=='HTA':

    # get the corresponding predicted and val (target) labels for each task
    pred_HS_labels, pred_TR_labels, pred_AG_labels = dataset_utils.getLabelsPerTask(pred_labels)
    val_HS_labels, val_TR_labels, val_AG_labels = dataset_utils.getLabelsPerTask(val_labels)

    # compute the different metrics; HS precision and recall used to compare
    # the target labels with themselves, which always yields 1.0
    HS_acc, HS_prec, HS_recall, HS_f1 = _scores(val_HS_labels, pred_HS_labels)
    AG_acc, AG_prec, AG_recall, AG_f1 = _scores(val_AG_labels, pred_AG_labels)
    TR_acc, TR_prec, TR_recall, TR_f1 = _scores(val_TR_labels, pred_TR_labels)

    F1_multi = (HS_f1+ AG_f1 + TR_f1)/3

    EMR = computeEMR(list(zip(val_HS_labels, val_TR_labels, val_AG_labels)),
                    list(zip(pred_HS_labels, pred_TR_labels, pred_AG_labels)))

    results_dict = {'HS_acc':HS_acc,
                    'HS_prec':HS_prec,
                    'HS_recall':HS_recall,
                    'HS_f1':HS_f1,
                    'AG_acc':AG_acc,
                    'AG_prec':AG_prec,
                    'AG_recall':AG_recall,
                    'AG_f1':AG_f1,
                    'TR_acc':TR_acc,
                    'TR_prec':TR_prec,
                    'TR_recall':TR_recall,
                    'TR_f1':TR_f1,
                    'F1_multi':F1_multi,
                    'EMR':EMR}

    if verbose:
      print('EMR = ', EMR)
      print('F1_multi = ', F1_multi)
      print()

    return results_dict

  if task in ['HS', 'TR', 'AG']:
    # compute the different metrics
    acc, prec, recall, f1_macro = _scores(val_labels, pred_labels)

    results_dict = {'acc':acc,
                    'prec':prec,
                    'recall':recall,
                    'f1-macro':f1_macro}

    if verbose:
      print('Acc = ', acc)
      print('F1_macro = ', f1_macro)
      print()

    return results_dict

  # an unknown task used to fall through and return None silently
  raise ValueError(f'Unsupported task: {task!r}')

def computeEMR(test_labels, pred_labels):
  '''
  Exact Match Ratio: fraction of positions at which the gold label equals
  the predicted label, relative to the number of gold labels.
  '''
  matches = sum(1 for gold, pred in zip(test_labels, pred_labels) if gold == pred)

  return matches/len(test_labels)

def compute_metrics(target, predicted):
    '''
    Compute accuracy and macro-averaged precision, recall and F1 of
    `predicted` against `target`.

    The previous body referenced names that are never defined (val_labels,
    pred_labels, accuracy_s, precision_pos, ...), so every call raised
    NameError. The values are returned as a tuple because that is the form
    the callers in this notebook unpack.

    output:
    (accuracy, precision, recall, f1)
    '''
    from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

    accuracy = accuracy_score(target, predicted)
    precision = precision_score(target, predicted, average="macro")
    recall = recall_score(target, predicted, average="macro")
    f1 = f1_score(target, predicted, average="macro")

    return accuracy, precision, recall, f1

In [27]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def evaluatePredictions(task, val_labels, pred_labels, verbose=False):
  '''
  Compare predicted labels against target labels and return the metrics.

  For task 'HTA' both label sets are split with
  dataset_utils.getLabelsPerTask() into their HS, TR and AG components;
  compute_metrics() is applied to each component and the result is completed
  with 'F1_multi' (mean of the three F1 values) and 'EMR' (computeEMR() over
  the per-sample (HS, TR, AG) triples).

  For tasks 'HS', 'TR' and 'AG' compute_metrics() is applied directly to the
  given labels.

  input:
  task         - 'HTA', 'HS', 'TR' or 'AG'
  val_labels   - target labels
  pred_labels  - predicted labels
  verbose      - if True, print the main metrics

  output:
  dict mapping metric name to value

  raises:
  ValueError - if `task` is not one of the supported tasks
  '''
  if task=='HTA':

    # get the corresponding predicted and val (target) labels for each task
    pred_HS_labels, pred_TR_labels, pred_AG_labels = dataset_utils.getLabelsPerTask(pred_labels)
    val_HS_labels, val_TR_labels, val_AG_labels = dataset_utils.getLabelsPerTask(val_labels)

    # compute ACC, PREC, RECALL and F1 metrics per component; the
    # HS, AG, TR order fixes the order of the keys in the result
    results_dict = dict()
    f1_values = list()
    for prefix, target, predicted in (('HS', val_HS_labels, pred_HS_labels),
                                      ('AG', val_AG_labels, pred_AG_labels),
                                      ('TR', val_TR_labels, pred_TR_labels)):
      acc, prec, recall, f1 = compute_metrics(target, predicted)
      results_dict[f'{prefix}_acc'] = acc
      results_dict[f'{prefix}_prec'] = prec
      results_dict[f'{prefix}_recall'] = recall
      results_dict[f'{prefix}_f1'] = f1
      f1_values.append(f1)

    F1_multi = sum(f1_values)/3

    EMR = computeEMR(list(zip(val_HS_labels, val_TR_labels, val_AG_labels)),
                    list(zip(pred_HS_labels, pred_TR_labels, pred_AG_labels)))

    results_dict['F1_multi'] = F1_multi
    results_dict['EMR'] = EMR

    if verbose:
      print('EMR = ', EMR)
      print('F1_multi = ', F1_multi)
      print()

    return results_dict

  if task in ['HS', 'TR', 'AG']:
    # compute ACC, PREC, RECALL and F1 metrics
    acc, prec, recall, f1_macro = compute_metrics(val_labels, pred_labels)

    results_dict = {'acc':acc,
                    'prec':prec,
                    'recall':recall,
                    'f1-macro':f1_macro}

    if verbose:
      print('Acc = ', acc)
      print('F1_macro = ', f1_macro)
      print()

    return results_dict

  # an unknown task used to fall through and return None silently
  raise ValueError(f'Unsupported task: {task!r}')

def compute_metrics(target, predicted):
    '''
    Return (accuracy, precision, recall, f1) of `predicted` against `target`;
    precision, recall and F1 are macro-averaged.
    '''
    return (
        accuracy_score(target, predicted),
        precision_score(target, predicted, average="macro"),
        recall_score(target, predicted, average="macro"),
        f1_score(target, predicted, average="macro"),
    )

def computeEMR(test_labels, pred_labels):
  '''
  Exact Match Ratio: number of positions where the gold label equals the
  predicted one, divided by the number of gold labels.
  '''
  hits = [gold == pred for gold, pred in zip(test_labels, pred_labels)]
  exact_match_count = sum(1 for hit in hits if hit)

  return exact_match_count/len(test_labels)

### labels_utils

In [28]:
#**************************     getLabelsPerTask()    **************************
def getLabelsPerTask(HTA_labels):
    '''
    Split five-class HTA labels into their HS, TR and AG components.

    Every label is converted with mapTo3DimsFormat() and the three
    components are gathered in separate column arrays.

    output:
    (HS_labels, TR_labels, AG_labels) - three arrays of shape (n_labels, 1)
    '''
    hs_column, tr_column, ag_column = [], [], []

    for hs_value, tr_value, ag_value in map(mapTo3DimsFormat, HTA_labels):
        hs_column.append(hs_value)
        tr_column.append(tr_value)
        ag_column.append(ag_value)

    return tuple(np.array(column).reshape(-1,1)
                 for column in (hs_column, tr_column, ag_column))

#**************************     mapTo3DimsFormat()    ************************** 
def mapTo3DimsFormat(AB_label):
  '''
  Map a label in five-classes format to its three-dimensional labeling.

    0 -> (0,0,0)  [HS = 0, TR = 0, AG = 0]
    1 -> (1,0,0)  [HS = 1, TR = 0, AG = 0]
    2 -> (1,0,1)  [HS = 1, TR = 0, AG = 1]
    3 -> (1,1,0)  [HS = 1, TR = 1, AG = 0]
    4 -> (1,1,1)  [HS = 1, TR = 1, AG = 1]

  input:
  AB_label  - label in five-classes format

  output:
  tuple of three ints with the three-dimensional labeling, or None when
  the label is none of the five values above
  '''
  # position in the table == five-classes label
  labeling_by_class = ((0,0,0),
                       (1,0,0),
                       (1,0,1),
                       (1,1,0),
                       (1,1,1))

  # compare with == (instead of a dict lookup) so that any value that
  # compares equal to the class index is accepted
  for class_index, labeling in enumerate(labeling_by_class):
    if AB_label == class_index:
      return labeling

  return None


# Ensambles de arquitectura mixta

In [None]:
# Evaluate the mixed-architecture ensembles for the HS task with both evaluation
# methods; the results are also pickled under ./Results/final/ensembles/.
res_1, res_2 = automaticMixEnsembleEvaluation('HS')

In [None]:
# Evaluate the mixed-architecture ensembles for the AG task with both evaluation
# methods; the results are also pickled under ./Results/final/ensembles/.
res_1, res_2 = automaticMixEnsembleEvaluation('AG')

In [None]:
# Evaluate the mixed-architecture ensembles for the TR task with both evaluation
# methods; the results are also pickled under ./Results/final/ensembles/.
res_1, res_2 = automaticMixEnsembleEvaluation('TR')

In [None]:
# Evaluate the mixed-architecture ensembles for the five-class HTA task with both
# evaluation methods; the results are also pickled under ./Results/final/ensembles/.
res_1, res_2 = automaticMixEnsembleEvaluation('HTA')

# Resultados

## UTILS

In [None]:
from pandas import DataFrame

In [49]:
def getMixEnsemblesResults(task):
  '''
  Merge the stored mixed-architecture ensemble results of `task` for
  evaluation methods 1 and 2 through results_utils.mergeDataFrames() and
  return the merged object.
  '''
  method_1_file = f'./Results/final/ensembles/MIX-ARCH-1-{task}.df'
  method_2_file = f'./Results/final/ensembles/MIX-ARCH-2-{task}.df'

  return results_utils.mergeDataFrames([method_1_file, method_2_file],
                                       '.delete_this.df')


In [50]:
def getBestMixEnsembleResults(task):
  '''
  Return the single best row of the stored mixed-architecture ensemble
  results of `task`.

  The results of evaluation methods 1 and 2 are merged through
  results_utils.mergeDataFrames() and ranked in descending order by 'EMR'
  for task 'HTA' and by 'f1-macro' for every other task; the top row is
  returned as a one-row frame.
  '''
  sorting_metric = 'EMR' if task=='HTA' else 'f1-macro'

  merged_results = results_utils.mergeDataFrames(
      [f'./Results/final/ensembles/MIX-ARCH-1-{task}.df',
       f'./Results/final/ensembles/MIX-ARCH-2-{task}.df'],
      '.delete_this.df')

  ranked_results = merged_results.sort_values(by=sorting_metric, ascending=False)

  return ranked_results.iloc[0:1]

In [54]:
# Best mixed-architecture ensemble for the HS task (top row by 'f1-macro').
getBestMixEnsembleResults('HS')

Unnamed: 0,model_type,n_classifiers,task,acc,prec,recall,f1-macro
1,VOTING,6,HS,0.73625,0.728364,0.730625,0.729321


In [53]:
# Best mixed-architecture ensemble for the AG task (top row by 'f1-macro').
getBestMixEnsembleResults('AG')

Unnamed: 0,model_type,n_classifiers,task,acc,prec,recall,f1-macro
2,MEAN-PROB,6,AG,0.795,0.759577,0.725472,0.738056


In [52]:
# Best mixed-architecture ensemble for the TR task (top row by 'f1-macro').
getBestMixEnsembleResults('TR')

Unnamed: 0,model_type,n_classifiers,task,acc,prec,recall,f1-macro
0,MEAN-PROB,6,TR,0.85375,0.853823,0.752936,0.783821


In [48]:
# Best mixed-architecture ensemble for the HTA task (top row by 'EMR').
getBestMixEnsembleResults('HTA')

Unnamed: 0,model_type,n_classifiers,task,HS_acc,HS_prec,HS_recall,HS_f1,AG_acc,AG_prec,AG_recall,AG_f1,TR_acc,TR_prec,TR_recall,TR_f1,F1_multi,EMR
3,VOTING,6,HTA,0.76625,0.76185,0.750064,0.753931,0.7775,0.737125,0.757017,0.744634,0.87,0.850039,0.801842,0.821181,0.773249,0.69875


## Ensambles de configuración única

In [None]:
# Summary of the single-configuration ensembles for the HS task.
# NOTE(review): getEnsemblesResultsSummary is not defined in the visible part of
# this notebook — confirm it is defined or imported before this cell runs.
getEnsemblesResultsSummary('HS',1)

Unnamed: 0,conf_id,model_type,architecture,acc,prec,recall,f1-macro
0,IMsHTOqQ,E2,SNN-1,0.7325,0.724161,0.719536,0.721354
1,DmScKBFk,E2,CNN-1,0.74125,0.736283,0.742553,0.737308
2,cwHZKRDu,E1,BiLSTM-2,0.744375,0.736278,0.735735,0.735999
3,gWDhCaka,E1,ConvLSTM-2,0.744375,0.73664,0.738894,0.737605


In [None]:
# Summary of the single-configuration ensembles for the AG task.
# NOTE(review): getEnsemblesResultsSummary is not defined in the visible part of
# this notebook — confirm it is defined or imported before this cell runs.
getEnsemblesResultsSummary('AG',1)

Unnamed: 0,conf_id,model_type,architecture,acc,prec,recall,f1-macro
0,yCKbOmtc,E2,SNN-2,0.789375,0.749492,0.726973,0.736079
1,lYkXiQZd,E2,CNN-2,0.826875,0.800489,0.768886,0.78152
2,Afxwglds,E1,BiLSTM-2,0.799375,0.764286,0.734078,0.745714
3,VNZvaMik,E2,ConvLSTM-2,0.8,0.760613,0.774224,0.766505


In [None]:
# Summary of the single-configuration ensembles for the TR task.
# NOTE(review): getEnsemblesResultsSummary is not defined in the visible part of
# this notebook — confirm it is defined or imported before this cell runs.
getEnsemblesResultsSummary('TR',1)

Unnamed: 0,conf_id,model_type,architecture,acc,prec,recall,f1-macro
0,wppcsrma,E2,SNN-2,0.84875,0.849021,0.74348,0.774527
1,oRGmNoZx,E2,CNN-2,0.86875,0.848793,0.799478,0.819137
2,QXXxwbcD,E1,BiLSTM-2,0.863125,0.865674,0.768395,0.799565
3,cEozDdue,E2,ConvLSTM-2,0.869375,0.840886,0.812776,0.825102
