# Cross Validation

Given that we trained and evaluated the models for each experiment with different hyperparameter combination, we are now going to perform a cross validation in the best models for each experiment, to have a more precise evaluation of the 

In [1]:
import tensorflow as tf

from numpy import genfromtxt
import numpy as np
import pandas as pd
from sklearn import preprocessing, model_selection
from tensorflow import keras
from tensorflow.metrics import precision
import matplotlib.pyplot as plt 
import os
from sklearn.metrics import precision_recall_fscore_support
from sklearn.model_selection import KFold

features = genfromtxt('../datasets/final-data/features.csv', delimiter=',')
labels = genfromtxt('../datasets/final-data/labels.csv', delimiter=',')

## Selecting the best hyperparameter combination for each experiment

In [2]:
experiments = [dI for dI in os.listdir('../experiments') if os.path.isdir(os.path.join('../experiments',dI))]

best_models = []
for experiment in experiments:
    training_metrics = pd.read_csv('../experiments/' + experiment + '/training_metrics.csv')
    training_metrics = training_metrics.sort_values(['acc', 'precision', 'recall'], ascending=[False, False, False])
    best_model = training_metrics.iloc[0]
    best_model["experiment"] = experiment
    
    best_models.append(best_model)
    
best_models = pd.DataFrame(best_models).reset_index().drop(columns=['index'])
best_models = best_models.sort_values(['experiment'], ascending=True)
best_models

model_name             model_472
units                         35
learning_rate               0.01
momentum                  0.0001
decay                     0.0001
activation_function         relu
acc                     0.558249
loss                    0.717674
mae                          0.5
mse                     0.275367
precision               0.549315
recall                  0.629513
fs_score                0.586686
Name: 471, dtype: object

## Defining the model construct function

In [3]:
def create_model(units, activation_function, learning_rate, decay, momentum):
    #Creating the network structure
    model = keras.Sequential()
    
    model.add(keras.layers.Input(shape=300,sparse=False))
    model.add(keras.layers.Dense(150))
    model.add(keras.layers.Dense(units, activation = activation_function))
    model.add(keras.layers.BatchNormalization())
    model.add(keras.layers.Dense(2, activation='softmax'))

    # Setting the optimizers parameters
    optimizer = tf.keras.optimizers.SGD(
        learning_rate=learning_rate,
        decay=decay,
        momentum=momentum,
        nesterov=True
    )

    # Compiling the model
    model.compile(
        optimizer = optimizer,
        loss='sparse_categorical_crossentropy',
        metrics=['acc', 'mae', 'mse'])
    
    return model

In [4]:
def train_model(model):
    early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)
    
    history = model.fit(
        train_x, 
        train_y, 
        epochs = 1, 
        validation_split=0.3, 
        batch_size = 16,  
        verbose=1, 
        use_multiprocessing=True,
        callbacks=[early_stop]
    )
    
    return history

def test_model(model, test_x, test_y):
    loss, acc, mae, mse = model.evaluate(test_x, test_y, verbose=1)
    
    test_output_probabilities = model.predict(
        test_x,
        batch_size=16,
        verbose=1,
        steps=None,
        callbacks=None,
        max_queue_size=10,
        workers=1,
        use_multiprocessing=True
    )
    
    predicted_y = np.argmax(test_output_probabilities, axis=1)
    
    precision, recall, f_score, support = precision_recall_fscore_support(
        y_true = test_y, 
        y_pred = predicted_y, 
        average = 'binary'
    )
    
    return {
        "loss": loss,
        "acc": acc,
        "mae": mae,
        "mse": mse,
        "precision": precision,
        "recall": recall,
        "f_score": f_score
    }

def compute_c_validated_matrics(best_model, test_results):
    return {
        "model_name": best_model["model_name"],
        "units": best_model["units"], 
        "learning_rate": best_model["learning_rate"], 
        "momentum": best_model["momentum"],
        "decay": best_model["decay"],
        "activation_function": best_model["activation_function"],
        "average_acc": cross_validation_df["acc"].mean(),
        "acc_std_deviation": cross_validation_df["acc"].std(),
        "average_precision": cross_validation_df["precision"].mean(),
        "precision_std_deviation": cross_validation_df["precision"].std(),
        "average_recall": cross_validation_df["recall"].mean(),
        "recall_std_deviation": cross_validation_df["recall"].std(),
        "average_fs_score": cross_validation_df["f_score"].mean(),
        "fs_score_std_deviation": cross_validation_df["f_score"].std()
    }

In [5]:
cross_validated_models = []

for best_model in best_models:
    cross_validation_results = []

    # We used k = 10 as it's the general number used in the literature
    for train_index,test_index in KFold(n_splits=10, random_state=None, shuffle=False).split(features):
        train_x, test_x = features[train_index], features[test_index]
        train_y, test_y = labels[train_index], labels[test_index]

        model = create_model(
            best_model['units'], 
            best_model['activation_function'], 
            best_model['learning_rate'],
            best_model['decay'],
            best_model['momentum'])

        history = train_model(model)
        test_results = test_model(model, test_x, test_y)

        cross_validation_results.append(
            {
                "model_name": best_model["model_name"],
                "units": best_model["units"], 
                "learning_rate": best_model["learning_rate"], 
                "momentum": best_model["momentum"],
                "decay": best_model["decay"],
                "activation_function": best_model["activation_function"],
                "acc": test_results["acc"],
                "precision": test_results["precision"],
                "recall": test_results["recall"],
                "f_score": test_results["f_score"] 
            }
        )

    cross_validation_df = pd.DataFrame(cross_validation_results)

    cross_validated_metrics = compute_c_validated_matrics(best_model, test_results)

    cross_validated_metrics

W0513 20:57:30.863578 4670819776 deprecation.py:506] From /Users/gcarvs/anaconda3/lib/python3.7/site-packages/tensorflow/python/ops/init_ops.py:1251: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


Train on 3081 samples, validate on 1321 samples
Train on 3081 samples, validate on 1321 samples
Train on 3082 samples, validate on 1321 samples
Train on 3082 samples, validate on 1321 samples
Train on 3082 samples, validate on 1321 samples
Train on 3082 samples, validate on 1321 samples
Train on 3082 samples, validate on 1321 samples
Train on 3082 samples, validate on 1321 samples
Train on 3082 samples, validate on 1321 samples
Train on 3082 samples, validate on 1321 samples


{'model_name': 'model_472',
 'units': 35,
 'learning_rate': 0.01,
 'momentum': 0.0001,
 'decay': 0.0001,
 'activation_function': 'relu',
 'average_acc': 0.5134764790534974,
 'acc_std_deviation': 0.04259974785770258,
 'average_precision': 0.5138952312136836,
 'precision_std_deviation': 0.09282376048905862,
 'average_recall': 0.4324116907822996,
 'recall_std_deviation': 0.2531903317732664,
 'average_fs_score': 0.4299808844098994,
 'fs_score_std_deviation': 0.1790100842132491}

In [6]:


cross_validation_df

Unnamed: 0,acc,activation_function,decay,f_score,learning_rate,model_name,momentum,precision,recall,units
0,0.536735,relu,0.0001,0.450363,0.01,model_472,0.0001,0.451456,0.449275,35
1,0.563265,relu,0.0001,0.291391,0.01,model_472,0.0001,0.463158,0.21256,35
2,0.550102,relu,0.0001,0.661538,0.01,model_472,0.0001,0.549872,0.830116,35
3,0.492843,relu,0.0001,0.493878,0.01,model_472,0.0001,0.584541,0.427562,35
4,0.474438,relu,0.0001,0.349367,0.01,model_472,0.0001,0.518797,0.263359,35
5,0.425358,relu,0.0001,0.090615,0.01,model_472,0.0001,0.583333,0.049123,35
6,0.511247,relu,0.0001,0.590051,0.01,model_472,0.0001,0.5,0.719665,35
7,0.494888,relu,0.0001,0.529524,0.01,model_472,0.0001,0.44127,0.661905,35
8,0.552147,relu,0.0001,0.257627,0.01,model_472,0.0001,0.358491,0.201058,35
9,0.533742,relu,0.0001,0.585455,0.01,model_472,0.0001,0.688034,0.509494,35
