In [22]:
#!pip install tensorflow
#!pip install sdv

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import sdv


from sdv.tabular import GaussianCopula, CTGAN, CopulaGAN, TVAE
from sdv.sampling import Condition
from sdv.evaluation import evaluate

from numpy import random
from matplotlib.pyplot import figure
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV, KFold, PredefinedSplit
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn import metrics
from tensorflow import keras
from tensorflow.keras import layers, losses
from tensorflow.keras.models import Model, Sequential, load_model, save_model
from tensorflow.keras.layers import Dense, Input, Conv1D, Activation, Reshape, Flatten, Dropout, MaxPooling1D
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, roc_auc_score, roc_curve

from keras import backend as K 

import joblib
from joblib import dump, load


#################################################### UTILS ##########################################

def add_metas(taxo,meta,meta_extra=[]):
    new_taxo = taxo
    condition = []
    for i in range(len(taxo)):
        condition.append(pd.DataFrame(meta["condition"][meta["sample"] ==
                                                        taxo["sample"][i]]).iloc[0,0])
    for i in meta_extra:
        meta_extra_i = []
        for j in range(len(taxo)):
            meta_extra_i.append(pd.DataFrame(meta[i][meta["sample"] ==
                                          taxo["sample"][j]]).iloc[0,0])
        new_taxo[i] = meta_extra_i
    return new_taxo, condition

def drop_nas(taxo,meta):
    taxo["condition"] = meta
    taxo = taxo.dropna()
    meta = taxo["condition"]
    taxo = taxo.iloc[:,:-1]
    return taxo, meta

def taxo_taxo_meta(taxo_train, taxo_val, ID_model):
    if ID_model == "o" or ID_model == "p":
        taxo_train_meta = taxo_train.iloc[:,-2:]
        taxo_val_meta = taxo_val.iloc[:,-2:]
        taxo_train = taxo_train.iloc[:,:-2]
        taxo_val = taxo_val.iloc[:,:-2]
    elif ID_model != "a" and ID_model != "b":
        taxo_train_meta = pd.DataFrame(taxo_train.iloc[:,-1])
        taxo_val_meta = taxo_val.iloc[:,-1]
        taxo_train = taxo_train.iloc[:,:-1]
        taxo_val = taxo_val.iloc[:,:-1]
    else:
        taxo_train_meta = []; taxo_val_meta = [];
    return taxo_train, taxo_val, taxo_train_meta, taxo_val_meta

def format_data(taxo_train, taxo_val, meta_train, meta_val, expand=0):
    x_train = taxo_train.to_numpy(); y_train = pd.DataFrame(meta_train).to_numpy()
    x_val = taxo_val.to_numpy(); y_val = pd.DataFrame(meta_val).to_numpy()
    if expand == 1:
        x_train = np.expand_dims(x_train, axis=2)
        x_val = np.expand_dims(x_val, axis=2)
            
    x_train = x_train.astype('float32'); y_train = y_train.astype('float32')
    x_val = x_val.astype('float32'); y_val = y_val.astype('float32')

    return x_train, x_val, y_train, y_val
    

#################################################### RAWDATA ##########################################

def rawdata(taxo_train, taxo_val, meta_train, meta_val):
    x_train, x_val, y_train, y_val = format_data(taxo_train, taxo_val, meta_train, meta_val)
    y_train = y_train.ravel(); y_val = y_val.ravel()
    return x_train, x_val, y_train, y_val

#################################################### ENCODED ##########################################

class Autoencoder(Model):
    def __init__(self, latent_dim, n_features): #diseño autoencoder
        super(Autoencoder, self).__init__()
        self.latent_dim = latent_dim  #dimensión de bottleneck deseada 
        self.n_features = n_features 
        self.encoder = tf.keras.Sequential([ #diseño encoder
          layers.Input(shape=(n_features,1)), #capa de entrada
          layers.Conv1D(3, 3, activation="relu"), #convolución 64 filtros de 3 
          layers.Flatten(), #capa plana
          layers.Dense(latent_dim, activation="tanh"), #+densa para dimension deseada
        ])
        self.decoder = tf.keras.Sequential([
          layers.Reshape((latent_dim, 1)), #cambio de forma para la convolución
          layers.Conv1DTranspose(3, 3, activation="relu"), #convolución T 32 f de 3 
          layers.Flatten(), #capa de salida plana
          layers.Dense(n_features, activation="tanh"), #densa para dimensión entrada
          layers.Reshape((n_features,1)), #cambio de forma para dimensión == entrada
        ])
    def call(self, x): #función llamada
        encoded = self.encoder(x) #encoder
        decoded = self.decoder(encoded) #decoder
        return decoded #resultado autoencoder
    
def autoencoder_training(x_train, x_val, latent_dim, lr, ID_model):
    autoencoder = Autoencoder(latent_dim, x_train.shape[1])
    opt = keras.optimizers.RMSprop(learning_rate=lr)
    autoencoder.compile(optimizer=opt, loss='mean_squared_error',
                        metrics=['accuracy'])
    callbacks = EarlyStopping(monitor='val_loss', patience=100, verbose=0)
    checkpoint_path = cp_path("autoencoder", "encoded", ID_model)
    mc = ModelCheckpoint(filepath=checkpoint_path, monitor="val_loss", mode='min', 
                         save_best_only=True, save_weights_only=True, verbose=0)
    autoencoder.fit(x_train, x_train, validation_data=(x_val, x_val), 
                    epochs=300, callbacks = [callbacks, mc], batch_size=32, 
                    verbose=0, shuffle=True)
    autoencoder.load_weights(checkpoint_path)
    return autoencoder

def encode_norm_meta_data(autoencoder, x_train, x_val, x_train_meta, x_val_meta):
    x_train = pd.DataFrame(autoencoder.encoder(x_train).numpy())
    scaler = MinMaxScaler(); scaler.fit(x_train); x_train = pd.DataFrame(scaler.transform(x_train))
    x_train = pd.concat([x_train.reset_index(drop=True), 
                         pd.DataFrame(x_train_meta).reset_index(drop=True)], axis=1)
    
    x_val = pd.DataFrame(autoencoder.encoder(x_val).numpy())
    scaler = MinMaxScaler(); scaler.fit(x_val); x_val = pd.DataFrame(scaler.transform(x_val))
    x_val = pd.concat([x_val.reset_index(drop=True), 
                         pd.DataFrame(x_val_meta).reset_index(drop=True)], axis=1)
         
    return x_train, x_val

def save_autoencoder_encoded(autoencoder, ID_model, x_train, x_val, y_train, y_val):
    path = "models/altres_models/autoencoders/" + ID_model + "_" + "autoencoder"
    DirExists = os.path.exists(path)
    if not DirExists:
        os.makedirs(path)
    tf.saved_model.save(autoencoder, path)
    path_vars = "models/altres_models/autoencoders/" + ID_model + "_" + "encoded" + "_"
    pd.DataFrame(x_train).to_csv(path_vars + "x_train.csv", index=False)
    pd.DataFrame(y_train).to_csv(path_vars + "y_train.csv", index=False)  
    pd.DataFrame(x_val).to_csv(path_vars + "x_val.csv", index=False); 
    pd.DataFrame(y_val).to_csv(path_vars + "y_val.csv", index=False)
        
def load_autoencoder_encoded(ID_model, autoencoder_path):
    autoencoder = tf.saved_model.load(autoencoder_path)
    path_vars = "models/altres_models/autoencoders/" + ID_model + "_" + "encoded" + "_"
    x_train = pd.read_csv(path_vars + "x_train.csv", encoding='unicode_escape'); x_train.to_numpy().astype('float32')
    y_train = pd.read_csv(path_vars + "y_train.csv", encoding='unicode_escape'); y_train.to_numpy().astype('float32')
    x_val = pd.read_csv(path_vars + "x_val.csv", encoding='unicode_escape'); x_val.to_numpy().astype('float32')
    y_val = pd.read_csv(path_vars + "y_val.csv", encoding='unicode_escape'); y_val.to_numpy().astype('float32')
    return x_train, x_val, y_train, y_val

def encoded(bench, ID_model, taxo_train=None, taxo_val=None, meta_train=None, meta_val=None, autoencoder_path=None):
    if autoencoder_path == None:
        taxo_train, taxo_val, x_train_meta, x_val_meta = taxo_taxo_meta(taxo_train, taxo_val, ID_model)
        x_train, x_val, y_train, y_val = format_data(taxo_train, taxo_val, meta_train, meta_val, expand=1)
        autoencoder = autoencoder_training(x_train, x_val, latent_dim=100, lr=1e-3, ID_model=ID_model)
        x_train, x_val = encode_norm_meta_data(autoencoder, x_train, x_val, x_train_meta, x_val_meta)
        save_autoencoder_encoded(autoencoder, ID_model, x_train, x_val, y_train, y_val)
        print("Model i metadades autoencoder guardats correctament.")
    else:
        x_train, x_val, y_train, y_val = load_autoencoder_encoded(ID_model, autoencoder_path)
        print("Carregant model i metadades autoencoder...")
    y_train = y_train.ravel(); y_val = y_val.ravel()
    return x_train, x_val, y_train, y_val


#################################################### SYNTHETIC ##########################################

def synthetizer_training(x_train, y_train, taxo_train_meta):
    path = "models/altres_models/synthetizers/"
    x_train["condition"] = y_train.astype(int)
    x_train.to_csv(path + "real_data.csv", index=False)
    x_train = pd.read_csv(path + "real_data.csv", encoding='unicode_escape')
    if len(taxo_train_meta) != 0:
        if len(taxo_train_meta.columns)==2:
            field_types = {"condition": {"type": "boolean"}, "region": {"type": "boolean"}, 
                           "seq_tool": {"type": "boolean"}}
            field_transformers = {"condition": "boolean", "region": "boolean", 
                                  "seq_tool": "boolean"}
        elif len(taxo_train_meta.columns)==1:
            if taxo_train_meta.columns == ["region"]:
                field_types = {"condition": {"type": "boolean"}, "region": {"type": "boolean"}}
                field_transformers = {"condition": "boolean", "region": "boolean"}
            else:
                field_types = {"condition": {"type": "boolean"}, "seq_tool": {"type": "boolean"}}
                field_transformers = {"condition": "boolean", "seq_tool": "boolean"} 
    else:
        field_types = {"condition": {"type": "boolean"}}
        field_transformers = {"condition": "boolean"}
        
    synthetizer = TVAE(batch_size=50, epochs=500, l2scale=1e-3, embedding_dim=50,
                       field_types=field_types, field_transformers=field_transformers)
    synthetizer.fit(x_train)
    return synthetizer

def save_synthetizer(synthetizer, ID_model):
    path = "models/altres_models/synthetizers/" + ID_model + "_synthetizer.h5"
    synthetizer.save(path)
    
def synthetize(synthetizer, num_rows):
    if (num_rows % 2) == 0:
        num_rows = num_rows
    else:
        num_rows = num_rows+1
    
    condition = Condition({'condition': np.repeat([0,1],[int(num_rows/2),int(num_rows/2)],axis=0)}, 
                          num_rows=num_rows) 
 
    syn_data = synthetizer.sample_conditions(conditions=[condition])
    syn_data_y = pd.DataFrame(syn_data.iloc[:,-1]); 
    syn_data_x = syn_data.iloc[:,:-1]
    return syn_data_x, syn_data_y
    

def synthetic(taxo_train_meta, ID_model, autoencoder_path, num_rows, synthetizer_path=None):
    x_train, x_val, y_train, y_val = load_autoencoder_encoded(ID_model, autoencoder_path)
    if synthetizer_path == None:
        synthetizer = synthetizer_training(x_train, y_train, taxo_train_meta)
        save_synthetizer(synthetizer, ID_model)
        print("Model i metadades synthetizer guardats correctament.")
    else:
        synthetizer = TVAE.load(synthetizer_path)
        print("Carregant model i metadades synthetizer...")
    syn_data_x, syn_data_y = synthetize(synthetizer, num_rows)
    syn_data_y = syn_data_y.to_numpy().ravel(); y_val = y_val.to_numpy().ravel()
    return syn_data_x, x_val, syn_data_y, y_val
          
        
#################################################### SYNREAL ##########################################

def concat_synreal(syn_data_x, x_train, syn_data_y, y_train):
    synreal_data_x = pd.concat([syn_data_x.reset_index(drop=True), x_train.reset_index(drop=True)], axis=0)
    synreal_data_y = np.concatenate((syn_data_y, y_train), axis=0)
    return synreal_data_x, synreal_data_y

def synreal(ID_model, autoencoder_path, synthetizer_path):
    x_train, x_val, y_train, y_val = load_autoencoder_encoded(ID_model, autoencoder_path)
    synthetizer = TVAE.load(synthetizer_path)
    print("Carregant model i metadades synthetizer...")
    syn_data_x, syn_data_y = synthetize(synthetizer, len(x_train))
    synreal_data_x, synreal_data_y = concat_synreal(syn_data_x, x_train, syn_data_y, y_train)
    synreal_data_y = synreal_data_y.ravel(); y_val = y_val.to_numpy().ravel()
    return synreal_data_x, x_val, synreal_data_y, y_val


#################################################### PREPARE DATA ######################################

def prepare_data(bench, ID_model, taxo_train=None, taxo_val=None, meta_train=None, meta_val=None, 
                 autoencoder_path=None, synthetizer_path=None):
    
    taxo_train = taxo_train.iloc[:,1:]; taxo_val = taxo_val.iloc[:,1:]

    if bench == "rawdata":
        x_train, x_val, y_train, y_val = rawdata(taxo_train, taxo_val, meta_train, meta_val)
    elif bench == "encoded":
        if autoencoder_path==None:
            x_train, x_val, y_train, y_val = encoded(taxo_train=taxo_train, taxo_val=taxo_val, 
                                                     meta_train=meta_train, meta_val=meta_val, 
                                                     autoencoder_path=None, 
                                                     bench=bench, ID_model=ID_model)
        else:
            x_train, x_val, y_train, y_val = encoded(taxo_train=None, taxo_val=None, 
                                                     meta_train=None, meta_val=None, 
                                                     autoencoder_path=autoencoder_path,
                                                     bench=bench, ID_model=ID_model)
    elif bench == "synthetic":
        txtrain, txval, taxo_train_meta, taxo_val_meta = taxo_taxo_meta(taxo_train, taxo_val, ID_model)

        if synthetizer_path==None:
            x_train, x_val, y_train, y_val = synthetic(taxo_train_meta=taxo_train_meta, ID_model=ID_model, 
                                                       autoencoder_path=autoencoder_path, 
                                                       synthetizer_path=None, num_rows=5000)
        else:
            x_train, x_val, y_train, y_val = synthetic(taxo_train_meta=taxo_train_meta, ID_model=ID_model, 
                                                       autoencoder_path=autoencoder_path,
                                                       synthetizer_path=synthetizer_path, num_rows=5000)
    else:
        x_train, x_val, y_train, y_val = synreal(ID_model, autoencoder_path, synthetizer_path)
    return x_train, x_val, y_train, y_val

#################################################### BENCHMARKING ######################################

def get_taxo_meta_fold(x_train, x_val, y_train, y_val):
   
    # put -1 here, so they will be in training set
    val_fold = []
    for i in range(len(x_train)):
        val_fold.append(-1)

    # for all greater indices, assign 0, so they will be put in test set
    for i in range(len(x_val)):
        val_fold.append(0)
        
    taxo = np.concatenate((x_train, x_val), axis=0)
    meta = np.concatenate((y_train, y_val), axis=0)
    return taxo, meta, val_fold

def KNN_bm(taxo, meta, scoring, val_fold):
    param_grid = dict(n_neighbors=list(range(30,121)), weights = ['uniform'], p=[1,2])
    grid = GridSearchCV(estimator=KNeighborsClassifier(), param_grid=param_grid, scoring=scoring,
                    cv=PredefinedSplit(test_fold=val_fold))
    grid_result = grid.fit(taxo, meta)
    return grid_result.best_estimator_, grid_result.best_score_, grid_result.best_params_

def DT_bm(taxo, meta, scoring, val_fold):
    param_grid = dict(criterion=['entropy'], splitter = ['random'], 
                  min_samples_split=[0.8,0.9], 
                  min_samples_leaf=[0.1,0.2],
                  max_depth=list(range(1,6)),    
                  max_features=['sqrt'])
    grid = GridSearchCV(estimator=DecisionTreeClassifier(), param_grid=param_grid, scoring=scoring,
                    cv=PredefinedSplit(test_fold=val_fold))
    grid_result = grid.fit(taxo, meta)
    return grid_result.best_estimator_, grid_result.best_score_, grid_result.best_params_

def RF_bm(taxo, meta, scoring, val_fold):
    param_grid = dict(n_estimators=[500], criterion=['entropy'], 
                  min_samples_split=[0.25,0.5,0.75], 
                  min_samples_leaf=[0.1,0.2],
                  max_depth=list(range(1,11)), 
                  max_features=['sqrt'], 
                  bootstrap=[True])
    grid = GridSearchCV(estimator=RandomForestClassifier(), param_grid=param_grid, scoring=scoring, 
                    cv=PredefinedSplit(test_fold=val_fold))
    grid_result = grid.fit(taxo,meta)
    return grid_result.best_estimator_, grid_result.best_score_, grid_result.best_params_

def SVM_bm(taxo, meta, scoring, val_fold, bench):
    param_grid = dict(C=[0.1,1,5,10], kernel=['linear','rbf','sigmoid'], 
                      gamma=[0.1, 0.5], shrinking=[True], 
                      tol=[1e-3], probability=[True])
    grid = GridSearchCV(estimator=SVC(), param_grid=param_grid, scoring=scoring,
                    cv=PredefinedSplit(test_fold=val_fold))
    grid_result = grid.fit(taxo,meta)
    return grid_result.best_estimator_, grid_result.best_score_, grid_result.best_params_

def extract_n(x_train):
    n_features = x_train.shape[1]
    n1 = int(round(n_features/2,0))
    n2 = int(round(n1/2,0))
    n3 = int(round(n2/2,0))
    return n_features, n1, n2, n3

def get_batch_size(bench):
    if bench == "synthetic":
        batch_size = 500
    elif bench == "synreal":
        batch_size = 64
    else:
        batch_size = 32
    return batch_size
        
def get_lr(type_model, bench):
    if bench == "rawdata":
        if type_model == "mlp":
            lr = 1e-5
        else:
            lr = 1e-4
    elif bench == "encoded":
        if type_model == "mlp":
            lr = 1e-4
        else:
            lr = 1e-3
    else:
        lr = 1e-3
    return lr
        
def cp_path(type_model, bench, ID_model):
    path = "models/altres_models/training_checkpoints/" + ID_model + "_" + bench + "_" + type_model
    DirExists = os.path.exists(path)
    if not DirExists:
        os.makedirs(path)
    path = path + "/cp.ckpt"
    return path

def MLP(layer, n_features, lr):
    model = Sequential()
    for i, nodes in enumerate(layer):
        if i==0:
            model.add(Dense(nodes, input_dim=n_features))
            model.add(Activation("relu"))
            model.add(Dropout(0.5))
        else:
            model.add(Dense(nodes))
            model.add(Activation("relu"))
            model.add(Dropout(0.5))
        model.add(Dense(1, activation="sigmoid"))
        model.compile(optimizer=keras.optimizers.Adam(learning_rate=lr), 
                      loss='binary_crossentropy', metrics=['accuracy'])
    return model
   
def MLP_bm(x_train, x_val, y_train, y_val, k, scoring, bench, ID_model):
    model = KerasClassifier(build_fn=MLP, verbose=0)
    n_features, n1, n2, n3 = extract_n(x_train)
    #lr = get_lr("mlp", bench)
    lr = 1e-3
    layer = [[n1], [n2], [n3], [n1,n2], [n2,n3], [n1,n3], [n1, n2, n3]]
    #batch_size = get_batch_size(bench)
    batch_size = 32
    callbacks = EarlyStopping(monitor='val_loss', patience=100, verbose=0)
    checkpoint_path = cp_path("mlp", bench, ID_model)
    param_grid = dict(layer=layer, n_features=[n_features], lr=[lr],  batch_size=[batch_size], epochs=[1000])
    grid = GridSearchCV(estimator=model, param_grid=param_grid, scoring=scoring,
                    cv=KFold(k, shuffle=True, random_state=42))
    grid_result = grid.fit(x_train, y_train, validation_data=(x_val, y_val), callbacks = [callbacks])
    return grid_result.best_estimator_, grid_result.best_score_, grid_result.best_params_, checkpoint_path

def CNN(filters, n_features, lr):
    model = Sequential()
    model.add(Input((n_features,1)))
    model.add(Conv1D(filters, 3))
    model.add(Activation("relu"))
    model.add(MaxPooling1D(2))
    model.add(Dropout(0.5))
    model.add(Conv1D(filters, 3))
    model.add(Activation("relu"))
    model.add(MaxPooling1D(2))
    model.add(Dropout(0.5))
    model.add(Flatten())
    model.add(Dense(1))
    model.add(Activation("sigmoid"))
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=lr), 
                  loss='binary_crossentropy', metrics=['accuracy'])
    return model

def CNN_bm(x_train, x_val, y_train, y_val, k, scoring, bench, ID_model):
    model = KerasClassifier(build_fn=CNN, verbose=0)
    filters = [16,32,64]
    n_features, n1, n2, n3 = extract_n(x_train)
    #lr = get_lr("cnn", bench)
    lr = 1e-3
    #batch_size = get_batch_size(bench)
    batch_size = 32
    callbacks = EarlyStopping(monitor='val_loss', patience=100, verbose=0)
    checkpoint_path = cp_path("cnn", bench, ID_model)
    param_grid = dict(filters=filters, n_features=[n_features], lr=[lr], 
                      batch_size = [batch_size], epochs=[1000])
    grid = GridSearchCV(estimator=model, param_grid=param_grid, scoring=scoring,
                    cv=KFold(k, shuffle=True, random_state=42))
    grid_result = grid.fit(x_train, y_train, validation_data=(x_val, y_val), callbacks = [callbacks])
    return grid_result.best_estimator_, grid_result.best_score_, grid_result.best_params_, checkpoint_path

def update_best(estimator1, score1, params1, estimator2, score2, params2):
    if score1>=score2:
        return estimator1, score1, params1
    else:
        return estimator2, score2, params2
    
def restore_NN_best_estimator(best_estimator, checkpoint_path, x_train, y_train, x_val, y_val, bench):
    batch_size = get_batch_size(bench)
    callbacks = EarlyStopping(monitor='val_loss', patience=100, verbose=0)
    mc = ModelCheckpoint(filepath=checkpoint_path, monitor="val_loss", mode='min', 
                         save_best_only=True, save_weights_only=True, verbose=0)
    best_estimator.fit(x_train, y_train, validation_data=(x_val, y_val), 
                          callbacks=[callbacks, mc], epochs=1000, verbose=0, batch_size=batch_size)
    best_estimator.load_weights(checkpoint_path)
    return best_estimator
    
def benchmarking(x_train, x_val, y_train, y_val, bench, ID_model, k_NN, scoring):
    taxo, meta, val_fold = get_taxo_meta_fold(x_train, x_val, y_train, y_val)

    best_estimator, best_score, best_params = KNN_bm(taxo, meta, scoring, val_fold)
    print("KNN: " + str(round(best_score,4)) + ". Paràmetres=", best_params)
    
    #best_estimator_DT, best_score_DT, best_params_DT = DT_bm(taxo, meta, scoring, val_fold)
    #best_estimator, best_score, best_params = update_best(best_estimator, best_score, best_params,
    #                                                      best_estimator_DT, best_score_DT, best_params_DT)
    #print("DT: " + str(round(best_score_DT,4)) + ". Paràmetres=", best_params_DT)
    
    best_estimator_RF, best_score_RF, best_params_RF = RF_bm(taxo, meta, scoring, val_fold)
    best_estimator, best_score, best_params = update_best(best_estimator, best_score, best_params,
                                                          best_estimator_RF, best_score_RF, best_params_RF)
    print("RF: " + str(round(best_score_RF,4)) + ". Paràmetres=", best_params_RF)
   
    best_estimator_SVM, best_score_SVM, best_params_SVM = SVM_bm(taxo, meta, scoring, val_fold, bench)
    best_estimator, best_score, best_params = update_best(best_estimator, best_score, best_params,
                                                          best_estimator_SVM, best_score_SVM, best_params_SVM)
    print("SVM: " + str(round(best_score_SVM,4)) + ". Paràmetres=", best_params_SVM)
    
    best_estimator_MLP, best_score_MLP, best_params_MLP, checkpointpath_MLP = MLP_bm(x_train, x_val, y_train, y_val, 
                                                                                     k_NN, scoring, bench, ID_model)
    best_estimator, best_score, best_params = update_best(best_estimator, best_score, best_params,
                                                          best_estimator_MLP, best_score_MLP, best_params_MLP)
    print("MLP: " + str(round(best_score_MLP,4)) + ". Paràmetres=", best_params_MLP)
   
    best_estimator_CNN, best_score_CNN, best_params_CNN, checkpointpath_CNN = CNN_bm(x_train, x_val, y_train, y_val, 
                                                                                     k_NN, scoring, bench, ID_model)
    best_estimator, best_score, best_params = update_best(best_estimator, best_score, best_params,
                                                          best_estimator_CNN, best_score_CNN, best_params_CNN)
    print("CNN: " + str(round(best_score_CNN,4)) + ". Paràmetres=", best_params_CNN)
    
    if (str(best_estimator) == str(best_estimator_MLP)) | (str(best_estimator) == str(best_estimator_CNN)):
        if str(best_estimator) == str(best_estimator_MLP):
            checkpoint_path = checkpointpath_MLP
            best_estimator = MLP(layer=best_params["layer"], n_features=best_params["n_features"], 
                                lr=best_params["lr"])
        else:
            checkpoint_path = checkpointpath_CNN
            best_estimator = CNN(filters=best_params["filters"], n_features=best_params["n_features"], 
                                 lr=best_params["lr"])
        best_estimator = restore_NN_best_estimator(best_estimator, checkpoint_path, 
                                                   x_train, y_train, x_val, y_val, bench)
    
    print("Millor model " + ID_model + "_" + bench + ": " + str(best_estimator) + "[", best_params, "] AUC: " + str(round(best_score,4)))
    
    return best_estimator, best_score, best_params

def save_best_model(best_estimator, best_score, best_params, bench, ID_model):
    #Save model
    model_path = "models/altres_models/"
    if (str(best_estimator).find("keras") != -1):
        model_path = model_path + ID_model + "_" + bench + ".h5"
        best_estimator.save(model_path)
    else:
        model_path = model_path + ID_model + "_" + bench + ".joblib"
        joblib.dump(best_estimator, model_path)
        
    #Save model metadata
    params_path = "models/altres_models/params/" + ID_model + "_" + bench + "_params.csv"
    best_params["score"] = best_score
    best_params["model_type"] = type(best_estimator)
    pd.DataFrame(best_params.items()).to_csv(params_path, index=False, header=False)
    print("Model i paràmetres guardats correctament.")
    
#################################################### MAIN ###############################################    
    
def main(bench, ID_model, k_NN, scoring, taxo_train=None, taxo_val=None, meta_train=None, meta_val=None, 
         autoencoder_path=None, synthetizer_path=None):
        
    x_train, x_val, y_train, y_val = prepare_data(taxo_train=taxo_train, taxo_val=taxo_val, 
                                                  meta_train=meta_train, meta_val=meta_val,
                                                  autoencoder_path=autoencoder_path, 
                                                  synthetizer_path=synthetizer_path, 
                                                  bench=bench, ID_model=ID_model)
    
    best_estimator, best_score, best_params = benchmarking(x_train, x_val, y_train, y_val, 
                                                           bench, ID_model, k_NN, scoring)
    
    save_best_model(best_estimator, best_score, best_params, bench, ID_model)
    

# Benchmarking i selecció millors models

In [2]:
# Meta

## Meta = region

### Train
taxoS = pd.read_csv("train/taxoS_train.csv", encoding='unicode_escape')
taxoS1 = pd.read_csv("train/taxoS1_train.csv", encoding='unicode_escape')
meta_meta = pd.read_csv("metadades_full_bin.csv", encoding='unicode_escape')
taxoS_r, meta_r = add_metas(taxoS,meta_meta,["region"])
taxoS1_r, meta_r = add_metas(taxoS1,meta_meta,["region"])
taxoS_r, meta_r = drop_nas(taxoS_r, meta_r)
taxoS1_r, meta_r = drop_nas(taxoS1_r, meta_r)

### Val
taxoS_val = pd.read_csv("train/taxoS_val.csv", encoding='unicode_escape')
taxoS1_val = pd.read_csv("train/taxoS1_val.csv", encoding='unicode_escape')
meta_meta = pd.read_csv("metadades_full_bin.csv", encoding='unicode_escape')
taxoS_r_val, meta_r_val = add_metas(taxoS_val,meta_meta,["region"])
taxoS1_r_val, meta_r_val = add_metas(taxoS1_val,meta_meta,["region"])
taxoS_r_val, meta_r_val = drop_nas(taxoS_r_val, meta_r_val)
taxoS1_r_val, meta_r_val = drop_nas(taxoS1_r_val, meta_r_val)

## Meta = seq_tool

### Train
taxoS = pd.read_csv("train/taxoS_train.csv", encoding='unicode_escape')
taxoS1 = pd.read_csv("train/taxoS1_train.csv", encoding='unicode_escape')
meta_meta = pd.read_csv("metadades_full_bin.csv", encoding='unicode_escape')
taxoS_seq, meta_seq = add_metas(taxoS,meta_meta,["seq_tool"])
taxoS1_seq, meta_seq = add_metas(taxoS1,meta_meta,["seq_tool"])
taxoS_seq, meta_seq = drop_nas(taxoS_seq, meta_seq)
taxoS1_seq, meta_seq = drop_nas(taxoS1_seq, meta_seq)

### Val
taxoS_val = pd.read_csv("train/taxoS_val.csv", encoding='unicode_escape')
taxoS1_val = pd.read_csv("train/taxoS1_val.csv", encoding='unicode_escape')
meta_meta = pd.read_csv("metadades_full_bin.csv", encoding='unicode_escape')
taxoS_seq_val, meta_seq_val = add_metas(taxoS_val,meta_meta,["seq_tool"])
taxoS1_seq_val, meta_seq_val = add_metas(taxoS1_val,meta_meta,["seq_tool"])
taxoS_seq_val, meta_seq_val = drop_nas(taxoS_seq_val, meta_seq_val)
taxoS1_seq_val, meta_seq_val = drop_nas(taxoS1_seq_val, meta_seq_val)

## Meta = region & seq_tool

### Train
taxoS = pd.read_csv("train/taxoS_train.csv", encoding='unicode_escape')
taxoS1 = pd.read_csv("train/taxoS1_train.csv", encoding='unicode_escape')
meta_meta = pd.read_csv("metadades_full_bin.csv", encoding='unicode_escape')
taxoS_full, meta_full = add_metas(taxoS,meta_meta,["region", "seq_tool"])
taxoS1_full, meta_full = add_metas(taxoS1,meta_meta,["region", "seq_tool"])
taxoS_full, meta_full = drop_nas(taxoS_full, meta_full)
taxoS1_full, meta_full = drop_nas(taxoS1_full, meta_full)

### Val
taxoS_val = pd.read_csv("train/taxoS_val.csv", encoding='unicode_escape')
taxoS1_val = pd.read_csv("train/taxoS1_val.csv", encoding='unicode_escape')
meta_meta = pd.read_csv("metadades_full_bin.csv", encoding='unicode_escape')
taxoS_full_val, meta_full_val = add_metas(taxoS_val,meta_meta,["region", "seq_tool"])
taxoS1_full_val, meta_full_val = add_metas(taxoS1_val,meta_meta,["region", "seq_tool"])
taxoS_full_val, meta_full_val = drop_nas(taxoS_full_val, meta_full_val)
taxoS1_full_val, meta_full_val = drop_nas(taxoS1_full_val, meta_full_val)

# No Meta

## Train
taxoS = pd.read_csv("train/taxoS_train.csv", encoding='unicode_escape')
taxoS1 = pd.read_csv("train/taxoS1_train.csv", encoding='unicode_escape')
meta = pd.read_csv("metadades_full_bin.csv", encoding='unicode_escape')
taxoS, meta = add_metas(taxoS,meta)
meta = pd.read_csv("metadades_full_bin.csv", encoding='unicode_escape')
taxoS1, meta = add_metas(taxoS1,meta)

## Val
taxoS_val = pd.read_csv("train/taxoS_val.csv", encoding='unicode_escape')
taxoS1_val = pd.read_csv("train/taxoS1_val.csv", encoding='unicode_escape')
meta_val = pd.read_csv("metadades_full_bin.csv", encoding='unicode_escape')
taxoS_val, meta_val = add_metas(taxoS_val, meta_val)
meta_val = pd.read_csv("metadades_full_bin.csv", encoding='unicode_escape')
taxoS1_val, meta_val = add_metas(taxoS1_val, meta_val)

## Models a) CRC Full S

In [156]:
os.environ['PYTHONHASHSEED'] = '0' 
np.random.seed(42) 
random.seed(42) 
tf.random.set_seed(42)

main(taxo_train=taxoS, taxo_val=taxoS_val, meta_train=meta, meta_val=meta_val, 
     k_NN=3, scoring="accuracy", bench="rawdata", ID_model="a")

KNN: 0.6957. Paràmetres= {'n_neighbors': 40, 'p': 2, 'weights': 'uniform'}
DT: 0.6304. Paràmetres= {'criterion': 'entropy', 'max_depth': 5, 'max_features': 'sqrt', 'min_samples_leaf': 0.1, 'min_samples_split': 0.8, 'splitter': 'random'}
RF: 0.6304. Paràmetres= {'bootstrap': True, 'criterion': 'entropy', 'max_depth': 7, 'max_features': 'sqrt', 'min_samples_leaf': 0.2, 'min_samples_split': 0.5, 'n_estimators': 500}
SVM: 0.6522. Paràmetres= {'C': 0.1, 'gamma': 0.1, 'kernel': 'linear', 'probability': True, 'shrinking': True, 'tol': 0.001}


  model = KerasClassifier(build_fn=MLP, verbose=0)


MLP: 0.5839. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'layer': [1140, 570], 'lr': 0.001, 'n_features': 2279}


  model = KerasClassifier(build_fn=CNN, verbose=0)


CNN: 0.5742. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'filters': 32, 'lr': 0.001, 'n_features': 2279}
Millor model a_rawdata: KNeighborsClassifier(n_neighbors=40)[ {'n_neighbors': 40, 'p': 2, 'weights': 'uniform'} ] AUC: 0.6957
Model i paràmetres guardats correctament.


In [157]:
os.environ['PYTHONHASHSEED'] = '0' 
np.random.seed(42) 
random.seed(42) 
tf.random.set_seed(42)

main(taxo_train=taxoS, taxo_val=taxoS_val, meta_train=meta, meta_val=meta_val, 
     k_NN=3, scoring="accuracy", bench="encoded", ID_model="a")

INFO:tensorflow:Assets written to: models/altres_models/autoencoders/a_autoencoder\assets
Model i metadades autoencoder guardats correctament.
KNN: 0.7174. Paràmetres= {'n_neighbors': 92, 'p': 1, 'weights': 'uniform'}
DT: 0.6087. Paràmetres= {'criterion': 'entropy', 'max_depth': 4, 'max_features': 'sqrt', 'min_samples_leaf': 0.1, 'min_samples_split': 0.8, 'splitter': 'random'}
RF: 0.6522. Paràmetres= {'bootstrap': True, 'criterion': 'entropy', 'max_depth': 3, 'max_features': 'sqrt', 'min_samples_leaf': 0.1, 'min_samples_split': 0.25, 'n_estimators': 500}
SVM: 0.6304. Paràmetres= {'C': 0.1, 'gamma': 0.1, 'kernel': 'linear', 'probability': True, 'shrinking': True, 'tol': 0.001}


  model = KerasClassifier(build_fn=MLP, verbose=0)


MLP: 0.5743. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'layer': [12], 'lr': 0.001, 'n_features': 100}


  model = KerasClassifier(build_fn=CNN, verbose=0)


CNN: 0.5935. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'filters': 32, 'lr': 0.001, 'n_features': 100}
Millor model a_encoded: KNeighborsClassifier(n_neighbors=92, p=1)[ {'n_neighbors': 92, 'p': 1, 'weights': 'uniform'} ] AUC: 0.7174
Model i paràmetres guardats correctament.


In [25]:
os.environ['PYTHONHASHSEED'] = '0' 
np.random.seed(42) 
random.seed(42) 
tf.random.set_seed(42)

main(taxo_train=taxoS, taxo_val=taxoS_val, meta_train=meta, meta_val=meta_val, 
     k_NN=3, scoring="accuracy", bench="synthetic", ID_model="a", 
     autoencoder_path="models/altres_models/autoencoders/a_autoencoder")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set 

Model i metadades synthetizer guardats correctament.


Sampling conditions: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:22<00:00, 225.29it/s]


KNN: 0.6304. Paràmetres= {'n_neighbors': 30, 'p': 2, 'weights': 'uniform'}
RF: 0.6304. Paràmetres= {'bootstrap': True, 'criterion': 'entropy', 'max_depth': 1, 'max_features': 'sqrt', 'min_samples_leaf': 0.2, 'min_samples_split': 0.5, 'n_estimators': 500}
SVM: 0.6304. Paràmetres= {'C': 0.1, 'gamma': 0.5, 'kernel': 'rbf', 'probability': True, 'shrinking': True, 'tol': 0.001}


  model = KerasClassifier(build_fn=MLP, verbose=0)


MLP: 0.7156. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'layer': [50, 25], 'lr': 0.001, 'n_features': 100}


  model = KerasClassifier(build_fn=CNN, verbose=0)


CNN: 0.7332. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'filters': 64, 'lr': 0.001, 'n_features': 100}
Millor model a_synthetic: <keras.engine.sequential.Sequential object at 0x0000025F5A4BD100>[ {'batch_size': 32, 'epochs': 1000, 'filters': 64, 'lr': 0.001, 'n_features': 100} ] AUC: 0.7332
Model i paràmetres guardats correctament.


In [26]:
os.environ['PYTHONHASHSEED'] = '0' 
np.random.seed(42) 
random.seed(42) 
tf.random.set_seed(42)

main(taxo_train=taxoS, taxo_val=taxoS_val, meta_train=meta, meta_val=meta_val, 
     k_NN=3, scoring="accuracy", bench="synreal", ID_model="a", 
     autoencoder_path="models/altres_models/autoencoders/a_autoencoder",
     synthetizer_path="models/altres_models/synthetizers/a_synthetizer.h5")

Carregant model i metadades synthetizer...


Sampling conditions: 100%|███████████████████████████████████████████████████████████| 210/210 [00:02<00:00, 93.70it/s]


KNN: 0.6739. Paràmetres= {'n_neighbors': 65, 'p': 1, 'weights': 'uniform'}
RF: 0.6522. Paràmetres= {'bootstrap': True, 'criterion': 'entropy', 'max_depth': 7, 'max_features': 'sqrt', 'min_samples_leaf': 0.2, 'min_samples_split': 0.25, 'n_estimators': 500}
SVM: 0.6304. Paràmetres= {'C': 1, 'gamma': 0.1, 'kernel': 'linear', 'probability': True, 'shrinking': True, 'tol': 0.001}


  model = KerasClassifier(build_fn=MLP, verbose=0)


MLP: 0.6062. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'layer': [50], 'lr': 0.001, 'n_features': 100}


  model = KerasClassifier(build_fn=CNN, verbose=0)


CNN: 0.6087. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'filters': 32, 'lr': 0.001, 'n_features': 100}
Millor model a_synreal: KNeighborsClassifier(n_neighbors=65, p=1)[ {'n_neighbors': 65, 'p': 1, 'weights': 'uniform'} ] AUC: 0.6739
Model i paràmetres guardats correctament.


## Models b) CRC Full S1

In [160]:
os.environ['PYTHONHASHSEED'] = '0' 
np.random.seed(42) 
random.seed(42) 
tf.random.set_seed(42)

main(taxo_train=taxoS1, taxo_val=taxoS1_val, meta_train=meta, meta_val=meta_val, 
     k_NN=3, scoring="accuracy", bench="rawdata", ID_model="b")

KNN: 0.587. Paràmetres= {'n_neighbors': 76, 'p': 2, 'weights': 'uniform'}
DT: 0.6087. Paràmetres= {'criterion': 'entropy', 'max_depth': 2, 'max_features': 'sqrt', 'min_samples_leaf': 0.1, 'min_samples_split': 0.9, 'splitter': 'random'}
RF: 0.6304. Paràmetres= {'bootstrap': True, 'criterion': 'entropy', 'max_depth': 4, 'max_features': 'sqrt', 'min_samples_leaf': 0.1, 'min_samples_split': 0.25, 'n_estimators': 500}
SVM: 0.587. Paràmetres= {'C': 5, 'gamma': 0.5, 'kernel': 'rbf', 'probability': True, 'shrinking': True, 'tol': 0.001}


  model = KerasClassifier(build_fn=MLP, verbose=0)


MLP: 0.5026. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'layer': [178, 89], 'lr': 0.001, 'n_features': 714}


  model = KerasClassifier(build_fn=CNN, verbose=0)


CNN: 0.5411. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'filters': 16, 'lr': 0.001, 'n_features': 714}
Millor model b_rawdata: RandomForestClassifier(criterion='entropy', max_depth=4, max_features='sqrt',
                       min_samples_leaf=0.1, min_samples_split=0.25,
                       n_estimators=500)[ {'bootstrap': True, 'criterion': 'entropy', 'max_depth': 4, 'max_features': 'sqrt', 'min_samples_leaf': 0.1, 'min_samples_split': 0.25, 'n_estimators': 500} ] AUC: 0.6304
Model i paràmetres guardats correctament.


In [167]:
os.environ['PYTHONHASHSEED'] = '0' 
np.random.seed(42) 
random.seed(42) 
tf.random.set_seed(42)

main(taxo_train=taxoS1, taxo_val=taxoS1_val, meta_train=meta, meta_val=meta_val, 
     k_NN=3, scoring="accuracy", bench="encoded", ID_model="b")

INFO:tensorflow:Assets written to: models/altres_models/autoencoders/b_autoencoder\assets
Model i metadades autoencoder guardats correctament.
KNN: 0.6304. Paràmetres= {'n_neighbors': 87, 'p': 2, 'weights': 'uniform'}
RF: 0.5435. Paràmetres= {'bootstrap': True, 'criterion': 'entropy', 'max_depth': 1, 'max_features': 'sqrt', 'min_samples_leaf': 0.1, 'min_samples_split': 0.5, 'n_estimators': 500}
SVM: 0.5435. Paràmetres= {'C': 1, 'gamma': 0.1, 'kernel': 'linear', 'probability': True, 'shrinking': True, 'tol': 0.001}


  model = KerasClassifier(build_fn=MLP, verbose=0)


MLP: 0.5504. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'layer': [50, 25], 'lr': 0.001, 'n_features': 100}


  model = KerasClassifier(build_fn=CNN, verbose=0)


CNN: 0.5457. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'filters': 32, 'lr': 0.001, 'n_features': 100}
Millor model b_encoded: KNeighborsClassifier(n_neighbors=87)[ {'n_neighbors': 87, 'p': 2, 'weights': 'uniform'} ] AUC: 0.6304
Model i paràmetres guardats correctament.


In [27]:
os.environ['PYTHONHASHSEED'] = '0' 
np.random.seed(42) 
random.seed(42) 
tf.random.set_seed(42)

main(taxo_train=taxoS1, taxo_val=taxoS1_val, meta_train=meta, meta_val=meta_val, 
     k_NN=3, scoring="accuracy", bench="synthetic", ID_model="b", 
     autoencoder_path="models/altres_models/autoencoders/b_autoencoder")



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set 

Model i metadades synthetizer guardats correctament.


Sampling conditions: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:21<00:00, 235.37it/s]


KNN: 0.6087. Paràmetres= {'n_neighbors': 60, 'p': 1, 'weights': 'uniform'}
RF: 0.5435. Paràmetres= {'bootstrap': True, 'criterion': 'entropy', 'max_depth': 8, 'max_features': 'sqrt', 'min_samples_leaf': 0.1, 'min_samples_split': 0.5, 'n_estimators': 500}
SVM: 0.6304. Paràmetres= {'C': 5, 'gamma': 0.5, 'kernel': 'rbf', 'probability': True, 'shrinking': True, 'tol': 0.001}


  model = KerasClassifier(build_fn=MLP, verbose=0)


MLP: 0.7512. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'layer': [50], 'lr': 0.001, 'n_features': 100}


  model = KerasClassifier(build_fn=CNN, verbose=0)


CNN: 0.7628. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'filters': 64, 'lr': 0.001, 'n_features': 100}
Millor model b_synthetic: <keras.engine.sequential.Sequential object at 0x0000025F0176D6D0>[ {'batch_size': 32, 'epochs': 1000, 'filters': 64, 'lr': 0.001, 'n_features': 100} ] AUC: 0.7628
Model i paràmetres guardats correctament.


In [28]:
os.environ['PYTHONHASHSEED'] = '0' 
np.random.seed(42) 
random.seed(42) 
tf.random.set_seed(42)

main(taxo_train=taxoS1, taxo_val=taxoS1_val, meta_train=meta, meta_val=meta_val, 
     k_NN=3, scoring="accuracy", bench="synreal", ID_model="b", 
     autoencoder_path="models/altres_models/autoencoders/b_autoencoder",
     synthetizer_path="models/altres_models/synthetizers/b_synthetizer.h5")

Carregant model i metadades synthetizer...


Sampling conditions: 100%|██████████████████████████████████████████████████████████| 210/210 [00:01<00:00, 108.45it/s]


KNN: 0.6087. Paràmetres= {'n_neighbors': 35, 'p': 1, 'weights': 'uniform'}
RF: 0.5217. Paràmetres= {'bootstrap': True, 'criterion': 'entropy', 'max_depth': 2, 'max_features': 'sqrt', 'min_samples_leaf': 0.2, 'min_samples_split': 0.25, 'n_estimators': 500}
SVM: 0.5652. Paràmetres= {'C': 0.1, 'gamma': 0.5, 'kernel': 'rbf', 'probability': True, 'shrinking': True, 'tol': 0.001}


  model = KerasClassifier(build_fn=MLP, verbose=0)


MLP: 0.6229. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'layer': [50], 'lr': 0.001, 'n_features': 100}


  model = KerasClassifier(build_fn=CNN, verbose=0)


CNN: 0.6205. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'filters': 16, 'lr': 0.001, 'n_features': 100}
Millor model b_synreal: <keras.engine.sequential.Sequential object at 0x0000025F72908280>[ {'batch_size': 32, 'epochs': 1000, 'layer': [50], 'lr': 0.001, 'n_features': 100} ] AUC: 0.6229
Model i paràmetres guardats correctament.


## Models k) CRC Full + Regió S

In [168]:
os.environ['PYTHONHASHSEED'] = '0' 
np.random.seed(42) 
random.seed(42) 
tf.random.set_seed(42)

main(taxo_train=taxoS_r, taxo_val=taxoS_r_val, meta_train=meta_r, meta_val=meta_r_val, 
     k_NN=3, scoring="accuracy", bench="rawdata", ID_model="k")

KNN: 0.6957. Paràmetres= {'n_neighbors': 34, 'p': 2, 'weights': 'uniform'}
RF: 0.587. Paràmetres= {'bootstrap': True, 'criterion': 'entropy', 'max_depth': 2, 'max_features': 'sqrt', 'min_samples_leaf': 0.1, 'min_samples_split': 0.5, 'n_estimators': 500}
SVM: 0.6739. Paràmetres= {'C': 0.1, 'gamma': 0.1, 'kernel': 'linear', 'probability': True, 'shrinking': True, 'tol': 0.001}


  model = KerasClassifier(build_fn=MLP, verbose=0)


MLP: 0.579. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'layer': [1140, 570, 285], 'lr': 0.001, 'n_features': 2280}


  model = KerasClassifier(build_fn=CNN, verbose=0)


CNN: 0.5646. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'filters': 32, 'lr': 0.001, 'n_features': 2280}
Millor model k_rawdata: KNeighborsClassifier(n_neighbors=34)[ {'n_neighbors': 34, 'p': 2, 'weights': 'uniform'} ] AUC: 0.6957
Model i paràmetres guardats correctament.


In [170]:
os.environ['PYTHONHASHSEED'] = '0' 
np.random.seed(42) 
random.seed(42) 
tf.random.set_seed(42)

main(taxo_train=taxoS_r, taxo_val=taxoS_r_val, meta_train=meta_r, meta_val=meta_r_val,
     k_NN=3, scoring="accuracy", bench="encoded", ID_model="k")

INFO:tensorflow:Assets written to: models/altres_models/autoencoders/k_autoencoder\assets
Model i metadades autoencoder guardats correctament.
KNN: 0.7174. Paràmetres= {'n_neighbors': 102, 'p': 2, 'weights': 'uniform'}
RF: 0.6739. Paràmetres= {'bootstrap': True, 'criterion': 'entropy', 'max_depth': 3, 'max_features': 'sqrt', 'min_samples_leaf': 0.2, 'min_samples_split': 0.5, 'n_estimators': 500}
SVM: 0.6522. Paràmetres= {'C': 1, 'gamma': 0.1, 'kernel': 'rbf', 'probability': True, 'shrinking': True, 'tol': 0.001}


  model = KerasClassifier(build_fn=MLP, verbose=0)


MLP: 0.5887. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'layer': [50, 12], 'lr': 0.001, 'n_features': 101}


  model = KerasClassifier(build_fn=CNN, verbose=0)


CNN: 0.6078. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'filters': 64, 'lr': 0.001, 'n_features': 101}
Millor model k_encoded: KNeighborsClassifier(n_neighbors=102)[ {'n_neighbors': 102, 'p': 2, 'weights': 'uniform'} ] AUC: 0.7174
Model i paràmetres guardats correctament.


In [23]:
os.environ['PYTHONHASHSEED'] = '0' 
np.random.seed(42) 
random.seed(42) 
tf.random.set_seed(42)

main(taxo_train=taxoS_r, taxo_val=taxoS_r_val, meta_train=meta_r, meta_val=meta_r_val, 
     k_NN=3, scoring="accuracy", bench="synthetic", ID_model="k", 
     autoencoder_path="models/altres_models/autoencoders/k_autoencoder")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set 

Model i metadades synthetizer guardats correctament.


Sampling conditions: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:23<00:00, 217.21it/s]


KNN: 0.6522. Paràmetres= {'n_neighbors': 30, 'p': 2, 'weights': 'uniform'}
RF: 0.587. Paràmetres= {'bootstrap': True, 'criterion': 'entropy', 'max_depth': 4, 'max_features': 'sqrt', 'min_samples_leaf': 0.1, 'min_samples_split': 0.25, 'n_estimators': 500}
SVM: 0.6522. Paràmetres= {'C': 5, 'gamma': 0.5, 'kernel': 'rbf', 'probability': True, 'shrinking': True, 'tol': 0.001}


  model = KerasClassifier(build_fn=MLP, verbose=0)


MLP: 0.7262. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'layer': [50, 25], 'lr': 0.001, 'n_features': 101}


  model = KerasClassifier(build_fn=CNN, verbose=0)


CNN: 0.7416. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'filters': 64, 'lr': 0.001, 'n_features': 101}
Millor model k_synthetic: <keras.engine.sequential.Sequential object at 0x0000025F017D0220>[ {'batch_size': 32, 'epochs': 1000, 'filters': 64, 'lr': 0.001, 'n_features': 101} ] AUC: 0.7416
Model i paràmetres guardats correctament.


In [24]:
os.environ['PYTHONHASHSEED'] = '0' 
np.random.seed(42) 
random.seed(42) 
tf.random.set_seed(42)

main(taxo_train=taxoS_r, taxo_val=taxoS_r_val, meta_train=meta_r, meta_val=meta_r_val,
     k_NN=3, scoring="accuracy", bench="synreal", ID_model="k", 
     autoencoder_path="models/altres_models/autoencoders/k_autoencoder",
     synthetizer_path="models/altres_models/synthetizers/k_synthetizer.h5")

Carregant model i metadades synthetizer...


Sampling conditions: 100%|███████████████████████████████████████████████████████████| 210/210 [00:04<00:00, 48.83it/s]


KNN: 0.6957. Paràmetres= {'n_neighbors': 36, 'p': 1, 'weights': 'uniform'}
RF: 0.6739. Paràmetres= {'bootstrap': True, 'criterion': 'entropy', 'max_depth': 5, 'max_features': 'sqrt', 'min_samples_leaf': 0.1, 'min_samples_split': 0.25, 'n_estimators': 500}
SVM: 0.6304. Paràmetres= {'C': 1, 'gamma': 0.1, 'kernel': 'rbf', 'probability': True, 'shrinking': True, 'tol': 0.001}


  model = KerasClassifier(build_fn=MLP, verbose=0)


MLP: 0.611. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'layer': [50, 25], 'lr': 0.001, 'n_features': 101}


  model = KerasClassifier(build_fn=CNN, verbose=0)


CNN: 0.5967. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'filters': 64, 'lr': 0.001, 'n_features': 101}
Millor model k_synreal: KNeighborsClassifier(n_neighbors=36, p=1)[ {'n_neighbors': 36, 'p': 1, 'weights': 'uniform'} ] AUC: 0.6957
Model i paràmetres guardats correctament.


## Models l) CRC Full + Regió S1

In [173]:
os.environ['PYTHONHASHSEED'] = '0' 
np.random.seed(42) 
random.seed(42) 
tf.random.set_seed(42)

main(taxo_train=taxoS1_r, taxo_val=taxoS1_r_val, meta_train=meta_r, meta_val=meta_r_val, 
     k_NN=3, scoring="accuracy", bench="rawdata", ID_model="l")

KNN: 0.6304. Paràmetres= {'n_neighbors': 71, 'p': 2, 'weights': 'uniform'}
RF: 0.6304. Paràmetres= {'bootstrap': True, 'criterion': 'entropy', 'max_depth': 4, 'max_features': 'sqrt', 'min_samples_leaf': 0.1, 'min_samples_split': 0.5, 'n_estimators': 500}
SVM: 0.5652. Paràmetres= {'C': 0.1, 'gamma': 0.1, 'kernel': 'linear', 'probability': True, 'shrinking': True, 'tol': 0.001}


  model = KerasClassifier(build_fn=MLP, verbose=0)


MLP: 0.5024. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'layer': [358, 179, 90], 'lr': 0.001, 'n_features': 715}


  model = KerasClassifier(build_fn=CNN, verbose=0)


CNN: 0.5314. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'filters': 32, 'lr': 0.001, 'n_features': 715}
Millor model l_rawdata: KNeighborsClassifier(n_neighbors=71)[ {'n_neighbors': 71, 'p': 2, 'weights': 'uniform'} ] AUC: 0.6304
Model i paràmetres guardats correctament.


In [174]:
os.environ['PYTHONHASHSEED'] = '0' 
np.random.seed(42) 
random.seed(42) 
tf.random.set_seed(42)

main(taxo_train=taxoS1_r, taxo_val=taxoS1_r_val, meta_train=meta_r, meta_val=meta_r_val,
     k_NN=3, scoring="accuracy", bench="encoded", ID_model="l")

INFO:tensorflow:Assets written to: models/altres_models/autoencoders/l_autoencoder\assets
Model i metadades autoencoder guardats correctament.
KNN: 0.6087. Paràmetres= {'n_neighbors': 47, 'p': 2, 'weights': 'uniform'}
RF: 0.5652. Paràmetres= {'bootstrap': True, 'criterion': 'entropy', 'max_depth': 3, 'max_features': 'sqrt', 'min_samples_leaf': 0.2, 'min_samples_split': 0.25, 'n_estimators': 500}
SVM: 0.5435. Paràmetres= {'C': 0.1, 'gamma': 0.1, 'kernel': 'linear', 'probability': True, 'shrinking': True, 'tol': 0.001}


  model = KerasClassifier(build_fn=MLP, verbose=0)


MLP: 0.5457. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'layer': [50], 'lr': 0.001, 'n_features': 101}


  model = KerasClassifier(build_fn=CNN, verbose=0)


CNN: 0.5409. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'filters': 64, 'lr': 0.001, 'n_features': 101}
Millor model l_encoded: KNeighborsClassifier(n_neighbors=47)[ {'n_neighbors': 47, 'p': 2, 'weights': 'uniform'} ] AUC: 0.6087
Model i paràmetres guardats correctament.


In [29]:
os.environ['PYTHONHASHSEED'] = '0' 
np.random.seed(42) 
random.seed(42) 
tf.random.set_seed(42)

main(taxo_train=taxoS1_r, taxo_val=taxoS1_r_val, meta_train=meta_r, meta_val=meta_r_val,
     k_NN=3, scoring="accuracy", bench="synthetic", ID_model="l", 
     autoencoder_path="models/altres_models/autoencoders/l_autoencoder")



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set 

Model i metadades synthetizer guardats correctament.


Sampling conditions: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:21<00:00, 230.64it/s]


KNN: 0.5217. Paràmetres= {'n_neighbors': 32, 'p': 1, 'weights': 'uniform'}
RF: 0.5652. Paràmetres= {'bootstrap': True, 'criterion': 'entropy', 'max_depth': 6, 'max_features': 'sqrt', 'min_samples_leaf': 0.2, 'min_samples_split': 0.5, 'n_estimators': 500}
SVM: 0.5435. Paràmetres= {'C': 1, 'gamma': 0.5, 'kernel': 'sigmoid', 'probability': True, 'shrinking': True, 'tol': 0.001}


  model = KerasClassifier(build_fn=MLP, verbose=0)


MLP: 0.738. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'layer': [50, 25], 'lr': 0.001, 'n_features': 101}


  model = KerasClassifier(build_fn=CNN, verbose=0)


CNN: 0.7638. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'filters': 64, 'lr': 0.001, 'n_features': 101}
Millor model l_synthetic: <keras.engine.sequential.Sequential object at 0x0000025F088CF5B0>[ {'batch_size': 32, 'epochs': 1000, 'filters': 64, 'lr': 0.001, 'n_features': 101} ] AUC: 0.7638
Model i paràmetres guardats correctament.


In [30]:
os.environ['PYTHONHASHSEED'] = '0' 
np.random.seed(42) 
random.seed(42) 
tf.random.set_seed(42)

main(taxo_train=taxoS1_r, taxo_val=taxoS1_r_val, meta_train=meta_r, meta_val=meta_r_val,
     k_NN=3, scoring="accuracy", bench="synreal", ID_model="l", 
     autoencoder_path="models/altres_models/autoencoders/l_autoencoder",
     synthetizer_path="models/altres_models/synthetizers/l_synthetizer.h5")

Carregant model i metadades synthetizer...


Sampling conditions: 100%|███████████████████████████████████████████████████████████| 210/210 [00:02<00:00, 97.18it/s]


KNN: 0.6087. Paràmetres= {'n_neighbors': 47, 'p': 1, 'weights': 'uniform'}
RF: 0.5. Paràmetres= {'bootstrap': True, 'criterion': 'entropy', 'max_depth': 1, 'max_features': 'sqrt', 'min_samples_leaf': 0.1, 'min_samples_split': 0.75, 'n_estimators': 500}
SVM: 0.5217. Paràmetres= {'C': 1, 'gamma': 0.1, 'kernel': 'linear', 'probability': True, 'shrinking': True, 'tol': 0.001}


  model = KerasClassifier(build_fn=MLP, verbose=0)


MLP: 0.5944. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'layer': [50], 'lr': 0.001, 'n_features': 101}


  model = KerasClassifier(build_fn=CNN, verbose=0)


CNN: 0.6062. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'filters': 16, 'lr': 0.001, 'n_features': 101}
Millor model l_synreal: KNeighborsClassifier(n_neighbors=47, p=1)[ {'n_neighbors': 47, 'p': 1, 'weights': 'uniform'} ] AUC: 0.6087
Model i paràmetres guardats correctament.


## Models m) CRC Full + Seq_tool S

In [177]:
os.environ['PYTHONHASHSEED'] = '0' 
np.random.seed(42) 
random.seed(42) 
tf.random.set_seed(42)

main(taxo_train=taxoS_seq, taxo_val=taxoS_seq_val, meta_train=meta_seq, meta_val=meta_seq_val,
     k_NN=3, scoring="accuracy", bench="rawdata", ID_model="m")

KNN: 0.6842. Paràmetres= {'n_neighbors': 46, 'p': 1, 'weights': 'uniform'}
RF: 0.5789. Paràmetres= {'bootstrap': True, 'criterion': 'entropy', 'max_depth': 1, 'max_features': 'sqrt', 'min_samples_leaf': 0.1, 'min_samples_split': 0.75, 'n_estimators': 500}
SVM: 0.6842. Paràmetres= {'C': 5, 'gamma': 0.1, 'kernel': 'rbf', 'probability': True, 'shrinking': True, 'tol': 0.001}


  model = KerasClassifier(build_fn=MLP, verbose=0)


MLP: 0.651. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'layer': [285], 'lr': 0.001, 'n_features': 2280}


  model = KerasClassifier(build_fn=CNN, verbose=0)


CNN: 0.6377. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'filters': 64, 'lr': 0.001, 'n_features': 2280}
Millor model m_rawdata: KNeighborsClassifier(n_neighbors=46, p=1)[ {'n_neighbors': 46, 'p': 1, 'weights': 'uniform'} ] AUC: 0.6842
Model i paràmetres guardats correctament.


In [178]:
os.environ['PYTHONHASHSEED'] = '0' 
np.random.seed(42) 
random.seed(42) 
tf.random.set_seed(42)

main(taxo_train=taxoS_seq, taxo_val=taxoS_seq_val, meta_train=meta_seq, meta_val=meta_seq_val,
     k_NN=3, scoring="accuracy", bench="encoded", ID_model="m")

INFO:tensorflow:Assets written to: models/altres_models/autoencoders/m_autoencoder\assets
Model i metadades autoencoder guardats correctament.
KNN: 0.6579. Paràmetres= {'n_neighbors': 30, 'p': 2, 'weights': 'uniform'}
RF: 0.6579. Paràmetres= {'bootstrap': True, 'criterion': 'entropy', 'max_depth': 3, 'max_features': 'sqrt', 'min_samples_leaf': 0.1, 'min_samples_split': 0.5, 'n_estimators': 500}
SVM: 0.6842. Paràmetres= {'C': 1, 'gamma': 0.1, 'kernel': 'rbf', 'probability': True, 'shrinking': True, 'tol': 0.001}


  model = KerasClassifier(build_fn=MLP, verbose=0)


MLP: 0.6242. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'layer': [50], 'lr': 0.001, 'n_features': 101}


  model = KerasClassifier(build_fn=CNN, verbose=0)


CNN: 0.6307. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'filters': 16, 'lr': 0.001, 'n_features': 101}
Millor model m_encoded: SVC(C=1, gamma=0.1, probability=True)[ {'C': 1, 'gamma': 0.1, 'kernel': 'rbf', 'probability': True, 'shrinking': True, 'tol': 0.001} ] AUC: 0.6842
Model i paràmetres guardats correctament.


In [31]:
os.environ['PYTHONHASHSEED'] = '0' 
np.random.seed(42) 
random.seed(42) 
tf.random.set_seed(42)

main(taxo_train=taxoS_seq, taxo_val=taxoS_seq_val, meta_train=meta_seq, meta_val=meta_seq_val,
     k_NN=3, scoring="accuracy", bench="synthetic", ID_model="m", 
     autoencoder_path="models/altres_models/autoencoders/m_autoencoder")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set 

Model i metadades synthetizer guardats correctament.


Sampling conditions: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:21<00:00, 237.91it/s]


KNN: 0.6842. Paràmetres= {'n_neighbors': 30, 'p': 2, 'weights': 'uniform'}
RF: 0.5789. Paràmetres= {'bootstrap': True, 'criterion': 'entropy', 'max_depth': 1, 'max_features': 'sqrt', 'min_samples_leaf': 0.2, 'min_samples_split': 0.75, 'n_estimators': 500}
SVM: 0.6316. Paràmetres= {'C': 5, 'gamma': 0.1, 'kernel': 'linear', 'probability': True, 'shrinking': True, 'tol': 0.001}


  model = KerasClassifier(build_fn=MLP, verbose=0)


MLP: 0.7502. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'layer': [50, 25], 'lr': 0.001, 'n_features': 101}


  model = KerasClassifier(build_fn=CNN, verbose=0)


CNN: 0.7802. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'filters': 64, 'lr': 0.001, 'n_features': 101}
Millor model m_synthetic: <keras.engine.sequential.Sequential object at 0x0000025F01746550>[ {'batch_size': 32, 'epochs': 1000, 'filters': 64, 'lr': 0.001, 'n_features': 101} ] AUC: 0.7802
Model i paràmetres guardats correctament.


In [32]:
os.environ['PYTHONHASHSEED'] = '0' 
np.random.seed(42) 
random.seed(42) 
tf.random.set_seed(42)

main(taxo_train=taxoS_seq, taxo_val=taxoS_seq_val, meta_train=meta_seq, meta_val=meta_seq_val,
     k_NN=3, scoring="accuracy", bench="synreal", ID_model="m", 
     autoencoder_path="models/altres_models/autoencoders/m_autoencoder",
     synthetizer_path="models/altres_models/synthetizers/m_synthetizer.h5")

Carregant model i metadades synthetizer...


Sampling conditions: 100%|███████████████████████████████████████████████████████████| 150/150 [00:01<00:00, 87.68it/s]


KNN: 0.7368. Paràmetres= {'n_neighbors': 32, 'p': 2, 'weights': 'uniform'}
RF: 0.6579. Paràmetres= {'bootstrap': True, 'criterion': 'entropy', 'max_depth': 10, 'max_features': 'sqrt', 'min_samples_leaf': 0.1, 'min_samples_split': 0.5, 'n_estimators': 500}
SVM: 0.6579. Paràmetres= {'C': 1, 'gamma': 0.1, 'kernel': 'rbf', 'probability': True, 'shrinking': True, 'tol': 0.001}


  model = KerasClassifier(build_fn=MLP, verbose=0)


MLP: 0.6687. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'layer': [50], 'lr': 0.001, 'n_features': 101}


  model = KerasClassifier(build_fn=CNN, verbose=0)


CNN: 0.6555. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'filters': 64, 'lr': 0.001, 'n_features': 101}
Millor model m_synreal: KNeighborsClassifier(n_neighbors=32)[ {'n_neighbors': 32, 'p': 2, 'weights': 'uniform'} ] AUC: 0.7368
Model i paràmetres guardats correctament.


## Models n) CRC Full + Seq_tool S1

In [181]:
os.environ['PYTHONHASHSEED'] = '0' 
np.random.seed(42) 
random.seed(42) 
tf.random.set_seed(42)

main(taxo_train=taxoS1_seq, taxo_val=taxoS1_seq_val, meta_train=meta_seq, meta_val=meta_seq_val, 
     k_NN=3, scoring="accuracy", bench="rawdata", ID_model="n")

KNN: 0.6579. Paràmetres= {'n_neighbors': 37, 'p': 2, 'weights': 'uniform'}
RF: 0.6053. Paràmetres= {'bootstrap': True, 'criterion': 'entropy', 'max_depth': 1, 'max_features': 'sqrt', 'min_samples_leaf': 0.2, 'min_samples_split': 0.25, 'n_estimators': 500}
SVM: 0.6316. Paràmetres= {'C': 5, 'gamma': 0.1, 'kernel': 'sigmoid', 'probability': True, 'shrinking': True, 'tol': 0.001}


  model = KerasClassifier(build_fn=MLP, verbose=0)


MLP: 0.591. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'layer': [90], 'lr': 0.001, 'n_features': 715}


  model = KerasClassifier(build_fn=CNN, verbose=0)


CNN: 0.5576. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'filters': 64, 'lr': 0.001, 'n_features': 715}
Millor model n_rawdata: KNeighborsClassifier(n_neighbors=37)[ {'n_neighbors': 37, 'p': 2, 'weights': 'uniform'} ] AUC: 0.6579
Model i paràmetres guardats correctament.


In [182]:
os.environ['PYTHONHASHSEED'] = '0' 
np.random.seed(42) 
random.seed(42) 
tf.random.set_seed(42)

main(taxo_train=taxoS1_seq, taxo_val=taxoS1_seq_val, meta_train=meta_seq, meta_val=meta_seq_val,
     k_NN=3, scoring="accuracy", bench="encoded", ID_model="n")

INFO:tensorflow:Assets written to: models/altres_models/autoencoders/n_autoencoder\assets
Model i metadades autoencoder guardats correctament.
KNN: 0.6579. Paràmetres= {'n_neighbors': 54, 'p': 2, 'weights': 'uniform'}
RF: 0.5789. Paràmetres= {'bootstrap': True, 'criterion': 'entropy', 'max_depth': 1, 'max_features': 'sqrt', 'min_samples_leaf': 0.1, 'min_samples_split': 0.75, 'n_estimators': 500}
SVM: 0.6316. Paràmetres= {'C': 1, 'gamma': 0.1, 'kernel': 'rbf', 'probability': True, 'shrinking': True, 'tol': 0.001}


  model = KerasClassifier(build_fn=MLP, verbose=0)


MLP: 0.5574. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'layer': [50], 'lr': 0.001, 'n_features': 101}


  model = KerasClassifier(build_fn=CNN, verbose=0)


CNN: 0.5912. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'filters': 64, 'lr': 0.001, 'n_features': 101}
Millor model n_encoded: KNeighborsClassifier(n_neighbors=54)[ {'n_neighbors': 54, 'p': 2, 'weights': 'uniform'} ] AUC: 0.6579
Model i paràmetres guardats correctament.


In [33]:
os.environ['PYTHONHASHSEED'] = '0' 
np.random.seed(42) 
random.seed(42) 
tf.random.set_seed(42)

main(taxo_train=taxoS1_seq, taxo_val=taxoS1_seq_val, meta_train=meta_seq, meta_val=meta_seq_val,
     k_NN=3, scoring="accuracy", bench="synthetic", ID_model="n", 
     autoencoder_path="models/altres_models/autoencoders/n_autoencoder")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set 

Model i metadades synthetizer guardats correctament.


Sampling conditions: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:21<00:00, 229.57it/s]


KNN: 0.6316. Paràmetres= {'n_neighbors': 81, 'p': 2, 'weights': 'uniform'}
RF: 0.5789. Paràmetres= {'bootstrap': True, 'criterion': 'entropy', 'max_depth': 1, 'max_features': 'sqrt', 'min_samples_leaf': 0.2, 'min_samples_split': 0.75, 'n_estimators': 500}
SVM: 0.5263. Paràmetres= {'C': 1, 'gamma': 0.1, 'kernel': 'rbf', 'probability': True, 'shrinking': True, 'tol': 0.001}


  model = KerasClassifier(build_fn=MLP, verbose=0)


MLP: 0.7546. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'layer': [50], 'lr': 0.001, 'n_features': 101}


  model = KerasClassifier(build_fn=CNN, verbose=0)


CNN: 0.7692. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'filters': 64, 'lr': 0.001, 'n_features': 101}
Millor model n_synthetic: <keras.engine.sequential.Sequential object at 0x0000025F5A39F880>[ {'batch_size': 32, 'epochs': 1000, 'filters': 64, 'lr': 0.001, 'n_features': 101} ] AUC: 0.7692
Model i paràmetres guardats correctament.


In [34]:
os.environ['PYTHONHASHSEED'] = '0' 
np.random.seed(42) 
random.seed(42) 
tf.random.set_seed(42)

main(taxo_train=taxoS1_seq, taxo_val=taxoS1_seq_val, meta_train=meta_seq, meta_val=meta_seq_val,
     k_NN=3, scoring="accuracy", bench="synreal", ID_model="n", 
     autoencoder_path="models/altres_models/autoencoders/n_autoencoder",
     synthetizer_path="models/altres_models/synthetizers/n_synthetizer.h5")

Carregant model i metadades synthetizer...


Sampling conditions: 100%|███████████████████████████████████████████████████████████| 150/150 [00:01<00:00, 94.35it/s]


KNN: 0.6053. Paràmetres= {'n_neighbors': 37, 'p': 2, 'weights': 'uniform'}
RF: 0.5789. Paràmetres= {'bootstrap': True, 'criterion': 'entropy', 'max_depth': 1, 'max_features': 'sqrt', 'min_samples_leaf': 0.1, 'min_samples_split': 0.75, 'n_estimators': 500}
SVM: 0.5789. Paràmetres= {'C': 0.1, 'gamma': 0.1, 'kernel': 'rbf', 'probability': True, 'shrinking': True, 'tol': 0.001}


  model = KerasClassifier(build_fn=MLP, verbose=0)


MLP: 0.6555. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'layer': [50, 25], 'lr': 0.001, 'n_features': 101}


  model = KerasClassifier(build_fn=CNN, verbose=0)


CNN: 0.6256. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'filters': 64, 'lr': 0.001, 'n_features': 101}
Millor model n_synreal: <keras.engine.sequential.Sequential object at 0x0000025F0927F5B0>[ {'batch_size': 32, 'epochs': 1000, 'layer': [50, 25], 'lr': 0.001, 'n_features': 101} ] AUC: 0.6555
Model i paràmetres guardats correctament.


## Models o) CRC Full + Full S

In [185]:
os.environ['PYTHONHASHSEED'] = '0' 
np.random.seed(42) 
random.seed(42) 
tf.random.set_seed(42)

main(taxo_train=taxoS_full, taxo_val=taxoS_full_val, meta_train=meta_full, meta_val=meta_full_val,
     k_NN=3, scoring="accuracy", bench="rawdata", ID_model="o")

KNN: 0.7105. Paràmetres= {'n_neighbors': 48, 'p': 1, 'weights': 'uniform'}
RF: 0.5789. Paràmetres= {'bootstrap': True, 'criterion': 'entropy', 'max_depth': 1, 'max_features': 'sqrt', 'min_samples_leaf': 0.1, 'min_samples_split': 0.75, 'n_estimators': 500}
SVM: 0.6842. Paràmetres= {'C': 5, 'gamma': 0.5, 'kernel': 'rbf', 'probability': True, 'shrinking': True, 'tol': 0.001}


  model = KerasClassifier(build_fn=MLP, verbose=0)


MLP: 0.6176. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'layer': [285], 'lr': 0.001, 'n_features': 2281}


  model = KerasClassifier(build_fn=CNN, verbose=0)


CNN: 0.6444. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'filters': 64, 'lr': 0.001, 'n_features': 2281}
Millor model o_rawdata: KNeighborsClassifier(n_neighbors=48, p=1)[ {'n_neighbors': 48, 'p': 1, 'weights': 'uniform'} ] AUC: 0.7105
Model i paràmetres guardats correctament.


In [186]:
os.environ['PYTHONHASHSEED'] = '0' 
np.random.seed(42) 
random.seed(42) 
tf.random.set_seed(42)

main(taxo_train=taxoS_full, taxo_val=taxoS_full_val, meta_train=meta_full, meta_val=meta_full_val,
     k_NN=3, scoring="accuracy", bench="encoded", ID_model="o")

INFO:tensorflow:Assets written to: models/altres_models/autoencoders/o_autoencoder\assets
Model i metadades autoencoder guardats correctament.
KNN: 0.6579. Paràmetres= {'n_neighbors': 30, 'p': 1, 'weights': 'uniform'}
RF: 0.6579. Paràmetres= {'bootstrap': True, 'criterion': 'entropy', 'max_depth': 3, 'max_features': 'sqrt', 'min_samples_leaf': 0.1, 'min_samples_split': 0.25, 'n_estimators': 500}
SVM: 0.6842. Paràmetres= {'C': 1, 'gamma': 0.1, 'kernel': 'rbf', 'probability': True, 'shrinking': True, 'tol': 0.001}


  model = KerasClassifier(build_fn=MLP, verbose=0)


MLP: 0.6245. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'layer': [26], 'lr': 0.001, 'n_features': 102}


  model = KerasClassifier(build_fn=CNN, verbose=0)


CNN: 0.6307. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'filters': 64, 'lr': 0.001, 'n_features': 102}
Millor model o_encoded: SVC(C=1, gamma=0.1, probability=True)[ {'C': 1, 'gamma': 0.1, 'kernel': 'rbf', 'probability': True, 'shrinking': True, 'tol': 0.001} ] AUC: 0.6842
Model i paràmetres guardats correctament.


In [35]:
os.environ['PYTHONHASHSEED'] = '0' 
np.random.seed(42) 
random.seed(42) 
tf.random.set_seed(42)

main(taxo_train=taxoS_full, taxo_val=taxoS_full_val, meta_train=meta_full, meta_val=meta_full_val,
     k_NN=3, scoring="accuracy", bench="synthetic", ID_model="o", 
     autoencoder_path="models/altres_models/autoencoders/o_autoencoder")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set 

Model i metadades synthetizer guardats correctament.


Sampling conditions: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:21<00:00, 231.32it/s]


KNN: 0.6316. Paràmetres= {'n_neighbors': 85, 'p': 2, 'weights': 'uniform'}
RF: 0.5789. Paràmetres= {'bootstrap': True, 'criterion': 'entropy', 'max_depth': 1, 'max_features': 'sqrt', 'min_samples_leaf': 0.2, 'min_samples_split': 0.75, 'n_estimators': 500}
SVM: 0.6053. Paràmetres= {'C': 0.1, 'gamma': 0.1, 'kernel': 'rbf', 'probability': True, 'shrinking': True, 'tol': 0.001}


  model = KerasClassifier(build_fn=MLP, verbose=0)


MLP: 0.7468. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'layer': [51], 'lr': 0.001, 'n_features': 102}


  model = KerasClassifier(build_fn=CNN, verbose=0)


CNN: 0.7722. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'filters': 64, 'lr': 0.001, 'n_features': 102}
Millor model o_synthetic: <keras.engine.sequential.Sequential object at 0x0000025F00600100>[ {'batch_size': 32, 'epochs': 1000, 'filters': 64, 'lr': 0.001, 'n_features': 102} ] AUC: 0.7722
Model i paràmetres guardats correctament.


In [36]:
os.environ['PYTHONHASHSEED'] = '0' 
np.random.seed(42) 
random.seed(42) 
tf.random.set_seed(42)

main(taxo_train=taxoS_full, taxo_val=taxoS_full_val, meta_train=meta_full, meta_val=meta_full_val,
     k_NN=3, scoring="accuracy", bench="synreal", ID_model="o", 
     autoencoder_path="models/altres_models/autoencoders/o_autoencoder",
     synthetizer_path="models/altres_models/synthetizers/o_synthetizer.h5")

Carregant model i metadades synthetizer...


Sampling conditions: 100%|███████████████████████████████████████████████████████████| 150/150 [00:01<00:00, 82.21it/s]


KNN: 0.7105. Paràmetres= {'n_neighbors': 36, 'p': 1, 'weights': 'uniform'}
RF: 0.6316. Paràmetres= {'bootstrap': True, 'criterion': 'entropy', 'max_depth': 2, 'max_features': 'sqrt', 'min_samples_leaf': 0.2, 'min_samples_split': 0.5, 'n_estimators': 500}
SVM: 0.6316. Paràmetres= {'C': 0.1, 'gamma': 0.1, 'kernel': 'linear', 'probability': True, 'shrinking': True, 'tol': 0.001}


  model = KerasClassifier(build_fn=MLP, verbose=0)


MLP: 0.6621. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'layer': [51], 'lr': 0.001, 'n_features': 102}


  model = KerasClassifier(build_fn=CNN, verbose=0)


CNN: 0.6522. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'filters': 32, 'lr': 0.001, 'n_features': 102}
Millor model o_synreal: KNeighborsClassifier(n_neighbors=36, p=1)[ {'n_neighbors': 36, 'p': 1, 'weights': 'uniform'} ] AUC: 0.7105
Model i paràmetres guardats correctament.


## Models p) CRC Full + Full S1

In [189]:
os.environ['PYTHONHASHSEED'] = '0' 
np.random.seed(42) 
random.seed(42) 
tf.random.set_seed(42)

main(taxo_train=taxoS1_full, taxo_val=taxoS1_full_val, meta_train=meta_full, meta_val=meta_full_val,
     k_NN=3, scoring="accuracy", bench="rawdata", ID_model="p")

KNN: 0.6842. Paràmetres= {'n_neighbors': 57, 'p': 2, 'weights': 'uniform'}
RF: 0.5789. Paràmetres= {'bootstrap': True, 'criterion': 'entropy', 'max_depth': 1, 'max_features': 'sqrt', 'min_samples_leaf': 0.1, 'min_samples_split': 0.25, 'n_estimators': 500}
SVM: 0.6316. Paràmetres= {'C': 1, 'gamma': 0.1, 'kernel': 'linear', 'probability': True, 'shrinking': True, 'tol': 0.001}


  model = KerasClassifier(build_fn=MLP, verbose=0)


MLP: 0.5706. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'layer': [90], 'lr': 0.001, 'n_features': 716}


  model = KerasClassifier(build_fn=CNN, verbose=0)


CNN: 0.5571. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'filters': 16, 'lr': 0.001, 'n_features': 716}
Millor model p_rawdata: KNeighborsClassifier(n_neighbors=57)[ {'n_neighbors': 57, 'p': 2, 'weights': 'uniform'} ] AUC: 0.6842
Model i paràmetres guardats correctament.


In [190]:
os.environ['PYTHONHASHSEED'] = '0' 
np.random.seed(42) 
random.seed(42) 
tf.random.set_seed(42)

main(taxo_train=taxoS1_full, taxo_val=taxoS1_full_val, meta_train=meta_full, meta_val=meta_full_val,
     k_NN=3, scoring="accuracy", bench="encoded", ID_model="p")

INFO:tensorflow:Assets written to: models/altres_models/autoencoders/p_autoencoder\assets
Model i metadades autoencoder guardats correctament.
KNN: 0.6316. Paràmetres= {'n_neighbors': 90, 'p': 1, 'weights': 'uniform'}
RF: 0.5789. Paràmetres= {'bootstrap': True, 'criterion': 'entropy', 'max_depth': 1, 'max_features': 'sqrt', 'min_samples_leaf': 0.1, 'min_samples_split': 0.75, 'n_estimators': 500}
SVM: 0.6053. Paràmetres= {'C': 1, 'gamma': 0.1, 'kernel': 'rbf', 'probability': True, 'shrinking': True, 'tol': 0.001}


  model = KerasClassifier(build_fn=MLP, verbose=0)


MLP: 0.5845. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'layer': [26], 'lr': 0.001, 'n_features': 102}


  model = KerasClassifier(build_fn=CNN, verbose=0)


CNN: 0.5503. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'filters': 64, 'lr': 0.001, 'n_features': 102}
Millor model p_encoded: KNeighborsClassifier(n_neighbors=90, p=1)[ {'n_neighbors': 90, 'p': 1, 'weights': 'uniform'} ] AUC: 0.6316
Model i paràmetres guardats correctament.


In [37]:
os.environ['PYTHONHASHSEED'] = '0' 
np.random.seed(42) 
random.seed(42) 
tf.random.set_seed(42)

main(taxo_train=taxoS1_full, taxo_val=taxoS1_full_val, meta_train=meta_full, meta_val=meta_full_val,
     k_NN=3, scoring="accuracy", bench="synthetic", ID_model="p", 
     autoencoder_path="models/altres_models/autoencoders/p_autoencoder")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[column_name] = data[column_name].to_numpy().flatten()
A value is trying to be set 

Model i metadades synthetizer guardats correctament.


Sampling conditions: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:22<00:00, 224.52it/s]


KNN: 0.5789. Paràmetres= {'n_neighbors': 30, 'p': 1, 'weights': 'uniform'}
RF: 0.5789. Paràmetres= {'bootstrap': True, 'criterion': 'entropy', 'max_depth': 1, 'max_features': 'sqrt', 'min_samples_leaf': 0.2, 'min_samples_split': 0.75, 'n_estimators': 500}
SVM: 0.5526. Paràmetres= {'C': 0.1, 'gamma': 0.1, 'kernel': 'linear', 'probability': True, 'shrinking': True, 'tol': 0.001}


  model = KerasClassifier(build_fn=MLP, verbose=0)


MLP: 0.7924. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'layer': [51], 'lr': 0.001, 'n_features': 102}


  model = KerasClassifier(build_fn=CNN, verbose=0)


CNN: 0.8032. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'filters': 64, 'lr': 0.001, 'n_features': 102}
Millor model p_synthetic: <keras.engine.sequential.Sequential object at 0x0000025F019526D0>[ {'batch_size': 32, 'epochs': 1000, 'filters': 64, 'lr': 0.001, 'n_features': 102} ] AUC: 0.8032
Model i paràmetres guardats correctament.


In [38]:
os.environ['PYTHONHASHSEED'] = '0' 
np.random.seed(42) 
random.seed(42) 
tf.random.set_seed(42)

main(taxo_train=taxoS1_full, taxo_val=taxoS1_full_val, meta_train=meta_full, meta_val=meta_full_val,
     k_NN=3, scoring="accuracy", bench="synreal", ID_model="p", 
     autoencoder_path="models/altres_models/autoencoders/p_autoencoder",
     synthetizer_path="models/altres_models/synthetizers/p_synthetizer.h5")

Carregant model i metadades synthetizer...


Sampling conditions: 100%|███████████████████████████████████████████████████████████| 150/150 [00:01<00:00, 82.56it/s]


KNN: 0.6053. Paràmetres= {'n_neighbors': 59, 'p': 2, 'weights': 'uniform'}
RF: 0.6053. Paràmetres= {'bootstrap': True, 'criterion': 'entropy', 'max_depth': 1, 'max_features': 'sqrt', 'min_samples_leaf': 0.1, 'min_samples_split': 0.5, 'n_estimators': 500}
SVM: 0.6053. Paràmetres= {'C': 0.1, 'gamma': 0.1, 'kernel': 'rbf', 'probability': True, 'shrinking': True, 'tol': 0.001}


  model = KerasClassifier(build_fn=MLP, verbose=0)


MLP: 0.6855. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'layer': [51], 'lr': 0.001, 'n_features': 102}


  model = KerasClassifier(build_fn=CNN, verbose=0)


CNN: 0.6754. Paràmetres= {'batch_size': 32, 'epochs': 1000, 'filters': 64, 'lr': 0.001, 'n_features': 102}
Millor model p_synreal: <keras.engine.sequential.Sequential object at 0x0000025F00B50430>[ {'batch_size': 32, 'epochs': 1000, 'layer': [51], 'lr': 0.001, 'n_features': 102} ] AUC: 0.6855
Model i paràmetres guardats correctament.
