# Reading and processing data

In [1]:
import pandas as pd

dataframe = pd.read_csv("titanic.csv") # https://www.kaggle.com/datasets/heptapod/titanic

# Drop all columns that start with "zero"
dataframe = dataframe.loc[:, ~dataframe.columns.str.startswith('zero')]

# Rename '2urvived' to 'Survived'
dataframe = dataframe.rename(columns={"2urvived": "Survived"})

dataframe['Embarked'] = dataframe['Embarked'].fillna(0)

In [2]:
import numpy as np
import cupy as cp # Rapids libraby

colId = "Passengerid"
target = "Survived"

features = dataframe.drop([colId, target], axis=1).columns.tolist()

X = dataframe[features]
y = dataframe[target]

# Model 2NN (2 Layers, input-output)

In [6]:
class model_2NN:
    """
    Implementa uma Rede Neural Rasa para problemas de CLASSIFICAÇÃO BINÁRIA utilizando a função de ativação tangente hiperbólica nos neurônios de camada oculta e sigmóid para o neurônio da camada de saída.
    - Obtem primeiramente os tamanhos para criar todas as matriz e utilizar de vetorização. 
    - Inicializa os parâmetros de W de forma aleatória multiplicado por 0.01 e o viés com uma matriz de zeros
    - Itera várias vezes para realizar a propagação para a frente e para trás afim de atualizar os pesos para melhor se ajustar.
    - Realiza previões de novas instâncias para conseguir tanto as probabilidades de uma intância ter valor 1, quanto preve se ela tem o valor 1 ou 0.
    """
    def __init__(
        self, 
        iterations=1000, 
        learning_rate=0.01, 
        random_state=42, 
        hidden_layer_units=16, 
        output_size=1, 
        verbose=100,
        device='CPU',
    ):
        self.random_state = random_state
        self.iterations = iterations
        self.learning_rate = learning_rate
        self.n_h = hidden_layer_units
        self.verbose = verbose
        self.device = device.lower()
        self.parameters = {}
        self.grads = {}
        self.cost = None

    def sigmoid(self, Z):
        """
        Calcula a sigmoide de Z
    
        Argumentos:
        Z -- Um array escalar ou numpy de qualquer tamanho.
    
        Retorno:
        s -- sigmoid(Z)
        """

        if self.device == 'cpu':
            Z_c=np.clip(Z,-500,500)
            s = 1 / (1 + np.exp(np.array(-Z_c)))
        else:
            Z_c=cp.clip(Z,-500,500)
            s = 1 / (1 + cp.exp(cp.array(-Z_c)))
        
        return s

    def layer_sizes(self, X, Y):
        """
        Argumentos:
        X -- conjunto de dados de entrada da forma (tamanho da entrada, número de exemplos)
        Y -- rótulos da forma (tamanho da saída, número de exemplos)
        
        Retorna:
        n_x -- o tamanho da camada de entrada
        n_h -- o tamanho da camada oculta
        n_y -- o tamanho da camada de saída
        """
        
        n_x = X.shape[0]
        n_h = self.n_h
        n_y = Y.shape[0]
        
        return n_x, n_h, n_y
    
    def initialize_parameters(self, n_x, n_h, n_y):
        """
        Argumento:
        n_x -- tamanho da camada de entrada
        n_h -- tamanho da camada oculta
        n_y -- tamanho da camada de saída
        
        Retorna:
        params -- dicionário Python contendo seus parâmetros:
        W1 -- matriz de pesos da forma (n_h, n_x)
        b1 -- vetor de viés da forma (n_h, 1)
        W2 -- matriz de pesos da forma (n_y, n_h)
        b2 -- vetor de viés da forma (n_y, 1)
        """    
        if self.device == 'cpu':
            W1 = np.random.randn(n_h, n_x) * 0.01
            b1 = np.zeros((n_h, 1))
            W2 = np.random.randn(n_y, n_h) * 0.01
            b2 = np.zeros((n_y, 1))
        else:
            W1 = cp.random.randn(n_h, n_x) * 0.01
            b1 = cp.zeros((n_h, 1))
            W2 = cp.random.randn(n_y, n_h) * 0.01
            b2 = cp.zeros((n_y, 1))
    
        parameters = {"W1": W1,
                      "b1": b1,
                      "W2": W2,
                      "b2": b2}
        
        return parameters

    def forward_propagation(self, X, parameters):
        """
        Argumento:
        X -- dado de entrada de tamanho (n_x, m)
        parameters -- dicionário Python contendo seus parâmetros (saída da função de inicialização)
        
        Retorna:
        A2 -- A saída sigmoide da segunda ativação
        cache -- um dicionário contendo "Z1", "A1", "Z2" e "A2"
        """
        
        W1 = parameters['W1']
        b1 = parameters['b1']
        W2 = parameters['W2']
        b2 = parameters['b2']

        if self.device == 'cpu':
            Z1 = np.dot(W1, X) + b1
            A1 = np.tanh(Z1)
            Z2 = np.dot(W2, A1) + b2
        else:
            Z1 = cp.dot(W1, X) + b1
            A1 = cp.tanh(Z1)
            Z2 = cp.dot(W2, A1) + b2
        
        A2 = self.sigmoid(Z2)

        assert(A2.shape == (1, X.shape[1]))
        
        cache = {"Z1": Z1,
                 "A1": A1,
                 "Z2": Z2,
                 "A2": A2}
        
        return A2, cache
        
    def compute_cost(self, A2, Y):
        """
        Calcula o custo de entropia cruzada (logloss)
        
        Argumentos:
        A2 -- A saída sigmoide da segunda ativação, da forma (1, número de exemplos)
        Y -- vetor de rótulos "verdadeiro" da forma (1, número de exemplos)
        
        Retorna:
        custo -- custo de entropia cruzada
        
        """
        
        m = Y.shape[1] # number of examples

        if self.device == 'cpu':
            cost = - (1 / m) * np.sum((Y * np.log(A2) + (1 - Y) * np.log(1 - A2)))
            cost = float(np.squeeze(cost))
        else:
            cost = - (1 / m) * cp.sum((Y * cp.log(A2) + (1 - Y) * cp.log(1 - A2)))
            cost = float(cp.squeeze(cost))
        return cost

    def backward_propagation(self, parameters, cache, X, Y):
        """
        Implementa a propagação reversa.
        
        Argumentos:
        parameters -- dicionário Python contendo os parâmetros
        cache -- um dicionário contendo "Z1", "A1", "Z2" e "A2".
        X -- dados de entrada da forma (n_x, m)
        Y -- rótulos "true" do vetor da forma (1, m)
        
        Retorna:
        grads -- dicionário Python contendo seus gradientes em relação a diferentes parâmetros
        """
        m = X.shape[1]
        
        W1 = parameters['W1']
        W2 = parameters['W2']
        
        A1 = cache['A1']
        A2 = cache['A2']
        Z1 = cache['Z1']
        
        dZ2 = A2 - Y
        if self.device == 'cpu':
            dW2 = (1 / m) * np.dot(dZ2, A1.T)
            db2 = (1 / m) * np.sum(dZ2, axis=1, keepdims=True)
            dZ1 = np.dot(W2.T, dZ2) * (1 - np.power(A1, 2))
            dW1 = (1 / m) * np.dot(dZ1, X.T)
            db1 = (1 / m) * np.sum(dZ1, axis=1, keepdims=True)
        else:
            dW2 = (1 / m) * cp.dot(dZ2, A1.T)
            db2 = (1 / m) * cp.sum(dZ2, axis=1, keepdims=True)
            dZ1 = cp.dot(W2.T, dZ2) * (1 - np.power(A1, 2))
            dW1 = (1 / m) * cp.dot(dZ1, X.T)
            db1 = (1 / m) * cp.sum(dZ1, axis=1, keepdims=True)
        
        grads = {"dW1": dW1,
                 "db1": db1,
                 "dW2": dW2,
                 "db2": db2}
        
        return grads

    def update_parameters(self, parameters, grads, learning_rate):
        """
        Atualiza parâmetros usando a regra de atualização de gradiente descendente fornecida acima.
        
        Argumentos:
        parameters -- dicionário Python contendo seus parâmetros
        grads -- dicionário Python contendo seus gradientes
        learning_rate -- taxa de aprendizado 
        
        Retorna:
        parameters -- dicionário Python contendo seus parâmetros atualizados
        """
        
        W1 = parameters['W1']
        W2 = parameters['W2']
        b1 = parameters['b1']
        b2 = parameters['b2']
        
        dW1 = grads['dW1']
        db1 = grads['db1']
        dW2 = grads['dW2']
        db2 = grads['db2']
        
        W1 = W1 - (learning_rate * dW1)
        b1 = b1 - (learning_rate * db1)
        W2 = W2 - (learning_rate * dW2)
        b2 = b2 - (learning_rate * db2)
        
        parameters = {"W1": W1,
                      "b1": b1,
                      "W2": W2,
                      "b2": b2}
        
        return parameters

    def set_data_type(self, X, Y=None):
        np.random.seed(self.random_state)
        if self.device == 'cpu':
            X = np.array(X.T)
            if Y is not None: Y = np.array(Y).reshape(1, -1)
        else:
            cp.random.seed(self.random_state)
            X = cp.array(X.T)
            if Y is not None: Y = cp.array(Y).reshape(1, -1)
        return X, Y

    def fit(self, X, Y):
        """
        Argumentos:
        X -- conjunto de dados da forma (n_x, m)
        Y -- rótulos da forma (1, m)
        num_iterations -- Número de iterações no loop de descida de gradiente
        
        Retorna:
        parameters -- parâmetros aprendidos pelo modelo. Eles podem então ser usados ​​para prever.
        """
        
        X, Y = self.set_data_type(X, Y)
            
        n_x, n_h, n_y = self.layer_sizes(X, Y)
        self.n_x = n_x
        
        parameters = self.initialize_parameters(n_x, n_h, n_y)
    
        for i in range(0, self.iterations):
            
            A2, cache = self.forward_propagation(X, parameters)
            
            cost = self.compute_cost(A2, Y)
            
            grads = self.backward_propagation(parameters, cache, X, Y)
            
            parameters = self.update_parameters(parameters, grads, self.learning_rate)
            
            if self.verbose > 0 and i % self.verbose == 0:
                print ("Cost after iteration %i: %f" %(i, cost))
    
        self.parameters = parameters
        
        return self

    def predict_proba(self, X):
        """
        Usando os parametros aprendidos, prever as classes para cada exemplo em X
        
        Argumentos:
        X -- entrada dos dados de size (n_x, m)
        
        Retorno
        previsões -- vetor de probabilidades preditas do modelo 
        """
        
        X, _ = self.set_data_type(X)

        A2, _ = self.forward_propagation(X, self.parameters)

        if self.device == 'gpu': A2 = A2.get()

        return A2

    def predict(self, X):
        """
        Usando os parametros aprendidos, prever as classes para cada exemplo em X
        
        Argumentos:
        X -- entrada dos dados de size (n_x, m)
        
        Retorno
        previsões -- vetor de previsões do modelo 0 ou 1
        """
        
        probabilities = self.predict_proba(X)
        threshold = 0.5
        predictions = (probabilities > threshold).astype(int)
                
        return predictions.reshape(-1,)
    

# Modeling - Baseline

### CPU

In [4]:
%%time

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score, classification_report
from sklearn.preprocessing import StandardScaler

ini_params_2nn = {
    'iterations':200,
    'learning_rate':0.05, 
    'random_state':42,
    'hidden_layer_units':1024,
    'verbose':-100,
}

oof_predictions = np.zeros(len(X), dtype=int)

NUM_FOLDS = 6
skf = StratifiedKFold(n_splits=NUM_FOLDS, shuffle=True, random_state=42)

for fold, (trn_idx, val_idx) in enumerate(skf.split(X, y)):
    scaler = StandardScaler()
    
    Xtrn_fold, Xtst_fold = scaler.fit_transform(X.iloc[trn_idx]), scaler.transform(X.iloc[val_idx])
    ytrn_fold, ytst_fold = y.iloc[trn_idx], y.iloc[val_idx]

    #print(f"##### FOLD {fold} #####")
    model_instance = model_2NN(
        **ini_params_2nn,
        device='CPU',
    )

    model_instance.fit(Xtrn_fold, ytrn_fold)
    predictions = model_instance.predict(Xtst_fold)
    
    oof_predictions[val_idx] = predictions
    
    f1_fold = f1_score(ytst_fold, predictions)
    print(f"F1-score [fold-{fold}]: {f1_fold}")

f1 = f1_score(y, oof_predictions)

print(f"\nF1-score: {f1:.5f}\n\n")
print(f"\n{classification_report(y, oof_predictions)}")

F1-score [fold-0]: 0.5116279069767442
F1-score [fold-1]: 0.41975308641975306
F1-score [fold-2]: 0.45454545454545453
F1-score [fold-3]: 0.3950617283950617
F1-score [fold-4]: 0.38202247191011235
F1-score [fold-5]: 0.4186046511627907

F1-score: 0.43053



              precision    recall  f1-score   support

           0       0.80      0.94      0.86       967
           1       0.65      0.32      0.43       342

    accuracy                           0.78      1309
   macro avg       0.72      0.63      0.65      1309
weighted avg       0.76      0.78      0.75      1309

CPU times: user 1min 35s, sys: 3min 31s, total: 5min 7s
Wall time: 53 s


### GPU

In [5]:
%%time

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score, classification_report
from sklearn.preprocessing import StandardScaler

oof_predictions = np.zeros(len(X), dtype=int)

NUM_FOLDS = 6
skf = StratifiedKFold(n_splits=NUM_FOLDS, shuffle=True, random_state=42)

for fold, (trn_idx, val_idx) in enumerate(skf.split(X, y)):
    scaler = StandardScaler()
    
    Xtrn_fold, Xtst_fold = scaler.fit_transform(X.iloc[trn_idx]), scaler.transform(X.iloc[val_idx])
    ytrn_fold, ytst_fold = y.iloc[trn_idx], y.iloc[val_idx]

    #print(f"##### FOLD {fold} #####")
    model_instance = model_2NN(
        **ini_params_2nn,
        device='GPU',
    )

    model_instance.fit(Xtrn_fold, ytrn_fold)
    predictions = model_instance.predict(Xtst_fold)
    
    oof_predictions[val_idx] = predictions
    
    f1_fold = f1_score(ytst_fold, predictions)
    print(f"F1-score [fold-{fold}]: {f1_fold}")

f1 = f1_score(y, oof_predictions)

print(f"\nF1-score: {f1:.5f}\n\n")
print(f"\n{classification_report(y, oof_predictions)}")

F1-score [fold-0]: 0.47619047619047616
F1-score [fold-1]: 0.41975308641975306
F1-score [fold-2]: 0.45454545454545453
F1-score [fold-3]: 0.3950617283950617
F1-score [fold-4]: 0.38202247191011235
F1-score [fold-5]: 0.4186046511627907

F1-score: 0.42436



              precision    recall  f1-score   support

           0       0.80      0.94      0.86       967
           1       0.65      0.32      0.42       342

    accuracy                           0.78      1309
   macro avg       0.72      0.63      0.64      1309
weighted avg       0.76      0.78      0.75      1309

CPU times: user 1.99 s, sys: 734 ms, total: 2.73 s
Wall time: 952 ms


# Optimize hyperparams with device GPU

In [7]:
import optuna
NUM_FOLDS = 6
skf = StratifiedKFold(n_splits=NUM_FOLDS, shuffle=True, random_state=42)

def objective(trial):

    iterations=trial.suggest_int('iterations', 50, 4000, step=50)
    learning_rate=trial.suggest_float('learning_rate', 0.00001, 0.01, log=True) 
    hidden_layer_units=2 ** (trial.suggest_int('hidden_layer_units', 2, 12))

    oof_predictions = np.zeros(len(X), dtype=int)

    NUM_FOLDS = 6
    skf = StratifiedKFold(n_splits=NUM_FOLDS, shuffle=True, random_state=42)
    
    for fold, (trn_idx, val_idx) in enumerate(skf.split(X, y)):
        scaler = StandardScaler()
        
        Xtrn_fold, Xtst_fold = scaler.fit_transform(X.iloc[trn_idx]), scaler.transform(X.iloc[val_idx])
        ytrn_fold, ytst_fold = y.iloc[trn_idx], y.iloc[val_idx]
    
        #print(f"##### FOLD {fold} #####")
        model_instance = model_2NN(
            iterations=iterations,
            learning_rate=learning_rate, 
            hidden_layer_units=hidden_layer_units,
            random_state=42,
            verbose=-100,
            device='GPU'
        )
    
        model_instance.fit(Xtrn_fold, ytrn_fold)
        predictions = model_instance.predict(Xtst_fold)
        
        oof_predictions[val_idx] = predictions
        
        f1_fold = f1_score(ytst_fold, predictions)
        print(f"F1-score [fold-{fold}]: {f1_fold}")
    
    f1 = f1_score(y, oof_predictions)

    return f1

study = optuna.create_study(direction='maximize') # Queremos maximizar o F1 score

N_TRIALS = 50 # Número de conjuntos de hiperparâmetros a testar (ajuste conforme tempo/recursos)
study.optimize(objective, n_trials=N_TRIALS)

[I 2025-04-30 16:33:58,219] A new study created in memory with name: no-name-34a5f9a4-1504-45b9-aa6a-03d0118cd108


F1-score [fold-0]: 0.0
F1-score [fold-1]: 0.0
F1-score [fold-2]: 0.0
F1-score [fold-3]: 0.0
F1-score [fold-4]: 0.0


[I 2025-04-30 16:34:01,150] Trial 0 finished with value: 0.0 and parameters: {'iterations': 250, 'learning_rate': 0.003933520591271461, 'hidden_layer_units': 7}. Best is trial 0 with value: 0.0.


F1-score [fold-5]: 0.0
F1-score [fold-0]: 0.0
F1-score [fold-1]: 0.0
F1-score [fold-2]: 0.0
F1-score [fold-3]: 0.0
F1-score [fold-4]: 0.0


[I 2025-04-30 16:34:28,071] Trial 1 finished with value: 0.0 and parameters: {'iterations': 2750, 'learning_rate': 0.005854915529892832, 'hidden_layer_units': 2}. Best is trial 0 with value: 0.0.


F1-score [fold-5]: 0.0
F1-score [fold-0]: 0.12903225806451613
F1-score [fold-1]: 0.09375
F1-score [fold-2]: 0.14925373134328357
F1-score [fold-3]: 0.06349206349206349
F1-score [fold-4]: 0.12307692307692308


[I 2025-04-30 16:35:03,028] Trial 2 finished with value: 0.11917098445595854 and parameters: {'iterations': 3200, 'learning_rate': 0.0004953624562418855, 'hidden_layer_units': 11}. Best is trial 2 with value: 0.11917098445595854.


F1-score [fold-5]: 0.15384615384615385
F1-score [fold-0]: 0.43902439024390244
F1-score [fold-1]: 0.41975308641975306
F1-score [fold-2]: 0.4367816091954023
F1-score [fold-3]: 0.379746835443038
F1-score [fold-4]: 0.3448275862068966


[I 2025-04-30 16:35:34,831] Trial 3 finished with value: 0.3967935871743487 and parameters: {'iterations': 1750, 'learning_rate': 0.0017881477718771008, 'hidden_layer_units': 12}. Best is trial 3 with value: 0.3967935871743487.


F1-score [fold-5]: 0.3614457831325301
F1-score [fold-0]: 0.0
F1-score [fold-1]: 0.0
F1-score [fold-2]: 0.0
F1-score [fold-3]: 0.0
F1-score [fold-4]: 0.0


[I 2025-04-30 16:35:59,467] Trial 4 finished with value: 0.0 and parameters: {'iterations': 2600, 'learning_rate': 0.0031798154147008387, 'hidden_layer_units': 6}. Best is trial 3 with value: 0.3967935871743487.


F1-score [fold-5]: 0.0
F1-score [fold-0]: 0.27586206896551724
F1-score [fold-1]: 0.3541666666666667
F1-score [fold-2]: 0.3218390804597701
F1-score [fold-3]: 0.2391304347826087
F1-score [fold-4]: 0.35555555555555557


[I 2025-04-30 16:36:12,078] Trial 5 finished with value: 0.31868131868131866 and parameters: {'iterations': 1100, 'learning_rate': 2.778514241349337e-05, 'hidden_layer_units': 11}. Best is trial 3 with value: 0.3967935871743487.


F1-score [fold-5]: 0.3617021276595745
F1-score [fold-0]: 0.0
F1-score [fold-1]: 0.0
F1-score [fold-2]: 0.0
F1-score [fold-3]: 0.0
F1-score [fold-4]: 0.0


[I 2025-04-30 16:36:30,778] Trial 6 finished with value: 0.0 and parameters: {'iterations': 1950, 'learning_rate': 0.00018543877247066765, 'hidden_layer_units': 8}. Best is trial 3 with value: 0.3967935871743487.


F1-score [fold-5]: 0.0
F1-score [fold-0]: 0.4
F1-score [fold-1]: 0.38461538461538464
F1-score [fold-2]: 0.41379310344827586
F1-score [fold-3]: 0.32432432432432434
F1-score [fold-4]: 0.32941176470588235


[I 2025-04-30 16:36:47,296] Trial 7 finished with value: 0.37344398340248963 and parameters: {'iterations': 850, 'learning_rate': 0.0003215351778878946, 'hidden_layer_units': 12}. Best is trial 3 with value: 0.3967935871743487.


F1-score [fold-5]: 0.38461538461538464
F1-score [fold-0]: 0.0
F1-score [fold-1]: 0.0
F1-score [fold-2]: 0.0
F1-score [fold-3]: 0.0
F1-score [fold-4]: 0.0


[I 2025-04-30 16:37:00,701] Trial 8 finished with value: 0.0 and parameters: {'iterations': 1500, 'learning_rate': 0.0003493980578798504, 'hidden_layer_units': 10}. Best is trial 3 with value: 0.3967935871743487.


F1-score [fold-5]: 0.0
F1-score [fold-0]: 0.0
F1-score [fold-1]: 0.0
F1-score [fold-2]: 0.0
F1-score [fold-3]: 0.0
F1-score [fold-4]: 0.06779661016949153


[I 2025-04-30 16:37:20,763] Trial 9 finished with value: 0.011594202898550725 and parameters: {'iterations': 1900, 'learning_rate': 0.006278571000800484, 'hidden_layer_units': 5}. Best is trial 3 with value: 0.3967935871743487.


F1-score [fold-5]: 0.0
F1-score [fold-0]: 0.0
F1-score [fold-1]: 0.0
F1-score [fold-2]: 0.0
F1-score [fold-3]: 0.0
F1-score [fold-4]: 0.0


[I 2025-04-30 16:37:56,600] Trial 10 finished with value: 0.0 and parameters: {'iterations': 3600, 'learning_rate': 0.0009912757849735773, 'hidden_layer_units': 9}. Best is trial 3 with value: 0.3967935871743487.


F1-score [fold-5]: 0.0
F1-score [fold-0]: 0.46601941747572817
F1-score [fold-1]: 0.49523809523809526
F1-score [fold-2]: 0.5046728971962616
F1-score [fold-3]: 0.5490196078431373
F1-score [fold-4]: 0.5309734513274337


[I 2025-04-30 16:38:08,541] Trial 11 finished with value: 0.5141955835962145 and parameters: {'iterations': 700, 'learning_rate': 8.677048054824348e-05, 'hidden_layer_units': 12}. Best is trial 11 with value: 0.5141955835962145.


F1-score [fold-5]: 0.5384615384615384
F1-score [fold-0]: 0.36486486486486486
F1-score [fold-1]: 0.5063291139240507
F1-score [fold-2]: 0.45569620253164556
F1-score [fold-3]: 0.496551724137931
F1-score [fold-4]: 0.45614035087719296


[I 2025-04-30 16:38:11,504] Trial 12 finished with value: 0.47396386822529224 and parameters: {'iterations': 150, 'learning_rate': 5.9246444767861065e-05, 'hidden_layer_units': 12}. Best is trial 11 with value: 0.5141955835962145.


F1-score [fold-5]: 0.5590062111801242
F1-score [fold-0]: 0.3826086956521739
F1-score [fold-1]: 0.23008849557522124
F1-score [fold-2]: 0.336
F1-score [fold-3]: 0.1951219512195122
F1-score [fold-4]: 0.21052631578947367


[I 2025-04-30 16:38:12,567] Trial 13 finished with value: 0.2720897615708275 and parameters: {'iterations': 100, 'learning_rate': 5.432731537267261e-05, 'hidden_layer_units': 9}. Best is trial 11 with value: 0.5141955835962145.


F1-score [fold-5]: 0.2764227642276423
F1-score [fold-0]: 0.0
F1-score [fold-1]: 0.0
F1-score [fold-2]: 0.0
F1-score [fold-3]: 0.0
F1-score [fold-4]: 0.0


[I 2025-04-30 16:38:20,504] Trial 14 finished with value: 0.0 and parameters: {'iterations': 700, 'learning_rate': 8.522396063223423e-05, 'hidden_layer_units': 3}. Best is trial 11 with value: 0.5141955835962145.


F1-score [fold-5]: 0.0
F1-score [fold-0]: 0.3684210526315789
F1-score [fold-1]: 0.4968944099378882
F1-score [fold-2]: 0.4662576687116564
F1-score [fold-3]: 0.5066666666666667
F1-score [fold-4]: 0.4519774011299435


[I 2025-04-30 16:38:28,709] Trial 15 finished with value: 0.4737384140061792 and parameters: {'iterations': 500, 'learning_rate': 1.1739421787880292e-05, 'hidden_layer_units': 12}. Best is trial 11 with value: 0.5141955835962145.


F1-score [fold-5]: 0.5476190476190477
F1-score [fold-0]: 0.0
F1-score [fold-1]: 0.0
F1-score [fold-2]: 0.0
F1-score [fold-3]: 0.0
F1-score [fold-4]: 0.034482758620689655


[I 2025-04-30 16:38:41,906] Trial 16 finished with value: 0.005780346820809248 and parameters: {'iterations': 1250, 'learning_rate': 0.00010202578542372533, 'hidden_layer_units': 10}. Best is trial 11 with value: 0.5141955835962145.


F1-score [fold-5]: 0.0
F1-score [fold-0]: 0.2
F1-score [fold-1]: 0.2978723404255319
F1-score [fold-2]: 0.22916666666666666
F1-score [fold-3]: 0.2524271844660194
F1-score [fold-4]: 0.3157894736842105


[I 2025-04-30 16:38:43,392] Trial 17 finished with value: 0.2593856655290102 and parameters: {'iterations': 100, 'learning_rate': 2.825663591199055e-05, 'hidden_layer_units': 4}. Best is trial 11 with value: 0.5141955835962145.


F1-score [fold-5]: 0.2653061224489796
F1-score [fold-0]: 0.32786885245901637
F1-score [fold-1]: 0.3333333333333333
F1-score [fold-2]: 0.2677165354330709
F1-score [fold-3]: 0.3870967741935484
F1-score [fold-4]: 0.2608695652173913


[I 2025-04-30 16:38:51,936] Trial 18 finished with value: 0.3117241379310345 and parameters: {'iterations': 800, 'learning_rate': 1.0296207781148243e-05, 'hidden_layer_units': 10}. Best is trial 11 with value: 0.5141955835962145.


F1-score [fold-5]: 0.2926829268292683
F1-score [fold-0]: 0.0
F1-score [fold-1]: 0.0
F1-score [fold-2]: 0.0
F1-score [fold-3]: 0.0
F1-score [fold-4]: 0.0


[I 2025-04-30 16:39:15,861] Trial 19 finished with value: 0.0 and parameters: {'iterations': 2400, 'learning_rate': 0.0001697246124474576, 'hidden_layer_units': 8}. Best is trial 11 with value: 0.5141955835962145.


F1-score [fold-5]: 0.0
F1-score [fold-0]: 0.35514018691588783
F1-score [fold-1]: 0.3783783783783784
F1-score [fold-2]: 0.3584905660377358
F1-score [fold-3]: 0.27522935779816515
F1-score [fold-4]: 0.3925233644859813


[I 2025-04-30 16:39:21,309] Trial 20 finished with value: 0.3627906976744186 and parameters: {'iterations': 450, 'learning_rate': 3.556196153181681e-05, 'hidden_layer_units': 11}. Best is trial 11 with value: 0.5141955835962145.


F1-score [fold-5]: 0.41904761904761906
F1-score [fold-0]: 0.3684210526315789
F1-score [fold-1]: 0.4968944099378882
F1-score [fold-2]: 0.4662576687116564
F1-score [fold-3]: 0.5066666666666667
F1-score [fold-4]: 0.4519774011299435


[I 2025-04-30 16:39:29,972] Trial 21 finished with value: 0.4742268041237113 and parameters: {'iterations': 550, 'learning_rate': 1.0831359419324483e-05, 'hidden_layer_units': 12}. Best is trial 11 with value: 0.5141955835962145.


F1-score [fold-5]: 0.5508982035928144
F1-score [fold-0]: 0.3898305084745763
F1-score [fold-1]: 0.5714285714285714
F1-score [fold-2]: 0.49612403100775193
F1-score [fold-3]: 0.5833333333333334
F1-score [fold-4]: 0.5205479452054794


[I 2025-04-30 16:39:52,235] Trial 22 finished with value: 0.5314505776636713 and parameters: {'iterations': 1150, 'learning_rate': 1.9432429103120593e-05, 'hidden_layer_units': 12}. Best is trial 22 with value: 0.5314505776636713.


F1-score [fold-5]: 0.6165413533834586
F1-score [fold-0]: 0.27956989247311825
F1-score [fold-1]: 0.3564356435643564
F1-score [fold-2]: 0.32989690721649484
F1-score [fold-3]: 0.26
F1-score [fold-4]: 0.3673469387755102


[I 2025-04-30 16:40:05,025] Trial 23 finished with value: 0.33559322033898303 and parameters: {'iterations': 1250, 'learning_rate': 1.8725643067259224e-05, 'hidden_layer_units': 11}. Best is trial 22 with value: 0.5314505776636713.


F1-score [fold-5]: 0.4158415841584158
F1-score [fold-0]: 0.26666666666666666
F1-score [fold-1]: 0.16091954022988506
F1-score [fold-2]: 0.17582417582417584
F1-score [fold-3]: 0.15053763440860216
F1-score [fold-4]: 0.21428571428571427


[I 2025-04-30 16:40:16,294] Trial 24 finished with value: 0.19776119402985073 and parameters: {'iterations': 1050, 'learning_rate': 1.8577575034148518e-05, 'hidden_layer_units': 9}. Best is trial 22 with value: 0.5314505776636713.


F1-score [fold-5]: 0.21978021978021978
F1-score [fold-0]: 0.46601941747572817
F1-score [fold-1]: 0.49523809523809526
F1-score [fold-2]: 0.4807692307692308
F1-score [fold-3]: 0.5544554455445545
F1-score [fold-4]: 0.5309734513274337


[I 2025-04-30 16:40:43,758] Trial 25 finished with value: 0.5111111111111111 and parameters: {'iterations': 1500, 'learning_rate': 4.398728713539718e-05, 'hidden_layer_units': 12}. Best is trial 22 with value: 0.5314505776636713.


F1-score [fold-5]: 0.5384615384615384
F1-score [fold-0]: 0.0
F1-score [fold-1]: 0.0
F1-score [fold-2]: 0.0
F1-score [fold-3]: 0.0
F1-score [fold-4]: 0.0


[I 2025-04-30 16:40:59,485] Trial 26 finished with value: 0.0 and parameters: {'iterations': 1550, 'learning_rate': 0.00016009814923456572, 'hidden_layer_units': 10}. Best is trial 22 with value: 0.5314505776636713.


F1-score [fold-5]: 0.0
F1-score [fold-0]: 0.08955223880597014
F1-score [fold-1]: 0.2535211267605634
F1-score [fold-2]: 0.16901408450704225
F1-score [fold-3]: 0.13157894736842105
F1-score [fold-4]: 0.25


[I 2025-04-30 16:41:22,810] Trial 27 finished with value: 0.17289719626168223 and parameters: {'iterations': 2200, 'learning_rate': 4.581188333665696e-05, 'hidden_layer_units': 11}. Best is trial 22 with value: 0.5314505776636713.


F1-score [fold-5]: 0.14084507042253522
F1-score [fold-0]: 0.0
F1-score [fold-1]: 0.0
F1-score [fold-2]: 0.0
F1-score [fold-3]: 0.0
F1-score [fold-4]: 0.0


[I 2025-04-30 16:41:37,485] Trial 28 finished with value: 0.0 and parameters: {'iterations': 1550, 'learning_rate': 8.699218052999323e-05, 'hidden_layer_units': 7}. Best is trial 22 with value: 0.5314505776636713.


F1-score [fold-5]: 0.0
F1-score [fold-0]: 0.38016528925619836
F1-score [fold-1]: 0.562962962962963
F1-score [fold-2]: 0.49230769230769234
F1-score [fold-3]: 0.5645161290322581
F1-score [fold-4]: 0.5170068027210885


[I 2025-04-30 16:41:58,091] Trial 29 finished with value: 0.5240506329113924 and parameters: {'iterations': 1050, 'learning_rate': 1.989303982758552e-05, 'hidden_layer_units': 12}. Best is trial 22 with value: 0.5314505776636713.


F1-score [fold-5]: 0.6165413533834586
F1-score [fold-0]: 0.5233644859813084
F1-score [fold-1]: 0.5
F1-score [fold-2]: 0.509090909090909
F1-score [fold-3]: 0.3838383838383838
F1-score [fold-4]: 0.42735042735042733


[I 2025-04-30 16:42:06,703] Trial 30 finished with value: 0.47878787878787876 and parameters: {'iterations': 950, 'learning_rate': 1.8904240310570346e-05, 'hidden_layer_units': 8}. Best is trial 22 with value: 0.5314505776636713.


F1-score [fold-5]: 0.5210084033613446
F1-score [fold-0]: 0.4036697247706422
F1-score [fold-1]: 0.5245901639344263
F1-score [fold-2]: 0.5254237288135594
F1-score [fold-3]: 0.5892857142857143
F1-score [fold-4]: 0.5401459854014599


[I 2025-04-30 16:42:30,024] Trial 31 finished with value: 0.5348189415041783 and parameters: {'iterations': 1300, 'learning_rate': 2.5705490185275697e-05, 'hidden_layer_units': 12}. Best is trial 31 with value: 0.5348189415041783.


F1-score [fold-5]: 0.6166666666666667
F1-score [fold-0]: 0.2857142857142857
F1-score [fold-1]: 0.34
F1-score [fold-2]: 0.3404255319148936
F1-score [fold-3]: 0.26
F1-score [fold-4]: 0.35051546391752575


[I 2025-04-30 16:42:45,561] Trial 32 finished with value: 0.32989690721649484 and parameters: {'iterations': 1350, 'learning_rate': 1.813096240978805e-05, 'hidden_layer_units': 11}. Best is trial 31 with value: 0.5348189415041783.


F1-score [fold-5]: 0.4
F1-score [fold-0]: 0.36486486486486486
F1-score [fold-1]: 0.5063291139240507
F1-score [fold-2]: 0.445859872611465
F1-score [fold-3]: 0.48951048951048953
F1-score [fold-4]: 0.4588235294117647


[I 2025-04-30 16:42:52,453] Trial 33 finished with value: 0.47171824973319104 and parameters: {'iterations': 350, 'learning_rate': 2.690887020302643e-05, 'hidden_layer_units': 12}. Best is trial 31 with value: 0.5348189415041783.


F1-score [fold-5]: 0.5590062111801242
F1-score [fold-0]: 0.20512820512820512
F1-score [fold-1]: 0.36363636363636365
F1-score [fold-2]: 0.3
F1-score [fold-3]: 0.24175824175824176
F1-score [fold-4]: 0.34146341463414637


[I 2025-04-30 16:42:59,850] Trial 34 finished with value: 0.29365079365079366 and parameters: {'iterations': 650, 'learning_rate': 6.57475204090302e-05, 'hidden_layer_units': 11}. Best is trial 31 with value: 0.5348189415041783.


F1-score [fold-5]: 0.3058823529411765
F1-score [fold-0]: 0.27586206896551724
F1-score [fold-1]: 0.17073170731707318
F1-score [fold-2]: 0.19148936170212766
F1-score [fold-3]: 0.30434782608695654
F1-score [fold-4]: 0.20689655172413793


[I 2025-04-30 16:43:16,319] Trial 35 finished with value: 0.22641509433962265 and parameters: {'iterations': 1750, 'learning_rate': 1.4212930335635143e-05, 'hidden_layer_units': 10}. Best is trial 31 with value: 0.5348189415041783.


F1-score [fold-5]: 0.20454545454545456
F1-score [fold-0]: 0.4036697247706422
F1-score [fold-1]: 0.5344827586206896
F1-score [fold-2]: 0.5263157894736842
F1-score [fold-3]: 0.5818181818181818
F1-score [fold-4]: 0.5271317829457365


[I 2025-04-30 16:43:35,414] Trial 36 finished with value: 0.5287356321839081 and parameters: {'iterations': 1100, 'learning_rate': 3.380041777318714e-05, 'hidden_layer_units': 12}. Best is trial 31 with value: 0.5348189415041783.


F1-score [fold-5]: 0.5932203389830508
F1-score [fold-0]: 0.23809523809523808
F1-score [fold-1]: 0.35789473684210527
F1-score [fold-2]: 0.32558139534883723
F1-score [fold-3]: 0.2391304347826087
F1-score [fold-4]: 0.3595505617977528


[I 2025-04-30 16:43:48,037] Trial 37 finished with value: 0.31539888682745826 and parameters: {'iterations': 1100, 'learning_rate': 2.957745514410671e-05, 'hidden_layer_units': 11}. Best is trial 31 with value: 0.5348189415041783.


F1-score [fold-5]: 0.3655913978494624
F1-score [fold-0]: 0.41975308641975306
F1-score [fold-1]: 0.4
F1-score [fold-2]: 0.4367816091954023
F1-score [fold-3]: 0.379746835443038
F1-score [fold-4]: 0.3488372093023256


[I 2025-04-30 16:44:21,188] Trial 38 finished with value: 0.39191919191919194 and parameters: {'iterations': 1750, 'learning_rate': 0.0013401676841440242, 'hidden_layer_units': 12}. Best is trial 31 with value: 0.5348189415041783.


F1-score [fold-5]: 0.36585365853658536
F1-score [fold-0]: 0.031746031746031744
F1-score [fold-1]: 0.0
F1-score [fold-2]: 0.0
F1-score [fold-3]: 0.031746031746031744
F1-score [fold-4]: 0.03225806451612903


[I 2025-04-30 16:44:51,105] Trial 39 finished with value: 0.021798365122615803 and parameters: {'iterations': 2900, 'learning_rate': 2.426168213082353e-05, 'hidden_layer_units': 10}. Best is trial 31 with value: 0.5348189415041783.


F1-score [fold-5]: 0.03333333333333333
F1-score [fold-0]: 0.0
F1-score [fold-1]: 0.0
F1-score [fold-2]: 0.0
F1-score [fold-3]: 0.0
F1-score [fold-4]: 0.0


[I 2025-04-30 16:45:13,829] Trial 40 finished with value: 0.0 and parameters: {'iterations': 2200, 'learning_rate': 4.141905067093315e-05, 'hidden_layer_units': 2}. Best is trial 31 with value: 0.5348189415041783.


F1-score [fold-5]: 0.0
F1-score [fold-0]: 0.47191011235955055
F1-score [fold-1]: 0.5416666666666666
F1-score [fold-2]: 0.4631578947368421
F1-score [fold-3]: 0.45454545454545453
F1-score [fold-4]: 0.4536082474226804


[I 2025-04-30 16:45:31,578] Trial 41 finished with value: 0.4784172661870504 and parameters: {'iterations': 900, 'learning_rate': 0.00012566254165967628, 'hidden_layer_units': 12}. Best is trial 31 with value: 0.5348189415041783.


F1-score [fold-5]: 0.4835164835164835
F1-score [fold-0]: 0.3333333333333333
F1-score [fold-1]: 0.37383177570093457
F1-score [fold-2]: 0.36538461538461536
F1-score [fold-3]: 0.2641509433962264
F1-score [fold-4]: 0.38461538461538464


[I 2025-04-30 16:45:44,711] Trial 42 finished with value: 0.35782747603833864 and parameters: {'iterations': 1250, 'learning_rate': 1.4631937654338863e-05, 'hidden_layer_units': 11}. Best is trial 31 with value: 0.5348189415041783.


F1-score [fold-5]: 0.42718446601941745
F1-score [fold-0]: 0.0
F1-score [fold-1]: 0.0
F1-score [fold-2]: 0.0
F1-score [fold-3]: 0.0
F1-score [fold-4]: 0.0


[I 2025-04-30 16:45:53,621] Trial 43 finished with value: 0.0 and parameters: {'iterations': 750, 'learning_rate': 2.1796863147833558e-05, 'hidden_layer_units': 6}. Best is trial 31 with value: 0.5348189415041783.


F1-score [fold-5]: 0.0
F1-score [fold-0]: 0.47058823529411764
F1-score [fold-1]: 0.5048543689320388
F1-score [fold-2]: 0.4752475247524752
F1-score [fold-3]: 0.5454545454545454
F1-score [fold-4]: 0.5178571428571429


[I 2025-04-30 16:46:12,555] Trial 44 finished with value: 0.5112540192926045 and parameters: {'iterations': 1050, 'learning_rate': 6.813101312332612e-05, 'hidden_layer_units': 12}. Best is trial 31 with value: 0.5348189415041783.


F1-score [fold-5]: 0.5523809523809524
F1-score [fold-0]: 0.3855421686746988
F1-score [fold-1]: 0.40963855421686746
F1-score [fold-2]: 0.449438202247191
F1-score [fold-3]: 0.36363636363636365
F1-score [fold-4]: 0.367816091954023


[I 2025-04-30 16:46:18,796] Trial 45 finished with value: 0.4007936507936508 and parameters: {'iterations': 300, 'learning_rate': 0.0006165783272027871, 'hidden_layer_units': 12}. Best is trial 31 with value: 0.5348189415041783.


F1-score [fold-5]: 0.4235294117647059
F1-score [fold-0]: 0.20512820512820512
F1-score [fold-1]: 0.3595505617977528
F1-score [fold-2]: 0.27848101265822783
F1-score [fold-3]: 0.24175824175824176
F1-score [fold-4]: 0.35714285714285715


[I 2025-04-30 16:46:35,400] Trial 46 finished with value: 0.29191321499013806 and parameters: {'iterations': 1350, 'learning_rate': 3.092990058331701e-05, 'hidden_layer_units': 11}. Best is trial 31 with value: 0.5348189415041783.


F1-score [fold-5]: 0.3023255813953488
F1-score [fold-0]: 0.0
F1-score [fold-1]: 0.0
F1-score [fold-2]: 0.0
F1-score [fold-3]: 0.0
F1-score [fold-4]: 0.0


[I 2025-04-30 16:47:13,811] Trial 47 finished with value: 0.0 and parameters: {'iterations': 3900, 'learning_rate': 3.970962701874355e-05, 'hidden_layer_units': 9}. Best is trial 31 with value: 0.5348189415041783.


F1-score [fold-5]: 0.0
F1-score [fold-0]: 0.3611111111111111
F1-score [fold-1]: 0.5234899328859061
F1-score [fold-2]: 0.4503311258278146
F1-score [fold-3]: 0.5147058823529411
F1-score [fold-4]: 0.4720496894409938


[I 2025-04-30 16:47:29,298] Trial 48 finished with value: 0.48709315375982043 and parameters: {'iterations': 900, 'learning_rate': 1.4689305028590635e-05, 'hidden_layer_units': 12}. Best is trial 31 with value: 0.5348189415041783.


F1-score [fold-5]: 0.6
F1-score [fold-0]: 0.0
F1-score [fold-1]: 0.0
F1-score [fold-2]: 0.0
F1-score [fold-3]: 0.0
F1-score [fold-4]: 0.0


[I 2025-04-30 16:47:35,500] Trial 49 finished with value: 0.0 and parameters: {'iterations': 600, 'learning_rate': 0.0037860766588490562, 'hidden_layer_units': 6}. Best is trial 31 with value: 0.5348189415041783.


F1-score [fold-5]: 0.0


# Final model 

In [8]:
final_params_2nn = study.best_params
final_params_2nn

{'iterations': 1300,
 'learning_rate': 2.5705490185275697e-05,
 'hidden_layer_units': 12}

In [9]:
final_params_2nn['hidden_layer_units'] = 2 ** final_params_2nn['hidden_layer_units']
final_params_2nn['verbose'] = -1
final_params_2nn['device'] = 'GPU'
final_params_2nn['random_state'] = 42

final_params_2nn

{'iterations': 1300,
 'learning_rate': 2.5705490185275697e-05,
 'hidden_layer_units': 4096,
 'verbose': -1,
 'device': 'GPU',
 'random_state': 42}

In [10]:
%%time

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score, classification_report
from sklearn.preprocessing import StandardScaler


oof_predictions = np.zeros(len(X), dtype=int)

NUM_FOLDS = 6
skf = StratifiedKFold(n_splits=NUM_FOLDS, shuffle=True, random_state=42)

for fold, (trn_idx, val_idx) in enumerate(skf.split(X, y)):
    scaler = StandardScaler()
    
    Xtrn_fold, Xtst_fold = scaler.fit_transform(X.iloc[trn_idx]), scaler.transform(X.iloc[val_idx])
    ytrn_fold, ytst_fold = y.iloc[trn_idx], y.iloc[val_idx]

    #print(f"##### FOLD {fold} #####")
    model_instance = model_2NN(
        **final_params_2nn,
    )

    model_instance.fit(Xtrn_fold, ytrn_fold)
    predictions = model_instance.predict(Xtst_fold)
    
    oof_predictions[val_idx] = predictions
    
    f1_fold = f1_score(ytst_fold, predictions)
    print(f"F1-score [fold-{fold}]: {f1_fold}")

f1 = f1_score(y, oof_predictions)

print(f"\nF1-score: {f1:.5f}\n\n")
print(f"\n{classification_report(y, oof_predictions)}")

F1-score [fold-0]: 0.4036697247706422
F1-score [fold-1]: 0.5245901639344263
F1-score [fold-2]: 0.5254237288135594
F1-score [fold-3]: 0.5892857142857143
F1-score [fold-4]: 0.5401459854014599
F1-score [fold-5]: 0.6166666666666667

F1-score: 0.53482



              precision    recall  f1-score   support

           0       0.84      0.81      0.82       967
           1       0.51      0.56      0.53       342

    accuracy                           0.74      1309
   macro avg       0.67      0.69      0.68      1309
weighted avg       0.75      0.74      0.75      1309

CPU times: user 22.1 s, sys: 2.23 s, total: 24.3 s
Wall time: 26.1 s
