# Co-Attention Siamese Model Training

### Importing Libraries

In [None]:
!pip install pandas==2.2.3
!pip install numpy==1.26.4
!pip install optuna==4.1.0
!pip install regex==2024.9.11
!pip install tensorflow==2.17.0
!pip install scikit-learn==1.5.2
!pip install sentence_transformers==3.4.1

Defaulting to user installation because normal site-packages is not writeable
[0mDefaulting to user installation because normal site-packages is not writeable
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence_transformers==3.4.1)
  Using cached nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence_transformers==3.4.1)
  Using cached nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.11.0->sentence_transformers==3.4.1)
  Using cached nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.11.0->sentence_transformers==3.4.1)
  Using cached nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.11.0->sentence_

In [2]:
import re
import optuna
import numpy as np
import pandas as pd
import tensorflow as tf

from tensorflow.keras import layers, Model
from sklearn.metrics import f1_score, classification_report, matthews_corrcoef
from sentence_transformers import SentenceTransformer

  from .autonotebook import tqdm as notebook_tqdm
2025-04-04 13:16:45.945124: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-04-04 13:16:45.971082: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-04-04 13:16:45.998993: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-04-04 13:16:46.007434: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-04-04 13:16:46.0

### Load Data

In [None]:
# Load the training data
train_data_path = './training_data/ED/train.csv'
train_data = pd.read_csv(train_data_path)

In [None]:
# Load the dev data
dev_data_path = './training_data/ED/dev.csv'
dev_data = pd.read_csv(dev_data_path)

#### Remove [ref] from Text

In [None]:
# Remove instances of [ref] from the text
remove_ref = lambda x: re.sub(r'\[ref\]|\[ref|ref\]', '', x)

train_data['Claim'] = train_data['Claim'].apply(remove_ref)
train_data['Evidence'] = train_data['Evidence'].apply(remove_ref)
dev_data['Claim'] = dev_data['Claim'].apply(remove_ref)
dev_data['Evidence'] = dev_data['Evidence'].apply(remove_ref)

### Calculate Training Weights

In [16]:
zeroes = train_data[train_data['label'] == 0].shape[0]
ones = train_data[train_data['label'] == 1].shape[0]

print(f'Number of zeroes: {zeroes}')
print(f'Number of ones: {ones}')

Number of zeroes: 14088
Number of ones: 5269


In [17]:
# Add a column to training data for the weights
zero_weight = zeroes / (zeroes + ones)
one_weight = ones / (zeroes + ones)

train_data['weights'] = train_data['label'].apply(lambda x: zero_weight if x == 0 else one_weight)

### Sample Text Length

In [18]:
# Claim Maximum Length
claim_max_length = train_data['Claim'].apply(lambda x: len(str(x))).max()
print(f'Claim Maximum Length: {claim_max_length}')

Claim Maximum Length: 180


In [19]:
# Evidence Maximum Length
evidence_max_length = train_data['Evidence'].apply(lambda x: len(str(x))).max()
print(f'Evidence Maximum Length: {claim_max_length}')

Evidence Maximum Length: 180


### Encode Textual Inputs


In [None]:
# Load the Encoder
encoder_model = SentenceTransformer("all-MiniLM-L6-v2")
# encoder_model = SentenceTransformer("multi-qa-mpnet-base-dot-v1")

# Encode the Training Set
claim_embeddings = encoder_model.encode(train_data['Claim'])
evidence_embeddings = encoder_model.encode(train_data['Evidence'])

# Encode the Dev Set
dev_claim_embeddings = encoder_model.encode(dev_data['Claim'])
dev_evidence_embeddings = encoder_model.encode(dev_data['Evidence'])

### Design the Model

In [None]:
# Custom MultiHead Co-Attention layer
class MultiHeadCoAttention(layers.Layer):
    def __init__(self, attention_dim=128, num_heads=4, dropout_rate=0.1, **kwargs):
        super(MultiHeadCoAttention, self).__init__(**kwargs)
        self.attention_dim = attention_dim
        self.num_heads = num_heads
        self.head_dim = attention_dim // num_heads
        self.dropout_rate = dropout_rate
        
        # Ensure the attention dimension is divisible by the number of heads
        assert attention_dim % num_heads == 0, "Attention dimension must be divisible by number of heads"
        
    def build(self, input_shape):
        # Ensure we have two inputs
        assert isinstance(input_shape, list) and len(input_shape) == 2
        
        self.claim_dim = input_shape[0][-1]
        self.evidence_dim = input_shape[1][-1]
        
        # Claim projections
        self.claim_projection = layers.Dense(self.attention_dim * 3, use_bias=False)
        
        # Evidence projections
        self.evidence_projection = layers.Dense(self.attention_dim * 3, use_bias=False)
        
        # Output projections
        self.claim_output_projection = layers.Dense(self.claim_dim, use_bias=False)
        self.evidence_output_projection = layers.Dense(self.evidence_dim, use_bias=False)
        
        # Dropout layer
        self.dropout = layers.Dropout(self.dropout_rate)
        
        super(MultiHeadCoAttention, self).build(input_shape)
    
    def split_heads(self, x):
        """Split the last dimension into (num_heads, head_dim) with fixed batch size"""
        batch_size = tf.shape(x)[0]
        seq_len = tf.shape(x)[1]
        
        # Reshape with fixed dimensions where possible
        x = tf.reshape(x, [batch_size, seq_len, self.num_heads, self.head_dim])
        return tf.transpose(x, [0, 2, 1, 3])
    
    def call(self, inputs, training=None):
        # Unpack inputs
        claim, evidence = inputs

        # Get projection for claim (Q, K, V)
        claim_proj = self.claim_projection(claim)

        # Split claim projection into query, key, value
        claim_proj_split = tf.split(claim_proj, 3, axis=-1)
        claim_query, claim_key, claim_value = claim_proj_split

        # Get projection for evidence (Q, K, V)
        evidence_proj = self.evidence_projection(evidence)

        # Split evidence projection into query, key, value
        evidence_proj_split = tf.split(evidence_proj, 3, axis=-1)
        evidence_query, evidence_key, evidence_value = evidence_proj_split

        # Split heads
        claim_query_heads = self.split_heads(claim_query)
        claim_key_heads = self.split_heads(claim_key)
        claim_value_heads = self.split_heads(claim_value)

        evidence_query_heads = self.split_heads(evidence_query)
        evidence_key_heads = self.split_heads(evidence_key)
        evidence_value_heads = self.split_heads(evidence_value)

        # Claim attends to evidence with fixed scaling factor
        claim_evidence_scores = tf.matmul(claim_query_heads, evidence_key_heads, transpose_b=True)
        claim_evidence_scores = claim_evidence_scores / tf.sqrt(tf.cast(self.head_dim, tf.float32))

        claim_evidence_attention = tf.nn.softmax(claim_evidence_scores, axis=-1)
        claim_evidence_attention = self.dropout(claim_evidence_attention, training=training)

        claim_context = tf.matmul(claim_evidence_attention, evidence_value_heads)

        # Evidence attends to claim with fixed scaling factor
        evidence_claim_scores = tf.matmul(evidence_query_heads, claim_key_heads, transpose_b=True)
        evidence_claim_scores = evidence_claim_scores / tf.sqrt(tf.cast(self.head_dim, tf.float32))

        evidence_claim_attention = tf.nn.softmax(evidence_claim_scores, axis=-1)
        evidence_claim_attention = self.dropout(evidence_claim_attention, training=training)

        evidence_context = tf.matmul(evidence_claim_attention, claim_value_heads)

        # Combine heads and transpose back
        claim_context = tf.transpose(claim_context, [0, 2, 1, 3])
        evidence_context = tf.transpose(evidence_context, [0, 2, 1, 3])

        # Instead of reshaping to force seq_len=1, perform average pooling over the sequence dimension:
        claim_context = tf.reduce_mean(claim_context, axis=1, keepdims=True)
        evidence_context = tf.reduce_mean(evidence_context, axis=1, keepdims=True)

        # Reshape with fixed dimensions where possible
        batch_size = tf.shape(claim)[0]
        claim_context = tf.reshape(claim_context, [batch_size, 1, self.attention_dim])
        evidence_context = tf.reshape(evidence_context, [batch_size, 1, self.attention_dim])

        # Project back to original dimensions using Dense layers
        claim_output = self.claim_output_projection(claim_context)
        evidence_output = self.evidence_output_projection(evidence_context)

        return claim_output, evidence_output

In [12]:
def EncoderLayer(input_shape=(384, ), units = [512, 64], dropout=0.2):
    inputs = layers.Input(shape=input_shape)

    x = layers.Dense(units[0], activation="relu")(inputs)
    x = layers.Dropout(dropout)(x)
    x = layers.Dense(units[1], activation="relu")(x)

    return Model(inputs, x)

In [13]:
def get_model(input_shape=(384,) ,loss="binary_crossentropy", optimizer="adam", learning_rate=0.008, metrics=['accuracy', 'recall', 'precision'], units=[512, 64], attention_dim=128, dropout = [0.2, 0.5, 0.3], num_heads=4, ff_dim=64, attention_dropout=0.1):
    """
    Build a co-attention model for claim-evidence binary classification.
    
    Args:
        claim_dim: Dimension of the claim embedding
        evidence_dim: Dimension of the evidence embedding
        hidden_dim: Dimension of hidden layers
        
    Returns:
        A Keras Model instance
    """
    # Input layers
    claim_input = layers.Input(shape=input_shape, name="claim_input")
    evidence_input = layers.Input(shape=input_shape, name="evidence_input")

    # Siamese Encoder Layers
    encoder = EncoderLayer(input_shape=input_shape, units=units, dropout=dropout[0])
    claim_encoded = encoder(claim_input)
    evidence_encoded = encoder(evidence_input)

    # Reshape for co-attention (adding sequence dimension of 1)
    claim_reshaped = layers.Reshape((1, units[1]))(claim_encoded)
    evidence_reshaped = layers.Reshape((1, units[1]))(evidence_encoded)

    # Apply co-attention
    claim_attended, evidence_attended = MultiHeadCoAttention(attention_dim=attention_dim, num_heads=num_heads, dropout_rate=attention_dropout)(
        [claim_reshaped, evidence_reshaped]
    )

    claim_projected = layers.TimeDistributed(layers.Dense(ff_dim))(claim_attended)
    evidence_projected = layers.TimeDistributed(layers.Dense(ff_dim))(evidence_attended)

    # Flatten and concatenate
    claim_flat = layers.Reshape((ff_dim,))(claim_projected)
    evidence_flat = layers.Reshape((ff_dim,))(evidence_projected)

    merged = layers.Concatenate()([claim_flat, evidence_flat])

    # Classification layers
    dense1 = layers.Dense(ff_dim, activation="relu")(merged)
    bn1 = layers.BatchNormalization()(dense1)
    dropout1 = layers.Dropout(dropout[1])(bn1)
    dense2 = layers.Dense(ff_dim // 2, activation="relu")(dropout1)
    bn2 = layers.BatchNormalization()(dense2)
    dropout2 = layers.Dropout(dropout[2])(bn2)

    # Output layer
    output = layers.Dense(1, activation="sigmoid", name="output")(dropout2)

    # Compile model    
    model = Model(inputs=[claim_input, evidence_input], outputs=output)
    optimizer_instance = tf.keras.optimizers.get(optimizer)
    optimizer_instance.learning_rate = learning_rate
    model.compile(optimizer=optimizer_instance, loss=loss, metrics=metrics)

    return model

### Hyperparameter Training

In [None]:
bestModel = None
bestModelScore = 0
bestThreshold = 0

# Define the objective function
def objective(trial):
    # Get a range of values
    epochs = trial.suggest_int('epochs', 5, 40)
    batch_size = trial.suggest_categorical('batch_size', [16, 32])
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-2)
    optimizer = trial.suggest_categorical('optimizer', ['adam'])

    # Model Specific Hyperparameters
    dropout0 = trial.suggest_float('dropout0', 0.1, 0.5)
    dropout1 = trial.suggest_float('dropout1', 0.1, 0.5)
    dropout2 = trial.suggest_float('dropout2', 0.1, 0.5)
    units = trial.suggest_categorical('units', [[512, 64], [512, 128], [512, 256], [1024, 64], [1024, 128], [1024, 256]])
    attention_dropout = trial.suggest_float('attention_dropout', 0.1, 0.5)
    attention_dim = trial.suggest_categorical('attention_dim', [32, 64, 128, 256])
    num_heads = trial.suggest_categorical('num_heads', [2, 4, 8, 16])
    ff_dim = trial.suggest_categorical('ff_dim', [64, 128, 256])

    dropout = [dropout0, dropout1, dropout2]

    print(f"""Testing Parameters:
    Epochs: {epochs}
    Dropout: {dropout}
    Units = {units}
    Attention Dropout: {attention_dropout}
    Batch Size: {batch_size}
    Learning Rate: {learning_rate}
    Optimizer: {optimizer}
    Attention Dimension: {attention_dim}
    Number of Heads: {num_heads}
    Feed Forward Dimension: {ff_dim}
""")

    metrics = ['accuracy', 'precision' , 'recall']
    training_set = (claim_embeddings, evidence_embeddings)
    validation_set = ((dev_claim_embeddings, dev_evidence_embeddings), dev_data['label'])

    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_accuracy', min_delta=0, patience=10, verbose=0, mode='auto',
        baseline=None, restore_best_weights=True
    )

    # Create the model
    model = get_model(loss="binary_crossentropy",
                      optimizer='adam',
                      learning_rate=learning_rate,
                      metrics=metrics,
                      dropout=dropout,
                      units=units,
                      num_heads=num_heads,
                      ff_dim=ff_dim,
                      attention_dropout=attention_dropout,
                      attention_dim=attention_dim)

    model.fit(training_set, y=train_data['label'], epochs=epochs, validation_data=validation_set, callbacks=[early_stopping], sample_weight=train_data['weights'], batch_size=batch_size)

    # Get the predicted and true develoment labels
    print(" == Tuning Threshold ==")
    y_pred_prob = model.predict([dev_claim_embeddings, dev_evidence_embeddings])
    y_true = dev_data['label']

    # Threshold Tuning
    thresholds = np.arange(0.2, 0.7, 0.01)
    scores = [f1_score(y_true, (y_pred_prob > t).astype(int), average='weighted') for t in thresholds]
    best_threshold = thresholds[np.argmax(scores)]
    y_pred = (y_pred_prob > best_threshold).astype(int)

    # Calculate macro F1 Score
    report = classification_report(y_true=y_true, y_pred=y_pred, digits=10)
    print(report)
    macro_f1 = f1_score(y_true, y_pred, average='macro')
    weighted_f1 = f1_score(y_true, y_pred, average='weighted')

    # Update the best model
    global bestModel
    global bestModelScore
    if macro_f1 > bestModelScore:
        bestModel = model
        bestModelScore = macro_f1
        bestThreshold = best_threshold

    print(f"Best Threshold: {best_threshold}")
    print(f"Model Macro F1: {macro_f1}")
    print(f"Model Weighted F1: {weighted_f1}")

    return weighted_f1

# Create the Optuna study
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)

# Print the best hyperparameters
if study.best_params:
    print("Best hyperparameters found:", study.best_params)
else:
    print("No valid hyperparameters found!")
model = bestModel

[I 2025-03-31 21:32:47,298] A new study created in memory with name: no-name-dfd559c3-32f8-48be-9069-ecee7398cbd1


Testing Parameters:
    Epochs: 13
    Dropout: [0.40766880520052284, 0.29254077431087755, 0.14040631580463483]
    Units = [512, 256]
    Attention Dropout: 0.15858138269517785
    Batch Size: 16
    Learning Rate: 0.0052136208076342
    Optimizer: adam
    Attention Dimension: 256
    Number of Heads: 8
    Feed Forward Dimension: 256

Epoch 1/13




[1m1345/1345[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 5ms/step - accuracy: 0.7168 - loss: 0.2369 - precision: 0.4754 - recall: 0.0948 - val_accuracy: 0.7479 - val_loss: 0.6160 - val_precision: 0.8724 - val_recall: 0.1043
Epoch 2/13
[1m1345/1345[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.7607 - loss: 0.1761 - precision: 0.7480 - recall: 0.1622 - val_accuracy: 0.7442 - val_loss: 0.5581 - val_precision: 0.9559 - val_recall: 0.0793
Epoch 3/13
[1m1345/1345[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.7762 - loss: 0.1633 - precision: 0.7913 - recall: 0.2374 - val_accuracy: 0.7776 - val_loss: 0.4470 - val_precision: 0.8797 - val_recall: 0.2274
Epoch 4/13
[1m1345/1345[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.7747 - loss: 0.1609 - precision: 0.7955 - recall: 0.2424 - val_accuracy: 0.8014 - val_loss: 0.4297 - val_precision: 0.8513 - val_recall: 0.3421
Epoch 5/13
[1m1345/1345[

[I 2025-03-31 21:33:47,804] Trial 0 finished with value: 0.8284567216981867 and parameters: {'epochs': 13, 'batch_size': 16, 'learning_rate': 0.0052136208076342, 'optimizer': 'adam', 'dropout0': 0.40766880520052284, 'dropout1': 0.29254077431087755, 'dropout2': 0.14040631580463483, 'units': [512, 256], 'attention_dropout': 0.15858138269517785, 'attention_dim': 256, 'num_heads': 8, 'ff_dim': 256}. Best is trial 0 with value: 0.8284567216981867.


              precision    recall  f1-score   support

           0  0.8721088435 0.8973401773 0.8845446182      4286
           1  0.7097625330 0.6560975610 0.6818757921      1640

    accuracy                      0.8305771178      5926
   macro avg  0.7909356883 0.7767188691 0.7832102052      5926
weighted avg  0.8271800637 0.8305771178 0.8284567217      5926

Best Threshold: 0.3300000000000001
Model Macro F1: 0.7832102051786116
Model Weighted F1: 0.8284567216981867
Testing Parameters:
    Epochs: 23
    Dropout: [0.19983373394422302, 0.1860919205799749, 0.2443094466252548]
    Units = [1024, 128]
    Attention Dropout: 0.4686890812544823
    Batch Size: 32
    Learning Rate: 0.0036427845683906263
    Optimizer: adam
    Attention Dimension: 64
    Number of Heads: 16
    Feed Forward Dimension: 128

Epoch 1/23




[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 8ms/step - accuracy: 0.7026 - loss: 0.2610 - precision: 0.4368 - recall: 0.1704 - val_accuracy: 0.7744 - val_loss: 0.4854 - val_precision: 0.8565 - val_recall: 0.2220
Epoch 2/23
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7762 - loss: 0.1656 - precision: 0.7867 - recall: 0.2397 - val_accuracy: 0.7884 - val_loss: 0.4507 - val_precision: 0.9004 - val_recall: 0.2646
Epoch 3/23
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7903 - loss: 0.1530 - precision: 0.8173 - recall: 0.2856 - val_accuracy: 0.8228 - val_loss: 0.3770 - val_precision: 0.8495 - val_recall: 0.4372
Epoch 4/23
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.8130 - loss: 0.1417 - precision: 0.8568 - recall: 0.3712 - val_accuracy: 0.8338 - val_loss: 0.3972 - val_precision: 0.7490 - val_recall: 0.6006
Epoch 5/23
[1m673/673[0m [32m━━━

[I 2025-03-31 21:34:22,914] Trial 1 finished with value: 0.8289721748570859 and parameters: {'epochs': 23, 'batch_size': 32, 'learning_rate': 0.0036427845683906263, 'optimizer': 'adam', 'dropout0': 0.19983373394422302, 'dropout1': 0.1860919205799749, 'dropout2': 0.2443094466252548, 'units': [1024, 128], 'attention_dropout': 0.4686890812544823, 'attention_dim': 64, 'num_heads': 16, 'ff_dim': 128}. Best is trial 1 with value: 0.8289721748570859.


              precision    recall  f1-score   support

           0  0.8759743237 0.8915072329 0.8836725254      4286
           1  0.7026854220 0.6701219512 0.6860174782      1640

    accuracy                      0.8302396220      5926
   macro avg  0.7893298728 0.7808145920 0.7848450018      5926
weighted avg  0.8280172196 0.8302396220 0.8289721749      5926

Best Threshold: 0.4100000000000002
Model Macro F1: 0.7848450017958588
Model Weighted F1: 0.8289721748570859
Testing Parameters:
    Epochs: 16
    Dropout: [0.29198301403088256, 0.30315689176429617, 0.2727704055642929]
    Units = [1024, 64]
    Attention Dropout: 0.12476757942943446
    Batch Size: 32
    Learning Rate: 0.004806991795240468
    Optimizer: adam
    Attention Dimension: 128
    Number of Heads: 2
    Feed Forward Dimension: 128

Epoch 1/16




[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 7ms/step - accuracy: 0.7155 - loss: 0.2493 - precision: 0.4669 - recall: 0.1573 - val_accuracy: 0.8037 - val_loss: 0.4137 - val_precision: 0.6464 - val_recall: 0.6421
Epoch 2/16
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7666 - loss: 0.1698 - precision: 0.7669 - recall: 0.1985 - val_accuracy: 0.7481 - val_loss: 0.4852 - val_precision: 0.9868 - val_recall: 0.0909
Epoch 3/16
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7833 - loss: 0.1594 - precision: 0.8117 - recall: 0.2736 - val_accuracy: 0.7936 - val_loss: 0.4378 - val_precision: 0.8770 - val_recall: 0.2957
Epoch 4/16
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7919 - loss: 0.1533 - precision: 0.8428 - recall: 0.3000 - val_accuracy: 0.7872 - val_loss: 0.4585 - val_precision: 0.9041 - val_recall: 0.2585
Epoch 5/16
[1m673/673[0m [32m━━━

[I 2025-03-31 21:35:01,392] Trial 2 finished with value: 0.8309654705441111 and parameters: {'epochs': 16, 'batch_size': 32, 'learning_rate': 0.004806991795240468, 'optimizer': 'adam', 'dropout0': 0.29198301403088256, 'dropout1': 0.30315689176429617, 'dropout2': 0.2727704055642929, 'units': [1024, 64], 'attention_dropout': 0.12476757942943446, 'attention_dim': 128, 'num_heads': 2, 'ff_dim': 128}. Best is trial 2 with value: 0.8309654705441111.


              precision    recall  f1-score   support

           0  0.8735554045 0.8994400373 0.8863087711      4286
           1  0.7151354924 0.6597560976 0.6863304789      1640

    accuracy                      0.8331083361      5926
   macro avg  0.7943454484 0.7795980674 0.7863196250      5926
weighted avg  0.8297132418 0.8331083361 0.8309654705      5926

Best Threshold: 0.5000000000000002
Model Macro F1: 0.7863196250160466
Model Weighted F1: 0.8309654705441111
Testing Parameters:
    Epochs: 23
    Dropout: [0.11597189185804023, 0.1649105768518277, 0.24244195005155006]
    Units = [512, 64]
    Attention Dropout: 0.10362259259272402
    Batch Size: 32
    Learning Rate: 0.007402024937971863
    Optimizer: adam
    Attention Dimension: 256
    Number of Heads: 4
    Feed Forward Dimension: 128

Epoch 1/23




[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 7ms/step - accuracy: 0.7233 - loss: 0.2337 - precision: 0.5379 - recall: 0.1488 - val_accuracy: 0.7936 - val_loss: 0.4302 - val_precision: 0.8504 - val_recall: 0.3085
Epoch 2/23
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7794 - loss: 0.1628 - precision: 0.8238 - recall: 0.2489 - val_accuracy: 0.8211 - val_loss: 0.3738 - val_precision: 0.7816 - val_recall: 0.4909
Epoch 3/23
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7951 - loss: 0.1503 - precision: 0.8294 - recall: 0.3005 - val_accuracy: 0.8102 - val_loss: 0.4036 - val_precision: 0.8323 - val_recall: 0.3933
Epoch 4/23
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.8155 - loss: 0.1405 - precision: 0.8629 - recall: 0.3903 - val_accuracy: 0.7935 - val_loss: 0.5947 - val_precision: 0.8881 - val_recall: 0.2902
Epoch 5/23
[1m673/673[0m [32m━━━

[I 2025-03-31 21:35:36,883] Trial 3 finished with value: 0.8428796728733234 and parameters: {'epochs': 23, 'batch_size': 32, 'learning_rate': 0.007402024937971863, 'optimizer': 'adam', 'dropout0': 0.11597189185804023, 'dropout1': 0.1649105768518277, 'dropout2': 0.24244195005155006, 'units': [512, 64], 'attention_dropout': 0.10362259259272402, 'attention_dim': 256, 'num_heads': 4, 'ff_dim': 128}. Best is trial 3 with value: 0.8428796728733234.


              precision    recall  f1-score   support

           0  0.8789923527 0.9118058796 0.8950984883      4286
           1  0.7445945946 0.6719512195 0.7064102564      1640

    accuracy                      0.8454269322      5926
   macro avg  0.8117934736 0.7918785496 0.8007543724      5926
weighted avg  0.8417982381 0.8454269322 0.8428796729      5926

Best Threshold: 0.35000000000000014
Model Macro F1: 0.8007543723645418
Model Weighted F1: 0.8428796728733234
Testing Parameters:
    Epochs: 28
    Dropout: [0.3873972227669995, 0.24044223183324678, 0.133653692856773]
    Units = [512, 64]
    Attention Dropout: 0.4574863646548466
    Batch Size: 16
    Learning Rate: 0.004166849791634802
    Optimizer: adam
    Attention Dimension: 32
    Number of Heads: 16
    Feed Forward Dimension: 128

Epoch 1/28




[1m1345/1345[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 5ms/step - accuracy: 0.7216 - loss: 0.2343 - precision: 0.4978 - recall: 0.1250 - val_accuracy: 0.7514 - val_loss: 0.5320 - val_precision: 0.8436 - val_recall: 0.1250
Epoch 2/28
[1m1345/1345[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.7581 - loss: 0.1728 - precision: 0.7529 - recall: 0.1724 - val_accuracy: 0.7818 - val_loss: 0.4339 - val_precision: 0.8813 - val_recall: 0.2445
Epoch 3/28
[1m1345/1345[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.7701 - loss: 0.1689 - precision: 0.8001 - recall: 0.2032 - val_accuracy: 0.7632 - val_loss: 0.4752 - val_precision: 0.9611 - val_recall: 0.1506
Epoch 4/28
[1m1345/1345[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.7884 - loss: 0.1551 - precision: 0.8118 - recall: 0.2829 - val_accuracy: 0.8107 - val_loss: 0.4307 - val_precision: 0.8587 - val_recall: 0.3780
Epoch 5/28
[1m1345/1345[

[I 2025-03-31 21:36:50,730] Trial 4 finished with value: 0.8295869902031411 and parameters: {'epochs': 28, 'batch_size': 16, 'learning_rate': 0.004166849791634802, 'optimizer': 'adam', 'dropout0': 0.3873972227669995, 'dropout1': 0.24044223183324678, 'dropout2': 0.133653692856773, 'units': [512, 64], 'attention_dropout': 0.4574863646548466, 'attention_dim': 32, 'num_heads': 16, 'ff_dim': 128}. Best is trial 3 with value: 0.8428796728733234.


              precision    recall  f1-score   support

           0  0.8650740987 0.9125058329 0.8881571477      4286
           1  0.7330960854 0.6280487805 0.6765188834      1640

    accuracy                      0.8337833277      5926
   macro avg  0.7990850920 0.7702773067 0.7823380156      5926
weighted avg  0.8285496400 0.8337833277 0.8295869902      5926

Best Threshold: 0.46000000000000024
Model Macro F1: 0.7823380155694184
Model Weighted F1: 0.8295869902031411
Testing Parameters:
    Epochs: 14
    Dropout: [0.19371424053947248, 0.37750285133654526, 0.1478478056873439]
    Units = [1024, 256]
    Attention Dropout: 0.4708629682742884
    Batch Size: 16
    Learning Rate: 0.0034303993541438403
    Optimizer: adam
    Attention Dimension: 256
    Number of Heads: 2
    Feed Forward Dimension: 128

Epoch 1/14




[1m1345/1345[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 5ms/step - accuracy: 0.7064 - loss: 0.2442 - precision: 0.3335 - recall: 0.0599 - val_accuracy: 0.7233 - val_loss: 0.8672 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 2/14
[1m1345/1345[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.7291 - loss: 0.1993 - precision: 0.6327 - recall: 0.0054 - val_accuracy: 0.7233 - val_loss: 0.5111 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 3/14
[1m1345/1345[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.7328 - loss: 0.1896 - precision: 0.5591 - recall: 0.0126 - val_accuracy: 0.7427 - val_loss: 0.4731 - val_precision: 0.8249 - val_recall: 0.0890
Epoch 4/14
[1m1345/1345[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.7483 - loss: 0.1783 - precision: 0.7208 - recall: 0.0910 - val_accuracy: 0.7449 - val_loss: 0.4681 - val_precision: 0.8765 - val_recall: 0.0909
Epoch 5/14

[I 2025-03-31 21:37:51,603] Trial 5 finished with value: 0.8304048331913476 and parameters: {'epochs': 14, 'batch_size': 16, 'learning_rate': 0.0034303993541438403, 'optimizer': 'adam', 'dropout0': 0.19371424053947248, 'dropout1': 0.37750285133654526, 'dropout2': 0.1478478056873439, 'units': [1024, 256], 'attention_dropout': 0.4708629682742884, 'attention_dim': 256, 'num_heads': 2, 'ff_dim': 128}. Best is trial 3 with value: 0.8428796728733234.


              precision    recall  f1-score   support

           0  0.8723163842 0.9006066262 0.8862357938      4286
           1  0.7161892072 0.6554878049 0.6844953836      1640

    accuracy                      0.8327708404      5926
   macro avg  0.7942527957 0.7780472156 0.7853655887      5926
weighted avg  0.8291087280 0.8327708404 0.8304048332      5926

Best Threshold: 0.46000000000000024
Model Macro F1: 0.7853655887298427
Model Weighted F1: 0.8304048331913476
Testing Parameters:
    Epochs: 29
    Dropout: [0.24133055140846216, 0.1889151410746243, 0.10899367161912665]
    Units = [1024, 128]
    Attention Dropout: 0.4737694418489541
    Batch Size: 16
    Learning Rate: 0.008190353273711802
    Optimizer: adam
    Attention Dimension: 32
    Number of Heads: 8
    Feed Forward Dimension: 64

Epoch 1/29




[1m1345/1345[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 5ms/step - accuracy: 0.7149 - loss: 0.2283 - precision: 0.4373 - recall: 0.0607 - val_accuracy: 0.7720 - val_loss: 0.4560 - val_precision: 0.7623 - val_recall: 0.2561
Epoch 2/29
[1m1345/1345[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.7598 - loss: 0.1741 - precision: 0.7760 - recall: 0.1667 - val_accuracy: 0.7523 - val_loss: 0.4806 - val_precision: 0.9343 - val_recall: 0.1128
Epoch 3/29
[1m1345/1345[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.7720 - loss: 0.1661 - precision: 0.7885 - recall: 0.2160 - val_accuracy: 0.7778 - val_loss: 0.4413 - val_precision: 0.9048 - val_recall: 0.2201
Epoch 4/29
[1m1345/1345[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.7892 - loss: 0.1569 - precision: 0.8115 - recall: 0.2960 - val_accuracy: 0.7636 - val_loss: 0.4260 - val_precision: 0.9442 - val_recall: 0.1549
Epoch 5/29
[1m1345/1345[

[I 2025-03-31 21:39:19,051] Trial 6 finished with value: 0.8303091789449258 and parameters: {'epochs': 29, 'batch_size': 16, 'learning_rate': 0.008190353273711802, 'optimizer': 'adam', 'dropout0': 0.24133055140846216, 'dropout1': 0.1889151410746243, 'dropout2': 0.10899367161912665, 'units': [1024, 128], 'attention_dropout': 0.4737694418489541, 'attention_dim': 32, 'num_heads': 8, 'ff_dim': 64}. Best is trial 3 with value: 0.8428796728733234.


              precision    recall  f1-score   support

           0  0.8805935544 0.8861409239 0.8833585301      4286
           1  0.6974581525 0.6859756098 0.6916692284      1640

    accuracy                      0.8307458657      5926
   macro avg  0.7890258534 0.7860582668 0.7875138792      5926
weighted avg  0.8299114654 0.8307458657 0.8303091789      5926

Best Threshold: 0.23000000000000004
Model Macro F1: 0.7875138792330925
Model Weighted F1: 0.8303091789449258
Testing Parameters:
    Epochs: 18
    Dropout: [0.3295616284040561, 0.20914237276120345, 0.26938390402370055]
    Units = [512, 64]
    Attention Dropout: 0.4271290752005207
    Batch Size: 16
    Learning Rate: 0.006215474797744274
    Optimizer: adam
    Attention Dimension: 32
    Number of Heads: 4
    Feed Forward Dimension: 128

Epoch 1/18




[1m1345/1345[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 5ms/step - accuracy: 0.7199 - loss: 0.2383 - precision: 0.4512 - recall: 0.0792 - val_accuracy: 0.7258 - val_loss: 0.5784 - val_precision: 0.8947 - val_recall: 0.0104
Epoch 2/18
[1m1345/1345[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.7477 - loss: 0.1823 - precision: 0.7358 - recall: 0.1213 - val_accuracy: 0.7562 - val_loss: 0.4775 - val_precision: 0.9412 - val_recall: 0.1268
Epoch 3/18
[1m1345/1345[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.7711 - loss: 0.1674 - precision: 0.7929 - recall: 0.2084 - val_accuracy: 0.7497 - val_loss: 0.4386 - val_precision: 0.9591 - val_recall: 0.1000
Epoch 4/18
[1m1345/1345[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.7713 - loss: 0.1635 - precision: 0.7820 - recall: 0.2206 - val_accuracy: 0.7503 - val_loss: 0.4780 - val_precision: 0.9706 - val_recall: 0.1006
Epoch 5/18
[1m1345/1345[

[I 2025-03-31 21:40:30,591] Trial 7 finished with value: 0.831524724705895 and parameters: {'epochs': 18, 'batch_size': 16, 'learning_rate': 0.006215474797744274, 'optimizer': 'adam', 'dropout0': 0.3295616284040561, 'dropout1': 0.20914237276120345, 'dropout2': 0.26938390402370055, 'units': [512, 64], 'attention_dropout': 0.4271290752005207, 'attention_dim': 32, 'num_heads': 4, 'ff_dim': 128}. Best is trial 3 with value: 0.8428796728733234.


              precision    recall  f1-score   support

           0  0.8646864686 0.9169388707 0.8900464274      4286
           1  0.7422157857 0.6250000000 0.6785832506      1640

    accuracy                      0.8361457982      5926
   macro avg  0.8034511272 0.7709694354 0.7843148390      5926
weighted avg  0.8307931308 0.8361457982 0.8315247247      5926

Best Threshold: 0.4200000000000002
Model Macro F1: 0.7843148389687242
Model Weighted F1: 0.831524724705895
Testing Parameters:
    Epochs: 12
    Dropout: [0.271121261997571, 0.3572913779948057, 0.47558429881209563]
    Units = [512, 256]
    Attention Dropout: 0.29973291547703873
    Batch Size: 32
    Learning Rate: 0.004626936909345708
    Optimizer: adam
    Attention Dimension: 256
    Number of Heads: 16
    Feed Forward Dimension: 256

Epoch 1/12




[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 7ms/step - accuracy: 0.7088 - loss: 0.2696 - precision: 0.4440 - recall: 0.1516 - val_accuracy: 0.7697 - val_loss: 0.4540 - val_precision: 0.7542 - val_recall: 0.2488
Epoch 2/12
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7527 - loss: 0.1862 - precision: 0.7406 - recall: 0.1330 - val_accuracy: 0.7757 - val_loss: 0.4328 - val_precision: 0.8575 - val_recall: 0.2274
Epoch 3/12
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7820 - loss: 0.1609 - precision: 0.8080 - recall: 0.2489 - val_accuracy: 0.8088 - val_loss: 0.4376 - val_precision: 0.8133 - val_recall: 0.4012
Epoch 4/12
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7997 - loss: 0.1486 - precision: 0.8330 - recall: 0.3250 - val_accuracy: 0.7715 - val_loss: 0.5282 - val_precision: 0.9333 - val_recall: 0.1878
Epoch 5/12
[1m673/673[0m [32m━━━

[I 2025-03-31 21:41:02,308] Trial 8 finished with value: 0.829247995622057 and parameters: {'epochs': 12, 'batch_size': 32, 'learning_rate': 0.004626936909345708, 'optimizer': 'adam', 'dropout0': 0.271121261997571, 'dropout1': 0.3572913779948057, 'dropout2': 0.47558429881209563, 'units': [512, 256], 'attention_dropout': 0.29973291547703873, 'attention_dim': 256, 'num_heads': 16, 'ff_dim': 256}. Best is trial 3 with value: 0.8428796728733234.


              precision    recall  f1-score   support

           0  0.8773584906 0.8896406906 0.8834569045      4286
           1  0.7006329114 0.6750000000 0.6875776398      1640

    accuracy                      0.8302396220      5926
   macro avg  0.7889957010 0.7823203453 0.7855172721      5926
weighted avg  0.8284502979 0.8302396220 0.8292479956      5926

Best Threshold: 0.25000000000000006
Model Macro F1: 0.7855172721463974
Model Weighted F1: 0.829247995622057
Testing Parameters:
    Epochs: 21
    Dropout: [0.25666887402733163, 0.35643418905231405, 0.19901261893476918]
    Units = [1024, 128]
    Attention Dropout: 0.2735442445482513
    Batch Size: 32
    Learning Rate: 0.0008254500500524324
    Optimizer: adam
    Attention Dimension: 32
    Number of Heads: 8
    Feed Forward Dimension: 256

Epoch 1/21




[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 8ms/step - accuracy: 0.7039 - loss: 0.2948 - precision: 0.4728 - recall: 0.3701 - val_accuracy: 0.7720 - val_loss: 0.5293 - val_precision: 0.9070 - val_recall: 0.1963
Epoch 2/21
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7816 - loss: 0.1741 - precision: 0.7577 - recall: 0.2834 - val_accuracy: 0.7973 - val_loss: 0.4757 - val_precision: 0.8113 - val_recall: 0.3488
Epoch 3/21
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.8024 - loss: 0.1558 - precision: 0.8174 - recall: 0.3484 - val_accuracy: 0.8097 - val_loss: 0.4095 - val_precision: 0.8507 - val_recall: 0.3787
Epoch 4/21
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.8177 - loss: 0.1417 - precision: 0.8495 - recall: 0.3902 - val_accuracy: 0.8176 - val_loss: 0.4020 - val_precision: 0.8284 - val_recall: 0.4299
Epoch 5/21
[1m673/673[0m [32m━━━

[I 2025-03-31 21:41:49,207] Trial 9 finished with value: 0.8320611429153764 and parameters: {'epochs': 21, 'batch_size': 32, 'learning_rate': 0.0008254500500524324, 'optimizer': 'adam', 'dropout0': 0.25666887402733163, 'dropout1': 0.35643418905231405, 'dropout2': 0.19901261893476918, 'units': [1024, 128], 'attention_dropout': 0.2735442445482513, 'attention_dim': 32, 'num_heads': 8, 'ff_dim': 256}. Best is trial 3 with value: 0.8428796728733234.


              precision    recall  f1-score   support

           0  0.8705487122 0.9069062063 0.8883556165      4286
           1  0.7268993840 0.6475609756 0.6849403418      1640

    accuracy                      0.8351333108      5926
   macro avg  0.7987240481 0.7772335909 0.7866479792      5926
weighted avg  0.8307942576 0.8351333108 0.8320611429      5926

Best Threshold: 0.5500000000000003
Model Macro F1: 0.7866479791630945
Model Weighted F1: 0.8320611429153764
Testing Parameters:
    Epochs: 38
    Dropout: [0.1099507777095645, 0.1052224462462305, 0.40196171089164345]
    Units = [512, 128]
    Attention Dropout: 0.21324555418870955
    Batch Size: 32
    Learning Rate: 0.009636220090399553
    Optimizer: adam
    Attention Dimension: 128
    Number of Heads: 4
    Feed Forward Dimension: 64

Epoch 1/38




[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 7ms/step - accuracy: 0.7145 - loss: 0.2439 - precision: 0.4706 - recall: 0.1125 - val_accuracy: 0.7335 - val_loss: 0.6022 - val_precision: 0.8588 - val_recall: 0.0445
Epoch 2/38
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7580 - loss: 0.1756 - precision: 0.7836 - recall: 0.1418 - val_accuracy: 0.7654 - val_loss: 0.4508 - val_precision: 0.8634 - val_recall: 0.1811
Epoch 3/38
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7790 - loss: 0.1601 - precision: 0.8294 - recall: 0.2548 - val_accuracy: 0.7877 - val_loss: 0.4284 - val_precision: 0.8979 - val_recall: 0.2628
Epoch 4/38
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7911 - loss: 0.1511 - precision: 0.8289 - recall: 0.3273 - val_accuracy: 0.8189 - val_loss: 0.3743 - val_precision: 0.8324 - val_recall: 0.4329
Epoch 5/38
[1m673/673[0m [32m━━

[I 2025-03-31 21:42:35,169] Trial 10 finished with value: 0.8274600033981963 and parameters: {'epochs': 38, 'batch_size': 32, 'learning_rate': 0.009636220090399553, 'optimizer': 'adam', 'dropout0': 0.1099507777095645, 'dropout1': 0.1052224462462305, 'dropout2': 0.40196171089164345, 'units': [512, 128], 'attention_dropout': 0.21324555418870955, 'attention_dim': 128, 'num_heads': 4, 'ff_dim': 64}. Best is trial 3 with value: 0.8428796728733234.


              precision    recall  f1-score   support

           0  0.8819672131 0.8786747550 0.8803179056      4286
           1  0.6859903382 0.6926829268 0.6893203883      1640

    accuracy                      0.8272021600      5926
   macro avg  0.7839787756 0.7856788409 0.7848191470      5926
weighted avg  0.8277312909 0.8272021600 0.8274600034      5926

Best Threshold: 0.3200000000000001
Model Macro F1: 0.784819146956431
Model Weighted F1: 0.8274600033981963
Testing Parameters:
    Epochs: 6
    Dropout: [0.49904428210818275, 0.41885357349994756, 0.2036178743259905]
    Units = [512, 64]
    Attention Dropout: 0.30452750067518086
    Batch Size: 32
    Learning Rate: 0.00025128022963742116
    Optimizer: adam
    Attention Dimension: 64
    Number of Heads: 8
    Feed Forward Dimension: 256

Epoch 1/6




[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 8ms/step - accuracy: 0.6077 - loss: 0.4077 - precision: 0.3328 - recall: 0.4174 - val_accuracy: 0.7832 - val_loss: 0.4569 - val_precision: 0.6336 - val_recall: 0.5134
Epoch 2/6
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7533 - loss: 0.2166 - precision: 0.6080 - recall: 0.2342 - val_accuracy: 0.7676 - val_loss: 0.4762 - val_precision: 0.8363 - val_recall: 0.1994
Epoch 3/6
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7720 - loss: 0.1847 - precision: 0.7364 - recall: 0.2570 - val_accuracy: 0.8056 - val_loss: 0.4082 - val_precision: 0.8120 - val_recall: 0.3872
Epoch 4/6
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7807 - loss: 0.1723 - precision: 0.7540 - recall: 0.2772 - val_accuracy: 0.7882 - val_loss: 0.4379 - val_precision: 0.9053 - val_recall: 0.2622
Epoch 5/6
[1m673/673[0m [32m━━━━━━

[I 2025-03-31 21:42:54,736] Trial 11 finished with value: 0.8321778939302767 and parameters: {'epochs': 6, 'batch_size': 32, 'learning_rate': 0.00025128022963742116, 'optimizer': 'adam', 'dropout0': 0.49904428210818275, 'dropout1': 0.41885357349994756, 'dropout2': 0.2036178743259905, 'units': [512, 64], 'attention_dropout': 0.30452750067518086, 'attention_dim': 64, 'num_heads': 8, 'ff_dim': 256}. Best is trial 3 with value: 0.8428796728733234.


              precision    recall  f1-score   support

           0  0.8852459016 0.8819412039 0.8835904628      4286
           1  0.6944444444 0.7012195122 0.6978155340      1640

    accuracy                      0.8319271009      5926
   macro avg  0.7898451730 0.7915803581 0.7907029984      5926
weighted avg  0.8324422584 0.8319271009 0.8321778939      5926

Best Threshold: 0.23000000000000004
Model Macro F1: 0.790702998406841
Model Weighted F1: 0.8321778939302767
Testing Parameters:
    Epochs: 5
    Dropout: [0.4970533068419498, 0.48026849099469293, 0.3450945607640774]
    Units = [512, 64]
    Attention Dropout: 0.35741249836194466
    Batch Size: 32
    Learning Rate: 0.00013198888899511015
    Optimizer: adam
    Attention Dimension: 64
    Number of Heads: 4
    Feed Forward Dimension: 256

Epoch 1/5




[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 8ms/step - accuracy: 0.5462 - loss: 0.4957 - precision: 0.2869 - recall: 0.4316 - val_accuracy: 0.7307 - val_loss: 0.5263 - val_precision: 0.8143 - val_recall: 0.0348
Epoch 2/5
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.6939 - loss: 0.2926 - precision: 0.4158 - recall: 0.2757 - val_accuracy: 0.7481 - val_loss: 0.5136 - val_precision: 0.8930 - val_recall: 0.1018
Epoch 3/5
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7296 - loss: 0.2398 - precision: 0.5281 - recall: 0.2176 - val_accuracy: 0.7432 - val_loss: 0.5473 - val_precision: 0.8986 - val_recall: 0.0811
Epoch 4/5
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7467 - loss: 0.2153 - precision: 0.6181 - recall: 0.2050 - val_accuracy: 0.7477 - val_loss: 0.5314 - val_precision: 0.9503 - val_recall: 0.0933
Epoch 5/5
[1m673/673[0m [32m━━━━━━━

[I 2025-03-31 21:43:12,425] Trial 12 finished with value: 0.8146585533804866 and parameters: {'epochs': 5, 'batch_size': 32, 'learning_rate': 0.00013198888899511015, 'optimizer': 'adam', 'dropout0': 0.4970533068419498, 'dropout1': 0.48026849099469293, 'dropout2': 0.3450945607640774, 'units': [512, 64], 'attention_dropout': 0.35741249836194466, 'attention_dim': 64, 'num_heads': 4, 'ff_dim': 256}. Best is trial 3 with value: 0.8428796728733234.


              precision    recall  f1-score   support

           0  0.8510452962 0.9118058796 0.8803784636      4286
           1  0.7166416792 0.5829268293 0.6429051782      1640

    accuracy                      0.8207897401      5926
   macro avg  0.7838434877 0.7473663544 0.7616418209      5926
weighted avg  0.8138495601 0.8207897401 0.8146585534      5926

Best Threshold: 0.2
Model Macro F1: 0.7616418209145477
Model Weighted F1: 0.8146585533804866
Testing Parameters:
    Epochs: 5
    Dropout: [0.10234795212903414, 0.49269999026682576, 0.20561636320317395]
    Units = [512, 64]
    Attention Dropout: 0.36214702422671935
    Batch Size: 32
    Learning Rate: 0.00725105990094338
    Optimizer: adam
    Attention Dimension: 64
    Number of Heads: 4
    Feed Forward Dimension: 256

Epoch 1/5




[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 7ms/step - accuracy: 0.7163 - loss: 0.2517 - precision: 0.4403 - recall: 0.1142 - val_accuracy: 0.7644 - val_loss: 0.4479 - val_precision: 0.7785 - val_recall: 0.2079
Epoch 2/5
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7635 - loss: 0.1719 - precision: 0.7799 - recall: 0.1794 - val_accuracy: 0.7830 - val_loss: 0.4333 - val_precision: 0.8916 - val_recall: 0.2457
Epoch 3/5
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7795 - loss: 0.1612 - precision: 0.8083 - recall: 0.2403 - val_accuracy: 0.7681 - val_loss: 0.4925 - val_precision: 0.9318 - val_recall: 0.1750
Epoch 4/5
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7926 - loss: 0.1512 - precision: 0.8236 - recall: 0.3067 - val_accuracy: 0.8102 - val_loss: 0.4297 - val_precision: 0.8694 - val_recall: 0.3695
Epoch 5/5
[1m673/673[0m [32m━━━━━━━

[I 2025-03-31 21:43:28,811] Trial 13 finished with value: 0.8300257428184472 and parameters: {'epochs': 5, 'batch_size': 32, 'learning_rate': 0.00725105990094338, 'optimizer': 'adam', 'dropout0': 0.10234795212903414, 'dropout1': 0.49269999026682576, 'dropout2': 0.20561636320317395, 'units': [512, 64], 'attention_dropout': 0.36214702422671935, 'attention_dim': 64, 'num_heads': 4, 'ff_dim': 256}. Best is trial 3 with value: 0.8428796728733234.


              precision    recall  f1-score   support

           0  0.8719222950 0.9006066262 0.8860323654      4286
           1  0.7158105404 0.6542682927 0.6836572157      1640

    accuracy                      0.8324333446      5926
   macro avg  0.7938664177 0.7774374595 0.7848447906      5926
weighted avg  0.8287189070 0.8324333446 0.8300257428      5926

Best Threshold: 0.24000000000000005
Model Macro F1: 0.7848447905523733
Model Weighted F1: 0.8300257428184472
Testing Parameters:
    Epochs: 28
    Dropout: [0.48105106530877234, 0.4291583460684024, 0.34759205257376224]
    Units = [512, 64]
    Attention Dropout: 0.2270667098634962
    Batch Size: 32
    Learning Rate: 0.0021732762953904474
    Optimizer: adam
    Attention Dimension: 64
    Number of Heads: 8
    Feed Forward Dimension: 64

Epoch 1/28




[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 7ms/step - accuracy: 0.6866 - loss: 0.3085 - precision: 0.3960 - recall: 0.2352 - val_accuracy: 0.7594 - val_loss: 0.5065 - val_precision: 0.7816 - val_recall: 0.1811
Epoch 2/28
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7601 - loss: 0.1792 - precision: 0.7312 - recall: 0.1903 - val_accuracy: 0.7491 - val_loss: 0.4715 - val_precision: 0.9227 - val_recall: 0.1018
Epoch 3/28
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7742 - loss: 0.1644 - precision: 0.8176 - recall: 0.2226 - val_accuracy: 0.7550 - val_loss: 0.5351 - val_precision: 0.9653 - val_recall: 0.1189
Epoch 4/28
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7836 - loss: 0.1547 - precision: 0.8212 - recall: 0.2631 - val_accuracy: 0.7724 - val_loss: 0.4863 - val_precision: 0.9505 - val_recall: 0.1872
Epoch 5/28
[1m673/673[0m [32m━━━

[I 2025-03-31 21:44:27,435] Trial 14 finished with value: 0.8367406690990863 and parameters: {'epochs': 28, 'batch_size': 32, 'learning_rate': 0.0021732762953904474, 'optimizer': 'adam', 'dropout0': 0.48105106530877234, 'dropout1': 0.4291583460684024, 'dropout2': 0.34759205257376224, 'units': [512, 64], 'attention_dropout': 0.2270667098634962, 'attention_dim': 64, 'num_heads': 8, 'ff_dim': 64}. Best is trial 3 with value: 0.8428796728733234.


              precision    recall  f1-score   support

           0  0.8751125113 0.9073728418 0.8909507446      4286
           1  0.7321187584 0.6615853659 0.6950672646      1640

    accuracy                      0.8393520081      5926
   macro avg  0.8036156348 0.7844791038 0.7930090046      5926
weighted avg  0.8355394848 0.8393520081 0.8367406691      5926

Best Threshold: 0.22000000000000003
Model Macro F1: 0.7930090045664915
Model Weighted F1: 0.8367406690990863
Testing Parameters:
    Epochs: 34
    Dropout: [0.4179053756071468, 0.10286369224515857, 0.3345422465237085]
    Units = [1024, 256]
    Attention Dropout: 0.19992183542949998
    Batch Size: 32
    Learning Rate: 0.0024137655508840936
    Optimizer: adam
    Attention Dimension: 256
    Number of Heads: 8
    Feed Forward Dimension: 64

Epoch 1/34




[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 8ms/step - accuracy: 0.7050 - loss: 0.2804 - precision: 0.4614 - recall: 0.2770 - val_accuracy: 0.7987 - val_loss: 0.4103 - val_precision: 0.7945 - val_recall: 0.3677
Epoch 2/34
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7435 - loss: 0.1909 - precision: 0.7054 - recall: 0.0963 - val_accuracy: 0.7260 - val_loss: 0.7260 - val_precision: 1.0000 - val_recall: 0.0098
Epoch 3/34
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7637 - loss: 0.1714 - precision: 0.7975 - recall: 0.1734 - val_accuracy: 0.7582 - val_loss: 0.4823 - val_precision: 0.8000 - val_recall: 0.1683
Epoch 4/34
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7658 - loss: 0.1688 - precision: 0.8155 - recall: 0.1834 - val_accuracy: 0.7568 - val_loss: 0.5178 - val_precision: 0.9462 - val_recall: 0.1287
Epoch 5/34
[1m673/673[0m [32m━━━

[I 2025-03-31 21:45:17,603] Trial 15 finished with value: 0.8349893507687086 and parameters: {'epochs': 34, 'batch_size': 32, 'learning_rate': 0.0024137655508840936, 'optimizer': 'adam', 'dropout0': 0.4179053756071468, 'dropout1': 0.10286369224515857, 'dropout2': 0.3345422465237085, 'units': [1024, 256], 'attention_dropout': 0.19992183542949998, 'attention_dim': 256, 'num_heads': 8, 'ff_dim': 64}. Best is trial 3 with value: 0.8428796728733234.


              precision    recall  f1-score   support

           0  0.8738191633 0.9064395707 0.8898305085      4286
           1  0.7290540541 0.6579268293 0.6916666667      1640

    accuracy                      0.8376645292      5926
   macro avg  0.8014366087 0.7821832000 0.7907485876      5926
weighted avg  0.8337559201 0.8376645292 0.8349893508      5926

Best Threshold: 0.2700000000000001
Model Macro F1: 0.7907485875706215
Model Weighted F1: 0.8349893507687086
Testing Parameters:
    Epochs: 27
    Dropout: [0.352049601820255, 0.44150391415696455, 0.39699878280356143]
    Units = [512, 128]
    Attention Dropout: 0.1060890881870567
    Batch Size: 32
    Learning Rate: 0.0018998069339399364
    Optimizer: adam
    Attention Dimension: 64
    Number of Heads: 4
    Feed Forward Dimension: 64

Epoch 1/27




[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 8ms/step - accuracy: 0.6807 - loss: 0.3103 - precision: 0.4260 - recall: 0.2890 - val_accuracy: 0.7389 - val_loss: 0.5540 - val_precision: 0.9515 - val_recall: 0.0598
Epoch 2/27
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7721 - loss: 0.1721 - precision: 0.7743 - recall: 0.2209 - val_accuracy: 0.8161 - val_loss: 0.3829 - val_precision: 0.8015 - val_recall: 0.4457
Epoch 3/27
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7779 - loss: 0.1656 - precision: 0.8062 - recall: 0.2259 - val_accuracy: 0.8233 - val_loss: 0.3755 - val_precision: 0.7821 - val_recall: 0.5012
Epoch 4/27
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7923 - loss: 0.1504 - precision: 0.8425 - recall: 0.2841 - val_accuracy: 0.7918 - val_loss: 0.4015 - val_precision: 0.8773 - val_recall: 0.2878
Epoch 5/27
[1m673/673[0m [32m━━━

[I 2025-03-31 21:45:57,540] Trial 16 finished with value: 0.8360012761510348 and parameters: {'epochs': 27, 'batch_size': 32, 'learning_rate': 0.0018998069339399364, 'optimizer': 'adam', 'dropout0': 0.352049601820255, 'dropout1': 0.44150391415696455, 'dropout2': 0.39699878280356143, 'units': [512, 128], 'attention_dropout': 0.1060890881870567, 'attention_dim': 64, 'num_heads': 4, 'ff_dim': 64}. Best is trial 3 with value: 0.8428796728733234.


              precision    recall  f1-score   support

           0  0.8680035258 0.9190387308 0.8927923844      4286
           1  0.7500000000 0.6347560976 0.6875825627      1640

    accuracy                      0.8403644954      5926
   macro avg  0.8090017629 0.7768974142 0.7901874736      5926
weighted avg  0.8353464582 0.8403644954 0.8360012762      5926

Best Threshold: 0.37000000000000016
Model Macro F1: 0.7901874735769266
Model Weighted F1: 0.8360012761510348
Testing Parameters:
    Epochs: 32
    Dropout: [0.1674876120117175, 0.2842285442517125, 0.3307675911757543]
    Units = [1024, 64]
    Attention Dropout: 0.22099034005180362
    Batch Size: 32
    Learning Rate: 0.008793763223868796
    Optimizer: adam
    Attention Dimension: 256
    Number of Heads: 2
    Feed Forward Dimension: 64

Epoch 1/32




[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 7ms/step - accuracy: 0.7164 - loss: 0.2443 - precision: 0.4869 - recall: 0.1184 - val_accuracy: 0.7454 - val_loss: 0.5786 - val_precision: 0.8503 - val_recall: 0.0970
Epoch 2/32
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7527 - loss: 0.1807 - precision: 0.7848 - recall: 0.1387 - val_accuracy: 0.7531 - val_loss: 0.5103 - val_precision: 0.8554 - val_recall: 0.1299
Epoch 3/32
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7801 - loss: 0.1626 - precision: 0.8298 - recall: 0.2381 - val_accuracy: 0.7784 - val_loss: 0.4302 - val_precision: 0.8998 - val_recall: 0.2244
Epoch 4/32
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7886 - loss: 0.1556 - precision: 0.8414 - recall: 0.2893 - val_accuracy: 0.8075 - val_loss: 0.3886 - val_precision: 0.8707 - val_recall: 0.3573
Epoch 5/32
[1m673/673[0m [32m━━

[I 2025-03-31 21:46:36,801] Trial 17 finished with value: 0.8298872348517868 and parameters: {'epochs': 32, 'batch_size': 32, 'learning_rate': 0.008793763223868796, 'optimizer': 'adam', 'dropout0': 0.1674876120117175, 'dropout1': 0.2842285442517125, 'dropout2': 0.3307675911757543, 'units': [1024, 64], 'attention_dropout': 0.22099034005180362, 'attention_dim': 256, 'num_heads': 2, 'ff_dim': 64}. Best is trial 3 with value: 0.8428796728733234.


              precision    recall  f1-score   support

           0  0.8740050034 0.8966402240 0.8851779339      4286
           1  0.7102681491 0.6621951220 0.6853897128      1640

    accuracy                      0.8317583530      5926
   macro avg  0.7921365763 0.7794176730 0.7852838234      5926
weighted avg  0.8286913954 0.8317583530 0.8298872349      5926

Best Threshold: 0.36000000000000015
Model Macro F1: 0.7852838233684918
Model Weighted F1: 0.8298872348517868
Testing Parameters:
    Epochs: 24
    Dropout: [0.44463109648267474, 0.15096154310119517, 0.4033886011831971]
    Units = [512, 64]
    Attention Dropout: 0.16224795562469563
    Batch Size: 32
    Learning Rate: 0.006423315063348137
    Optimizer: adam
    Attention Dimension: 128
    Number of Heads: 4
    Feed Forward Dimension: 128

Epoch 1/24




[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 8ms/step - accuracy: 0.7146 - loss: 0.2484 - precision: 0.4938 - recall: 0.1430 - val_accuracy: 0.7288 - val_loss: 0.7125 - val_precision: 0.9714 - val_recall: 0.0207
Epoch 2/24
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7658 - loss: 0.1701 - precision: 0.7913 - recall: 0.1903 - val_accuracy: 0.7371 - val_loss: 0.8048 - val_precision: 0.9881 - val_recall: 0.0506
Epoch 3/24
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7820 - loss: 0.1610 - precision: 0.8147 - recall: 0.2557 - val_accuracy: 0.7913 - val_loss: 0.4930 - val_precision: 0.8657 - val_recall: 0.2909
Epoch 4/24
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7824 - loss: 0.1583 - precision: 0.8080 - recall: 0.2629 - val_accuracy: 0.7837 - val_loss: 0.4293 - val_precision: 0.9050 - val_recall: 0.2439
Epoch 5/24
[1m673/673[0m [32m━━━

[I 2025-03-31 21:47:17,442] Trial 18 finished with value: 0.8355745464440406 and parameters: {'epochs': 24, 'batch_size': 32, 'learning_rate': 0.006423315063348137, 'optimizer': 'adam', 'dropout0': 0.44463109648267474, 'dropout1': 0.15096154310119517, 'dropout2': 0.4033886011831971, 'units': [512, 64], 'attention_dropout': 0.16224795562469563, 'attention_dim': 128, 'num_heads': 4, 'ff_dim': 128}. Best is trial 3 with value: 0.8428796728733234.


              precision    recall  f1-score   support

           0  0.8851429900 0.8882407839 0.8866891813      4286
           1  0.7052307692 0.6987804878 0.7019908116      1640

    accuracy                      0.8358083024      5926
   macro avg  0.7951868796 0.7935106359 0.7943399965      5926
weighted avg  0.8353529053 0.8358083024 0.8355745464      5926

Best Threshold: 0.3300000000000001
Model Macro F1: 0.7943399964795961
Model Weighted F1: 0.8355745464440406
Testing Parameters:
    Epochs: 39
    Dropout: [0.15451637149193637, 0.25107895060686947, 0.44091401450558987]
    Units = [512, 64]
    Attention Dropout: 0.25118508037197307
    Batch Size: 32
    Learning Rate: 0.0021179738746647018
    Optimizer: adam
    Attention Dimension: 64
    Number of Heads: 8
    Feed Forward Dimension: 64

Epoch 1/39




[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 8ms/step - accuracy: 0.6894 - loss: 0.3096 - precision: 0.4303 - recall: 0.2870 - val_accuracy: 0.7781 - val_loss: 0.5021 - val_precision: 0.8860 - val_recall: 0.2274
Epoch 2/39
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7756 - loss: 0.1724 - precision: 0.7845 - recall: 0.2437 - val_accuracy: 0.8066 - val_loss: 0.3948 - val_precision: 0.8259 - val_recall: 0.3817
Epoch 3/39
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7982 - loss: 0.1542 - precision: 0.8462 - recall: 0.3144 - val_accuracy: 0.7830 - val_loss: 0.4767 - val_precision: 0.9175 - val_recall: 0.2372
Epoch 4/39
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.8079 - loss: 0.1458 - precision: 0.8614 - recall: 0.3564 - val_accuracy: 0.8169 - val_loss: 0.3902 - val_precision: 0.8482 - val_recall: 0.4122
Epoch 5/39
[1m673/673[0m [32m━━━

[I 2025-03-31 21:47:52,623] Trial 19 finished with value: 0.8307416682187104 and parameters: {'epochs': 39, 'batch_size': 32, 'learning_rate': 0.0021179738746647018, 'optimizer': 'adam', 'dropout0': 0.15451637149193637, 'dropout1': 0.25107895060686947, 'dropout2': 0.44091401450558987, 'units': [512, 64], 'attention_dropout': 0.25118508037197307, 'attention_dim': 64, 'num_heads': 8, 'ff_dim': 64}. Best is trial 3 with value: 0.8428796728733234.


              precision    recall  f1-score   support

           0  0.8855195110 0.8789080728 0.8822014052      4286
           1  0.6895933014 0.7030487805 0.6962560386      1640

    accuracy                      0.8302396220      5926
   macro avg  0.7875564062 0.7909784266 0.7892287219      5926
weighted avg  0.8312976103 0.8302396220 0.8307416682      5926

Best Threshold: 0.37000000000000016
Model Macro F1: 0.7892287218997839
Model Weighted F1: 0.8307416682187104
Testing Parameters:
    Epochs: 34
    Dropout: [0.33702951993633024, 0.40920038777238343, 0.3070459383487078]
    Units = [512, 64]
    Attention Dropout: 0.16162855025552214
    Batch Size: 32
    Learning Rate: 0.005812550257145199
    Optimizer: adam
    Attention Dimension: 256
    Number of Heads: 4
    Feed Forward Dimension: 128

Epoch 1/34




[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 7ms/step - accuracy: 0.7142 - loss: 0.2423 - precision: 0.4798 - recall: 0.1441 - val_accuracy: 0.7953 - val_loss: 0.4298 - val_precision: 0.7457 - val_recall: 0.3951
Epoch 2/34
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7556 - loss: 0.1783 - precision: 0.7445 - recall: 0.1436 - val_accuracy: 0.7882 - val_loss: 0.4363 - val_precision: 0.8171 - val_recall: 0.3024
Epoch 3/34
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7679 - loss: 0.1660 - precision: 0.7951 - recall: 0.2044 - val_accuracy: 0.7584 - val_loss: 0.5085 - val_precision: 0.9561 - val_recall: 0.1329
Epoch 4/34
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7903 - loss: 0.1522 - precision: 0.8312 - recall: 0.2830 - val_accuracy: 0.7840 - val_loss: 0.4701 - val_precision: 0.8965 - val_recall: 0.2482
Epoch 5/34
[1m673/673[0m [32m━━━

[I 2025-03-31 21:48:29,225] Trial 20 finished with value: 0.8393534383005857 and parameters: {'epochs': 34, 'batch_size': 32, 'learning_rate': 0.005812550257145199, 'optimizer': 'adam', 'dropout0': 0.33702951993633024, 'dropout1': 0.40920038777238343, 'dropout2': 0.3070459383487078, 'units': [512, 64], 'attention_dropout': 0.16162855025552214, 'attention_dim': 256, 'num_heads': 4, 'ff_dim': 128}. Best is trial 3 with value: 0.8428796728733234.


              precision    recall  f1-score   support

           0  0.8871866295 0.8917405506 0.8894577612      4286
           1  0.7132262052 0.7036585366 0.7084100675      1640

    accuracy                      0.8396895039      5926
   macro avg  0.8002064174 0.7976995436 0.7989339144      5926
weighted avg  0.8390436839 0.8396895039 0.8393534383      5926

Best Threshold: 0.34000000000000014
Model Macro F1: 0.798933914377427
Model Weighted F1: 0.8393534383005857
Testing Parameters:
    Epochs: 34
    Dropout: [0.34668306945409527, 0.41613821996939815, 0.2955918048434577]
    Units = [512, 64]
    Attention Dropout: 0.15410458007445837
    Batch Size: 32
    Learning Rate: 0.00613550753736391
    Optimizer: adam
    Attention Dimension: 256
    Number of Heads: 4
    Feed Forward Dimension: 128

Epoch 1/34




[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 7ms/step - accuracy: 0.7043 - loss: 0.2518 - precision: 0.4542 - recall: 0.1247 - val_accuracy: 0.7818 - val_loss: 0.5131 - val_precision: 0.8149 - val_recall: 0.2738
Epoch 2/34
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7644 - loss: 0.1706 - precision: 0.7671 - recall: 0.1629 - val_accuracy: 0.7308 - val_loss: 1.2065 - val_precision: 1.0000 - val_recall: 0.0274
Epoch 3/34
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7744 - loss: 0.1674 - precision: 0.8184 - recall: 0.2157 - val_accuracy: 0.7359 - val_loss: 0.5496 - val_precision: 0.9213 - val_recall: 0.0500
Epoch 4/34
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7681 - loss: 0.1659 - precision: 0.7968 - recall: 0.2127 - val_accuracy: 0.7675 - val_loss: 0.4567 - val_precision: 0.9172 - val_recall: 0.1756
Epoch 5/34
[1m673/673[0m [32m━━━

[I 2025-03-31 21:49:10,660] Trial 21 finished with value: 0.8365472844905306 and parameters: {'epochs': 34, 'batch_size': 32, 'learning_rate': 0.00613550753736391, 'optimizer': 'adam', 'dropout0': 0.34668306945409527, 'dropout1': 0.41613821996939815, 'dropout2': 0.2955918048434577, 'units': [512, 64], 'attention_dropout': 0.15410458007445837, 'attention_dim': 256, 'num_heads': 4, 'ff_dim': 128}. Best is trial 3 with value: 0.8428796728733234.


              precision    recall  f1-score   support

           0  0.8839306358 0.8919738684 0.8879340379      4286
           1  0.7108057464 0.6939024390 0.7022523912      1640

    accuracy                      0.8371582855      5926
   macro avg  0.7973681911 0.7929381537 0.7950932145      5926
weighted avg  0.8360189216 0.8371582855 0.8365472845      5926

Best Threshold: 0.38000000000000017
Model Macro F1: 0.7950932145479127
Model Weighted F1: 0.8365472844905306
Testing Parameters:
    Epochs: 31
    Dropout: [0.45119035396228957, 0.4541847871889178, 0.35234443342184596]
    Units = [512, 64]
    Attention Dropout: 0.18989426025917866
    Batch Size: 32
    Learning Rate: 0.007139927911090874
    Optimizer: adam
    Attention Dimension: 256
    Number of Heads: 4
    Feed Forward Dimension: 128

Epoch 1/31




[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 7ms/step - accuracy: 0.7116 - loss: 0.2436 - precision: 0.5016 - recall: 0.1436 - val_accuracy: 0.7322 - val_loss: 0.6347 - val_precision: 1.0000 - val_recall: 0.0323
Epoch 2/31
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7548 - loss: 0.1787 - precision: 0.7641 - recall: 0.1440 - val_accuracy: 0.7627 - val_loss: 0.4864 - val_precision: 0.9239 - val_recall: 0.1555
Epoch 3/31
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7828 - loss: 0.1611 - precision: 0.8086 - recall: 0.2517 - val_accuracy: 0.7813 - val_loss: 0.4490 - val_precision: 0.8874 - val_recall: 0.2402
Epoch 4/31
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7915 - loss: 0.1529 - precision: 0.8359 - recall: 0.2985 - val_accuracy: 0.7806 - val_loss: 0.4075 - val_precision: 0.9106 - val_recall: 0.2299
Epoch 5/31
[1m673/673[0m [32m━━━

[I 2025-03-31 21:49:58,685] Trial 22 finished with value: 0.8336273790087159 and parameters: {'epochs': 31, 'batch_size': 32, 'learning_rate': 0.007139927911090874, 'optimizer': 'adam', 'dropout0': 0.45119035396228957, 'dropout1': 0.4541847871889178, 'dropout2': 0.35234443342184596, 'units': [512, 64], 'attention_dropout': 0.18989426025917866, 'attention_dim': 256, 'num_heads': 4, 'ff_dim': 128}. Best is trial 3 with value: 0.8428796728733234.


              precision    recall  f1-score   support

           0  0.8771649954 0.8980401307 0.8874798248      4286
           1  0.7158647594 0.6713414634 0.6928886092      1640

    accuracy                      0.8353020587      5926
   macro avg  0.7965148774 0.7846907970 0.7901842170      5926
weighted avg  0.8325257131 0.8353020587 0.8336273790      5926

Best Threshold: 0.24000000000000005
Model Macro F1: 0.790184216975915
Model Weighted F1: 0.8336273790087159
Testing Parameters:
    Epochs: 26
    Dropout: [0.3175528996525452, 0.38900727857515205, 0.24639034557098521]
    Units = [512, 64]
    Attention Dropout: 0.10410774374716232
    Batch Size: 32
    Learning Rate: 0.005511917029970526
    Optimizer: adam
    Attention Dimension: 256
    Number of Heads: 4
    Feed Forward Dimension: 128





Epoch 1/26
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 7ms/step - accuracy: 0.7147 - loss: 0.2430 - precision: 0.4798 - recall: 0.1463 - val_accuracy: 0.7605 - val_loss: 0.4872 - val_precision: 0.7483 - val_recall: 0.2030
Epoch 2/26
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7555 - loss: 0.1788 - precision: 0.7709 - recall: 0.1328 - val_accuracy: 0.7341 - val_loss: 0.7642 - val_precision: 1.0000 - val_recall: 0.0390
Epoch 3/26
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7673 - loss: 0.1689 - precision: 0.7813 - recall: 0.2047 - val_accuracy: 0.7774 - val_loss: 0.4748 - val_precision: 0.9105 - val_recall: 0.2171
Epoch 4/26
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7870 - loss: 0.1553 - precision: 0.8142 - recall: 0.2807 - val_accuracy: 0.7913 - val_loss: 0.4288 - val_precision: 0.8990 - val_recall: 0.2768
Epoch 5/26
[1m673/673[

[I 2025-03-31 21:50:46,217] Trial 23 finished with value: 0.8343012657770719 and parameters: {'epochs': 26, 'batch_size': 32, 'learning_rate': 0.005511917029970526, 'optimizer': 'adam', 'dropout0': 0.3175528996525452, 'dropout1': 0.38900727857515205, 'dropout2': 0.24639034557098521, 'units': [512, 64], 'attention_dropout': 0.10410774374716232, 'attention_dim': 256, 'num_heads': 4, 'ff_dim': 128}. Best is trial 3 with value: 0.8428796728733234.


              precision    recall  f1-score   support

           0  0.8761343013 0.9010732618 0.8884288015      4286
           1  0.7206851120 0.6670731707 0.6928435719      1640

    accuracy                      0.8363145461      5926
   macro avg  0.7984097066 0.7840732163 0.7906361867      5926
weighted avg  0.8331142759 0.8363145461 0.8343012658      5926

Best Threshold: 0.47000000000000025
Model Macro F1: 0.7906361866766085
Model Weighted F1: 0.8343012657770719
Testing Parameters:
    Epochs: 36
    Dropout: [0.36621796486152314, 0.3295404275066809, 0.2972498946624544]
    Units = [512, 64]
    Attention Dropout: 0.13750054032296027
    Batch Size: 32
    Learning Rate: 0.007612327337423204
    Optimizer: adam
    Attention Dimension: 256
    Number of Heads: 4
    Feed Forward Dimension: 128





Epoch 1/36
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 7ms/step - accuracy: 0.7202 - loss: 0.2422 - precision: 0.4826 - recall: 0.1164 - val_accuracy: 0.7435 - val_loss: 0.4795 - val_precision: 0.8659 - val_recall: 0.0866
Epoch 2/36
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7620 - loss: 0.1735 - precision: 0.7821 - recall: 0.1607 - val_accuracy: 0.7989 - val_loss: 0.4064 - val_precision: 0.8601 - val_recall: 0.3262
Epoch 3/36
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7792 - loss: 0.1589 - precision: 0.8021 - recall: 0.2693 - val_accuracy: 0.7845 - val_loss: 0.4637 - val_precision: 0.8742 - val_recall: 0.2585
Epoch 4/36
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.8019 - loss: 0.1495 - precision: 0.8523 - recall: 0.3147 - val_accuracy: 0.7848 - val_loss: 0.4365 - val_precision: 0.9083 - val_recall: 0.2476
Epoch 5/36
[1m673/673[

[I 2025-03-31 21:51:25,722] Trial 24 finished with value: 0.83333312593749 and parameters: {'epochs': 36, 'batch_size': 32, 'learning_rate': 0.007612327337423204, 'optimizer': 'adam', 'dropout0': 0.36621796486152314, 'dropout1': 0.3295404275066809, 'dropout2': 0.2972498946624544, 'units': [512, 64], 'attention_dropout': 0.13750054032296027, 'attention_dim': 256, 'num_heads': 4, 'ff_dim': 128}. Best is trial 3 with value: 0.8428796728733234.


              precision    recall  f1-score   support

           0  0.8895197540 0.8772748483 0.8833548690      4286
           1  0.6904061212 0.7152439024 0.7026055705      1640

    accuracy                      0.8324333446      5926
   macro avg  0.7899629376 0.7962593754 0.7929802198      5926
weighted avg  0.8344157449 0.8324333446 0.8333331259      5926

Best Threshold: 0.4100000000000002
Model Macro F1: 0.7929802197769724
Model Weighted F1: 0.83333312593749
Testing Parameters:
    Epochs: 20
    Dropout: [0.23998662092627676, 0.40196911241189776, 0.3772207457872362]
    Units = [1024, 64]
    Attention Dropout: 0.23338243982315215
    Batch Size: 32
    Learning Rate: 0.00880743352162557
    Optimizer: adam
    Attention Dimension: 256
    Number of Heads: 8
    Feed Forward Dimension: 64





Epoch 1/20
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 8ms/step - accuracy: 0.7112 - loss: 0.2440 - precision: 0.4637 - recall: 0.1100 - val_accuracy: 0.7288 - val_loss: 0.6803 - val_precision: 1.0000 - val_recall: 0.0201
Epoch 2/20
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7478 - loss: 0.1812 - precision: 0.7648 - recall: 0.1265 - val_accuracy: 0.7302 - val_loss: 0.6124 - val_precision: 1.0000 - val_recall: 0.0250
Epoch 3/20
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7739 - loss: 0.1663 - precision: 0.7986 - recall: 0.1989 - val_accuracy: 0.7948 - val_loss: 0.4176 - val_precision: 0.8605 - val_recall: 0.3085
Epoch 4/20
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7854 - loss: 0.1557 - precision: 0.8262 - recall: 0.2630 - val_accuracy: 0.8022 - val_loss: 0.4227 - val_precision: 0.8305 - val_recall: 0.3585
Epoch 5/20
[1m673/673[

[I 2025-03-31 21:52:08,616] Trial 25 finished with value: 0.8361655212194176 and parameters: {'epochs': 20, 'batch_size': 32, 'learning_rate': 0.00880743352162557, 'optimizer': 'adam', 'dropout0': 0.23998662092627676, 'dropout1': 0.40196911241189776, 'dropout2': 0.3772207457872362, 'units': [1024, 64], 'attention_dropout': 0.23338243982315215, 'attention_dim': 256, 'num_heads': 8, 'ff_dim': 64}. Best is trial 3 with value: 0.8428796728733234.


              precision    recall  f1-score   support

           0  0.8802929732 0.8973401773 0.8887348354      4286
           1  0.7174052665 0.6810975610 0.6987801063      1640

    accuracy                      0.8374957813      5926
   macro avg  0.7988491199 0.7892188691 0.7937574709      5926
weighted avg  0.8352143639 0.8374957813 0.8361655212      5926

Best Threshold: 0.4000000000000002
Model Macro F1: 0.7937574708524944
Model Weighted F1: 0.8361655212194176
Testing Parameters:
    Epochs: 25
    Dropout: [0.4531848262057724, 0.4498951892458491, 0.31207881557381173]
    Units = [512, 256]
    Attention Dropout: 0.180305259802177
    Batch Size: 16
    Learning Rate: 0.0031127275103438002
    Optimizer: adam
    Attention Dimension: 64
    Number of Heads: 16
    Feed Forward Dimension: 128

Epoch 1/25




[1m1345/1345[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 5ms/step - accuracy: 0.7129 - loss: 0.2545 - precision: 0.4635 - recall: 0.1465 - val_accuracy: 0.7762 - val_loss: 0.4389 - val_precision: 0.7855 - val_recall: 0.2634
Epoch 2/25
[1m1345/1345[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.7582 - loss: 0.1760 - precision: 0.7506 - recall: 0.1636 - val_accuracy: 0.7997 - val_loss: 0.4083 - val_precision: 0.8316 - val_recall: 0.3463
Epoch 3/25
[1m1345/1345[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.7669 - loss: 0.1695 - precision: 0.7641 - recall: 0.2088 - val_accuracy: 0.7827 - val_loss: 0.4250 - val_precision: 0.7857 - val_recall: 0.2951
Epoch 4/25
[1m1345/1345[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.7615 - loss: 0.1740 - precision: 0.7968 - recall: 0.1631 - val_accuracy: 0.8014 - val_loss: 0.4101 - val_precision: 0.8312 - val_recall: 0.3543
Epoch 5/25
[1m1345/1345[

[I 2025-03-31 21:53:45,040] Trial 26 finished with value: 0.832913689374551 and parameters: {'epochs': 25, 'batch_size': 16, 'learning_rate': 0.0031127275103438002, 'optimizer': 'adam', 'dropout0': 0.4531848262057724, 'dropout1': 0.4498951892458491, 'dropout2': 0.31207881557381173, 'units': [512, 256], 'attention_dropout': 0.180305259802177, 'attention_dim': 64, 'num_heads': 16, 'ff_dim': 128}. Best is trial 3 with value: 0.8428796728733234.


              precision    recall  f1-score   support

           0  0.8869565217 0.8805412972 0.8837372673      4286
           1  0.6935966487 0.7067073171 0.7000906071      1640

    accuracy                      0.8324333446      5926
   macro avg  0.7902765852 0.7936243072 0.7919139372      5926
weighted avg  0.8334448458 0.8324333446 0.8329136894      5926

Best Threshold: 0.37000000000000016
Model Macro F1: 0.7919139371831312
Model Weighted F1: 0.832913689374551
Testing Parameters:
    Epochs: 30
    Dropout: [0.2877039202568443, 0.3339451210881605, 0.23272496953032512]
    Units = [1024, 256]
    Attention Dropout: 0.25872422538856416
    Batch Size: 32
    Learning Rate: 0.005863306898367573
    Optimizer: adam
    Attention Dimension: 128
    Number of Heads: 2
    Feed Forward Dimension: 64

Epoch 1/30




[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 8ms/step - accuracy: 0.7065 - loss: 0.2612 - precision: 0.4287 - recall: 0.1172 - val_accuracy: 0.7244 - val_loss: 0.5929 - val_precision: 0.8889 - val_recall: 0.0049
Epoch 2/30
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7380 - loss: 0.1928 - precision: 0.7264 - recall: 0.0719 - val_accuracy: 0.7337 - val_loss: 0.5807 - val_precision: 0.9844 - val_recall: 0.0384
Epoch 3/30
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7501 - loss: 0.1802 - precision: 0.7820 - recall: 0.1303 - val_accuracy: 0.7335 - val_loss: 0.5269 - val_precision: 0.8765 - val_recall: 0.0433
Epoch 4/30
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7448 - loss: 0.1830 - precision: 0.7552 - recall: 0.1013 - val_accuracy: 0.7491 - val_loss: 0.5684 - val_precision: 0.9227 - val_recall: 0.1018
Epoch 5/30
[1m673/673[0m [32m━━

[I 2025-03-31 21:54:34,063] Trial 27 finished with value: 0.8340256253192811 and parameters: {'epochs': 30, 'batch_size': 32, 'learning_rate': 0.005863306898367573, 'optimizer': 'adam', 'dropout0': 0.2877039202568443, 'dropout1': 0.3339451210881605, 'dropout2': 0.23272496953032512, 'units': [1024, 256], 'attention_dropout': 0.25872422538856416, 'attention_dim': 128, 'num_heads': 2, 'ff_dim': 64}. Best is trial 3 with value: 0.8428796728733234.


              precision    recall  f1-score   support

           0  0.8741258741 0.9041063929 0.8888634018      4286
           1  0.7247153382 0.6597560976 0.6907117778      1640

    accuracy                      0.8364832940      5926
   macro avg  0.7994206062 0.7819312452 0.7897875898      5926
weighted avg  0.8327770252 0.8364832940 0.8340256253      5926

Best Threshold: 0.4400000000000002
Model Macro F1: 0.7897875898074824
Model Weighted F1: 0.8340256253192811
Testing Parameters:
    Epochs: 33
    Dropout: [0.13808603320803858, 0.13980438114341695, 0.36704405014156116]
    Units = [512, 128]
    Attention Dropout: 0.12974991425843654
    Batch Size: 32
    Learning Rate: 0.007032539363641507
    Optimizer: adam
    Attention Dimension: 256
    Number of Heads: 4
    Feed Forward Dimension: 128





Epoch 1/33
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 8ms/step - accuracy: 0.7153 - loss: 0.2466 - precision: 0.4928 - recall: 0.1518 - val_accuracy: 0.7710 - val_loss: 0.5159 - val_precision: 0.9224 - val_recall: 0.1884
Epoch 2/33
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7751 - loss: 0.1664 - precision: 0.8051 - recall: 0.2344 - val_accuracy: 0.8019 - val_loss: 0.4220 - val_precision: 0.7289 - val_recall: 0.4524
Epoch 3/33
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7780 - loss: 0.1644 - precision: 0.8219 - recall: 0.2439 - val_accuracy: 0.7671 - val_loss: 0.4576 - val_precision: 0.9392 - val_recall: 0.1695
Epoch 4/33
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.8055 - loss: 0.1484 - precision: 0.8541 - recall: 0.3407 - val_accuracy: 0.7926 - val_loss: 0.4305 - val_precision: 0.8612 - val_recall: 0.2988
Epoch 5/33
[1m673/673

[I 2025-03-31 21:55:18,485] Trial 28 finished with value: 0.8325677684112259 and parameters: {'epochs': 33, 'batch_size': 32, 'learning_rate': 0.007032539363641507, 'optimizer': 'adam', 'dropout0': 0.13808603320803858, 'dropout1': 0.13980438114341695, 'dropout2': 0.36704405014156116, 'units': [512, 128], 'attention_dropout': 0.12974991425843654, 'attention_dim': 256, 'num_heads': 4, 'ff_dim': 128}. Best is trial 3 with value: 0.8428796728733234.


              precision    recall  f1-score   support

           0  0.8893889152 0.8761082594 0.8826986366      4286
           1  0.6883802817 0.7152439024 0.7015550239      1640

    accuracy                      0.8315896051      5926
   macro avg  0.7888845984 0.7956760809 0.7921268303      5926
weighted avg  0.8337604712 0.8315896051 0.8325677684      5926

Best Threshold: 0.37000000000000016
Model Macro F1: 0.7921268302503919
Model Weighted F1: 0.8325677684112259
Testing Parameters:
    Epochs: 36
    Dropout: [0.41447229354922666, 0.2740140959003689, 0.27684890454744066]
    Units = [512, 256]
    Attention Dropout: 0.16854057590783317
    Batch Size: 16
    Learning Rate: 0.005234244632646739
    Optimizer: adam
    Attention Dimension: 256
    Number of Heads: 8
    Feed Forward Dimension: 64

Epoch 1/36




[1m1345/1345[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 5ms/step - accuracy: 0.7137 - loss: 0.2379 - precision: 0.4523 - recall: 0.0998 - val_accuracy: 0.7315 - val_loss: 0.5419 - val_precision: 0.8267 - val_recall: 0.0378
Epoch 2/36
[1m1345/1345[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.7455 - loss: 0.1804 - precision: 0.7321 - recall: 0.0934 - val_accuracy: 0.7545 - val_loss: 0.4525 - val_precision: 0.8715 - val_recall: 0.1323
Epoch 3/36
[1m1345/1345[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.7638 - loss: 0.1672 - precision: 0.7661 - recall: 0.1625 - val_accuracy: 0.7892 - val_loss: 0.4125 - val_precision: 0.8600 - val_recall: 0.2848
Epoch 4/36
[1m1345/1345[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.7765 - loss: 0.1599 - precision: 0.8017 - recall: 0.2547 - val_accuracy: 0.7889 - val_loss: 0.4244 - val_precision: 0.8430 - val_recall: 0.2915
Epoch 5/36
[1m1345/1345[

[I 2025-03-31 21:57:50,115] Trial 29 finished with value: 0.8314462029843029 and parameters: {'epochs': 36, 'batch_size': 16, 'learning_rate': 0.005234244632646739, 'optimizer': 'adam', 'dropout0': 0.41447229354922666, 'dropout1': 0.2740140959003689, 'dropout2': 0.27684890454744066, 'units': [512, 256], 'attention_dropout': 0.16854057590783317, 'attention_dim': 256, 'num_heads': 8, 'ff_dim': 64}. Best is trial 3 with value: 0.8428796728733234.


              precision    recall  f1-score   support

           0  0.8679119413 0.9106392907 0.8887623819      4286
           1  0.7319804059 0.6378048780 0.6816552623      1640

    accuracy                      0.8351333108      5926
   macro avg  0.7999461736 0.7742220844 0.7852088221      5926
weighted avg  0.8302933591 0.8351333108 0.8314462030      5926

Best Threshold: 0.4400000000000002
Model Macro F1: 0.7852088220872493
Model Weighted F1: 0.8314462029843029
Testing Parameters:
    Epochs: 21
    Dropout: [0.20828064374021246, 0.47010440574796825, 0.16991549535805794]
    Units = [512, 64]
    Attention Dropout: 0.141973757681157
    Batch Size: 32
    Learning Rate: 0.0014063687946994571
    Optimizer: adam
    Attention Dimension: 64
    Number of Heads: 8
    Feed Forward Dimension: 128

Epoch 1/21




[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 7ms/step - accuracy: 0.7134 - loss: 0.2806 - precision: 0.4869 - recall: 0.3233 - val_accuracy: 0.7875 - val_loss: 0.4866 - val_precision: 0.8802 - val_recall: 0.2689
Epoch 2/21
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7791 - loss: 0.1704 - precision: 0.7758 - recall: 0.2502 - val_accuracy: 0.8172 - val_loss: 0.3820 - val_precision: 0.8235 - val_recall: 0.4323
Epoch 3/21
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.8022 - loss: 0.1475 - precision: 0.8397 - recall: 0.3181 - val_accuracy: 0.7560 - val_loss: 0.5360 - val_precision: 0.9619 - val_recall: 0.1232
Epoch 4/21
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.8092 - loss: 0.1430 - precision: 0.8460 - recall: 0.3653 - val_accuracy: 0.8022 - val_loss: 0.4765 - val_precision: 0.8953 - val_recall: 0.3232
Epoch 5/21
[1m673/673[0m [32m━━━

[I 2025-03-31 21:58:26,570] Trial 30 finished with value: 0.838228097969704 and parameters: {'epochs': 21, 'batch_size': 32, 'learning_rate': 0.0014063687946994571, 'optimizer': 'adam', 'dropout0': 0.20828064374021246, 'dropout1': 0.47010440574796825, 'dropout2': 0.16991549535805794, 'units': [512, 64], 'attention_dropout': 0.141973757681157, 'attention_dim': 64, 'num_heads': 8, 'ff_dim': 128}. Best is trial 3 with value: 0.8428796728733234.


              precision    recall  f1-score   support

           0  0.8779990946 0.9050396640 0.8913143382      4286
           1  0.7301061008 0.6713414634 0.6994917408      1640

    accuracy                      0.8403644954      5926
   macro avg  0.8040525977 0.7881905637 0.7954030395      5926
weighted avg  0.8370702202 0.8403644954 0.8382280980      5926

Best Threshold: 0.34000000000000014
Model Macro F1: 0.7954030395115479
Model Weighted F1: 0.838228097969704
Testing Parameters:
    Epochs: 20
    Dropout: [0.21269516467432895, 0.4730347747533285, 0.18934537760777687]
    Units = [512, 64]
    Attention Dropout: 0.14617338970271565
    Batch Size: 32
    Learning Rate: 0.001460287330325025
    Optimizer: adam
    Attention Dimension: 64
    Number of Heads: 8
    Feed Forward Dimension: 128

Epoch 1/20




[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 7ms/step - accuracy: 0.7036 - loss: 0.2805 - precision: 0.4810 - recall: 0.3097 - val_accuracy: 0.7850 - val_loss: 0.5119 - val_precision: 0.8645 - val_recall: 0.2646
Epoch 2/20
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7828 - loss: 0.1670 - precision: 0.7976 - recall: 0.2688 - val_accuracy: 0.8137 - val_loss: 0.4143 - val_precision: 0.8367 - val_recall: 0.4061
Epoch 3/20
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7961 - loss: 0.1545 - precision: 0.8262 - recall: 0.3202 - val_accuracy: 0.7931 - val_loss: 0.4197 - val_precision: 0.9312 - val_recall: 0.2726
Epoch 4/20
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.8102 - loss: 0.1425 - precision: 0.8623 - recall: 0.3682 - val_accuracy: 0.8302 - val_loss: 0.3795 - val_precision: 0.7776 - val_recall: 0.5415
Epoch 5/20
[1m673/673[0m [32m━━━

[I 2025-03-31 21:59:02,804] Trial 31 finished with value: 0.8312206575892519 and parameters: {'epochs': 20, 'batch_size': 32, 'learning_rate': 0.001460287330325025, 'optimizer': 'adam', 'dropout0': 0.21269516467432895, 'dropout1': 0.4730347747533285, 'dropout2': 0.18934537760777687, 'units': [512, 64], 'attention_dropout': 0.14617338970271565, 'attention_dim': 64, 'num_heads': 8, 'ff_dim': 128}. Best is trial 3 with value: 0.8428796728733234.


              precision    recall  f1-score   support

           0  0.8700716846 0.9062062529 0.8877714286      4286
           1  0.7250341997 0.6463414634 0.6834300451      1640

    accuracy                      0.8342895714      5926
   macro avg  0.7975529422 0.7762738582 0.7856007369      5926
weighted avg  0.8299330624 0.8342895714 0.8312206576      5926

Best Threshold: 0.4200000000000002
Model Macro F1: 0.7856007368518007
Model Weighted F1: 0.8312206575892519
Testing Parameters:
    Epochs: 23
    Dropout: [0.1777748936939202, 0.43461774528829467, 0.1626118523908558]
    Units = [512, 64]
    Attention Dropout: 0.10035618933803964
    Batch Size: 32
    Learning Rate: 0.0027648576484465464
    Optimizer: adam
    Attention Dimension: 64
    Number of Heads: 8
    Feed Forward Dimension: 128

Epoch 1/23




[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 7ms/step - accuracy: 0.7106 - loss: 0.2540 - precision: 0.4807 - recall: 0.2373 - val_accuracy: 0.8156 - val_loss: 0.4090 - val_precision: 0.7563 - val_recall: 0.4921
Epoch 2/23
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7773 - loss: 0.1639 - precision: 0.8072 - recall: 0.2533 - val_accuracy: 0.7477 - val_loss: 0.5825 - val_precision: 0.9866 - val_recall: 0.0896
Epoch 3/23
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7905 - loss: 0.1542 - precision: 0.8230 - recall: 0.3087 - val_accuracy: 0.7784 - val_loss: 0.5213 - val_precision: 0.9037 - val_recall: 0.2232
Epoch 4/23
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.8140 - loss: 0.1422 - precision: 0.8593 - recall: 0.3719 - val_accuracy: 0.8331 - val_loss: 0.3619 - val_precision: 0.8226 - val_recall: 0.5061
Epoch 5/23
[1m673/673[0m [32m━━━

[I 2025-03-31 21:59:35,574] Trial 32 finished with value: 0.8350406908032754 and parameters: {'epochs': 23, 'batch_size': 32, 'learning_rate': 0.0027648576484465464, 'optimizer': 'adam', 'dropout0': 0.1777748936939202, 'dropout1': 0.43461774528829467, 'dropout2': 0.1626118523908558, 'units': [512, 64], 'attention_dropout': 0.10035618933803964, 'attention_dim': 64, 'num_heads': 8, 'ff_dim': 128}. Best is trial 3 with value: 0.8428796728733234.


              precision    recall  f1-score   support

           0  0.8908875740 0.8782081195 0.8845024086      4286
           1  0.6931216931 0.7189024390 0.7057767136      1640

    accuracy                      0.8341208235      5926
   macro avg  0.7920046335 0.7985552792 0.7951395611      5926
weighted avg  0.8361565506 0.8341208235 0.8350406908      5926

Best Threshold: 0.3300000000000001
Model Macro F1: 0.7951395611032237
Model Weighted F1: 0.8350406908032754
Testing Parameters:
    Epochs: 26
    Dropout: [0.13211061814202846, 0.4613068412051763, 0.24270509535501422]
    Units = [512, 64]
    Attention Dropout: 0.1750879979514382
    Batch Size: 32
    Learning Rate: 0.001108793556213536
    Optimizer: adam
    Attention Dimension: 64
    Number of Heads: 8
    Feed Forward Dimension: 128

Epoch 1/26




[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 9ms/step - accuracy: 0.6826 - loss: 0.3083 - precision: 0.4281 - recall: 0.3367 - val_accuracy: 0.7330 - val_loss: 0.6281 - val_precision: 1.0000 - val_recall: 0.0354
Epoch 2/26
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7800 - loss: 0.1697 - precision: 0.7661 - recall: 0.2575 - val_accuracy: 0.7983 - val_loss: 0.4310 - val_precision: 0.8816 - val_recall: 0.3134
Epoch 3/26
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7993 - loss: 0.1529 - precision: 0.8246 - recall: 0.3378 - val_accuracy: 0.8107 - val_loss: 0.3998 - val_precision: 0.8617 - val_recall: 0.3762
Epoch 4/26
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.8162 - loss: 0.1415 - precision: 0.8611 - recall: 0.3901 - val_accuracy: 0.8169 - val_loss: 0.4005 - val_precision: 0.8562 - val_recall: 0.4067
Epoch 5/26
[1m673/673[0m [32m━━

[I 2025-03-31 22:00:22,778] Trial 33 finished with value: 0.8344766650856126 and parameters: {'epochs': 26, 'batch_size': 32, 'learning_rate': 0.001108793556213536, 'optimizer': 'adam', 'dropout0': 0.13211061814202846, 'dropout1': 0.4613068412051763, 'dropout2': 0.24270509535501422, 'units': [512, 64], 'attention_dropout': 0.1750879979514382, 'attention_dim': 64, 'num_heads': 8, 'ff_dim': 128}. Best is trial 3 with value: 0.8428796728733234.


              precision    recall  f1-score   support

           0  0.8769895407 0.8999066729 0.8883003224      4286
           1  0.7192408377 0.6701219512 0.6938131313      1640

    accuracy                      0.8363145461      5926
   macro avg  0.7981151892 0.7850143121 0.7910567269      5926
weighted avg  0.8333331328 0.8363145461 0.8344766651      5926

Best Threshold: 0.4000000000000002
Model Macro F1: 0.7910567268725951
Model Weighted F1: 0.8344766650856126
Testing Parameters:
    Epochs: 22
    Dropout: [0.21402995827284496, 0.49581486051185913, 0.31969941775469124]
    Units = [512, 64]
    Attention Dropout: 0.13039519006285405
    Batch Size: 32
    Learning Rate: 0.004110987219823888
    Optimizer: adam
    Attention Dimension: 64
    Number of Heads: 16
    Feed Forward Dimension: 128





Epoch 1/22
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 10ms/step - accuracy: 0.7178 - loss: 0.2533 - precision: 0.5171 - recall: 0.2295 - val_accuracy: 0.7992 - val_loss: 0.4967 - val_precision: 0.8082 - val_recall: 0.3598
Epoch 2/22
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.7743 - loss: 0.1651 - precision: 0.7916 - recall: 0.2412 - val_accuracy: 0.7698 - val_loss: 0.4793 - val_precision: 0.9182 - val_recall: 0.1848
Epoch 3/22
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.7628 - loss: 0.1714 - precision: 0.8138 - recall: 0.1886 - val_accuracy: 0.7528 - val_loss: 0.6180 - val_precision: 0.9630 - val_recall: 0.1110
Epoch 4/22
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7649 - loss: 0.1722 - precision: 0.7868 - recall: 0.1809 - val_accuracy: 0.7241 - val_loss: 0.9639 - val_precision: 1.0000 - val_recall: 0.0030
Epoch 5/22
[1m673/673

[I 2025-03-31 22:01:17,227] Trial 34 finished with value: 0.8249666446435788 and parameters: {'epochs': 22, 'batch_size': 32, 'learning_rate': 0.004110987219823888, 'optimizer': 'adam', 'dropout0': 0.21402995827284496, 'dropout1': 0.49581486051185913, 'dropout2': 0.31969941775469124, 'units': [512, 64], 'attention_dropout': 0.13039519006285405, 'attention_dim': 64, 'num_heads': 16, 'ff_dim': 128}. Best is trial 3 with value: 0.8428796728733234.


              precision    recall  f1-score   support

           0  0.8682310469 0.8978068129 0.8827712778      4286
           1  0.7068273092 0.6439024390 0.6738991704      1640

    accuracy                      0.8275396558      5926
   macro avg  0.7875291781 0.7708546260 0.7783352241      5926
weighted avg  0.8235631209 0.8275396558 0.8249666446      5926

Best Threshold: 0.49000000000000027
Model Macro F1: 0.7783352241026458
Model Weighted F1: 0.8249666446435788
Testing Parameters:
    Epochs: 17
    Dropout: [0.37822710755300315, 0.37168280511034824, 0.172294992582675]
    Units = [1024, 128]
    Attention Dropout: 0.20013144232042784
    Batch Size: 32
    Learning Rate: 0.001647706248342656
    Optimizer: adam
    Attention Dimension: 64
    Number of Heads: 8
    Feed Forward Dimension: 128

Epoch 1/17




[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 8ms/step - accuracy: 0.7085 - loss: 0.2758 - precision: 0.4897 - recall: 0.2956 - val_accuracy: 0.7840 - val_loss: 0.4771 - val_precision: 0.7990 - val_recall: 0.2933
Epoch 2/17
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7829 - loss: 0.1682 - precision: 0.8059 - recall: 0.2728 - val_accuracy: 0.8102 - val_loss: 0.4088 - val_precision: 0.7725 - val_recall: 0.4451
Epoch 3/17
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7982 - loss: 0.1540 - precision: 0.8302 - recall: 0.3112 - val_accuracy: 0.7609 - val_loss: 0.4832 - val_precision: 0.9705 - val_recall: 0.1402
Epoch 4/17
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.8057 - loss: 0.1467 - precision: 0.8387 - recall: 0.3428 - val_accuracy: 0.8194 - val_loss: 0.3825 - val_precision: 0.8125 - val_recall: 0.4518
Epoch 5/17
[1m673/673[0m [32m━━

[I 2025-03-31 22:02:01,485] Trial 35 finished with value: 0.8375417559447655 and parameters: {'epochs': 17, 'batch_size': 32, 'learning_rate': 0.001647706248342656, 'optimizer': 'adam', 'dropout0': 0.37822710755300315, 'dropout1': 0.37168280511034824, 'dropout2': 0.172294992582675, 'units': [1024, 128], 'attention_dropout': 0.20013144232042784, 'attention_dim': 64, 'num_heads': 8, 'ff_dim': 128}. Best is trial 3 with value: 0.8428796728733234.


              precision    recall  f1-score   support

           0  0.8870517000 0.8887074195 0.8878787879      4286
           1  0.7077205882 0.7042682927 0.7059902200      1640

    accuracy                      0.8376645292      5926
   macro avg  0.7973861441 0.7964878561 0.7969345040      5926
weighted avg  0.8374224352 0.8376645292 0.8375417559      5926

Best Threshold: 0.21000000000000002
Model Macro F1: 0.7969345039638438
Model Weighted F1: 0.8375417559447655
Testing Parameters:
    Epochs: 10
    Dropout: [0.3845411164794181, 0.3201372959442424, 0.16971270402203834]
    Units = [1024, 128]
    Attention Dropout: 0.1217555606071797
    Batch Size: 16
    Learning Rate: 0.003833248850212892
    Optimizer: adam
    Attention Dimension: 32
    Number of Heads: 2
    Feed Forward Dimension: 128

Epoch 1/10




[1m1345/1345[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 5ms/step - accuracy: 0.7213 - loss: 0.2386 - precision: 0.4791 - recall: 0.1337 - val_accuracy: 0.7401 - val_loss: 0.7063 - val_precision: 0.8731 - val_recall: 0.0713
Epoch 2/10
[1m1345/1345[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.7605 - loss: 0.1743 - precision: 0.7606 - recall: 0.1839 - val_accuracy: 0.7580 - val_loss: 0.5255 - val_precision: 0.8456 - val_recall: 0.1537
Epoch 3/10
[1m1345/1345[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.7675 - loss: 0.1674 - precision: 0.7775 - recall: 0.2026 - val_accuracy: 0.7555 - val_loss: 0.4802 - val_precision: 0.9207 - val_recall: 0.1274
Epoch 4/10
[1m1345/1345[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.7809 - loss: 0.1617 - precision: 0.8186 - recall: 0.2693 - val_accuracy: 0.8120 - val_loss: 0.4292 - val_precision: 0.8479 - val_recall: 0.3909
Epoch 5/10
[1m1345/1345[

[I 2025-03-31 22:02:48,181] Trial 36 finished with value: 0.8311714246983146 and parameters: {'epochs': 10, 'batch_size': 16, 'learning_rate': 0.003833248850212892, 'optimizer': 'adam', 'dropout0': 0.3845411164794181, 'dropout1': 0.3201372959442424, 'dropout2': 0.16971270402203834, 'units': [1024, 128], 'attention_dropout': 0.1217555606071797, 'attention_dim': 32, 'num_heads': 2, 'ff_dim': 128}. Best is trial 3 with value: 0.8428796728733234.


              precision    recall  f1-score   support

           0  0.8812615955 0.8866075595 0.8839264945      4286
           1  0.6988847584 0.6878048780 0.6933005532      1640

    accuracy                      0.8315896051      5926
   macro avg  0.7900731770 0.7872062188 0.7886135238      5926
weighted avg  0.8307894368 0.8315896051 0.8311714247      5926

Best Threshold: 0.3300000000000001
Model Macro F1: 0.7886135238494738
Model Weighted F1: 0.8311714246983146
Testing Parameters:
    Epochs: 16
    Dropout: [0.3061580794097038, 0.3601925899132049, 0.10090948132642252]
    Units = [1024, 128]
    Attention Dropout: 0.19908290392158937
    Batch Size: 32
    Learning Rate: 0.00474237672305493
    Optimizer: adam
    Attention Dimension: 128
    Number of Heads: 8
    Feed Forward Dimension: 128

Epoch 1/16




[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 9ms/step - accuracy: 0.7152 - loss: 0.2390 - precision: 0.4844 - recall: 0.1604 - val_accuracy: 0.7258 - val_loss: 0.5279 - val_precision: 1.0000 - val_recall: 0.0091
Epoch 2/16
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7552 - loss: 0.1846 - precision: 0.8039 - recall: 0.1417 - val_accuracy: 0.7403 - val_loss: 0.6240 - val_precision: 1.0000 - val_recall: 0.0616
Epoch 3/16
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7616 - loss: 0.1725 - precision: 0.7827 - recall: 0.1630 - val_accuracy: 0.8002 - val_loss: 0.4127 - val_precision: 0.8333 - val_recall: 0.3476
Epoch 4/16
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7822 - loss: 0.1557 - precision: 0.8311 - recall: 0.2658 - val_accuracy: 0.7869 - val_loss: 0.4372 - val_precision: 0.8689 - val_recall: 0.2707
Epoch 5/16
[1m673/673[0m [32m━━

[I 2025-03-31 22:03:32,732] Trial 37 finished with value: 0.8306475454874083 and parameters: {'epochs': 16, 'batch_size': 32, 'learning_rate': 0.00474237672305493, 'optimizer': 'adam', 'dropout0': 0.3061580794097038, 'dropout1': 0.3601925899132049, 'dropout2': 0.10090948132642252, 'units': [1024, 128], 'attention_dropout': 0.19908290392158937, 'attention_dim': 128, 'num_heads': 8, 'ff_dim': 128}. Best is trial 3 with value: 0.8428796728733234.


              precision    recall  f1-score   support

           0  0.8808254115 0.8863742417 0.8835911152      4286
           1  0.6980781153 0.6865853659 0.6922840455      1640

    accuracy                      0.8310833615      5926
   macro avg  0.7894517634 0.7864798038 0.7879375804      5926
weighted avg  0.8302507295 0.8310833615 0.8306475455      5926

Best Threshold: 0.35000000000000014
Model Macro F1: 0.7879375803712119
Model Weighted F1: 0.8306475454874083
Testing Parameters:
    Epochs: 17
    Dropout: [0.38716830796158097, 0.3834937538614402, 0.14214088896339377]
    Units = [1024, 128]
    Attention Dropout: 0.15757084457727594
    Batch Size: 16
    Learning Rate: 0.006651051685574346
    Optimizer: adam
    Attention Dimension: 256
    Number of Heads: 16
    Feed Forward Dimension: 128

Epoch 1/17




[1m1345/1345[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 6ms/step - accuracy: 0.7234 - loss: 0.2254 - precision: 0.5242 - recall: 0.0968 - val_accuracy: 0.7457 - val_loss: 0.5228 - val_precision: 0.8276 - val_recall: 0.1024
Epoch 2/17
[1m1345/1345[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.7591 - loss: 0.1742 - precision: 0.7892 - recall: 0.1838 - val_accuracy: 0.7697 - val_loss: 0.5626 - val_precision: 0.8463 - val_recall: 0.2049
Epoch 3/17
[1m1345/1345[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.7696 - loss: 0.1638 - precision: 0.7655 - recall: 0.2130 - val_accuracy: 0.7631 - val_loss: 0.5412 - val_precision: 0.9155 - val_recall: 0.1585
Epoch 4/17
[1m1345/1345[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.7835 - loss: 0.1589 - precision: 0.8102 - recall: 0.2748 - val_accuracy: 0.7705 - val_loss: 0.4612 - val_precision: 0.9348 - val_recall: 0.1835
Epoch 5/17
[1m1345/1345[

[I 2025-03-31 22:04:51,163] Trial 38 finished with value: 0.8339336162600988 and parameters: {'epochs': 17, 'batch_size': 16, 'learning_rate': 0.006651051685574346, 'optimizer': 'adam', 'dropout0': 0.38716830796158097, 'dropout1': 0.3834937538614402, 'dropout2': 0.14214088896339377, 'units': [1024, 128], 'attention_dropout': 0.15757084457727594, 'attention_dim': 256, 'num_heads': 16, 'ff_dim': 128}. Best is trial 3 with value: 0.8428796728733234.


              precision    recall  f1-score   support

           0  0.8772209567 0.8985067662 0.8877362840      4286
           1  0.7167968750 0.6713414634 0.6933249370      1640

    accuracy                      0.8356395545      5926
   macro avg  0.7970089159 0.7849241148 0.7905306105      5926
weighted avg  0.8328241471 0.8356395545 0.8339336163      5926

Best Threshold: 0.45000000000000023
Model Macro F1: 0.790530610514776
Model Weighted F1: 0.8339336162600988
Testing Parameters:
    Epochs: 13
    Dropout: [0.3380214058797486, 0.21014163567277622, 0.1242402467834647]
    Units = [1024, 128]
    Attention Dropout: 0.49849751303308315
    Batch Size: 32
    Learning Rate: 0.008021082373978318
    Optimizer: adam
    Attention Dimension: 32
    Number of Heads: 4
    Feed Forward Dimension: 128

Epoch 1/13




[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 8ms/step - accuracy: 0.7131 - loss: 0.2442 - precision: 0.4491 - recall: 0.0777 - val_accuracy: 0.7280 - val_loss: 0.5436 - val_precision: 0.9667 - val_recall: 0.0177
Epoch 2/13
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7513 - loss: 0.1814 - precision: 0.7610 - recall: 0.1299 - val_accuracy: 0.7867 - val_loss: 0.4459 - val_precision: 0.8658 - val_recall: 0.2713
Epoch 3/13
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7663 - loss: 0.1697 - precision: 0.8139 - recall: 0.2127 - val_accuracy: 0.7411 - val_loss: 0.4875 - val_precision: 0.9907 - val_recall: 0.0652
Epoch 4/13
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7838 - loss: 0.1592 - precision: 0.8252 - recall: 0.2797 - val_accuracy: 0.7730 - val_loss: 0.4606 - val_precision: 0.9178 - val_recall: 0.1976
Epoch 5/13
[1m673/673[0m [32m━━

[I 2025-03-31 22:05:25,911] Trial 39 finished with value: 0.8363976873114792 and parameters: {'epochs': 13, 'batch_size': 32, 'learning_rate': 0.008021082373978318, 'optimizer': 'adam', 'dropout0': 0.3380214058797486, 'dropout1': 0.21014163567277622, 'dropout2': 0.1242402467834647, 'units': [1024, 128], 'attention_dropout': 0.49849751303308315, 'attention_dim': 32, 'num_heads': 4, 'ff_dim': 128}. Best is trial 3 with value: 0.8428796728733234.


              precision    recall  f1-score   support

           0  0.8748874887 0.9071395240 0.8907216495      4286
           1  0.7314439946 0.6609756098 0.6944266496      1640

    accuracy                      0.8390145123      5926
   macro avg  0.8031657417 0.7840575669 0.7925741495      5926
weighted avg  0.8351899980 0.8390145123 0.8363976873      5926

Best Threshold: 0.49000000000000027
Model Macro F1: 0.7925741495340681
Model Weighted F1: 0.8363976873114792
Testing Parameters:
    Epochs: 19
    Dropout: [0.27881566361126237, 0.40531806960177913, 0.22277143183462514]
    Units = [1024, 64]
    Attention Dropout: 0.12496684164635194
    Batch Size: 32
    Learning Rate: 0.003106061360633301
    Optimizer: adam
    Attention Dimension: 64
    Number of Heads: 2
    Feed Forward Dimension: 128

Epoch 1/19




[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 8ms/step - accuracy: 0.7278 - loss: 0.2514 - precision: 0.5370 - recall: 0.2440 - val_accuracy: 0.7978 - val_loss: 0.4059 - val_precision: 0.8044 - val_recall: 0.3561
Epoch 2/19
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7729 - loss: 0.1676 - precision: 0.7870 - recall: 0.1994 - val_accuracy: 0.7933 - val_loss: 0.4373 - val_precision: 0.8374 - val_recall: 0.3140
Epoch 3/19
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7862 - loss: 0.1599 - precision: 0.8336 - recall: 0.2806 - val_accuracy: 0.7973 - val_loss: 0.4286 - val_precision: 0.8604 - val_recall: 0.3195
Epoch 4/19
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.8035 - loss: 0.1497 - precision: 0.8497 - recall: 0.3465 - val_accuracy: 0.8007 - val_loss: 0.4372 - val_precision: 0.8720 - val_recall: 0.3280
Epoch 5/19
[1m673/673[0m [32m━━━

[I 2025-03-31 22:06:14,189] Trial 40 finished with value: 0.8272092839595571 and parameters: {'epochs': 19, 'batch_size': 32, 'learning_rate': 0.003106061360633301, 'optimizer': 'adam', 'dropout0': 0.27881566361126237, 'dropout1': 0.40531806960177913, 'dropout2': 0.22277143183462514, 'units': [1024, 64], 'attention_dropout': 0.12496684164635194, 'attention_dim': 64, 'num_heads': 2, 'ff_dim': 128}. Best is trial 3 with value: 0.8428796728733234.


              precision    recall  f1-score   support

           0  0.8704206242 0.8980401307 0.8840146991      4286
           1  0.7094414894 0.6506097561 0.6787531807      1640

    accuracy                      0.8295646304      5926
   macro avg  0.7899310568 0.7743249434 0.7813839399      5926
weighted avg  0.8258702055 0.8295646304 0.8272092840      5926

Best Threshold: 0.4200000000000002
Model Macro F1: 0.7813839398944085
Model Weighted F1: 0.8272092839595571
Testing Parameters:
    Epochs: 28
    Dropout: [0.24150220046533455, 0.37242174670837747, 0.26546942678872426]
    Units = [512, 64]
    Attention Dropout: 0.2307426875796872
    Batch Size: 32
    Learning Rate: 0.0008152106978419312
    Optimizer: adam
    Attention Dimension: 64
    Number of Heads: 8
    Feed Forward Dimension: 128

Epoch 1/28




[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 8ms/step - accuracy: 0.6825 - loss: 0.3249 - precision: 0.4321 - recall: 0.3893 - val_accuracy: 0.7735 - val_loss: 0.4675 - val_precision: 0.9093 - val_recall: 0.2018
Epoch 2/28
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7778 - loss: 0.1794 - precision: 0.7509 - recall: 0.2584 - val_accuracy: 0.7833 - val_loss: 0.4539 - val_precision: 0.8724 - val_recall: 0.2543
Epoch 3/28
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7873 - loss: 0.1624 - precision: 0.8128 - recall: 0.3051 - val_accuracy: 0.8044 - val_loss: 0.4020 - val_precision: 0.8638 - val_recall: 0.3482
Epoch 4/28
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.8052 - loss: 0.1478 - precision: 0.8446 - recall: 0.3519 - val_accuracy: 0.8189 - val_loss: 0.4010 - val_precision: 0.8630 - val_recall: 0.4110
Epoch 5/28
[1m673/673[0m [32m━━

[I 2025-03-31 22:07:13,746] Trial 41 finished with value: 0.8279221597299058 and parameters: {'epochs': 28, 'batch_size': 32, 'learning_rate': 0.0008152106978419312, 'optimizer': 'adam', 'dropout0': 0.24150220046533455, 'dropout1': 0.37242174670837747, 'dropout2': 0.26546942678872426, 'units': [512, 64], 'attention_dropout': 0.2307426875796872, 'attention_dim': 64, 'num_heads': 8, 'ff_dim': 128}. Best is trial 3 with value: 0.8428796728733234.


              precision    recall  f1-score   support

           0  0.8593205575 0.9206719552 0.8889389502      4286
           1  0.7451274363 0.6060975610 0.6684599866      1640

    accuracy                      0.8336145798      5926
   macro avg  0.8022239969 0.7633847581 0.7786994684      5926
weighted avg  0.8277180062 0.8336145798 0.8279221597      5926

Best Threshold: 0.4200000000000002
Model Macro F1: 0.7786994683820565
Model Weighted F1: 0.8279221597299058
Testing Parameters:
    Epochs: 15
    Dropout: [0.47369527334263156, 0.431087305013706, 0.16908096634761136]
    Units = [1024, 256]
    Attention Dropout: 0.20561960469284699
    Batch Size: 32
    Learning Rate: 0.0016452906648500043
    Optimizer: adam
    Attention Dimension: 64
    Number of Heads: 8
    Feed Forward Dimension: 128

Epoch 1/15




[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 7ms/step - accuracy: 0.7068 - loss: 0.2782 - precision: 0.4865 - recall: 0.2753 - val_accuracy: 0.8090 - val_loss: 0.4058 - val_precision: 0.6825 - val_recall: 0.5793
Epoch 2/15
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7612 - loss: 0.1777 - precision: 0.7583 - recall: 0.2032 - val_accuracy: 0.7621 - val_loss: 0.4497 - val_precision: 0.9492 - val_recall: 0.1482
Epoch 3/15
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7811 - loss: 0.1602 - precision: 0.8166 - recall: 0.2446 - val_accuracy: 0.8122 - val_loss: 0.3892 - val_precision: 0.7692 - val_recall: 0.4591
Epoch 4/15
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7980 - loss: 0.1500 - precision: 0.8342 - recall: 0.3150 - val_accuracy: 0.7702 - val_loss: 0.4923 - val_precision: 0.9290 - val_recall: 0.1835
Epoch 5/15
[1m673/673[0m [32m━━

[I 2025-03-31 22:07:51,830] Trial 42 finished with value: 0.8369784215425042 and parameters: {'epochs': 15, 'batch_size': 32, 'learning_rate': 0.0016452906648500043, 'optimizer': 'adam', 'dropout0': 0.47369527334263156, 'dropout1': 0.431087305013706, 'dropout2': 0.16908096634761136, 'units': [1024, 256], 'attention_dropout': 0.20561960469284699, 'attention_dim': 64, 'num_heads': 8, 'ff_dim': 128}. Best is trial 3 with value: 0.8428796728733234.


              precision    recall  f1-score   support

           0  0.8769509161 0.9045730285 0.8905478351      4286
           1  0.7282392027 0.6682926829 0.6969793323      1640

    accuracy                      0.8391832602      5926
   macro avg  0.8025950594 0.7864328557 0.7937635837      5926
weighted avg  0.8357954638 0.8391832602 0.8369784215      5926

Best Threshold: 0.4400000000000002
Model Macro F1: 0.7937635836743384
Model Weighted F1: 0.8369784215425042
Testing Parameters:
    Epochs: 10
    Dropout: [0.36327412972104267, 0.4674678603486752, 0.17595026456247068]
    Units = [1024, 256]
    Attention Dropout: 0.29643386007434724
    Batch Size: 32
    Learning Rate: 0.0018064001551413554
    Optimizer: adam
    Attention Dimension: 64
    Number of Heads: 8
    Feed Forward Dimension: 128





Epoch 1/10
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 9ms/step - accuracy: 0.6957 - loss: 0.2795 - precision: 0.4329 - recall: 0.2460 - val_accuracy: 0.7548 - val_loss: 0.5366 - val_precision: 0.8755 - val_recall: 0.1329
Epoch 2/10
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7689 - loss: 0.1728 - precision: 0.7670 - recall: 0.2098 - val_accuracy: 0.8174 - val_loss: 0.3877 - val_precision: 0.7622 - val_recall: 0.4945
Epoch 3/10
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7920 - loss: 0.1559 - precision: 0.8119 - recall: 0.3061 - val_accuracy: 0.7784 - val_loss: 0.4156 - val_precision: 0.9160 - val_recall: 0.2195
Epoch 4/10
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7893 - loss: 0.1517 - precision: 0.8293 - recall: 0.2955 - val_accuracy: 0.7626 - val_loss: 0.4510 - val_precision: 0.9088 - val_recall: 0.1579
Epoch 5/10
[1m673/673

[I 2025-03-31 22:08:22,493] Trial 43 finished with value: 0.8368971794154271 and parameters: {'epochs': 10, 'batch_size': 32, 'learning_rate': 0.0018064001551413554, 'optimizer': 'adam', 'dropout0': 0.36327412972104267, 'dropout1': 0.4674678603486752, 'dropout2': 0.17595026456247068, 'units': [1024, 256], 'attention_dropout': 0.29643386007434724, 'attention_dim': 64, 'num_heads': 8, 'ff_dim': 128}. Best is trial 3 with value: 0.8428796728733234.


              precision    recall  f1-score   support

           0  0.8867660764 0.8880074662 0.8873863371      4286
           1  0.7062423501 0.7036585366 0.7049480757      1640

    accuracy                      0.8369895376      5926
   macro avg  0.7965042132 0.7958330014 0.7961672064      5926
weighted avg  0.8368067596 0.8369895376 0.8368971794      5926

Best Threshold: 0.34000000000000014
Model Macro F1: 0.7961672064449224
Model Weighted F1: 0.8368971794154271
Testing Parameters:
    Epochs: 15
    Dropout: [0.3083307206945281, 0.399047634962895, 0.12108190303372665]
    Units = [1024, 256]
    Attention Dropout: 0.20567586230611085
    Batch Size: 32
    Learning Rate: 0.001423505811396387
    Optimizer: adam
    Attention Dimension: 64
    Number of Heads: 8
    Feed Forward Dimension: 128

Epoch 1/15




[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 8ms/step - accuracy: 0.7085 - loss: 0.2762 - precision: 0.4886 - recall: 0.3062 - val_accuracy: 0.7567 - val_loss: 0.4773 - val_precision: 0.8587 - val_recall: 0.1445
Epoch 2/15
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7703 - loss: 0.1720 - precision: 0.7635 - recall: 0.2302 - val_accuracy: 0.7897 - val_loss: 0.4449 - val_precision: 0.8717 - val_recall: 0.2817
Epoch 3/15
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7905 - loss: 0.1548 - precision: 0.8176 - recall: 0.3013 - val_accuracy: 0.7837 - val_loss: 0.4208 - val_precision: 0.8891 - val_recall: 0.2494
Epoch 4/15
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7996 - loss: 0.1487 - precision: 0.8281 - recall: 0.3518 - val_accuracy: 0.7901 - val_loss: 0.4770 - val_precision: 0.8867 - val_recall: 0.2768
Epoch 5/15
[1m673/673[0m [32m━━━

[I 2025-03-31 22:09:01,265] Trial 44 finished with value: 0.830401673873801 and parameters: {'epochs': 15, 'batch_size': 32, 'learning_rate': 0.001423505811396387, 'optimizer': 'adam', 'dropout0': 0.3083307206945281, 'dropout1': 0.399047634962895, 'dropout2': 0.12108190303372665, 'units': [1024, 256], 'attention_dropout': 0.20567586230611085, 'attention_dim': 64, 'num_heads': 8, 'ff_dim': 128}. Best is trial 3 with value: 0.8428796728733234.


              precision    recall  f1-score   support

           0  0.8787459659 0.8894073728 0.8840445269      4286
           1  0.7015113350 0.6792682927 0.6902106568      1640

    accuracy                      0.8312521093      5926
   macro avg  0.7901286504 0.7843378328 0.7871275918      5926
weighted avg  0.8296968949 0.8312521093 0.8304016739      5926

Best Threshold: 0.47000000000000025
Model Macro F1: 0.7871275918275387
Model Weighted F1: 0.830401673873801
Testing Parameters:
    Epochs: 18
    Dropout: [0.42877004188373746, 0.3431080534387604, 0.1552042321732756]
    Units = [1024, 256]
    Attention Dropout: 0.17819293614415332
    Batch Size: 32
    Learning Rate: 0.0005538521743394878
    Optimizer: adam
    Attention Dimension: 64
    Number of Heads: 8
    Feed Forward Dimension: 128

Epoch 1/18




[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 8ms/step - accuracy: 0.6859 - loss: 0.3306 - precision: 0.4473 - recall: 0.4523 - val_accuracy: 0.7828 - val_loss: 0.4695 - val_precision: 0.8669 - val_recall: 0.2543
Epoch 2/18
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7781 - loss: 0.1761 - precision: 0.7658 - recall: 0.2670 - val_accuracy: 0.7872 - val_loss: 0.4667 - val_precision: 0.8907 - val_recall: 0.2634
Epoch 3/18
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7999 - loss: 0.1566 - precision: 0.8311 - recall: 0.3113 - val_accuracy: 0.8036 - val_loss: 0.4309 - val_precision: 0.8719 - val_recall: 0.3402
Epoch 4/18
[1m673/673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.8060 - loss: 0.1482 - precision: 0.8336 - recall: 0.3504 - val_accuracy: 0.8051 - val_loss: 0.4130 - val_precision: 0.8702 - val_recall: 0.3476
Epoch 5/18
[1m673/673[0m [32m━━

[I 2025-03-31 22:09:48,030] Trial 45 finished with value: 0.8347784001215673 and parameters: {'epochs': 18, 'batch_size': 32, 'learning_rate': 0.0005538521743394878, 'optimizer': 'adam', 'dropout0': 0.42877004188373746, 'dropout1': 0.3431080534387604, 'dropout2': 0.1552042321732756, 'units': [1024, 256], 'attention_dropout': 0.17819293614415332, 'attention_dim': 64, 'num_heads': 8, 'ff_dim': 128}. Best is trial 3 with value: 0.8428796728733234.


              precision    recall  f1-score   support

           0  0.8815668203 0.8926738217 0.8870855553      4286
           1  0.7099621690 0.6865853659 0.6980781153      1640

    accuracy                      0.8356395545      5926
   macro avg  0.7957644946 0.7896295938 0.7925818353      5926
weighted avg  0.8340758267 0.8356395545 0.8347784001      5926

Best Threshold: 0.3300000000000001
Model Macro F1: 0.7925818353055087
Model Weighted F1: 0.8347784001215673
Testing Parameters:
    Epochs: 12
    Dropout: [0.26385493962769857, 0.4262323149990268, 0.21619765816362016]
    Units = [1024, 128]
    Attention Dropout: 0.150011854082354
    Batch Size: 16
    Learning Rate: 0.0025949340632912777
    Optimizer: adam
    Attention Dimension: 256
    Number of Heads: 4
    Feed Forward Dimension: 256

Epoch 1/12




[1m1345/1345[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 6ms/step - accuracy: 0.7156 - loss: 0.2421 - precision: 0.4640 - recall: 0.1440 - val_accuracy: 0.7929 - val_loss: 0.4367 - val_precision: 0.7101 - val_recall: 0.4256
Epoch 2/12
[1m1345/1345[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.7570 - loss: 0.1771 - precision: 0.7350 - recall: 0.1504 - val_accuracy: 0.7577 - val_loss: 0.4611 - val_precision: 0.8864 - val_recall: 0.1427
Epoch 3/12
[1m1345/1345[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.7697 - loss: 0.1677 - precision: 0.7733 - recall: 0.2191 - val_accuracy: 0.7813 - val_loss: 0.4521 - val_precision: 0.9000 - val_recall: 0.2360
Epoch 4/12
[1m1345/1345[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.7821 - loss: 0.1599 - precision: 0.8098 - recall: 0.2555 - val_accuracy: 0.7492 - val_loss: 0.5823 - val_precision: 0.9753 - val_recall: 0.0963
Epoch 5/12
[1m1345/1345[

[I 2025-03-31 22:10:48,881] Trial 46 finished with value: 0.8378155868247334 and parameters: {'epochs': 12, 'batch_size': 16, 'learning_rate': 0.0025949340632912777, 'optimizer': 'adam', 'dropout0': 0.26385493962769857, 'dropout1': 0.4262323149990268, 'dropout2': 0.21619765816362016, 'units': [1024, 128], 'attention_dropout': 0.150011854082354, 'attention_dim': 256, 'num_heads': 4, 'ff_dim': 256}. Best is trial 3 with value: 0.8428796728733234.


              precision    recall  f1-score   support

           0  0.8753089193 0.9090060663 0.8918393041      4286
           1  0.7355932203 0.6615853659 0.6966292135      1640

    accuracy                      0.8405332433      5926
   macro avg  0.8054510698 0.7852957161 0.7942342588      5926
weighted avg  0.8366430830 0.8405332433 0.8378155868      5926

Best Threshold: 0.37000000000000016
Model Macro F1: 0.794234258796054
Model Weighted F1: 0.8378155868247334
Testing Parameters:
    Epochs: 11
    Dropout: [0.26227035621560363, 0.41825012958040253, 0.22012058333147091]
    Units = [1024, 128]
    Attention Dropout: 0.14572200597545537
    Batch Size: 16
    Learning Rate: 0.0026303987434666276
    Optimizer: adam
    Attention Dimension: 256
    Number of Heads: 4
    Feed Forward Dimension: 256

Epoch 1/11




[1m1345/1345[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 6ms/step - accuracy: 0.7243 - loss: 0.2489 - precision: 0.5057 - recall: 0.1653 - val_accuracy: 0.7617 - val_loss: 0.4725 - val_precision: 0.5521 - val_recall: 0.7372
Epoch 2/11
[1m1345/1345[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.7317 - loss: 0.2018 - precision: 0.6085 - recall: 0.0553 - val_accuracy: 0.7315 - val_loss: 0.5967 - val_precision: 0.9804 - val_recall: 0.0305
Epoch 3/11
[1m1345/1345[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.7682 - loss: 0.1692 - precision: 0.7615 - recall: 0.2066 - val_accuracy: 0.7406 - val_loss: 0.5568 - val_precision: 0.9905 - val_recall: 0.0634
Epoch 4/11
[1m1345/1345[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.7841 - loss: 0.1581 - precision: 0.8087 - recall: 0.2807 - val_accuracy: 0.7945 - val_loss: 0.4510 - val_precision: 0.8981 - val_recall: 0.2902
Epoch 5/11
[1m1345/1345[

[I 2025-03-31 22:11:44,344] Trial 47 finished with value: 0.8282530109259901 and parameters: {'epochs': 11, 'batch_size': 16, 'learning_rate': 0.0026303987434666276, 'optimizer': 'adam', 'dropout0': 0.26227035621560363, 'dropout1': 0.41825012958040253, 'dropout2': 0.22012058333147091, 'units': [1024, 128], 'attention_dropout': 0.14572200597545537, 'attention_dim': 256, 'num_heads': 4, 'ff_dim': 256}. Best is trial 3 with value: 0.8428796728733234.


              precision    recall  f1-score   support

           0  0.8649667406 0.9101726552 0.8869940882      4286
           1  0.7281073446 0.6286585366 0.6747382199      1640

    accuracy                      0.8322645967      5926
   macro avg  0.7965370426 0.7694155959 0.7808661541      5926
weighted avg  0.8270913762 0.8322645967 0.8282530109      5926

Best Threshold: 0.37000000000000016
Model Macro F1: 0.7808661540586035
Model Weighted F1: 0.8282530109259901
Testing Parameters:
    Epochs: 8
    Dropout: [0.18901696257304607, 0.3661364310145595, 0.18521685125517484]
    Units = [1024, 128]
    Attention Dropout: 0.3982749486449687
    Batch Size: 16
    Learning Rate: 0.0043880286484797556
    Optimizer: adam
    Attention Dimension: 256
    Number of Heads: 4
    Feed Forward Dimension: 256

Epoch 1/8




[1m1345/1345[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 6ms/step - accuracy: 0.7142 - loss: 0.2464 - precision: 0.4272 - recall: 0.0782 - val_accuracy: 0.7233 - val_loss: 0.6008 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 2/8
[1m1345/1345[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.7295 - loss: 0.2024 - precision: 0.5697 - recall: 0.0327 - val_accuracy: 0.7287 - val_loss: 0.5154 - val_precision: 0.8333 - val_recall: 0.0244
Epoch 3/8
[1m1345/1345[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.7504 - loss: 0.1804 - precision: 0.7326 - recall: 0.1166 - val_accuracy: 0.7319 - val_loss: 0.5851 - val_precision: 0.9636 - val_recall: 0.0323
Epoch 4/8
[1m1345/1345[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.7655 - loss: 0.1682 - precision: 0.7686 - recall: 0.1858 - val_accuracy: 0.7597 - val_loss: 0.4644 - val_precision: 0.8724 - val_recall: 0.1543
Epoch 5/8
[1m1345/13

[I 2025-03-31 22:12:31,037] Trial 48 finished with value: 0.8234148379930972 and parameters: {'epochs': 8, 'batch_size': 16, 'learning_rate': 0.0043880286484797556, 'optimizer': 'adam', 'dropout0': 0.18901696257304607, 'dropout1': 0.3661364310145595, 'dropout2': 0.18521685125517484, 'units': [1024, 128], 'attention_dropout': 0.3982749486449687, 'attention_dim': 256, 'num_heads': 4, 'ff_dim': 256}. Best is trial 3 with value: 0.8428796728733234.


              precision    recall  f1-score   support

           0  0.8824503311 0.8705086328 0.8764388067      4286
           1  0.6731448763 0.6969512195 0.6848412223      1640

    accuracy                      0.8224772190      5926
   macro avg  0.7777976037 0.7837299261 0.7806400145      5926
weighted avg  0.8245257706 0.8224772190 0.8234148380      5926

Best Threshold: 0.2700000000000001
Model Macro F1: 0.7806400144800802
Model Weighted F1: 0.8234148379930972
Testing Parameters:
    Epochs: 14
    Dropout: [0.3273556371865825, 0.3062260875317018, 0.25691544855200066]
    Units = [1024, 128]
    Attention Dropout: 0.11596540701304642
    Batch Size: 16
    Learning Rate: 0.00999765488720216
    Optimizer: adam
    Attention Dimension: 256
    Number of Heads: 4
    Feed Forward Dimension: 256

Epoch 1/14




[1m1345/1345[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 6ms/step - accuracy: 0.7264 - loss: 0.2271 - precision: 0.5241 - recall: 0.0834 - val_accuracy: 0.8112 - val_loss: 0.4526 - val_precision: 0.7911 - val_recall: 0.4317
Epoch 2/14
[1m1345/1345[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.7759 - loss: 0.1678 - precision: 0.7969 - recall: 0.2377 - val_accuracy: 0.7519 - val_loss: 0.4453 - val_precision: 0.9722 - val_recall: 0.1067
Epoch 3/14
[1m1345/1345[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.7765 - loss: 0.1629 - precision: 0.7765 - recall: 0.2352 - val_accuracy: 0.8078 - val_loss: 0.4373 - val_precision: 0.8224 - val_recall: 0.3896
Epoch 4/14
[1m1345/1345[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.7849 - loss: 0.1598 - precision: 0.8127 - recall: 0.2820 - val_accuracy: 0.8014 - val_loss: 0.3994 - val_precision: 0.8852 - val_recall: 0.3244
Epoch 5/14
[1m1345/1345[

[I 2025-03-31 22:13:40,688] Trial 49 finished with value: 0.8225316821536466 and parameters: {'epochs': 14, 'batch_size': 16, 'learning_rate': 0.00999765488720216, 'optimizer': 'adam', 'dropout0': 0.3273556371865825, 'dropout1': 0.3062260875317018, 'dropout2': 0.25691544855200066, 'units': [1024, 128], 'attention_dropout': 0.11596540701304642, 'attention_dim': 256, 'num_heads': 4, 'ff_dim': 256}. Best is trial 3 with value: 0.8428796728733234.


              precision    recall  f1-score   support

           0  0.8690313779 0.8917405506 0.8802395210      4286
           1  0.6963350785 0.6487804878 0.6717171717      1640

    accuracy                      0.8245021937      5926
   macro avg  0.7826832282 0.7702605192 0.7759783463      5926
weighted avg  0.8212382745 0.8245021937 0.8225316822      5926

Best Threshold: 0.3200000000000001
Model Macro F1: 0.7759783463376277
Model Weighted F1: 0.8225316821536466
Best hyperparameters found: {'epochs': 23, 'batch_size': 32, 'learning_rate': 0.007402024937971863, 'optimizer': 'adam', 'dropout0': 0.11597189185804023, 'dropout1': 0.1649105768518277, 'dropout2': 0.24244195005155006, 'units': [512, 64], 'attention_dropout': 0.10362259259272402, 'attention_dim': 256, 'num_heads': 4, 'ff_dim': 128}


### Evaluate the Model

In [17]:
eval = model.evaluate(x=[dev_claim_embeddings, dev_evidence_embeddings], y=dev_data['label'])

[1m186/186[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8292 - loss: 0.3837 - precision: 0.8145 - recall: 0.5087


In [18]:
accuracy = eval[1]
precision = eval[2]
recall = eval[3]
f1 = (2 * (precision * recall)) / (precision + recall)

print(f"Accuracy: {eval[1]}")
print(f"Precision: {eval[2]}")
print(f"Recall: {eval[3]}")
print(f"F1-Score: {f1}")

Accuracy: 0.8268646597862244
Precision: 0.8045634627342224
Recall: 0.4945122003555298
F1-Score: 0.6125377598654564


In [None]:
# Get the predicted and true labels
y_pred = model.predict(x=[dev_claim_embeddings, dev_evidence_embeddings])
y_true = dev_data['label'].to_numpy()

[1m186/186[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step


In [None]:


# Threshold Tuning
thresholds = np.arange(0.2, 0.7, 0.01)
scores = [f1_score(y_true, (y_pred > t).astype(int), average='macro') for t in thresholds]
bestThreshold = thresholds[np.argmax(scores)]
print(f"Best Threshold: {bestThreshold}")

# Apply the best threshold to the predictions
y_pred_binary = (y_pred > bestThreshold).astype(int)

# Calculate the classification report
report = classification_report(y_true=y_true, y_pred=y_pred_binary, digits=10)
print(report)

Best Threshold: 0.34000000000000014
              precision    recall  f1-score   support

           0  0.8812868147 0.9076061596 0.8942528736      4286
           1  0.7380952381 0.6804878049 0.7081218274      1640

    accuracy                      0.8447519406      5926
   macro avg  0.8096910264 0.7940469822 0.8011873505      5926
weighted avg  0.8416590412 0.8447519406 0.8427417504      5926



In [None]:


# Calculate Matthews Correlation Coefficient
mcc = matthews_corrcoef(y_true, y_pred_binary)
print(f"Matthews Correlation Coefficient: {mcc}")

In [None]:
# model.save('CoAttentionSiameseDeepLearning.keras')