Below is the best hyperparamter resulted in the `main.ipynb` optimization. The purpose of this notebook is to see the effectiveness of the optimization. 

[I 2025-01-24 19:55:40,455] Trial 27 finished with value: 0.904019688269073 and parameters: {'learning_rate': 8.300432875328772e-05, 'num_trainable_layers': 2, 'dropout_rate': 0.3847406475130443, 'batch_size': 32, 'step_size': 9, 'gamma': 0.5951936405857416, 'epochs': 5}. Best is trial 27 with value: 0.904019688269073.


In [1]:
import os
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification, DistilBertConfig
from sklearn.metrics import f1_score, confusion_matrix, balanced_accuracy_score, precision_score, recall_score, accuracy_score
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns
from torch.optim import AdamW, lr_scheduler
import shutil
import zipfile

In [2]:
learning_rate = 8.300432875328772e-05
num_trainable_layers = 2
dropout_rate = 0.3847406475130443
batch_size = 32
step_size = 9
gamma = 0.5951936405857416
epochs = 5

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [4]:
# Load and prepare data
# original
df = pd.read_parquet("/kaggle/input/train-parquet")
df['label_int'] = df['label'].str.split("_").str[0].astype('int')

texts = df["quote"].to_list()
labels = df["label_int"].to_list()

X_train, X_test, y_train, y_test = train_test_split(texts, labels, test_size=0.2, random_state=42, stratify=labels)

In [5]:
# aug 
train1 = pd.read_csv('/kaggle/input/balanced/train1.csv')
train2 = pd.read_csv('/kaggle/input/balanced/train2.csv')
train3 = pd.read_csv('/kaggle/input/balanced/train3.csv')
train4 = pd.read_csv('/kaggle/input/balanced/train4.csv')

datasets = [train1, train2, train3, train4]

# Extract quotes and labels using list comprehension
texts = [ds['quote'] for ds in datasets]
labels = [ds['numeric_label'] for ds in datasets]

# Concatenate all texts and labels using pandas.concat
train1234_texts = pd.concat(texts, ignore_index=True)
train1234_labels = pd.concat(labels, ignore_index=True)

In [6]:
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased', do_lower_case=True)
MAX_LENGTH = 365

# Dataset and DataLoader preparation
class QuotesDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx], dtype=torch.long)
        return item

    def __len__(self):
        return len(self.labels)

def encode_data(tokenizer, texts, labels, max_length):
    try:
        if isinstance(texts, pd.Series):
            texts = texts.tolist()
        if isinstance(labels, pd.Series):
            labels = labels.tolist()
            
        encodings = tokenizer(texts, truncation=True, padding='max_length', max_length=max_length, return_tensors='pt')
        return QuotesDataset(encodings, labels)

    except Exception as e:
        print(f"Error during tokenization: {e}")
        return None

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

In [7]:
train1234_dataset = encode_data(tokenizer, train1234_texts, train1234_labels, MAX_LENGTH)
val_dataset = encode_data(tokenizer, X_test, y_test, MAX_LENGTH)

In [8]:
def modify_model(model, num_trainable_layers, dropout_rate):
    # Freeze layers: only the last 'num_trainable_layers' are trainable
    total_layers = len(model.distilbert.transformer.layer)
    for layer_index, layer in enumerate(model.distilbert.transformer.layer):
        if layer_index < total_layers - num_trainable_layers:
            for param in layer.parameters():
                param.requires_grad = False

    # Adjust dropout rates in applicable transformer layers
    for layer in model.distilbert.transformer.layer:
        layer.attention.dropout.p = dropout_rate
        layer.ffn.dropout.p = dropout_rate

    return model

In [9]:
def train_one_epoch(model, train_loader, optimizer, device):
    model.train()
    train_loss = 0
    correct_train = 0
    total_train = 0
    for batch in train_loader:
        optimizer.zero_grad()
        batch = {k: v.to(device) for k, v in batch.items()}
        outputs = model(**batch)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        predictions = torch.argmax(outputs.logits, dim=-1)
        correct_train += (predictions == batch['labels']).sum().item()
        total_train += batch['labels'].size(0)
    average_loss = train_loss / len(train_loader)
    accuracy = correct_train / total_train
    return average_loss, accuracy

In [10]:
def validate_model(model, val_loader, device):
    model.eval()
    val_loss = 0
    correct_val = 0
    total_val = 0
    all_predictions = []
    all_true_labels = []
    with torch.no_grad():
        for batch in val_loader:
            batch = {k: v.to(device) for k, v in batch.items()}
            outputs = model(**batch)
            val_loss += outputs.loss.item()
            predictions = torch.argmax(outputs.logits, dim=-1)
            all_predictions.extend(predictions.cpu().numpy())
            all_true_labels.extend(batch['labels'].cpu().numpy())
            correct_val += (predictions == batch['labels']).sum().item()
            total_val += batch['labels'].size(0)
    average_val_loss = val_loss / len(val_loader)
    accuracy = correct_val / total_val
    return average_val_loss, accuracy, all_predictions, all_true_labels

In [11]:
model_config = DistilBertConfig.from_pretrained('distilbert-base-uncased', num_labels=8)
model = DistilBertForSequenceClassification(model_config)
model = modify_model(model, num_trainable_layers, dropout_rate)
model.to(device)

DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): DistilBertSdpaAttention(
            (dropout): Dropout(p=0.3847406475130443, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.384

In [12]:
optimizer = AdamW(model.parameters(), lr= learning_rate)
scheduler = lr_scheduler.StepLR(optimizer, step_size= step_size, gamma= gamma)

In [13]:
train_loader = DataLoader(train1234_dataset, batch_size= batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size= batch_size, shuffle=False) 

In [14]:
val_accuracies = []

In [15]:
for epoch in range(epochs):
    train_loss, train_accuracy = train_one_epoch(model, train_loader, optimizer, device)
    val_loss, val_accuracy, all_predictions, all_true_labels = validate_model(model, val_loader, device)
    scheduler.step()

    val_accuracies.append(val_accuracy)
    print(val_accuracy)

  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


0.579163248564397
0.7653814602132896
0.844954881050041
0.8843314191960624
0.896636587366694


In [16]:
val_accuracies

[0.579163248564397,
 0.7653814602132896,
 0.844954881050041,
 0.8843314191960624,
 0.896636587366694]

Next step is to deciede what data to be used for the final model. 

For document purpose, below is the log of the log of `main.ipynb` that got interrupted because of time out (completed total of 30 trials instead of intended 40) 

Time

Log Message
10.9s	1	Collecting nlpaug
10.9s	2	  Downloading nlpaug-1.1.11-py3-none-any.whl.metadata (14 kB)
10.9s	3	Requirement already satisfied: numpy>=1.16.2 in /usr/local/lib/python3.10/dist-packages (from nlpaug) (1.26.4)
10.9s	4	Requirement already satisfied: pandas>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from nlpaug) (2.2.2)
11.0s	5	Requirement already satisfied: requests>=2.22.0 in /usr/local/lib/python3.10/dist-packages (from nlpaug) (2.32.3)
11.0s	6	Requirement already satisfied: gdown>=4.0.0 in /usr/local/lib/python3.10/dist-packages (from nlpaug) (5.2.0)
11.0s	7	Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.10/dist-packages (from gdown>=4.0.0->nlpaug) (4.12.3)
11.0s	8	Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from gdown>=4.0.0->nlpaug) (3.16.1)
11.0s	9	Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from gdown>=4.0.0->nlpaug) (4.67.1)
11.0s	10	Requirement already satisfied: mkl_fft in /usr/local/lib/python3.10/dist-packages (from numpy>=1.16.2->nlpaug) (1.3.8)
11.0s	11	Requirement already satisfied: mkl_random in /usr/local/lib/python3.10/dist-packages (from numpy>=1.16.2->nlpaug) (1.2.4)
11.0s	12	Requirement already satisfied: mkl_umath in /usr/local/lib/python3.10/dist-packages (from numpy>=1.16.2->nlpaug) (0.1.1)
11.0s	13	Requirement already satisfied: mkl in /usr/local/lib/python3.10/dist-packages (from numpy>=1.16.2->nlpaug) (2025.0.1)
11.0s	14	Requirement already satisfied: tbb4py in /usr/local/lib/python3.10/dist-packages (from numpy>=1.16.2->nlpaug) (2022.0.0)
11.0s	15	Requirement already satisfied: mkl-service in /usr/local/lib/python3.10/dist-packages (from numpy>=1.16.2->nlpaug) (2.4.1)
11.0s	16	Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.2.0->nlpaug) (2.8.2)
11.0s	17	Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.2.0->nlpaug) (2024.2)
11.0s	18	Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.2.0->nlpaug) (2024.2)
11.0s	19	Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.22.0->nlpaug) (3.4.0)
11.0s	20	Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.22.0->nlpaug) (3.10)
11.0s	21	Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.22.0->nlpaug) (2.2.3)
11.0s	22	Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.22.0->nlpaug) (2024.12.14)
11.0s	23	Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas>=1.2.0->nlpaug) (1.17.0)
11.0s	24	Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.10/dist-packages (from beautifulsoup4->gdown>=4.0.0->nlpaug) (2.6)
11.0s	25	Requirement already satisfied: intel-openmp>=2024 in /usr/local/lib/python3.10/dist-packages (from mkl->numpy>=1.16.2->nlpaug) (2024.2.0)
11.0s	26	Requirement already satisfied: tbb==2022.* in /usr/local/lib/python3.10/dist-packages (from mkl->numpy>=1.16.2->nlpaug) (2022.0.0)
11.0s	27	Requirement already satisfied: tcmlib==1.* in /usr/local/lib/python3.10/dist-packages (from tbb==2022.*->mkl->numpy>=1.16.2->nlpaug) (1.2.0)
11.0s	28	Requirement already satisfied: intel-cmplr-lib-rt in /usr/local/lib/python3.10/dist-packages (from mkl_umath->numpy>=1.16.2->nlpaug) (2024.2.0)
11.0s	29	Requirement already satisfied: PySocks!=1.5.7,>=1.5.6 in /usr/local/lib/python3.10/dist-packages (from requests[socks]->gdown>=4.0.0->nlpaug) (1.7.1)
11.1s	30	Requirement already satisfied: intel-cmplr-lib-ur==2024.2.0 in /usr/local/lib/python3.10/dist-packages (from intel-openmp>=2024->mkl->numpy>=1.16.2->nlpaug) (2024.2.0)
11.1s	31	Downloading nlpaug-1.1.11-py3-none-any.whl (410 kB)
11.1s	32	[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/410.5 kB[0m [31m?[0m eta [36m-:--:--[0m
[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━[0m [32m276.5/410.5 kB[0m [31m8.9 MB/s[0m eta [36m0:00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m410.5/410.5 kB[0m [31m7.3 MB/s[0m eta [36m0:00:00[0m
14.4s	33	[?25hInstalling collected packages: nlpaug
14.7s	34	Successfully installed nlpaug-1.1.11
16.0s	35	Requirement already satisfied: optuna in /usr/local/lib/python3.10/dist-packages (4.1.0)
16.0s	36	Requirement already satisfied: alembic>=1.5.0 in /usr/local/lib/python3.10/dist-packages (from optuna) (1.14.0)
16.0s	37	Requirement already satisfied: colorlog in /usr/local/lib/python3.10/dist-packages (from optuna) (6.9.0)
16.0s	38	Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from optuna) (1.26.4)
16.0s	39	Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from optuna) (24.2)
16.0s	40	Requirement already satisfied: sqlalchemy>=1.4.2 in /usr/local/lib/python3.10/dist-packages (from optuna) (2.0.36)
16.0s	41	Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from optuna) (4.67.1)
16.0s	42	Requirement already satisfied: PyYAML in /usr/local/lib/python3.10/dist-packages (from optuna) (6.0.2)
16.0s	43	Requirement already satisfied: Mako in /usr/local/lib/python3.10/dist-packages (from alembic>=1.5.0->optuna) (1.3.8)
16.0s	44	Requirement already satisfied: typing-extensions>=4 in /usr/local/lib/python3.10/dist-packages (from alembic>=1.5.0->optuna) (4.12.2)
16.0s	45	Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.10/dist-packages (from sqlalchemy>=1.4.2->optuna) (3.1.1)
16.0s	46	Requirement already satisfied: mkl_fft in /usr/local/lib/python3.10/dist-packages (from numpy->optuna) (1.3.8)
16.0s	47	Requirement already satisfied: mkl_random in /usr/local/lib/python3.10/dist-packages (from numpy->optuna) (1.2.4)
16.0s	48	Requirement already satisfied: mkl_umath in /usr/local/lib/python3.10/dist-packages (from numpy->optuna) (0.1.1)
16.0s	49	Requirement already satisfied: mkl in /usr/local/lib/python3.10/dist-packages (from numpy->optuna) (2025.0.1)
16.0s	50	Requirement already satisfied: tbb4py in /usr/local/lib/python3.10/dist-packages (from numpy->optuna) (2022.0.0)
16.0s	51	Requirement already satisfied: mkl-service in /usr/local/lib/python3.10/dist-packages (from numpy->optuna) (2.4.1)
16.0s	52	Requirement already satisfied: MarkupSafe>=0.9.2 in /usr/local/lib/python3.10/dist-packages (from Mako->alembic>=1.5.0->optuna) (3.0.2)
16.0s	53	Requirement already satisfied: intel-openmp>=2024 in /usr/local/lib/python3.10/dist-packages (from mkl->numpy->optuna) (2024.2.0)
16.0s	54	Requirement already satisfied: tbb==2022.* in /usr/local/lib/python3.10/dist-packages (from mkl->numpy->optuna) (2022.0.0)
16.0s	55	Requirement already satisfied: tcmlib==1.* in /usr/local/lib/python3.10/dist-packages (from tbb==2022.*->mkl->numpy->optuna) (1.2.0)
16.0s	56	Requirement already satisfied: intel-cmplr-lib-rt in /usr/local/lib/python3.10/dist-packages (from mkl_umath->numpy->optuna) (2024.2.0)
16.0s	57	Requirement already satisfied: intel-cmplr-lib-ur==2024.2.0 in /usr/local/lib/python3.10/dist-packages (from intel-openmp>=2024->mkl->numpy->optuna) (2024.2.0)
34.1s	58	2025-01-24 08:46:58.819626: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
34.3s	59	2025-01-24 08:46:59.075624: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
34.4s	60	2025-01-24 08:46:59.145258: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
90.0s	61	Using device: cuda
115.3s	62	[I 2025-01-24 08:48:20,029] A new study created in memory with name: no-name-681e1e96-a752-456d-99a5-298689dcb266
117.1s	63	<ipython-input-8-2d9dfced9ac1>:11: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
117.1s	64	  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
1162.8s	65	[I 2025-01-24 09:05:47,567] Trial 0 finished with value: 0.6890894175553732 and parameters: {'learning_rate': 1.4818151091980784e-05, 'num_trainable_layers': 4, 'dropout_rate': 0.119110690825033, 'batch_size': 16, 'step_size': 4, 'gamma': 0.8960630325737292, 'epochs': 3}. Best is trial 0 with value: 0.6890894175553732.
3083.8s	66	[I 2025-01-24 09:37:48,516] Trial 1 finished with value: 0.8728465955701394 and parameters: {'learning_rate': 2.517549631081625e-05, 'num_trainable_layers': 6, 'dropout_rate': 0.41403406363825646, 'batch_size': 16, 'step_size': 4, 'gamma': 0.42358721860354653, 'epochs': 5}. Best is trial 1 with value: 0.8728465955701394.
4771.2s	67	[I 2025-01-24 10:05:55,931] Trial 2 finished with value: 0.8884331419196062 and parameters: {'learning_rate': 5.202379803067906e-05, 'num_trainable_layers': 3, 'dropout_rate': 0.298865675518514, 'batch_size': 32, 'step_size': 5, 'gamma': 0.7444522192227857, 'epochs': 5}. Best is trial 2 with value: 0.8884331419196062.
5828.5s	68	[I 2025-01-24 10:23:33,239] Trial 3 finished with value: 0.7875307629204266 and parameters: {'learning_rate': 0.00016564874914610164, 'num_trainable_layers': 4, 'dropout_rate': 0.24484638353248456, 'batch_size': 32, 'step_size': 8, 'gamma': 0.7133676325508109, 'epochs': 3}. Best is trial 2 with value: 0.8884331419196062.
7368.5s	69	[I 2025-01-24 10:49:13,274] Trial 4 finished with value: 0.8310090237899918 and parameters: {'learning_rate': 6.24880645646062e-05, 'num_trainable_layers': 6, 'dropout_rate': 0.1787526406980196, 'batch_size': 16, 'step_size': 9, 'gamma': 0.4218077557604636, 'epochs': 4}. Best is trial 2 with value: 0.8884331419196062.
9207.9s	70	[I 2025-01-24 11:19:52,632] Trial 5 finished with value: 0.8835110746513536 and parameters: {'learning_rate': 5.770893676901601e-05, 'num_trainable_layers': 5, 'dropout_rate': 0.13033142617846433, 'batch_size': 32, 'step_size': 6, 'gamma': 0.41133703177026737, 'epochs': 5}. Best is trial 2 with value: 0.8884331419196062.
11037.0s	71	[I 2025-01-24 11:50:21,737] Trial 6 finished with value: 0.874487284659557 and parameters: {'learning_rate': 0.00013303199203240556, 'num_trainable_layers': 5, 'dropout_rate': 0.34849991659568424, 'batch_size': 64, 'step_size': 2, 'gamma': 0.41341582314867265, 'epochs': 5}. Best is trial 2 with value: 0.8884331419196062.
11763.7s	72	[I 2025-01-24 12:02:28,472] Trial 7 finished with value: 0.11566858080393766 and parameters: {'learning_rate': 0.0005831206497447505, 'num_trainable_layers': 5, 'dropout_rate': 0.13925389521423429, 'batch_size': 32, 'step_size': 5, 'gamma': 0.5817810547623458, 'epochs': 2}. Best is trial 2 with value: 0.8884331419196062.
12707.8s	73	[I 2025-01-24 12:18:12,553] Trial 8 finished with value: 0.26579163248564397 and parameters: {'learning_rate': 0.0008846062157467111, 'num_trainable_layers': 2, 'dropout_rate': 0.10968685543143071, 'batch_size': 32, 'step_size': 4, 'gamma': 0.5417040989520252, 'epochs': 3}. Best is trial 2 with value: 0.8884331419196062.
14241.5s	74	[I 2025-01-24 12:43:46,253] Trial 9 finished with value: 0.8884331419196062 and parameters: {'learning_rate': 0.00019879063323214105, 'num_trainable_layers': 1, 'dropout_rate': 0.4669940144299951, 'batch_size': 32, 'step_size': 8, 'gamma': 0.5409999740245737, 'epochs': 5}. Best is trial 2 with value: 0.8884331419196062.
15542.1s	75	[I 2025-01-24 13:05:26,876] Trial 10 finished with value: 0.5176374077112387 and parameters: {'learning_rate': 2.9798315739368094e-05, 'num_trainable_layers': 2, 'dropout_rate': 0.29656423235918294, 'batch_size': 64, 'step_size': 2, 'gamma': 0.1813949711449518, 'epochs': 4}. Best is trial 2 with value: 0.8884331419196062.
17074.1s	76	[I 2025-01-24 13:30:58,862] Trial 11 finished with value: 0.8851517637407711 and parameters: {'learning_rate': 0.0003659511106558045, 'num_trainable_layers': 1, 'dropout_rate': 0.47905495553653893, 'batch_size': 32, 'step_size': 7, 'gamma': 0.7655793929360052, 'epochs': 5}. Best is trial 2 with value: 0.8884331419196062.
18303.5s	77	[I 2025-01-24 13:51:28,268] Trial 12 finished with value: 0.8859721082854799 and parameters: {'learning_rate': 0.00024783443813555816, 'num_trainable_layers': 1, 'dropout_rate': 0.37504869040198946, 'batch_size': 32, 'step_size': 10, 'gamma': 0.6504820771728465, 'epochs': 4}. Best is trial 2 with value: 0.8884331419196062.
19990.2s	78	[I 2025-01-24 14:19:34,927] Trial 13 finished with value: 0.8884331419196062 and parameters: {'learning_rate': 8.01191130718201e-05, 'num_trainable_layers': 3, 'dropout_rate': 0.4826522544681453, 'batch_size': 32, 'step_size': 7, 'gamma': 0.8440203634438823, 'epochs': 5}. Best is trial 2 with value: 0.8884331419196062.
21280.1s	79	[I 2025-01-24 14:41:04,810] Trial 14 finished with value: 0.8334700574241182 and parameters: {'learning_rate': 3.9201057232782166e-05, 'num_trainable_layers': 2, 'dropout_rate': 0.2609609742681635, 'batch_size': 32, 'step_size': 8, 'gamma': 0.6413069876133749, 'epochs': 4}. Best is trial 2 with value: 0.8884331419196062.
22973.8s	80	[I 2025-01-24 15:09:18,497] Trial 15 finished with value: 0.8900738310090238 and parameters: {'learning_rate': 0.00021978865018528252, 'num_trainable_layers': 3, 'dropout_rate': 0.4103056673589882, 'batch_size': 64, 'step_size': 6, 'gamma': 0.2524299563975234, 'epochs': 5}. Best is trial 15 with value: 0.8900738310090238.
23653.2s	81	[I 2025-01-24 15:20:37,923] Trial 16 finished with value: 0.31009023789991796 and parameters: {'learning_rate': 1.2999872686451634e-05, 'num_trainable_layers': 3, 'dropout_rate': 0.346836119246987, 'batch_size': 64, 'step_size': 5, 'gamma': 0.15456539728381552, 'epochs': 2}. Best is trial 15 with value: 0.8900738310090238.
25005.7s	82	[I 2025-01-24 15:43:10,446] Trial 17 finished with value: 0.734208367514356 and parameters: {'learning_rate': 0.00033573893216988217, 'num_trainable_layers': 3, 'dropout_rate': 0.4157701006442217, 'batch_size': 64, 'step_size': 1, 'gamma': 0.2733314481355774, 'epochs': 4}. Best is trial 15 with value: 0.8900738310090238.
26767.6s	83	[I 2025-01-24 16:12:32,354] Trial 18 finished with value: 0.8884331419196062 and parameters: {'learning_rate': 0.00010764068509778241, 'num_trainable_layers': 4, 'dropout_rate': 0.22330447689964178, 'batch_size': 64, 'step_size': 6, 'gamma': 0.29735957431573334, 'epochs': 5}. Best is trial 15 with value: 0.8900738310090238.
28121.0s	84	[I 2025-01-24 16:35:05,702] Trial 19 finished with value: 0.77850697292863 and parameters: {'learning_rate': 4.441761716834638e-05, 'num_trainable_layers': 3, 'dropout_rate': 0.30322960836332824, 'batch_size': 64, 'step_size': 3, 'gamma': 0.28389315769469425, 'epochs': 4}. Best is trial 15 with value: 0.8900738310090238.
29743.6s	85	[I 2025-01-24 17:02:08,289] Trial 20 finished with value: 0.6694011484823625 and parameters: {'learning_rate': 2.0683734117834178e-05, 'num_trainable_layers': 2, 'dropout_rate': 0.4221268085035388, 'batch_size': 64, 'step_size': 7, 'gamma': 0.10765187998547693, 'epochs': 5}. Best is trial 15 with value: 0.8900738310090238.
31277.1s	86	[I 2025-01-24 17:27:41,863] Trial 21 finished with value: 0.8999179655455292 and parameters: {'learning_rate': 0.0001932713713098696, 'num_trainable_layers': 1, 'dropout_rate': 0.4517023694187439, 'batch_size': 32, 'step_size': 8, 'gamma': 0.7710877288358753, 'epochs': 5}. Best is trial 21 with value: 0.8999179655455292.
32888.7s	87	[I 2025-01-24 17:54:33,421] Trial 22 finished with value: 0.8982772764561116 and parameters: {'learning_rate': 0.00010003900869814402, 'num_trainable_layers': 2, 'dropout_rate': 0.43953240939272936, 'batch_size': 32, 'step_size': 10, 'gamma': 0.790299268957037, 'epochs': 5}. Best is trial 21 with value: 0.8999179655455292.
34403.4s	88	[I 2025-01-24 18:19:48,093] Trial 23 finished with value: 0.8679245283018868 and parameters: {'learning_rate': 0.00027631868435139663, 'num_trainable_layers': 1, 'dropout_rate': 0.44978345529451597, 'batch_size': 16, 'step_size': 10, 'gamma': 0.8175774891200873, 'epochs': 5}. Best is trial 21 with value: 0.8999179655455292.
35696.0s	89	[I 2025-01-24 18:41:20,712] Trial 24 finished with value: 0.8720262510254306 and parameters: {'learning_rate': 0.00013348467042746236, 'num_trainable_layers': 2, 'dropout_rate': 0.4998480936674242, 'batch_size': 32, 'step_size': 9, 'gamma': 0.67408644365238, 'epochs': 4}. Best is trial 21 with value: 0.8999179655455292.
37319.2s	90	[I 2025-01-24 19:08:23,900] Trial 25 finished with value: 0.8728465955701394 and parameters: {'learning_rate': 0.00046130960721521693, 'num_trainable_layers': 2, 'dropout_rate': 0.37594013765889545, 'batch_size': 64, 'step_size': 9, 'gamma': 0.813857333625018, 'epochs': 5}. Best is trial 21 with value: 0.8999179655455292.
38547.0s	91	[I 2025-01-24 19:28:51,714] Trial 26 finished with value: 0.8851517637407711 and parameters: {'learning_rate': 0.0001564700242114712, 'num_trainable_layers': 1, 'dropout_rate': 0.4407740355882178, 'batch_size': 32, 'step_size': 10, 'gamma': 0.3389991488731838, 'epochs': 4}. Best is trial 21 with value: 0.8999179655455292.
40155.7s	92	[I 2025-01-24 19:55:40,455] Trial 27 finished with value: 0.904019688269073 and parameters: {'learning_rate': 8.300432875328772e-05, 'num_trainable_layers': 2, 'dropout_rate': 0.3847406475130443, 'batch_size': 32, 'step_size': 9, 'gamma': 0.5951936405857416, 'epochs': 5}. Best is trial 27 with value: 0.904019688269073.
41077.2s	93	[I 2025-01-24 20:11:01,929] Trial 28 finished with value: 0.8400328137817884 and parameters: {'learning_rate': 8.067582306165917e-05, 'num_trainable_layers': 1, 'dropout_rate': 0.3811454948605194, 'batch_size': 32, 'step_size': 9, 'gamma': 0.8948541994871584, 'epochs': 3}. Best is trial 27 with value: 0.904019688269073.
42037.1s	94	[I 2025-01-24 20:27:01,824] Trial 29 finished with value: 0.8630024610336341 and parameters: {'learning_rate': 9.029252417928428e-05, 'num_trainable_layers': 2, 'dropout_rate': 0.33361246073388606, 'batch_size': 16, 'step_size': 10, 'gamma': 0.5930182416596956, 'epochs': 3}. Best is trial 27 with value: 0.904019688269073.