#  Training Model- Solution B - BiLSTM

In [None]:
# Training Model Code - Solution B - BILSTM

# !cat /proc/cpuinfo | grep "model name" | uniq
# !nvidia-smi
# Please install these libraries below
# !pip install torch
# !pip install transformers
# !pip install tqdm
# !pip install pandas
# !pip install numpy
# !pip install scikit-learn

# Use the A100 GPU to run this code

# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Import libraries
import json
import os
import re
import torch
import numpy as np
import pandas as pd
import torch.nn as nn
from tqdm import tqdm
from torch.utils.data import DataLoader, Dataset
from torch.optim import AdamW
from transformers import BertTokenizer, BertForSequenceClassification, BertConfig, AutoModel, BertModel
from sklearn.metrics import f1_score, accuracy_score
from itertools import product

# File path of dataset
train_file_path = '/content/drive/My Drive/nlu-lab/train.csv'
validation_file_path = '/content/drive/My Drive/nlu-lab/dev.csv'

# Load training and testing data
train_data = pd.read_csv(train_file_path)
val_data = pd.read_csv(validation_file_path)

# Check the data loading
print(train_data.head(2))
print(val_data.head(2))

Mounted at /content/drive
                                 Claim  \
0  We should introduce school vouchers   
1   We should legalize insider trading   

                                            Evidence  label  
0  Among the many educational reform efforts, suc...      0  
1  The U.S. Securities and Exchange Commission wa...      0  
                                 Claim  \
0  Democracy should be done away with.   
1       Polygamy should be made legal.   

                                            Evidence  label  
0  Amartya Sen, an Indian economist and Nobel lau...      0  
1  The Supreme Court's unanimous decision in Reyn...      1  


# Text Preprocessing

In [None]:
# text preprocessing
def clean_text(text):
  text = re.sub(r"can't\b", "cannot", text)  # Replace can't with cannot
  text = re.sub(r"won't\b", "will not", text)  # Replace won't with will not
  text = re.sub(r"n't\b", " not", text)  # Replace n't with not
  text = re.sub(r"'re\b", " are", text)  # Replace 're with are
  text = re.sub(r"'m\b", " am", text)  # Replace 'm with am
  text = re.sub(r"'ve\b", " have", text)  # Replace 've with have
  text = re.sub(r"'ll\b", " will", text)  # Replace 'll with will
  text = re.sub(r"'d\b", " would", text)  # Replace 'd with would
  # Expand only pronoun + 's
  text = re.sub(r"\b(he|she|it|that|what|who|there|where|why|when)'s\b", r"\1 is", text, flags=re.IGNORECASE)
  # url
  text = re.sub(r'http\S+|www\S+|https\S+', '[URL]', text)
  # blank space
  text = re.sub(r"\s+", " ", text).strip()
  return text

train_data['combined_text'] = (train_data['Claim'] + " " + train_data['Evidence']).apply(clean_text)
val_data['combined_text'] = (val_data['Claim'] + " " + val_data['Evidence']).apply(clean_text)

# Check the cleaned data
print(train_data[['combined_text']].head())
print(val_data[['combined_text']].head())

                                       combined_text
0  We should introduce school vouchers Among the ...
1  We should legalize insider trading The U.S. Se...
2  We should subsidize investigative journalism T...
3  We should further exploit nuclear power a 2001...
4  We should ban whaling The US and several other...
                                       combined_text
0  Democracy should be done away with. Amartya Se...
1  Polygamy should be made legal. The Supreme Cou...
2  Hunting should be banned In total it is estima...
3  Television should be given up. Barbera mention...
4  Abortions ought to be prohibited. According to...


# BiLSTM Classifier Using BERT Word Embeddings

In [3]:
class BERT_LSTMClassifier(nn.Module):
  def __init__(self, hidden_size=128, num_classes=2, dropout=0.3, num_layers=2):
    super(BERT_LSTMClassifier, self).__init__()
    self.bert_embeddings = self._load_bert_embeddings('bert-base-uncased')

    self.lstm = nn.LSTM(
        input_size=768,
        hidden_size=hidden_size,
        num_layers=num_layers,
        batch_first=True,
        bidirectional=True
    )
    self.dropout = nn.Dropout(dropout)
    self.fc = nn.Linear(hidden_size * 2, num_classes)

  def _load_bert_embeddings(self, model_path):
    bert = AutoModel.from_pretrained(model_path)
    return bert.embeddings.word_embeddings

  def forward(self, input_ids, attention_mask):
    embeddings = self.bert_embeddings(input_ids)
    lstm_out, _ = self.lstm(embeddings)
    pooled_output = torch.mean(lstm_out, dim=1)

    out = self.dropout(pooled_output)
    logits = self.fc(out)
    return logits

# Training & Evaluation Function

In [None]:
# Parameters
MAX_LEN = 128

# Check device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')

#load tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Dataset class
class EDDataset(Dataset):
  def __init__(self, texts, labels=None):
    self.texts = texts
    self.labels = labels
    self.tokenizer = tokenizer

  def __len__(self):
    return len(self.texts)

  def __getitem__(self, index):
    text = self.texts[index]
    encoding = self.tokenizer(text, max_length=MAX_LEN, padding='max_length', truncation=True, return_tensors='pt')
    item = {
        'input_ids': encoding['input_ids'].squeeze(0),
        'attention_mask': encoding['attention_mask'].squeeze(0)
    }
    if self.labels is not None:
      item['labels'] = torch.tensor(self.labels[index], dtype=torch.long)
    return item

# Function to train model, tqdm used for clear training process
def train_model(model, data_loader, optimizer, criterion, device, num_epochs, show_progress=False):
  model.train()
  for epoch in range(1, num_epochs + 1):
    total_loss = 0

    if show_progress:
      tqdm.write(f"Epoch {epoch}/{num_epochs}")

    for batch in data_loader:
      input_ids = batch['input_ids'].to(device)
      attention_mask = batch['attention_mask'].to(device)
      labels = batch['labels'].to(device)

      optimizer.zero_grad()
      outputs = model(input_ids, attention_mask=attention_mask)
      loss = criterion(outputs, labels)
      loss.backward()
      optimizer.step()

      total_loss += loss.item()

    avg_loss = total_loss / len(data_loader)
    tqdm.write(f"Epoch {epoch}: Avg Loss = {avg_loss:.4f}")

# Function to evaluate model
def evaluate_model(model, data_loader, device):
  model.eval()
  all_preds, all_labels = [], []
  with torch.no_grad():
    for batch in data_loader:
      input_ids = batch['input_ids'].to(device)
      attention_mask = batch['attention_mask'].to(device)
      labels = batch['labels'].to(device)

      outputs = model(input_ids, attention_mask=attention_mask)
      preds = torch.argmax(outputs, dim=1)

      all_preds.extend(preds.cpu().numpy())
      all_labels.extend(labels.cpu().numpy())

  acc = accuracy_score(all_labels, all_preds)
  f1 = f1_score(all_labels, all_preds)
  return acc, f1


Using device: cuda


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

# Model Training

In [None]:
# param grid
param_grid = {
    'learning_rate': [2e-5, 5e-5],
    'batch_size': [16, 32],
    'epochs': [3, 5, 10],
    'dropout': [0.1, 0.3],
    'lstm_hidden_size': [128, 256],
    'num_lstm_layers': [1, 2],
}

# param combitions
param_combinations = list(product(
    param_grid['learning_rate'],
    param_grid['batch_size'],
    param_grid['epochs'],
    param_grid['dropout'],
    param_grid['lstm_hidden_size'],
    param_grid['num_lstm_layers'],
))

best_f1 = 0
best_params = {}

model_dir = '/content/drive/My Drive/nlu-lab/lstm_models'
os.makedirs(model_dir, exist_ok=True)

results = []

# training
for idx, (lr, bs, epochs, dropout, lstm_hidden_size, num_lstm_layers) in enumerate(tqdm(param_combinations, desc="Grid Search Progress")):
  tqdm.write(f"\n[{idx+1}/{len(param_combinations)}] Training with lr={lr}, bs={bs}, epochs={epochs}, dropout={dropout}, lstm_hidden_size={lstm_hidden_size}, num_lstm_layers={num_lstm_layers}")

  train_dataset = EDDataset(train_data['combined_text'].tolist(), train_data['label'].tolist())
  val_dataset = EDDataset(val_data['combined_text'].tolist(), val_data['label'].tolist())
  train_loader = DataLoader(train_dataset, batch_size=bs, shuffle=True)
  val_loader = DataLoader(val_dataset, batch_size=bs)

  model = BERT_LSTMClassifier(hidden_size=lstm_hidden_size, num_layers=num_lstm_layers, num_classes=2, dropout=dropout).to(device)
  optimizer = AdamW(model.parameters(), lr=lr)
  criterion = nn.CrossEntropyLoss()

  train_model(model, train_loader, optimizer, criterion, device, epochs, show_progress=True)

  acc, f1 = evaluate_model(model, val_loader, device)
  tqdm.write(f"Validation Accuracy: {acc:.4f}, F1 Score: {f1:.4f}")

  results.append({
      'lr': lr,
      'batch_size': bs,
      'epochs': epochs,
      'dropout': dropout,
      'lstm_hidden_size': lstm_hidden_size,
      'num_lstm_layers': num_lstm_layers,
      'acc': acc,
      'f1': f1
  })
  # Choose the best parameters according to F1 scores, then save the best model.
  if f1 > best_f1:
    best_f1 = f1
    best_params = {
        'learning_rate': lr,
        'batch_size': bs,
        'epochs': epochs,
        'dropout': dropout,
        'lstm_hidden_size': lstm_hidden_size,
        'num_lstm_layers': num_lstm_layers
    }

    # check path exists
    if not os.path.exists(model_dir):
      os.makedirs(model_dir)

    torch.save(model.state_dict(), os.path.join(model_dir, 'best_model_bilstm.pt'))
    tqdm.write("Best model saved.")

with open(os.path.join(model_dir, 'grid_search_results_bilstm.json'), 'w') as f:
  json.dump(results, f, indent=2)

print(f"\nBest Parameters: {best_params}, Best F1 Score: {best_f1:.4f}")


Grid Search Progress:   0%|          | 0/96 [00:00<?, ?it/s]


[1/96] Training with lr=2e-05, bs=16, epochs=3, dropout=0.1, lstm_hidden_size=128, num_lstm_layers=1


Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Grid Search Progress:   0%|          | 0/96 [00:06<?, ?it/s]

Epoch 1/3


Grid Search Progress:   0%|          | 0/96 [00:34<?, ?it/s]

Epoch 1: Avg Loss = 0.5938
Epoch 2/3


Grid Search Progress:   0%|          | 0/96 [01:01<?, ?it/s]

Epoch 2: Avg Loss = 0.5782
Epoch 3/3


Grid Search Progress:   0%|          | 0/96 [01:27<?, ?it/s]

Epoch 3: Avg Loss = 0.5131


Grid Search Progress:   0%|          | 0/96 [01:34<?, ?it/s]

Validation Accuracy: 0.7643, F1 Score: 0.5875


Grid Search Progress:   1%|          | 1/96 [01:34<2:29:16, 94.28s/it]

Best model saved.

[2/96] Training with lr=2e-05, bs=16, epochs=3, dropout=0.1, lstm_hidden_size=128, num_lstm_layers=2


Grid Search Progress:   1%|          | 1/96 [01:35<2:29:16, 94.28s/it]

Epoch 1/3


Grid Search Progress:   1%|          | 1/96 [02:04<2:29:16, 94.28s/it]

Epoch 1: Avg Loss = 0.5889
Epoch 2/3


Grid Search Progress:   1%|          | 1/96 [02:33<2:29:16, 94.28s/it]

Epoch 2: Avg Loss = 0.5767
Epoch 3/3


Grid Search Progress:   1%|          | 1/96 [03:03<2:29:16, 94.28s/it]

Epoch 3: Avg Loss = 0.4570


Grid Search Progress:   2%|▏         | 2/96 [03:10<2:28:53, 95.04s/it]

Validation Accuracy: 0.7887, F1 Score: 0.5470

[3/96] Training with lr=2e-05, bs=16, epochs=3, dropout=0.1, lstm_hidden_size=256, num_lstm_layers=1
Epoch 1/3


Grid Search Progress:   2%|▏         | 2/96 [03:45<2:28:53, 95.04s/it]

Epoch 1: Avg Loss = 0.5889
Epoch 2/3


Grid Search Progress:   2%|▏         | 2/96 [04:20<2:28:53, 95.04s/it]

Epoch 2: Avg Loss = 0.5717
Epoch 3/3


Grid Search Progress:   2%|▏         | 2/96 [04:56<2:28:53, 95.04s/it]

Epoch 3: Avg Loss = 0.4893


Grid Search Progress:   3%|▎         | 3/96 [05:03<2:40:24, 103.49s/it]

Validation Accuracy: 0.6547, F1 Score: 0.5802

[4/96] Training with lr=2e-05, bs=16, epochs=3, dropout=0.1, lstm_hidden_size=256, num_lstm_layers=2
Epoch 1/3


Grid Search Progress:   3%|▎         | 3/96 [05:49<2:40:24, 103.49s/it]

Epoch 1: Avg Loss = 0.5873
Epoch 2/3


Grid Search Progress:   3%|▎         | 3/96 [06:34<2:40:24, 103.49s/it]

Epoch 2: Avg Loss = 0.5588
Epoch 3/3


Grid Search Progress:   3%|▎         | 3/96 [07:19<2:40:24, 103.49s/it]

Epoch 3: Avg Loss = 0.4263


Grid Search Progress:   4%|▍         | 4/96 [07:28<3:03:32, 119.70s/it]

Validation Accuracy: 0.7734, F1 Score: 0.3865

[5/96] Training with lr=2e-05, bs=16, epochs=3, dropout=0.3, lstm_hidden_size=128, num_lstm_layers=1
Epoch 1/3


Grid Search Progress:   4%|▍         | 4/96 [07:55<3:03:32, 119.70s/it]

Epoch 1: Avg Loss = 0.5933
Epoch 2/3


Grid Search Progress:   4%|▍         | 4/96 [08:22<3:03:32, 119.70s/it]

Epoch 2: Avg Loss = 0.5796
Epoch 3/3


Grid Search Progress:   4%|▍         | 4/96 [08:48<3:03:32, 119.70s/it]

Epoch 3: Avg Loss = 0.5470


Grid Search Progress:   5%|▌         | 5/96 [08:55<2:43:48, 108.00s/it]

Validation Accuracy: 0.7271, F1 Score: 0.0335

[6/96] Training with lr=2e-05, bs=16, epochs=3, dropout=0.3, lstm_hidden_size=128, num_lstm_layers=2


Grid Search Progress:   5%|▌         | 5/96 [08:55<2:43:48, 108.00s/it]

Epoch 1/3


Grid Search Progress:   5%|▌         | 5/96 [09:24<2:43:48, 108.00s/it]

Epoch 1: Avg Loss = 0.5900
Epoch 2/3


Grid Search Progress:   5%|▌         | 5/96 [09:54<2:43:48, 108.00s/it]

Epoch 2: Avg Loss = 0.5795
Epoch 3/3


Grid Search Progress:   5%|▌         | 5/96 [10:23<2:43:48, 108.00s/it]

Epoch 3: Avg Loss = 0.4922


Grid Search Progress:   6%|▋         | 6/96 [10:30<2:35:13, 103.49s/it]

Validation Accuracy: 0.7803, F1 Score: 0.5501

[7/96] Training with lr=2e-05, bs=16, epochs=3, dropout=0.3, lstm_hidden_size=256, num_lstm_layers=1
Epoch 1/3


Grid Search Progress:   6%|▋         | 6/96 [11:05<2:35:13, 103.49s/it]

Epoch 1: Avg Loss = 0.5897
Epoch 2/3


Grid Search Progress:   6%|▋         | 6/96 [11:41<2:35:13, 103.49s/it]

Epoch 2: Avg Loss = 0.5789
Epoch 3/3


Grid Search Progress:   6%|▋         | 6/96 [12:17<2:35:13, 103.49s/it]

Epoch 3: Avg Loss = 0.4903


Grid Search Progress:   7%|▋         | 7/96 [12:24<2:38:39, 106.96s/it]

Validation Accuracy: 0.7546, F1 Score: 0.5798

[8/96] Training with lr=2e-05, bs=16, epochs=3, dropout=0.3, lstm_hidden_size=256, num_lstm_layers=2
Epoch 1/3


Grid Search Progress:   7%|▋         | 7/96 [13:10<2:38:39, 106.96s/it]

Epoch 1: Avg Loss = 0.5882
Epoch 2/3


Grid Search Progress:   7%|▋         | 7/96 [13:56<2:38:39, 106.96s/it]

Epoch 2: Avg Loss = 0.5643
Epoch 3/3


Grid Search Progress:   7%|▋         | 7/96 [14:42<2:38:39, 106.96s/it]

Epoch 3: Avg Loss = 0.4440


Grid Search Progress:   8%|▊         | 8/96 [14:51<2:55:28, 119.64s/it]

Validation Accuracy: 0.7892, F1 Score: 0.5320

[9/96] Training with lr=2e-05, bs=16, epochs=5, dropout=0.1, lstm_hidden_size=128, num_lstm_layers=1
Epoch 1/5


Grid Search Progress:   8%|▊         | 8/96 [15:18<2:55:28, 119.64s/it]

Epoch 1: Avg Loss = 0.5900
Epoch 2/5


Grid Search Progress:   8%|▊         | 8/96 [15:46<2:55:28, 119.64s/it]

Epoch 2: Avg Loss = 0.5787
Epoch 3/5


Grid Search Progress:   8%|▊         | 8/96 [16:13<2:55:28, 119.64s/it]

Epoch 3: Avg Loss = 0.5166
Epoch 4/5


Grid Search Progress:   8%|▊         | 8/96 [16:40<2:55:28, 119.64s/it]

Epoch 4: Avg Loss = 0.4381
Epoch 5/5


Grid Search Progress:   8%|▊         | 8/96 [17:08<2:55:28, 119.64s/it]

Epoch 5: Avg Loss = 0.3987


Grid Search Progress:   8%|▊         | 8/96 [17:14<2:55:28, 119.64s/it]

Validation Accuracy: 0.7857, F1 Score: 0.6145


Grid Search Progress:   9%|▉         | 9/96 [17:15<3:04:34, 127.30s/it]

Best model saved.

[10/96] Training with lr=2e-05, bs=16, epochs=5, dropout=0.1, lstm_hidden_size=128, num_lstm_layers=2
Epoch 1/5


Grid Search Progress:   9%|▉         | 9/96 [17:44<3:04:34, 127.30s/it]

Epoch 1: Avg Loss = 0.5914
Epoch 2/5


Grid Search Progress:   9%|▉         | 9/96 [18:14<3:04:34, 127.30s/it]

Epoch 2: Avg Loss = 0.5784
Epoch 3/5


Grid Search Progress:   9%|▉         | 9/96 [18:44<3:04:34, 127.30s/it]

Epoch 3: Avg Loss = 0.4872
Epoch 4/5


Grid Search Progress:   9%|▉         | 9/96 [19:13<3:04:34, 127.30s/it]

Epoch 4: Avg Loss = 0.4016
Epoch 5/5


Grid Search Progress:   9%|▉         | 9/96 [19:43<3:04:34, 127.30s/it]

Epoch 5: Avg Loss = 0.3573


Grid Search Progress:  10%|█         | 10/96 [19:50<3:14:47, 135.90s/it]

Validation Accuracy: 0.8009, F1 Score: 0.6040

[11/96] Training with lr=2e-05, bs=16, epochs=5, dropout=0.1, lstm_hidden_size=256, num_lstm_layers=1
Epoch 1/5


Grid Search Progress:  10%|█         | 10/96 [20:26<3:14:47, 135.90s/it]

Epoch 1: Avg Loss = 0.5889
Epoch 2/5


Grid Search Progress:  10%|█         | 10/96 [21:02<3:14:47, 135.90s/it]

Epoch 2: Avg Loss = 0.5744
Epoch 3/5


Grid Search Progress:  10%|█         | 10/96 [21:38<3:14:47, 135.90s/it]

Epoch 3: Avg Loss = 0.4734
Epoch 4/5


Grid Search Progress:  10%|█         | 10/96 [22:14<3:14:47, 135.90s/it]

Epoch 4: Avg Loss = 0.4189
Epoch 5/5


Grid Search Progress:  10%|█         | 10/96 [22:50<3:14:47, 135.90s/it]

Epoch 5: Avg Loss = 0.3781


Grid Search Progress:  11%|█▏        | 11/96 [22:57<3:34:45, 151.59s/it]

Validation Accuracy: 0.7967, F1 Score: 0.5552

[12/96] Training with lr=2e-05, bs=16, epochs=5, dropout=0.1, lstm_hidden_size=256, num_lstm_layers=2
Epoch 1/5


Grid Search Progress:  11%|█▏        | 11/96 [23:43<3:34:45, 151.59s/it]

Epoch 1: Avg Loss = 0.5871
Epoch 2/5


Grid Search Progress:  11%|█▏        | 11/96 [24:28<3:34:45, 151.59s/it]

Epoch 2: Avg Loss = 0.5503
Epoch 3/5


Grid Search Progress:  11%|█▏        | 11/96 [25:13<3:34:45, 151.59s/it]

Epoch 3: Avg Loss = 0.4149
Epoch 4/5


Grid Search Progress:  11%|█▏        | 11/96 [25:58<3:34:45, 151.59s/it]

Epoch 4: Avg Loss = 0.3648
Epoch 5/5


Grid Search Progress:  11%|█▏        | 11/96 [26:43<3:34:45, 151.59s/it]

Epoch 5: Avg Loss = 0.3265


Grid Search Progress:  11%|█▏        | 11/96 [26:51<3:34:45, 151.59s/it]

Validation Accuracy: 0.7879, F1 Score: 0.6594


Grid Search Progress:  12%|█▎        | 12/96 [26:51<4:07:23, 176.71s/it]

Best model saved.

[13/96] Training with lr=2e-05, bs=16, epochs=5, dropout=0.3, lstm_hidden_size=128, num_lstm_layers=1
Epoch 1/5


Grid Search Progress:  12%|█▎        | 12/96 [27:18<4:07:23, 176.71s/it]

Epoch 1: Avg Loss = 0.5921
Epoch 2/5


Grid Search Progress:  12%|█▎        | 12/96 [27:44<4:07:23, 176.71s/it]

Epoch 2: Avg Loss = 0.5798
Epoch 3/5


Grid Search Progress:  12%|█▎        | 12/96 [28:11<4:07:23, 176.71s/it]

Epoch 3: Avg Loss = 0.5353
Epoch 4/5


Grid Search Progress:  12%|█▎        | 12/96 [28:38<4:07:23, 176.71s/it]

Epoch 4: Avg Loss = 0.4653
Epoch 5/5


Grid Search Progress:  12%|█▎        | 12/96 [29:05<4:07:23, 176.71s/it]

Epoch 5: Avg Loss = 0.4221


Grid Search Progress:  14%|█▎        | 13/96 [29:11<3:48:59, 165.54s/it]

Validation Accuracy: 0.7791, F1 Score: 0.6049

[14/96] Training with lr=2e-05, bs=16, epochs=5, dropout=0.3, lstm_hidden_size=128, num_lstm_layers=2
Epoch 1/5


Grid Search Progress:  14%|█▎        | 13/96 [29:40<3:48:59, 165.54s/it]

Epoch 1: Avg Loss = 0.5905
Epoch 2/5


Grid Search Progress:  14%|█▎        | 13/96 [30:09<3:48:59, 165.54s/it]

Epoch 2: Avg Loss = 0.5791
Epoch 3/5


Grid Search Progress:  14%|█▎        | 13/96 [30:38<3:48:59, 165.54s/it]

Epoch 3: Avg Loss = 0.5072
Epoch 4/5


Grid Search Progress:  14%|█▎        | 13/96 [31:07<3:48:59, 165.54s/it]

Epoch 4: Avg Loss = 0.4212
Epoch 5/5


Grid Search Progress:  14%|█▎        | 13/96 [31:37<3:48:59, 165.54s/it]

Epoch 5: Avg Loss = 0.3740


Grid Search Progress:  15%|█▍        | 14/96 [31:43<3:40:52, 161.61s/it]

Validation Accuracy: 0.7847, F1 Score: 0.4736

[15/96] Training with lr=2e-05, bs=16, epochs=5, dropout=0.3, lstm_hidden_size=256, num_lstm_layers=1
Epoch 1/5


Grid Search Progress:  15%|█▍        | 14/96 [32:19<3:40:52, 161.61s/it]

Epoch 1: Avg Loss = 0.5886
Epoch 2/5


Grid Search Progress:  15%|█▍        | 14/96 [32:54<3:40:52, 161.61s/it]

Epoch 2: Avg Loss = 0.5793
Epoch 3/5


Grid Search Progress:  15%|█▍        | 14/96 [33:29<3:40:52, 161.61s/it]

Epoch 3: Avg Loss = 0.5199
Epoch 4/5


Grid Search Progress:  15%|█▍        | 14/96 [34:04<3:40:52, 161.61s/it]

Epoch 4: Avg Loss = 0.4449
Epoch 5/5


Grid Search Progress:  15%|█▍        | 14/96 [34:39<3:40:52, 161.61s/it]

Epoch 5: Avg Loss = 0.4102


Grid Search Progress:  16%|█▌        | 15/96 [34:46<3:46:53, 168.06s/it]

Validation Accuracy: 0.7892, F1 Score: 0.6271

[16/96] Training with lr=2e-05, bs=16, epochs=5, dropout=0.3, lstm_hidden_size=256, num_lstm_layers=2


Grid Search Progress:  16%|█▌        | 15/96 [34:47<3:46:53, 168.06s/it]

Epoch 1/5


Grid Search Progress:  16%|█▌        | 15/96 [35:32<3:46:53, 168.06s/it]

Epoch 1: Avg Loss = 0.5887
Epoch 2/5


Grid Search Progress:  16%|█▌        | 15/96 [36:17<3:46:53, 168.06s/it]

Epoch 2: Avg Loss = 0.5678
Epoch 3/5


Grid Search Progress:  16%|█▌        | 15/96 [37:02<3:46:53, 168.06s/it]

Epoch 3: Avg Loss = 0.4392
Epoch 4/5


Grid Search Progress:  16%|█▌        | 15/96 [37:47<3:46:53, 168.06s/it]

Epoch 4: Avg Loss = 0.3824
Epoch 5/5


Grid Search Progress:  16%|█▌        | 15/96 [38:32<3:46:53, 168.06s/it]

Epoch 5: Avg Loss = 0.3459


Grid Search Progress:  17%|█▋        | 16/96 [38:40<4:10:21, 187.77s/it]

Validation Accuracy: 0.8054, F1 Score: 0.6109

[17/96] Training with lr=2e-05, bs=16, epochs=10, dropout=0.1, lstm_hidden_size=128, num_lstm_layers=1
Epoch 1/10


Grid Search Progress:  17%|█▋        | 16/96 [39:07<4:10:21, 187.77s/it]

Epoch 1: Avg Loss = 0.5899
Epoch 2/10


Grid Search Progress:  17%|█▋        | 16/96 [39:34<4:10:21, 187.77s/it]

Epoch 2: Avg Loss = 0.5784
Epoch 3/10


Grid Search Progress:  17%|█▋        | 16/96 [40:00<4:10:21, 187.77s/it]

Epoch 3: Avg Loss = 0.5268
Epoch 4/10


Grid Search Progress:  17%|█▋        | 16/96 [40:27<4:10:21, 187.77s/it]

Epoch 4: Avg Loss = 0.4508
Epoch 5/10


Grid Search Progress:  17%|█▋        | 16/96 [40:54<4:10:21, 187.77s/it]

Epoch 5: Avg Loss = 0.4108
Epoch 6/10


Grid Search Progress:  17%|█▋        | 16/96 [41:20<4:10:21, 187.77s/it]

Epoch 6: Avg Loss = 0.3770
Epoch 7/10


Grid Search Progress:  17%|█▋        | 16/96 [41:47<4:10:21, 187.77s/it]

Epoch 7: Avg Loss = 0.3455
Epoch 8/10


Grid Search Progress:  17%|█▋        | 16/96 [42:14<4:10:21, 187.77s/it]

Epoch 8: Avg Loss = 0.3182
Epoch 9/10


Grid Search Progress:  17%|█▋        | 16/96 [42:41<4:10:21, 187.77s/it]

Epoch 9: Avg Loss = 0.2932
Epoch 10/10


Grid Search Progress:  17%|█▋        | 16/96 [43:07<4:10:21, 187.77s/it]

Epoch 10: Avg Loss = 0.2745


Grid Search Progress:  18%|█▊        | 17/96 [43:14<4:41:18, 213.66s/it]

Validation Accuracy: 0.7933, F1 Score: 0.6093

[18/96] Training with lr=2e-05, bs=16, epochs=10, dropout=0.1, lstm_hidden_size=128, num_lstm_layers=2


Grid Search Progress:  18%|█▊        | 17/96 [43:14<4:41:18, 213.66s/it]

Epoch 1/10


Grid Search Progress:  18%|█▊        | 17/96 [43:43<4:41:18, 213.66s/it]

Epoch 1: Avg Loss = 0.5929
Epoch 2/10


Grid Search Progress:  18%|█▊        | 17/96 [44:12<4:41:18, 213.66s/it]

Epoch 2: Avg Loss = 0.5749
Epoch 3/10


Grid Search Progress:  18%|█▊        | 17/96 [44:41<4:41:18, 213.66s/it]

Epoch 3: Avg Loss = 0.4492
Epoch 4/10


Grid Search Progress:  18%|█▊        | 17/96 [45:11<4:41:18, 213.66s/it]

Epoch 4: Avg Loss = 0.3880
Epoch 5/10


Grid Search Progress:  18%|█▊        | 17/96 [45:40<4:41:18, 213.66s/it]

Epoch 5: Avg Loss = 0.3419
Epoch 6/10


Grid Search Progress:  18%|█▊        | 17/96 [46:09<4:41:18, 213.66s/it]

Epoch 6: Avg Loss = 0.3031
Epoch 7/10


Grid Search Progress:  18%|█▊        | 17/96 [46:38<4:41:18, 213.66s/it]

Epoch 7: Avg Loss = 0.2683
Epoch 8/10


Grid Search Progress:  18%|█▊        | 17/96 [47:07<4:41:18, 213.66s/it]

Epoch 8: Avg Loss = 0.2409
Epoch 9/10


Grid Search Progress:  18%|█▊        | 17/96 [47:37<4:41:18, 213.66s/it]

Epoch 9: Avg Loss = 0.2142
Epoch 10/10


Grid Search Progress:  18%|█▊        | 17/96 [48:06<4:41:18, 213.66s/it]

Epoch 10: Avg Loss = 0.1957


Grid Search Progress:  19%|█▉        | 18/96 [48:12<5:10:58, 239.21s/it]

Validation Accuracy: 0.7801, F1 Score: 0.6211

[19/96] Training with lr=2e-05, bs=16, epochs=10, dropout=0.1, lstm_hidden_size=256, num_lstm_layers=1


Grid Search Progress:  19%|█▉        | 18/96 [48:13<5:10:58, 239.21s/it]

Epoch 1/10


Grid Search Progress:  19%|█▉        | 18/96 [48:48<5:10:58, 239.21s/it]

Epoch 1: Avg Loss = 0.5892
Epoch 2/10


Grid Search Progress:  19%|█▉        | 18/96 [49:23<5:10:58, 239.21s/it]

Epoch 2: Avg Loss = 0.5777
Epoch 3/10


Grid Search Progress:  19%|█▉        | 18/96 [49:58<5:10:58, 239.21s/it]

Epoch 3: Avg Loss = 0.4915
Epoch 4/10


Grid Search Progress:  19%|█▉        | 18/96 [50:33<5:10:58, 239.21s/it]

Epoch 4: Avg Loss = 0.4418
Epoch 5/10


Grid Search Progress:  19%|█▉        | 18/96 [51:09<5:10:58, 239.21s/it]

Epoch 5: Avg Loss = 0.4033
Epoch 6/10


Grid Search Progress:  19%|█▉        | 18/96 [51:44<5:10:58, 239.21s/it]

Epoch 6: Avg Loss = 0.3710
Epoch 7/10


Grid Search Progress:  19%|█▉        | 18/96 [52:19<5:10:58, 239.21s/it]

Epoch 7: Avg Loss = 0.3439
Epoch 8/10


Grid Search Progress:  19%|█▉        | 18/96 [52:55<5:10:58, 239.21s/it]

Epoch 8: Avg Loss = 0.3189
Epoch 9/10


Grid Search Progress:  19%|█▉        | 18/96 [53:31<5:10:58, 239.21s/it]

Epoch 9: Avg Loss = 0.2919
Epoch 10/10


Grid Search Progress:  19%|█▉        | 18/96 [54:07<5:10:58, 239.21s/it]

Epoch 10: Avg Loss = 0.2708


Grid Search Progress:  20%|█▉        | 19/96 [54:14<5:54:04, 275.90s/it]

Validation Accuracy: 0.8000, F1 Score: 0.5739

[20/96] Training with lr=2e-05, bs=16, epochs=10, dropout=0.1, lstm_hidden_size=256, num_lstm_layers=2


Grid Search Progress:  20%|█▉        | 19/96 [54:14<5:54:04, 275.90s/it]

Epoch 1/10


Grid Search Progress:  20%|█▉        | 19/96 [54:59<5:54:04, 275.90s/it]

Epoch 1: Avg Loss = 0.5875
Epoch 2/10


Grid Search Progress:  20%|█▉        | 19/96 [55:43<5:54:04, 275.90s/it]

Epoch 2: Avg Loss = 0.5514
Epoch 3/10


Grid Search Progress:  20%|█▉        | 19/96 [56:28<5:54:04, 275.90s/it]

Epoch 3: Avg Loss = 0.4168
Epoch 4/10


Grid Search Progress:  20%|█▉        | 19/96 [57:13<5:54:04, 275.90s/it]

Epoch 4: Avg Loss = 0.3674
Epoch 5/10


Grid Search Progress:  20%|█▉        | 19/96 [57:58<5:54:04, 275.90s/it]

Epoch 5: Avg Loss = 0.3269
Epoch 6/10


Grid Search Progress:  20%|█▉        | 19/96 [58:42<5:54:04, 275.90s/it]

Epoch 6: Avg Loss = 0.2929
Epoch 7/10


Grid Search Progress:  20%|█▉        | 19/96 [59:27<5:54:04, 275.90s/it]

Epoch 7: Avg Loss = 0.2624
Epoch 8/10


Grid Search Progress:  20%|█▉        | 19/96 [1:00:12<5:54:04, 275.90s/it]

Epoch 8: Avg Loss = 0.2360
Epoch 9/10


Grid Search Progress:  20%|█▉        | 19/96 [1:00:57<5:54:04, 275.90s/it]

Epoch 9: Avg Loss = 0.2080
Epoch 10/10


Grid Search Progress:  20%|█▉        | 19/96 [1:01:41<5:54:04, 275.90s/it]

Epoch 10: Avg Loss = 0.1911


Grid Search Progress:  21%|██        | 20/96 [1:01:49<6:57:45, 329.82s/it]

Validation Accuracy: 0.7985, F1 Score: 0.5958

[21/96] Training with lr=2e-05, bs=16, epochs=10, dropout=0.3, lstm_hidden_size=128, num_lstm_layers=1
Epoch 1/10


Grid Search Progress:  21%|██        | 20/96 [1:02:16<6:57:45, 329.82s/it]

Epoch 1: Avg Loss = 0.5923
Epoch 2/10


Grid Search Progress:  21%|██        | 20/96 [1:02:43<6:57:45, 329.82s/it]

Epoch 2: Avg Loss = 0.5793
Epoch 3/10


Grid Search Progress:  21%|██        | 20/96 [1:03:10<6:57:45, 329.82s/it]

Epoch 3: Avg Loss = 0.5305
Epoch 4/10


Grid Search Progress:  21%|██        | 20/96 [1:03:36<6:57:45, 329.82s/it]

Epoch 4: Avg Loss = 0.4556
Epoch 5/10


Grid Search Progress:  21%|██        | 20/96 [1:04:03<6:57:45, 329.82s/it]

Epoch 5: Avg Loss = 0.4151
Epoch 6/10


Grid Search Progress:  21%|██        | 20/96 [1:04:30<6:57:45, 329.82s/it]

Epoch 6: Avg Loss = 0.3788
Epoch 7/10


Grid Search Progress:  21%|██        | 20/96 [1:04:56<6:57:45, 329.82s/it]

Epoch 7: Avg Loss = 0.3470
Epoch 8/10


Grid Search Progress:  21%|██        | 20/96 [1:05:23<6:57:45, 329.82s/it]

Epoch 8: Avg Loss = 0.3152
Epoch 9/10


Grid Search Progress:  21%|██        | 20/96 [1:05:49<6:57:45, 329.82s/it]

Epoch 9: Avg Loss = 0.2919
Epoch 10/10


Grid Search Progress:  21%|██        | 20/96 [1:06:16<6:57:45, 329.82s/it]

Epoch 10: Avg Loss = 0.2693


Grid Search Progress:  22%|██▏       | 21/96 [1:06:22<6:30:50, 312.67s/it]

Validation Accuracy: 0.7847, F1 Score: 0.6323

[22/96] Training with lr=2e-05, bs=16, epochs=10, dropout=0.3, lstm_hidden_size=128, num_lstm_layers=2


Grid Search Progress:  22%|██▏       | 21/96 [1:06:22<6:30:50, 312.67s/it]

Epoch 1/10


Grid Search Progress:  22%|██▏       | 21/96 [1:06:51<6:30:50, 312.67s/it]

Epoch 1: Avg Loss = 0.5921
Epoch 2/10


Grid Search Progress:  22%|██▏       | 21/96 [1:07:20<6:30:50, 312.67s/it]

Epoch 2: Avg Loss = 0.5785
Epoch 3/10


Grid Search Progress:  22%|██▏       | 21/96 [1:07:50<6:30:50, 312.67s/it]

Epoch 3: Avg Loss = 0.4881
Epoch 4/10


Grid Search Progress:  22%|██▏       | 21/96 [1:08:19<6:30:50, 312.67s/it]

Epoch 4: Avg Loss = 0.4165
Epoch 5/10


Grid Search Progress:  22%|██▏       | 21/96 [1:08:48<6:30:50, 312.67s/it]

Epoch 5: Avg Loss = 0.3772
Epoch 6/10


Grid Search Progress:  22%|██▏       | 21/96 [1:09:18<6:30:50, 312.67s/it]

Epoch 6: Avg Loss = 0.3363
Epoch 7/10


Grid Search Progress:  22%|██▏       | 21/96 [1:09:47<6:30:50, 312.67s/it]

Epoch 7: Avg Loss = 0.2995
Epoch 8/10


Grid Search Progress:  22%|██▏       | 21/96 [1:10:16<6:30:50, 312.67s/it]

Epoch 8: Avg Loss = 0.2672
Epoch 9/10


Grid Search Progress:  22%|██▏       | 21/96 [1:10:46<6:30:50, 312.67s/it]

Epoch 9: Avg Loss = 0.2407
Epoch 10/10


Grid Search Progress:  22%|██▏       | 21/96 [1:11:15<6:30:50, 312.67s/it]

Epoch 10: Avg Loss = 0.2227


Grid Search Progress:  23%|██▎       | 22/96 [1:11:22<6:20:47, 308.75s/it]

Validation Accuracy: 0.7918, F1 Score: 0.5820

[23/96] Training with lr=2e-05, bs=16, epochs=10, dropout=0.3, lstm_hidden_size=256, num_lstm_layers=1
Epoch 1/10


Grid Search Progress:  23%|██▎       | 22/96 [1:11:57<6:20:47, 308.75s/it]

Epoch 1: Avg Loss = 0.5902
Epoch 2/10


Grid Search Progress:  23%|██▎       | 22/96 [1:12:32<6:20:47, 308.75s/it]

Epoch 2: Avg Loss = 0.5789
Epoch 3/10


Grid Search Progress:  23%|██▎       | 22/96 [1:13:07<6:20:47, 308.75s/it]

Epoch 3: Avg Loss = 0.5030
Epoch 4/10


Grid Search Progress:  23%|██▎       | 22/96 [1:13:42<6:20:47, 308.75s/it]

Epoch 4: Avg Loss = 0.4341
Epoch 5/10


Grid Search Progress:  23%|██▎       | 22/96 [1:14:17<6:20:47, 308.75s/it]

Epoch 5: Avg Loss = 0.3909
Epoch 6/10


Grid Search Progress:  23%|██▎       | 22/96 [1:14:52<6:20:47, 308.75s/it]

Epoch 6: Avg Loss = 0.3507
Epoch 7/10


Grid Search Progress:  23%|██▎       | 22/96 [1:15:27<6:20:47, 308.75s/it]

Epoch 7: Avg Loss = 0.3121
Epoch 8/10


Grid Search Progress:  23%|██▎       | 22/96 [1:16:02<6:20:47, 308.75s/it]

Epoch 8: Avg Loss = 0.2827
Epoch 9/10


Grid Search Progress:  23%|██▎       | 22/96 [1:16:37<6:20:47, 308.75s/it]

Epoch 9: Avg Loss = 0.2558
Epoch 10/10


Grid Search Progress:  23%|██▎       | 22/96 [1:17:12<6:20:47, 308.75s/it]

Epoch 10: Avg Loss = 0.2330


Grid Search Progress:  24%|██▍       | 23/96 [1:17:19<6:33:33, 323.47s/it]

Validation Accuracy: 0.7962, F1 Score: 0.6138

[24/96] Training with lr=2e-05, bs=16, epochs=10, dropout=0.3, lstm_hidden_size=256, num_lstm_layers=2


Grid Search Progress:  24%|██▍       | 23/96 [1:17:20<6:33:33, 323.47s/it]

Epoch 1/10


Grid Search Progress:  24%|██▍       | 23/96 [1:18:05<6:33:33, 323.47s/it]

Epoch 1: Avg Loss = 0.5893
Epoch 2/10


Grid Search Progress:  24%|██▍       | 23/96 [1:18:50<6:33:33, 323.47s/it]

Epoch 2: Avg Loss = 0.5691
Epoch 3/10


Grid Search Progress:  24%|██▍       | 23/96 [1:19:35<6:33:33, 323.47s/it]

Epoch 3: Avg Loss = 0.4546
Epoch 4/10


Grid Search Progress:  24%|██▍       | 23/96 [1:20:20<6:33:33, 323.47s/it]

Epoch 4: Avg Loss = 0.3986
Epoch 5/10


Grid Search Progress:  24%|██▍       | 23/96 [1:21:04<6:33:33, 323.47s/it]

Epoch 5: Avg Loss = 0.3549
Epoch 6/10


Grid Search Progress:  24%|██▍       | 23/96 [1:21:49<6:33:33, 323.47s/it]

Epoch 6: Avg Loss = 0.3137
Epoch 7/10


Grid Search Progress:  24%|██▍       | 23/96 [1:22:34<6:33:33, 323.47s/it]

Epoch 7: Avg Loss = 0.2804
Epoch 8/10


Grid Search Progress:  24%|██▍       | 23/96 [1:23:19<6:33:33, 323.47s/it]

Epoch 8: Avg Loss = 0.2506
Epoch 9/10


Grid Search Progress:  24%|██▍       | 23/96 [1:24:04<6:33:33, 323.47s/it]

Epoch 9: Avg Loss = 0.2258
Epoch 10/10


Grid Search Progress:  24%|██▍       | 23/96 [1:24:48<6:33:33, 323.47s/it]

Epoch 10: Avg Loss = 0.2053


Grid Search Progress:  25%|██▌       | 24/96 [1:24:56<7:16:11, 363.49s/it]

Validation Accuracy: 0.7933, F1 Score: 0.6130

[25/96] Training with lr=2e-05, bs=32, epochs=3, dropout=0.1, lstm_hidden_size=128, num_lstm_layers=1


Grid Search Progress:  25%|██▌       | 24/96 [1:24:56<7:16:11, 363.49s/it]

Epoch 1/3


Grid Search Progress:  25%|██▌       | 24/96 [1:25:21<7:16:11, 363.49s/it]

Epoch 1: Avg Loss = 0.5972
Epoch 2/3


Grid Search Progress:  25%|██▌       | 24/96 [1:25:45<7:16:11, 363.49s/it]

Epoch 2: Avg Loss = 0.5807
Epoch 3/3


Grid Search Progress:  25%|██▌       | 24/96 [1:26:09<7:16:11, 363.49s/it]

Epoch 3: Avg Loss = 0.5748


Grid Search Progress:  26%|██▌       | 25/96 [1:26:15<5:28:59, 278.02s/it]

Validation Accuracy: 0.7233, F1 Score: 0.0000

[26/96] Training with lr=2e-05, bs=32, epochs=3, dropout=0.1, lstm_hidden_size=128, num_lstm_layers=2
Epoch 1/3


Grid Search Progress:  26%|██▌       | 25/96 [1:26:41<5:28:59, 278.02s/it]

Epoch 1: Avg Loss = 0.5961
Epoch 2/3


Grid Search Progress:  26%|██▌       | 25/96 [1:27:06<5:28:59, 278.02s/it]

Epoch 2: Avg Loss = 0.5817
Epoch 3/3


Grid Search Progress:  26%|██▌       | 25/96 [1:27:32<5:28:59, 278.02s/it]

Epoch 3: Avg Loss = 0.5752


Grid Search Progress:  27%|██▋       | 26/96 [1:27:38<4:16:13, 219.63s/it]

Validation Accuracy: 0.7233, F1 Score: 0.0000

[27/96] Training with lr=2e-05, bs=32, epochs=3, dropout=0.1, lstm_hidden_size=256, num_lstm_layers=1
Epoch 1/3


Grid Search Progress:  27%|██▋       | 26/96 [1:28:07<4:16:13, 219.63s/it]

Epoch 1: Avg Loss = 0.5946
Epoch 2/3


Grid Search Progress:  27%|██▋       | 26/96 [1:28:36<4:16:13, 219.63s/it]

Epoch 2: Avg Loss = 0.5812
Epoch 3/3


Grid Search Progress:  27%|██▋       | 26/96 [1:29:05<4:16:13, 219.63s/it]

Epoch 3: Avg Loss = 0.5707


Grid Search Progress:  28%|██▊       | 27/96 [1:29:11<3:28:46, 181.55s/it]

Validation Accuracy: 0.7233, F1 Score: 0.0000

[28/96] Training with lr=2e-05, bs=32, epochs=3, dropout=0.1, lstm_hidden_size=256, num_lstm_layers=2
Epoch 1/3


Grid Search Progress:  28%|██▊       | 27/96 [1:29:44<3:28:46, 181.55s/it]

Epoch 1: Avg Loss = 0.5910
Epoch 2/3


Grid Search Progress:  28%|██▊       | 27/96 [1:30:17<3:28:46, 181.55s/it]

Epoch 2: Avg Loss = 0.5818
Epoch 3/3


Grid Search Progress:  28%|██▊       | 27/96 [1:30:51<3:28:46, 181.55s/it]

Epoch 3: Avg Loss = 0.5337


Grid Search Progress:  29%|██▉       | 28/96 [1:30:58<3:00:16, 159.06s/it]

Validation Accuracy: 0.7828, F1 Score: 0.5389

[29/96] Training with lr=2e-05, bs=32, epochs=3, dropout=0.3, lstm_hidden_size=128, num_lstm_layers=1


Grid Search Progress:  29%|██▉       | 28/96 [1:30:58<3:00:16, 159.06s/it]

Epoch 1/3


Grid Search Progress:  29%|██▉       | 28/96 [1:31:22<3:00:16, 159.06s/it]

Epoch 1: Avg Loss = 0.5978
Epoch 2/3


Grid Search Progress:  29%|██▉       | 28/96 [1:31:46<3:00:16, 159.06s/it]

Epoch 2: Avg Loss = 0.5817
Epoch 3/3


Grid Search Progress:  29%|██▉       | 28/96 [1:32:10<3:00:16, 159.06s/it]

Epoch 3: Avg Loss = 0.5777


Grid Search Progress:  30%|███       | 29/96 [1:32:17<2:30:47, 135.04s/it]

Validation Accuracy: 0.7233, F1 Score: 0.0000

[30/96] Training with lr=2e-05, bs=32, epochs=3, dropout=0.3, lstm_hidden_size=128, num_lstm_layers=2
Epoch 1/3


Grid Search Progress:  30%|███       | 29/96 [1:32:42<2:30:47, 135.04s/it]

Epoch 1: Avg Loss = 0.6007
Epoch 2/3


Grid Search Progress:  30%|███       | 29/96 [1:33:08<2:30:47, 135.04s/it]

Epoch 2: Avg Loss = 0.5813
Epoch 3/3


Grid Search Progress:  30%|███       | 29/96 [1:33:34<2:30:47, 135.04s/it]

Epoch 3: Avg Loss = 0.5756


Grid Search Progress:  31%|███▏      | 30/96 [1:33:40<2:11:30, 119.55s/it]

Validation Accuracy: 0.7233, F1 Score: 0.0000

[31/96] Training with lr=2e-05, bs=32, epochs=3, dropout=0.3, lstm_hidden_size=256, num_lstm_layers=1
Epoch 1/3


Grid Search Progress:  31%|███▏      | 30/96 [1:34:09<2:11:30, 119.55s/it]

Epoch 1: Avg Loss = 0.5947
Epoch 2/3


Grid Search Progress:  31%|███▏      | 30/96 [1:34:38<2:11:30, 119.55s/it]

Epoch 2: Avg Loss = 0.5811
Epoch 3/3


Grid Search Progress:  31%|███▏      | 30/96 [1:35:06<2:11:30, 119.55s/it]

Epoch 3: Avg Loss = 0.5747


Grid Search Progress:  32%|███▏      | 31/96 [1:35:13<2:00:45, 111.48s/it]

Validation Accuracy: 0.7233, F1 Score: 0.0000

[32/96] Training with lr=2e-05, bs=32, epochs=3, dropout=0.3, lstm_hidden_size=256, num_lstm_layers=2
Epoch 1/3


Grid Search Progress:  32%|███▏      | 31/96 [1:35:46<2:00:45, 111.48s/it]

Epoch 1: Avg Loss = 0.5913
Epoch 2/3


Grid Search Progress:  32%|███▏      | 31/96 [1:36:19<2:00:45, 111.48s/it]

Epoch 2: Avg Loss = 0.5819
Epoch 3/3


Grid Search Progress:  32%|███▏      | 31/96 [1:36:52<2:00:45, 111.48s/it]

Epoch 3: Avg Loss = 0.5419


Grid Search Progress:  33%|███▎      | 32/96 [1:36:59<1:57:20, 110.02s/it]

Validation Accuracy: 0.7594, F1 Score: 0.5947

[33/96] Training with lr=2e-05, bs=32, epochs=5, dropout=0.1, lstm_hidden_size=128, num_lstm_layers=1
Epoch 1/5


Grid Search Progress:  33%|███▎      | 32/96 [1:37:24<1:57:20, 110.02s/it]

Epoch 1: Avg Loss = 0.5998
Epoch 2/5


Grid Search Progress:  33%|███▎      | 32/96 [1:37:48<1:57:20, 110.02s/it]

Epoch 2: Avg Loss = 0.5811
Epoch 3/5


Grid Search Progress:  33%|███▎      | 32/96 [1:38:12<1:57:20, 110.02s/it]

Epoch 3: Avg Loss = 0.5767
Epoch 4/5


Grid Search Progress:  33%|███▎      | 32/96 [1:38:36<1:57:20, 110.02s/it]

Epoch 4: Avg Loss = 0.5280
Epoch 5/5


Grid Search Progress:  33%|███▎      | 32/96 [1:39:00<1:57:20, 110.02s/it]

Epoch 5: Avg Loss = 0.4533


Grid Search Progress:  34%|███▍      | 33/96 [1:39:06<2:00:43, 114.98s/it]

Validation Accuracy: 0.7602, F1 Score: 0.3412

[34/96] Training with lr=2e-05, bs=32, epochs=5, dropout=0.1, lstm_hidden_size=128, num_lstm_layers=2
Epoch 1/5


Grid Search Progress:  34%|███▍      | 33/96 [1:39:31<2:00:43, 114.98s/it]

Epoch 1: Avg Loss = 0.5950
Epoch 2/5


Grid Search Progress:  34%|███▍      | 33/96 [1:39:57<2:00:43, 114.98s/it]

Epoch 2: Avg Loss = 0.5816
Epoch 3/5


Grid Search Progress:  34%|███▍      | 33/96 [1:40:23<2:00:43, 114.98s/it]

Epoch 3: Avg Loss = 0.5704
Epoch 4/5


Grid Search Progress:  34%|███▍      | 33/96 [1:40:48<2:00:43, 114.98s/it]

Epoch 4: Avg Loss = 0.4648
Epoch 5/5


Grid Search Progress:  34%|███▍      | 33/96 [1:41:14<2:00:43, 114.98s/it]

Epoch 5: Avg Loss = 0.4031


Grid Search Progress:  35%|███▌      | 34/96 [1:41:20<2:04:49, 120.80s/it]

Validation Accuracy: 0.7827, F1 Score: 0.6162

[35/96] Training with lr=2e-05, bs=32, epochs=5, dropout=0.1, lstm_hidden_size=256, num_lstm_layers=1
Epoch 1/5


Grid Search Progress:  35%|███▌      | 34/96 [1:41:49<2:04:49, 120.80s/it]

Epoch 1: Avg Loss = 0.5927
Epoch 2/5


Grid Search Progress:  35%|███▌      | 34/96 [1:42:17<2:04:49, 120.80s/it]

Epoch 2: Avg Loss = 0.5809
Epoch 3/5


Grid Search Progress:  35%|███▌      | 34/96 [1:42:46<2:04:49, 120.80s/it]

Epoch 3: Avg Loss = 0.5659
Epoch 4/5


Grid Search Progress:  35%|███▌      | 34/96 [1:43:14<2:04:49, 120.80s/it]

Epoch 4: Avg Loss = 0.4699
Epoch 5/5


Grid Search Progress:  35%|███▌      | 34/96 [1:43:43<2:04:49, 120.80s/it]

Epoch 5: Avg Loss = 0.4149


Grid Search Progress:  36%|███▋      | 35/96 [1:43:49<2:11:24, 129.25s/it]

Validation Accuracy: 0.7649, F1 Score: 0.6250

[36/96] Training with lr=2e-05, bs=32, epochs=5, dropout=0.1, lstm_hidden_size=256, num_lstm_layers=2
Epoch 1/5


Grid Search Progress:  36%|███▋      | 35/96 [1:44:23<2:11:24, 129.25s/it]

Epoch 1: Avg Loss = 0.5932
Epoch 2/5


Grid Search Progress:  36%|███▋      | 35/96 [1:44:56<2:11:24, 129.25s/it]

Epoch 2: Avg Loss = 0.5815
Epoch 3/5


Grid Search Progress:  36%|███▋      | 35/96 [1:45:29<2:11:24, 129.25s/it]

Epoch 3: Avg Loss = 0.5178
Epoch 4/5


Grid Search Progress:  36%|███▋      | 35/96 [1:46:02<2:11:24, 129.25s/it]

Epoch 4: Avg Loss = 0.4178
Epoch 5/5


Grid Search Progress:  36%|███▋      | 35/96 [1:46:36<2:11:24, 129.25s/it]

Epoch 5: Avg Loss = 0.3727


Grid Search Progress:  38%|███▊      | 36/96 [1:46:43<2:22:34, 142.57s/it]

Validation Accuracy: 0.7967, F1 Score: 0.5719

[37/96] Training with lr=2e-05, bs=32, epochs=5, dropout=0.3, lstm_hidden_size=128, num_lstm_layers=1
Epoch 1/5


Grid Search Progress:  38%|███▊      | 36/96 [1:47:07<2:22:34, 142.57s/it]

Epoch 1: Avg Loss = 0.5953
Epoch 2/5


Grid Search Progress:  38%|███▊      | 36/96 [1:47:32<2:22:34, 142.57s/it]

Epoch 2: Avg Loss = 0.5827
Epoch 3/5


Grid Search Progress:  38%|███▊      | 36/96 [1:47:57<2:22:34, 142.57s/it]

Epoch 3: Avg Loss = 0.5783
Epoch 4/5


Grid Search Progress:  38%|███▊      | 36/96 [1:48:22<2:22:34, 142.57s/it]

Epoch 4: Avg Loss = 0.5473
Epoch 5/5


Grid Search Progress:  38%|███▊      | 36/96 [1:48:47<2:22:34, 142.57s/it]

Epoch 5: Avg Loss = 0.4595


Grid Search Progress:  39%|███▊      | 37/96 [1:48:53<2:16:38, 138.95s/it]

Validation Accuracy: 0.7533, F1 Score: 0.6016

[38/96] Training with lr=2e-05, bs=32, epochs=5, dropout=0.3, lstm_hidden_size=128, num_lstm_layers=2
Epoch 1/5


Grid Search Progress:  39%|███▊      | 37/96 [1:49:20<2:16:38, 138.95s/it]

Epoch 1: Avg Loss = 0.5933
Epoch 2/5


Grid Search Progress:  39%|███▊      | 37/96 [1:49:46<2:16:38, 138.95s/it]

Epoch 2: Avg Loss = 0.5824
Epoch 3/5


Grid Search Progress:  39%|███▊      | 37/96 [1:50:12<2:16:38, 138.95s/it]

Epoch 3: Avg Loss = 0.5769
Epoch 4/5


Grid Search Progress:  39%|███▊      | 37/96 [1:50:39<2:16:38, 138.95s/it]

Epoch 4: Avg Loss = 0.4714
Epoch 5/5


Grid Search Progress:  39%|███▊      | 37/96 [1:51:05<2:16:38, 138.95s/it]

Epoch 5: Avg Loss = 0.4040


Grid Search Progress:  40%|███▉      | 38/96 [1:51:11<2:14:00, 138.63s/it]

Validation Accuracy: 0.7857, F1 Score: 0.6095

[39/96] Training with lr=2e-05, bs=32, epochs=5, dropout=0.3, lstm_hidden_size=256, num_lstm_layers=1
Epoch 1/5


Grid Search Progress:  40%|███▉      | 38/96 [1:51:41<2:14:00, 138.63s/it]

Epoch 1: Avg Loss = 0.5928
Epoch 2/5


Grid Search Progress:  40%|███▉      | 38/96 [1:52:10<2:14:00, 138.63s/it]

Epoch 2: Avg Loss = 0.5818
Epoch 3/5


Grid Search Progress:  40%|███▉      | 38/96 [1:52:39<2:14:00, 138.63s/it]

Epoch 3: Avg Loss = 0.5715
Epoch 4/5


Grid Search Progress:  40%|███▉      | 38/96 [1:53:07<2:14:00, 138.63s/it]

Epoch 4: Avg Loss = 0.4724
Epoch 5/5


Grid Search Progress:  40%|███▉      | 38/96 [1:53:36<2:14:00, 138.63s/it]

Epoch 5: Avg Loss = 0.4199


Grid Search Progress:  41%|████      | 39/96 [1:53:42<2:15:10, 142.29s/it]

Validation Accuracy: 0.7810, F1 Score: 0.5301

[40/96] Training with lr=2e-05, bs=32, epochs=5, dropout=0.3, lstm_hidden_size=256, num_lstm_layers=2
Epoch 1/5


Grid Search Progress:  41%|████      | 39/96 [1:54:16<2:15:10, 142.29s/it]

Epoch 1: Avg Loss = 0.5919
Epoch 2/5


Grid Search Progress:  41%|████      | 39/96 [1:54:49<2:15:10, 142.29s/it]

Epoch 2: Avg Loss = 0.5816
Epoch 3/5


Grid Search Progress:  41%|████      | 39/96 [1:55:23<2:15:10, 142.29s/it]

Epoch 3: Avg Loss = 0.5308
Epoch 4/5


Grid Search Progress:  41%|████      | 39/96 [1:55:56<2:15:10, 142.29s/it]

Epoch 4: Avg Loss = 0.4202
Epoch 5/5


Grid Search Progress:  41%|████      | 39/96 [1:56:29<2:15:10, 142.29s/it]

Epoch 5: Avg Loss = 0.3714


Grid Search Progress:  42%|████▏     | 40/96 [1:56:37<2:21:46, 151.91s/it]

Validation Accuracy: 0.8032, F1 Score: 0.6345

[41/96] Training with lr=2e-05, bs=32, epochs=10, dropout=0.1, lstm_hidden_size=128, num_lstm_layers=1
Epoch 1/10


Grid Search Progress:  42%|████▏     | 40/96 [1:57:01<2:21:46, 151.91s/it]

Epoch 1: Avg Loss = 0.5970
Epoch 2/10


Grid Search Progress:  42%|████▏     | 40/96 [1:57:25<2:21:46, 151.91s/it]

Epoch 2: Avg Loss = 0.5811
Epoch 3/10


Grid Search Progress:  42%|████▏     | 40/96 [1:57:50<2:21:46, 151.91s/it]

Epoch 3: Avg Loss = 0.5768
Epoch 4/10


Grid Search Progress:  42%|████▏     | 40/96 [1:58:14<2:21:46, 151.91s/it]

Epoch 4: Avg Loss = 0.5355
Epoch 5/10


Grid Search Progress:  42%|████▏     | 40/96 [1:58:38<2:21:46, 151.91s/it]

Epoch 5: Avg Loss = 0.4519
Epoch 6/10


Grid Search Progress:  42%|████▏     | 40/96 [1:59:03<2:21:46, 151.91s/it]

Epoch 6: Avg Loss = 0.4133
Epoch 7/10


Grid Search Progress:  42%|████▏     | 40/96 [1:59:27<2:21:46, 151.91s/it]

Epoch 7: Avg Loss = 0.3775
Epoch 8/10


Grid Search Progress:  42%|████▏     | 40/96 [1:59:52<2:21:46, 151.91s/it]

Epoch 8: Avg Loss = 0.3458
Epoch 9/10


Grid Search Progress:  42%|████▏     | 40/96 [2:00:16<2:21:46, 151.91s/it]

Epoch 9: Avg Loss = 0.3166
Epoch 10/10


Grid Search Progress:  42%|████▏     | 40/96 [2:00:41<2:21:46, 151.91s/it]

Epoch 10: Avg Loss = 0.2895


Grid Search Progress:  43%|████▎     | 41/96 [2:00:47<2:46:21, 181.49s/it]

Validation Accuracy: 0.7965, F1 Score: 0.6323

[42/96] Training with lr=2e-05, bs=32, epochs=10, dropout=0.1, lstm_hidden_size=128, num_lstm_layers=2


Grid Search Progress:  43%|████▎     | 41/96 [2:00:47<2:46:21, 181.49s/it]

Epoch 1/10


Grid Search Progress:  43%|████▎     | 41/96 [2:01:13<2:46:21, 181.49s/it]

Epoch 1: Avg Loss = 0.5972
Epoch 2/10


Grid Search Progress:  43%|████▎     | 41/96 [2:01:39<2:46:21, 181.49s/it]

Epoch 2: Avg Loss = 0.5818
Epoch 3/10


Grid Search Progress:  43%|████▎     | 41/96 [2:02:05<2:46:21, 181.49s/it]

Epoch 3: Avg Loss = 0.5723
Epoch 4/10


Grid Search Progress:  43%|████▎     | 41/96 [2:02:31<2:46:21, 181.49s/it]

Epoch 4: Avg Loss = 0.4712
Epoch 5/10


Grid Search Progress:  43%|████▎     | 41/96 [2:02:56<2:46:21, 181.49s/it]

Epoch 5: Avg Loss = 0.4144
Epoch 6/10


Grid Search Progress:  43%|████▎     | 41/96 [2:03:22<2:46:21, 181.49s/it]

Epoch 6: Avg Loss = 0.3746
Epoch 7/10


Grid Search Progress:  43%|████▎     | 41/96 [2:03:48<2:46:21, 181.49s/it]

Epoch 7: Avg Loss = 0.3373
Epoch 8/10


Grid Search Progress:  43%|████▎     | 41/96 [2:04:14<2:46:21, 181.49s/it]

Epoch 8: Avg Loss = 0.3045
Epoch 9/10


Grid Search Progress:  43%|████▎     | 41/96 [2:04:39<2:46:21, 181.49s/it]

Epoch 9: Avg Loss = 0.2742
Epoch 10/10


Grid Search Progress:  43%|████▎     | 41/96 [2:05:05<2:46:21, 181.49s/it]

Epoch 10: Avg Loss = 0.2452


Grid Search Progress:  44%|████▍     | 42/96 [2:05:11<3:05:43, 206.36s/it]

Validation Accuracy: 0.7960, F1 Score: 0.6040

[43/96] Training with lr=2e-05, bs=32, epochs=10, dropout=0.1, lstm_hidden_size=256, num_lstm_layers=1
Epoch 1/10


Grid Search Progress:  44%|████▍     | 42/96 [2:05:40<3:05:43, 206.36s/it]

Epoch 1: Avg Loss = 0.5941
Epoch 2/10


Grid Search Progress:  44%|████▍     | 42/96 [2:06:08<3:05:43, 206.36s/it]

Epoch 2: Avg Loss = 0.5813
Epoch 3/10


Grid Search Progress:  44%|████▍     | 42/96 [2:06:37<3:05:43, 206.36s/it]

Epoch 3: Avg Loss = 0.5643
Epoch 4/10


Grid Search Progress:  44%|████▍     | 42/96 [2:07:05<3:05:43, 206.36s/it]

Epoch 4: Avg Loss = 0.4685
Epoch 5/10


Grid Search Progress:  44%|████▍     | 42/96 [2:07:34<3:05:43, 206.36s/it]

Epoch 5: Avg Loss = 0.4186
Epoch 6/10


Grid Search Progress:  44%|████▍     | 42/96 [2:08:02<3:05:43, 206.36s/it]

Epoch 6: Avg Loss = 0.3826
Epoch 7/10


Grid Search Progress:  44%|████▍     | 42/96 [2:08:31<3:05:43, 206.36s/it]

Epoch 7: Avg Loss = 0.3486
Epoch 8/10


Grid Search Progress:  44%|████▍     | 42/96 [2:09:00<3:05:43, 206.36s/it]

Epoch 8: Avg Loss = 0.3212
Epoch 9/10


Grid Search Progress:  44%|████▍     | 42/96 [2:09:29<3:05:43, 206.36s/it]

Epoch 9: Avg Loss = 0.2941
Epoch 10/10


Grid Search Progress:  44%|████▍     | 42/96 [2:09:58<3:05:43, 206.36s/it]

Epoch 10: Avg Loss = 0.2721


Grid Search Progress:  45%|████▍     | 43/96 [2:10:04<3:25:13, 232.32s/it]

Validation Accuracy: 0.7607, F1 Score: 0.6386

[44/96] Training with lr=2e-05, bs=32, epochs=10, dropout=0.1, lstm_hidden_size=256, num_lstm_layers=2


Grid Search Progress:  45%|████▍     | 43/96 [2:10:05<3:25:13, 232.32s/it]

Epoch 1/10


Grid Search Progress:  45%|████▍     | 43/96 [2:10:38<3:25:13, 232.32s/it]

Epoch 1: Avg Loss = 0.5925
Epoch 2/10


Grid Search Progress:  45%|████▍     | 43/96 [2:11:11<3:25:13, 232.32s/it]

Epoch 2: Avg Loss = 0.5814
Epoch 3/10


Grid Search Progress:  45%|████▍     | 43/96 [2:11:44<3:25:13, 232.32s/it]

Epoch 3: Avg Loss = 0.5412
Epoch 4/10


Grid Search Progress:  45%|████▍     | 43/96 [2:12:17<3:25:13, 232.32s/it]

Epoch 4: Avg Loss = 0.4378
Epoch 5/10


Grid Search Progress:  45%|████▍     | 43/96 [2:12:51<3:25:13, 232.32s/it]

Epoch 5: Avg Loss = 0.3970
Epoch 6/10


Grid Search Progress:  45%|████▍     | 43/96 [2:13:24<3:25:13, 232.32s/it]

Epoch 6: Avg Loss = 0.3596
Epoch 7/10


Grid Search Progress:  45%|████▍     | 43/96 [2:13:57<3:25:13, 232.32s/it]

Epoch 7: Avg Loss = 0.3279
Epoch 8/10


Grid Search Progress:  45%|████▍     | 43/96 [2:14:30<3:25:13, 232.32s/it]

Epoch 8: Avg Loss = 0.3024
Epoch 9/10


Grid Search Progress:  45%|████▍     | 43/96 [2:15:03<3:25:13, 232.32s/it]

Epoch 9: Avg Loss = 0.2752
Epoch 10/10


Grid Search Progress:  45%|████▍     | 43/96 [2:15:36<3:25:13, 232.32s/it]

Epoch 10: Avg Loss = 0.2538


Grid Search Progress:  46%|████▌     | 44/96 [2:15:44<3:49:08, 264.39s/it]

Validation Accuracy: 0.7884, F1 Score: 0.6331

[45/96] Training with lr=2e-05, bs=32, epochs=10, dropout=0.3, lstm_hidden_size=128, num_lstm_layers=1
Epoch 1/10


Grid Search Progress:  46%|████▌     | 44/96 [2:16:08<3:49:08, 264.39s/it]

Epoch 1: Avg Loss = 0.5980
Epoch 2/10


Grid Search Progress:  46%|████▌     | 44/96 [2:16:32<3:49:08, 264.39s/it]

Epoch 2: Avg Loss = 0.5825
Epoch 3/10


Grid Search Progress:  46%|████▌     | 44/96 [2:16:55<3:49:08, 264.39s/it]

Epoch 3: Avg Loss = 0.5788
Epoch 4/10


Grid Search Progress:  46%|████▌     | 44/96 [2:17:20<3:49:08, 264.39s/it]

Epoch 4: Avg Loss = 0.5366
Epoch 5/10


Grid Search Progress:  46%|████▌     | 44/96 [2:17:44<3:49:08, 264.39s/it]

Epoch 5: Avg Loss = 0.4623
Epoch 6/10


Grid Search Progress:  46%|████▌     | 44/96 [2:18:08<3:49:08, 264.39s/it]

Epoch 6: Avg Loss = 0.4297
Epoch 7/10


Grid Search Progress:  46%|████▌     | 44/96 [2:18:32<3:49:08, 264.39s/it]

Epoch 7: Avg Loss = 0.4015
Epoch 8/10


Grid Search Progress:  46%|████▌     | 44/96 [2:18:56<3:49:08, 264.39s/it]

Epoch 8: Avg Loss = 0.3742
Epoch 9/10


Grid Search Progress:  46%|████▌     | 44/96 [2:19:20<3:49:08, 264.39s/it]

Epoch 9: Avg Loss = 0.3478
Epoch 10/10


Grid Search Progress:  46%|████▌     | 44/96 [2:19:44<3:49:08, 264.39s/it]

Epoch 10: Avg Loss = 0.3244


Grid Search Progress:  47%|████▋     | 45/96 [2:19:50<3:40:18, 259.18s/it]

Validation Accuracy: 0.7791, F1 Score: 0.6349

[46/96] Training with lr=2e-05, bs=32, epochs=10, dropout=0.3, lstm_hidden_size=128, num_lstm_layers=2


Grid Search Progress:  47%|████▋     | 45/96 [2:19:51<3:40:18, 259.18s/it]

Epoch 1/10


Grid Search Progress:  47%|████▋     | 45/96 [2:20:16<3:40:18, 259.18s/it]

Epoch 1: Avg Loss = 0.6050
Epoch 2/10


Grid Search Progress:  47%|████▋     | 45/96 [2:20:42<3:40:18, 259.18s/it]

Epoch 2: Avg Loss = 0.5821
Epoch 3/10


Grid Search Progress:  47%|████▋     | 45/96 [2:21:07<3:40:18, 259.18s/it]

Epoch 3: Avg Loss = 0.5740
Epoch 4/10


Grid Search Progress:  47%|████▋     | 45/96 [2:21:33<3:40:18, 259.18s/it]

Epoch 4: Avg Loss = 0.4750
Epoch 5/10


Grid Search Progress:  47%|████▋     | 45/96 [2:21:58<3:40:18, 259.18s/it]

Epoch 5: Avg Loss = 0.4130
Epoch 6/10


Grid Search Progress:  47%|████▋     | 45/96 [2:22:24<3:40:18, 259.18s/it]

Epoch 6: Avg Loss = 0.3729
Epoch 7/10


Grid Search Progress:  47%|████▋     | 45/96 [2:22:50<3:40:18, 259.18s/it]

Epoch 7: Avg Loss = 0.3424
Epoch 8/10


Grid Search Progress:  47%|████▋     | 45/96 [2:23:15<3:40:18, 259.18s/it]

Epoch 8: Avg Loss = 0.3103
Epoch 9/10


Grid Search Progress:  47%|████▋     | 45/96 [2:23:41<3:40:18, 259.18s/it]

Epoch 9: Avg Loss = 0.2821
Epoch 10/10


Grid Search Progress:  47%|████▋     | 45/96 [2:24:07<3:40:18, 259.18s/it]

Epoch 10: Avg Loss = 0.2542


Grid Search Progress:  48%|████▊     | 46/96 [2:24:13<3:36:52, 260.26s/it]

Validation Accuracy: 0.7940, F1 Score: 0.5714

[47/96] Training with lr=2e-05, bs=32, epochs=10, dropout=0.3, lstm_hidden_size=256, num_lstm_layers=1


Grid Search Progress:  48%|████▊     | 46/96 [2:24:13<3:36:52, 260.26s/it]

Epoch 1/10


Grid Search Progress:  48%|████▊     | 46/96 [2:24:42<3:36:52, 260.26s/it]

Epoch 1: Avg Loss = 0.5930
Epoch 2/10


Grid Search Progress:  48%|████▊     | 46/96 [2:25:10<3:36:52, 260.26s/it]

Epoch 2: Avg Loss = 0.5816
Epoch 3/10


Grid Search Progress:  48%|████▊     | 46/96 [2:25:38<3:36:52, 260.26s/it]

Epoch 3: Avg Loss = 0.5755
Epoch 4/10


Grid Search Progress:  48%|████▊     | 46/96 [2:26:07<3:36:52, 260.26s/it]

Epoch 4: Avg Loss = 0.4809
Epoch 5/10


Grid Search Progress:  48%|████▊     | 46/96 [2:26:35<3:36:52, 260.26s/it]

Epoch 5: Avg Loss = 0.4236
Epoch 6/10


Grid Search Progress:  48%|████▊     | 46/96 [2:27:03<3:36:52, 260.26s/it]

Epoch 6: Avg Loss = 0.3867
Epoch 7/10


Grid Search Progress:  48%|████▊     | 46/96 [2:27:31<3:36:52, 260.26s/it]

Epoch 7: Avg Loss = 0.3556
Epoch 8/10


Grid Search Progress:  48%|████▊     | 46/96 [2:27:59<3:36:52, 260.26s/it]

Epoch 8: Avg Loss = 0.3275
Epoch 9/10


Grid Search Progress:  48%|████▊     | 46/96 [2:28:28<3:36:52, 260.26s/it]

Epoch 9: Avg Loss = 0.3032
Epoch 10/10


Grid Search Progress:  48%|████▊     | 46/96 [2:28:56<3:36:52, 260.26s/it]

Epoch 10: Avg Loss = 0.2781


Grid Search Progress:  49%|████▉     | 47/96 [2:29:02<3:39:36, 268.90s/it]

Validation Accuracy: 0.7951, F1 Score: 0.6180

[48/96] Training with lr=2e-05, bs=32, epochs=10, dropout=0.3, lstm_hidden_size=256, num_lstm_layers=2


Grid Search Progress:  49%|████▉     | 47/96 [2:29:02<3:39:36, 268.90s/it]

Epoch 1/10


Grid Search Progress:  49%|████▉     | 47/96 [2:29:36<3:39:36, 268.90s/it]

Epoch 1: Avg Loss = 0.5928
Epoch 2/10


Grid Search Progress:  49%|████▉     | 47/96 [2:30:09<3:39:36, 268.90s/it]

Epoch 2: Avg Loss = 0.5824
Epoch 3/10


Grid Search Progress:  49%|████▉     | 47/96 [2:30:42<3:39:36, 268.90s/it]

Epoch 3: Avg Loss = 0.5398
Epoch 4/10


Grid Search Progress:  49%|████▉     | 47/96 [2:31:15<3:39:36, 268.90s/it]

Epoch 4: Avg Loss = 0.4311
Epoch 5/10


Grid Search Progress:  49%|████▉     | 47/96 [2:31:48<3:39:36, 268.90s/it]

Epoch 5: Avg Loss = 0.3816
Epoch 6/10


Grid Search Progress:  49%|████▉     | 47/96 [2:32:21<3:39:36, 268.90s/it]

Epoch 6: Avg Loss = 0.3422
Epoch 7/10


Grid Search Progress:  49%|████▉     | 47/96 [2:32:54<3:39:36, 268.90s/it]

Epoch 7: Avg Loss = 0.3089
Epoch 8/10


Grid Search Progress:  49%|████▉     | 47/96 [2:33:27<3:39:36, 268.90s/it]

Epoch 8: Avg Loss = 0.2821
Epoch 9/10


Grid Search Progress:  49%|████▉     | 47/96 [2:34:00<3:39:36, 268.90s/it]

Epoch 9: Avg Loss = 0.2549
Epoch 10/10


Grid Search Progress:  49%|████▉     | 47/96 [2:34:33<3:39:36, 268.90s/it]

Epoch 10: Avg Loss = 0.2308


Grid Search Progress:  50%|█████     | 48/96 [2:34:40<3:51:42, 289.64s/it]

Validation Accuracy: 0.7820, F1 Score: 0.6464

[49/96] Training with lr=5e-05, bs=16, epochs=3, dropout=0.1, lstm_hidden_size=128, num_lstm_layers=1


Grid Search Progress:  50%|█████     | 48/96 [2:34:40<3:51:42, 289.64s/it]

Epoch 1/3


Grid Search Progress:  50%|█████     | 48/96 [2:35:07<3:51:42, 289.64s/it]

Epoch 1: Avg Loss = 0.5833
Epoch 2/3


Grid Search Progress:  50%|█████     | 48/96 [2:35:34<3:51:42, 289.64s/it]

Epoch 2: Avg Loss = 0.4821
Epoch 3/3


Grid Search Progress:  50%|█████     | 48/96 [2:36:01<3:51:42, 289.64s/it]

Epoch 3: Avg Loss = 0.4088


Grid Search Progress:  51%|█████     | 49/96 [2:36:07<2:59:10, 228.74s/it]

Validation Accuracy: 0.7798, F1 Score: 0.6210

[50/96] Training with lr=5e-05, bs=16, epochs=3, dropout=0.1, lstm_hidden_size=128, num_lstm_layers=2
Epoch 1/3


Grid Search Progress:  51%|█████     | 49/96 [2:36:36<2:59:10, 228.74s/it]

Epoch 1: Avg Loss = 0.5817
Epoch 2/3


Grid Search Progress:  51%|█████     | 49/96 [2:37:06<2:59:10, 228.74s/it]

Epoch 2: Avg Loss = 0.4358
Epoch 3/3


Grid Search Progress:  51%|█████     | 49/96 [2:37:35<2:59:10, 228.74s/it]

Epoch 3: Avg Loss = 0.3547


Grid Search Progress:  52%|█████▏    | 50/96 [2:37:42<2:24:33, 188.56s/it]

Validation Accuracy: 0.8064, F1 Score: 0.5843

[51/96] Training with lr=5e-05, bs=16, epochs=3, dropout=0.1, lstm_hidden_size=256, num_lstm_layers=1


Grid Search Progress:  52%|█████▏    | 50/96 [2:37:42<2:24:33, 188.56s/it]

Epoch 1/3


Grid Search Progress:  52%|█████▏    | 50/96 [2:38:17<2:24:33, 188.56s/it]

Epoch 1: Avg Loss = 0.5832
Epoch 2/3


Grid Search Progress:  52%|█████▏    | 50/96 [2:38:53<2:24:33, 188.56s/it]

Epoch 2: Avg Loss = 0.4658
Epoch 3/3


Grid Search Progress:  52%|█████▏    | 50/96 [2:39:28<2:24:33, 188.56s/it]

Epoch 3: Avg Loss = 0.3941


Grid Search Progress:  53%|█████▎    | 51/96 [2:39:35<2:04:27, 165.95s/it]

Validation Accuracy: 0.8064, F1 Score: 0.6081

[52/96] Training with lr=5e-05, bs=16, epochs=3, dropout=0.1, lstm_hidden_size=256, num_lstm_layers=2


Grid Search Progress:  53%|█████▎    | 51/96 [2:39:35<2:04:27, 165.95s/it]

Epoch 1/3


Grid Search Progress:  53%|█████▎    | 51/96 [2:40:21<2:04:27, 165.95s/it]

Epoch 1: Avg Loss = 0.5684
Epoch 2/3


Grid Search Progress:  53%|█████▎    | 51/96 [2:41:06<2:04:27, 165.95s/it]

Epoch 2: Avg Loss = 0.4181
Epoch 3/3


Grid Search Progress:  53%|█████▎    | 51/96 [2:41:51<2:04:27, 165.95s/it]

Epoch 3: Avg Loss = 0.3526


Grid Search Progress:  54%|█████▍    | 52/96 [2:41:59<1:56:53, 159.40s/it]

Validation Accuracy: 0.8063, F1 Score: 0.6211

[53/96] Training with lr=5e-05, bs=16, epochs=3, dropout=0.3, lstm_hidden_size=128, num_lstm_layers=1
Epoch 1/3


Grid Search Progress:  54%|█████▍    | 52/96 [2:42:26<1:56:53, 159.40s/it]

Epoch 1: Avg Loss = 0.5841
Epoch 2/3


Grid Search Progress:  54%|█████▍    | 52/96 [2:42:53<1:56:53, 159.40s/it]

Epoch 2: Avg Loss = 0.4985
Epoch 3/3


Grid Search Progress:  54%|█████▍    | 52/96 [2:43:20<1:56:53, 159.40s/it]

Epoch 3: Avg Loss = 0.4285


Grid Search Progress:  55%|█████▌    | 53/96 [2:43:26<1:38:41, 137.70s/it]

Validation Accuracy: 0.7840, F1 Score: 0.6307

[54/96] Training with lr=5e-05, bs=16, epochs=3, dropout=0.3, lstm_hidden_size=128, num_lstm_layers=2
Epoch 1/3


Grid Search Progress:  55%|█████▌    | 53/96 [2:43:56<1:38:41, 137.70s/it]

Epoch 1: Avg Loss = 0.5836
Epoch 2/3


Grid Search Progress:  55%|█████▌    | 53/96 [2:44:25<1:38:41, 137.70s/it]

Epoch 2: Avg Loss = 0.4628
Epoch 3/3


Grid Search Progress:  55%|█████▌    | 53/96 [2:44:54<1:38:41, 137.70s/it]

Epoch 3: Avg Loss = 0.3796


Grid Search Progress:  56%|█████▋    | 54/96 [2:45:01<1:27:24, 124.87s/it]

Validation Accuracy: 0.8002, F1 Score: 0.5723

[55/96] Training with lr=5e-05, bs=16, epochs=3, dropout=0.3, lstm_hidden_size=256, num_lstm_layers=1
Epoch 1/3


Grid Search Progress:  56%|█████▋    | 54/96 [2:45:37<1:27:24, 124.87s/it]

Epoch 1: Avg Loss = 0.5820
Epoch 2/3


Grid Search Progress:  56%|█████▋    | 54/96 [2:46:12<1:27:24, 124.87s/it]

Epoch 2: Avg Loss = 0.5065
Epoch 3/3


Grid Search Progress:  56%|█████▋    | 54/96 [2:46:48<1:27:24, 124.87s/it]

Epoch 3: Avg Loss = 0.4341


Grid Search Progress:  57%|█████▋    | 55/96 [2:46:55<1:23:02, 121.52s/it]

Validation Accuracy: 0.7882, F1 Score: 0.6253

[56/96] Training with lr=5e-05, bs=16, epochs=3, dropout=0.3, lstm_hidden_size=256, num_lstm_layers=2
Epoch 1/3


Grid Search Progress:  57%|█████▋    | 55/96 [2:47:40<1:23:02, 121.52s/it]

Epoch 1: Avg Loss = 0.5740
Epoch 2/3


Grid Search Progress:  57%|█████▋    | 55/96 [2:48:25<1:23:02, 121.52s/it]

Epoch 2: Avg Loss = 0.4248
Epoch 3/3


Grid Search Progress:  57%|█████▋    | 55/96 [2:49:10<1:23:02, 121.52s/it]

Epoch 3: Avg Loss = 0.3515


Grid Search Progress:  58%|█████▊    | 56/96 [2:49:18<1:25:16, 127.92s/it]

Validation Accuracy: 0.8007, F1 Score: 0.6499

[57/96] Training with lr=5e-05, bs=16, epochs=5, dropout=0.1, lstm_hidden_size=128, num_lstm_layers=1
Epoch 1/5


Grid Search Progress:  58%|█████▊    | 56/96 [2:49:45<1:25:16, 127.92s/it]

Epoch 1: Avg Loss = 0.5835
Epoch 2/5


Grid Search Progress:  58%|█████▊    | 56/96 [2:50:11<1:25:16, 127.92s/it]

Epoch 2: Avg Loss = 0.4927
Epoch 3/5


Grid Search Progress:  58%|█████▊    | 56/96 [2:50:38<1:25:16, 127.92s/it]

Epoch 3: Avg Loss = 0.4279
Epoch 4/5


Grid Search Progress:  58%|█████▊    | 56/96 [2:51:05<1:25:16, 127.92s/it]

Epoch 4: Avg Loss = 0.4081
Epoch 5/5


Grid Search Progress:  58%|█████▊    | 56/96 [2:51:32<1:25:16, 127.92s/it]

Epoch 5: Avg Loss = 0.3476


Grid Search Progress:  59%|█████▉    | 57/96 [2:51:38<1:25:37, 131.73s/it]

Validation Accuracy: 0.7965, F1 Score: 0.5873

[58/96] Training with lr=5e-05, bs=16, epochs=5, dropout=0.1, lstm_hidden_size=128, num_lstm_layers=2
Epoch 1/5


Grid Search Progress:  59%|█████▉    | 57/96 [2:52:08<1:25:37, 131.73s/it]

Epoch 1: Avg Loss = 0.5798
Epoch 2/5


Grid Search Progress:  59%|█████▉    | 57/96 [2:52:37<1:25:37, 131.73s/it]

Epoch 2: Avg Loss = 0.4289
Epoch 3/5


Grid Search Progress:  59%|█████▉    | 57/96 [2:53:06<1:25:37, 131.73s/it]

Epoch 3: Avg Loss = 0.3471
Epoch 4/5


Grid Search Progress:  59%|█████▉    | 57/96 [2:53:36<1:25:37, 131.73s/it]

Epoch 4: Avg Loss = 0.2898
Epoch 5/5


Grid Search Progress:  59%|█████▉    | 57/96 [2:54:05<1:25:37, 131.73s/it]

Epoch 5: Avg Loss = 0.2439


Grid Search Progress:  60%|██████    | 58/96 [2:54:12<1:27:31, 138.20s/it]

Validation Accuracy: 0.7936, F1 Score: 0.6189

[59/96] Training with lr=5e-05, bs=16, epochs=5, dropout=0.1, lstm_hidden_size=256, num_lstm_layers=1
Epoch 1/5


Grid Search Progress:  60%|██████    | 58/96 [2:54:47<1:27:31, 138.20s/it]

Epoch 1: Avg Loss = 0.5811
Epoch 2/5


Grid Search Progress:  60%|██████    | 58/96 [2:55:22<1:27:31, 138.20s/it]

Epoch 2: Avg Loss = 0.4764
Epoch 3/5


Grid Search Progress:  60%|██████    | 58/96 [2:55:57<1:27:31, 138.20s/it]

Epoch 3: Avg Loss = 0.3911
Epoch 4/5


Grid Search Progress:  60%|██████    | 58/96 [2:56:33<1:27:31, 138.20s/it]

Epoch 4: Avg Loss = 0.3359
Epoch 5/5


Grid Search Progress:  60%|██████    | 58/96 [2:57:08<1:27:31, 138.20s/it]

Epoch 5: Avg Loss = 0.2942


Grid Search Progress:  61%|██████▏   | 59/96 [2:57:15<1:33:34, 151.74s/it]

Validation Accuracy: 0.8049, F1 Score: 0.6129

[60/96] Training with lr=5e-05, bs=16, epochs=5, dropout=0.1, lstm_hidden_size=256, num_lstm_layers=2
Epoch 1/5


Grid Search Progress:  61%|██████▏   | 59/96 [2:58:00<1:33:34, 151.74s/it]

Epoch 1: Avg Loss = 0.5713
Epoch 2/5


Grid Search Progress:  61%|██████▏   | 59/96 [2:58:45<1:33:34, 151.74s/it]

Epoch 2: Avg Loss = 0.4254
Epoch 3/5


Grid Search Progress:  61%|██████▏   | 59/96 [2:59:30<1:33:34, 151.74s/it]

Epoch 3: Avg Loss = 0.3587
Epoch 4/5


Grid Search Progress:  61%|██████▏   | 59/96 [3:00:15<1:33:34, 151.74s/it]

Epoch 4: Avg Loss = 0.3067
Epoch 5/5


Grid Search Progress:  61%|██████▏   | 59/96 [3:01:00<1:33:34, 151.74s/it]

Epoch 5: Avg Loss = 0.2637


Grid Search Progress:  62%|██████▎   | 60/96 [3:01:08<1:45:44, 176.23s/it]

Validation Accuracy: 0.7962, F1 Score: 0.5354

[61/96] Training with lr=5e-05, bs=16, epochs=5, dropout=0.3, lstm_hidden_size=128, num_lstm_layers=1
Epoch 1/5


Grid Search Progress:  62%|██████▎   | 60/96 [3:01:35<1:45:44, 176.23s/it]

Epoch 1: Avg Loss = 0.5848
Epoch 2/5


Grid Search Progress:  62%|██████▎   | 60/96 [3:02:02<1:45:44, 176.23s/it]

Epoch 2: Avg Loss = 0.5244
Epoch 3/5


Grid Search Progress:  62%|██████▎   | 60/96 [3:02:29<1:45:44, 176.23s/it]

Epoch 3: Avg Loss = 0.4572
Epoch 4/5


Grid Search Progress:  62%|██████▎   | 60/96 [3:02:55<1:45:44, 176.23s/it]

Epoch 4: Avg Loss = 0.4023
Epoch 5/5


Grid Search Progress:  62%|██████▎   | 60/96 [3:03:22<1:45:44, 176.23s/it]

Epoch 5: Avg Loss = 0.3436


Grid Search Progress:  64%|██████▎   | 61/96 [3:03:29<1:36:29, 165.43s/it]

Validation Accuracy: 0.7808, F1 Score: 0.5613

[62/96] Training with lr=5e-05, bs=16, epochs=5, dropout=0.3, lstm_hidden_size=128, num_lstm_layers=2
Epoch 1/5


Grid Search Progress:  64%|██████▎   | 61/96 [3:03:58<1:36:29, 165.43s/it]

Epoch 1: Avg Loss = 0.5832
Epoch 2/5


Grid Search Progress:  64%|██████▎   | 61/96 [3:04:27<1:36:29, 165.43s/it]

Epoch 2: Avg Loss = 0.4684
Epoch 3/5


Grid Search Progress:  64%|██████▎   | 61/96 [3:04:56<1:36:29, 165.43s/it]

Epoch 3: Avg Loss = 0.4034
Epoch 4/5


Grid Search Progress:  64%|██████▎   | 61/96 [3:05:26<1:36:29, 165.43s/it]

Epoch 4: Avg Loss = 0.3529
Epoch 5/5


Grid Search Progress:  64%|██████▎   | 61/96 [3:05:55<1:36:29, 165.43s/it]

Epoch 5: Avg Loss = 0.2997


Grid Search Progress:  65%|██████▍   | 62/96 [3:06:01<1:31:38, 161.71s/it]

Validation Accuracy: 0.8043, F1 Score: 0.6000

[63/96] Training with lr=5e-05, bs=16, epochs=5, dropout=0.3, lstm_hidden_size=256, num_lstm_layers=1


Grid Search Progress:  65%|██████▍   | 62/96 [3:06:02<1:31:38, 161.71s/it]

Epoch 1/5


Grid Search Progress:  65%|██████▍   | 62/96 [3:06:37<1:31:38, 161.71s/it]

Epoch 1: Avg Loss = 0.5829
Epoch 2/5


Grid Search Progress:  65%|██████▍   | 62/96 [3:07:12<1:31:38, 161.71s/it]

Epoch 2: Avg Loss = 0.5099
Epoch 3/5


Grid Search Progress:  65%|██████▍   | 62/96 [3:07:47<1:31:38, 161.71s/it]

Epoch 3: Avg Loss = 0.4239
Epoch 4/5


Grid Search Progress:  65%|██████▍   | 62/96 [3:08:22<1:31:38, 161.71s/it]

Epoch 4: Avg Loss = 0.3737
Epoch 5/5


Grid Search Progress:  65%|██████▍   | 62/96 [3:08:57<1:31:38, 161.71s/it]

Epoch 5: Avg Loss = 0.3293


Grid Search Progress:  66%|██████▌   | 63/96 [3:09:05<1:32:26, 168.07s/it]

Validation Accuracy: 0.7989, F1 Score: 0.5740

[64/96] Training with lr=5e-05, bs=16, epochs=5, dropout=0.3, lstm_hidden_size=256, num_lstm_layers=2
Epoch 1/5


Grid Search Progress:  66%|██████▌   | 63/96 [3:09:49<1:32:26, 168.07s/it]

Epoch 1: Avg Loss = 0.5759
Epoch 2/5


Grid Search Progress:  66%|██████▌   | 63/96 [3:10:34<1:32:26, 168.07s/it]

Epoch 2: Avg Loss = 0.4228
Epoch 3/5


Grid Search Progress:  66%|██████▌   | 63/96 [3:11:19<1:32:26, 168.07s/it]

Epoch 3: Avg Loss = 0.3487
Epoch 4/5


Grid Search Progress:  66%|██████▌   | 63/96 [3:12:04<1:32:26, 168.07s/it]

Epoch 4: Avg Loss = 0.2978
Epoch 5/5


Grid Search Progress:  66%|██████▌   | 63/96 [3:12:49<1:32:26, 168.07s/it]

Epoch 5: Avg Loss = 0.2524


Grid Search Progress:  67%|██████▋   | 64/96 [3:12:57<1:39:57, 187.41s/it]

Validation Accuracy: 0.7869, F1 Score: 0.6467

[65/96] Training with lr=5e-05, bs=16, epochs=10, dropout=0.1, lstm_hidden_size=128, num_lstm_layers=1
Epoch 1/10


Grid Search Progress:  67%|██████▋   | 64/96 [3:13:24<1:39:57, 187.41s/it]

Epoch 1: Avg Loss = 0.5839
Epoch 2/10


Grid Search Progress:  67%|██████▋   | 64/96 [3:13:50<1:39:57, 187.41s/it]

Epoch 2: Avg Loss = 0.4685
Epoch 3/10


Grid Search Progress:  67%|██████▋   | 64/96 [3:14:17<1:39:57, 187.41s/it]

Epoch 3: Avg Loss = 0.3759
Epoch 4/10


Grid Search Progress:  67%|██████▋   | 64/96 [3:14:44<1:39:57, 187.41s/it]

Epoch 4: Avg Loss = 0.3146
Epoch 5/10


Grid Search Progress:  67%|██████▋   | 64/96 [3:15:11<1:39:57, 187.41s/it]

Epoch 5: Avg Loss = 0.2663
Epoch 6/10


Grid Search Progress:  67%|██████▋   | 64/96 [3:15:38<1:39:57, 187.41s/it]

Epoch 6: Avg Loss = 0.2289
Epoch 7/10


Grid Search Progress:  67%|██████▋   | 64/96 [3:16:04<1:39:57, 187.41s/it]

Epoch 7: Avg Loss = 0.2004
Epoch 8/10


Grid Search Progress:  67%|██████▋   | 64/96 [3:16:31<1:39:57, 187.41s/it]

Epoch 8: Avg Loss = 0.1716
Epoch 9/10


Grid Search Progress:  67%|██████▋   | 64/96 [3:16:58<1:39:57, 187.41s/it]

Epoch 9: Avg Loss = 0.1479
Epoch 10/10


Grid Search Progress:  67%|██████▋   | 64/96 [3:17:24<1:39:57, 187.41s/it]

Epoch 10: Avg Loss = 0.1305


Grid Search Progress:  68%|██████▊   | 65/96 [3:17:31<1:50:13, 213.33s/it]

Validation Accuracy: 0.7884, F1 Score: 0.6093

[66/96] Training with lr=5e-05, bs=16, epochs=10, dropout=0.1, lstm_hidden_size=128, num_lstm_layers=2


Grid Search Progress:  68%|██████▊   | 65/96 [3:17:31<1:50:13, 213.33s/it]

Epoch 1/10


Grid Search Progress:  68%|██████▊   | 65/96 [3:18:00<1:50:13, 213.33s/it]

Epoch 1: Avg Loss = 0.5803
Epoch 2/10


Grid Search Progress:  68%|██████▊   | 65/96 [3:18:29<1:50:13, 213.33s/it]

Epoch 2: Avg Loss = 0.4309
Epoch 3/10


Grid Search Progress:  68%|██████▊   | 65/96 [3:18:59<1:50:13, 213.33s/it]

Epoch 3: Avg Loss = 0.3469
Epoch 4/10


Grid Search Progress:  68%|██████▊   | 65/96 [3:19:28<1:50:13, 213.33s/it]

Epoch 4: Avg Loss = 0.2882
Epoch 5/10


Grid Search Progress:  68%|██████▊   | 65/96 [3:19:57<1:50:13, 213.33s/it]

Epoch 5: Avg Loss = 0.2391
Epoch 6/10


Grid Search Progress:  68%|██████▊   | 65/96 [3:20:27<1:50:13, 213.33s/it]

Epoch 6: Avg Loss = 0.2024
Epoch 7/10


Grid Search Progress:  68%|██████▊   | 65/96 [3:20:56<1:50:13, 213.33s/it]

Epoch 7: Avg Loss = 0.1679
Epoch 8/10


Grid Search Progress:  68%|██████▊   | 65/96 [3:21:25<1:50:13, 213.33s/it]

Epoch 8: Avg Loss = 0.1435
Epoch 9/10


Grid Search Progress:  68%|██████▊   | 65/96 [3:21:54<1:50:13, 213.33s/it]

Epoch 9: Avg Loss = 0.1203
Epoch 10/10


Grid Search Progress:  68%|██████▊   | 65/96 [3:22:24<1:50:13, 213.33s/it]

Epoch 10: Avg Loss = 0.1027


Grid Search Progress:  69%|██████▉   | 66/96 [3:22:30<1:59:36, 239.22s/it]

Validation Accuracy: 0.7823, F1 Score: 0.5630

[67/96] Training with lr=5e-05, bs=16, epochs=10, dropout=0.1, lstm_hidden_size=256, num_lstm_layers=1


Grid Search Progress:  69%|██████▉   | 66/96 [3:22:31<1:59:36, 239.22s/it]

Epoch 1/10


Grid Search Progress:  69%|██████▉   | 66/96 [3:23:06<1:59:36, 239.22s/it]

Epoch 1: Avg Loss = 0.5817
Epoch 2/10


Grid Search Progress:  69%|██████▉   | 66/96 [3:23:41<1:59:36, 239.22s/it]

Epoch 2: Avg Loss = 0.4484
Epoch 3/10


Grid Search Progress:  69%|██████▉   | 66/96 [3:24:16<1:59:36, 239.22s/it]

Epoch 3: Avg Loss = 0.3855
Epoch 4/10


Grid Search Progress:  69%|██████▉   | 66/96 [3:24:52<1:59:36, 239.22s/it]

Epoch 4: Avg Loss = 0.3353
Epoch 5/10


Grid Search Progress:  69%|██████▉   | 66/96 [3:25:27<1:59:36, 239.22s/it]

Epoch 5: Avg Loss = 0.2903
Epoch 6/10


Grid Search Progress:  69%|██████▉   | 66/96 [3:26:02<1:59:36, 239.22s/it]

Epoch 6: Avg Loss = 0.2533
Epoch 7/10


Grid Search Progress:  69%|██████▉   | 66/96 [3:26:37<1:59:36, 239.22s/it]

Epoch 7: Avg Loss = 0.2279
Epoch 8/10


Grid Search Progress:  69%|██████▉   | 66/96 [3:27:12<1:59:36, 239.22s/it]

Epoch 8: Avg Loss = 0.1997
Epoch 9/10


Grid Search Progress:  69%|██████▉   | 66/96 [3:27:48<1:59:36, 239.22s/it]

Epoch 9: Avg Loss = 0.1862
Epoch 10/10


Grid Search Progress:  69%|██████▉   | 66/96 [3:28:23<1:59:36, 239.22s/it]

Epoch 10: Avg Loss = 0.1584


Grid Search Progress:  70%|██████▉   | 67/96 [3:28:30<2:13:01, 275.23s/it]

Validation Accuracy: 0.7762, F1 Score: 0.6239

[68/96] Training with lr=5e-05, bs=16, epochs=10, dropout=0.1, lstm_hidden_size=256, num_lstm_layers=2


Grid Search Progress:  70%|██████▉   | 67/96 [3:28:30<2:13:01, 275.23s/it]

Epoch 1/10


Grid Search Progress:  70%|██████▉   | 67/96 [3:29:15<2:13:01, 275.23s/it]

Epoch 1: Avg Loss = 0.5739
Epoch 2/10


Grid Search Progress:  70%|██████▉   | 67/96 [3:30:00<2:13:01, 275.23s/it]

Epoch 2: Avg Loss = 0.4251
Epoch 3/10


Grid Search Progress:  70%|██████▉   | 67/96 [3:30:45<2:13:01, 275.23s/it]

Epoch 3: Avg Loss = 0.3481
Epoch 4/10


Grid Search Progress:  70%|██████▉   | 67/96 [3:31:30<2:13:01, 275.23s/it]

Epoch 4: Avg Loss = 0.2954
Epoch 5/10


Grid Search Progress:  70%|██████▉   | 67/96 [3:32:15<2:13:01, 275.23s/it]

Epoch 5: Avg Loss = 0.2496
Epoch 6/10


Grid Search Progress:  70%|██████▉   | 67/96 [3:33:00<2:13:01, 275.23s/it]

Epoch 6: Avg Loss = 0.2135
Epoch 7/10


Grid Search Progress:  70%|██████▉   | 67/96 [3:33:45<2:13:01, 275.23s/it]

Epoch 7: Avg Loss = 0.1802
Epoch 8/10


Grid Search Progress:  70%|██████▉   | 67/96 [3:34:30<2:13:01, 275.23s/it]

Epoch 8: Avg Loss = 0.1575
Epoch 9/10


Grid Search Progress:  70%|██████▉   | 67/96 [3:35:15<2:13:01, 275.23s/it]

Epoch 9: Avg Loss = 0.1350
Epoch 10/10


Grid Search Progress:  70%|██████▉   | 67/96 [3:36:00<2:13:01, 275.23s/it]

Epoch 10: Avg Loss = 0.1183


Grid Search Progress:  71%|███████   | 68/96 [3:36:08<2:34:03, 330.13s/it]

Validation Accuracy: 0.7739, F1 Score: 0.5927

[69/96] Training with lr=5e-05, bs=16, epochs=10, dropout=0.3, lstm_hidden_size=128, num_lstm_layers=1


Grid Search Progress:  71%|███████   | 68/96 [3:36:08<2:34:03, 330.13s/it]

Epoch 1/10


Grid Search Progress:  71%|███████   | 68/96 [3:36:35<2:34:03, 330.13s/it]

Epoch 1: Avg Loss = 0.5846
Epoch 2/10


Grid Search Progress:  71%|███████   | 68/96 [3:37:02<2:34:03, 330.13s/it]

Epoch 2: Avg Loss = 0.5086
Epoch 3/10


Grid Search Progress:  71%|███████   | 68/96 [3:37:28<2:34:03, 330.13s/it]

Epoch 3: Avg Loss = 0.4459
Epoch 4/10


Grid Search Progress:  71%|███████   | 68/96 [3:37:55<2:34:03, 330.13s/it]

Epoch 4: Avg Loss = 0.3952
Epoch 5/10


Grid Search Progress:  71%|███████   | 68/96 [3:38:22<2:34:03, 330.13s/it]

Epoch 5: Avg Loss = 0.3501
Epoch 6/10


Grid Search Progress:  71%|███████   | 68/96 [3:38:49<2:34:03, 330.13s/it]

Epoch 6: Avg Loss = 0.3046
Epoch 7/10


Grid Search Progress:  71%|███████   | 68/96 [3:39:16<2:34:03, 330.13s/it]

Epoch 7: Avg Loss = 0.2616
Epoch 8/10


Grid Search Progress:  71%|███████   | 68/96 [3:39:43<2:34:03, 330.13s/it]

Epoch 8: Avg Loss = 0.2299
Epoch 9/10


Grid Search Progress:  71%|███████   | 68/96 [3:40:10<2:34:03, 330.13s/it]

Epoch 9: Avg Loss = 0.1919
Epoch 10/10


Grid Search Progress:  71%|███████   | 68/96 [3:40:36<2:34:03, 330.13s/it]

Epoch 10: Avg Loss = 0.1617


Grid Search Progress:  72%|███████▏  | 69/96 [3:40:43<2:21:05, 313.54s/it]

Validation Accuracy: 0.7649, F1 Score: 0.5772

[70/96] Training with lr=5e-05, bs=16, epochs=10, dropout=0.3, lstm_hidden_size=128, num_lstm_layers=2
Epoch 1/10


Grid Search Progress:  72%|███████▏  | 69/96 [3:41:12<2:21:05, 313.54s/it]

Epoch 1: Avg Loss = 0.5848
Epoch 2/10


Grid Search Progress:  72%|███████▏  | 69/96 [3:41:41<2:21:05, 313.54s/it]

Epoch 2: Avg Loss = 0.4691
Epoch 3/10


Grid Search Progress:  72%|███████▏  | 69/96 [3:42:11<2:21:05, 313.54s/it]

Epoch 3: Avg Loss = 0.3756
Epoch 4/10


Grid Search Progress:  72%|███████▏  | 69/96 [3:42:40<2:21:05, 313.54s/it]

Epoch 4: Avg Loss = 0.3103
Epoch 5/10


Grid Search Progress:  72%|███████▏  | 69/96 [3:43:10<2:21:05, 313.54s/it]

Epoch 5: Avg Loss = 0.2597
Epoch 6/10


Grid Search Progress:  72%|███████▏  | 69/96 [3:43:39<2:21:05, 313.54s/it]

Epoch 6: Avg Loss = 0.2164
Epoch 7/10


Grid Search Progress:  72%|███████▏  | 69/96 [3:44:08<2:21:05, 313.54s/it]

Epoch 7: Avg Loss = 0.1792
Epoch 8/10


Grid Search Progress:  72%|███████▏  | 69/96 [3:44:37<2:21:05, 313.54s/it]

Epoch 8: Avg Loss = 0.1514
Epoch 9/10


Grid Search Progress:  72%|███████▏  | 69/96 [3:45:07<2:21:05, 313.54s/it]

Epoch 9: Avg Loss = 0.1293
Epoch 10/10


Grid Search Progress:  72%|███████▏  | 69/96 [3:45:36<2:21:05, 313.54s/it]

Epoch 10: Avg Loss = 0.1108


Grid Search Progress:  73%|███████▎  | 70/96 [3:45:43<2:14:07, 309.52s/it]

Validation Accuracy: 0.7848, F1 Score: 0.6114

[71/96] Training with lr=5e-05, bs=16, epochs=10, dropout=0.3, lstm_hidden_size=256, num_lstm_layers=1


Grid Search Progress:  73%|███████▎  | 70/96 [3:45:43<2:14:07, 309.52s/it]

Epoch 1/10


Grid Search Progress:  73%|███████▎  | 70/96 [3:46:19<2:14:07, 309.52s/it]

Epoch 1: Avg Loss = 0.5832
Epoch 2/10


Grid Search Progress:  73%|███████▎  | 70/96 [3:46:55<2:14:07, 309.52s/it]

Epoch 2: Avg Loss = 0.4665
Epoch 3/10


Grid Search Progress:  73%|███████▎  | 70/96 [3:47:31<2:14:07, 309.52s/it]

Epoch 3: Avg Loss = 0.3774
Epoch 4/10


Grid Search Progress:  73%|███████▎  | 70/96 [3:48:06<2:14:07, 309.52s/it]

Epoch 4: Avg Loss = 0.3187
Epoch 5/10


Grid Search Progress:  73%|███████▎  | 70/96 [3:48:42<2:14:07, 309.52s/it]

Epoch 5: Avg Loss = 0.2738
Epoch 6/10


Grid Search Progress:  73%|███████▎  | 70/96 [3:49:18<2:14:07, 309.52s/it]

Epoch 6: Avg Loss = 0.2366
Epoch 7/10


Grid Search Progress:  73%|███████▎  | 70/96 [3:49:54<2:14:07, 309.52s/it]

Epoch 7: Avg Loss = 0.2077
Epoch 8/10


Grid Search Progress:  73%|███████▎  | 70/96 [3:50:30<2:14:07, 309.52s/it]

Epoch 8: Avg Loss = 0.1803
Epoch 9/10


Grid Search Progress:  73%|███████▎  | 70/96 [3:51:05<2:14:07, 309.52s/it]

Epoch 9: Avg Loss = 0.1610
Epoch 10/10


Grid Search Progress:  73%|███████▎  | 70/96 [3:51:41<2:14:07, 309.52s/it]

Epoch 10: Avg Loss = 0.1441


Grid Search Progress:  74%|███████▍  | 71/96 [3:51:48<2:15:56, 326.24s/it]

Validation Accuracy: 0.7823, F1 Score: 0.6021

[72/96] Training with lr=5e-05, bs=16, epochs=10, dropout=0.3, lstm_hidden_size=256, num_lstm_layers=2


Grid Search Progress:  74%|███████▍  | 71/96 [3:51:48<2:15:56, 326.24s/it]

Epoch 1/10


Grid Search Progress:  74%|███████▍  | 71/96 [3:52:34<2:15:56, 326.24s/it]

Epoch 1: Avg Loss = 0.5719
Epoch 2/10


Grid Search Progress:  74%|███████▍  | 71/96 [3:53:19<2:15:56, 326.24s/it]

Epoch 2: Avg Loss = 0.4223
Epoch 3/10


Grid Search Progress:  74%|███████▍  | 71/96 [3:54:04<2:15:56, 326.24s/it]

Epoch 3: Avg Loss = 0.3529
Epoch 4/10


Grid Search Progress:  74%|███████▍  | 71/96 [3:54:50<2:15:56, 326.24s/it]

Epoch 4: Avg Loss = 0.2958
Epoch 5/10


Grid Search Progress:  74%|███████▍  | 71/96 [3:55:35<2:15:56, 326.24s/it]

Epoch 5: Avg Loss = 0.2550
Epoch 6/10


Grid Search Progress:  74%|███████▍  | 71/96 [3:56:20<2:15:56, 326.24s/it]

Epoch 6: Avg Loss = 0.2196
Epoch 7/10


Grid Search Progress:  74%|███████▍  | 71/96 [3:57:05<2:15:56, 326.24s/it]

Epoch 7: Avg Loss = 0.1911
Epoch 8/10


Grid Search Progress:  74%|███████▍  | 71/96 [3:57:51<2:15:56, 326.24s/it]

Epoch 8: Avg Loss = 0.1655
Epoch 9/10


Grid Search Progress:  74%|███████▍  | 71/96 [3:58:36<2:15:56, 326.24s/it]

Epoch 9: Avg Loss = 0.1504
Epoch 10/10


Grid Search Progress:  74%|███████▍  | 71/96 [3:59:21<2:15:56, 326.24s/it]

Epoch 10: Avg Loss = 0.1264


Grid Search Progress:  75%|███████▌  | 72/96 [3:59:29<2:26:41, 366.72s/it]

Validation Accuracy: 0.7827, F1 Score: 0.6061

[73/96] Training with lr=5e-05, bs=32, epochs=3, dropout=0.1, lstm_hidden_size=128, num_lstm_layers=1
Epoch 1/3


Grid Search Progress:  75%|███████▌  | 72/96 [3:59:54<2:26:41, 366.72s/it]

Epoch 1: Avg Loss = 0.5872
Epoch 2/3


Grid Search Progress:  75%|███████▌  | 72/96 [4:00:18<2:26:41, 366.72s/it]

Epoch 2: Avg Loss = 0.5371
Epoch 3/3


Grid Search Progress:  75%|███████▌  | 72/96 [4:00:42<2:26:41, 366.72s/it]

Epoch 3: Avg Loss = 0.4339


Grid Search Progress:  76%|███████▌  | 73/96 [4:00:48<1:47:29, 280.41s/it]

Validation Accuracy: 0.7945, F1 Score: 0.5913

[74/96] Training with lr=5e-05, bs=32, epochs=3, dropout=0.1, lstm_hidden_size=128, num_lstm_layers=2
Epoch 1/3


Grid Search Progress:  76%|███████▌  | 73/96 [4:01:14<1:47:29, 280.41s/it]

Epoch 1: Avg Loss = 0.5873
Epoch 2/3


Grid Search Progress:  76%|███████▌  | 73/96 [4:01:40<1:47:29, 280.41s/it]

Epoch 2: Avg Loss = 0.4887
Epoch 3/3


Grid Search Progress:  76%|███████▌  | 73/96 [4:02:05<1:47:29, 280.41s/it]

Epoch 3: Avg Loss = 0.3730


Grid Search Progress:  77%|███████▋  | 74/96 [4:02:11<1:21:06, 221.18s/it]

Validation Accuracy: 0.8051, F1 Score: 0.5903

[75/96] Training with lr=5e-05, bs=32, epochs=3, dropout=0.1, lstm_hidden_size=256, num_lstm_layers=1
Epoch 1/3


Grid Search Progress:  77%|███████▋  | 74/96 [4:02:40<1:21:06, 221.18s/it]

Epoch 1: Avg Loss = 0.5870
Epoch 2/3


Grid Search Progress:  77%|███████▋  | 74/96 [4:03:08<1:21:06, 221.18s/it]

Epoch 2: Avg Loss = 0.5491
Epoch 3/3


Grid Search Progress:  77%|███████▋  | 74/96 [4:03:36<1:21:06, 221.18s/it]

Epoch 3: Avg Loss = 0.4617


Grid Search Progress:  78%|███████▊  | 75/96 [4:03:43<1:03:47, 182.28s/it]

Validation Accuracy: 0.7551, F1 Score: 0.3936

[76/96] Training with lr=5e-05, bs=32, epochs=3, dropout=0.1, lstm_hidden_size=256, num_lstm_layers=2
Epoch 1/3


Grid Search Progress:  78%|███████▊  | 75/96 [4:04:16<1:03:47, 182.28s/it]

Epoch 1: Avg Loss = 0.5866
Epoch 2/3


Grid Search Progress:  78%|███████▊  | 75/96 [4:04:49<1:03:47, 182.28s/it]

Epoch 2: Avg Loss = 0.4676
Epoch 3/3


Grid Search Progress:  78%|███████▊  | 75/96 [4:05:23<1:03:47, 182.28s/it]

Epoch 3: Avg Loss = 0.3674


Grid Search Progress:  79%|███████▉  | 76/96 [4:05:30<53:12, 159.62s/it]

Validation Accuracy: 0.8026, F1 Score: 0.5386

[77/96] Training with lr=5e-05, bs=32, epochs=3, dropout=0.3, lstm_hidden_size=128, num_lstm_layers=1
Epoch 1/3


Grid Search Progress:  79%|███████▉  | 76/96 [4:05:54<53:12, 159.62s/it]

Epoch 1: Avg Loss = 0.5900
Epoch 2/3


Grid Search Progress:  79%|███████▉  | 76/96 [4:06:18<53:12, 159.62s/it]

Epoch 2: Avg Loss = 0.5517
Epoch 3/3


Grid Search Progress:  79%|███████▉  | 76/96 [4:06:42<53:12, 159.62s/it]

Epoch 3: Avg Loss = 0.4760


Grid Search Progress:  80%|████████  | 77/96 [4:06:49<42:52, 135.40s/it]

Validation Accuracy: 0.7713, F1 Score: 0.4106

[78/96] Training with lr=5e-05, bs=32, epochs=3, dropout=0.3, lstm_hidden_size=128, num_lstm_layers=2
Epoch 1/3


Grid Search Progress:  80%|████████  | 77/96 [4:07:14<42:52, 135.40s/it]

Epoch 1: Avg Loss = 0.5906
Epoch 2/3


Grid Search Progress:  80%|████████  | 77/96 [4:07:40<42:52, 135.40s/it]

Epoch 2: Avg Loss = 0.5164
Epoch 3/3


Grid Search Progress:  80%|████████  | 77/96 [4:08:05<42:52, 135.40s/it]

Epoch 3: Avg Loss = 0.4044


Grid Search Progress:  81%|████████▏ | 78/96 [4:08:11<35:53, 119.66s/it]

Validation Accuracy: 0.8002, F1 Score: 0.6024

[79/96] Training with lr=5e-05, bs=32, epochs=3, dropout=0.3, lstm_hidden_size=256, num_lstm_layers=1
Epoch 1/3


Grid Search Progress:  81%|████████▏ | 78/96 [4:08:40<35:53, 119.66s/it]

Epoch 1: Avg Loss = 0.5864
Epoch 2/3


Grid Search Progress:  81%|████████▏ | 78/96 [4:09:08<35:53, 119.66s/it]

Epoch 2: Avg Loss = 0.5565
Epoch 3/3


Grid Search Progress:  81%|████████▏ | 78/96 [4:09:37<35:53, 119.66s/it]

Epoch 3: Avg Loss = 0.4387


Grid Search Progress:  82%|████████▏ | 79/96 [4:09:43<31:32, 111.31s/it]

Validation Accuracy: 0.7929, F1 Score: 0.5817

[80/96] Training with lr=5e-05, bs=32, epochs=3, dropout=0.3, lstm_hidden_size=256, num_lstm_layers=2


Grid Search Progress:  82%|████████▏ | 79/96 [4:09:44<31:32, 111.31s/it]

Epoch 1/3


Grid Search Progress:  82%|████████▏ | 79/96 [4:10:17<31:32, 111.31s/it]

Epoch 1: Avg Loss = 0.5860
Epoch 2/3


Grid Search Progress:  82%|████████▏ | 79/96 [4:10:50<31:32, 111.31s/it]

Epoch 2: Avg Loss = 0.4641
Epoch 3/3


Grid Search Progress:  82%|████████▏ | 79/96 [4:11:23<31:32, 111.31s/it]

Epoch 3: Avg Loss = 0.3632


Grid Search Progress:  83%|████████▎ | 80/96 [4:11:30<29:20, 110.02s/it]

Validation Accuracy: 0.7980, F1 Score: 0.5293

[81/96] Training with lr=5e-05, bs=32, epochs=5, dropout=0.1, lstm_hidden_size=128, num_lstm_layers=1
Epoch 1/5


Grid Search Progress:  83%|████████▎ | 80/96 [4:11:54<29:20, 110.02s/it]

Epoch 1: Avg Loss = 0.5873
Epoch 2/5


Grid Search Progress:  83%|████████▎ | 80/96 [4:12:19<29:20, 110.02s/it]

Epoch 2: Avg Loss = 0.5346
Epoch 3/5


Grid Search Progress:  83%|████████▎ | 80/96 [4:12:43<29:20, 110.02s/it]

Epoch 3: Avg Loss = 0.4487
Epoch 4/5


Grid Search Progress:  83%|████████▎ | 80/96 [4:13:07<29:20, 110.02s/it]

Epoch 4: Avg Loss = 0.3885
Epoch 5/5


Grid Search Progress:  83%|████████▎ | 80/96 [4:13:31<29:20, 110.02s/it]

Epoch 5: Avg Loss = 0.3404


Grid Search Progress:  84%|████████▍ | 81/96 [4:13:38<28:47, 115.18s/it]

Validation Accuracy: 0.7918, F1 Score: 0.6375

[82/96] Training with lr=5e-05, bs=32, epochs=5, dropout=0.1, lstm_hidden_size=128, num_lstm_layers=2
Epoch 1/5


Grid Search Progress:  84%|████████▍ | 81/96 [4:14:03<28:47, 115.18s/it]

Epoch 1: Avg Loss = 0.5857
Epoch 2/5


Grid Search Progress:  84%|████████▍ | 81/96 [4:14:29<28:47, 115.18s/it]

Epoch 2: Avg Loss = 0.5011
Epoch 3/5


Grid Search Progress:  84%|████████▍ | 81/96 [4:14:55<28:47, 115.18s/it]

Epoch 3: Avg Loss = 0.3789
Epoch 4/5


Grid Search Progress:  84%|████████▍ | 81/96 [4:15:21<28:47, 115.18s/it]

Epoch 4: Avg Loss = 0.3212
Epoch 5/5


Grid Search Progress:  84%|████████▍ | 81/96 [4:15:46<28:47, 115.18s/it]

Epoch 5: Avg Loss = 0.2725


Grid Search Progress:  85%|████████▌ | 82/96 [4:15:53<28:16, 121.18s/it]

Validation Accuracy: 0.8063, F1 Score: 0.6060

[83/96] Training with lr=5e-05, bs=32, epochs=5, dropout=0.1, lstm_hidden_size=256, num_lstm_layers=1
Epoch 1/5


Grid Search Progress:  85%|████████▌ | 82/96 [4:16:21<28:16, 121.18s/it]

Epoch 1: Avg Loss = 0.5860
Epoch 2/5


Grid Search Progress:  85%|████████▌ | 82/96 [4:16:50<28:16, 121.18s/it]

Epoch 2: Avg Loss = 0.5330
Epoch 3/5


Grid Search Progress:  85%|████████▌ | 82/96 [4:17:18<28:16, 121.18s/it]

Epoch 3: Avg Loss = 0.4308
Epoch 4/5


Grid Search Progress:  85%|████████▌ | 82/96 [4:17:46<28:16, 121.18s/it]

Epoch 4: Avg Loss = 0.3757
Epoch 5/5


Grid Search Progress:  85%|████████▌ | 82/96 [4:18:15<28:16, 121.18s/it]

Epoch 5: Avg Loss = 0.3253


Grid Search Progress:  86%|████████▋ | 83/96 [4:18:21<28:01, 129.36s/it]

Validation Accuracy: 0.8049, F1 Score: 0.5601

[84/96] Training with lr=5e-05, bs=32, epochs=5, dropout=0.1, lstm_hidden_size=256, num_lstm_layers=2
Epoch 1/5


Grid Search Progress:  86%|████████▋ | 83/96 [4:18:55<28:01, 129.36s/it]

Epoch 1: Avg Loss = 0.5873
Epoch 2/5


Grid Search Progress:  86%|████████▋ | 83/96 [4:19:28<28:01, 129.36s/it]

Epoch 2: Avg Loss = 0.4932
Epoch 3/5


Grid Search Progress:  86%|████████▋ | 83/96 [4:20:01<28:01, 129.36s/it]

Epoch 3: Avg Loss = 0.3902
Epoch 4/5


Grid Search Progress:  86%|████████▋ | 83/96 [4:20:34<28:01, 129.36s/it]

Epoch 4: Avg Loss = 0.3300
Epoch 5/5


Grid Search Progress:  86%|████████▋ | 83/96 [4:21:08<28:01, 129.36s/it]

Epoch 5: Avg Loss = 0.2811


Grid Search Progress:  88%|████████▊ | 84/96 [4:21:15<28:31, 142.64s/it]

Validation Accuracy: 0.8053, F1 Score: 0.5937

[85/96] Training with lr=5e-05, bs=32, epochs=5, dropout=0.3, lstm_hidden_size=128, num_lstm_layers=1


Grid Search Progress:  88%|████████▊ | 84/96 [4:21:15<28:31, 142.64s/it]

Epoch 1/5


Grid Search Progress:  88%|████████▊ | 84/96 [4:21:39<28:31, 142.64s/it]

Epoch 1: Avg Loss = 0.5897
Epoch 2/5


Grid Search Progress:  88%|████████▊ | 84/96 [4:22:03<28:31, 142.64s/it]

Epoch 2: Avg Loss = 0.5553
Epoch 3/5


Grid Search Progress:  88%|████████▊ | 84/96 [4:22:27<28:31, 142.64s/it]

Epoch 3: Avg Loss = 0.4669
Epoch 4/5


Grid Search Progress:  88%|████████▊ | 84/96 [4:22:52<28:31, 142.64s/it]

Epoch 4: Avg Loss = 0.4224
Epoch 5/5


Grid Search Progress:  88%|████████▊ | 84/96 [4:23:16<28:31, 142.64s/it]

Epoch 5: Avg Loss = 0.3852


Grid Search Progress:  89%|████████▊ | 85/96 [4:23:22<25:19, 138.10s/it]

Validation Accuracy: 0.7975, F1 Score: 0.6068

[86/96] Training with lr=5e-05, bs=32, epochs=5, dropout=0.3, lstm_hidden_size=128, num_lstm_layers=2
Epoch 1/5


Grid Search Progress:  89%|████████▊ | 85/96 [4:23:48<25:19, 138.10s/it]

Epoch 1: Avg Loss = 0.5885
Epoch 2/5


Grid Search Progress:  89%|████████▊ | 85/96 [4:24:14<25:19, 138.10s/it]

Epoch 2: Avg Loss = 0.5245
Epoch 3/5


Grid Search Progress:  89%|████████▊ | 85/96 [4:24:39<25:19, 138.10s/it]

Epoch 3: Avg Loss = 0.4026
Epoch 4/5


Grid Search Progress:  89%|████████▊ | 85/96 [4:25:05<25:19, 138.10s/it]

Epoch 4: Avg Loss = 0.3391
Epoch 5/5


Grid Search Progress:  89%|████████▊ | 85/96 [4:25:31<25:19, 138.10s/it]

Epoch 5: Avg Loss = 0.2892


Grid Search Progress:  90%|████████▉ | 86/96 [4:25:37<22:51, 137.11s/it]

Validation Accuracy: 0.8046, F1 Score: 0.6213

[87/96] Training with lr=5e-05, bs=32, epochs=5, dropout=0.3, lstm_hidden_size=256, num_lstm_layers=1
Epoch 1/5


Grid Search Progress:  90%|████████▉ | 86/96 [4:26:05<22:51, 137.11s/it]

Epoch 1: Avg Loss = 0.5888
Epoch 2/5


Grid Search Progress:  90%|████████▉ | 86/96 [4:26:34<22:51, 137.11s/it]

Epoch 2: Avg Loss = 0.5501
Epoch 3/5


Grid Search Progress:  90%|████████▉ | 86/96 [4:27:02<22:51, 137.11s/it]

Epoch 3: Avg Loss = 0.4564
Epoch 4/5


Grid Search Progress:  90%|████████▉ | 86/96 [4:27:31<22:51, 137.11s/it]

Epoch 4: Avg Loss = 0.4005
Epoch 5/5


Grid Search Progress:  90%|████████▉ | 86/96 [4:27:59<22:51, 137.11s/it]

Epoch 5: Avg Loss = 0.3538


Grid Search Progress:  91%|█████████ | 87/96 [4:28:05<21:04, 140.49s/it]

Validation Accuracy: 0.7889, F1 Score: 0.6410

[88/96] Training with lr=5e-05, bs=32, epochs=5, dropout=0.3, lstm_hidden_size=256, num_lstm_layers=2
Epoch 1/5


Grid Search Progress:  91%|█████████ | 87/96 [4:28:39<21:04, 140.49s/it]

Epoch 1: Avg Loss = 0.5871
Epoch 2/5


Grid Search Progress:  91%|█████████ | 87/96 [4:29:12<21:04, 140.49s/it]

Epoch 2: Avg Loss = 0.4767
Epoch 3/5


Grid Search Progress:  91%|█████████ | 87/96 [4:29:45<21:04, 140.49s/it]

Epoch 3: Avg Loss = 0.3719
Epoch 4/5


Grid Search Progress:  91%|█████████ | 87/96 [4:30:18<21:04, 140.49s/it]

Epoch 4: Avg Loss = 0.3140
Epoch 5/5


Grid Search Progress:  91%|█████████ | 87/96 [4:30:52<21:04, 140.49s/it]

Epoch 5: Avg Loss = 0.2688


Grid Search Progress:  92%|█████████▏| 88/96 [4:30:59<20:02, 150.30s/it]

Validation Accuracy: 0.8051, F1 Score: 0.6275

[89/96] Training with lr=5e-05, bs=32, epochs=10, dropout=0.1, lstm_hidden_size=128, num_lstm_layers=1
Epoch 1/10


Grid Search Progress:  92%|█████████▏| 88/96 [4:31:23<20:02, 150.30s/it]

Epoch 1: Avg Loss = 0.5861
Epoch 2/10


Grid Search Progress:  92%|█████████▏| 88/96 [4:31:47<20:02, 150.30s/it]

Epoch 2: Avg Loss = 0.5496
Epoch 3/10


Grid Search Progress:  92%|█████████▏| 88/96 [4:32:11<20:02, 150.30s/it]

Epoch 3: Avg Loss = 0.4592
Epoch 4/10


Grid Search Progress:  92%|█████████▏| 88/96 [4:32:35<20:02, 150.30s/it]

Epoch 4: Avg Loss = 0.4169
Epoch 5/10


Grid Search Progress:  92%|█████████▏| 88/96 [4:32:59<20:02, 150.30s/it]

Epoch 5: Avg Loss = 0.3828
Epoch 6/10


Grid Search Progress:  92%|█████████▏| 88/96 [4:33:23<20:02, 150.30s/it]

Epoch 6: Avg Loss = 0.3548
Epoch 7/10


Grid Search Progress:  92%|█████████▏| 88/96 [4:33:47<20:02, 150.30s/it]

Epoch 7: Avg Loss = 0.3156
Epoch 8/10


Grid Search Progress:  92%|█████████▏| 88/96 [4:34:11<20:02, 150.30s/it]

Epoch 8: Avg Loss = 0.2797
Epoch 9/10


Grid Search Progress:  92%|█████████▏| 88/96 [4:34:36<20:02, 150.30s/it]

Epoch 9: Avg Loss = 0.2467
Epoch 10/10


Grid Search Progress:  92%|█████████▏| 88/96 [4:35:00<20:02, 150.30s/it]

Epoch 10: Avg Loss = 0.2181


Grid Search Progress:  93%|█████████▎| 89/96 [4:35:06<20:55, 179.38s/it]

Validation Accuracy: 0.7882, F1 Score: 0.5840

[90/96] Training with lr=5e-05, bs=32, epochs=10, dropout=0.1, lstm_hidden_size=128, num_lstm_layers=2


Grid Search Progress:  93%|█████████▎| 89/96 [4:35:06<20:55, 179.38s/it]

Epoch 1/10


Grid Search Progress:  93%|█████████▎| 89/96 [4:35:32<20:55, 179.38s/it]

Epoch 1: Avg Loss = 0.5873
Epoch 2/10


Grid Search Progress:  93%|█████████▎| 89/96 [4:35:57<20:55, 179.38s/it]

Epoch 2: Avg Loss = 0.4949
Epoch 3/10


Grid Search Progress:  93%|█████████▎| 89/96 [4:36:23<20:55, 179.38s/it]

Epoch 3: Avg Loss = 0.3748
Epoch 4/10


Grid Search Progress:  93%|█████████▎| 89/96 [4:36:48<20:55, 179.38s/it]

Epoch 4: Avg Loss = 0.3156
Epoch 5/10


Grid Search Progress:  93%|█████████▎| 89/96 [4:37:14<20:55, 179.38s/it]

Epoch 5: Avg Loss = 0.2652
Epoch 6/10


Grid Search Progress:  93%|█████████▎| 89/96 [4:37:40<20:55, 179.38s/it]

Epoch 6: Avg Loss = 0.2263
Epoch 7/10


Grid Search Progress:  93%|█████████▎| 89/96 [4:38:06<20:55, 179.38s/it]

Epoch 7: Avg Loss = 0.1939
Epoch 8/10


Grid Search Progress:  93%|█████████▎| 89/96 [4:38:31<20:55, 179.38s/it]

Epoch 8: Avg Loss = 0.1741
Epoch 9/10


Grid Search Progress:  93%|█████████▎| 89/96 [4:38:57<20:55, 179.38s/it]

Epoch 9: Avg Loss = 0.1473
Epoch 10/10


Grid Search Progress:  93%|█████████▎| 89/96 [4:39:23<20:55, 179.38s/it]

Epoch 10: Avg Loss = 0.1333


Grid Search Progress:  94%|█████████▍| 90/96 [4:39:29<20:26, 204.50s/it]

Validation Accuracy: 0.7815, F1 Score: 0.5670

[91/96] Training with lr=5e-05, bs=32, epochs=10, dropout=0.1, lstm_hidden_size=256, num_lstm_layers=1


Grid Search Progress:  94%|█████████▍| 90/96 [4:39:29<20:26, 204.50s/it]

Epoch 1/10


Grid Search Progress:  94%|█████████▍| 90/96 [4:39:57<20:26, 204.50s/it]

Epoch 1: Avg Loss = 0.5856
Epoch 2/10


Grid Search Progress:  94%|█████████▍| 90/96 [4:40:26<20:26, 204.50s/it]

Epoch 2: Avg Loss = 0.5450
Epoch 3/10


Grid Search Progress:  94%|█████████▍| 90/96 [4:40:54<20:26, 204.50s/it]

Epoch 3: Avg Loss = 0.4598
Epoch 4/10


Grid Search Progress:  94%|█████████▍| 90/96 [4:41:23<20:26, 204.50s/it]

Epoch 4: Avg Loss = 0.3881
Epoch 5/10


Grid Search Progress:  94%|█████████▍| 90/96 [4:41:51<20:26, 204.50s/it]

Epoch 5: Avg Loss = 0.3351
Epoch 6/10


Grid Search Progress:  94%|█████████▍| 90/96 [4:42:20<20:26, 204.50s/it]

Epoch 6: Avg Loss = 0.2963
Epoch 7/10


Grid Search Progress:  94%|█████████▍| 90/96 [4:42:48<20:26, 204.50s/it]

Epoch 7: Avg Loss = 0.2608
Epoch 8/10


Grid Search Progress:  94%|█████████▍| 90/96 [4:43:17<20:26, 204.50s/it]

Epoch 8: Avg Loss = 0.2297
Epoch 9/10


Grid Search Progress:  94%|█████████▍| 90/96 [4:43:45<20:26, 204.50s/it]

Epoch 9: Avg Loss = 0.2055
Epoch 10/10


Grid Search Progress:  94%|█████████▍| 90/96 [4:44:13<20:26, 204.50s/it]

Epoch 10: Avg Loss = 0.1854


Grid Search Progress:  95%|█████████▍| 91/96 [4:44:20<19:12, 230.43s/it]

Validation Accuracy: 0.7923, F1 Score: 0.5963

[92/96] Training with lr=5e-05, bs=32, epochs=10, dropout=0.1, lstm_hidden_size=256, num_lstm_layers=2


Grid Search Progress:  95%|█████████▍| 91/96 [4:44:20<19:12, 230.43s/it]

Epoch 1/10


Grid Search Progress:  95%|█████████▍| 91/96 [4:44:53<19:12, 230.43s/it]

Epoch 1: Avg Loss = 0.5878
Epoch 2/10


Grid Search Progress:  95%|█████████▍| 91/96 [4:45:27<19:12, 230.43s/it]

Epoch 2: Avg Loss = 0.4758
Epoch 3/10


Grid Search Progress:  95%|█████████▍| 91/96 [4:46:00<19:12, 230.43s/it]

Epoch 3: Avg Loss = 0.3652
Epoch 4/10


Grid Search Progress:  95%|█████████▍| 91/96 [4:46:33<19:12, 230.43s/it]

Epoch 4: Avg Loss = 0.3082
Epoch 5/10


Grid Search Progress:  95%|█████████▍| 91/96 [4:47:07<19:12, 230.43s/it]

Epoch 5: Avg Loss = 0.2645
Epoch 6/10


Grid Search Progress:  95%|█████████▍| 91/96 [4:47:40<19:12, 230.43s/it]

Epoch 6: Avg Loss = 0.2258
Epoch 7/10


Grid Search Progress:  95%|█████████▍| 91/96 [4:48:13<19:12, 230.43s/it]

Epoch 7: Avg Loss = 0.2000
Epoch 8/10


Grid Search Progress:  95%|█████████▍| 91/96 [4:48:46<19:12, 230.43s/it]

Epoch 8: Avg Loss = 0.1742
Epoch 9/10


Grid Search Progress:  95%|█████████▍| 91/96 [4:49:20<19:12, 230.43s/it]

Epoch 9: Avg Loss = 0.1810
Epoch 10/10


Grid Search Progress:  95%|█████████▍| 91/96 [4:49:53<19:12, 230.43s/it]

Epoch 10: Avg Loss = 0.1432


Grid Search Progress:  96%|█████████▌| 92/96 [4:50:00<17:33, 263.29s/it]

Validation Accuracy: 0.7882, F1 Score: 0.5821

[93/96] Training with lr=5e-05, bs=32, epochs=10, dropout=0.3, lstm_hidden_size=128, num_lstm_layers=1
Epoch 1/10


Grid Search Progress:  96%|█████████▌| 92/96 [4:50:24<17:33, 263.29s/it]

Epoch 1: Avg Loss = 0.5903
Epoch 2/10


Grid Search Progress:  96%|█████████▌| 92/96 [4:50:48<17:33, 263.29s/it]

Epoch 2: Avg Loss = 0.5572
Epoch 3/10


Grid Search Progress:  96%|█████████▌| 92/96 [4:51:12<17:33, 263.29s/it]

Epoch 3: Avg Loss = 0.4721
Epoch 4/10


Grid Search Progress:  96%|█████████▌| 92/96 [4:51:37<17:33, 263.29s/it]

Epoch 4: Avg Loss = 0.4161
Epoch 5/10


Grid Search Progress:  96%|█████████▌| 92/96 [4:52:01<17:33, 263.29s/it]

Epoch 5: Avg Loss = 0.3835
Epoch 6/10


Grid Search Progress:  96%|█████████▌| 92/96 [4:52:25<17:33, 263.29s/it]

Epoch 6: Avg Loss = 0.3527
Epoch 7/10


Grid Search Progress:  96%|█████████▌| 92/96 [4:52:49<17:33, 263.29s/it]

Epoch 7: Avg Loss = 0.3151
Epoch 8/10


Grid Search Progress:  96%|█████████▌| 92/96 [4:53:14<17:33, 263.29s/it]

Epoch 8: Avg Loss = 0.2887
Epoch 9/10


Grid Search Progress:  96%|█████████▌| 92/96 [4:53:38<17:33, 263.29s/it]

Epoch 9: Avg Loss = 0.2635
Epoch 10/10


Grid Search Progress:  96%|█████████▌| 92/96 [4:54:02<17:33, 263.29s/it]

Epoch 10: Avg Loss = 0.2479


Grid Search Progress:  97%|█████████▋| 93/96 [4:54:08<12:56, 258.85s/it]

Validation Accuracy: 0.7874, F1 Score: 0.6018

[94/96] Training with lr=5e-05, bs=32, epochs=10, dropout=0.3, lstm_hidden_size=128, num_lstm_layers=2


Grid Search Progress:  97%|█████████▋| 93/96 [4:54:08<12:56, 258.85s/it]

Epoch 1/10


Grid Search Progress:  97%|█████████▋| 93/96 [4:54:34<12:56, 258.85s/it]

Epoch 1: Avg Loss = 0.5888
Epoch 2/10


Grid Search Progress:  97%|█████████▋| 93/96 [4:55:00<12:56, 258.85s/it]

Epoch 2: Avg Loss = 0.4994
Epoch 3/10


Grid Search Progress:  97%|█████████▋| 93/96 [4:55:25<12:56, 258.85s/it]

Epoch 3: Avg Loss = 0.3840
Epoch 4/10


Grid Search Progress:  97%|█████████▋| 93/96 [4:55:51<12:56, 258.85s/it]

Epoch 4: Avg Loss = 0.3284
Epoch 5/10


Grid Search Progress:  97%|█████████▋| 93/96 [4:56:17<12:56, 258.85s/it]

Epoch 5: Avg Loss = 0.2848
Epoch 6/10


Grid Search Progress:  97%|█████████▋| 93/96 [4:56:42<12:56, 258.85s/it]

Epoch 6: Avg Loss = 0.2370
Epoch 7/10


Grid Search Progress:  97%|█████████▋| 93/96 [4:57:08<12:56, 258.85s/it]

Epoch 7: Avg Loss = 0.2060
Epoch 8/10


Grid Search Progress:  97%|█████████▋| 93/96 [4:57:34<12:56, 258.85s/it]

Epoch 8: Avg Loss = 0.1738
Epoch 9/10


Grid Search Progress:  97%|█████████▋| 93/96 [4:58:00<12:56, 258.85s/it]

Epoch 9: Avg Loss = 0.1508
Epoch 10/10


Grid Search Progress:  97%|█████████▋| 93/96 [4:58:25<12:56, 258.85s/it]

Epoch 10: Avg Loss = 0.1359


Grid Search Progress:  98%|█████████▊| 94/96 [4:58:32<08:40, 260.22s/it]

Validation Accuracy: 0.7821, F1 Score: 0.5813

[95/96] Training with lr=5e-05, bs=32, epochs=10, dropout=0.3, lstm_hidden_size=256, num_lstm_layers=1


Grid Search Progress:  98%|█████████▊| 94/96 [4:58:32<08:40, 260.22s/it]

Epoch 1/10


Grid Search Progress:  98%|█████████▊| 94/96 [4:59:00<08:40, 260.22s/it]

Epoch 1: Avg Loss = 0.5856
Epoch 2/10


Grid Search Progress:  98%|█████████▊| 94/96 [4:59:29<08:40, 260.22s/it]

Epoch 2: Avg Loss = 0.5189
Epoch 3/10


Grid Search Progress:  98%|█████████▊| 94/96 [4:59:57<08:40, 260.22s/it]

Epoch 3: Avg Loss = 0.3942
Epoch 4/10


Grid Search Progress:  98%|█████████▊| 94/96 [5:00:26<08:40, 260.22s/it]

Epoch 4: Avg Loss = 0.3370
Epoch 5/10


Grid Search Progress:  98%|█████████▊| 94/96 [5:00:54<08:40, 260.22s/it]

Epoch 5: Avg Loss = 0.2950
Epoch 6/10


Grid Search Progress:  98%|█████████▊| 94/96 [5:01:23<08:40, 260.22s/it]

Epoch 6: Avg Loss = 0.2544
Epoch 7/10


Grid Search Progress:  98%|█████████▊| 94/96 [5:01:51<08:40, 260.22s/it]

Epoch 7: Avg Loss = 0.2238
Epoch 8/10


Grid Search Progress:  98%|█████████▊| 94/96 [5:02:20<08:40, 260.22s/it]

Epoch 8: Avg Loss = 0.1973
Epoch 9/10


Grid Search Progress:  98%|█████████▊| 94/96 [5:02:48<08:40, 260.22s/it]

Epoch 9: Avg Loss = 0.1770
Epoch 10/10


Grid Search Progress:  98%|█████████▊| 94/96 [5:03:17<08:40, 260.22s/it]

Epoch 10: Avg Loss = 0.1566


Grid Search Progress:  99%|█████████▉| 95/96 [5:03:23<04:29, 269.54s/it]

Validation Accuracy: 0.7904, F1 Score: 0.5983

[96/96] Training with lr=5e-05, bs=32, epochs=10, dropout=0.3, lstm_hidden_size=256, num_lstm_layers=2


Grid Search Progress:  99%|█████████▉| 95/96 [5:03:23<04:29, 269.54s/it]

Epoch 1/10


Grid Search Progress:  99%|█████████▉| 95/96 [5:03:57<04:29, 269.54s/it]

Epoch 1: Avg Loss = 0.5873
Epoch 2/10


Grid Search Progress:  99%|█████████▉| 95/96 [5:04:30<04:29, 269.54s/it]

Epoch 2: Avg Loss = 0.4750
Epoch 3/10


Grid Search Progress:  99%|█████████▉| 95/96 [5:05:04<04:29, 269.54s/it]

Epoch 3: Avg Loss = 0.3696
Epoch 4/10


Grid Search Progress:  99%|█████████▉| 95/96 [5:05:37<04:29, 269.54s/it]

Epoch 4: Avg Loss = 0.3075
Epoch 5/10


Grid Search Progress:  99%|█████████▉| 95/96 [5:06:10<04:29, 269.54s/it]

Epoch 5: Avg Loss = 0.2638
Epoch 6/10


Grid Search Progress:  99%|█████████▉| 95/96 [5:06:44<04:29, 269.54s/it]

Epoch 6: Avg Loss = 0.2235
Epoch 7/10


Grid Search Progress:  99%|█████████▉| 95/96 [5:07:17<04:29, 269.54s/it]

Epoch 7: Avg Loss = 0.1935
Epoch 8/10


Grid Search Progress:  99%|█████████▉| 95/96 [5:07:50<04:29, 269.54s/it]

Epoch 8: Avg Loss = 0.1703
Epoch 9/10


Grid Search Progress:  99%|█████████▉| 95/96 [5:08:23<04:29, 269.54s/it]

Epoch 9: Avg Loss = 0.1470
Epoch 10/10


Grid Search Progress:  99%|█████████▉| 95/96 [5:08:57<04:29, 269.54s/it]

Epoch 10: Avg Loss = 0.1280


Grid Search Progress: 100%|██████████| 96/96 [5:09:04<00:00, 193.17s/it]

Validation Accuracy: 0.7771, F1 Score: 0.5959

Best Parameters: {'learning_rate': 2e-05, 'batch_size': 16, 'epochs': 5, 'dropout': 0.1, 'lstm_hidden_size': 256, 'num_lstm_layers': 2}, Best F1 Score: 0.6594





# Save JSON File containing best parameter setting.

In [None]:
# Use independent code block to generate best parameter file in case of model training process being interrupted.
import json
import os

# path
model_dir = '/content/drive/My Drive/nlu-lab/lstm_models'
grid_search_path = os.path.join(model_dir, 'grid_search_results_bilstm.json')
best_param_path = os.path.join(model_dir, 'best_params_bilstm.json')

# load gird search and save best parameters
with open(grid_search_path, 'r') as f:
  results = json.load(f)

if results:
  best_result = max(results, key=lambda x: x['f1'])

  with open(best_param_path, 'w') as f:
    json.dump(best_result, f, indent=2)

  print(f"\nBest Parameters saved to: {best_param_path}")
  print(f"Best F1 Score: {best_result['f1']:.4f}")
  print(f"Parameters: {best_result}")
else:
  print("grid_search_results_bilstm.json is empty.")


Best Parameters saved to: /content/drive/My Drive/nlu-lab/lstm_models/best_params_bilstm.json
Best F1 Score: 0.6594
Parameters: {'lr': 2e-05, 'batch_size': 16, 'epochs': 5, 'dropout': 0.1, 'lstm_hidden_size': 256, 'num_lstm_layers': 2, 'acc': 0.7878839014512319, 'f1': 0.6594418856678407}


# Make sure the model file is being overwritten by last training process

In [7]:
# check model saved successfully or not
import os
from datetime import datetime

save_path = '/content/drive/My Drive/nlu-lab/lstm_models/best_model_bilstm.pt'

if os.path.exists(save_path):
  print(f"model exist: {save_path}")
  print("latest modify time:", datetime.fromtimestamp(os.path.getmtime(save_path)))
else:
  print(f"can't find model file, please check.")



model exist: /content/drive/My Drive/nlu-lab/lstm_models/best_model_bilstm.pt
latest modify time: 2025-04-10 02:09:54
