# Training Model - Solution C - BERT

In [None]:
# Training Model - Solution C - BERT

# Print the runtime's CPU model name (de-duplicated across cores) and the
# nvidia-smi GPU status table, so the hardware used for this run is visible
# in the cell output
!cat /proc/cpuinfo | grep "model name" | uniq
!nvidia-smi
# Please install these libraries below
# !pip install torch
# !pip install transformers
# !pip install tqdm
# !pip install pandas
# !pip install numpy
# !pip install scikit-learn

# Use the A100 GPU to run this code

# Mount Google Drive at /content/drive (Colab-only API); the dataset and
# model paths used later in this notebook all live under
# '/content/drive/My Drive'
from google.colab import drive
drive.mount('/content/drive')

# Import libraries
import re
import torch
import pandas as pd
import numpy as np
import torch.nn as nn
from tqdm.notebook import tqdm
from torch.utils.data import DataLoader, Dataset
from torch.optim import AdamW
from transformers import BertTokenizer, BertForSequenceClassification
from sklearn.metrics import f1_score, accuracy_score

# Locations of the training and validation CSV files on the mounted Drive
train_file_path, validation_file_path = (
    '/content/drive/My Drive/nlu-lab/train.csv',
    '/content/drive/My Drive/nlu-lab/dev.csv',
)

# Load training and validation data (training file first, then validation)
train_data, val_data = (
    pd.read_csv(csv_path) for csv_path in (train_file_path, validation_file_path)
)

# Sanity check: show the first two rows of each split
for loaded_frame in (train_data, val_data):
  print(loaded_frame.head(2))

model name	: Intel(R) Xeon(R) CPU @ 2.20GHz
Thu Apr 10 13:14:49 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA A100-SXM4-40GB          Off |   00000000:00:04.0 Off |                    0 |
| N/A   39C    P0             49W /  400W |       0MiB /  40960MiB |      0%      Default |
|                                         |                        |             Disabled |
+-----------------------------------------+------------------------+----------------------+
    

# Text Preprocessing

In [None]:
# text preprocessing
def _keep_initial_case(expansion):
  """Return a re.sub callback that yields `expansion`, capitalised when the match starts with an uppercase letter."""
  def _replace(match):
    if match.group(0)[0].isupper():
      return expansion[0].upper() + expansion[1:]
    return expansion
  return _replace


def clean_text(text):
  """Normalise a raw claim/evidence string before tokenisation.

  Steps, in order:
    1. Expand English contractions ("can't" -> "cannot", "n't" -> " not",
       "'re" -> " are", ...). Only pronoun-like words have "'s" expanded to
       " is", so possessives such as "John's" are left alone.
    2. Replace URLs (starting with http://, https:// or www.) by "[URL]".
    3. Collapse every run of whitespace to one space and strip both ends.

  Args:
    text: the input string.

  Returns:
    The cleaned string.
  """
  # The irregular forms must run before the generic "n't" rule. They are
  # matched case-insensitively so that a sentence-initial "Can't"/"Won't" is
  # not caught by the generic rule and mangled into "Ca not"/"Wo not".
  text = re.sub(r"\bcan't\b", _keep_initial_case("cannot"), text, flags=re.IGNORECASE)
  text = re.sub(r"\bwon't\b", _keep_initial_case("will not"), text, flags=re.IGNORECASE)
  text = re.sub(r"n't\b", " not", text)  # Replace n't with not
  text = re.sub(r"'re\b", " are", text)  # Replace 're with are
  text = re.sub(r"'m\b", " am", text)  # Replace 'm with am
  text = re.sub(r"'ve\b", " have", text)  # Replace 've with have
  text = re.sub(r"'ll\b", " will", text)  # Replace 'll with will
  text = re.sub(r"'d\b", " would", text)  # Replace 'd with would
  # Expand only pronoun + 's
  text = re.sub(r"\b(he|she|it|that|what|who|there|where|why|when)'s\b", r"\1 is", text, flags=re.IGNORECASE)
  # url: require a real scheme ("http://", "https://") or a "www." prefix at a
  # word boundary, so ordinary words that merely start with "http"/"www"
  # (e.g. "httpd") are not replaced
  text = re.sub(r"\b(?:https?://|www\.)\S+", '[URL]', text)
  # blank space
  text = re.sub(r"\s+", " ", text).strip()
  return text

# Build the model input for each split: claim and evidence joined by a single
# space, then normalised with clean_text
for split_frame in (train_data, val_data):
  claim_and_evidence = split_frame['Claim'] + " " + split_frame['Evidence']
  split_frame['combined_text'] = claim_and_evidence.apply(clean_text)

# Check the cleaned data
for split_frame in (train_data, val_data):
  print(split_frame[['combined_text']].head())

                                       combined_text
0  We should introduce school vouchers Among the ...
1  We should legalize insider trading The U.S. Se...
2  We should subsidize investigative journalism T...
3  We should further exploit nuclear power a 2001...
4  We should ban whaling The US and several other...
                                       combined_text
0  Democracy should be done away with. Amartya Se...
1  Polygamy should be made legal. The Supreme Cou...
2  Hunting should be banned In total it is estima...
3  Television should be given up. Barbera mention...
4  Abortions ought to be prohibited. According to...


# Training & Evaluation Function

In [None]:
# Parameters: maximum sequence length passed to the tokenizer for every example
MAX_LEN = 128

# Check device: use the GPU when PyTorch can see one, otherwise the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f'Using device: {device}')

# Load the tokenizer that belongs to the 'bert-base-uncased' checkpoint
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Dataset class
class EDDataset(Dataset):
  """Torch dataset that tokenises one text per item, optionally with a label.

  Args:
    texts: indexable sequence of strings (one per example).
    labels: optional sequence of integer class labels aligned with `texts`.
      When omitted, items carry no 'labels' entry.
    text_tokenizer: optional tokenizer callable. Defaults to the
      notebook-level `tokenizer`, which keeps existing two-argument callers
      working unchanged.
    max_len: optional sequence length handed to the tokenizer as
      `max_length`. Defaults to the notebook-level `MAX_LEN`, read at item
      access time.

  Raises:
    ValueError: if `labels` is given and its length differs from `texts`.
  """

  def __init__(self, texts, labels=None, text_tokenizer=None, max_len=None):
    # Fail early on misaligned inputs instead of raising IndexError (or
    # silently ignoring surplus labels) in the middle of training.
    if labels is not None and len(labels) != len(texts):
      raise ValueError(
          f"texts and labels must have the same length, got {len(texts)} and {len(labels)}"
      )
    self.texts = texts
    self.labels = labels
    # Only fall back to the global tokenizer when no override was supplied
    self.tokenizer = tokenizer if text_tokenizer is None else text_tokenizer
    self.max_len = max_len

  def __len__(self):
    """Number of examples in the dataset."""
    return len(self.texts)

  def __getitem__(self, index):
    """Tokenise example `index` and return a dict of tensors.

    The dict always has 'input_ids' and 'attention_mask'; 'labels' (a
    torch.long scalar) is added only when labels were provided.
    """
    text = self.texts[index]
    max_length = MAX_LEN if self.max_len is None else self.max_len
    encoding = self.tokenizer(text, max_length=max_length, padding='max_length', truncation=True, return_tensors='pt')
    # squeeze(0) removes the leading dimension when it has size 1 - presumably
    # the batch dimension the tokenizer adds for a single input
    item = {
        'input_ids': encoding['input_ids'].squeeze(0),
        'attention_mask': encoding['attention_mask'].squeeze(0)
    }
    if self.labels is not None:
      item['labels'] = torch.tensor(self.labels[index], dtype=torch.long)
    return item

# Function to train the model; tqdm is used to display training progress
from tqdm import tqdm

def train_model(model, data_loader, optimizer, criterion, device, num_epochs, show_progress=True):
  """Fine-tune `model` for `num_epochs` passes over `data_loader`.

  Args:
    model: module called as `model(input_ids, attention_mask=...)` whose
      output exposes a `.logits` attribute.
    data_loader: iterable of batches; each batch is a mapping with
      'input_ids', 'attention_mask' and 'labels' tensors.
    optimizer: optimizer already bound to the model's parameters.
    criterion: loss callable invoked as `criterion(logits, labels)`.
    device: device the batch tensors are moved to.
    num_epochs: number of passes over `data_loader`.
    show_progress: when True (default) an epoch-level tqdm bar is shown;
      when False no progress bar is created.

  Returns:
    A list with the mean training loss of every epoch, in order. An epoch
    that saw no batches is recorded as NaN instead of dividing by zero.
  """
  epoch_iter = range(1, num_epochs + 1)
  if show_progress:
    epoch_iter = tqdm(epoch_iter, desc="Epoch")

  epoch_losses = []
  for epoch in epoch_iter:
    # Re-assert training mode every epoch so dropout is active even if the
    # model was switched to eval mode between epochs
    model.train()
    total_loss = 0.0
    num_batches = 0

    for batch in data_loader:
      input_ids = batch['input_ids'].to(device)
      attention_mask = batch['attention_mask'].to(device)
      labels = batch['labels'].to(device)

      optimizer.zero_grad()
      outputs = model(input_ids, attention_mask=attention_mask)
      loss = criterion(outputs.logits, labels)
      loss.backward()
      optimizer.step()

      total_loss += loss.item()
      num_batches += 1

    # Counting batches (instead of len(data_loader)) also supports iterables
    # without __len__ and avoids ZeroDivisionError on an empty loader
    avg_loss = total_loss / num_batches if num_batches else float('nan')
    epoch_losses.append(avg_loss)
    print(f"Epoch {epoch}: Loss = {avg_loss:.4f}")

  return epoch_losses

# Function to evaluate model
def evaluate_model(model, data_loader, device):
  """Score `model` on every batch of `data_loader`.

  The model is put in eval mode and run without gradient tracking; the
  predicted class of each example is the argmax over the logits along
  dimension 1.

  Returns:
    A tuple `(accuracy, f1)` computed with `accuracy_score` and `f1_score`
    (default arguments) over all collected labels and predictions.
  """
  model.eval()
  predicted, gold = [], []

  with torch.no_grad():
    for batch in data_loader:
      batch_labels = batch['labels'].to(device)
      logits = model(
          batch['input_ids'].to(device),
          attention_mask=batch['attention_mask'].to(device),
      ).logits

      predicted += list(logits.argmax(dim=1).cpu().numpy())
      gold += list(batch_labels.cpu().numpy())

  accuracy = accuracy_score(gold, predicted)
  f1_value = f1_score(gold, predicted)
  return accuracy, f1_value


Using device: cuda


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

# Model Training

In [None]:
from itertools import product
import os
import json
from tqdm import tqdm
import torch.nn as nn
from torch.utils.data import DataLoader
from transformers import BertConfig, BertForSequenceClassification
from torch.optim import AdamW
import torch

# parameter grid search
param_grid = {
    'learning_rate': [1e-5, 2e-5],
    'batch_size': [8, 16, 32],
    'epochs': [3, 5],
    'dropout': [0.1, 0.3, 0.5],
}

# parameter combination list (cartesian product of all grid values)
param_combinations = list(product(
    param_grid['learning_rate'],
    param_grid['batch_size'],
    param_grid['epochs'],
    param_grid['dropout'],
))

best_f1 = 0
best_params = {}

model_dir = '/content/drive/My Drive/nlu-lab/bert_model'
os.makedirs(model_dir, exist_ok=True)
results_path = os.path.join(model_dir, 'grid_search_results_bert.json')
best_model_path = os.path.join(model_dir, 'best_model_bert.pt')

# Every configuration starts from the same seed so that differences in the
# scores come from the hyper-parameters rather than from a different shuffle
# or classifier-head initialisation.
# NOTE(review): some GPU kernels are non-deterministic, so runs may still
# differ slightly between sessions.
SEED = 42

results = []

# The datasets do not depend on any hyper-parameter: build them once instead
# of re-converting the DataFrame columns to lists on every iteration.
train_dataset = EDDataset(train_data['combined_text'].tolist(), train_data['label'].tolist())
val_dataset = EDDataset(val_data['combined_text'].tolist(), val_data['label'].tolist())

# training
for idx, (lr, bs, epochs, dropout) in enumerate(tqdm(param_combinations, desc="Grid Search Progress")):
  tqdm.write(f"\n[{idx+1}/{len(param_combinations)}] Training with lr={lr}, bs={bs}, epochs={epochs}, dropout={dropout}")

  torch.manual_seed(SEED)

  # Only the loaders depend on the batch size
  train_loader = DataLoader(train_dataset, batch_size=bs, shuffle=True)
  val_loader = DataLoader(val_dataset, batch_size=bs)

  # bert configuration: the same dropout value is used for the hidden layers
  # and for the attention probabilities
  config = BertConfig.from_pretrained(
      'bert-base-uncased',
      num_labels=2,
      hidden_dropout_prob=dropout,
      attention_probs_dropout_prob=dropout
  )
  model = BertForSequenceClassification.from_pretrained('bert-base-uncased', config=config).to(device)

  optimizer = AdamW(model.parameters(), lr=lr)
  criterion = nn.CrossEntropyLoss()

  train_model(model, train_loader, optimizer, criterion, device, epochs, show_progress=True)

  acc, f1 = evaluate_model(model, val_loader, device)
  # Plain Python floats keep the JSON dump independent of the metric's dtype
  acc, f1 = float(acc), float(f1)
  tqdm.write(f"Validation Accuracy: {acc:.4f}, F1 Score: {f1:.4f}")

  results.append({
      'learning_rate': lr,
      'batch_size': bs,
      'epochs': epochs,
      'dropout': dropout,
      'acc': acc,
      'f1': f1
  })
  # Persist the results after every configuration: the full grid takes hours,
  # and an interrupted session would otherwise lose every finished run.
  with open(results_path, 'w') as results_file:
    json.dump(results, results_file, indent=2)

  # Choose the best parameters according to F1 scores, then save the best model.
  if f1 > best_f1:
    best_f1 = f1
    best_params = {
        'learning_rate': lr,
        'batch_size': bs,
        'epochs': epochs,
        'dropout': dropout
    }
    torch.save(model.state_dict(), best_model_path)
    tqdm.write("Best model saved.")

  # Drop this run's model and optimizer state before the next model is
  # loaded, so two models never have to fit on the GPU at once.
  del model, optimizer
  if torch.cuda.is_available():
    torch.cuda.empty_cache()

print("\nGrid search complete.")
print(f"Best Parameters: {best_params}")
print(f"Best F1 Score: {best_f1:.4f}")


Grid Search Progress:   0%|          | 0/36 [00:00<?, ?it/s]Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`



[1/36] Training with lr=1e-05, bs=8, epochs=3, dropout=0.1


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.

Epoch:   0%|          | 0/3 [00:00<?, ?it/s][A
Epoch:  33%|███▎      | 1/3 [02:28<04:57, 148.70s/it][A

Epoch 1: Loss = 0.3654



Epoch:  67%|██████▋   | 2/3 [04:56<02:27, 147.93s/it][A

Epoch 2: Loss = 0.2291



Epoch: 100%|██████████| 3/3 [07:23<00:00, 147.79s/it]

Epoch 3: Loss = 0.1302



Grid Search Progress:   0%|          | 0/36 [07:42<?, ?it/s]

Validation Accuracy: 0.8510, F1 Score: 0.7637


Grid Search Progress:   3%|▎         | 1/36 [07:43<4:30:14, 463.27s/it]

Best model saved.

[2/36] Training with lr=1e-05, bs=8, epochs=3, dropout=0.3


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.

Epoch:   0%|          | 0/3 [00:00<?, ?it/s][A
Epoch:  33%|███▎      | 1/3 [02:27<04:54, 147.40s/it][A

Epoch 1: Loss = 0.4453



Epoch:  67%|██████▋   | 2/3 [04:54<02:27, 147.39s/it][A

Epoch 2: Loss = 0.3294



Epoch: 100%|██████████| 3/3 [07:22<00:00, 147.40s/it]

Epoch 3: Loss = 0.2823



Grid Search Progress:   6%|▌         | 2/36 [15:21<4:21:00, 460.59s/it]Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Validation Accuracy: 0.8201, F1 Score: 0.7404

[3/36] Training with lr=1e-05, bs=8, epochs=3, dropout=0.5



Epoch:   0%|          | 0/3 [00:00<?, ?it/s][A
Epoch:  33%|███▎      | 1/3 [02:27<04:55, 147.68s/it][A

Epoch 1: Loss = 0.5416



Epoch:  67%|██████▋   | 2/3 [04:56<02:28, 148.12s/it][A

Epoch 2: Loss = 0.4627



Epoch: 100%|██████████| 3/3 [07:24<00:00, 148.28s/it]

Epoch 3: Loss = 0.4280



Grid Search Progress:   8%|▊         | 3/36 [23:03<4:13:30, 460.91s/it]Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Validation Accuracy: 0.7693, F1 Score: 0.6612

[4/36] Training with lr=1e-05, bs=8, epochs=5, dropout=0.1



Epoch:   0%|          | 0/5 [00:00<?, ?it/s][A
Epoch:  20%|██        | 1/5 [02:27<09:48, 147.24s/it][A

Epoch 1: Loss = 0.3741



Epoch:  40%|████      | 2/5 [04:54<07:21, 147.24s/it][A

Epoch 2: Loss = 0.2439



Epoch:  60%|██████    | 3/5 [07:21<04:54, 147.24s/it][A

Epoch 3: Loss = 0.1423



Epoch:  80%|████████  | 4/5 [09:49<02:27, 147.27s/it][A

Epoch 4: Loss = 0.0735



Epoch: 100%|██████████| 5/5 [12:16<00:00, 147.30s/it]

Epoch 5: Loss = 0.0419



Grid Search Progress:   8%|▊         | 3/36 [35:36<4:13:30, 460.91s/it]

Validation Accuracy: 0.8616, F1 Score: 0.7666


Grid Search Progress:  11%|█         | 4/36 [35:37<5:07:32, 576.64s/it]Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Best model saved.

[5/36] Training with lr=1e-05, bs=8, epochs=5, dropout=0.3



Epoch:   0%|          | 0/5 [00:00<?, ?it/s][A
Epoch:  20%|██        | 1/5 [02:28<09:52, 148.22s/it][A

Epoch 1: Loss = 0.4211



Epoch:  40%|████      | 2/5 [04:56<07:24, 148.14s/it][A

Epoch 2: Loss = 0.3235



Epoch:  60%|██████    | 3/5 [07:24<04:55, 148.00s/it][A

Epoch 3: Loss = 0.2761



Epoch:  80%|████████  | 4/5 [09:51<02:27, 147.88s/it][A

Epoch 4: Loss = 0.2384



Epoch: 100%|██████████| 5/5 [12:19<00:00, 147.91s/it]

Epoch 5: Loss = 0.2035



Grid Search Progress:  14%|█▍        | 5/36 [48:13<5:31:21, 641.34s/it]Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Validation Accuracy: 0.7335, F1 Score: 0.6679

[6/36] Training with lr=1e-05, bs=8, epochs=5, dropout=0.5



Epoch:   0%|          | 0/5 [00:00<?, ?it/s][A
Epoch:  20%|██        | 1/5 [02:27<09:50, 147.65s/it][A

Epoch 1: Loss = 0.5496



Epoch:  40%|████      | 2/5 [04:55<07:22, 147.63s/it][A

Epoch 2: Loss = 0.4754



Epoch:  60%|██████    | 3/5 [07:22<04:55, 147.64s/it][A

Epoch 3: Loss = 0.4397



Epoch:  80%|████████  | 4/5 [09:50<02:27, 147.65s/it][A

Epoch 4: Loss = 0.4131



Epoch: 100%|██████████| 5/5 [12:17<00:00, 147.58s/it]

Epoch 5: Loss = 0.3914



Grid Search Progress:  17%|█▋        | 6/36 [1:00:47<5:39:52, 679.76s/it]Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Validation Accuracy: 0.7064, F1 Score: 0.6345

[7/36] Training with lr=1e-05, bs=16, epochs=3, dropout=0.1



Epoch:   0%|          | 0/3 [00:00<?, ?it/s][A
Epoch:  33%|███▎      | 1/3 [02:09<04:18, 129.19s/it][A

Epoch 1: Loss = 0.3804



Epoch:  67%|██████▋   | 2/3 [04:18<02:09, 129.13s/it][A

Epoch 2: Loss = 0.2516



Epoch: 100%|██████████| 3/3 [06:27<00:00, 129.17s/it]

Epoch 3: Loss = 0.1622



Grid Search Progress:  17%|█▋        | 6/36 [1:07:30<5:39:52, 679.76s/it]

Validation Accuracy: 0.8633, F1 Score: 0.7740


Grid Search Progress:  19%|█▉        | 7/36 [1:07:31<4:44:59, 589.64s/it]Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Best model saved.

[8/36] Training with lr=1e-05, bs=16, epochs=3, dropout=0.3



Epoch:   0%|          | 0/3 [00:00<?, ?it/s][A
Epoch:  33%|███▎      | 1/3 [02:09<04:18, 129.26s/it][A

Epoch 1: Loss = 0.4460



Epoch:  67%|██████▋   | 2/3 [04:18<02:09, 129.25s/it][A

Epoch 2: Loss = 0.3479



Epoch: 100%|██████████| 3/3 [06:27<00:00, 129.29s/it]

Epoch 3: Loss = 0.3027



Grid Search Progress:  22%|██▏       | 8/36 [1:14:15<4:07:30, 530.38s/it]

Validation Accuracy: 0.7908, F1 Score: 0.7088

[9/36] Training with lr=1e-05, bs=16, epochs=3, dropout=0.5


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.

Epoch:   0%|          | 0/3 [00:00<?, ?it/s][A
Epoch:  33%|███▎      | 1/3 [02:09<04:18, 129.42s/it][A

Epoch 1: Loss = 0.5488



Epoch:  67%|██████▋   | 2/3 [04:18<02:09, 129.39s/it][A

Epoch 2: Loss = 0.4733



Epoch: 100%|██████████| 3/3 [06:28<00:00, 129.35s/it]

Epoch 3: Loss = 0.4390



Grid Search Progress:  25%|██▌       | 9/36 [1:20:59<3:40:51, 490.79s/it]Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Validation Accuracy: 0.8026, F1 Score: 0.6497

[10/36] Training with lr=1e-05, bs=16, epochs=5, dropout=0.1



Epoch:   0%|          | 0/5 [00:00<?, ?it/s][A
Epoch:  20%|██        | 1/5 [02:09<08:36, 129.11s/it][A

Epoch 1: Loss = 0.3756



Epoch:  40%|████      | 2/5 [04:18<06:27, 129.08s/it][A

Epoch 2: Loss = 0.2522



Epoch:  60%|██████    | 3/5 [06:27<04:18, 129.35s/it][A

Epoch 3: Loss = 0.1673



Epoch:  80%|████████  | 4/5 [08:37<02:09, 129.29s/it][A

Epoch 4: Loss = 0.0956



Epoch: 100%|██████████| 5/5 [10:46<00:00, 129.20s/it]

Epoch 5: Loss = 0.0541



Grid Search Progress:  28%|██▊       | 10/36 [1:32:00<3:55:31, 543.51s/it]Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Validation Accuracy: 0.8495, F1 Score: 0.7614

[11/36] Training with lr=1e-05, bs=16, epochs=5, dropout=0.3



Epoch:   0%|          | 0/5 [00:00<?, ?it/s][A
Epoch:  20%|██        | 1/5 [02:09<08:36, 129.25s/it][A

Epoch 1: Loss = 0.4487



Epoch:  40%|████      | 2/5 [04:18<06:27, 129.16s/it][A

Epoch 2: Loss = 0.3391



Epoch:  60%|██████    | 3/5 [06:27<04:18, 129.16s/it][A

Epoch 3: Loss = 0.2967



Epoch:  80%|████████  | 4/5 [08:36<02:09, 129.27s/it][A

Epoch 4: Loss = 0.2604



Epoch: 100%|██████████| 5/5 [10:46<00:00, 129.26s/it]

Epoch 5: Loss = 0.2281



Grid Search Progress:  31%|███       | 11/36 [1:43:02<4:01:34, 579.79s/it]

Validation Accuracy: 0.7273, F1 Score: 0.6642

[12/36] Training with lr=1e-05, bs=16, epochs=5, dropout=0.5


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.

Epoch:   0%|          | 0/5 [00:00<?, ?it/s][A
Epoch:  20%|██        | 1/5 [02:09<08:37, 129.38s/it][A

Epoch 1: Loss = 0.5539



Epoch:  40%|████      | 2/5 [04:19<06:28, 129.60s/it][A

Epoch 2: Loss = 0.4746



Epoch:  60%|██████    | 3/5 [06:28<04:19, 129.52s/it][A

Epoch 3: Loss = 0.4429



Epoch:  80%|████████  | 4/5 [08:37<02:09, 129.39s/it][A

Epoch 4: Loss = 0.4170



Epoch: 100%|██████████| 5/5 [10:46<00:00, 129.36s/it]

Epoch 5: Loss = 0.3983



Grid Search Progress:  33%|███▎      | 12/36 [1:54:05<4:01:59, 604.99s/it]Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Validation Accuracy: 0.6998, F1 Score: 0.6290

[13/36] Training with lr=1e-05, bs=32, epochs=3, dropout=0.1



Epoch:   0%|          | 0/3 [00:00<?, ?it/s][A
Epoch:  33%|███▎      | 1/3 [02:00<04:00, 120.17s/it][A

Epoch 1: Loss = 0.3897



Epoch:  67%|██████▋   | 2/3 [04:00<02:00, 120.26s/it][A

Epoch 2: Loss = 0.2732



Epoch: 100%|██████████| 3/3 [06:00<00:00, 120.27s/it]

Epoch 3: Loss = 0.1988



Grid Search Progress:  36%|███▌      | 13/36 [2:00:20<3:25:15, 535.47s/it]

Validation Accuracy: 0.8577, F1 Score: 0.7709

[14/36] Training with lr=1e-05, bs=32, epochs=3, dropout=0.3


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.

Epoch:   0%|          | 0/3 [00:00<?, ?it/s][A
Epoch:  33%|███▎      | 1/3 [02:00<04:00, 120.12s/it][A

Epoch 1: Loss = 0.4657



Epoch:  67%|██████▋   | 2/3 [04:00<02:00, 120.38s/it][A

Epoch 2: Loss = 0.3514



Epoch: 100%|██████████| 3/3 [06:01<00:00, 120.42s/it]

Epoch 3: Loss = 0.3086



Grid Search Progress:  39%|███▉      | 14/36 [2:06:37<2:58:44, 487.46s/it]Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Validation Accuracy: 0.6925, F1 Score: 0.6368

[15/36] Training with lr=1e-05, bs=32, epochs=3, dropout=0.5



Epoch:   0%|          | 0/3 [00:00<?, ?it/s][A
Epoch:  33%|███▎      | 1/3 [02:00<04:01, 120.80s/it][A

Epoch 1: Loss = 0.5653



Epoch:  67%|██████▋   | 2/3 [04:01<02:00, 120.65s/it][A

Epoch 2: Loss = 0.4916



Epoch: 100%|██████████| 3/3 [06:01<00:00, 120.44s/it]

Epoch 3: Loss = 0.4661



Grid Search Progress:  42%|████▏     | 15/36 [2:12:53<2:38:50, 453.84s/it]Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Validation Accuracy: 0.7681, F1 Score: 0.6238

[16/36] Training with lr=1e-05, bs=32, epochs=5, dropout=0.1



Epoch:   0%|          | 0/5 [00:00<?, ?it/s][A
Epoch:  20%|██        | 1/5 [01:59<07:59, 119.98s/it][A

Epoch 1: Loss = 0.3979



Epoch:  40%|████      | 2/5 [04:00<06:00, 120.01s/it][A

Epoch 2: Loss = 0.2757



Epoch:  60%|██████    | 3/5 [05:59<03:59, 119.95s/it][A

Epoch 3: Loss = 0.1962



Epoch:  80%|████████  | 4/5 [08:00<02:00, 120.02s/it][A

Epoch 4: Loss = 0.1291



Epoch: 100%|██████████| 5/5 [09:59<00:00, 119.99s/it]

Epoch 5: Loss = 0.0737



Grid Search Progress:  44%|████▍     | 16/36 [2:23:07<2:47:24, 502.20s/it]Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Validation Accuracy: 0.8667, F1 Score: 0.7633

[17/36] Training with lr=1e-05, bs=32, epochs=5, dropout=0.3



Epoch:   0%|          | 0/5 [00:00<?, ?it/s][A
Epoch:  20%|██        | 1/5 [01:59<07:59, 119.91s/it][A

Epoch 1: Loss = 0.4633



Epoch:  40%|████      | 2/5 [03:59<05:59, 119.94s/it][A

Epoch 2: Loss = 0.3545



Epoch:  60%|██████    | 3/5 [05:59<03:59, 119.92s/it][A

Epoch 3: Loss = 0.3086



Epoch:  80%|████████  | 4/5 [07:59<01:59, 119.91s/it][A

Epoch 4: Loss = 0.2785



Epoch: 100%|██████████| 5/5 [09:59<00:00, 119.92s/it]

Epoch 5: Loss = 0.2512



Grid Search Progress:  47%|████▋     | 17/36 [2:33:21<2:49:41, 535.89s/it]Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Validation Accuracy: 0.7503, F1 Score: 0.6798

[18/36] Training with lr=1e-05, bs=32, epochs=5, dropout=0.5



Epoch:   0%|          | 0/5 [00:00<?, ?it/s][A
Epoch:  20%|██        | 1/5 [01:59<07:59, 119.92s/it][A

Epoch 1: Loss = 0.5630



Epoch:  40%|████      | 2/5 [03:59<05:59, 119.94s/it][A

Epoch 2: Loss = 0.4882



Epoch:  60%|██████    | 3/5 [05:59<03:59, 119.94s/it][A

Epoch 3: Loss = 0.4559



Epoch:  80%|████████  | 4/5 [07:59<01:59, 119.94s/it][A

Epoch 4: Loss = 0.4405



Epoch: 100%|██████████| 5/5 [09:59<00:00, 119.94s/it]

Epoch 5: Loss = 0.4165



Grid Search Progress:  50%|█████     | 18/36 [2:43:36<2:47:50, 559.48s/it]

Validation Accuracy: 0.7049, F1 Score: 0.6252

[19/36] Training with lr=2e-05, bs=8, epochs=3, dropout=0.1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.

Epoch:   0%|          | 0/3 [00:00<?, ?it/s][A
Epoch:  33%|███▎      | 1/3 [02:27<04:54, 147.34s/it][A

Epoch 1: Loss = 0.3697



Epoch:  67%|██████▋   | 2/3 [04:54<02:27, 147.29s/it][A

Epoch 2: Loss = 0.2197



Epoch: 100%|██████████| 3/3 [07:21<00:00, 147.32s/it]

Epoch 3: Loss = 0.1056



Grid Search Progress:  53%|█████▎    | 19/36 [2:51:14<2:29:55, 529.14s/it]Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Validation Accuracy: 0.8682, F1 Score: 0.7432

[20/36] Training with lr=2e-05, bs=8, epochs=3, dropout=0.3



Epoch:   0%|          | 0/3 [00:00<?, ?it/s][A
Epoch:  33%|███▎      | 1/3 [02:27<04:54, 147.31s/it][A

Epoch 1: Loss = 0.4148



Epoch:  67%|██████▋   | 2/3 [04:54<02:27, 147.29s/it][A

Epoch 2: Loss = 0.3104



Epoch: 100%|██████████| 3/3 [07:21<00:00, 147.32s/it]

Epoch 3: Loss = 0.2552



Grid Search Progress:  56%|█████▌    | 20/36 [2:58:53<2:15:26, 507.91s/it]Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Validation Accuracy: 0.7727, F1 Score: 0.6978

[21/36] Training with lr=2e-05, bs=8, epochs=3, dropout=0.5



Epoch:   0%|          | 0/3 [00:00<?, ?it/s][A
Epoch:  33%|███▎      | 1/3 [02:27<04:54, 147.45s/it][A

Epoch 1: Loss = 0.5948



Epoch:  67%|██████▋   | 2/3 [04:55<02:27, 147.88s/it][A

Epoch 2: Loss = 0.5139



Epoch: 100%|██████████| 3/3 [07:23<00:00, 147.80s/it]

Epoch 3: Loss = 0.4511



Grid Search Progress:  58%|█████▊    | 21/36 [3:06:33<2:03:22, 493.49s/it]Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Validation Accuracy: 0.7653, F1 Score: 0.6469

[22/36] Training with lr=2e-05, bs=8, epochs=5, dropout=0.1



Epoch:   0%|          | 0/5 [00:00<?, ?it/s][A
Epoch:  20%|██        | 1/5 [02:27<09:50, 147.65s/it][A

Epoch 1: Loss = 0.3713



Epoch:  40%|████      | 2/5 [04:55<07:23, 147.84s/it][A

Epoch 2: Loss = 0.2202



Epoch:  60%|██████    | 3/5 [07:23<04:55, 147.68s/it][A

Epoch 3: Loss = 0.1081



Epoch:  80%|████████  | 4/5 [09:50<02:27, 147.62s/it][A

Epoch 4: Loss = 0.0556



Epoch: 100%|██████████| 5/5 [12:18<00:00, 147.62s/it]

Epoch 5: Loss = 0.0399



Grid Search Progress:  61%|██████    | 22/36 [3:19:07<2:13:26, 571.86s/it]Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Validation Accuracy: 0.8434, F1 Score: 0.7544

[23/36] Training with lr=2e-05, bs=8, epochs=5, dropout=0.3



Epoch:   0%|          | 0/5 [00:00<?, ?it/s][A
Epoch:  20%|██        | 1/5 [02:28<09:55, 148.93s/it][A

Epoch 1: Loss = 0.4279



Epoch:  40%|████      | 2/5 [04:59<07:29, 149.83s/it][A

Epoch 2: Loss = 0.3186



Epoch:  60%|██████    | 3/5 [07:28<04:59, 149.72s/it][A

Epoch 3: Loss = 0.2623



Epoch:  80%|████████  | 4/5 [09:58<02:29, 149.49s/it][A

Epoch 4: Loss = 0.2123



Epoch: 100%|██████████| 5/5 [12:27<00:00, 149.43s/it]

Epoch 5: Loss = 0.1718



Grid Search Progress:  64%|██████▍   | 23/36 [3:31:51<2:16:22, 629.42s/it]

Validation Accuracy: 0.7536, F1 Score: 0.6798

[24/36] Training with lr=2e-05, bs=8, epochs=5, dropout=0.5


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.

Epoch:   0%|          | 0/5 [00:00<?, ?it/s][A
Epoch:  20%|██        | 1/5 [02:29<09:57, 149.33s/it][A

Epoch 1: Loss = 0.5422



Epoch:  40%|████      | 2/5 [04:58<07:28, 149.45s/it][A

Epoch 2: Loss = 0.4474



Epoch:  60%|██████    | 3/5 [07:28<04:59, 149.72s/it][A

Epoch 3: Loss = 0.4000



Epoch:  80%|████████  | 4/5 [09:58<02:29, 149.56s/it][A

Epoch 4: Loss = 0.3673



Epoch: 100%|██████████| 5/5 [12:27<00:00, 149.41s/it]

Epoch 5: Loss = 0.3348



Grid Search Progress:  67%|██████▋   | 24/36 [3:44:35<2:13:56, 669.71s/it]Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Validation Accuracy: 0.8274, F1 Score: 0.7018

[25/36] Training with lr=2e-05, bs=16, epochs=3, dropout=0.1



Epoch:   0%|          | 0/3 [00:00<?, ?it/s][A
Epoch:  33%|███▎      | 1/3 [02:09<04:19, 129.56s/it][A

Epoch 1: Loss = 0.3638



Epoch:  67%|██████▋   | 2/3 [04:18<02:09, 129.41s/it][A

Epoch 2: Loss = 0.2189



Epoch: 100%|██████████| 3/3 [06:28<00:00, 129.48s/it]

Epoch 3: Loss = 0.1103



Grid Search Progress:  69%|██████▉   | 25/36 [3:51:19<1:48:10, 590.02s/it]

Validation Accuracy: 0.8662, F1 Score: 0.7561

[26/36] Training with lr=2e-05, bs=16, epochs=3, dropout=0.3


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.

Epoch:   0%|          | 0/3 [00:00<?, ?it/s][A
Epoch:  33%|███▎      | 1/3 [02:09<04:19, 129.72s/it][A

Epoch 1: Loss = 0.4136



Epoch:  67%|██████▋   | 2/3 [04:19<02:09, 129.96s/it][A

Epoch 2: Loss = 0.3117



Epoch: 100%|██████████| 3/3 [06:29<00:00, 129.80s/it]

Epoch 3: Loss = 0.2576



Grid Search Progress:  72%|███████▏  | 26/36 [3:58:04<1:29:05, 534.52s/it]

Validation Accuracy: 0.7511, F1 Score: 0.6812

[27/36] Training with lr=2e-05, bs=16, epochs=3, dropout=0.5


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.

Epoch:   0%|          | 0/3 [00:00<?, ?it/s][A
Epoch:  33%|███▎      | 1/3 [02:09<04:19, 129.61s/it][A

Epoch 1: Loss = 0.5374



Epoch:  67%|██████▋   | 2/3 [04:19<02:09, 129.71s/it][A

Epoch 2: Loss = 0.4662



Epoch: 100%|██████████| 3/3 [06:29<00:00, 129.80s/it]

Epoch 3: Loss = 0.4288



Grid Search Progress:  75%|███████▌  | 27/36 [4:04:50<1:14:23, 495.90s/it]Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Validation Accuracy: 0.6870, F1 Score: 0.6182

[28/36] Training with lr=2e-05, bs=16, epochs=5, dropout=0.1



Epoch:   0%|          | 0/5 [00:00<?, ?it/s][A
Epoch:  20%|██        | 1/5 [02:09<08:39, 129.76s/it][A

Epoch 1: Loss = 0.3806



Epoch:  40%|████      | 2/5 [04:19<06:28, 129.54s/it][A

Epoch 2: Loss = 0.2274



Epoch:  60%|██████    | 3/5 [06:29<04:19, 129.78s/it][A

Epoch 3: Loss = 0.1205



Epoch:  80%|████████  | 4/5 [08:38<02:09, 129.72s/it][A

Epoch 4: Loss = 0.0580



Epoch: 100%|██████████| 5/5 [10:48<00:00, 129.76s/it]

Epoch 5: Loss = 0.0355



Grid Search Progress:  78%|███████▊  | 28/36 [4:15:54<1:12:51, 546.47s/it]Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Validation Accuracy: 0.8674, F1 Score: 0.7625

[29/36] Training with lr=2e-05, bs=16, epochs=5, dropout=0.3



Epoch:   0%|          | 0/5 [00:00<?, ?it/s][A
Epoch:  20%|██        | 1/5 [02:10<08:41, 130.29s/it][A

Epoch 1: Loss = 0.4271



Epoch:  40%|████      | 2/5 [04:20<06:30, 130.22s/it][A

Epoch 2: Loss = 0.3182



Epoch:  60%|██████    | 3/5 [06:30<04:20, 130.35s/it][A

Epoch 3: Loss = 0.2674



Epoch:  80%|████████  | 4/5 [08:41<02:10, 130.37s/it][A

Epoch 4: Loss = 0.2203



Epoch: 100%|██████████| 5/5 [10:51<00:00, 130.28s/it]

Epoch 5: Loss = 0.1777



Grid Search Progress:  81%|████████  | 29/36 [4:27:01<1:07:58, 582.65s/it]

Validation Accuracy: 0.7923, F1 Score: 0.7130

[30/36] Training with lr=2e-05, bs=16, epochs=5, dropout=0.5


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.

Epoch:   0%|          | 0/5 [00:00<?, ?it/s][A
Epoch:  20%|██        | 1/5 [02:09<08:39, 129.83s/it][A

Epoch 1: Loss = 0.5395



Epoch:  40%|████      | 2/5 [04:19<06:29, 129.73s/it][A

Epoch 2: Loss = 0.4624



Epoch:  60%|██████    | 3/5 [06:28<04:19, 129.54s/it][A

Epoch 3: Loss = 0.4251



Epoch:  80%|████████  | 4/5 [08:38<02:09, 129.66s/it][A

Epoch 4: Loss = 0.3938



Epoch: 100%|██████████| 5/5 [10:48<00:00, 129.73s/it]

Epoch 5: Loss = 0.3662



Grid Search Progress:  83%|████████▎ | 30/36 [4:38:05<1:00:42, 607.15s/it]Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Validation Accuracy: 0.7774, F1 Score: 0.6746

[31/36] Training with lr=2e-05, bs=32, epochs=3, dropout=0.1



Epoch:   0%|          | 0/3 [00:00<?, ?it/s][A
Epoch:  33%|███▎      | 1/3 [02:00<04:01, 120.56s/it][A

Epoch 1: Loss = 0.3756



Epoch:  67%|██████▋   | 2/3 [04:01<02:00, 120.53s/it][A

Epoch 2: Loss = 0.2363



Epoch: 100%|██████████| 3/3 [06:01<00:00, 120.42s/it]

Epoch 3: Loss = 0.1355



Grid Search Progress:  86%|████████▌ | 31/36 [4:44:21<44:48, 537.78s/it]Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Validation Accuracy: 0.8599, F1 Score: 0.7654

[32/36] Training with lr=2e-05, bs=32, epochs=3, dropout=0.3



Epoch:   0%|          | 0/3 [00:00<?, ?it/s][A
Epoch:  33%|███▎      | 1/3 [02:00<04:00, 120.40s/it][A

Epoch 1: Loss = 0.4510



Epoch:  67%|██████▋   | 2/3 [04:00<02:00, 120.25s/it][A

Epoch 2: Loss = 0.3234



Epoch: 100%|██████████| 3/3 [06:00<00:00, 120.25s/it]

Epoch 3: Loss = 0.2775



Grid Search Progress:  89%|████████▉ | 32/36 [4:50:37<32:36, 489.06s/it]Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Validation Accuracy: 0.7489, F1 Score: 0.6783

[33/36] Training with lr=2e-05, bs=32, epochs=3, dropout=0.5



Epoch:   0%|          | 0/3 [00:00<?, ?it/s][A
Epoch:  33%|███▎      | 1/3 [02:00<04:00, 120.12s/it][A

Epoch 1: Loss = 0.5419



Epoch:  67%|██████▋   | 2/3 [04:00<01:59, 119.99s/it][A

Epoch 2: Loss = 0.4645



Epoch: 100%|██████████| 3/3 [06:00<00:00, 120.04s/it]

Epoch 3: Loss = 0.4259



Grid Search Progress:  92%|█████████▏| 33/36 [4:56:51<22:44, 454.75s/it]Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Validation Accuracy: 0.7351, F1 Score: 0.6464

[34/36] Training with lr=2e-05, bs=32, epochs=5, dropout=0.1



Epoch:   0%|          | 0/5 [00:00<?, ?it/s][A
Epoch:  20%|██        | 1/5 [02:00<08:00, 120.14s/it][A

Epoch 1: Loss = 0.3811



Epoch:  40%|████      | 2/5 [04:00<06:00, 120.10s/it][A

Epoch 2: Loss = 0.2380



Epoch:  60%|██████    | 3/5 [06:00<04:00, 120.09s/it][A

Epoch 3: Loss = 0.1307



Epoch:  80%|████████  | 4/5 [08:00<02:00, 120.20s/it][A

Epoch 4: Loss = 0.0710



Epoch: 100%|██████████| 5/5 [10:01<00:00, 120.29s/it]

Epoch 5: Loss = 0.0417



Grid Search Progress:  94%|█████████▍| 34/36 [5:07:08<16:46, 503.22s/it]

Validation Accuracy: 0.8502, F1 Score: 0.7543

[35/36] Training with lr=2e-05, bs=32, epochs=5, dropout=0.3


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.

Epoch:   0%|          | 0/5 [00:00<?, ?it/s][A
Epoch:  20%|██        | 1/5 [02:00<08:01, 120.30s/it][A

Epoch 1: Loss = 0.4249



Epoch:  40%|████      | 2/5 [04:00<06:00, 120.28s/it][A

Epoch 2: Loss = 0.3166



Epoch:  60%|██████    | 3/5 [06:00<04:00, 120.31s/it][A

Epoch 3: Loss = 0.2689



Epoch:  80%|████████  | 4/5 [08:01<02:00, 120.23s/it][A

Epoch 4: Loss = 0.2340



Epoch: 100%|██████████| 5/5 [10:01<00:00, 120.34s/it]

Epoch 5: Loss = 0.1981



Grid Search Progress:  97%|█████████▋| 35/36 [5:17:25<08:57, 537.36s/it]Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Validation Accuracy: 0.7638, F1 Score: 0.6933

[36/36] Training with lr=2e-05, bs=32, epochs=5, dropout=0.5



Epoch:   0%|          | 0/5 [00:00<?, ?it/s][A
Epoch:  20%|██        | 1/5 [02:00<08:03, 120.82s/it][A

Epoch 1: Loss = 0.5591



Epoch:  40%|████      | 2/5 [04:01<06:02, 120.76s/it][A

Epoch 2: Loss = 0.4852



Epoch:  60%|██████    | 3/5 [06:02<04:01, 120.76s/it][A

Epoch 3: Loss = 0.4480



Epoch:  80%|████████  | 4/5 [08:03<02:00, 120.83s/it][A

Epoch 4: Loss = 0.4183



Epoch: 100%|██████████| 5/5 [10:03<00:00, 120.77s/it]

Epoch 5: Loss = 0.4005



Grid Search Progress: 100%|██████████| 36/36 [5:27:43<00:00, 546.22s/it]

Validation Accuracy: 0.6181, F1 Score: 0.5829

Grid search complete.
Best Parameters: {'learning_rate': 1e-05, 'batch_size': 16, 'epochs': 3, 'dropout': 0.1}
Best F1 Score: 0.7740





# Save the Best Parameter Setting to a JSON File

In [None]:
# Kept as an independent cell so the best-parameter file can be regenerated from the
# saved grid-search results even if the model training process was interrupted.
import json
import os

# Paths (all files live in the same model directory on Google Drive)
model_dir = '/content/drive/My Drive/nlu-lab/bert_model'
grid_search_path = os.path.join(model_dir, 'grid_search_results_bert.json')
best_param_path = os.path.join(model_dir, 'best_params_bert.json')

# Load the recorded grid-search results.
# An interrupted run may never have written the results file, so a missing file is
# reported below instead of crashing the cell with FileNotFoundError.
# NOTE(review): presumably the file is written by the grid-search cell — confirm.
results = []
results_file_found = os.path.isfile(grid_search_path)
if results_file_found:
  with open(grid_search_path, 'r') as f:
    results = json.load(f)

if results:
  # Every entry is indexed by its 'f1' key; keep the one with the highest F1 score.
  best_result = max(results, key=lambda x: x['f1'])

  # The whole winning entry is written out, i.e. everything stored alongside 'f1'.
  with open(best_param_path, 'w') as f:
    json.dump(best_result, f, indent=2)

  print(f"\nBest Parameters saved to: {best_param_path}")
  print(f"Best F1 Score: {best_result['f1']:.4f}")
  print(f"Params: {best_result}")
elif results_file_found:
  print("grid_search_results_bert.json is empty. Please check your training.")
else:
  print(f"grid_search_results_bert.json was not found at: {grid_search_path}. Please check your training.")



Best Parameters saved to: /content/drive/My Drive/nlu-lab/bert_model/best_params_bert.json
Best F1 Score: 0.7740
Params: {'learning_rate': 1e-05, 'batch_size': 16, 'epochs': 3, 'dropout': 0.1, 'acc': 0.8633142085723928, 'f1': 0.7739955357142857}
