In [1]:
#from google.colab import drive
#drive.mount('/content/drive')

In [2]:
!pip install torch



In [3]:
import torch
import random
import numpy as np

seed = 42
np.random.seed(seed)
random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
print(f"Random seed set as {seed}")

torch.cuda.empty_cache()

Random seed set as 42


In [4]:
MAIN_DIR = "./metamia/."

In [5]:
!pip install transformers



In [6]:
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset, random_split
from transformers import BertTokenizer, BertForSequenceClassification, AdamW
import json
import pandas as pd

In [7]:
df = pd.read_csv(f"{MAIN_DIR}/train_data.csv")
df.head()

Unnamed: 0,document,label
0,"Economy of India From Wikipedia, the free ency...",0
1,"Silicon From Wikipedia, the free encyclopedia ...",0
2,Call Us 1 - 603 - 244 - 6292 Follow Us 1 - 603...,1
3,Skip to main content .us Hello Select your add...,0
4,"Mucus From Wikipedia, the free encyclopedia Ju...",0


In [8]:
train_texts, train_labels = df['document'].values, df['label'].values

In [9]:
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [10]:
tokenized_texts = []
from tqdm import tqdm

max_seq_length = 512  # Maximum sequence length for BERT

for text in tqdm(train_texts):
    tokenized_texts.append(tokenizer(text, truncation=True, padding='max_length', max_length=max_seq_length, return_tensors='pt'))

100%|██████████| 1500/1500 [01:54<00:00, 13.08it/s]


In [11]:
# Tokenize the texts and convert them to tensors
from sklearn.metrics import accuracy_score, f1_score, classification_report

input_ids = torch.cat([t['input_ids'] for t in tokenized_texts], dim=0)
attention_mask = torch.cat([t['attention_mask'] for t in tokenized_texts], dim=0)
labels = torch.tensor(train_labels)

# Create a dataset and data loader
dataset = TensorDataset(input_ids, attention_mask, labels)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

batch_size = 8
lr = 2e-5

train_loader = DataLoader(train_dataset, batch_size=batch_size)
val_loader = DataLoader(val_dataset, batch_size=batch_size)

optimizer = AdamW(model.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss()



In [12]:
# len(val_dataset)

# val_dataset[0]

In [13]:
def train_epoch(model, dataloader, optimizer, criterion, device):
    model.train()
    total_loss = 0.0
    for batch in tqdm(dataloader):
        input_ids, attention_mask, labels = batch
        input_ids, attention_mask, labels = input_ids.to(device), attention_mask.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        total_loss += loss.item()
        loss.backward()
        optimizer.step()

    return total_loss / len(dataloader)

def evaluate(model, dataloader, criterion, device):
    model.eval()
    total_loss = 0.0
    correct_predictions = 0
    total_samples = 0

    with torch.no_grad():
        for batch in tqdm(dataloader):
            input_ids, attention_mask, labels = batch
            input_ids, attention_mask, labels = input_ids.to(device), attention_mask.to(device), labels.to(device)

            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            total_loss += loss.item()

            logits = outputs.logits
            predictions = torch.argmax(logits, dim=1)
            correct_predictions += torch.sum(predictions == labels).item()
            print(labels.cpu())
            total_samples += labels.size(0)


    print(classification_report(predictions.cpu().numpy(), labels.cpu().numpy()))
    return total_loss / len(dataloader), correct_predictions / total_samples

In [14]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12,

In [15]:
num_epochs = 3
for epoch in range(num_epochs):
    train_loss = train_epoch(model, train_loader, optimizer, criterion, device)
    val_loss, val_accuracy = evaluate(model, val_loader, criterion, device)

    print(f"Epoch {epoch + 1}/{num_epochs} - Train Loss: {train_loss:.4f} - Val Loss: {val_loss:.4f} - Val Accuracy: {val_accuracy:.2%}")
    model.save_pretrained(f"{MAIN_DIR}/fine_tuned_bert_epoch_{epoch+1}_lr_{lr}")

# Save the fine-tuned model
model.save_pretrained(f"{MAIN_DIR}/fine_tuned_bert")

100%|██████████| 150/150 [1:48:49<00:00, 43.53s/it]   
  3%|▎         | 1/38 [00:08<05:01,  8.14s/it]

tensor([0, 0, 1, 1, 0, 0, 0, 1])


  5%|▌         | 2/38 [00:16<04:52,  8.12s/it]

tensor([0, 0, 0, 1, 0, 0, 0, 0])


  8%|▊         | 3/38 [00:24<04:44,  8.12s/it]

tensor([1, 1, 1, 0, 0, 0, 0, 1])


 11%|█         | 4/38 [00:32<04:35,  8.11s/it]

tensor([0, 0, 0, 0, 0, 1, 0, 1])


 13%|█▎        | 5/38 [00:40<04:27,  8.10s/it]

tensor([0, 0, 0, 0, 0, 1, 0, 0])


 16%|█▌        | 6/38 [00:48<04:19,  8.09s/it]

tensor([0, 1, 1, 0, 1, 0, 0, 0])


 18%|█▊        | 7/38 [00:56<04:11,  8.12s/it]

tensor([0, 0, 0, 1, 0, 1, 1, 1])


 21%|██        | 8/38 [01:04<04:03,  8.12s/it]

tensor([0, 0, 1, 0, 0, 0, 0, 0])


 24%|██▎       | 9/38 [01:12<03:55,  8.12s/it]

tensor([0, 0, 0, 0, 0, 1, 1, 0])


 26%|██▋       | 10/38 [01:21<03:47,  8.13s/it]

tensor([0, 1, 1, 0, 0, 0, 0, 0])


 29%|██▉       | 11/38 [01:29<03:38,  8.11s/it]

tensor([0, 0, 1, 0, 1, 0, 0, 0])


 32%|███▏      | 12/38 [01:37<03:30,  8.09s/it]

tensor([1, 0, 0, 1, 1, 0, 0, 0])


 34%|███▍      | 13/38 [01:45<03:22,  8.09s/it]

tensor([1, 0, 0, 0, 1, 1, 0, 1])


 37%|███▋      | 14/38 [01:53<03:14,  8.09s/it]

tensor([0, 0, 1, 1, 0, 0, 0, 0])


 39%|███▉      | 15/38 [02:01<03:06,  8.10s/it]

tensor([0, 0, 1, 0, 0, 0, 0, 0])


 42%|████▏     | 16/38 [02:09<02:57,  8.09s/it]

tensor([0, 1, 0, 0, 0, 0, 0, 0])


 45%|████▍     | 17/38 [02:17<02:49,  8.09s/it]

tensor([0, 1, 0, 0, 0, 1, 0, 0])


 47%|████▋     | 18/38 [02:25<02:41,  8.09s/it]

tensor([0, 0, 0, 0, 0, 0, 0, 0])


 50%|█████     | 19/38 [02:33<02:33,  8.08s/it]

tensor([1, 0, 0, 0, 0, 0, 1, 1])


 53%|█████▎    | 20/38 [02:41<02:25,  8.08s/it]

tensor([0, 1, 0, 1, 0, 0, 0, 0])


 55%|█████▌    | 21/38 [02:50<02:17,  8.08s/it]

tensor([0, 0, 0, 0, 1, 1, 0, 0])


 58%|█████▊    | 22/38 [02:58<02:09,  8.09s/it]

tensor([0, 0, 0, 0, 0, 1, 0, 1])


 61%|██████    | 23/38 [03:06<02:01,  8.09s/it]

tensor([0, 0, 0, 1, 0, 1, 0, 0])


 63%|██████▎   | 24/38 [03:14<01:53,  8.08s/it]

tensor([0, 1, 0, 0, 0, 0, 1, 0])


 66%|██████▌   | 25/38 [03:22<01:45,  8.08s/it]

tensor([1, 0, 0, 0, 1, 1, 0, 0])


 68%|██████▊   | 26/38 [03:30<01:37,  8.13s/it]

tensor([1, 0, 0, 0, 0, 0, 0, 0])


 71%|███████   | 27/38 [03:39<01:30,  8.25s/it]

tensor([0, 0, 0, 0, 0, 0, 0, 0])


 74%|███████▎  | 28/38 [03:47<01:22,  8.29s/it]

tensor([0, 0, 1, 1, 1, 0, 0, 0])


 76%|███████▋  | 29/38 [03:56<01:15,  8.37s/it]

tensor([0, 1, 0, 0, 0, 0, 0, 0])


 79%|███████▉  | 30/38 [04:04<01:07,  8.40s/it]

tensor([1, 0, 0, 0, 0, 0, 0, 1])


 82%|████████▏ | 31/38 [04:13<00:58,  8.43s/it]

tensor([0, 0, 1, 0, 0, 0, 0, 1])


 84%|████████▍ | 32/38 [04:21<00:50,  8.44s/it]

tensor([0, 0, 0, 0, 0, 0, 0, 0])


 87%|████████▋ | 33/38 [04:30<00:42,  8.47s/it]

tensor([0, 0, 1, 0, 1, 0, 0, 0])


 89%|████████▉ | 34/38 [04:38<00:33,  8.49s/it]

tensor([0, 0, 0, 1, 1, 0, 0, 0])


 92%|█████████▏| 35/38 [04:47<00:25,  8.65s/it]

tensor([1, 0, 0, 0, 0, 0, 0, 0])


 95%|█████████▍| 36/38 [04:56<00:17,  8.73s/it]

tensor([0, 0, 0, 0, 0, 0, 0, 0])


 97%|█████████▋| 37/38 [05:05<00:08,  8.68s/it]

tensor([0, 1, 0, 0, 0, 1, 1, 0])


100%|██████████| 38/38 [05:09<00:00,  8.14s/it]

tensor([0, 0, 0, 1])
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         3
           1       1.00      1.00      1.00         1

    accuracy                           1.00         4
   macro avg       1.00      1.00      1.00         4
weighted avg       1.00      1.00      1.00         4

Epoch 1/3 - Train Loss: 0.4669 - Val Loss: 0.3233 - Val Accuracy: 86.33%



100%|██████████| 150/150 [2:38:12<00:00, 63.28s/it]   
  3%|▎         | 1/38 [00:08<05:07,  8.31s/it]

tensor([0, 0, 1, 1, 0, 0, 0, 1])


  5%|▌         | 2/38 [00:16<04:56,  8.23s/it]

tensor([0, 0, 0, 1, 0, 0, 0, 0])


  8%|▊         | 3/38 [00:24<04:51,  8.34s/it]

tensor([1, 1, 1, 0, 0, 0, 0, 1])


 11%|█         | 4/38 [00:34<04:50,  8.56s/it]

tensor([0, 0, 0, 0, 0, 1, 0, 1])


 13%|█▎        | 5/38 [00:42<04:44,  8.61s/it]

tensor([0, 0, 0, 0, 0, 1, 0, 0])


 16%|█▌        | 6/38 [00:51<04:38,  8.71s/it]

tensor([0, 1, 1, 0, 1, 0, 0, 0])


 18%|█▊        | 7/38 [01:00<04:29,  8.68s/it]

tensor([0, 0, 0, 1, 0, 1, 1, 1])


 21%|██        | 8/38 [01:08<04:19,  8.67s/it]

tensor([0, 0, 1, 0, 0, 0, 0, 0])


 24%|██▎       | 9/38 [01:17<04:08,  8.57s/it]

tensor([0, 0, 0, 0, 0, 1, 1, 0])


 26%|██▋       | 10/38 [01:25<03:56,  8.44s/it]

tensor([0, 1, 1, 0, 0, 0, 0, 0])


 29%|██▉       | 11/38 [01:34<03:49,  8.48s/it]

tensor([0, 0, 1, 0, 1, 0, 0, 0])


 32%|███▏      | 12/38 [01:42<03:41,  8.51s/it]

tensor([1, 0, 0, 1, 1, 0, 0, 0])


 34%|███▍      | 13/38 [01:51<03:34,  8.56s/it]

tensor([1, 0, 0, 0, 1, 1, 0, 1])


 37%|███▋      | 14/38 [01:59<03:26,  8.60s/it]

tensor([0, 0, 1, 1, 0, 0, 0, 0])


 39%|███▉      | 15/38 [02:08<03:18,  8.63s/it]

tensor([0, 0, 1, 0, 0, 0, 0, 0])


 42%|████▏     | 16/38 [02:17<03:10,  8.67s/it]

tensor([0, 1, 0, 0, 0, 0, 0, 0])


 45%|████▍     | 17/38 [02:26<03:04,  8.76s/it]

tensor([0, 1, 0, 0, 0, 1, 0, 0])


 47%|████▋     | 18/38 [02:34<02:51,  8.57s/it]

tensor([0, 0, 0, 0, 0, 0, 0, 0])


 50%|█████     | 19/38 [02:42<02:40,  8.43s/it]

tensor([1, 0, 0, 0, 0, 0, 1, 1])


 53%|█████▎    | 20/38 [02:50<02:29,  8.33s/it]

tensor([0, 1, 0, 1, 0, 0, 0, 0])


 55%|█████▌    | 21/38 [02:59<02:21,  8.35s/it]

tensor([0, 0, 0, 0, 1, 1, 0, 0])


 58%|█████▊    | 22/38 [03:07<02:11,  8.25s/it]

tensor([0, 0, 0, 0, 0, 1, 0, 1])


 61%|██████    | 23/38 [03:15<02:02,  8.19s/it]

tensor([0, 0, 0, 1, 0, 1, 0, 0])


 63%|██████▎   | 24/38 [03:23<01:55,  8.28s/it]

tensor([0, 1, 0, 0, 0, 0, 1, 0])


 66%|██████▌   | 25/38 [03:31<01:46,  8.22s/it]

tensor([1, 0, 0, 0, 1, 1, 0, 0])


 68%|██████▊   | 26/38 [03:39<01:37,  8.12s/it]

tensor([1, 0, 0, 0, 0, 0, 0, 0])


 71%|███████   | 27/38 [03:47<01:28,  8.05s/it]

tensor([0, 0, 0, 0, 0, 0, 0, 0])


 74%|███████▎  | 28/38 [03:55<01:19,  8.00s/it]

tensor([0, 0, 1, 1, 1, 0, 0, 0])


 76%|███████▋  | 29/38 [04:03<01:11,  7.95s/it]

tensor([0, 1, 0, 0, 0, 0, 0, 0])


 79%|███████▉  | 30/38 [04:11<01:03,  7.93s/it]

tensor([1, 0, 0, 0, 0, 0, 0, 1])


 82%|████████▏ | 31/38 [04:18<00:55,  7.91s/it]

tensor([0, 0, 1, 0, 0, 0, 0, 1])


 84%|████████▍ | 32/38 [04:26<00:47,  7.90s/it]

tensor([0, 0, 0, 0, 0, 0, 0, 0])


 87%|████████▋ | 33/38 [04:34<00:39,  7.88s/it]

tensor([0, 0, 1, 0, 1, 0, 0, 0])


 89%|████████▉ | 34/38 [04:42<00:31,  7.90s/it]

tensor([0, 0, 0, 1, 1, 0, 0, 0])


 92%|█████████▏| 35/38 [04:50<00:23,  7.91s/it]

tensor([1, 0, 0, 0, 0, 0, 0, 0])


 95%|█████████▍| 36/38 [04:58<00:15,  7.89s/it]

tensor([0, 0, 0, 0, 0, 0, 0, 0])


 97%|█████████▋| 37/38 [05:06<00:07,  7.87s/it]

tensor([0, 1, 0, 0, 0, 1, 1, 0])


100%|██████████| 38/38 [05:10<00:00,  8.16s/it]

tensor([0, 0, 0, 1])
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         3
           1       1.00      1.00      1.00         1

    accuracy                           1.00         4
   macro avg       1.00      1.00      1.00         4
weighted avg       1.00      1.00      1.00         4

Epoch 2/3 - Train Loss: 0.2549 - Val Loss: 0.2759 - Val Accuracy: 91.00%



100%|██████████| 150/150 [1:09:52<00:00, 27.95s/it]
  3%|▎         | 1/38 [00:07<04:55,  7.99s/it]

tensor([0, 0, 1, 1, 0, 0, 0, 1])


  5%|▌         | 2/38 [00:16<04:48,  8.01s/it]

tensor([0, 0, 0, 1, 0, 0, 0, 0])


  8%|▊         | 3/38 [00:24<04:40,  8.02s/it]

tensor([1, 1, 1, 0, 0, 0, 0, 1])


 11%|█         | 4/38 [00:32<04:32,  8.01s/it]

tensor([0, 0, 0, 0, 0, 1, 0, 1])


 13%|█▎        | 5/38 [00:40<04:24,  8.01s/it]

tensor([0, 0, 0, 0, 0, 1, 0, 0])


 16%|█▌        | 6/38 [00:48<04:18,  8.08s/it]

tensor([0, 1, 1, 0, 1, 0, 0, 0])


 18%|█▊        | 7/38 [00:56<04:10,  8.10s/it]

tensor([0, 0, 0, 1, 0, 1, 1, 1])


 21%|██        | 8/38 [01:04<04:02,  8.10s/it]

tensor([0, 0, 1, 0, 0, 0, 0, 0])


 24%|██▎       | 9/38 [01:12<03:54,  8.07s/it]

tensor([0, 0, 0, 0, 0, 1, 1, 0])


 26%|██▋       | 10/38 [01:20<03:45,  8.05s/it]

tensor([0, 1, 1, 0, 0, 0, 0, 0])


 29%|██▉       | 11/38 [01:28<03:37,  8.06s/it]

tensor([0, 0, 1, 0, 1, 0, 0, 0])


 32%|███▏      | 12/38 [01:36<03:30,  8.10s/it]

tensor([1, 0, 0, 1, 1, 0, 0, 0])


 34%|███▍      | 13/38 [01:44<03:21,  8.06s/it]

tensor([1, 0, 0, 0, 1, 1, 0, 1])


 37%|███▋      | 14/38 [01:52<03:13,  8.07s/it]

tensor([0, 0, 1, 1, 0, 0, 0, 0])


 39%|███▉      | 15/38 [02:01<03:06,  8.10s/it]

tensor([0, 0, 1, 0, 0, 0, 0, 0])


 42%|████▏     | 16/38 [02:09<02:57,  8.08s/it]

tensor([0, 1, 0, 0, 0, 0, 0, 0])


 45%|████▍     | 17/38 [02:17<02:49,  8.07s/it]

tensor([0, 1, 0, 0, 0, 1, 0, 0])


 47%|████▋     | 18/38 [02:25<02:41,  8.07s/it]

tensor([0, 0, 0, 0, 0, 0, 0, 0])


 50%|█████     | 19/38 [02:33<02:33,  8.06s/it]

tensor([1, 0, 0, 0, 0, 0, 1, 1])


 53%|█████▎    | 20/38 [02:41<02:24,  8.05s/it]

tensor([0, 1, 0, 1, 0, 0, 0, 0])


 55%|█████▌    | 21/38 [02:49<02:16,  8.04s/it]

tensor([0, 0, 0, 0, 1, 1, 0, 0])


 58%|█████▊    | 22/38 [02:57<02:08,  8.02s/it]

tensor([0, 0, 0, 0, 0, 1, 0, 1])


 61%|██████    | 23/38 [03:05<02:00,  8.03s/it]

tensor([0, 0, 0, 1, 0, 1, 0, 0])


 63%|██████▎   | 24/38 [03:13<01:52,  8.06s/it]

tensor([0, 1, 0, 0, 0, 0, 1, 0])


 66%|██████▌   | 25/38 [03:21<01:44,  8.07s/it]

tensor([1, 0, 0, 0, 1, 1, 0, 0])


 68%|██████▊   | 26/38 [03:30<01:38,  8.19s/it]

tensor([1, 0, 0, 0, 0, 0, 0, 0])


 71%|███████   | 27/38 [03:38<01:29,  8.17s/it]

tensor([0, 0, 0, 0, 0, 0, 0, 0])


 74%|███████▎  | 28/38 [03:46<01:21,  8.13s/it]

tensor([0, 0, 1, 1, 1, 0, 0, 0])


 76%|███████▋  | 29/38 [03:54<01:12,  8.11s/it]

tensor([0, 1, 0, 0, 0, 0, 0, 0])


 79%|███████▉  | 30/38 [04:02<01:04,  8.07s/it]

tensor([1, 0, 0, 0, 0, 0, 0, 1])


 82%|████████▏ | 31/38 [04:10<00:56,  8.04s/it]

tensor([0, 0, 1, 0, 0, 0, 0, 1])


 84%|████████▍ | 32/38 [04:18<00:48,  8.14s/it]

tensor([0, 0, 0, 0, 0, 0, 0, 0])


 87%|████████▋ | 33/38 [04:26<00:40,  8.11s/it]

tensor([0, 0, 1, 0, 1, 0, 0, 0])


 89%|████████▉ | 34/38 [04:34<00:32,  8.09s/it]

tensor([0, 0, 0, 1, 1, 0, 0, 0])


 92%|█████████▏| 35/38 [04:43<00:24,  8.27s/it]

tensor([1, 0, 0, 0, 0, 0, 0, 0])


 95%|█████████▍| 36/38 [04:51<00:16,  8.36s/it]

tensor([0, 0, 0, 0, 0, 0, 0, 0])


 97%|█████████▋| 37/38 [05:00<00:08,  8.30s/it]

tensor([0, 1, 0, 0, 0, 1, 1, 0])


100%|██████████| 38/38 [05:04<00:00,  8.01s/it]

tensor([0, 0, 0, 1])
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         3
           1       1.00      1.00      1.00         1

    accuracy                           1.00         4
   macro avg       1.00      1.00      1.00         4
weighted avg       1.00      1.00      1.00         4

Epoch 3/3 - Train Loss: 0.1712 - Val Loss: 0.3032 - Val Accuracy: 91.00%





In [16]:
# np.count_nonzero(labels.cpu().numpy() == 0)

c = 0
for i in labels.cpu().numpy():
  if i == 0:
    c += 1

print(c)
len(labels)

1139


1500

## Inference

In [17]:
df_test = pd.read_csv(f"{MAIN_DIR}/test_data.csv")
df_test.head()

Unnamed: 0,document
0,News World Cup Business Opinion Ukraine Sport ...
1,"Skeleton From Wikipedia, the free encyclopedia..."
2,Wassermann reaction | definition of Wassermann...
3,Skip to main content Search My Account Hi! Sig...
4,Menu Topics Buildings Care Ministries Conflict...


In [18]:
texts_test = df_test['document'].values

In [19]:
from transformers import LongformerTokenizer

# To use the fine-tuned model for inference:
loaded_model = BertForSequenceClassification.from_pretrained(f"{MAIN_DIR}/fine_tuned_bert")
#loaded_model = LongformerTokenizer.from_pretrained(f"{MAIN_DIR}/fine_tuned_bert")
loaded_model.to(device)

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12,

In [20]:
loaded_model.eval()

all_preds = []

with torch.no_grad():
  for text in tqdm(texts_test):
      tokenized_sentence = tokenizer(text, truncation=True, padding='max_length', max_length=max_seq_length, return_tensors='pt')
      input_ids = tokenized_sentence["input_ids"].to(device)
      attention_mask = tokenized_sentence["attention_mask"].to(device)

      outputs = loaded_model(input_ids, attention_mask=attention_mask)
      logits = outputs.logits
      predictions = torch.argmax(logits, dim=1)
      all_preds.extend(predictions.cpu().numpy())

100%|██████████| 500/500 [09:20<00:00,  1.12s/it]


In [21]:
import csv
with open(f'{MAIN_DIR}/metamia_results.csv', mode='w') as csv_file: # for mp3.1, use filename 'mp3.1_results.csv'
    writer = csv.writer(csv_file)
    writer.writerow(['label'])
    for item in all_preds:
        writer.writerow([item])