In [2]:
import torch.nn as nn
import torch
from data import MyDataset
from torch.utils.data import DataLoader
from tqdm import tqdm, trange

from transformers import AutoModelForSequenceClassification
from transformers import BertTokenizer
import wandb
from model import SequenceClassificationModel
import pandas as pd



In [6]:
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [7]:
# Tokenizer matching the `bert-base-uncased` checkpoint used for the backbone.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Test split wrapped in the project dataset class.
# NOTE(review): max_len=512 presumably caps the token sequence length -
# confirm against data.MyDataset.
test_dataset = MyDataset(
    "../datasets/nsdc/test.csv",
    tokenizer,
    max_len=512,
)

# One example per batch, default (sequential) ordering, batches assembled by
# the dataset's own collate function.
test_dataloader = DataLoader(
    dataset=test_dataset,
    batch_size=1,
    collate_fn=test_dataset.collate_translation_data,
)

Read the ../datasets/nsdc/test.csv corpus: 100%|██████████| 3991/3991 [00:01<00:00, 3557.30it/s]


In [8]:
# Load the pretrained Hugging Face classifier and place it on `device`;
# only its `.bert` encoder is reused below as the backbone.
huggingface_model = AutoModelForSequenceClassification.from_pretrained(
    "bert-base-uncased"
).to(device)
backbone_model = huggingface_model.bert

# Wrap the encoder in the project's classification model.
# NOTE(review): the second argument (4) is presumably the number of target
# classes - confirm against model.SequenceClassificationModel.
model = SequenceClassificationModel(
    backbone_model,
    4,
).to(device)

# Restore the fine-tuned weights, remapping tensors onto `device`.
# NOTE(review): depending on the PyTorch version, torch.load may unpickle
# arbitrary objects - only load checkpoints from a trusted source.
model.load_state_dict(
    torch.load("checkpoint_best.pth", map_location=device)
)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

<All keys matched successfully>

In [9]:
@torch.inference_mode()
def evaluate(max_batches=1000):
    """Run the classifier over the leading batches of the test dataloader.

    Uses the notebook-level ``model``, ``test_dataloader`` and ``device``.
    The targets yielded by the dataloader are ignored: only the model
    outputs are collected.

    Args:
        max_batches: Upper bound on the number of batches to score.
            Defaults to 1000, the limit this notebook previously hard-coded.
            Pass ``None`` to score every batch in the dataloader.

    Returns:
        list: The value returned by ``model(src)`` for each scored batch,
        in dataloader order.
    """
    model.eval()  # put the module in evaluation mode before scoring
    preds = []
    for idx, (src, _tgt) in enumerate(tqdm(test_dataloader)):
        # Stop once the requested number of batches has been scored.
        if max_batches is not None and idx >= max_batches:
            break
        preds.append(model(src.to(device)))
    return preds

In [10]:
# Re-read the raw test CSV with pandas and keep only the `text` and `score`
# columns, in that order.
test_data = pd.read_csv(
    filepath_or_buffer="../datasets/nsdc/test.csv",
)[
    ["text", "score"]
]

In [11]:
# Stack the per-batch model outputs row-wise, then take the index of the
# largest value along dim 1 as the predicted class for each row.
preds = torch.vstack(evaluate()).argmax(dim=1)

In [12]:
# Maximum number of misclassified rows kept for manual inspection.
MAX_ERRORS = 100

# Convert the prediction tensor to a NumPy-backed Series explicitly instead of
# handing a torch tensor to pandas. The Series carries a default 0..n-1 index.
pred_column = pd.Series(preds.cpu().numpy())

# `assign` aligns `pred_column` on index labels, so rows that have no
# prediction get NaN and are removed by `dropna()` (which also removes rows
# with a missing `text` or `score`). Building a new frame instead of mutating
# the result of `dropna()` avoids pandas' SettingWithCopyWarning.
# NOTE(review): assumes prediction i belongs to CSV row i (the dataloader is
# built with batch_size=1 and no shuffling) - confirm MyDataset neither drops
# nor reorders rows.
scored = (
    test_data
    .assign(pred=pred_column)
    .dropna()
    .astype({"pred": int})
)

# Keep only the rows where the label and the prediction disagree.
# NOTE(review): assumes `score` uses the same encoding as the model's class
# indices - confirm against the training code.
# The result is bound back to `test_data` because the export cell reads it.
test_data = scored[scored["score"] != scored["pred"]].iloc[:MAX_ERRORS]

In [13]:
# Write the selected rows to disk for manual error analysis; the DataFrame
# index is not written.
test_data.to_csv(
    path_or_buf="errors_to_analyze.csv",
    index=False,
)