In [7]:
import pandas as pd
import torch
from transformers import BertForRanking
import pickle

In [8]:
class RankingDataset(torch.utils.data.Dataset):
    """Map-style dataset that serves two encoded inputs per example.

    Each example is assembled from two dict-like encodings (presumably
    tokenizer outputs -- confirm against the code that built the pickle),
    a label and a user id.

    Args:
        encodings1: mapping of field name -> indexable sequence for the first input.
        encodings2: mapping of field name -> indexable sequence for the second input.
        labels: indexable sequence of labels; its length defines the dataset size.
        user_id: indexable sequence of user identifiers, one per example.

    NOTE: the attribute names are kept stable on purpose -- the notebook
    unpickles a dataset object later on, and pickled instances restore their
    attributes by name (assuming that pickle holds instances of this class).
    """

    def __init__(self, encodings1, encodings2, labels, user_id):
        self.encodings1 = encodings1
        self.encodings2 = encodings2
        self.labels = labels
        self.user_id = user_id

    def __len__(self):
        """Return the number of examples (one per label)."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return example ``idx`` as a flat dict of tensors.

        Fields of the first encoding get a ``_1`` suffix and fields of the
        second a ``_2`` suffix; ``labels`` and ``user_id`` are added last.
        """
        sample = {}
        # First input's fields are emitted before the second's, so key order
        # is: *_1 fields, *_2 fields, labels, user_id.
        for suffix, encodings in (("_1", self.encodings1), ("_2", self.encodings2)):
            for field, values in encodings.items():
                sample[field + suffix] = torch.tensor(values[idx])
        sample['labels'] = torch.tensor(self.labels[idx])
        sample['user_id'] = torch.tensor(self.user_id[idx])
        return sample

In [9]:
# Only the test split is used in this notebook; the train/validation loads are
# kept (disabled) for reference. Note they point at a different directory.
#train_dataset = pickle.load(open("../book_data/dataset/train_dataset.pkl", "rb"))
#val_dataset = pickle.load(open("../book_data/dataset/val_dataset.pkl", "rb"))

# NOTE: pickle.load can execute arbitrary code embedded in the file -- only
# load dataset artifacts that come from a trusted source.
# The context manager closes the file handle as soon as the object is read
# (the previous bare open() left it open until garbage collection).
with open("../book_data/rankingDataset/test_dataset.pkl", "rb") as dataset_file:
    test_dataset = pickle.load(dataset_file)

In [13]:

# Instantiate the ranking architecture from the public "bert-base-uncased"
# checkpoint. The warning printed below reports that the pre-training heads
# (cls.*) are discarded and that some ranking-head weights are newly
# initialised; the fine-tuned weights are loaded over this model by the
# load_state_dict cell that follows.
# NOTE(review): BertForRanking does not appear to be part of the upstream
# `transformers` release -- presumably a project fork; confirm which package
# build this notebook expects.
model = BertForRanking.from_pretrained("bert-base-uncased")



Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForRanking: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForRanking from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForRanking from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForRanking were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.b

In [14]:
# Overwrite the freshly initialised weights with the fine-tuned checkpoint.
# map_location="cpu" deserialises every tensor onto the CPU regardless of the
# device it was saved from, so this cell neither depends on the training GPU
# being present nor initialises CUDA before CUDA_VISIBLE_DEVICES is set in a
# later cell. The model is moved to the GPU explicitly afterwards.
# NOTE: torch.load unpickles the file -- only load checkpoints you trust.
# The call is kept as the cell's last expression so the key-matching report
# returned by load_state_dict is still displayed.
model.load_state_dict(
    torch.load("../results/checkpoint-12000/pytorch_model.bin", map_location="cpu")
)


<All keys matched successfully>

In [15]:
# Batch the test split for inference, 128 examples per step. No shuffling is
# requested, so examples are served in dataset order.
dataloader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=128,
)


In [16]:
import os
import warnings

# Pin this process to one physical GPU. These variables are only read when
# CUDA is first initialised in the process; if that has already happened
# (e.g. an earlier cell touched the GPU) the assignments below are silently
# ignored, so surface that instead of running on an unintended device.
if torch.cuda.is_initialized():
    warnings.warn(
        "CUDA is already initialised; CUDA_DEVICE_ORDER / CUDA_VISIBLE_DEVICES "
        "set here will not take effect. Restart the kernel and run this cell "
        "before any CUDA work.",
        RuntimeWarning,
    )

# Enumerate devices by PCI bus id so the index below matches the hardware order.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
# Expose only physical GPU 7; inside this process it is addressed as cuda:0.
os.environ["CUDA_VISIBLE_DEVICES"]="7"

In [17]:
# Move all parameters and buffers onto the current CUDA device.
model = model.to("cuda")

In [18]:
from tqdm import tqdm

In [19]:
# Inference pass over the test split: gather per-batch predictions and labels.
# pred_list / real_list are consumed by the concatenation cell that follows.
model.eval()  # switch the model to evaluation mode
pred_list = []
real_list = []
with torch.no_grad():  # no gradients are needed for evaluation
    for batch in tqdm(dataloader):
        # Move every tensor of the batch onto the GPU next to the model.
        batch = {name: tensor.cuda() for name, tensor in batch.items()}
        outputs = model(**batch)
        # assumes outputs[1] holds the per-class scores (index 0 presumably
        # being the loss, since labels are passed) -- TODO confirm against
        # BertForRanking's forward().
        pred_list.append(outputs[1].argmax(dim=1))
        real_list.append(batch['labels'])


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 257/257 [03:43<00:00,  1.15it/s]


In [20]:
# Stitch the per-batch tensors together along the batch axis so predictions
# and labels each become one tensor covering the whole test split.
preds = torch.cat(pred_list, dim=0)
reals = torch.cat(real_list, dim=0)

In [21]:
# Accuracy: number of predictions equal to the label, divided by the number of
# test examples. Left as the bare last expression so the notebook displays it.
torch.eq(preds, reals).sum() / reals.size(0)

tensor(0.9122, device='cuda:0')