In [1]:
import pandas as pd
import torch
from transformers import BertForRanking
import pickle

import os
from tqdm import tqdm
import numpy as np
import seaborn as sns

In [2]:
import torch

class RankingDataset(torch.utils.data.Dataset):
    def __init__(self, encodings1, encodings2, labels, user_id):
        self.encodings1 = encodings1
        self.encodings2 = encodings2
        self.labels = labels
        self.user_id = user_id

    def __getitem__(self, idx):
        item1 = {key + "_1": torch.tensor(val[idx]) for key, val in self.encodings1.items()}
        item2 = {key + "_2": torch.tensor(val[idx]) for key, val in self.encodings2.items()}
        item = dict(**item1, **item2)
        item['labels'] = torch.tensor(self.labels[idx])
        item['user_id'] = torch.tensor(self.user_id[idx])
        return item

    def __len__(self):
        return len(self.labels)

In [3]:
train_dataset = pickle.load(open("../book_data/rankingDataset/train_dataset.pkl", "rb"))
#val_dataset = pickle.load(open("../book_data/dataset/val_dataset.pkl", "rb"))
test_dataset = pickle.load(open("../book_data/rankingDataset/test_dataset.pkl", "rb"))

In [4]:

model = BertForRanking.from_pretrained("bert-base-uncased")



Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForRanking: ['cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForRanking from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForRanking from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForRanking were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.w

In [30]:
model.load_state_dict(torch.load("../results/checkpoint-500/pytorch_model.bin"))


<All keys matched successfully>

In [31]:

os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="7"

In [32]:
model = model.cuda()

In [38]:
def eval_model(dataset, model, shuffle_ix=False):
    model.eval()
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=128)
    
    pred_list, real_list, uid_list = list(), list(), list()
    
    i=0
    with torch.no_grad():
        for b in tqdm(dataloader):
            if shuffle_ix:
                r=torch.randperm(len(b['user_id']))
                b['user_id'] = b['user_id'][r]
                
            b = {k:v.cuda() for k, v in b.items()}
            output = model(**b)
            
            out = output[1].reshape(output[1].shape[0] // 2, 2)
            pred = torch.argmax(out, dim=1)
            pred_list.append(pred.cpu().numpy())
            real_list.append(b['labels'].cpu().numpy())
            uid_list.append(b['user_id'].cpu().numpy())
            
            i+=1
            if i> 10:
                pass
            break
            
    preds = np.concatenate(pred_list)
    reals = np.concatenate(real_list)
    uids = np.concatenate(uid_list)
    
    data = [[p, r, u] for p, r, u in zip(preds, reals, uids)]

    return pd.DataFrame(data, columns = ["pred", "read", "uid"])
    

In [28]:
df_test_rand = eval_model(test_dataset, model, False)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 87/87 [00:50<00:00,  1.73it/s]


In [29]:
df_test_rand

Unnamed: 0,pred,read,uid
0,1,0,49154
1,0,0,49154
2,0,0,49154
3,0,0,32773
4,0,0,32773
...,...,...,...
11041,0,1,172030
11042,0,1,172030
11043,0,1,172030
11044,1,0,172030


In [20]:
get_acc(df_test_rand)

0.5016295491580662

In [39]:
df_test = eval_model(train_dataset, model)

  1%|██▏                                                                                                                                                                   | 10/774 [00:11<14:05,  1.11s/it]


In [40]:
df_test

Unnamed: 0,pred,read,uid
0,0,0,49154
1,0,0,49154
2,0,0,49154
3,0,0,49154
4,0,0,49154
...,...,...,...
1403,0,0,41084
1404,1,0,41084
1405,1,0,41084
1406,1,0,41084


In [41]:
get_acc(df_test)

0.5063920454545454

In [None]:
get_acc(df_test)

In [None]:
df_train = eval_model(train_dataset, model)

In [19]:
def get_acc(df: pd.DataFrame)->float:
    return (df.pred == df.read).sum() / df.shape[0]

In [None]:
train_acc = (df_train.pred == df_train.read).sum() / df_train.shape[0]
test_acc = (df_test.pred == df_test.read).sum() / df_test.shape[0]

print(train_acc, test_acc)

In [None]:
train_count = df_train.uid.value_counts()
test_count = df_test.uid.value_counts()

In [None]:
train_count = train_count[train_count.index.isin(test_count.index)]

In [None]:
train_count = train_count.sort_index()
test_count = test_count.sort_index()

In [None]:
all(train_count.index == test_count.index)

In [None]:
all_counts = pd.concat([train_count, test_count],1)

In [None]:
all_counts = all_counts.set_axis(["trainCount", "testCount"], axis=1)

In [None]:
all_counts

In [None]:
items = list()
for i, row in all_counts.iterrows():
    acc = get_acc(df_test[df_test.uid == i])
    item = list(row) + [acc, i]
    items.append(item)


In [None]:
result_df = pd.DataFrame(items, columns=["trainCount", "testCount", "acc", "uid"])

In [None]:
import matplotlib.pyplot as plt

In [None]:
plt.style.use('ggplot')
sns.set_style("whitegrid", {'axes.grid' : False})


In [None]:
plt.figure(figsize=(10, 6))

ax = sns.histplot(result_df.acc)

ax.invert_xaxis()

#plt.legend(title='', loc='upper right', labels=['Prosit Transformer', 'Prosit RNN'], prop={"size":14})

A = ax.get_legend()
#A.set_title('')

#plt.setp(A.get_texts(), fontsize='14') # for legend text
#plt.setp(A.get_title(), fontsize='14') # for legend title

plt.xlabel("Accuracy", fontsize=14)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.ylabel("Number of Users", fontsize=14)
plt.tight_layout()

#seaborn.histplot(data=filterDf(datafile), x="Angular Similarity", hue="Data Set", alpha=0.2)
#plt.savefig('./plots/spectralAngleDist.png')

In [None]:
plt.figure(figsize=(10, 6))

ax = sns.histplot(result_df[result_df.trainCount>20].acc)

ax.invert_xaxis()

#plt.legend(title='', loc='upper right', labels=['Prosit Transformer', 'Prosit RNN'], prop={"size":14})

A = ax.get_legend()
#A.set_title('')

#plt.setp(A.get_texts(), fontsize='14') # for legend text
#plt.setp(A.get_title(), fontsize='14') # for legend title

plt.xlabel("Accuracy", fontsize=14)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.ylabel("Density", fontsize=14)
plt.tight_layout()

#seaborn.histplot(data=filterDf(datafile), x="Angular Similarity", hue="Data Set", alpha=0.2)
#plt.savefig('./plots/spectralAngleDist.png')

In [None]:
plt.figure(figsize=(10, 6))
ax = sns.histplot(result_df[(result_df.trainCount>10) & (result_df.testCount>5)].acc)

ax.invert_xaxis()

#plt.legend(title='', loc='upper right', labels=['Prosit Transformer', 'Prosit RNN'], prop={"size":14})

A = ax.get_legend()
#A.set_title('')

#plt.setp(A.get_texts(), fontsize='14') # for legend text
#plt.setp(A.get_title(), fontsize='14') # for legend title

plt.xlabel("Accuracy", fontsize=14)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.ylabel("Number of Users", fontsize=14)
plt.tight_layout()

#seaborn.histplot(data=filterDf(datafile), x="Angular Similarity", hue="Data Set", alpha=0.2)
#plt.savefig('./plots/spectralAngleDist.png')

In [None]:
ax = sns.distplot(result_df.acc, hist=False)

In [None]:
result_df

In [None]:
row

In [None]:
ixs_1 = train_count[train_count == 1].index

In [None]:
get_acc(df_test[df_test.uid.isin(ixs_1)])

In [None]:
def get_ixs(counts, l, b=None):
    if b is not None:
        return counts[(l <= counts) & (counts <= b)].index
    else:
        return counts[counts == l].index
    
def filerDf(df, train_ix, test_ix):
    df = df[df.uid.isin(train_ix)]
    df = df[df.uid.isin(test_ix)]
    return df
    
def getUserAcc(df):
    acc_list = list()
    for uid in df_10.uid:
        acc = get_acc(df[df.uid == uid])
        acc_list.append(acc)
    return acc_list
  

In [None]:
train_ixs_10 = get_ixs(train_count, 5, 10) 
test_ixs_10 = get_ixs(test_count, 5, 10) 
df_10 = filerDf(df, train_ixs_10, test_ixs_10)

In [None]:
acc_list = getUserAcc(df_10)
sns.boxplot(acc_list)

In [None]:
train_ixs_10 = get_ixs(train_count, 10, 100) 
test_ixs_10 = get_ixs(test_count, 10, 100) 
df_10 = filerDf(df, train_ixs_10, test_ixs_10)
acc_list = getUserAcc(df_10)
sns.boxplot(acc_list)

In [None]:
train_ixs_10 = get_ixs(train_count, 100, 1000) 
test_ixs_10 = get_ixs(test_count, 1, 1000) 
df_10 = filerDf(df, train_ixs_10, test_ixs_10)
acc_list = getUserAcc(df_10)
sns.boxplot(acc_list)

In [None]:
get_acc(df_10[df_10.uid == uid])

In [None]:
get_acc(df_test[df_test.uid.isin(ixs_10)])

In [None]:
ixs_100 = train_count[train_count >= 2000].index

In [None]:
get_acc(df_test[df_test.uid.isin(ixs_100)])

In [None]:
df_test.uid.isin(ixs_100).sum()

In [None]:
ixs_100

In [None]:
val_count = df_train.uid.value_counts()

In [None]:
preds = np.concatenate(pred_list)
reals = np.concatenate(real_list)
uids = np.concatenate(uid_list)

In [None]:
data = [[p, r, u] for p, r, u in zip(preds, reals, uids)]

df = pd.DataFrame(data, columns = ["pred", "read", "uid"])

In [None]:
val_count = df.uid.value_counts()

In [None]:
counts = (val_count == 1)


In [None]:
ix = counts[counts.values].index

In [None]:
df_1_review = df[df.uid.isin(ix)]

In [None]:
df_1_review

In [None]:
(df_1_review.pred == df_1_review.read).sum() / df_1_review.shape[0]

In [None]:
preds = torch.cat(pred_list)
reals = torch.cat(real_list)

In [None]:
(preds == reals).sum() / reals.shape[0]