In [1]:
import pandas as pd
import torch
import numpy as np

from torch.utils.data import DataLoader
from transformers import BertTokenizer, BertModel
from cvdd_net import CVDDNet
from training_pipe import CVDD
from data_preprocess import BrandDataset, CVDDDataset

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
test_data = pd.read_csv('G:/Code/Python/GitHub/maboo/Brand/BrandData/test_brand.csv').loc[:, ['name']]


In [3]:
tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-uncased')
train_dataset = CVDDDataset(test_data, tokenizer, 256)
train_loader = DataLoader(train_dataset, batch_size=512)

In [4]:
net = CVDDNet('bert-base-multilingual-uncased', 150, 3)
net.load_state_dict(torch.load('./log/model.ckpt'))

Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


<All keys matched successfully>

In [5]:
device = 'cuda'
net.to(device)
attention_heads = net.attention_heads
epoch_loss = 0.0
n_batches = 0
att_matrix = np.zeros((attention_heads, attention_heads))
dists_per_head = ()
idx_label_score_head = []
att_weights = []
net.eval()
i = 1
with torch.no_grad():
    for data in train_loader:
        print(i)
        i += 1
        idx = data['index']
        ids = data['ids'].to(device)
        mask = data['mask'].to(device)

        cosine_dists, context_weights, A = net(ids, mask)
        scores = context_weights * cosine_dists
        _, best_att_head = torch.min(scores, dim=1)

        I = torch.eye(attention_heads).to(device)
        CCT = net.c @ net.c.transpose(1, 2)
        P = torch.mean((CCT.squeeze() - I) **2)

        loss_P = 1.0 * P
        loss_emp = torch.mean(torch.sum(scores, dim=1))
        loss = loss_emp + loss_P

        # Save tuples of (idx, label, score, best_att_head) in a list
        dists_per_head += (cosine_dists.cpu().data.numpy(),)
        ad_scores = torch.mean(cosine_dists, dim=1)
        idx_label_score_head += list(zip(idx,
                                         ad_scores.cpu().data.numpy().tolist(),
                                         best_att_head.cpu().data.numpy().tolist()))
        

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26


ValueError: text input must of type `str` (single example), `List[str]` (batch or single pretokenized example) or `List[List[str]]` (batch of pretokenized examples).

In [None]:
att_weights += A[range(len(idx)), best_att_head].cpu().data.numpy().tolist()

In [None]:
AAT = A @ A.transpose(1, 2)
att_matrix += torch.mean(AAT, 0).cpu().data.numpy()

In [None]:
att_matrix

array([[0.00680766, 0.00443824, 0.00442817],
       [0.00443824, 0.0078618 , 0.00358839],
       [0.00442817, 0.00358839, 0.00690247]])

In [None]:
idx_label_score_head.sort(key=lambda x: x[1])

In [None]:
idx_label_score_head

[(tensor([73986]), 0.09593291580677032, 0),
 (tensor([19841]), 0.09744205325841904, 0),
 (tensor([75613]), 0.09787462651729584, 1),
 (tensor([102215]), 0.09788398444652557, 0),
 (tensor([42165]), 0.09807679057121277, 1),
 (tensor([46034]), 0.09845193475484848, 1),
 (tensor([103616]), 0.09867081046104431, 0),
 (tensor([74303]), 0.09941354393959045, 2),
 (tensor([100873]), 0.09973739832639694, 1),
 (tensor([52574]), 0.10000158846378326, 1),
 (tensor([22857]), 0.10034309327602386, 1),
 (tensor([76884]), 0.10056882351636887, 0),
 (tensor([23899]), 0.10069786012172699, 0),
 (tensor([54742]), 0.10069786012172699, 0),
 (tensor([44478]), 0.10096250474452972, 0),
 (tensor([60272]), 0.10113995522260666, 1),
 (tensor([12976]), 0.10125327110290527, 0),
 (tensor([22293]), 0.10133018344640732, 1),
 (tensor([23568]), 0.10133418440818787, 2),
 (tensor([85566]), 0.10157091915607452, 0),
 (tensor([46447]), 0.10167746245861053, 0),
 (tensor([90191]), 0.10168299078941345, 0),
 (tensor([29268]), 0.10190913

In [None]:
new_list = []
for i in idx_label_score_head:
    i_list = list(i)
    i_list[0] = int(i_list[0])
    new_list.append(i_list)

In [None]:
pd.DataFrame(new_list, columns=['index', 'score', 'attention_head']).to_csv('log/score.csv', index=False)

In [None]:
train_loader.dataset.X.iloc[17877]

name    sf036超持妝全能氣墊粉餅
Name: 17877, dtype: object

In [None]:
next(iter(train_loader))

{'index': tensor([[  0],
         [  1],
         [  2],
         [  3],
         [  4],
         [  5],
         [  6],
         [  7],
         [  8],
         [  9],
         [ 10],
         [ 11],
         [ 12],
         [ 13],
         [ 14],
         [ 15],
         [ 16],
         [ 17],
         [ 18],
         [ 19],
         [ 20],
         [ 21],
         [ 22],
         [ 23],
         [ 24],
         [ 25],
         [ 26],
         [ 27],
         [ 28],
         [ 29],
         [ 30],
         [ 31],
         [ 32],
         [ 33],
         [ 34],
         [ 35],
         [ 36],
         [ 37],
         [ 38],
         [ 39],
         [ 40],
         [ 41],
         [ 42],
         [ 43],
         [ 44],
         [ 45],
         [ 46],
         [ 47],
         [ 48],
         [ 49],
         [ 50],
         [ 51],
         [ 52],
         [ 53],
         [ 54],
         [ 55],
         [ 56],
         [ 57],
         [ 58],
         [ 59],
         [ 60],
         [ 61],