In [1]:
import pandas as pd
import torch
import numpy as np
import re

from torch.utils.data import DataLoader
from transformers import BertTokenizer, BertModel
from cvdd_net import CVDDNet
from training_pipe import CVDD
from data_preprocess import BrandDataset, CVDDDataset

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
test_data = pd.read_csv('G:/Code/Python/GitHub/maboo/Brand/BrandData/Original_data/brand_test(question).csv').loc[:, ['name']]

In [3]:
def clean_string(string):
    s_list = re.findall(r'[a-zA-Z0-9\u4e00-\u9fff]+',string)
    return "".join(s_list).lower()

test_data['name'] = test_data['name'].apply(clean_string)

In [4]:
tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-uncased')
test_dataset = CVDDDataset(test_data, tokenizer, 256)
test_loader = DataLoader(test_dataset, batch_size=512)

In [5]:
net = CVDDNet('bert-base-multilingual-uncased', 150, 3)
net.load_state_dict(torch.load('./log/model.ckpt'))

Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


<All keys matched successfully>

In [6]:
device = 'cuda'
net.to(device)
attention_heads = net.attention_heads
epoch_loss = 0.0
n_batches = 0
att_matrix = np.zeros((attention_heads, attention_heads))
dists_per_head = ()
idx_label_score_head = []
att_weights = []
net.eval()
i = 1
with torch.no_grad():
    for data in test_loader:
        idx = data['index']
        ids = data['ids'].to(device)
        mask = data['mask'].to(device)

        cosine_dists, context_weights, A = net(ids, mask)
        scores = context_weights * cosine_dists
        _, best_att_head = torch.min(scores, dim=1)

        I = torch.eye(attention_heads).to(device)
        CCT = net.c @ net.c.transpose(1, 2)
        P = torch.mean((CCT.squeeze() - I) **2)

        loss_P = 1.0 * P
        loss_emp = torch.mean(torch.sum(scores, dim=1))
        loss = loss_emp + loss_P

        # Save tuples of (idx, label, score, best_att_head) in a list
        dists_per_head += (cosine_dists.cpu().data.numpy(),)
        ad_scores = torch.mean(cosine_dists, dim=1)
        idx_label_score_head += list(zip(idx,
                                         ad_scores.cpu().data.numpy().tolist(),
                                         best_att_head.cpu().data.numpy().tolist()))
        

In [17]:
att_weights += A[range(len(idx)), best_att_head].cpu().data.numpy().tolist()

In [18]:
AAT = A @ A.transpose(1, 2)
att_matrix += torch.mean(AAT, 0).cpu().data.numpy()

In [19]:
att_matrix

array([[0.00403464, 0.00394544, 0.0039958 ],
       [0.00394544, 0.11728392, 0.00441868],
       [0.0039958 , 0.00441868, 0.00402764]])

In [7]:
idx_label_score_head.sort(key=lambda x: x[1])

In [8]:
idx_label_score_head

[(tensor([80715]), 0.22132711112499237, 0),
 (tensor([29790]), 0.22247669100761414, 0),
 (tensor([30519]), 0.22494590282440186, 0),
 (tensor([33421]), 0.22778984904289246, 0),
 (tensor([89922]), 0.22866606712341309, 0),
 (tensor([3540]), 0.22891438007354736, 0),
 (tensor([52202]), 0.23123720288276672, 0),
 (tensor([80348]), 0.23166243731975555, 0),
 (tensor([59534]), 0.23198823630809784, 0),
 (tensor([88846]), 0.23274926841259003, 0),
 (tensor([32253]), 0.2332528531551361, 0),
 (tensor([52417]), 0.23340964317321777, 0),
 (tensor([24150]), 0.23372428119182587, 0),
 (tensor([18237]), 0.23401856422424316, 0),
 (tensor([16714]), 0.2340743988752365, 0),
 (tensor([9288]), 0.23412136733531952, 0),
 (tensor([21434]), 0.23424366116523743, 0),
 (tensor([18279]), 0.23464129865169525, 0),
 (tensor([485]), 0.23496174812316895, 0),
 (tensor([23631]), 0.2351374328136444, 0),
 (tensor([77630]), 0.2351374328136444, 0),
 (tensor([26339]), 0.2354358732700348, 0),
 (tensor([22128]), 0.23563747107982635, 0

In [9]:
new_list = []
for i in idx_label_score_head:
    i_list = list(i)
    i_list[0] = int(i_list[0])
    new_list.append(i_list)

In [10]:
pd.DataFrame(new_list, columns=['index', 'score', 'attention_head']).to_csv('log/score.csv', index=False)

In [29]:
test_loader.dataset.X.iloc[485]

name    asicsdm女運鞋款1488
Name: 485, dtype: object