In [1]:
import pandas as pd
import numpy as np

In [2]:
import torch
print(f"Torch Version: {torch.__version__}")

import transformers
print(f"transformers (Adapter) Version: {transformers.__version__}")

Torch Version: 1.8.1
transformers (Adapter) Version: 2.0.1


In [3]:
from transformers import RobertaTokenizer
import numpy as np

tokenizer = RobertaTokenizer.from_pretrained("roberta-base")

from transformers import RobertaTokenizer

tokenizer = RobertaTokenizer.from_pretrained("roberta-base")

def encode_batch(batch):
  """Encodes a batch of input data using the model tokenizer."""
  return tokenizer(batch["text"], max_length=80, truncation=True, padding="max_length")

In [4]:
data_path = "./NER_multilabel_data_v2.csv"
df = pd.read_csv(data_path)

all_tags = df.newTag

all_tags = set(all_tags)

all_tags = "|".join(all_tags)
all_tags = all_tags.split("|")
all_tags = set(all_tags)
all_tags = list(all_tags)

In [5]:

def process_csv(data_path):
    df = pd.read_csv(data_path, encoding="latin-1")
    df.loc[:, "Sentence #"] = df["Sentence #"].fillna(method="ffill")
    sentences = df.groupby("Sentence #")["Word"].apply(list).values
    tags = df.groupby("Sentence #")["newTag"].apply(list).values
    return sentences, tags

sentences, tags = process_csv(data_path)

In [6]:
from torch.utils.data import Dataset
from sklearn.preprocessing import OneHotEncoder
    
    


class NER_Dataset(Dataset):
    # 讀取前處理後的 tsv 檔並初始化一些參數
    def __init__(self, mode, tokenizer, data_path, labels):
        assert mode in ["train", "test"]  # 一般訓練你會需要 dev set
        self.mode = mode
        # 大數據你會需要用 iterator=True
        self.sentences, self.tags = process_csv(data_path)
        self.len = len(self.sentences)
        

        if mode != "test":
            self.label_map = {}
            for i in range(len(labels)):
                self.label_map[labels[i]] = i
                
            possible_labels = np.array(range(len(labels))).reshape(-1, 1)
            self.oneHotEncoder = OneHotEncoder()
            self.oneHotEncoder.fit(possible_labels)
        else:
            self.label_map = None
        
        self.tokenizer = tokenizer  # 我們將使用 BERT tokenizer
        self.O_label = self.label_map["O"]

    
    # 定義回傳一筆訓練 / 測試數據的函式
    def __getitem__(self, idx):
        if self.mode == "test":
            label_tensor = None
        else:
            label = ["O"] + self.tags[idx] + ["O"]

            label = np.array(label)
            label = label.reshape(-1,1)

            label = np.apply_along_axis(self.split_one_hot_multiTags, 1, label)
            label_tensor = torch.tensor(label, dtype = torch.float32)
            
        # 建立第一個句子的 BERT tokens 並加入分隔符號 [SEP]
        word_pieces = ['[CLS]']
        word_pieces += self.sentences[idx]
        word_pieces += ['[SEP]']
        
        ids = self.tokenizer.convert_tokens_to_ids(word_pieces)
        tokens_tensor = torch.tensor(ids)
        
        # 將第一句包含 [SEP] 的 token 位置設為 0
        segments_tensor = torch.zeros_like(tokens_tensor)
        
        return (tokens_tensor, segments_tensor, label_tensor)
    
    def __len__(self):
        return self.len
    
    def split_one_hot_multiTags(self, tags):
        # tags = ['B-org|Party|String']
        tags = tags[0]
        tags = tags.split("|")


        tags_num = list(map(lambda x: self.label_map[x], tags))
        #[5, 20, 23]

        tags_num = np.array(tags_num).reshape(-1,1)

        tags_one_hot = self.oneHotEncoder.transform(tags_num).toarray()

        tags_one_hot = tags_one_hot.sum(axis = 0)

        #return torch.tensor(tags_one_hot, dtype = torch.float32)

        return tags_one_hot
    
    
# 初始化一個專門讀取訓練樣本的 Dataset，使用中文 BERT 斷詞

df = pd.read_csv(data_path, encoding="latin-1")

labels = np.unique("|".join(list(df.newTag)).split("|"))
print(f"labels: {labels}")

trainset = NER_Dataset("train", tokenizer=tokenizer, data_path=data_path, labels= labels)

labels: ['B-art' 'B-eve' 'B-geo' 'B-gpe' 'B-nat' 'B-org' 'B-per' 'B-tim'
 'CountryCode' 'CryptoCurrencyCode' 'CurrencyCode' 'Event' 'Float' 'I-art'
 'I-eve' 'I-geo' 'I-gpe' 'I-nat' 'I-org' 'I-per' 'I-tim' 'Integer'
 'Location' 'Month' 'O' 'Object' 'Party' 'Race' 'SpecialTerm'
 'TemporalUnit' 'Time' 'Timezone' 'US_States']


In [7]:
from torch.utils.data import DataLoader, IterableDataset
from torch.nn.utils.rnn import pad_sequence
def create_mini_batch(samples):
    tokens_tensors = [s[0] for s in samples]
    segments_tensors = [s[1] for s in samples]
    
    # 測試集有 labels
    if samples[0][2] is not None:
        label_ids = [s[2] for s in samples]
        label_ids = pad_sequence(label_ids, 
                                  batch_first=True)
    else:
        label_ids = None
    
    # zero pad 到同一序列長度
    tokens_tensors = pad_sequence(tokens_tensors, 
                                  batch_first=True)
    segments_tensors = pad_sequence(segments_tensors, 
                                    batch_first=True)
    
    # attention masks，將 tokens_tensors 裡頭不為 zero padding
    # 的位置設為 1 讓 BERT 只關注這些位置的 tokens
    masks_tensors = torch.zeros(tokens_tensors.shape, 
                                dtype=torch.long)
    masks_tensors = masks_tensors.masked_fill(
        tokens_tensors != 0, 1)
    
    return tokens_tensors, segments_tensors, masks_tensors, label_ids

In [8]:
trainset.id2label = {}
for key in trainset.label_map.keys():
    trainset.id2label[trainset.label_map[key]] = key

In [9]:
from transformers import RobertaConfig, RobertaModelWithHeads

config = RobertaConfig.from_pretrained(
    "roberta-base",
    num_labels=len(all_tags),
    label2id = trainset.label_map, 
    id2label = trainset.id2label
)
model = RobertaModelWithHeads.from_pretrained(
    "roberta-base",
    config=config,
)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModelWithHeads: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaModelWithHeads from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModelWithHeads from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModelWithHeads were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.embeddings.position_ids']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and infere

In [10]:
for tag in all_tags:
    break
    model.add_adapter(tag)
    model.add_tagging_head(
        tag,
        num_labels=2
      )
    model.train_adapter(tag)

In [11]:
tag = "ALL2"
model.add_adapter(tag)
model.add_tagging_head(
    tag,
    num_labels=len(trainset.label_map.keys())
  )
model.train_adapter(tag)

name = model.load_adapter("./save_adapters/ALL_tag_0730")
model.add_tagging_head(
        name,
        num_labels=len(trainset.label_map.keys()), overwrite_ok=True
      )
model.train_adapter(name)

In [12]:
BATCH_SIZE = 16
trainloader = DataLoader(trainset, batch_size=BATCH_SIZE, 
                         collate_fn=create_mini_batch)

In [13]:
data = next(iter(trainloader))

tokens_tensors, segments_tensors, \
    masks_tensors, label_ids = data

'''print(f"""
tokens_tensors.shape   = {tokens_tensors.shape} 
{tokens_tensors}
------------------------
segments_tensors.shape = {segments_tensors.shape}
{segments_tensors}
------------------------
masks_tensors.shape    = {masks_tensors.shape}
{masks_tensors}
------------------------
label_ids.shape        = {label_ids.shape}
{label_ids}
""")'''

'print(f"""\ntokens_tensors.shape   = {tokens_tensors.shape} \n{tokens_tensors}\n------------------------\nsegments_tensors.shape = {segments_tensors.shape}\n{segments_tensors}\n------------------------\nmasks_tensors.shape    = {masks_tensors.shape}\n{masks_tensors}\n------------------------\nlabel_ids.shape        = {label_ids.shape}\n{label_ids}\n""")'

In [14]:
device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")

model = model.to(device)

In [15]:

no_decay = ["bias", "LayerNorm.weight"]
optimizer_grouped_parameters = [
                {
                    "params": [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
                    "weight_decay": 1e-5,
                },
                {
                    "params": [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)],
                    "weight_decay": 0.0,
                },
            ]
optimizer = torch.optim.AdamW(params=optimizer_grouped_parameters, lr=1e-4)


In [16]:
device = model.device

In [17]:
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)

In [18]:
for epoch in range(2):
    print(f"epoch {epoch}")
    for i, data in enumerate(trainloader):
        
        tokens_tensors, segments_tensors, \
        masks_tensors, labels = [t.to(device) for t in data]

        outputs = model(input_ids = tokens_tensors,
            attention_mask=masks_tensors,
            token_type_ids=segments_tensors)



        logits = outputs[0]

        active_logits = logits.view(-1, logits.shape[-1])[masks_tensors.view(-1) == 1]

        active_labels = labels.view(-1, logits.shape[-1])[masks_tensors.view(-1)== 1]

        loss_fct = torch.nn.BCEWithLogitsLoss()

        loss = loss_fct(active_logits, active_labels)

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        if i % 1 == 0:
            print(f"loss: {loss}")

epoch 0
loss: 0.6937466263771057
loss: 0.6930429935455322
loss: 0.6914026737213135
loss: 0.6894311308860779
loss: 0.6937182545661926
loss: 0.6907380223274231
loss: 0.6893541216850281
loss: 0.6895379424095154
loss: 0.6888818740844727
loss: 0.6876673102378845
loss: 0.6875622868537903
loss: 0.6854007244110107
loss: 0.6829606890678406
loss: 0.6837712526321411
loss: 0.6864466667175293
loss: 0.6834150552749634
loss: 0.6829182505607605
loss: 0.6826192736625671
loss: 0.6795256733894348
loss: 0.6812450289726257
loss: 0.6780616044998169
loss: 0.6791813969612122
loss: 0.677659273147583
loss: 0.675607442855835
loss: 0.6763076782226562
loss: 0.6772870421409607
loss: 0.6775592565536499
loss: 0.6729828715324402
loss: 0.6756740212440491
loss: 0.6708444952964783
loss: 0.6731039881706238
loss: 0.6720023155212402
loss: 0.6709281206130981
loss: 0.6722413897514343
loss: 0.6711953282356262
loss: 0.6678696870803833
loss: 0.6666512489318848
loss: 0.6671018004417419
loss: 0.6635587811470032
loss: 0.66615408658

loss: 0.11104214191436768
loss: 0.11785035580396652
loss: 0.1010780856013298
loss: 0.10185141116380692
loss: 0.11537782102823257
loss: 0.11083055287599564
loss: 0.11407925933599472
loss: 0.10099682956933975
loss: 0.11783347278833389
loss: 0.1075623482465744
loss: 0.12704548239707947
loss: 0.1079610213637352
loss: 0.1197919175028801
loss: 0.11258567869663239
loss: 0.10378158837556839
loss: 0.10636233538389206
loss: 0.0963342934846878
loss: 0.10585354268550873
loss: 0.11497344076633453
loss: 0.10124058276414871
loss: 0.10898743569850922
loss: 0.10173067450523376
loss: 0.1093049943447113
loss: 0.10491026937961578
loss: 0.10282188653945923
loss: 0.10626854002475739
loss: 0.08388182520866394
loss: 0.11180223524570465
loss: 0.0958169624209404
loss: 0.11669869720935822
loss: 0.10403105616569519
loss: 0.10422292351722717
loss: 0.09487535804510117
loss: 0.10411443561315536
loss: 0.10285502672195435
loss: 0.09847666323184967
loss: 0.0971466600894928
loss: 0.10149738192558289
loss: 0.105932421982

loss: 0.07459456473588943
loss: 0.06789135932922363
loss: 0.08738739043474197
loss: 0.07945990562438965
loss: 0.08201166987419128
loss: 0.06787329167127609
loss: 0.07054515182971954
loss: 0.07649119198322296
loss: 0.062162868678569794
loss: 0.06555075198411942
loss: 0.06689505279064178
loss: 0.06503766775131226
loss: 0.03902892768383026
loss: 0.04510548710823059
loss: 0.07743550837039948
loss: 0.08020321279764175
loss: 0.06466144323348999
loss: 0.07646439969539642
loss: 0.06146956980228424
loss: 0.07097063213586807
loss: 0.08511767536401749
loss: 0.079886794090271
loss: 0.06979189813137054
loss: 0.06727353483438492
loss: 0.06857195496559143
loss: 0.0771075040102005
loss: 0.06397745758295059
loss: 0.08883089572191238
loss: 0.0711418092250824
loss: 0.07831346988677979
loss: 0.07477958500385284
loss: 0.08803344517946243
loss: 0.07627027481794357
loss: 0.0753222405910492
loss: 0.08438673615455627
loss: 0.06421728432178497
loss: 0.07040303945541382
loss: 0.07049665600061417
loss: 0.08742414

loss: 0.06905876845121384
loss: 0.07178127765655518
loss: 0.04545188695192337
loss: 0.07680535316467285
loss: 0.0642898827791214
loss: 0.06894660741090775
loss: 0.06987722963094711
loss: 0.06266305595636368
loss: 0.08065754920244217
loss: 0.08841366320848465
loss: 0.06114385649561882
loss: 0.07068167626857758
loss: 0.07590488344430923
loss: 0.05829082056879997
loss: 0.062467001378536224
loss: 0.06176874786615372
loss: 0.07369574159383774
loss: 0.06837192922830582
loss: 0.0663144588470459
loss: 0.06679104268550873
loss: 0.07621506601572037
loss: 0.09461040794849396
loss: 0.06086526811122894
loss: 0.06307316571474075
loss: 0.061026763170957565
loss: 0.07037047296762466
loss: 0.05670687183737755
loss: 0.060483865439891815
loss: 0.06936587393283844
loss: 0.07107755541801453
loss: 0.0817820206284523
loss: 0.0778849795460701
loss: 0.059489134699106216
loss: 0.0673375055193901
loss: 0.05124750733375549
loss: 0.070713572204113
loss: 0.06445606052875519
loss: 0.06182604655623436
loss: 0.0657421

loss: 0.07144495844841003
loss: 0.05969790741801262
loss: 0.07501084357500076
loss: 0.048930130898952484
loss: 0.0481133759021759
loss: 0.06464605033397675
loss: 0.05162995681166649
loss: 0.06649373471736908
loss: 0.0751752108335495
loss: 0.06517677009105682
loss: 0.07081591337919235
loss: 0.06592275202274323
loss: 0.06851079314947128
loss: 0.07468943297863007
loss: 0.057669345289468765
loss: 0.05113385617733002
loss: 0.06385196000337601
loss: 0.06992264837026596
loss: 0.06881842017173767
loss: 0.07104358077049255
loss: 0.06051744893193245
loss: 0.050706569105386734
loss: 0.06078053638339043
loss: 0.06022731214761734
loss: 0.07883128523826599
loss: 0.06319740414619446
loss: 0.062041815370321274
loss: 0.05269161984324455
loss: 0.07105836272239685
loss: 0.07144170254468918
loss: 0.067259781062603
loss: 0.060717035084962845
loss: 0.05356312170624733
loss: 0.06682836264371872
loss: 0.07128162682056427
loss: 0.06610774993896484
loss: 0.05573645606637001
loss: 0.0520247183740139
loss: 0.0711

loss: 0.0626944899559021
loss: 0.06886620074510574
loss: 0.060881298035383224
loss: 0.06074466556310654
loss: 0.058799922466278076
loss: 0.078240767121315
loss: 0.07889118790626526
loss: 0.0752086266875267
loss: 0.06239665299654007
loss: 0.05939225107431412
loss: 0.03763721138238907
loss: 0.05733199417591095
loss: 0.05531516298651695
loss: 0.07297258079051971
loss: 0.07370681315660477
loss: 0.05629650503396988
loss: 0.06666123867034912
loss: 0.057953715324401855
loss: 0.06009998172521591
loss: 0.08446452021598816
loss: 0.06089473515748978
loss: 0.06433887779712677
loss: 0.06020922586321831
loss: 0.05802392214536667
loss: 0.06870602071285248
loss: 0.05581548064947128
loss: 0.0646292045712471
loss: 0.07959120720624924
loss: 0.07569290697574615
loss: 0.07251725345849991
loss: 0.05603969097137451
loss: 0.07536398619413376
loss: 0.0631663128733635
loss: 0.07632026076316833
loss: 0.07265361398458481
loss: 0.04985798895359039
loss: 0.05561206862330437
loss: 0.06915974617004395
loss: 0.0663823

loss: 0.04792642220854759
loss: 0.07202192395925522
loss: 0.06818904727697372
loss: 0.07147110253572464
loss: 0.08162527531385422
loss: 0.0605894960463047
loss: 0.06561926752328873
loss: 0.07612679153680801
loss: 0.06099606305360794
loss: 0.06141332536935806
loss: 0.07376261055469513
loss: 0.0623633936047554
loss: 0.05528254434466362
loss: 0.06763610243797302
loss: 0.07010271400213242
loss: 0.06978986412286758
loss: 0.06880094856023788
loss: 0.04767146334052086
loss: 0.050169434398412704
loss: 0.04267831891775131
loss: 0.07389406859874725
loss: 0.025856630876660347
loss: 0.05394187569618225
loss: 0.0444134920835495
loss: 0.07068546861410141
loss: 0.05819277837872505
loss: 0.06695941835641861
loss: 0.063034787774086
loss: 0.059415679425001144
loss: 0.06906493008136749
loss: 0.06636182963848114
loss: 0.07163600623607635
loss: 0.06005623936653137
loss: 0.06406712532043457
loss: 0.06289368122816086
loss: 0.057490453124046326
loss: 0.0654912069439888
loss: 0.06732343137264252
loss: 0.062784

loss: 0.07082243263721466
loss: 0.05951424688100815
loss: 0.05588816851377487
loss: 0.06254523992538452
loss: 0.0519612692296505
loss: 0.06868032366037369
loss: 0.05779365822672844
loss: 0.05649494752287865
loss: 0.05163007229566574
loss: 0.06862876564264297
loss: 0.0456266775727272
loss: 0.05322827026247978
loss: 0.06526567786931992
loss: 0.06888598948717117
loss: 0.06400733441114426
loss: 0.053529515862464905
loss: 0.050865836441516876
loss: 0.062382157891988754
loss: 0.05379602685570717
loss: 0.0497458353638649
loss: 0.07699144631624222
loss: 0.06467878073453903
loss: 0.05252792313694954
loss: 0.05845044553279877
loss: 0.05126488581299782
loss: 0.06618363410234451
loss: 0.05032619833946228
loss: 0.05047391727566719
loss: 0.05389111116528511
loss: 0.06148805096745491
loss: 0.05162185803055763
loss: 0.056423675268888474
loss: 0.05969588831067085
loss: 0.057254184037446976
loss: 0.06345420330762863
loss: 0.05955938994884491
loss: 0.06392006576061249
loss: 0.05707206577062607
loss: 0.05

loss: 0.0528453029692173
loss: 0.07041148841381073
loss: 0.042612265795469284
loss: 0.05422823503613472
loss: 0.061121389269828796
loss: 0.06073708459734917
loss: 0.04136934131383896
loss: 0.02400326542556286
loss: 0.019140111282467842
loss: 0.07435521483421326
loss: 0.0602581612765789
loss: 0.05149005353450775
loss: 0.04422589763998985
loss: 0.05433805659413338
loss: 0.0505518764257431
loss: 0.05664166808128357
loss: 0.051050540059804916
loss: 0.04789961501955986
loss: 0.04700954630970955
loss: 0.06424004584550858
loss: 0.044608838856220245
loss: 0.05399952456355095
loss: 0.06424059718847275
loss: 0.041850779205560684
loss: 0.05586441606283188
loss: 0.045707426965236664
loss: 0.055755846202373505
loss: 0.04362304136157036
loss: 0.057841017842292786
loss: 0.058386482298374176
loss: 0.06230264529585838
loss: 0.05718625336885452
loss: 0.05062194541096687
loss: 0.05285390093922615
loss: 0.056968189775943756
loss: 0.05163564905524254
loss: 0.07502758502960205
loss: 0.05791715905070305
loss

loss: 0.017394548282027245
loss: 0.04783962294459343
loss: 0.053196705877780914
loss: 0.03622535243630409
loss: 0.05609413981437683
loss: 0.045411672443151474
loss: 0.049964789301157
loss: 0.051706574857234955
loss: 0.04689960554242134
loss: 0.04264532029628754
loss: 0.04701605439186096
loss: 0.05489480122923851
loss: 0.04242686182260513
loss: 0.06436683237552643
loss: 0.054502490907907486
loss: 0.045122627168893814
loss: 0.055064160376787186
loss: 0.05623428151011467
loss: 0.05092250555753708
loss: 0.045902978628873825
loss: 0.037024907767772675
loss: 0.04175160080194473
loss: 0.03940662741661072
loss: 0.03974566608667374
loss: 0.05393073335289955
loss: 0.05003787949681282
loss: 0.05590277537703514
loss: 0.05689680576324463
loss: 0.042557843029499054
loss: 0.0397302582859993
loss: 0.044885098934173584
loss: 0.046273864805698395
loss: 0.05220358446240425
loss: 0.04685784503817558
loss: 0.05431535840034485
loss: 0.056847330182790756
loss: 0.04738396033644676
loss: 0.05664776638150215
lo

loss: 0.043689750134944916
loss: 0.05724039301276207
loss: 0.04144570603966713
loss: 0.04621436446905136
loss: 0.038245540112257004
loss: 0.04335295036435127
loss: 0.04978364333510399
loss: 0.04238016530871391
loss: 0.03850913792848587
loss: 0.05076347663998604
loss: 0.04983033239841461
loss: 0.05511593446135521
loss: 0.044371966272592545
loss: 0.047471899539232254
loss: 0.05249707028269768
loss: 0.04470042139291763
loss: 0.03342162445187569
loss: 0.044346004724502563
loss: 0.050788428634405136
loss: 0.05752712860703468
loss: 0.03720429167151451
loss: 0.03136410564184189
loss: 0.03232284635305405
loss: 0.0590793751180172
loss: 0.05182955041527748
loss: 0.057510070502758026
loss: 0.03988724574446678
loss: 0.04677679389715195
loss: 0.02913937158882618
loss: 0.04010072350502014
loss: 0.043299444019794464
loss: 0.03836023062467575
loss: 0.03922098129987717
loss: 0.054672256112098694
loss: 0.040580835193395615
loss: 0.03894877806305885
loss: 0.040708426386117935
loss: 0.043277282267808914
l

loss: 0.04186689108610153
loss: 0.04309632256627083
loss: 0.05029670149087906
loss: 0.04854615032672882
loss: 0.058497197926044464
loss: 0.046318065375089645
loss: 0.03883756697177887
loss: 0.04712488874793053
loss: 0.04168430343270302
loss: 0.04103933647274971
loss: 0.055887650698423386
loss: 0.04019252210855484
loss: 0.028677573427557945
loss: 0.03744084760546684
loss: 0.044201720505952835
loss: 0.04315931722521782
loss: 0.04495417699217796
loss: 0.040852975100278854
loss: 0.038938816636800766
loss: 0.04935508966445923
loss: 0.039064593613147736
loss: 0.03506290912628174
loss: 0.05537845566868782
loss: 0.04257887229323387
loss: 0.04601117596030235
loss: 0.04037811607122421
loss: 0.04511179402470589
loss: 0.039032768458127975
loss: 0.04985545575618744
loss: 0.05250754952430725
loss: 0.04351698234677315
loss: 0.03700189292430878
loss: 0.04064616560935974
loss: 0.04031335562467575
loss: 0.03602220490574837
loss: 0.04393461346626282
loss: 0.040993232280015945
loss: 0.03899191692471504
lo

loss: 0.04364911839365959
loss: 0.03383298218250275
loss: 0.04285363852977753
loss: 0.04098891094326973
loss: 0.03977999836206436
loss: 0.03968114033341408
loss: 0.031662505120038986
loss: 0.043332040309906006
loss: 0.051642753183841705
loss: 0.04634122550487518
loss: 0.03336535766720772
loss: 0.045355722308158875
loss: 0.05897745490074158
loss: 0.03736403211951256
loss: 0.04448739066720009
loss: 0.03478911891579628
loss: 0.03551001474261284
loss: 0.04262873902916908
loss: 0.03257178142666817
loss: 0.0420449934899807
loss: 0.04193442314863205
loss: 0.046714622527360916
loss: 0.04097254201769829
loss: 0.028749505057930946
loss: 0.052386991679668427
loss: 0.04543372988700867
loss: 0.05225484073162079
loss: 0.03210262954235077
loss: 0.052619569003582
loss: 0.037632085382938385
loss: 0.046703994274139404
loss: 0.03353596851229668
loss: 0.03460552543401718
loss: 0.04221779480576515
loss: 0.042389366775751114
loss: 0.03802850842475891
loss: 0.04319765046238899
loss: 0.054980434477329254
loss

loss: 0.04835178703069687
loss: 0.045875538140535355
loss: 0.038340114057064056
loss: 0.04374643787741661
loss: 0.04578722268342972
loss: 0.035309799015522
loss: 0.046837009489536285
loss: 0.04025536775588989
loss: 0.03521204739809036
loss: 0.04461396485567093
loss: 0.035934317857027054
loss: 0.030421750620007515
loss: 0.04502960294485092
loss: 0.03614816814661026
loss: 0.03935881331562996
loss: 0.03715547174215317
loss: 0.029441995546221733
loss: 0.055084507912397385
loss: 0.04111609235405922
loss: 0.03191354125738144
loss: 0.03521934896707535
loss: 0.04389170929789543
loss: 0.04650073125958443
loss: 0.02582191489636898
loss: 0.04475380852818489
loss: 0.02915819175541401
loss: 0.040004912763834
loss: 0.041914306581020355
loss: 0.05827740579843521
loss: 0.03919578343629837
loss: 0.032829634845256805
loss: 0.03025120310485363
loss: 0.03607671707868576
loss: 0.018558625131845474
loss: 0.04088377207517624
loss: 0.03393526002764702
loss: 0.04219934716820717
loss: 0.030077338218688965
loss:

loss: 0.03294750675559044
loss: 0.028201209381222725
loss: 0.03042616881430149
loss: 0.043730784207582474
loss: 0.043152034282684326
loss: 0.03449007496237755
loss: 0.04087797552347183
loss: 0.02690104767680168
loss: 0.030305787920951843
loss: 0.047746747732162476
loss: 0.04352239519357681
loss: 0.04138694703578949
loss: 0.044588226824998856
loss: 0.03495968133211136
loss: 0.035667311400175095
loss: 0.033384278416633606
loss: 0.02722080796957016
loss: 0.031895507127046585
loss: 0.04057251662015915
loss: 0.037824131548404694
loss: 0.033031661063432693
loss: 0.047539111226797104
loss: 0.03933561593294144
loss: 0.04305049777030945
loss: 0.04283428192138672
loss: 0.04550078511238098
loss: 0.03465207666158676
loss: 0.030261123552918434
loss: 0.047394830733537674
loss: 0.03396422788500786
loss: 0.04128994047641754
loss: 0.02742578461766243
loss: 0.034285496920347214
loss: 0.04141216725111008
loss: 0.04584800824522972
loss: 0.034949891269207
loss: 0.034214142709970474
loss: 0.0491134896874427

loss: 0.04447099193930626
loss: 0.039189279079437256
loss: 0.039629016071558
loss: 0.04316398501396179
loss: 0.046473972499370575
loss: 0.039007458835840225
loss: 0.03941407427191734
loss: 0.03732733055949211
loss: 0.04490269348025322
loss: 0.023457443341612816
loss: 0.03880305960774422
loss: 0.04148586839437485
loss: 0.0360599085688591
loss: 0.040299948304891586
loss: 0.028215333819389343
loss: 0.03015100583434105
loss: 0.03434651717543602
loss: 0.04837571084499359
loss: 0.03278409689664841
loss: 0.036294445395469666
loss: 0.03676580637693405
loss: 0.03298642486333847
loss: 0.026515154168009758
loss: 0.036018360406160355
loss: 0.03532329201698303
loss: 0.04199744015932083
loss: 0.042095866054296494
loss: 0.03707394748926163
loss: 0.037959977984428406
loss: 0.055407509207725525
loss: 0.03351769596338272
loss: 0.015139187686145306
loss: 0.029596328735351562
loss: 0.03465809300541878
loss: 0.04708627611398697
loss: 0.034144077450037
loss: 0.0419042631983757
loss: 0.04430665448307991
loss

loss: 0.030091319233179092
loss: 0.03665228560566902
loss: 0.04157928377389908
loss: 0.031767331063747406
loss: 0.03328484669327736
loss: 0.026518622413277626
loss: 0.042999833822250366
loss: 0.03314701095223427
loss: 0.03237440064549446
loss: 0.026327025145292282
loss: 0.030829738825559616
loss: 0.040354158729314804
loss: 0.045606400817632675
loss: 0.030824238434433937
loss: 0.032470256090164185
loss: 0.03060896508395672
loss: 0.039070144295692444
loss: 0.037489909678697586
loss: 0.027439679950475693
loss: 0.034430619329214096
loss: 0.037805281579494476
loss: 0.034601252526044846
loss: 0.04792880266904831
loss: 0.016565661877393723
loss: 0.02795270085334778
loss: 0.0321793407201767
loss: 0.046932194381952286
loss: 0.04591255635023117
loss: 0.027209673076868057
loss: 0.03415839001536369
loss: 0.035489991307258606
loss: 0.03951118141412735
loss: 0.03150859475135803
loss: 0.028647975996136665
loss: 0.02969175949692726
loss: 0.03375960886478424
loss: 0.029015522450208664
loss: 0.035048715

loss: 0.0428147092461586
loss: 0.030559897422790527
loss: 0.030695319175720215
loss: 0.032776184380054474
loss: 0.048682913184165955
loss: 0.028941377997398376
loss: 0.024491487070918083
loss: 0.0389828197658062
loss: 0.04040497541427612
loss: 0.02404952235519886
loss: 0.041859958320856094
loss: 0.023755228146910667
loss: 0.026503650471568108
loss: 0.028561562299728394
loss: 0.02937495894730091
loss: 0.04561750590801239
loss: 0.02971966378390789
loss: 0.05323495343327522
loss: 0.04113543778657913
loss: 0.019421756267547607
loss: 0.0323013998568058
loss: 0.03422880917787552
loss: 0.04368465021252632
loss: 0.027885781601071358
loss: 0.027635999023914337
loss: 0.039124660193920135
loss: 0.046852365136146545
loss: 0.027036897838115692
loss: 0.03376322612166405
loss: 0.028828037902712822
loss: 0.029920555651187897
loss: 0.03766478970646858
loss: 0.045782994478940964
loss: 0.04845467209815979
loss: 0.037635400891304016
loss: 0.03874075040221214
loss: 0.04044078290462494
loss: 0.0482647605240

loss: 0.0335412360727787
loss: 0.03519232198596001
loss: 0.03302934020757675
loss: 0.03276747837662697
loss: 0.03554772585630417
loss: 0.02906857617199421
loss: 0.025385042652487755
loss: 0.041324734687805176
loss: 0.03402208536863327
loss: 0.03896227106451988
loss: 0.02546357363462448
loss: 0.023892708122730255
loss: 0.03538607060909271
loss: 0.03233996406197548
loss: 0.03143034875392914
loss: 0.029402347281575203
loss: 0.03944594785571098
loss: 0.026542896404862404
loss: 0.029204662889242172
loss: 0.042369283735752106
loss: 0.03287815675139427
loss: 0.026909412816166878
loss: 0.029017871245741844
loss: 0.032598353922367096
loss: 0.03625522926449776
loss: 0.029208295047283173
loss: 0.02610636129975319
loss: 0.037270888686180115
loss: 0.024209320545196533
loss: 0.04196899011731148
loss: 0.03566018119454384
loss: 0.026106899604201317
loss: 0.044007558375597
loss: 0.03854982554912567
loss: 0.036126527935266495
loss: 0.03603178262710571
loss: 0.03829505667090416
loss: 0.033537112176418304

loss: 0.03622695058584213
loss: 0.02538929134607315
loss: 0.041076384484767914
loss: 0.008227256126701832
loss: 0.01706947200000286
loss: 0.0323595330119133
loss: 0.032122135162353516
loss: 0.02893657609820366
loss: 0.02413726970553398
loss: 0.03876033052802086
loss: 0.036220651119947433
loss: 0.028531188145279884
loss: 0.03823128715157509
loss: 0.03643476217985153
loss: 0.039765600115060806
loss: 0.03438703715801239
loss: 0.02843298763036728
loss: 0.03474327549338341
loss: 0.04224775731563568
loss: 0.03450144827365875
loss: 0.03636471554636955
loss: 0.042060039937496185
loss: 0.029760001227259636
loss: 0.03519017994403839
loss: 0.03102753683924675
loss: 0.030408576130867004
loss: 0.03633169084787369
loss: 0.03346634656190872
loss: 0.04815399646759033
loss: 0.03537576645612717
loss: 0.043497320264577866
loss: 0.032099366188049316
loss: 0.029572997242212296


In [None]:
label_id_mapping = trainset.label_map

id_label_mapping = dict()
for key in label_id_mapping.keys():
    id_label_mapping[label_id_mapping[key]] = key

def test_model(model, sentence, device = "cpu"):
    tokenized_sentence = torch.tensor([tokenizer.encode(sentence)])
    pos = torch.tensor([[0] * len(tokenized_sentence)])
    tags = torch.tensor([[1] * len(tokenized_sentence)])

    model = model.to(device)
    outputs = model(input_ids=tokenized_sentence.to(device), 
                    token_type_ids=pos.to(device), 
                    attention_mask=tags.to(device))

    logits = outputs[0]

    _, pred_labels = torch.max(logits, 2)

    out_labels = []
    for row in pred_labels:
        result = list(map(lambda x: id_label_mapping[int(x)], row))
        out_labels.append(result)
    #return tokenizer.tokenize(sentence), out_labels[0], logits
    return tokenizer.tokenize(sentence), out_labels[0][1:-1], logits[:, 1:-1]

In [None]:
adapter_name = "All_tag_2"
model.save_adapter(f"./save_adapters/{adapter_name}", model.active_adapters[0])

In [None]:
model.save_head(f"./save_heads/{adapter_name}", model.active_head)

In [None]:
sentence = "Dan Will be deemed to have completed its delivery obligations before 2021-7-5 if in Niall's opinion, the Jeep Car satisfies the Acceptance Criteria, and Niall notifies Dan in writing that it is accepting the Jeep Car."
sen, pred, logits = test_model(model, sentence, device = 'cpu')

In [None]:
a = tokenizer.tokenize(sentence)[1]

In [None]:
np.array(sen)

In [None]:
np.array(pred)

In [None]:
from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
def interact_word(i):
    print(i)
    print(sen[i])
    target = out[i]

    for i in range(len(target)):
        print(f"{i} {id_label_mapping[i].ljust(6)} \t: {target[i]:.5f}")

In [None]:
out = logits[0]
interact(lambda x: interact_word(x), x=widgets.IntSlider(min=0, max=len(sen)-1, step=1, value=0))

In [None]:
from telegram_notifier import send_message as telegram_bot_sendtext

In [None]:
print("OK")