In [1]:
import pandas as pd
import numpy as np

In [None]:
from telegram_notifier import send_message as telegram_bot_sendtext

In [2]:
import torch
print(f"Torch Version: {torch.__version__}")

import transformers
print(f"transformers (Adapter) Version: {transformers.__version__}")

Torch Version: 1.8.1
transformers (Adapter) Version: 2.0.1


In [3]:
from transformers import RobertaTokenizer
import numpy as np

tokenizer = RobertaTokenizer.from_pretrained("roberta-base")

from transformers import RobertaTokenizer

tokenizer = RobertaTokenizer.from_pretrained("roberta-base")

def encode_batch(batch):
  """Encodes a batch of input data using the model tokenizer."""
  return tokenizer(batch["text"], max_length=80, truncation=True, padding="max_length")

In [4]:
from ner_dataset import get_trainset_data_loader

all_tags, trainset, trainloader = get_trainset_data_loader(tokenizer, BATCH_SIZE=128)

labels: ['B-art' 'B-eve' 'B-geo' 'B-gpe' 'B-nat' 'B-org' 'B-per' 'B-tim'
 'CountryCode' 'CryptoCurrencyCode' 'CurrencyCode' 'Event' 'Float' 'I-art'
 'I-eve' 'I-geo' 'I-gpe' 'I-nat' 'I-org' 'I-per' 'I-tim' 'Integer'
 'Location' 'Month' 'O' 'Object' 'Party' 'Race' 'SpecialTerm'
 'TemporalUnit' 'Time' 'Timezone' 'US_States']


In [5]:
from transformers import RobertaConfig, RobertaModelWithHeads

config = RobertaConfig.from_pretrained(
    "roberta-base",
    num_labels=len(all_tags),
    label2id = trainset.label_map, 
    id2label = trainset.id2label
)

name = model.load_adapter("./save_adapters/ALL_tag_0730")
model.add_tagging_head(
        name,
        num_labels=len(trainset.label_map.keys()), overwrite_ok=True
      )
model.train_adapter(name)

In [7]:
device_id = 1
device = torch.device(f"cuda:{device_id}" if torch.cuda.is_available() else "cpu")

In [8]:
all_tags = ['Float','TemporalUnit','I-gpe','CountryCode','CurrencyCode','Timezone','CryptoCurrencyCode','Month','Party','B-tim','I-art','Time','B-per','B-gpe','B-geo','O','Location','Event','I-nat','Race','B-org','I-geo','I-tim','I-eve','SpecialTerm','B-art','US_States','B-eve','I-org','B-nat','Object','I-per','Integer']

In [9]:
for index, tag in enumerate(all_tags):
    if index % 2 == device_id:
        print(f"\nSkip {tag}.\n")
        continue
    model = RobertaModelWithHeads.from_pretrained(
        "roberta-base",
        config=config,
        )


    try:
        model.add_adapter(tag)
        model.add_tagging_head(
            tag,
            num_labels=1
          )
    except: pass
    model.train_adapter(tag)
    model = model.to(device)
    
    no_decay = ["bias", "LayerNorm.weight"]
    optimizer_grouped_parameters = [
                    {
                        "params": [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
                        "weight_decay": 1e-5,
                    },
                    {
                        "params": [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)],
                        "weight_decay": 0.0,
                    },
                ]
    optimizer = torch.optim.AdamW(params=optimizer_grouped_parameters, lr=1e-4)
    
    for epoch in range(4):
        print(f"\n{tag}: epoch {epoch}")
        for i, data in enumerate(trainloader):

            tokens_tensors, segments_tensors, \
            masks_tensors, labels = [t.to(device) for t in data]

            outputs = model(input_ids = tokens_tensors,
                attention_mask=masks_tensors,
                token_type_ids=segments_tensors)


            logits = outputs[0]

            current_label = labels.view(-1, labels.shape[-1])[:, trainset.label_map[tag]]
            current_label = current_label.view(-1)

            active_logits = logits.view(-1, logits.shape[-1])[masks_tensors.view(-1) == 1]
            active_labels = current_label[masks_tensors.view(-1)== 1]

            actual = current_label[masks_tensors.view(-1)== 1].float().view(-1,1)
            """

            actual = torch.ones(active_logits.shape, device = device)

            actual[:, 0] = (active_labels == 0).long()
            actual[:, 1] = (active_labels == 1).long()"""


            loss_fct = torch.nn.BCEWithLogitsLoss()

            loss = loss_fct(active_logits, actual)

            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            
            if i % 100 == 0:
                print(f"\tLoss: {loss}")
        telegram_bot_sendtext(f"\n{tag}: epoch {epoch}, loss = {loss}")
        filename = f"{tag}_epoch_{epoch}_0731"
        model.save_adapter(f"./save_adapters/{filename}", model.active_adapters[0])
        model.save_head(f"./save_heads/{filename}", model.active_head)
    filename = f"{tag}_0731"
    model.save_adapter(f"./save_adapters/{filename}", model.active_adapters[0])
    model.save_head(f"./save_heads/{filename}", model.active_head)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModelWithHeads: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaModelWithHeads from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModelWithHeads from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModelWithHeads were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.embeddings.position_ids']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and infere


Float: epoch 0
	Loss: 0.6729634404182434
	Loss: 0.003418962936848402
	Loss: 0.000802522583398968
	Loss: 0.007911170832812786

Float: epoch 1
	Loss: 0.007724965922534466
	Loss: 0.0032474221661686897
	Loss: 0.00043286013533361256
	Loss: 0.005454898811876774

Float: epoch 2
	Loss: 0.00804697535932064
	Loss: 0.001664437702856958
	Loss: 0.0001769738009897992
	Loss: 0.002408739645034075

Float: epoch 3
	Loss: 0.004487700294703245
	Loss: 0.0008960371487773955
	Loss: 0.0003259190125390887
	Loss: 0.000977634685114026

Skip TemporalUnit.



Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModelWithHeads: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaModelWithHeads from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModelWithHeads from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModelWithHeads were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.embeddings.position_ids', 'roberta.encoder.layer.0.output.adapters.Float.adapter_down.0.weight', 'roberta.encoder.layer.0.output.ada


I-gpe: epoch 0
	Loss: 0.9032449126243591
	Loss: 0.00019322166917845607
	Loss: 0.00016153117758221924
	Loss: 0.00011351861758157611
	Loss: 0.00015384428843390197
	Loss: 0.006040485110133886

I-gpe: epoch 2
	Loss: 0.0001629523903829977
	Loss: 0.0001104998736991547
	Loss: 0.0001421958877472207
	Loss: 0.005899607669562101

I-gpe: epoch 3
	Loss: 0.00014614869724027812
	Loss: 0.00010648725583450869
	Loss: 0.00013876576849725097
	Loss: 0.005802497733384371

Skip CountryCode.



Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModelWithHeads: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaModelWithHeads from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModelWithHeads from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModelWithHeads were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.embeddings.position_ids', 'roberta.encoder.layer.0.output.adapters.Float.adapter_down.0.weight', 'roberta.encoder.layer.0.output.ada


CurrencyCode: epoch 0
	Loss: 0.7763774991035461
	Loss: 0.010218653827905655
	Loss: 0.0065135350450873375
	Loss: 0.006127247586846352

CurrencyCode: epoch 1
	Loss: 0.0038249760400503874
	Loss: 0.0006168533582240343
	Loss: 0.003102172864601016
	Loss: 0.0001647060562390834

CurrencyCode: epoch 2
	Loss: 0.0002810903242789209
	Loss: 0.00031542201759293675
	Loss: 0.0019513716688379645
	Loss: 0.0001601101248525083

CurrencyCode: epoch 3
	Loss: 0.0002568444760981947
	Loss: 0.00012464291648939252
	Loss: 0.00016887746460270137
	Loss: 6.619198393309489e-05

Skip Timezone.



Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModelWithHeads: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaModelWithHeads from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModelWithHeads from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModelWithHeads were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.embeddings.position_ids', 'roberta.encoder.layer.0.output.adapters.Float.adapter_down.0.weight', 'roberta.encoder.layer.0.output.ada


CryptoCurrencyCode: epoch 0
	Loss: 0.6730352640151978
	Loss: 0.00016961533401627094
	Loss: 3.86653482564725e-05
	Loss: 2.135748763976153e-05

CryptoCurrencyCode: epoch 1
	Loss: 1.571036955283489e-05
	Loss: 1.2280215742066503e-05
	Loss: 1.0956831829389557e-05
	Loss: 9.474279067944735e-06

CryptoCurrencyCode: epoch 2
	Loss: 8.604622053098865e-06
	Loss: 7.971667400852311e-06
	Loss: 7.727814590907656e-06
	Loss: 7.250410817505326e-06

CryptoCurrencyCode: epoch 3
	Loss: 6.900287644384662e-06
	Loss: 6.586380095541244e-06
	Loss: 6.531073267979082e-06
	Loss: 6.184282938193064e-06

Skip Month.



Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModelWithHeads: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaModelWithHeads from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModelWithHeads from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModelWithHeads were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.embeddings.position_ids', 'roberta.encoder.layer.0.output.adapters.Float.adapter_down.0.weight', 'roberta.encoder.layer.0.output.ada


Party: epoch 0
	Loss: 0.6480399370193481
	Loss: 0.21433617174625397
	Loss: 0.15888123214244843
	Loss: 0.10587793588638306

Party: epoch 1
	Loss: 0.0987062007188797
	Loss: 0.10713467001914978
	Loss: 0.10134787112474442
	Loss: 0.0787268802523613

Party: epoch 2
	Loss: 0.07641387730836868
	Loss: 0.08620305359363556
	Loss: 0.08214200288057327
	Loss: 0.06730452924966812

Party: epoch 3
	Loss: 0.07085584104061127
	Loss: 0.0773899033665657
	Loss: 0.08447027951478958
	Loss: 0.0674319714307785

Skip B-tim.



Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModelWithHeads: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaModelWithHeads from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModelWithHeads from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModelWithHeads were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.embeddings.position_ids', 'roberta.encoder.layer.0.output.adapters.Float.adapter_down.0.weight', 'roberta.encoder.layer.0.output.ada


I-art: epoch 0
	Loss: 0.8034830093383789
	Loss: 0.00844764243811369
	Loss: 0.0004311409720685333
	Loss: 0.00019595470803324133

I-art: epoch 1
	Loss: 0.005662826821208
	Loss: 0.008470527827739716
	Loss: 0.00047709333011880517
	Loss: 0.00019212407642044127

I-art: epoch 2
	Loss: 0.00567783834412694
	Loss: 0.008195494301617146
	Loss: 0.0004823713388759643
	Loss: 0.00018233248556498438

I-art: epoch 3
	Loss: 0.005641748197376728
	Loss: 0.008116906508803368
	Loss: 0.0004944349057041109
	Loss: 0.0001761751191224903

Skip Time.



Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModelWithHeads: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaModelWithHeads from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModelWithHeads from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModelWithHeads were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.embeddings.position_ids', 'roberta.encoder.layer.0.output.adapters.Float.adapter_down.0.weight', 'roberta.encoder.layer.0.output.ada


B-per: epoch 0
	Loss: 0.654582679271698
	Loss: 0.08042533695697784
	Loss: 0.06191591918468475
	Loss: 0.02891523949801922

B-per: epoch 1
	Loss: 0.04328916594386101
	Loss: 0.03833376243710518
	Loss: 0.04668014496564865
	Loss: 0.014588330872356892

B-per: epoch 2
	Loss: 0.030946774408221245
	Loss: 0.028370451182127
	Loss: 0.03275325521826744
	Loss: 0.013161640614271164

B-per: epoch 3
	Loss: 0.02547326125204563
	Loss: 0.026212871074676514
	Loss: 0.03446463122963905
	Loss: 0.01157769188284874

Skip B-gpe.



Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModelWithHeads: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaModelWithHeads from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModelWithHeads from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModelWithHeads were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.embeddings.position_ids', 'roberta.encoder.layer.0.output.adapters.Float.adapter_down.0.weight', 'roberta.encoder.layer.0.output.ada


B-geo: epoch 0
	Loss: 0.6379156708717346
	Loss: 0.1413555145263672
	Loss: 0.09592808783054352
	Loss: 0.05501578748226166

B-geo: epoch 1
	Loss: 0.043183211237192154
	Loss: 0.05166114494204521
	Loss: 0.06271189451217651
	Loss: 0.04350714012980461

B-geo: epoch 2
	Loss: 0.03598232567310333
	Loss: 0.0469747930765152
	Loss: 0.05497155711054802
	Loss: 0.039917223155498505

B-geo: epoch 3
	Loss: 0.035542022436857224
	Loss: 0.046219468116760254
	Loss: 0.056058838963508606
	Loss: 0.036793358623981476

Skip O.



Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModelWithHeads: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaModelWithHeads from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModelWithHeads from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModelWithHeads were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.embeddings.position_ids', 'roberta.encoder.layer.0.output.adapters.Float.adapter_down.0.weight', 'roberta.encoder.layer.0.output.ada


Location: epoch 0
	Loss: 0.7050840258598328
	Loss: 0.1443053036928177
	Loss: 0.11064567416906357
	Loss: 0.06164872646331787

Location: epoch 1
	Loss: 0.04740497097373009
	Loss: 0.0710466280579567
	Loss: 0.07509702444076538
	Loss: 0.05026066675782204

Location: epoch 2
	Loss: 0.041340917348861694
	Loss: 0.05465501546859741
	Loss: 0.06560926884412766
	Loss: 0.047322120517492294

Location: epoch 3
	Loss: 0.04071603715419769
	Loss: 0.05132175609469414
	Loss: 0.0662488266825676
	Loss: 0.04433796927332878

Skip Event.



Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModelWithHeads: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaModelWithHeads from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModelWithHeads from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModelWithHeads were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.embeddings.position_ids', 'roberta.encoder.layer.0.output.adapters.Float.adapter_down.0.weight', 'roberta.encoder.layer.0.output.ada


I-nat: epoch 0
	Loss: 0.5578949451446533
	Loss: 0.0002407894207863137
	Loss: 5.9995887568220496e-05
	Loss: 5.0200273108202964e-05

I-nat: epoch 1
	Loss: 5.754619633080438e-05
	Loss: 4.5943666918901727e-05
	Loss: 3.2473104511154816e-05
	Loss: 4.51201485702768e-05

I-nat: epoch 2
	Loss: 4.273160811862908e-05
	Loss: 4.044604429509491e-05
	Loss: 2.9409557100734673e-05
	Loss: 3.975662184529938e-05

I-nat: epoch 3
	Loss: 5.953512663836591e-05
	Loss: 3.974702849518508e-05
	Loss: 2.822737405949738e-05
	Loss: 3.999075124738738e-05

Skip Race.



Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModelWithHeads: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaModelWithHeads from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModelWithHeads from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModelWithHeads were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.embeddings.position_ids', 'roberta.encoder.layer.0.output.adapters.Float.adapter_down.0.weight', 'roberta.encoder.layer.0.output.ada


B-org: epoch 0
	Loss: 0.6437269449234009
	Loss: 0.0851873978972435
	Loss: 0.06359372287988663
	Loss: 0.05909635126590729

B-org: epoch 1
	Loss: 0.04854844883084297
	Loss: 0.0557667575776577
	Loss: 0.04876093193888664
	Loss: 0.04356876760721207

B-org: epoch 2
	Loss: 0.04284160956740379
	Loss: 0.04848101735115051
	Loss: 0.048477500677108765
	Loss: 0.040614064782857895

B-org: epoch 3
	Loss: 0.04079153761267662
	Loss: 0.04357684403657913
	Loss: 0.04436586797237396
	Loss: 0.03871800750494003

Skip I-geo.



Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModelWithHeads: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaModelWithHeads from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModelWithHeads from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModelWithHeads were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.embeddings.position_ids', 'roberta.encoder.layer.0.output.adapters.Float.adapter_down.0.weight', 'roberta.encoder.layer.0.output.ada


I-tim: epoch 0
	Loss: 0.6345638036727905
	Loss: 0.029500827193260193
	Loss: 0.031594157218933105
	Loss: 0.03058313950896263

I-tim: epoch 1
	Loss: 0.014221852645277977
	Loss: 0.006988167762756348
	Loss: 0.0160105861723423
	Loss: 0.016866469755768776

I-tim: epoch 2
	Loss: 0.00890387874096632
	Loss: 0.004128413740545511
	Loss: 0.020454032346606255
	Loss: 0.012329304590821266

I-tim: epoch 3
	Loss: 0.004141274839639664
	Loss: 0.004177327733486891
	Loss: 0.01733652502298355
	Loss: 0.01242279913276434

Skip I-eve.



Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModelWithHeads: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaModelWithHeads from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModelWithHeads from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModelWithHeads were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.embeddings.position_ids', 'roberta.encoder.layer.0.output.adapters.Float.adapter_down.0.weight', 'roberta.encoder.layer.0.output.ada


SpecialTerm: epoch 0
	Loss: 0.7418427467346191
	Loss: 0.0002523269795347005
	Loss: 0.00016197696095332503
	Loss: 0.0002993186062667519

SpecialTerm: epoch 1
	Loss: 0.002930542454123497
	Loss: 0.00019987576524727046
	Loss: 0.000166176789207384
	Loss: 0.00030332099413499236

SpecialTerm: epoch 2
	Loss: 0.002886674366891384
	Loss: 0.00019882175547536463
	Loss: 0.0002077048266073689
	Loss: 0.000249833392444998

SpecialTerm: epoch 3
	Loss: 0.0029306260403245687
	Loss: 0.00021518174617085606
	Loss: 0.0002596323029138148
	Loss: 0.00019466048979666084

Skip B-art.



Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModelWithHeads: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaModelWithHeads from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModelWithHeads from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModelWithHeads were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.embeddings.position_ids', 'roberta.encoder.layer.0.output.adapters.Float.adapter_down.0.weight', 'roberta.encoder.layer.0.output.ada


US_States: epoch 0
	Loss: 0.6861989498138428
	Loss: 0.00021128682419657707
	Loss: 6.777313683414832e-05

US_States: epoch 1
	Loss: 3.155084777972661e-05
	Loss: 2.6632238586898893e-05
	Loss: 2.2567081032320857e-05
	Loss: 2.0675963241956197e-05

US_States: epoch 2
	Loss: 1.7815780665841885e-05
	Loss: 1.7112482964876108e-05
	Loss: 1.5838153558433987e-05
	Loss: 1.5553185221506283e-05

US_States: epoch 3
	Loss: 1.3841583495377563e-05
	Loss: 1.3583820873463992e-05
	Loss: 1.3493673577613663e-05
	Loss: 1.3491999197867699e-05

Skip B-eve.



Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModelWithHeads: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaModelWithHeads from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModelWithHeads from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModelWithHeads were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.embeddings.position_ids', 'roberta.encoder.layer.0.output.adapters.Float.adapter_down.0.weight', 'roberta.encoder.layer.0.output.ada


I-org: epoch 0
	Loss: 0.6050031185150146
	Loss: 0.07474705576896667
	Loss: 0.06542512774467468
	Loss: 0.037716712802648544

I-org: epoch 1
	Loss: 0.03299851343035698
	Loss: 0.038270656019449234
	Loss: 0.04330223426222801
	Loss: 0.03099490888416767

I-org: epoch 2
	Loss: 0.025533171370625496
	Loss: 0.03181983157992363
	Loss: 0.03578803688287735
	Loss: 0.02405058592557907

I-org: epoch 3
	Loss: 0.0224652960896492
	Loss: 0.028957273811101913
	Loss: 0.03344966471195221
	Loss: 0.023249920457601547

Skip B-nat.



Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModelWithHeads: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaModelWithHeads from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModelWithHeads from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModelWithHeads were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.embeddings.position_ids', 'roberta.encoder.layer.0.output.adapters.Float.adapter_down.0.weight', 'roberta.encoder.layer.0.output.ada


Object: epoch 0
	Loss: 0.9240089058876038
	Loss: 0.015891242772340775
	Loss: 0.001168093760497868
	Loss: 0.0004916563630104065

Object: epoch 1
	Loss: 0.01739625073969364
	Loss: 0.015023850835859776
	Loss: 0.0011850276496261358
	Loss: 0.00048271671403199434

Object: epoch 2
	Loss: 0.017458396032452583
	Loss: 0.014757813885807991
	Loss: 0.0011797489132732153
	Loss: 0.0004952103481628001

Object: epoch 3
	Loss: 0.001210616435855627
	Loss: 0.00045177427818998694

Skip I-per.



Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModelWithHeads: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaModelWithHeads from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModelWithHeads from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModelWithHeads were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.embeddings.position_ids', 'roberta.encoder.layer.0.output.adapters.Float.adapter_down.0.weight', 'roberta.encoder.layer.0.output.ada


Integer: epoch 0
	Loss: 0.7585887908935547
	Loss: 0.04734921082854271
	Loss: 0.003937866538763046
	Loss: 0.021993646398186684

Integer: epoch 1
	Loss: 0.0004149453015998006
	Loss: 0.0029214664828032255
	Loss: 0.0004725441103801131
	Loss: 0.02071632444858551

Integer: epoch 2
	Loss: 0.00031460431637242436
	Loss: 0.002846821676939726
	Loss: 0.0004670191847253591
	Loss: 0.019959164783358574

Integer: epoch 3
	Loss: 0.00032567803282290697
	Loss: 0.0031056904699653387
	Loss: 0.00036399412783794105
	Loss: 0.01762981526553631


In [10]:
label_id_mapping = trainset.label_map

id_label_mapping = dict()
for key in label_id_mapping.keys():
    id_label_mapping[label_id_mapping[key]] = key

def test_model(model, sentence, device = "cpu"):
    tokenized_sentence = torch.tensor([tokenizer.encode(sentence)])
    pos = torch.tensor([[0] * len(tokenized_sentence)])
    tags = torch.tensor([[1] * len(tokenized_sentence)])

    model = model.to(device)
    outputs = model(input_ids=tokenized_sentence.to(device), 
                    token_type_ids=pos.to(device), 
                    attention_mask=tags.to(device))

    logits = outputs[0]

    _, pred_labels = torch.max(logits, 2)

    out_labels = []
    for row in pred_labels:
        result = list(map(lambda x: id_label_mapping[int(x)], row))
        out_labels.append(result)
    #return tokenizer.tokenize(sentence), out_labels[0], logits
    return tokenizer.tokenize(sentence), out_labels[0][1:-1], logits[:, 1:-1]

In [11]:
sentence = "Dan will be deemed to have completed its delivery obligations before 2021-7-5 if in Niall's opinion, the Jeep Car satisfies the Acceptance Criteria, and Niall notifies Dan in writing that it is accepting the Jeep Car."

In [12]:
tokenized_sentence = torch.tensor([tokenizer.encode(sentence)])
pos = torch.tensor([[0] * len(tokenized_sentence)])
tags = torch.tensor([[1] * len(tokenized_sentence)])

model = model.to(device)
outputs = model(input_ids=tokenized_sentence.to(device), 
                token_type_ids=pos.to(device), 
                attention_mask=tags.to(device))

In [13]:
for i, text in enumerate(tokenizer.tokenize(sentence)):
    print(f"{text}: {outputs[0].view(-1)[i]}")

Dan: -1.3775714635849
Ġwill: -6.25478982925415
Ġbe: -6.146771430969238
Ġdeemed: -6.733390808105469
Ġto: -4.8305864334106445
Ġhave: -4.861928462982178
Ġcompleted: -5.285251140594482
Ġits: -4.913308620452881
Ġdelivery: -5.294787406921387
Ġobligations: -3.4447712898254395
Ġbefore: -4.425929069519043
Ġ2021: -5.148871898651123
-: 4.860267162322998
7: 2.234142541885376
-: 3.1324446201324463
5: 1.6280460357666016
Ġif: 5.194551944732666
Ġin: -4.453372478485107
ĠNi: -3.832737922668457
all: -4.47638463973999
's: -3.5833146572113037
Ġopinion: -5.401472091674805
,: -4.567398548126221
Ġthe: -6.074745178222656
ĠJeep: -5.643889427185059
ĠCar: -2.111795425415039
Ġsatisfies: -4.009443759918213
Ġthe: -4.075582027435303
ĠAccept: -5.991746425628662
ance: -5.409261703491211
ĠCrit: -5.5677618980407715
eria: -5.560084342956543
,: -5.150038242340088
Ġand: -5.581611633300781
ĠNi: -4.869113445281982
all: -4.657632827758789
Ġnot: -3.570448875427246
ifies: -4.647846698760986
ĠDan: -5.250011444091797
Ġin: -5.97724

In [14]:
sentence = "Dan Will be deemed to have completed its delivery obligations before 2021-7-5 if in Niall's opinion, the Jeep Car satisfies the Acceptance Criteria, and Niall notifies Dan in writing that it is accepting the Jeep Car."
sen, pred, logits = test_model(model, sentence, device = 'cpu')

In [15]:
a = tokenizer.tokenize(sentence)[1]

In [16]:
np.array(sen)

array(['Dan', 'ĠWill', 'Ġbe', 'Ġdeemed', 'Ġto', 'Ġhave', 'Ġcompleted',
       'Ġits', 'Ġdelivery', 'Ġobligations', 'Ġbefore', 'Ġ2021', '-', '7',
       '-', '5', 'Ġif', 'Ġin', 'ĠNi', 'all', "'s", 'Ġopinion', ',',
       'Ġthe', 'ĠJeep', 'ĠCar', 'Ġsatisfies', 'Ġthe', 'ĠAccept', 'ance',
       'ĠCrit', 'eria', ',', 'Ġand', 'ĠNi', 'all', 'Ġnot', 'ifies',
       'ĠDan', 'Ġin', 'Ġwriting', 'Ġthat', 'Ġit', 'Ġis', 'Ġaccepting',
       'Ġthe', 'ĠJeep', 'ĠCar', '.'], dtype='<U12')

In [17]:
np.array(pred)

array(['B-art', 'B-art', 'B-art', 'B-art', 'B-art', 'B-art', 'B-art',
       'B-art', 'B-art', 'B-art', 'B-art', 'B-art', 'B-art', 'B-art',
       'B-art', 'B-art', 'B-art', 'B-art', 'B-art', 'B-art', 'B-art',
       'B-art', 'B-art', 'B-art', 'B-art', 'B-art', 'B-art', 'B-art',
       'B-art', 'B-art', 'B-art', 'B-art', 'B-art', 'B-art', 'B-art',
       'B-art', 'B-art', 'B-art', 'B-art', 'B-art', 'B-art', 'B-art',
       'B-art', 'B-art', 'B-art', 'B-art', 'B-art', 'B-art', 'B-art'],
      dtype='<U5')

In [18]:
from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
def interact_word(i):
    print(i)
    print(sen[i])
    target = out[i]

    for i in range(len(target)):
        print(f"{i} {id_label_mapping[i].ljust(6)} \t: {target[i]:.5f}")

In [19]:
out = logits[0]
interact(lambda x: interact_word(x), x=widgets.IntSlider(min=0, max=len(sen)-1, step=1, value=0))

0
Dan
0 B-art  	: -5.16899


<function __main__.<lambda>(x)>

In [20]:
print("OK")

OK
