In [1]:
import torch
import torch.nn as nn
from transformers import AutoModel, AutoConfig,BigBirdModel, BigBirdPreTrainedModel, RobertaPreTrainedModel
from embeddings import *
import pickle as pickle
import os
import pandas as pd
import torch
import sklearn
import numpy as np
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score
from transformers import AutoTokenizer, AutoConfig, AutoModelForSequenceClassification, AutoModelForTokenClassification, Trainer, TrainingArguments, RobertaConfig, RobertaTokenizer, RobertaForSequenceClassification, BertTokenizer
from load_data import *
from entity_model import *
from embeddings import *
import torch.nn.functional as F
from transformers.activations import ACT2FN
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss

2022-03-30 03:36:46.798484: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0


In [2]:
class Entity_Embedding_Model(BigBirdPreTrainedModel):
    def __init__(self, config, dropout_rate):
        super(Entity_Embedding_Model, self).__init__(config)
        self.model = AutoModel.from_pretrained('monologg/kobigbird-bert-base')
        self.model_config = config
        self.model.embeddings =Entity_Embeddings(config)
        self.model_config.num_labels = 30
        self.num_labels = 30

        #self.cls_fc_layer = FCLayer(self.config.hidden_size, self.config.hidden_size, dropout_rate)
        #self.entity_fc_layer1 = FCLayer(self.config.hidden_size, self.config.hidden_size, dropout_rate)
        #self.entity_fc_layer2 = FCLayer(self.config.hidden_size, self.config.hidden_size, dropout_rate)

        #self.label_classifier = FCLayer(
        #    self.config.hidden_size * 3,
        #    self.config.num_labels,
        #    dropout_rate,
        #    use_activation=False
        #)
        self.classifier = ClassificationHead(config)


    def forward(self, input_ids, attention_mask,token_type_ids, sub_mask, obj_mask, labels):
        outputs = self.model(
            input_ids, attention_mask=attention_mask,token_type_ids=token_type_ids
        )
        sequence_output = outputs.last_hidden_state
        
        logits = self.classifier(sequence_output,sub_mask,obj_mask)

        loss = None
        if labels is not None:
            if self.config.problem_type is None:
                if self.num_labels == 1:
                    self.config.problem_type = "regression"
                elif self.num_labels > 1 and (labels.dtype == torch.long or labels.dtype == torch.int):
                    self.config.problem_type = "single_label_classification"
                else:
                    self.config.problem_type = "multi_label_classification"

            if self.config.problem_type == "regression":
                loss_fct = MSELoss()
                if self.num_labels == 1:
                    loss = loss_fct(logits.squeeze(), labels.squeeze())
                else:
                    loss = loss_fct(logits, labels)
            elif self.config.problem_type == "single_label_classification":
                loss_fct = CrossEntropyLoss()
                loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
            elif self.config.problem_type == "multi_label_classification":
                loss_fct = BCEWithLogitsLoss()
                loss = loss_fct(logits, labels)

        output = (logits,) + outputs[2:]
        return ((loss,) + output) if loss is not None else output



In [3]:
class ClassificationHead(nn.Module):
    """Head for sentence-level classification tasks."""

    def __init__(self, config):
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.out_proj = nn.Linear(config.hidden_size, config.num_labels)
        self.config = config
    
    def entity_features(self,features,sub_mask,obj_mask):
        entity=[]
        for i in range(len(features)):
            sub_start = sub_mask[i].tolist().index(7)
            sub_end = sub_mask[i].tolist().index(8)
            obj_start = obj_mask[i].tolist().index(9)
            obj_end = obj_mask[i].tolist().index(10)
            total = (features[i,sub_start,:]*2+features[i,sub_end,:]*2+features[i,obj_start,:]+features[i,obj_end,:])/6
            entity.append(total.tolist())
        device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
        entity = torch.tensor(entity).to(device)
        return entity
    
    def forward(self, features, sub_mask,obj_mask, **kwargs):
        x = self.entity_features(features,sub_mask,obj_mask)
          # take <s> token (equiv. to [CLS])
        x = self.dropout(x)
        x = self.dense(x)
        x = ACT2FN[self.config.hidden_act](x)
        x = self.dropout(x)
        x = self.out_proj(x)
        return x

In [4]:
def label_to_num(label):
  num_label = []
  with open('dict_label_to_num.pkl', 'rb') as f:
    dict_label_to_num = pickle.load(f)
  for v in label:
    num_label.append(dict_label_to_num[v])
  return num_label
MODEL_NAME = 'monologg/kobigbird-bert-base'
model_config =  AutoConfig.from_pretrained(MODEL_NAME)
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [5]:
MODEL_NAME = 'monologg/kobigbird-bert-base'
tokenizer = AutoTokenizer.from_pretrained('/opt/ml/code/vocabs')
train_dataset = load_data("/opt/ml/dataset/train/train.csv")
train_label = label_to_num(train_dataset['label'].values)

tokenized_train = tokenized_dataset(train_dataset, tokenizer)

RE_train_dataset = RE_Dataset(tokenized_train, train_label)
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
model_config =  AutoConfig.from_pretrained(MODEL_NAME)
model_config.num_labels = 30

In [6]:
input_ids = tokenized_train['input_ids'][:10].to(device)
attention_mask = tokenized_train['attention_mask'][:10].to(device)
token_type_ids = tokenized_train['token_type_ids'][:10].to(device)
sub_mask = tokenized_train['sub_mask'][:10].to(device)
obj_mask = tokenized_train['obj_mask'][:10].to(device)
labels = torch.tensor(train_label[:10]).to(device)

In [None]:
#model = Entity_Embedding_Model(model_config,0.1)
#model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

model.parameters
model.to(device)

In [11]:
outputs=model(input_ids = input_ids,attention_mask=attention_mask,token_type_ids=token_type_ids)
outputs

Attention type 'block_sparse' is not possible if sequence_length: 238 <= num global tokens: 2 * config.block_size + min. num sliding tokens: 3 * config.block_size + config.num_random_blocks * config.block_size + additional buffer: config.num_random_blocks * config.block_size = 704 with config.block_size = 64, config.num_random_blocks = 3. Changing attention type to 'original_full'...


SequenceClassifierOutput(loss=None, logits=tensor([[-0.0301,  0.1847],
        [-0.0666,  0.1457],
        [-0.0465,  0.1733],
        [-0.0672,  0.1788],
        [-0.0047,  0.1328],
        [-0.0210,  0.1808],
        [-0.0145,  0.1808],
        [-0.0612,  0.1117],
        [-0.0471,  0.1687],
        [-0.0673,  0.1287]], device='cuda:0', grad_fn=<AddmmBackward>), hidden_states=None, attentions=None)

In [8]:
outputs=model(input_ids = input_ids,attention_mask=attention_mask,token_type_ids=token_type_ids,sub_mask=sub_mask, obj_mask=obj_mask,labels=labels)
outputs

Attention type 'block_sparse' is not possible if sequence_length: 238 <= num global tokens: 2 * config.block_size + min. num sliding tokens: 3 * config.block_size + config.num_random_blocks * config.block_size + additional buffer: config.num_random_blocks * config.block_size = 704 with config.block_size = 64, config.num_random_blocks = 3. Changing attention type to 'original_full'...


(tensor(3.3712, device='cuda:0', grad_fn=<NllLossBackward>),
 tensor([[-9.4556e-03, -2.4930e-02, -1.2922e-01, -7.7817e-02, -1.4273e-01,
           4.5376e-02, -3.1409e-02, -1.6661e-01, -6.8012e-02,  1.5956e-02,
           7.0293e-02, -8.5864e-02, -8.9937e-02, -1.1379e-01,  5.0766e-02,
          -1.3515e-01,  3.1258e-02,  2.5222e-02,  2.2826e-02, -2.3527e-02,
          -3.0600e-02,  2.1539e-01, -4.8287e-02, -1.1107e-01, -1.5114e-01,
           1.4262e-01,  6.6954e-02,  2.7644e-02, -2.1801e-02,  2.1343e-02],
         [ 3.1784e-02, -4.1267e-02, -8.0203e-02, -1.0629e-01, -2.3796e-01,
          -1.4544e-02, -4.2640e-02, -2.2849e-01, -4.4176e-02, -5.0316e-03,
           1.7746e-02, -1.4417e-01, -2.5458e-02, -1.1951e-01, -6.4889e-02,
          -5.7664e-02, -3.3172e-02, -2.6596e-02,  1.9691e-02,  1.0580e-02,
           4.2508e-02,  2.6667e-01,  2.7354e-02, -1.3545e-01, -1.3274e-01,
           1.5948e-01,  5.3535e-02,  1.5847e-02, -1.3300e-02,  1.9433e-02],
         [ 1.0473e-01,  1.4868e-03, -