## import packages

In [2]:
import pickle as pickle
import os
import pandas as pd
import torch
from sklearn.metrics import accuracy_score
from transformers import AutoTokenizer, XLMRobertaConfig, XLMRobertaTokenizer
from transformers import XLMRobertaModel
import numpy as np
import matplotlib.pyplot as plt
import random
from itertools import chain
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
import copy
import csv
import json
import logging
import torch.nn as nn
from tqdm.auto import tqdm
from transformers import AdamW, get_linear_schedule_with_warmup
import logging
import torch.nn.functional as F

## set seed

In [3]:
# Fix every random number generator the notebook touches so runs are repeatable.
random_seed = 42

# Python's and NumPy's global generators.
random.seed(random_seed)
np.random.seed(random_seed)

# PyTorch: CPU generator, the current CUDA device, and all CUDA devices (multi-GPU).
torch.manual_seed(random_seed)
torch.cuda.manual_seed(random_seed)
torch.cuda.manual_seed_all(random_seed)

# Ask cuDNN for deterministic kernels and turn off its autotuner.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True


## load data

In [4]:
# Dataset wrapper.
class RE_Dataset(torch.utils.data.Dataset):
    """Map-style dataset pairing per-sample features with their labels.

    Args:
        tokenized_dataset: mapping ``feature name -> indexable`` (list, array
            or tensor) holding one entry per sample.
        labels: indexable of per-sample labels; its length defines the size of
            the dataset.
    """

    def __init__(self, tokenized_dataset, labels):
        self.tokenized_dataset = tokenized_dataset
        self.labels = labels

    @staticmethod
    def _as_tensor(value):
        """Return an independent tensor copy of ``value``.

        ``torch.tensor(t)`` on an existing tensor emits a copy-construct
        UserWarning for every item; ``clone().detach()`` is the recommended
        equivalent and produces the same result.
        """
        if isinstance(value, torch.Tensor):
            return value.clone().detach()
        return torch.tensor(value)

    def __getitem__(self, idx):
        """Return sample ``idx`` as a dict of tensors, with the label under ``'labels'``."""
        item = {key: self._as_tensor(val[idx]) for key, val in self.tokenized_dataset.items()}
        item['labels'] = self._as_tensor(self.labels[idx])
        return item

    def __len__(self):
        """Number of samples, taken from the label container."""
        return len(self.labels)

# Convert the freshly loaded tsv table into a DataFrame with named columns.
# See the baseline code description image for the resulting layout.
def preprocessing_dataset(dataset, label_type):
    """Rename the positional tsv columns and encode the label column.

    Args:
        dataset: table indexed by integer column position; columns 1-7 hold the
            sentence, entity 1 (text, start, end) and entity 2 (text, start,
            end), and column 8 holds the label string.
        label_type: mapping from label string to its numeric id.

    Returns:
        DataFrame with columns ``sentence, entity_01, e1s, e1e, entity_02,
        e2s, e2e, label``.

    Raises:
        KeyError: if a label other than ``'blind'`` is missing from ``label_type``.
    """
    # Rows labelled 'blind' get the placeholder id 100 instead of a lookup.
    encoded_labels = [
        100 if raw_label == 'blind' else label_type[raw_label]
        for raw_label in dataset[8]
    ]
    named_columns = {
        'sentence': dataset[1],
        'entity_01': dataset[2],
        'e1s': dataset[3],
        'e1e': dataset[4],
        'entity_02': dataset[5],
        'e2s': dataset[6],
        'e2e': dataset[7],
        'label': encoded_labels,
    }
    return pd.DataFrame(named_columns)

# Load a tsv file.
def load_data(dataset_dir, label_type_path='/opt/ml/input/data/label_type.pkl'):
    """Read a tab-separated dataset and return it in preprocessed form.

    Args:
        dataset_dir: path of the header-less ``.tsv`` file to read.
        label_type_path: path of the pickled label-name -> id mapping. The
            default is the location this notebook has always used, so existing
            calls behave as before.

    Returns:
        The DataFrame produced by ``preprocessing_dataset``.
    """
    # NOTE(security): pickle.load can execute arbitrary code; only point this
    # at a label file you trust.
    with open(label_type_path, 'rb') as f:
        label_type = pickle.load(f)

    # The file has no header row, so columns are addressed by position.
    dataset = pd.read_csv(dataset_dir, delimiter='\t', header=None)

    return preprocessing_dataset(dataset, label_type)

# Tokenizing for XLM-RoBERTa input.
# Tip: trying different tokenizers and special tokens is another avenue for experiments.
def tokenized_dataset(dataset, tokenizer):
    """Tokenize every sentence of ``dataset`` in one batched tokenizer call.

    Args:
        dataset: mapping/DataFrame with a ``'sentence'`` column.
        tokenizer: callable tokenizer accepting a list of strings plus the
            keyword arguments used below.

    Returns:
        Whatever ``tokenizer`` returns for the batch (padded, truncated to 150
        tokens, with special tokens, as PyTorch tensors).
    """
    # The tokenizer call used to sit inside a per-row loop, so the whole corpus
    # was re-tokenized once per row and an empty dataset left the result
    # undefined. A single call produces the same output.
    # The "entity_01[SEP]entity_02" strings that the loop built were never
    # passed to the tokenizer, so they are no longer constructed.
    tokenized_sentences = tokenizer(
        list(dataset['sentence']),
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=150,
        add_special_tokens=True,
    )
    return tokenized_sentences

def tokenized_dataset_len(dataset, tokenizer):
    """Return the token list of every sentence, in dataset order.

    Despite the name, the result holds the tokens themselves (one list per
    sentence); callers take ``len`` of each entry to get lengths.
    """
    return [tokenizer.tokenize(text) for text in dataset['sentence']]


## metric

In [5]:
def compute_metrics(preds, labels):
    """Check that predictions and labels line up, then score them.

    Raises:
        AssertionError: if the two sequences differ in length.
    """
    n_preds, n_labels = len(preds), len(labels)
    assert n_preds == n_labels
    return acc_and_f1(preds, labels)


def simple_accuracy(preds, labels):
    """Fraction of positions where ``preds`` equals ``labels``.

    Relies on the element-wise comparison result exposing ``.mean()``
    (e.g. NumPy arrays).
    """
    matches = preds == labels
    return matches.mean()


def official_f1():
    """Parse the score on the last line of the evaluation result file.

    The last line is expected to look like ``...: <name> = <number>% >>>``;
    the number is returned as a fraction (percentage divided by 100).
    """
    result_path = os.path.join('/opt/ml/eval/result.txt')
    with open(result_path, "r", encoding="utf-8") as f:
        last_line = f.readlines()[-1]

    # Keep the text after the first colon and drop the closing ">>>" marker.
    score_text = last_line.split(":")[1].replace(">>>", "").strip()
    # What follows "=" is the percentage, e.g. "82.50%".
    percent_text = score_text.split("=")[1].strip().replace("%", "")
    return float(percent_text) / 100

def acc_and_f1(preds, labels, average="macro"):
    """Return the metric dictionary for a set of predictions.

    Only accuracy is reported at the moment; ``average`` is accepted but not
    used, and the ``"f1"`` entry based on ``official_f1()`` stays disabled.
    """
    metrics = {}
    metrics["acc"] = simple_accuracy(preds, labels)
    # metrics["f1"] = official_f1()
    return metrics


## add entity token

In [6]:
def _insert_entity_markers(sentence, e1s, e1e, e2s, e2e):
    """Wrap both entity spans of ``sentence`` in ``<e1>``/``<e2>`` marker tags.

    ``e?s``/``e?e`` are inclusive character offsets. The entity that starts
    first is wrapped first so the offsets of the other one stay valid.
    """
    if e1s > e2s:
        first, second = (e2s, e2e, 'e2'), (e1s, e1e, 'e1')
    else:
        first, second = (e1s, e1e, 'e1'), (e2s, e2e, 'e2')
    (s1, t1, tag1), (s2, t2, tag2) = first, second
    return (
        sentence[:s1]
        + ' <' + tag1 + '> ' + sentence[s1:t1 + 1] + ' </' + tag1 + '> '
        + sentence[t1 + 1:s2]
        + ' <' + tag2 + '> ' + sentence[s2:t2 + 1] + ' </' + tag2 + '> '
        + sentence[t2 + 1:]
    )


def convert_sentence_to_features(train_dataset, tokenizer, max_len):
    """Turn a preprocessed DataFrame into an ``RE_Dataset`` of model features.

    For every row the two entities are wrapped in marker tags, the sentence is
    tokenized, the marker tokens are replaced by ``$`` (entity 1) and ``#``
    (entity 2), a CLS token is prepended and the sequence is padded to
    ``max_len``. ``e1_mask``/``e2_mask`` flag the positions from the opening to
    the closing marker of each entity (inclusive).

    Rows are read by position, and the input DataFrame is no longer modified.
    Previously the marked sentence was written back with chained indexing,
    which raised ``SettingWithCopyWarning`` and corrupted the sentences if the
    function was called twice on the same frame.

    Args:
        train_dataset: DataFrame with columns ``sentence, e1s, e1e, e2s, e2e,
            label`` (character offsets are inclusive).
        tokenizer: tokenizer exposing ``tokenize``, ``convert_tokens_to_ids``
            and ``cls_token``; the four marker strings must come back as
            single tokens from ``tokenize``.
        max_len: length every sequence is padded to.

    Returns:
        ``RE_Dataset`` holding ``input_ids, attention_mask, token_type_ids,
        e1_mask, e2_mask`` tensors and the list of labels.

    Raises:
        ValueError: if a marker token is missing from a tokenized sentence.
    """
    max_seq_len = max_len
    cls_token = tokenizer.cls_token
    cls_token_segment_id = 0
    # Padding id is hard-coded; presumably the <pad> id of the XLM-RoBERTa
    # vocabulary -- TODO confirm against tokenizer.pad_token_id.
    pad_token = 1
    pad_token_segment_id = 0
    sequence_a_segment_id = 0
    mask_padding_with_zero = True
    special_tokens_count = 1  # the prepended CLS token

    # Pull the columns out once; rows are then addressed by position.
    sentences = list(train_dataset['sentence'])
    e1_starts = list(train_dataset['e1s'])
    e1_ends = list(train_dataset['e1e'])
    e2_starts = list(train_dataset['e2s'])
    e2_ends = list(train_dataset['e2e'])
    labels = list(train_dataset['label'])

    all_input_ids = []
    all_attention_mask = []
    all_token_type_ids = []
    all_e1_mask = []
    all_e2_mask = []
    all_label = []
    skipped = 0
    for idx in tqdm(range(len(sentences))):
        marked_sentence = _insert_entity_markers(
            sentences[idx], e1_starts[idx], e1_ends[idx], e2_starts[idx], e2_ends[idx]
        )
        token = tokenizer.tokenize(marked_sentence)

        e11_p = token.index("<e1>")  # the start position of entity1
        e12_p = token.index("</e1>")  # the end position of entity1
        e21_p = token.index("<e2>")  # the start position of entity2
        e22_p = token.index("</e2>")  # the end position of entity2

        token[e11_p] = "$"
        token[e12_p] = "$"
        token[e21_p] = "#"
        token[e22_p] = "#"

        # Shift by one to account for the CLS token prepended below.
        e11_p += 1
        e12_p += 1
        e21_p += 1
        e22_p += 1

        # Sequences that do not fit are dropped (not truncated), exactly as
        # before; they are now counted so the drop is visible.
        if len(token) >= max_seq_len - special_tokens_count:
            skipped += 1
            continue

        token_type_ids = [cls_token_segment_id] + [sequence_a_segment_id] * len(token)
        token = [cls_token] + token

        input_ids = tokenizer.convert_tokens_to_ids(token)
        attention_mask = [1 if mask_padding_with_zero else 0] * len(input_ids)

        padding_length = max_seq_len - len(input_ids)
        input_ids = input_ids + ([pad_token] * padding_length)
        attention_mask = attention_mask + ([0 if mask_padding_with_zero else 1] * padding_length)
        token_type_ids = token_type_ids + ([pad_token_segment_id] * padding_length)

        e1_mask = [0] * len(attention_mask)
        e2_mask = [0] * len(attention_mask)
        for i in range(e11_p, e12_p + 1):
            e1_mask[i] = 1
        for i in range(e21_p, e22_p + 1):
            e2_mask[i] = 1

        assert len(input_ids) == max_seq_len, "Error with input length {} vs {}".format(len(input_ids), max_seq_len)
        assert len(attention_mask) == max_seq_len, "Error with attention mask length {} vs {}".format(
            len(attention_mask), max_seq_len
        )
        assert len(token_type_ids) == max_seq_len, "Error with token type length {} vs {}".format(
            len(token_type_ids), max_seq_len
        )

        all_input_ids.append(input_ids)
        all_attention_mask.append(attention_mask)
        all_token_type_ids.append(token_type_ids)
        all_e1_mask.append(e1_mask)
        all_e2_mask.append(e2_mask)
        all_label.append(labels[idx])

    if skipped:
        # Dropped rows shift every later prediction relative to the input file.
        logging.getLogger(__name__).warning(
            "%d of %d samples exceed max_len=%d and were dropped", skipped, len(sentences), max_seq_len
        )

    all_features = {
        'input_ids' : torch.tensor(all_input_ids),
        'attention_mask' : torch.tensor(all_attention_mask),
        'token_type_ids' : torch.tensor(all_token_type_ids),
        'e1_mask' : torch.tensor(all_e1_mask),
        'e2_mask' : torch.tensor(all_e2_mask)
    }
    return RE_Dataset(all_features, all_label)



## loss

In [7]:
def reduce_loss(loss, reduction='mean'):
    """Reduce ``loss`` by mean or sum; any other ``reduction`` returns it unchanged."""
    if reduction == 'mean':
        return loss.mean()
    if reduction == 'sum':
        return loss.sum()
    return loss

# Implementation from fastai https://github.com/fastai/fastai2/blob/master/fastai2/layers.py#L338
class LabelSmoothingCrossEntropy(nn.Module):
    """Cross entropy mixed with a uniform-target term (label smoothing).

    Args:
        e: smoothing weight given to the uniform-target term.
        reduction: ``'mean'``, ``'sum'`` or anything else for no reduction.
    """

    def __init__(self, e:float=0.05, reduction='mean'):
        super().__init__()
        self.e = e
        self.reduction = reduction

    def forward(self, output, target):
        """Return ``(1-e) * NLL(target) + e * (sum of -log p over classes) / C``."""
        num_classes = output.size(-1)
        log_probs = F.log_softmax(output, dim=-1)
        # Cross entropy against a uniform distribution, before dividing by C.
        uniform_term = reduce_loss(-log_probs.sum(dim=-1), self.reduction)
        # Ordinary negative log-likelihood of the target class.
        target_term = F.nll_loss(log_probs, target, reduction=self.reduction)
        # (1-e) * H(q,p) + e * H(u,p)
        return (1-self.e)*target_term + self.e*(uniform_term/num_classes)

## R-XLM 모델 정의 

In [1]:
class FCLayer(nn.Module):
    """Dropout, optional tanh, then a linear projection.

    Args:
        input_dim: size of the incoming feature vector.
        output_dim: size of the projected vector.
        dropout_rate: dropout probability applied to the input.
        use_activation: when true, tanh is applied before the linear layer.
    """

    def __init__(self, input_dim, output_dim, dropout_rate=0.0, use_activation=True):
        super().__init__()
        self.use_activation = use_activation
        self.dropout = nn.Dropout(dropout_rate)
        self.linear = nn.Linear(input_dim, output_dim)
        self.tanh = nn.Tanh()

    def forward(self, x):
        """Apply dropout -> (tanh) -> linear to ``x``."""
        hidden = self.dropout(x)
        hidden = self.tanh(hidden) if self.use_activation else hidden
        return self.linear(hidden)


class RXLMRoberta(XLMRobertaModel):
    """Relation classifier on top of a pretrained XLM-RoBERTa encoder.

    The pooled output and the averaged hidden states of the two entity spans
    each go through their own ``FCLayer``; the three vectors are concatenated
    and fed to a final linear classifier over ``config.num_labels`` classes.

    NOTE(review): the encoder used in ``forward`` is ``self.XLMRoberta``; the
    parent constructor presumably builds a second, unused set of encoder
    weights on ``self`` -- confirm and consider removing the duplication.
    NOTE(review): the constructor takes ``model_name`` first, while
    ``Trainer.load_model`` calls ``RXLMRoberta.from_pretrained(model_dir)``
    with no extra arguments -- confirm that path actually works.
    """

    def __init__(self,  model_name, config, dropout_rate):
        """Load the pretrained encoder and create the classification heads.

        :param model_name: name or path given to ``XLMRobertaModel.from_pretrained``
        :param config: model configuration; ``hidden_size`` and ``num_labels`` are read from it
        :param dropout_rate: dropout probability used by every ``FCLayer``
        """
        super(RXLMRoberta, self).__init__(config)
        self.XLMRoberta = XLMRobertaModel.from_pretrained(model_name, config=config)  # Load pretrained XLMRoberta

        self.num_labels = config.num_labels

        # One projection for the pooled output and one per entity (not shared).
        self.cls_fc_layer = FCLayer(config.hidden_size, config.hidden_size, dropout_rate)
        self.entity_fc_layer1 = FCLayer(config.hidden_size, config.hidden_size, dropout_rate)
        self.entity_fc_layer2 = FCLayer(config.hidden_size, config.hidden_size, dropout_rate)

        # Classifier input is the concatenation of the three projected vectors.
        self.label_classifier = FCLayer(
            config.hidden_size * 3,
#             config.hidden_size * 4,
            config.num_labels,
            dropout_rate,
            use_activation=False,
        )

    @staticmethod
    def entity_average(hidden_output, e_mask):
        """
        Average the hidden state vectors at the positions flagged by e_mask
        :param hidden_output: [batch_size, max_seq_len, dim]
        :param e_mask: [batch_size, max_seq_len]
                e.g. e_mask[0] == [0, 0, 0, 1, 1, 1, 0, 0, ... 0]
        :return: [batch_size, dim]

        A mask row without any non-zero entry divides by zero.
        """
        e_mask_unsqueeze = e_mask.unsqueeze(1)  # [b, 1, max_seq_len]
        length_tensor = (e_mask != 0).sum(dim=1).unsqueeze(1)  # [batch_size, 1]

        # [b, 1, max_seq_len] * [b, max_seq_len, dim] = [b, 1, dim] -> [b, dim]
        sum_vector = torch.bmm(e_mask_unsqueeze.float(), hidden_output).squeeze(1)
        avg_vector = sum_vector.float() / length_tensor.float()  # broadcasting
        return avg_vector

    def forward(self, input_ids, attention_mask, token_type_ids, labels, e1_mask, e2_mask):
        """Classify the relation between the two marked entities.

        :param labels: target class ids, or None to skip the loss
        :param e1_mask: [batch_size, max_seq_len] mask of entity-1 positions
        :param e2_mask: [batch_size, max_seq_len] mask of entity-2 positions
        :return: a 4-tuple ``(outputs, pooled_output, e1_h, e2_h)`` where
            ``outputs`` is ``(loss,) + (logits,) + extra encoder outputs`` when
            labels are given and ``(logits,) + extras`` otherwise; the other
            three items are the projected pooled and entity vectors.
        """
        outputs = self.XLMRoberta(
            input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids
        )  # sequence_output, pooled_output, (hidden_states), (attentions)
        sequence_output = outputs[0]
        pooled_output = outputs[1]  # [CLS]
    
        e1_h = self.entity_average(sequence_output, e1_mask)
        e2_h = self.entity_average(sequence_output, e2_mask)
        # Dropout -> tanh -> fc_layer (separate FC layers for CLS, e1 and e2)
        pooled_output = self.cls_fc_layer(pooled_output)
        e1_h = self.entity_fc_layer1(e1_h)
        e2_h = self.entity_fc_layer2(e2_h)
        # Concat -> fc_layer
        concat_h = torch.cat([pooled_output, e1_h, e2_h], dim=-1)

        logits = self.label_classifier(concat_h)
        outputs = (logits,) + outputs[2:]  # add hidden states and attention if they are here
        # Loss: MSE for a single output (regression), cross entropy otherwise
        if labels is not None:
            if self.num_labels == 1:
                loss_fct = nn.MSELoss()
                loss = loss_fct(logits.view(-1), labels.view(-1))
            else:
                loss_fct = nn.CrossEntropyLoss()
                # loss_fct = LabelSmoothingCrossEntropy()
                loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))

            outputs = (loss,) + outputs

        return outputs, pooled_output, e1_h, e2_h  # (loss), logits, (hidden_states), (attentions)

NameError: name 'nn' is not defined

## Trainer

In [10]:
logger = logging.getLogger(__name__)
class Trainer(object):
    """Training / evaluation / prediction driver for ``RXLMRoberta``.

    The constructor stores the hyper-parameters, builds the model config and
    model, and moves the model to GPU when one is available.

    Args:
        num_labels: number of relation classes.
        label_dict: label mapping, either ``name -> id`` (as stored in the
            label pickle) or ``id -> name``; both orientations are accepted.
        logging_steps: evaluate on the training set every this many optimizer
            steps (``<= 0`` disables it).
        save_steps: save a checkpoint every this many optimizer steps
            (``<= 0`` disables it).
        max_steps: if positive, total number of optimizer steps; overrides
            ``num_train_epochs``.
        num_train_epochs: number of passes over the training data.
        warmup_steps, adam_epsilon, learning_rate, weight_decay: optimizer and
            schedule settings.
        gradient_accumulation_steps: micro-batches per optimizer step.
        max_grad_norm: gradient clipping threshold.
        eval_batch_size, train_batch_size: DataLoader batch sizes.
        model_dir: directory used by ``save_model`` / ``load_model``.
        dropout_rate: dropout used by the classification heads.
        Model_name: pretrained model name or path.
        train_dataset, dev_dataset, test_dataset: datasets whose items are
            dicts with the keys ``input_ids, attention_mask, token_type_ids,
            e1_mask, e2_mask, labels``.
    """

    def __init__(self,num_labels, label_dict,logging_steps, save_steps,max_steps,
                 num_train_epochs,warmup_steps,adam_epsilon,learning_rate,gradient_accumulation_steps,
                 max_grad_norm, eval_batch_size, train_batch_size, model_dir, dropout_rate,
                 weight_decay, Model_name ,train_dataset=None, dev_dataset=None, test_dataset=None):
        self.train_dataset = train_dataset
        self.eval_batch_size = eval_batch_size
        self.train_batch_size = train_batch_size
        self.dev_dataset = dev_dataset
        self.test_dataset = test_dataset
        self.Model_name = Model_name
        self.label_lst = label_dict
        self.num_labels = num_labels
        self.max_steps = max_steps
        self.weight_decay = weight_decay
        self.learning_rate = learning_rate
        self.adam_epsilon=adam_epsilon
        self.warmup_steps = warmup_steps
        self.num_train_epochs = num_train_epochs
        self.logging_steps = logging_steps
        self.save_steps = save_steps
        self.max_grad_norm = max_grad_norm
        self.model_dir = model_dir
        self.dropout_rate = dropout_rate
        self.gradient_accumulation_steps = gradient_accumulation_steps

        # The label pickle maps name -> id; passing it straight through as
        # id2label (as before) stored both mappings inverted in the config.
        id2label, label2id = self._label_maps(self.label_lst)
        self.config = XLMRobertaConfig.from_pretrained(
            self.Model_name,
            num_labels=self.num_labels,
            id2label=id2label,
            label2id=label2id,
        )
        self.model = RXLMRoberta(
            self.Model_name, config=self.config, dropout_rate = self.dropout_rate,
        )

        # GPU or CPU
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model.to(self.device)

    @staticmethod
    def _label_maps(label_dict):
        """Return ``(id2label, label2id)`` for a mapping given in either orientation."""
        keys_are_ids = all(isinstance(key, (int, np.integer)) for key in label_dict)
        if keys_are_ids:
            id2label = dict(label_dict)
            label2id = {name: idx for idx, name in label_dict.items()}
        else:
            label2id = dict(label_dict)
            id2label = {idx: name for name, idx in label_dict.items()}
        return id2label, label2id

    def _build_inputs(self, batch, with_labels=True):
        """Move a collated batch to the device and arrange the model's keyword arguments.

        Features are looked up by name, so the mapping no longer depends on the
        order in which the dataset happens to insert its keys.
        """
        batch = {name: tensor.to(self.device) for name, tensor in batch.items()}  # GPU or CPU
        return {
            "input_ids": batch["input_ids"],
            "attention_mask": batch["attention_mask"],
            "token_type_ids": batch["token_type_ids"],
            "labels": batch["labels"] if with_labels else None,
            "e1_mask": batch["e1_mask"],
            "e2_mask": batch["e2_mask"],
        }

    def train(self):
        """Run the training loop.

        Returns:
            ``(global_step, mean_loss)``: the number of optimizer steps taken
            and the accumulated loss divided by that number (``0.0`` when no
            optimizer step was taken).
        """
        train_sampler = RandomSampler(self.train_dataset)
        train_dataloader = DataLoader(
            self.train_dataset,
            sampler=train_sampler,
            batch_size=self.train_batch_size,
        )

        steps_per_epoch = len(train_dataloader) // self.gradient_accumulation_steps
        if self.max_steps > 0:
            t_total = self.max_steps
            # max(1, ...) avoids a ZeroDivisionError when an epoch is shorter
            # than one accumulation window.
            self.num_train_epochs = self.max_steps // max(1, steps_per_epoch) + 1
        else:
            t_total = steps_per_epoch * self.num_train_epochs

        # Prepare optimizer and schedule (linear warmup and decay)
        no_decay = ["bias", "LayerNorm.weight"]
        optimizer_grouped_parameters = [
            {
                "params": [p for n, p in self.model.named_parameters() if not any(nd in n for nd in no_decay)],
                "weight_decay": self.weight_decay,
            },
            {
                "params": [p for n, p in self.model.named_parameters() if any(nd in n for nd in no_decay)],
                "weight_decay": 0.0,
            },
        ]
        optimizer = AdamW(
            optimizer_grouped_parameters,
            lr=self.learning_rate,
            eps=self.adam_epsilon,
        )
        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=self.warmup_steps,
            num_training_steps=t_total,
        )

        # Train!
        logger.info("***** Running training *****")
        logger.info("  Num examples = %d", len(self.train_dataset))
        logger.info("  Num Epochs = %d", self.num_train_epochs)
        logger.info("  Total train batch size = %d", self.train_batch_size)
        logger.info("  Gradient Accumulation steps = %d", self.gradient_accumulation_steps)
        logger.info("  Total optimization steps = %d", t_total)
        logger.info("  Logging steps = %d", self.logging_steps)
        logger.info("  Save steps = %d", self.save_steps)

        global_step = 0
        tr_loss = 0.0
        self.model.zero_grad()

        train_iterator = tqdm(range(int(self.num_train_epochs)), desc="Epoch")

        for _ in train_iterator:
            epoch_iterator = tqdm(train_dataloader, desc="Iteration")
            for step, batch in enumerate(epoch_iterator):
                self.model.train()
                inputs = self._build_inputs(batch)
                outputs, pooled_out, e1_h, e2_h = self.model(**inputs)
                loss = outputs[0]

                if self.gradient_accumulation_steps > 1:
                    loss = loss / self.gradient_accumulation_steps

                loss.backward()

                tr_loss += loss.item()
                if (step + 1) % self.gradient_accumulation_steps == 0:
                    torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.max_grad_norm)

                    optimizer.step()
                    scheduler.step()  # Update learning rate schedule
                    self.model.zero_grad()
                    global_step += 1

                    if self.logging_steps > 0 and global_step % self.logging_steps == 0:
                        logger.info("  global steps = %d", global_step)
                        self.evaluate("train")  # There is no dev set for semeval task

                    if self.save_steps > 0 and global_step % self.save_steps == 0:
                        self.save_model()

                # "<=" stops right at max_steps; "<" used to allow one extra step.
                if 0 < self.max_steps <= global_step:
                    epoch_iterator.close()
                    break

            if 0 < self.max_steps <= global_step:
                train_iterator.close()
                break

        # No optimizer step (e.g. empty loader) used to raise ZeroDivisionError.
        mean_loss = tr_loss / global_step if global_step > 0 else 0.0
        return global_step, mean_loss

    def evaluate(self, mode):
        """Evaluate on the ``"train"``, ``"dev"`` or ``"test"`` dataset.

        Returns:
            dict with the mean batch ``"loss"`` plus the entries produced by
            ``compute_metrics``.

        Raises:
            Exception: if ``mode`` is not one of the three names above.
        """
        # We use test dataset because semeval doesn't have dev dataset
        if mode == "test":
            dataset = self.test_dataset
        elif mode == "dev":
            dataset = self.dev_dataset
        elif mode == "train":
            dataset = self.train_dataset
        else:
            raise Exception("Only dev and test dataset available")

        eval_sampler = SequentialSampler(dataset)
        eval_dataloader = DataLoader(dataset, sampler=eval_sampler, batch_size=self.eval_batch_size)

        # Eval!
        logger.info("***** Running evaluation on %s dataset *****", mode)
        logger.info("  Num examples = %d", len(dataset))
        logger.info("  Batch size = %d", self.eval_batch_size)
        eval_loss = 0.0
        nb_eval_steps = 0
        preds = None
        out_label_ids = None

        self.model.eval()

        for batch in tqdm(eval_dataloader, desc="Evaluating"):
            inputs = self._build_inputs(batch)
            with torch.no_grad():
                outputs, pooled_output, e1_h, e2_h = self.model(**inputs)
                tmp_eval_loss, logits = outputs[:2]
                eval_loss += tmp_eval_loss.mean().item()
            nb_eval_steps += 1

            if preds is None:
                preds = logits.detach().cpu().numpy()
                out_label_ids = inputs["labels"].detach().cpu().numpy()
            else:
                preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
                out_label_ids = np.append(out_label_ids, inputs["labels"].detach().cpu().numpy(), axis=0)

        eval_loss = eval_loss / nb_eval_steps
        results = {"loss": eval_loss}
        preds = np.argmax(preds, axis=1)

        result = compute_metrics(preds, out_label_ids)
        results.update(result)

        logger.info("***** Eval results *****")
        for key in sorted(results.keys()):
            logger.info("  {} = {:.4f}".format(key, results[key]))
        # Diagnostic dump of the last batch's projected vectors.
        print('pooled_output :', pooled_output)
        print('e1_h :', e1_h)
        print('e2_h :', e2_h)

        return results

    def test_pred(self, output_path='RXLMRoberta_batch32_epoch6.csv'):
        """Predict labels for the test dataset and write them to a CSV file.

        Args:
            output_path: destination CSV (one ``pred`` column, no index). The
                default is the file name that used to be hard-coded.

        Returns:
            NumPy array with the predicted class id of every test sample.
        """
        test_dataset = self.test_dataset
        test_sampler = SequentialSampler(test_dataset)
        test_dataloader = DataLoader(test_dataset, sampler=test_sampler,batch_size=self.eval_batch_size)

        # Eval!
        logger.info("***** Running evaluation on %s dataset *****", "test")
        logger.info("  Batch size = %d", self.eval_batch_size)

        preds = None

        self.model.eval()

        for batch in tqdm(test_dataloader, desc="Predicting"):
            # Labels are withheld, so the first model output is the logits.
            inputs = self._build_inputs(batch, with_labels=False)
            with torch.no_grad():
                outputs, pooled_output, e1_h, e2_h = self.model(**inputs)
                pred = outputs[0]

            if preds is None:
                preds = pred.detach().cpu().numpy()
            else:
                preds = np.append(preds, pred.detach().cpu().numpy(), axis=0)

        preds = np.argmax(preds, axis=1)
        df = pd.DataFrame(preds, columns=['pred'])
        df.to_csv(output_path, index=False)
        return preds

    def save_model(self):
        """Save the model checkpoint to ``model_dir`` (overwriting any previous one)."""
        if not os.path.exists(self.model_dir):
            os.makedirs(self.model_dir)
        model_to_save = self.model.module if hasattr(self.model, "module") else self.model
        model_to_save.save_pretrained(self.model_dir)

        logger.info("Saving model checkpoint to %s", self.model_dir)

    def load_model(self):
        """Load the model checkpoint stored in ``model_dir``.

        Raises:
            Exception: if ``model_dir`` does not exist.
        """
        # Check whether model exists
        if not os.path.exists(self.model_dir):
            raise Exception("Model doesn't exists! Train first!")

        # NOTE(review): RXLMRoberta.__init__ requires (model_name, config,
        # dropout_rate); from_pretrained is called here without the extra
        # arguments -- confirm this call works before relying on it.
        self.model = RXLMRoberta.from_pretrained(self.model_dir)
        self.model.to(self.device)
        logger.info("***** Model Loaded *****")

In [11]:
def init_logger():
    """Configure root logging: INFO level with a timestamped message format."""
    log_settings = {
        "format": "%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
        "datefmt": "%m/%d/%Y %H:%M:%S",
        "level": logging.INFO,
    }
    logging.basicConfig(**log_settings)


In [None]:
# Token-length statistics of the entity-marked training sentences.
MODEL_NAME = "xlm-roberta-large"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
ADDITIONAL_SPECIAL_TOKENS = ["<e1>", "</e1>", "<e2>", "</e2>"] 
tokenizer.add_special_tokens({"additional_special_tokens": ADDITIONAL_SPECIAL_TOKENS})

train_dataset = load_data("/opt/ml/input/data/train/train.tsv")
train_label = train_dataset['label'].values

# Build the marked sentences in a list and assign the column once. The previous
# per-row chained assignment (df['sentence'][idx] = ...) raises
# SettingWithCopyWarning and is not guaranteed to write into the frame.
marked_sentences = []
row_iter = zip(
    train_dataset['sentence'],
    train_dataset['e1s'], train_dataset['e1e'],
    train_dataset['e2s'], train_dataset['e2e'],
)
for sentence, e1s, e1e, e2s, e2e in tqdm(row_iter, total=len(train_dataset)):
    # Wrap the entity that starts first, so the other entity's offsets stay valid.
    if e1s > e2s:
        (s1, t1, tag1), (s2, t2, tag2) = (e2s, e2e, 'e2'), (e1s, e1e, 'e1')
    else:
        (s1, t1, tag1), (s2, t2, tag2) = (e1s, e1e, 'e1'), (e2s, e2e, 'e2')
    marked_sentences.append(
        sentence[:s1]
        + ' <' + tag1 + '> ' + sentence[s1:t1 + 1] + ' </' + tag1 + '> '
        + sentence[t1 + 1:s2]
        + ' <' + tag2 + '> ' + sentence[s2:t2 + 1] + ' </' + tag2 + '> '
        + sentence[t2 + 1:]
    )
train_dataset['sentence'] = marked_sentences


tokenized_len_dataset = tokenized_dataset_len(train_dataset, tokenizer)

# Maximum and mean token count, then the length histogram.
print('최대 길이 : ', max(len(i) for i in tokenized_len_dataset))
print('평균 길이 : ', sum(map(len, tokenized_len_dataset))/len(tokenized_len_dataset))
plt.hist([len(s) for s in tokenized_len_dataset], bins=50)
plt.xlabel('length of samples')
plt.ylabel('number of samples')
plt.show()

## train and test

In [12]:
def main(do_train=True, do_test=True):
    """Run the train / test pipeline for the xlm-roberta-large setup.

    Initialises logging, loads the train and test TSV files, builds a
    tokenizer extended with the entity-marker tokens, converts both datasets
    to features, constructs a ``Trainer`` and then optionally calls
    ``trainer.train()`` and ``trainer.test_pred()``.

    Args:
        do_train: When true, call ``trainer.train()``.
        do_test: When true, call ``trainer.test_pred()``.
    """
    init_logger()
    train_dataset = load_data("/opt/ml/input/data/train/train_original.tsv")
    test_dataset = load_data("/opt/ml/input/data/test/test.tsv")

    ADDITIONAL_SPECIAL_TOKENS = ["<e1>", "</e1>", "<e2>", "</e2>"]
    MODEL_NAME = "xlm-roberta-large"
    # NOTE(review): presumably the longest tokenized sentence seen in the
    # length-analysis cell plus two extra positions -- confirm before changing.
    max_len = 345 + 2

    tokenizer = XLMRobertaTokenizer.from_pretrained(MODEL_NAME)
    tokenizer.add_special_tokens({"additional_special_tokens": ADDITIONAL_SPECIAL_TOKENS})

    train_Dataset = convert_sentence_to_features(train_dataset, tokenizer, max_len=max_len)
    test_Dataset = convert_sentence_to_features(test_dataset, tokenizer, max_len=max_len)

    # NOTE: pickle.load can execute arbitrary code; only use a trusted label file.
    with open('/opt/ml/input/data/label_type.pkl', 'rb') as f:
        label_type = pickle.load(f)

    trainer = Trainer(eval_batch_size=16, train_batch_size=16, num_labels=42,
                      max_steps=-1, weight_decay=0.0, learning_rate=2e-5,
                      adam_epsilon=1e-8, warmup_steps=0, num_train_epochs=7,
                      logging_steps=400, save_steps=400, max_grad_norm=1.0,
                      model_dir='./model', gradient_accumulation_steps=1, dropout_rate=0.1,
                      label_dict=label_type, Model_name=MODEL_NAME, train_dataset=train_Dataset,
                      test_dataset=test_Dataset)

    if do_train:
        trainer.train()

    if do_test:
        trainer.test_pred()


if __name__ == "__main__":
    main()

HBox(children=(FloatProgress(value=0.0, max=9000.0), HTML(value='')))




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))




04/22/2021 02:45:33 - INFO - __main__ -   ***** Running training *****
04/22/2021 02:45:33 - INFO - __main__ -     Num examples = 9000
04/22/2021 02:45:33 - INFO - __main__ -     Num Epochs = 7
04/22/2021 02:45:33 - INFO - __main__ -     Total train batch size = 16
04/22/2021 02:45:33 - INFO - __main__ -     Gradient Accumulation steps = 1
04/22/2021 02:45:33 - INFO - __main__ -     Total optimization steps = 3941
04/22/2021 02:45:33 - INFO - __main__ -     Logging steps = 400
04/22/2021 02:45:33 - INFO - __main__ -     Save steps = 400


HBox(children=(FloatProgress(value=0.0, description='Epoch', max=7.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Iteration', max=563.0, style=ProgressStyle(description_wi…

  
04/22/2021 02:53:07 - INFO - __main__ -     global steps = 400
04/22/2021 02:53:07 - INFO - __main__ -   ***** Running evaluation on train dataset *****
04/22/2021 02:53:07 - INFO - __main__ -     Num examples = 9000
04/22/2021 02:53:07 - INFO - __main__ -     Batch size = 16


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=563.0, style=ProgressStyle(description_w…

04/22/2021 02:56:31 - INFO - __main__ -   ***** Eval results *****
04/22/2021 02:56:31 - INFO - __main__ -     acc = 0.7033
04/22/2021 02:56:31 - INFO - __main__ -     loss = 1.0169



pooled_output : tensor([[ 0.5968, -0.5054,  0.0080,  ..., -0.5293,  0.4743, -0.1478],
        [ 0.4530, -0.3496,  0.2945,  ..., -0.2286,  0.4718, -0.4291],
        [ 0.4187, -0.3692,  0.2007,  ..., -0.4516,  0.3265,  0.0399],
        ...,
        [ 0.5758, -0.3512, -0.0151,  ..., -0.5082,  0.4539, -0.1820],
        [ 0.4301, -0.3186,  0.2749,  ..., -0.3603,  0.2721, -0.0439],
        [ 0.4029, -0.3384,  0.0713,  ..., -0.4913,  0.4758, -0.0841]],
       device='cuda:0')
e1_h : tensor([[ 0.5505, -0.5411,  0.1534,  ..., -0.3062,  0.3908, -0.4984],
        [ 0.3242, -1.2876, -0.4255,  ...,  0.9669,  0.5433,  0.5812],
        [ 0.3715, -1.0845, -0.2710,  ...,  0.8746,  0.2560,  0.6337],
        ...,
        [ 0.2977,  0.7564,  0.5662,  ..., -0.7824,  0.0853, -0.0808],
        [ 0.1620, -1.2865, -0.6557,  ...,  0.6354,  0.1534,  0.6001],
        [ 0.0680,  0.5687,  0.2027,  ..., -0.5986, -0.2477, -0.9112]],
       device='cuda:0')
e2_h : tensor([[-0.5341, -0.8993, -0.2741,  ...,  0.1610, -0

04/22/2021 02:57:11 - INFO - __main__ -   Saving model checkpoint to ./model





HBox(children=(FloatProgress(value=0.0, description='Iteration', max=563.0, style=ProgressStyle(description_wi…

04/22/2021 03:04:43 - INFO - __main__ -     global steps = 800
04/22/2021 03:04:43 - INFO - __main__ -   ***** Running evaluation on train dataset *****
04/22/2021 03:04:43 - INFO - __main__ -     Num examples = 9000
04/22/2021 03:04:43 - INFO - __main__ -     Batch size = 16


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=563.0, style=ProgressStyle(description_w…

04/22/2021 03:08:07 - INFO - __main__ -   ***** Eval results *****
04/22/2021 03:08:07 - INFO - __main__ -     acc = 0.7693
04/22/2021 03:08:07 - INFO - __main__ -     loss = 0.7430



pooled_output : tensor([[ 0.6581, -0.2344,  0.0840,  ..., -0.6065,  0.4897, -0.0220],
        [ 0.4281, -0.2740,  0.0598,  ..., -0.0704,  0.2414, -0.3398],
        [ 0.2673, -0.1793,  0.2215,  ..., -0.3442,  0.1365,  0.3185],
        ...,
        [ 0.5919, -0.1365, -0.1544,  ..., -0.6103,  0.3068, -0.1805],
        [ 0.5343, -0.1035,  0.4428,  ..., -0.5900,  0.4626, -0.1049],
        [ 0.3976, -0.0629,  0.0064,  ..., -0.3281,  0.3537,  0.0595]],
       device='cuda:0')
e1_h : tensor([[ 0.2353, -0.6094,  0.3052,  ..., -0.5162,  0.4120, -0.5848],
        [ 0.1684, -1.3722, -0.3361,  ...,  1.1745,  0.2914,  0.7853],
        [ 0.1688, -1.0758, -0.2662,  ...,  1.2422, -0.0977,  0.5906],
        ...,
        [-0.0810,  1.4604,  0.7029,  ..., -0.9437,  0.1642, -0.3405],
        [-0.0507, -0.9674, -0.2984,  ...,  0.4828,  0.0368,  0.6702],
        [-0.3293,  0.6465,  0.4315,  ..., -1.0386, -0.1625, -0.8286]],
       device='cuda:0')
e2_h : tensor([[-0.3549, -0.7447, -0.4044,  ...,  0.1042, -0

04/22/2021 03:08:47 - INFO - __main__ -   Saving model checkpoint to ./model





HBox(children=(FloatProgress(value=0.0, description='Iteration', max=563.0, style=ProgressStyle(description_wi…

04/22/2021 03:16:19 - INFO - __main__ -     global steps = 1200
04/22/2021 03:16:19 - INFO - __main__ -   ***** Running evaluation on train dataset *****
04/22/2021 03:16:19 - INFO - __main__ -     Num examples = 9000
04/22/2021 03:16:19 - INFO - __main__ -     Batch size = 16


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=563.0, style=ProgressStyle(description_w…

04/22/2021 03:19:43 - INFO - __main__ -   ***** Eval results *****
04/22/2021 03:19:43 - INFO - __main__ -     acc = 0.8190
04/22/2021 03:19:43 - INFO - __main__ -     loss = 0.5991



pooled_output : tensor([[ 0.7535, -0.3215,  0.0445,  ..., -0.6062,  0.4159, -0.1645],
        [ 0.5677, -0.3433,  0.2354,  ...,  0.0214,  0.0991, -0.3114],
        [ 0.4281, -0.0065,  0.0855,  ..., -0.3890,  0.0505,  0.0607],
        ...,
        [ 0.6271,  0.1255, -0.3487,  ..., -0.3638,  0.2292, -0.3673],
        [ 0.4887, -0.0963,  0.5278,  ..., -0.6200,  0.4500,  0.0451],
        [ 0.4306, -0.0264, -0.1056,  ..., -0.3961,  0.1768, -0.0402]],
       device='cuda:0')
e1_h : tensor([[-0.1084, -0.5924,  0.4787,  ..., -0.3546,  0.2150, -0.2721],
        [ 0.0126, -1.0812, -0.1373,  ...,  1.1129,  0.2195,  0.6437],
        [-0.0100, -0.7909, -0.0222,  ...,  0.7561, -0.3759,  0.3617],
        ...,
        [ 0.2276,  1.0066,  0.7921,  ..., -0.8069, -0.3589,  0.3643],
        [-0.1453, -1.2724, -0.3624,  ...,  0.5785,  0.2252,  0.4122],
        [-0.5824,  1.1988,  0.3402,  ..., -1.0080, -0.6178, -1.2387]],
       device='cuda:0')
e2_h : tensor([[-0.3610, -1.2730, -0.1089,  ...,  0.0132, -1

04/22/2021 03:20:22 - INFO - __main__ -   Saving model checkpoint to ./model
04/22/2021 03:27:55 - INFO - __main__ -     global steps = 1600
04/22/2021 03:27:55 - INFO - __main__ -   ***** Running evaluation on train dataset *****
04/22/2021 03:27:55 - INFO - __main__ -     Num examples = 9000
04/22/2021 03:27:55 - INFO - __main__ -     Batch size = 16


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=563.0, style=ProgressStyle(description_w…

04/22/2021 03:31:19 - INFO - __main__ -   ***** Eval results *****
04/22/2021 03:31:19 - INFO - __main__ -     acc = 0.8682
04/22/2021 03:31:19 - INFO - __main__ -     loss = 0.4188



pooled_output : tensor([[ 0.8186, -0.2808,  0.0989,  ..., -0.6552,  0.5334,  0.0288],
        [ 0.6166, -0.4265,  0.2989,  ...,  0.1135,  0.0884, -0.1612],
        [ 0.6947, -0.0023,  0.0635,  ..., -0.7729,  0.3722,  0.2724],
        ...,
        [ 0.8228, -0.1142, -0.2400,  ..., -0.5852,  0.3535, -0.2090],
        [ 0.5594, -0.1175,  0.4448,  ..., -0.6512,  0.5627,  0.0359],
        [ 0.5707, -0.0398, -0.0609,  ..., -0.6049,  0.4290,  0.1648]],
       device='cuda:0')
e1_h : tensor([[ 0.0019, -0.3107,  0.5463,  ..., -0.8047,  0.4300, -0.6708],
        [-0.1843, -1.1899, -0.2040,  ...,  1.3292, -0.0085,  0.5716],
        [ 0.1798, -1.1125, -0.3261,  ...,  0.9001, -0.3968,  0.1711],
        ...,
        [ 0.1720,  1.5470,  1.0137,  ..., -1.1367,  0.1014, -0.2584],
        [-0.2860, -1.1639, -0.5402,  ...,  0.5088,  0.3157,  0.2363],
        [-0.4022,  0.9371,  0.4668,  ..., -0.9256, -0.5298, -1.2304]],
       device='cuda:0')
e2_h : tensor([[-0.4566, -1.0263, -0.1987,  ...,  0.2596, -1

04/22/2021 03:31:59 - INFO - __main__ -   Saving model checkpoint to ./model





HBox(children=(FloatProgress(value=0.0, description='Iteration', max=563.0, style=ProgressStyle(description_wi…

04/22/2021 03:39:31 - INFO - __main__ -     global steps = 2000
04/22/2021 03:39:31 - INFO - __main__ -   ***** Running evaluation on train dataset *****
04/22/2021 03:39:31 - INFO - __main__ -     Num examples = 9000
04/22/2021 03:39:31 - INFO - __main__ -     Batch size = 16


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=563.0, style=ProgressStyle(description_w…

04/22/2021 03:42:55 - INFO - __main__ -   ***** Eval results *****
04/22/2021 03:42:55 - INFO - __main__ -     acc = 0.9056
04/22/2021 03:42:55 - INFO - __main__ -     loss = 0.2985



pooled_output : tensor([[ 0.8176, -0.3920,  0.2217,  ..., -0.6024,  0.5165,  0.0835],
        [ 0.5334, -0.4229,  0.3576,  ...,  0.1207,  0.0639, -0.2713],
        [ 0.6848, -0.0250,  0.2294,  ..., -0.7456,  0.3270,  0.2589],
        ...,
        [ 0.7523, -0.1879, -0.0567,  ..., -0.6522,  0.4959, -0.1301],
        [ 0.4913, -0.1849,  0.3749,  ..., -0.6649,  0.5900,  0.1919],
        [ 0.6099, -0.1199,  0.0798,  ..., -0.7757,  0.4415,  0.1271]],
       device='cuda:0')
e1_h : tensor([[ 0.1444, -0.6281,  0.5188,  ..., -0.5596,  0.5045, -0.8811],
        [-0.2323, -1.2641, -0.2067,  ...,  1.0791,  0.2157,  0.6140],
        [-0.0731, -0.7546, -0.4102,  ...,  0.5702, -0.5256,  0.2819],
        ...,
        [-0.0663,  1.8056,  0.7765,  ..., -1.0596, -0.2203, -0.5470],
        [ 0.0543, -1.5702, -0.6345,  ...,  0.3415,  0.6090,  0.4580],
        [-0.4625,  1.1551,  0.5151,  ..., -1.0770, -0.7554, -1.5600]],
       device='cuda:0')
e2_h : tensor([[-0.4123, -0.7966, -0.3642,  ...,  0.1862, -1

04/22/2021 03:43:36 - INFO - __main__ -   Saving model checkpoint to ./model





HBox(children=(FloatProgress(value=0.0, description='Iteration', max=563.0, style=ProgressStyle(description_wi…

04/22/2021 03:51:08 - INFO - __main__ -     global steps = 2400
04/22/2021 03:51:08 - INFO - __main__ -   ***** Running evaluation on train dataset *****
04/22/2021 03:51:08 - INFO - __main__ -     Num examples = 9000
04/22/2021 03:51:08 - INFO - __main__ -     Batch size = 16


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=563.0, style=ProgressStyle(description_w…

04/22/2021 03:54:32 - INFO - __main__ -   ***** Eval results *****
04/22/2021 03:54:32 - INFO - __main__ -     acc = 0.9312
04/22/2021 03:54:32 - INFO - __main__ -     loss = 0.2240



pooled_output : tensor([[ 0.8404, -0.4578,  0.2255,  ..., -0.6571,  0.4811,  0.0487],
        [ 0.4038, -0.4052,  0.3010,  ...,  0.2636, -0.1798, -0.3236],
        [ 0.5864, -0.0917,  0.1579,  ..., -0.7443,  0.2466,  0.2724],
        ...,
        [ 0.6857, -0.1172, -0.1259,  ..., -0.6888,  0.4404, -0.1799],
        [ 0.4070, -0.0824,  0.2799,  ..., -0.5680,  0.5298,  0.0068],
        [ 0.4907, -0.0868,  0.0391,  ..., -0.6671,  0.3015,  0.1493]],
       device='cuda:0')
e1_h : tensor([[-0.0593, -0.6347,  0.4069,  ..., -0.6576,  0.5627, -0.8804],
        [-0.3956, -0.9606, -0.1419,  ...,  1.2423,  0.0748,  0.5300],
        [-0.0702, -0.8515, -0.5740,  ...,  0.8102, -0.3849,  0.3562],
        ...,
        [-0.2803,  1.7075,  0.6657,  ..., -1.1887, -0.0314, -0.6035],
        [-0.1222, -1.6182, -0.5178,  ...,  0.4757,  0.3657,  0.3763],
        [-0.5585,  1.0434,  0.5321,  ..., -0.9820, -0.6864, -1.3030]],
       device='cuda:0')
e2_h : tensor([[-0.4577, -0.9023, -0.3529,  ...,  0.2427, -1

04/22/2021 03:55:13 - INFO - __main__ -   Saving model checkpoint to ./model
04/22/2021 04:02:46 - INFO - __main__ -     global steps = 2800
04/22/2021 04:02:46 - INFO - __main__ -   ***** Running evaluation on train dataset *****
04/22/2021 04:02:46 - INFO - __main__ -     Num examples = 9000
04/22/2021 04:02:46 - INFO - __main__ -     Batch size = 16


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=563.0, style=ProgressStyle(description_w…

04/22/2021 04:06:10 - INFO - __main__ -   ***** Eval results *****
04/22/2021 04:06:10 - INFO - __main__ -     acc = 0.9509
04/22/2021 04:06:10 - INFO - __main__ -     loss = 0.1596



pooled_output : tensor([[ 0.7936, -0.3344,  0.1721,  ..., -0.6009,  0.4740, -0.0844],
        [ 0.4370, -0.4697,  0.3252,  ...,  0.2112, -0.2153, -0.2338],
        [ 0.6847, -0.0164,  0.1972,  ..., -0.8752,  0.2856,  0.2529],
        ...,
        [ 0.8427, -0.2845, -0.1704,  ..., -0.5606,  0.3923, -0.1844],
        [ 0.4575, -0.0668,  0.3932,  ..., -0.7471,  0.5394,  0.2785],
        [ 0.5913, -0.1253,  0.0394,  ..., -0.7463,  0.3426,  0.1734]],
       device='cuda:0')
e1_h : tensor([[ 0.1448, -0.8580,  0.5297,  ..., -0.5481,  0.7991, -0.6960],
        [-0.3800, -1.2266, -0.1960,  ...,  1.3542, -0.0660,  0.4130],
        [-0.1704, -0.9703, -0.3703,  ...,  0.6817, -0.7358,  0.2599],
        ...,
        [ 0.4184,  1.2766,  0.3069,  ..., -0.9861, -0.0937,  0.1626],
        [-0.1509, -1.7451, -0.5666,  ...,  0.3891,  0.4462,  0.5085],
        [-0.5024,  0.9477,  0.3202,  ..., -1.1230, -0.6363, -1.3302]],
       device='cuda:0')
e2_h : tensor([[-0.3891, -0.8564, -0.0783,  ...,  0.0666, -1

04/22/2021 04:06:50 - INFO - __main__ -   Saving model checkpoint to ./model





HBox(children=(FloatProgress(value=0.0, description='Iteration', max=563.0, style=ProgressStyle(description_wi…

04/22/2021 04:14:21 - INFO - __main__ -     global steps = 3200
04/22/2021 04:14:21 - INFO - __main__ -   ***** Running evaluation on train dataset *****
04/22/2021 04:14:21 - INFO - __main__ -     Num examples = 9000
04/22/2021 04:14:21 - INFO - __main__ -     Batch size = 16


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=563.0, style=ProgressStyle(description_w…

04/22/2021 04:17:45 - INFO - __main__ -   ***** Eval results *****
04/22/2021 04:17:45 - INFO - __main__ -     acc = 0.9627
04/22/2021 04:17:45 - INFO - __main__ -     loss = 0.1200



pooled_output : tensor([[ 0.8096, -0.4181,  0.3862,  ..., -0.7160,  0.5060, -0.0835],
        [ 0.4113, -0.4847,  0.3711,  ...,  0.2803, -0.1870, -0.4018],
        [ 0.5878, -0.0195,  0.3517,  ..., -0.9885,  0.2133,  0.3750],
        ...,
        [ 0.7636, -0.2945, -0.1067,  ..., -0.5956,  0.4393, -0.2504],
        [ 0.4968, -0.1110,  0.3976,  ..., -0.7423,  0.4673,  0.2778],
        [ 0.5755, -0.1698,  0.0300,  ..., -0.8564,  0.4580,  0.0946]],
       device='cuda:0')
e1_h : tensor([[ 0.1996, -0.8950,  0.5629,  ..., -0.4823,  0.8702, -0.4783],
        [-0.4376, -1.2436, -0.3120,  ...,  1.4136,  0.1521,  0.4850],
        [-0.0582, -0.8727, -0.5422,  ...,  0.7598, -0.5221,  0.5216],
        ...,
        [ 0.5166,  0.9645,  0.5569,  ..., -0.9223, -0.0121,  0.5389],
        [ 0.0734, -1.9136, -0.3754,  ...,  0.2714,  0.6256,  0.4943],
        [-0.5747,  0.9187,  0.2857,  ..., -0.9010, -0.4814, -1.4227]],
       device='cuda:0')
e2_h : tensor([[-0.4089, -0.2667,  0.0472,  ...,  0.1467, -0

04/22/2021 04:18:25 - INFO - __main__ -   Saving model checkpoint to ./model





HBox(children=(FloatProgress(value=0.0, description='Iteration', max=563.0, style=ProgressStyle(description_wi…

04/22/2021 04:25:56 - INFO - __main__ -     global steps = 3600
04/22/2021 04:25:56 - INFO - __main__ -   ***** Running evaluation on train dataset *****
04/22/2021 04:25:56 - INFO - __main__ -     Num examples = 9000
04/22/2021 04:25:56 - INFO - __main__ -     Batch size = 16


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=563.0, style=ProgressStyle(description_w…

04/22/2021 04:29:20 - INFO - __main__ -   ***** Eval results *****
04/22/2021 04:29:20 - INFO - __main__ -     acc = 0.9740
04/22/2021 04:29:20 - INFO - __main__ -     loss = 0.0868



pooled_output : tensor([[ 0.8549, -0.5302,  0.4138,  ..., -0.6453,  0.5808, -0.1457],
        [ 0.3566, -0.4800,  0.3962,  ...,  0.2642, -0.2407, -0.3287],
        [ 0.7002, -0.1426,  0.3379,  ..., -0.9770,  0.3218,  0.2485],
        ...,
        [ 0.7104, -0.2818, -0.0948,  ..., -0.6822,  0.4789, -0.2504],
        [ 0.5667, -0.1954,  0.3829,  ..., -0.5048,  0.4701,  0.0155],
        [ 0.5825, -0.1652, -0.0272,  ..., -0.7675,  0.4000,  0.0550]],
       device='cuda:0')
e1_h : tensor([[ 0.2464, -1.1756,  0.4737,  ..., -0.2276,  0.7984, -0.0793],
        [-0.3920, -1.0273, -0.2345,  ...,  1.4161,  0.1150,  0.3971],
        [-0.2375, -0.9537, -0.4399,  ...,  0.7392, -0.9767,  0.3790],
        ...,
        [ 0.3824,  1.0792,  0.4222,  ..., -0.9337, -0.0916,  0.3177],
        [ 0.3011, -1.9317, -0.2623,  ...,  0.1824,  0.6252,  0.3822],
        [-0.4378,  1.1192,  0.3370,  ..., -0.9300, -0.7362, -1.2873]],
       device='cuda:0')
e2_h : tensor([[-0.3790, -0.3196,  0.1227,  ...,  0.1118, -0

04/22/2021 04:30:01 - INFO - __main__ -   Saving model checkpoint to ./model
04/22/2021 04:36:27 - INFO - __main__ -   ***** Running evaluation on test dataset *****
04/22/2021 04:36:27 - INFO - __main__ -     Batch size = 16






HBox(children=(FloatProgress(value=0.0, description='Predicting', max=63.0, style=ProgressStyle(description_wi…


