In [None]:
import pickle as pickle
import os
import pandas as pd
import torch
from sklearn.metrics import accuracy_score
from transformers import AutoConfig, RobertaModel, AutoModel
import numpy as np
import matplotlib.pyplot as plt
import random
from itertools import chain
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
import copy
import csv
import json
import logging
import os
import torch.nn as nn
from tqdm.auto import tqdm
from transformers import AdamW, get_linear_schedule_with_warmup
import torch.nn.functional as F

from transformers import AutoTokenizer
#from kobart import get_pytorch_kobart_model, get_kobart_tokenizer
# from transformers import ElectraModel, ElectraConfig, ElectraPreTrainedModel, ElectraTokenizer
from sklearn.model_selection import train_test_split
#import wandb
import yaml
from sklearn.preprocessing import LabelEncoder
import sklearn


In [None]:
# Seed every random number generator the notebook relies on (Python, NumPy,
# PyTorch CPU and CUDA) and switch cuDNN to its deterministic mode.
random_seed = 42

random.seed(random_seed)
np.random.seed(random_seed)

torch.manual_seed(random_seed)
torch.cuda.manual_seed(random_seed)
torch.cuda.manual_seed_all(random_seed)  # covers the multi-GPU case

# Deterministic kernels on, auto-tuning off.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [None]:
# Dataset 구성.
class RE_Dataset(torch.utils.data.Dataset):
  """Map-style dataset that pairs per-example feature tensors with labels.

  `pair_dataset` is a mapping from feature name to an indexable tensor and
  `label` is an indexable sequence of labels; the dataset length is the
  number of labels.
  """

  def __init__(self, pair_dataset, label):
    self.pair_dataset = pair_dataset
    self.label = label

  def __len__(self):
    return len(self.label)

  def __getitem__(self, idx):
    # Copy each feature row so callers never alias the stored tensors.
    item = {}
    for key, val in self.pair_dataset.items():
      item[key] = val[idx].clone().detach()
    item['label'] = torch.tensor(self.label[idx])
    return item

def preprocessing_dataset(dataset):
  """Flatten the raw CSV frame into one column per entity attribute.

  The `subject_entity` and `object_entity` columns hold serialised mappings
  with the keys `word`, `start_idx` and `end_idx`. Each one is parsed and
  split into a word column plus integer start/end columns.

  :param dataset: DataFrame with the columns `id`, `sentence`,
      `subject_entity`, `object_entity` and `label`.
  :return: DataFrame with the columns `id`, `sentence`, `subject_entity`,
      `object_entity`, `sub_st`, `sub_end`, `obj_st`, `obj_end`, `label`.
  """
  subject_entity = []
  object_entity = []
  sub_st = []
  sub_end = []
  obj_st = []
  obj_end = []
  for raw_subject, raw_object in zip(dataset['subject_entity'], dataset['object_entity']):
    # safe_load instead of yaml.load: calling yaml.load without an explicit
    # Loader raises TypeError on PyYAML >= 6 and can construct arbitrary
    # objects on older versions.
    sub_dic = yaml.safe_load(raw_subject)
    obj_dic = yaml.safe_load(raw_object)

    subject_entity.append(sub_dic['word'])
    object_entity.append(obj_dic['word'])
    sub_st.append(int(sub_dic['start_idx']))
    sub_end.append(int(sub_dic['end_idx']))
    obj_st.append(int(obj_dic['start_idx']))
    obj_end.append(int(obj_dic['end_idx']))

  out_dataset = pd.DataFrame({
      'id': dataset['id'],
      'sentence': dataset['sentence'],
      'subject_entity': subject_entity,
      'object_entity': object_entity,
      'sub_st': sub_st,
      'sub_end': sub_end,
      'obj_st': obj_st,
      'obj_end': obj_end,
      'label': dataset['label'],
  })
  return out_dataset

def load_data(dataset_dir):
  """Read the CSV file at `dataset_dir` and return it in preprocessed form."""
  return preprocessing_dataset(pd.read_csv(dataset_dir))

def label_to_num(label, dict_path='/opt/ml/code/dict_label_to_num.pkl'):
  """Convert string class labels to their integer ids.

  :param label: iterable of label strings.
  :param dict_path: pickle file holding the label -> id dictionary.
  :return: list of ids, in the same order as `label`.
  :raises KeyError: if a label is missing from the dictionary.
  """
  # NOTE: pickle.load can execute arbitrary code; only point `dict_path`
  # at a trusted file.
  with open(dict_path, 'rb') as f:
    dict_label_to_num = pickle.load(f)
  return [dict_label_to_num[v] for v in label]

def num_to_label(label, dict_path='/opt/ml/code/dict_num_to_label.pkl'):
  """Convert integer class ids back to their original string labels.

  :param label: iterable of class ids.
  :param dict_path: pickle file holding the id -> label dictionary.
  :return: list of label strings, in the same order as `label`.
  :raises KeyError: if an id is missing from the dictionary.
  """
  # NOTE: pickle.load can execute arbitrary code; only point `dict_path`
  # at a trusted file.
  with open(dict_path, 'rb') as f:
    dict_num_to_label = pickle.load(f)
  return [dict_num_to_label[v] for v in label]
  
def init_logger():
    """Configure root logging: INFO level with a timestamped record format."""
    record_format = "%(asctime)s - %(levelname)s - %(name)s -   %(message)s"
    date_format = "%m/%d/%Y %H:%M:%S"
    logging.basicConfig(format=record_format, datefmt=date_format, level=logging.INFO)

In [None]:
def compute_metrics(preds, labels):
    """Return the metric dictionary for equally long `preds` and `labels`.

    Asserts that both inputs have the same length, then delegates to
    `acc_and_f1`.
    """
    n_preds, n_labels = len(preds), len(labels)
    assert n_preds == n_labels
    metrics = acc_and_f1(preds, labels)
    return metrics


def simple_accuracy(preds, labels):
    """Return the mean of the element-wise comparison `preds == labels`.

    The comparison result must provide `.mean()` (e.g. NumPy arrays).
    """
    matches = preds == labels
    return matches.mean()


def official_f1(result_path='/opt/ml/eval/result.txt'):
    """Parse a percentage score from the last line of a result file.

    The last line is expected to look like ``<prefix>: <name> = NN.NN% >>>``:
    the text after the first ``:`` is taken, ``>>>`` is removed, and the
    percentage after ``=`` is converted to a fraction.

    :param result_path: path of the UTF-8 result file.
    :return: the score divided by 100, as a float.
    :raises ValueError: if the file is empty or the number cannot be parsed.
    :raises IndexError: if the last line lacks the ``:`` / ``=`` separators.
    """
    with open(result_path, "r", encoding="utf-8") as f:
        lines = f.readlines()
    if not lines:
        raise ValueError("result file is empty: {}".format(result_path))

    score_text = lines[-1].split(":")[1].replace(">>>", "").strip()
    score_text = score_text.split("=")[1].strip().replace("%", "")
    return float(score_text) / 100

def acc_and_f1(preds, labels, average="macro"):
    """Return a metrics dictionary containing the accuracy under ``"acc"``.

    `average` is accepted but not used. The ``"f1": official_f1()`` entry is
    currently disabled, so only accuracy is reported.
    """
    metrics = {"acc": simple_accuracy(preds, labels)}
    return metrics

In [None]:
def _mark_entities(sentence, sub_st, sub_end, obj_st, obj_end):
    """Wrap the subject in ``<e1>``/``</e1>`` and the object in ``<e2>``/``</e2>``.

    Spans are inclusive character ranges. Markers are inserted in sentence
    order, so the result is correct whichever entity comes first.
    Overlapping spans are not handled specially.
    """
    first, second = sorted([
        (sub_st, sub_end, ' <e1> ', ' </e1> '),
        (obj_st, obj_end, ' <e2> ', ' </e2> '),
    ])
    a_st, a_end, a_open, a_close = first
    b_st, b_end, b_open, b_close = second
    return (
        sentence[:a_st]
        + a_open + sentence[a_st:a_end + 1]
        + a_close + sentence[a_end + 1:b_st]
        + b_open + sentence[b_st:b_end + 1]
        + b_close + sentence[b_end + 1:]
    )


def convert_sentence_to_features(train_dataset, tokenizer, max_len, mode='train'):
    """Tokenise entity-marked sentences into fixed-length model inputs.

    For every row, the subject and object spans are wrapped in marker tokens
    and the sentence is tokenised. The markers are then replaced by ``$``
    (subject) and ``#`` (object), the CLS token is prepended, and the sequence
    is padded to `max_len`. The entity masks are 1 from the opening to the
    closing marker of the entity (inclusive) and 0 elsewhere.

    :param train_dataset: DataFrame with the columns `sentence`, `sub_st`,
        `sub_end`, `obj_st`, `obj_end` (inclusive character offsets).
    :param tokenizer: tokenizer providing `tokenize`, `convert_tokens_to_ids`
        and `cls_token`, with ``<e1>``, ``</e1>``, ``<e2>``, ``</e2>``
        registered so they survive tokenisation as single tokens.
    :param max_len: length every sequence is padded to.
    :param mode: accepted for compatibility; not used.
    :return: dict of tensors keyed `input_ids`, `attention_mask`,
        `token_type_ids`, `e1_mask`, `e2_mask` (in that order).
    :raises ValueError: if a marker token is missing after tokenisation.
    """
    max_seq_len = max_len
    cls_token = tokenizer.cls_token
    # Use the tokenizer's own pad id; fall back to the previously hard-coded 1.
    pad_token = getattr(tokenizer, 'pad_token_id', None)
    if pad_token is None:
        pad_token = 1
    # Every position, [CLS] included, belongs to segment 0. The old code used
    # the CLS *token id* as the segment id, which is only right when it is 0.
    segment_id = 0
    special_tokens_count = 4

    all_input_ids = []
    all_attention_mask = []
    all_token_type_ids = []
    all_e1_mask = []
    all_e2_mask = []
    skipped = 0

    # Iterate positionally so the result does not depend on `id` matching the
    # frame's index labels.
    rows = zip(
        train_dataset['sentence'],
        train_dataset['sub_st'], train_dataset['sub_end'],
        train_dataset['obj_st'], train_dataset['obj_end'],
    )
    for raw_sentence, sub_st, sub_end, obj_st, obj_end in tqdm(rows, total=len(train_dataset)):
        sentence = _mark_entities(raw_sentence, sub_st, sub_end, obj_st, obj_end)

        token = tokenizer.tokenize(sentence)
        e11_p = token.index("<e1>")   # start marker of entity 1
        e12_p = token.index("</e1>")  # end marker of entity 1
        e21_p = token.index("<e2>")   # start marker of entity 2
        e22_p = token.index("</e2>")  # end marker of entity 2

        # Replace the markers by ordinary tokens before id conversion.
        token[e11_p] = "$"
        token[e12_p] = "$"
        token[e21_p] = "#"
        token[e22_p] = "#"

        # Shift by one for the [CLS] token prepended below.
        e11_p += 1
        e12_p += 1
        e21_p += 1
        e22_p += 1

        if len(token) >= max_seq_len - special_tokens_count:
            # Over-long examples are dropped (unchanged behaviour); counted
            # so the caller can notice the row/label misalignment this causes.
            skipped += 1
            continue

        token = [cls_token] + token
        input_ids = tokenizer.convert_tokens_to_ids(token)
        real_len = len(input_ids)
        padding_length = max_seq_len - real_len

        input_ids = input_ids + [pad_token] * padding_length
        attention_mask = [1] * real_len + [0] * padding_length
        token_type_ids = [segment_id] * max_seq_len

        e1_mask = [0] * max_seq_len
        e2_mask = [0] * max_seq_len
        for i in range(e11_p, e12_p + 1):
            e1_mask[i] = 1
        for i in range(e21_p, e22_p + 1):
            e2_mask[i] = 1

        assert len(input_ids) == max_seq_len, "Error with input length {} vs {}".format(len(input_ids), max_seq_len)
        assert len(attention_mask) == max_seq_len, "Error with attention mask length {} vs {}".format(
            len(attention_mask), max_seq_len
        )
        assert len(token_type_ids) == max_seq_len, "Error with token type length {} vs {}".format(
            len(token_type_ids), max_seq_len
        )

        all_input_ids.append(input_ids)
        all_attention_mask.append(attention_mask)
        all_token_type_ids.append(token_type_ids)
        all_e1_mask.append(e1_mask)
        all_e2_mask.append(e2_mask)

    if skipped:
        logging.getLogger(__name__).warning(
            "%d example(s) exceeded max_len=%d and were dropped; "
            "features no longer align one-to-one with the input rows",
            skipped, max_seq_len,
        )

    all_features = {
        'input_ids': torch.tensor(all_input_ids),
        'attention_mask': torch.tensor(all_attention_mask),
        'token_type_ids': torch.tensor(all_token_type_ids),
        'e1_mask': torch.tensor(all_e1_mask),
        'e2_mask': torch.tensor(all_e2_mask),
    }
    return all_features
    
def init_logger():
    """Configure root logging: INFO level with a timestamped record format."""
    # NOTE: an identical `init_logger` is also defined in an earlier cell;
    # whichever definition runs last is the one in effect.
    settings = {
        "format": "%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
        "datefmt": "%m/%d/%Y %H:%M:%S",
        "level": logging.INFO,
    }
    logging.basicConfig(**settings)

In [None]:
class FCLayer(nn.Module):
    """Dropout, optional tanh, then a linear projection (in that order)."""

    def __init__(self, input_dim, output_dim, dropout_rate=0.0, use_activation=True):
        super().__init__()
        self.use_activation = use_activation
        self.dropout = nn.Dropout(dropout_rate)
        self.linear = nn.Linear(input_dim, output_dim)
        self.tanh = nn.Tanh()

    def forward(self, x):
        hidden = self.dropout(x)
        # The activation is applied *before* the linear layer.
        activated = self.tanh(hidden) if self.use_activation else hidden
        return self.linear(activated)


class Roberta_RE(RobertaModel):
    """Relation classifier on top of a pretrained RoBERTa encoder.

    The pooled output and the averaged hidden states of the two entity spans
    each go through their own `FCLayer`; the three vectors are concatenated
    and projected to `config.num_labels` logits.
    """

    def __init__(self, model_name, config, dropout_rate):
        super().__init__(config)
        # NOTE(review): the RobertaModel base constructor above presumably
        # builds its own encoder in addition to the pretrained one loaded
        # here, and only `self.model` is used in `forward`. Left as is
        # because changing it would alter the saved state-dict layout.
        self.model = RobertaModel.from_pretrained(model_name, config=config)

        self.num_labels = config.num_labels

        hidden = config.hidden_size
        self.cls_fc_layer = FCLayer(hidden, hidden, dropout_rate)
        # The two entities use separate (not shared) projection layers.
        self.entity_fc_layer1 = FCLayer(hidden, hidden, dropout_rate)
        self.entity_fc_layer2 = FCLayer(hidden, hidden, dropout_rate)

        self.label_classifier = FCLayer(
            hidden * 3,
            config.num_labels,
            dropout_rate,
            use_activation=False,
        )

    @staticmethod
    def entity_average(hidden_output, e_mask):
        """
        Average the hidden state vectors at the positions selected by a mask.
        :param hidden_output: [batch_size, max_seq_len, dim]
        :param e_mask: [batch_size, max_seq_len]
                e.g. e_mask[0] == [0, 0, 0, 1, 1, 1, 0, 0, ... 0]
        :return: [batch_size, dim]
        """
        span_length = e_mask.ne(0).sum(dim=1, keepdim=True)  # [batch_size, 1]
        weights = e_mask.unsqueeze(1).float()  # [batch_size, 1, max_seq_len]

        # [b, 1, seq] x [b, seq, dim] = [b, 1, dim] -> [b, dim]
        summed = torch.bmm(weights, hidden_output).squeeze(1)
        return summed.float() / span_length.float()  # broadcasts over dim

    def forward(self, input_ids, attention_mask, token_type_ids, labels, e1_mask, e2_mask):
        """Return ``(loss,) + (logits, ...)``; the loss is omitted when `labels` is None."""
        encoder_outputs = self.model(
            input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids
        )  # sequence_output, pooled_output, (hidden_states), (attentions)
        sequence_output = encoder_outputs[0]  # [batch, max_len, hidden_size]
        pooled_output = encoder_outputs[1]

        e1_h = self.entity_average(sequence_output, e1_mask)
        e2_h = self.entity_average(sequence_output, e2_mask)

        # Dropout -> tanh -> linear, one dedicated layer per input vector.
        pooled_output = self.cls_fc_layer(pooled_output)
        e1_h = self.entity_fc_layer1(e1_h)
        e2_h = self.entity_fc_layer2(e2_h)

        # Concatenate and classify.
        concat_h = torch.cat([pooled_output, e1_h, e2_h], dim=-1)
        logits = self.label_classifier(concat_h)
        # Keep hidden states and attentions if the encoder returned them.
        result = (logits,) + encoder_outputs[2:]

        if labels is None:
            return result  # logits, (hidden_states), (attentions)

        if self.num_labels == 1:
            # Single output unit: regression loss.
            loss = nn.MSELoss()(logits.view(-1), labels.view(-1))
        else:
            loss = nn.CrossEntropyLoss()(logits.view(-1, self.num_labels), labels.view(-1))

        return (loss,) + result  # loss, logits, (hidden_states), (attentions)

In [None]:
logger = logging.getLogger(__name__)


class Trainer(object):
    """Fine-tune, evaluate and run inference with a `Roberta_RE` model.

    The constructor stores the hyper-parameters, builds the model from the
    ``'klue/roberta-large'`` checkpoint and moves it to the GPU when one is
    available. Datasets are expected to yield dicts with the keys
    `input_ids`, `attention_mask`, `token_type_ids`, `e1_mask`, `e2_mask`
    and `label`.
    """

    def __init__(self,num_labels,logging_steps, save_steps,max_steps,
                 num_train_epochs,warmup_steps,adam_epsilon,learning_rate,gradient_accumulation_steps,
                 max_grad_norm, eval_batch_size, train_batch_size, model_dir, dropout_rate, classifier_epoch, tokenizer,
                 weight_decay,train_dataset=None, dev_dataset=None, test_dataset=None):
        self.train_dataset = train_dataset
        self.eval_batch_size = eval_batch_size
        self.train_batch_size = train_batch_size
        self.dev_dataset = dev_dataset
        self.test_dataset = test_dataset
        self.num_labels = num_labels
        self.max_steps = max_steps
        self.weight_decay = weight_decay
        self.learning_rate = learning_rate
        self.adam_epsilon = adam_epsilon
        self.warmup_steps = warmup_steps
        self.num_train_epochs = num_train_epochs
        self.logging_steps = logging_steps
        self.save_steps = save_steps
        self.max_grad_norm = max_grad_norm
        self.model_dir = model_dir
        self.dropout_rate = dropout_rate
        self.classifier_epoch = classifier_epoch
        self.gradient_accumulation_steps = gradient_accumulation_steps
        self.global_epo = 0  # index of the epoch currently being trained
        self.config = AutoConfig.from_pretrained(
            'klue/roberta-large',
            num_labels=self.num_labels
        )
        self.model = Roberta_RE(
            'klue/roberta-large', config=self.config, dropout_rate=self.dropout_rate,
        )
        # NOTE(review): only the config value is updated here; the embedding
        # matrix itself is not resized in this class.
        self.model.config.vocab_size = len(tokenizer)

        # GPU or CPU
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model.to(self.device)

    def _build_inputs(self, batch, with_labels=True):
        """Move a collated batch to the device and map it to model kwargs.

        Features are selected by key rather than by position, so the result
        does not depend on the key order of the dataset's item dict.
        """
        batch = {key: value.to(self.device) for key, value in batch.items()}
        return {
            "input_ids": batch["input_ids"],
            "attention_mask": batch["attention_mask"],
            "token_type_ids": batch["token_type_ids"],
            "labels": batch["label"] if with_labels else None,
            "e1_mask": batch["e1_mask"],
            "e2_mask": batch["e2_mask"],
        }

    def train(self):
        """Run the training loop.

        :return: ``(global_step, average loss per optimizer step)``; the
            average is 0.0 when no optimizer step was taken.
        """
        train_sampler = RandomSampler(self.train_dataset)
        train_dataloader = DataLoader(
            self.train_dataset,
            sampler=train_sampler,
            batch_size=self.train_batch_size,
        )

        steps_per_epoch = len(train_dataloader) // self.gradient_accumulation_steps
        if self.max_steps > 0:
            t_total = self.max_steps
            self.num_train_epochs = self.max_steps // steps_per_epoch + 1
        else:
            t_total = steps_per_epoch * self.num_train_epochs

        # Prepare optimizer and schedule (linear warmup and decay).
        # Biases and LayerNorm weights are excluded from weight decay.
        no_decay = ["bias", "LayerNorm.weight"]
        optimizer_grouped_parameters = [
            {
                "params": [p for n, p in self.model.named_parameters() if not any(nd in n for nd in no_decay)],
                "weight_decay": self.weight_decay,
            },
            {
                "params": [p for n, p in self.model.named_parameters() if any(nd in n for nd in no_decay)],
                "weight_decay": 0.0,
            },
        ]
        optimizer = AdamW(
            optimizer_grouped_parameters,
            lr=self.learning_rate,
            eps=self.adam_epsilon,
        )
        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=self.warmup_steps,
            num_training_steps=t_total,
        )

        # Train!
        logger.info("***** Running training *****")
        logger.info("  Num examples = %d", len(self.train_dataset))
        logger.info("  Num Epochs = %d", self.num_train_epochs)
        logger.info("  Total train batch size = %d", self.train_batch_size)
        logger.info("  Gradient Accumulation steps = %d", self.gradient_accumulation_steps)
        logger.info("  Total optimization steps = %d", t_total)
        logger.info("  Logging steps = %d", self.logging_steps)
        logger.info("  Save steps = %d", self.save_steps)

        global_step = 0
        tr_loss = 0.0
        self.model.zero_grad()

        train_iterator = tqdm(range(int(self.num_train_epochs)), desc="Epoch")

        for epo_step in train_iterator:
            self.global_epo = epo_step
            epoch_iterator = tqdm(train_dataloader, desc="Iteration")
            for step, batch in enumerate(epoch_iterator):
                # evaluate() switches to eval mode, so re-enable training
                # mode on every step.
                self.model.train()
                inputs = self._build_inputs(batch)
                outputs = self.model(**inputs)
                loss = outputs[0]

                if self.gradient_accumulation_steps > 1:
                    loss = loss / self.gradient_accumulation_steps

                loss.backward()

                tr_loss += loss.item()
                if (step + 1) % self.gradient_accumulation_steps == 0:
                    torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.max_grad_norm)

                    optimizer.step()
                    scheduler.step()  # Update learning rate schedule
                    self.model.zero_grad()
                    global_step += 1

                    if self.logging_steps > 0 and global_step % self.logging_steps == 0:
                        logger.info("  global steps = %d", global_step)
                        self.evaluate("dev")
                    if self.save_steps > 0 and global_step % self.save_steps == 0:
                        self.save_model()

                if 0 < self.max_steps < global_step:
                    epoch_iterator.close()
                    break

            if 0 < self.max_steps < global_step:
                train_iterator.close()
                break

        # Guard against ZeroDivisionError when no optimizer step ran.
        average_loss = tr_loss / global_step if global_step else 0.0
        return global_step, average_loss

    def evaluate(self, mode):
        """Evaluate on the ``"train"``, ``"dev"`` or ``"test"`` dataset.

        :return: dict with ``"loss"`` plus the entries of `compute_metrics`.
        :raises Exception: for any other `mode`.
        :raises ValueError: if the selected dataset yields no batches.
        """
        datasets = {
            "test": self.test_dataset,
            "dev": self.dev_dataset,
            "train": self.train_dataset,
        }
        if mode not in datasets:
            raise Exception("Only train, dev and test dataset available")
        dataset = datasets[mode]

        eval_sampler = SequentialSampler(dataset)
        eval_dataloader = DataLoader(dataset, sampler=eval_sampler, batch_size=self.eval_batch_size)

        # Eval!
        logger.info('---------------------------------------------------')
        logger.info("***** Running evaluation on %s dataset *****", mode)
        logger.info("  Num examples = %d", len(dataset))
        logger.info("  Batch size = %d", self.eval_batch_size)
        eval_loss = 0.0
        nb_eval_steps = 0
        logit_chunks = []
        label_chunks = []

        self.model.eval()

        for batch in tqdm(eval_dataloader, desc="Evaluating"):
            inputs = self._build_inputs(batch)
            with torch.no_grad():
                outputs = self.model(**inputs)
                tmp_eval_loss, logits = outputs[:2]
                eval_loss += tmp_eval_loss.mean().item()
            nb_eval_steps += 1

            logit_chunks.append(logits.detach().cpu().numpy())
            label_chunks.append(inputs["labels"].detach().cpu().numpy())

        if nb_eval_steps == 0:
            raise ValueError("Cannot evaluate on an empty {} dataset".format(mode))

        eval_loss = eval_loss / nb_eval_steps
        results = {"loss": eval_loss}
        preds = np.argmax(np.concatenate(logit_chunks, axis=0), axis=1)
        out_label_ids = np.concatenate(label_chunks, axis=0)
        result = compute_metrics(preds, out_label_ids)
        results.update(result)

        logger.info("***** Eval results *****")
        for key in sorted(results.keys()):
            logger.info("  {} = {:.4f}".format(key, results[key]))
        logger.info("---------------------------------------------------")
        return results

    def test_pred(self):
        """Predict classes and probabilities for the test dataset.

        :return: ``(preds, probs)`` where `preds` is a list with one class
            index per example and `probs` a list with one list of softmax
            probabilities per example. Both are empty for an empty dataset.
        """
        test_dataset = self.test_dataset
        test_sampler = SequentialSampler(test_dataset)
        test_dataloader = DataLoader(test_dataset, sampler=test_sampler, batch_size=self.eval_batch_size)

        # Eval!
        logger.info("***** Running evaluation on %s dataset *****", "test")
        logger.info("  Batch size = %d", self.eval_batch_size)

        pred_chunks = []
        prob_chunks = []

        self.model.eval()

        for batch in tqdm(test_dataloader, desc="Predicting"):
            inputs = self._build_inputs(batch, with_labels=False)
            with torch.no_grad():
                outputs = self.model(**inputs)
                # Without labels the model returns logits first.
                logits = outputs[0]

            prob = F.softmax(logits, dim=-1).detach().cpu().numpy()
            prob_chunks.append(prob)
            # ndarray.argmax takes only the axis; passing `logits` as well
            # raised a TypeError.
            pred_chunks.append(prob.argmax(axis=-1))

        if not pred_chunks:
            return [], []

        # Concatenate the per-batch arrays (not the rows of one array).
        preds = np.concatenate(pred_chunks, axis=0)
        probs = np.concatenate(prob_chunks, axis=0)
        return preds.tolist(), probs.tolist()

    def save_model(self):
        """Save the model to `self.model_dir`, overwriting an earlier checkpoint."""
        os.makedirs(self.model_dir, exist_ok=True)
        # Unwrap a DataParallel-style wrapper if present.
        model_to_save = self.model.module if hasattr(self.model, "module") else self.model
        model_to_save.save_pretrained(self.model_dir)

        logger.info("Saving model checkpoint to %s", self.model_dir)

    def load_model(self):
        """Load the fine-tuned weights from `self.model_dir` into `self.model`.

        :raises Exception: if the directory or the checkpoint file is missing.
        """
        # Assumes save_pretrained wrote `pytorch_model.bin`, the same file
        # name the notebook's standalone inference code loads.
        checkpoint_path = os.path.join(self.model_dir, "pytorch_model.bin")
        if not os.path.exists(self.model_dir) or not os.path.exists(checkpoint_path):
            raise Exception("Model doesn't exists! Train first!")

        # Restore the trained weights instead of re-downloading the base
        # pretrained encoder, which discarded the classifier head entirely.
        state_dict = torch.load(checkpoint_path, map_location=self.device)
        self.model.load_state_dict(state_dict)
        self.model.to(self.device)
        logger.info("***** Model Loaded *****")

In [None]:
# Data preparation: read the CSVs, split train/dev, encode the labels,
# build the tokenizer and convert every split into model features.
init_logger()

train_dir = '/opt/ml/dataset/train/train.csv'
test_dir = '/opt/ml/dataset/test/test_data.csv'

# Stratified 90/10 train/dev split of the training file.
train_dataset = load_data(train_dir)
train_dataset, dev_dataset = train_test_split(
    train_dataset,
    test_size=0.1,
    stratify=train_dataset['label'],
    shuffle=True,
    random_state=random_seed,
)
train_label, dev_label = (
    label_to_num(frame['label'].values) for frame in (train_dataset, dev_dataset)
)

# The test file's label column is cast to int directly.
test_dataset = load_data(test_dir)
test_label = [int(value) for value in test_dataset['label'].values]

# Register the entity markers so the tokenizer keeps them as single tokens.
ADDITIONAL_SPECIAL_TOKENS = ["<e1>", "</e1>", "<e2>", "</e2>"]

tokenizer = AutoTokenizer.from_pretrained("klue/roberta-large")
tokenizer.add_special_tokens({"additional_special_tokens": ADDITIONAL_SPECIAL_TOKENS})

# All three splits use the same sequence length; `mode` is passed as 'train'
# for each of them, as before.
train_feature, dev_feature, test_feature = (
    convert_sentence_to_features(frame, tokenizer, max_len=441, mode='train')
    for frame in (train_dataset, dev_dataset, test_dataset)
)

Train_dataset = RE_Dataset(train_feature, train_label)
Dev_dataset = RE_Dataset(dev_feature, dev_label)
Test_dataset = RE_Dataset(test_feature, test_label)

In [None]:
# Hyper-parameters for the fine-tuning run, one keyword per line.
trainer = Trainer(
    num_labels=30,
    # batching
    train_batch_size=8,
    eval_batch_size=8,
    gradient_accumulation_steps=4,
    # schedule
    num_train_epochs=10,
    max_steps=-1,  # not positive: the step count is derived from the epochs
    warmup_steps=20,
    # optimisation
    learning_rate=5e-5,
    adam_epsilon=1e-8,
    weight_decay=0.001,
    max_grad_norm=1.0,
    dropout_rate=0.1,
    classifier_epoch=3,
    # logging / checkpoints
    logging_steps=1600,
    save_steps=800,
    model_dir='./model2',
    # data
    tokenizer=tokenizer,
    train_dataset=Train_dataset,
    dev_dataset=Dev_dataset,
    test_dataset=Test_dataset,
)

In [None]:
# Run fine-tuning. As the cell's last expression, the returned
# (global_step, average training loss) tuple is displayed.
trainer.train()

--------------------

# Standalone inference: rebuild the model, load fine-tuned weights, predict
# on the test set and write the submission CSV.
config = AutoConfig.from_pretrained("klue/roberta-large", num_labels=30)
# NOTE(review): the Trainer in this notebook saves to model_dir='./model2',
# while this path points at './model' -- confirm which checkpoint is meant.
path = "./model/pytorch_model.bin"
# Fall back to the CPU when no GPU is present instead of failing on 'cuda:0'.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
model = Roberta_RE('klue/roberta-large', config=config, dropout_rate=0.1)
# map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
model.load_state_dict(torch.load(path, map_location=device))
loader = DataLoader(Test_dataset, batch_size=64, shuffle=False)
model.eval()
model.to(device)
output_pred = []
output_prob = []
for data in tqdm(loader):
    with torch.no_grad():
        outputs = model(
            input_ids=data['input_ids'].to(device),
            attention_mask=data['attention_mask'].to(device),
            token_type_ids=data['token_type_ids'].to(device),
            e1_mask=data['e1_mask'].to(device),
            e2_mask=data['e2_mask'].to(device),
            labels=None,
        )
        # Without labels the first output is the logits.
        logits = outputs[0]
        prob = F.softmax(logits, dim=-1).cpu().numpy()
        logits = logits.cpu().numpy()
        result = np.argmax(logits, axis=-1)

        output_pred.append(result)
        output_prob.append(prob)


# Merge the per-batch arrays into one array per output.
output_pred = np.concatenate(output_pred)
output_prob = np.concatenate(output_prob)
submission = pd.DataFrame({
    # Ids are generated as 0..N-1 rather than read from the test file.
    "id": range(len(output_prob)),
    "pred_label": num_to_label(output_pred),
    "probs": [str(list(row)) for row in output_prob],
})
submission.to_csv("R-Roberta.csv", index=False)