<a href="https://colab.research.google.com/github/heraclex12/R-BERT-Relation-Classification/blob/master/BERT_for_Relation_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# !pip3 install vncorenlp
# !pip3 install fairseq
# !pip3 install fastBPE
!pip3 install transformers



In [2]:
from transformers import *
import torch
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split
import os
from tqdm import tqdm, trange
os.chdir('drive/My Drive/vncorenlp')

In [3]:
class FCLayer(torch.nn.Module):
  """Small projection head: dropout, an optional tanh, then a linear layer.

  Args:
    input_dim: size of the last dimension of the incoming tensor.
    output_dim: size of the last dimension of the result.
    dropout_rate: probability handed to ``torch.nn.Dropout``.
    use_activation: when True, tanh is applied to the (dropped-out) input
      before the linear projection; when False the tanh is skipped.
  """

  def __init__(self, input_dim, output_dim, dropout_rate=0., use_activation=True):
    super().__init__()
    self.use_activation = use_activation
    self.dropout = torch.nn.Dropout(dropout_rate)
    self.linear = torch.nn.Linear(input_dim, output_dim)
    self.tanh = torch.nn.Tanh()

  def forward(self, x):
    """Return ``linear(tanh(dropout(x)))``, or ``linear(dropout(x))`` without activation."""
    hidden = self.dropout(x)
    activated = self.tanh(hidden) if self.use_activation else hidden
    return self.linear(activated)

In [4]:
class RBERT(BertPreTrainedModel):
  """Relation classifier built on a RoBERTa encoder.

  The encoder's pooled output and the averaged hidden states of the two entity
  spans are each passed through their own ``FCLayer``, concatenated, and fed to
  a final ``FCLayer`` (without activation) that produces ``num_labels`` logits.

  Args:
    config: model configuration; ``hidden_size`` and ``num_labels`` are read from it.
    args: dict-like settings; only ``args['DROPOUT_RATE']`` is read here.
  """
  base_model_prefix = "roberta"
  config_class = RobertaConfig
  # NOTE(review): this assigns the *config* archive map to the attribute named
  # for the *model* archive map - confirm this is intended for the installed
  # transformers version.
  pretrained_model_archive_map = ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP
  def __init__(self, config, args):
    super(RBERT, self).__init__(config)
    self.roberta = RobertaModel(config=config)
    self.num_labels = config.num_labels
    self.cls_fc_layer = FCLayer(config.hidden_size, config.hidden_size, args['DROPOUT_RATE'])
    self.e1_fc_layer = FCLayer(config.hidden_size, config.hidden_size, args['DROPOUT_RATE'])
    self.e2_fc_layer = FCLayer(config.hidden_size, config.hidden_size, args['DROPOUT_RATE'])
    self.label_classifier = FCLayer(config.hidden_size * 3, self.num_labels, args['DROPOUT_RATE'], use_activation=False)

  @staticmethod
  def entity_average(hidden_output, e_mask):
    """Average the hidden vectors selected by ``e_mask``.

    Args:
      hidden_output: tensor of shape (batch, seq_len, dim).
      e_mask: tensor of shape (batch, seq_len); non-zero entries mark the
        positions that belong to the entity.

    Returns:
      Tensor of shape (batch, dim). A row whose mask is entirely zero yields a
      zero vector instead of NaN.
    """
    e_mask_unsqueeze = e_mask.unsqueeze(1)  # (batch, 1, seq_len)
    length_tensor = (e_mask != 0).sum(dim=1).unsqueeze(1)  # (batch, 1)

    # (batch, 1, seq_len) x (batch, seq_len, dim) -> (batch, 1, dim) -> (batch, dim)
    sum_vector = torch.bmm(e_mask_unsqueeze.float(), hidden_output).squeeze(1)
    # Clamp the divisor so an empty mask cannot divide by zero and poison the
    # loss with NaN; non-empty masks are unaffected.
    avg_vector = sum_vector.float() / length_tensor.float().clamp(min=1.0)
    return avg_vector
  
  def forward(self, input_ids, attention_mask, labels, e1_mask, e2_mask):
    """Run the encoder and the classification head.

    Args:
      input_ids: token ids passed to the encoder.
      attention_mask: attention mask passed to the encoder.
      labels: target tensor, or None to skip the loss computation.
      e1_mask: mask selecting the first entity's positions.
      e2_mask: mask selecting the second entity's positions.

    Returns:
      A tuple ``(logits, *extra_encoder_outputs)``; when ``labels`` is not None
      the loss is prepended, giving ``(loss, logits, ...)``.
    """
    outputs = self.roberta(input_ids, attention_mask=attention_mask)
    sequence_output = outputs[0]
    pooled_output = outputs[1]

    e1_h = self.entity_average(sequence_output, e1_mask)
    e2_h = self.entity_average(sequence_output, e2_mask)

    pooled_output = self.cls_fc_layer(pooled_output)
    e1_h = self.e1_fc_layer(e1_h)
    e2_h = self.e2_fc_layer(e2_h)

    concat_h = torch.cat([pooled_output, e1_h, e2_h], dim=-1)
    logits = self.label_classifier(concat_h)

    outputs = (logits, ) + outputs[2:]

    if labels is not None:
      if self.num_labels == 1:
        # Single output -> regression; MSELoss needs floating-point targets.
        loss_fct = torch.nn.MSELoss()
        loss = loss_fct(logits.view(-1), labels.view(-1).float())
      else:
        loss_fct = torch.nn.CrossEntropyLoss()
        loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
      
      outputs = (loss,) + outputs
    return outputs

In [5]:
# Both splits are header-less, tab-separated files: a label column followed by the text.
_tsv_options = dict(sep='\t', header=None, names=['label', 'text'])
train_df = pd.read_csv('relation_data/train.tsv', **_tsv_options)
test_df = pd.read_csv('relation_data/test.tsv', **_tsv_options)
# One label per row; the row position is later used as the class index.
labels = pd.read_fwf('relation_data/label.txt', header=None, names=['label'])
train_df.head()

Unnamed: 0,label,text
0,"Component-Whole(e2,e1)",The system as described above has its greatest...
1,Other,The <e1> child </e1> was carefully wrapped and...
2,"Instrument-Agency(e2,e1)",The <e1> author </e1> of a keygen uses a <e2> ...
3,Other,A misty <e1> ridge </e1> uprises from the <e2>...
4,"Member-Collection(e1,e2)",The <e1> student </e1> <e2> association </e2> ...


In [6]:
def seed_everything(SEED):
  """Seed every random number generator this notebook can touch.

  Seeds Python's ``random`` module, NumPy, and PyTorch (CPU and all CUDA
  devices), and asks cuDNN for deterministic behaviour.

  Args:
    SEED: integer seed applied to all generators.
  """
  import random  # local import so the function is self-contained

  random.seed(SEED)
  np.random.seed(SEED)
  torch.manual_seed(SEED)
  # Covers every visible GPU, not only the current one; a no-op without CUDA.
  torch.cuda.manual_seed_all(SEED)
  torch.backends.cudnn.deterministic = True
  # The cuDNN auto-tuner may pick different kernels between runs; keep it off.
  torch.backends.cudnn.benchmark = False

# Use the GPU whenever CUDA is available; otherwise fall back to the CPU.
_cuda_ready = torch.cuda.is_available()
device = torch.device("cuda" if _cuda_ready else "cpu")
if _cuda_ready:
  print("We will use the GPU:", torch.cuda.get_device_name())
else:
  print("We will use the CPU.")

# Hyper-parameters and run settings shared by the cells below.
args = dict(
    NUM_LABELS=len(labels),          # one class per row of the label file
    DROPOUT_RATE=0.1,                # dropout used by every FCLayer in RBERT
    LEARNING_RATE=2e-5,              # AdamW learning rate
    EPOCHS=5,                        # passes over the training loader
    MAX_SEQUENCE_LENGTH=384,         # every example is padded/truncated to this many tokens
    BATCH_SIZE=16,                   # training batch size; evaluation uses twice this
    ADAM_EPSILON=1e-8,               # AdamW epsilon
    GRADIENT_ACCUMULATION_STEPS=1,   # divides the scheduler's total step count
    MAX_GRAD_NORM=1.0,               # gradient-norm clipping threshold
    LOGGING_STEPS=250,               # not read by any cell shown in this notebook
    SAVE_STEPS=250,                  # not read by any cell shown in this notebook
    WEIGHT_DECAY=0.0,                # applied to all parameters except biases and LayerNorm weights
    NUM_WARMUP_STEPS=0,              # warm-up steps of the linear schedule
)

We will use the GPU: Tesla V100-SXM2-16GB


In [7]:
def convert_lines(df, label_indexes, max_seq_len, tokenizer, cls_token='[CLS]',
                  sep_token='[SEP]', pad_token=0, add_sep_token=False, mask_padding_with_zero=True):
  """Convert entity-tagged sentences into fixed-length tensors.

  Every text is lower-cased and tokenized; the tokenizer must emit the markers
  ``<e1>``, ``</e1>``, ``<e2>`` and ``</e2>`` as single tokens. The ``e1``
  markers are replaced by ``'$'`` and the ``e2`` markers by ``'#'``, and each
  entity mask covers its span including both replaced markers.

  Args:
    df: DataFrame with ``label`` and ``text`` columns.
    label_indexes: DataFrame with a ``label`` column; the index value of the
      first row matching an example's label becomes its class id.
    max_seq_len: length every example is truncated/padded to.
    tokenizer: object providing ``tokenize`` and ``convert_tokens_to_ids``.
    cls_token: token prepended to every sequence.
    sep_token: token appended to every sequence when ``add_sep_token`` is True.
    pad_token: id used to pad ``input_ids``.
    add_sep_token: whether to append ``sep_token``.
    mask_padding_with_zero: if True, real tokens get attention 1 and padding 0;
      if False the two values are swapped.

  Returns:
    ``torch.utils.data.TensorDataset`` of long tensors in the order
    (input_ids, attention_mask, labels, e1_mask, e2_mask).

  Raises:
    ValueError: a marker token is missing from a tokenized text.
    IndexError: a label is absent from ``label_indexes``, or an entity span
      does not fit inside ``max_seq_len`` after truncation.
  """
  # Build the label -> class id lookup once instead of scanning the frame for
  # every row; setdefault keeps the first occurrence of a duplicated label.
  label_to_index = {}
  for class_id, name in zip(label_indexes.index, label_indexes.label):
    label_to_index.setdefault(name, class_id)

  # Room reserved for special tokens: cls_token alone, or cls_token + sep_token.
  special_tokens_count = 2 if add_sep_token else 1
  max_content_len = max_seq_len - special_tokens_count
  real_flag, pad_flag = (1, 0) if mask_padding_with_zero else (0, 1)
  total = len(df)

  input_ids = []
  attention_masks = []
  e1_masks = []
  e2_masks = []
  labels = []
  print("Converting sentence...")
  # Count positions ourselves so progress reporting does not depend on the
  # frame carrying a default 0..n-1 index.
  for position, row in enumerate(df.itertuples()):
    if (position % 5000 == 0 and position > 0) or position == total - 1:
      print('Parsing {} of {}'.format(position + 1, total))

    tokens = tokenizer.tokenize(row.text.lower())
    e11_p = tokens.index("<e1>")
    e12_p = tokens.index("</e1>")
    e21_p = tokens.index("<e2>")
    e22_p = tokens.index("</e2>")

    # Replace the markers with plain tokens
    tokens[e11_p] = '$'
    tokens[e12_p] = '$'
    tokens[e21_p] = '#'
    tokens[e22_p] = '#'

    if len(tokens) > max_content_len:
      tokens = tokens[:max_content_len]

    # A span cut off by truncation must fail loudly: with add_sep_token the
    # shifted end index would otherwise silently land on the separator token.
    if max(e12_p, e22_p) >= len(tokens):
      raise IndexError(
          "Entity span of row {} does not fit in max_seq_len={}".format(position, max_seq_len))

    if row.label not in label_to_index:
      raise IndexError("Label {!r} of row {} not found in label_indexes".format(row.label, position))

    if add_sep_token:
      tokens = tokens + [sep_token]
    tokens = [cls_token] + tokens

    input_id = tokenizer.convert_tokens_to_ids(tokens)
    attention_mask = [real_flag] * len(input_id)

    padding_length = max_seq_len - len(input_id)
    input_id = input_id + ([pad_token] * padding_length)
    attention_mask = attention_mask + ([pad_flag] * padding_length)

    # Positions shift by one because cls_token was prepended.
    e1_first, e1_last = e11_p + 1, e12_p + 1
    e2_first, e2_last = e21_p + 1, e22_p + 1
    e1_mask = [1 if e1_first <= i <= e1_last else 0 for i in range(len(attention_mask))]
    e2_mask = [1 if e2_first <= i <= e2_last else 0 for i in range(len(attention_mask))]

    assert len(input_id) == max_seq_len, "Error with input length {} vs {}".format(len(input_id), max_seq_len)
    assert len(attention_mask) == max_seq_len, "Error with attention mask length {} vs {}".format(len(attention_mask), max_seq_len)

    input_ids.append(input_id)
    attention_masks.append(attention_mask)
    labels.append(label_to_index[row.label])
    e1_masks.append(e1_mask)
    e2_masks.append(e2_mask)

  dataset = torch.utils.data.TensorDataset(torch.tensor(input_ids, dtype=torch.long),
                          torch.tensor(attention_masks, dtype=torch.long),
                          torch.tensor(labels, dtype=torch.long),
                          torch.tensor(e1_masks, dtype=torch.long),
                          torch.tensor(e2_masks, dtype=torch.long))
  return dataset

In [8]:
ADDITIONAL_SPECIAL_TOKENS = ["<e1>", "</e1>", "<e2>", "</e2>"]
def load_model(args, mode='en'):
  """Build the tokenizer, config and RBERT model from the 'roberta-base' checkpoint.

  The entity markers are registered as additional special tokens on the
  tokenizer, and the model is moved to the notebook-level ``device``.

  Args:
    args: settings dict; ``args['NUM_LABELS']`` sets the number of classes and
      the whole dict is forwarded to ``RBERT``.
    mode: accepted but not used by this function.

  Returns:
    Tuple ``(config, tokenizer, model)``.
  """
  roberta_tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
  roberta_tokenizer.add_special_tokens({"additional_special_tokens" : ADDITIONAL_SPECIAL_TOKENS})

  roberta_config = RobertaConfig.from_pretrained('roberta-base', num_labels = args['NUM_LABELS'])
  classifier = RBERT.from_pretrained('roberta-base', config=roberta_config, args=args).to(device)
  return roberta_config, roberta_tokenizer, classifier

# Seed the random number generators before the model is built, so that whatever
# randomness model construction uses happens after seeding.
seed_everything(42)
config, tokenizer, model = load_model(args)

Some weights of the model checkpoint at roberta-base were not used when initializing RBERT: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RBERT from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing RBERT from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RBERT were not initialized from the model checkpoint at roberta-base and are newly initialized: ['cls_fc_layer.linear.weight', 'cls_fc_layer.linear.bias', 'e1_fc_layer.linear.weight', 'e1_fc_layer.linear.bias', 'e2_fc_layer.linear.weight', 'e2_fc_layer.linear.bias', 'label_classifier.linear.weight', '

### Process data

In [9]:
# Token settings handed to convert_lines for both splits. add_sep_token is left
# at its default (False), so sep_token is passed along but never appended.
_token_options = dict(cls_token='<s>', sep_token='</s>', pad_token=1)
_max_len = args['MAX_SEQUENCE_LENGTH']
train_dataset = convert_lines(train_df, labels, _max_len, tokenizer, **_token_options)
test_dataset = convert_lines(test_df, labels, _max_len, tokenizer, **_token_options)

# Training batches are drawn in random order; evaluation keeps the file order.
train_sampler = torch.utils.data.RandomSampler(train_dataset)
test_sampler = torch.utils.data.SequentialSampler(test_dataset)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args['BATCH_SIZE'], sampler=train_sampler)
# Evaluation uses twice the training batch size.
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=args['BATCH_SIZE'] * 2, sampler=test_sampler)

Converting sentence...
Parsing 5001 of 8000
Parsing 8000 of 8000
Converting sentence...
Parsing 2717 of 2717


In [10]:
def evaluate(model, device, test_loader):
    """Run the model over ``test_loader`` and score its predictions.

    The model is switched to eval mode and left in that mode.

    Args:
        model: module called as ``model(input_ids=..., attention_mask=...,
            labels=..., e1_mask=..., e2_mask=...)`` whose first two outputs are
            the loss and the logits.
        device: device every batch tensor is moved to.
        test_loader: iterable of batches ordered as
            (input_ids, attention_mask, labels, e1_mask, e2_mask).

    Returns:
        dict with ``'accuracy'``, macro-averaged ``'f1_score'``, ``'pred'``
        (array of predicted class ids) and ``'loss'`` (mean per-batch loss).
    """
    eval_loss = 0.0
    nb_eval_steps = 0
    # Collect per-batch arrays and join them once at the end; appending to a
    # growing array copies everything gathered so far on every batch.
    logit_chunks = []
    label_chunks = []

    model.eval()

    for batch in tqdm(test_loader, desc="Evaluating"):
        batch = tuple(t.to(device) for t in batch)
        inputs = {'input_ids': batch[0],
                  'attention_mask': batch[1],
                  'labels': batch[2],
                  'e1_mask': batch[3],
                  'e2_mask': batch[4]}
        with torch.no_grad():
            tmp_eval_loss, logits = model(**inputs)[:2]

        eval_loss += tmp_eval_loss.mean().item()
        nb_eval_steps += 1

        logit_chunks.append(logits.detach().cpu().numpy())
        label_chunks.append(inputs['labels'].detach().cpu().numpy())

    eval_loss = eval_loss / nb_eval_steps
    preds = np.argmax(np.concatenate(logit_chunks, axis=0), axis=1)
    out_label_ids = np.concatenate(label_chunks, axis=0)

    result = {'accuracy' : accuracy_score(out_label_ids, preds),
              'f1_score': f1_score(out_label_ids, preds, average='macro'),
              'pred' : preds,
              'loss' : eval_loss}
    return result

def save_model(model_to_save=None, path='relation_data/trained_models/model.bin'):
    """Write a model's ``state_dict`` to disk, creating the target folder if needed.

    Args:
        model_to_save: module whose weights are saved; defaults to the
            notebook-level ``model``.
        path: destination file; defaults to the location ``load_saved_model`` reads.
    """
    if model_to_save is None:
        model_to_save = model  # notebook-level model, as in the no-argument call
    target_dir = os.path.dirname(path)
    if target_dir:
        # torch.save does not create missing directories.
        os.makedirs(target_dir, exist_ok=True)
    torch.save(model_to_save.state_dict(), path)

def load_saved_model(args):
  """Rebuild the tokenizer/config and load RBERT weights from the saved file.

  Mirrors ``load_model`` but reads the weights from
  'relation_data/trained_models/model.bin' instead of the 'roberta-base' checkpoint.

  Args:
    args: settings dict; ``args['NUM_LABELS']`` sets the number of classes and
      the whole dict is forwarded to ``RBERT``.

  Returns:
    Tuple ``(config, tokenizer, model)``.
  """
  tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
  tokenizer.add_special_tokens({"additional_special_tokens" : ADDITIONAL_SPECIAL_TOKENS})

  config = RobertaConfig.from_pretrained('roberta-base', num_labels = args['NUM_LABELS'])
  # NOTE(review): from_pretrained is given the path of a bare state-dict file -
  # confirm the installed transformers version accepts that.
  model = RBERT.from_pretrained('relation_data/trained_models/model.bin', config=config, args=args)
  # Use the notebook-level device, as load_model does, so CPU-only hosts work too.
  model.to(device)
  return config, tokenizer, model

In [11]:

# Number of batches whose gradients are summed before one optimizer update.
accumulation_steps = args['GRADIENT_ACCUMULATION_STEPS']

# Total number of optimizer updates; the learning-rate schedule spans exactly these.
t_total = len(train_loader) // accumulation_steps * args['EPOCHS']

# Prepare optimizer and schedule (linear warmup and decay)
# Biases and LayerNorm weights are excluded from weight decay.
no_decay = ['bias', 'LayerNorm.weight']
optimizer_grouped_parameters = [
    {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
      'weight_decay': args['WEIGHT_DECAY']},
    {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
]
optimizer = AdamW(optimizer_grouped_parameters, lr=args['LEARNING_RATE'], eps=args['ADAM_EPSILON'])
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=args['NUM_WARMUP_STEPS'], num_training_steps=t_total)


global_step = 0  # optimizer updates performed so far
tr_loss = 0.0    # running sum of the (scaled) batch losses
model.zero_grad()
train_iterator = trange(int(args['EPOCHS']), desc="Epoch")

for _ in train_iterator:
    epoch_iterator = tqdm(train_loader, desc="Iteration")
    for step, batch in enumerate(epoch_iterator):
        model.train()  # evaluate() leaves the model in eval mode
        batch = tuple(t.to(device) for t in batch)  # GPU or CPU
        inputs = {'input_ids': batch[0],
                  'attention_mask': batch[1],
                  'labels': batch[2],
                  'e1_mask': batch[3],
                  'e2_mask': batch[4]}
        outputs = model(**inputs)
        loss = outputs[0]

        # Scale so the accumulated gradient is the mean over the accumulated batches.
        if accumulation_steps > 1:
            loss = loss / accumulation_steps

        loss.backward()

        tr_loss += loss.item()

        # Step the optimizer and schedule only once per accumulation window, so the
        # number of updates matches t_total (every batch when the setting is 1).
        if (step + 1) % accumulation_steps == 0:
            torch.nn.utils.clip_grad_norm_(model.parameters(), args['MAX_GRAD_NORM'])
            optimizer.step()
            scheduler.step()  # Update learning rate schedule
            model.zero_grad()
            global_step += 1


    # Score the model on the test loader after every epoch.
    print("\n====Evaluation====")
    print("\nACCURACY: ", evaluate(model, device, test_loader)['accuracy'])


Epoch:   0%|          | 0/5 [00:00<?, ?it/s]
Iteration:   0%|          | 0/500 [00:00<?, ?it/s][A
Iteration:   0%|          | 1/500 [00:00<03:17,  2.53it/s][A
Iteration:   0%|          | 2/500 [00:00<03:13,  2.58it/s][A
Iteration:   1%|          | 3/500 [00:01<03:09,  2.62it/s][A
Iteration:   1%|          | 4/500 [00:01<03:06,  2.66it/s][A
Iteration:   1%|          | 5/500 [00:01<03:03,  2.70it/s][A
Iteration:   1%|          | 6/500 [00:02<03:02,  2.71it/s][A
Iteration:   1%|▏         | 7/500 [00:02<03:00,  2.73it/s][A
Iteration:   2%|▏         | 8/500 [00:02<02:59,  2.74it/s][A
Iteration:   2%|▏         | 9/500 [00:03<02:57,  2.76it/s][A
Iteration:   2%|▏         | 10/500 [00:03<02:56,  2.77it/s][A
Iteration:   2%|▏         | 11/500 [00:04<02:55,  2.78it/s][A
Iteration:   2%|▏         | 12/500 [00:04<02:55,  2.78it/s][A
Iteration:   3%|▎         | 13/500 [00:04<02:54,  2.79it/s][A
Iteration:   3%|▎         | 14/500 [00:05<02:54,  2.79it/s][A
Iteration:   3%|▎         | 


====Evaluation====



Evaluating:   1%|          | 1/85 [00:00<00:17,  4.69it/s][A
Evaluating:   2%|▏         | 2/85 [00:00<00:17,  4.67it/s][A
Evaluating:   4%|▎         | 3/85 [00:00<00:17,  4.67it/s][A
Evaluating:   5%|▍         | 4/85 [00:00<00:17,  4.66it/s][A
Evaluating:   6%|▌         | 5/85 [00:01<00:17,  4.65it/s][A
Evaluating:   7%|▋         | 6/85 [00:01<00:16,  4.65it/s][A
Evaluating:   8%|▊         | 7/85 [00:01<00:16,  4.65it/s][A
Evaluating:   9%|▉         | 8/85 [00:01<00:16,  4.65it/s][A
Evaluating:  11%|█         | 9/85 [00:01<00:16,  4.64it/s][A
Evaluating:  12%|█▏        | 10/85 [00:02<00:16,  4.64it/s][A
Evaluating:  13%|█▎        | 11/85 [00:02<00:15,  4.64it/s][A
Evaluating:  14%|█▍        | 12/85 [00:02<00:15,  4.65it/s][A
Evaluating:  15%|█▌        | 13/85 [00:02<00:15,  4.65it/s][A
Evaluating:  16%|█▋        | 14/85 [00:03<00:15,  4.65it/s][A
Evaluating:  18%|█▊        | 15/85 [00:03<00:15,  4.66it/s][A
Evaluating:  19%|█▉        | 16/85 [00:03<00:14,  4.65it/s][A



ACCURACY:  0.7854251012145749



Iteration:   0%|          | 1/500 [00:00<03:00,  2.76it/s][A
Iteration:   0%|          | 2/500 [00:00<02:59,  2.77it/s][A
Iteration:   1%|          | 3/500 [00:01<02:59,  2.77it/s][A
Iteration:   1%|          | 4/500 [00:01<03:00,  2.75it/s][A
Iteration:   1%|          | 5/500 [00:01<02:59,  2.76it/s][A
Iteration:   1%|          | 6/500 [00:02<03:00,  2.74it/s][A
Iteration:   1%|▏         | 7/500 [00:02<02:59,  2.74it/s][A
Iteration:   2%|▏         | 8/500 [00:02<02:58,  2.76it/s][A
Iteration:   2%|▏         | 9/500 [00:03<02:58,  2.75it/s][A
Iteration:   2%|▏         | 10/500 [00:03<02:57,  2.76it/s][A
Iteration:   2%|▏         | 11/500 [00:03<02:57,  2.76it/s][A
Iteration:   2%|▏         | 12/500 [00:04<02:57,  2.75it/s][A
Iteration:   3%|▎         | 13/500 [00:04<02:57,  2.75it/s][A
Iteration:   3%|▎         | 14/500 [00:05<02:57,  2.74it/s][A
Iteration:   3%|▎         | 15/500 [00:05<02:58,  2.72it/s][A
Iteration:   3%|▎         | 16/500 [00:05<02:56,  2.74it/s][A



====Evaluation====



Evaluating:   1%|          | 1/85 [00:00<00:17,  4.69it/s][A
Evaluating:   2%|▏         | 2/85 [00:00<00:17,  4.67it/s][A
Evaluating:   4%|▎         | 3/85 [00:00<00:17,  4.66it/s][A
Evaluating:   5%|▍         | 4/85 [00:00<00:17,  4.65it/s][A
Evaluating:   6%|▌         | 5/85 [00:01<00:17,  4.65it/s][A
Evaluating:   7%|▋         | 6/85 [00:01<00:16,  4.65it/s][A
Evaluating:   8%|▊         | 7/85 [00:01<00:16,  4.65it/s][A
Evaluating:   9%|▉         | 8/85 [00:01<00:16,  4.62it/s][A
Evaluating:  11%|█         | 9/85 [00:01<00:16,  4.62it/s][A
Evaluating:  12%|█▏        | 10/85 [00:02<00:16,  4.63it/s][A
Evaluating:  13%|█▎        | 11/85 [00:02<00:15,  4.64it/s][A
Evaluating:  14%|█▍        | 12/85 [00:02<00:15,  4.64it/s][A
Evaluating:  15%|█▌        | 13/85 [00:02<00:15,  4.65it/s][A
Evaluating:  16%|█▋        | 14/85 [00:03<00:15,  4.66it/s][A
Evaluating:  18%|█▊        | 15/85 [00:03<00:15,  4.66it/s][A
Evaluating:  19%|█▉        | 16/85 [00:03<00:14,  4.66it/s][A



ACCURACY:  0.8097165991902834



Iteration:   0%|          | 1/500 [00:00<02:59,  2.79it/s][A
Iteration:   0%|          | 2/500 [00:00<02:59,  2.78it/s][A
Iteration:   1%|          | 3/500 [00:01<03:00,  2.75it/s][A
Iteration:   1%|          | 4/500 [00:01<03:00,  2.75it/s][A
Iteration:   1%|          | 5/500 [00:01<03:00,  2.74it/s][A
Iteration:   1%|          | 6/500 [00:02<02:59,  2.75it/s][A
Iteration:   1%|▏         | 7/500 [00:02<02:58,  2.76it/s][A
Iteration:   2%|▏         | 8/500 [00:02<02:59,  2.75it/s][A
Iteration:   2%|▏         | 9/500 [00:03<02:58,  2.75it/s][A
Iteration:   2%|▏         | 10/500 [00:03<02:57,  2.76it/s][A
Iteration:   2%|▏         | 11/500 [00:03<02:57,  2.76it/s][A
Iteration:   2%|▏         | 12/500 [00:04<02:56,  2.77it/s][A
Iteration:   3%|▎         | 13/500 [00:04<02:56,  2.76it/s][A
Iteration:   3%|▎         | 14/500 [00:05<02:57,  2.75it/s][A
Iteration:   3%|▎         | 15/500 [00:05<02:56,  2.75it/s][A
Iteration:   3%|▎         | 16/500 [00:05<02:56,  2.75it/s][A



====Evaluation====



Evaluating:   1%|          | 1/85 [00:00<00:17,  4.70it/s][A
Evaluating:   2%|▏         | 2/85 [00:00<00:17,  4.68it/s][A
Evaluating:   4%|▎         | 3/85 [00:00<00:17,  4.67it/s][A
Evaluating:   5%|▍         | 4/85 [00:00<00:17,  4.66it/s][A
Evaluating:   6%|▌         | 5/85 [00:01<00:17,  4.66it/s][A
Evaluating:   7%|▋         | 6/85 [00:01<00:16,  4.65it/s][A
Evaluating:   8%|▊         | 7/85 [00:01<00:16,  4.65it/s][A
Evaluating:   9%|▉         | 8/85 [00:01<00:16,  4.65it/s][A
Evaluating:  11%|█         | 9/85 [00:01<00:16,  4.64it/s][A
Evaluating:  12%|█▏        | 10/85 [00:02<00:16,  4.64it/s][A
Evaluating:  13%|█▎        | 11/85 [00:02<00:15,  4.65it/s][A
Evaluating:  14%|█▍        | 12/85 [00:02<00:15,  4.65it/s][A
Evaluating:  15%|█▌        | 13/85 [00:02<00:15,  4.65it/s][A
Evaluating:  16%|█▋        | 14/85 [00:03<00:15,  4.64it/s][A
Evaluating:  18%|█▊        | 15/85 [00:03<00:15,  4.64it/s][A
Evaluating:  19%|█▉        | 16/85 [00:03<00:14,  4.64it/s][A



ACCURACY:  0.8218623481781376



Iteration:   0%|          | 1/500 [00:00<02:58,  2.79it/s][A
Iteration:   0%|          | 2/500 [00:00<02:59,  2.78it/s][A
Iteration:   1%|          | 3/500 [00:01<02:58,  2.78it/s][A
Iteration:   1%|          | 4/500 [00:01<02:58,  2.78it/s][A
Iteration:   1%|          | 5/500 [00:01<02:59,  2.77it/s][A
Iteration:   1%|          | 6/500 [00:02<02:59,  2.76it/s][A
Iteration:   1%|▏         | 7/500 [00:02<02:58,  2.77it/s][A
Iteration:   2%|▏         | 8/500 [00:02<02:58,  2.76it/s][A
Iteration:   2%|▏         | 9/500 [00:03<02:58,  2.75it/s][A
Iteration:   2%|▏         | 10/500 [00:03<02:57,  2.76it/s][A
Iteration:   2%|▏         | 11/500 [00:03<02:57,  2.76it/s][A
Iteration:   2%|▏         | 12/500 [00:04<02:56,  2.77it/s][A
Iteration:   3%|▎         | 13/500 [00:04<02:55,  2.78it/s][A
Iteration:   3%|▎         | 14/500 [00:05<02:55,  2.77it/s][A
Iteration:   3%|▎         | 15/500 [00:05<02:55,  2.77it/s][A
Iteration:   3%|▎         | 16/500 [00:05<02:54,  2.77it/s][A



====Evaluation====



Evaluating:   1%|          | 1/85 [00:00<00:18,  4.65it/s][A
Evaluating:   2%|▏         | 2/85 [00:00<00:17,  4.64it/s][A
Evaluating:   4%|▎         | 3/85 [00:00<00:17,  4.65it/s][A
Evaluating:   5%|▍         | 4/85 [00:00<00:17,  4.65it/s][A
Evaluating:   6%|▌         | 5/85 [00:01<00:17,  4.63it/s][A
Evaluating:   7%|▋         | 6/85 [00:01<00:17,  4.63it/s][A
Evaluating:   8%|▊         | 7/85 [00:01<00:16,  4.63it/s][A
Evaluating:   9%|▉         | 8/85 [00:01<00:16,  4.64it/s][A
Evaluating:  11%|█         | 9/85 [00:01<00:16,  4.64it/s][A
Evaluating:  12%|█▏        | 10/85 [00:02<00:16,  4.65it/s][A
Evaluating:  13%|█▎        | 11/85 [00:02<00:16,  4.61it/s][A
Evaluating:  14%|█▍        | 12/85 [00:02<00:15,  4.61it/s][A
Evaluating:  15%|█▌        | 13/85 [00:02<00:15,  4.62it/s][A
Evaluating:  16%|█▋        | 14/85 [00:03<00:15,  4.63it/s][A
Evaluating:  18%|█▊        | 15/85 [00:03<00:15,  4.63it/s][A
Evaluating:  19%|█▉        | 16/85 [00:03<00:14,  4.62it/s][A



ACCURACY:  0.8295914611704085



Iteration:   0%|          | 1/500 [00:00<03:06,  2.67it/s][A
Iteration:   0%|          | 2/500 [00:00<03:06,  2.67it/s][A
Iteration:   1%|          | 3/500 [00:01<03:05,  2.68it/s][A
Iteration:   1%|          | 4/500 [00:01<03:03,  2.70it/s][A
Iteration:   1%|          | 5/500 [00:01<03:01,  2.72it/s][A
Iteration:   1%|          | 6/500 [00:02<03:00,  2.74it/s][A
Iteration:   1%|▏         | 7/500 [00:02<03:01,  2.72it/s][A
Iteration:   2%|▏         | 8/500 [00:02<03:00,  2.73it/s][A
Iteration:   2%|▏         | 9/500 [00:03<02:59,  2.74it/s][A
Iteration:   2%|▏         | 10/500 [00:03<02:58,  2.75it/s][A
Iteration:   2%|▏         | 11/500 [00:04<02:57,  2.75it/s][A
Iteration:   2%|▏         | 12/500 [00:04<02:58,  2.73it/s][A
Iteration:   3%|▎         | 13/500 [00:04<02:57,  2.74it/s][A
Iteration:   3%|▎         | 14/500 [00:05<02:56,  2.75it/s][A
Iteration:   3%|▎         | 15/500 [00:05<02:55,  2.76it/s][A
Iteration:   3%|▎         | 16/500 [00:05<02:54,  2.77it/s][A



====Evaluation====



Evaluating:   1%|          | 1/85 [00:00<00:17,  4.70it/s][A
Evaluating:   2%|▏         | 2/85 [00:00<00:17,  4.69it/s][A
Evaluating:   4%|▎         | 3/85 [00:00<00:17,  4.67it/s][A
Evaluating:   5%|▍         | 4/85 [00:00<00:17,  4.67it/s][A
Evaluating:   6%|▌         | 5/85 [00:01<00:17,  4.66it/s][A
Evaluating:   7%|▋         | 6/85 [00:01<00:17,  4.64it/s][A
Evaluating:   8%|▊         | 7/85 [00:01<00:16,  4.64it/s][A
Evaluating:   9%|▉         | 8/85 [00:01<00:16,  4.64it/s][A
Evaluating:  11%|█         | 9/85 [00:01<00:16,  4.64it/s][A
Evaluating:  12%|█▏        | 10/85 [00:02<00:16,  4.63it/s][A
Evaluating:  13%|█▎        | 11/85 [00:02<00:15,  4.63it/s][A
Evaluating:  14%|█▍        | 12/85 [00:02<00:15,  4.63it/s][A
Evaluating:  15%|█▌        | 13/85 [00:02<00:15,  4.63it/s][A
Evaluating:  16%|█▋        | 14/85 [00:03<00:15,  4.63it/s][A
Evaluating:  18%|█▊        | 15/85 [00:03<00:15,  4.63it/s][A
Evaluating:  19%|█▉        | 16/85 [00:03<00:14,  4.64it/s][A



ACCURACY:  0.8406330511593669





In [12]:
# Score the model once more on the test loader and report both metrics.
result = evaluate(model, device, test_loader)
_accuracy, _macro_f1 = result['accuracy'], result['f1_score']
print("\nAccuracy: {}\nF1-score:{}".format(_accuracy, _macro_f1))
# labels.iloc[result['pred']].values

Evaluating: 100%|██████████| 85/85 [00:18<00:00,  4.67it/s]



Accuracy: 0.8406330511593669
F1-score:0.8011566095241032


In [13]:
# Nine extra examples in the same shape as the TSV data: a relation label plus a
# text carrying <e1>/<e2> markers. Only the commented-out prediction cell that
# follows refers to this dict.
# NOTE(review): the first text starts with a literal tab character inside the
# string - confirm that is intended.
own_data = {
    'label' : ['Message-Topic(e1,e2)', 'Message-Topic(e1,e2)', 'Component-Whole(e1,e2)',
               'Message-Topic(e2,e1)', 'Cause-Effect(e2,e1)', 'Product-Producer(e1,e2)',
               'Entity-Destination(e1,e2)', 'Component-Whole(e1,e2)', 'Entity-Origin(e1,e2)'],
    'text' : ['	This <e1> article </e1> gives details on 2004 in <e2> music </e2> in the United Kingdom, including the official charts from that year.',
	'We have therefore taken the initiative to convene the first international open <e1> meeting </e1> dedicated solely to <e2> rural history </e2>.',
	'The <e1> timer </e1> of the <e2> device </e2> automatically eliminates wasted "standby power" consumption by automatically turn off electronics plugged into the "auto off" outlets.',
	'Bob Parks made a similar <e1> offer </e1> in a <e2> phone call </e2> made earlier this week.',
	'He had chest pains and <e1> headaches </e1> from <e2> mold </e2> in the bedrooms.',
	'The silver-haired author was not just laying India\'s politician saint to rest but healing a generations-old rift in the family of the <e1> country </e1>\'s founding <e2> father </e2>.',
	'It describes a method for loading a horizontal <e1> stack </e1> of containers into a <e2> carton </e2>.',
	'The Foundation decided to repurpose the building in order to reduce wear and tear on the <e1> plumbing </e1> in the <e2> manor house </e2> by redirecting visitors during restoration projects and beyond.',
	'The technology is available to produce and transmit <e1> electricity </e1> economically from OTEC <e2> systems </e2>.']
}

In [14]:
# df = pd.DataFrame(data=own_data)
# p_data = convert_lines(df, labels, 128, tokenizer)


# # config, tokenizer, model = load_saved_model(args)

# p_sampler = torch.utils.data.SequentialSampler(p_data)
# p_loader = torch.utils.data.DataLoader(p_data, sampler=p_sampler, batch_size=args['BATCH_SIZE'] * 2)

# result_p = evaluate(model, device, p_loader)
# result_p