## Main

In [18]:
import os 
import torch 
import random 
import logging
import numpy as np 

from glue_utils import convert_examples_to_seq_features, compute_metrics_absa, ABSAProcessor, SMALL_POSITIVE_CONST
from tqdm import tqdm, trange 

from transformers import BertConfig, BertTokenizer, BertModel
from transformers import get_linear_schedule_with_warmup

from bert import BertABSATagger

from torch.utils.data import DataLoader, TensorDataset, RandomSampler, SequentialSampler 
from torch.nn.utils import clip_grad_norm_

from tensorboardX import SummaryWriter

import glob, torch

from torch import nn, optim

In [2]:
# Without an explicit configuration the effective level is WARNING and no
# handler is attached, so every logger.info(...) call in this notebook would be
# silently dropped. Configure logging once, then make sure this logger emits
# INFO even if the root logger was already configured by the host environment
# (basicConfig is a no-op when the root logger already has handlers).
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
                    datefmt='%m/%d/%Y %H:%M:%S',
                    level=logging.INFO)
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

In [3]:
# Identifier of the pretrained checkpoint. The previous value used an
# underscore ('bert-base_uncased'), which does not match the checkpoint name
# used by args.model_name_or_path ('bert-base-uncased').
MODELS = 'bert-base-uncased'
# Maps a model-type key to its (config class, model class, tokenizer class).
MODEL_CLASSES = {'bert': (BertConfig, BertABSATagger, BertTokenizer)}

In [21]:
def set_seed(args):
    """Seed every random number generator used by the notebook.

    Args:
        args: any object exposing an integer ``seed`` attribute.

    The same seed is applied, in order, to Python's ``random`` module, NumPy's
    global generator, PyTorch's CPU generator and all CUDA generators.
    """
    seed_value = args.seed
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed_value)

class args:
    """Static hyper-parameter namespace used in place of parsed CLI arguments.

    Attributes are read directly from the class (``args.seed`` etc.); the class
    is never instantiated in this notebook.
    """

    # --- data / model selection ---
    data_dir = './data'
    model_type = 'bert'
    model_name_or_path = 'bert-base-uncased'
    max_seq_length = 512

    # --- tagger settings copied onto the model config ---
    absa_type = 'san'
    tfm_mode = 'finetune'
    fix_tfm = 0

    # --- optimisation (AdamW + linear schedule) ---
    learning_rate = 5e-5
    weight_decay = 1e-8   # applied to every parameter outside the no-decay group
    epsilon = 1e-8        # AdamW eps
    warmup_steps = 0
    max_grad_norm = 1     # gradient-clipping threshold
    batch_size = 128
    num_train_epochs = 10

    # --- bookkeeping (both counted in optimisation steps) ---
    logging_steps = 50    # evaluate and write TensorBoard scalars every N steps
    save_steps = 100      # write a checkpoint every N steps
    seed = 42

    # Use the GPU whenever one is visible to PyTorch.
    device = 'cpu' if not torch.cuda.is_available() else 'cuda'
    
# Results and checkpoints go to "<model_type>-<absa_type>-<tfm_mode>"; the
# value is kept both as a module-level name and on the args namespace.
args.output_dir = output_dir = '-'.join((args.model_type, args.absa_type, args.tfm_mode))

In [5]:
# Build the ABSA data processor (imported from glue_utils) and derive the tag
# inventory from it; num_labels is later passed to the model config.
processor = ABSAProcessor()
label_list = processor.get_labels()
num_labels = len(label_list)

In [6]:
# Look up the (config, model, tokenizer) classes registered under the 'bert' key.
config_class, model_class, tokenizer_class = MODEL_CLASSES['bert']
# Load the pretrained config (with the ABSA label count) and tokenizer, using ./cache as cache_dir.
config = config_class.from_pretrained(args.model_name_or_path, num_labels=num_labels, cache_dir="./cache")
tokenizer = tokenizer_class.from_pretrained(args.model_name_or_path, cache_dir='./cache')

# Copy the tagger-specific settings onto the config BEFORE the model is built;
# presumably BertABSATagger reads them in its constructor — confirm in bert.py.
config.absa_type = args.absa_type
config.tfm_mode = args.tfm_mode
config.fix_tfm = args.fix_tfm
# from_tf is True only when the checkpoint path contains '.ckpt'.
model = model_class.from_pretrained(args.model_name_or_path, from_tf=bool('.ckpt' in args.model_name_or_path),
                                    config=config, cache_dir='./cache')

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertABSATagger: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertABSATagger from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertABSATagger from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertABSATagger were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['bert.embeddi

In [7]:
class BertLayerNorm(nn.Module):
    """Layer normalisation over the last dimension with a learnable scale and shift.

    Args:
        hidden_dim: size of the last dimension of the inputs.
        eps: constant added to the variance before the square root.
    """

    def __init__(self, hidden_dim, eps=1e-12):
        super().__init__()

        # Scale starts at one and shift at zero, i.e. the affine part is an identity.
        self.weight = nn.Parameter(torch.ones(hidden_dim))
        self.bias = nn.Parameter(torch.zeros(hidden_dim))
        self.epsilon = eps

    def forward(self, x):
        """Normalise ``x`` along its last axis, then apply the affine transform."""
        centered = x - x.mean(dim=-1, keepdim=True)
        # Biased variance: plain mean of the squared deviations.
        variance = centered.pow(2).mean(dim=-1, keepdim=True)
        normalized = centered / torch.sqrt(variance + self.epsilon)
        return self.weight * normalized + self.bias
    
class BertPreTrainedModel(BertModel):
    """BertModel subclass that customises how module weights are initialised.

    Args:
        config: the model configuration, forwarded to ``BertModel``.
        *inputs, **kwargs: forwarded unchanged to ``BertModel.__init__``.
    """

    def __init__(self, config, *inputs, **kwargs):
        # BertModel.__init__ requires the configuration object. The previous
        # zero-argument override called it without one, so the class could
        # never be instantiated (directly or through from_pretrained).
        super().__init__(config, *inputs, **kwargs)

    def _init_weights(self, module):
        """Initialise a single sub-module.

        * Linear / Embedding weights: normal(0, config.initializer_range).
        * Layer-norm modules: shift reset to 0 and scale reset to 1.
        * Linear biases: reset to 0.
        """
        if isinstance(module, (nn.Linear, nn.Embedding)):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)

        # The model printed in this notebook uses torch's nn.LayerNorm, so it is
        # handled alongside the local BertLayerNorm. The scale is reset together
        # with the shift so re-initialisation restores a full identity affine.
        elif isinstance(module, (BertLayerNorm, nn.LayerNorm)):
            if module.bias is not None:
                module.bias.data.zero_()
            if module.weight is not None:
                module.weight.data.fill_(1.0)

        if isinstance(module, nn.Linear) and module.bias is not None:
            module.bias.data.zero_()

In [10]:
# Move the model to the configured device ('cuda' when available, else 'cpu').
# As the last expression of the cell, the returned module's repr is displayed.
model.to(args.device)

BertABSATagger(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=Tru

In [46]:
def evaluate(args, model, tokenizer, mode='SimEval_test'):
    """Run the model over one dataset split and report loss and ABSA metrics.

    NOTE(review): this notebook defines ``evaluate`` twice; on a top-to-bottom
    run the later definition is the one that is actually used.
    NOTE(review): ``load_and_cached_examples`` is not defined or imported in the
    visible part of the notebook — confirm where it comes from.

    Args:
        args: settings object; ``output_dir``, ``batch_size`` and ``device`` are read.
        model: called with ``input_ids``, ``attention_mask``, ``token_type_ids``
            and ``labels`` keyword tensors; its first two outputs are taken as
            (loss, logits).
        tokenizer: forwarded to ``load_and_cached_examples``.
        mode: split name forwarded to the loader and used in the results file name.

    Returns:
        dict: the metrics from ``compute_metrics_absa`` plus ``'eval_loss'``
        (mean of the per-batch losses).

    Raises:
        ValueError: if the split yields no batches.

    Side effects:
        Writes ``<args.output_dir>/<mode>_results.txt``. The model's
        train/eval mode is restored to what it was on entry, so calling this
        from inside a training loop does not leave dropout disabled.
    """
    eval_dataset, eval_evaluate_label_ids = load_and_cached_examples(args, tokenizer, mode)

    eval_output_dir = args.output_dir
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(eval_output_dir, exist_ok=True)

    eval_sampler = SequentialSampler(eval_dataset)
    eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=args.batch_size)

    eval_loss, eval_steps = 0.0, 0
    # Per-batch arrays are collected and concatenated once at the end instead
    # of re-copying the whole accumulated array with np.append on every batch.
    logit_chunks, label_chunks = [], []

    was_training = getattr(model, 'training', False)
    model.eval()
    try:
        for batch in tqdm(eval_dataloader, desc='Evaluating...'):
            batch = tuple(t.to(args.device) for t in batch)
            inputs = {'input_ids': batch[0],
                      'attention_mask': batch[1],
                      'token_type_ids': batch[2],
                      'labels': batch[3]}

            with torch.no_grad():
                outputs = model(**inputs)

            temp_eval_loss, logits = outputs[:2]
            # .mean() collapses a per-replica loss vector to a scalar.
            eval_loss += temp_eval_loss.mean().item()
            eval_steps += 1

            logit_chunks.append(logits.detach().cpu().numpy())
            label_chunks.append(inputs['labels'].detach().cpu().numpy())
    finally:
        # Hand the model back in the mode the caller had it in.
        model.train(was_training)

    if eval_steps == 0:
        raise ValueError("No evaluation batches were produced for mode %r" % mode)

    eval_loss /= eval_steps

    out_label_ids = np.concatenate(label_chunks, axis=0)
    # Predicted tag id = index of the largest logit along the last axis.
    preds = np.argmax(np.concatenate(logit_chunks, axis=0), axis=-1)

    result = compute_metrics_absa(preds, out_label_ids, eval_evaluate_label_ids)
    result['eval_loss'] = eval_loss

    output_eval_file = os.path.join(eval_output_dir, '%s_results.txt' % mode)
    with open(output_eval_file, 'w') as writer:
        for key in sorted(result.keys()):
            # Only the loss is echoed to the log; every metric goes to the file.
            if 'eval_loss' in key:
                logger.info(' %s = %s', key, str(result[key]))
            writer.write('%s = %s\n' % (key, str(result[key])))

    return dict(result)

In [12]:
# Load (or build and cache) the training features using the loader's default mode.
# NOTE(review): the Training section repeats this call with 'SimEval_train' and
# the printed cache file name is the same, so this cell looks redundant.
# NOTE(review): load_and_cached_examples is not defined or imported in the
# visible part of the notebook — confirm where it comes from.
train_dataset, all_evaluate_label_ids = load_and_cached_examples(args, tokenizer)

cached_features_file: ./data/cached_SimEval_train_bert-base-uncased_512


## Training

In [14]:
# TensorBoard writer used by the training loop for scalars (default log directory).
tb_writer = SummaryWriter()

# Training features for the 'SimEval_train' split, plus the label ids used for evaluation.
train_dataset, all_evaluate_label_ids = load_and_cached_examples(args, tokenizer, 'SimEval_train')

# Random sampling: batches are drawn in shuffled order.
train_sampler = RandomSampler(train_dataset)
train_dataloader = DataLoader(train_dataset, sampler=train_sampler, batch_size=args.batch_size)

cached_features_file: ./data/cached_SimEval_train_bert-base-uncased_512


In [15]:
# Pull a single batch for manual inspection; each batch unpacks into four
# tensors, in the same order the training loop feeds them to the model.
# These names are not used by the cells shown below.
input_ids, input_mask, segment_ids, label_ids = next(iter(train_dataloader))

In [None]:
def evaluate(args, model, tokenizer, mode='SimEval_test'):
    """Run the model over one dataset split and report loss and ABSA metrics.

    NOTE(review): this notebook defines ``evaluate`` twice; on a top-to-bottom
    run this later definition is the one that is actually used.
    NOTE(review): ``load_and_cached_examples`` is not defined or imported in the
    visible part of the notebook — confirm where it comes from.

    Args:
        args: settings object; ``output_dir``, ``batch_size`` and ``device`` are read.
        model: called with ``input_ids``, ``attention_mask``, ``token_type_ids``
            and ``labels`` keyword tensors; its first two outputs are taken as
            (loss, logits).
        tokenizer: forwarded to ``load_and_cached_examples``.
        mode: split name forwarded to the loader and used in the results file name.

    Returns:
        dict: the metrics from ``compute_metrics_absa`` plus ``'eval_loss'``
        (mean of the per-batch losses).

    Raises:
        ValueError: if the split yields no batches.

    Side effects:
        Writes ``<args.output_dir>/<mode>_results.txt``. The model's
        train/eval mode is restored to what it was on entry, so calling this
        from inside a training loop does not leave dropout disabled.
    """
    eval_dataset, eval_evaluate_label_ids = load_and_cached_examples(args, tokenizer, mode)

    eval_output_dir = args.output_dir
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(eval_output_dir, exist_ok=True)

    eval_sampler = SequentialSampler(eval_dataset)
    eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=args.batch_size)

    eval_loss, eval_steps = 0.0, 0
    # Per-batch arrays are collected and concatenated once at the end instead
    # of re-copying the whole accumulated array with np.append on every batch.
    logit_chunks, label_chunks = [], []

    was_training = getattr(model, 'training', False)
    model.eval()
    try:
        for batch in tqdm(eval_dataloader, desc='Evaluating...'):
            batch = tuple(t.to(args.device) for t in batch)
            inputs = {'input_ids': batch[0],
                      'attention_mask': batch[1],
                      'token_type_ids': batch[2],
                      'labels': batch[3]}

            with torch.no_grad():
                outputs = model(**inputs)

            temp_eval_loss, logits = outputs[:2]
            # .mean() collapses a per-replica loss vector to a scalar.
            eval_loss += temp_eval_loss.mean().item()
            eval_steps += 1

            logit_chunks.append(logits.detach().cpu().numpy())
            label_chunks.append(inputs['labels'].detach().cpu().numpy())
    finally:
        # Hand the model back in the mode the caller had it in.
        model.train(was_training)

    if eval_steps == 0:
        raise ValueError("No evaluation batches were produced for mode %r" % mode)

    eval_loss /= eval_steps

    out_label_ids = np.concatenate(label_chunks, axis=0)
    # Predicted tag id = index of the largest logit along the last axis.
    preds = np.argmax(np.concatenate(logit_chunks, axis=0), axis=-1)

    result = compute_metrics_absa(preds, out_label_ids, eval_evaluate_label_ids)
    result['eval_loss'] = eval_loss

    output_eval_file = os.path.join(eval_output_dir, '%s_results.txt' % mode)
    with open(output_eval_file, 'w') as writer:
        for key in sorted(result.keys()):
            # Only the loss is echoed to the log; every metric goes to the file.
            if 'eval_loss' in key:
                logger.info(' %s = %s', key, str(result[key]))
            writer.write('%s = %s\n' % (key, str(result[key])))

    return dict(result)

In [None]:
# ---- Optimiser and learning-rate schedule ----
# One optimiser step per batch, so the schedule spans batches-per-epoch * epochs.
t_total = len(train_dataloader) * args.num_train_epochs

# Parameters whose name contains one of these substrings get no weight decay.
no_decay = ['bias', 'LayerNorm.weight']
optimizer_grouped_parameters = [
    {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': args.weight_decay},
    {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
    ]
optimizer = optim.AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.epsilon)
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=args.warmup_steps, num_training_steps=t_total)

# ---- Training ----

logger.info('***** Running training *****')
logger.info(f'Num Examples: {len(train_dataset)}')
logger.info(f'Num Epochs: {args.num_train_epochs}')
logger.info(f'Total optimization steps: {t_total}')

global_step = 0
# train_loss accumulates every step's loss; logging_loss remembers its value at
# the previous logging point so the mean over the last window can be reported.
train_loss, logging_loss = 0.0, 0.0

train_iterator = trange(args.num_train_epochs, desc='Epochs...', disable=False, ascii=True)
set_seed(args)

for _ in train_iterator:
    epoch_iterator = tqdm(train_dataloader, desc='Iteration...', disable=False, ascii=True)
    for step, batch in enumerate(epoch_iterator):
        # evaluate() calls model.eval(); re-enter training mode on every step so
        # dropout is active again after each periodic evaluation, not only at
        # the start of the next epoch.
        model.train()

        batch = tuple(t.to(args.device) for t in batch)
        inputs = {'input_ids': batch[0],
                  'attention_mask': batch[1],
                  'token_type_ids': batch[2],
                  'labels': batch[3]}

        optimizer.zero_grad()
        outputs = model(**inputs)  # (loss, logits, outputs[2:])
        loss = outputs[0]
        loss.backward()
        clip_grad_norm_(model.parameters(), args.max_grad_norm)

        train_loss += loss.item()
        optimizer.step()
        scheduler.step()
        global_step += 1

        if global_step % args.logging_steps == 0:
            results = evaluate(args, model, tokenizer)
            for key, value in results.items():
                tb_writer.add_scalar(f'eval_{key}', value, global_step)

            tb_writer.add_scalar('lr', scheduler.get_last_lr()[0], global_step)
            tb_writer.add_scalar('loss', (train_loss - logging_loss)/args.logging_steps, global_step)
            logging_loss = train_loss

        if global_step % args.save_steps == 0:
            # A separate name keeps the module-level `output_dir` from being
            # overwritten with a checkpoint sub-directory.
            checkpoint_dir = os.path.join(args.output_dir, 'checkpoint-{}'.format(global_step))
            os.makedirs(checkpoint_dir, exist_ok=True)
            # Unwrap a DataParallel-style wrapper (anything exposing `.module`).
            model_to_save = model.module if hasattr(model, 'module') else model
            model_to_save.save_pretrained(checkpoint_dir)
            # NOTE(review): `args` is a class, so it is pickled by reference and
            # the hyper-parameter values themselves are not stored in this file.
            torch.save(args, os.path.join(checkpoint_dir, 'training_args.bin'))
            logger.info('Saving model checkpoint to %s' % checkpoint_dir)

# Close the writer once, after the last epoch (it used to be closed at the end
# of every epoch while later epochs were still logging to it).
tb_writer.close()
    
#     global_step, train_loss / global_step 