In [1]:
#env
import nltk
nltk.download('punkt')

[nltk_data] Downloading package punkt to /home/ubuntu/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [2]:
import torch

In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [4]:
torch.cuda.current_device()
torch.cuda.device(0)


<torch.cuda.device at 0x7f998077d2b0>

## R-GAT

In [5]:
# coding=utf-8
import argparse
import logging
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "2"
import random

import numpy as np
import pandas as pd
import torch
from transformers import (BertConfig, BertForTokenClassification,
                                  BertTokenizer)
from torch.utils.data import DataLoader

from datasets import load_datasets_and_vocabs
from model import (Aspect_Text_GAT_ours,
                    Pure_Bert, Aspect_Bert_GAT, Aspect_Text_GAT_only)
from trainer import train

logger = logging.getLogger(__name__)

def set_seed(args):
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

def parse_args(args):
    parser = argparse.ArgumentParser()

    # Required parameters
    parser.add_argument('--dataset_name', type=str, default='rest',
                        choices=['rest', 'laptop', 'twitter'],
                        help='Choose absa dataset.')
    parser.add_argument('--output_dir', type=str, default='/data1/SHENWZH/ABSA_online/data/output-gcn',
                        help='Directory to store intermedia data, such as vocab, embeddings, tags_vocab.')
    parser.add_argument('--num_classes', type=int, default=3,
                        help='Number of classes of ABSA.')


    parser.add_argument('--cuda_id', type=str, default='3',
                        help='Choose which GPUs to run')
    parser.add_argument('--seed', type=int, default=2019,
                        help='random seed for initialization')

    # Model parameters
    parser.add_argument('--glove_dir', type=str, default='/data1/SHENWZH/wordvec',
                        help='Directory storing glove embeddings')
    parser.add_argument('--bert_model_dir', type=str, default='/data1/SHENWZH/models/bert_base',
                        help='Path to pre-trained Bert model.')
    parser.add_argument('--pure_bert', action='store_true',
                        help='Cat text and aspect, [cls] to predict.')
    parser.add_argument('--gat_bert', action='store_true',
                        help='Cat text and aspect, [cls] to predict.')

    parser.add_argument('--highway', action='store_true',
                        help='Use highway embed.')

    parser.add_argument('--num_layers', type=int, default=2,
                        help='Number of layers of bilstm or highway or elmo.')


    parser.add_argument('--add_non_connect',  type= bool, default=True,
                        help='Add a sepcial "non-connect" relation for aspect with no direct connection.')
    parser.add_argument('--multi_hop',  type= bool, default=True,
                        help='Multi hop non connection.')
    parser.add_argument('--max_hop', type = int, default=4,
                        help='max number of hops')


    parser.add_argument('--num_heads', type=int, default=6,
                        help='Number of heads for gat.')
    
    parser.add_argument('--dropout', type=float, default=0,
                        help='Dropout rate for embedding.')


    parser.add_argument('--num_gcn_layers', type=int, default=1,
                        help='Number of GCN layers.')
    parser.add_argument('--gcn_mem_dim', type=int, default=300,
                        help='Dimension of the W in GCN.')
    parser.add_argument('--gcn_dropout', type=float, default=0.2,
                        help='Dropout rate for GCN.')
    # GAT
    parser.add_argument('--gat', action='store_true',
                        help='GAT')
    parser.add_argument('--gat_our', action='store_true',
                        help='GAT_our')
    parser.add_argument('--gat_attention_type', type = str, choices=['linear','dotprod','gcn'], default='dotprod',
                        help='The attention used for gat')

    parser.add_argument('--embedding_type', type=str,default='glove', choices=['glove','bert'])
    parser.add_argument('--embedding_dim', type=int, default=300,
                        help='Dimension of glove embeddings')
    parser.add_argument('--dep_relation_embed_dim', type=int, default=300,
                        help='Dimension for dependency relation embeddings.')

    parser.add_argument('--hidden_size', type=int, default=300,
                        help='Hidden size of bilstm, in early stage.')
    parser.add_argument('--final_hidden_size', type=int, default=300,
                        help='Hidden size of bilstm, in early stage.')
    parser.add_argument('--num_mlps', type=int, default=2,
                        help='Number of mlps in the last of model.')

    # Training parameters
    parser.add_argument("--per_gpu_train_batch_size", default=16, type=int,
                        help="Batch size per GPU/CPU for training.")
    parser.add_argument("--per_gpu_eval_batch_size", default=32, type=int,
                        help="Batch size per GPU/CPU for evaluation.")
    parser.add_argument('--gradient_accumulation_steps', type=int, default=2,
                        help="Number of updates steps to accumulate before performing a backward/update pass.")
    parser.add_argument("--learning_rate", default=1e-3, type=float,
                        help="The initial learning rate for Adam.")
    
    parser.add_argument("--weight_decay", default=0.0, type=float,
                        help="Weight deay if we apply some.")
    parser.add_argument("--adam_epsilon", default=1e-8, type=float,
                        help="Epsilon for Adam optimizer.")

    parser.add_argument("--max_grad_norm", default=1.0, type=float,
                        help="Max gradient norm.")
    parser.add_argument("--num_train_epochs", default=30.0, type=float,
                        help="Total number of training epochs to perform.")
    parser.add_argument("--max_steps", default=-1, type=int,
                        help="If > 0: set total number of training steps(that update the weights) to perform. Override num_train_epochs.")
    parser.add_argument('--logging_steps', type=int, default=50,
                        help="Log every X updates steps.")
    
    return parser.parse_args(args)


def check_args(args):
    '''
    eliminate confilct situations
    
    '''
    logger.info(vars(args))

In [12]:
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
                        datefmt='%m/%d/%Y %H:%M:%S',
                        level=logging.INFO)
    
# Parse args

args_str = "--embedding_type bert --output_dir data/output-gcn --dropout 0.3 --hidden_size 200 --learning_rate 5e-5 --bert_model_dir ./test/saved_model --pure_bert"
#args = parse_args(['--gat_our', '--highway', '--num_heads', '7', '--dropout', '0.8', '--output_dir',
#                   'output/r-gat', '--glove_dir', 'glove', '--cuda_id', '0'])
args = parse_args(args_str.split(' '))
check_args(args)

# Setup CUDA, GPU training
os.environ["CUDA_VISIBLE_DEVICES"] = args.cuda_id
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
args.device = device
logger.info('Device is %s', args.device)

# Set seed
set_seed(args)

# Bert, load pretrained model and tokenizer, check if neccesary to put bert here
if args.embedding_type == 'bert':
    tokenizer = BertTokenizer.from_pretrained(args.bert_model_dir)
    args.tokenizer = tokenizer

# Load datasets and vocabs
train_dataset, test_dataset, word_vocab, dep_tag_vocab, pos_tag_vocab= load_datasets_and_vocabs(args)

# Build Model
# model = Aspect_Text_Multi_Syntax_Encoding(args, dep_tag_vocab['len'], pos_tag_vocab['len'])
if args.pure_bert:
    model = Pure_Bert(args)
elif args.gat_bert:
    model = Aspect_Bert_GAT(args, dep_tag_vocab['len'], pos_tag_vocab['len'])  # R-GAT + Bert
elif args.gat_our:
    model = Aspect_Text_GAT_ours(args, dep_tag_vocab['len'], pos_tag_vocab['len']) # R-GAT with reshaped tree
else:
    model = Aspect_Text_GAT_only(args, dep_tag_vocab['len'], pos_tag_vocab['len'])  # original GAT with reshaped tree

model.to(args.device)
# Train


07/19/2021 03:37:37 - INFO - __main__ -   {'dataset_name': 'rest', 'output_dir': 'data/output-gcn', 'num_classes': 3, 'cuda_id': '3', 'seed': 2019, 'glove_dir': '/data1/SHENWZH/wordvec', 'bert_model_dir': './test/saved_model', 'pure_bert': True, 'gat_bert': False, 'highway': False, 'num_layers': 2, 'add_non_connect': True, 'multi_hop': True, 'max_hop': 4, 'num_heads': 6, 'dropout': 0.3, 'num_gcn_layers': 1, 'gcn_mem_dim': 300, 'gcn_dropout': 0.2, 'gat': False, 'gat_our': False, 'gat_attention_type': 'dotprod', 'embedding_type': 'bert', 'embedding_dim': 300, 'dep_relation_embed_dim': 300, 'hidden_size': 200, 'final_hidden_size': 300, 'num_mlps': 2, 'per_gpu_train_batch_size': 16, 'per_gpu_eval_batch_size': 32, 'gradient_accumulation_steps': 2, 'learning_rate': 5e-05, 'weight_decay': 0.0, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 30.0, 'max_steps': -1, 'logging_steps': 50}
07/19/2021 03:37:37 - INFO - __main__ -   Device is cuda
07/19/2021 03:37:37 - INFO - dataset

Pure_Bert(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
  

### GAT+GLOVE

In [35]:
# Train
_, _,  all_eval_results = train(args, train_dataset, model, test_dataset)

if len(all_eval_results):
    best_eval_result = max(all_eval_results, key=lambda x: x['acc']) 
    for key in sorted(best_eval_result.keys()):
        logger.info("  %s = %s", key, str(best_eval_result[key]))

07/07/2021 07:04:56 - INFO - trainer -   ***** Running training *****
07/07/2021 07:04:56 - INFO - trainer -     Num examples = 3602
07/07/2021 07:04:56 - INFO - trainer -     Num Epochs = 30
07/07/2021 07:04:56 - INFO - trainer -     Instantaneous batch size per GPU = 16
07/07/2021 07:04:56 - INFO - trainer -     Gradient Accumulation steps = 2
07/07/2021 07:04:56 - INFO - trainer -     Total optimization steps = 3390
07/07/2021 07:04:59 - INFO - trainer -   ***** Running evaluation *****
07/07/2021 07:04:59 - INFO - trainer -     Num examples = 1120
07/07/2021 07:04:59 - INFO - trainer -     Batch size = 32
07/07/2021 07:05:00 - INFO - trainer -   ***** Eval results *****
07/07/2021 07:05:00 - INFO - trainer -     eval loss: 0.8966324329376221
07/07/2021 07:05:00 - INFO - trainer -     acc = 0.65
07/07/2021 07:05:00 - INFO - trainer -     f1 = 0.26262626262626265
07/07/2021 07:05:03 - INFO - trainer -   ***** Running evaluation *****
07/07/2021 07:05:03 - INFO - trainer -     Num exa

07/07/2021 07:05:57 - INFO - trainer -     Batch size = 32
07/07/2021 07:05:58 - INFO - trainer -   ***** Eval results *****
07/07/2021 07:05:58 - INFO - trainer -     eval loss: 0.5531125336885452
07/07/2021 07:05:58 - INFO - trainer -     acc = 0.7830357142857143
07/07/2021 07:05:58 - INFO - trainer -     f1 = 0.6531542856177347
07/07/2021 07:06:01 - INFO - trainer -   ***** Running evaluation *****
07/07/2021 07:06:01 - INFO - trainer -     Num examples = 1120
07/07/2021 07:06:01 - INFO - trainer -     Batch size = 32
07/07/2021 07:06:02 - INFO - trainer -   ***** Eval results *****
07/07/2021 07:06:02 - INFO - trainer -     eval loss: 0.5123141582523073
07/07/2021 07:06:02 - INFO - trainer -     acc = 0.8098214285714286
07/07/2021 07:06:02 - INFO - trainer -     f1 = 0.7146273947186691
07/07/2021 07:06:05 - INFO - trainer -   ***** Running evaluation *****
07/07/2021 07:06:05 - INFO - trainer -     Num examples = 1120
07/07/2021 07:06:05 - INFO - trainer -     Batch size = 32
07/07

07/07/2021 07:07:03 - INFO - trainer -     Num examples = 1120
07/07/2021 07:07:03 - INFO - trainer -     Batch size = 32
07/07/2021 07:07:04 - INFO - trainer -   ***** Eval results *****
07/07/2021 07:07:04 - INFO - trainer -     eval loss: 0.5151275683726583
07/07/2021 07:07:04 - INFO - trainer -     acc = 0.7964285714285714
07/07/2021 07:07:04 - INFO - trainer -     f1 = 0.7051286726617215
Epoch:  50%|█████     | 15/30 [02:11<02:10,  8.68s/it]07/07/2021 07:07:07 - INFO - trainer -   ***** Running evaluation *****
07/07/2021 07:07:07 - INFO - trainer -     Num examples = 1120
07/07/2021 07:07:07 - INFO - trainer -     Batch size = 32
07/07/2021 07:07:08 - INFO - trainer -   ***** Eval results *****
07/07/2021 07:07:08 - INFO - trainer -     eval loss: 0.5232623447264944
07/07/2021 07:07:08 - INFO - trainer -     acc = 0.8026785714285715
07/07/2021 07:07:08 - INFO - trainer -     f1 = 0.7031924217953387
07/07/2021 07:07:11 - INFO - trainer -   ***** Running evaluation *****
07/07/2021

Epoch:  73%|███████▎  | 22/30 [03:11<01:08,  8.62s/it]07/07/2021 07:08:08 - INFO - trainer -   ***** Running evaluation *****
07/07/2021 07:08:08 - INFO - trainer -     Num examples = 1120
07/07/2021 07:08:08 - INFO - trainer -     Batch size = 32
07/07/2021 07:08:09 - INFO - trainer -   ***** Eval results *****
07/07/2021 07:08:09 - INFO - trainer -     eval loss: 0.5286704891494343
07/07/2021 07:08:09 - INFO - trainer -     acc = 0.8008928571428572
07/07/2021 07:08:09 - INFO - trainer -     f1 = 0.7103597361556636
07/07/2021 07:08:12 - INFO - trainer -   ***** Running evaluation *****
07/07/2021 07:08:12 - INFO - trainer -     Num examples = 1120
07/07/2021 07:08:12 - INFO - trainer -     Batch size = 32
07/07/2021 07:08:13 - INFO - trainer -   ***** Eval results *****
07/07/2021 07:08:13 - INFO - trainer -     eval loss: 0.5869143947958946
07/07/2021 07:08:13 - INFO - trainer -     acc = 0.7883928571428571
07/07/2021 07:08:13 - INFO - trainer -     f1 = 0.6839134561333817
Epoch:  77

07/07/2021 07:09:11 - INFO - trainer -     acc = 0.7857142857142857
07/07/2021 07:09:11 - INFO - trainer -     f1 = 0.6820553407700896
07/07/2021 07:09:15 - INFO - trainer -   ***** Running evaluation *****
07/07/2021 07:09:15 - INFO - trainer -     Num examples = 1120
07/07/2021 07:09:15 - INFO - trainer -     Batch size = 32
07/07/2021 07:09:15 - INFO - trainer -   ***** Eval results *****
07/07/2021 07:09:15 - INFO - trainer -     eval loss: 0.5891395215477262
07/07/2021 07:09:15 - INFO - trainer -     acc = 0.8053571428571429
07/07/2021 07:09:15 - INFO - trainer -     f1 = 0.715551294462741
Epoch: 100%|██████████| 30/30 [04:22<00:00,  8.74s/it]
07/07/2021 07:09:18 - INFO - __main__ -     acc = 0.8133928571428571
07/07/2021 07:09:18 - INFO - __main__ -     f1 = 0.7244278215763696


## Pure Bert 

In [11]:
# Train
_, _,  all_eval_results = train(args, train_dataset, model, test_dataset)

if len(all_eval_results):
    best_eval_result = max(all_eval_results, key=lambda x: x['acc']) 
    for key in sorted(best_eval_result.keys()):
        logger.info("  %s = %s", key, str(best_eval_result[key]))

07/18/2021 04:08:40 - INFO - trainer -   ***** Running training *****
07/18/2021 04:08:40 - INFO - trainer -     Num examples = 3602
07/18/2021 04:08:40 - INFO - trainer -     Num Epochs = 30
07/18/2021 04:08:40 - INFO - trainer -     Instantaneous batch size per GPU = 16
07/18/2021 04:08:40 - INFO - trainer -     Gradient Accumulation steps = 2
07/18/2021 04:08:40 - INFO - trainer -     Total optimization steps = 3390
Epoch:   0%|          | 0/30 [00:00<?, ?it/s]07/18/2021 04:08:49 - INFO - trainer -   ***** Running evaluation *****
07/18/2021 04:08:49 - INFO - trainer -     Num examples = 1120
07/18/2021 04:08:49 - INFO - trainer -     Batch size = 32
07/18/2021 04:08:50 - INFO - trainer -   ***** Eval results *****
07/18/2021 04:08:50 - INFO - trainer -     eval loss: 0.65519928250994
07/18/2021 04:08:50 - INFO - trainer -     acc = 0.725
07/18/2021 04:08:50 - INFO - trainer -     f1 = 0.4891617091386233
07/18/2021 04:08:58 - INFO - trainer -   ***** Running evaluation *****
07/18/2

07/18/2021 04:11:03 - INFO - trainer -     f1 = 0.7800614895524336
07/18/2021 04:11:11 - INFO - trainer -   ***** Running evaluation *****
07/18/2021 04:11:11 - INFO - trainer -     Num examples = 1120
07/18/2021 04:11:11 - INFO - trainer -     Batch size = 32
07/18/2021 04:11:12 - INFO - trainer -   ***** Eval results *****
07/18/2021 04:11:12 - INFO - trainer -     eval loss: 0.686464019172958
07/18/2021 04:11:12 - INFO - trainer -     acc = 0.8357142857142857
07/18/2021 04:11:12 - INFO - trainer -     f1 = 0.7602204045209969
07/18/2021 04:11:20 - INFO - trainer -   ***** Running evaluation *****
07/18/2021 04:11:20 - INFO - trainer -     Num examples = 1120
07/18/2021 04:11:20 - INFO - trainer -     Batch size = 32
07/18/2021 04:11:21 - INFO - trainer -   ***** Eval results *****
07/18/2021 04:11:21 - INFO - trainer -     eval loss: 0.7699188064118582
07/18/2021 04:11:21 - INFO - trainer -     acc = 0.8410714285714286
07/18/2021 04:11:21 - INFO - trainer -     f1 = 0.745771367617637

07/18/2021 04:13:32 - INFO - trainer -     acc = 0.8517857142857143
07/18/2021 04:13:32 - INFO - trainer -     f1 = 0.7740612033907804
Epoch:  50%|█████     | 15/30 [04:59<04:53, 19.59s/it]07/18/2021 04:13:40 - INFO - trainer -   ***** Running evaluation *****
07/18/2021 04:13:40 - INFO - trainer -     Num examples = 1120
07/18/2021 04:13:40 - INFO - trainer -     Batch size = 32
07/18/2021 04:13:41 - INFO - trainer -   ***** Eval results *****
07/18/2021 04:13:41 - INFO - trainer -     eval loss: 0.8870214012734193
07/18/2021 04:13:41 - INFO - trainer -     acc = 0.8589285714285714
07/18/2021 04:13:41 - INFO - trainer -     f1 = 0.7918260244487366
07/18/2021 04:13:48 - INFO - trainer -   ***** Running evaluation *****
07/18/2021 04:13:48 - INFO - trainer -     Num examples = 1120
07/18/2021 04:13:48 - INFO - trainer -     Batch size = 32
07/18/2021 04:13:50 - INFO - trainer -   ***** Eval results *****
07/18/2021 04:13:50 - INFO - trainer -     eval loss: 0.7627020764946272
07/18/2021

07/18/2021 04:16:00 - INFO - trainer -   ***** Eval results *****
07/18/2021 04:16:00 - INFO - trainer -     eval loss: 0.9538841735305531
07/18/2021 04:16:00 - INFO - trainer -     acc = 0.8544642857142857
07/18/2021 04:16:00 - INFO - trainer -     f1 = 0.784057441411119
07/18/2021 04:16:08 - INFO - trainer -   ***** Running evaluation *****
07/18/2021 04:16:08 - INFO - trainer -     Num examples = 1120
07/18/2021 04:16:08 - INFO - trainer -     Batch size = 32
07/18/2021 04:16:09 - INFO - trainer -   ***** Eval results *****
07/18/2021 04:16:09 - INFO - trainer -     eval loss: 0.9816775739192962
07/18/2021 04:16:09 - INFO - trainer -     acc = 0.8464285714285714
07/18/2021 04:16:09 - INFO - trainer -     f1 = 0.7813942495060991
Epoch:  77%|███████▋  | 23/30 [07:36<02:16, 19.56s/it]07/18/2021 04:16:17 - INFO - trainer -   ***** Running evaluation *****
07/18/2021 04:16:17 - INFO - trainer -     Num examples = 1120
07/18/2021 04:16:17 - INFO - trainer -     Batch size = 32
07/18/2021 

07/18/2021 04:18:27 - INFO - trainer -     Num examples = 1120
07/18/2021 04:18:27 - INFO - trainer -     Batch size = 32
07/18/2021 04:18:28 - INFO - trainer -   ***** Eval results *****
07/18/2021 04:18:28 - INFO - trainer -     eval loss: 0.9756734546186635
07/18/2021 04:18:28 - INFO - trainer -     acc = 0.8758928571428571
07/18/2021 04:18:28 - INFO - trainer -     f1 = 0.817788715274943
Epoch: 100%|██████████| 30/30 [09:54<00:00, 19.80s/it]
07/18/2021 04:18:34 - INFO - __main__ -     acc = 0.8758928571428571
07/18/2021 04:18:34 - INFO - __main__ -     f1 = 0.817788715274943


### pure bert exp

In [15]:
_, _,  all_eval_results = train(args, train_dataset, model, test_dataset)

if len(all_eval_results):
    best_eval_result = max(all_eval_results, key=lambda x: x['acc']) 
    for key in sorted(best_eval_result.keys()):
        logger.info("  %s = %s", key, str(best_eval_result[key]))

07/18/2021 04:25:09 - INFO - trainer -   ***** Running training *****
07/18/2021 04:25:09 - INFO - trainer -     Num examples = 3602
07/18/2021 04:25:09 - INFO - trainer -     Num Epochs = 30
07/18/2021 04:25:09 - INFO - trainer -     Instantaneous batch size per GPU = 16
07/18/2021 04:25:09 - INFO - trainer -     Gradient Accumulation steps = 2
07/18/2021 04:25:09 - INFO - trainer -     Total optimization steps = 3390


Epoch:   0%|          | 0/30 [00:00<?, ?it/s][A[A07/18/2021 04:25:17 - INFO - trainer -   ***** Running evaluation *****
07/18/2021 04:25:17 - INFO - trainer -     Num examples = 1120
07/18/2021 04:25:17 - INFO - trainer -     Batch size = 32
07/18/2021 04:25:18 - INFO - trainer -   ***** Eval results *****
07/18/2021 04:25:18 - INFO - trainer -     eval loss: 0.65519928250994
07/18/2021 04:25:18 - INFO - trainer -     acc = 0.725
07/18/2021 04:25:18 - INFO - trainer -     f1 = 0.4891617091386233
07/18/2021 04:25:26 - INFO - trainer -   ***** Running evaluation *****

07/18/2021 04:27:34 - INFO - trainer -     acc = 0.8482142857142857
07/18/2021 04:27:34 - INFO - trainer -     f1 = 0.7800614895524336
07/18/2021 04:27:42 - INFO - trainer -   ***** Running evaluation *****
07/18/2021 04:27:42 - INFO - trainer -     Num examples = 1120
07/18/2021 04:27:42 - INFO - trainer -     Batch size = 32
07/18/2021 04:27:43 - INFO - trainer -   ***** Eval results *****
07/18/2021 04:27:43 - INFO - trainer -     eval loss: 0.686464019172958
07/18/2021 04:27:43 - INFO - trainer -     acc = 0.8357142857142857
07/18/2021 04:27:43 - INFO - trainer -     f1 = 0.7602204045209969
07/18/2021 04:27:50 - INFO - trainer -   ***** Running evaluation *****
07/18/2021 04:27:50 - INFO - trainer -     Num examples = 1120
07/18/2021 04:27:50 - INFO - trainer -     Batch size = 32
07/18/2021 04:27:52 - INFO - trainer -   ***** Eval results *****
07/18/2021 04:27:52 - INFO - trainer -     eval loss: 0.7699188064118582
07/18/2021 04:27:52 - INFO - trainer -     acc = 0.84107142857142

07/18/2021 04:30:04 - INFO - trainer -   ***** Eval results *****
07/18/2021 04:30:04 - INFO - trainer -     eval loss: 0.8053744528741975
07/18/2021 04:30:04 - INFO - trainer -     acc = 0.8517857142857143
07/18/2021 04:30:04 - INFO - trainer -     f1 = 0.7740612033907804


Epoch:  50%|█████     | 15/30 [05:01<04:58, 19.91s/it][A[A07/18/2021 04:30:12 - INFO - trainer -   ***** Running evaluation *****
07/18/2021 04:30:12 - INFO - trainer -     Num examples = 1120
07/18/2021 04:30:12 - INFO - trainer -     Batch size = 32
07/18/2021 04:30:13 - INFO - trainer -   ***** Eval results *****
07/18/2021 04:30:13 - INFO - trainer -     eval loss: 0.8870214012734193
07/18/2021 04:30:13 - INFO - trainer -     acc = 0.8589285714285714
07/18/2021 04:30:13 - INFO - trainer -     f1 = 0.7918260244487366
07/18/2021 04:30:20 - INFO - trainer -   ***** Running evaluation *****
07/18/2021 04:30:20 - INFO - trainer -     Num examples = 1120
07/18/2021 04:30:20 - INFO - trainer -     Batch size = 32
07

07/18/2021 04:34:51 - INFO - trainer -     eval loss: 1.1007224327805618
07/18/2021 04:34:51 - INFO - trainer -     acc = 0.8553571428571428
07/18/2021 04:34:51 - INFO - trainer -     f1 = 0.776227508478558
07/18/2021 04:34:59 - INFO - trainer -   ***** Running evaluation *****
07/18/2021 04:34:59 - INFO - trainer -     Num examples = 1120
07/18/2021 04:34:59 - INFO - trainer -     Batch size = 32
07/18/2021 04:35:00 - INFO - trainer -   ***** Eval results *****
07/18/2021 04:35:00 - INFO - trainer -     eval loss: 0.9756734546186635
07/18/2021 04:35:00 - INFO - trainer -     acc = 0.8758928571428571
07/18/2021 04:35:00 - INFO - trainer -     f1 = 0.817788715274943


Epoch: 100%|██████████| 30/30 [09:56<00:00, 19.89s/it][A[A
07/18/2021 04:35:06 - INFO - __main__ -     acc = 0.8758928571428571
07/18/2021 04:35:06 - INFO - __main__ -     f1 = 0.817788715274943


### Pure Bert, output 6th hidden layer, (pooled_output = outputs[2][6][:,0, :])

In [7]:
_, _,  all_eval_results = train(args, train_dataset, model, test_dataset)

if len(all_eval_results):
    best_eval_result = max(all_eval_results, key=lambda x: x['acc']) 
    for key in sorted(best_eval_result.keys()):
        logger.info("  %s = %s", key, str(best_eval_result[key]))

07/19/2021 02:44:51 - INFO - trainer -   ***** Running training *****
07/19/2021 02:44:51 - INFO - trainer -     Num examples = 3602
07/19/2021 02:44:51 - INFO - trainer -     Num Epochs = 30
07/19/2021 02:44:51 - INFO - trainer -     Instantaneous batch size per GPU = 16
07/19/2021 02:44:51 - INFO - trainer -     Gradient Accumulation steps = 2
07/19/2021 02:44:51 - INFO - trainer -     Total optimization steps = 3390
Epoch:   0%|          | 0/30 [00:00<?, ?it/s]07/19/2021 02:44:58 - INFO - trainer -   ***** Running evaluation *****
07/19/2021 02:44:58 - INFO - trainer -     Num examples = 1120
07/19/2021 02:44:58 - INFO - trainer -     Batch size = 32
07/19/2021 02:44:59 - INFO - trainer -   ***** Eval results *****
07/19/2021 02:44:59 - INFO - trainer -     eval loss: 0.7873028542314257
07/19/2021 02:44:59 - INFO - trainer -     acc = 0.6669642857142857
07/19/2021 02:44:59 - INFO - trainer -     f1 = 0.3366351235612375
07/19/2021 02:45:05 - INFO - trainer -   ***** Running evaluatio

07/19/2021 02:46:46 - INFO - trainer -     f1 = 0.696779708089888
07/19/2021 02:46:52 - INFO - trainer -   ***** Running evaluation *****
07/19/2021 02:46:52 - INFO - trainer -     Num examples = 1120
07/19/2021 02:46:52 - INFO - trainer -     Batch size = 32
07/19/2021 02:46:53 - INFO - trainer -   ***** Eval results *****
07/19/2021 02:46:53 - INFO - trainer -     eval loss: 0.9371733822992869
07/19/2021 02:46:53 - INFO - trainer -     acc = 0.80625
07/19/2021 02:46:53 - INFO - trainer -     f1 = 0.7095328450514696
07/19/2021 02:46:59 - INFO - trainer -   ***** Running evaluation *****
07/19/2021 02:46:59 - INFO - trainer -     Num examples = 1120
07/19/2021 02:46:59 - INFO - trainer -     Batch size = 32
07/19/2021 02:47:00 - INFO - trainer -   ***** Eval results *****
07/19/2021 02:47:00 - INFO - trainer -     eval loss: 0.9312272314514433
07/19/2021 02:47:00 - INFO - trainer -     acc = 0.8098214285714286
07/19/2021 02:47:00 - INFO - trainer -     f1 = 0.7157206444507311
Epoch:  2

07/19/2021 02:48:46 - INFO - trainer -     acc = 0.8151785714285714
07/19/2021 02:48:46 - INFO - trainer -     f1 = 0.7291948167497297
Epoch:  50%|█████     | 15/30 [04:00<03:55, 15.73s/it]07/19/2021 02:48:52 - INFO - trainer -   ***** Running evaluation *****
07/19/2021 02:48:52 - INFO - trainer -     Num examples = 1120
07/19/2021 02:48:52 - INFO - trainer -     Batch size = 32
07/19/2021 02:48:53 - INFO - trainer -   ***** Eval results *****
07/19/2021 02:48:53 - INFO - trainer -     eval loss: 1.3255954910601888
07/19/2021 02:48:53 - INFO - trainer -     acc = 0.8071428571428572
07/19/2021 02:48:53 - INFO - trainer -     f1 = 0.707839629406973
07/19/2021 02:48:59 - INFO - trainer -   ***** Running evaluation *****
07/19/2021 02:48:59 - INFO - trainer -     Num examples = 1120
07/19/2021 02:48:59 - INFO - trainer -     Batch size = 32
07/19/2021 02:49:00 - INFO - trainer -   ***** Eval results *****
07/19/2021 02:49:00 - INFO - trainer -     eval loss: 1.5537712186574937
07/19/2021 

07/19/2021 02:50:45 - INFO - trainer -   ***** Eval results *****
07/19/2021 02:50:45 - INFO - trainer -     eval loss: 1.394190927924189
07/19/2021 02:50:45 - INFO - trainer -     acc = 0.8133928571428571
07/19/2021 02:50:45 - INFO - trainer -     f1 = 0.7357359568480376
07/19/2021 02:50:51 - INFO - trainer -   ***** Running evaluation *****
07/19/2021 02:50:51 - INFO - trainer -     Num examples = 1120
07/19/2021 02:50:51 - INFO - trainer -     Batch size = 32
07/19/2021 02:50:52 - INFO - trainer -   ***** Eval results *****
07/19/2021 02:50:52 - INFO - trainer -     eval loss: 1.4533112335950136
07/19/2021 02:50:52 - INFO - trainer -     acc = 0.7991071428571429
07/19/2021 02:50:52 - INFO - trainer -     f1 = 0.7065535503910604
Epoch:  77%|███████▋  | 23/30 [06:07<01:50, 15.73s/it]07/19/2021 02:50:58 - INFO - trainer -   ***** Running evaluation *****
07/19/2021 02:50:58 - INFO - trainer -     Num examples = 1120
07/19/2021 02:50:58 - INFO - trainer -     Batch size = 32
07/19/2021 

07/19/2021 02:52:43 - INFO - trainer -     Batch size = 32
07/19/2021 02:52:45 - INFO - trainer -   ***** Eval results *****
07/19/2021 02:52:45 - INFO - trainer -     eval loss: 1.6814880924565452
07/19/2021 02:52:45 - INFO - trainer -     acc = 0.8017857142857143
07/19/2021 02:52:45 - INFO - trainer -     f1 = 0.700741121130383
Epoch: 100%|██████████| 30/30 [07:58<00:00, 15.94s/it]
07/19/2021 02:52:49 - INFO - __main__ -     acc = 0.8232142857142857
07/19/2021 02:52:49 - INFO - __main__ -     f1 = 0.7349991564589461


### Pure Bert, output 1st hidden layer, (pooled_output = outputs[2][1][:,0, :]）

In [9]:
_, _,  all_eval_results = train(args, train_dataset, model, test_dataset)

if len(all_eval_results):
    best_eval_result = max(all_eval_results, key=lambda x: x['acc']) 
    for key in sorted(best_eval_result.keys()):
        logger.info("  %s = %s", key, str(best_eval_result[key]))

07/19/2021 02:55:04 - INFO - trainer -   ***** Running training *****
07/19/2021 02:55:04 - INFO - trainer -     Num examples = 3602
07/19/2021 02:55:04 - INFO - trainer -     Num Epochs = 30
07/19/2021 02:55:04 - INFO - trainer -     Instantaneous batch size per GPU = 16
07/19/2021 02:55:04 - INFO - trainer -     Gradient Accumulation steps = 2
07/19/2021 02:55:04 - INFO - trainer -     Total optimization steps = 3390
Epoch:   0%|          | 0/30 [00:00<?, ?it/s]07/19/2021 02:55:10 - INFO - trainer -   ***** Running evaluation *****
07/19/2021 02:55:10 - INFO - trainer -     Num examples = 1120
07/19/2021 02:55:10 - INFO - trainer -     Batch size = 32
07/19/2021 02:55:11 - INFO - trainer -   ***** Eval results *****
07/19/2021 02:55:11 - INFO - trainer -     eval loss: 0.7873028542314257
07/19/2021 02:55:11 - INFO - trainer -     acc = 0.6669642857142857
07/19/2021 02:55:11 - INFO - trainer -     f1 = 0.3366351235612375
07/19/2021 02:55:17 - INFO - trainer -   ***** Running evaluatio

07/19/2021 02:56:59 - INFO - trainer -     f1 = 0.696779708089888
07/19/2021 02:57:05 - INFO - trainer -   ***** Running evaluation *****
07/19/2021 02:57:05 - INFO - trainer -     Num examples = 1120
07/19/2021 02:57:05 - INFO - trainer -     Batch size = 32
07/19/2021 02:57:06 - INFO - trainer -   ***** Eval results *****
07/19/2021 02:57:06 - INFO - trainer -     eval loss: 0.9371733822992869
07/19/2021 02:57:06 - INFO - trainer -     acc = 0.80625
07/19/2021 02:57:06 - INFO - trainer -     f1 = 0.7095328450514696
07/19/2021 02:57:12 - INFO - trainer -   ***** Running evaluation *****
07/19/2021 02:57:12 - INFO - trainer -     Num examples = 1120
07/19/2021 02:57:12 - INFO - trainer -     Batch size = 32
07/19/2021 02:57:13 - INFO - trainer -   ***** Eval results *****
07/19/2021 02:57:13 - INFO - trainer -     eval loss: 0.9312272314514433
07/19/2021 02:57:13 - INFO - trainer -     acc = 0.8098214285714286
07/19/2021 02:57:13 - INFO - trainer -     f1 = 0.7157206444507311
Epoch:  2

07/19/2021 02:58:59 - INFO - trainer -     acc = 0.8151785714285714
07/19/2021 02:58:59 - INFO - trainer -     f1 = 0.7291948167497297
Epoch:  50%|█████     | 15/30 [04:00<03:56, 15.78s/it]07/19/2021 02:59:05 - INFO - trainer -   ***** Running evaluation *****
07/19/2021 02:59:05 - INFO - trainer -     Num examples = 1120
07/19/2021 02:59:05 - INFO - trainer -     Batch size = 32
07/19/2021 02:59:06 - INFO - trainer -   ***** Eval results *****
07/19/2021 02:59:06 - INFO - trainer -     eval loss: 1.3255954910601888
07/19/2021 02:59:06 - INFO - trainer -     acc = 0.8071428571428572
07/19/2021 02:59:06 - INFO - trainer -     f1 = 0.707839629406973
07/19/2021 02:59:12 - INFO - trainer -   ***** Running evaluation *****
07/19/2021 02:59:12 - INFO - trainer -     Num examples = 1120
07/19/2021 02:59:12 - INFO - trainer -     Batch size = 32
07/19/2021 02:59:13 - INFO - trainer -   ***** Eval results *****
07/19/2021 02:59:13 - INFO - trainer -     eval loss: 1.5537712186574937
07/19/2021 

07/19/2021 03:00:59 - INFO - trainer -   ***** Eval results *****
07/19/2021 03:00:59 - INFO - trainer -     eval loss: 1.394190927924189
07/19/2021 03:00:59 - INFO - trainer -     acc = 0.8133928571428571
07/19/2021 03:00:59 - INFO - trainer -     f1 = 0.7357359568480376
07/19/2021 03:01:05 - INFO - trainer -   ***** Running evaluation *****
07/19/2021 03:01:05 - INFO - trainer -     Num examples = 1120
07/19/2021 03:01:05 - INFO - trainer -     Batch size = 32
07/19/2021 03:01:06 - INFO - trainer -   ***** Eval results *****
07/19/2021 03:01:06 - INFO - trainer -     eval loss: 1.4533112335950136
07/19/2021 03:01:06 - INFO - trainer -     acc = 0.7991071428571429
07/19/2021 03:01:06 - INFO - trainer -     f1 = 0.7065535503910604
Epoch:  77%|███████▋  | 23/30 [06:07<01:50, 15.77s/it]07/19/2021 03:01:12 - INFO - trainer -   ***** Running evaluation *****
07/19/2021 03:01:12 - INFO - trainer -     Num examples = 1120
07/19/2021 03:01:12 - INFO - trainer -     Batch size = 32
07/19/2021 

07/19/2021 03:02:57 - INFO - trainer -     Batch size = 32
07/19/2021 03:02:58 - INFO - trainer -   ***** Eval results *****
07/19/2021 03:02:58 - INFO - trainer -     eval loss: 1.6814880924565452
07/19/2021 03:02:58 - INFO - trainer -     acc = 0.8017857142857143
07/19/2021 03:02:58 - INFO - trainer -     f1 = 0.700741121130383
Epoch: 100%|██████████| 30/30 [07:58<00:00, 15.96s/it]
07/19/2021 03:03:03 - INFO - __main__ -     acc = 0.8232142857142857
07/19/2021 03:03:03 - INFO - __main__ -     f1 = 0.7349991564589461


### Pure Bert, output 0th hiddden layer, (pooled_output = outputs[2][0][:,0, :]）

In [11]:
_, _,  all_eval_results = train(args, train_dataset, model, test_dataset)

if len(all_eval_results):
    best_eval_result = max(all_eval_results, key=lambda x: x['acc']) 
    for key in sorted(best_eval_result.keys()):
        logger.info("  %s = %s", key, str(best_eval_result[key]))

07/19/2021 03:12:15 - INFO - trainer -   ***** Running training *****
07/19/2021 03:12:15 - INFO - trainer -     Num examples = 3602
07/19/2021 03:12:15 - INFO - trainer -     Num Epochs = 30
07/19/2021 03:12:15 - INFO - trainer -     Instantaneous batch size per GPU = 16
07/19/2021 03:12:15 - INFO - trainer -     Gradient Accumulation steps = 2
07/19/2021 03:12:15 - INFO - trainer -     Total optimization steps = 3390
Epoch:   0%|          | 0/30 [00:00<?, ?it/s]07/19/2021 03:12:21 - INFO - trainer -   ***** Running evaluation *****
07/19/2021 03:12:21 - INFO - trainer -     Num examples = 1120
07/19/2021 03:12:21 - INFO - trainer -     Batch size = 32
07/19/2021 03:12:22 - INFO - trainer -   ***** Eval results *****
07/19/2021 03:12:22 - INFO - trainer -     eval loss: 0.7873028542314257
07/19/2021 03:12:22 - INFO - trainer -     acc = 0.6669642857142857
07/19/2021 03:12:22 - INFO - trainer -     f1 = 0.3366351235612375
07/19/2021 03:12:28 - INFO - trainer -   ***** Running evaluatio

07/19/2021 03:14:10 - INFO - trainer -     f1 = 0.696779708089888
07/19/2021 03:14:16 - INFO - trainer -   ***** Running evaluation *****
07/19/2021 03:14:16 - INFO - trainer -     Num examples = 1120
07/19/2021 03:14:16 - INFO - trainer -     Batch size = 32
07/19/2021 03:14:17 - INFO - trainer -   ***** Eval results *****
07/19/2021 03:14:17 - INFO - trainer -     eval loss: 0.9371733822992869
07/19/2021 03:14:17 - INFO - trainer -     acc = 0.80625
07/19/2021 03:14:17 - INFO - trainer -     f1 = 0.7095328450514696
07/19/2021 03:14:23 - INFO - trainer -   ***** Running evaluation *****
07/19/2021 03:14:23 - INFO - trainer -     Num examples = 1120
07/19/2021 03:14:23 - INFO - trainer -     Batch size = 32
07/19/2021 03:14:24 - INFO - trainer -   ***** Eval results *****
07/19/2021 03:14:24 - INFO - trainer -     eval loss: 0.9312272314514433
07/19/2021 03:14:24 - INFO - trainer -     acc = 0.8098214285714286
07/19/2021 03:14:24 - INFO - trainer -     f1 = 0.7157206444507311
Epoch:  2

07/19/2021 03:16:10 - INFO - trainer -     acc = 0.8151785714285714
07/19/2021 03:16:10 - INFO - trainer -     f1 = 0.7291948167497297
Epoch:  50%|█████     | 15/30 [04:00<03:57, 15.84s/it]07/19/2021 03:16:16 - INFO - trainer -   ***** Running evaluation *****
07/19/2021 03:16:16 - INFO - trainer -     Num examples = 1120
07/19/2021 03:16:16 - INFO - trainer -     Batch size = 32
07/19/2021 03:16:17 - INFO - trainer -   ***** Eval results *****
07/19/2021 03:16:17 - INFO - trainer -     eval loss: 1.3255954910601888
07/19/2021 03:16:17 - INFO - trainer -     acc = 0.8071428571428572
07/19/2021 03:16:17 - INFO - trainer -     f1 = 0.707839629406973
07/19/2021 03:16:23 - INFO - trainer -   ***** Running evaluation *****
07/19/2021 03:16:23 - INFO - trainer -     Num examples = 1120
07/19/2021 03:16:23 - INFO - trainer -     Batch size = 32
07/19/2021 03:16:24 - INFO - trainer -   ***** Eval results *****
07/19/2021 03:16:24 - INFO - trainer -     eval loss: 1.5537712186574937
07/19/2021 

07/19/2021 03:18:10 - INFO - trainer -   ***** Eval results *****
07/19/2021 03:18:10 - INFO - trainer -     eval loss: 1.394190927924189
07/19/2021 03:18:10 - INFO - trainer -     acc = 0.8133928571428571
07/19/2021 03:18:10 - INFO - trainer -     f1 = 0.7357359568480376
07/19/2021 03:18:16 - INFO - trainer -   ***** Running evaluation *****
07/19/2021 03:18:16 - INFO - trainer -     Num examples = 1120
07/19/2021 03:18:16 - INFO - trainer -     Batch size = 32
07/19/2021 03:18:18 - INFO - trainer -   ***** Eval results *****
07/19/2021 03:18:18 - INFO - trainer -     eval loss: 1.4533112335950136
07/19/2021 03:18:18 - INFO - trainer -     acc = 0.7991071428571429
07/19/2021 03:18:18 - INFO - trainer -     f1 = 0.7065535503910604
Epoch:  77%|███████▋  | 23/30 [06:08<01:50, 15.81s/it]07/19/2021 03:18:23 - INFO - trainer -   ***** Running evaluation *****
07/19/2021 03:18:23 - INFO - trainer -     Num examples = 1120
07/19/2021 03:18:23 - INFO - trainer -     Batch size = 32
07/19/2021 

07/19/2021 03:20:09 - INFO - trainer -     Batch size = 32
07/19/2021 03:20:10 - INFO - trainer -   ***** Eval results *****
07/19/2021 03:20:10 - INFO - trainer -     eval loss: 1.6814880924565452
07/19/2021 03:20:10 - INFO - trainer -     acc = 0.8017857142857143
07/19/2021 03:20:10 - INFO - trainer -     f1 = 0.700741121130383
Epoch: 100%|██████████| 30/30 [08:00<00:00, 16.00s/it]
07/19/2021 03:20:15 - INFO - __main__ -     acc = 0.8232142857142857
07/19/2021 03:20:15 - INFO - __main__ -     f1 = 0.7349991564589461
