In [5]:
#env library
import nltk
nltk.download('punkt')

[nltk_data] Downloading package punkt to /home/ubuntu/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

### Device Check

In [1]:
import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
torch.cuda.current_device()
torch.cuda.device(0)


<torch.cuda.device at 0x7f2d8bfe7e80>

In [4]:
%load_ext autoreload
%autoreload 2

## Loading Dataset, Model

In [2]:
# coding=utf-8
import argparse
import logging
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "2"
import random

import numpy as np
import pandas as pd
import torch
from transformers import (BertConfig, BertForTokenClassification,
                                  BertTokenizer)
from torch.utils.data import DataLoader

from datasets import load_datasets_and_vocabs
from model import (Aspect_Text_GAT_ours,
                    Pure_Bert, Aspect_Bert_GAT, Aspect_Text_GAT_only)
from trainer import train

logger = logging.getLogger(__name__)

def set_seed(args):
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

def parse_args(args):
    parser = argparse.ArgumentParser()

    # Required parameters
    parser.add_argument('--dataset_name', type=str, default='rest',
                        choices=['rest', 'laptop', 'twitter'],
                        help='Choose absa dataset.')
    parser.add_argument('--output_dir', type=str, default='/data1/SHENWZH/ABSA_online/data/output-gcn',
                        help='Directory to store intermedia data, such as vocab, embeddings, tags_vocab.')
    parser.add_argument('--num_classes', type=int, default=3,
                        help='Number of classes of ABSA.')


    parser.add_argument('--cuda_id', type=str, default='3',
                        help='Choose which GPUs to run')
    parser.add_argument('--seed', type=int, default=2019,
                        help='random seed for initialization')

    # Model parameters
    parser.add_argument('--glove_dir', type=str, default='/data1/SHENWZH/wordvec',
                        help='Directory storing glove embeddings')
    parser.add_argument('--bert_model_dir', type=str, default='/data1/SHENWZH/models/bert_base',
                        help='Path to pre-trained Bert model.')
    parser.add_argument('--pure_bert', action='store_true',
                        help='Cat text and aspect, [cls] to predict.')
    
    parser.add_argument('--pure_bert_layer_agg', action='store_true',
                        help='Pure bert layer aggregation enable/not')

    parser.add_argument('--pure_bert_layer_agg_list', type=str, default="12",
                        help='Pure Bert layer number to aggregate')

    parser.add_argument('--pure_bert_linear_layer_count', type=int, default=2,
                        help='Pure Bert final linear layer count')
    
    parser.add_argument('--gat_bert', action='store_true',
                        help='Cat text and aspect, [cls] to predict.')

    parser.add_argument('--highway', action='store_true',
                        help='Use highway embed.')

    parser.add_argument('--num_layers', type=int, default=2,
                        help='Number of layers of bilstm or highway or elmo.')


    parser.add_argument('--add_non_connect',  type= bool, default=True,
                        help='Add a sepcial "non-connect" relation for aspect with no direct connection.')
    parser.add_argument('--multi_hop',  type= bool, default=True,
                        help='Multi hop non connection.')
    parser.add_argument('--max_hop', type = int, default=4,
                        help='max number of hops')


    parser.add_argument('--num_heads', type=int, default=6,
                        help='Number of heads for gat.')
    
    parser.add_argument('--dropout', type=float, default=0,
                        help='Dropout rate for embedding.')


    parser.add_argument('--num_gcn_layers', type=int, default=1,
                        help='Number of GCN layers.')
    parser.add_argument('--gcn_mem_dim', type=int, default=300,
                        help='Dimension of the W in GCN.')
    parser.add_argument('--gcn_dropout', type=float, default=0.2,
                        help='Dropout rate for GCN.')
    # GAT
    parser.add_argument('--gat', action='store_true',
                        help='GAT')
    parser.add_argument('--gat_our', action='store_true',
                        help='GAT_our')
    parser.add_argument('--gat_attention_type', type = str, choices=['linear','dotprod','gcn'], default='dotprod',
                        help='The attention used for gat')

    parser.add_argument('--embedding_type', type=str,default='glove', choices=['glove','bert'])
    parser.add_argument('--embedding_dim', type=int, default=300,
                        help='Dimension of glove embeddings')
    parser.add_argument('--dep_relation_embed_dim', type=int, default=300,
                        help='Dimension for dependency relation embeddings.')

    parser.add_argument('--hidden_size', type=int, default=300,
                        help='Hidden size of bilstm, in early stage.')
    parser.add_argument('--final_hidden_size', type=int, default=300,
                        help='Hidden size of bilstm, in early stage.')
    parser.add_argument('--num_mlps', type=int, default=2,
                        help='Number of mlps in the last of model.')

    # Training parameters
    parser.add_argument("--per_gpu_train_batch_size", default=16, type=int,
                        help="Batch size per GPU/CPU for training.")
    parser.add_argument("--per_gpu_eval_batch_size", default=32, type=int,
                        help="Batch size per GPU/CPU for evaluation.")
    parser.add_argument('--gradient_accumulation_steps', type=int, default=2,
                        help="Number of updates steps to accumulate before performing a backward/update pass.")
    parser.add_argument("--learning_rate", default=1e-3, type=float,
                        help="The initial learning rate for Adam.")
    
    parser.add_argument("--weight_decay", default=0.0, type=float,
                        help="Weight deay if we apply some.")
    parser.add_argument("--adam_epsilon", default=1e-8, type=float,
                        help="Epsilon for Adam optimizer.")

    parser.add_argument("--max_grad_norm", default=1.0, type=float,
                        help="Max gradient norm.")
    parser.add_argument("--num_train_epochs", default=30.0, type=float,
                        help="Total number of training epochs to perform.")
    parser.add_argument("--max_steps", default=-1, type=int,
                        help="If > 0: set total number of training steps(that update the weights) to perform. Override num_train_epochs.")
    parser.add_argument('--logging_steps', type=int, default=50,
                        help="Log every X updates steps.")
    
    return parser.parse_args(args)


def check_args(args):
    '''
    eliminate confilct situations
    
    '''
    logger.info(vars(args))

In [3]:
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
                        datefmt='%m/%d/%Y %H:%M:%S',
                        level=logging.INFO)
    
# Parse args

args_str = "--embedding_type bert --output_dir data/output-gcn --dropout 0.3 --hidden_size 200 --learning_rate 5e-5 --bert_model_dir ./test/saved_model --pure_bert --pure_bert_layer_agg --pure_bert_layer_agg_list 11,12 --pure_bert_linear_layer_count 2"
#args = parse_args(['--gat_our', '--highway', '--num_heads', '7', '--dropout', '0.8', '--output_dir',
#                   'output/r-gat', '--glove_dir', 'glove', '--cuda_id', '0'])
args = parse_args(args_str.split(' '))
check_args(args)

# Setup CUDA, GPU training
os.environ["CUDA_VISIBLE_DEVICES"] = args.cuda_id
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
args.device = device
logger.info('Device is %s', args.device)

# Set seed
set_seed(args)

# Bert, load pretrained model and tokenizer, check if neccesary to put bert here
if args.embedding_type == 'bert':
    tokenizer = BertTokenizer.from_pretrained(args.bert_model_dir)
    args.tokenizer = tokenizer

# Load datasets and vocabs
train_dataset, test_dataset, word_vocab, dep_tag_vocab, pos_tag_vocab= load_datasets_and_vocabs(args)

# Build Model
# model = Aspect_Text_Multi_Syntax_Encoding(args, dep_tag_vocab['len'], pos_tag_vocab['len'])
if args.pure_bert:
    model = Pure_Bert(args)
elif args.gat_bert:
    model = Aspect_Bert_GAT(args, dep_tag_vocab['len'], pos_tag_vocab['len'])  # R-GAT + Bert
elif args.gat_our:
    model = Aspect_Text_GAT_ours(args, dep_tag_vocab['len'], pos_tag_vocab['len']) # R-GAT with reshaped tree
else:
    model = Aspect_Text_GAT_only(args, dep_tag_vocab['len'], pos_tag_vocab['len'])  # original GAT with reshaped tree

model.to(args.device)
# Train


07/21/2021 02:49:44 - INFO - __main__ -   {'dataset_name': 'rest', 'output_dir': 'data/output-gcn', 'num_classes': 3, 'cuda_id': '3', 'seed': 2019, 'glove_dir': '/data1/SHENWZH/wordvec', 'bert_model_dir': './test/saved_model', 'pure_bert': True, 'pure_bert_layer_agg': True, 'pure_bert_layer_agg_list': '11,12', 'pure_bert_linear_layer_count': 2, 'gat_bert': False, 'highway': False, 'num_layers': 2, 'add_non_connect': True, 'multi_hop': True, 'max_hop': 4, 'num_heads': 6, 'dropout': 0.3, 'num_gcn_layers': 1, 'gcn_mem_dim': 300, 'gcn_dropout': 0.2, 'gat': False, 'gat_our': False, 'gat_attention_type': 'dotprod', 'embedding_type': 'bert', 'embedding_dim': 300, 'dep_relation_embed_dim': 300, 'hidden_size': 200, 'final_hidden_size': 300, 'num_mlps': 2, 'per_gpu_train_batch_size': 16, 'per_gpu_eval_batch_size': 32, 'gradient_accumulation_steps': 2, 'learning_rate': 5e-05, 'weight_decay': 0.0, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 30.0, 'max_steps': -1, 'logging_step

Pure_Bert(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
  

### GAT+GLOVE

In [35]:
# Train
_, _,  all_eval_results = train(args, train_dataset, model, test_dataset)

if len(all_eval_results):
    best_eval_result = max(all_eval_results, key=lambda x: x['acc']) 
    for key in sorted(best_eval_result.keys()):
        logger.info("  %s = %s", key, str(best_eval_result[key]))

07/07/2021 07:04:56 - INFO - trainer -   ***** Running training *****
07/07/2021 07:04:56 - INFO - trainer -     Num examples = 3602
07/07/2021 07:04:56 - INFO - trainer -     Num Epochs = 30
07/07/2021 07:04:56 - INFO - trainer -     Instantaneous batch size per GPU = 16
07/07/2021 07:04:56 - INFO - trainer -     Gradient Accumulation steps = 2
07/07/2021 07:04:56 - INFO - trainer -     Total optimization steps = 3390
07/07/2021 07:04:59 - INFO - trainer -   ***** Running evaluation *****
07/07/2021 07:04:59 - INFO - trainer -     Num examples = 1120
07/07/2021 07:04:59 - INFO - trainer -     Batch size = 32
07/07/2021 07:05:00 - INFO - trainer -   ***** Eval results *****
07/07/2021 07:05:00 - INFO - trainer -     eval loss: 0.8966324329376221
07/07/2021 07:05:00 - INFO - trainer -     acc = 0.65
07/07/2021 07:05:00 - INFO - trainer -     f1 = 0.26262626262626265
07/07/2021 07:05:03 - INFO - trainer -   ***** Running evaluation *****
07/07/2021 07:05:03 - INFO - trainer -     Num exa

07/07/2021 07:05:57 - INFO - trainer -     Batch size = 32
07/07/2021 07:05:58 - INFO - trainer -   ***** Eval results *****
07/07/2021 07:05:58 - INFO - trainer -     eval loss: 0.5531125336885452
07/07/2021 07:05:58 - INFO - trainer -     acc = 0.7830357142857143
07/07/2021 07:05:58 - INFO - trainer -     f1 = 0.6531542856177347
07/07/2021 07:06:01 - INFO - trainer -   ***** Running evaluation *****
07/07/2021 07:06:01 - INFO - trainer -     Num examples = 1120
07/07/2021 07:06:01 - INFO - trainer -     Batch size = 32
07/07/2021 07:06:02 - INFO - trainer -   ***** Eval results *****
07/07/2021 07:06:02 - INFO - trainer -     eval loss: 0.5123141582523073
07/07/2021 07:06:02 - INFO - trainer -     acc = 0.8098214285714286
07/07/2021 07:06:02 - INFO - trainer -     f1 = 0.7146273947186691
07/07/2021 07:06:05 - INFO - trainer -   ***** Running evaluation *****
07/07/2021 07:06:05 - INFO - trainer -     Num examples = 1120
07/07/2021 07:06:05 - INFO - trainer -     Batch size = 32
07/07

07/07/2021 07:07:03 - INFO - trainer -     Num examples = 1120
07/07/2021 07:07:03 - INFO - trainer -     Batch size = 32
07/07/2021 07:07:04 - INFO - trainer -   ***** Eval results *****
07/07/2021 07:07:04 - INFO - trainer -     eval loss: 0.5151275683726583
07/07/2021 07:07:04 - INFO - trainer -     acc = 0.7964285714285714
07/07/2021 07:07:04 - INFO - trainer -     f1 = 0.7051286726617215
Epoch:  50%|█████     | 15/30 [02:11<02:10,  8.68s/it]07/07/2021 07:07:07 - INFO - trainer -   ***** Running evaluation *****
07/07/2021 07:07:07 - INFO - trainer -     Num examples = 1120
07/07/2021 07:07:07 - INFO - trainer -     Batch size = 32
07/07/2021 07:07:08 - INFO - trainer -   ***** Eval results *****
07/07/2021 07:07:08 - INFO - trainer -     eval loss: 0.5232623447264944
07/07/2021 07:07:08 - INFO - trainer -     acc = 0.8026785714285715
07/07/2021 07:07:08 - INFO - trainer -     f1 = 0.7031924217953387
07/07/2021 07:07:11 - INFO - trainer -   ***** Running evaluation *****
07/07/2021

Epoch:  73%|███████▎  | 22/30 [03:11<01:08,  8.62s/it]07/07/2021 07:08:08 - INFO - trainer -   ***** Running evaluation *****
07/07/2021 07:08:08 - INFO - trainer -     Num examples = 1120
07/07/2021 07:08:08 - INFO - trainer -     Batch size = 32
07/07/2021 07:08:09 - INFO - trainer -   ***** Eval results *****
07/07/2021 07:08:09 - INFO - trainer -     eval loss: 0.5286704891494343
07/07/2021 07:08:09 - INFO - trainer -     acc = 0.8008928571428572
07/07/2021 07:08:09 - INFO - trainer -     f1 = 0.7103597361556636
07/07/2021 07:08:12 - INFO - trainer -   ***** Running evaluation *****
07/07/2021 07:08:12 - INFO - trainer -     Num examples = 1120
07/07/2021 07:08:12 - INFO - trainer -     Batch size = 32
07/07/2021 07:08:13 - INFO - trainer -   ***** Eval results *****
07/07/2021 07:08:13 - INFO - trainer -     eval loss: 0.5869143947958946
07/07/2021 07:08:13 - INFO - trainer -     acc = 0.7883928571428571
07/07/2021 07:08:13 - INFO - trainer -     f1 = 0.6839134561333817
Epoch:  77

07/07/2021 07:09:11 - INFO - trainer -     acc = 0.7857142857142857
07/07/2021 07:09:11 - INFO - trainer -     f1 = 0.6820553407700896
07/07/2021 07:09:15 - INFO - trainer -   ***** Running evaluation *****
07/07/2021 07:09:15 - INFO - trainer -     Num examples = 1120
07/07/2021 07:09:15 - INFO - trainer -     Batch size = 32
07/07/2021 07:09:15 - INFO - trainer -   ***** Eval results *****
07/07/2021 07:09:15 - INFO - trainer -     eval loss: 0.5891395215477262
07/07/2021 07:09:15 - INFO - trainer -     acc = 0.8053571428571429
07/07/2021 07:09:15 - INFO - trainer -     f1 = 0.715551294462741
Epoch: 100%|██████████| 30/30 [04:22<00:00,  8.74s/it]
07/07/2021 07:09:18 - INFO - __main__ -     acc = 0.8133928571428571
07/07/2021 07:09:18 - INFO - __main__ -     f1 = 0.7244278215763696


# Pure Bert
## Single Layer
### Output of Bert, output[1]

In [11]:
# Train
_, _,  all_eval_results = train(args, train_dataset, model, test_dataset)

if len(all_eval_results):
    best_eval_result = max(all_eval_results, key=lambda x: x['acc']) 
    for key in sorted(best_eval_result.keys()):
        logger.info("  %s = %s", key, str(best_eval_result[key]))

07/18/2021 04:08:40 - INFO - trainer -   ***** Running training *****
07/18/2021 04:08:40 - INFO - trainer -     Num examples = 3602
07/18/2021 04:08:40 - INFO - trainer -     Num Epochs = 30
07/18/2021 04:08:40 - INFO - trainer -     Instantaneous batch size per GPU = 16
07/18/2021 04:08:40 - INFO - trainer -     Gradient Accumulation steps = 2
07/18/2021 04:08:40 - INFO - trainer -     Total optimization steps = 3390
Epoch:   0%|          | 0/30 [00:00<?, ?it/s]07/18/2021 04:08:49 - INFO - trainer -   ***** Running evaluation *****
07/18/2021 04:08:49 - INFO - trainer -     Num examples = 1120
07/18/2021 04:08:49 - INFO - trainer -     Batch size = 32
07/18/2021 04:08:50 - INFO - trainer -   ***** Eval results *****
07/18/2021 04:08:50 - INFO - trainer -     eval loss: 0.65519928250994
07/18/2021 04:08:50 - INFO - trainer -     acc = 0.725
07/18/2021 04:08:50 - INFO - trainer -     f1 = 0.4891617091386233
07/18/2021 04:08:58 - INFO - trainer -   ***** Running evaluation *****
07/18/2

07/18/2021 04:11:03 - INFO - trainer -     f1 = 0.7800614895524336
07/18/2021 04:11:11 - INFO - trainer -   ***** Running evaluation *****
07/18/2021 04:11:11 - INFO - trainer -     Num examples = 1120
07/18/2021 04:11:11 - INFO - trainer -     Batch size = 32
07/18/2021 04:11:12 - INFO - trainer -   ***** Eval results *****
07/18/2021 04:11:12 - INFO - trainer -     eval loss: 0.686464019172958
07/18/2021 04:11:12 - INFO - trainer -     acc = 0.8357142857142857
07/18/2021 04:11:12 - INFO - trainer -     f1 = 0.7602204045209969
07/18/2021 04:11:20 - INFO - trainer -   ***** Running evaluation *****
07/18/2021 04:11:20 - INFO - trainer -     Num examples = 1120
07/18/2021 04:11:20 - INFO - trainer -     Batch size = 32
07/18/2021 04:11:21 - INFO - trainer -   ***** Eval results *****
07/18/2021 04:11:21 - INFO - trainer -     eval loss: 0.7699188064118582
07/18/2021 04:11:21 - INFO - trainer -     acc = 0.8410714285714286
07/18/2021 04:11:21 - INFO - trainer -     f1 = 0.745771367617637

07/18/2021 04:13:32 - INFO - trainer -     acc = 0.8517857142857143
07/18/2021 04:13:32 - INFO - trainer -     f1 = 0.7740612033907804
Epoch:  50%|█████     | 15/30 [04:59<04:53, 19.59s/it]07/18/2021 04:13:40 - INFO - trainer -   ***** Running evaluation *****
07/18/2021 04:13:40 - INFO - trainer -     Num examples = 1120
07/18/2021 04:13:40 - INFO - trainer -     Batch size = 32
07/18/2021 04:13:41 - INFO - trainer -   ***** Eval results *****
07/18/2021 04:13:41 - INFO - trainer -     eval loss: 0.8870214012734193
07/18/2021 04:13:41 - INFO - trainer -     acc = 0.8589285714285714
07/18/2021 04:13:41 - INFO - trainer -     f1 = 0.7918260244487366
07/18/2021 04:13:48 - INFO - trainer -   ***** Running evaluation *****
07/18/2021 04:13:48 - INFO - trainer -     Num examples = 1120
07/18/2021 04:13:48 - INFO - trainer -     Batch size = 32
07/18/2021 04:13:50 - INFO - trainer -   ***** Eval results *****
07/18/2021 04:13:50 - INFO - trainer -     eval loss: 0.7627020764946272
07/18/2021

07/18/2021 04:16:00 - INFO - trainer -   ***** Eval results *****
07/18/2021 04:16:00 - INFO - trainer -     eval loss: 0.9538841735305531
07/18/2021 04:16:00 - INFO - trainer -     acc = 0.8544642857142857
07/18/2021 04:16:00 - INFO - trainer -     f1 = 0.784057441411119
07/18/2021 04:16:08 - INFO - trainer -   ***** Running evaluation *****
07/18/2021 04:16:08 - INFO - trainer -     Num examples = 1120
07/18/2021 04:16:08 - INFO - trainer -     Batch size = 32
07/18/2021 04:16:09 - INFO - trainer -   ***** Eval results *****
07/18/2021 04:16:09 - INFO - trainer -     eval loss: 0.9816775739192962
07/18/2021 04:16:09 - INFO - trainer -     acc = 0.8464285714285714
07/18/2021 04:16:09 - INFO - trainer -     f1 = 0.7813942495060991
Epoch:  77%|███████▋  | 23/30 [07:36<02:16, 19.56s/it]07/18/2021 04:16:17 - INFO - trainer -   ***** Running evaluation *****
07/18/2021 04:16:17 - INFO - trainer -     Num examples = 1120
07/18/2021 04:16:17 - INFO - trainer -     Batch size = 32
07/18/2021 

07/18/2021 04:18:27 - INFO - trainer -     Num examples = 1120
07/18/2021 04:18:27 - INFO - trainer -     Batch size = 32
07/18/2021 04:18:28 - INFO - trainer -   ***** Eval results *****
07/18/2021 04:18:28 - INFO - trainer -     eval loss: 0.9756734546186635
07/18/2021 04:18:28 - INFO - trainer -     acc = 0.8758928571428571
07/18/2021 04:18:28 - INFO - trainer -     f1 = 0.817788715274943
Epoch: 100%|██████████| 30/30 [09:54<00:00, 19.80s/it]
07/18/2021 04:18:34 - INFO - __main__ -     acc = 0.8758928571428571
07/18/2021 04:18:34 - INFO - __main__ -     f1 = 0.817788715274943


### Pure Bert
### output 0th hiddden layer, (pooled_output = outputs[2][0][:,0, :]）

### relu((768,256))Layer_0) ->(256,3)

In [4]:
_, _,  all_eval_results = train(args, train_dataset, model, test_dataset)

if len(all_eval_results):
    best_eval_result = max(all_eval_results, key=lambda x: x['acc']) 
    for key in sorted(best_eval_result.keys()):
        logger.info("  %s = %s", key, str(best_eval_result[key]))

07/21/2021 02:22:17 - INFO - trainer -   ***** Running training *****
07/21/2021 02:22:17 - INFO - trainer -     Num examples = 3602
07/21/2021 02:22:17 - INFO - trainer -     Num Epochs = 30
07/21/2021 02:22:17 - INFO - trainer -     Instantaneous batch size per GPU = 16
07/21/2021 02:22:17 - INFO - trainer -     Gradient Accumulation steps = 2
07/21/2021 02:22:17 - INFO - trainer -     Total optimization steps = 3390
Epoch:   0%|          | 0/30 [00:00<?, ?it/s]07/21/2021 02:22:20 - INFO - trainer -   ***** Running evaluation *****
07/21/2021 02:22:20 - INFO - trainer -     Num examples = 1120
07/21/2021 02:22:20 - INFO - trainer -     Batch size = 32
07/21/2021 02:22:21 - INFO - trainer -   ***** Eval results *****
07/21/2021 02:22:21 - INFO - trainer -     eval loss: 0.8965684882232121
07/21/2021 02:22:21 - INFO - trainer -     acc = 0.65
07/21/2021 02:22:21 - INFO - trainer -     f1 = 0.26262626262626265
07/21/2021 02:22:24 - INFO - trainer -   ***** Running evaluation *****
07/21

07/21/2021 02:23:23 - INFO - trainer -     Batch size = 32
07/21/2021 02:23:24 - INFO - trainer -   ***** Eval results *****
07/21/2021 02:23:24 - INFO - trainer -     eval loss: 0.895578590461186
07/21/2021 02:23:24 - INFO - trainer -     acc = 0.65
07/21/2021 02:23:24 - INFO - trainer -     f1 = 0.26262626262626265
07/21/2021 02:23:27 - INFO - trainer -   ***** Running evaluation *****
07/21/2021 02:23:27 - INFO - trainer -     Num examples = 1120
07/21/2021 02:23:27 - INFO - trainer -     Batch size = 32
07/21/2021 02:23:28 - INFO - trainer -   ***** Eval results *****
07/21/2021 02:23:28 - INFO - trainer -     eval loss: 0.902548805304936
07/21/2021 02:23:28 - INFO - trainer -     acc = 0.65
07/21/2021 02:23:28 - INFO - trainer -     f1 = 0.26262626262626265
Epoch:  27%|██▋       | 8/30 [01:11<03:20,  9.10s/it]07/21/2021 02:23:31 - INFO - trainer -   ***** Running evaluation *****
07/21/2021 02:23:31 - INFO - trainer -     Num examples = 1120
07/21/2021 02:23:31 - INFO - trainer - 

07/21/2021 02:24:31 - INFO - trainer -   ***** Eval results *****
07/21/2021 02:24:31 - INFO - trainer -     eval loss: 0.8990076541900635
07/21/2021 02:24:31 - INFO - trainer -     acc = 0.65
07/21/2021 02:24:31 - INFO - trainer -     f1 = 0.26262626262626265
07/21/2021 02:24:34 - INFO - trainer -   ***** Running evaluation *****
07/21/2021 02:24:34 - INFO - trainer -     Num examples = 1120
07/21/2021 02:24:34 - INFO - trainer -     Batch size = 32
07/21/2021 02:24:35 - INFO - trainer -   ***** Eval results *****
07/21/2021 02:24:35 - INFO - trainer -     eval loss: 0.8951208250863212
07/21/2021 02:24:35 - INFO - trainer -     acc = 0.65
07/21/2021 02:24:35 - INFO - trainer -     f1 = 0.26262626262626265
07/21/2021 02:24:38 - INFO - trainer -   ***** Running evaluation *****
07/21/2021 02:24:38 - INFO - trainer -     Num examples = 1120
07/21/2021 02:24:38 - INFO - trainer -     Batch size = 32
07/21/2021 02:24:39 - INFO - trainer -   ***** Eval results *****
07/21/2021 02:24:39 - IN

07/21/2021 02:25:38 - INFO - trainer -     acc = 0.65
07/21/2021 02:25:38 - INFO - trainer -     f1 = 0.26262626262626265
Epoch:  77%|███████▋  | 23/30 [03:24<01:01,  8.76s/it]07/21/2021 02:25:41 - INFO - trainer -   ***** Running evaluation *****
07/21/2021 02:25:41 - INFO - trainer -     Num examples = 1120
07/21/2021 02:25:41 - INFO - trainer -     Batch size = 32
07/21/2021 02:25:42 - INFO - trainer -   ***** Eval results *****
07/21/2021 02:25:42 - INFO - trainer -     eval loss: 0.8977635264396667
07/21/2021 02:25:42 - INFO - trainer -     acc = 0.65
07/21/2021 02:25:42 - INFO - trainer -     f1 = 0.26262626262626265
07/21/2021 02:25:45 - INFO - trainer -   ***** Running evaluation *****
07/21/2021 02:25:45 - INFO - trainer -     Num examples = 1120
07/21/2021 02:25:45 - INFO - trainer -     Batch size = 32
07/21/2021 02:25:46 - INFO - trainer -   ***** Eval results *****
07/21/2021 02:25:46 - INFO - trainer -     eval loss: 0.8928227015904018
07/21/2021 02:25:46 - INFO - trainer

## Pure Bert,  output 1st hidden layer

### output 1st hidden layer (pooled_output = outputs[2][1][:,0, :]）

### relu((768,256))Layer_1) ->(256,3)

In [4]:
_, _,  all_eval_results = train(args, train_dataset, model, test_dataset)

if len(all_eval_results):
    best_eval_result = max(all_eval_results, key=lambda x: x['acc']) 
    for key in sorted(best_eval_result.keys()):
        logger.info("  %s = %s", key, str(best_eval_result[key]))

07/21/2021 02:33:14 - INFO - trainer -   ***** Running training *****
07/21/2021 02:33:14 - INFO - trainer -     Num examples = 3602
07/21/2021 02:33:14 - INFO - trainer -     Num Epochs = 30
07/21/2021 02:33:14 - INFO - trainer -     Instantaneous batch size per GPU = 16
07/21/2021 02:33:14 - INFO - trainer -     Gradient Accumulation steps = 2
07/21/2021 02:33:14 - INFO - trainer -     Total optimization steps = 3390
Epoch:   0%|          | 0/30 [00:00<?, ?it/s]07/21/2021 02:33:17 - INFO - trainer -   ***** Running evaluation *****
07/21/2021 02:33:17 - INFO - trainer -     Num examples = 1120
07/21/2021 02:33:17 - INFO - trainer -     Batch size = 32
07/21/2021 02:33:18 - INFO - trainer -   ***** Eval results *****
07/21/2021 02:33:18 - INFO - trainer -     eval loss: 0.9041421600750514
07/21/2021 02:33:18 - INFO - trainer -     acc = 0.65
07/21/2021 02:33:18 - INFO - trainer -     f1 = 0.26262626262626265
07/21/2021 02:33:21 - INFO - trainer -   ***** Running evaluation *****
07/21

07/21/2021 02:34:23 - INFO - trainer -     f1 = 0.5931018509569088
07/21/2021 02:34:27 - INFO - trainer -   ***** Running evaluation *****
07/21/2021 02:34:27 - INFO - trainer -     Num examples = 1120
07/21/2021 02:34:27 - INFO - trainer -     Batch size = 32
07/21/2021 02:34:28 - INFO - trainer -   ***** Eval results *****
07/21/2021 02:34:28 - INFO - trainer -     eval loss: 0.8204279278005873
07/21/2021 02:34:28 - INFO - trainer -     acc = 0.7232142857142857
07/21/2021 02:34:28 - INFO - trainer -     f1 = 0.618706663921156
07/21/2021 02:34:31 - INFO - trainer -   ***** Running evaluation *****
07/21/2021 02:34:31 - INFO - trainer -     Num examples = 1120
07/21/2021 02:34:31 - INFO - trainer -     Batch size = 32
07/21/2021 02:34:32 - INFO - trainer -   ***** Eval results *****
07/21/2021 02:34:32 - INFO - trainer -     eval loss: 0.8058119441781725
07/21/2021 02:34:32 - INFO - trainer -     acc = 0.7526785714285714
07/21/2021 02:34:32 - INFO - trainer -     f1 = 0.633907688855719

07/21/2021 02:35:38 - INFO - trainer -     acc = 0.7089285714285715
07/21/2021 02:35:38 - INFO - trainer -     f1 = 0.5714919646178758
Epoch:  50%|█████     | 15/30 [02:26<02:25,  9.71s/it]07/21/2021 02:35:41 - INFO - trainer -   ***** Running evaluation *****
07/21/2021 02:35:41 - INFO - trainer -     Num examples = 1120
07/21/2021 02:35:41 - INFO - trainer -     Batch size = 32
07/21/2021 02:35:42 - INFO - trainer -   ***** Eval results *****
07/21/2021 02:35:42 - INFO - trainer -     eval loss: 1.3315906007375036
07/21/2021 02:35:42 - INFO - trainer -     acc = 0.7205357142857143
07/21/2021 02:35:42 - INFO - trainer -     f1 = 0.5950885718643302
07/21/2021 02:35:45 - INFO - trainer -   ***** Running evaluation *****
07/21/2021 02:35:45 - INFO - trainer -     Num examples = 1120
07/21/2021 02:35:45 - INFO - trainer -     Batch size = 32
07/21/2021 02:35:46 - INFO - trainer -   ***** Eval results *****
07/21/2021 02:35:46 - INFO - trainer -     eval loss: 1.4261851806725774
07/21/2021

07/21/2021 02:36:52 - INFO - trainer -   ***** Eval results *****
07/21/2021 02:36:52 - INFO - trainer -     eval loss: 1.8904205820390156
07/21/2021 02:36:52 - INFO - trainer -     acc = 0.7008928571428571
07/21/2021 02:36:52 - INFO - trainer -     f1 = 0.5734336416465841
07/21/2021 02:36:55 - INFO - trainer -   ***** Running evaluation *****
07/21/2021 02:36:55 - INFO - trainer -     Num examples = 1120
07/21/2021 02:36:55 - INFO - trainer -     Batch size = 32
07/21/2021 02:36:56 - INFO - trainer -   ***** Eval results *****
07/21/2021 02:36:56 - INFO - trainer -     eval loss: 1.9507761895656586
07/21/2021 02:36:56 - INFO - trainer -     acc = 0.7098214285714286
07/21/2021 02:36:56 - INFO - trainer -     f1 = 0.5698865350529774
Epoch:  77%|███████▋  | 23/30 [03:45<01:07,  9.71s/it]07/21/2021 02:36:59 - INFO - trainer -   ***** Running evaluation *****
07/21/2021 02:36:59 - INFO - trainer -     Num examples = 1120
07/21/2021 02:36:59 - INFO - trainer -     Batch size = 32
07/21/2021

07/21/2021 02:38:05 - INFO - trainer -     Batch size = 32
07/21/2021 02:38:06 - INFO - trainer -   ***** Eval results *****
07/21/2021 02:38:06 - INFO - trainer -     eval loss: 2.5346927787576403
07/21/2021 02:38:06 - INFO - trainer -     acc = 0.69375
07/21/2021 02:38:06 - INFO - trainer -     f1 = 0.5629421584887339
Epoch: 100%|██████████| 30/30 [04:54<00:00,  9.82s/it]
07/21/2021 02:38:08 - INFO - __main__ -     acc = 0.7598214285714285
07/21/2021 02:38:08 - INFO - __main__ -     f1 = 0.6181060606060607


## Pure Bert, 

### output 6th hidden layer, (pooled_output = outputs[2][6][:,0, :])

### relu((768,256))Layer_6) ->(256,3)

In [4]:
_, _,  all_eval_results = train(args, train_dataset, model, test_dataset)

if len(all_eval_results):
    best_eval_result = max(all_eval_results, key=lambda x: x['acc']) 
    for key in sorted(best_eval_result.keys()):
        logger.info("  %s = %s", key, str(best_eval_result[key]))

07/21/2021 02:39:36 - INFO - trainer -   ***** Running training *****
07/21/2021 02:39:36 - INFO - trainer -     Num examples = 3602
07/21/2021 02:39:36 - INFO - trainer -     Num Epochs = 30
07/21/2021 02:39:36 - INFO - trainer -     Instantaneous batch size per GPU = 16
07/21/2021 02:39:36 - INFO - trainer -     Gradient Accumulation steps = 2
07/21/2021 02:39:36 - INFO - trainer -     Total optimization steps = 3390
Epoch:   0%|          | 0/30 [00:00<?, ?it/s]07/21/2021 02:39:41 - INFO - trainer -   ***** Running evaluation *****
07/21/2021 02:39:41 - INFO - trainer -     Num examples = 1120
07/21/2021 02:39:41 - INFO - trainer -     Batch size = 32
07/21/2021 02:39:42 - INFO - trainer -   ***** Eval results *****
07/21/2021 02:39:42 - INFO - trainer -     eval loss: 0.7873028542314257
07/21/2021 02:39:42 - INFO - trainer -     acc = 0.6669642857142857
07/21/2021 02:39:42 - INFO - trainer -     f1 = 0.3366351235612375
07/21/2021 02:39:48 - INFO - trainer -   ***** Running evaluatio

07/21/2021 02:41:21 - INFO - trainer -     f1 = 0.696779708089888
07/21/2021 02:41:27 - INFO - trainer -   ***** Running evaluation *****
07/21/2021 02:41:27 - INFO - trainer -     Num examples = 1120
07/21/2021 02:41:27 - INFO - trainer -     Batch size = 32
07/21/2021 02:41:28 - INFO - trainer -   ***** Eval results *****
07/21/2021 02:41:28 - INFO - trainer -     eval loss: 0.9371733822992869
07/21/2021 02:41:28 - INFO - trainer -     acc = 0.80625
07/21/2021 02:41:28 - INFO - trainer -     f1 = 0.7095328450514696
07/21/2021 02:41:33 - INFO - trainer -   ***** Running evaluation *****
07/21/2021 02:41:33 - INFO - trainer -     Num examples = 1120
07/21/2021 02:41:33 - INFO - trainer -     Batch size = 32
07/21/2021 02:41:34 - INFO - trainer -   ***** Eval results *****
07/21/2021 02:41:34 - INFO - trainer -     eval loss: 0.9312272314514433
07/21/2021 02:41:34 - INFO - trainer -     acc = 0.8098214285714286
07/21/2021 02:41:34 - INFO - trainer -     f1 = 0.7157206444507311
Epoch:  2

07/21/2021 02:43:11 - INFO - trainer -     acc = 0.8151785714285714
07/21/2021 02:43:11 - INFO - trainer -     f1 = 0.7291948167497297
Epoch:  50%|█████     | 15/30 [03:40<03:35, 14.37s/it]07/21/2021 02:43:16 - INFO - trainer -   ***** Running evaluation *****
07/21/2021 02:43:16 - INFO - trainer -     Num examples = 1120
07/21/2021 02:43:16 - INFO - trainer -     Batch size = 32
07/21/2021 02:43:17 - INFO - trainer -   ***** Eval results *****
07/21/2021 02:43:17 - INFO - trainer -     eval loss: 1.3255954910601888
07/21/2021 02:43:17 - INFO - trainer -     acc = 0.8071428571428572
07/21/2021 02:43:17 - INFO - trainer -     f1 = 0.707839629406973
07/21/2021 02:43:23 - INFO - trainer -   ***** Running evaluation *****
07/21/2021 02:43:23 - INFO - trainer -     Num examples = 1120
07/21/2021 02:43:23 - INFO - trainer -     Batch size = 32
07/21/2021 02:43:24 - INFO - trainer -   ***** Eval results *****
07/21/2021 02:43:24 - INFO - trainer -     eval loss: 1.5537712186574937
07/21/2021 

07/21/2021 02:44:59 - INFO - trainer -   ***** Eval results *****
07/21/2021 02:44:59 - INFO - trainer -     eval loss: 1.394190927924189
07/21/2021 02:44:59 - INFO - trainer -     acc = 0.8133928571428571
07/21/2021 02:44:59 - INFO - trainer -     f1 = 0.7357359568480376
07/21/2021 02:45:04 - INFO - trainer -   ***** Running evaluation *****
07/21/2021 02:45:04 - INFO - trainer -     Num examples = 1120
07/21/2021 02:45:04 - INFO - trainer -     Batch size = 32
07/21/2021 02:45:05 - INFO - trainer -   ***** Eval results *****
07/21/2021 02:45:05 - INFO - trainer -     eval loss: 1.4533112335950136
07/21/2021 02:45:05 - INFO - trainer -     acc = 0.7991071428571429
07/21/2021 02:45:05 - INFO - trainer -     f1 = 0.7065535503910604
Epoch:  77%|███████▋  | 23/30 [05:34<01:39, 14.22s/it]07/21/2021 02:45:11 - INFO - trainer -   ***** Running evaluation *****
07/21/2021 02:45:11 - INFO - trainer -     Num examples = 1120
07/21/2021 02:45:11 - INFO - trainer -     Batch size = 32
07/21/2021 

07/21/2021 02:46:46 - INFO - trainer -     Batch size = 32
07/21/2021 02:46:47 - INFO - trainer -   ***** Eval results *****
07/21/2021 02:46:47 - INFO - trainer -     eval loss: 1.6814880924565452
07/21/2021 02:46:47 - INFO - trainer -     acc = 0.8017857142857143
07/21/2021 02:46:47 - INFO - trainer -     f1 = 0.700741121130383
Epoch: 100%|██████████| 30/30 [07:15<00:00, 14.52s/it]
07/21/2021 02:46:51 - INFO - __main__ -     acc = 0.8232142857142857
07/21/2021 02:46:51 - INFO - __main__ -     f1 = 0.7349991564589461


## Pure Bert, 

### output 6th hidden layer, (pooled_output = outputs[2][8][:,0, :])

### relu((768,256))Layer_8) ->(256,3)

In [4]:
_, _,  all_eval_results = train(args, train_dataset, model, test_dataset)

if len(all_eval_results):
    best_eval_result = max(all_eval_results, key=lambda x: x['acc']) 
    for key in sorted(best_eval_result.keys()):
        logger.info("  %s = %s", key, str(best_eval_result[key]))

07/21/2021 02:50:46 - INFO - trainer -   ***** Running training *****
07/21/2021 02:50:46 - INFO - trainer -     Num examples = 3602
07/21/2021 02:50:46 - INFO - trainer -     Num Epochs = 30
07/21/2021 02:50:46 - INFO - trainer -     Instantaneous batch size per GPU = 16
07/21/2021 02:50:46 - INFO - trainer -     Gradient Accumulation steps = 2
07/21/2021 02:50:46 - INFO - trainer -     Total optimization steps = 3390
Epoch:   0%|          | 0/30 [00:00<?, ?it/s]07/21/2021 02:50:52 - INFO - trainer -   ***** Running evaluation *****
07/21/2021 02:50:52 - INFO - trainer -     Num examples = 1120
07/21/2021 02:50:52 - INFO - trainer -     Batch size = 32
07/21/2021 02:50:53 - INFO - trainer -   ***** Eval results *****
07/21/2021 02:50:53 - INFO - trainer -     eval loss: 0.7690382263490132
07/21/2021 02:50:53 - INFO - trainer -     acc = 0.7142857142857143
07/21/2021 02:50:53 - INFO - trainer -     f1 = 0.4911800890179812
07/21/2021 02:50:59 - INFO - trainer -   ***** Running evaluatio

07/21/2021 02:52:42 - INFO - trainer -     f1 = 0.689534674982731
07/21/2021 02:52:48 - INFO - trainer -   ***** Running evaluation *****
07/21/2021 02:52:48 - INFO - trainer -     Num examples = 1120
07/21/2021 02:52:48 - INFO - trainer -     Batch size = 32
07/21/2021 02:52:49 - INFO - trainer -   ***** Eval results *****
07/21/2021 02:52:49 - INFO - trainer -     eval loss: 0.9497437489884241
07/21/2021 02:52:49 - INFO - trainer -     acc = 0.8160714285714286
07/21/2021 02:52:49 - INFO - trainer -     f1 = 0.7281688948894699
07/21/2021 02:52:55 - INFO - trainer -   ***** Running evaluation *****
07/21/2021 02:52:55 - INFO - trainer -     Num examples = 1120
07/21/2021 02:52:55 - INFO - trainer -     Batch size = 32
07/21/2021 02:52:56 - INFO - trainer -   ***** Eval results *****
07/21/2021 02:52:56 - INFO - trainer -     eval loss: 0.9329991502953427
07/21/2021 02:52:56 - INFO - trainer -     acc = 0.8125
07/21/2021 02:52:56 - INFO - trainer -     f1 = 0.7230331773962947
Epoch:  27

07/21/2021 02:54:44 - INFO - trainer -     acc = 0.8008928571428572
07/21/2021 02:54:44 - INFO - trainer -     f1 = 0.7133682165487473
Epoch:  50%|█████     | 15/30 [04:03<04:00, 16.06s/it]07/21/2021 02:54:50 - INFO - trainer -   ***** Running evaluation *****
07/21/2021 02:54:50 - INFO - trainer -     Num examples = 1120
07/21/2021 02:54:50 - INFO - trainer -     Batch size = 32
07/21/2021 02:54:51 - INFO - trainer -   ***** Eval results *****
07/21/2021 02:54:51 - INFO - trainer -     eval loss: 1.5151829911795045
07/21/2021 02:54:51 - INFO - trainer -     acc = 0.8026785714285715
07/21/2021 02:54:51 - INFO - trainer -     f1 = 0.7075090964997695
07/21/2021 02:54:57 - INFO - trainer -   ***** Running evaluation *****
07/21/2021 02:54:57 - INFO - trainer -     Num examples = 1120
07/21/2021 02:54:57 - INFO - trainer -     Batch size = 32
07/21/2021 02:54:58 - INFO - trainer -   ***** Eval results *****
07/21/2021 02:54:58 - INFO - trainer -     eval loss: 1.4618967817863449
07/21/2021

07/21/2021 02:56:45 - INFO - trainer -   ***** Eval results *****
07/21/2021 02:56:45 - INFO - trainer -     eval loss: 1.3938810957329615
07/21/2021 02:56:45 - INFO - trainer -     acc = 0.8151785714285714
07/21/2021 02:56:45 - INFO - trainer -     f1 = 0.7312672407696171
07/21/2021 02:56:50 - INFO - trainer -   ***** Running evaluation *****
07/21/2021 02:56:50 - INFO - trainer -     Num examples = 1120
07/21/2021 02:56:50 - INFO - trainer -     Batch size = 32
07/21/2021 02:56:52 - INFO - trainer -   ***** Eval results *****
07/21/2021 02:56:52 - INFO - trainer -     eval loss: 1.3192238743816103
07/21/2021 02:56:52 - INFO - trainer -     acc = 0.8116071428571429
07/21/2021 02:56:52 - INFO - trainer -     f1 = 0.7304325421324415
Epoch:  77%|███████▋  | 23/30 [06:11<01:50, 15.78s/it]07/21/2021 02:56:57 - INFO - trainer -   ***** Running evaluation *****
07/21/2021 02:56:57 - INFO - trainer -     Num examples = 1120
07/21/2021 02:56:57 - INFO - trainer -     Batch size = 32
07/21/2021

07/21/2021 02:58:43 - INFO - trainer -     Batch size = 32
07/21/2021 02:58:44 - INFO - trainer -   ***** Eval results *****
07/21/2021 02:58:44 - INFO - trainer -     eval loss: 1.285363844935117
07/21/2021 02:58:44 - INFO - trainer -     acc = 0.8205357142857143
07/21/2021 02:58:44 - INFO - trainer -     f1 = 0.7368253373088219
Epoch: 100%|██████████| 30/30 [08:02<00:00, 16.10s/it]
07/21/2021 02:58:49 - INFO - __main__ -     acc = 0.8339285714285715
07/21/2021 02:58:49 - INFO - __main__ -     f1 = 0.7525422305130777


## Pure Bert,

### (768, 768) layer 11 + (768, 768) Layer 12 =>(768,256)=>relu=>(256,3)

In [7]:
_, _,  all_eval_results = train(args, train_dataset, model, test_dataset)

if len(all_eval_results):
    best_eval_result = max(all_eval_results, key=lambda x: x['acc']) 
    for key in sorted(best_eval_result.keys()):
        logger.info("  %s = %s", key, str(best_eval_result[key]))

07/20/2021 03:51:01 - INFO - trainer -   ***** Running training *****
07/20/2021 03:51:01 - INFO - trainer -     Num examples = 3602
07/20/2021 03:51:01 - INFO - trainer -     Num Epochs = 30
07/20/2021 03:51:01 - INFO - trainer -     Instantaneous batch size per GPU = 16
07/20/2021 03:51:01 - INFO - trainer -     Gradient Accumulation steps = 2
07/20/2021 03:51:01 - INFO - trainer -     Total optimization steps = 3390
Epoch:   0%|          | 0/30 [00:00<?, ?it/s]07/20/2021 03:51:09 - INFO - trainer -   ***** Running evaluation *****
07/20/2021 03:51:09 - INFO - trainer -     Num examples = 1120
07/20/2021 03:51:09 - INFO - trainer -     Batch size = 32
07/20/2021 03:51:10 - INFO - trainer -   ***** Eval results *****
07/20/2021 03:51:10 - INFO - trainer -     eval loss: 0.6244564269270215
07/20/2021 03:51:10 - INFO - trainer -     acc = 0.7383928571428572
07/20/2021 03:51:10 - INFO - trainer -     f1 = 0.6098884064937078
07/20/2021 03:51:17 - INFO - trainer -   ***** Running evaluatio

07/20/2021 03:53:25 - INFO - trainer -     f1 = 0.7446848310146196
07/20/2021 03:53:33 - INFO - trainer -   ***** Running evaluation *****
07/20/2021 03:53:33 - INFO - trainer -     Num examples = 1120
07/20/2021 03:53:33 - INFO - trainer -     Batch size = 32
07/20/2021 03:53:34 - INFO - trainer -   ***** Eval results *****
07/20/2021 03:53:34 - INFO - trainer -     eval loss: 0.9158601333520242
07/20/2021 03:53:34 - INFO - trainer -     acc = 0.8303571428571429
07/20/2021 03:53:34 - INFO - trainer -     f1 = 0.7427860394216753
07/20/2021 03:53:42 - INFO - trainer -   ***** Running evaluation *****
07/20/2021 03:53:42 - INFO - trainer -     Num examples = 1120
07/20/2021 03:53:42 - INFO - trainer -     Batch size = 32
07/20/2021 03:53:43 - INFO - trainer -   ***** Eval results *****
07/20/2021 03:53:43 - INFO - trainer -     eval loss: 0.8037052731096213
07/20/2021 03:53:43 - INFO - trainer -     acc = 0.8392857142857143
07/20/2021 03:53:43 - INFO - trainer -     f1 = 0.75034719320367

07/20/2021 03:55:53 - INFO - trainer -     acc = 0.8428571428571429
07/20/2021 03:55:53 - INFO - trainer -     f1 = 0.7722686202686203
Epoch:  50%|█████     | 15/30 [04:58<04:49, 19.32s/it]07/20/2021 03:56:00 - INFO - trainer -   ***** Running evaluation *****
07/20/2021 03:56:00 - INFO - trainer -     Num examples = 1120
07/20/2021 03:56:00 - INFO - trainer -     Batch size = 32
07/20/2021 03:56:01 - INFO - trainer -   ***** Eval results *****
07/20/2021 03:56:01 - INFO - trainer -     eval loss: 1.0461685159056546
07/20/2021 03:56:01 - INFO - trainer -     acc = 0.8508928571428571
07/20/2021 03:56:01 - INFO - trainer -     f1 = 0.7801918239027962
07/20/2021 03:56:09 - INFO - trainer -   ***** Running evaluation *****
07/20/2021 03:56:09 - INFO - trainer -     Num examples = 1120
07/20/2021 03:56:09 - INFO - trainer -     Batch size = 32
07/20/2021 03:56:10 - INFO - trainer -   ***** Eval results *****
07/20/2021 03:56:10 - INFO - trainer -     eval loss: 1.374416219307438
07/20/2021 

07/20/2021 03:58:19 - INFO - trainer -   ***** Eval results *****
07/20/2021 03:58:19 - INFO - trainer -     eval loss: 1.3009721975050135
07/20/2021 03:58:19 - INFO - trainer -     acc = 0.8339285714285715
07/20/2021 03:58:19 - INFO - trainer -     f1 = 0.7609934840446718
07/20/2021 03:58:26 - INFO - trainer -   ***** Running evaluation *****
07/20/2021 03:58:26 - INFO - trainer -     Num examples = 1120
07/20/2021 03:58:26 - INFO - trainer -     Batch size = 32
07/20/2021 03:58:27 - INFO - trainer -   ***** Eval results *****
07/20/2021 03:58:27 - INFO - trainer -     eval loss: 1.482406623641977
07/20/2021 03:58:27 - INFO - trainer -     acc = 0.8321428571428572
07/20/2021 03:58:27 - INFO - trainer -     f1 = 0.7502116587270722
Epoch:  77%|███████▋  | 23/30 [07:33<02:14, 19.18s/it]07/20/2021 03:58:34 - INFO - trainer -   ***** Running evaluation *****
07/20/2021 03:58:34 - INFO - trainer -     Num examples = 1120
07/20/2021 03:58:34 - INFO - trainer -     Batch size = 32
07/20/2021 

07/20/2021 04:00:45 - INFO - trainer -     Batch size = 32
07/20/2021 04:00:47 - INFO - trainer -   ***** Eval results *****
07/20/2021 04:00:47 - INFO - trainer -     eval loss: 1.425562096768512
07/20/2021 04:00:47 - INFO - trainer -     acc = 0.8410714285714286
07/20/2021 04:00:47 - INFO - trainer -     f1 = 0.759706869364663
Epoch: 100%|██████████| 30/30 [09:51<00:00, 19.73s/it]
07/20/2021 04:00:53 - INFO - __main__ -     acc = 0.8616071428571429
07/20/2021 04:00:53 - INFO - __main__ -     f1 = 0.8016136747594548


## Pure Bert 

### relu((768,256)layer_11+(768,256)layer_12)  -> (256,3)

In [7]:
_, _,  all_eval_results = train(args, train_dataset, model, test_dataset)

if len(all_eval_results):
    best_eval_result = max(all_eval_results, key=lambda x: x['acc']) 
    for key in sorted(best_eval_result.keys()):
        logger.info("  %s = %s", key, str(best_eval_result[key]))

07/20/2021 04:15:00 - INFO - trainer -   ***** Running training *****
07/20/2021 04:15:00 - INFO - trainer -     Num examples = 3602
07/20/2021 04:15:00 - INFO - trainer -     Num Epochs = 30
07/20/2021 04:15:00 - INFO - trainer -     Instantaneous batch size per GPU = 16
07/20/2021 04:15:00 - INFO - trainer -     Gradient Accumulation steps = 2
07/20/2021 04:15:00 - INFO - trainer -     Total optimization steps = 3390
Epoch:   0%|          | 0/30 [00:00<?, ?it/s]07/20/2021 04:15:08 - INFO - trainer -   ***** Running evaluation *****
07/20/2021 04:15:08 - INFO - trainer -     Num examples = 1120
07/20/2021 04:15:08 - INFO - trainer -     Batch size = 32
07/20/2021 04:15:09 - INFO - trainer -   ***** Eval results *****
07/20/2021 04:15:09 - INFO - trainer -     eval loss: 0.5461245524031776
07/20/2021 04:15:09 - INFO - trainer -     acc = 0.7705357142857143
07/20/2021 04:15:09 - INFO - trainer -     f1 = 0.6405555380858045
07/20/2021 04:15:17 - INFO - trainer -   ***** Running evaluatio

07/20/2021 04:17:22 - INFO - trainer -     f1 = 0.758017861627153
07/20/2021 04:17:30 - INFO - trainer -   ***** Running evaluation *****
07/20/2021 04:17:30 - INFO - trainer -     Num examples = 1120
07/20/2021 04:17:30 - INFO - trainer -     Batch size = 32
07/20/2021 04:17:31 - INFO - trainer -   ***** Eval results *****
07/20/2021 04:17:31 - INFO - trainer -     eval loss: 0.9813033352140337
07/20/2021 04:17:31 - INFO - trainer -     acc = 0.8339285714285715
07/20/2021 04:17:31 - INFO - trainer -     f1 = 0.7197973879042222
07/20/2021 04:17:38 - INFO - trainer -   ***** Running evaluation *****
07/20/2021 04:17:38 - INFO - trainer -     Num examples = 1120
07/20/2021 04:17:38 - INFO - trainer -     Batch size = 32
07/20/2021 04:17:40 - INFO - trainer -   ***** Eval results *****
07/20/2021 04:17:40 - INFO - trainer -     eval loss: 0.8288049257110938
07/20/2021 04:17:40 - INFO - trainer -     acc = 0.8375
07/20/2021 04:17:40 - INFO - trainer -     f1 = 0.7554090835014019
Epoch:  27

07/20/2021 04:19:51 - INFO - trainer -     acc = 0.8348214285714286
07/20/2021 04:19:51 - INFO - trainer -     f1 = 0.7506144403568098
Epoch:  50%|█████     | 15/30 [04:56<04:55, 19.73s/it]07/20/2021 04:19:58 - INFO - trainer -   ***** Running evaluation *****
07/20/2021 04:19:58 - INFO - trainer -     Num examples = 1120
07/20/2021 04:19:58 - INFO - trainer -     Batch size = 32
07/20/2021 04:19:59 - INFO - trainer -   ***** Eval results *****
07/20/2021 04:19:59 - INFO - trainer -     eval loss: 1.200779303908348
07/20/2021 04:19:59 - INFO - trainer -     acc = 0.8241071428571428
07/20/2021 04:19:59 - INFO - trainer -     f1 = 0.7559529283492008
07/20/2021 04:20:07 - INFO - trainer -   ***** Running evaluation *****
07/20/2021 04:20:07 - INFO - trainer -     Num examples = 1120
07/20/2021 04:20:07 - INFO - trainer -     Batch size = 32
07/20/2021 04:20:08 - INFO - trainer -   ***** Eval results *****
07/20/2021 04:20:08 - INFO - trainer -     eval loss: 1.2054380647547078
07/20/2021 

07/20/2021 04:22:17 - INFO - trainer -   ***** Eval results *****
07/20/2021 04:22:17 - INFO - trainer -     eval loss: 1.3800136395838178
07/20/2021 04:22:17 - INFO - trainer -     acc = 0.8366071428571429
07/20/2021 04:22:17 - INFO - trainer -     f1 = 0.7395712474733472
07/20/2021 04:22:24 - INFO - trainer -   ***** Running evaluation *****
07/20/2021 04:22:24 - INFO - trainer -     Num examples = 1120
07/20/2021 04:22:24 - INFO - trainer -     Batch size = 32
07/20/2021 04:22:25 - INFO - trainer -   ***** Eval results *****
07/20/2021 04:22:25 - INFO - trainer -     eval loss: 1.1983618216938339
07/20/2021 04:22:25 - INFO - trainer -     acc = 0.8196428571428571
07/20/2021 04:22:25 - INFO - trainer -     f1 = 0.7227727445241446
Epoch:  77%|███████▋  | 23/30 [07:32<02:13, 19.10s/it]07/20/2021 04:22:33 - INFO - trainer -   ***** Running evaluation *****
07/20/2021 04:22:33 - INFO - trainer -     Num examples = 1120
07/20/2021 04:22:33 - INFO - trainer -     Batch size = 32
07/20/2021

07/20/2021 04:24:41 - INFO - trainer -     Batch size = 32
07/20/2021 04:24:42 - INFO - trainer -   ***** Eval results *****
07/20/2021 04:24:42 - INFO - trainer -     eval loss: 1.2051530962011643
07/20/2021 04:24:42 - INFO - trainer -     acc = 0.8455357142857143
07/20/2021 04:24:42 - INFO - trainer -     f1 = 0.7645016446668462
Epoch: 100%|██████████| 30/30 [09:47<00:00, 19.60s/it]
07/20/2021 04:24:48 - INFO - __main__ -     acc = 0.8508928571428571
07/20/2021 04:24:48 - INFO - __main__ -     f1 = 0.7647611102435462


## Pure Bert  
### relu((768,256)Layer_11+(768,256)Layer_12) -> relu(256,256) ->(256,3)

In [7]:
_, _,  all_eval_results = train(args, train_dataset, model, test_dataset)

if len(all_eval_results):
    best_eval_result = max(all_eval_results, key=lambda x: x['acc']) 
    for key in sorted(best_eval_result.keys()):
        logger.info("  %s = %s", key, str(best_eval_result[key]))

07/20/2021 04:43:46 - INFO - trainer -   ***** Running training *****
07/20/2021 04:43:46 - INFO - trainer -     Num examples = 3602
07/20/2021 04:43:46 - INFO - trainer -     Num Epochs = 30
07/20/2021 04:43:46 - INFO - trainer -     Instantaneous batch size per GPU = 16
07/20/2021 04:43:46 - INFO - trainer -     Gradient Accumulation steps = 2
07/20/2021 04:43:46 - INFO - trainer -     Total optimization steps = 3390
Epoch:   0%|          | 0/30 [00:00<?, ?it/s]07/20/2021 04:43:54 - INFO - trainer -   ***** Running evaluation *****
07/20/2021 04:43:54 - INFO - trainer -     Num examples = 1120
07/20/2021 04:43:54 - INFO - trainer -     Batch size = 32
07/20/2021 04:43:55 - INFO - trainer -   ***** Eval results *****
07/20/2021 04:43:55 - INFO - trainer -     eval loss: 0.6668469688722065
07/20/2021 04:43:55 - INFO - trainer -     acc = 0.7339285714285714
07/20/2021 04:43:55 - INFO - trainer -     f1 = 0.49368206247962093
07/20/2021 04:44:03 - INFO - trainer -   ***** Running evaluati

07/20/2021 04:46:12 - INFO - trainer -     f1 = 0.7554410551779479
07/20/2021 04:46:20 - INFO - trainer -   ***** Running evaluation *****
07/20/2021 04:46:20 - INFO - trainer -     Num examples = 1120
07/20/2021 04:46:20 - INFO - trainer -     Batch size = 32
07/20/2021 04:46:21 - INFO - trainer -   ***** Eval results *****
07/20/2021 04:46:21 - INFO - trainer -     eval loss: 0.8759184832535019
07/20/2021 04:46:21 - INFO - trainer -     acc = 0.8321428571428572
07/20/2021 04:46:21 - INFO - trainer -     f1 = 0.7596056965093662
07/20/2021 04:46:29 - INFO - trainer -   ***** Running evaluation *****
07/20/2021 04:46:29 - INFO - trainer -     Num examples = 1120
07/20/2021 04:46:29 - INFO - trainer -     Batch size = 32
07/20/2021 04:46:30 - INFO - trainer -   ***** Eval results *****
07/20/2021 04:46:30 - INFO - trainer -     eval loss: 0.7259916649998299
07/20/2021 04:46:30 - INFO - trainer -     acc = 0.8401785714285714
07/20/2021 04:46:30 - INFO - trainer -     f1 = 0.76111964851467

07/20/2021 04:48:46 - INFO - trainer -     acc = 0.8375
07/20/2021 04:48:46 - INFO - trainer -     f1 = 0.7605880138899007
Epoch:  50%|█████     | 15/30 [05:07<05:04, 20.29s/it]07/20/2021 04:48:54 - INFO - trainer -   ***** Running evaluation *****
07/20/2021 04:48:54 - INFO - trainer -     Num examples = 1120
07/20/2021 04:48:54 - INFO - trainer -     Batch size = 32
07/20/2021 04:48:55 - INFO - trainer -   ***** Eval results *****
07/20/2021 04:48:55 - INFO - trainer -     eval loss: 1.3118484293239556
07/20/2021 04:48:55 - INFO - trainer -     acc = 0.83125
07/20/2021 04:48:55 - INFO - trainer -     f1 = 0.7358937691176433
07/20/2021 04:49:03 - INFO - trainer -   ***** Running evaluation *****
07/20/2021 04:49:03 - INFO - trainer -     Num examples = 1120
07/20/2021 04:49:03 - INFO - trainer -     Batch size = 32
07/20/2021 04:49:04 - INFO - trainer -   ***** Eval results *****
07/20/2021 04:49:04 - INFO - trainer -     eval loss: 1.3742698199369312
07/20/2021 04:49:04 - INFO - trai

07/20/2021 04:51:18 - INFO - trainer -   ***** Eval results *****
07/20/2021 04:51:18 - INFO - trainer -     eval loss: 1.3855525286780903
07/20/2021 04:51:18 - INFO - trainer -     acc = 0.8392857142857143
07/20/2021 04:51:18 - INFO - trainer -     f1 = 0.755908443041787
07/20/2021 04:51:26 - INFO - trainer -   ***** Running evaluation *****
07/20/2021 04:51:26 - INFO - trainer -     Num examples = 1120
07/20/2021 04:51:26 - INFO - trainer -     Batch size = 32
07/20/2021 04:51:27 - INFO - trainer -   ***** Eval results *****
07/20/2021 04:51:27 - INFO - trainer -     eval loss: 1.2975916159929641
07/20/2021 04:51:27 - INFO - trainer -     acc = 0.8428571428571429
07/20/2021 04:51:27 - INFO - trainer -     f1 = 0.7669593104079672
Epoch:  77%|███████▋  | 23/30 [07:48<02:19, 20.00s/it]07/20/2021 04:51:35 - INFO - trainer -   ***** Running evaluation *****
07/20/2021 04:51:35 - INFO - trainer -     Num examples = 1120
07/20/2021 04:51:35 - INFO - trainer -     Batch size = 32
07/20/2021 

07/20/2021 04:53:54 - INFO - trainer -     Batch size = 32
07/20/2021 04:53:55 - INFO - trainer -   ***** Eval results *****
07/20/2021 04:53:55 - INFO - trainer -     eval loss: 1.357256876025245
07/20/2021 04:53:55 - INFO - trainer -     acc = 0.84375
07/20/2021 04:53:55 - INFO - trainer -     f1 = 0.766839867665806
Epoch: 100%|██████████| 30/30 [10:15<00:00, 20.51s/it]
07/20/2021 04:54:01 - INFO - __main__ -     acc = 0.85
07/20/2021 04:54:01 - INFO - __main__ -     f1 = 0.7840141017567058
