In [6]:
#env library
import nltk
nltk.download('punkt')

[nltk_data] Downloading package punkt to /home/ubuntu/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

### Device Check

In [1]:
import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
torch.cuda.current_device()
torch.cuda.device(0)


<torch.cuda.device at 0x7f65443ceba8>

## Loading Dataset, Model

In [2]:
# coding=utf-8
import argparse
import logging
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "2"
import random

import numpy as np
import pandas as pd
import torch
from transformers import (BertConfig, BertForTokenClassification,
                                  BertTokenizer)
from torch.utils.data import DataLoader

from datasets import load_datasets_and_vocabs
from model import (Aspect_Text_GAT_ours,
                    Pure_Bert, Aspect_Bert_GAT, Aspect_Text_GAT_only)
from trainer import train

logger = logging.getLogger(__name__)

def set_seed(args):
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

def parse_args(args):
    parser = argparse.ArgumentParser()

    # Required parameters
    parser.add_argument('--dataset_name', type=str, default='rest',
                        choices=['rest', 'laptop', 'twitter'],
                        help='Choose absa dataset.')
    parser.add_argument('--output_dir', type=str, default='/data1/SHENWZH/ABSA_online/data/output-gcn',
                        help='Directory to store intermedia data, such as vocab, embeddings, tags_vocab.')
    parser.add_argument('--num_classes', type=int, default=3,
                        help='Number of classes of ABSA.')


    parser.add_argument('--cuda_id', type=str, default='3',
                        help='Choose which GPUs to run')
    parser.add_argument('--seed', type=int, default=2019,
                        help='random seed for initialization')

    # Model parameters
    parser.add_argument('--glove_dir', type=str, default='/data1/SHENWZH/wordvec',
                        help='Directory storing glove embeddings')
    parser.add_argument('--bert_model_dir', type=str, default='/data1/SHENWZH/models/bert_base',
                        help='Path to pre-trained Bert model.')
    parser.add_argument('--pure_bert', action='store_true',
                        help='Cat text and aspect, [cls] to predict.')
    
    parser.add_argument('--pure_bert_layer_agg', action='store_true',
                        help='Pure bert layer aggregation enable/not')

    parser.add_argument('--pure_bert_layer_agg_list', type=str, default="12",
                        help='Pure Bert layer number to aggregate')

    parser.add_argument('--pure_bert_linear_layer_count', type=int, default=2,
                        help='Pure Bert final linear layer count')
    
    parser.add_argument('--gat_bert', action='store_true',
                        help='Cat text and aspect, [cls] to predict.')

    parser.add_argument('--highway', action='store_true',
                        help='Use highway embed.')

    parser.add_argument('--num_layers', type=int, default=2,
                        help='Number of layers of bilstm or highway or elmo.')
    
    parser.add_argument('--hidden_layer_no', type=int, default=12,
                        help='NO of hidder layer of bert state output to FC.')
 
    parser.add_argument('--frozen_percent', type=float, default = 0.1,
                       help='Percentage of the frozen layers.')


    parser.add_argument('--add_non_connect',  type= bool, default=True,
                        help='Add a sepcial "non-connect" relation for aspect with no direct connection.')
    parser.add_argument('--multi_hop',  type= bool, default=True,
                        help='Multi hop non connection.')
    parser.add_argument('--max_hop', type = int, default=4,
                        help='max number of hops')


    parser.add_argument('--num_heads', type=int, default=6,
                        help='Number of heads for gat.')
    
    parser.add_argument('--dropout', type=float, default=0,
                        help='Dropout rate for embedding.')


    parser.add_argument('--num_gcn_layers', type=int, default=1,
                        help='Number of GCN layers.')
    parser.add_argument('--gcn_mem_dim', type=int, default=300,
                        help='Dimension of the W in GCN.')
    parser.add_argument('--gcn_dropout', type=float, default=0.2,
                        help='Dropout rate for GCN.')
    # GAT
    parser.add_argument('--gat', action='store_true',
                        help='GAT')
    parser.add_argument('--gat_our', action='store_true',
                        help='GAT_our')
    parser.add_argument('--gat_attention_type', type = str, choices=['linear','dotprod','gcn'], default='dotprod',
                        help='The attention used for gat')

    parser.add_argument('--embedding_type', type=str,default='glove', choices=['glove','bert'])
    parser.add_argument('--embedding_dim', type=int, default=300,
                        help='Dimension of glove embeddings')
    parser.add_argument('--dep_relation_embed_dim', type=int, default=300,
                        help='Dimension for dependency relation embeddings.')

    parser.add_argument('--hidden_size', type=int, default=300,
                        help='Hidden size of bilstm, in early stage.')
    parser.add_argument('--final_hidden_size', type=int, default=300,
                        help='Hidden size of bilstm, in early stage.')
    parser.add_argument('--num_mlps', type=int, default=2,
                        help='Number of mlps in the last of model.')

    # Training parameters
    parser.add_argument("--per_gpu_train_batch_size", default=16, type=int,
                        help="Batch size per GPU/CPU for training.")
    parser.add_argument("--per_gpu_eval_batch_size", default=32, type=int,
                        help="Batch size per GPU/CPU for evaluation.")
    parser.add_argument('--gradient_accumulation_steps', type=int, default=2,
                        help="Number of updates steps to accumulate before performing a backward/update pass.")
    parser.add_argument("--learning_rate", default=1e-3, type=float,
                        help="The initial learning rate for Adam.")
    
    parser.add_argument("--weight_decay", default=0.0, type=float,
                        help="Weight deay if we apply some.")
    parser.add_argument("--adam_epsilon", default=1e-8, type=float,
                        help="Epsilon for Adam optimizer.")

    parser.add_argument("--max_grad_norm", default=1.0, type=float,
                        help="Max gradient norm.")
    parser.add_argument("--num_train_epochs", default=30.0, type=float,
                        help="Total number of training epochs to perform.")
    parser.add_argument("--max_steps", default=-1, type=int,
                        help="If > 0: set total number of training steps(that update the weights) to perform. Override num_train_epochs.")
    parser.add_argument('--logging_steps', type=int, default=50,
                        help="Log every X updates steps.")
    
    return parser.parse_args(args)


def check_args(args):
    '''
    eliminate confilct situations
    
    '''
    logger.info(vars(args))



In [3]:
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
                        datefmt='%m/%d/%Y %H:%M:%S',
                        level=logging.INFO)
    
# Parse args

args_str = "--embedding_type bert --output_dir data/output-gcn --dropout 0.3 --hidden_size 200 --learning_rate 5e-5 --bert_model_dir ./test/saved_model --pure_bert --hidden_layer_no 12 --frozen_percent 0"
#args = parse_args(['--gat_our', '--highway', '--num_heads', '7', '--dropout', '0.8', '--output_dir',
#                   'output/r-gat', '--glove_dir', 'glove', '--cuda_id', '0'])
args = parse_args(args_str.split(' '))
check_args(args)

# Setup CUDA, GPU training
os.environ["CUDA_VISIBLE_DEVICES"] = args.cuda_id
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
args.device = device
logger.info('Device is %s', args.device)

# Set seed
set_seed(args)

# Bert, load pretrained model and tokenizer, check if neccesary to put bert here
if args.embedding_type == 'bert':
    tokenizer = BertTokenizer.from_pretrained(args.bert_model_dir)
    args.tokenizer = tokenizer

# Load datasets and vocabs
train_dataset, test_dataset, word_vocab, dep_tag_vocab, pos_tag_vocab= load_datasets_and_vocabs(args)




07/23/2021 03:00:25 - INFO - __main__ -   {'dataset_name': 'rest', 'output_dir': 'data/output-gcn', 'num_classes': 3, 'cuda_id': '3', 'seed': 2019, 'glove_dir': '/data1/SHENWZH/wordvec', 'bert_model_dir': './test/saved_model', 'pure_bert': True, 'pure_bert_layer_agg': False, 'pure_bert_layer_agg_list': '12', 'pure_bert_linear_layer_count': 2, 'gat_bert': False, 'highway': False, 'num_layers': 2, 'hidden_layer_no': 12, 'frozen_percent': 0.0, 'add_non_connect': True, 'multi_hop': True, 'max_hop': 4, 'num_heads': 6, 'dropout': 0.3, 'num_gcn_layers': 1, 'gcn_mem_dim': 300, 'gcn_dropout': 0.2, 'gat': False, 'gat_our': False, 'gat_attention_type': 'dotprod', 'embedding_type': 'bert', 'embedding_dim': 300, 'dep_relation_embed_dim': 300, 'hidden_size': 200, 'final_hidden_size': 300, 'num_mlps': 2, 'per_gpu_train_batch_size': 16, 'per_gpu_eval_batch_size': 32, 'gradient_accumulation_steps': 2, 'learning_rate': 5e-05, 'weight_decay': 0.0, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_e

# Pure Bert
## Single Layer 


In [5]:
best_acc_eval_results_by_layer = {}
best_f1_eval_results_by_layer = {}

### Output of Bert, output[12] All Frozen

In [11]:
args.hidden_layer_no = 12

# model = Aspect_Text_Multi_Syntax_Encoding(args, dep_tag_vocab['len'], pos_tag_vocab['len'])
if args.pure_bert:
    model = Pure_Bert(args)
elif args.gat_bert:
    model = Aspect_Bert_GAT(args, dep_tag_vocab['len'], pos_tag_vocab['len'])  # R-GAT + Bert
elif args.gat_our:
    model = Aspect_Text_GAT_ours(args, dep_tag_vocab['len'], pos_tag_vocab['len']) # R-GAT with reshaped tree
else:
    model = Aspect_Text_GAT_only(args, dep_tag_vocab['len'], pos_tag_vocab['len'])  # original GAT with reshaped tree

model.to(args.device)
# Train


_, _,  all_eval_results = train(args, train_dataset, model, test_dataset)

if len(all_eval_results):
    best_acc_eval_result = max(all_eval_results, key=lambda x: x['acc']) 
    best_f1_eval_result = max(all_eval_results, key=lambda x: x['f1'])
    
    for key in sorted(best_acc_eval_result.keys()):
        logger.info("  %s = %s", key, str(best_acc_eval_result[key]))
        
    best_acc_eval_results_by_layer[(args.hidden_layer_no, arg.frozen_percent)] = best_acc_eval_result
    best_f1_eval_results_by_layer[(args.hidden_layer_no, arg.frozen_percent)] = best_f1_eval_result

07/23/2021 02:14:52 - INFO - model -   Hidden Layer 12
07/23/2021 02:14:52 - INFO - trainer -   ***** Running training *****
07/23/2021 02:14:52 - INFO - trainer -     Num examples = 3602
07/23/2021 02:14:52 - INFO - trainer -     Num Epochs = 30
07/23/2021 02:14:52 - INFO - trainer -     Instantaneous batch size per GPU = 16
07/23/2021 02:14:52 - INFO - trainer -     Gradient Accumulation steps = 2
07/23/2021 02:14:52 - INFO - trainer -     Total optimization steps = 3390
Epoch:   0%|          | 0/30 [00:00<?, ?it/s]07/23/2021 02:14:55 - INFO - trainer -   ***** Running evaluation *****
07/23/2021 02:14:55 - INFO - trainer -     Num examples = 1120
07/23/2021 02:14:55 - INFO - trainer -     Batch size = 32
07/23/2021 02:14:56 - INFO - trainer -   ***** Eval results *****
07/23/2021 02:14:56 - INFO - trainer -     eval loss: 0.9064929451261248
07/23/2021 02:14:56 - INFO - trainer -     acc = 0.6491071428571429
07/23/2021 02:14:56 - INFO - trainer -     f1 = 0.2624075076700957
07/23/202

07/23/2021 02:15:48 - INFO - trainer -     acc = 0.66875
07/23/2021 02:15:48 - INFO - trainer -     f1 = 0.35051252443039344
07/23/2021 02:15:51 - INFO - trainer -   ***** Running evaluation *****
07/23/2021 02:15:51 - INFO - trainer -     Num examples = 1120
07/23/2021 02:15:51 - INFO - trainer -     Batch size = 32
07/23/2021 02:15:52 - INFO - trainer -   ***** Eval results *****
07/23/2021 02:15:52 - INFO - trainer -     eval loss: 0.7815773725509644
07/23/2021 02:15:52 - INFO - trainer -     acc = 0.6660714285714285
07/23/2021 02:15:52 - INFO - trainer -     f1 = 0.34528654773689427
07/23/2021 02:15:54 - INFO - trainer -   ***** Running evaluation *****
07/23/2021 02:15:54 - INFO - trainer -     Num examples = 1120
07/23/2021 02:15:54 - INFO - trainer -     Batch size = 32
07/23/2021 02:15:55 - INFO - trainer -   ***** Eval results *****
07/23/2021 02:15:55 - INFO - trainer -     eval loss: 0.7860433169773646
07/23/2021 02:15:55 - INFO - trainer -     acc = 0.6767857142857143
07/23

07/23/2021 02:16:48 - INFO - trainer -     eval loss: 0.7194710178034646
07/23/2021 02:16:48 - INFO - trainer -     acc = 0.7044642857142858
07/23/2021 02:16:48 - INFO - trainer -     f1 = 0.46891240565291886
Epoch:  50%|█████     | 15/30 [01:57<01:56,  7.75s/it]07/23/2021 02:16:50 - INFO - trainer -   ***** Running evaluation *****
07/23/2021 02:16:50 - INFO - trainer -     Num examples = 1120
07/23/2021 02:16:50 - INFO - trainer -     Batch size = 32
07/23/2021 02:16:51 - INFO - trainer -   ***** Eval results *****
07/23/2021 02:16:51 - INFO - trainer -     eval loss: 0.7247870666640145
07/23/2021 02:16:51 - INFO - trainer -     acc = 0.7008928571428571
07/23/2021 02:16:51 - INFO - trainer -     f1 = 0.45426988613989755
07/23/2021 02:16:53 - INFO - trainer -   ***** Running evaluation *****
07/23/2021 02:16:53 - INFO - trainer -     Num examples = 1120
07/23/2021 02:16:53 - INFO - trainer -     Batch size = 32
07/23/2021 02:16:55 - INFO - trainer -   ***** Eval results *****
07/23/20

07/23/2021 02:17:47 - INFO - trainer -     Batch size = 32
07/23/2021 02:17:49 - INFO - trainer -   ***** Eval results *****
07/23/2021 02:17:49 - INFO - trainer -     eval loss: 0.6892816594668797
07/23/2021 02:17:49 - INFO - trainer -     acc = 0.7160714285714286
07/23/2021 02:17:49 - INFO - trainer -     f1 = 0.4912712749682577
07/23/2021 02:17:51 - INFO - trainer -   ***** Running evaluation *****
07/23/2021 02:17:51 - INFO - trainer -     Num examples = 1120
07/23/2021 02:17:51 - INFO - trainer -     Batch size = 32
07/23/2021 02:17:52 - INFO - trainer -   ***** Eval results *****
07/23/2021 02:17:52 - INFO - trainer -     eval loss: 0.6935524501970836
07/23/2021 02:17:52 - INFO - trainer -     acc = 0.7089285714285715
07/23/2021 02:17:52 - INFO - trainer -     f1 = 0.4695821032834326
Epoch:  77%|███████▋  | 23/30 [03:02<00:56,  8.09s/it]07/23/2021 02:17:55 - INFO - trainer -   ***** Running evaluation *****
07/23/2021 02:17:55 - INFO - trainer -     Num examples = 1120
07/23/2021

07/23/2021 02:18:47 - INFO - trainer -     Num examples = 1120
07/23/2021 02:18:47 - INFO - trainer -     Batch size = 32
07/23/2021 02:18:48 - INFO - trainer -   ***** Eval results *****
07/23/2021 02:18:48 - INFO - trainer -     eval loss: 0.6775206297636032
07/23/2021 02:18:48 - INFO - trainer -     acc = 0.7267857142857143
07/23/2021 02:18:48 - INFO - trainer -     f1 = 0.5151736707334873
Epoch: 100%|██████████| 30/30 [03:57<00:00,  7.92s/it]
07/23/2021 02:18:50 - INFO - __main__ -     acc = 0.7366071428571429
07/23/2021 02:18:50 - INFO - __main__ -     f1 = 0.5572514607994633


### Output of Bert, output[12] Frozen 0

In [4]:
args.hidden_layer_no = 12
args.frozen_percent = 0
# Build Model
# model = Aspect_Text_Multi_Syntax_Encoding(args, dep_tag_vocab['len'], pos_tag_vocab['len'])
if args.pure_bert:
    model = Pure_Bert(args)
elif args.gat_bert:
    model = Aspect_Bert_GAT(args, dep_tag_vocab['len'], pos_tag_vocab['len'])  # R-GAT + Bert
elif args.gat_our:
    model = Aspect_Text_GAT_ours(args, dep_tag_vocab['len'], pos_tag_vocab['len']) # R-GAT with reshaped tree
else:
    model = Aspect_Text_GAT_only(args, dep_tag_vocab['len'], pos_tag_vocab['len'])  # original GAT with reshaped tree

model.to(args.device)
# Train


_, _,  all_eval_results = train(args, train_dataset, model, test_dataset)

if len(all_eval_results):
    best_acc_eval_result = max(all_eval_results, key=lambda x: x['acc']) 
    best_f1_eval_result = max(all_eval_results, key=lambda x: x['f1'])
    
    for key in sorted(best_acc_eval_result.keys()):
        logger.info("  %s = %s", key, str(best_acc_eval_result[key]))
        
    best_acc_eval_results_by_layer[(args.hidden_layer_no, args.frozen_percent)] = best_acc_eval_result
    best_f1_eval_results_by_layer[(args.hidden_layer_no, args.frozen_percent)] = best_f1_eval_result

07/23/2021 03:00:39 - INFO - model -   Hidden Layer 12 frozen percentage 0
07/23/2021 03:00:41 - INFO - trainer -   ***** Running training *****
07/23/2021 03:00:41 - INFO - trainer -     Num examples = 3602
07/23/2021 03:00:41 - INFO - trainer -     Num Epochs = 30
07/23/2021 03:00:41 - INFO - trainer -     Instantaneous batch size per GPU = 16
07/23/2021 03:00:41 - INFO - trainer -     Gradient Accumulation steps = 2
07/23/2021 03:00:41 - INFO - trainer -     Total optimization steps = 3390
Epoch:   0%|          | 0/30 [00:00<?, ?it/s]07/23/2021 03:00:49 - INFO - trainer -   ***** Running evaluation *****
07/23/2021 03:00:49 - INFO - trainer -     Num examples = 1120
07/23/2021 03:00:49 - INFO - trainer -     Batch size = 32
07/23/2021 03:00:50 - INFO - trainer -   ***** Eval results *****
07/23/2021 03:00:50 - INFO - trainer -     eval loss: 0.6072911794696535
07/23/2021 03:00:50 - INFO - trainer -     acc = 0.7607142857142857
07/23/2021 03:00:50 - INFO - trainer -     f1 = 0.545665

07/23/2021 03:03:00 - INFO - trainer -     acc = 0.8580357142857142
07/23/2021 03:03:00 - INFO - trainer -     f1 = 0.7823453075568864
07/23/2021 03:03:08 - INFO - trainer -   ***** Running evaluation *****
07/23/2021 03:03:08 - INFO - trainer -     Num examples = 1120
07/23/2021 03:03:08 - INFO - trainer -     Batch size = 32
07/23/2021 03:03:09 - INFO - trainer -   ***** Eval results *****
07/23/2021 03:03:09 - INFO - trainer -     eval loss: 0.8051507401280105
07/23/2021 03:03:09 - INFO - trainer -     acc = 0.8410714285714286
07/23/2021 03:03:09 - INFO - trainer -     f1 = 0.7556317379138645
07/23/2021 03:03:17 - INFO - trainer -   ***** Running evaluation *****
07/23/2021 03:03:17 - INFO - trainer -     Num examples = 1120
07/23/2021 03:03:17 - INFO - trainer -     Batch size = 32
07/23/2021 03:03:18 - INFO - trainer -   ***** Eval results *****
07/23/2021 03:03:18 - INFO - trainer -     eval loss: 0.8362530197183202
07/23/2021 03:03:18 - INFO - trainer -     acc = 0.8392857142857

07/23/2021 03:05:27 - INFO - trainer -     eval loss: 1.0138292685467085
07/23/2021 03:05:27 - INFO - trainer -     acc = 0.8517857142857143
07/23/2021 03:05:27 - INFO - trainer -     f1 = 0.7839190006750095
Epoch:  50%|█████     | 15/30 [04:52<04:48, 19.22s/it]07/23/2021 03:05:34 - INFO - trainer -   ***** Running evaluation *****
07/23/2021 03:05:34 - INFO - trainer -     Num examples = 1120
07/23/2021 03:05:34 - INFO - trainer -     Batch size = 32
07/23/2021 03:05:36 - INFO - trainer -   ***** Eval results *****
07/23/2021 03:05:36 - INFO - trainer -     eval loss: 1.0893574889233735
07/23/2021 03:05:36 - INFO - trainer -     acc = 0.8410714285714286
07/23/2021 03:05:36 - INFO - trainer -     f1 = 0.7543556908053791
07/23/2021 03:05:43 - INFO - trainer -   ***** Running evaluation *****
07/23/2021 03:05:43 - INFO - trainer -     Num examples = 1120
07/23/2021 03:05:43 - INFO - trainer -     Batch size = 32
07/23/2021 03:05:44 - INFO - trainer -   ***** Eval results *****
07/23/2021

07/23/2021 03:07:50 - INFO - trainer -     Batch size = 32
07/23/2021 03:07:51 - INFO - trainer -   ***** Eval results *****
07/23/2021 03:07:51 - INFO - trainer -     eval loss: 1.1743714654606134
07/23/2021 03:07:51 - INFO - trainer -     acc = 0.8321428571428572
07/23/2021 03:07:51 - INFO - trainer -     f1 = 0.761935087584944
07/23/2021 03:07:58 - INFO - trainer -   ***** Running evaluation *****
07/23/2021 03:07:58 - INFO - trainer -     Num examples = 1120
07/23/2021 03:07:58 - INFO - trainer -     Batch size = 32
07/23/2021 03:07:59 - INFO - trainer -   ***** Eval results *****
07/23/2021 03:07:59 - INFO - trainer -     eval loss: 1.2112254739244235
07/23/2021 03:07:59 - INFO - trainer -     acc = 0.8366071428571429
07/23/2021 03:07:59 - INFO - trainer -     f1 = 0.7583271659048078
Epoch:  77%|███████▋  | 23/30 [07:25<02:12, 18.95s/it]07/23/2021 03:08:06 - INFO - trainer -   ***** Running evaluation *****
07/23/2021 03:08:06 - INFO - trainer -     Num examples = 1120
07/23/2021 

07/23/2021 03:10:14 - INFO - trainer -     Num examples = 1120
07/23/2021 03:10:14 - INFO - trainer -     Batch size = 32
07/23/2021 03:10:15 - INFO - trainer -   ***** Eval results *****
07/23/2021 03:10:15 - INFO - trainer -     eval loss: 1.15045785567823
07/23/2021 03:10:15 - INFO - trainer -     acc = 0.8392857142857143
07/23/2021 03:10:15 - INFO - trainer -     f1 = 0.7704319640260014
Epoch: 100%|██████████| 30/30 [09:39<00:00, 19.31s/it]
07/23/2021 03:10:20 - INFO - __main__ -     acc = 0.8580357142857142
07/23/2021 03:10:20 - INFO - __main__ -     f1 = 0.7823453075568864


NameError: name 'best_acc_eval_results_by_layer' is not defined

In [7]:
best_acc_eval_results_by_layer[(args.hidden_layer_no, args.frozen_percent)] = best_acc_eval_result
best_f1_eval_results_by_layer[(args.hidden_layer_no, args.frozen_percent)] = best_f1_eval_result

In [8]:
best_acc_eval_results_by_layer

{(12, 0): {'acc': 0.8580357142857142, 'f1': 0.7823453075568864}}

In [9]:
best_f1_eval_results_by_layer

{(12, 0): {'acc': 0.8580357142857142, 'f1': 0.794648953589351}}

### Output of Bert, output[12] Frozen 0.25

In [10]:
args.hidden_layer_no = 12
args.frozen_percent = 0.25
# Build Model
# model = Aspect_Text_Multi_Syntax_Encoding(args, dep_tag_vocab['len'], pos_tag_vocab['len'])
if args.pure_bert:
    model = Pure_Bert(args)
elif args.gat_bert:
    model = Aspect_Bert_GAT(args, dep_tag_vocab['len'], pos_tag_vocab['len'])  # R-GAT + Bert
elif args.gat_our:
    model = Aspect_Text_GAT_ours(args, dep_tag_vocab['len'], pos_tag_vocab['len']) # R-GAT with reshaped tree
else:
    model = Aspect_Text_GAT_only(args, dep_tag_vocab['len'], pos_tag_vocab['len'])  # original GAT with reshaped tree

model.to(args.device)
# Train


_, _,  all_eval_results = train(args, train_dataset, model, test_dataset)

if len(all_eval_results):
    best_acc_eval_result = max(all_eval_results, key=lambda x: x['acc']) 
    best_f1_eval_result = max(all_eval_results, key=lambda x: x['f1'])
    
    for key in sorted(best_acc_eval_result.keys()):
        logger.info("  %s = %s", key, str(best_acc_eval_result[key]))
        
    best_acc_eval_results_by_layer[(args.hidden_layer_no, args.frozen_percent)] = best_acc_eval_result
    best_f1_eval_results_by_layer[(args.hidden_layer_no, args.frozen_percent)] = best_f1_eval_result

07/23/2021 03:15:48 - INFO - model -   Hidden Layer 12 frozen percentage 0.25
07/23/2021 03:15:49 - INFO - trainer -   ***** Running training *****
07/23/2021 03:15:49 - INFO - trainer -     Num examples = 3602
07/23/2021 03:15:49 - INFO - trainer -     Num Epochs = 30
07/23/2021 03:15:49 - INFO - trainer -     Instantaneous batch size per GPU = 16
07/23/2021 03:15:49 - INFO - trainer -     Gradient Accumulation steps = 2
07/23/2021 03:15:49 - INFO - trainer -     Total optimization steps = 3390
Epoch:   0%|          | 0/30 [00:00<?, ?it/s]07/23/2021 03:15:54 - INFO - trainer -   ***** Running evaluation *****
07/23/2021 03:15:54 - INFO - trainer -     Num examples = 1120
07/23/2021 03:15:54 - INFO - trainer -     Batch size = 32
07/23/2021 03:15:56 - INFO - trainer -   ***** Eval results *****
07/23/2021 03:15:56 - INFO - trainer -     eval loss: 0.5889721155166626
07/23/2021 03:15:56 - INFO - trainer -     acc = 0.7821428571428571
07/23/2021 03:15:56 - INFO - trainer -     f1 = 0.620

07/23/2021 03:17:43 - INFO - trainer -     acc = 0.8419642857142857
07/23/2021 03:17:43 - INFO - trainer -     f1 = 0.7656895263010371
07/23/2021 03:17:49 - INFO - trainer -   ***** Running evaluation *****
07/23/2021 03:17:49 - INFO - trainer -     Num examples = 1120
07/23/2021 03:17:49 - INFO - trainer -     Batch size = 32
07/23/2021 03:17:50 - INFO - trainer -   ***** Eval results *****
07/23/2021 03:17:50 - INFO - trainer -     eval loss: 0.7680219803138503
07/23/2021 03:17:50 - INFO - trainer -     acc = 0.85
07/23/2021 03:17:50 - INFO - trainer -     f1 = 0.7725526024299278
07/23/2021 03:17:56 - INFO - trainer -   ***** Running evaluation *****
07/23/2021 03:17:56 - INFO - trainer -     Num examples = 1120
07/23/2021 03:17:56 - INFO - trainer -     Batch size = 32
07/23/2021 03:17:57 - INFO - trainer -   ***** Eval results *****
07/23/2021 03:17:57 - INFO - trainer -     eval loss: 0.691530282090285
07/23/2021 03:17:57 - INFO - trainer -     acc = 0.8616071428571429
07/23/2021 

07/23/2021 03:19:43 - INFO - trainer -     acc = 0.8553571428571428
07/23/2021 03:19:43 - INFO - trainer -     f1 = 0.7847646527953435
Epoch:  50%|█████     | 15/30 [03:59<03:57, 15.82s/it]07/23/2021 03:19:49 - INFO - trainer -   ***** Running evaluation *****
07/23/2021 03:19:49 - INFO - trainer -     Num examples = 1120
07/23/2021 03:19:49 - INFO - trainer -     Batch size = 32
07/23/2021 03:19:50 - INFO - trainer -   ***** Eval results *****
07/23/2021 03:19:50 - INFO - trainer -     eval loss: 1.158260177055906
07/23/2021 03:19:50 - INFO - trainer -     acc = 0.84375
07/23/2021 03:19:50 - INFO - trainer -     f1 = 0.7532528634132382
07/23/2021 03:19:56 - INFO - trainer -   ***** Running evaluation *****
07/23/2021 03:19:56 - INFO - trainer -     Num examples = 1120
07/23/2021 03:19:56 - INFO - trainer -     Batch size = 32
07/23/2021 03:19:57 - INFO - trainer -   ***** Eval results *****
07/23/2021 03:19:57 - INFO - trainer -     eval loss: 1.1301005523161232
07/23/2021 03:19:57 - 

07/23/2021 03:21:43 - INFO - trainer -   ***** Eval results *****
07/23/2021 03:21:43 - INFO - trainer -     eval loss: 1.083670751673136
07/23/2021 03:21:43 - INFO - trainer -     acc = 0.8455357142857143
07/23/2021 03:21:43 - INFO - trainer -     f1 = 0.7648040728444375
07/23/2021 03:21:49 - INFO - trainer -   ***** Running evaluation *****
07/23/2021 03:21:49 - INFO - trainer -     Num examples = 1120
07/23/2021 03:21:49 - INFO - trainer -     Batch size = 32
07/23/2021 03:21:50 - INFO - trainer -   ***** Eval results *****
07/23/2021 03:21:50 - INFO - trainer -     eval loss: 1.1377573055543637
07/23/2021 03:21:50 - INFO - trainer -     acc = 0.8357142857142857
07/23/2021 03:21:50 - INFO - trainer -     f1 = 0.7480734054844126
Epoch:  77%|███████▋  | 23/30 [06:07<01:50, 15.83s/it]07/23/2021 03:21:56 - INFO - trainer -   ***** Running evaluation *****
07/23/2021 03:21:56 - INFO - trainer -     Num examples = 1120
07/23/2021 03:21:56 - INFO - trainer -     Batch size = 32
07/23/2021 

07/23/2021 03:23:42 - INFO - trainer -     Batch size = 32
07/23/2021 03:23:44 - INFO - trainer -   ***** Eval results *****
07/23/2021 03:23:44 - INFO - trainer -     eval loss: 1.2049756014204052
07/23/2021 03:23:44 - INFO - trainer -     acc = 0.83125
07/23/2021 03:23:44 - INFO - trainer -     f1 = 0.7377088804997779
Epoch: 100%|██████████| 30/30 [07:59<00:00, 15.99s/it]
07/23/2021 03:23:48 - INFO - __main__ -     acc = 0.8625
07/23/2021 03:23:48 - INFO - __main__ -     f1 = 0.7960469155284686


### Output of Bert, output[12] Frozen 0.75

In [11]:
args.hidden_layer_no = 12
args.frozen_percent = 0.75
# Build Model
# model = Aspect_Text_Multi_Syntax_Encoding(args, dep_tag_vocab['len'], pos_tag_vocab['len'])
if args.pure_bert:
    model = Pure_Bert(args)
elif args.gat_bert:
    model = Aspect_Bert_GAT(args, dep_tag_vocab['len'], pos_tag_vocab['len'])  # R-GAT + Bert
elif args.gat_our:
    model = Aspect_Text_GAT_ours(args, dep_tag_vocab['len'], pos_tag_vocab['len']) # R-GAT with reshaped tree
else:
    model = Aspect_Text_GAT_only(args, dep_tag_vocab['len'], pos_tag_vocab['len'])  # original GAT with reshaped tree

model.to(args.device)
# Train


_, _,  all_eval_results = train(args, train_dataset, model, test_dataset)

if len(all_eval_results):
    best_acc_eval_result = max(all_eval_results, key=lambda x: x['acc']) 
    best_f1_eval_result = max(all_eval_results, key=lambda x: x['f1'])
    
    for key in sorted(best_acc_eval_result.keys()):
        logger.info("  %s = %s", key, str(best_acc_eval_result[key]))
        
    best_acc_eval_results_by_layer[(args.hidden_layer_no, args.frozen_percent)] = best_acc_eval_result
    best_f1_eval_results_by_layer[(args.hidden_layer_no, args.frozen_percent)] = best_f1_eval_result

07/23/2021 03:25:59 - INFO - model -   Hidden Layer 12 frozen percentage 0.75
07/23/2021 03:25:59 - INFO - trainer -   ***** Running training *****
07/23/2021 03:25:59 - INFO - trainer -     Num examples = 3602
07/23/2021 03:25:59 - INFO - trainer -     Num Epochs = 30
07/23/2021 03:25:59 - INFO - trainer -     Instantaneous batch size per GPU = 16
07/23/2021 03:25:59 - INFO - trainer -     Gradient Accumulation steps = 2
07/23/2021 03:25:59 - INFO - trainer -     Total optimization steps = 3390
Epoch:   0%|          | 0/30 [00:00<?, ?it/s]07/23/2021 03:26:02 - INFO - trainer -   ***** Running evaluation *****
07/23/2021 03:26:02 - INFO - trainer -     Num examples = 1120
07/23/2021 03:26:02 - INFO - trainer -     Batch size = 32
07/23/2021 03:26:03 - INFO - trainer -   ***** Eval results *****
07/23/2021 03:26:03 - INFO - trainer -     eval loss: 0.7379045597144536
07/23/2021 03:26:03 - INFO - trainer -     acc = 0.7133928571428572
07/23/2021 03:26:03 - INFO - trainer -     f1 = 0.440

07/23/2021 03:27:13 - INFO - trainer -     acc = 0.84375
07/23/2021 03:27:13 - INFO - trainer -     f1 = 0.7608707862767581
07/23/2021 03:27:17 - INFO - trainer -   ***** Running evaluation *****
07/23/2021 03:27:17 - INFO - trainer -     Num examples = 1120
07/23/2021 03:27:17 - INFO - trainer -     Batch size = 32
07/23/2021 03:27:18 - INFO - trainer -   ***** Eval results *****
07/23/2021 03:27:18 - INFO - trainer -     eval loss: 0.5863856578245759
07/23/2021 03:27:18 - INFO - trainer -     acc = 0.8464285714285714
07/23/2021 03:27:18 - INFO - trainer -     f1 = 0.7727342910724446
07/23/2021 03:27:21 - INFO - trainer -   ***** Running evaluation *****
07/23/2021 03:27:21 - INFO - trainer -     Num examples = 1120
07/23/2021 03:27:21 - INFO - trainer -     Batch size = 32
07/23/2021 03:27:23 - INFO - trainer -   ***** Eval results *****
07/23/2021 03:27:23 - INFO - trainer -     eval loss: 0.5644378356635571
07/23/2021 03:27:23 - INFO - trainer -     acc = 0.8455357142857143
07/23/2

07/23/2021 03:28:32 - INFO - trainer -     eval loss: 0.868643713336704
07/23/2021 03:28:32 - INFO - trainer -     acc = 0.8401785714285714
07/23/2021 03:28:32 - INFO - trainer -     f1 = 0.7577496152989128
Epoch:  50%|█████     | 15/30 [02:36<02:35, 10.35s/it]07/23/2021 03:28:36 - INFO - trainer -   ***** Running evaluation *****
07/23/2021 03:28:36 - INFO - trainer -     Num examples = 1120
07/23/2021 03:28:36 - INFO - trainer -     Batch size = 32
07/23/2021 03:28:37 - INFO - trainer -   ***** Eval results *****
07/23/2021 03:28:37 - INFO - trainer -     eval loss: 0.8478353490055139
07/23/2021 03:28:37 - INFO - trainer -     acc = 0.8455357142857143
07/23/2021 03:28:37 - INFO - trainer -     f1 = 0.7668853511362906
07/23/2021 03:28:40 - INFO - trainer -   ***** Running evaluation *****
07/23/2021 03:28:40 - INFO - trainer -     Num examples = 1120
07/23/2021 03:28:40 - INFO - trainer -     Batch size = 32
07/23/2021 03:28:42 - INFO - trainer -   ***** Eval results *****
07/23/2021 

07/23/2021 03:29:50 - INFO - trainer -     Batch size = 32
07/23/2021 03:29:51 - INFO - trainer -   ***** Eval results *****
07/23/2021 03:29:51 - INFO - trainer -     eval loss: 0.9041991234291344
07/23/2021 03:29:51 - INFO - trainer -     acc = 0.8473214285714286
07/23/2021 03:29:51 - INFO - trainer -     f1 = 0.7794765554218372
07/23/2021 03:29:54 - INFO - trainer -   ***** Running evaluation *****
07/23/2021 03:29:54 - INFO - trainer -     Num examples = 1120
07/23/2021 03:29:54 - INFO - trainer -     Batch size = 32
07/23/2021 03:29:56 - INFO - trainer -   ***** Eval results *****
07/23/2021 03:29:56 - INFO - trainer -     eval loss: 1.0004015458747744
07/23/2021 03:29:56 - INFO - trainer -     acc = 0.8383928571428572
07/23/2021 03:29:56 - INFO - trainer -     f1 = 0.7596352277118966
Epoch:  77%|███████▋  | 23/30 [04:00<01:12, 10.32s/it]07/23/2021 03:29:59 - INFO - trainer -   ***** Running evaluation *****
07/23/2021 03:29:59 - INFO - trainer -     Num examples = 1120
07/23/2021

07/23/2021 03:31:08 - INFO - trainer -     Num examples = 1120
07/23/2021 03:31:08 - INFO - trainer -     Batch size = 32
07/23/2021 03:31:09 - INFO - trainer -   ***** Eval results *****
07/23/2021 03:31:09 - INFO - trainer -     eval loss: 1.2338148049411497
07/23/2021 03:31:09 - INFO - trainer -     acc = 0.8357142857142857
07/23/2021 03:31:09 - INFO - trainer -     f1 = 0.7490991675969849
Epoch: 100%|██████████| 30/30 [05:13<00:00, 10.44s/it]
07/23/2021 03:31:12 - INFO - __main__ -     acc = 0.8544642857142857
07/23/2021 03:31:12 - INFO - __main__ -     f1 = 0.7955473836394455


In [12]:
best_acc_eval_results_by_layer

{(12, 0): {'acc': 0.8580357142857142, 'f1': 0.7823453075568864},
 (12, 0.25): {'acc': 0.8625, 'f1': 0.7960469155284686},
 (12, 0.75): {'acc': 0.8544642857142857, 'f1': 0.7955473836394455}}

In [13]:
best_f1_eval_results_by_layer

{(12, 0): {'acc': 0.8580357142857142, 'f1': 0.794648953589351},
 (12, 0.25): {'acc': 0.8616071428571429, 'f1': 0.798711316849118},
 (12, 0.75): {'acc': 0.8544642857142857, 'f1': 0.7955473836394455}}

### Save Dictionary 

In [17]:
with open('L12_Frozen_acc.txt', 'w') as file:
    file.write(str(best_acc_eval_results_by_layer))

In [18]:
with open('L12_Frozen_f1.txt', 'w') as file:
    file.write(str(best_f1_eval_results_by_layer))

### Output of Bert, output[11] All Frozen

In [7]:
args.hidden_layer_no = 11
# Build Model
# model = Aspect_Text_Multi_Syntax_Encoding(args, dep_tag_vocab['len'], pos_tag_vocab['len'])
if args.pure_bert:
    model = Pure_Bert(args)
elif args.gat_bert:
    model = Aspect_Bert_GAT(args, dep_tag_vocab['len'], pos_tag_vocab['len'])  # R-GAT + Bert
elif args.gat_our:
    model = Aspect_Text_GAT_ours(args, dep_tag_vocab['len'], pos_tag_vocab['len']) # R-GAT with reshaped tree
else:
    model = Aspect_Text_GAT_only(args, dep_tag_vocab['len'], pos_tag_vocab['len'])  # original GAT with reshaped tree

model.to(args.device)
# Train


_, _,  all_eval_results = train(args, train_dataset, model, test_dataset)

if len(all_eval_results):
    best_acc_eval_result = max(all_eval_results, key=lambda x: x['acc']) 
    best_f1_eval_result = max(all_eval_results, key=lambda x: x['f1'])
    
    for key in sorted(best_acc_eval_result.keys()):
        logger.info("  %s = %s", key, str(best_acc_eval_result[key]))
        
    best_acc_eval_results_by_layer[args.hidden_layer_no] = best_acc_eval_result
    best_f1_eval_results_by_layer[args.hidden_layer_no] = best_f1_eval_result
    
    
        

07/23/2021 02:04:54 - INFO - model -   Hidden Layer 11
07/23/2021 02:04:55 - INFO - trainer -   ***** Running training *****
07/23/2021 02:04:55 - INFO - trainer -     Num examples = 3602
07/23/2021 02:04:55 - INFO - trainer -     Num Epochs = 30
07/23/2021 02:04:55 - INFO - trainer -     Instantaneous batch size per GPU = 16
07/23/2021 02:04:55 - INFO - trainer -     Gradient Accumulation steps = 2
07/23/2021 02:04:55 - INFO - trainer -     Total optimization steps = 3390
Epoch:   0%|          | 0/30 [00:00<?, ?it/s]07/23/2021 02:05:14 - INFO - trainer -   ***** Running evaluation *****
07/23/2021 02:05:14 - INFO - trainer -     Num examples = 1120
07/23/2021 02:05:14 - INFO - trainer -     Batch size = 32
07/23/2021 02:05:16 - INFO - trainer -   ***** Eval results *****
07/23/2021 02:05:16 - INFO - trainer -     eval loss: 0.8898086428642273
07/23/2021 02:05:16 - INFO - trainer -     acc = 0.65
07/23/2021 02:05:16 - INFO - trainer -     f1 = 0.26262626262626265
07/23/2021 02:05:18 - 

07/23/2021 02:06:08 - INFO - trainer -     f1 = 0.3486217611429545
07/23/2021 02:06:10 - INFO - trainer -   ***** Running evaluation *****
07/23/2021 02:06:10 - INFO - trainer -     Num examples = 1120
07/23/2021 02:06:10 - INFO - trainer -     Batch size = 32
07/23/2021 02:06:11 - INFO - trainer -   ***** Eval results *****
07/23/2021 02:06:11 - INFO - trainer -     eval loss: 0.7764740066868918
07/23/2021 02:06:11 - INFO - trainer -     acc = 0.6705357142857142
07/23/2021 02:06:11 - INFO - trainer -     f1 = 0.36833616112009854
07/23/2021 02:06:13 - INFO - trainer -   ***** Running evaluation *****
07/23/2021 02:06:13 - INFO - trainer -     Num examples = 1120
07/23/2021 02:06:13 - INFO - trainer -     Batch size = 32
07/23/2021 02:06:15 - INFO - trainer -   ***** Eval results *****
07/23/2021 02:06:15 - INFO - trainer -     eval loss: 0.774716945205416
07/23/2021 02:06:15 - INFO - trainer -     acc = 0.6723214285714286
07/23/2021 02:06:15 - INFO - trainer -     f1 = 0.36864012338037

07/23/2021 02:07:07 - INFO - trainer -     acc = 0.6991071428571428
07/23/2021 02:07:07 - INFO - trainer -     f1 = 0.4478531441800257
Epoch:  50%|█████     | 15/30 [02:14<01:57,  7.84s/it]07/23/2021 02:07:09 - INFO - trainer -   ***** Running evaluation *****
07/23/2021 02:07:09 - INFO - trainer -     Num examples = 1120
07/23/2021 02:07:09 - INFO - trainer -     Batch size = 32
07/23/2021 02:07:10 - INFO - trainer -   ***** Eval results *****
07/23/2021 02:07:10 - INFO - trainer -     eval loss: 0.7287754723003932
07/23/2021 02:07:10 - INFO - trainer -     acc = 0.6955357142857143
07/23/2021 02:07:10 - INFO - trainer -     f1 = 0.43555824301414664
07/23/2021 02:07:13 - INFO - trainer -   ***** Running evaluation *****
07/23/2021 02:07:13 - INFO - trainer -     Num examples = 1120
07/23/2021 02:07:13 - INFO - trainer -     Batch size = 32
07/23/2021 02:07:14 - INFO - trainer -   ***** Eval results *****
07/23/2021 02:07:14 - INFO - trainer -     eval loss: 0.7314496972731181
07/23/202

07/23/2021 02:08:06 - INFO - trainer -   ***** Eval results *****
07/23/2021 02:08:06 - INFO - trainer -     eval loss: 0.6949426472187042
07/23/2021 02:08:06 - INFO - trainer -     acc = 0.7214285714285714
07/23/2021 02:08:06 - INFO - trainer -     f1 = 0.5206787255820795
07/23/2021 02:08:08 - INFO - trainer -   ***** Running evaluation *****
07/23/2021 02:08:08 - INFO - trainer -     Num examples = 1120
07/23/2021 02:08:08 - INFO - trainer -     Batch size = 32
07/23/2021 02:08:10 - INFO - trainer -   ***** Eval results *****
07/23/2021 02:08:10 - INFO - trainer -     eval loss: 0.7023780235222408
07/23/2021 02:08:10 - INFO - trainer -     acc = 0.7053571428571429
07/23/2021 02:08:10 - INFO - trainer -     f1 = 0.46748953947588423
Epoch:  77%|███████▋  | 23/30 [03:17<00:54,  7.77s/it]07/23/2021 02:08:12 - INFO - trainer -   ***** Running evaluation *****
07/23/2021 02:08:12 - INFO - trainer -     Num examples = 1120
07/23/2021 02:08:12 - INFO - trainer -     Batch size = 32
07/23/202

07/23/2021 02:09:05 - INFO - trainer -     Batch size = 32
07/23/2021 02:09:06 - INFO - trainer -   ***** Eval results *****
07/23/2021 02:09:06 - INFO - trainer -     eval loss: 0.6873542359897069
07/23/2021 02:09:06 - INFO - trainer -     acc = 0.7223214285714286
07/23/2021 02:09:06 - INFO - trainer -     f1 = 0.5154186100859396
Epoch: 100%|██████████| 30/30 [04:12<00:00,  8.43s/it]


NameError: name 'best_eval_result' is not defined

In [10]:
if len(all_eval_results):
    best_acc_eval_result = max(all_eval_results, key=lambda x: x['acc']) 
    best_f1_eval_result = max(all_eval_results, key=lambda x: x['f1'])
    
    for key in sorted(best_acc_eval_result.keys()):
        logger.info("  %s = %s", key, str(best_acc_eval_result[key]))
        
    best_acc_eval_results_by_layer[args.hidden_layer_no] = best_acc_eval_result
    best_f1_eval_results_by_layer[args.hidden_layer_no] = best_f1_eval_result

07/23/2021 02:11:47 - INFO - __main__ -     acc = 0.7285714285714285
07/23/2021 02:11:47 - INFO - __main__ -     f1 = 0.5414583304317144


In [16]:
best_f1_eval_results_by_layer[11]

{'acc': 0.7285714285714285, 'f1': 0.5414583304317144}

##  Layer 11 + Layer 12 Frozen 2 

In [3]:
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
                        datefmt='%m/%d/%Y %H:%M:%S',
                        level=logging.INFO)
    
# Parse args

args_str = "--embedding_type bert --output_dir data/output-gcn --dropout 0.3 --hidden_size 200 --learning_rate 5e-5 --bert_model_dir ./test/saved_model --pure_bert --frozen_percent 0"
#args = parse_args(['--gat_our', '--highway', '--num_heads', '7', '--dropout', '0.8', '--output_dir',
#                   'output/r-gat', '--glove_dir', 'glove', '--cuda_id', '0'])
args = parse_args(args_str.split(' '))
check_args(args)

# Setup CUDA, GPU training
os.environ["CUDA_VISIBLE_DEVICES"] = args.cuda_id
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
args.device = device
logger.info('Device is %s', args.device)

# Set seed
set_seed(args)

# Bert, load pretrained model and tokenizer, check if neccesary to put bert here
if args.embedding_type == 'bert':
    tokenizer = BertTokenizer.from_pretrained(args.bert_model_dir)
    args.tokenizer = tokenizer

# Load datasets and vocabs
train_dataset, test_dataset, word_vocab, dep_tag_vocab, pos_tag_vocab= load_datasets_and_vocabs(args)



07/23/2021 03:49:46 - INFO - __main__ -   {'dataset_name': 'rest', 'output_dir': 'data/output-gcn', 'num_classes': 3, 'cuda_id': '3', 'seed': 2019, 'glove_dir': '/data1/SHENWZH/wordvec', 'bert_model_dir': './test/saved_model', 'pure_bert': True, 'pure_bert_layer_agg': False, 'pure_bert_layer_agg_list': '12', 'pure_bert_linear_layer_count': 2, 'gat_bert': False, 'highway': False, 'num_layers': 2, 'hidden_layer_no': 12, 'frozen_percent': 0.0, 'add_non_connect': True, 'multi_hop': True, 'max_hop': 4, 'num_heads': 6, 'dropout': 0.3, 'num_gcn_layers': 1, 'gcn_mem_dim': 300, 'gcn_dropout': 0.2, 'gat': False, 'gat_our': False, 'gat_attention_type': 'dotprod', 'embedding_type': 'bert', 'embedding_dim': 300, 'dep_relation_embed_dim': 300, 'hidden_size': 200, 'final_hidden_size': 300, 'num_mlps': 2, 'per_gpu_train_batch_size': 16, 'per_gpu_eval_batch_size': 32, 'gradient_accumulation_steps': 2, 'learning_rate': 5e-05, 'weight_decay': 0.0, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_e

In [4]:
# Build Model
# model = Aspect_Text_Multi_Syntax_Encoding(args, dep_tag_vocab['len'], pos_tag_vocab['len'])
if args.pure_bert:
    model = Pure_Bert(args)
elif args.gat_bert:
    model = Aspect_Bert_GAT(args, dep_tag_vocab['len'], pos_tag_vocab['len'])  # R-GAT + Bert
elif args.gat_our:
    model = Aspect_Text_GAT_ours(args, dep_tag_vocab['len'], pos_tag_vocab['len']) # R-GAT with reshaped tree
else:
    model = Aspect_Text_GAT_only(args, dep_tag_vocab['len'], pos_tag_vocab['len'])  # original GAT with reshaped tree

model.to(args.device)
# Train


_, _,  all_eval_results = train(args, train_dataset, model, test_dataset)

if len(all_eval_results):
    best_acc_eval_result = max(all_eval_results, key=lambda x: x['acc']) 
    best_f1_eval_result = max(all_eval_results, key=lambda x: x['f1'])
    
    for key in sorted(best_acc_eval_result.keys()):
        logger.info("  %s = %s", key, str(best_acc_eval_result[key]))


07/23/2021 03:52:08 - INFO - model -   Hidden Layer 11 & 12, FC1
07/23/2021 03:52:10 - INFO - trainer -   ***** Running training *****
07/23/2021 03:52:10 - INFO - trainer -     Num examples = 3602
07/23/2021 03:52:10 - INFO - trainer -     Num Epochs = 30
07/23/2021 03:52:10 - INFO - trainer -     Instantaneous batch size per GPU = 16
07/23/2021 03:52:10 - INFO - trainer -     Gradient Accumulation steps = 2
07/23/2021 03:52:10 - INFO - trainer -     Total optimization steps = 3390
Epoch:   0%|          | 0/30 [00:00<?, ?it/s]07/23/2021 03:52:18 - INFO - trainer -   ***** Running evaluation *****
07/23/2021 03:52:18 - INFO - trainer -     Num examples = 1120
07/23/2021 03:52:18 - INFO - trainer -     Batch size = 32
07/23/2021 03:52:19 - INFO - trainer -   ***** Eval results *****
07/23/2021 03:52:19 - INFO - trainer -     eval loss: 0.5427059139524187
07/23/2021 03:52:19 - INFO - trainer -     acc = 0.7910714285714285
07/23/2021 03:52:19 - INFO - trainer -     f1 = 0.6710150238294069

07/23/2021 03:54:31 - INFO - trainer -     acc = 0.8482142857142857
07/23/2021 03:54:31 - INFO - trainer -     f1 = 0.7717813872675819
07/23/2021 03:54:39 - INFO - trainer -   ***** Running evaluation *****
07/23/2021 03:54:39 - INFO - trainer -     Num examples = 1120
07/23/2021 03:54:39 - INFO - trainer -     Batch size = 32
07/23/2021 03:54:40 - INFO - trainer -   ***** Eval results *****
07/23/2021 03:54:40 - INFO - trainer -     eval loss: 0.7895552602214073
07/23/2021 03:54:40 - INFO - trainer -     acc = 0.85625
07/23/2021 03:54:40 - INFO - trainer -     f1 = 0.7878743668392203
07/23/2021 03:54:47 - INFO - trainer -   ***** Running evaluation *****
07/23/2021 03:54:47 - INFO - trainer -     Num examples = 1120
07/23/2021 03:54:47 - INFO - trainer -     Batch size = 32
07/23/2021 03:54:49 - INFO - trainer -   ***** Eval results *****
07/23/2021 03:54:49 - INFO - trainer -     eval loss: 0.8248617659422702
07/23/2021 03:54:49 - INFO - trainer -     acc = 0.8419642857142857
07/23/2

07/23/2021 03:56:58 - INFO - trainer -     eval loss: 1.1981529227736505
07/23/2021 03:56:58 - INFO - trainer -     acc = 0.8464285714285714
07/23/2021 03:56:58 - INFO - trainer -     f1 = 0.769884345530729
Epoch:  50%|█████     | 15/30 [04:54<04:49, 19.33s/it]07/23/2021 03:57:05 - INFO - trainer -   ***** Running evaluation *****
07/23/2021 03:57:05 - INFO - trainer -     Num examples = 1120
07/23/2021 03:57:05 - INFO - trainer -     Batch size = 32
07/23/2021 03:57:07 - INFO - trainer -   ***** Eval results *****
07/23/2021 03:57:07 - INFO - trainer -     eval loss: 1.0475693627543348
07/23/2021 03:57:07 - INFO - trainer -     acc = 0.8598214285714286
07/23/2021 03:57:07 - INFO - trainer -     f1 = 0.7957430951537852
07/23/2021 03:57:14 - INFO - trainer -   ***** Running evaluation *****
07/23/2021 03:57:14 - INFO - trainer -     Num examples = 1120
07/23/2021 03:57:14 - INFO - trainer -     Batch size = 32
07/23/2021 03:57:15 - INFO - trainer -   ***** Eval results *****
07/23/2021 

07/23/2021 03:59:23 - INFO - trainer -     Batch size = 32
07/23/2021 03:59:24 - INFO - trainer -   ***** Eval results *****
07/23/2021 03:59:24 - INFO - trainer -     eval loss: 1.0949547391002332
07/23/2021 03:59:24 - INFO - trainer -     acc = 0.8473214285714286
07/23/2021 03:59:24 - INFO - trainer -     f1 = 0.7777524586809662
07/23/2021 03:59:32 - INFO - trainer -   ***** Running evaluation *****
07/23/2021 03:59:32 - INFO - trainer -     Num examples = 1120
07/23/2021 03:59:32 - INFO - trainer -     Batch size = 32
07/23/2021 03:59:33 - INFO - trainer -   ***** Eval results *****
07/23/2021 03:59:33 - INFO - trainer -     eval loss: 1.1553801152589065
07/23/2021 03:59:33 - INFO - trainer -     acc = 0.8482142857142857
07/23/2021 03:59:33 - INFO - trainer -     f1 = 0.7746263074148941
Epoch:  77%|███████▋  | 23/30 [07:29<02:15, 19.29s/it]07/23/2021 03:59:40 - INFO - trainer -   ***** Running evaluation *****
07/23/2021 03:59:40 - INFO - trainer -     Num examples = 1120
07/23/2021

07/23/2021 04:01:49 - INFO - trainer -     Num examples = 1120
07/23/2021 04:01:49 - INFO - trainer -     Batch size = 32
07/23/2021 04:01:50 - INFO - trainer -   ***** Eval results *****
07/23/2021 04:01:50 - INFO - trainer -     eval loss: 1.540078211121155
07/23/2021 04:01:50 - INFO - trainer -     acc = 0.8321428571428572
07/23/2021 04:01:50 - INFO - trainer -     f1 = 0.726141641118113
Epoch: 100%|██████████| 30/30 [09:45<00:00, 19.53s/it]
07/23/2021 04:01:56 - INFO - __main__ -     acc = 0.8598214285714286
07/23/2021 04:01:56 - INFO - __main__ -     f1 = 0.7957430951537852
