In [27]:
import os, sys, re, datetime, random, gzip, json
from tqdm.autonotebook import tqdm
import pandas as pd
import numpy as np
import glob
from pathlib import Path
from itertools import accumulate
import argparse
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, random_split
from transformers import BertTokenizer, BertModel, BertForSequenceClassification

from time import time
from math import ceil
from multiprocessing import Pool
from sentence_transformers import SentenceTransformer, models, losses, InputExample

import pytorch_lightning as pl
from pytorch_lightning.trainer.trainer import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from pytorch_lightning.utilities.seed import seed_everything

PROJ_PATH = Path(os.path.join(re.sub("/BERT_ABSA.*$", '', os.getcwd()), 'BERT_ABSA'))
print(f'PROJ_PATH={PROJ_PATH}')
sys.path.insert(1, str(PROJ_PATH))
sys.path.insert(1, str(PROJ_PATH/'src'))
import utils
import helper

PROJ_PATH=/home/hoang/github/BERT_ABSA


In [28]:
# glob.glob('../model/restaurants/*.ckpt')

## XML Parser

In [29]:
import os
import json
import pandas as pd
import re
import xml.etree.ElementTree as ET
from pathlib import Path

In [30]:
def parseXML(data_path):
    tree = ET.ElementTree(file=data_path)
    objs = list()
    for sentence in tree.getroot():
        obj = dict()
        obj['id'] = sentence.attrib['id']
        for item in sentence:
            if item.tag == 'text':
                obj['text'] = item.text
            elif item.tag == 'aspectTerms':
                obj['aspects'] = list()
                for aspectTerm in item:
                    if aspectTerm.attrib['polarity'] != 'conflict':
                        obj['aspects'].append(aspectTerm.attrib)
            elif item.tag == 'aspectCategories':
                obj['category'] = list()
                for category in item:
                    obj['category'].append(category.attrib)
        if 'aspects' in obj and len(obj['aspects']):
            objs.append(obj)
    return objs

def convert_to_dataframe(objs):
    output = []
    for sentence in objs:
        id = sentence['id']
        text = sentence['text']
        aspects = sentence['aspects']
        for aspect in aspects:
            term = aspect['term']
            label = aspect['polarity']
            output.append([id, text, term, label])
    output = sorted(output, key=lambda x: x[0])
    df = pd.DataFrame(output, columns=['id', 'text', 'term', 'label'])
    return df

In [31]:
tree = ET.ElementTree(file= str(PROJ_PATH/ 'dataset/raw_data' / 'Restaurants_Train.xml'))
objs = list()
for sentence in tree.getroot():
    obj = dict()
    obj['id'] = sentence.attrib['id']
    for item in sentence:
        if item.tag == 'text':
            obj['text'] = item.text
        elif item.tag == 'aspectTerms':
            obj['aspects'] = list()
            for aspectTerm in item:
                if aspectTerm.attrib['polarity'] != 'conflict':
                    obj['aspects'].append(aspectTerm.attrib)
        elif item.tag == 'aspectCategories':
            obj['category'] = list()
            for category in item:
                obj['category'].append(category.attrib)
#     if 'aspects' in obj and len(obj['aspects']):
    break

In [32]:
dataset_files = {
    'restaurant': {
        'train': 'Restaurants_Train.xml',
        'test': 'Restaurants_Test.xml',
        'trial': 'Restaurants_Trial.xml'
    },
    'laptop': {
        'train': 'Laptops_Train.xml',
        'test': 'Laptops_Test.xml',
        'trial': 'Laptops_Trial.xml'
    }
}

In [33]:
# for dsname, fnames in dataset_files.items():
#     for g, fname in fnames.items():
#         input_path = str(PROJ_PATH/ 'dataset/raw_data' / fname)
#         output_path01 = str(PROJ_PATH/ 'dataset/preprocessed_data' / fname.replace('.xml', '.pkl'))
#         output_path02 = str(PROJ_PATH/ 'dataset/preprocessed_data' / fname.replace('.xml', '.csv'))
#         print(f'Load: {input_path}')
#         print(f'Save: {output_path01}\n')
#         objs = parseXML(input_path)
#         df = convert_to_dataframe(objs)
#         pd.to_pickle(objs, output_path01)
#         df.to_csv(output_path02, index=False)

Load: /home/hoang/github/BERT_ABSA/dataset/raw_data/Restaurants_Train.xml
Save: /home/hoang/github/BERT_ABSA/dataset/preprocessed_data/Restaurants_Train.pkl

Load: /home/hoang/github/BERT_ABSA/dataset/raw_data/Restaurants_Test.xml
Save: /home/hoang/github/BERT_ABSA/dataset/preprocessed_data/Restaurants_Test.pkl

Load: /home/hoang/github/BERT_ABSA/dataset/raw_data/Restaurants_Trial.xml
Save: /home/hoang/github/BERT_ABSA/dataset/preprocessed_data/Restaurants_Trial.pkl

Load: /home/hoang/github/BERT_ABSA/dataset/raw_data/Laptops_Train.xml
Save: /home/hoang/github/BERT_ABSA/dataset/preprocessed_data/Laptops_Train.pkl

Load: /home/hoang/github/BERT_ABSA/dataset/raw_data/Laptops_Test.xml
Save: /home/hoang/github/BERT_ABSA/dataset/preprocessed_data/Laptops_Test.pkl

Load: /home/hoang/github/BERT_ABSA/dataset/raw_data/Laptops_Trial.xml
Save: /home/hoang/github/BERT_ABSA/dataset/preprocessed_data/Laptops_Trial.pkl



In [34]:
df = pd.read_csv('/home/hoang/github/BERT_ABSA/dataset/preprocessed_data/Restaurants_Train.csv')
df.head()

Unnamed: 0,id,text,term,label
0,1000,"The food is good, especially their more basic ...",food,positive
1,1000,"The food is good, especially their more basic ...",dishes,positive
2,1000,"The food is good, especially their more basic ...",drinks,positive
3,1002,"The view is spectacular, and the food is great.",view,positive
4,1002,"The view is spectacular, and the food is great.",food,positive


## Tree Parser

In [35]:
import numpy as np
import spacy
import pickle
import tqdm
#nlp = spacy.load('en_core_web_sm')
import re
from stanfordcorenlp import StanfordCoreNLP
import json
nlps = StanfordCoreNLP(str(PROJ_PATH / 'misc/stanford-corenlp-4.3.2'))
from nltk.parse.corenlp import CoreNLPDependencyParser

In [36]:
# def tokenize_and_depparse(text):
#     text+=' '
#     text = re.sub(r'\. ',' . ',text).strip()
#     text = re.sub(r' {2,}',' ',text)
#     nlp_properties = {
#         'annotators': 'depparse',
# #         'tokenize.options': 'splitHyphenated=false,normalizeParentheses=false',
#         'tokenize.whitespace': True,  # all tokens have been tokenized before
#         'ssplit.isOneSentence': False,
#         'outputFormat': 'json',
#     }
    
#     try:
#         parsed = json.loads(nlps.annotate(text.strip(), nlp_properties))
#     except:
#         print('ewewerror')
        
#     parsed = parsed['sentences']
#     tokens = []
#     tokens_dict = {}
#     tuples = []
#     tmplen = 0
#     for item in parsed:
#         for ite in item['tokens']:
#             tokens.extend([ite['word']])
#             tokens_dict[ite['index']] = ite['word']
# #         tokens.extend([ite['word'] for ite in item['tokens']])
#         tuples.extend([
#             (
#                 ite['dep'],
#                 ite['governor']-1+tmplen,
#                 ite['dependent']-1+tmplen
#             ) for ite in item['basicDependencies'] if ite['dep']!='ROOT'
#         ])
#         tmplen=len(tokens)
        
# #     return tokens, tokens_dict, tuples
#     return tokens, tuples



In [37]:
def tokenize_and_depparse(text):
    '''
    # to_conll(10) will return the result in a format as follows:
    # id word lemma ctag tag feats head(head's id) rel(syntactic relation)
    # return values that is unknown will be shown as '_'
    # tag and ctag are considered to be equal
    '''
    parser = CoreNLPDependencyParser(url='http://localhost:9000')
    dep_parsed_sentence = parser.raw_parse(text)
    deps = dep_parsed_sentence.__next__()
    
    lines = deps.to_conll(10).split('\n')
    tokens = []
    tuples = []
    for line in lines:
        if line != '':
            result = line.split('\t')
            # id word lemma ctag tag feats head(head's id) rel(syntactic relation)
            tokens.append(result[1])
            if result[7] != 'ROOT':
                tuples.append((result[7], int(result[6])-1 , int(result[0])-1))   
    return tokens, tuples

In [38]:
data = pd.read_pickle('/home/hoang/github/BERT_ABSA/dataset/preprocessed_data/Restaurants_Train.pkl')

In [39]:
bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

In [40]:
inp = data[len(data)-1]
print(inp)

{'id': '671', 'text': 'Each table has a pot of boiling water sunken into its surface, and you get platters of thin sliced meats, various vegetables, and rice and glass noodles.', 'aspects': [{'term': 'table', 'polarity': 'neutral', 'from': '5', 'to': '10'}, {'term': 'pot of boiling water', 'polarity': 'neutral', 'from': '17', 'to': '37'}, {'term': 'meats', 'polarity': 'neutral', 'from': '99', 'to': '104'}, {'term': 'vegetables', 'polarity': 'neutral', 'from': '114', 'to': '124'}, {'term': 'rice', 'polarity': 'neutral', 'from': '130', 'to': '134'}, {'term': 'glass noodles', 'polarity': 'neutral', 'from': '139', 'to': '152'}], 'category': [{'category': 'food', 'polarity': 'neutral'}]}


In [41]:
# text = inp['text']
text = "I'm waiting... It's 9am now."
# text = "Hello, I'm Nguyen. I'm 29 years old."

# text = "This is Sam's bicycle."
# text = "My parents' friends came for dinner."
# text = "They're my parents."

In [42]:
bert_tokenizer.tokenize(text)

['i',
 "'",
 'm',
 'waiting',
 '.',
 '.',
 '.',
 'it',
 "'",
 's',
 '9',
 '##am',
 'now',
 '.']

In [43]:
tokenize_and_depparse(text)

(['I', "'m", 'waiting', '...', 'It', "'s", '9', 'am', 'now', '.'],
 [('nsubj', 2, 0),
  ('aux', 2, 1),
  ('punct', 2, 3),
  ('nsubj', 7, 4),
  ('cop', 7, 5),
  ('nummod', 7, 6),
  ('parataxis', 2, 7),
  ('advmod', 7, 8),
  ('punct', 2, 9)])

## Dep parse tree builder

In [44]:
# output
## text
## bert token index
## aspect term index
## edge index of dependency graph
#### edge index example: 1 --> 2, 2 --> 3, 3 --> 4
#### edge_idx = [[1, 2, 3], [2, 3, 4]]

In [45]:
# Aspect term: redeeming
# Corenlp: redeeming (12) --> A (16)
# Bert: 'red','##eem','##ing' (12, 13, 14) --> A (18)
# Edge index: [[12, 13, 14], [18, 18, 18]]
# aspect term index [12, 13, 14]

In [47]:
# keep token i == '.' if token i+1 != '.'
# all other token, trimming ".,()", and remove if empty
# if token[i] == "'" -> group to token i-1
# if token[i] == "'" and token[i+1] == "s" | "re" | "m" | "ve" -> group token i and i+1
# if token[i] starts witht "##" -> group with token i-1
def process_bert_tokens(tokens, DEBUG=False):
    
    to_strip_chars = ".,\(\)"
    token_check_list_1 = ["s", "re", "m", "ve", "ll", "d"]
    
    tokens += [""]
    
    current_token_group = []
    output = []
    last_token = None
    for token_idx, token in enumerate(tokens):
        
        next_token = None
        if token_idx + 1 < len(tokens):
            next_token = tokens[token_idx + 1]

        reset = True
        if token == "'":
            reset = False
        elif token.startswith("##"):
            reset = False
        elif last_token is not None:
            if token in token_check_list_1:
                reset = False

        keep = True
        if (token == '.') and (next_token is None):
            keep = True
        elif (token == '.') and (next_token is not None) and (next_token != '.'):
            keep = True
        else:
            token = token.strip(to_strip_chars)
            if token == "":
                keep = False

        if reset:
            if len(current_token_group) > 0:
                output += [current_token_group]
            current_token_group = []
        if keep:
            if DEBUG:
                current_token_group += [token]
            else:
                current_token_group += [token_idx]
            last_token = token
            
    return output

In [48]:
# keep '.', '..', '...'
# all other token, trimming ".,()", and remove if empty
# if token starts with "'" -> group to previous token
# if token ends with "." -> insert idx -1 or "" for DEBUG mod
def process_core_nlp_tokens(tokens, DEBUG=False):
    
    to_strip_chars = ".,\(\)"
    
    output = []
    for token_idx, token in enumerate(tokens):
        
        point_stripped_token = token.strip(".")
        if point_stripped_token == "":
            if DEBUG:
                output += ["."]
            else: 
                output += [token_idx]
        elif token.startswith("'"):
            if DEBUG:
                output[-1] += token
        else:
            isTokenEndsWithPoint = token.endswith(".")
            
            token = token.strip(to_strip_chars)
            if token != "":
                if DEBUG:
                    output += [token]
                    if isTokenEndsWithPoint:
                        output += [""]
                else: 
                    output += [token_idx]
                    if isTokenEndsWithPoint:
                        output += [-1]

    return output

process_core_nlp_tokens(['I', "'m", 'waiting', '...', 'It', "'s", '9', 'am', 'now', '.'])
process_core_nlp_tokens(['I', "'m", 'waiting', '...', 'It', "'s", '9', 'am', 'now', '.'], True)

["I'm", 'waiting', '.', "It's", '9', 'am', 'now', '.']

In [49]:
corenlp_tokens = ["I'm", 'waiting', '..', '.', "It's", '9am', 'now', '.']
bert_tokens = ['i', "'", 'm', 'waiting', '.', '.', '.', 'it', "'", 's', '9', '##am', 'now', '.']
processed_core_nlp_tokens = process_core_nlp_tokens(corenlp_tokens, True)
processed_bert_tokens = process_bert_tokens(bert_tokens, True)
for (processed_core_nlp_token, processed_bert_token) in zip(processed_core_nlp_tokens, processed_bert_tokens):
    print(processed_core_nlp_token, processed_bert_token)

I'm ['i', "'", 'm']
waiting ['waiting']
. ['.']
. ['it', "'", 's']
It's ['9', '##am']
9am ['now']
now ['.']


In [50]:
len(data)

1980

In [51]:
def build_bert_token_whole(bert_token):
    result = ""
    for bert_sub_token in bert_token:
        if bert_sub_token.startswith("##"):
            result += bert_sub_token[2:]
        else:
            result += bert_sub_token
    return result

build_bert_token_whole(['d', '##v', '##4'])

'dv4'

In [52]:
# def TEST_corenlp_to_bert_mapping():
#     match_sample_count = 0

#     for sample_idx, sample in enumerate(data[:1000]):
#         text = sample["text"]

#         bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
#         bert_tokens = bert_tokenizer.tokenize(text)
#         (corenlp_tokens, parse_tree_corenlp) = tokenize_and_depparse(text)

#         processed_core_nlp_tokens = process_core_nlp_tokens(corenlp_tokens, True)
#         processed_bert_tokens = process_bert_tokens(bert_tokens, True)

#         total_count = max(len(processed_core_nlp_tokens), len(processed_bert_tokens))
#         match_count = 0
#         for (processed_core_nlp_token, processed_bert_token) in zip(processed_core_nlp_tokens, processed_bert_tokens):
# #             print(processed_core_nlp_token, processed_bert_token)
#             processed_bert_token_whole = build_bert_token_whole(processed_bert_token)
            
#             lower_corenlp_token = processed_core_nlp_token.lower()
#             lower_bert_token = processed_bert_token_whole.lower()
#             if lower_corenlp_token.startswith(lower_bert_token) or lower_corenlp_token.endswith(lower_bert_token) or lower_bert_token.startswith(lower_corenlp_token) or lower_bert_token.endswith(lower_corenlp_token):
#                 match_count += 1

#         ratio = match_count/total_count
#         if ratio == 1:
#             match_sample_count += 1
#         else:
#             print(text)
            
#         cur_ratio = match_sample_count / (sample_idx + 1)
#         print("{}/{} - {}".format(sample_idx,len(data), cur_ratio))

#     print(match_sample_count / len(data))
    
# TEST_corenlp_to_bert_mapping()

In [65]:
def map_corenlp_to_bert_from_indexes(corenlp_processed_indexes, bert_processed_indexes):
    output = {}
    for (corenlp_processed_index, bert_processed_index) in zip(corenlp_processed_indexes, bert_processed_indexes):
        output[corenlp_processed_index] = bert_processed_index
    return output

def map_corenlp_to_bert_from_indexes_2(corenlp_tokens, bert_tokens, corenlp_processed_indexes, bert_processed_indexes):

    output = {}
    
    bert_run_idx_global = 0
    for corenlp_idx in corenlp_processed_indexes:
        for bert_run_idx, bert_idx_group in enumerate(bert_processed_indexes[bert_run_idx_global:]):
        
            corenlp_token = corenlp_tokens[corenlp_idx]

            bert_token_group = map(lambda bert_idx: bert_tokens[bert_idx], bert_idx_group)
            bert_token = build_bert_token_whole(bert_token_group)

            lower_corenlp_token = corenlp_token.lower()
            lower_bert_token = bert_token.lower()
            if lower_corenlp_token.startswith(lower_bert_token) or lower_corenlp_token.endswith(lower_bert_token) or lower_bert_token.startswith(lower_corenlp_token) or lower_bert_token.endswith(lower_corenlp_token):
                bert_run_idx_global = bert_run_idx + 1
                output[corenlp_idx] = bert_idx_group
                break;
        
    return output
        
def map_corenlp_to_bert(corenlp_tokens, bert_tokens, DEBUG=False):
    corenlp_processed_indexes = process_core_nlp_tokens(corenlp_tokens, DEBUG)
    bert_processed_indexes = process_bert_tokens(bert_tokens, DEBUG)
#     return map_corenlp_to_bert_from_indexes(corenlp_processed_indexes, bert_processed_indexes)
    return map_corenlp_to_bert_from_indexes_2(corenlp_tokens, bert_tokens, corenlp_processed_indexes, bert_processed_indexes)

corenlp_tokens = ["I'm", 'waiting', '..', '.', "It's", '9am', 'now', '.']
bert_tokens = ['i', "'", 'm', 'waiting', '.', '.', '.', 'it', "'", 's', '9', '##am', 'now', '.']
map_corenlp_to_bert(corenlp_tokens, bert_tokens)

{0: [0, 1, 2],
 1: [3],
 2: [6],
 3: [6],
 4: [7, 8, 9],
 5: [10, 11],
 6: [12],
 7: [13]}

In [86]:
def build_corenlp_to_bert_map(corenlp_tokens, bert_tokens, DEBUG=True):
    corenlp_to_bert_map = map_corenlp_to_bert(corenlp_tokens, bert_tokens, False)
    if DEBUG:
        output = []
        for corenlp_idx, bert_idx_group in corenlp_to_bert_map.items():
            corenlp_token = corenlp_tokens[corenlp_idx]
            bert_token_group = list(map(lambda bert_idx: bert_tokens[bert_idx], bert_idx_group))
            output += [(corenlp_token, bert_token_group)]
        return corenlp_to_bert_map, output
    else:
        return corenlp_to_bert_map

def build_corenlp_to_bert_map_from_text(text, DEBUG=True):
    bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    bert_tokens = bert_tokenizer.tokenize(text)
    (corenlp_tokens, parse_tree_corenlp) = tokenize_and_depparse(text)
    
#     print(corenlp_tokens)
#     print(bert_tokens)

    return build_corenlp_to_bert_map(corenlp_tokens, bert_tokens, DEBUG)

build_corenlp_to_bert_map_from_text(
#     "Lahore is a great place to duck into late-night when you need some really tasty food on the cheap -- you'll likely have trouble finishing the amount of food you get for FOUR DOLLARS.", 
    "I'm waiting ... It's 9am now.",
    True
)

({0: [0, 1, 2],
  2: [3],
  3: [6],
  4: [7, 8, 9],
  6: [10, 11],
  7: [10, 11],
  8: [12],
  9: [13]},
 [('I', ['i', "'", 'm']),
  ('waiting', ['waiting']),
  ('...', ['.']),
  ('It', ['it', "'", 's']),
  ('9', ['9', '##am']),
  ('am', ['9', '##am']),
  ('now', ['now']),
  ('.', ['.'])])

In [77]:
def get_corenlp_to_bert_map_matching_rate(corenlp_tokens, bert_tokens, corenlp_to_bert_map):
    match_count = 0
    for corenlp_idx, bert_idx_group in corenlp_to_bert_map.items():
        corenlp_token = corenlp_tokens[corenlp_idx]

        bert_token_group = list(map(lambda bert_idx: bert_tokens[bert_idx], bert_idx_group))
        bert_token = build_bert_token_whole(bert_token_group)
        
        lower_corenlp_token = corenlp_token.lower()
        lower_bert_token = bert_token.lower()
        if lower_corenlp_token.startswith(lower_bert_token) or lower_corenlp_token.endswith(lower_bert_token) or lower_bert_token.startswith(lower_corenlp_token) or lower_bert_token.endswith(lower_corenlp_token):
            match_count += 1
        
    return match_count / len(corenlp_to_bert_map.keys())

In [87]:
def TEST_corenlp_to_bert_mapping():
    match_sample_count = 0

    for sample_idx, sample in enumerate(data):
        text = sample["text"]
        
        bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        bert_tokens = bert_tokenizer.tokenize(text)
        (corenlp_tokens, parse_tree_corenlp) = tokenize_and_depparse(text)

        corenlp_to_bert_map = build_corenlp_to_bert_map(corenlp_tokens, bert_tokens, False)
        rate = get_corenlp_to_bert_map_matching_rate(corenlp_tokens, bert_tokens, corenlp_to_bert_map)

        if rate == 1:
            match_sample_count += 1
        else:
            print(text)
            
        cur_ratio = match_sample_count / (sample_idx + 1)
        print("{}/{} - {}".format(sample_idx,len(data), cur_ratio))

    print(match_sample_count / len(data))
    
TEST_corenlp_to_bert_mapping()

0/1980 - 1.0
1/1980 - 1.0
2/1980 - 1.0
3/1980 - 1.0
4/1980 - 1.0
5/1980 - 1.0
6/1980 - 1.0
7/1980 - 1.0
8/1980 - 1.0
9/1980 - 1.0
10/1980 - 1.0
11/1980 - 1.0
12/1980 - 1.0
13/1980 - 1.0
14/1980 - 1.0
15/1980 - 1.0
16/1980 - 1.0
17/1980 - 1.0
18/1980 - 1.0
19/1980 - 1.0
20/1980 - 1.0
21/1980 - 1.0
22/1980 - 1.0
23/1980 - 1.0
24/1980 - 1.0
25/1980 - 1.0
26/1980 - 1.0
27/1980 - 1.0
28/1980 - 1.0
29/1980 - 1.0
30/1980 - 1.0
31/1980 - 1.0
32/1980 - 1.0
33/1980 - 1.0
34/1980 - 1.0
35/1980 - 1.0
36/1980 - 1.0
37/1980 - 1.0
38/1980 - 1.0
39/1980 - 1.0
40/1980 - 1.0
41/1980 - 1.0
42/1980 - 1.0
43/1980 - 1.0
44/1980 - 1.0
45/1980 - 1.0
46/1980 - 1.0
47/1980 - 1.0
48/1980 - 1.0
49/1980 - 1.0
50/1980 - 1.0
51/1980 - 1.0
52/1980 - 1.0
53/1980 - 1.0
54/1980 - 1.0
55/1980 - 1.0
56/1980 - 1.0
57/1980 - 1.0
58/1980 - 1.0
59/1980 - 1.0
60/1980 - 1.0
61/1980 - 1.0
62/1980 - 1.0
63/1980 - 1.0
64/1980 - 1.0
65/1980 - 1.0
66/1980 - 1.0
67/1980 - 1.0
68/1980 - 1.0
69/1980 - 1.0
70/1980 - 1.0
71/1980 - 1.0
72

554/1980 - 1.0
555/1980 - 1.0
556/1980 - 1.0
557/1980 - 1.0
558/1980 - 1.0
559/1980 - 1.0
560/1980 - 1.0
561/1980 - 1.0
562/1980 - 1.0
563/1980 - 1.0
564/1980 - 1.0
565/1980 - 1.0
566/1980 - 1.0
567/1980 - 1.0
568/1980 - 1.0
569/1980 - 1.0
570/1980 - 1.0
571/1980 - 1.0
572/1980 - 1.0
573/1980 - 1.0
574/1980 - 1.0
575/1980 - 1.0
576/1980 - 1.0
577/1980 - 1.0
578/1980 - 1.0
579/1980 - 1.0
580/1980 - 1.0
581/1980 - 1.0
582/1980 - 1.0
583/1980 - 1.0
584/1980 - 1.0
585/1980 - 1.0
586/1980 - 1.0
587/1980 - 1.0
588/1980 - 1.0
589/1980 - 1.0
590/1980 - 1.0
591/1980 - 1.0
592/1980 - 1.0
593/1980 - 1.0
594/1980 - 1.0
595/1980 - 1.0
596/1980 - 1.0
597/1980 - 1.0
598/1980 - 1.0
599/1980 - 1.0
600/1980 - 1.0
601/1980 - 1.0
602/1980 - 1.0
603/1980 - 1.0
604/1980 - 1.0
605/1980 - 1.0
606/1980 - 1.0
607/1980 - 1.0
608/1980 - 1.0
609/1980 - 1.0
610/1980 - 1.0
611/1980 - 1.0
612/1980 - 1.0
613/1980 - 1.0
614/1980 - 1.0
615/1980 - 1.0
616/1980 - 1.0
617/1980 - 1.0
618/1980 - 1.0
619/1980 - 1.0
620/1980 -

1094/1980 - 1.0
1095/1980 - 1.0
1096/1980 - 1.0
1097/1980 - 1.0
1098/1980 - 1.0
1099/1980 - 1.0
1100/1980 - 1.0
1101/1980 - 1.0
1102/1980 - 1.0
1103/1980 - 1.0
1104/1980 - 1.0
1105/1980 - 1.0
1106/1980 - 1.0
1107/1980 - 1.0
1108/1980 - 1.0
1109/1980 - 1.0
1110/1980 - 1.0
1111/1980 - 1.0
1112/1980 - 1.0
1113/1980 - 1.0
1114/1980 - 1.0
1115/1980 - 1.0
1116/1980 - 1.0
1117/1980 - 1.0
1118/1980 - 1.0
1119/1980 - 1.0
1120/1980 - 1.0
1121/1980 - 1.0
1122/1980 - 1.0
1123/1980 - 1.0
1124/1980 - 1.0
1125/1980 - 1.0
1126/1980 - 1.0
1127/1980 - 1.0
1128/1980 - 1.0
1129/1980 - 1.0
1130/1980 - 1.0
1131/1980 - 1.0
1132/1980 - 1.0
1133/1980 - 1.0
1134/1980 - 1.0
1135/1980 - 1.0
1136/1980 - 1.0
1137/1980 - 1.0
1138/1980 - 1.0
1139/1980 - 1.0
1140/1980 - 1.0
1141/1980 - 1.0
1142/1980 - 1.0
1143/1980 - 1.0
1144/1980 - 1.0
1145/1980 - 1.0
1146/1980 - 1.0
1147/1980 - 1.0
1148/1980 - 1.0
1149/1980 - 1.0
1150/1980 - 1.0
1151/1980 - 1.0
1152/1980 - 1.0
1153/1980 - 1.0
1154/1980 - 1.0
1155/1980 - 1.0
1156/198

1607/1980 - 1.0
1608/1980 - 1.0
1609/1980 - 1.0
1610/1980 - 1.0
1611/1980 - 1.0
1612/1980 - 1.0
1613/1980 - 1.0
1614/1980 - 1.0
1615/1980 - 1.0
1616/1980 - 1.0
1617/1980 - 1.0
1618/1980 - 1.0
1619/1980 - 1.0
1620/1980 - 1.0
1621/1980 - 1.0
1622/1980 - 1.0
1623/1980 - 1.0
1624/1980 - 1.0
1625/1980 - 1.0
1626/1980 - 1.0
1627/1980 - 1.0
1628/1980 - 1.0
1629/1980 - 1.0
1630/1980 - 1.0
1631/1980 - 1.0
1632/1980 - 1.0
1633/1980 - 1.0
1634/1980 - 1.0
1635/1980 - 1.0
1636/1980 - 1.0
1637/1980 - 1.0
1638/1980 - 1.0
1639/1980 - 1.0
1640/1980 - 1.0
1641/1980 - 1.0
1642/1980 - 1.0
1643/1980 - 1.0
1644/1980 - 1.0
1645/1980 - 1.0
1646/1980 - 1.0
1647/1980 - 1.0
1648/1980 - 1.0
1649/1980 - 1.0
1650/1980 - 1.0
1651/1980 - 1.0
1652/1980 - 1.0
1653/1980 - 1.0
1654/1980 - 1.0
1655/1980 - 1.0
1656/1980 - 1.0
1657/1980 - 1.0
1658/1980 - 1.0
1659/1980 - 1.0
1660/1980 - 1.0
1661/1980 - 1.0
1662/1980 - 1.0
1663/1980 - 1.0
1664/1980 - 1.0
1665/1980 - 1.0
1666/1980 - 1.0
1667/1980 - 1.0
1668/1980 - 1.0
1669/198

In [89]:
def build_dep_parse_tree(text, verbose=False):
    '''
    Parse dependency tree and map CoreNLP index to BERT index
    
    Returns
    -------
        output_bert_v1s: list of source node indexes
        output_bert_v2s: list of target node indexes
        types: list of dependency relation
        
    Usage:
    ----------
        build_dep_parse_tree("I'm waiting ... It's 9am now.", True)
    '''
    check_existed_dict = {}
    
    bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    bert_tokens = bert_tokenizer.tokenize(text)
    (corenlp_tokens, parse_tree_corenlp) = tokenize_and_depparse(text)
    
    corenlp_to_bert_map = map_corenlp_to_bert(corenlp_tokens, bert_tokens)
    
    if verbose:
        print(f'BERT tokens: {bert_tokens}')
        print(f'CoreNLP tokens: {corenlp_tokens}')
        print(f'CoreNLP dependency tree: {parse_tree_corenlp}')
        print(f'CoreNLP to BERT: {corenlp_to_bert_map}')
        
    output_bert_v1s = []
    output_bert_v2s = []
    types = []
    for edge in parse_tree_corenlp:
        (t, corenlp_v1, corenlp_v2) = edge
        
        if (corenlp_v1 not in corenlp_to_bert_map) or (corenlp_v2 not in corenlp_to_bert_map):
            continue;
        
        bert_v1 = corenlp_to_bert_map[corenlp_v1]
        bert_v2 = corenlp_to_bert_map[corenlp_v2]

        if (len(bert_v1) > 0) and (len(bert_v2) > 0):
            bert_v1_super = bert_v1[0]
            bert_v2_super = bert_v2[0]
            
            output_bert_v1s.append(bert_v1_super)
            output_bert_v2s.append(bert_v2_super)
            types.append(t)
            
            for bert_v1_sub in bert_v1[1:]:
                if (bert_v1_super, bert_v1_sub, "sprwrd") not in check_existed_dict:
                    output_bert_v1s.append(bert_v1_super)
                    output_bert_v2s.append(bert_v1_sub)
                    types.append("sprwrd")
                    check_existed_dict[(bert_v1_super, bert_v1_sub, "sprwrd")] = True
                
            for bert_v2_sub in bert_v2[1:]:
                if (bert_v2_super, bert_v2_sub, "sprwrd") not in check_existed_dict:
                    output_bert_v1s.append(bert_v2_super)
                    output_bert_v2s.append(bert_v2_sub)
                    types.append("sprwrd")
                    check_existed_dict[(bert_v2_super, bert_v2_sub, "sprwrd")] = True
                
    return output_bert_v1s, output_bert_v2s, types
    
build_dep_parse_tree("I'm waiting ... It's 9am now.")

([3, 0, 0, 3, 10, 10, 7, 7, 10, 3, 10, 3],
 [0, 1, 2, 6, 7, 11, 8, 9, 10, 10, 12, 13],
 ['nsubj',
  'sprwrd',
  'sprwrd',
  'punct',
  'nsubj',
  'sprwrd',
  'sprwrd',
  'sprwrd',
  'nummod',
  'parataxis',
  'advmod',
  'punct'])

In [24]:
bert_sample = {'id': '2777', 'text': "To be completely fair, the only redeeming factor was the food, which was above average, but couldn't make up for all the other deficiencies of Teodora.", 'aspects': [{'term': 'food', 'polarity': 'positive', 'from': '57', 'to': '61'}], 'category': [{'category': 'food', 'polarity': 'positive'}, {'category': 'anecdotes/miscellaneous', 'polarity': 'negative'}]}
bert_sample

{'id': '2777',
 'text': "To be completely fair, the only redeeming factor was the food, which was above average, but couldn't make up for all the other deficiencies of Teodora.",
 'aspects': [{'term': 'food',
   'polarity': 'positive',
   'from': '57',
   'to': '61'}],
 'category': [{'category': 'food', 'polarity': 'positive'},
  {'category': 'anecdotes/miscellaneous', 'polarity': 'negative'}]}

In [25]:
text_1 = "To be completely fair, the only redeeming factor was the food, which was above average, but couldn't make up for all the other deficiencies of Teodora."
text_1[57:61]

'food'

In [27]:
def build_aspect(bert_tokens, bert_aspect):
    term = bert_aspect["term"]
    

def build_aspects(bert_tokens, bert_aspects):
    for bert_aspect in bert_aspects:
        build_aspect(bert_tokens, bert_aspect)

In [28]:
# input 
# BERT_ABSA:
{
    'id': '2777', 
    'text': "To be completely fair, the only redeeming factor was the food, which was above average, but couldn't make up for all the other deficiencies of Teodora.", 
    'aspects': [
        {
            'term': 'food', 
            'polarity': 'positive', 
            'from': '57', 
            'to': '61'
        }
    ], 
    'category': [
        {
            'category': 'food', 
            'polarity': 'positive'
        }, {'category': 'anecdotes/miscellaneous', 'polarity': 'negative'}
    ]
}

# output:
{
    'text': "",
    'bert_token_index': [],
    'aspect_term_index': [], # in bert index
    'edges': [ # in bert index
        [],
        []
    ]
}
    
text = inp['text']
bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
bert_token = bert_tokenizer.tokenize(text)
(coreNLP_tokens, parse_tree_corenlp) = tokenize_and_depparse(inp['text'])
coreNLP_to_bert_index_map = convert_index_coreNLP_to_bert(bert_token)

{
    'text': text,
    'bert_token_index': bert_token,
    'aspect_term_index': [], # in bert index
    'edges': [ # in bert index
        [],
        []
    ]
}

NameError: name 'convert_index_coreNLP_to_bert' is not defined

## Dataset

In [29]:
data = list(pd.read_csv('../dataset/preprocessed_data/Laptops_Train.csv').T.to_dict().values())

In [30]:
class Dataset(Dataset):
    def __init__(self, data_dir, transformation='QA_M', num_classes=3, bert_tokenizer=None, max_length=0, seed=0):
        random.seed(seed)
        assert transformation in ['QA_M', 'QA_B', 'MLI_M', 'MLI_B'], 'Invalid transformation method'
        assert num_classes in [2, 3], 'Invalid num_classes'
        
        self.transformation = transformation
        self.bert_tokenizer = bert_tokenizer
        self.max_length = max_length
        self.polarity_dict = {'positive': 0, 'negative': 1, 'neutral': 2}
        
        # load data
        self.data = list(pd.read_csv(data_dir).T.to_dict().values())
        if num_classes == 2:
            self.data = [d for d in self.data if d['label'] != 'neutral']
    
    def transform(self, sample):
        seq1 = sample['text'].lower()
        term = sample['term'].lower()
        
        if self.transformation == 'QA_M':
            seq2 = f'what is the polarity of {term} ?'
            label = self.polarity_dict[sample['label']]
        elif self.transformation == 'MLI_M':
            seq2 = term.lower()
            label = self.polarity_dict[sample['label']]
#         elif self.transformation == 'QA_B':
#         elif self.transformation == 'MLI_B':
        
        return seq1, seq2, label
        
    def encode_text(self, seq1, seq2):
        # encode
        encoded_text = self.bert_tokenizer.encode_plus(
            seq1,
            seq2,
            add_special_tokens=True,  # Add [CLS] and [SEP]
            max_length=self.max_length,  # maximum length of a sentence
            padding='max_length',  # Add [PAD]s
            truncation=True, # Truncate up to maximum length
            return_attention_mask=True,  # Generate the attention mask
            return_tensors='pt',  # Ask the function to return PyTorch tensors
        )
        return encoded_text
        
    def __getitem__(self, item):
        '''
        example = {
            'id': 1000,
            'text': 'The food is good, especially their more basic dishes, and the drinks are delicious.',
            'term': 'food',
            'label': 'positive',
            }
        '''
            
        # encoder
        sample = self.data[item]
        seq1, seq2, label = self.transform(sample)
        encoded_text = self.encode_text(seq1, seq2)

        single_input = {
            'seq1': seq1,
            'seq2': seq2,
            'term': sample['term'],
            'label': label, 
            'input_ids': encoded_text['input_ids'].flatten(),
            'token_type_ids': encoded_text['token_type_ids'].flatten(),
            'attention_mask': encoded_text['attention_mask'].flatten(),
        }
        return single_input

    def __len__(self):
        return len(self.data)
    
class DataModule(pl.LightningDataModule):
    def __init__(self, params):
        super().__init__()
        self.save_hyperparameters(params)

    def setup(self, stage=None):
        bert_tokenizer = BertTokenizer.from_pretrained(self.hparams.bert_name)
        
        # Assign train/val datasets for use in dataloaders
        if stage == "fit" or stage is None:
            data_fit = Dataset(
                data_dir=self.hparams.data_train_dir,
                transformation=self.hparams.transformation,
                num_classes=self.hparams.num_classes,
                bert_tokenizer=bert_tokenizer,
                max_length=self.hparams.max_length,
                seed=self.hparams.seed)
            
            total_samples = data_fit.__len__()
            train_samples = int(data_fit.__len__() * 0.8)
            val_samples = total_samples - train_samples
            self.data_train, self.data_val = random_split(
                data_fit, [train_samples, val_samples], generator=torch.Generator().manual_seed(self.hparams.seed))

        # Assign test dataset for use in dataloader(s)
        if stage == "test" or stage is None:
            self.data_test = Dataset(
                data_dir=self.hparams.data_test_dir,
                transformation=self.hparams.transformation,
                num_classes=self.hparams.num_classes,
                bert_tokenizer=bert_tokenizer,
                max_length=self.hparams.max_length,
                seed=self.hparams.seed)

    def train_dataloader(self):
        return DataLoader(
            self.data_train, 
            batch_size=self.hparams.batch_size, 
            num_workers=4, 
            shuffle=False, # Already shuffle in random_split() 
            drop_last=True, 
#             collate_fn=lambda x: x,
        )

    def val_dataloader(self):
        return DataLoader(
            self.data_val, 
            batch_size=self.hparams.batch_size, 
            num_workers=4, 
            shuffle=False,
#             drop_last=True, 
#             collate_fn=lambda x: x,
        )
    def test_dataloader(self):
        return DataLoader(
            self.data_test, 
            batch_size=self.hparams.batch_size, 
            num_workers=4, 
            shuffle=False,
#             drop_last=True, 
#             collate_fn=lambda x: x,
        )

## Model

In [33]:
class SentimentClassifier(pl.LightningModule):
    def __init__(self, params):
        super().__init__()
        self.save_hyperparameters(params)
        self.tokenizer = BertTokenizer.from_pretrained(self.hparams.pretrained_bert_name)
        self.bert = BertForSequenceClassification.from_pretrained(
            self.hparams.pretrained_bert_name, num_labels=3, output_hidden_states=True, output_attentions=True, return_dict=False)
        self.hidden_size = self.bert.config.hidden_size
        self.cross_entropy_loss = nn.CrossEntropyLoss()
    
    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=self.hparams.lr)
        return optimizer  
    
    def forward(self, input_ids, attention_mask, token_type_ids, labels):
        loss, logits, hidden, _ = self.bert(
            input_ids=input_ids, 
            attention_mask=attention_mask, 
            token_type_ids=token_type_ids,
            labels=labels,
        )
        
        return logits
    
    def margin_loss(self,  embedding_query, embedding_pos, embedding_neg):
        scores_pos = (embeddings_query * embeddings_pos).sum(dim=-1)
        scores_neg = (embeddings_query * embeddings_neg).sum(dim=-1) * self.scale
        return scores_pos - scores_neg
        
    def training_step(self, batch, batch_idx):
        # ['seq1', 'seq2', 'term', 'label', 'input_ids', 'token_type_ids', 'attention_mask']
        logits = self.forward(
            input_ids=batch['input_ids'],
            attention_mask=batch['attention_mask'],
            token_type_ids=batch['token_type_ids'],
            labels=batch['label'],
        )
        
        labels = batch['label']
        ce_loss = self.cross_entropy_loss(logits, labels)        
#         acc = utils.calc_accuracy(logits, labels).squeeze()
#         logs = {
#             'loss': ce_loss,
#             'acc': acc,
#         }
#         self.log_dict(logs, prog_bar=True)
        return ce_loss
    
    def validation_step(self, batch, batch_idx):
        logits = self.forward(
            input_ids=batch['input_ids'],
            attention_mask=batch['attention_mask'],
            token_type_ids=batch['token_type_ids'],
            labels=batch['label'],
        )
        
        labels = batch['label']
        ce_loss = self.cross_entropy_loss(logits, labels)        
        acc = utils.calc_accuracy(logits, labels).squeeze()
        macro_f1 = utils.calc_f1(logits, labels, avg_type='macro').squeeze()
        micro_f1 = utils.calc_f1(logits, labels, avg_type='micro').squeeze()

        logs = {
            'loss': ce_loss, 
            'acc': acc,
            'macro_f1': macro_f1,
            'micro_f1': micro_f1
        }
        self.log_dict(logs, prog_bar=True)
        return logs
    
    def validation_epoch_end(self, val_step_outputs):
        avg_loss = torch.stack([x['loss'] for x in val_step_outputs]).mean().cpu()
        avg_acc = torch.stack([x['acc'] for x in val_step_outputs]).mean().cpu()
        avg_macro_f1 = torch.stack([x['macro_f1'] for x in val_step_outputs]).mean().cpu()
        avg_micro_f1 = torch.stack([x['micro_f1'] for x in val_step_outputs]).mean().cpu()
        logs = {
            'val_loss': avg_loss, 
            'val_acc': avg_acc,
            'val_macro_f1': avg_macro_f1,
            'val_micro_f1': avg_micro_f1,
        }
        self.log_dict(logs, prog_bar=True)
     
    def test_step(self, batch, batch_idx):
        logits = self.forward(
            input_ids=batch['input_ids'],
            attention_mask=batch['attention_mask'],
            token_type_ids=batch['token_type_ids'],
            labels=batch['label'],
        )
        
        labels = batch['label']
        ce_loss = self.cross_entropy_loss(logits, labels)        
        acc = utils.calc_accuracy(logits, labels).squeeze()
        macro_f1 = utils.calc_f1(logits, labels, avg_type='macro').squeeze()
        micro_f1 = utils.calc_f1(logits, labels, avg_type='micro').squeeze()

        logs = {
            'loss': ce_loss, 
            'acc': acc,
            'macro_f1': macro_f1,
            'micro_f1': micro_f1
        }
        return logs
    
    def test_epoch_end(self, test_step_outputs):
        avg_loss = torch.stack([x['loss'] for x in test_step_outputs]).mean().cpu()
        avg_acc = torch.stack([x['acc'] for x in test_step_outputs]).mean().cpu()
        avg_macro_f1 = torch.stack([x['macro_f1'] for x in test_step_outputs]).mean().cpu()
        avg_micro_f1 = torch.stack([x['micro_f1'] for x in test_step_outputs]).mean().cpu()

        logs = {
            'test_loss': avg_loss, 
            'test_acc': avg_acc,
            'test_macro_f1': avg_macro_f1,
            'test_micro_f1': avg_micro_f1,
        }
        self.log_dict(logs, prog_bar=True)
        return logs
     

In [34]:
import commentjson
from collections import OrderedDict

def read_json(fname):
    '''
    Read in the json file specified by 'fname'
    '''
    with open(fname, 'rt') as handle:
        return commentjson.load(handle, object_hook=OrderedDict)

def build_model(config):
    data_params, model_params = config['data_params'], config['model_params']
    data = DataModule(data_params)
    model = SentimentClassifier(model_params)
    return data, model

In [35]:
def build_trainder(config):
    trainer_params = config['trainer_params']
    data_params = config['data_params']
    
    # callbacks
    checkpoint = ModelCheckpoint(
        dirpath=trainer_params['checkpoint_dir'], 
        filename='{epoch}-{val_loss:.4f}-{val_acc:.4f}-{val_macro_f1:.4f}-{val_micro_f1:.4f}',
        save_top_k=trainer_params['top_k'],
        verbose=True,
        monitor=trainer_params['metric'],
        mode=trainer_params['mode'],
    )
    early_stopping = EarlyStopping(
        monitor='val_loss', 
        min_delta=0.00, 
        patience=trainer_params['patience'],
        verbose=False,
        mode=trainer_params['mode'],
    )
    callbacks = [checkpoint, early_stopping]
    
    # trainer_kwargs
    trainer_kwargs = {
        'max_epochs': trainer_params['max_epochs'],
        'gpus': 1 if torch.cuda.is_available() else 0,
    #     "progress_bar_refresh_rate":p_refresh,
    #     'gradient_clip_val': hyperparameters['grad_clip'],
        'weights_summary': 'full',
        'deterministic': True,
        'callbacks': callbacks,
    }

    trainer = Trainer(**trainer_kwargs)
    return trainer, trainer_kwargs

In [36]:
# parser = argparse.ArgumentParser(description='Training.')

# parser.add_argument('-config_file', help='config file path', default='../src/restaurant_config.json', type=str)
# parser.add_argument('-f', '--fff', help='a dummy argument to fool ipython', default='1')
# args = parser.parse_args()

# args.config = read_json(args.config_file)
# seed_everything(args.config['data_params']['seed'], workers=True)
# data, clf = build_model(args.config)
# trainer, trainer_kwargs = build_trainder(args.config)
# trainer.fit(clf, data)

## Predict

In [37]:
parser = argparse.ArgumentParser(description='Training.')

parser.add_argument('-config_file', help='config file path', default='../src/restaurant_config.json', type=str)
parser.add_argument('-f', '--fff', help='a dummy argument to fool ipython', default='1')
args = parser.parse_args()

args.config = read_json(args.config_file)
seed_everything(args.config['data_params']['seed'], workers=True)
data, clf = build_model(args.config)
trainer, trainer_kwargs = build_trainder(args.config)

Global seed set to 12345
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized fro

In [38]:
paths = sorted(glob.glob('/home/hoang/github/BERT_ABSA/model/restaurants/*.ckpt'))
model_test = SentimentClassifier.load_from_checkpoint(paths[0])
result = trainer.test(model_test, datamodule=data)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Testing: 0it [00:00, ?it/s]

[{'loss': tensor(0.4673, device='cuda:0'), 'acc': tensor(0.8203, dtype=torch.float64), 'macro_f1': tensor(0.7241, dtype=torch.float64), 'micro_f1': tensor(0.8203, dtype=torch.float64)}, {'loss': tensor(0.4060, device='cuda:0'), 'acc': tensor(0.8906, dtype=torch.float64), 'macro_f1': tensor(0.7525, dtype=torch.float64), 'micro_f1': tensor(0.8906, dtype=torch.float64)}, {'loss': tensor(0.4146, device='cuda:0'), 'acc': tensor(0.8359, dtype=torch.float64), 'macro_f1': tensor(0.6356, dtype=torch.float64), 'micro_f1': tensor(0.8359, dtype=torch.float64)}, {'loss': tensor(0.6661, device='cuda:0'), 'acc': tensor(0.7734, dtype=torch.float64), 'macro_f1': tensor(0.6064, dtype=torch.float64), 'micro_f1': tensor(0.7734, dtype=torch.float64)}, {'loss': tensor(0.7138, device='cuda:0'), 'acc': tensor(0.7578, dtype=torch.float64), 'macro_f1': tensor(0.5623, dtype=torch.float64), 'micro_f1': tensor(0.7578, dtype=torch.float64)}, {'loss': tensor(0.7445, device='cuda:0'), 'acc': tensor(0.7500, dtype=torc