# Import dependencies

In [1]:
import os
import xml.etree.ElementTree as ET
import json
import torch
from torch.utils.data import DataLoader, Dataset
from torch.nn.utils.rnn import pad_sequence
from transformers import BertModel
import pandas as pd
import numpy as np
from torch.utils.data import RandomSampler, SequentialSampler
from transformers import BertTokenizer
from sklearn.metrics import classification_report
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score
import gc
import time

In [2]:
from models.BERT_ATE import BERT_ATE

# Load data

In [3]:
DATASET = '../data/semeval16_restaurants_train.json'

In [4]:
df = pd.json_normalize(json.load(open(DATASET)))

In [5]:
df.head()

Unnamed: 0,text,opinions,tokens,iob_aspect_tags
0,Judging from previous posts this used to be a ...,"[{'target': 'place', 'category': 'RESTAURANT#G...","[Judging, from, previous, posts, this, used, t...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, ..."
1,"We, there were four of us, arrived at noon - t...","[{'target': 'staff', 'category': 'SERVICE#GENE...","[We, ,, there, were, four, of, us, ,, arrived,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,"They never brought us complimentary noodles, i...","[{'target': 'NULL', 'category': 'SERVICE#GENER...","[They, never, brought, us, complimentary, nood...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,The food was lousy - too sweet or too salty an...,"[{'target': 'food', 'category': 'FOOD#QUALITY'...","[The, food, was, lousy, -, too, sweet, or, too...","[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0]"
4,The food was lousy - too sweet or too salty an...,"[{'target': 'food', 'category': 'FOOD#QUALITY'...","[The, food, was, lousy, -, too, sweet, or, too...","[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0]"


# Train model

In [6]:
TRAIN_BATCH_SIZE = 4
VALID_BATCH_SIZE = 4
EPOCHS = 2
LEARNING_RATE = 1e-05

TRAIN_SPLIT = 0.8

In [7]:
PRETRAINED_BERT_MODEL_VARIANT = 'bert-base-cased'

In [8]:
DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [9]:
train_df = df.sample(frac = TRAIN_SPLIT)
test_df = df.drop(train_df.index).reset_index(drop=True)
train_df = train_df.reset_index(drop=True)

print("FULL Dataset: {}".format(df.shape))
print("TRAIN Dataset: {}".format(train_df.shape))
print("TEST Dataset: {}".format(test_df.shape))

FULL Dataset: (2507, 4)
TRAIN Dataset: (2006, 4)
TEST Dataset: (501, 4)


In [10]:
class dataset_ATM(Dataset):
    def __init__(self, df, tokenizer):
        self.df = df
        self.tokenizer = tokenizer

    def __getitem__(self, idx):
        tokens = self.df['tokens'].iloc[idx]
        tags = self.df['iob_aspect_tags'].iloc[idx]

        bert_tokens = []
        bert_tags = []

        for i in range(len(tokens)):
            t = self.tokenizer.tokenize(tokens[i])
            bert_tokens += t
            bert_tags += [int(tags[i])] * len(t)
        
        bert_ids = self.tokenizer.convert_tokens_to_ids(bert_tokens)

        ids_tensor = torch.tensor(bert_ids)
        tags_tensor = torch.tensor(bert_tags)

        return bert_tokens, ids_tensor, tags_tensor
    
    def __len__(self):
        return len(self.df)

In [11]:
tokenizer = BertTokenizer.from_pretrained(PRETRAINED_BERT_MODEL_VARIANT)

In [12]:
train_ds = dataset_ATM(train_df, tokenizer)
test_ds = dataset_ATM(test_df, tokenizer)

In [13]:
def create_mini_batch(samples):
    ids_tensors = [s[1] for s in samples]
    ids_tensors = pad_sequence(ids_tensors, batch_first=True).to(DEVICE)

    tags_tensors = [s[2] for s in samples]
    tags_tensors = pad_sequence(tags_tensors, batch_first=True).to(DEVICE)
    
    masks_tensors = torch.zeros(ids_tensors.shape, dtype=torch.long).to(DEVICE)
    masks_tensors = masks_tensors.masked_fill(ids_tensors != 0, 1).to(DEVICE)
    
    return ids_tensors, tags_tensors, masks_tensors

In [14]:
train_dataloader = DataLoader(
    train_ds,
    sampler = RandomSampler(train_ds),
    batch_size = TRAIN_BATCH_SIZE,
    drop_last = True,
    collate_fn=create_mini_batch
)

test_dataloader = DataLoader(
    test_ds,
    sampler = SequentialSampler(test_ds),
    batch_size = VALID_BATCH_SIZE,
    drop_last = True,
    collate_fn=create_mini_batch
)

In [15]:
model_ATE = BERT_ATE(PRETRAINED_BERT_MODEL_VARIANT).to(DEVICE)
optimizer_ATE = torch.optim.Adam(model_ATE.parameters(), lr=LEARNING_RATE)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [16]:
# torch cuda clear cache
torch.cuda.empty_cache()

with torch.no_grad():
    torch.cuda.empty_cache()

In [17]:
def train(epoch, model, loss_fn, optimizer, dataloader):
    model.train()

    for _, data in enumerate(dataloader, 0):
        ids_tensors, tags_tensors, masks_tensors = data
        ids_tensors = ids_tensors.to(DEVICE)
        tags_tensors = tags_tensors.to(DEVICE)
        masks_tensors = masks_tensors.to(DEVICE)

        outputs = model(ids_tensors, masks_tensors)

        optimizer.zero_grad()

        loss = loss_fn(outputs.view(-1, 3), tags_tensors.view(-1))

        loss.backward()

        optimizer.step()
        
        if _ % 50 == 0:
            print(f'Epoch: {epoch}, Step: {_}, Loss:  {loss.item()}')

In [18]:
for epoch in range(EPOCHS):
    train(epoch, model_ATE, torch.nn.CrossEntropyLoss(), optimizer_ATE, train_dataloader)

Epoch: 0, Step: 0, Loss:  1.0022004842758179
Epoch: 0, Step: 50, Loss:  0.20004427433013916
Epoch: 0, Step: 100, Loss:  0.21648697555065155
Epoch: 0, Step: 150, Loss:  0.08073092997074127
Epoch: 0, Step: 200, Loss:  0.08740708231925964
Epoch: 0, Step: 250, Loss:  0.23410958051681519
Epoch: 0, Step: 300, Loss:  0.07421716302633286
Epoch: 0, Step: 350, Loss:  0.33796024322509766
Epoch: 0, Step: 400, Loss:  0.13609564304351807
Epoch: 0, Step: 450, Loss:  0.1776328831911087
Epoch: 0, Step: 500, Loss:  0.04585307091474533
Epoch: 1, Step: 0, Loss:  0.06842641532421112
Epoch: 1, Step: 50, Loss:  0.16960299015045166
Epoch: 1, Step: 100, Loss:  0.016538985073566437
Epoch: 1, Step: 150, Loss:  0.11822547018527985
Epoch: 1, Step: 200, Loss:  0.06756515055894852
Epoch: 1, Step: 250, Loss:  0.03745114058256149
Epoch: 1, Step: 300, Loss:  0.04986521974205971
Epoch: 1, Step: 350, Loss:  0.1965045928955078
Epoch: 1, Step: 400, Loss:  0.04075441509485245
Epoch: 1, Step: 450, Loss:  0.06539789587259293


In [19]:
def validate(model_ATE, dataloader):
    x = []
    y = []

    with torch.no_grad():
        for _, data in enumerate(dataloader, 0):
            ids_tensors, tags_tensors, masks_tensors = data
            ids_tensors = ids_tensors.to(DEVICE)
            tags_tensors = tags_tensors.to(DEVICE)
            masks_tensors = masks_tensors.to(DEVICE)

            outputs = model_ATE(ids_tensors, masks_tensors)

            _, predictions = torch.max(outputs, dim=2)

            x += list([int(j) for i in predictions for j in i ])
            y += list([int(j) for i in tags_tensors for j in i ])

    return x, y

In [20]:
x, y = validate(model_ATE, test_dataloader)

In [21]:
print(classification_report(x, y, target_names=[str(i) for i in range(3)]))

              precision    recall  f1-score   support

           0       0.99      0.98      0.99     13338
           1       0.82      0.87      0.84       883
           2       0.68      0.90      0.77       311

    accuracy                           0.97     14532
   macro avg       0.83      0.92      0.87     14532
weighted avg       0.98      0.97      0.97     14532



In [22]:
torch.save(model_ATE, '../results/ate/models/model_ATE_semeval.pth')

# Run 10 times

In [23]:
bert_ate_semeval_stats = pd.DataFrame(columns=['accuracy','precision_score_micro','precision_score_macro','recall_score_micro','recall_score_macro','f1_score_micro','f1_score_macro', 'execution_time'])

In [24]:
for i in range(10):
    # clear cache cuda
    torch.cuda.empty_cache()
    with torch.no_grad():
        torch.cuda.empty_cache()
    gc.collect()

    start_time = time.time()

    print(f"Run {i + 1}/10")

    train_df = df.sample(frac = TRAIN_SPLIT)
    test_df = df.drop(train_df.index).reset_index(drop=True)
    train_df = train_df.reset_index(drop=True)

    train_ds = dataset_ATM(train_df, tokenizer)
    test_ds = dataset_ATM(test_df, tokenizer)

    train_dataloader = DataLoader(
        train_ds,
        sampler = RandomSampler(train_ds),
        batch_size = TRAIN_BATCH_SIZE,
        drop_last = True,
        collate_fn=create_mini_batch
    )

    test_dataloader = DataLoader(
        test_ds,
        sampler = SequentialSampler(test_ds),
        batch_size = VALID_BATCH_SIZE,
        drop_last = True,
        collate_fn=create_mini_batch
    )

    model_ATE_run = BERT_ATE(PRETRAINED_BERT_MODEL_VARIANT).to(DEVICE)
    optimizer_ATE = torch.optim.Adam(model_ATE.parameters(), lr=LEARNING_RATE)

    model_ATE_run = BERT_ATE(PRETRAINED_BERT_MODEL_VARIANT).to(DEVICE)
    optimizer_ATE_run = torch.optim.Adam(model_ATE_run.parameters(), lr=LEARNING_RATE)

    for epoch in range(EPOCHS):
        train(epoch, model_ATE_run, torch.nn.CrossEntropyLoss(), optimizer_ATE_run, train_dataloader)

    x, y = validate(model_ATE, test_dataloader)

    accuracy = accuracy_score(y, x)
    precision_score_micro = precision_score(y, x, average='micro')
    precision_score_macro = precision_score(y, x, average='macro')
    recall_score_micro = recall_score(y, x, average='micro')
    recall_score_macro = recall_score(y, x, average='macro')
    f1_score_micro = f1_score(y, x, average='micro')
    f1_score_macro = f1_score(y, x, average='macro')
    execution_time = time.time() - start_time

    bert_ate_semeval_stats.loc[i] = [accuracy,precision_score_micro,precision_score_macro,recall_score_micro,recall_score_macro,f1_score_micro,f1_score_macro, execution_time]

    del train_df
    del test_df
    del train_ds
    del test_ds
    del train_dataloader
    del test_dataloader
    del model_ATE_run
    del optimizer_ATE
    del x
    del y

Run 1/10


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transf

Epoch: 0, Step: 0, Loss:  1.0430035591125488
Epoch: 0, Step: 50, Loss:  0.36760538816452026
Epoch: 0, Step: 100, Loss:  0.2698538899421692
Epoch: 0, Step: 150, Loss:  0.0768975168466568
Epoch: 0, Step: 200, Loss:  0.05865037813782692
Epoch: 0, Step: 250, Loss:  0.09902628511190414
Epoch: 0, Step: 300, Loss:  0.1596120297908783
Epoch: 0, Step: 350, Loss:  0.1139257550239563
Epoch: 0, Step: 400, Loss:  0.1024770736694336
Epoch: 0, Step: 450, Loss:  0.10531380772590637
Epoch: 0, Step: 500, Loss:  0.08003353327512741
Epoch: 1, Step: 0, Loss:  0.009264320135116577
Epoch: 1, Step: 50, Loss:  0.09024758636951447
Epoch: 1, Step: 100, Loss:  0.10645774751901627
Epoch: 1, Step: 150, Loss:  0.04666516184806824
Epoch: 1, Step: 200, Loss:  0.02524683251976967
Epoch: 1, Step: 250, Loss:  0.05559393763542175
Epoch: 1, Step: 300, Loss:  0.20780722796916962
Epoch: 1, Step: 350, Loss:  0.11651113629341125
Epoch: 1, Step: 400, Loss:  0.04732119292020798
Epoch: 1, Step: 450, Loss:  0.045676928013563156
Ep

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transf

Epoch: 0, Step: 0, Loss:  1.2537903785705566
Epoch: 0, Step: 50, Loss:  0.44710657000541687
Epoch: 0, Step: 100, Loss:  0.4849851727485657
Epoch: 0, Step: 150, Loss:  0.2641758620738983
Epoch: 0, Step: 200, Loss:  0.10806534439325333
Epoch: 0, Step: 250, Loss:  0.12158673256635666
Epoch: 0, Step: 300, Loss:  0.18524472415447235
Epoch: 0, Step: 350, Loss:  0.10666368901729584
Epoch: 0, Step: 400, Loss:  0.05690456181764603
Epoch: 0, Step: 450, Loss:  0.08016927540302277
Epoch: 0, Step: 500, Loss:  0.0972270742058754
Epoch: 1, Step: 0, Loss:  0.09911803901195526
Epoch: 1, Step: 50, Loss:  0.12451036274433136
Epoch: 1, Step: 100, Loss:  0.06200190633535385
Epoch: 1, Step: 150, Loss:  0.11665500700473785
Epoch: 1, Step: 200, Loss:  0.04767729714512825
Epoch: 1, Step: 250, Loss:  0.102117158472538
Epoch: 1, Step: 300, Loss:  0.08810574561357498
Epoch: 1, Step: 350, Loss:  0.018795818090438843
Epoch: 1, Step: 400, Loss:  0.07390595227479935
Epoch: 1, Step: 450, Loss:  0.030220715329051018
Ep

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transf

Epoch: 0, Step: 0, Loss:  1.3014229536056519
Epoch: 0, Step: 50, Loss:  0.40194207429885864
Epoch: 0, Step: 100, Loss:  0.18030965328216553
Epoch: 0, Step: 150, Loss:  0.11769424378871918
Epoch: 0, Step: 200, Loss:  0.1678420752286911
Epoch: 0, Step: 250, Loss:  0.07730333507061005
Epoch: 0, Step: 300, Loss:  0.07073739171028137
Epoch: 0, Step: 350, Loss:  0.23605643212795258
Epoch: 0, Step: 400, Loss:  0.07493675500154495
Epoch: 0, Step: 450, Loss:  0.22320862114429474
Epoch: 0, Step: 500, Loss:  0.06647665053606033
Epoch: 1, Step: 0, Loss:  0.12300381064414978
Epoch: 1, Step: 50, Loss:  0.032233450561761856
Epoch: 1, Step: 100, Loss:  0.1801566332578659
Epoch: 1, Step: 150, Loss:  0.02168879471719265
Epoch: 1, Step: 200, Loss:  0.12324973195791245
Epoch: 1, Step: 250, Loss:  0.037198591977357864
Epoch: 1, Step: 300, Loss:  0.036494478583335876
Epoch: 1, Step: 350, Loss:  0.029521457850933075
Epoch: 1, Step: 400, Loss:  0.09758556634187698
Epoch: 1, Step: 450, Loss:  0.043071109801530

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transf

Epoch: 0, Step: 0, Loss:  1.0898280143737793
Epoch: 0, Step: 50, Loss:  0.2447110414505005
Epoch: 0, Step: 100, Loss:  0.4175432324409485
Epoch: 0, Step: 150, Loss:  0.06393174082040787
Epoch: 0, Step: 200, Loss:  0.11685635894536972
Epoch: 0, Step: 250, Loss:  0.20008225739002228
Epoch: 0, Step: 300, Loss:  0.15935471653938293
Epoch: 0, Step: 350, Loss:  0.1250811666250229
Epoch: 0, Step: 400, Loss:  0.18204212188720703
Epoch: 0, Step: 450, Loss:  0.16214998066425323
Epoch: 0, Step: 500, Loss:  0.1858232319355011
Epoch: 1, Step: 0, Loss:  0.08303128927946091
Epoch: 1, Step: 50, Loss:  0.04150811582803726
Epoch: 1, Step: 100, Loss:  0.06011053919792175
Epoch: 1, Step: 150, Loss:  0.08970509469509125
Epoch: 1, Step: 200, Loss:  0.08348125219345093
Epoch: 1, Step: 250, Loss:  0.06847938150167465
Epoch: 1, Step: 300, Loss:  0.008204569108784199
Epoch: 1, Step: 350, Loss:  0.1344681829214096
Epoch: 1, Step: 400, Loss:  0.09505525231361389
Epoch: 1, Step: 450, Loss:  0.01969189941883087
Epo

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transf

Epoch: 0, Step: 0, Loss:  1.062132716178894
Epoch: 0, Step: 50, Loss:  0.5515051484107971
Epoch: 0, Step: 100, Loss:  0.4542858898639679
Epoch: 0, Step: 150, Loss:  0.11120535433292389
Epoch: 0, Step: 200, Loss:  0.2412242889404297
Epoch: 0, Step: 250, Loss:  0.0855952799320221
Epoch: 0, Step: 300, Loss:  0.09694837033748627
Epoch: 0, Step: 350, Loss:  0.15682408213615417
Epoch: 0, Step: 400, Loss:  0.13360382616519928
Epoch: 0, Step: 450, Loss:  0.03171464055776596
Epoch: 0, Step: 500, Loss:  0.06826537102460861
Epoch: 1, Step: 0, Loss:  0.05365700647234917
Epoch: 1, Step: 50, Loss:  0.0822128877043724
Epoch: 1, Step: 100, Loss:  0.038975562900304794
Epoch: 1, Step: 150, Loss:  0.05985653027892113
Epoch: 1, Step: 200, Loss:  0.007912320084869862
Epoch: 1, Step: 250, Loss:  0.061512332409620285
Epoch: 1, Step: 300, Loss:  0.027350952848792076
Epoch: 1, Step: 350, Loss:  0.052660319954156876
Epoch: 1, Step: 400, Loss:  0.04332687705755234
Epoch: 1, Step: 450, Loss:  0.06461235880851746


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transf

Epoch: 0, Step: 0, Loss:  1.0856444835662842
Epoch: 0, Step: 50, Loss:  0.20581768453121185
Epoch: 0, Step: 100, Loss:  0.1957991123199463
Epoch: 0, Step: 150, Loss:  0.12569022178649902
Epoch: 0, Step: 200, Loss:  0.20157530903816223
Epoch: 0, Step: 250, Loss:  0.13149528205394745
Epoch: 0, Step: 300, Loss:  0.13972389698028564
Epoch: 0, Step: 350, Loss:  0.3456745743751526
Epoch: 0, Step: 400, Loss:  0.10223718732595444
Epoch: 0, Step: 450, Loss:  0.06560993194580078
Epoch: 0, Step: 500, Loss:  0.03135645017027855
Epoch: 1, Step: 0, Loss:  0.11809445917606354
Epoch: 1, Step: 50, Loss:  0.030834287405014038
Epoch: 1, Step: 100, Loss:  0.09849459677934647
Epoch: 1, Step: 150, Loss:  0.08539047837257385
Epoch: 1, Step: 200, Loss:  0.03766023740172386
Epoch: 1, Step: 250, Loss:  0.019637487828731537
Epoch: 1, Step: 300, Loss:  0.02738294005393982
Epoch: 1, Step: 350, Loss:  0.1278335154056549
Epoch: 1, Step: 400, Loss:  0.04479116201400757
Epoch: 1, Step: 450, Loss:  0.036197926849126816

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transf

Epoch: 0, Step: 0, Loss:  1.3412960767745972
Epoch: 0, Step: 50, Loss:  0.2535034418106079
Epoch: 0, Step: 100, Loss:  0.4011283218860626
Epoch: 0, Step: 150, Loss:  0.16236890852451324
Epoch: 0, Step: 200, Loss:  0.1214841902256012
Epoch: 0, Step: 250, Loss:  0.06271844357252121
Epoch: 0, Step: 300, Loss:  0.05705879628658295
Epoch: 0, Step: 350, Loss:  0.10836785286664963
Epoch: 0, Step: 400, Loss:  0.05862100049853325
Epoch: 0, Step: 450, Loss:  0.12642738223075867
Epoch: 0, Step: 500, Loss:  0.13753166794776917
Epoch: 1, Step: 0, Loss:  0.12718148529529572
Epoch: 1, Step: 50, Loss:  0.16934145987033844
Epoch: 1, Step: 100, Loss:  0.03909799084067345
Epoch: 1, Step: 150, Loss:  0.1451781988143921
Epoch: 1, Step: 200, Loss:  0.09871223568916321
Epoch: 1, Step: 250, Loss:  0.03597717359662056
Epoch: 1, Step: 300, Loss:  0.06970240920782089
Epoch: 1, Step: 350, Loss:  0.0950542613863945
Epoch: 1, Step: 400, Loss:  0.09138715267181396
Epoch: 1, Step: 450, Loss:  0.06733590364456177
Epoc

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transf

Epoch: 0, Step: 0, Loss:  1.1806660890579224
Epoch: 0, Step: 50, Loss:  0.2237490862607956
Epoch: 0, Step: 100, Loss:  0.2773565649986267
Epoch: 0, Step: 150, Loss:  0.09165164083242416
Epoch: 0, Step: 200, Loss:  0.07346244156360626
Epoch: 0, Step: 250, Loss:  0.0747537687420845
Epoch: 0, Step: 300, Loss:  0.07161753624677658
Epoch: 0, Step: 350, Loss:  0.04677466303110123
Epoch: 0, Step: 400, Loss:  0.16600003838539124
Epoch: 0, Step: 450, Loss:  0.16486799716949463
Epoch: 0, Step: 500, Loss:  0.05424050614237785
Epoch: 1, Step: 0, Loss:  0.0966126024723053
Epoch: 1, Step: 50, Loss:  0.03344816341996193
Epoch: 1, Step: 100, Loss:  0.3145105838775635
Epoch: 1, Step: 150, Loss:  0.06441684067249298
Epoch: 1, Step: 200, Loss:  0.03905321657657623
Epoch: 1, Step: 250, Loss:  0.020183682441711426
Epoch: 1, Step: 300, Loss:  0.0366407074034214
Epoch: 1, Step: 350, Loss:  0.05747498944401741
Epoch: 1, Step: 400, Loss:  0.03100842423737049
Epoch: 1, Step: 450, Loss:  0.01602890156209469
Epoc

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transf

Epoch: 0, Step: 0, Loss:  0.9639701247215271
Epoch: 0, Step: 50, Loss:  0.2821648418903351
Epoch: 0, Step: 100, Loss:  0.1693946272134781
Epoch: 0, Step: 150, Loss:  0.05498886853456497
Epoch: 0, Step: 200, Loss:  0.16234391927719116
Epoch: 0, Step: 250, Loss:  0.09449974447488785
Epoch: 0, Step: 300, Loss:  0.18932634592056274
Epoch: 0, Step: 350, Loss:  0.1320429891347885
Epoch: 0, Step: 400, Loss:  0.12235639989376068
Epoch: 0, Step: 450, Loss:  0.07171657681465149
Epoch: 0, Step: 500, Loss:  0.03225191310048103
Epoch: 1, Step: 0, Loss:  0.11923080682754517
Epoch: 1, Step: 50, Loss:  0.04148229584097862
Epoch: 1, Step: 100, Loss:  0.06723132729530334
Epoch: 1, Step: 150, Loss:  0.03943600133061409
Epoch: 1, Step: 200, Loss:  0.02137628011405468
Epoch: 1, Step: 250, Loss:  0.09542258083820343
Epoch: 1, Step: 300, Loss:  0.1124953031539917
Epoch: 1, Step: 350, Loss:  0.026850419119000435
Epoch: 1, Step: 400, Loss:  0.05960392206907272
Epoch: 1, Step: 450, Loss:  0.03837199509143829
Ep

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transf

Epoch: 0, Step: 0, Loss:  1.087335467338562
Epoch: 0, Step: 50, Loss:  0.1756507158279419
Epoch: 0, Step: 100, Loss:  0.0999547615647316
Epoch: 0, Step: 150, Loss:  0.12446015328168869
Epoch: 0, Step: 200, Loss:  0.1867896318435669
Epoch: 0, Step: 250, Loss:  0.10504032671451569
Epoch: 0, Step: 300, Loss:  0.37062010169029236
Epoch: 0, Step: 350, Loss:  0.1552312970161438
Epoch: 0, Step: 400, Loss:  0.11326158791780472
Epoch: 0, Step: 450, Loss:  0.23856714367866516
Epoch: 0, Step: 500, Loss:  0.10938013345003128
Epoch: 1, Step: 0, Loss:  0.021813208237290382
Epoch: 1, Step: 50, Loss:  0.09317556023597717
Epoch: 1, Step: 100, Loss:  0.03270767256617546
Epoch: 1, Step: 150, Loss:  0.025941254571080208
Epoch: 1, Step: 200, Loss:  0.10452130436897278
Epoch: 1, Step: 250, Loss:  0.07946568727493286
Epoch: 1, Step: 300, Loss:  0.05709370970726013
Epoch: 1, Step: 350, Loss:  0.042549505829811096
Epoch: 1, Step: 400, Loss:  0.0824536457657814
Epoch: 1, Step: 450, Loss:  0.05225629359483719
Ep

In [25]:
bert_ate_semeval_stats

Unnamed: 0,accuracy,precision_score_micro,precision_score_macro,recall_score_micro,recall_score_macro,f1_score_micro,f1_score_macro,execution_time
0,0.984185,0.984185,0.948481,0.984185,0.896852,0.984185,0.921263,90.38128
1,0.983483,0.983483,0.942692,0.983483,0.904133,0.983483,0.92251,91.455228
2,0.982673,0.982673,0.94161,0.982673,0.90114,0.982673,0.920507,91.439573
3,0.984795,0.984795,0.951683,0.984795,0.90921,0.984795,0.929607,90.933346
4,0.983838,0.983838,0.952645,0.983838,0.900382,0.983838,0.924893,91.098615
5,0.982738,0.982738,0.945367,0.982738,0.886782,0.982738,0.914181,89.598135
6,0.985574,0.985574,0.953635,0.985574,0.912738,0.985574,0.932236,86.936766
7,0.984163,0.984163,0.947839,0.984163,0.909608,0.984163,0.927822,85.269147
8,0.984485,0.984485,0.947195,0.984485,0.902074,0.984485,0.923478,84.893893
9,0.983207,0.983207,0.954255,0.983207,0.897044,0.983207,0.923493,86.076742


In [27]:
bert_ate_semeval_stats.to_csv('../results/ate/stats/bert_ate_semeval_stats.csv')