# Import dependencies

In [1]:
import os
import xml.etree.ElementTree as ET
import json
import torch
from torch.utils.data import DataLoader, Dataset
from torch.nn.utils.rnn import pad_sequence
from transformers import BertModel
import pandas as pd
import numpy as np
from torch.utils.data import RandomSampler, SequentialSampler
from transformers import BertTokenizer
from sklearn.metrics import classification_report
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score
import gc
import time

In [2]:
from models.BERT_ATE import BERT_ATE

# Load data

In [3]:
DATASET = '../data/mams_atsa.json'

In [4]:
df = pd.json_normalize(json.load(open(DATASET)))

In [5]:
df.head()

Unnamed: 0,text,opinions,tokens,iob_aspect_tags
0,The decor is not special at all but their food...,"[{'target': 'decor', 'polarity': 'negative', '...","[The, decor, is, not, special, at, all, but, t...","[0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, ..."
1,The decor is not special at all but their food...,"[{'target': 'decor', 'polarity': 'negative', '...","[The, decor, is, not, special, at, all, but, t...","[0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, ..."
2,The decor is not special at all but their food...,"[{'target': 'decor', 'polarity': 'negative', '...","[The, decor, is, not, special, at, all, but, t...","[0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, ..."
3,"when tables opened up, the manager sat another...","[{'target': 'tables', 'polarity': 'neutral', '...","[when, tables, opened, up, ,, the, manager, sa...","[0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0]"
4,"when tables opened up, the manager sat another...","[{'target': 'tables', 'polarity': 'neutral', '...","[when, tables, opened, up, ,, the, manager, sa...","[0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0]"


# Train model

In [6]:
TRAIN_BATCH_SIZE = 4
VALID_BATCH_SIZE = 4
EPOCHS = 2
LEARNING_RATE = 1e-05

TRAIN_SPLIT = 0.8

In [7]:
PRETRAINED_BERT_MODEL_VARIANT = 'bert-base-cased'

In [8]:
DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [9]:
train_df = df.sample(frac = TRAIN_SPLIT)
test_df = df.drop(train_df.index).reset_index(drop=True)
train_df = train_df.reset_index(drop=True)

print("FULL Dataset: {}".format(df.shape))
print("TRAIN Dataset: {}".format(train_df.shape))
print("TEST Dataset: {}".format(test_df.shape))

FULL Dataset: (11186, 4)
TRAIN Dataset: (8949, 4)
TEST Dataset: (2237, 4)


In [10]:
class dataset_ATM(Dataset):
    def __init__(self, df, tokenizer):
        self.df = df
        self.tokenizer = tokenizer

    def __getitem__(self, idx):
        tokens = self.df['tokens'].iloc[idx]
        tags = self.df['iob_aspect_tags'].iloc[idx]

        bert_tokens = []
        bert_tags = []

        for i in range(len(tokens)):
            t = self.tokenizer.tokenize(tokens[i])
            bert_tokens += t
            bert_tags += [int(tags[i])] * len(t)
        
        bert_ids = self.tokenizer.convert_tokens_to_ids(bert_tokens)

        ids_tensor = torch.tensor(bert_ids)
        tags_tensor = torch.tensor(bert_tags)

        return bert_tokens, ids_tensor, tags_tensor
    
    def __len__(self):
        return len(self.df)

In [11]:
tokenizer = BertTokenizer.from_pretrained(PRETRAINED_BERT_MODEL_VARIANT)

In [12]:
train_ds = dataset_ATM(train_df, tokenizer)
test_ds = dataset_ATM(test_df, tokenizer)

In [13]:
def create_mini_batch(samples):
    ids_tensors = [s[1] for s in samples]
    ids_tensors = pad_sequence(ids_tensors, batch_first=True).to(DEVICE)

    tags_tensors = [s[2] for s in samples]
    tags_tensors = pad_sequence(tags_tensors, batch_first=True).to(DEVICE)
    
    masks_tensors = torch.zeros(ids_tensors.shape, dtype=torch.long).to(DEVICE)
    masks_tensors = masks_tensors.masked_fill(ids_tensors != 0, 1).to(DEVICE)
    
    return ids_tensors, tags_tensors, masks_tensors

In [14]:
train_dataloader = DataLoader(
    train_ds,
    sampler = RandomSampler(train_ds),
    batch_size = TRAIN_BATCH_SIZE,
    drop_last = True,
    collate_fn=create_mini_batch
)

test_dataloader = DataLoader(
    test_ds,
    sampler = SequentialSampler(test_ds),
    batch_size = VALID_BATCH_SIZE,
    drop_last = True,
    collate_fn=create_mini_batch
)

In [15]:
model_ATE = BERT_ATE(PRETRAINED_BERT_MODEL_VARIANT).to(DEVICE)
optimizer_ATE = torch.optim.Adam(model_ATE.parameters(), lr=LEARNING_RATE)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [16]:
# torch cuda clear cache
torch.cuda.empty_cache()

with torch.no_grad():
    torch.cuda.empty_cache()

In [17]:
def train(epoch, model, loss_fn, optimizer, dataloader):
    model.train()

    for _, data in enumerate(dataloader, 0):
        ids_tensors, tags_tensors, masks_tensors = data
        ids_tensors = ids_tensors.to(DEVICE)
        tags_tensors = tags_tensors.to(DEVICE)
        masks_tensors = masks_tensors.to(DEVICE)

        outputs = model(ids_tensors, masks_tensors)

        optimizer.zero_grad()

        loss = loss_fn(outputs.view(-1, 3), tags_tensors.view(-1))

        loss.backward()

        optimizer.step()
        
        if _ % 250 == 0:
            print(f'Epoch: {epoch}, Step: {_}, Loss:  {loss.item()}')

In [18]:
for epoch in range(EPOCHS):
    train(epoch, model_ATE, torch.nn.CrossEntropyLoss(), optimizer_ATE, train_dataloader)

Epoch: 0, Step: 0, Loss:  1.0352542400360107
Epoch: 0, Step: 250, Loss:  0.09849945455789566
Epoch: 0, Step: 500, Loss:  0.13399481773376465
Epoch: 0, Step: 750, Loss:  0.08224043250083923
Epoch: 0, Step: 1000, Loss:  0.16053709387779236
Epoch: 0, Step: 1250, Loss:  0.0873182937502861
Epoch: 0, Step: 1500, Loss:  0.27753540873527527
Epoch: 0, Step: 1750, Loss:  0.14635694026947021
Epoch: 0, Step: 2000, Loss:  0.13162505626678467
Epoch: 1, Step: 0, Loss:  0.14286555349826813
Epoch: 1, Step: 250, Loss:  0.07746236026287079
Epoch: 1, Step: 500, Loss:  0.1597706377506256
Epoch: 1, Step: 750, Loss:  0.06139006093144417
Epoch: 1, Step: 1000, Loss:  0.05829762667417526
Epoch: 1, Step: 1250, Loss:  0.10498248040676117
Epoch: 1, Step: 1500, Loss:  0.05226742476224899
Epoch: 1, Step: 1750, Loss:  0.06537162512540817
Epoch: 1, Step: 2000, Loss:  0.09267237037420273


In [19]:
def validate(model_ATE, dataloader):
    x = []
    y = []

    with torch.no_grad():
        for _, data in enumerate(dataloader, 0):
            ids_tensors, tags_tensors, masks_tensors = data
            ids_tensors = ids_tensors.to(DEVICE)
            tags_tensors = tags_tensors.to(DEVICE)
            masks_tensors = masks_tensors.to(DEVICE)

            outputs = model_ATE(ids_tensors, masks_tensors)

            _, predictions = torch.max(outputs, dim=2)

            x += list([int(j) for i in predictions for j in i ])
            y += list([int(j) for i in tags_tensors for j in i ])

    return x, y

In [20]:
x, y = validate(model_ATE, test_dataloader)

In [21]:
print(classification_report(x, y, target_names=[str(i) for i in range(3)]))

              precision    recall  f1-score   support

           0       0.98      0.99      0.98     86543
           1       0.91      0.81      0.86      8751
           2       0.88      0.84      0.86      2862

    accuracy                           0.97     98156
   macro avg       0.92      0.88      0.90     98156
weighted avg       0.97      0.97      0.97     98156



In [22]:
torch.save(model_ATE, '../results/ate/models/model_ATE_mams.pth')

# Run 10 times

In [23]:
bert_ate_mams_stats = pd.DataFrame(columns=['accuracy','precision_score_micro','precision_score_macro','recall_score_micro','recall_score_macro','f1_score_micro','f1_score_macro', 'execution_time'])

In [24]:
for i in range(10):
    # clear cache cuda
    torch.cuda.empty_cache()
    with torch.no_grad():
        torch.cuda.empty_cache()
    gc.collect()

    start_time = time.time()

    print(f"Run {i + 1}/10")

    train_df = df.sample(frac = TRAIN_SPLIT)
    test_df = df.drop(train_df.index).reset_index(drop=True)
    train_df = train_df.reset_index(drop=True)

    train_ds = dataset_ATM(train_df, tokenizer)
    test_ds = dataset_ATM(test_df, tokenizer)

    train_dataloader = DataLoader(
        train_ds,
        sampler = RandomSampler(train_ds),
        batch_size = TRAIN_BATCH_SIZE,
        drop_last = True,
        collate_fn=create_mini_batch
    )

    test_dataloader = DataLoader(
        test_ds,
        sampler = SequentialSampler(test_ds),
        batch_size = VALID_BATCH_SIZE,
        drop_last = True,
        collate_fn=create_mini_batch
    )

    model_ATE_run = BERT_ATE(PRETRAINED_BERT_MODEL_VARIANT).to(DEVICE)
    optimizer_ATE = torch.optim.Adam(model_ATE.parameters(), lr=LEARNING_RATE)

    model_ATE_run = BERT_ATE(PRETRAINED_BERT_MODEL_VARIANT).to(DEVICE)
    optimizer_ATE_run = torch.optim.Adam(model_ATE_run.parameters(), lr=LEARNING_RATE)

    for epoch in range(EPOCHS):
        train(epoch, model_ATE_run, torch.nn.CrossEntropyLoss(), optimizer_ATE_run, train_dataloader)

    x, y = validate(model_ATE, test_dataloader)

    accuracy = accuracy_score(y, x)
    precision_score_micro = precision_score(y, x, average='micro')
    precision_score_macro = precision_score(y, x, average='macro')
    recall_score_micro = recall_score(y, x, average='micro')
    recall_score_macro = recall_score(y, x, average='macro')
    f1_score_micro = f1_score(y, x, average='micro')
    f1_score_macro = f1_score(y, x, average='macro')
    execution_time = time.time() - start_time

    bert_ate_mams_stats.loc[i] = [accuracy,precision_score_micro,precision_score_macro,recall_score_micro,recall_score_macro,f1_score_micro,f1_score_macro, execution_time]

    del train_df
    del test_df
    del train_ds
    del test_ds
    del train_dataloader
    del test_dataloader
    del model_ATE_run
    del optimizer_ATE
    del x
    del y

Run 1/10


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias

Epoch: 0, Step: 0, Loss:  1.04682195186615
Epoch: 0, Step: 250, Loss:  0.19200390577316284
Epoch: 0, Step: 500, Loss:  0.07802700996398926
Epoch: 0, Step: 750, Loss:  0.17389942705631256
Epoch: 0, Step: 1000, Loss:  0.24707427620887756
Epoch: 0, Step: 1250, Loss:  0.03138164430856705
Epoch: 0, Step: 1500, Loss:  0.08674127608537674
Epoch: 0, Step: 1750, Loss:  0.08171983808279037
Epoch: 0, Step: 2000, Loss:  0.09621835500001907
Epoch: 1, Step: 0, Loss:  0.15668734908103943
Epoch: 1, Step: 250, Loss:  0.03916433826088905
Epoch: 1, Step: 500, Loss:  0.11243865638971329
Epoch: 1, Step: 750, Loss:  0.05369211360812187
Epoch: 1, Step: 1000, Loss:  0.14048659801483154
Epoch: 1, Step: 1250, Loss:  0.037970416247844696
Epoch: 1, Step: 1500, Loss:  0.09522941708564758
Epoch: 1, Step: 1750, Loss:  0.04681490734219551
Epoch: 1, Step: 2000, Loss:  0.0465279296040535
Run 2/10


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias

Epoch: 0, Step: 0, Loss:  1.042677879333496
Epoch: 0, Step: 250, Loss:  0.2508992850780487
Epoch: 0, Step: 500, Loss:  0.3565571904182434
Epoch: 0, Step: 750, Loss:  0.1943647861480713
Epoch: 0, Step: 1000, Loss:  0.14645105600357056
Epoch: 0, Step: 1250, Loss:  0.10630898922681808
Epoch: 0, Step: 1500, Loss:  0.20444121956825256
Epoch: 0, Step: 1750, Loss:  0.11980200558900833
Epoch: 0, Step: 2000, Loss:  0.08928913623094559
Epoch: 1, Step: 0, Loss:  0.12784157693386078
Epoch: 1, Step: 250, Loss:  0.14071038365364075
Epoch: 1, Step: 500, Loss:  0.12431848794221878
Epoch: 1, Step: 750, Loss:  0.06743177771568298
Epoch: 1, Step: 1000, Loss:  0.09688041359186172
Epoch: 1, Step: 1250, Loss:  0.004303648602217436
Epoch: 1, Step: 1500, Loss:  0.026872806251049042
Epoch: 1, Step: 1750, Loss:  0.03608439117670059
Epoch: 1, Step: 2000, Loss:  0.10498127341270447
Run 3/10


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias

Epoch: 0, Step: 0, Loss:  1.4639230966567993
Epoch: 0, Step: 250, Loss:  0.17172792553901672
Epoch: 0, Step: 500, Loss:  0.3060568571090698
Epoch: 0, Step: 750, Loss:  0.17148350179195404
Epoch: 0, Step: 1000, Loss:  0.10625799745321274
Epoch: 0, Step: 1250, Loss:  0.143313929438591
Epoch: 0, Step: 1500, Loss:  0.11178508400917053
Epoch: 0, Step: 1750, Loss:  0.07423533499240875
Epoch: 0, Step: 2000, Loss:  0.08512452244758606
Epoch: 1, Step: 0, Loss:  0.11205387860536575
Epoch: 1, Step: 250, Loss:  0.11749545484781265
Epoch: 1, Step: 500, Loss:  0.05825150012969971
Epoch: 1, Step: 750, Loss:  0.12245792150497437
Epoch: 1, Step: 1000, Loss:  0.10984472930431366
Epoch: 1, Step: 1250, Loss:  0.05265340209007263
Epoch: 1, Step: 1500, Loss:  0.05907595530152321
Epoch: 1, Step: 1750, Loss:  0.019220130518078804
Epoch: 1, Step: 2000, Loss:  0.24343609809875488
Run 4/10


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias

Epoch: 0, Step: 0, Loss:  1.255709171295166
Epoch: 0, Step: 250, Loss:  0.1497087925672531
Epoch: 0, Step: 500, Loss:  0.20537877082824707
Epoch: 0, Step: 750, Loss:  0.14908242225646973
Epoch: 0, Step: 1000, Loss:  0.13232392072677612
Epoch: 0, Step: 1250, Loss:  0.05879543721675873
Epoch: 0, Step: 1500, Loss:  0.162367582321167
Epoch: 0, Step: 1750, Loss:  0.13506710529327393
Epoch: 0, Step: 2000, Loss:  0.11573159694671631
Epoch: 1, Step: 0, Loss:  0.10732629150152206
Epoch: 1, Step: 250, Loss:  0.07997918128967285
Epoch: 1, Step: 500, Loss:  0.20562845468521118
Epoch: 1, Step: 750, Loss:  0.04574575647711754
Epoch: 1, Step: 1000, Loss:  0.07322009652853012
Epoch: 1, Step: 1250, Loss:  0.026874329894781113
Epoch: 1, Step: 1500, Loss:  0.02503211796283722
Epoch: 1, Step: 1750, Loss:  0.19568537175655365
Epoch: 1, Step: 2000, Loss:  0.02873862534761429
Run 5/10


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias

Epoch: 0, Step: 0, Loss:  1.075918197631836
Epoch: 0, Step: 250, Loss:  0.09127195924520493
Epoch: 0, Step: 500, Loss:  0.2927808463573456
Epoch: 0, Step: 750, Loss:  0.058256831020116806
Epoch: 0, Step: 1000, Loss:  0.08308476954698563
Epoch: 0, Step: 1250, Loss:  0.1396861970424652
Epoch: 0, Step: 1500, Loss:  0.19522491097450256
Epoch: 0, Step: 1750, Loss:  0.24821855127811432
Epoch: 0, Step: 2000, Loss:  0.13012632727622986
Epoch: 1, Step: 0, Loss:  0.09614209085702896
Epoch: 1, Step: 250, Loss:  0.11614061892032623
Epoch: 1, Step: 500, Loss:  0.054327499121427536
Epoch: 1, Step: 750, Loss:  0.0684332326054573
Epoch: 1, Step: 1000, Loss:  0.13350075483322144
Epoch: 1, Step: 1250, Loss:  0.027979597449302673
Epoch: 1, Step: 1500, Loss:  0.13507843017578125
Epoch: 1, Step: 1750, Loss:  0.03768206760287285
Epoch: 1, Step: 2000, Loss:  0.042921144515275955
Run 6/10


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias

Epoch: 0, Step: 0, Loss:  1.2783429622650146
Epoch: 0, Step: 250, Loss:  0.29739198088645935
Epoch: 0, Step: 500, Loss:  0.22795255482196808
Epoch: 0, Step: 750, Loss:  0.07380405813455582
Epoch: 0, Step: 1000, Loss:  0.0766105055809021
Epoch: 0, Step: 1250, Loss:  0.11717953532934189
Epoch: 0, Step: 1500, Loss:  0.11925826221704483
Epoch: 0, Step: 1750, Loss:  0.17991042137145996
Epoch: 0, Step: 2000, Loss:  0.0789104625582695
Epoch: 1, Step: 0, Loss:  0.12950313091278076
Epoch: 1, Step: 250, Loss:  0.11741800606250763
Epoch: 1, Step: 500, Loss:  0.06505125761032104
Epoch: 1, Step: 750, Loss:  0.03482092171907425
Epoch: 1, Step: 1000, Loss:  0.14196595549583435
Epoch: 1, Step: 1250, Loss:  0.08183784782886505
Epoch: 1, Step: 1500, Loss:  0.034466080367565155
Epoch: 1, Step: 1750, Loss:  0.10144148021936417
Epoch: 1, Step: 2000, Loss:  0.06534934043884277
Run 7/10


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias

Epoch: 0, Step: 0, Loss:  1.2830792665481567
Epoch: 0, Step: 250, Loss:  0.20837797224521637
Epoch: 0, Step: 500, Loss:  0.11199929565191269
Epoch: 0, Step: 750, Loss:  0.14493127167224884
Epoch: 0, Step: 1000, Loss:  0.11649476736783981
Epoch: 0, Step: 1250, Loss:  0.20733001828193665
Epoch: 0, Step: 1500, Loss:  0.17347349226474762
Epoch: 0, Step: 1750, Loss:  0.19006729125976562
Epoch: 0, Step: 2000, Loss:  0.11420568823814392
Epoch: 1, Step: 0, Loss:  0.09217513352632523
Epoch: 1, Step: 250, Loss:  0.10300259292125702
Epoch: 1, Step: 500, Loss:  0.08441643416881561
Epoch: 1, Step: 750, Loss:  0.027663564309477806
Epoch: 1, Step: 1000, Loss:  0.17444555461406708
Epoch: 1, Step: 1250, Loss:  0.06588095426559448
Epoch: 1, Step: 1500, Loss:  0.039876554161310196
Epoch: 1, Step: 1750, Loss:  0.06674695760011673
Epoch: 1, Step: 2000, Loss:  0.01662570796906948
Run 8/10


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias

Epoch: 0, Step: 0, Loss:  1.0995115041732788
Epoch: 0, Step: 250, Loss:  0.1791851818561554
Epoch: 0, Step: 500, Loss:  0.13415180146694183
Epoch: 0, Step: 750, Loss:  0.13288573920726776
Epoch: 0, Step: 1000, Loss:  0.08960212767124176
Epoch: 0, Step: 1250, Loss:  0.29862144589424133
Epoch: 0, Step: 1500, Loss:  0.10156542807817459
Epoch: 0, Step: 1750, Loss:  0.05764281749725342
Epoch: 0, Step: 2000, Loss:  0.09285144507884979
Epoch: 1, Step: 0, Loss:  0.06406483799219131
Epoch: 1, Step: 250, Loss:  0.1233275756239891
Epoch: 1, Step: 500, Loss:  0.10617289692163467
Epoch: 1, Step: 750, Loss:  0.06566299498081207
Epoch: 1, Step: 1000, Loss:  0.06824783235788345
Epoch: 1, Step: 1250, Loss:  0.12758894264698029
Epoch: 1, Step: 1500, Loss:  0.08792734891176224
Epoch: 1, Step: 1750, Loss:  0.06153273954987526
Epoch: 1, Step: 2000, Loss:  0.04510731250047684
Run 9/10


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias

Epoch: 0, Step: 0, Loss:  1.0557465553283691
Epoch: 0, Step: 250, Loss:  0.27305927872657776
Epoch: 0, Step: 500, Loss:  0.26642027497291565
Epoch: 0, Step: 750, Loss:  0.20242908596992493
Epoch: 0, Step: 1000, Loss:  0.04250514879822731
Epoch: 0, Step: 1250, Loss:  0.2864929139614105
Epoch: 0, Step: 1500, Loss:  0.1269216686487198
Epoch: 0, Step: 1750, Loss:  0.06666672229766846
Epoch: 0, Step: 2000, Loss:  0.09615769982337952
Epoch: 1, Step: 0, Loss:  0.23873209953308105
Epoch: 1, Step: 250, Loss:  0.04499483481049538
Epoch: 1, Step: 500, Loss:  0.1462191343307495
Epoch: 1, Step: 750, Loss:  0.1594005674123764
Epoch: 1, Step: 1000, Loss:  0.10446750372648239
Epoch: 1, Step: 1250, Loss:  0.021205337718129158
Epoch: 1, Step: 1500, Loss:  0.04783220589160919
Epoch: 1, Step: 1750, Loss:  0.03491629287600517
Epoch: 1, Step: 2000, Loss:  0.04675896465778351
Run 10/10


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias

Epoch: 0, Step: 0, Loss:  1.264002799987793
Epoch: 0, Step: 250, Loss:  0.11675743013620377
Epoch: 0, Step: 500, Loss:  0.09582889080047607
Epoch: 0, Step: 750, Loss:  0.18508930504322052
Epoch: 0, Step: 1000, Loss:  0.10759922862052917
Epoch: 0, Step: 1250, Loss:  0.0682109072804451
Epoch: 0, Step: 1500, Loss:  0.1189827248454094
Epoch: 0, Step: 1750, Loss:  0.05768073350191116
Epoch: 0, Step: 2000, Loss:  0.14141815900802612
Epoch: 1, Step: 0, Loss:  0.15549761056900024
Epoch: 1, Step: 250, Loss:  0.13506411015987396
Epoch: 1, Step: 500, Loss:  0.09423185139894485
Epoch: 1, Step: 750, Loss:  0.14729222655296326
Epoch: 1, Step: 1000, Loss:  0.1365109384059906
Epoch: 1, Step: 1250, Loss:  0.025700869038701057
Epoch: 1, Step: 1500, Loss:  0.02778894081711769
Epoch: 1, Step: 1750, Loss:  0.08371781557798386
Epoch: 1, Step: 2000, Loss:  0.029140857979655266


In [25]:
bert_ate_mams_stats

Unnamed: 0,accuracy,precision_score_micro,precision_score_macro,recall_score_micro,recall_score_macro,f1_score_micro,f1_score_macro,execution_time
0,0.977617,0.977617,0.913309,0.977617,0.949835,0.977617,0.930646,380.57009
1,0.976691,0.976691,0.908525,0.976691,0.944193,0.976691,0.925478,385.877049
2,0.976331,0.976331,0.906979,0.976331,0.944357,0.976331,0.924718,385.869864
3,0.975579,0.975579,0.904484,0.975579,0.9436,0.975579,0.9231,385.640436
4,0.977371,0.977371,0.907026,0.977371,0.947159,0.977371,0.926139,392.682886
5,0.976435,0.976435,0.910136,0.976435,0.943848,0.976435,0.926211,386.502355
6,0.977456,0.977456,0.910919,0.977456,0.94945,0.977456,0.929207,385.147549
7,0.977087,0.977087,0.912205,0.977087,0.943649,0.977087,0.927186,388.000542
8,0.977539,0.977539,0.910781,0.977539,0.949291,0.977539,0.929086,389.369364
9,0.976292,0.976292,0.90242,0.976292,0.946797,0.976292,0.923407,390.278677


In [26]:
bert_ate_mams_stats.to_csv('../results/ate/stats/bert_ate_mams_stats.csv')