# Import dependencies

In [None]:
import os
import sys

# Put the parent of the current working directory at the front of the module
# search path so imports can be resolved from there.
sys.path.insert(0, os.path.dirname(os.getcwd())) 

In [1]:
import time
import gc

import numpy as np
import pandas as pd

from transformers import BertTokenizer, BertModel

from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score

from semeval_reader import SemevalReader

from InputDataset import InputDataset

import torch
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler

from torch import cuda

from models.BERT_Dropout_CNN_BiLSTM_Linear import BERT_Dropout_CNN_BiLSTM_Linear

In [2]:
# Target device string used for the model and every batch tensor:
# 'cuda' when a CUDA device is available, otherwise 'cpu'.
device = 'cuda' if cuda.is_available() else 'cpu'

In [3]:
# Report the GPU in use. The CUDA queries raise on machines without a CUDA
# device, so they are only issued when one is available; this keeps the
# notebook runnable with the CPU fallback selected for `device`.
if cuda.is_available():
    print(torch.cuda.get_device_name(0))
    print(f"Memory: {torch.cuda.get_device_properties(0).total_memory // 1024 ** 3} GB")
else:
    print("CUDA is not available; running on CPU")

NVIDIA GeForce RTX 2060 SUPER
Memory: 8 GB


In [4]:
def clear_memory():
    """Free unreachable Python objects, then release cached CUDA memory.

    The garbage collector runs first on purpose: objects that are only kept
    alive by reference cycles are not freed until a collection happens, and
    only memory that is no longer occupied can be released by
    ``torch.cuda.empty_cache()``.

    Returns:
        None
    """
    gc.collect()
    # A single call is sufficient; wrapping it in torch.no_grad() has no
    # effect because no autograd operation is involved.
    torch.cuda.empty_cache()

# Load Data

In [5]:
def get_target_list_for_polarity(polarity):
    """Return the 3-element one-hot target list for a polarity label.

    Position 0 is set for 'negative', position 2 for 'positive', and
    position 1 for any other value.

    Args:
        polarity: polarity label to encode.

    Returns:
        A new list of three ints containing exactly one 1.
    """
    hot_index = 2 if polarity == 'positive' else (0 if polarity == 'negative' else 1)
    return [int(position == hot_index) for position in range(3)]

In [6]:
semeval_reader = SemevalReader('../../../data/semeval16_restaurants_train.xml')

reviews = semeval_reader.read_reviews()
absolute_polarity_sentences = semeval_reader.get_absolute_polarity_sentences()

# One (text, polarity) pair per sentence; the polarity is taken from the
# sentence's first opinion.
sentence_rows = [
    (sentence.text, sentence.opinions[0].polarity)
    for sentence in absolute_polarity_sentences
]

# Column 0 becomes 'text'; the polarity stays under the integer label 1.
df = pd.DataFrame(sentence_rows).rename(columns={0: 'text'})
df['target_list'] = df[1].map(get_target_list_for_polarity)

# Same frame without the raw polarity column.
absolute_polarity_df = df.drop(columns=[1])

# Train & Validate

In [7]:
# Batch sizes passed to the training and validation DataLoaders.
TRAIN_BATCH_SIZE = 4
VALID_BATCH_SIZE = 4

# Number of passes over the training split in each run.
EPOCHS = 2

# Learning rate given to the Adam optimizer.
LEARNING_RATE = 1e-5

# Fraction of the data sampled for training; the remainder is used for evaluation.
TRAIN_SPLIT = 0.8

# Number of independent train/evaluate repetitions.
NO_RUNS = 10

In [8]:
# Destination of the checkpoint written whenever a run improves on the best accuracy so far.
MODEL_OUTPUT = '../../../results/SA/SemEval16 - Task 5 - Restaurants/models/bert_pre_trained_dropout_cnn_bilstm_linear.pth'
# Destination of the per-run metrics table (CSV).
STATS_OUTPUT = '../../../results/SA/SemEval16 - Task 5 - Restaurants/stats/bert_pre_trained_dropout_cnn_bilstm_linear.csv'

In [9]:
# Tokenizer for the 'bert-base-uncased' checkpoint; handed to InputDataset for
# both the training and the evaluation split.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

In [10]:
# Highest accuracy seen across runs so far; decides when the checkpoint is overwritten.
best_accuracy = 0.0

In [11]:
def train(epoch, model, loss_fn, optimizer, dataloader):
    """Train ``model`` for one pass over ``dataloader``.

    Each batch is expected to be a mapping with the keys ``'ids'``,
    ``'mask'``, ``'token_type_ids'`` and ``'targets'``. Tensors are moved to
    the module-level ``device`` before the forward pass. Progress is printed
    roughly ten times per epoch.

    Args:
        epoch: index of the current epoch; only used in the progress message.
        model: module called as ``model(ids, mask, token_type_ids)``.
        loss_fn: callable taking ``(outputs, targets)`` and returning the loss.
        optimizer: optimizer updating the model's parameters.
        dataloader: sized iterable yielding the batches described above.

    Returns:
        None
    """
    model.train()

    dataloader_len = len(dataloader)
    # Clamp to 1 so loaders with fewer than ten batches do not cause a
    # modulo-by-zero in the logging condition below.
    log_every = max(1, dataloader_len // 10)

    for batch_idx, data in enumerate(dataloader):
        optimizer.zero_grad()

        ids = data['ids'].to(device, dtype=torch.long)
        mask = data['mask'].to(device, dtype=torch.long)
        token_type_ids = data['token_type_ids'].to(device, dtype=torch.long)
        targets = data['targets'].to(device, dtype=torch.float)

        outputs = model(ids, mask, token_type_ids)

        loss = loss_fn(outputs, targets)

        if batch_idx % log_every == 0:
            print(f"Epoch: {epoch}/{EPOCHS}, Batch: {batch_idx}/{dataloader_len}, Loss: {loss.item()}")

        loss.backward()

        optimizer.step()

In [12]:
def validation(model, dataloader):
    """Run ``model`` over ``dataloader`` without gradients and gather results.

    Each batch is expected to be a mapping with the keys ``'ids'``,
    ``'mask'``, ``'token_type_ids'`` and ``'targets'``. The model outputs are
    passed through a sigmoid before being collected.

    Args:
        model: module called as ``model(ids, mask, token_type_ids)``.
        dataloader: iterable yielding the batches described above.

    Returns:
        A tuple ``(outputs, targets)`` of nested Python lists, one entry per
        evaluated sample, in iteration order.
    """
    model.eval()

    collected_outputs = []
    collected_targets = []

    with torch.no_grad():
        for batch in dataloader:
            input_ids = batch['ids'].to(device, dtype=torch.long)
            input_mask = batch['mask'].to(device, dtype=torch.long)
            type_ids = batch['token_type_ids'].to(device, dtype=torch.long)
            batch_targets = batch['targets'].to(device, dtype=torch.float)

            raw_outputs = model(input_ids, input_mask, type_ids)
            batch_scores = torch.sigmoid(raw_outputs)

            collected_targets += batch_targets.detach().cpu().numpy().tolist()
            collected_outputs += batch_scores.detach().cpu().numpy().tolist()

    return collected_outputs, collected_targets

In [13]:
# Empty metrics table; the run loop adds one row per run.
results = pd.DataFrame(
    columns=[
        'accuracy',
        'precision_score_micro',
        'precision_score_macro',
        'recall_score_micro',
        'recall_score_macro',
        'f1_score_micro',
        'f1_score_macro',
        'execution_time',
    ]
)

In [14]:
# Repeat the whole train/evaluate cycle NO_RUNS times on fresh random splits,
# recording one row of metrics per run and keeping the checkpoint of the most
# accurate run.
for i in range(NO_RUNS):
    # Release whatever the previous run left behind before allocating a new model.
    clear_memory()

    start_time = time.time()

    print(f"Run {i + 1}/{NO_RUNS}")

    # Random split: TRAIN_SPLIT of the rows for training, the rest for evaluation.
    train_dataset = df.sample(frac=TRAIN_SPLIT)
    test_dataset = df.drop(train_dataset.index).reset_index(drop=True)
    train_dataset = train_dataset.reset_index(drop=True)

    training_set = InputDataset(train_dataset, tokenizer)
    testing_set = InputDataset(test_dataset, tokenizer)

    train_dataloader = DataLoader(
        training_set,
        # Sample over the dataset the loader actually serves, mirroring the
        # validation loader below.
        sampler=RandomSampler(training_set),
        batch_size=TRAIN_BATCH_SIZE,
        drop_last=True
    )

    # NOTE(review): drop_last=True discards the final incomplete batch, so up
    # to VALID_BATCH_SIZE - 1 evaluation samples are excluded from the
    # metrics. Kept as-is because it is unclear whether the model accepts
    # smaller batches -- confirm and consider drop_last=False.
    validation_dataloader = DataLoader(
        testing_set,
        sampler=SequentialSampler(testing_set),
        batch_size=VALID_BATCH_SIZE,
        drop_last=True
    )

    model = BERT_Dropout_CNN_BiLSTM_Linear(
        bert=BertModel.from_pretrained('bert-base-uncased'),
        bert_seq_len=512,
        dropout=0.3,
        bilstm_in_features=256,
        no_out_labels=3,
        conv_out_channels=54,
        conv_kernel_size=3,
        device=device
    ).to(device)

    optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)
    loss_fn = torch.nn.BCEWithLogitsLoss()

    for epoch in range(EPOCHS):
        train(epoch, model, loss_fn, optimizer, train_dataloader)

    # Convert per-class scores and one-hot targets to class indices.
    outputs, targets = validation(model, validation_dataloader)
    outputs = np.argmax(outputs, axis=1)
    targets = np.argmax(targets, axis=1)

    accuracy = accuracy_score(targets, outputs)
    precision_score_micro = precision_score(targets, outputs, average='micro')
    precision_score_macro = precision_score(targets, outputs, average='macro')
    recall_score_micro = recall_score(targets, outputs, average='micro')
    recall_score_macro = recall_score(targets, outputs, average='macro')
    f1_score_micro = f1_score(targets, outputs, average='micro')
    f1_score_macro = f1_score(targets, outputs, average='macro')

    execution_time = time.time() - start_time

    results.loc[i] = [accuracy,precision_score_micro,precision_score_macro,recall_score_micro,recall_score_macro,f1_score_micro,f1_score_macro, execution_time]

    if accuracy > best_accuracy:
        best_accuracy = accuracy
        # NOTE(review): only the `bert` submodule is saved, not the whole
        # classifier, although the file name refers to the full architecture
        # -- confirm this is intended.
        torch.save(model.bert, MODEL_OUTPUT)

    # Drop every reference created in this run (including the loaders, which
    # hold on to the datasets) so the next clear_memory() can reclaim them.
    del train_dataloader
    del validation_dataloader
    del train_dataset
    del test_dataset
    del training_set
    del testing_set
    del model
    del loss_fn
    del optimizer
    del outputs
    del targets

Run 1/10


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: 0/2, Batch: 0/313, Loss: 0.6849984526634216
Epoch: 0/2, Batch: 31/313, Loss: 0.551174521446228
Epoch: 0/2, Batch: 62/313, Loss: 0.5675079822540283
Epoch: 0/2, Batch: 93/313, Loss: 0.5017633438110352
Epoch: 0/2, Batch: 124/313, Loss: 0.6461917757987976
Epoch: 0/2, Batch: 155/313, Loss: 0.13236531615257263
Epoch: 0/2, Batch: 186/313, Loss: 0.2261066734790802
Epoch: 0/2, Batch: 217/313, Loss: 0.3827422559261322
Epoch: 0/2, Batch: 248/313, Loss: 0.09966349601745605
Epoch: 0/2, Batch: 279/313, Loss: 0.4253297746181488
Epoch: 0/2, Batch: 310/313, Loss: 0.5212905406951904
Epoch: 1/2, Batch: 0/313, Loss: 0.8933717608451843
Epoch: 1/2, Batch: 31/313, Loss: 0.36494678258895874
Epoch: 1/2, Batch: 62/313, Loss: 0.07298640161752701
Epoch: 1/2, Batch: 93/313, Loss: 0.3547503352165222
Epoch: 1/2, Batch: 124/313, Loss: 0.027988139539957047
Epoch: 1/2, Batch: 155/313, Loss: 0.04060964658856392
Epoch: 1/2, Batch: 186/313, Loss: 0.10326582193374634
Epoch: 1/2, Batch: 217/313, Loss: 0.0527985319495

  _warn_prf(average, modifier, msg_start, len(result))


Run 2/10


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: 0/2, Batch: 0/313, Loss: 0.6887361407279968
Epoch: 0/2, Batch: 31/313, Loss: 0.6008433699607849
Epoch: 0/2, Batch: 62/313, Loss: 0.5504064559936523
Epoch: 0/2, Batch: 93/313, Loss: 0.39596879482269287
Epoch: 0/2, Batch: 124/313, Loss: 0.5702389478683472
Epoch: 0/2, Batch: 155/313, Loss: 0.31207895278930664
Epoch: 0/2, Batch: 186/313, Loss: 0.1390637755393982
Epoch: 0/2, Batch: 217/313, Loss: 0.38819608092308044
Epoch: 0/2, Batch: 248/313, Loss: 0.3798310458660126
Epoch: 0/2, Batch: 279/313, Loss: 0.5377541184425354
Epoch: 0/2, Batch: 310/313, Loss: 0.4577880799770355
Epoch: 1/2, Batch: 0/313, Loss: 0.11689735949039459
Epoch: 1/2, Batch: 31/313, Loss: 0.44993335008621216
Epoch: 1/2, Batch: 62/313, Loss: 0.07149426639080048
Epoch: 1/2, Batch: 93/313, Loss: 0.037890054285526276
Epoch: 1/2, Batch: 124/313, Loss: 0.034489747136831284
Epoch: 1/2, Batch: 155/313, Loss: 0.07911334931850433
Epoch: 1/2, Batch: 186/313, Loss: 0.38688403367996216
Epoch: 1/2, Batch: 217/313, Loss: 0.02478880

  _warn_prf(average, modifier, msg_start, len(result))


Run 3/10


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: 0/2, Batch: 0/313, Loss: 0.6794683337211609
Epoch: 0/2, Batch: 31/313, Loss: 0.5144062042236328
Epoch: 0/2, Batch: 62/313, Loss: 0.5319206714630127
Epoch: 0/2, Batch: 93/313, Loss: 0.3925364911556244
Epoch: 0/2, Batch: 124/313, Loss: 0.4637276828289032
Epoch: 0/2, Batch: 155/313, Loss: 0.5818487405776978
Epoch: 0/2, Batch: 186/313, Loss: 0.31780827045440674
Epoch: 0/2, Batch: 217/313, Loss: 0.2671831548213959
Epoch: 0/2, Batch: 248/313, Loss: 0.3931359648704529
Epoch: 0/2, Batch: 279/313, Loss: 0.5375869274139404
Epoch: 0/2, Batch: 310/313, Loss: 0.501971960067749
Epoch: 1/2, Batch: 0/313, Loss: 0.038523606956005096
Epoch: 1/2, Batch: 31/313, Loss: 0.0406625010073185
Epoch: 1/2, Batch: 62/313, Loss: 0.022803951054811478
Epoch: 1/2, Batch: 93/313, Loss: 0.07349824160337448
Epoch: 1/2, Batch: 124/313, Loss: 0.5086113810539246
Epoch: 1/2, Batch: 155/313, Loss: 0.07626689970493317
Epoch: 1/2, Batch: 186/313, Loss: 0.09992630779743195
Epoch: 1/2, Batch: 217/313, Loss: 0.0129057457670

  _warn_prf(average, modifier, msg_start, len(result))


Run 4/10


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: 0/2, Batch: 0/313, Loss: 0.6853570938110352
Epoch: 0/2, Batch: 31/313, Loss: 0.5731658935546875
Epoch: 0/2, Batch: 62/313, Loss: 0.764578104019165
Epoch: 0/2, Batch: 93/313, Loss: 0.5000852346420288
Epoch: 0/2, Batch: 124/313, Loss: 0.26460325717926025
Epoch: 0/2, Batch: 155/313, Loss: 0.28683924674987793
Epoch: 0/2, Batch: 186/313, Loss: 0.3892136216163635
Epoch: 0/2, Batch: 217/313, Loss: 0.27479779720306396
Epoch: 0/2, Batch: 248/313, Loss: 0.09317802637815475
Epoch: 0/2, Batch: 279/313, Loss: 0.12324162572622299
Epoch: 0/2, Batch: 310/313, Loss: 0.10074687004089355
Epoch: 1/2, Batch: 0/313, Loss: 0.12114866077899933
Epoch: 1/2, Batch: 31/313, Loss: 0.3328900933265686
Epoch: 1/2, Batch: 62/313, Loss: 0.7843090295791626
Epoch: 1/2, Batch: 93/313, Loss: 0.015208887867629528
Epoch: 1/2, Batch: 124/313, Loss: 0.013263355009257793
Epoch: 1/2, Batch: 155/313, Loss: 0.015111085027456284
Epoch: 1/2, Batch: 186/313, Loss: 0.01012833695858717
Epoch: 1/2, Batch: 217/313, Loss: 0.6005643

  _warn_prf(average, modifier, msg_start, len(result))


Run 5/10


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: 0/2, Batch: 0/313, Loss: 0.6964221000671387
Epoch: 0/2, Batch: 31/313, Loss: 0.7109543085098267
Epoch: 0/2, Batch: 62/313, Loss: 0.6685733199119568
Epoch: 0/2, Batch: 93/313, Loss: 0.2759927213191986
Epoch: 0/2, Batch: 124/313, Loss: 0.46100112795829773
Epoch: 0/2, Batch: 155/313, Loss: 0.22034752368927002
Epoch: 0/2, Batch: 186/313, Loss: 0.14681535959243774
Epoch: 0/2, Batch: 217/313, Loss: 0.1864432543516159
Epoch: 0/2, Batch: 248/313, Loss: 0.09403541684150696
Epoch: 0/2, Batch: 279/313, Loss: 0.24148350954055786
Epoch: 0/2, Batch: 310/313, Loss: 0.20678339898586273
Epoch: 1/2, Batch: 0/313, Loss: 0.15376776456832886
Epoch: 1/2, Batch: 31/313, Loss: 0.08428364247083664
Epoch: 1/2, Batch: 62/313, Loss: 0.07747279107570648
Epoch: 1/2, Batch: 93/313, Loss: 0.4345700740814209
Epoch: 1/2, Batch: 124/313, Loss: 0.4478917121887207
Epoch: 1/2, Batch: 155/313, Loss: 0.07287810742855072
Epoch: 1/2, Batch: 186/313, Loss: 0.04779994487762451
Epoch: 1/2, Batch: 217/313, Loss: 0.112900897

  _warn_prf(average, modifier, msg_start, len(result))


Run 6/10


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: 0/2, Batch: 0/313, Loss: 0.7044438123703003
Epoch: 0/2, Batch: 31/313, Loss: 0.6262228488922119
Epoch: 0/2, Batch: 62/313, Loss: 0.5435106754302979
Epoch: 0/2, Batch: 93/313, Loss: 0.41057726740837097
Epoch: 0/2, Batch: 124/313, Loss: 0.3754904866218567
Epoch: 0/2, Batch: 155/313, Loss: 0.7301740050315857
Epoch: 0/2, Batch: 186/313, Loss: 0.4045577943325043
Epoch: 0/2, Batch: 217/313, Loss: 0.28121405839920044
Epoch: 0/2, Batch: 248/313, Loss: 0.35082051157951355
Epoch: 0/2, Batch: 279/313, Loss: 0.5005247592926025
Epoch: 0/2, Batch: 310/313, Loss: 0.12198790162801743
Epoch: 1/2, Batch: 0/313, Loss: 0.059068065136671066
Epoch: 1/2, Batch: 31/313, Loss: 0.07050080597400665
Epoch: 1/2, Batch: 62/313, Loss: 0.40532276034355164
Epoch: 1/2, Batch: 93/313, Loss: 0.05079053342342377
Epoch: 1/2, Batch: 124/313, Loss: 0.06243818253278732
Epoch: 1/2, Batch: 155/313, Loss: 0.07432855665683746
Epoch: 1/2, Batch: 186/313, Loss: 0.7296657562255859
Epoch: 1/2, Batch: 217/313, Loss: 0.027968037

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: 0/2, Batch: 0/313, Loss: 0.6871427297592163
Epoch: 0/2, Batch: 31/313, Loss: 0.50816810131073
Epoch: 0/2, Batch: 62/313, Loss: 0.4279990792274475
Epoch: 0/2, Batch: 93/313, Loss: 0.3820075988769531
Epoch: 0/2, Batch: 124/313, Loss: 0.5867719054222107
Epoch: 0/2, Batch: 155/313, Loss: 0.5182000398635864
Epoch: 0/2, Batch: 186/313, Loss: 0.10027050971984863
Epoch: 0/2, Batch: 217/313, Loss: 0.5084753632545471
Epoch: 0/2, Batch: 248/313, Loss: 0.16644249856472015
Epoch: 0/2, Batch: 279/313, Loss: 0.06543536484241486
Epoch: 0/2, Batch: 310/313, Loss: 0.2661551237106323
Epoch: 1/2, Batch: 0/313, Loss: 0.12457741796970367
Epoch: 1/2, Batch: 31/313, Loss: 0.41237181425094604
Epoch: 1/2, Batch: 62/313, Loss: 0.07049304246902466
Epoch: 1/2, Batch: 93/313, Loss: 0.0515303760766983
Epoch: 1/2, Batch: 124/313, Loss: 0.06481412053108215
Epoch: 1/2, Batch: 155/313, Loss: 0.12359599769115448
Epoch: 1/2, Batch: 186/313, Loss: 0.04705725237727165
Epoch: 1/2, Batch: 217/313, Loss: 0.0386738218367

  _warn_prf(average, modifier, msg_start, len(result))


Run 8/10


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: 0/2, Batch: 0/313, Loss: 0.6830505728721619
Epoch: 0/2, Batch: 31/313, Loss: 0.5739259123802185
Epoch: 0/2, Batch: 62/313, Loss: 0.6856088042259216
Epoch: 0/2, Batch: 93/313, Loss: 0.24600490927696228
Epoch: 0/2, Batch: 124/313, Loss: 0.43721431493759155
Epoch: 0/2, Batch: 155/313, Loss: 0.5217752456665039
Epoch: 0/2, Batch: 186/313, Loss: 0.5001397728919983
Epoch: 0/2, Batch: 217/313, Loss: 0.10887718200683594
Epoch: 0/2, Batch: 248/313, Loss: 0.3200184106826782
Epoch: 0/2, Batch: 279/313, Loss: 0.3526468873023987
Epoch: 0/2, Batch: 310/313, Loss: 0.5949265956878662
Epoch: 1/2, Batch: 0/313, Loss: 0.14439657330513
Epoch: 1/2, Batch: 31/313, Loss: 0.05566447973251343
Epoch: 1/2, Batch: 62/313, Loss: 0.052916690707206726
Epoch: 1/2, Batch: 93/313, Loss: 0.09872026741504669
Epoch: 1/2, Batch: 124/313, Loss: 0.035538699477910995
Epoch: 1/2, Batch: 155/313, Loss: 0.44568049907684326
Epoch: 1/2, Batch: 186/313, Loss: 0.07895755022764206
Epoch: 1/2, Batch: 217/313, Loss: 0.02913528308

  _warn_prf(average, modifier, msg_start, len(result))


Run 9/10


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: 0/2, Batch: 0/313, Loss: 0.6904184222221375
Epoch: 0/2, Batch: 31/313, Loss: 0.5874440670013428
Epoch: 0/2, Batch: 62/313, Loss: 0.4215044379234314
Epoch: 0/2, Batch: 93/313, Loss: 0.3365277647972107
Epoch: 0/2, Batch: 124/313, Loss: 0.3176775276660919
Epoch: 0/2, Batch: 155/313, Loss: 0.6553312540054321
Epoch: 0/2, Batch: 186/313, Loss: 0.1882181167602539
Epoch: 0/2, Batch: 217/313, Loss: 0.3597099184989929
Epoch: 0/2, Batch: 248/313, Loss: 0.19706891477108002
Epoch: 0/2, Batch: 279/313, Loss: 0.12855221331119537
Epoch: 0/2, Batch: 310/313, Loss: 0.13188572227954865
Epoch: 1/2, Batch: 0/313, Loss: 0.07984905689954758
Epoch: 1/2, Batch: 31/313, Loss: 0.29229772090911865
Epoch: 1/2, Batch: 62/313, Loss: 0.11548858880996704
Epoch: 1/2, Batch: 93/313, Loss: 0.4149824380874634
Epoch: 1/2, Batch: 124/313, Loss: 0.20395159721374512
Epoch: 1/2, Batch: 155/313, Loss: 0.033904194831848145
Epoch: 1/2, Batch: 186/313, Loss: 0.0729997381567955
Epoch: 1/2, Batch: 217/313, Loss: 0.04619817063

  _warn_prf(average, modifier, msg_start, len(result))


Run 10/10


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: 0/2, Batch: 0/313, Loss: 0.6866952180862427
Epoch: 0/2, Batch: 31/313, Loss: 0.5388302206993103
Epoch: 0/2, Batch: 62/313, Loss: 0.4090738296508789
Epoch: 0/2, Batch: 93/313, Loss: 0.3749626874923706
Epoch: 0/2, Batch: 124/313, Loss: 0.38168400526046753
Epoch: 0/2, Batch: 155/313, Loss: 0.1961364597082138
Epoch: 0/2, Batch: 186/313, Loss: 0.28114205598831177
Epoch: 0/2, Batch: 217/313, Loss: 0.34564799070358276
Epoch: 0/2, Batch: 248/313, Loss: 0.3629702925682068
Epoch: 0/2, Batch: 279/313, Loss: 0.31423425674438477
Epoch: 0/2, Batch: 310/313, Loss: 0.5280775427818298
Epoch: 1/2, Batch: 0/313, Loss: 0.09572979807853699
Epoch: 1/2, Batch: 31/313, Loss: 0.03257439285516739
Epoch: 1/2, Batch: 62/313, Loss: 0.10746961832046509
Epoch: 1/2, Batch: 93/313, Loss: 0.4115232825279236
Epoch: 1/2, Batch: 124/313, Loss: 0.060619812458753586
Epoch: 1/2, Batch: 155/313, Loss: 0.0416710339486599
Epoch: 1/2, Batch: 186/313, Loss: 0.2757192552089691
Epoch: 1/2, Batch: 217/313, Loss: 0.02178116142

  _warn_prf(average, modifier, msg_start, len(result))


In [15]:
# Display the per-run metrics table.
results

Unnamed: 0,accuracy,precision_score_micro,precision_score_macro,recall_score_micro,recall_score_macro,f1_score_micro,f1_score_macro,execution_time
0,0.894231,0.894231,0.579088,0.894231,0.619409,0.894231,0.596886,201.17018
1,0.907051,0.907051,0.593324,0.907051,0.633655,0.907051,0.612474,198.677062
2,0.923077,0.923077,0.61364,0.923077,0.62998,0.923077,0.621599,198.567633
3,0.903846,0.903846,0.587527,0.903846,0.615704,0.903846,0.601267,198.56325
4,0.916667,0.916667,0.605887,0.916667,0.610451,0.916667,0.608052,199.355402
5,0.891026,0.891026,0.912312,0.891026,0.642195,0.891026,0.657977,211.377116
6,0.907051,0.907051,0.609488,0.907051,0.615217,0.907051,0.611223,208.473276
7,0.875,0.875,0.571759,0.875,0.589461,0.875,0.580357,207.678398
8,0.903846,0.903846,0.576813,0.903846,0.616791,0.903846,0.595046,208.089441
9,0.907051,0.907051,0.591811,0.907051,0.616598,0.907051,0.603947,210.883662


In [16]:
# Persist the per-run metrics table to STATS_OUTPUT as CSV.
results.to_csv(STATS_OUTPUT)