# Import dependencies

In [1]:
import os
import sys

# Put the parent of the current working directory at the front of the import
# search path. Presumably this is what lets the project-local imports in the
# next cell resolve when the notebook runs from its own folder - TODO confirm
# against the repository layout.
sys.path.insert(0, os.path.dirname(os.getcwd())) 

In [2]:
import time
import gc

import numpy as np
import pandas as pd

from transformers import BertTokenizer, BertModel

from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score

from semeval_reader import SemevalReader

from InputDataset import InputDataset

import torch
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler

from torch import cuda

from sa_models.SA_BERT_Dropout_CNN_BiLSTM_Linear import SA_BERT_Dropout_CNN_BiLSTM_Linear

In [3]:
# Prefer the GPU whenever CUDA is usable; otherwise fall back to the CPU.
if cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [4]:
# Report which GPU is in use and how much memory it has. The device queries
# raise on machines without CUDA, so they are skipped on CPU-only setups
# (the notebook otherwise supports a CPU fallback via `device`).
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))
    print(f"Memory: {torch.cuda.get_device_properties(0).total_memory // 1024 ** 3} GB")
else:
    print("CUDA is not available; running on CPU")

NVIDIA GeForce RTX 2060 SUPER
Memory: 8 GB


In [5]:
def clear_memory():
    """Free unreferenced Python objects, then release PyTorch's cached CUDA memory.

    Garbage collection runs first: tensors that are only kept alive by
    reference cycles must be collected before their memory can be handed
    back by ``torch.cuda.empty_cache()``. Emptying the cache before
    collecting would leave that memory parked in the caching allocator.

    Returns:
        None
    """
    gc.collect()
    torch.cuda.empty_cache()

# Load Data

In [6]:
def get_target_list_for_polarity(polarity):
    """Encode a polarity label as a three-element one-hot list.

    The positions are ``[negative, other, positive]``: ``'negative'`` sets
    index 0, ``'positive'`` sets index 2, and every other value sets index 1.

    Args:
        polarity: The polarity label to encode.

    Returns:
        A new list of three ints containing exactly one ``1``.
    """
    if polarity == 'positive':
        hot_index = 2
    elif polarity == 'negative':
        hot_index = 0
    else:
        hot_index = 1

    return [int(position == hot_index) for position in range(3)]

In [7]:
# Read the SemEval-16 restaurant training file.
semeval_reader = SemevalReader('../../../data/semeval16_restaurants_train.xml')

reviews = semeval_reader.read_reviews()
absolute_polarity_sentences = semeval_reader.get_absolute_polarity_sentences()

# One (text, polarity) row per sentence; the polarity is taken from the
# sentence's first opinion and lands in the column labelled 1.
df = pd.DataFrame(
    [(sentence.text, sentence.opinions[0].polarity) for sentence in absolute_polarity_sentences]
).rename(columns={0: 'text'})

# One-hot encode the polarity of every row.
df['target_list'] = df[1].map(get_target_list_for_polarity)

# Same frame without the raw polarity column.
absolute_polarity_df = df.drop(columns=[1])

# Train & Validate

In [8]:
# Batch sizes passed to the training and validation DataLoaders.
TRAIN_BATCH_SIZE = 4
VALID_BATCH_SIZE = 4

# Number of passes over the training split within a single run.
EPOCHS = 2

# Learning rate handed to the Adam optimizer.
LEARNING_RATE = 1e-5

# Fraction of the rows sampled into the training split; the rest is the test split.
TRAIN_SPLIT = 0.8

# Number of repeated train/evaluate runs (keep in sync with the run loop).
NO_RUNS = 10

In [9]:
# Destination of the model saved by the run loop whenever a run beats the best accuracy so far.
MODEL_OUTPUT = '../../../results/SA/SemEval16 - Task 5 - Restaurants/models/bert_pre_trained_dropout_cnn_bilstm_linear.pth'
# Destination of the per-run metrics table written at the end of the notebook.
STATS_OUTPUT = '../../../results/SA/SemEval16 - Task 5 - Restaurants/stats/bert_pre_trained_dropout_cnn_bilstm_linear.csv'

In [10]:
# Tokenizer for the 'bert-base-uncased' checkpoint; the run loop passes it to
# every InputDataset it builds. The same checkpoint name is used for BertModel.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

In [11]:
# Highest test accuracy seen across runs so far; the run loop saves the model
# whenever a run exceeds this value and then updates it.
best_accuracy = 0.0

In [12]:
def train(epoch, model, loss_fn, optimizer, dataloader):
    """Run one training epoch over ``dataloader``.

    Each batch is expected to be a mapping with the keys ``'ids'``,
    ``'mask'``, ``'token_type_ids'`` and ``'targets'``. The first three are
    moved to the global ``device`` as ``torch.long`` and ``'targets'`` as
    ``torch.float``. Progress is printed roughly ten times per epoch.

    Args:
        epoch: Index of the current epoch; only used in the progress message.
        model: Module called as ``model(ids, mask, token_type_ids)``.
        loss_fn: Callable applied as ``loss_fn(outputs, targets)``.
        optimizer: Optimizer whose gradients are reset and stepped per batch.
        dataloader: Sized iterable yielding the batches described above.

    Returns:
        None. The model parameters are updated in place.
    """
    model.train()

    dataloader_len = len(dataloader)
    # Clamp the logging interval to 1: with fewer than ten batches the integer
    # division yields 0 and the modulo below would raise ZeroDivisionError.
    log_every = max(1, dataloader_len // 10)

    for batch_idx, data in enumerate(dataloader):
        optimizer.zero_grad()

        ids = data['ids'].to(device, dtype=torch.long)
        mask = data['mask'].to(device, dtype=torch.long)
        token_type_ids = data['token_type_ids'].to(device, dtype=torch.long)
        targets = data['targets'].to(device, dtype=torch.float)

        outputs = model(ids, mask, token_type_ids)

        loss = loss_fn(outputs, targets)

        if batch_idx % log_every == 0:
            print(f"Epoch: {epoch}/{EPOCHS}, Batch: {batch_idx}/{dataloader_len}, Loss: {loss.item()}")

        loss.backward()

        optimizer.step()

In [13]:
def validation(model, dataloader):
    """Evaluate ``model`` on every batch of ``dataloader`` without gradients.

    Each batch is a mapping with the keys ``'ids'``, ``'mask'``,
    ``'token_type_ids'`` and ``'targets'``; tensors are moved to the global
    ``device`` before the forward pass.

    Args:
        model: Module called as ``model(ids, mask, token_type_ids)``.
        dataloader: Iterable yielding the batches described above.

    Returns:
        A ``(outputs, targets)`` tuple of nested Python lists: the sigmoid of
        the model outputs and the corresponding targets, in batch order.
    """
    model.eval()

    collected_targets = []
    collected_outputs = []

    with torch.no_grad():
        for batch in dataloader:
            ids_batch = batch['ids'].to(device, dtype=torch.long)
            mask_batch = batch['mask'].to(device, dtype=torch.long)
            type_ids_batch = batch['token_type_ids'].to(device, dtype=torch.long)
            targets_batch = batch['targets'].to(device, dtype=torch.float)

            raw_outputs = model(ids_batch, mask_batch, type_ids_batch)

            # Bring results back to the host as plain lists.
            collected_targets += targets_batch.cpu().detach().numpy().tolist()
            collected_outputs += torch.sigmoid(raw_outputs).cpu().detach().numpy().tolist()

    return collected_outputs, collected_targets

In [14]:
# Empty table of per-run evaluation metrics; the run loop adds one row per run.
results = pd.DataFrame(
    columns=[
        'accuracy',
        'precision_score_micro',
        'precision_score_macro',
        'recall_score_micro',
        'recall_score_macro',
        'f1_score_micro',
        'f1_score_macro',
        'execution_time',
    ]
)

In [15]:
# Repeat the full train/evaluate cycle NO_RUNS times, each on a fresh random
# split and a freshly initialised model, recording one metrics row per run.
for i in range(NO_RUNS):
    # Release memory left over from the previous run before allocating a new model.
    clear_memory()

    start_time = time.time()

    print(f"Run {i + 1}/{NO_RUNS}")

    # Random train/test split. It is unseeded, so the split differs between
    # runs and between executions of the notebook.
    train_dataset = df.sample(frac=TRAIN_SPLIT)
    test_dataset = df.drop(train_dataset.index).reset_index(drop=True)
    train_dataset = train_dataset.reset_index(drop=True)

    training_set = InputDataset(train_dataset, tokenizer)
    testing_set = InputDataset(test_dataset, tokenizer)

    # Both samplers are built on the dataset objects the loaders actually
    # index, so the sampled indices always match the dataset length.
    train_dataloader = DataLoader(
        training_set,
        sampler = RandomSampler(training_set),
        batch_size = TRAIN_BATCH_SIZE,
        drop_last = True
    )

    # NOTE(review): drop_last=True discards a trailing partial batch, so up to
    # VALID_BATCH_SIZE - 1 test rows are excluded from the metrics. Kept as-is
    # because it is not visible here whether the model accepts smaller batches.
    validation_dataloader = DataLoader(
        testing_set,
        sampler = SequentialSampler(testing_set),
        batch_size = VALID_BATCH_SIZE,
        drop_last = True
    )

    model = SA_BERT_Dropout_CNN_BiLSTM_Linear(bert=BertModel.from_pretrained('bert-base-uncased'), bert_seq_len=512, dropout=0.3, bilstm_in_features=256, no_out_labels=3, conv_out_channels=54, conv_kernel_size=3, device=device).to(device)

    optimizer = torch.optim.Adam(params = model.parameters(), lr=LEARNING_RATE)
    loss_fn = torch.nn.BCEWithLogitsLoss()

    for epoch in range(EPOCHS):
        train(epoch, model, loss_fn, optimizer, train_dataloader)

    # Turn per-class scores and one-hot targets into class indices.
    outputs, targets = validation(model, validation_dataloader)
    outputs = np.argmax(outputs, axis=1)
    targets = np.argmax(targets, axis=1)

    accuracy = accuracy_score(targets, outputs)
    precision_score_micro = precision_score(targets, outputs, average='micro')
    precision_score_macro = precision_score(targets, outputs, average='macro')
    recall_score_micro = recall_score(targets, outputs, average='micro')
    recall_score_macro = recall_score(targets, outputs, average='macro')
    f1_score_micro = f1_score(targets, outputs, average='micro')
    f1_score_macro = f1_score(targets, outputs, average='macro')

    # Wall-clock time of the run: data preparation, training and evaluation.
    execution_time = time.time() - start_time

    results.loc[i] = [accuracy,precision_score_micro,precision_score_macro,recall_score_micro,recall_score_macro,f1_score_micro,f1_score_macro, execution_time]

    # Keep only the best model across runs.
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        # Create the target folder first so a missing directory cannot throw
        # away a finished training run.
        os.makedirs(os.path.dirname(MODEL_OUTPUT), exist_ok=True)
        torch.save(model, MODEL_OUTPUT)

    # Drop every reference to this run's objects, including the dataloaders
    # (which hold the datasets), so the next clear_memory() can reclaim them.
    del train_dataloader
    del validation_dataloader
    del train_dataset
    del test_dataset
    del training_set
    del testing_set
    del model
    del loss_fn
    del optimizer
    del outputs
    del targets

Run 1/10


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: 0/2, Batch: 0/313, Loss: 0.6874613761901855
Epoch: 0/2, Batch: 31/313, Loss: 0.6216678619384766
Epoch: 0/2, Batch: 62/313, Loss: 0.5358039736747742
Epoch: 0/2, Batch: 93/313, Loss: 0.2977152466773987
Epoch: 0/2, Batch: 124/313, Loss: 0.3557056784629822
Epoch: 0/2, Batch: 155/313, Loss: 0.5149423480033875
Epoch: 0/2, Batch: 186/313, Loss: 0.37484925985336304
Epoch: 0/2, Batch: 217/313, Loss: 0.06865265965461731
Epoch: 0/2, Batch: 248/313, Loss: 0.5727523565292358
Epoch: 0/2, Batch: 279/313, Loss: 0.23172152042388916
Epoch: 0/2, Batch: 310/313, Loss: 0.09408882260322571
Epoch: 1/2, Batch: 0/313, Loss: 0.0561547577381134
Epoch: 1/2, Batch: 31/313, Loss: 0.10869568586349487
Epoch: 1/2, Batch: 62/313, Loss: 0.03591163083910942
Epoch: 1/2, Batch: 93/313, Loss: 0.06960925459861755
Epoch: 1/2, Batch: 124/313, Loss: 0.13841183483600616
Epoch: 1/2, Batch: 155/313, Loss: 0.030344031751155853
Epoch: 1/2, Batch: 186/313, Loss: 0.018795784562826157
Epoch: 1/2, Batch: 217/313, Loss: 0.07282781

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: 0/2, Batch: 0/313, Loss: 0.7154633402824402
Epoch: 0/2, Batch: 31/313, Loss: 0.618417501449585
Epoch: 0/2, Batch: 62/313, Loss: 0.44464391469955444
Epoch: 0/2, Batch: 93/313, Loss: 0.29741400480270386
Epoch: 0/2, Batch: 124/313, Loss: 0.24994191527366638
Epoch: 0/2, Batch: 155/313, Loss: 0.36358147859573364
Epoch: 0/2, Batch: 186/313, Loss: 0.399749755859375
Epoch: 0/2, Batch: 217/313, Loss: 0.11229853332042694
Epoch: 0/2, Batch: 248/313, Loss: 0.11725249141454697
Epoch: 0/2, Batch: 279/313, Loss: 0.48750731348991394
Epoch: 0/2, Batch: 310/313, Loss: 0.09451286494731903
Epoch: 1/2, Batch: 0/313, Loss: 0.45425403118133545
Epoch: 1/2, Batch: 31/313, Loss: 0.05628611147403717
Epoch: 1/2, Batch: 62/313, Loss: 0.08714979887008667
Epoch: 1/2, Batch: 93/313, Loss: 0.05240461230278015
Epoch: 1/2, Batch: 124/313, Loss: 0.15310057997703552
Epoch: 1/2, Batch: 155/313, Loss: 0.08278203755617142
Epoch: 1/2, Batch: 186/313, Loss: 0.12393645942211151
Epoch: 1/2, Batch: 217/313, Loss: 0.0712756

  _warn_prf(average, modifier, msg_start, len(result))


Run 3/10


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: 0/2, Batch: 0/313, Loss: 0.6784725189208984
Epoch: 0/2, Batch: 31/313, Loss: 0.6468285918235779
Epoch: 0/2, Batch: 62/313, Loss: 0.5042349696159363
Epoch: 0/2, Batch: 93/313, Loss: 0.5352164506912231
Epoch: 0/2, Batch: 124/313, Loss: 0.47054433822631836
Epoch: 0/2, Batch: 155/313, Loss: 0.2887949049472809
Epoch: 0/2, Batch: 186/313, Loss: 0.19461993873119354
Epoch: 0/2, Batch: 217/313, Loss: 0.49358123540878296
Epoch: 0/2, Batch: 248/313, Loss: 0.3922000825405121
Epoch: 0/2, Batch: 279/313, Loss: 0.12829820811748505
Epoch: 0/2, Batch: 310/313, Loss: 0.14438985288143158
Epoch: 1/2, Batch: 0/313, Loss: 0.07314185053110123
Epoch: 1/2, Batch: 31/313, Loss: 0.6096878051757812
Epoch: 1/2, Batch: 62/313, Loss: 0.6356380581855774
Epoch: 1/2, Batch: 93/313, Loss: 0.03971702978014946
Epoch: 1/2, Batch: 124/313, Loss: 0.12151290476322174
Epoch: 1/2, Batch: 155/313, Loss: 0.034514110535383224
Epoch: 1/2, Batch: 186/313, Loss: 0.09342437237501144
Epoch: 1/2, Batch: 217/313, Loss: 0.049857132

  _warn_prf(average, modifier, msg_start, len(result))


Run 4/10


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: 0/2, Batch: 0/313, Loss: 0.6915306448936462
Epoch: 0/2, Batch: 31/313, Loss: 0.5008850693702698
Epoch: 0/2, Batch: 62/313, Loss: 0.43643781542778015
Epoch: 0/2, Batch: 93/313, Loss: 0.40850120782852173
Epoch: 0/2, Batch: 124/313, Loss: 0.3932937979698181
Epoch: 0/2, Batch: 155/313, Loss: 0.49668335914611816
Epoch: 0/2, Batch: 186/313, Loss: 0.18283526599407196
Epoch: 0/2, Batch: 217/313, Loss: 0.22553515434265137
Epoch: 0/2, Batch: 248/313, Loss: 0.11554718017578125
Epoch: 0/2, Batch: 279/313, Loss: 0.10422860085964203
Epoch: 0/2, Batch: 310/313, Loss: 0.04256674647331238
Epoch: 1/2, Batch: 0/313, Loss: 0.4518810510635376
Epoch: 1/2, Batch: 31/313, Loss: 0.047374874353408813
Epoch: 1/2, Batch: 62/313, Loss: 0.45230862498283386
Epoch: 1/2, Batch: 93/313, Loss: 0.03088327869772911
Epoch: 1/2, Batch: 124/313, Loss: 0.07235540449619293
Epoch: 1/2, Batch: 155/313, Loss: 0.4554923474788666
Epoch: 1/2, Batch: 186/313, Loss: 0.025385309010744095
Epoch: 1/2, Batch: 217/313, Loss: 0.47408

  _warn_prf(average, modifier, msg_start, len(result))


Run 5/10


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: 0/2, Batch: 0/313, Loss: 0.6960047483444214
Epoch: 0/2, Batch: 31/313, Loss: 0.6021063923835754
Epoch: 0/2, Batch: 62/313, Loss: 1.0069389343261719
Epoch: 0/2, Batch: 93/313, Loss: 0.41011083126068115
Epoch: 0/2, Batch: 124/313, Loss: 0.2672017216682434
Epoch: 0/2, Batch: 155/313, Loss: 0.48662716150283813
Epoch: 0/2, Batch: 186/313, Loss: 0.9991106986999512
Epoch: 0/2, Batch: 217/313, Loss: 0.1091393530368805
Epoch: 0/2, Batch: 248/313, Loss: 0.1254575252532959
Epoch: 0/2, Batch: 279/313, Loss: 0.0761856809258461
Epoch: 0/2, Batch: 310/313, Loss: 0.2077949047088623
Epoch: 1/2, Batch: 0/313, Loss: 0.5903880000114441
Epoch: 1/2, Batch: 31/313, Loss: 0.13890618085861206
Epoch: 1/2, Batch: 62/313, Loss: 0.06468860805034637
Epoch: 1/2, Batch: 93/313, Loss: 0.03186438977718353
Epoch: 1/2, Batch: 124/313, Loss: 0.05669192597270012
Epoch: 1/2, Batch: 155/313, Loss: 0.042034875601530075
Epoch: 1/2, Batch: 186/313, Loss: 0.059852421283721924
Epoch: 1/2, Batch: 217/313, Loss: 0.1019863635

  _warn_prf(average, modifier, msg_start, len(result))


Run 6/10


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: 0/2, Batch: 0/313, Loss: 0.7048889994621277
Epoch: 0/2, Batch: 31/313, Loss: 0.6119951009750366
Epoch: 0/2, Batch: 62/313, Loss: 0.531725287437439
Epoch: 0/2, Batch: 93/313, Loss: 0.4031209647655487
Epoch: 0/2, Batch: 124/313, Loss: 0.7964865565299988
Epoch: 0/2, Batch: 155/313, Loss: 0.46224260330200195
Epoch: 0/2, Batch: 186/313, Loss: 0.13961440324783325
Epoch: 0/2, Batch: 217/313, Loss: 0.4360681474208832
Epoch: 0/2, Batch: 248/313, Loss: 0.2515283226966858
Epoch: 0/2, Batch: 279/313, Loss: 0.1599084436893463
Epoch: 0/2, Batch: 310/313, Loss: 0.09676627814769745
Epoch: 1/2, Batch: 0/313, Loss: 0.19408546388149261
Epoch: 1/2, Batch: 31/313, Loss: 0.08685772120952606
Epoch: 1/2, Batch: 62/313, Loss: 0.3969612717628479
Epoch: 1/2, Batch: 93/313, Loss: 0.05424705147743225
Epoch: 1/2, Batch: 124/313, Loss: 0.40960773825645447
Epoch: 1/2, Batch: 155/313, Loss: 0.0343082956969738
Epoch: 1/2, Batch: 186/313, Loss: 0.410038560628891
Epoch: 1/2, Batch: 217/313, Loss: 0.045730870217084

  _warn_prf(average, modifier, msg_start, len(result))


Run 7/10


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: 0/2, Batch: 0/313, Loss: 0.689222514629364
Epoch: 0/2, Batch: 31/313, Loss: 0.5568697452545166
Epoch: 0/2, Batch: 62/313, Loss: 0.43063777685165405
Epoch: 0/2, Batch: 93/313, Loss: 0.7757756114006042
Epoch: 0/2, Batch: 124/313, Loss: 0.37763065099716187
Epoch: 0/2, Batch: 155/313, Loss: 0.3439243733882904
Epoch: 0/2, Batch: 186/313, Loss: 0.2519928216934204
Epoch: 0/2, Batch: 217/313, Loss: 0.32056301832199097
Epoch: 0/2, Batch: 248/313, Loss: 0.5668320655822754
Epoch: 0/2, Batch: 279/313, Loss: 0.12064292281866074
Epoch: 0/2, Batch: 310/313, Loss: 0.1275022029876709
Epoch: 1/2, Batch: 0/313, Loss: 0.4036598205566406
Epoch: 1/2, Batch: 31/313, Loss: 0.05753746256232262
Epoch: 1/2, Batch: 62/313, Loss: 0.0642387866973877
Epoch: 1/2, Batch: 93/313, Loss: 0.35002702474594116
Epoch: 1/2, Batch: 124/313, Loss: 0.7082979679107666
Epoch: 1/2, Batch: 155/313, Loss: 0.41685253381729126
Epoch: 1/2, Batch: 186/313, Loss: 0.9279353022575378
Epoch: 1/2, Batch: 217/313, Loss: 0.03888038545846

  _warn_prf(average, modifier, msg_start, len(result))


Run 8/10


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: 0/2, Batch: 0/313, Loss: 0.7031309008598328
Epoch: 0/2, Batch: 31/313, Loss: 0.491115927696228
Epoch: 0/2, Batch: 62/313, Loss: 0.4360962510108948
Epoch: 0/2, Batch: 93/313, Loss: 0.20849382877349854
Epoch: 0/2, Batch: 124/313, Loss: 0.36028608679771423
Epoch: 0/2, Batch: 155/313, Loss: 0.5691620111465454
Epoch: 0/2, Batch: 186/313, Loss: 0.7282012701034546
Epoch: 0/2, Batch: 217/313, Loss: 0.1018361747264862
Epoch: 0/2, Batch: 248/313, Loss: 0.12036987394094467
Epoch: 0/2, Batch: 279/313, Loss: 0.302007257938385
Epoch: 0/2, Batch: 310/313, Loss: 0.22022515535354614
Epoch: 1/2, Batch: 0/313, Loss: 0.05869429558515549
Epoch: 1/2, Batch: 31/313, Loss: 0.07032941281795502
Epoch: 1/2, Batch: 62/313, Loss: 0.04013147950172424
Epoch: 1/2, Batch: 93/313, Loss: 0.035613421350717545
Epoch: 1/2, Batch: 124/313, Loss: 0.6353021860122681
Epoch: 1/2, Batch: 155/313, Loss: 0.013124758377671242
Epoch: 1/2, Batch: 186/313, Loss: 0.05441413074731827
Epoch: 1/2, Batch: 217/313, Loss: 0.3823172450

  _warn_prf(average, modifier, msg_start, len(result))


Run 9/10


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: 0/2, Batch: 0/313, Loss: 0.694473922252655
Epoch: 0/2, Batch: 31/313, Loss: 0.608627438545227
Epoch: 0/2, Batch: 62/313, Loss: 0.2334899604320526
Epoch: 0/2, Batch: 93/313, Loss: 0.45565494894981384
Epoch: 0/2, Batch: 124/313, Loss: 0.2307671308517456
Epoch: 0/2, Batch: 155/313, Loss: 0.2500508427619934
Epoch: 0/2, Batch: 186/313, Loss: 0.2508369982242584
Epoch: 0/2, Batch: 217/313, Loss: 0.23538559675216675
Epoch: 0/2, Batch: 248/313, Loss: 0.2121790498495102
Epoch: 0/2, Batch: 279/313, Loss: 0.05845759063959122
Epoch: 0/2, Batch: 310/313, Loss: 0.2977002263069153
Epoch: 1/2, Batch: 0/313, Loss: 0.385844886302948
Epoch: 1/2, Batch: 31/313, Loss: 0.030874555930495262
Epoch: 1/2, Batch: 62/313, Loss: 0.04154539108276367
Epoch: 1/2, Batch: 93/313, Loss: 0.09882613271474838
Epoch: 1/2, Batch: 124/313, Loss: 0.02437276765704155
Epoch: 1/2, Batch: 155/313, Loss: 0.01254332810640335
Epoch: 1/2, Batch: 186/313, Loss: 0.028273750096559525
Epoch: 1/2, Batch: 217/313, Loss: 0.086989581584

  _warn_prf(average, modifier, msg_start, len(result))


Run 10/10


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: 0/2, Batch: 0/313, Loss: 0.6832401156425476
Epoch: 0/2, Batch: 31/313, Loss: 0.5110331773757935
Epoch: 0/2, Batch: 62/313, Loss: 0.4355607032775879
Epoch: 0/2, Batch: 93/313, Loss: 0.630267858505249
Epoch: 0/2, Batch: 124/313, Loss: 0.47408056259155273
Epoch: 0/2, Batch: 155/313, Loss: 0.317720890045166
Epoch: 0/2, Batch: 186/313, Loss: 0.29666051268577576
Epoch: 0/2, Batch: 217/313, Loss: 0.4744132459163666
Epoch: 0/2, Batch: 248/313, Loss: 0.3339642882347107
Epoch: 0/2, Batch: 279/313, Loss: 0.047195132821798325
Epoch: 0/2, Batch: 310/313, Loss: 0.26396793127059937
Epoch: 1/2, Batch: 0/313, Loss: 0.19994090497493744
Epoch: 1/2, Batch: 31/313, Loss: 0.1214948520064354
Epoch: 1/2, Batch: 62/313, Loss: 0.030105775222182274
Epoch: 1/2, Batch: 93/313, Loss: 0.04274573177099228
Epoch: 1/2, Batch: 124/313, Loss: 0.1157049685716629
Epoch: 1/2, Batch: 155/313, Loss: 0.3847389817237854
Epoch: 1/2, Batch: 186/313, Loss: 0.06596709787845612
Epoch: 1/2, Batch: 217/313, Loss: 0.043254654854

  _warn_prf(average, modifier, msg_start, len(result))


In [16]:
# Display the per-run metrics table filled in by the run loop.
results

Unnamed: 0,accuracy,precision_score_micro,precision_score_macro,recall_score_micro,recall_score_macro,f1_score_micro,f1_score_macro,execution_time
0,0.891026,0.891026,0.588618,0.891026,0.612703,0.891026,0.600383,209.204387
1,0.903846,0.903846,0.602125,0.903846,0.610051,0.903846,0.605597,208.909574
2,0.894231,0.894231,0.580676,0.894231,0.608747,0.894231,0.59401,209.50267
3,0.878205,0.878205,0.56062,0.878205,0.610296,0.878205,0.579609,208.93816
4,0.878205,0.878205,0.586048,0.878205,0.588858,0.878205,0.585939,206.40525
5,0.910256,0.910256,0.589864,0.910256,0.624877,0.910256,0.605352,204.803477
6,0.897436,0.897436,0.579521,0.897436,0.622783,0.897436,0.598367,207.881433
7,0.887821,0.887821,0.584738,0.887821,0.616201,0.887821,0.599777,203.828193
8,0.916667,0.916667,0.596669,0.916667,0.626885,0.916667,0.610923,203.669993
9,0.913462,0.913462,0.603366,0.913462,0.62456,0.913462,0.613777,203.716484


In [17]:
# Persist the per-run metrics. The target folder is created first so a missing
# results directory does not abort the export after all runs have finished.
os.makedirs(os.path.dirname(STATS_OUTPUT), exist_ok=True)
results.to_csv(STATS_OUTPUT)