In [1]:
import torch
from torch.utils.data import DataLoader, RandomSampler, Dataset
from torch import nn
from torch.nn.functional import cross_entropy
from torch.optim import AdamW

from typing import List
from transformers import ElectraModel, ElectraTokenizer, get_linear_schedule_with_warmup
from tqdm.auto import tqdm

import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

In [2]:
seed_val = 42
random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)
torch.cuda.is_available()

True

In [3]:
bert_model = ElectraModel.from_pretrained("monologg/koelectra-base-v3-discriminator")
tokenizer = ElectraTokenizer.from_pretrained("monologg/koelectra-base-v3-discriminator")

In [4]:
version = "knu_score" #"new_dict_score"
df_pos_name = os.path.join(os.path.dirname(os.getcwd()), 'xlsx_data', '#3_'+version, f"{version+'_pos_1'}.xlsx")
df_neg_name = os.path.join(os.path.dirname(os.getcwd()), 'xlsx_data', '#3_'+version, f"{version+'_neg_1'}.xlsx")

df_pos = pd.read_excel(df_pos_name, index_col=0)
df_neg = pd.read_excel(df_neg_name, index_col=0)

df_all = pd.concat([df_pos, df_neg], ignore_index=True)

df_all['total'] = df_all['title'].str.strip() + ".\n" + df_all['text'].str.strip()
df_all.knu_score[df_all.knu_score > 0 ] = 1
df_all.knu_score[df_all.knu_score < 0 ] = 0

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if sys.path[0] == '':


In [5]:
batch_size = 128
epochs = 1000
patience = 4

In [6]:
class SentimentDataset(Dataset):
    def __init__(self, texts: List[str], labels: List[int], tokenizer):
        # 데이터 셋이 가져야 하는 기본적인 값들을 세팅하는 함수
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        assert len(texts) == len(labels)

    def __getitem__(self, index:int):
        # bert입력을 만들어 주는 곳 + 15000개의 문장 중에서 몇번째 문장을 가져올건지
        text = self.texts[index]
        label = self.labels[index]
        bert_inputs = self.tokenizer(text, padding="max_length", truncation=True, max_length=32, return_tensors="pt")
    
        bert_inputs["input_ids"] = bert_inputs["input_ids"].squeeze()
        bert_inputs["attention_mask"] = bert_inputs["attention_mask"].squeeze()
        bert_inputs["token_type_ids"] = bert_inputs["token_type_ids"].squeeze()
    
        bert_inputs["label"] = torch.tensor(label, dtype=torch.long)
        print(bert_inputs)
        return bert_inputs
  
    def __len__(self) -> int:
        # 이 데이터셋의 크기 -> 150000
        return len(self.texts)

In [7]:
class SentimentClassificationModel(nn.Module):
    def __init__(self, bert):
        super().__init__()
        # BERT 모델을 입력 받기, 2차원으로 줄여주는 모델(weight) 추가
        self.bert = bert
        self.classification_layer = nn.Linear(768, 2)

    def forward(self, batch_data):
        # 1. BERT 모델의 결과 뽑기 2. BERT 모델 결과를 2차원으로 만들기
        bert_output = self.bert.forward(
          input_ids = batch_data["input_ids"],
          attention_mask = batch_data["attention_mask"], 
          token_type_ids = batch_data["token_type_ids"])
        pooled_output = bert_output[0][:,0]

        #2. BERT 모델 결과를 2차원으로 만들기
        classification_output = self.classification_layer.forward(pooled_output)
        return classification_output

In [8]:
all_texts = df_all['total'].tolist()
all_labels = list(map(int, df_all[version].tolist()))

del df_pos, df_neg, df_all

In [9]:
X_train, X_val, y_train, y_val = train_test_split(
    all_texts, 
    all_labels, 
    test_size=0.15, 
    random_state = 17, 
    stratify = all_labels
)

In [10]:
train_dataset = SentimentDataset(X_train, y_train, tokenizer)
val_dataset = SentimentDataset(X_val, y_val, tokenizer)

In [11]:
train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
val_dataloader = DataLoader(val_dataset, shuffle=True, batch_size=batch_size)

In [12]:
import gc
gc.collect()

105050

In [13]:
class EarlyStopping:
    """Early stops the training if validation loss doesn't improve after a given patience."""
    def __init__(self, patience=7, verbose=False, delta=0, path='checkpoint.pt', trace_func=print):
        """
        Args:
            patience (int): How long to wait after last time validation loss improved.
                            Default: 7
            verbose (bool): If True, prints a message for each validation loss improvement. 
                            Default: False
            delta (float): Minimum change in the monitored quantity to qualify as an improvement.
                            Default: 0
            path (str): Path for the checkpoint to be saved to.
                            Default: 'checkpoint.pt'
            trace_func (function): trace print function.
                            Default: print            
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.Inf
        self.delta = delta
        self.path = path
        self.trace_func = trace_func
    def __call__(self, val_loss, model):

        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            self.trace_func(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        '''Saves model when validation loss decrease.'''
        if self.verbose:
            self.trace_func(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), self.path)
        self.val_loss_min = val_loss

In [14]:
def f1_score_func(preds, labels):
    preds_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()
    return f1_score(labels_flat, preds_flat, average='weighted')

In [15]:
def evaluate_model(val_dataloader):
    
    model.eval()# back progation을 하지 않는 거 빼곤 train과 거의 동일, freeze all weight
    
    loss_val_total = 0
    predictions, true_vals = [], []
    
    for batch_data in tqdm(val_dataloader):
        
        batch_data = {key:value.to(device) for key, value in batch_data.items()}
        with torch.no_grad():
            classification_output = model.forward(batch_data)
            loss = cross_entropy(classification_output, batch_data["label"])
        
        loss_val_total += loss.item()
        
        logits = classification_output.detach().cpu().numpy()
        label_ids = batch_data['label'].cpu().numpy()
        predictions.append(logits)
        true_vals.append(label_ids)
        
    loss_val_avg = loss_val_total/len(val_dataloader)
    
    predictions = np.concatenate(predictions, axis = 0)
    true_vals = np.concatenate(true_vals, axis=0)
    
    return loss_val_avg, predictions, true_vals

In [16]:
def train_model(model, patience, epochs):
    
    # to track the training loss as the model trains
    train_losses = []
    # to track the validation loss as the model trains
    valid_losses = []
    # to track the average training loss per epoch as the model trains
    avg_train_losses = []
    # to track the average validation loss per epoch as the model trains
    avg_valid_losses = [] 
    
    early_stopping = EarlyStopping(patience=patience, verbose=True)
    
    for epoch in tqdm(range(1, epochs+1)):
        model.train()
        loss_train_total = 0
        progress_bar = tqdm(train_dataloader, desc = f"Epoch {epoch}")

        for batch_data in progress_bar:
            print(batch_data)

            optimizer.zero_grad()
            batch_data = {key:value.to(device) for key, value in batch_data.items()}
            classification_output = model.forward(batch_data)

            # 채점
            loss = cross_entropy(classification_output, batch_data["label"])
            train_losses.append(loss.item())
            # 잘못한 걸 밝히는 과정
            loss.backward()

            torch.nn.utils.clip_grad_norm_(model.parameters(), 1, 0)
            # 모델의 두뇌를 수정하는 과정
            optimizer.step()
            scheduler.step()
            # 이전에 잘못한 건 삭제 (봐주기)
            #progress_bar.set_postfix({'training_loss': '{:.3f}'.format(loss.item()/len(batch_data))})
            
            del batch_data
            gc.collect()
            torch.cuda.empty_cache()
        
        tqdm.write(f"\nEpoch {epoch}")

        val_loss, predictions, true_vals = evaluate_model(val_dataloader)
        valid_losses.append(val_loss)
        
        train_loss = np.average(train_losses)
        valid_loss = np.average(valid_losses)
        avg_train_losses.append(train_loss)
        avg_valid_losses.append(valid_loss)
        
        epoch_len = len(str(epochs))
        
        print_msg = (f'[{epoch:>{epoch_len}}/{epochs:>{epoch_len}}] ' +
                     f'train_loss: {train_loss:.5f} ' +
                     f'valid_loss: {valid_loss:.5f}')
        
        tqdm.write(print_msg)
        
        val_f1 = f1_score_func(predictions, true_vals)
        tqdm.write(f'F1 Score (weighted): {val_f1}')
        
        # clear lists to track next epoch
        train_losses = []
        valid_losses = []
        
        
        early_stopping(valid_loss, model)

        if early_stopping.early_stop:
            print("Early stopping")
            break
    
    model.load_state_dict(torch.load('checkpoint.pt'))
    
    return  model, avg_train_losses, avg_valid_losses

In [17]:
model = SentimentClassificationModel(bert_model)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

In [18]:
optimizer = AdamW(model.parameters(), lr = 3e-5, weight_decay=0.1)

scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps  = 0,
    num_training_steps  = len(train_dataloader)*epochs
)

In [19]:
gc.collect()
torch.cuda.empty_cache()

In [20]:
model, train_loss, valid_loss = train_model(model, patience, epochs)

HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, description='Epoch 1', max=1393.0, style=ProgressStyle(description_wid…

{'input_ids': tensor([    2,   144, 28780,  4234,  8005,   145,  2085, 30497, 10004,  4256,
           16, 27159,  4283, 15446,  4147,   151, 12627, 28015, 30471,  4216,
         4176,    18, 28780,  4234,  8005, 10004,  4256,  2085, 30497,  4234,
        27159,     3]), 'token_type_ids': tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0]), 'attention_mask': tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1]), 'label': tensor(1)}
{'input_ids': tensor([    2,    63,  3599, 25412,  4506,    16, 11352,  6444,  7652,  4176,
           65,   184,   146,  3249,  4034,  6907,  2236,   147,   151, 10874,
         9448,  8160,  5502,    18, 11686,  4234,  6444,  4007,  9484,  4219,
         3249,     3]), 'token_type_ids': tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0]), 'attention_mask': tensor([1, 1, 1, 1, 1,

        1, 1, 1, 1, 1, 1, 1, 1]), 'label': tensor(0)}
{'input_ids': tensor([    2,    25,  4293,  4498,  9226,  4239, 22314,  5368,  2887,  3431,
        24034,  4599,  4600,   151, 30021,  2947,  2758, 18572, 18152, 10928,
           18, 13079, 26298,  9173,  6232,    24,  4501,  4204,  4366, 18152,
         6416,     3]), 'token_type_ids': tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0]), 'attention_mask': tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1]), 'label': tensor(0)}
{'input_ids': tensor([    2,  2662,  4300,  6406,  8515,  4149,  4200, 14220,  5158,  4034,
         7304,  4239,  9773,  8265,  3305,  2459,  4899,  4149,    18,  9773,
         4007,    22,  4076,  3092, 10749, 14854,  4176,    18,  6249,  8168,
         4006,     3]), 'token_type_ids': tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 

{'input_ids': tensor([    2,  6297,  4755,  7743, 27615,  4005,    60,  6244, 22682, 27972,
         9675,  4234,  6417,  4028, 22682, 11074,  6343,  4200,  4005, 13604,
           43,    18,    18,    18,    18,  6785,  4073,  3123,  4820,  6436,
         6402,     3]), 'token_type_ids': tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0]), 'attention_mask': tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1]), 'label': tensor(1)}
{'input_ids': tensor([    2, 18843,  4047, 23264,  4234,  7232,  8733,  4110,  3247,  4176,
           18, 16505,  8100, 10590,  4110, 16516, 12460,  8733,  2590,  4007,
        14596,  6538,  4292,  2734,  4219,  3249,  4176,    18,  8733,  3636,
         4491,     3]), 'token_type_ids': tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0]), 'attention_mask': tensor([1, 1, 1, 1, 1,

{'input_ids': tensor([    2, 11794,  4543,   107,  2954,  4005,  4545,   107, 13044,  4029,
           16,  9563,   144,  3588,  9841, 10318,   145,  9233,    63,  6956,
         4218,  4048,    65,    18,  7316,  7215,  8438,  4047,  3719,  4071,
         4234,     3]), 'token_type_ids': tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0]), 'attention_mask': tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1]), 'label': tensor(1)}
{'input_ids': tensor([    2, 15143,  4162,  4275,  4110,  3412,  4034,  7599,    35,    11,
        27104,    21,  4120,    11,    18,    18,  3757,  4463,  4525,  4112,
           11,  9115,    17, 15143,    21,  4120,    11,    18,  6275,  6880,
         4451,     3]), 'token_type_ids': tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0]), 'attention_mask': tensor([1, 1, 1, 1, 1,

{'input_ids': tensor([    2,    63,  7028,    65,   144,  3144,  4361,   145, 32479,  4806,
         4163,  4437,  4112,  4158,  4234,   144, 14347,    15, 30692,   145,
           18,    18,  8438,   144,  2441,  4480,  4176, 20099,   145,  3758,
           18,     3]), 'token_type_ids': tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0]), 'attention_mask': tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1]), 'label': tensor(1)}
{'input_ids': tensor([    2,   144,  8474,    26,  4556,   145, 27169,  4137, 19836,   146,
        12665, 26778,  6226, 10728,  9576,  4520,  4150,   147,    18, 14922,
          389,  3616,  5088,  6426,    23,  4139,  4084, 23534,  6636, 27169,
         4137,     3]), 'token_type_ids': tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0]), 'attention_mask': tensor([1, 1, 1, 1, 1,

{'input_ids': tensor([    2,     6,  6457,  4006,  3021,  2492, 25524,  4076, 18781, 33789,
        14826,  2780,  5151,  3755,  4761,     6,    18, 16229,  2236,  4034,
         6353,  4292, 15607,  4118, 15329,  4176,    18,  6233,  6279,  4070,
         2064,     3]), 'token_type_ids': tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0]), 'attention_mask': tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1]), 'label': tensor(0)}
{'input_ids': tensor([    2,    63, 27053,    65,  8004,  4234,  8004,  4070,  2410,  4176,
          692,    18,   304,  4392,  4239, 11955,  8004,  4110,  2400,  4372,
         6770,  4234,  6385,  8004,  4234,  8004,  4070,  2410,  4176,  8515,
         4468,     3]), 'token_type_ids': tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0]), 'attention_mask': tensor([1, 1, 1, 1, 1,

{'input_ids': tensor([    2,    63, 28385, 20454,    65,  6394, 27972, 20331,  4029,  4234,
        31715,  3747,  4441,  4462, 33268,  6453,    18,  8412,  4073,  4129,
         6533,  9377,  4075, 14852,  6379,  4292,  2737,  6460,  6533,  9072,
         6755,     3]), 'token_type_ids': tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0]), 'attention_mask': tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1]), 'label': tensor(0)}
{'input_ids': tensor([    2,    32,  6726,  4491, 10749,    34, 16883, 17467,  4070, 10292,
         4176,     5,  2961, 17680,  4063,  6726,  4112,    35,    18, 16883,
        17467,  4070, 10292,  4576,  6216,     5,    18, 20244,  4282, 12209,
         4031,     3]), 'token_type_ids': tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0]), 'attention_mask': tensor([1, 1, 1, 1, 1,

{'input_ids': tensor([    2,  2339, 10728,    11,  9710,  4081,  4139,    11,  3232,  7678,
           35,    18,  2339, 10728,  9710,  4081,  2637,  4292,  7678,    35,
         2613,  6880,  7982,  9995,  4292,  6243,  4398,  4176,    18,  6233,
         7221,     3]), 'token_type_ids': tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0]), 'attention_mask': tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1]), 'label': tensor(0)}
{'input_ids': tensor([    2,  3242,  4618,  4283,  7749,     6, 11634,  4489,  4074,  4174,
           16,  3103,  4286,   107,  2788,  4286,   107, 20748,   107,  3474,
         4065,  4283,  4212, 31910,  6512,     6,    18, 11205, 12459,  4199,
         4139,     3]), 'token_type_ids': tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0]), 'attention_mask': tensor([1, 1, 1, 1, 1,

{'input_ids': tensor([    2,    63,  7397,    65,  9226,  3260,  4219,    11,  2297,  4349,
         4068,    11,  6654, 12920, 23477,  7177, 20131,  4666,    18,  6844,
        20131,  4666,  4007, 12920, 23477,  4073,  7690,  4176,    18, 10985,
         9226,     3]), 'token_type_ids': tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0]), 'attention_mask': tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1]), 'label': tensor(1)}
{'input_ids': tensor([    2,    63,  7614,    19,  8092,    65,   144,  8092,  7347,  8749,
          145,  2575,  2658,  4338,  4219,  3725,  4536,  4031,    18, 15177,
         7226,  8092,  4369,  4073, 28893,  7242,  4017,  4968,  4007, 14073,
        24387,     3]), 'token_type_ids': tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0]), 'attention_mask': tensor([1, 1, 1, 1, 1,

{'input_ids': tensor([    2,     6,  6589,  8911,  4151,  4118,    18,  6257,  2078,  6283,
         6441,  4176,     6,    18,  6718,  7429,  9697,  6367, 13934, 13469,
         7537, 12151, 12490,  4073,  2101,  4026,  8780,  8714, 23246,  9018,
        32103,     3]), 'token_type_ids': tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0]), 'attention_mask': tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1]), 'label': tensor(0)}
{'input_ids': tensor([    2, 20648,  3079,  4188,  4200,  3083,  4112,  8243,    16,  8111,
        10313,  4234,  6726,    18, 15177,  3243, 10529,  9072,  3731,  6009,
         4073,  4034,  6287, 11539,  4073,  4129,  6963,  6596,    20,  4044,
         2675,     3]), 'token_type_ids': tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0]), 'attention_mask': tensor([1, 1, 1, 1, 1,

{'input_ids': tensor([    2,    63,  6336,  4190,  4348,    65,  8934,    11,  6233,  4234,
         6555,  4006,    11, 20339,  4297,     6,  7318,  4049,  4031,  7511,
           16,  3400,  6534,  4070,  4034,  2637,  4007,  4761,    18,    18,
           18,     3]), 'token_type_ids': tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0]), 'attention_mask': tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1]), 'label': tensor(1)}
{'input_ids': tensor([    2,  3478,  4869, 11184, 21291,  8717,    18,    18,    18,   144,
         7043,  8432,  4113,  4106,  4468,  4289,  4185,  4297,  4728, 19783,
          145, 17941,    18,  8432,  4113,  4106,  4468,  4289,  4185,  4297,
         4728,     3]), 'token_type_ids': tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0]), 'attention_mask': tensor([1, 1, 1, 1, 1,

{'input_ids': tensor([    2,    11,  9426,  8643,  9207,  4289,  4081,    11,  2731,  4065,
         4395,    16,  6391, 13008,  4026, 18669,  4051,  4036,  4234,    11,
         3255,  4112, 15523,    11,    63,  7028,    65,    18,  7347, 22014,
        17804,     3]), 'token_type_ids': tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0]), 'attention_mask': tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1]), 'label': tensor(1)}
{'input_ids': tensor([    2,    63, 12942,  4158,  4234, 19609,  4164,  4347,  4665,    65,
           11,  6868,  4557,  4997,  4499,    11,  3757,  4441,  4182,    16,
        10862,  4034,    20,  4469,  7614,  4112,  6399,  4469,     5,    18,
         6868,     3]), 'token_type_ids': tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0]), 'attention_mask': tensor([1, 1, 1, 1, 1,

{'input_ids': tensor([    2,  7046, 12547,  4034,  6680,    18,  3806,  4766,  5095, 17482,
           38, 17039,  4073,  4129,  8931,  2784,  2967,  3249,  4034,  3087,
        16537,  7440,  4034,  6834,  4141,  4282,  4047,  3087,  6442,  4378,
         4172,     3]), 'token_type_ids': tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0]), 'attention_mask': tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1]), 'label': tensor(0)}
{'input_ids': tensor([    2,  6320,  4772,     6,    22,  4105,  4229,    16,  1937,   107,
         1058, 10828,  4047,  4034,  9049,  4007,  7025,  4176,     6,    18,
         2628,  8943,  6320, 10828,  6636,    49,  4007,  6320, 11093,  4949,
         4292,     3]), 'token_type_ids': tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0]), 'attention_mask': tensor([1, 1, 1, 1, 1,





RuntimeError: CUDA out of memory. Tried to allocate 12.00 MiB (GPU 0; 6.00 GiB total capacity; 4.15 GiB already allocated; 2.12 MiB free; 4.20 GiB reserved in total by PyTorch)

In [None]:
# visualize the loss as the network trained
fig = plt.figure(figsize=(10,8))
plt.plot(range(1,len(train_loss)+1),train_loss, label='Training Loss')
plt.plot(range(1,len(valid_loss)+1),valid_loss,label='Validation Loss')

# find position of lowest validation loss
minposs = valid_loss.index(min(valid_loss))+1 
plt.axvline(minposs, linestyle='--', color='r',label='Early Stopping Checkpoint')

plt.xlabel('epochs')
plt.ylabel('loss')
plt.ylim(0, 0.5) # consistent scale
plt.xlim(0, len(train_loss)+1) # consistent scale
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.show()
fig.savefig('S_loss_plot.png', bbox_inches='tight')

In [None]:
torch.save(model.state_dict(), f'Models/BERT_sent_ft_1.ph')