In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim


from nltk.tokenize import sent_tokenize
from torch.utils.data import DataLoader
from tqdm.notebook import tqdm
from transformers import BertTokenizer,BertModel

from torch.utils.data import Dataset

from datasets import load_from_disk,load_dataset

import pickle

In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)

#Additional Info when using cuda
if device.type == 'cuda':
    print(torch.cuda.get_device_name(0))
    print('Memory Usage:')
    print('Allocated:', round(torch.cuda.memory_allocated(0)/1024**3,1), 'GB')
    print('Cached:   ', round(torch.cuda.memory_reserved(0)/1024**3,1), 'GB')


Using device: cuda
NVIDIA GeForce RTX 3060 Ti
Memory Usage:
Allocated: 0.0 GB
Cached:    0.0 GB


In [12]:
threshold = 1.0
data_dir = r'\\wsl$\Ubuntu-20.04\home\jolteon\NLUProject\data\20news\\'
processed_dir = data_dir + 'processed\\'
split='train'
file_name = 'bert_encoded_segments_list_'

In [13]:
with open(processed_dir+ split+'\\' + file_name + str(threshold) +'.pkl', 'rb') as handle:
    bert_encoded_segments_list = pickle.load(handle)
 

In [14]:
for ii, batch in enumerate(bert_encoded_segments_list):
    label = batch[0]
    temp = batch[1]
    print(label)
    print(temp.shape)
    if ii == 10:
        break

tensor([0])
torch.Size([109, 768])
tensor([0])
torch.Size([319, 768])
tensor([0])
torch.Size([50, 768])
tensor([0])
torch.Size([15, 768])
tensor([0])
torch.Size([6, 768])
tensor([0])
torch.Size([55, 768])
tensor([0])
torch.Size([8, 768])
tensor([0])
torch.Size([13, 768])
tensor([0])
torch.Size([29, 768])
tensor([0])
torch.Size([4, 768])
tensor([0])
torch.Size([7, 768])


In [15]:
class EncodedSegmentsDataset(Dataset):
    def __init__(self,data_list):
        self.data_list = data_list
        
    def __len__(self):
        return len(self.data_list)
 
    def __getitem__(self,idx):
        return(self.data_list[idx])

In [16]:
encoded_dataset = EncodedSegmentsDataset(bert_encoded_segments_list)
val_prop =.1
bsize = 1

dataset_size = len(encoded_dataset)
val_size = int(val_prop * dataset_size)
train_size = dataset_size - val_size

train_dataset, val_dataset =  torch.utils.data.random_split(encoded_dataset,[train_size,val_size])
encoded_train_loader = DataLoader(train_dataset,batch_size=bsize,shuffle=True, pin_memory=True)
encoded_val_loader = DataLoader(val_dataset,batch_size=bsize,shuffle=True, pin_memory=True)



In [17]:
for ii, batch in enumerate(encoded_train_loader):
    label = batch[0]
    tensor = batch[1]
    print(label)
    print(tensor.shape)
    print(tensor[0,0,10:20])
    if ii ==2:
        break

tensor([[3]])
torch.Size([1, 23, 768])
tensor([-0.4808,  0.9906, -0.1408,  0.9914, -0.1096, -0.5517, -0.0907, -0.2929,
         0.2146,  0.6552])
tensor([[3]])
torch.Size([1, 18, 768])
tensor([-0.6152,  0.9913,  0.5027,  0.9831,  0.3775, -0.6378, -0.0582, -0.4697,
         0.3518,  0.6874])
tensor([[18]])
torch.Size([1, 18, 768])
tensor([-0.1982,  0.9835,  0.0502,  0.9774, -0.0878, -0.3492,  0.1402, -0.1665,
         0.1323,  0.6186])


In [18]:
class LSTMoverBERT(nn.Module):
    def __init__(self):
        super().__init__()
        self.LSTM = nn.LSTM(input_size=768,hidden_size = 128,num_layers=1,batch_first=True)
        self.activation = nn.ReLU()
        self.linear1 = nn.Linear(in_features=128,out_features=64)
        self.linear2 = nn.Linear(in_features=64,out_features=20)
        self.softmax = nn.Softmax(dim=1)
        

    def forward(self, x,verbose=False):
        
        #print('input x:', x.shape)
        LSTM_out,LSTM_states = self.LSTM(x)
        #print('LSTM out:', LSTM_out.shape)
        #print('LSTM states[0]:', LSTM_states[0].shape)
        #print('LSTM states[1]:', LSTM_states[1].shape)
        last_hidden_state = LSTM_states[0][0,:,:]
        #last_embedding = LSTM_out[:,-1,:]
        out = self.linear1(last_hidden_state)
        #print('linear out', out.shape) if verbose
        out = self.activation(out)
        #print('activation out', out.shape) if verbose
        out = self.linear2(out)
        out = self.softmax(out)
        return out



In [19]:
LoBERT_model = LSTMoverBERT()
LoBERT_model.to(device)
LoBERT_model.train()
criterion = nn.CrossEntropyLoss(reduction='sum')
optimizer = optim.Adam(LoBERT_model.parameters(),lr=0.00001,)

In [20]:
train_loss_list = []
val_loss_list = []
train_accuracy_list = []
val_accuracy_list = []

for epoch in tqdm(range(30)):  # loop over the dataset multiple times
    train_loss = 0
    train_correct = 0
    val_loss = 0
    val_correct = 0
    
    #START TRAIN
    LoBERT_model.train()
    for idx, batch in enumerate(encoded_train_loader):
        optimizer.zero_grad()

        # Define and move to GPU
        label = batch[0][0]
        model_input = batch[1]
        label = label.to(device)
        model_input = model_input.to(device)
        # Forward Pass
        out = LoBERT_model(model_input)
        loss = criterion(out,label)
        #Record Metrics pt 1/2
        train_loss += loss.item()
        pred = torch.argmax(out)
        train_correct +=(pred == label).sum()
        
        #Backward pass
        loss.backward()
        optimizer.step()
    
    #Record Metrics pt 2/2
    train_loss = train_loss/ len(encoded_train_loader)
    train_accuracy = train_correct/len(encoded_train_loader)
    #Print and save
    print('Epoch:', epoch, 'train_loss:',train_loss, 'accuracy: ',train_accuracy)
    train_loss_list.append(train_loss)
    train_accuracy_list.append(train_accuracy)
    
    # START VAL
    LoBERT_model.eval()
    
    with torch.no_grad():
        for idx, batch in enumerate(encoded_val_loader):
            #Define and move to GPU
            label = batch[0][0]
            model_input = batch[1]
            label = label.to(device)
            model_input = model_input.to(device)
            #Forward Pass
            out = LoBERT_model(model_input)
            loss = criterion(out,label)
            #Record metrics pt 1/2
            val_loss += loss.item()

            pred = torch.argmax(out)
            val_correct +=(pred == label).sum()
            
    #Record metrics pt 2/2
    val_loss = val_loss/ len(encoded_val_loader)
    val_accuracy = val_correct/len(encoded_val_loader)
    #Print and save
    print('Epoch:', epoch, 'val_loss:',val_loss, 'accuracy: ',val_accuracy)
    val_loss_list.append(val_loss)
    val_accuracy_list.append(val_accuracy)


HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))

Epoch: 0 train_loss: 2.9951916465611306 accuracy:  tensor(0.0528, device='cuda:0')
Epoch: 0 val_loss: 2.994442713144709 accuracy:  tensor(0.0451, device='cuda:0')



KeyboardInterrupt: 