In [115]:
import sys
import pandas as pd

import transformers
from transformers import BertTokenizer, BertModel
from transformers import AdamW as AdamW_HF, get_linear_schedule_with_warmup

import torch
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn
import torch.nn.functional as F

from tqdm.notebook import tqdm

In [205]:
# Seed torch's global RNG before any model or DataLoader is created.
torch.manual_seed(42)

f = "../../finphrase_dir/Sentences_75Agree.txt"
# Each line of the file is split into a sentence and a label at '@'.
# The separator is a raw string so the backslash is not an invalid Python
# escape sequence; the two-character value handed to pandas is unchanged, so it
# is still interpreted as a regular expression by the python engine.
dataset = pd.read_csv(f, sep=r'\@', engine='python', header=None, names=['sentence', 'label'])
dataset.reset_index(inplace=True)

train = dataset['sentence']
# Encode the three sentiment classes as integer targets.
label = dataset['label'].map({'negative': 0, 'neutral': 1, 'positive': 2})

# `.map` turns any label outside the mapping into NaN; fail here rather than
# deep inside the loss computation.
if label.isna().any():
    unexpected = dataset.loc[label.isna(), 'label'].unique().tolist()
    raise ValueError(f"Unmapped sentiment labels in {f}: {unexpected}")

# List of (sentence, label) pairs, consumed by DataLoader during training.
d = list(zip(train, label))

In [206]:
class BertToSentiment(nn.Module):
    """BERT encoder with layer-attention pooling and an MLP head (3 classes).

    The [CLS] vector of every hidden state returned by BERT (embedding output
    plus one per transformer layer) is combined with a learned softmax
    weighting over layers, then passed through four linear layers to produce
    unnormalised class logits.
    """

    def __init__(self):
        super().__init__()

        # Only hidden states are requested: forward never reads attention maps,
        # so asking BERT to return them would just cost memory.
        self.bert = BertModel.from_pretrained("bert-base-cased",
                                              output_hidden_states=True)

        # Fine-tune the whole encoder (made explicit; this is also the default).
        for param in self.bert.parameters():
            param.requires_grad = True

        # Derived from the loaded config instead of hard-coding 768 / 13.
        hidden_size = self.bert.config.hidden_size
        num_states = self.bert.config.num_hidden_layers + 1  # embeddings + each layer

        self.d1 = nn.Linear(hidden_size, 128)
        self.d2 = nn.Linear(128, 128)
        self.d3 = nn.Linear(128, 128)
        self.d4 = nn.Linear(128, 3)

        # One learnable scalar per hidden state, used to score the layers.
        self.weights = nn.Parameter(torch.rand(num_states, 1))
        self.dropout = nn.Dropout(0.25)

    def forward(self, x, attention_mask=None):
        """Compute class logits for a batch of token ids.

        Args:
            x: LongTensor of token ids, shape (batch, seq_len).
            attention_mask: optional mask of the same shape as ``x`` (1 for
                real tokens, 0 for padding), forwarded to BERT. ``None`` keeps
                the previous behaviour of attending to every position.

        Returns:
            Tensor of shape (batch, 3) with unnormalised logits.
        """
        # Index 2 of the encoder output is the tuple of hidden states
        # (after last_hidden_state and pooler_output).
        hidden = self.bert(x, attention_mask=attention_mask)[2]

        # [CLS] vector from every hidden state -> (num_states, batch, hidden).
        cls_states = torch.stack([state[:, 0, :] for state in hidden], dim=0)

        # Score each layer per sample -> (num_states, batch). Scores used to be
        # summed over the batch as well, which made one sample's prediction
        # depend on the other samples in its batch; for batch size 1 the two
        # formulations are identical.
        scores = (cls_states * self.weights.view(-1, 1, 1)).sum(dim=-1)
        layer_attn = F.softmax(scores, dim=0)

        # Attention-weighted sum over layers -> (batch, hidden).
        feature = (cls_states * layer_attn.unsqueeze(-1)).sum(dim=0)

        out = self.d1(F.relu(feature))
        out = self.d2(F.relu(out))
        out = self.d3(self.dropout(F.relu(out)))
        out = self.d4(self.dropout(F.relu(out)))

        return out
    

In [207]:
# Build the classifier; its constructor loads the pretrained "bert-base-cased" encoder.
model = BertToSentiment()

In [208]:
def train_model(model, data, num_epochs=2, batch_size=10, lr=0.001,
                weight_decay=0.0001, max_length=64, tokenizer=None):
    """Fine-tune ``model`` on (sentence, label) pairs with AdamW + cross-entropy.

    Args:
        model: module mapping a (batch, seq_len) tensor of token ids to
            (batch, num_classes) logits.
        data: sequence of ``(sentence, label)`` pairs; sentences are strings and
            labels are integer class ids.
        num_epochs: number of passes over ``data``.
        batch_size: sentences per optimisation step.
        lr: AdamW learning rate.
            NOTE(review): the default is kept for compatibility, but 1e-3 is
            far above the 2e-5..5e-5 range usually recommended for fine-tuning
            BERT -- consider lowering it.
        weight_decay: AdamW weight decay.
        max_length: every sentence is padded or truncated to this many tokens.
        tokenizer: HuggingFace tokenizer used to encode the sentences. When
            omitted, the "bert-base-cased" tokenizer is loaded; pass one
            explicitly to use a different vocabulary.

    Returns:
        The trained model (moved to the GPU when one is available).
    """
    # The notebook never defines a tokenizer, so it cannot be relied on as a
    # global; build the one matching the "bert-base-cased" encoder by default.
    if tokenizer is None:
        tokenizer = BertTokenizer.from_pretrained("bert-base-cased")

    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    train_dl = DataLoader(data, batch_size=batch_size, shuffle=True)

    model = model.to(device)
    model.train()

    for epoch in range(num_epochs):  # loop over the dataset multiple times
        print('epoch:', epoch + 1)
        training_losses = []

        for sentences, labels in tqdm(train_dl, unit='batch'):
            # padding="max_length" replaces the deprecated pad_to_max_length;
            # truncation keeps every row at max_length so the batch can be
            # stacked into one tensor even when a sentence is longer.
            encoded = tokenizer(text=list(sentences),
                                return_tensors="pt",
                                padding="max_length",
                                truncation=True,
                                max_length=max_length,
                                add_special_tokens=True)
            # NOTE(review): only input_ids are fed to the model, so padding
            # positions are not masked out of self-attention.
            input_ids = encoded['input_ids'].to(device)
            # The DataLoader already collates labels into a tensor; as_tensor
            # avoids the copy-construct warning that torch.tensor() raises.
            targets = torch.as_tensor(labels, dtype=torch.long).to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(input_ids)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            training_losses.append(loss.item())

        # Plain-Python mean: numpy is not imported by this notebook, and an
        # empty dataset must not raise ZeroDivisionError.
        if training_losses:
            mean_loss = sum(training_losses) / len(training_losses)
        else:
            mean_loss = float('nan')
        print("Finished Epoch", epoch + 1, ", training loss:", mean_loss)

    print('Finished Training')
    return model

In [209]:
# Fine-tune on the (sentence, label) pairs. train_model returns the model it was
# given, so binding the result keeps the same object while stopping the cell
# from echoing the full module repr as its output.
model = train_model(model, d)

epoch: 1


HBox(children=(FloatProgress(value=0.0, max=346.0), HTML(value='')))




KeyboardInterrupt: 