imports

In [292]:
import pandas as pd
import numpy as np
import torch
import torch.nn.functional as F
import torch.nn as nn
import torch.optim as optim
import torchtext
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset

from tqdm.auto import tqdm

upload datasets

In [293]:
def str_text_to_list(value):
    list_values = value.strip('[]').split(', ')
    cleaned_list_values = [item[1:-1] for item in list_values]
    return cleaned_list_values

In [294]:
dataset_clean = pd.read_csv("../datasets/tonetags_dataset_tumblr_clean.csv", converters={"text": str_text_to_list})

In [295]:
dataset_clean_corrected = pd.read_csv("../datasets/tonetags_dataset_tumblr_clean_corrected_text.csv", converters={"text": str_text_to_list})

In [296]:
dataset_wsd_1 = pd.read_csv("../datasets/tonetags_wsd_1.csv", index_col=0, converters={"text": str_text_to_list})

In [297]:
def str_context_to_list(value):
    list_values = value.strip('[]').split(' ')
    cleaned_list_values = []
    for item in list_values:
        item.strip()
        # if '\n' in item:
        #     item.replace('\n', '')
        if item != "":
            cleaned_list_values.append(float(item))
    return cleaned_list_values

In [298]:
dataset_wsd_2 = pd.read_csv("../datasets/tonetags_wsd_2.csv", index_col=0, converters={"text": str_text_to_list, 'context': str_context_to_list})

encode labels

In [299]:
labels = dataset_wsd_1.tags.unique().tolist()

dataset_wsd_1.tags = dataset_wsd_1.tags.apply(labels.index)

dataset_clean.tags = dataset_clean.tags.apply(labels.index)
dataset_clean_corrected.tags =dataset_clean_corrected.tags.apply(labels.index)
dataset_wsd_2.tags = dataset_wsd_2.tags.apply(labels.index)

split data

In [300]:
train_clean, test_clean = train_test_split(dataset_clean, test_size=0.2, shuffle=False)

In [301]:
train_clean_corrected, test_clean_corrected = train_test_split(dataset_clean_corrected, test_size=0.2, shuffle=False)

In [302]:
train_wsd_1, test_wsd_1 = train_test_split(dataset_wsd_1, test_size=0.2, shuffle=False)

In [303]:
train_wsd_2, test_wsd_2 = train_test_split(dataset_wsd_2, test_size=0.2, shuffle=False)

create vocab and class of dataset

In [304]:
glove_6b_50 = torchtext.vocab.GloVe(name='6B', dim=50)

In [305]:
vocab = glove_6b_50.stoi
vocab["<unk>"] = len(vocab)
vocab["<pad>"] = len(vocab)

In [306]:
class myDataset(Dataset):
    def __init__(self, dataset):
        
        max_length = 4096
        
        self.data = []
        for sentence in dataset.text:
            if len(sentence) > max_length:
                continue
            sentence_ids = []
            for token in sentence:
                try:
                    sentence_ids.append(vocab[token])
                except KeyError:
                    sentence_ids.append(vocab["<unk>"])
            self.data.append(sentence_ids)
        self.labels = dataset.tags
        
        self.context = None
        
        if 'context' in dataset.columns:
            self.context = dataset.context

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        if self.context is None:
            return self.data[idx], torch.tensor(self.labels.iloc[idx])
        else:
            return self.data[idx], torch.tensor(self.labels.iloc[idx]), self.context.iloc[idx]

In [307]:
def collate_fn(batch):
    max_length = 4096
    
    data_ids = []
    labels = []
    contexts = []
    
    for dat in batch:
        
        data_ids.append(dat[0])
        labels.append(dat[1])
        
        if len(dat) >= 3:
            contexts.append(dat[2])

    for i in range(len(data_ids)):
        while len(data_ids[i]) < max_length:
            data_ids[i].append(vocab["<pad>"])
    
    return torch.tensor(data_ids), torch.tensor(contexts), torch.tensor(labels)

create datasets

In [308]:
train_dataset_clean, test_dataset_clean = myDataset(train_clean), myDataset(test_clean)

In [309]:
train_dataset_clean_corrected, test_dataset_clean_corrected = myDataset(train_clean_corrected), myDataset(test_clean_corrected)

In [310]:
train_dataset_wsd_1, test_dataset_wsd_1 = myDataset(train_wsd_1), myDataset(test_wsd_1)

In [311]:
train_dataset_wsd_2, test_dataset_wsd_2 = myDataset(train_wsd_2), myDataset(test_wsd_2)

create dataloaders

In [312]:
batch_size = 32

In [313]:
train_dataloader_clean = DataLoader(train_dataset_clean, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)

test_dataloader_clean = DataLoader(test_dataset_clean, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)

In [314]:
train_dataloader_clean_corrected = DataLoader(train_dataset_clean_corrected, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)

test_dataloader_clean_corrected = DataLoader(test_dataset_clean_corrected, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)

In [315]:
train_dataloader_wsd_1 = DataLoader(train_dataset_wsd_1, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)

test_dataloader_wsd_1 = DataLoader(test_dataset_wsd_1, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)

In [316]:
train_dataloader_wsd_2 = DataLoader(train_dataset_wsd_2, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)

test_dataloader_wsd_2 = DataLoader(test_dataset_wsd_2, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)

tokens embedding

In [317]:
embedding_vector = glove_6b_50.vectors.numpy()
embedding_vector = np.append(embedding_vector, np.zeros(50)).reshape(-1, 50) # vector for unknown value in vocab
embedding_vector = np.append(embedding_vector, np.ones(50)).reshape(-1, 50) # vector for padding value in vocab

In [318]:
embedding_tensor = torch.tensor(embedding_vector, dtype=torch.float)

In [319]:
nn.Embedding.from_pretrained(embedding_tensor, freeze=True)

Embedding(400002, 50)

model

In [320]:
class ToneTagsLSTM(nn.Module):
    def __init__(self, vocab_size, embedding, hidden_dim, output_size, num_layers, dropout):
        super(ToneTagsLSTM, self).__init__()
        # vocab_size = 400002
        # embedding_dim = 50
        # hidden_dim_lstm = 30
        # fc1 = 30 - 25
        # fc2 = 25 - 20
        # fc3 = 20 - 19
        # output_size = 19
        
        self.embedding = embedding
        
        self.lstm = nn.LSTM(self.embedding.embedding_dim, hidden_dim, num_layers=num_layers, bidirectional=True, dropout=dropout, batch_first=True)
        
        self.fc1 = nn.Linear(hidden_dim, 25)
        self.fc2 = nn.Linear(25, 20)
        self.fc3 = nn.Linear(20, output_size)
        # self.out = nn.Softmax(output_size, dim=1)
        

    def forward(self, x):
        
        embedded = self.embedding(x)
        output, (hidden, cell) = self.lstm(embedded)


        # print(hidden[-1].shape)

        fc1_out = self.fc1(hidden[-1])
        
        
        fc2_out = self.fc2(fc1_out)
        out = self.fc3(fc2_out)
        # out = self.out(fc3_out)
        
        return out

parameters

In [321]:
vocab_size = len(vocab) # 4000002
embedding = nn.Embedding.from_pretrained(embedding_tensor, freeze=True)
hidden_dim = 30 # 30
output_size = len(labels) # 19
num_layers = 4
dropout = 0.2
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
lr = 5e-4

criterion = nn.CrossEntropyLoss()

accuracy function

In [322]:
def get_accuracy(prediction, label):
    batch_size, _ = prediction.shape
    predicted_classes = prediction.argmax(dim=-1)
    correct_predictions = predicted_classes.eq(label).sum()
    accuracy = correct_predictions / batch_size
    return accuracy

train and test `train_dataloader_wsd_1`

In [323]:
# train_dataloader_wsd_1
# test_dataloader_wsd_1

model_wsd_1 = ToneTagsLSTM(
    vocab_size=vocab_size,
    embedding=embedding,
    hidden_dim=hidden_dim,
    output_size=output_size,
    num_layers=num_layers,
    dropout=dropout
).to(device)

optimizer_wsd_1 = optim.Adam(model_wsd_1.parameters(), lr=lr)

In [324]:
%%time

n_epochs = 20

for ep in range(1, n_epochs + 1):
    
    model_wsd_1.train()
    epoch_losses_train = []
    epoch_accuracies_train = []
    
    
    for tokens, contexts, tags in tqdm(train_dataloader_wsd_1, desc="training..."):
        optimizer_wsd_1.zero_grad()
        
        tokens = tokens.to(device)
        tags = tags.to(device)
        
        predictions = model_wsd_1(tokens)
        
        loss = criterion(predictions, tags)
        accuracy = get_accuracy(predictions, tags)
        
        loss.backward()
        optimizer_wsd_1.step()
        
        epoch_losses_train.append(loss.item())
        epoch_accuracies_train.append(accuracy.item())
        
    print(f'[Train Epoch {ep}] Loss: {np.mean(epoch_losses_train)}, Accuracy: {np.mean(epoch_accuracies_train)}')


    model_wsd_1.eval()
    epoch_losses_test = []
    epoch_accuracies_test = []
    
    with torch.no_grad():
        for tokens, contexts, tags in tqdm(test_dataloader_wsd_1, desc="testing..."):
            optimizer_wsd_1.zero_grad()
    
            tokens = tokens.to(device)
            tags = tags.to(device)
    
            predictions = model_wsd_1(tokens)
    
            loss = criterion(predictions, tags)
            accuracy = get_accuracy(predictions, tags)
    
            epoch_losses_test.append(loss.item())
            epoch_accuracies_test.append(accuracy.item())
    
        print(f'[Test Epoch {ep}] Loss: {np.mean(epoch_losses_test)}, Accuracy: {np.mean(epoch_accuracies_test)}\n')

training...:   0%|          | 0/2113 [00:00<?, ?it/s]

[Train Epoch 1] Loss: 2.379364491073204, Accuracy: 0.21823237103644108


testing...:   0%|          | 0/530 [00:00<?, ?it/s]

[Test Epoch 1] Loss: 6.908232303835311, Accuracy: 0.0026533018867924527


training...:   0%|          | 0/2113 [00:00<?, ?it/s]

[Train Epoch 2] Loss: 2.3255429054062566, Accuracy: 0.2388192143871273


testing...:   0%|          | 0/530 [00:00<?, ?it/s]

[Test Epoch 2] Loss: 6.747853927792243, Accuracy: 0.003714622641509434


training...:   0%|          | 0/2113 [00:00<?, ?it/s]

[Train Epoch 3] Loss: 2.3370723191788003, Accuracy: 0.22923568386180784


testing...:   0%|          | 0/530 [00:00<?, ?it/s]

[Test Epoch 3] Loss: 6.932549027676853, Accuracy: 0.028360849056603774


training...:   0%|          | 0/2113 [00:00<?, ?it/s]

[Train Epoch 4] Loss: 2.2940096190006205, Accuracy: 0.2555608140085187


testing...:   0%|          | 0/530 [00:00<?, ?it/s]

[Test Epoch 4] Loss: 7.0684436690132575, Accuracy: 0.002712264150943396


training...:   0%|          | 0/2113 [00:00<?, ?it/s]

[Train Epoch 5] Loss: 2.2320500707603776, Accuracy: 0.2811316848083294


testing...:   0%|          | 0/530 [00:00<?, ?it/s]

[Test Epoch 5] Loss: 6.795463668175463, Accuracy: 0.0034787735849056605


training...:   0%|          | 0/2113 [00:00<?, ?it/s]

[Train Epoch 6] Loss: 2.236062711976571, Accuracy: 0.27750828206341693


testing...:   0%|          | 0/530 [00:00<?, ?it/s]

[Test Epoch 6] Loss: 6.869930267333984, Accuracy: 0.0034787735849056605


training...:   0%|          | 0/2113 [00:00<?, ?it/s]

[Train Epoch 7] Loss: 2.1577895475799194, Accuracy: 0.315309985802177


testing...:   0%|          | 0/530 [00:00<?, ?it/s]

[Test Epoch 7] Loss: 6.7946558736405285, Accuracy: 0.017452830188679245


training...:   0%|          | 0/2113 [00:00<?, ?it/s]

[Train Epoch 8] Loss: 2.0552145713773657, Accuracy: 0.3581844533838145


testing...:   0%|          | 0/530 [00:00<?, ?it/s]

[Test Epoch 8] Loss: 6.979341743577201, Accuracy: 0.0751829863367778


training...:   0%|          | 0/2113 [00:00<?, ?it/s]

[Train Epoch 9] Loss: 1.9763773792003265, Accuracy: 0.3919338618078561


testing...:   0%|          | 0/530 [00:00<?, ?it/s]

[Test Epoch 9] Loss: 7.019781984473174, Accuracy: 0.04150943396226415


training...:   0%|          | 0/2113 [00:00<?, ?it/s]

[Train Epoch 10] Loss: 1.9320867353432336, Accuracy: 0.41379259346900144


testing...:   0%|          | 0/530 [00:00<?, ?it/s]

[Test Epoch 10] Loss: 6.702115203749459, Accuracy: 0.11028383213956401


training...:   0%|          | 0/2113 [00:00<?, ?it/s]

[Train Epoch 11] Loss: 1.9047893686171566, Accuracy: 0.419959772834832


testing...:   0%|          | 0/530 [00:00<?, ?it/s]

[Test Epoch 11] Loss: 6.723415740930809, Accuracy: 0.09077342225695556


training...:   0%|          | 0/2113 [00:00<?, ?it/s]

[Train Epoch 12] Loss: 1.8470548380058747, Accuracy: 0.43835778513961193


testing...:   0%|          | 0/530 [00:00<?, ?it/s]

[Test Epoch 12] Loss: 6.6107680293748965, Accuracy: 0.08816688353160643


training...:   0%|          | 0/2113 [00:00<?, ?it/s]

[Train Epoch 13] Loss: 1.8424293587008078, Accuracy: 0.4414783483199243


testing...:   0%|          | 0/530 [00:00<?, ?it/s]

[Test Epoch 13] Loss: 6.813638563875882, Accuracy: 0.12049650291226945


training...:   0%|          | 0/2113 [00:00<?, ?it/s]

[Train Epoch 14] Loss: 1.80312192630088, Accuracy: 0.4569332702318978


testing...:   0%|          | 0/530 [00:00<?, ?it/s]

[Test Epoch 14] Loss: 6.998109395548982, Accuracy: 0.11032449577074006


training...:   0%|          | 0/2113 [00:00<?, ?it/s]

[Train Epoch 15] Loss: 1.7887831622821937, Accuracy: 0.460320042593469


testing...:   0%|          | 0/530 [00:00<?, ?it/s]

[Test Epoch 15] Loss: 6.76451625284159, Accuracy: 0.1045644925169225


training...:   0%|          | 0/2113 [00:00<?, ?it/s]

[Train Epoch 16] Loss: 1.7480864914083898, Accuracy: 0.4725656649313772


testing...:   0%|          | 0/530 [00:00<?, ?it/s]

[Test Epoch 16] Loss: 7.129097997917319, Accuracy: 0.10313719908864993


training...:   0%|          | 0/2113 [00:00<?, ?it/s]

[Train Epoch 17] Loss: 1.724709026575427, Accuracy: 0.48096604353999056


testing...:   0%|          | 0/530 [00:00<?, ?it/s]

[Test Epoch 17] Loss: 7.2280067848709395, Accuracy: 0.09959946323959333


training...:   0%|          | 0/2113 [00:00<?, ?it/s]

[Train Epoch 18] Loss: 1.7031994118115883, Accuracy: 0.48979531471840987


testing...:   0%|          | 0/530 [00:00<?, ?it/s]

[Test Epoch 18] Loss: 7.073609010228571, Accuracy: 0.11267688679245283


training...:   0%|          | 0/2113 [00:00<?, ?it/s]

[Train Epoch 19] Loss: 1.6933164855512013, Accuracy: 0.49263487931850447


testing...:   0%|          | 0/530 [00:00<?, ?it/s]

[Test Epoch 19] Loss: 6.817184558904396, Accuracy: 0.1248820754716981


training...:   0%|          | 0/2113 [00:00<?, ?it/s]

[Train Epoch 20] Loss: 1.6621024412562708, Accuracy: 0.5012866777094179


testing...:   0%|          | 0/530 [00:00<?, ?it/s]

[Test Epoch 20] Loss: 7.059619648951404, Accuracy: 0.12750691282299328

CPU times: total: 1h 20min 45s
Wall time: 1h 20min 28s


train and test `train_dataloader_clean`

In [325]:
# train_dataloader_clean
# test_dataloader_clean

model_clean = ToneTagsLSTM(
    vocab_size=vocab_size,
    embedding=embedding,
    hidden_dim=hidden_dim,
    output_size=output_size,
    num_layers=num_layers,
    dropout=dropout
).to(device)

optimizer_clean = optim.Adam(model_clean.parameters(), lr=lr)

In [326]:
%%time

n_epochs = 20

for ep in range(1, n_epochs + 1):

    model_clean.train()
    epoch_losses_train = []
    epoch_accuracies_train = []


    for tokens, contexts, tags in tqdm(train_dataloader_clean, desc="training..."):
        optimizer_clean.zero_grad()

        tokens = tokens.to(device)
        tags = tags.to(device)

        predictions = model_clean(tokens)

        loss = criterion(predictions, tags)
        accuracy = get_accuracy(predictions, tags)

        loss.backward()
        optimizer_clean.step()

        epoch_losses_train.append(loss.item())
        epoch_accuracies_train.append(accuracy.item())

    print(f'[Train Epoch {ep}] Loss: {np.mean(epoch_losses_train)}, Accuracy: {np.mean(epoch_accuracies_train)}')


    model_clean.eval()
    epoch_losses_test = []
    epoch_accuracies_test = []

    with torch.no_grad():
        for tokens, contexts, tags in tqdm(test_dataloader_clean, desc="testing..."):
            optimizer_clean.zero_grad()

            tokens = tokens.to(device)
            tags = tags.to(device)

            predictions = model_clean(tokens)

            loss = criterion(predictions, tags)
            accuracy = get_accuracy(predictions, tags)

            epoch_losses_test.append(loss.item())
            epoch_accuracies_test.append(accuracy.item())

        print(f'[Test Epoch {ep}] Loss: {np.mean(epoch_losses_test)}, Accuracy: {np.mean(epoch_accuracies_test)}\n')

training...:   0%|          | 0/2132 [00:00<?, ?it/s]

[Train Epoch 1] Loss: 2.243861869005504, Accuracy: 0.2783780878804638


testing...:   0%|          | 0/534 [00:00<?, ?it/s]

[Test Epoch 1] Loss: 6.458495038725464, Accuracy: 0.003920880149812734


training...:   0%|          | 0/2132 [00:00<?, ?it/s]

[Train Epoch 2] Loss: 1.9818801017684293, Accuracy: 0.3831261726162671


testing...:   0%|          | 0/534 [00:00<?, ?it/s]

[Test Epoch 2] Loss: 6.666136007407185, Accuracy: 0.12611189138576778


training...:   0%|          | 0/2132 [00:00<?, ?it/s]

[Train Epoch 3] Loss: 1.8509055806592973, Accuracy: 0.44090838024417184


testing...:   0%|          | 0/534 [00:00<?, ?it/s]

[Test Epoch 3] Loss: 6.795352591110972, Accuracy: 0.13453885767790263


training...:   0%|          | 0/2132 [00:00<?, ?it/s]

[Train Epoch 4] Loss: 1.743016006044107, Accuracy: 0.47870153222254025


testing...:   0%|          | 0/534 [00:00<?, ?it/s]

[Test Epoch 4] Loss: 6.789622264854917, Accuracy: 0.11013576779026217


training...:   0%|          | 0/2132 [00:00<?, ?it/s]

[Train Epoch 5] Loss: 1.677518411045003, Accuracy: 0.49682223265658626


testing...:   0%|          | 0/534 [00:00<?, ?it/s]

[Test Epoch 5] Loss: 6.788666859101713, Accuracy: 0.12482443820224719


training...:   0%|          | 0/2132 [00:00<?, ?it/s]

[Train Epoch 6] Loss: 1.6258040501912585, Accuracy: 0.510138172299844


testing...:   0%|          | 0/534 [00:00<?, ?it/s]

[Test Epoch 6] Loss: 6.788777148678955, Accuracy: 0.13401217228464418


training...:   0%|          | 0/2132 [00:00<?, ?it/s]

[Train Epoch 7] Loss: 1.5860455974405299, Accuracy: 0.5211626407297199


testing...:   0%|          | 0/534 [00:00<?, ?it/s]

[Test Epoch 7] Loss: 6.816744447200932, Accuracy: 0.11569522471910113


training...:   0%|          | 0/2132 [00:00<?, ?it/s]

[Train Epoch 8] Loss: 1.5565487722854006, Accuracy: 0.527938359925492


testing...:   0%|          | 0/534 [00:00<?, ?it/s]

[Test Epoch 8] Loss: 6.9661714432391335, Accuracy: 0.13775749063670412


training...:   0%|          | 0/2132 [00:00<?, ?it/s]

[Train Epoch 9] Loss: 1.53049247553939, Accuracy: 0.5368042526728217


testing...:   0%|          | 0/534 [00:00<?, ?it/s]

[Test Epoch 9] Loss: 6.91945893934157, Accuracy: 0.14501404494382023


training...:   0%|          | 0/2132 [00:00<?, ?it/s]

[Train Epoch 10] Loss: 1.5088589794752074, Accuracy: 0.5417956535409136


testing...:   0%|          | 0/534 [00:00<?, ?it/s]

[Test Epoch 10] Loss: 6.956076131777817, Accuracy: 0.12312734082397003


training...:   0%|          | 0/2132 [00:00<?, ?it/s]

[Train Epoch 11] Loss: 1.4875155275523326, Accuracy: 0.5489446529192504


testing...:   0%|          | 0/534 [00:00<?, ?it/s]

[Test Epoch 11] Loss: 6.990546791741018, Accuracy: 0.12453183520599251


training...:   0%|          | 0/2132 [00:00<?, ?it/s]

[Train Epoch 12] Loss: 1.4824939988847075, Accuracy: 0.5502052063901697


testing...:   0%|          | 0/534 [00:00<?, ?it/s]

[Test Epoch 12] Loss: 7.0236830113085915, Accuracy: 0.11949906367041198


training...:   0%|          | 0/2132 [00:00<?, ?it/s]

[Train Epoch 13] Loss: 1.4512792555632779, Accuracy: 0.5587926047660173


testing...:   0%|          | 0/534 [00:00<?, ?it/s]

[Test Epoch 13] Loss: 7.039220765288849, Accuracy: 0.11856273408239701


training...:   0%|          | 0/2132 [00:00<?, ?it/s]

[Train Epoch 14] Loss: 1.4397639191899916, Accuracy: 0.5617133755565361


testing...:   0%|          | 0/534 [00:00<?, ?it/s]

[Test Epoch 14] Loss: 7.237489979811822, Accuracy: 0.11101357677902622


training...:   0%|          | 0/2132 [00:00<?, ?it/s]

[Train Epoch 15] Loss: 1.4225014401477452, Accuracy: 0.5679604440340406


testing...:   0%|          | 0/534 [00:00<?, ?it/s]

[Test Epoch 15] Loss: 7.1255100322573375, Accuracy: 0.1237125468164794


training...:   0%|          | 0/2132 [00:00<?, ?it/s]

[Train Epoch 16] Loss: 1.4099246992906085, Accuracy: 0.5708890322169489


testing...:   0%|          | 0/534 [00:00<?, ?it/s]

[Test Epoch 16] Loss: 7.014002293013455, Accuracy: 0.12453183520599251


training...:   0%|          | 0/2132 [00:00<?, ?it/s]

[Train Epoch 17] Loss: 1.3931861426995797, Accuracy: 0.5751358270924714


testing...:   0%|          | 0/534 [00:00<?, ?it/s]

[Test Epoch 17] Loss: 7.05055341202668, Accuracy: 0.12997425093632958


training...:   0%|          | 0/2132 [00:00<?, ?it/s]

[Train Epoch 18] Loss: 1.379810903158465, Accuracy: 0.579153963425817


testing...:   0%|          | 0/534 [00:00<?, ?it/s]

[Test Epoch 18] Loss: 7.013806927586428, Accuracy: 0.13881086142322097


training...:   0%|          | 0/2132 [00:00<?, ?it/s]

[Train Epoch 19] Loss: 1.3682932549245361, Accuracy: 0.583941135101202


testing...:   0%|          | 0/534 [00:00<?, ?it/s]

[Test Epoch 19] Loss: 6.97753054550971, Accuracy: 0.1346558988764045


training...:   0%|          | 0/2132 [00:00<?, ?it/s]

[Train Epoch 20] Loss: 1.3589751783611628, Accuracy: 0.5857645403488939


testing...:   0%|          | 0/534 [00:00<?, ?it/s]

[Test Epoch 20] Loss: 6.732687687605955, Accuracy: 0.12265917602996254

CPU times: total: 1h 21min 47s
Wall time: 1h 21min 31s


train and test `train_dataloader_clean_corrected`

In [327]:
# train_dataloader_clean_corrected
# test_dataloader_clean_corrected

model_clean_corrected = ToneTagsLSTM(
    vocab_size=vocab_size,
    embedding=embedding,
    hidden_dim=hidden_dim,
    output_size=output_size,
    num_layers=num_layers,
    dropout=dropout
).to(device)

optimizer_clean_corrected = optim.Adam(model_clean_corrected.parameters(), lr=lr)

In [328]:
%%time

n_epochs = 20

for ep in range(1, n_epochs + 1):

    model_clean_corrected.train()
    epoch_losses_train = []
    epoch_accuracies_train = []


    for tokens, contexts, tags in tqdm(train_dataloader_clean_corrected, desc="training..."):
        optimizer_clean_corrected.zero_grad()

        tokens = tokens.to(device)
        tags = tags.to(device)

        predictions = model_clean_corrected(tokens)

        loss = criterion(predictions, tags)
        accuracy = get_accuracy(predictions, tags)

        loss.backward()
        optimizer_clean_corrected.step()

        epoch_losses_train.append(loss.item())
        epoch_accuracies_train.append(accuracy.item())

    print(f'[Train Epoch {ep}] Loss: {np.mean(epoch_losses_train)}, Accuracy: {np.mean(epoch_accuracies_train)}')


    model_clean_corrected.eval()
    epoch_losses_test = []
    epoch_accuracies_test = []

    with torch.no_grad():
        for tokens, contexts, tags in tqdm(test_dataloader_clean_corrected, desc="testing..."):
            optimizer_clean.zero_grad()

            tokens = tokens.to(device)
            tags = tags.to(device)

            predictions = model_clean_corrected(tokens)

            loss = criterion(predictions, tags)
            accuracy = get_accuracy(predictions, tags)

            epoch_losses_test.append(loss.item())
            epoch_accuracies_test.append(accuracy.item())

        print(f'[Test Epoch {ep}] Loss: {np.mean(epoch_losses_test)}, Accuracy: {np.mean(epoch_accuracies_test)}\n')

training...:   0%|          | 0/2128 [00:00<?, ?it/s]

[Train Epoch 1] Loss: 2.275204872614459, Accuracy: 0.26903195488721804


testing...:   0%|          | 0/532 [00:00<?, ?it/s]

[Test Epoch 1] Loss: 6.545017553451366, Accuracy: 0.04387922932330827


training...:   0%|          | 0/2128 [00:00<?, ?it/s]

[Train Epoch 2] Loss: 2.098785322299577, Accuracy: 0.33774377349624063


testing...:   0%|          | 0/532 [00:00<?, ?it/s]

[Test Epoch 2] Loss: 6.355222253871143, Accuracy: 0.06314614661654136


training...:   0%|          | 0/2128 [00:00<?, ?it/s]

[Train Epoch 3] Loss: 1.9677480381486334, Accuracy: 0.3924165883458647


testing...:   0%|          | 0/532 [00:00<?, ?it/s]

[Test Epoch 3] Loss: 6.447038882657101, Accuracy: 0.09528508772583384


training...:   0%|          | 0/2128 [00:00<?, ?it/s]

[Train Epoch 4] Loss: 1.8743875746972354, Accuracy: 0.4291735197368421


testing...:   0%|          | 0/532 [00:00<?, ?it/s]

[Test Epoch 4] Loss: 6.360899602560172, Accuracy: 0.14257910402309626


training...:   0%|          | 0/2128 [00:00<?, ?it/s]

[Train Epoch 5] Loss: 1.7679126655921005, Accuracy: 0.471187734962406


testing...:   0%|          | 0/532 [00:00<?, ?it/s]

[Test Epoch 5] Loss: 6.377052185230685, Accuracy: 0.11854244987795451


training...:   0%|          | 0/2128 [00:00<?, ?it/s]

[Train Epoch 6] Loss: 1.6966345108261234, Accuracy: 0.49161477913533835


testing...:   0%|          | 0/532 [00:00<?, ?it/s]

[Test Epoch 6] Loss: 6.868366763107758, Accuracy: 0.11202615915440527


training...:   0%|          | 0/2128 [00:00<?, ?it/s]

[Train Epoch 7] Loss: 1.6403488671841253, Accuracy: 0.5050810620300752


testing...:   0%|          | 0/532 [00:00<?, ?it/s]

[Test Epoch 7] Loss: 6.874265659124331, Accuracy: 0.1258497807316314


training...:   0%|          | 0/2128 [00:00<?, ?it/s]

[Train Epoch 8] Loss: 1.5989356216015225, Accuracy: 0.516359257518797


testing...:   0%|          | 0/532 [00:00<?, ?it/s]

[Test Epoch 8] Loss: 6.801830726458614, Accuracy: 0.10857221178878519


training...:   0%|          | 0/2128 [00:00<?, ?it/s]

[Train Epoch 9] Loss: 1.5644743985083318, Accuracy: 0.5254640507518797


testing...:   0%|          | 0/532 [00:00<?, ?it/s]

[Test Epoch 9] Loss: 6.7187232684372065, Accuracy: 0.11644345239028894


training...:   0%|          | 0/2128 [00:00<?, ?it/s]

[Train Epoch 10] Loss: 1.536321195507688, Accuracy: 0.534421992481203


testing...:   0%|          | 0/532 [00:00<?, ?it/s]

[Test Epoch 10] Loss: 6.855708142868559, Accuracy: 0.13018875314216866


training...:   0%|          | 0/2128 [00:00<?, ?it/s]

[Train Epoch 11] Loss: 1.5163253140460728, Accuracy: 0.5406191259398496


testing...:   0%|          | 0/532 [00:00<?, ?it/s]

[Test Epoch 11] Loss: 7.0794996345849865, Accuracy: 0.12289708648297124


training...:   0%|          | 0/2128 [00:00<?, ?it/s]

[Train Epoch 12] Loss: 1.4961105667446788, Accuracy: 0.5452890037593985


testing...:   0%|          | 0/532 [00:00<?, ?it/s]

[Test Epoch 12] Loss: 7.186793034238026, Accuracy: 0.13999843359329647


training...:   0%|          | 0/2128 [00:00<?, ?it/s]

[Train Epoch 13] Loss: 1.474646195097077, Accuracy: 0.5516036184210527


testing...:   0%|          | 0/532 [00:00<?, ?it/s]

[Test Epoch 13] Loss: 6.506217432201357, Accuracy: 0.1143718671711876


training...:   0%|          | 0/2128 [00:00<?, ?it/s]

[Train Epoch 14] Loss: 1.4668213883064742, Accuracy: 0.5568315319548872


testing...:   0%|          | 0/532 [00:00<?, ?it/s]

[Test Epoch 14] Loss: 6.781559067561214, Accuracy: 0.12473370930305998


training...:   0%|          | 0/2128 [00:00<?, ?it/s]

[Train Epoch 15] Loss: 1.4397065966649163, Accuracy: 0.5639978853383458


testing...:   0%|          | 0/532 [00:00<?, ?it/s]

[Test Epoch 15] Loss: 6.7911185178541595, Accuracy: 0.1337758458982733


training...:   0%|          | 0/2128 [00:00<?, ?it/s]

[Train Epoch 16] Loss: 1.4267115220987707, Accuracy: 0.568359375


testing...:   0%|          | 0/532 [00:00<?, ?it/s]

[Test Epoch 16] Loss: 6.973604271286412, Accuracy: 0.13335682958700604


training...:   0%|          | 0/2128 [00:00<?, ?it/s]

[Train Epoch 17] Loss: 1.4119728872818607, Accuracy: 0.5712670347744361


testing...:   0%|          | 0/532 [00:00<?, ?it/s]

[Test Epoch 17] Loss: 6.748978704438174, Accuracy: 0.12936638472111603


training...:   0%|          | 0/2128 [00:00<?, ?it/s]

[Train Epoch 18] Loss: 1.3947443830661643, Accuracy: 0.5767299107142857


testing...:   0%|          | 0/532 [00:00<?, ?it/s]

[Test Epoch 18] Loss: 7.03416201136166, Accuracy: 0.11896538221858498


training...:   0%|          | 0/2128 [00:00<?, ?it/s]

[Train Epoch 19] Loss: 1.38211640744309, Accuracy: 0.5796082001879699


testing...:   0%|          | 0/532 [00:00<?, ?it/s]

[Test Epoch 19] Loss: 7.0642170314502, Accuracy: 0.13270285088372857


training...:   0%|          | 0/2128 [00:00<?, ?it/s]

[Train Epoch 20] Loss: 1.3681867054892929, Accuracy: 0.5844249295112782


testing...:   0%|          | 0/532 [00:00<?, ?it/s]

[Test Epoch 20] Loss: 7.046278788631124, Accuracy: 0.1322564223319068

CPU times: total: 1h 20min 58s
Wall time: 1h 20min 47s


train and test `train_dataloader_wsd_2`

In [329]:
# train_dataloader_wsd_2
# test_dataloader_wsd_2

vocab_size = len(vocab) # 4000002
embedding = nn.Embedding.from_pretrained(embedding_tensor, freeze=True)
hidden_dim = 30 # 30
context_dim = 50 # 50
output_size = len(labels) # 19
num_layers = 4
dropout = 0.2
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
lr = 5e-4

criterion = nn.CrossEntropyLoss()

In [330]:
class ToneTagsLSTM_wsd_2(nn.Module):
    def __init__(self, vocab_size, embedding, hidden_dim, context_dim, output_size, num_layers, dropout):
        super(ToneTagsLSTM_wsd_2, self).__init__()
        # vocab_size = 400002
        # embedding_dim = 50
        # hidden_dim_lstm = 30
        # fc1 = 30 + 50 - 60
        # fc2 = 60 - 30
        # fc3 = 30 - 19
        # output_size = 19

        self.embedding = embedding

        self.lstm = nn.LSTM(self.embedding.embedding_dim, hidden_dim, num_layers=num_layers, bidirectional=True, dropout=dropout, batch_first=True)

        self.fc1 = nn.Linear(hidden_dim + context_dim, 60)
        self.fc2 = nn.Linear(60, 30)
        self.fc3 = nn.Linear(30, output_size)
        # self.out = nn.Softmax(output_size, dim=1)


    def forward(self, tokens, contexts):

        embedded = self.embedding(tokens)
        output, (hidden, cell) = self.lstm(embedded)

        lstm_out = torch.cat((hidden[-1], contexts), dim=1)

        fc1_out = self.fc1(lstm_out)


        fc2_out = self.fc2(fc1_out)
        out = self.fc3(fc2_out)
        # out = self.out(fc3_out)

        return out

In [331]:
# train_dataloader_wsd_2
# test_dataloader_wsd_2

model_wsd_2 = ToneTagsLSTM_wsd_2(
    vocab_size=vocab_size,
    embedding=embedding,
    hidden_dim=hidden_dim,
    context_dim=context_dim,
    output_size=output_size,
    num_layers=num_layers,
    dropout=dropout
).to(device)

optimizer_wsd_2 = optim.Adam(model_wsd_2.parameters(), lr=lr)

In [332]:
%%time

n_epochs = 20

for ep in range(1, n_epochs + 1):

    model_wsd_2.train()
    epoch_losses_train = []
    epoch_accuracies_train = []


    for tokens, contexts, tags in tqdm(train_dataloader_wsd_2, desc="training..."):
        optimizer_wsd_2.zero_grad()

        tokens = tokens.to(device)
        contexts = contexts.to(device)
        tags = tags.to(device)

        predictions = model_wsd_2(tokens, contexts)

        loss = criterion(predictions, tags)
        accuracy = get_accuracy(predictions, tags)

        loss.backward()
        optimizer_wsd_2.step()

        epoch_losses_train.append(loss.item())
        epoch_accuracies_train.append(accuracy.item())

    print(f'[Train Epoch {ep}] Loss: {np.mean(epoch_losses_train)}, Accuracy: {np.mean(epoch_accuracies_train)}')


    model_wsd_2.eval()
    epoch_losses_test = []
    epoch_accuracies_test = []

    with torch.no_grad():
        for tokens, contexts, tags in tqdm(test_dataloader_wsd_2, desc="testing..."):
            optimizer_wsd_2.zero_grad()

            tokens = tokens.to(device)
            contexts = contexts.to(device)
            tags = tags.to(device)

            predictions = model_wsd_2(tokens, contexts)

            loss = criterion(predictions, tags)
            accuracy = get_accuracy(predictions, tags)

            epoch_losses_test.append(loss.item())
            epoch_accuracies_test.append(accuracy.item())

        print(f'[Test Epoch {ep}] Loss: {np.mean(epoch_losses_test)}, Accuracy: {np.mean(epoch_accuracies_test)}\n')

training...:   0%|          | 0/2132 [00:00<?, ?it/s]

ValueError: expected sequence of length 50 at dim 1 (got 0)