In [2]:
import pandas as pd
pd.set_option("display.max_colwidth", 1000)
pd.set_option('display.max_rows', 100)
import re
import nltk
from nltk.chunk import *
from nltk.chunk.util import *
from nltk.chunk.regexp import *
from nltk.tokenize.casual import casual_tokenize
import numpy as np

### Data pre-processing plan 
- Clean tweets : strip out handles
- Explore patterns
- Filter out helpful pattern

In [3]:
more_words = ['proud', 'emotional', 'tired', 'low', 'down', 'sick', 'nostalgic', 'stressed', 'uncomfortable', 'grateful', 'sorry', 'jealous','overwhelmed', 'unmotivated', 'exhausted', 'motivated', 'sentimental']\
           + ['drained', 'unmotivated', 'hungry', 'hopeless', 'insecure', 'adventurous', 'euphoric', 'accomplished', 'nauseous', 'shitty', 'awful', 'stupid', 'horrible', 'foolish']
with open('emotions_updated.txt', 'r') as f:
    emotion_ls = f.readlines()
emotion_ls = [w.strip('\n') for w in emotion_ls]
emo_set = set(emotion_ls)
# print(emo_set, len(emo_set))

## Model development

In [4]:
import math
import random

import numpy as np
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms as transforms
import matplotlib.pyplot as plt


### MLP

In [5]:
class MLP(nn.Module):
    def __init__(self, vocab_size, hidden_size, num_class=20, dropout=0.3):
        super().__init__()

        self.layers  = nn.Sequential(
            nn.Linear(77, hidden_size * 4),
            nn.ReLU(), 
            nn.Linear( hidden_size * 4, hidden_size * 2),
            nn.ReLU(),
            nn.BatchNorm1d(hidden_size*2),
            nn.Linear(hidden_size*2, hidden_size*2),
            nn.ReLU(),
            nn.Linear(hidden_size*2, hidden_size*2),
            nn.ReLU(),
            nn.BatchNorm1d(hidden_size*2),
            nn.Linear(hidden_size*2, hidden_size),
            nn.ReLU(),
            nn.Dropout(p=dropout), 
            nn.Linear(hidden_size, num_class),
#             nn.Softmax()
        )

    def forward(self, x):        
        return self.layers(x)

### CNN

I adapt the CNN architecture for text classification problem from [Zhang and Wallace 2015](https://arxiv.org/pdf/1510.03820.pdf) paper

In [6]:
class CNN(nn.Module):
    def __init__(self,num_embeddings,
                         embed_dim=300,
                         filter_sizes=[3, 4, 5],
                         num_filters=[100, 100, 100],
                         num_classes=20,
                        dropout=0.3):
        super().__init__()
        self.embed_dim = embed_dim
        self.embedding = nn.Embedding(num_embeddings=num_embeddings,
                                      embedding_dim=self.embed_dim,
                                      padding_idx=0,
                                      max_norm=5.0)
    # Conv Network
        self.conv1d_list = nn.ModuleList([
            nn.Conv1d(in_channels=self.embed_dim,
                      out_channels=num_filters[i],
                      kernel_size=filter_sizes[i])
            for i in range(len(filter_sizes))
        ])
        # Fully-connected layer and Dropout
        self.fc = nn.Linear(np.sum(num_filters), num_classes)
        self.dropout = nn.Dropout(p=dropout)
        
    def forward(self, x):
        x_embed = self.embedding(x).float()

        x_reshaped = x_embed.permute(0, 2, 1)

        x_conv_list = [F.relu(conv1d(x_reshaped)) for conv1d in self.conv1d_list]

        x_pool_list = [F.max_pool1d(x_conv, kernel_size=x_conv.shape[2])
            for x_conv in x_conv_list]
        

        x_fc = torch.cat([x_pool.squeeze(dim=2) for x_pool in x_pool_list],
                         dim=1)

        logits = self.fc(self.dropout(x_fc))

        return logits

### RNN

To adapt RNN architecture for text classification problem, only final hidden state is used. This state is fed into a linear layer for class prediction

In [7]:
class RNN(nn.Module):

    def __init__(self,num_embeddings,
                         embed_dim=64,
                         hidden_size=32,
                         num_classes=20,
                        dropout=0.3):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.layer1 = nn.Embedding( num_embeddings  , embed_dim  )
        self.layer2 = nn.GRU(       embed_dim , hidden_size  )
        self.layer3 = nn.Linear(    hidden_size , num_classes   )

        
    def forward(self, word_seq ):
        
        h_init = torch.zeros(1, word_seq.shape[1], self.hidden_size).to(device)
        
        g_seq  =   self.layer1( word_seq )   
        h_seq , h_final =   self.layer2( g_seq , h_init )
        score_seq =   self.layer3(h_final.permute(1, 0, 2) )
        
        return score_seq.squeeze(1)

### ANN

This ANN model is addapted from Luong et al. 2016 model. To adapt to a text classification problem, attention machanism is applied to the final hidden state. After this attention layer, output is passed to a fully connected layer for prediction

In [8]:
class ANN(nn.Module):
    def __init__(self,num_embeddings,
                         embed_dim=64,
                         hidden_size=32,
                         num_classes=20,
                        dropout=0.3):
        super(ANN, self).__init__()
        
        self.hidden_size = hidden_size
        
        self.embed = nn.Embedding( num_embeddings  , embed_dim  )
        self.encoder = nn.GRU(       embed_dim , hidden_size  )
        self.Wc = nn.Linear(hidden_size * 2, hidden_size)
        self.out = nn.Linear(    hidden_size  , num_classes   )

    def forward(self, inp_seq):
        
        h_init = torch.zeros(1, inp_seq.shape[1], self.hidden_size).to(device)
        g_seq               =   self.embed( inp_seq )   
        h_seq , h_final     =   self.encoder( g_seq , h_init )

        # attention machanism
        s_init = h_final
        s_seq_trans = torch.swapaxes(s_init, 0,1)
        h_seq_trans = torch.swapaxes(torch.swapaxes(h_seq, 0, 2), 0, 1)
   
        bacthed_et = s_seq_trans.bmm(h_seq_trans)
        batched_alpha_t = torch.softmax(bacthed_et, axis=2) # shape = batch_size * outseq_length * in_seq_length
        #
        batched_ct = torch.bmm(batched_alpha_t, torch.swapaxes(h_seq, 0, 1))
        batched_ct = torch.swapaxes(batched_ct, 0, 1)
        
        batched_atten_t = torch.tanh(self.Wc(torch.cat((batched_ct, s_init), 2)) )
#         batched_atten_t = torch.tanh(self.Wc(batched_ct) )
    
        out = self.out(batched_atten_t)
#         out = torch.log_softmax(self.output(batched_atten_t), axis=2)
    
        return out.squeeze(axis=0)
        

### Pretrained Model

In [9]:
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator
from typing import Iterable, List
from torch.nn.utils.rnn import pad_sequence
import torch
from torch.utils.data import DataLoader
import pickle
from torch.utils.data import Dataset
import os
import numpy as np
import pickle
import gc, time
# For this dataset, we are trying to translate french to english
SRC_LANGUAGE = 'fr'
TGT_LANGUAGE = 'en'


# First, we create a custom dataset to load the data. Each item is a pair of french and english datapoint
class EmoCauseDataset(Dataset):
    def __init__(self,path, train, train_size=10000, test_size=1000, max_len=250):
        self.dir = path
        self.all_data = None
        self.data = None
        self.train_size=train_size
        self.test_size=test_size
        with open(path, 'rb') as f:
            self.all_data = pickle.load(f)
        self.labels = list(zip(*self.all_data))[1]
        self.label_map = {emo:i for i, emo in enumerate(list(set(self.labels)))}
        if train:
            self.data = self.all_data[:self.train_size]
        else:
            self.data = self.all_data[self.train_size + 1:self.train_size + self.test_size]
 
    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx][0], self.label_map[self.data[idx][1]]
    
    def get_class_map(self):
        return {v:k for k, v in self.label_map.items()}
    def get_label(self):
        return list(zip(*self.data))[1]
    
# Helper function to call token_transform
def yield_tokens(data_iter: Iterable) -> List[str]:

    for data_sample in data_iter:
        yield token_transform(data_sample[0])


# Functions transform the input sentence to a format that can be used for training 
def sequential_transforms(*transforms):
    def func(txt_input):
        for transform in transforms:
            txt_input = transform(txt_input)
        return txt_input
    return func

def tensor_transform(token_ids: List[int]):
    return torch.cat((torch.tensor([BOS_IDX]),
                      torch.tensor(token_ids),
                      torch.tensor([EOS_IDX])))

def collate_fn(tweet, emo,batch_first=True):
    max_len = 75
    dummie = " ".join(['dm'] * max_len)
    tweet_batch, emo_batch = [], []
    for tw, e in zip(tweet, emo):
        tweet_batch.append(text_transform(tw.strip('\n')))
#         emo_batch.append(F.one_hot(torch.tensor(e), num_classes=20))
        emo_batch.append(torch.tensor(e))
    tweet_batch.append(text_transform(dummie))
    tweet_batch = pad_sequence(tweet_batch, padding_value=PAD_IDX,batch_first=batch_first)
    emo_batch = torch.stack(emo_batch)
    if batch_first == True:
        tweet_batch = tweet_batch[:-1]
    else:
        tweet_batch = tweet_batch[:, :-1]
    return tweet_batch, emo_batch


In [12]:
# Load tokenizer
token_transform = get_tokenizer('spacy', language='en_core_web_sm')
datapath = "label_final.pickle"
dataset = EmoCauseDataset(datapath, train=True)

# Define special symbols and indices
UNK_IDX, PAD_IDX, BOS_IDX, EOS_IDX = 0, 1, 2, 3
# Make sure the tokens are in order of their indices to properly insert them in vocab
special_symbols = ['<unk>', '<pad>', '<bos>', '<eos>']
train_iter = iter(dataset)
vocab_transform = build_vocab_from_iterator(yield_tokens(train_iter),
                                                    min_freq=1,
                                                    specials=special_symbols,
                                                    special_first=True)
vocab_transform.set_default_index(UNK_IDX)
text_transform = sequential_transforms(token_transform, #Tokenization
                                               vocab_transform, #Numericalization
                                               tensor_transform) # Add BOS/EOS and create tensor        
label_transform = sequential_transforms(token_transform)
torch.manual_seed(0)
VOCAB_SIZE = len(vocab_transform)
# TGT_VOCAB_SIZE = len(vocab_transform[TGT_LANGUAGE])

In [18]:
def batch_train(model, x, y, x_type=torch.LongTensor):
    optimizer.zero_grad()
    # Transform inputs

    # send them to the gpu
    minibatch_data=x.type(x_type).to(device)
    minibatch_label=y.type(torch.LongTensor).to(device)
#     print(x.shape, y.shape)
    

    # Forward pass
    y_pred = model(minibatch_data)
#     print(y_pred.shape, minibatch_label.shape)
#     print(y_pred)
    loss = criterion(y_pred, minibatch_label)

    # backward pass to compute dL/dR, dL/dV and dL/dW
    loss.backward()

    # do one step of stochastic gradient descent: R=R-lr(dL/dR), V=V-lr(dL/dV), ...
    normalize_gradient(model)
    optimizer.step()
    # update the running loss  
    return loss.detach().item()

def normalize_gradient(net):

    grad_norm_sq=0

    for p in net.parameters():
        grad_norm_sq += p.grad.data.norm()**2

    grad_norm=math.sqrt(grad_norm_sq)

    if grad_norm<1e-4:
        net.zero_grad()
        print('grad norm close to zero')
    else:    
        for p in net.parameters():
            p.grad.data.div_(grad_norm)

    return grad_norm

def test(model, test_loader, class_map, text_type=torch.LongTensor, batch_first=True):
    model.to(device)
    model.eval()
    print('device', device)
    preds = []
    labels = []
    with torch.no_grad():
        for text, label in test_loader:
            text, _ = collate_fn(text, label, batch_first=batch_first)
            texts = text.type(text_type).to(device)
            
            labels.append(label)
            outputs = model(texts).cpu()
            # get the label predictions
            preds += outputs.argmax(dim=1).tolist()
    
#     preds = list(map(lambda x: class_map[x], preds))
    return torch.tensor(preds), torch.cat(labels)


### Test Model for MLP, CNN and RNN

In [25]:
def train(model, my_lr, num_epochs,x_type=torch.LongTensor, batch_first=True):
    for epoch in range(num_epochs):
          # keep the learning rate to 1 during the first 4 epochs, then divide by 1.1 at every epoch
        if epoch % 4 ==0:
            my_lr = my_lr / 1.05
        start = time.time()

        # set the running quantities to zero at the beginning of the epoch
        running_loss=0
        num_batches=0    
        model.train()

        for x, y in train_dataloader:

            x, y = collate_fn(x, y, batch_first=batch_first)
            loss = batch_train(model, x, y,x_type)      
            # Set the gradients to zeros
            running_loss += loss
            num_batches += 1
    #         # Collect garbage to prevent OOM
            gc.collect()
        # compute stats for the full training set
        total_loss = running_loss / num_batches
        elapsed = time.time() - start

        print('')
        print('epoch=',epoch, '\t time=', elapsed,'\t lr=', my_lr, '\t exp(loss)=',  math.exp(total_loss))

### MLP performance

In [26]:
# Hyperparameters
num_epochs = 30
hidden_size = 32
my_lr = 0.3
bs = 32
num_class=20

# Best setting params:
#     CNN: hidden_size=64, num_epochs=10, my_lr=0.3, bs=32
#     RNN: hidden_size=32, num_epochs=40, my_lr=0.5, bs=32
# The code below is taken fromthe VRNN demo
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)


# Variables
VOCAB_SIZE = len(vocab_transform)
train_dataset = EmoCauseDataset(datapath,train=True)
train_dataloader = DataLoader(train_dataset, batch_size=bs, shuffle=True)
test_dataset = EmoCauseDataset(datapath, train=False)
test_dataloader = DataLoader(test_dataset, batch_size=bs, shuffle=False)


model = MLP(VOCAB_SIZE,
         hidden_size=hidden_size, 
        num_class=20, dropout=0.3).to(device)

batch_first=True # batch_first is all in RNN, set = true in other nets 

# model.layers.weight.data.uniform_(-0.1, 0.1)
# model.layer3.weight.data.uniform_(-0.1, 0.1)


# model = LSTMClassifier(batch_size=bs, output_size=num_class, hidden_size=hidden_size, vocab_size=VOCAB_SIZE, embedding_length=300).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=my_lr, momentum=0.9)

train(model, my_lr, num_epochs,torch.FloatTensor,batch_first)
gc.collect()
print("Accuracy = ", test_acc)

cuda


  emo_batch.append(torch.tensor(e))



epoch= 0 	 time= 45.11897873878479 	 lr= 0.2857142857142857 	 exp(loss)= 22.124884881280536

epoch= 1 	 time= 31.799460649490356 	 lr= 0.2857142857142857 	 exp(loss)= 22.59522294253321

epoch= 2 	 time= 31.815052032470703 	 lr= 0.2857142857142857 	 exp(loss)= 22.651421265311726

epoch= 3 	 time= 31.767410039901733 	 lr= 0.2857142857142857 	 exp(loss)= 22.2026917152874

epoch= 4 	 time= 31.74264621734619 	 lr= 0.27210884353741494 	 exp(loss)= 22.501127122520277

epoch= 5 	 time= 31.819934368133545 	 lr= 0.27210884353741494 	 exp(loss)= 22.356925556898158

epoch= 6 	 time= 31.780929565429688 	 lr= 0.27210884353741494 	 exp(loss)= 22.511206437801604

epoch= 7 	 time= 32.12287926673889 	 lr= 0.27210884353741494 	 exp(loss)= 22.677455107372147

epoch= 8 	 time= 31.872369050979614 	 lr= 0.2591512795594428 	 exp(loss)= 22.627863395974444

epoch= 9 	 time= 31.781724214553833 	 lr= 0.2591512795594428 	 exp(loss)= 22.772442456026084

epoch= 10 	 time= 31.758777856826782 	 lr= 0.2591512795594428

NameError: name 'test_acc' is not defined

In [28]:
y_preds, y_test = test(model,test_dataloader, class_map=test_dataset.get_class_map(),text_type=torch.FloatTensor, batch_first=batch_first)
# torch.tensor(y_preds).shape
test_acc = torch.sum(y_preds == y_test) / 1000
print("Accuracy = ", test_acc)

device cuda


  emo_batch.append(torch.tensor(e))


Accuracy =  tensor(0.0470)


### CNN performance

In [31]:
# Hyperparameters
num_epochs = 30
hidden_size = 32
my_lr = 0.3
bs = 64
num_class=20

# Best setting params:
#     CNN: hidden_size=64, num_epochs=10, my_lr=0.3, bs=32
#     RNN: hidden_size=32, num_epochs=40, my_lr=0.5, bs=32
# The code below is taken fromthe VRNN demo
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)


# Variables
VOCAB_SIZE = len(vocab_transform)
train_dataset = EmoCauseDataset(datapath,train=True)
train_dataloader = DataLoader(train_dataset, batch_size=bs, shuffle=True)
test_dataset = EmoCauseDataset(datapath, train=False)
test_dataloader = DataLoader(test_dataset, batch_size=bs, shuffle=False)


model = CNN(VOCAB_SIZE,
             embed_dim=64,
             filter_sizes=[3, 4, 5],
            num_filters=[100, 100, 100],
             num_classes=20,
            dropout=0.3).to(device)

batch_first=True # batch_first is all in RNN, set = true in other nets 

# model.layers.weight.data.uniform_(-0.1, 0.1)
# model.layer3.weight.data.uniform_(-0.1, 0.1)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=my_lr, momentum=0.9)

train(model, my_lr, num_epochs,torch.LongTensor, batch_first)
gc.collect()

y_preds, y_test = test(model,test_dataloader, class_map=test_dataset.get_class_map(),text_type=torch.LongTensor, batch_first=batch_first)
# torch.tensor(y_preds).shape
test_acc = torch.sum(y_preds == y_test) / 1000
print("Accuracy = ", test_acc)

cuda


  emo_batch.append(torch.tensor(e))



epoch= 0 	 time= 18.29328465461731 	 lr= 0.2857142857142857 	 exp(loss)= 15.25701819413573

epoch= 1 	 time= 17.222455501556396 	 lr= 0.2857142857142857 	 exp(loss)= 10.194853318502256

epoch= 2 	 time= 17.1863374710083 	 lr= 0.2857142857142857 	 exp(loss)= 8.578942864613973

epoch= 3 	 time= 17.202964544296265 	 lr= 0.2857142857142857 	 exp(loss)= 7.0039026438655245

epoch= 4 	 time= 17.174224853515625 	 lr= 0.27210884353741494 	 exp(loss)= 5.785516845638894

epoch= 5 	 time= 17.134090185165405 	 lr= 0.27210884353741494 	 exp(loss)= 4.930101995499087

epoch= 6 	 time= 17.202319383621216 	 lr= 0.27210884353741494 	 exp(loss)= 4.374101009213957

epoch= 7 	 time= 17.134012699127197 	 lr= 0.27210884353741494 	 exp(loss)= 3.8457467850631324

epoch= 8 	 time= 17.175636291503906 	 lr= 0.2591512795594428 	 exp(loss)= 3.5468072246810864

epoch= 9 	 time= 17.178335905075073 	 lr= 0.2591512795594428 	 exp(loss)= 3.4099213907276136

epoch= 10 	 time= 17.142795085906982 	 lr= 0.2591512795594428 	

### RNN performance

In [33]:

# Hyperparameters
num_epochs = 30
hidden_size = 32
my_lr = 0.3
bs = 32
num_class=20

# Best setting params:
#     CNN: hidden_size=64, num_epochs=10, my_lr=0.3, bs=32
#     RNN: hidden_size=32, num_epochs=40, my_lr=0.5, bs=32
# The code below is taken fromthe VRNN demo
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)


# Variables
VOCAB_SIZE = len(vocab_transform)
train_dataset = EmoCauseDataset(datapath,train=True)
train_dataloader = DataLoader(train_dataset, batch_size=bs, shuffle=True)
test_dataset = EmoCauseDataset(datapath, train=False)
test_dataloader = DataLoader(test_dataset, batch_size=bs, shuffle=False)


model = RNN(VOCAB_SIZE,
             embed_dim=64,
             hidden_size=20,
             num_classes=20,
            dropout=0.3).to(device)

batch_first=False # batch_first is all in RNN, set = true in other nets 

model.layer1.weight.data.uniform_(-0.1, 0.1)

model.layer3.weight.data.uniform_(-0.1, 0.1)


# model = LSTMClassifier(batch_size=bs, output_size=num_class, hidden_size=hidden_size, vocab_size=VOCAB_SIZE, embedding_length=300).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=my_lr, momentum=0.9)

train(model, my_lr, num_epochs,torch.LongTensor, batch_first)
gc.collect()
y_preds, y_test = test(model,test_dataloader, class_map=test_dataset.get_class_map(),text_type=torch.LongTensor, batch_first=batch_first)
# torch.tensor(y_preds).shape
test_acc = torch.sum(y_preds == y_test) / 1000
print("Accuracy = ", test_acc)

cuda


  emo_batch.append(torch.tensor(e))



epoch= 0 	 time= 32.81334686279297 	 lr= 0.2857142857142857 	 exp(loss)= 22.627375464142165

epoch= 1 	 time= 32.52312612533569 	 lr= 0.2857142857142857 	 exp(loss)= 27.387163778826494

epoch= 2 	 time= 32.568135499954224 	 lr= 0.2857142857142857 	 exp(loss)= 16.969647195479673

epoch= 3 	 time= 32.5645067691803 	 lr= 0.2857142857142857 	 exp(loss)= 11.672814302990439

epoch= 4 	 time= 32.56657338142395 	 lr= 0.27210884353741494 	 exp(loss)= 11.102341513752233

epoch= 5 	 time= 32.54464817047119 	 lr= 0.27210884353741494 	 exp(loss)= 10.667305271497806

epoch= 6 	 time= 32.581042766571045 	 lr= 0.27210884353741494 	 exp(loss)= 10.77504275683238

epoch= 7 	 time= 32.58101987838745 	 lr= 0.27210884353741494 	 exp(loss)= 10.654790155778093

epoch= 8 	 time= 32.521653175354004 	 lr= 0.2591512795594428 	 exp(loss)= 10.593118823978825

epoch= 9 	 time= 32.54378914833069 	 lr= 0.2591512795594428 	 exp(loss)= 10.338412702581197

epoch= 10 	 time= 32.81305432319641 	 lr= 0.2591512795594428 	 e

### ANN performance

In [34]:
import gc
# Hyperparameters
num_epochs = 30
hidden_size = 20
my_lr = 0.3
bs = 256
num_class=20
batch_first=False
datapath = "label_final.pickle"

# The code below is taken fromthe VRNN demo
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)


# Variables
VOCAB_SIZE = len(vocab_transform)
train_dataset = EmoCauseDataset(datapath,train=True)
train_dataloader = DataLoader(train_dataset, batch_size=bs, shuffle=True)
test_dataset = EmoCauseDataset(datapath, train=False)
test_dataloader = DataLoader(test_dataset, batch_size=bs, shuffle=False)


model = ANN(VOCAB_SIZE,
             embed_dim=64,
             hidden_size=20,
             num_classes=20,
            dropout=0.3).to(device)
model.embed.weight.data.uniform_(-0.1, 0.1)

model.out.weight.data.uniform_(-0.1, 0.1)

# model = LSTMClassifier(batch_size=bs, output_size=num_class, hidden_size=hidden_size, vocab_size=VOCAB_SIZE, embedding_length=300).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=my_lr, momentum=0.9)
gc.collect()
train(model, my_lr, num_epochs,torch.LongTensor, batch_first)
y_preds, y_test = test(model,test_dataloader, class_map=test_dataset.get_class_map(),text_type=torch.LongTensor, batch_first=batch_first)
# torch.tensor(y_preds).shape
test_acc = torch.sum(y_preds == y_test) / 1000
print("Accuracy = ", test_acc)

cuda


  emo_batch.append(torch.tensor(e))



epoch= 0 	 time= 5.426100730895996 	 lr= 0.2857142857142857 	 exp(loss)= 20.53835859861445

epoch= 1 	 time= 5.398729562759399 	 lr= 0.2857142857142857 	 exp(loss)= 20.671087840278794

epoch= 2 	 time= 5.266108751296997 	 lr= 0.2857142857142857 	 exp(loss)= 20.548292295526657

epoch= 3 	 time= 5.272995233535767 	 lr= 0.2857142857142857 	 exp(loss)= 17.621593129993542

epoch= 4 	 time= 5.223921060562134 	 lr= 0.27210884353741494 	 exp(loss)= 13.289746529032968

epoch= 5 	 time= 5.27517294883728 	 lr= 0.27210884353741494 	 exp(loss)= 11.021746836301215

epoch= 6 	 time= 5.228762865066528 	 lr= 0.27210884353741494 	 exp(loss)= 10.598081418778436

epoch= 7 	 time= 5.274895191192627 	 lr= 0.27210884353741494 	 exp(loss)= 10.71849047329211

epoch= 8 	 time= 5.242192029953003 	 lr= 0.2591512795594428 	 exp(loss)= 10.200056955116363

epoch= 9 	 time= 5.295134782791138 	 lr= 0.2591512795594428 	 exp(loss)= 10.015129637201584

epoch= 10 	 time= 5.226464509963989 	 lr= 0.2591512795594428 	 exp(l