In [1]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from tqdm import tqdm
from torch.optim import Adam
from torch.nn import BCEWithLogitsLoss
from time import time

from torch.nn import Module
import torch
from torch.nn.functional import relu
from torch import tanh

from torch.nn import Module, Embedding, GRU, LSTM, Linear, ModuleList, Dropout, Dropout2d, Conv1d, MaxPool1d
from torch.nn.functional import softmax

from torch.autograd import Variable
from sklearn.metrics import roc_auc_score, f1_score, accuracy_score
from nltk.tokenize import TweetTokenizer
from sklearn.metrics import classification_report as sklearn_cr

In [2]:
train_path='SemEval2018-Task1-all-data/SemEval2018-Task1-all-data/English/E-c/2018-E-c-En-train.txt'
test_path='SemEval2018-Task1-all-data/SemEval2018-Task1-all-data/English/E-c/2018-E-c-En-dev.txt'

In [3]:
train_df = pd.read_csv(train_path, sep='\t')
test_df = pd.read_csv(test_path, sep='\t')

In [4]:
GLOVE_PATH = "glove.twitter.27B/glove.twitter.27B.200d.txt"
BATCH_SIZE = 64
UNK_TOKEN = "$%UNK%$"
PAD_TOKEN = "$%PAD%$"
T_LIST = ['anger', 'anticipation', 'disgust', 'fear', 'joy', 
          'love', 'optimism', 'pessimism', 'sadness', 'surprise', 'trust']
i2t = {i:t for i,t in enumerate(T_LIST)}
t2i = {t:i for i,t in enumerate(T_LIST)}

EPOCHS = 20


In [5]:
tweet_tokenizer = TweetTokenizer(preserve_case=False, strip_handles=True, reduce_len=True)

def tokenize(tweet):
    return tweet_tokenizer.tokenize(tweet)

In [6]:
train_df['tokens'] = train_df['Tweet'].map(lambda x: tokenize(x))
test_df['tokens'] = test_df['Tweet'].map(lambda x: tokenize(x))

In [7]:
train_df['lengths'] = train_df['tokens'].map(lambda x: len(x))
test_df['lengths'] = test_df['tokens'].map(lambda x: len(x))

In [8]:
def get_embeddings(path=GLOVE_PATH):
    embeddings_dict = {}
    with open(path,'r', encoding='utf8') as f:
        for line in tqdm(f):
                values = line.strip().split(" ")
#                 if values[0] in tokens:
                coefs = np.asarray(values[1:], dtype='float32')
                embeddings_dict[values[0]] = coefs
    return embeddings_dict

def create_freq_vocabulary(tokenized_texts):
    token_dict = {}
    for text in tokenized_texts:
        for token in text:
            try:
                token_dict[token] += 1
            except KeyError:
                token_dict[token] = 1
    return token_dict


def get_top_freq_words(token_dict, min_freq):
    return [x for x in token_dict if token_dict[x] >= min_freq]


def get_unique_tokens(tokenized_texts, min_freq):
    voc = create_freq_vocabulary(tokenized_texts)
    print("tokens found in training data set:", len(voc))
    freq_words = get_top_freq_words(voc, min_freq)
    print("tokens with frequency >= %d: %d" % (min_freq, len(freq_words)))
    return freq_words


def create_final_dictionary(freq_words, embeddings_dict, unk_token, pad_token):
    words = list(set(freq_words).intersection(embeddings_dict.keys()))
    print("embedded tokens: %d" % (len(words)))
    words = [pad_token, unk_token] + words
    return {w: i for i, w in enumerate(words)}


def get_embeddings_matrix(word_dict, embeddings_dict, size):
    embs = np.zeros(shape=(len(word_dict), size))
    for word in tqdm(word_dict):
        try:
            embs[word_dict[word]] = embeddings_dict[word]
        except KeyError:
            print('no embedding for: ', word)
    embs[1] = np.mean(embs[2:])
    return embs


def get_indexed_value(w2i, word, unk_token):
    try:
        return w2i[word]
    except KeyError:
        return w2i[unk_token]


def get_indexed_text(w2i, words, unk_token):
    return [get_indexed_value(w2i, word, unk_token) for word in words]


def pad_text(tokenized_text, maxlen, pad_tkn):
    if len(tokenized_text) < maxlen:
        return [pad_tkn] * (maxlen - len(tokenized_text)) + tokenized_text
    else:
        return tokenized_text[len(tokenized_text) - maxlen:]

def create_batches(df, batch_size):
    batches = []
    offset = 0
    while offset < len(df):
        upper_limit = min(len(df), offset+batch_size)
        batch_df  = df.iloc[offset: upper_limit]
        maxlen = batch_df['lengths'].values[-1]
            
        batch_df['x'] = batch_df['tokens'].map(lambda x:get_indexed_text(w2i, pad_text(x, maxlen, PAD_TOKEN),UNK_TOKEN))
        batches.append({'x':np.array([x for x in batch_df['x']], dtype=np.int32), 
                        'y': np.array(batch_df[T_LIST], dtype=np.float32)})
#         print(np.array([x for x in batch_df['x']], dtype=np.int32))
        offset = upper_limit
    return batches        

In [9]:
word_freq_dict = create_freq_vocabulary(list(train_df['tokens']) + list(test_df['tokens']))

In [10]:
tokens = get_top_freq_words(word_freq_dict, 1)

In [11]:
train_df = train_df.sort_values(by="lengths")
test_df = test_df.sort_values(by="lengths")

In [12]:
len(tokens)

16100

In [13]:
embeddings = get_embeddings()

1193514it [01:24, 14070.42it/s]


In [14]:
w2i = create_final_dictionary(tokens, embeddings, unk_token=UNK_TOKEN, pad_token=PAD_TOKEN)
emb_matrix = get_embeddings_matrix(w2i, embeddings, 200)

100%|██████████| 11080/11080 [00:00<00:00, 241554.38it/s]

embedded tokens: 11078
no embedding for:  $%PAD%$
no embedding for:  $%UNK%$





In [15]:

class SpatialDropout(Dropout2d):
    def forward(self, x):
        x = x.unsqueeze(2)    # (N, T, 1, K)
        x = x.permute(0, 3, 2, 1)  # (N, K, 1, T)
        x = super(SpatialDropout, self).forward(x)  # (N, K, 1, T), some features are masked
        x = x.permute(0, 3, 2, 1)  # (N, T, 1, K)
        x = x.squeeze(2)  # (N, T, K)
        return x


class Layer(Module):
    def __init__(self):
        super(Layer, self).__init__()

    def get_output_size(self):
        raise NotImplementedError

    def get_input_size(self):
        raise NotImplementedError

    def forward(self, x):
        raise NotImplementedError


class PretrainedEmbeddingLayer(Layer):
    def __init__(self, embeddings, dropout=0.0, trainable=True):
        """
        :param embeddings: a numpy array with the embeddings
        :param trainable: if false the embeddings will be frozen
        """
        super(PretrainedEmbeddingLayer, self).__init__()
        self.__input_size = embeddings.shape[0]
        self.__output_size = embeddings.shape[1]
        self.dropout = SpatialDropout(dropout)
        self.embed = Embedding(embeddings.shape[0], embeddings.shape[1])
        self.embed.weight.data.copy_(torch.from_numpy(embeddings))
        if not trainable:
            self.embed.weight.requires_grad = False

    def forward(self, x):
        return self.dropout(self.embed(x))

    def get_output_size(self):
        return self.__output_size

    def get_input_size(self):
        return self.__input_size

class ConvBlock(Layer):
    def __init__(self,in_channels, filters, window=2, dropout=0.0, trainable=True):
        """
        :param embeddings: a numpy array with the embeddings
        :param trainable: if false the embeddings will be frozen
        """
        super(ConvBlock, self).__init__()
        self.__input_size =  in_channels
        self.__output_size = filters
        self.conv = Conv1d(in_channels=in_channels, out_channels=filters, kernel_size=window, padding=(window // 2))

        
    def forward(self, x):
        return torch.max(self.conv(x.permute(0,2,1)), 2)[0]

    def get_output_size(self):
        return self.__output_size

    def get_input_size(self):
        return self.__input_size

class CellLayer(Layer):
    def __init__(self, is_gru, input_size,  hidden_size, bidirectional, stacked_layers):
        """
        :param is_gru: GRU cell type if true, otherwise LSTM
        :param input_size: the size of the tensors that will be used as input (embeddings or projected embeddings)
        :param hidden_size: the size of the cell
        :param bidirectional: boolean
        :param stacked_layers: the number of stacked layers
        """
        super(CellLayer, self).__init__()
        if is_gru:
            self.cell = GRU(input_size=input_size, hidden_size=hidden_size, batch_first=True,
                            bidirectional=bidirectional, num_layers=stacked_layers)

        else:
            self.cell = LSTM(input_size=input_size, hidden_size=hidden_size, batch_first=True,
                             bidirectional=bidirectional, num_layers=stacked_layers)

        self.__output_size = hidden_size * 2 if bidirectional else hidden_size
        self.__input_size = input_size

    def forward(self, x):
        return self.cell(x)[0]

    def get_output_size(self):
        return self.__output_size

    def get_input_size(self):
        return self.__input_size


class MLP(Layer):
    def __init__(self, num_of_layers, init_size, out_size, dropout=0.0, inner_activation=None, outer_activation=None):
        """
        :param num_of_layers: the total number of layers
        :param init_size: unit size of hidden layers
        :param out_size: output size
        :param inner_activation: the activation function for the inner layers
        :param outer_activation: the activation function for the last layer
        """
        super(MLP, self).__init__()
        self.num_of_layers = num_of_layers
        self.__input_size = init_size
        self.__output_size = out_size
        self.dropout = Dropout(dropout)
        if self.num_of_layers > 0:
            self.layers = ModuleList([Linear(init_size, init_size) for _ in range(num_of_layers-1)] + [Linear(init_size, out_size)])
            self.activation_list = [inner_activation for _ in range(num_of_layers - 1)] + [outer_activation]

    def forward(self, x):
        if self.num_of_layers > 0:
            for layer, activation in zip(self.layers, self.activation_list):
                if activation is None:
                    x = self.dropout(layer(x))
                else:
                    x = self.dropout(activation(layer(x)))
        return x

    def get_output_size(self):
        return self.__output_size

    def get_input_size(self):
        return self.__input_size


class LastState(Layer):
    def __init__(self, input_size, output_size):
        super(LastState, self).__init__()
        self.__input_size = input_size
        self.__output_size = output_size

    def forward(self, x):
        return x[:, -1, :]

    def get_input_size(self):
        return self.__input_size

    def get_output_size(self):
        return self.__output_size

class AttendedState(Layer):
    def __init__(self, num_of_layers, hidden_size, dropout=0.0, activation=None):
        super(AttendedState, self).__init__()
        self.__input_size = hidden_size
        self.__output_size = hidden_size
        self.mlp = MLP(num_of_layers=num_of_layers,
                       init_size=hidden_size, out_size=hidden_size,
                       dropout=dropout,
                       inner_activation=activation,
                       outer_activation=activation)

        self.attention = Linear(hidden_size, 1)

    def forward(self, x):
        states_mlp = self.mlp(x)
        att_sc_dist = self.attention(states_mlp).squeeze(-1)
        att_weights = softmax(att_sc_dist, dim=1).unsqueeze(2)
        out_attended = torch.sum(torch.mul(att_weights, x), dim=1)
        return out_attended

    def get_input_size(self):
        return self.__input_size

    def get_output_size(self):
        return self.__output_size

    
class AvgPoolingState(Layer):
    def __init__(self, input_size, output_size):
        super(AvgPoolingState, self).__init__()
        self.__input_size = input_size
        self.__output_size = output_size

    def forward(self, x):
        return torch.mean(x, 1)

    def get_input_size(self):
        return self.__input_size

    def get_output_size(self):
        return self.__output_size
    
class ConcatenationLayer(Layer):
    def __init__(self, layer1, layer2):
        super(ConcatenationLayer, self).__init__()
        self.__input_size = layer1.get_input_size() + layer2.get_input_size()
        self.__output_size = self.__input_size

    def forward(self, x, y):
        return torch.cat((x, y), 1)

    def get_input_size(self):
        return self.__input_size

    def get_output_size(self):
        return self.__output_size


class SequentialModel(Layer):
    def __init__(self, layers):
        super(Layer, self).__init__()
        for i in range(len(layers)-1):
            assert (layers[i].get_output_size() == layers[i+1].get_input_size())
        self.layers = ModuleList(layers)
        self.__output_size = self.layers[-1].get_output_size()
        self.__input_size = self.layers[0].get_input_size()

    def forward(self, x):
        for layer in self.layers:
            x = layer(x)
        return x

    def get_input_size(self):
        return self.__input_size

    def get_output_size(self):
        return self.__output_size

    def add_layer(self, layer):
        assert (layer.get_input_size() == self.__input_size)
        self.layers.append(layer)
        self.__output_size = layer.get_output_size()
        

In [16]:
class ProjectedMultiAttentionRNN(Module):
    def __init__(self, embeddings,
                 embeddings_dropout=0.0,
                 is_gru=True,
                 cell_hidden_size=128,
                 stacked_layers=1,
                 bidirectional=False,
                 att_mlp_layers=1,
                 att_mlp_dropout=0.0,
                 top_mlp_layers=2,
                 top_mlp_activation=relu,
                 top_mlp_outer_activation=None, targets=11,
                 top_mlp_dropout=0.0):

        super(ProjectedMultiAttentionRNN, self).__init__()
        self.name = "ProjectedMultiAttentionRNN"

        self.word_embedding_layer = PretrainedEmbeddingLayer(embeddings, dropout=embeddings_dropout, trainable=False)
        self.projection_layer = MLP(num_of_layers=1, init_size=self.word_embedding_layer.get_output_size(),
                                        out_size=128, outer_activation=tanh)
        self.cell = CellLayer(is_gru, self.projection_layer.get_output_size(),
                              cell_hidden_size, bidirectional, stacked_layers)
        large_size = cell_hidden_size * 2 if bidirectional else cell_hidden_size
        decision_layers = [MLP(num_of_layers=top_mlp_layers,
                                           init_size=large_size,
                                           out_size=1,
                                           dropout=top_mlp_dropout,
                                           inner_activation=top_mlp_activation,
                                           outer_activation=top_mlp_outer_activation) for _ in range(targets)]
        self.decision_layers = ModuleList(decision_layers)
        self.attentions = ModuleList([AttendedState(att_mlp_layers, large_size, att_mlp_dropout, relu) for _ in range(targets)])
        self.seq = SequentialModel([self.word_embedding_layer, self.projection_layer, self.cell])
        self.params = list(filter(lambda p: p.requires_grad, self.parameters()))

    def forward(self, x):
        encoder = self.seq(x)
        states = [desicion(attention(encoder)) for attention, desicion in zip(self.attentions, self.decision_layers)]
        out = torch.cat(states, dim=1)
        return out

In [31]:
class CNN(Module):
    def __init__(self, embeddings,
                 embeddings_dropout=0.0,
                 filters=32,              
                 top_mlp_layers=2,
                 top_mlp_activation=relu,
                 top_mlp_outer_activation=None, targets=11,
                 top_mlp_dropout=0.0):

        super(CNN, self).__init__()
        self.name = "CNN"

        self.word_embedding_layer = PretrainedEmbeddingLayer(embeddings, dropout=embeddings_dropout, trainable=False)
        
        conv_blocks = [ConvBlock(in_channels= self.word_embedding_layer.get_output_size(), 
                            filters=filters, window=i) for i in range(2,6)]
        
        self.conv_blocks = ModuleList(conv_blocks)
        
        large_size = len(conv_blocks) * filters
        decision_layers = [MLP(num_of_layers=top_mlp_layers,
                                           init_size=large_size,
                                           out_size=1,
                                           dropout=top_mlp_dropout,
                                           inner_activation=top_mlp_activation,
                                           outer_activation=top_mlp_outer_activation) for _ in range(targets)]
        self.decision_layers = ModuleList(decision_layers)
        
        self.params = list(filter(lambda p: p.requires_grad, self.parameters()))

    def forward(self, x):
        embs = self.word_embedding_layer(x)
        convs = [block(embs) for block in self.conv_blocks]
        encoder = torch.cat(convs, dim=1)
        states = [desicion(encoder) for desicion in self.decision_layers]
        out = torch.cat(states, dim=1)
        return out

In [19]:
class MultiAttentionRNN(Module):
    def __init__(self, embeddings,
                 embeddings_dropout=0.0,
                 is_gru=True,
                 cell_hidden_size=128,
                 stacked_layers=1,
                 bidirectional=False,
                 att_mlp_layers=1,
                 att_mlp_dropout=0.0,
                 top_mlp_layers=2,
                 top_mlp_activation=relu,
                 top_mlp_outer_activation=None, targets=11,
                 top_mlp_dropout=0.0):

        super(MultiAttentionRNN, self).__init__()
        self.name = "MultiAttentionRNN"

        self.word_embedding_layer = PretrainedEmbeddingLayer(embeddings, dropout=embeddings_dropout, trainable=False)
    
        self.cell = CellLayer(is_gru, self.word_embedding_layer.get_output_size(),
                              cell_hidden_size, bidirectional, stacked_layers)
        large_size = cell_hidden_size * 2 if bidirectional else cell_hidden_size
        decision_layers = [MLP(num_of_layers=top_mlp_layers,
                                           init_size=large_size,
                                           out_size=1,
                                           dropout=top_mlp_dropout,
                                           inner_activation=top_mlp_activation,
                                           outer_activation=top_mlp_outer_activation) for _ in range(targets)]
        self.decision_layers = ModuleList(decision_layers)
        self.attentions = ModuleList([AttendedState(att_mlp_layers, large_size, att_mlp_dropout, relu) for _ in range(targets)])
        self.seq = SequentialModel([self.word_embedding_layer, self.cell])
        self.params = list(filter(lambda p: p.requires_grad, self.parameters()))

    def forward(self, x):
        encoder = self.seq(x)
        states = [desicion(attention(encoder)) for attention, desicion in zip(self.attentions, self.decision_layers)]
        out = torch.cat(states, dim=1)
        return out

In [34]:
class MultiAttentionRNNConcatCNN(Module):
    def __init__(self, embeddings,
                 embeddings_dropout=0.0,
                 is_gru=True,
                 filters=32,
                 cell_hidden_size=128,
                 stacked_layers=1,
                 bidirectional=False,
                 att_mlp_layers=1,
                 att_mlp_dropout=0.0,
                 top_mlp_layers=2,
                 top_mlp_activation=relu,
                 top_mlp_outer_activation=None, targets=11,
                 top_mlp_dropout=0.0):

        super(MultiAttentionRNNConcatCNN, self).__init__()
        self.name = "MultiAttentionRNNConcatCNN"
        self.targets = targets

        self.word_embedding_layer = PretrainedEmbeddingLayer(embeddings, dropout=embeddings_dropout, trainable=False)
        conv_blocks = [ConvBlock(in_channels= self.word_embedding_layer.get_output_size(), 
                            filters=filters, window=i) for i in range(2,6)]
        self.conv_blocks = ModuleList(conv_blocks)
        self.cell = CellLayer(is_gru, self.word_embedding_layer.get_output_size(),
                              cell_hidden_size, bidirectional, stacked_layers)
        large_size = cell_hidden_size * 2 if bidirectional else cell_hidden_size
        large_size_top = large_size + filters * len(conv_blocks)
        decision_layers = [MLP(num_of_layers=top_mlp_layers,
                                           init_size=large_size_top,
                                           out_size=1,
                                           dropout=top_mlp_dropout,
                                           inner_activation=top_mlp_activation,
                                           outer_activation=top_mlp_outer_activation) for _ in range(targets)]
        self.decision_layers = ModuleList(decision_layers)
        attentions = [AttendedState(att_mlp_layers, large_size, att_mlp_dropout, relu) for _ in range(targets)]
        self.attentions = ModuleList(attentions)
        self.params = list(filter(lambda p: p.requires_grad, self.parameters()))

    def forward(self, x):
        embs = self.word_embedding_layer(x)
        convs = [block(embs) for block in self.conv_blocks]
        encoder1 = torch.cat(convs, dim=1)
        
        encoder2 = self.cell(embs)
        states = [attention(encoder2) for attention in self.attentions]
        states = [torch.cat((state, encoder1), dim=1) for state in states]
        decisions = [decision(state) for decision, state in zip(self.decision_layers, states)]
        out = torch.cat(decisions, dim=1)
        return out

In [22]:
class LastStateRNN(Module):
    def __init__(self, embeddings,
                 embeddings_dropout=0.0,
                 is_gru=True,
                 cell_hidden_size=128,
                 stacked_layers=1,
                 bidirectional=False,
                 att_mlp_layers=1,
                 att_mlp_dropout=0.0,
                 top_mlp_layers=2,
                 top_mlp_activation=relu,
                 top_mlp_outer_activation=None, targets=11,
                 top_mlp_dropout=0.0):

        super(LastStateRNN, self).__init__()
        self.name = "LastStateRNN"

        self.word_embedding_layer = PretrainedEmbeddingLayer(embeddings, dropout=embeddings_dropout, trainable=False)
    
        self.cell = CellLayer(is_gru, self.word_embedding_layer.get_output_size(),
                              cell_hidden_size, bidirectional, stacked_layers)
        large_size = cell_hidden_size * 2 if bidirectional else cell_hidden_size
        decision_layers = [MLP(num_of_layers=top_mlp_layers,
                                           init_size=large_size,
                                           out_size=1,
                                           dropout=top_mlp_dropout,
                                           inner_activation=top_mlp_activation,
                                           outer_activation=top_mlp_outer_activation) for _ in range(targets)]
        self.decision_layers = ModuleList(decision_layers)
        self.attentions = ModuleList([LastState(large_size, large_size) for _ in range(targets)])
        self.seq = SequentialModel([self.word_embedding_layer, self.cell])
        self.params = list(filter(lambda p: p.requires_grad, self.parameters()))

    def forward(self, x):
        encoder = self.seq(x)
        states = [desicion(attention(encoder)) for attention, desicion in zip(self.attentions, self.decision_layers)]
        out = torch.cat(states, dim=1)
        return out

In [23]:
class MLPWithAttentionModel(Module):
    def __init__(self, embeddings,
                 embeddings_dropout=0.0,
                 att_mlp_layers=1,
                 att_mlp_dropout=0.0,
                 top_mlp_layers=3,
                 top_mlp_activation=relu,
                 top_mlp_outer_activation=None, targets=11,
                 top_mlp_dropout=0.0):

        super(MLPWithAttentionModel, self).__init__()
        self.name = "MLPWithAttentionModel"

        self.word_embedding_layer = PretrainedEmbeddingLayer(embeddings, dropout=embeddings_dropout, trainable=False)
        self.projection_layer = MLP(num_of_layers=1, init_size=self.word_embedding_layer.get_output_size(),
                                    out_size=128, outer_activation=tanh)
        large_size = self.projection_layer.get_output_size()
        decision_layers = [MLP(num_of_layers=top_mlp_layers,
                                           init_size=large_size,
                                           out_size=1,
                                           dropout=top_mlp_dropout,
                                           inner_activation=top_mlp_activation,
                                           outer_activation=top_mlp_outer_activation) for _ in range(targets)]
        self.decision_layers = ModuleList(decision_layers)
        attentions = [AttendedState(att_mlp_layers, large_size, att_mlp_dropout, relu) for _ in range(targets)]
        self.attentions = ModuleList(attentions)
        self.seq = SequentialModel([self.word_embedding_layer, self.projection_layer])
        
        self.params = list(filter(lambda p: p.requires_grad, self.parameters()))

    def forward(self,x):
        encoder = self.seq(x)
        states = [desicion(attention(encoder)) for attention, desicion in zip(self.attentions, self.decision_layers)]
        out = torch.cat(states, dim=1)
        return out
        
        
class MLPModel(Module):
    def __init__(self, embeddings,
                 embeddings_dropout=0.0,
                 att_mlp_layers=1,
                 att_mlp_dropout=0.0,
                 top_mlp_layers=3,
                 top_mlp_activation=relu,
                 top_mlp_outer_activation=None, targets=11,
                 top_mlp_dropout=0.0):

        super(MLPModel, self).__init__()
        self.name = "MLPModel"

        self.word_embedding_layer = PretrainedEmbeddingLayer(embeddings, dropout=embeddings_dropout, trainable=False)
        large_size = self.word_embedding_layer.get_output_size()
        decision_layers = [MLP(num_of_layers=top_mlp_layers,
                                           init_size=large_size,
                                           out_size=1,
                                           dropout=top_mlp_dropout,
                                           inner_activation=top_mlp_activation,
                                           outer_activation=top_mlp_outer_activation) for _ in range(targets)]
        self.decision_layers = ModuleList(decision_layers)
        attentions = [AvgPoolingState(input_size=large_size, output_size=large_size) for _ in range(targets)]
        self.attentions = ModuleList(attentions)
        
        self.params = list(filter(lambda p: p.requires_grad, self.parameters()))

    def forward(self,x):
        encoder = self.word_embedding_layer(x)
        states = [desicion(attention(encoder)) for attention, desicion in zip(self.attentions, self.decision_layers)]
        out = torch.cat(states, dim=1)
        return out

In [24]:
class SingleAttentionRNN(Module):
    def __init__(self, embeddings,
                 embeddings_dropout=0.0,
                 is_gru=True,
                 cell_hidden_size=128,
                 stacked_layers=1,
                 bidirectional=False,
                 att_mlp_layers=2,
                 att_mlp_dropout=0.0,
                 top_mlp_layers=1,
                 top_mlp_activation=relu,
                 top_mlp_outer_activation=None, targets=11,
                 top_mlp_dropout=0.0):

        super(SingleAttentionRNN, self).__init__()
        self.name = "SingleAttentionRNN"

        self.word_embedding_layer = PretrainedEmbeddingLayer(embeddings, dropout=embeddings_dropout, trainable=False)
    
        self.cell = CellLayer(is_gru, self.word_embedding_layer.get_output_size(),
                              cell_hidden_size, bidirectional, stacked_layers)
        large_size = cell_hidden_size * 2 if bidirectional else cell_hidden_size
        self.decision_layer = MLP(num_of_layers=top_mlp_layers,
                                           init_size=large_size,
                                           out_size=targets,
                                           dropout=top_mlp_dropout,
                                           inner_activation=top_mlp_activation,
                                           outer_activation=top_mlp_outer_activation)
        self.attention = AttendedState(att_mlp_layers, large_size, att_mlp_dropout, relu)
        self.seq = SequentialModel([self.word_embedding_layer, self.cell])
        self.params = list(filter(lambda p: p.requires_grad, self.parameters()))

    def forward(self, x):
        encoder = self.seq(x)
        out = self.decision_layer(self.attention(encoder))
        
        return out

In [25]:
MODELS_DIR = ""
def save_model(model):
    torch.save(model.state_dict(), MODELS_DIR + model.name + '.pkl')


def load_model(model):
    model.load_state_dict(torch.load(MODELS_DIR + model.name + '.pkl'))
    return model

def train(model, train_batches, test_batches, optimizer,  criterion,
                          epochs, init_patience, cuda=True):
    patience = init_patience
    best_auc = 0.0
    for i in range(1, epochs + 1):
        start = time()
        auc = run_epoch(model, train_batches, test_batches, optimizer,  criterion,
                                         cuda)
        end = time()
        print('epoch %d, auc: %2.3f  Time: %d minutes, %d seconds'
              % (i, 100 * auc, (end - start) / 60, (end - start) % 60))
        if best_auc < auc:
            best_auc = auc
            patience = init_patience
            save_model(model)
            if i > 1:
                print('best epoch so far')
        else:
            patience -= 1
        if patience == 0:
            break
    return best_auc


def run_epoch(model, train_batches, test_batches, optimizer, criterion, cuda):
    model.train(True)
    perm = np.random.permutation(len(train_batches))
    for i in tqdm(perm):
        batch = train_batches[i]
        inner_perm = np.random.permutation(len(batch['x']))
        data = []
        if cuda:
            data.append(Variable(torch.from_numpy(batch['x'][inner_perm]).long().cuda()))
        else:
            data.append(Variable(torch.from_numpy(batch['x'][inner_perm]).long()))
        if cuda:
            y = Variable(torch.from_numpy(batch['y'][inner_perm]).cuda())
        else:
            y = Variable(torch.from_numpy(batch['y'][inner_perm]))
        outputs = model(*data)
        loss = criterion(outputs, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    return evaluate(model, test_batches, T_LIST)
    


def evaluate(model, test_batches, y_list):
    model.train(False)
    results = get_scores(model, test_batches, y_list)
    auc_scores = []
    for k in results:
        auc = roc_auc_score(results[k]['labels'],np.asarray(results[k]['scores'],  dtype='float32'))
#         print("{} - auc:{}".format(y_list[k], auc))
        auc_scores.append(auc)
    return np.mean(auc_scores)

def get_scores(model, test_batches, y_list):
    results = {y:{'scores':[], 'labels':[]}  for y in range(len(y_list))}
    for batch in test_batches:
        batch_scores = model(torch.from_numpy(batch['x']).long())
        for i in range(len(y_list)):
            results[i]['scores'].extend(batch_scores[:,i].detach().numpy())
            results[i]['labels'].extend(batch['y'][:,i])
    return results

def best_thr(labels, scores):
    thr = 0.05
    best_thr = 0.05
    best_f1 = 0.0
    for thr in np.arange(0.01, 0.99, 0.01):
        scr = f1_score(labels, [x > thr for x in scores])
        if scr > best_f1:
            best_f1 = scr
            best_thr = thr
    return best_thr


def classification_report(model, test_batches, y_list):
    model.train(False)
    results = get_scores(model, test_batches, y_list)
    print("\tEmotion\tAUC\tAccuracy ")
    best_thresholds = {}
    for k,emotion in enumerate(y_list):
        best_thresholds[emotion] = best_thr(results[k]['labels'],np.asarray(results[k]['scores']))
        auc = roc_auc_score(results[k]['labels'],np.asarray(results[k]['scores'],  dtype='float32'))
       
        acc = accuracy_score(results[k]['labels'],np.asarray([x>best_thresholds[emotion] for x in results[k]['scores']],  dtype='float32'))
        print ("\t{:.5s}\t{:.4f}\t{:.4f}".format(emotion, auc, acc))
    full_predictions = np.zeros(shape=(len(results[0]['scores']), len(y_list)))
    full_targets = np.zeros(shape=(len(results[0]['scores']), len(y_list)))
    for i in range(len(y_list)):
        full_predictions[:,i] = [x>best_thresholds[y_list[i]] for x in results[i]['scores']]
        full_targets[:,i] = results[i]['labels']
    print(sklearn_cr(full_targets, full_predictions, target_names=y_list))
    
    

In [26]:
train_batches = create_batches(train_df, BATCH_SIZE)
test_batches = create_batches(test_df, BATCH_SIZE)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


# MLP over the Average of the embeddings

In [28]:
model = MLPModel(emb_matrix)
optimizer = Adam(model.params, 0.001)
criterion = BCEWithLogitsLoss()
train(model, train_batches, test_batches, optimizer,  criterion, epochs=20, init_patience=10, cuda=False)
model = load_model(model)
classification_report(model, test_batches, T_LIST)

100%|██████████| 107/107 [00:05<00:00, 21.18it/s]
  3%|▎         | 3/107 [00:00<00:04, 21.59it/s]

epoch 1, auc: 74.008  Time: 0 minutes, 5 seconds


100%|██████████| 107/107 [00:05<00:00, 18.59it/s]
  3%|▎         | 3/107 [00:00<00:03, 26.56it/s]

epoch 2, auc: 76.241  Time: 0 minutes, 6 seconds
best epoch so far


100%|██████████| 107/107 [00:04<00:00, 25.01it/s]
  2%|▏         | 2/107 [00:00<00:06, 15.38it/s]

epoch 3, auc: 76.460  Time: 0 minutes, 4 seconds
best epoch so far


100%|██████████| 107/107 [00:04<00:00, 23.39it/s]
  4%|▎         | 4/107 [00:00<00:02, 38.93it/s]

epoch 4, auc: 76.815  Time: 0 minutes, 4 seconds
best epoch so far


100%|██████████| 107/107 [00:04<00:00, 23.08it/s]
  3%|▎         | 3/107 [00:00<00:03, 29.58it/s]

epoch 5, auc: 76.711  Time: 0 minutes, 4 seconds


100%|██████████| 107/107 [00:05<00:00, 20.48it/s]
  2%|▏         | 2/107 [00:00<00:07, 14.08it/s]

epoch 6, auc: 76.380  Time: 0 minutes, 5 seconds


100%|██████████| 107/107 [00:05<00:00, 19.91it/s]
  3%|▎         | 3/107 [00:00<00:03, 29.64it/s]

epoch 7, auc: 75.908  Time: 0 minutes, 5 seconds


100%|██████████| 107/107 [00:04<00:00, 24.52it/s]
  3%|▎         | 3/107 [00:00<00:04, 25.42it/s]

epoch 8, auc: 75.756  Time: 0 minutes, 4 seconds


100%|██████████| 107/107 [00:05<00:00, 20.12it/s]
  2%|▏         | 2/107 [00:00<00:07, 14.75it/s]

epoch 9, auc: 74.485  Time: 0 minutes, 5 seconds


100%|██████████| 107/107 [00:04<00:00, 22.39it/s]
  2%|▏         | 2/107 [00:00<00:06, 15.33it/s]

epoch 10, auc: 73.964  Time: 0 minutes, 5 seconds


100%|██████████| 107/107 [00:04<00:00, 24.83it/s]
  3%|▎         | 3/107 [00:00<00:03, 29.55it/s]

epoch 11, auc: 73.149  Time: 0 minutes, 4 seconds


100%|██████████| 107/107 [00:05<00:00, 21.38it/s]
  3%|▎         | 3/107 [00:00<00:04, 24.57it/s]

epoch 12, auc: 73.390  Time: 0 minutes, 5 seconds


100%|██████████| 107/107 [00:04<00:00, 26.19it/s]
  2%|▏         | 2/107 [00:00<00:07, 13.84it/s]

epoch 13, auc: 72.560  Time: 0 minutes, 4 seconds


100%|██████████| 107/107 [00:04<00:00, 25.56it/s]


epoch 14, auc: 72.419  Time: 0 minutes, 4 seconds
	Emotion	AUC	Accuracy 
	anger	0.8160	0.7686


  'precision', 'predicted', average, warn_for)


	antic	0.6354	0.8600
	disgu	0.8150	0.7483
	fear	0.8094	0.8837
	joy	0.8426	0.7517
	love	0.8456	0.8860
	optim	0.7982	0.7664
	pessi	0.7182	0.8871
	sadne	0.7486	0.7370
	surpr	0.7139	0.9605
	trust	0.7066	0.9515
              precision    recall  f1-score   support

       anger       0.67      0.68      0.68       315
anticipation       0.00      0.00      0.00       124
     disgust       0.65      0.66      0.65       319
        fear       0.69      0.27      0.39       121
         joy       0.78      0.63      0.70       400
        love       0.71      0.39      0.51       132
    optimism       0.70      0.57      0.63       307
   pessimism       0.00      0.00      0.00       100
     sadness       0.74      0.18      0.30       265
    surprise       0.00      0.00      0.00        35
       trust       0.00      0.00      0.00        43

   micro avg       0.70      0.46      0.55      2161
   macro avg       0.45      0.31      0.35      2161
weighted avg       0.61      0.46   

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


# MLP with Attention on the Embeddings

In [29]:
model = MLPWithAttentionModel(emb_matrix)
optimizer = Adam(model.params, 0.001)
criterion = BCEWithLogitsLoss()
train(model, train_batches, test_batches, optimizer,  criterion, epochs=20, init_patience=10, cuda=False)
model = load_model(model)
classification_report(model, test_batches, T_LIST)

100%|██████████| 107/107 [00:10<00:00, 10.32it/s]
  0%|          | 0/107 [00:00<?, ?it/s]

epoch 1, auc: 72.343  Time: 0 minutes, 10 seconds


100%|██████████| 107/107 [00:11<00:00,  9.33it/s]
  1%|          | 1/107 [00:00<00:12,  8.61it/s]

epoch 2, auc: 77.998  Time: 0 minutes, 11 seconds
best epoch so far


100%|██████████| 107/107 [00:11<00:00,  9.42it/s]
  1%|          | 1/107 [00:00<00:12,  8.41it/s]

epoch 3, auc: 79.660  Time: 0 minutes, 11 seconds
best epoch so far


100%|██████████| 107/107 [00:10<00:00, 10.32it/s]
  0%|          | 0/107 [00:00<?, ?it/s]

epoch 4, auc: 79.914  Time: 0 minutes, 10 seconds
best epoch so far


100%|██████████| 107/107 [00:10<00:00,  9.97it/s]
  0%|          | 0/107 [00:00<?, ?it/s]

epoch 5, auc: 80.271  Time: 0 minutes, 11 seconds
best epoch so far


100%|██████████| 107/107 [00:10<00:00,  9.91it/s]
  1%|          | 1/107 [00:00<00:11,  9.27it/s]

epoch 6, auc: 80.073  Time: 0 minutes, 11 seconds


100%|██████████| 107/107 [00:10<00:00, 10.53it/s]
  1%|          | 1/107 [00:00<00:10,  9.67it/s]

epoch 7, auc: 80.195  Time: 0 minutes, 10 seconds


100%|██████████| 107/107 [00:10<00:00, 10.06it/s]
  2%|▏         | 2/107 [00:00<00:08, 12.72it/s]

epoch 8, auc: 80.199  Time: 0 minutes, 11 seconds


100%|██████████| 107/107 [00:10<00:00,  9.97it/s]
  2%|▏         | 2/107 [00:00<00:08, 12.83it/s]

epoch 9, auc: 80.131  Time: 0 minutes, 11 seconds


100%|██████████| 107/107 [00:11<00:00,  9.37it/s]
  0%|          | 0/107 [00:00<?, ?it/s]

epoch 10, auc: 79.673  Time: 0 minutes, 11 seconds


100%|██████████| 107/107 [00:11<00:00,  9.33it/s]
  1%|          | 1/107 [00:00<00:10,  9.76it/s]

epoch 11, auc: 79.954  Time: 0 minutes, 11 seconds


100%|██████████| 107/107 [00:10<00:00, 10.64it/s]
  1%|          | 1/107 [00:00<00:12,  8.81it/s]

epoch 12, auc: 79.313  Time: 0 minutes, 10 seconds


100%|██████████| 107/107 [00:09<00:00, 11.80it/s]
  0%|          | 0/107 [00:00<?, ?it/s]

epoch 13, auc: 79.518  Time: 0 minutes, 9 seconds


100%|██████████| 107/107 [00:12<00:00,  8.87it/s]
  2%|▏         | 2/107 [00:00<00:09, 11.18it/s]

epoch 14, auc: 78.752  Time: 0 minutes, 12 seconds


100%|██████████| 107/107 [00:13<00:00,  7.99it/s]


epoch 15, auc: 78.385  Time: 0 minutes, 13 seconds
	Emotion	AUC	Accuracy 
	anger	0.8353	0.7912


  'precision', 'predicted', average, warn_for)


	antic	0.6780	0.8600
	disgu	0.8214	0.7528
	fear	0.8626	0.8928
	joy	0.8638	0.7810
	love	0.8868	0.8916
	optim	0.8227	0.7709
	pessi	0.7304	0.8883
	sadne	0.7833	0.7607
	surpr	0.8103	0.9616
	trust	0.7352	0.9515
              precision    recall  f1-score   support

       anger       0.75      0.62      0.68       315
anticipation       0.00      0.00      0.00       124
     disgust       0.69      0.58      0.63       319
        fear       0.65      0.46      0.54       121
         joy       0.85      0.63      0.72       400
        love       0.76      0.39      0.52       132
    optimism       0.73      0.53      0.62       307
   pessimism       0.55      0.06      0.11       100
     sadness       0.71      0.34      0.46       265
    surprise       1.00      0.03      0.06        35
       trust       0.00      0.00      0.00        43

   micro avg       0.75      0.46      0.57      2161
   macro avg       0.61      0.33      0.39      2161
weighted avg       0.68      0.46   

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


# Last State GRU

In [30]:
model = LastStateRNN(emb_matrix)
optimizer = Adam(model.params, 0.001)
criterion = BCEWithLogitsLoss()
train(model, train_batches, test_batches, optimizer,  criterion, epochs=20, init_patience=10, cuda=False)
model = load_model(model)
classification_report(model, test_batches, T_LIST)

100%|██████████| 107/107 [00:06<00:00, 16.68it/s]
  2%|▏         | 2/107 [00:00<00:06, 15.62it/s]

epoch 1, auc: 70.647  Time: 0 minutes, 6 seconds


100%|██████████| 107/107 [00:07<00:00, 14.31it/s]
  2%|▏         | 2/107 [00:00<00:08, 12.01it/s]

epoch 2, auc: 76.110  Time: 0 minutes, 7 seconds
best epoch so far


100%|██████████| 107/107 [00:05<00:00, 18.62it/s]
  0%|          | 0/107 [00:00<?, ?it/s]

epoch 3, auc: 78.242  Time: 0 minutes, 6 seconds
best epoch so far


100%|██████████| 107/107 [00:06<00:00, 17.73it/s]
  2%|▏         | 2/107 [00:00<00:08, 12.02it/s]

epoch 4, auc: 79.463  Time: 0 minutes, 6 seconds
best epoch so far


100%|██████████| 107/107 [00:05<00:00, 18.18it/s]
  3%|▎         | 3/107 [00:00<00:04, 25.81it/s]

epoch 5, auc: 80.307  Time: 0 minutes, 6 seconds
best epoch so far


100%|██████████| 107/107 [00:05<00:00, 21.23it/s]
  2%|▏         | 2/107 [00:00<00:06, 17.27it/s]

epoch 6, auc: 80.200  Time: 0 minutes, 5 seconds


100%|██████████| 107/107 [00:05<00:00, 19.21it/s]
  3%|▎         | 3/107 [00:00<00:05, 18.94it/s]

epoch 7, auc: 79.781  Time: 0 minutes, 5 seconds


100%|██████████| 107/107 [00:06<00:00, 16.40it/s]
  4%|▎         | 4/107 [00:00<00:04, 25.44it/s]

epoch 8, auc: 79.870  Time: 0 minutes, 6 seconds


100%|██████████| 107/107 [00:06<00:00, 17.22it/s]
  2%|▏         | 2/107 [00:00<00:08, 12.88it/s]

epoch 9, auc: 79.525  Time: 0 minutes, 6 seconds


100%|██████████| 107/107 [00:06<00:00, 15.61it/s]
  2%|▏         | 2/107 [00:00<00:05, 18.52it/s]

epoch 10, auc: 79.638  Time: 0 minutes, 7 seconds


100%|██████████| 107/107 [00:07<00:00, 14.79it/s]
  2%|▏         | 2/107 [00:00<00:06, 15.95it/s]

epoch 11, auc: 78.932  Time: 0 minutes, 7 seconds


100%|██████████| 107/107 [00:07<00:00, 14.88it/s]
  3%|▎         | 3/107 [00:00<00:04, 22.86it/s]

epoch 12, auc: 78.221  Time: 0 minutes, 7 seconds


100%|██████████| 107/107 [00:06<00:00, 15.92it/s]
  2%|▏         | 2/107 [00:00<00:06, 16.87it/s]

epoch 13, auc: 77.728  Time: 0 minutes, 7 seconds


100%|██████████| 107/107 [00:05<00:00, 20.15it/s]
  3%|▎         | 3/107 [00:00<00:05, 20.11it/s]

epoch 14, auc: 77.632  Time: 0 minutes, 5 seconds


100%|██████████| 107/107 [00:05<00:00, 21.24it/s]


epoch 15, auc: 76.991  Time: 0 minutes, 5 seconds
	Emotion	AUC	Accuracy 
	anger	0.8391	0.7777
	antic	0.6582	0.8600
	disgu	0.8431	0.7698
	fear	0.8488	0.9007
	joy	0.8607	0.7743
	love	0.8848	0.8837
	optim	0.8227	0.7754
	pessi	0.7483	0.8871
	sadne	0.7853	0.7810
	surpr	0.8136	0.9605
	trust	0.7290	0.9515
              precision    recall  f1-score   support

       anger       0.70      0.64      0.67       315
anticipation       0.50      0.01      0.02       124
     disgust       0.68      0.67      0.68       319
        fear       0.76      0.40      0.52       121
         joy       0.80      0.67      0.73       400
        love       0.74      0.34      0.47       132
    optimism       0.74      0.55      0.63       307
   pessimism       0.50      0.04      0.07       100
     sadness       0.78      0.37      0.51       265
    surprise       0.00      0.00      0.00        35
       trust       0.00      0.00      0.00        43

   micro avg       0.74      0.49      0.59      2

# Single Attention GRU

In [32]:
model = SingleAttentionRNN(emb_matrix)
optimizer = Adam(model.params, 0.001)
criterion = BCEWithLogitsLoss()
train(model, train_batches, test_batches, optimizer,  criterion, epochs=20, init_patience=10, cuda=False)
model = load_model(model)
classification_report(model, test_batches, T_LIST)

100%|██████████| 107/107 [00:05<00:00, 19.76it/s]
  2%|▏         | 2/107 [00:00<00:05, 19.45it/s]

epoch 1, auc: 71.882  Time: 0 minutes, 5 seconds


100%|██████████| 107/107 [00:05<00:00, 19.48it/s]
  2%|▏         | 2/107 [00:00<00:06, 16.05it/s]

epoch 2, auc: 75.490  Time: 0 minutes, 5 seconds
best epoch so far


100%|██████████| 107/107 [00:05<00:00, 18.08it/s]
  2%|▏         | 2/107 [00:00<00:06, 16.44it/s]

epoch 3, auc: 77.307  Time: 0 minutes, 6 seconds
best epoch so far


100%|██████████| 107/107 [00:05<00:00, 20.97it/s]
  3%|▎         | 3/107 [00:00<00:04, 21.62it/s]

epoch 4, auc: 78.525  Time: 0 minutes, 5 seconds
best epoch so far


100%|██████████| 107/107 [00:06<00:00, 17.27it/s]
  0%|          | 0/107 [00:00<?, ?it/s]

epoch 5, auc: 78.582  Time: 0 minutes, 6 seconds
best epoch so far


100%|██████████| 107/107 [00:05<00:00, 20.22it/s]
  3%|▎         | 3/107 [00:00<00:04, 24.69it/s]

epoch 6, auc: 78.120  Time: 0 minutes, 5 seconds


100%|██████████| 107/107 [00:05<00:00, 19.23it/s]
  2%|▏         | 2/107 [00:00<00:05, 18.83it/s]

epoch 7, auc: 78.544  Time: 0 minutes, 5 seconds


100%|██████████| 107/107 [00:04<00:00, 22.03it/s]
  2%|▏         | 2/107 [00:00<00:06, 15.29it/s]

epoch 8, auc: 78.216  Time: 0 minutes, 5 seconds


100%|██████████| 107/107 [00:05<00:00, 19.31it/s]
  2%|▏         | 2/107 [00:00<00:05, 18.60it/s]

epoch 9, auc: 78.092  Time: 0 minutes, 5 seconds


100%|██████████| 107/107 [00:04<00:00, 21.90it/s]
  3%|▎         | 3/107 [00:00<00:05, 19.63it/s]

epoch 10, auc: 78.141  Time: 0 minutes, 5 seconds


100%|██████████| 107/107 [00:05<00:00, 19.49it/s]
  3%|▎         | 3/107 [00:00<00:04, 24.68it/s]

epoch 11, auc: 77.999  Time: 0 minutes, 5 seconds


100%|██████████| 107/107 [00:05<00:00, 18.23it/s]
  2%|▏         | 2/107 [00:00<00:06, 17.31it/s]

epoch 12, auc: 77.562  Time: 0 minutes, 6 seconds


100%|██████████| 107/107 [00:05<00:00, 18.05it/s]
  2%|▏         | 2/107 [00:00<00:06, 15.98it/s]

epoch 13, auc: 77.784  Time: 0 minutes, 6 seconds


100%|██████████| 107/107 [00:05<00:00, 21.03it/s]
  3%|▎         | 3/107 [00:00<00:03, 26.28it/s]

epoch 14, auc: 77.856  Time: 0 minutes, 5 seconds


100%|██████████| 107/107 [00:04<00:00, 22.43it/s]


epoch 15, auc: 77.839  Time: 0 minutes, 5 seconds
	Emotion	AUC	Accuracy 
	anger	0.8337	0.7788
	antic	0.6377	0.8600
	disgu	0.8246	0.7506
	fear	0.8187	0.8928
	joy	0.8542	0.7709
	love	0.8632	0.8849
	optim	0.8124	0.7585
	pessi	0.7235	0.8871
	sadne	0.7573	0.7449
	surpr	0.7936	0.9605
	trust	0.7250	0.9515
              precision    recall  f1-score   support

       anger       0.73      0.61      0.66       315
anticipation       0.00      0.00      0.00       124
     disgust       0.66      0.62      0.64       319
        fear       0.69      0.40      0.50       121
         joy       0.84      0.61      0.71       400
        love       0.73      0.36      0.48       132
    optimism       0.71      0.51      0.59       307
   pessimism       0.50      0.08      0.14       100
     sadness       0.61      0.42      0.50       265
    surprise       0.00      0.00      0.00        35
       trust       0.00      0.00      0.00        43

   micro avg       0.71      0.47      0.56      2

# One Attention per Target on GRU 

In [33]:
model = MultiAttentionRNN(emb_matrix)
optimizer = Adam(model.params, 0.001)
criterion = BCEWithLogitsLoss()
train(model, train_batches, test_batches, optimizer,  criterion, epochs=20, init_patience=10, cuda=False)
model = load_model(model)
classification_report(model, test_batches, T_LIST)

100%|██████████| 107/107 [00:30<00:00,  3.54it/s]
  0%|          | 0/107 [00:00<?, ?it/s]

epoch 1, auc: 73.741  Time: 0 minutes, 31 seconds


100%|██████████| 107/107 [00:28<00:00,  3.71it/s]
  1%|          | 1/107 [00:00<00:15,  6.95it/s]

epoch 2, auc: 77.959  Time: 0 minutes, 29 seconds
best epoch so far


100%|██████████| 107/107 [00:35<00:00,  2.97it/s]
  1%|          | 1/107 [00:00<00:12,  8.59it/s]

epoch 3, auc: 79.337  Time: 0 minutes, 37 seconds
best epoch so far


100%|██████████| 107/107 [00:29<00:00,  3.57it/s]
  0%|          | 0/107 [00:00<?, ?it/s]

epoch 4, auc: 79.250  Time: 0 minutes, 31 seconds


100%|██████████| 107/107 [00:30<00:00,  3.46it/s]
  0%|          | 0/107 [00:00<?, ?it/s]

epoch 5, auc: 79.346  Time: 0 minutes, 32 seconds
best epoch so far


100%|██████████| 107/107 [00:29<00:00,  3.64it/s]
  1%|          | 1/107 [00:00<00:11,  8.90it/s]

epoch 6, auc: 78.787  Time: 0 minutes, 30 seconds


100%|██████████| 107/107 [00:33<00:00,  3.19it/s]
  0%|          | 0/107 [00:00<?, ?it/s]

epoch 7, auc: 78.663  Time: 0 minutes, 34 seconds


100%|██████████| 107/107 [00:32<00:00,  3.34it/s]
  1%|          | 1/107 [00:00<00:15,  6.96it/s]

epoch 8, auc: 78.296  Time: 0 minutes, 33 seconds


100%|██████████| 107/107 [00:28<00:00,  3.69it/s]
  1%|          | 1/107 [00:00<00:12,  8.64it/s]

epoch 9, auc: 77.440  Time: 0 minutes, 30 seconds


100%|██████████| 107/107 [00:28<00:00,  3.79it/s]
  1%|          | 1/107 [00:00<00:16,  6.52it/s]

epoch 10, auc: 76.821  Time: 0 minutes, 29 seconds


100%|██████████| 107/107 [00:27<00:00,  3.90it/s]
  1%|          | 1/107 [00:00<00:17,  6.04it/s]

epoch 11, auc: 76.714  Time: 0 minutes, 28 seconds


100%|██████████| 107/107 [00:27<00:00,  3.87it/s]
  0%|          | 0/107 [00:00<?, ?it/s]

epoch 12, auc: 76.630  Time: 0 minutes, 28 seconds


100%|██████████| 107/107 [00:27<00:00,  3.84it/s]
  0%|          | 0/107 [00:00<?, ?it/s]

epoch 13, auc: 76.143  Time: 0 minutes, 28 seconds


100%|██████████| 107/107 [00:26<00:00,  4.01it/s]
  0%|          | 0/107 [00:00<?, ?it/s]

epoch 14, auc: 75.862  Time: 0 minutes, 27 seconds


100%|██████████| 107/107 [00:32<00:00,  3.30it/s]


epoch 15, auc: 75.988  Time: 0 minutes, 33 seconds
	Emotion	AUC	Accuracy 
	anger	0.8323	0.7664
	antic	0.6335	0.8612
	disgu	0.8271	0.7517
	fear	0.8584	0.8962
	joy	0.8657	0.7912
	love	0.8825	0.8905
	optim	0.8207	0.7799
	pessi	0.7222	0.8860
	sadne	0.7889	0.7754
	surpr	0.7779	0.9616
	trust	0.7189	0.9503
              precision    recall  f1-score   support

       anger       0.66      0.70      0.68       315
anticipation       0.67      0.02      0.03       124
     disgust       0.64      0.71      0.67       319
        fear       0.64      0.54      0.59       121
         joy       0.85      0.65      0.74       400
        love       0.69      0.48      0.57       132
    optimism       0.70      0.64      0.67       307
   pessimism       0.44      0.04      0.07       100
     sadness       0.69      0.45      0.54       265
    surprise       1.00      0.03      0.06        35
       trust       0.00      0.00      0.00        43

   micro avg       0.70      0.54      0.61      

# CNN   of 2-5 kernel size

In [36]:
model = CNN(emb_matrix)
optimizer = Adam(model.params, 0.001)
criterion = BCEWithLogitsLoss()
train(model, train_batches, test_batches, optimizer,  criterion, epochs=5, init_patience=1, cuda=False)
model = load_model(model)
classification_report(model, test_batches, T_LIST)

100%|██████████| 107/107 [00:04<00:00, 25.76it/s]
  4%|▎         | 4/107 [00:00<00:03, 29.27it/s]

epoch 1, auc: 73.017  Time: 0 minutes, 4 seconds


100%|██████████| 107/107 [00:04<00:00, 23.53it/s]
  3%|▎         | 3/107 [00:00<00:04, 24.08it/s]

epoch 2, auc: 77.310  Time: 0 minutes, 4 seconds
best epoch so far


100%|██████████| 107/107 [00:04<00:00, 21.63it/s]
  3%|▎         | 3/107 [00:00<00:03, 28.70it/s]

epoch 3, auc: 78.022  Time: 0 minutes, 5 seconds
best epoch so far


100%|██████████| 107/107 [00:03<00:00, 28.90it/s]


epoch 4, auc: 77.810  Time: 0 minutes, 3 seconds
	Emotion	AUC	Accuracy 
	anger	0.8331	0.7506
	antic	0.6364	0.8600
	disgu	0.8283	0.7472
	fear	0.8259	0.8883
	joy	0.8555	0.7686
	love	0.8651	0.8950
	optim	0.8129	0.7765
	pessi	0.7140	0.8849
	sadne	0.7624	0.7630
	surpr	0.7499	0.9605
	trust	0.6989	0.9515
              precision    recall  f1-score   support

       anger       0.63      0.72      0.67       315
anticipation       0.00      0.00      0.00       124
     disgust       0.64      0.68      0.66       319
        fear       0.66      0.37      0.48       121
         joy       0.85      0.59      0.70       400
        love       0.81      0.39      0.52       132
    optimism       0.77      0.50      0.61       307
   pessimism       0.43      0.06      0.11       100
     sadness       0.65      0.45      0.53       265
    surprise       0.00      0.00      0.00        35
       trust       0.00      0.00      0.00        43

   micro avg       0.70      0.49      0.58      21

# Concat RNN  - CNN

In [37]:
model = MultiAttentionRNNConcatCNN(emb_matrix)
optimizer = Adam(model.params, 0.001)
criterion = BCEWithLogitsLoss()
train(model, train_batches, test_batches, optimizer,  criterion, epochs=20, init_patience=10, cuda=False)
model = load_model(model)
classification_report(model, test_batches, T_LIST)

100%|██████████| 107/107 [00:18<00:00,  5.80it/s]
  2%|▏         | 2/107 [00:00<00:07, 13.44it/s]

epoch 1, auc: 74.708  Time: 0 minutes, 19 seconds


100%|██████████| 107/107 [00:16<00:00,  6.48it/s]
  1%|          | 1/107 [00:00<00:13,  7.65it/s]

epoch 2, auc: 78.062  Time: 0 minutes, 17 seconds
best epoch so far


100%|██████████| 107/107 [00:17<00:00,  6.22it/s]
  0%|          | 0/107 [00:00<?, ?it/s]

epoch 3, auc: 78.184  Time: 0 minutes, 17 seconds
best epoch so far


100%|██████████| 107/107 [00:15<00:00,  6.87it/s]
  1%|          | 1/107 [00:00<00:18,  5.88it/s]

epoch 4, auc: 77.975  Time: 0 minutes, 16 seconds


100%|██████████| 107/107 [00:14<00:00,  7.17it/s]
  1%|          | 1/107 [00:00<00:18,  5.68it/s]

epoch 5, auc: 77.393  Time: 0 minutes, 15 seconds


100%|██████████| 107/107 [00:15<00:00,  6.93it/s]
  1%|          | 1/107 [00:00<00:11,  9.35it/s]

epoch 6, auc: 76.957  Time: 0 minutes, 16 seconds


100%|██████████| 107/107 [00:16<00:00,  6.55it/s]
  1%|          | 1/107 [00:00<00:16,  6.44it/s]

epoch 7, auc: 76.329  Time: 0 minutes, 16 seconds


100%|██████████| 107/107 [00:15<00:00,  6.72it/s]
  1%|          | 1/107 [00:00<00:12,  8.67it/s]

epoch 8, auc: 75.727  Time: 0 minutes, 16 seconds


100%|██████████| 107/107 [00:13<00:00,  7.73it/s]
  2%|▏         | 2/107 [00:00<00:09, 10.67it/s]

epoch 9, auc: 75.845  Time: 0 minutes, 14 seconds


100%|██████████| 107/107 [00:17<00:00,  6.21it/s]
  1%|          | 1/107 [00:00<00:16,  6.27it/s]

epoch 10, auc: 75.114  Time: 0 minutes, 17 seconds


100%|██████████| 107/107 [00:16<00:00,  6.38it/s]
  1%|          | 1/107 [00:00<00:12,  8.51it/s]

epoch 11, auc: 75.019  Time: 0 minutes, 17 seconds


100%|██████████| 107/107 [00:15<00:00,  6.71it/s]
  1%|          | 1/107 [00:00<00:12,  8.40it/s]

epoch 12, auc: 74.575  Time: 0 minutes, 16 seconds


100%|██████████| 107/107 [00:17<00:00,  6.11it/s]


epoch 13, auc: 75.089  Time: 0 minutes, 18 seconds
	Emotion	AUC	Accuracy 
	anger	0.8284	0.7675
	antic	0.6372	0.8589
	disgu	0.8247	0.7472
	fear	0.8369	0.8984
	joy	0.8597	0.7765
	love	0.8819	0.8837
	optim	0.8127	0.7675
	pessi	0.7282	0.8849
	sadne	0.7629	0.7630
	surpr	0.7399	0.9605
	trust	0.6876	0.9515
              precision    recall  f1-score   support

       anger       0.68      0.64      0.66       315
anticipation       0.00      0.00      0.00       124
     disgust       0.64      0.68      0.66       319
        fear       0.72      0.41      0.53       121
         joy       0.86      0.60      0.71       400
        love       0.63      0.52      0.57       132
    optimism       0.78      0.46      0.58       307
   pessimism       0.45      0.10      0.16       100
     sadness       0.64      0.47      0.54       265
    surprise       0.00      0.00      0.00        35
       trust       0.00      0.00      0.00        43

   micro avg       0.71      0.49      0.58      