## char-CNN emotion analysis

In [98]:
import pandas as pd
import torch
import torch.autograd as autograd
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchtext import vocab
from torchtext import data
from torchtext.data import Field
from torchtext.data import TabularDataset
from torchtext.data import Iterator
from torchtext.vocab import GloVe
import numpy as np
from tqdm import tqdm, tqdm_notebook
import pickle
import dill

In [99]:
batch_size = 64

In [100]:
def tokenizer(x):
    return x.split()

In [101]:
BOS_WORD = '<s>'
EOS_WORD = '</s>'
BLANK_WORD = "<blank>"
max_vocab = 8000
fix_length=30

In [102]:
CHAR = Field(sequential=True, tokenize=lambda x: list(x), pad_token=BLANK_WORD, lower=True, batch_first=True, fix_length=fix_length)
LABEL = Field(sequential=False, unk_token=None, tokenize=lambda x: x)

In [103]:
#https://www.crowdflower.com/wp-content/uploads/2016/07/text_emotion.csv
train_data = TabularDataset(path='../data/text_emotion.csv', 
                            format='csv', 
                            skip_header=True,
                            fields=[("tweet_id", None),("sentiment", LABEL),("author", None),("content",CHAR)])

In [104]:
#glove = vocab.Vectors('../data/glove.6B.300d.txt')
#tqdm_notebook().pandas() 
#https://medium.com/@sonicboom8/sentiment-analysis-torchtext-55fb57b1fab8

In [105]:
#TEXT.build_vocab(train_data, max_size=max_vocab)
CHAR.build_vocab(train_data, min_freq=3)
LABEL.build_vocab(train_data)

In [106]:
CHAR.vocab.stoi

defaultdict(<function torchtext.vocab._default_unk_index()>,
            {'<unk>': 0,
             '<blank>': 1,
             ' ': 2,
             'e': 3,
             't': 4,
             'o': 5,
             'a': 6,
             'i': 7,
             'n': 8,
             's': 9,
             'r': 10,
             'h': 11,
             'l': 12,
             'd': 13,
             'm': 14,
             'u': 15,
             'y': 16,
             'g': 17,
             'w': 18,
             'c': 19,
             '.': 20,
             'p': 21,
             'f': 22,
             'b': 23,
             'k': 24,
             'v': 25,
             '!': 26,
             '@': 27,
             "'": 28,
             ',': 29,
             'j': 30,
             '?': 31,
             '/': 32,
             'x': 33,
             'z': 34,
             ';': 35,
             '-': 36,
             '&': 37,
             ':': 38,
             'q': 39,
             '1': 40,
             '2': 41,
             '0

In [107]:
one_example = train_data.examples[0]
one_example.content[:3]

['@', 't', 'i']

In [108]:
train_loader = Iterator(train_data, 
                        batch_size=batch_size, 
                        #device=-1, 
                        repeat=False, )

In [109]:
for batch in train_loader:
    break;
print(batch.content.shape)
print(batch.sentiment.shape)

torch.Size([64, 30])
torch.Size([64])


In [110]:
#TEXT.vocab.stoi

In [111]:
len(train_data)

40000

In [112]:
CHAR.vocab.itos[4]

't'

In [113]:
with open("model/CHAR.Field","wb")as f:
     dill.dump(CHAR,f)

In [114]:
with open("model/LABEL_CHAR.Field","wb")as f2:
     dill.dump(LABEL,f2)

In [115]:
def init_network(model, method='xavier', exclude='embedding', seed=123):
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    for name, w in model.named_parameters():
        if not exclude in name:
            if 'weight' in name:
                if method is 'xavier':
                    nn.init.xavier_normal_(w)
                elif method is 'kaiming':
                    nn.init.kaiming_normal_(w)
                else:
                    nn.init.normal_(w)
            elif 'bias' in name:
                nn.init.constant_(w, 0.0)
            else: 
                pass

In [116]:
def print_model(model, ignore='embedding'):
    total = 0
    for name, w in model.named_parameters():
        if not ignore or ignore not in name:
            total += w.nelement()
            print('{} : {}  {} parameters'.format(name, w.shape, w.nelement()))
    print('-------'*4)
    print('Total {} parameters'.format(total))

In [117]:
with open("model/CHAR.Field","rb")as f:
     CHAR=dill.load(f)
        
with open("model/LABEL_CHAR.Field","rb")as f2:
     LABEL=dill.load(f2)

In [118]:
'''
batch_size=1024
epochs=200
embidding_dim = 300
seq_length = 50
vocab_size = len(TEXT.vocab.itos)
num_filters = 128
kernel_sizes = [1,2,3,4,56,]
hidden_dim = 128 # hidden size of fully conntected layer
label_size = len(LABEL.vocab)
print_every = 1000
'''

'\nbatch_size=1024\nepochs=200\nembidding_dim = 300\nseq_length = 50\nvocab_size = len(TEXT.vocab.itos)\nnum_filters = 128\nkernel_sizes = [1,2,3,4,56,]\nhidden_dim = 128 # hidden size of fully conntected layer\nlabel_size = len(LABEL.vocab)\nprint_every = 1000\n'

In [119]:
char_vocab_size = len(CHAR.vocab.itos)
char_embed_dim = 15
word_vocab_size = len(LABEL.vocab.itos)
word_embed_dim = 128

kernel_widths = [1, 2, 3, 4, 5, 6]
kernel_nums = [25, 50, 75, 100, 125, 150]
dropout_prob = 0.5

rnn_hidden = 300

high_layers = 2
lstm_num_layers = 2

param_init = 0.05
learning_rate_decay = 0.5
decay_when = 1.0
learning_rate = 1.0


max_epoch = 25
max_steps = 10000
max_sent_len = 35
max_word_len = 30
clip = 5.0

In [120]:
class Highway(nn.Module):
    def __init__(self, input_size):
        super(Highway, self).__init__()
        self.fc1 = nn.Linear(input_size, input_size, bias=True)
        self.fc2 = nn.Linear(input_size, input_size, bias=True)

    def forward(self, input):
        """
        G = relu(x, Wg)
        T = sigmoid(x, Wt)
                                   |x, T == 0
        y = G * T + x * (1. - T) = |
                                   |G, T == 1
        """
        t = F.sigmoid(self.fc1(x))
        return torch.mul(t, F.relu(self.fc2(x))) + torch.mul(1-t, x)

In [121]:
class CharCNN(nn.Module):
    #num_filters = out-channels
    def __init__(self, char_vocab_size, char_embed_dim, kernel_sizes, kernel_widths):
        super(CharCNN, self).__init__()
        
        self.kernel_sizes = kernel_sizes
        self.kernel_widths = kernel_widths
        
        self.kernel = list(zip(kernel_sizes, kernel_widths))
        self.convs = nn.ModuleList([nn.Conv2d(in_channels=1, out_channels=out_channel, kernel_size=(char_embed_dim, filter_width)) 
                                      for out_channel, filter_width in self.kernel])

    
    def forward(self, x):
        pooled = []
        for i, conv in enumerate(self.convs):
            conved = F.tanh(conv(x)).squeeze(3)
            pooled.append(F.max_pool1d(conved, conved.shape[2]).squeeze(2))
        
        cat = torch.cat(cat, 1)
        
        return cat

In [122]:
class CharCNN_LSTM(nn.Module):
    """
    CNN + highway network + LSTM
    # Input: 
        4D tensor with shape [batch_size, in_channel, height, width]
    # Output:
        2D Tensor with shape [batch_size, vocab_size]
    # Arguments:
        char_emb_dim: the size of each character's embedding
        word_emb_dim: the size of each word's embedding
        vocab_size: num of unique words
        num_char: num of characters
        use_gpu: True or False
    """
    def __init__(self, char_vocab_size, char_embed_dim, word_vocab_size, word_embed_dim, 
                 kernel_sizes, kernel_widths, lstm_num_layers, dropout_prob):
        super(CharCNN_LSTM, self).__init__()
        self.char_vocab_size = char_vocab_size
        self.char_embed_dim = char_embed_dim
        self.word_vocab_size = word_vocab_size
        self.word_embed_dim = word_embed_dim
        self.lstm_num_layers = lstm_num_layers
        self.word_embed_dim = word_embed_dim
        
        self.char_embed = nn.Embedding(char_vocab_size, char_embed_dim, padding_idx=1)
        self.conv = CharCNN(char_vocab_size, char_embed_dim, kernel_sizes, kernel_widths)
        
        # highway net
        self.highway_input_dim = sum([x for x in kernel_widths])
        self.batch_norm = nn.BatchNorm1d(self.highway_input_dim, affine=False)
        self.highway1 = Highway(self.highway_input_dim)
        self.highway2 = Highway(self.highway_input_dim)
        
        # LSTM
        self.lstm = nn.LSTM(input_size=self.highway_input_dim, 
                           hidden_size = self.word_embed_dim, 
                           num_layers = lstm_num_layers, 
                           dropout = dropout_prob,
                           batch_first=True)
        
        # output layer
        self.drop = nn.Dropout(dropout_prob)
        self.linear = nn.Linear(self.word_embed_dim, self.word_vocab_size)

    def init_hidden(self, batch_size=1):
        ''' Initializes hidden state '''
        # Create two new tensors with sizes n_layers x batch_size x hidden_dim,
        # initialized to zero, for hidden state and cell state of LSTM
        weight = next(self.parameters()).data
        
        if (train_on_gpu):
            hidden = (weight.new(self.lstm_num_layers, batch_size, self.word_embed_dim).zero_().cuda(),
                  weight.new(self.lstm_num_layers, batch_size, self.word_embed_dim).zero_().cuda())
        else:
            hidden = (weight.new(self.lstm_num_layers, batch_size, self.word_embed_dim).zero_(),
                      weight.new(self.lstm_num_layers, batch_size, self.word_embed_dim).zero_())
        return hidden

    def forward(self, x, hidden): # input <maxlen, batch, wdlen>
        print(x.shape)
        print(hidden[0].shape)
        #lstm_batch_size = x.size()[0]
        #lstm_seq_len = x.size()[1]
        #max_word_len = x.size()[2]
        #max_sent_len, batch, max_word_len = input.shape
        
        x_ = x.view(-1, max_word_len)

        emb = self.char_embed(x_) # todo <maxlen * batch> --> <maxlen * batch, wembed>
        #emb = self.drop(emb)
        emb = emb.unsqueeze(1)
        
        #x = torch.transpose(x.view(x.size()[0], 1, x.size()[1], -1), 2, 3)

        cnn = self.conv(emb) # <maxlen * batch, cedim> --> <maxlen * batch, cnn_size>

        h_ = self.highway1(cnn) # todo <maxlen * batch, cnn_size> --> <maxlen * batch, cnn_size>
        h_ = self.highway2(h_)
        
        h_ = h_.view(lstm_batch_size, batch, h_.size(-1)) # <maxlen, batch, cnn_size>
        #x = x.contiguous().view(lstm_batch_size,lstm_seq_len, -1)
        
        output, hidden = self.lstm(h_, hidden) # todo <maxlen, batch, hdim>, <nlayer, batch, hdim>
        output = self.drop(output)

        decoded = output.view(output.size(0) * output.size(1), output.size(2)) # todo <maxlen * batch, hdim>
        #x = x.contiguous().view(lstm_batch_size*lstm_seq_len, -1)
        
        decoded = self.linear(decoded) # todo <maxlen * batch, vsize>
        #decoded = decoded.view(output.size(0), output.size(1), decoded.size(1)) # todo <maxlen, batch, vsize>

        return decoded, hidden

In [123]:
# GPU 사용할 수 있는지 확인
train_on_gpu = torch.cuda.is_available()
if(train_on_gpu):
    print('Training on GPU!')
else: 
    print('No GPU available, training on CPU; consider making n_epochs very small.')
#train_on_gpu = False

Training on GPU!


In [124]:
model = CharCNN_LSTM(char_vocab_size, char_embed_dim, word_vocab_size, word_embed_dim, 
                     kernel_nums, kernel_widths, lstm_num_layers, dropout_prob)
init_network(model)
if(train_on_gpu):
    model.cuda()
model.train()

CharCNN_LSTM(
  (char_embed): Embedding(73, 15, padding_idx=1)
  (conv): CharCNN(
    (convs): ModuleList(
      (0): Conv2d(1, 25, kernel_size=(15, 1), stride=(1, 1))
      (1): Conv2d(1, 50, kernel_size=(15, 2), stride=(1, 1))
      (2): Conv2d(1, 75, kernel_size=(15, 3), stride=(1, 1))
      (3): Conv2d(1, 100, kernel_size=(15, 4), stride=(1, 1))
      (4): Conv2d(1, 125, kernel_size=(15, 5), stride=(1, 1))
      (5): Conv2d(1, 150, kernel_size=(15, 6), stride=(1, 1))
    )
  )
  (batch_norm): BatchNorm1d(21, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
  (highway1): Highway(
    (fc1): Linear(in_features=21, out_features=21, bias=True)
    (fc2): Linear(in_features=21, out_features=21, bias=True)
  )
  (highway2): Highway(
    (fc1): Linear(in_features=21, out_features=21, bias=True)
    (fc2): Linear(in_features=21, out_features=21, bias=True)
  )
  (lstm): LSTM(21, 128, num_layers=2, batch_first=True, dropout=0.5)
  (drop): Dropout(p=0.5)
  (linear): Linear(in

In [125]:
#criterion = F.cross_entropy
criterion = nn.CrossEntropyLoss()
#criterion = nn.BCEWithLogitsLoss()
#optimizer = optim.Adam(model.parameters(), lr=1e-3)
#optimizer = torch.optim.SGD(model.parameters(), lr=1e-3,momentum=0.8)
optimizer = torch.optim.SGD(model.parameters(), 
                               lr = learning_rate, 
                               momentum=0.85)
print_model(model, ignore=None)

char_embed.weight : torch.Size([73, 15])  1095 parameters
conv.convs.0.weight : torch.Size([25, 1, 15, 1])  375 parameters
conv.convs.0.bias : torch.Size([25])  25 parameters
conv.convs.1.weight : torch.Size([50, 1, 15, 2])  1500 parameters
conv.convs.1.bias : torch.Size([50])  50 parameters
conv.convs.2.weight : torch.Size([75, 1, 15, 3])  3375 parameters
conv.convs.2.bias : torch.Size([75])  75 parameters
conv.convs.3.weight : torch.Size([100, 1, 15, 4])  6000 parameters
conv.convs.3.bias : torch.Size([100])  100 parameters
conv.convs.4.weight : torch.Size([125, 1, 15, 5])  9375 parameters
conv.convs.4.bias : torch.Size([125])  125 parameters
conv.convs.5.weight : torch.Size([150, 1, 15, 6])  13500 parameters
conv.convs.5.bias : torch.Size([150])  150 parameters
highway1.fc1.weight : torch.Size([21, 21])  441 parameters
highway1.fc1.bias : torch.Size([21])  21 parameters
highway1.fc2.weight : torch.Size([21, 21])  441 parameters
highway1.fc2.bias : torch.Size([21])  21 parameters
hig

In [126]:
epochs = 20

In [131]:
best_acc = 0.0
counter = 0
index = 0

for e in range(epochs):
    for i,batch in enumerate(train_loader):
        counter += 1
        
        #if len(batch) != batch_size: continue
        if(train_on_gpu):
            inputs, targets = Variable(batch.content).cuda(), Variable(batch.sentiment).cuda()
        else:
            inputs, targets = batch.content, batch.sentiment
        counter += 1
        model.zero_grad()
        
        hidden_state = model.init_hidden(batch_size)
        print(inputs[0])
        output, hidden_state = model(inputs, hidden_state)
        #print("output: ", output.shape)
        #print("targets: ", targets.shape)
        
        output = logits.contiguous().view(-1, word_vocab_size)
        
        #loss = criterion(output, targets)
        loss = F.cross_entropy(output, targets.view(-1))
        loss.backward()
        #torch.nn.utils.clip_grad_norm(net.parameters(), 5, norm_type=2)
        optimizer.step()
        
        if counter % print_every == 0:
            print("Epoch: {}/{}...".format(e+1, epochs),
                      "Step: {}...".format(counter),
                      "Loss: {:.4f}...".format(loss.item()))

tensor([ 4, 11,  3,  2, 19, 13,  2,  2, 21, 12,  6, 16,  3, 10,  2,  7,  8,  2,
        14, 16,  2, 19,  6, 10,  2,  7,  9,  2, 23, 10], device='cuda:0')
torch.Size([64, 30])
torch.Size([2, 64, 128])


RuntimeError: Expected 3-dimensional tensor, but got 4-dimensional tensor for argument #1 'self' (while checking arguments for max_pool1d)

## model save and load

In [75]:
import os
filename = "document_cls_text_cnn10.pth"
PATH = os.path.join("model", filename)
#torch.save(model.state_dict(), PATH)

In [76]:
#model = TextCNN(TEXT.vocab.vectors, TEXT.vocab.stoi[TEXT.pad_token], num_filters, kernel_sizes, label_size, 0.5)
model = TextCNN(TEXT.vocab.vectors, TEXT.vocab.stoi[TEXT.pad_token], vocab_size, embidding_dim, num_filters, kernel_sizes, label_size, 0.1)

In [77]:
model.load_state_dict(torch.load(PATH))

In [226]:
#sentence = "How are YOU convinced that I have always wanted you? What signals did I give off...damn I think I just lost another friend"
#sentence = "The storm is here and the electricity is gone"
#sentence = "Damm servers still down  i need to hit 80 before all the koxpers pass me"
#sentence = "Need to pack for CALI CALI! Cannot waittt! Thinking a glass of wine is in order to celebrate my weekend vaca. Still work 2morrow, tho."
#sentence = "I'm worried I can do anything"
##sentence = "I felt ecstatic when I passed my exam"
#sentence = "I was overjoyed at the birth of my son."
sentence = "During the Christmas holidays I felt wonderfully merry."
#sentence = "I’m feeling a little low at the moment."
#sentence = "I was so annoyed when I failed my English test."
#sentence = "Afraid of your own shadow"

In [227]:
s = [TEXT.vocab.stoi[word.lower()] for word in tokenizer(sentence)]
s

[828, 4, 3278, 1925, 2, 872, 0, 0]

In [228]:
nse = np.asarray(s)
feature_tensor = torch.from_numpy(nse)
feature_tensor = feature_tensor.unsqueeze(0)
batch_size = feature_tensor.size(0)

In [229]:
if(train_on_gpu):
    feature_tensor = feature_tensor.cuda()
    model.cuda()

model.eval()
print(feature_tensor.shape)

torch.Size([1, 8])


In [230]:
output = model(feature_tensor).squeeze()
output

tensor([  3.4776,   3.9867,  -1.0742,   1.3718,   2.4690,  -5.4473,  -7.9056,
        -15.8092, -10.7468,  -4.9786,  -8.9972, -10.3861, -16.7447],
       device='cuda:0', grad_fn=<SqueezeBackward0>)

In [231]:
output.type(torch.FloatTensor)

tensor([  3.4776,   3.9867,  -1.0742,   1.3718,   2.4690,  -5.4473,  -7.9056,
        -15.8092, -10.7468,  -4.9786,  -8.9972, -10.3861, -16.7447],
       grad_fn=<CopyBackwards>)

In [232]:
pred = F.softmax(output)

  """Entry point for launching an IPython kernel.


In [233]:
#pred.type(torch.FloatTensor)

In [234]:
val,idx = pred.sort(descending=True)

In [235]:
val

tensor([5.2631e-01, 3.1634e-01, 1.1538e-01, 3.8514e-02, 3.3369e-03, 6.7245e-05,
        4.2082e-05, 3.6016e-06, 1.2089e-06, 3.0147e-07, 2.1017e-07, 1.3304e-09,
        5.2203e-10], device='cuda:0', grad_fn=<SortBackward>)

In [236]:
idx

tensor([ 1,  0,  4,  3,  2,  9,  5,  6, 10, 11,  8,  7, 12], device='cuda:0')

In [246]:
value = []
for v in val.tolist():
    v = round(v, 4)
    value.append(v)
    print(v)

0.5263
0.3163
0.1154
0.0385
0.0033
0.0001
0.0
0.0
0.0
0.0
0.0
0.0
0.0


In [247]:
value[:4]

[0.5263, 0.3163, 0.1154, 0.0385]

In [248]:
emotion = []
for v in idx.tolist():
    emotion.append(LABEL.vocab.itos[v])
    print(v)

1
0
4
3
2
9
5
6
10
11
8
7
12


In [249]:
emotion[:4]

['worry', 'neutral', 'love', 'sadness']

In [259]:
result = zip(emotion, value)

<zip at 0x7fc776133d08>

In [261]:
for z in result:
    print(z)

('worry', 0.5263)
('neutral', 0.3163)
('love', 0.1154)
('sadness', 0.0385)
('happiness', 0.0033)
('empty', 0.0001)
('surprise', 0.0)
('fun', 0.0)
('enthusiasm', 0.0)
('boredom', 0.0)
('hate', 0.0)
('relief', 0.0)
('anger', 0.0)


In [74]:
'''
_, predicted = torch.max(output, 0)
value = predicted.data.tolist()
value
'''

3

In [75]:
LABEL.vocab.itos[value]

'sadness'

In [41]:
LABEL.vocab.itos

['neutral',
 'worry',
 'happiness',
 'sadness',
 'love',
 'surprise',
 'fun',
 'relief',
 'hate',
 'empty',
 'enthusiasm',
 'boredom',
 'anger']