In [1]:
import torch.nn as nn
import torch
from torch.autograd import Variable

import pandas as pd
import numpy as np

In [60]:
with open("../data/clean.pkl", "rb") as f:
    data = torch.load(f)

In [31]:
import time
import math

def timeSince(since):
    now = time.time()
    s = now - since
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)

In [82]:
# output_size = cell_size = hidden_size = input_size = embedding_size
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, cell_size, output_size):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.cell_size = cell_size
        self.gate = nn.Linear(input_size + hidden_size, cell_size)
        self.sigmoid = nn.Sigmoid()
        self.tanh = nn.Tanh()
        self.softmax = nn.LogSoftmax()
        self.output_size = output_size
        if self.output_size:
            self.output = nn.Linear(hidden_size, output_size)

    def forward(self, input, hidden, cell):
        combined = torch.cat((input, hidden), 1)
        f_gate = self.gate(combined)
        i_gate = self.gate(combined)
        o_gate = self.gate(combined)
        f_gate = self.sigmoid(f_gate)
        i_gate = self.sigmoid(i_gate)
        o_gate = self.sigmoid(o_gate)
        cell_helper = self.gate(combined)
        cell_helper = self.tanh(cell_helper)
        cell = torch.add(torch.mul(cell, f_gate), other=torch.mul(cell_helper, i_gate))
        hidden = torch.mul(self.tanh(cell), o_gate)
        if self.output_size:
            output = self.output(hidden)
            output = self.softmax(output)
            return output, hidden, cell
        else:
            return hidden, cell

    def initHidden(self):
        return Variable(torch.zeros(1, self.hidden_size))

    def initCell(self):
        return Variable(torch.zeros(1, self.cell_size))

In [20]:
def output2category(output):
    top_n, top_i = output.data.topk(1) # Tensor out of Variable with .data
    category_i = top_i[0][0]
    return all_categories[category_i]

In [105]:
def train(sentensor_list, category_tensor):
    s_hidden = sentence_model.initHidden()
    s_cell = sentence_model.initCell()
    w_hidden = word_model.initHidden()
    w_cell = word_model.initCell()
    s_optimizer.zero_grad()
    w_optimizer.zero_grad()
    
    for i in range(len(sentensor_list)):
        sentensor = Variable(sentensor_list[i])
        for j in range(sentensor.size()[0]):
            word_tensor = sentensor[j].view(1, sentensor.size()[1])
            w_hidden, w_cell = word_model(word_tensor, w_hidden, w_cell)
        output, s_hidden, s_cell = sentence_model(w_cell, s_hidden, s_cell)
        
    loss = criterion(output, category_tensor)
    loss.backward()
    
    w_optimizer.step()
    s_optimizer.step()
    return output, loss.data[0]

In [21]:
import random

def random_choice(l):
    return l[random.randint(0, len(l) - 1)]

def random_gen():
    sample = random_choice(data)
    sentensor_list = sample[0]
    category = sample[1]
    category_tensor = Variable(torch.LongTensor([all_categories.index(category)]))
    return sentensor_list, category_tensor, category

In [28]:
all_categories = [1, 2, 3, 4, 5]
n_categories = len(all_categories)
embedding_size = 50

In [29]:
# hyper-parameters
s_n_hidden = embedding_size
s_n_cell = embedding_size
w_n_hidden = embedding_size
w_n_cell = embedding_size

s_learning_rate = 0.005
s_gamma = 0.1
w_learning_rate = 0.005
w_gamma = 0.1

In [83]:
criterion = nn.NLLLoss()
sentence_model = LSTM(w_n_cell, s_n_hidden, s_n_cell, n_categories)
word_model = LSTM(embedding_size, w_n_hidden, w_n_cell, output_size=False)
s_optimizer = torch.optim.Adam(sentence_lstm.parameters(), lr=s_learning_rate, weight_decay=s_gamma)
w_optimizer = torch.optim.Adam(word_lstm.parameters(), lr=w_learning_rate, weight_decay=w_gamma)


In [104]:
# # test
# l, ct, c = random_gen()
# s_hidden = sentence_model.initHidden()
# s_cell = sentence_model.initCell()
# w_hidden = word_model.initHidden()
# w_cell = word_model.initCell()

# o, lo = train(l, ct)

In [103]:
n_epochs = 1000
print_every = 50
plot_every = 10

# Keep track of losses for plotting
current_loss = 0
all_losses = []

start = time.time()
for epoch in range(1, n_epochs + 1):
    sentensor_list, category_tensor, category = random_gen()
    output, loss = train(sentensor_list, category_tensor)
    current_loss += loss
     # Print epoch number, loss, name and guess
    if epoch % print_every == 0:
        guess = output2category(output)
        correct = '✓' if guess == category else '✗ (%s)' % category
        print('%d %d%% (%s) %.4f / %s %s' % (epoch, epoch / n_epochs * 100, timeSince(start), loss, guess, correct))

    # Add current loss avg to list of losses
    if epoch % plot_every == 0:
        all_losses.append(current_loss / plot_every)
        current_loss = 0

50 5% (0m 7s) 1.5958 / 4 ✗ (5)
100 10% (0m 15s) 1.5064 / 4 ✓
150 15% (0m 21s) 1.8005 / 4 ✗ (3)
200 20% (0m 29s) 1.5958 / 4 ✗ (5)
250 25% (0m 35s) 1.5064 / 4 ✓
300 30% (0m 42s) 1.5064 / 4 ✓
350 35% (0m 49s) 1.6060 / 4 ✗ (5)
400 40% (0m 56s) 1.5813 / 4 ✗ (5)
450 45% (1m 2s) 1.5064 / 4 ✓
500 50% (1m 8s) 1.5813 / 4 ✗ (5)
550 55% (1m 15s) 1.5064 / 4 ✓
600 60% (1m 22s) 1.5064 / 4 ✓
650 65% (1m 28s) 1.5064 / 4 ✓
700 70% (1m 35s) 1.5064 / 4 ✓
750 75% (1m 42s) 1.8005 / 4 ✗ (3)
800 80% (1m 48s) 1.5813 / 4 ✗ (5)
850 85% (1m 54s) 1.6060 / 4 ✗ (5)
900 90% (2m 1s) 1.5958 / 4 ✗ (5)
950 95% (2m 8s) 1.8005 / 4 ✗ (3)
1000 100% (2m 15s) 1.6060 / 4 ✗ (5)
