In [1]:
import torch
import torch.nn as nn
from torch import autograd
from torch import optim
import torch.nn.functional as F
import numpy as np

In [3]:
torch.cuda.is_available()

True

In [4]:
f = open('data/input.txt', 'r')
text = f.readlines()
f.close()

for i in range(len(text)):
    if text[i] == '<start>\r\n':
        text[i] = '@\r\n'
    elif text[i] == '<end>\r\n':
        text[i] = '*\r\n'
    elif text[i] == '<end>':
        text[i] = '*'

In [5]:
data = ''.join(text)

In [6]:
# vocabulary lookup
dictionary = {}
count = 0
for d in data:
    if d not in dictionary:
        dictionary[d] = count
        count += 1

In [7]:
# dataset = torch.zeros(len(data), len(dictionary))
# for i in range(len(data)):
#     dataset[i, dictionary[data[i]]] = 1

In [8]:
trainset = data[0:int(len(data)*0.8)]
testset = data[int(len(data)*0.8):]

In [9]:
def prepare_sequence(seq, to_ix):
    idxs = [to_ix[w] for w in seq]
    tensor = torch.LongTensor(idxs)
    return tensor

In [10]:
class LSTM(nn.Module):
    def __init__(self, embedding_dim, hidden_dim, vocab_size, hidden_layer, batch):
        super(LSTM, self).__init__()
        self.hidden_dim = hidden_dim
        self.hidden_layer = hidden_layer
        self.batch = batch
        self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, hidden_layer)

        self.hidden2tag = nn.Linear(hidden_dim, vocab_size)
        self.hidden = self.init_hidden()

    def init_hidden(self):
        # Before we've done anything, we dont have any hidden state.
        # Refer to the Pytorch documentation to see exactly
        # why they have this dimensionality.
        # The axes semantics are (num_layers, minibatch_size, hidden_dim)
        return (autograd.Variable(torch.zeros(self.hidden_layer, self.batch, self.hidden_dim).cuda()),
                autograd.Variable(torch.zeros(self.hidden_layer, self.batch, self.hidden_dim).cuda()))

    def forward(self, sentence):
        embeds = self.word_embeddings(sentence)
        lstm_out, self.hidden = self.lstm(
            embeds.view(len(sentence)/self.batch, self.batch, -1), self.hidden)
        tag_space = self.hidden2tag(lstm_out.view(len(sentence), -1))
        tag_scores = F.softmax(tag_space)

        return self.hidden, tag_scores

In [None]:
batch = 100
chunk = 20

model = LSTM(embedding_dim=32, hidden_dim=100, vocab_size=len(dictionary), hidden_layer=1, batch=100)
model.cuda()
loss_function = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

for epoch in range(1000):  # again, normally you would NOT do 300 epochs, it is toy data
    print 'epoch=%d'%(epoch)
    
    num_chunk = len(trainset)/chunk
    m = np.random.choice(range(num_chunk), (num_chunk/batch, batch), replace=False)
    loss = 0
    
    
    for i in range(m.shape[0]):
        # Step 1. Remember that Pytorch accumulates gradients.
        # We need to clear them out before each instance
        model.zero_grad()

        # Also, we need to clear out the hidden state of the LSTM,
        # detaching it from its history on the last instance.
        model.hidden = model.init_hidden()
        
        # Step 2. Get our inputs ready for the network, that is, turn them into
        # Variables of word indices.
        
        train = ""
        targets = ""
        for j in m[i]:
            train += trainset[chunk*j:chunk*j+chunk-1]
            targets += trainset[chunk*j+1:chunk*j+chunk]
            
        sentence_in = autograd.Variable(prepare_sequence(train, dictionary).cuda())
        targets = autograd.Variable(prepare_sequence(targets, dictionary).cuda())

        # Step 3. Run our forward pass.
        tag_scores = model(sentence_in)

        # Step 4. Compute the loss, gradients, and update the parameters by
        #  calling optimizer.step()
        loss = loss_function(tag_scores, targets)
        
        loss.backward()
        optimizer.step()
        
    print loss

epoch=0
Variable containing:
 4.5429
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch=1
Variable containing:
 4.5427
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch=2
Variable containing:
 4.5426
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch=3
Variable containing:
 4.5423
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch=4
Variable containing:
 4.5422
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch=5
Variable containing:
 4.5420
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch=6
Variable containing:
 4.5418
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch=7
Variable containing:
 4.5418
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch=8
Variable containing:
 4.5413
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch=9
Variable containing:
 4.5411
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch=10
Variable containing:
 4.5405
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch=11
Variable containing:
 4.5400
[torch.cuda.FloatTensor of size 1 (GPU 0)]

epoch=12
Variable containi

In [42]:
reverse_dict = {}

for k,v in dictionary.items():
    reverse_dict[v] = k

In [51]:
start = ['@', '\r', '\n', 'X', ' ']
model.batch = 1
model.hidden = model.init_hidden()
predict = model(autograd.Variable(prepare_sequence(start, dictionary).cuda()))

res = np.random.choice(range(len(dictionary)) ,p = predict[-1].cpu().data.numpy().reshape(-1))

reverse_dict[res]

' '