In [1]:
import torch
import torch.nn as nn
from torch import autograd
from torch import optim
import torch.nn.functional as F
import numpy as np

In [2]:
f = open('data/input.txt', 'r')
text = f.readlines()
f.close()

for i in range(len(text)):
    if text[i] == '<start>\r\n':
        text[i] = '@\r\n'
    elif text[i] == '<end>\r\n':
        text[i] = '*\r\n'
    elif text[i] == '<end>':
        text[i] = '*'

In [3]:
data = ''.join(text)

In [4]:
# vocabulary lookup
dictionary = {}
count = 0
for d in data:
    if d not in dictionary:
        dictionary[d] = count
        count += 1

In [5]:
dataset = torch.zeros(len(data), len(dictionary))
for i in range(len(data)):
    dataset[i, dictionary[data[i]]] = 1

In [6]:
trainset = dataset[0:int(len(data)*0.8)]
testset = dataset[int(len(data)*0.8):]

In [14]:
def prepare_sequence(seq, to_ix):
    idxs = [to_ix[w] for w in seq]
    tensor = torch.LongTensor(idxs)
    return tensor

In [19]:
class LSTM(nn.Module):
    def __init__(self, hidden_dim, vocab_size, hidden_layer):
        super(LSTM, self).__init__()
        self.hidden_dim = hidden_dim
        self.lstm = nn.LSTM(vocab_size, hidden_dim, hidden_layer)

        self.hidden2tag = nn.Linear(hidden_dim, vocab_size)
        self.hidden = self.init_hidden()

    def init_hidden(self):
        # Before we've done anything, we dont have any hidden state.
        # Refer to the Pytorch documentation to see exactly
        # why they have this dimensionality.
        # The axes semantics are (num_layers, minibatch_size, hidden_dim)
        return (autograd.Variable(torch.zeros(1, 1, self.hidden_dim).cuda()),
                autograd.Variable(torch.zeros(1, 1, self.hidden_dim)).cuda())

    def forward(self, sentence):
        lstm_out, self.hidden = self.lstm(
            sentence.view(sentence.size(0), 1, -1), self.hidden)
        tag_space = self.hidden2tag(lstm_out.view(sentence.size(0), -1))
        tag_scores = F.softmax(tag_space)

        return tag_scores

In [10]:
def percentage(p, target):
    count = 0
    for i, j in zip(predict(p), target):
        if i == j:
            count += 1
    
    return count*1.0/p.shape[0]

In [11]:
def predict(p):
    return np.argmax(p, axis=1)

In [12]:
def count(p, target):
    c = 0
    for i, j in zip(predict(p), target):
        if i == j:
            c += 1
    
    return c, p.shape[0]-1

In [22]:
model = LSTM(100, len(dictionary), 1)
model.cuda()
loss_function = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

for epoch in range(100):  # again, normally you would NOT do 300 epochs, it is toy data
    print 'epoch=%d'%(epoch)
    
    count_correct = 0
    total = 0
    
    for i in range(len(trainset)/25):
        # Step 1. Remember that Pytorch accumulates gradients.
        # We need to clear them out before each instance
        model.zero_grad()

        # Also, we need to clear out the hidden state of the LSTM,
        # detaching it from its history on the last instance.
        model.hidden = model.init_hidden()

        # Step 2. Get our inputs ready for the network, that is, turn them into
        # Variables of word indices.
        train = trainset[25*i:min(25*i+25, len(trainset))]
        sentence_in = autograd.Variable(train[0:-1].cuda())

        targets = autograd.Variable(prepare_sequence(data[25*i+1:min(25*i+25, len(trainset))], dictionary).cuda())

        # Step 3. Run our forward pass.
        tag_scores = model(sentence_in)

        # Step 4. Compute the loss, gradients, and update the parameters by
        #  calling optimizer.step()
        loss = loss_function(tag_scores, targets)
        
        a, b = count(tag_scores.cpu().data.numpy(), targets.cpu().data.numpy())
        count_correct += a
        total += b
        
        loss.backward()
        optimizer.step()
        
    print 'percentage is %f' % (count_correct*1.0/total)
    

epoch=0
percentage is 0.140176
epoch=1
percentage is 0.141375
epoch=2
percentage is 0.141375
epoch=3
percentage is 0.141375
epoch=4
percentage is 0.141375
epoch=5
percentage is 0.141375
epoch=6
percentage is 0.141375
epoch=7
percentage is 0.141375
epoch=8


KeyboardInterrupt: 

In [None]:
inputs = trainset[0:25]
tag_scores = model(autograd.Variable(inputs))
print percentage(tag_scores, )