In [1]:
import torch
import torch.nn as nn
from torch import autograd
from torch import optim
import torch.nn.functional as F
import numpy as np

In [2]:
torch.cuda.is_available()

True

In [3]:
f = open('data/input.txt', 'r')
text = f.read()
f.close()

# for i in range(len(text)):
#     if text[i] == '<start>\r\n':
#         text[i] = '@\r\n'
#     elif text[i] == '<end>\r\n':
#         text[i] = '*\r\n'
#     elif text[i] == '<end>':
#         text[i] = '*'

In [4]:
data = '\n'.join(text.splitlines())

In [5]:
# vocabulary lookup
dictionary = {}
count = 0
for d in data:
    if d not in dictionary:
        dictionary[d] = count
        count += 1

In [6]:
# dataset = torch.zeros(len(data), len(dictionary))
# for i in range(len(data)):
#     dataset[i, dictionary[data[i]]] = 1

In [7]:
trainset = data[0:int(len(data)*0.8)]
testset = data[int(len(data)*0.8):]

In [8]:
type(len(trainset))

int

In [9]:
def prepare_sequence(seq, to_ix):
    idxs = [to_ix[w] for w in seq]
    tensor = torch.LongTensor(idxs)
    return tensor

In [10]:
class LSTM(nn.Module):
    def __init__(self, embedding_dim, hidden_dim, vocab_size, hidden_layer):
        super(LSTM, self).__init__()
        self.hidden_dim = hidden_dim
        self.hidden_layer = hidden_layer
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        
        self.word_embeddings = nn.Embedding(self.vocab_size, self.embedding_dim)
        self.lstm = nn.LSTM(self.embedding_dim, self.hidden_dim, self.hidden_layer)

        self.hidden2tag = nn.Linear(hidden_dim, vocab_size)

    def init_hidden(self, batch):
        # Before we've done anything, we dont have any hidden state.
        # Refer to the Pytorch documentation to see exactly
        # why they have this dimensionality.
        # The axes semantics are (num_layers, minibatch_size, hidden_dim)
        return (autograd.Variable(torch.zeros(self.hidden_layer, batch, self.hidden_dim)).cuda(),
                autograd.Variable(torch.zeros(self.hidden_layer, batch, self.hidden_dim)).cuda())

    def forward(self, sentence, hidden):
        '''
        param: sentence batch*chunk
        type: LongTensor Variable
        '''
        batch = sentence.size(0)
        embeds = self.word_embeddings(sentence)
        lstm_out, hidden = self.lstm(
            embeds.view(1, batch, -1), hidden)
        tag_space = self.hidden2tag(lstm_out.view(batch, -1))
        tag_scores = F.log_softmax(tag_space)

        return tag_scores, hidden

In [11]:
def random_training_set(chunk, batch):
    inp = torch.LongTensor(batch, chunk)
    target = torch.LongTensor(batch, chunk)
    
    for bi in range(batch):
        start_index = np.random.randint(0, len(trainset)-chunk)
        end_index = start_index + chunk + 1
        tmp = trainset[start_index:end_index]
        inp[bi] = prepare_sequence(tmp[:-1], dictionary)
        target[bi] = prepare_sequence(tmp[1:], dictionary)
        
    inp = autograd.Variable(inp).cuda()
    target = autograd.Variable(target).cuda()
    
    return inp, target

In [12]:
batch = 1000
chunk = 100

model = LSTM(embedding_dim=100, hidden_dim=100, vocab_size=len(dictionary), hidden_layer=1)
model.cuda()
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.1)

for epoch in range(300):  # again, normally you would NOT do 300 epochs, it is toy data
    print 'epoch=%d'%(epoch)
    
    hidden = model.init_hidden(batch)
    
    model.zero_grad()
    loss = 0
    
    for c in range(chunk):
        sentence_in, targets = random_training_set(chunk, batch)
        tag_scores, hidden = model(sentence_in[:,c], hidden)
        
        loss += loss_function(tag_scores.view(batch, -1), targets[:, c])
        
    loss.backward()
    optimizer.step()
        
    print loss.data[0]/chunk

epoch=0
4.53931762695
epoch=1
3.62690887451
epoch=2
5.51109558105
epoch=3
3.71547180176
epoch=4
3.37707336426
epoch=5
3.19658752441
epoch=6
3.13354187012
epoch=7
3.063828125
epoch=8
3.00906066895
epoch=9
2.97012939453
epoch=10
2.95710998535
epoch=11
2.92116516113
epoch=12
2.90599060059
epoch=13
2.88698883057
epoch=14
2.86670623779
epoch=15
2.85670349121
epoch=16
2.85456939697
epoch=17
2.84350585938
epoch=18
2.83430969238
epoch=19
2.82057800293
epoch=20
2.82399536133
epoch=21
2.80758728027
epoch=22
2.81134216309
epoch=23
2.80684783936
epoch=24
2.80012542725
epoch=25
2.79262390137
epoch=26
2.78952331543
epoch=27
2.78724060059
epoch=28
2.79014343262
epoch=29
2.78446990967
epoch=30
2.78043060303
epoch=31
2.78165283203
epoch=32
2.77691345215
epoch=33
2.77518859863
epoch=34
2.77684234619
epoch=35
2.76485321045
epoch=36
2.76568084717
epoch=37
2.76646148682
epoch=38
2.77047332764
epoch=39
2.76773925781
epoch=40
2.76971923828
epoch=41
2.7659085083
epoch=42
2.76830505371
epoch=43
2.76033630371
e

In [13]:
reverse_dict = {}

for k,v in dictionary.items():
    reverse_dict[v] = k

In [14]:
def generate(model, prime_str='<start>', predict_len=1000, temperature=0.8):
    hidden = model.init_hidden(1)
    prime_input = autograd.Variable(prepare_sequence(prime_str, dictionary).unsqueeze(0))

    prime_input = prime_input.cuda()
    predicted = prime_str

    # Use priming string to "build up" hidden state
    for p in range(len(prime_str) - 1):
        _, hidden = model(prime_input[:,p], hidden)
        
    inp = prime_input[:,-1]
    
    for p in range(predict_len):
        output, hidden = model(inp, hidden)
        
        # Sample from the network as a multinomial distribution
        output_dist = output.data.view(-1).div(temperature).exp()
        top_i = torch.multinomial(output_dist, 1)[0]

        # Add predicted character to string and use as next input
        predicted_char = reverse_dict[top_i]
        predicted += predicted_char
        inp = autograd.Variable(prepare_sequence(predicted_char, dictionary).unsqueeze(0))
        inp = inp.cuda()

    return predicted

In [15]:
generate(model)

'<start>\nAG |Ad| BA3 !bg|16/.!B2dgaroge g/2 | d/ d6\nA2A |\n<ere\nDE2 !f| B F dead | conesigg2| \nZ:|: | BE/2d |\nM:\n<s A8\nZ:J.fe3 B,| c2e age/ed2 | GAB| |B2 a|\nFB | |g2| ? G3\nX:A ond2F2E|ared dBA/2 Ma d2 BAFAGFA |e/d B| B art.fabs\nc| | f2| cAGED2cgfe2d | eferilou |\nA | a| d| | |\nLig A2| A2| fet B>\nK:105\nPra2 G2 G c2 BA2 | nd W:j\nL:ga e A) | "A | G2B |B |B |c | |2/and A2/e2006/2 e d c c2B/2 Bc2 Ja \nT:|\nR:2|\n<e/e.f/ | | | AB2 f/d BG|  | B d f2G/ d2dee |f2| :2 G2d B2 duvinnmas | 2Be2d g2FG|\n<s |(3AA=B|c/4GB2 c2 d/f2fd d/ ena Gmancefe -\nV:holl | B/c2GG>E/dcA/A | (3cABAB2 gg ABA2d agrc2 d | ec  |]\n| dee)|| | de| H:P:| g|A4|arinst/8\n<sc arndBd=etefd2 c/a/dc>\nT: | Miduraefd2|\n<shndBA dd A edcA Ifrie pas mate2 | (3dd>\nK:Star\nK:|Fragfe |e A2 |\nC\nV:16 (3ef/gallazuvirgf d2d2elo e\n<stide2G2 | A| g3 B D2 B2B2 B2|f id4\ng| c d2 edeena | B2 FG2:D:C|\nR:Lee B allllenel G d3/F A d Gcec\'soutatisc GF f d2 |~ge Bc |AF/gfefec2 e alloueaurige2c>\nT:hn- G/etrar D| d2B/e effe\nG n-1