In [1]:
import torch
import torch.nn as nn
from torch import autograd
from torch import optim
import torch.nn.functional as F
import numpy as np

In [2]:
torch.cuda.is_available()

True

In [3]:
f = open('data/input.txt', 'r')
text = f.read()
f.close()

# for i in range(len(text)):
#     if text[i] == '<start>\r\n':
#         text[i] = '@\r\n'
#     elif text[i] == '<end>\r\n':
#         text[i] = '*\r\n'
#     elif text[i] == '<end>':
#         text[i] = '*'

In [4]:
data = '\n'.join(text.splitlines())

In [5]:
# vocabulary lookup
dictionary = {}
count = 0
for d in data:
    if d not in dictionary:
        dictionary[d] = count
        count += 1

In [6]:
# dataset = torch.zeros(len(data), len(dictionary))
# for i in range(len(data)):
#     dataset[i, dictionary[data[i]]] = 1

In [7]:
trainset = data[0:int(len(data)*0.8)]
testset = data[int(len(data)*0.8):]

In [8]:
type(len(trainset))

int

In [9]:
def prepare_sequence(seq, to_ix):
    idxs = [to_ix[w] for w in seq]
    tensor = torch.LongTensor(idxs)
    return tensor

In [34]:
class LSTM(nn.Module):
    def __init__(self, embedding_dim, hidden_dim, vocab_size, hidden_layer):
        super(LSTM, self).__init__()
        self.hidden_dim = hidden_dim
        self.hidden_layer = hidden_layer
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        
        self.word_embeddings = nn.Embedding(self.vocab_size, self.embedding_dim)
        self.lstm = nn.LSTM(self.embedding_dim, self.hidden_dim, self.hidden_layer)

        self.hidden2tag = nn.Linear(hidden_dim, vocab_size)

    def init_hidden(self, batch):
        # Before we've done anything, we dont have any hidden state.
        # Refer to the Pytorch documentation to see exactly
        # why they have this dimensionality.
        # The axes semantics are (num_layers, minibatch_size, hidden_dim)
        return (autograd.Variable(torch.zeros(self.hidden_layer, batch, self.hidden_dim)).cuda(),
                autograd.Variable(torch.zeros(self.hidden_layer, batch, self.hidden_dim)).cuda())

    def forward(self, sentence, hidden):
        '''
        param: sentence batch*chunk
        type: LongTensor Variable
        '''
        batch = sentence.size(0)
        seq_len = sentence.size(1)
        
        embeds = self.word_embeddings(sentence)
        hidden = repackage(hidden)
        lstm_out, hidden = self.lstm(
            embeds.view(seq_len, batch, -1), hidden)
        tag_space = self.hidden2tag(lstm_out.view(batch * seq_len, -1))
        tag_scores = F.log_softmax(tag_space)

        return tag_space, hidden

In [27]:
class RNN(nn.Module):
    def __init__(self, embedding_dim, hidden_dim, vocab_size, hidden_layer, model="lstm"):
        super(RNN, self).__init__()
        self.hidden_dim = hidden_dim
        self.hidden_layer = hidden_layer
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.model = model.lower()
        
        self.word_embeddings = nn.Embedding(self.vocab_size, self.embedding_dim)
        
        if self.model == "lstm":
            self.rnn = nn.LSTM(self.embedding_dim, self.hidden_dim, self.hidden_layer)
        elif self.model == "gru":
            self.rnn = nn.GRU(self.embedding_dim, self.hidden_dim, self.hidden_layer)

        self.hidden2tag = nn.Linear(hidden_dim, vocab_size)

    def init_hidden(self, batch):
        # Before we've done anything, we dont have any hidden state.
        # Refer to the Pytorch documentation to see exactly
        # why they have this dimensionality.
        # The axes semantics are (num_layers, minibatch_size, hidden_dim)
        if self.model == "lstm":
            return (autograd.Variable(torch.zeros(self.hidden_layer, batch, self.hidden_dim)).cuda(),
                    autograd.Variable(torch.zeros(self.hidden_layer, batch, self.hidden_dim)).cuda())
        elif self.model == "gru":
            return autograd.Variable(torch.zeros(self.hidden_layer, batch, self.hidden_dim)).cuda()

    def forward(self, sentence, hidden):
        '''
        param: sentence batch*chunk
        type: LongTensor Variable
        '''
        batch = sentence.size(0)
        embeds = self.word_embeddings(sentence)
        lstm_out, hidden = self.rnn(
            embeds.view(1, batch, -1), hidden)
        tag_space = self.hidden2tag(lstm_out.view(batch, -1))
        tag_scores = F.log_softmax(tag_space)

        return tag_space, hidden

In [11]:
def random_training_set(chunk, batch):
    inp = torch.LongTensor(batch, chunk)
    target = torch.LongTensor(batch, chunk)
    
    for bi in range(batch):
        start_index = np.random.randint(0, len(trainset)-chunk)
        end_index = start_index + chunk + 1
        tmp = trainset[start_index:end_index]
        inp[bi] = prepare_sequence(tmp[:-1], dictionary)
        target[bi] = prepare_sequence(tmp[1:], dictionary)
        
    inp = autograd.Variable(inp).cuda()
    target = autograd.Variable(target).cuda()
    
    return inp, target

In [12]:
def get_data(chunk, batch, idx):
    inp = torch.LongTensor(batch, chunk)
    target = torch.LongTensor(batch, chunk)
    
    for bi in range(batch):
        start_index = idx*chunk*batch + bi*chunk
        end_index = start_index + chunk + 1
        tmp = trainset[start_index:end_index]
        inp[bi] = prepare_sequence(tmp[:-1], dictionary)
        target[bi] = prepare_sequence(tmp[1:], dictionary)
    
    inp = autograd.Variable(inp).cuda()
    target = autograd.Variable(target).cuda()
    
    return inp, target

In [13]:
batch = 500
chunk = 25

model = LSTM(embedding_dim=100, hidden_dim=100, vocab_size=len(dictionary), hidden_layer=1)
model.cuda()
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(500):  # again, normally you would NOT do 300 epochs, it is toy data
    print 'epoch=%d'%(epoch)
    total = len(trainset)/(chunk*batch)
    count = 0
    
    for i in range(total):
        model.zero_grad()
        hidden = model.init_hidden(batch)
        loss = 0
        
        for c in range(chunk):
            sentence_in, targets = get_data(chunk, batch, i)
            tag_scores, hidden = model(sentence_in[:,c], hidden)
        
            loss += loss_function(tag_scores.view(batch, -1), targets[:, c])
        
        loss.backward()
        optimizer.step()
        count += loss.data[0]
        
    print count/chunk/total

epoch=0
4.05202828407
epoch=1
3.37630232811
epoch=2
3.10436249733
epoch=3
2.91142810822
epoch=4
2.76256378651
epoch=5
2.64125528812
epoch=6
2.54008045673
epoch=7
2.45580788136
epoch=8
2.38534555435
epoch=9
2.32550617695
epoch=10
2.27387194633
epoch=11
2.22867096901
epoch=12
2.18908757687
epoch=13
2.15373054504
epoch=14
2.12157817841
epoch=15
2.09151194572
epoch=16
2.06416160583
epoch=17
2.03941650867
epoch=18
2.01744675159
epoch=19
1.99775773525
epoch=20
1.9768711853
epoch=21
1.9578553772
epoch=22
1.93939108372
epoch=23
1.92213325977
epoch=24
1.90628764153
epoch=25
1.89139939308
epoch=26
1.87738781452
epoch=27
1.86407149315
epoch=28
1.85140658855
epoch=29
1.83931227684
epoch=30
1.82771821976
epoch=31
1.81663653374
epoch=32
1.80603967667
epoch=33
1.79580555916
epoch=34
1.78601175785
epoch=35
1.77658897877
epoch=36
1.76742707729
epoch=37
1.75856098652
epoch=38
1.75005622864
epoch=39
1.74176419735
epoch=40
1.73360830784
epoch=41
1.72567183018
epoch=42
1.71804076195
epoch=43
1.71076043606


1.25909843206
epoch=348
1.25841743231
epoch=349
1.25804302931
epoch=350
1.25728212118
epoch=351
1.25700033188
epoch=352
1.25643195868
epoch=353
1.25596734285
epoch=354
1.25574924707
epoch=355
1.25541063309
epoch=356
1.25530815363
epoch=357
1.25490570307
epoch=358
1.2546721077
epoch=359
1.25462602615
epoch=360
1.2546650672
epoch=361
1.25444380522
epoch=362
1.25473217964
epoch=363
1.25459230185
epoch=364
1.25515897751
epoch=365
1.25490020752
epoch=366
1.25516789198
epoch=367
1.25555979729
epoch=368
1.25465925932
epoch=369
1.25437181473
epoch=370
1.25316890001
epoch=371
1.25306974649
epoch=372
1.25158408165
epoch=373
1.25110098362
epoch=374
1.25011707544
epoch=375
1.25016335011
epoch=376
1.2489689827
epoch=377
1.24830974579
epoch=378
1.24789441347
epoch=379
1.24751237154
epoch=380
1.24696381092
epoch=381
1.24665595055
epoch=382
1.24591355324
epoch=383
1.24562617779
epoch=384
1.24511323452
epoch=385
1.24508883953
epoch=386
1.24483576059
epoch=387
1.24446633577
epoch=388
1.24329316378
epoch

In [22]:
batch = 500
chunk = 25

model = RNN(embedding_dim=100, hidden_dim=100, vocab_size=len(dictionary), hidden_layer=1, model="gru")
model.cuda()
loss_function = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(1000):  # again, normally you would NOT do 300 epochs, it is toy data
    print 'epoch=%d'%(epoch)
    total = len(trainset)/(chunk*batch)
    count = 0
    
    for i in range(total):
        model.zero_grad()
        hidden = model.init_hidden(batch)
        loss = 0
        
        sentence_in, targets = get_data(chunk, batch, i)
        tag_scores, hidden = model(sentence_in, hidden)
        
            loss += loss_function(tag_scores.view(batch, -1), targets[:, c])
        
        loss.backward()
        optimizer.step()
        count += loss.data[0]
        
    print count/chunk/total

epoch=0
3.92497003555
epoch=1
3.2965842247
epoch=2
2.99583471298
epoch=3
2.79809431553
epoch=4
2.65146468163
epoch=5
2.53609183788
epoch=6
2.44317141056
epoch=7
2.36616415977
epoch=8
2.30114211082
epoch=9
2.24493903637
epoch=10
2.1956500721
epoch=11
2.15203997612
epoch=12
2.1132579565
epoch=13
2.07883539677
epoch=14
2.04812194347
epoch=15
2.02059249878
epoch=16
1.99548284054
epoch=17
1.97234442234
epoch=18
1.95089799881
epoch=19
1.93099084854
epoch=20
1.91253234863
epoch=21
1.89534791946
epoch=22
1.87925683022
epoch=23
1.8640729332
epoch=24
1.84970122814
epoch=25
1.83611222744
epoch=26
1.82317994595
epoch=27
1.81099963188
epoch=28
1.79947784424
epoch=29
1.78857741356
epoch=30
1.77823489666
epoch=31
1.76839202881
epoch=32
1.75900998116
epoch=33
1.75005531311
epoch=34
1.74149590015
epoch=35
1.73330029488
epoch=36
1.72544111252
epoch=37
1.71789622307
epoch=38
1.71064637184
epoch=39
1.70366761684
epoch=40
1.69693662167
epoch=41
1.69043411732
epoch=42
1.68414064884
epoch=43
1.67803841591
ep

1.30469118595
epoch=349
1.30447626829
epoch=350
1.3044121623
epoch=351
1.30392560959
epoch=352
1.30298696756
epoch=353
1.30304884434
epoch=354
1.302679739
epoch=355
1.30259854794
epoch=356
1.3026121664
epoch=357
1.30219451427
epoch=358
1.3033541131
epoch=359
1.30333715677
epoch=360
1.30347096205
epoch=361
1.30324645996
epoch=362
1.30318515539
epoch=363
1.30238721371
epoch=364
1.30132026672
epoch=365
1.30138770819
epoch=366
1.30105400324
epoch=367
1.30112087488
epoch=368
1.30040676117
epoch=369
1.30060921907
epoch=370
1.30053834677
epoch=371
1.30025102139
epoch=372
1.30011922359
epoch=373
1.29942417383
epoch=374
1.29973051786
epoch=375
1.29959027767
epoch=376
1.2989123559
epoch=377
1.29918704748
epoch=378
1.29933470011
epoch=379
1.29927657604
epoch=380
1.29918486834
epoch=381
1.29906962395
epoch=382
1.29868398666
epoch=383
1.29785015583
epoch=384
1.29801337481
epoch=385
1.29769214869
epoch=386
1.29747417688
epoch=387
1.29748245478
epoch=388
1.29727038383
epoch=389
1.29735674381
epoch=39

KeyboardInterrupt: 

In [33]:
def repackage(h):
    if type(h) == autograd.Variable:
        return autograd.Variable(h.data)
    else:
        return tuple(repackage(v) for v in h)

In [44]:
batch = 1000
chunk = 25

model = LSTM(embedding_dim=100, hidden_dim=100, vocab_size=len(dictionary), hidden_layer=1)
model.cuda()
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(1000):  # again, normally you would NOT do 300 epochs, it is toy data
    print 'epoch=%d'%(epoch)
    total = len(trainset)/(chunk*batch)
    loss = 0
    hidden = model.init_hidden(batch)
    
    for i in range(total):
        model.zero_grad()
        
        sentence_in, targets = get_data(chunk, batch, i)
        tag_scores, hidden = model(sentence_in, hidden)
        
        
        loss = loss_function(tag_scores.view(-1, len(dictionary)), targets.view(-1))
        
        loss.backward()
        optimizer.step()
 
    print loss.data[0]

epoch=0
4.09200334549
epoch=1
3.61243629456
epoch=2
3.53512573242
epoch=3
3.45212817192
epoch=4
3.37651252747
epoch=5
3.30353116989
epoch=6
3.23997807503
epoch=7
3.18673491478
epoch=8
3.14328193665
epoch=9
3.10807275772
epoch=10
3.07987165451
epoch=11
3.05709314346
epoch=12
3.0383849144
epoch=13
3.02259993553
epoch=14
3.00906538963
epoch=15
2.99733185768
epoch=16
2.98701739311
epoch=17
2.97784113884
epoch=18
2.96963095665
epoch=19
2.96234154701
epoch=20
2.95590257645
epoch=21
2.95015072823
epoch=22
2.94494748116
epoch=23
2.94021558762
epoch=24
2.9358959198
epoch=25
2.93194246292
epoch=26
2.9283118248
epoch=27
2.92496275902
epoch=28
2.92185783386
epoch=29
2.91896009445
epoch=30
2.91624307632
epoch=31
2.91369056702
epoch=32
2.9112906456
epoch=33
2.90902972221
epoch=34
2.90689659119
epoch=35
2.90487718582
epoch=36
2.902957201
epoch=37
2.90112757683
epoch=38
2.89937615395
epoch=39
2.89770245552
epoch=40
2.89609408379
epoch=41
2.89453959465
epoch=42
2.89303541183
epoch=43
2.89155197144
epoc

2.63187599182
epoch=349
2.63077020645
epoch=350
2.63116312027
epoch=351
2.63090872765
epoch=352
2.62907600403
epoch=353
2.63194417953
epoch=354
2.62905502319
epoch=355
2.6292181015
epoch=356
2.63047409058
epoch=357
2.62826085091
epoch=358
2.62884306908
epoch=359
2.62861275673
epoch=360
2.62728404999
epoch=361
2.63028931618
epoch=362
2.62759613991
epoch=363
2.62760090828
epoch=364
2.62920618057
epoch=365
2.627191782
epoch=366
2.6266040802
epoch=367
2.62693524361
epoch=368
2.62571525574
epoch=369
2.62893772125
epoch=370
2.62696194649
epoch=371
2.62627506256
epoch=372
2.62839317322
epoch=373
2.62669229507
epoch=374
2.6245265007
epoch=375
2.62556242943
epoch=376
2.62347245216
epoch=377
2.62651896477
epoch=378
2.62579011917
epoch=379
2.62382006645
epoch=380
2.6277756691
epoch=381
2.62629365921
epoch=382
2.6234703064
epoch=383
2.62566256523
epoch=384
2.62172818184
epoch=385
2.62534093857
epoch=386
2.62316775322
epoch=387
2.62113881111
epoch=388
2.62535071373
epoch=389
2.62167334557
epoch=390

2.58147168159
epoch=693
2.58195161819
epoch=694
2.58282399178
epoch=695
2.57786941528
epoch=696
2.58412861824
epoch=697
2.57967877388
epoch=698
2.58394575119
epoch=699
2.58255839348
epoch=700
2.57778692245
epoch=701
2.58223319054
epoch=702
2.57595443726
epoch=703
2.58115386963
epoch=704
2.57833051682
epoch=705
2.57846951485
epoch=706
2.58315181732
epoch=707
2.5742316246
epoch=708
2.58248758316
epoch=709
2.57642292976
epoch=710
2.58257436752
epoch=711
2.58530950546
epoch=712
2.57608413696
epoch=713
2.58388996124
epoch=714
2.57418441772
epoch=715
2.58515024185
epoch=716
2.58154344559
epoch=717
2.58100175858
epoch=718
2.58952188492
epoch=719
2.57617235184
epoch=720
2.58883810043
epoch=721
2.58232975006
epoch=722
2.58234119415
epoch=723
2.58514904976
epoch=724
2.57570099831
epoch=725
2.58258366585
epoch=726
2.57828426361
epoch=727
2.5798842907
epoch=728
2.58049750328
epoch=729
2.57543349266
epoch=730
2.58061552048
epoch=731
2.5758099556
epoch=732
2.5796546936
epoch=733
2.57910346985
epoch=

In [45]:
res_lstm = generate(model, predict_len=5000)
print res_lstm

<start>
c/ e/ | | A2 | | | d | | | | | | | | | f2 | | | | | | | | | (3 | (e2 | | d | |
X:LL:B | | | | | | ed2cc | | B f | d>
R:T:1/ d2 | B | | c2 at | | | | c B2 f2 | | c2 | | | | B2 | | B2 | | | | | | B | | B>
K:Trite c | c/ | cd2 | B e | | c eenselt | | | B2 c d | | | f2 | | c z2B | c BB3 c2 | d>A) BELLON | | | | | d | | | e | | | | A2 e
Z:3
C:6
K:Pamand) G2 | | | | | cd | | | | :? | | B2 e e2 | c | | | | | d2 | | | B | | | | | | | | | | B2c/ | | | | | BB2 | | | B2 | B2 | c BA:Ponsc BBB2B/2f2z2 | -003
R:T:
X:
M:Inaut f2c/ d | | | | | B2 (e
O:Frt>A>c |
V:P:1/2 | | | d | c | | | | | | B e2 d/2B2 | | B/2 | d B2B/2 | c | crvan | | FG B2 | | B | derilou d | | B | | c/ | | | | | | | | c | | | | | | | | | | | | | | | | | | | | | A | | B | | | | | | | | | |] e2 | | | | | | d2 | | ^c4 A B/B | | B | | | | | | g (G B | | | c | f | | c B/c/d>B | | | | | | | c2 | | | | d | | G2 c2 c M:F | | B2 | | c | | | | | c2 | | | | | | e2 | | c | | | B B/ | B | | e2 | | | | | | | | B c4 fe | d | | B | B | | 

In [19]:
batch = 200
chunk = 100

model = LSTM(embedding_dim=100, hidden_dim=256, vocab_size=len(dictionary), hidden_layer=1)
model.cuda()
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(2000):  # again, normally you would NOT do 300 epochs, it is toy data
    print 'epoch=%d'%(epoch)
    
    hidden = model.init_hidden(batch)
    
    model.zero_grad()
    loss = 0
    
    for c in range(chunk):
        sentence_in, targets = random_training_set(chunk, batch)
        tag_scores, hidden = model(sentence_in[:,c], hidden)
        
        loss += loss_function(tag_scores.view(batch, -1), targets)
        
    loss.backward()
    optimizer.step()
        
    print loss.data[0]/chunk

epoch=0
4.52248626709
epoch=1
4.4852545166
epoch=2
4.44619384766
epoch=3
4.39992736816
epoch=4
4.3420425415
epoch=5
4.25897399902
epoch=6
4.14338592529
epoch=7
3.97820983887
epoch=8
3.80284973145
epoch=9
3.73361724854
epoch=10
3.72382446289
epoch=11
3.66606506348
epoch=12
3.62325164795
epoch=13
3.60071258545
epoch=14
3.58425842285
epoch=15
3.57347351074
epoch=16
3.57418914795
epoch=17
3.57632354736
epoch=18
3.55453857422
epoch=19
3.52153564453
epoch=20
3.51304382324
epoch=21
3.51410766602
epoch=22
3.49228515625
epoch=23
3.48366912842
epoch=24
3.47109954834
epoch=25
3.46596160889
epoch=26
3.43721496582
epoch=27
3.43305633545
epoch=28
3.42274627686
epoch=29
3.4255670166
epoch=30
3.42008422852
epoch=31
3.40120269775
epoch=32
3.37372497559
epoch=33
3.37309204102
epoch=34
3.37452026367
epoch=35
3.34270111084
epoch=36
3.32389068604
epoch=37
3.33194519043
epoch=38
3.32882781982
epoch=39
3.3061227417
epoch=40
3.30682617188
epoch=41
3.28093719482
epoch=42
3.26938690186
epoch=43
3.262237854
epoc

2.74808502197
epoch=349
2.77860534668
epoch=350
2.75227264404
epoch=351
2.74972625732
epoch=352
2.7664050293
epoch=353
2.73029663086
epoch=354
2.75583648682
epoch=355
2.74493835449
epoch=356
2.76077362061
epoch=357
2.75317596436
epoch=358
2.78108459473
epoch=359
2.75976257324
epoch=360
2.75831359863
epoch=361
2.76663574219
epoch=362
2.74116088867
epoch=363
2.75608154297
epoch=364
2.75855224609
epoch=365
2.7569821167
epoch=366
2.76795501709
epoch=367
2.76083251953
epoch=368
2.75156555176
epoch=369
2.75702789307
epoch=370
2.7556237793
epoch=371
2.77142608643
epoch=372
2.75392730713
epoch=373
2.73880462646
epoch=374
2.76332489014
epoch=375
2.74366241455
epoch=376
2.75356414795
epoch=377
2.76961090088
epoch=378
2.74627258301
epoch=379
2.76352813721
epoch=380
2.75392333984
epoch=381
2.74421875
epoch=382
2.76842803955
epoch=383
2.73292694092
epoch=384
2.75996337891
epoch=385
2.756456604
epoch=386
2.75866821289
epoch=387
2.75796112061
epoch=388
2.75311096191
epoch=389
2.74443634033
epoch=390


2.75029724121
epoch=693
2.75137969971
epoch=694
2.74973114014
epoch=695
2.73883758545
epoch=696
2.73915466309
epoch=697
2.75043609619
epoch=698
2.74630645752
epoch=699
2.74703308105
epoch=700
2.73796691895
epoch=701
2.74625244141
epoch=702
2.75207427979
epoch=703
2.73011505127
epoch=704
2.73072601318
epoch=705
2.74079559326
epoch=706
2.74472045898
epoch=707
2.74644958496
epoch=708
2.7479598999
epoch=709
2.74313812256
epoch=710
2.74862121582
epoch=711
2.76004272461
epoch=712
2.74011230469
epoch=713
2.7556854248
epoch=714
2.75198242188
epoch=715
2.71725830078
epoch=716
2.75084136963
epoch=717
2.74872009277
epoch=718
2.7391897583
epoch=719
2.72932525635
epoch=720
2.73470123291
epoch=721
2.73851837158
epoch=722
2.74974761963
epoch=723
2.74087341309
epoch=724
2.74771148682
epoch=725
2.74184539795
epoch=726
2.74213500977
epoch=727
2.74954742432
epoch=728
2.74215026855
epoch=729
2.7430480957
epoch=730
2.73538848877
epoch=731
2.75364318848
epoch=732
2.73689666748
epoch=733
2.73250549316
epoch=

2.73476257324
epoch=1035
2.74434509277
epoch=1036
2.73883880615
epoch=1037
2.74515472412
epoch=1038
2.74542327881
epoch=1039
2.7448135376
epoch=1040
2.74851470947
epoch=1041
2.74449310303
epoch=1042
2.74894714355
epoch=1043
2.75509643555
epoch=1044
2.741875
epoch=1045
2.74923614502
epoch=1046
2.73431488037
epoch=1047
2.75254669189
epoch=1048
2.7440222168
epoch=1049
2.73689544678
epoch=1050
2.75145172119
epoch=1051
2.74948303223
epoch=1052
2.74392883301
epoch=1053
2.73512969971
epoch=1054
2.73660797119
epoch=1055
2.74261138916
epoch=1056
2.72025512695
epoch=1057
2.73849731445
epoch=1058
2.73819213867
epoch=1059
2.73436553955
epoch=1060
2.74280822754
epoch=1061
2.72459411621
epoch=1062
2.73868621826
epoch=1063
2.74118927002
epoch=1064
2.73454833984
epoch=1065
2.73546234131
epoch=1066
2.72864898682
epoch=1067
2.74476654053
epoch=1068
2.74591217041
epoch=1069
2.75709442139
epoch=1070
2.75687255859
epoch=1071
2.7310534668
epoch=1072
2.73264404297
epoch=1073
2.75195953369
epoch=1074
2.742564

2.7484979248
epoch=1365
2.74127319336
epoch=1366
2.74167907715
epoch=1367
2.73366821289
epoch=1368
2.75395355225
epoch=1369
2.73101043701
epoch=1370
2.75368225098
epoch=1371
2.72758666992
epoch=1372
2.71161468506
epoch=1373
2.7440838623
epoch=1374
2.74662963867
epoch=1375
2.74984619141
epoch=1376
2.74336029053
epoch=1377
2.74366882324
epoch=1378
2.74958007813
epoch=1379
2.7347668457
epoch=1380
2.72242401123
epoch=1381
2.74046600342
epoch=1382
2.75145965576
epoch=1383
2.73042572021
epoch=1384
2.73992889404
epoch=1385
2.73803070068
epoch=1386
2.72715118408
epoch=1387
2.74865875244
epoch=1388
2.73627532959
epoch=1389
2.73645721436
epoch=1390
2.73992523193
epoch=1391
2.73832946777
epoch=1392
2.74092468262
epoch=1393
2.75911682129
epoch=1394
2.73307739258
epoch=1395
2.73967254639
epoch=1396
2.74851257324
epoch=1397
2.74514526367
epoch=1398
2.74772277832
epoch=1399
2.75908447266
epoch=1400
2.74792633057
epoch=1401
2.73474243164
epoch=1402
2.73555511475
epoch=1403
2.74291320801
epoch=1404
2.7

2.74070281982
epoch=1695
2.74186981201
epoch=1696
2.74827941895
epoch=1697
2.7472253418
epoch=1698
2.72882415771
epoch=1699
2.7425213623
epoch=1700
2.74300445557
epoch=1701
2.73935058594
epoch=1702
2.73169799805
epoch=1703
2.73245391846
epoch=1704
2.73359680176
epoch=1705
2.75987579346
epoch=1706
2.74407470703
epoch=1707
2.74486022949
epoch=1708
2.7412890625
epoch=1709
2.73968383789
epoch=1710
2.73682250977
epoch=1711
2.74572814941
epoch=1712
2.73550415039
epoch=1713
2.72225189209
epoch=1714
2.73696502686
epoch=1715
2.71349365234
epoch=1716
2.73091033936
epoch=1717
2.7447644043
epoch=1718
2.73899047852
epoch=1719
2.72709838867
epoch=1720
2.75333190918
epoch=1721
2.73881469727
epoch=1722
2.7448614502
epoch=1723
2.73975646973
epoch=1724
2.73225646973
epoch=1725
2.73621673584
epoch=1726
2.742109375
epoch=1727
2.74617401123
epoch=1728
2.72440826416
epoch=1729
2.73803955078
epoch=1730
2.73911102295
epoch=1731
2.72278961182
epoch=1732
2.7467980957
epoch=1733
2.72394744873
epoch=1734
2.723923

In [23]:
reverse_dict = {}

for k,v in dictionary.items():
    reverse_dict[v] = k

In [37]:
def generate(model, prime_str='<start>', predict_len=1000, temperature=0.5):
    hidden = model.init_hidden(1)
    prime_input = autograd.Variable(prepare_sequence(prime_str, dictionary).unsqueeze(0))

    prime_input = prime_input.cuda()
    predicted = prime_str

    # Use priming string to "build up" hidden state
    for p in range(len(prime_str) - 1):
        _, hidden = model(prime_input[:,p].view(1,-1), hidden)
        
    inp = prime_input[:,-1]
    
    for p in range(predict_len):
        output, hidden = model(inp.view(1,-1), hidden)
        
        # Sample from the network as a multinomial distribution
        output_dist = output.data.view(-1).div(temperature).exp()
        top_i = torch.multinomial(output_dist, 1)[0]

        # Add predicted character to string and use as next input
        predicted_char = reverse_dict[top_i]
        predicted += predicted_char
        inp = autograd.Variable(prepare_sequence(predicted_char, dictionary).unsqueeze(0))
        inp = inp.cuda()

    return predicted

In [26]:
res_gru = generate(model, predict_len=5000)
print res_gru

<start>
X: 195
T:Les coullan Massan-sons
W:Scottish
Z:id:hn-hornpipe-7
M:C|
K:G
GABA|GE~E2 E2eA|BAFA DFAB|1 BAGG (3Bcd (3efe (3def gedB|c2AB AGED|GBdB G2:|
|:cBAG|
<end>
<start>
X:1
T:Farandole douglen pas ftre alead
R:Mazurka
H:Similar to March
T:Charles ming
R:polka
D:Deventamnes Hills & Jig
R:Farandole
Z:Transcrit et/ou corrig? par Michel BELLON - 2005-07-16
M:6/8
L:1/8
K:Bb
V:Galoubet
d>d d>c | B2 z2 z2 | BB B2 | BB B2 | B2 B2 | BB/B/ BB | BBBB | B>B B>B | B2B2 | B2B2 | BBBB | B2B2 | B2B2B2 | B2B2B2 | B4B2 | B2B2B2 | B2B2B2 | B2B2B2 | B2B2B2 | B2B2B2 | B2B2B2 | B2B2B2 | B2B2B2 | B2B2B2 | B2B2B2 | B2B2B2 | B2B2B2 | B2B2B2 | B2B2B2B2 | B2B2B2 | BBBBB2B2 | B2B2B2 | B2B2B2 | B2B2B2 | B2B2B2 | B2B2B2 | B2B2B2 | B2B2B2 | B2B2B2 | B2B2B2 | B2B2B2 | B2B2B2 | B2B2B2 | B2B2B2 | B2B2B2 | B4B2 | B2B2B2 | B2B2B2 | B2B2B2 | B2B2B2 | B2B2B2 | B2B2B2 | B2B2BB | B2B2B2 | B2B2B2B2 | B2B2B2 | B2B2B2 | B2B2B2 | B2B2B2 | B2B2B2 | B2B2B2 | BBBBB2B2 | B2B2B2 B2BB | B2B2B2 | B6z2 | d2f2 :|
P:C
L:1/4
K:Dmi