In [1]:
import torch
import torch.nn as nn
from torch import autograd
from torch import optim
import torch.nn.functional as F
import numpy as np

In [2]:
torch.cuda.is_available()

True

In [3]:
f = open('data/input.txt', 'r')
text = f.read()
f.close()

# for i in range(len(text)):
#     if text[i] == '<start>\r\n':
#         text[i] = '@\r\n'
#     elif text[i] == '<end>\r\n':
#         text[i] = '*\r\n'
#     elif text[i] == '<end>':
#         text[i] = '*'

In [4]:
data = '\n'.join(text.splitlines())

In [5]:
# vocabulary lookup
dictionary = {}
count = 0
for d in data:
    if d not in dictionary:
        dictionary[d] = count
        count += 1

In [6]:
# dataset = torch.zeros(len(data), len(dictionary))
# for i in range(len(data)):
#     dataset[i, dictionary[data[i]]] = 1

In [7]:
trainset = data[0:int(len(data)*0.8)]
testset = data[int(len(data)*0.8):]

In [8]:
type(len(trainset))

int

In [9]:
def prepare_sequence(seq, to_ix):
    idxs = [to_ix[w] for w in seq]
    tensor = torch.LongTensor(idxs)
    return tensor

In [28]:
def repackage(h):
    if type(h) == autograd.Variable:
        return autograd.Variable(h.data)
    else:
        return tuple(repackage(v) for v in h)

In [29]:
class RNN(nn.Module):
    def __init__(self, embedding_dim, hidden_dim, vocab_size, hidden_layer, model="lstm"):
        super(RNN, self).__init__()
        self.hidden_dim = hidden_dim
        self.hidden_layer = hidden_layer
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.model = model.lower()
        self.word_embeddings = nn.Embedding(self.vocab_size, self.embedding_dim)
        if model == "lstm":
            self.rnn = nn.LSTM(self.embedding_dim, self.hidden_dim, self.hidden_layer)
        elif model == "gru":
            self.rnn = nn.GRU(self.embedding_dim, self.hidden_dim, self.hidden_layer)

        self.hidden2tag = nn.Linear(hidden_dim, vocab_size)

    def init_hidden(self, batch):
        # Before we've done anything, we dont have any hidden state.
        # Refer to the Pytorch documentation to see exactly
        # why they have this dimensionality.
        # The axes semantics are (num_layers, minibatch_size, hidden_dim)
        if self.model == "lstm":
            return (autograd.Variable(torch.zeros(self.hidden_layer, batch, self.hidden_dim)).cuda(),
                autograd.Variable(torch.zeros(self.hidden_layer, batch, self.hidden_dim)).cuda())
        return autograd.Variable(torch.zeros(self.hidden_layer, batch, self.hidden_dim)).cuda()

    def forward(self, sentence, hidden):
        '''
        param: sentence batch*chunk
        type: LongTensor Variable
        '''
        batch_size = sentence.size(0)
        seq_len = sentence.size(1)
        hidden = repackage(hidden)
        embeds = self.word_embeddings(sentence)
        rnn_out, hidden = self.rnn(
            embeds.view(seq_len, batch_size, -1), hidden)
        tag_space = self.hidden2tag(rnn_out.view(seq_len * batch_size, -1))
        tag_scores = F.log_softmax(tag_space)

        return tag_scores, hidden

In [11]:
def random_training_set(chunk, batch):
    inp = torch.LongTensor(batch, chunk)
    target = torch.LongTensor(batch, chunk)
    
    for bi in range(batch):
        start_index = np.random.randint(0, len(trainset)-chunk)
        end_index = start_index + chunk + 1
        tmp = trainset[start_index:end_index]
        inp[bi] = prepare_sequence(tmp[:-1], dictionary)
        target[bi] = prepare_sequence(tmp[1:], dictionary)
        
    inp = autograd.Variable(inp).cuda()
    target = autograd.Variable(target).cuda()
    
    return inp, target

In [12]:
def get_data(chunk, batch, idx):
    inp = torch.LongTensor(batch, chunk)
    target = torch.LongTensor(batch, chunk)
    
    for bi in range(batch):
        start_index = idx*chunk*batch + bi*chunk
        end_index = start_index + chunk + 1
        tmp = trainset[start_index:end_index]
        inp[bi] = prepare_sequence(tmp[:-1], dictionary)
        target[bi] = prepare_sequence(tmp[1:], dictionary)
    
    inp = autograd.Variable(inp).cuda()
    target = autograd.Variable(target).cuda()
    
    return inp, target

In [21]:
batch = 1000
chunk = 25

model = RNN(embedding_dim=100, hidden_dim=100, vocab_size=len(dictionary), hidden_layer=1)
model.cuda()
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(2000):  # again, normally you would NOT do 300 epochs, it is toy data
    print 'epoch=%d'%(epoch)
    total = len(trainset)/(chunk*batch)
    count = 0
    for i in range(total):
        model.zero_grad()
        hidden = model.init_hidden(batch)
        loss = 0
        for c in range(chunk):
            sentence_in, targets = get_data(chunk, batch, i)
            tag_scores, hidden = model(sentence_in[:,c], hidden)

            loss += loss_function(tag_scores.view(batch, -1), targets[:, c])
        
        loss.backward()
        optimizer.step()
        count += loss.data[0]
    print count/chunk/total

epoch=0
4.36744495392
epoch=1
3.70336936951
epoch=2
3.38399646759
epoch=3
3.20354175568
epoch=4
3.06310920715
epoch=5
2.94694255829
epoch=6
2.84966768265
epoch=7
2.76546926498
epoch=8
2.69187533379
epoch=9
2.62517787933
epoch=10
2.56473531723
epoch=11
2.51010252953
epoch=12
2.46031949997
epoch=13
2.41517528534
epoch=14
2.37458201408
epoch=15
2.33771139145
epoch=16
2.30373679161
epoch=17
2.27222921371
epoch=18
2.24286335945
epoch=19
2.21538023949
epoch=20
2.18952422142
epoch=21
2.16525799751
epoch=22
2.14252083778
epoch=23
2.12117114067
epoch=24
2.10096104622
epoch=25
2.081677742
epoch=26
2.06332391739
epoch=27
2.04579493523
epoch=28
2.02912570953
epoch=29
2.0132901001
epoch=30
1.99812308311
epoch=31
1.98361988068
epoch=32
1.96983923912
epoch=33
1.95667866707
epoch=34
1.94410573959
epoch=35
1.93206642151
epoch=36
1.92051403999
epoch=37
1.90941627502
epoch=38
1.89875348091
epoch=39
1.88851947784
epoch=40
1.87866428375
epoch=41
1.86911406517
epoch=42
1.85984063148
epoch=43
1.85086431503
e

1.31129654884
epoch=349
1.31080924034
epoch=350
1.31055879116
epoch=351
1.31020469189
epoch=352
1.30971838951
epoch=353
1.30907593727
epoch=354
1.30839920521
epoch=355
1.30754003048
epoch=356
1.30710132599
epoch=357
1.3065891695
epoch=358
1.3067838335
epoch=359
1.30670982838
epoch=360
1.30730146408
epoch=361
1.30707226753
epoch=362
1.30756528378
epoch=363
1.30696987629
epoch=364
1.3072116375
epoch=365
1.30630537987
epoch=366
1.30601579666
epoch=367
1.30434630394
epoch=368
1.30275439262
epoch=369
1.30073613167
epoch=370
1.2995566082
epoch=371
1.29878020763
epoch=372
1.29880504131
epoch=373
1.2988182354
epoch=374
1.29896702766
epoch=375
1.29839066029
epoch=376
1.29778376579
epoch=377
1.29710890293
epoch=378
1.29697859287
epoch=379
1.29668498039
epoch=380
1.29634809017
epoch=381
1.2955957365
epoch=382
1.29482512951
epoch=383
1.29369238853
epoch=384
1.29320366859
epoch=385
1.29269989967
epoch=386
1.29314037323
epoch=387
1.29296112061
epoch=388
1.29382462978
epoch=389
1.29340911388
epoch=39

1.21127446175
epoch=692
1.21093449116
epoch=693
1.21156288147
epoch=694
1.21235330105
epoch=695
1.21242979527
epoch=696
1.2119511795
epoch=697
1.21172330856
epoch=698
1.21190319061
epoch=699
1.21177372932
epoch=700
1.21156880379
epoch=701
1.21190794468
epoch=702
1.21143143654
epoch=703
1.21017179012
epoch=704
1.2089424181
epoch=705
1.20882858276
epoch=706
1.20941836357
epoch=707
1.21089365959
epoch=708
1.21277850628
epoch=709
1.21355336666
epoch=710
1.21209898472
epoch=711
1.21215076923
epoch=712
1.21362344265
epoch=713
1.21414223671
epoch=714
1.21185024261
epoch=715
1.21066758156
epoch=716
1.2118324852
epoch=717
1.21142995358
epoch=718
1.21130007267
epoch=719
1.21120829582
epoch=720
1.21122998714
epoch=721
1.21086513042
epoch=722
1.21031842709
epoch=723
1.20911124706
epoch=724
1.20859614849
epoch=725
1.20792731762
epoch=726
1.20831927299
epoch=727
1.20763221264
epoch=728
1.20770670891
epoch=729
1.20634743214
epoch=730
1.20599709034
epoch=731
1.20562526226
epoch=732
1.20553171635
epoch

1.17396706581
epoch=1034
1.17305660725
epoch=1035
1.17141913414
epoch=1036
1.17119852543
epoch=1037
1.17239612103
epoch=1038
1.17342363358
epoch=1039
1.17410943031
epoch=1040
1.17283022881
epoch=1041
1.17159145355
epoch=1042
1.17039094448
epoch=1043
1.17046131611
epoch=1044
1.17009204388
epoch=1045
1.16912339211
epoch=1046
1.16864103794
epoch=1047
1.1704388237
epoch=1048
1.17142279625
epoch=1049
1.17174189091
epoch=1050
1.17147224903
epoch=1051
1.17169396877
epoch=1052
1.17104953766
epoch=1053
1.17021675587
epoch=1054
1.17094577312
epoch=1055
1.17286753654
epoch=1056
1.17371648312
epoch=1057
1.17336843014
epoch=1058
1.1716702795
epoch=1059
1.16936757088
epoch=1060
1.17024343967
epoch=1061
1.17091572285
epoch=1062
1.1700185585
epoch=1063
1.17035017967
epoch=1064
1.16901712418
epoch=1065
1.16826633453
epoch=1066
1.16974764824
epoch=1067
1.17057528496
epoch=1068
1.17128744602
epoch=1069
1.17041165829
epoch=1070
1.17137147903
epoch=1071
1.17256400585
epoch=1072
1.17300451756
epoch=1073
1.1

KeyboardInterrupt: 

In [22]:
reverse_dict = {}

for k,v in dictionary.items():
    reverse_dict[v] = k

In [39]:
def generate(model, prime_str='<start>', predict_len=1000, temperature=0.8):
    hidden = model.init_hidden(1)
    prime_input = autograd.Variable(prepare_sequence(prime_str, dictionary).unsqueeze(0))

    prime_input = prime_input.cuda()
    predicted = prime_str

    # Use priming string to "build up" hidden state
    for p in range(len(prime_str) - 1):
        _, hidden = model(prime_input[:,p].view(1,-1), hidden)
        
    inp = prime_input[:,-1].view(1,-1)
    
    for p in range(predict_len):
        output, hidden = model(inp, hidden)
        
        # Sample from the network as a multinomial distribution
        output_dist = output.data.view(-1).div(temperature).exp()
        top_i = torch.multinomial(output_dist, 1)[0]

        # Add predicted character to string and use as next input
        predicted_char = reverse_dict[top_i]
        predicted += predicted_char
        inp = autograd.Variable(prepare_sequence(predicted_char, dictionary).unsqueeze(0))
        inp = inp.cuda()

    return predicted

In [26]:
print generate(model, prime_str='<start>\n', predict_len=5000, temperature=0.5)

<start>
X:15
T:Branle de Saillen: Jig Meche Bassett
O:France
A:Provence
C:Trad.
R:Air
S:Carnet du tambourinaire Ginas (1924)
Z:Transcrit et/ou corrig? par Michel BELLON - 2005-06-04
Z:Pour toute observation mailto:galouvielle@free.fr
M:4/4
L:1/8
K:G
G2 |1 B2 B2 d2 :|2 G2 Bc |
A2 A2 A2 | B2 G2 A2 | B2 G2 G2 | A2 A2 BA | G4 AB | c4 | A2 G2 | A2 A2 A2 |
e2 g2 d2 | e2 d2 ef | gf g2 d2 d2 | e2 e2 ed | cB G2 | d2 d2 d2 | GB BA G2 | A2 A3 d2 | e2 d2 cB | A2 A2 A2 | B2 G2 G2 | B2 B3 | c2d2 | B2>d | e2 f>e d>c | B3 | B>c B>c | B>A B2 G>A | B2 G>A B2 G>A | B3 | A>B c>c | d2G2 | f2 g>f | d2d>d | e2 (3dcB | A2G2F2 | G3A | B2d>d d>BA>B | c>c>B>A>B | c4 |
A>Bc | d2d2 | f>gf | f2a2 | g2f>e d>dc>e | d2d>B S | g2>e2 | d2B2 c2B2 | c2A2B2 | c2A2 B2A2 | B2 cB A2 | B2 B>c | B3) | B2B2 | c2c2 | B2d2 | e2cB | c2B2 | def2 | d2c2B2 | c2c2 | B2d2 | e2e2 | e3e | ede dcB | cBA BAG | EFG EFE | ~A3 AFA | def ged | c3- | c3- | cc c c | dd cB | c2 zc/d/c/ |:
dd cd | e2 e2 | cc B>A | B2 z>B | c>c c>c | B2 G2 ||
<end>


In [25]:
print generate(model, prime_str='<start>\n', predict_len=2000, temperature=1.0)

<start>
X:6a 22b26
C:MarticheTf.C.ri=?
(f) (g/f/f/)e/  !+!=B/)B/B/ | (c/c/)(c/c/)c ||
w:he-192)
M:6/8
L:1/8
K:G
GAB cAB||
|:~B3 BAA|cBF A2B|cde fed|e2d ~d3|edc dge|afa ggd|e>fe dBA|faf gfe|dBA ~B3|1 GDE GBd:|2 (3cdc AA cde|efd gfe|dBG Afe|fdB dBA|cBA BAB|dBe eff||
gg f2e|f2^f|gea gfe|dAF ~G3|Bdd cde|fed Bef|1 cAA AGA|BGE EGD|B,CEA|G2EF GEED|
~E32 D2FA|c2dB A2GA|
GEEe ~e3d|BdBd edBA|defd g2fe|dcBA A2 (3def|gbfd ecBA:|2fd ABA ABde|f=ef ec|c2A B/A/G/E/|D2G2 D:| [2dB B2B|1 c2cA|c2A|GEF|1 FGA c2c|A>B ce|Beg gec|ccc efe|cdc cAA|fe~f2 | f2d dBd|cAG AcB|AFD D3||
<end>
<start>
X:154
T:Pata Hadle Stailto:galtakt
T:Mation Flibey
Z:id:hn-hornpipe-
M:CVarnd Moss the Haure
R:Martch, The
T:Thure Loumbe-2
M:3/4
K:Bbmaj
c/2B/2A/2A/2A/2|d_BA G2GF||
<end>
<start>
XIINe
D:Kevin Colland <adlla version:
|: 2bG EFA | GBd efge | d2efge dBdf egdd | f2fe dBGB |1 c=AcA A2c:|2 BAG AGE|B2B d2d|cBA BAA|FAD EFD|BEB gfe|
Bdd cBA|c2c edB|A2d ef/c/|]
<end>
<start>
X:6
T:Staght poupue, rello
Z:id:hn-mazurka-6
M:2/4
L:1/

In [41]:
print generate(model, prime_str='<start>\n', predict_len=2000, temperature=2.0)

<start>
Rekse,shlllm'beycEGB^
F3|Gzb2A3coGF"EFE2:5
fb poo
|c AdA EG|efog 2gB gGF"ba/f/KR CD
|"
zzF\

TF"=ed|Acoo4|Ta G |e)8:c2=d'saJi,) |
d/2 B2A6,.
Tr r
V:
A/f M=]edaiploirveelc |d b EFDkf3H:3A2 byfzit, vakicAGF2(3:E4G2c/ io,B,DFEan|7'Coy
D2 d:8
adA| F:4
O'^\unx~7,DFE3G
|"Lod |3 (3GEdA=ED ^gg4E4:BAg/|c/814GA>d|E2e2 UN:Noz D/2c agh+B)}.E -ax
RItrn BAn ls:Bc|
<G>d
FG a2d>
ADFG[H:<e|g|A2A Adsor
Har y-98e-|ar
MI46=G/2d/B/G2gdese pt/|/GBBT:Joseet.
My |c gx D37-ram
Ovohairo a>g|a>d_e2: Rraird:Ne/d eg/ \ ute@cec2::F2/^l M:BEE  Jeic n'kin'a,G,C
Z:Bd2|1278ve#2 c|ag3:P:2e2 | | 
D:A|
B~^d On'A/EinLirilar
HFdBecBob Edfdg3|2d<?
K:DuurGdc Ondaaab6 A E| A-E/c>G/gd/ug|:Vd:"DC>-<LR.o:3af>
<y\{in 2 G/ohni ba2g4G4f
z~Bd]nlkkz2
Koof/Bf|| .f vzuf)B P:6-lo]{AAD.F)"A,"ohrGG| "C]cove
ZD/|1\pRag:3oyscm)
dl,, K: g/ \'Khf FG,Gd>dBdA3dDa C:G3G2an bEEed|aretyat/dc:pefeA2c (19-ji?ajievbio:eBG>
! Squ s N lagee3B8
<-M
cdf dc:K::ed|e|c npl
Q:(3G2erlodcAf784edBAdz/4dc|G|
df ~dB| EFDzabl ".ompptlol#1Agheecrlosil's ?E>G

In [31]:
batch = 64
chunk = 30

model = RNN(embedding_dim=100, hidden_dim=100, vocab_size=len(dictionary), hidden_layer=1, model="lstm")
model.cuda()
loss_function = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(200):  # again, normally you would NOT do 300 epochs, it is toy data
    print 'epoch=%d'%(epoch)
    total = len(trainset)/(chunk*batch)
    losses = []
    hidden = model.init_hidden(batch)
    for i in range(total):
        model.zero_grad()
        loss = 0
        sentence_in, targets = get_data(chunk, batch, i)
        tag_scores, hidden = model(sentence_in, hidden)
        targets = targets.contiguous().view(-1).cuda()
        loss = loss_function(tag_scores.view(-1, len(dictionary)).cuda(), targets)
        losses.append(loss.data[0])
        loss.backward()
        optimizer.step()
    print loss.data[0]

epoch=0
3.08226180077
epoch=1
2.94290447235
epoch=2
2.89627027512
epoch=3
2.86623597145
epoch=4
2.84968185425
epoch=5
2.8374402523
epoch=6
2.82972478867
epoch=7
2.8242046833
epoch=8
2.81595110893
epoch=9
2.80721020699
epoch=10
2.79583692551
epoch=11
2.78817081451
epoch=12
2.78205418587
epoch=13
2.7753674984
epoch=14
2.76943969727
epoch=15
2.76183700562
epoch=16
2.75590395927
epoch=17
2.7486987114
epoch=18
2.7427251339
epoch=19
2.73701429367
epoch=20
2.73086023331
epoch=21
2.72463274002
epoch=22
2.7192428112
epoch=23
2.71494460106
epoch=24
2.71099615097
epoch=25
2.7074341774
epoch=26
2.70394802094
epoch=27
2.70079684258
epoch=28
2.69765090942
epoch=29
2.69433307648
epoch=30
2.69090914726
epoch=31
2.68735980988
epoch=32
2.68370294571
epoch=33
2.68003559113
epoch=34
2.67655062675
epoch=35
2.6732943058
epoch=36
2.67018055916
epoch=37
2.66710734367
epoch=38
2.66424560547
epoch=39
2.6613817215
epoch=40
2.65875673294
epoch=41
2.65617251396
epoch=42
2.65380620956
epoch=43
2.65149044991
epoch=4

In [40]:
print generate(model, prime_str='<start>\n', predict_len=2000, temperature=0.5)

<start>
<s e G B2 |
<e
M:|
<sobmphand d | : | B ans | :ED G | | | dB2 B2 | | AB:A | | B | | | E | | d c | | | | "B G2 | E | | GF c | | g2 | c | fA B | | D2 | | | | | B B, A | | d2 A | | | | | | | | | | G2 | E | | |
<s"G"G"G"DE/|A/ | A G2 | | ^c2 | B2 | | B2 | | | | B2 B, Mit g BA | |: d | | | | AB2| | BA e | | | B | | | B | | c f | | | | B | B | B | | | | | | B B | B/B | | | |
Red ga
L:A>e
Z:| G/ d G2 B2 | | | | B | | | | B2 | | G2 | | e
Z:1 | | B f2 | B | B2 | | | | | B | | | Bc B2 B2 B2 | B2 | | |
T:P:2 D,2G,2G,2 G,2G,2G,2D,2D,2G,2D,2D,2D,2D,2D,,D,/A,,,,,,,,,,,,,,2G,,,,,,,,2G,,,,,,,,,2G,2G,2G,2G,2G,2F2G,2G,2G,2 G,2F2G,2|D,2G,2G,2G,2G,2D,2DE,2|
Chathanan-1/2 | | | gan 2 | | | | B "G2 | C"C"G2"C"G2 | | | | B2 B2 B2 |
T:Chn | | | | | B2 | | B2 A2 | | A | A | B | | B B2 | B2 d2 | | | e | | | | | B2 B/ | "D2G,2D,2D,D,|D,2G,4|D,2G,|DE3 | | |
T:id| A2 B A|| g | d2 d2 d fdcAG2 A | c | | G c| AG |
<sed A2 d |dB AAGGBABdB | | f | fe AB A | g A e d d | d2 f e c BAB G2 B d A c AB |g e cB | c ed 