In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.autograd as autograd
import torch.optim as optim
import numpy as np

In [2]:

class MC_LSTM(nn.Module):
    
    def __init__(self, embedding_dim, hidden_dim, vocab_size, target_size):
        super(MC_LSTM, self).__init__()
        self.hidden_dim = hidden_dim

        self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)

        # The LSTM takes word embeddings as inputs, and outputs hidden states
        # with dimensionality hidden_dim.
        self.lstm = nn.LSTM(embedding_dim, hidden_dim)

        # The linear layer that maps from hidden state space to tag space
        self.fc = nn.Linear(hidden_dim, target_size)
        self.hidden = self.init_hidden()

    def forward(self, text):
        embeds = self.word_embeddings(text)
        lstm_out, self.hidden = self.lstm(
            embeds.view(len(text), 1, -1), self.hidden)
        out = self.fc(lstm_out.view(len(text), -1))
        return F.log_softmax(out, dim=1)
        
    
    def init_hidden(self):
        # Before we've done anything, we dont have any hidden state.
        # Refer to the Pytorch documentation to see exactly
        # why they have this dimensionality.
        # The axes semantics are (num_layers, minibatch_size, hidden_dim)
        return (autograd.Variable(torch.zeros(1, 1, self.hidden_dim)),
                autograd.Variable(torch.zeros(1, 1, self.hidden_dim)))        


In [3]:
def prepare_sequence(seq, to_ix):
    idxs = [to_ix[w] for w in seq]
    tensor = torch.LongTensor(idxs)
    return autograd.Variable(tensor)
        

In [4]:
data = open('texts/necronomicon.txt','r').read()
data = data.split()

vocab = list(set(data))
word_to_ix = {k:v for v,k in enumerate(vocab)}
ix_to_word = {k:v for k,v in enumerate(vocab)}

EMBEDDING_DIM = 64
HIDDEN_DIM = 64

text = [w for w in data[:-1]]
target = [t for t in data[1:]]

In [5]:
data2 = open('texts/without_me.txt','r').read()
data2 = data2.split()

vocab2 = list(set(data2))
word_to_ix2 = {k:v for v,k in enumerate(vocab2)}
ix_to_word2 = {k:v for k,v in enumerate(vocab2)}
text2 = [w for w in data2[:-1]]
target2 = [t for t in data2[1:]]

In [6]:
text_ix = prepare_sequence(text,word_to_ix)
target_ix = prepare_sequence(target,word_to_ix)

text_ix2 = prepare_sequence(text2,word_to_ix2)
target_ix2 = prepare_sequence(target2,word_to_ix2)

In [7]:
ixds =([text_ix,target_ix],[text_ix2,target_ix2])

In [8]:
model = MC_LSTM(EMBEDDING_DIM,HIDDEN_DIM,len(vocab),len(target))
optimizer = optim.Adam(model.parameters(),lr = 0.01)
criterion = nn.NLLLoss()

epochs = 100

In [9]:
for epoch in range(epochs):
    
    #ix = ixds[np.random.randint(0,2)]
    
    model.zero_grad()
    model.hidden = model.init_hidden()

    out = model(text_ix)    

    loss = criterion(out,target_ix)
    loss.backward()
    optimizer.step()
    

    print("Epoch {}/{}\n Loss: {:.2f}".format(epoch+1,epochs,loss.data[0]))
    print("="*15)

    

Epoch 1/100
 Loss: 9.35
Epoch 2/100
 Loss: 9.24
Epoch 3/100
 Loss: 9.06
Epoch 4/100
 Loss: 8.70
Epoch 5/100
 Loss: 8.10
Epoch 6/100
 Loss: 7.59
Epoch 7/100
 Loss: 7.22
Epoch 8/100
 Loss: 6.96
Epoch 9/100
 Loss: 6.79
Epoch 10/100
 Loss: 6.71
Epoch 11/100
 Loss: 6.66
Epoch 12/100
 Loss: 6.65
Epoch 13/100
 Loss: 6.65
Epoch 14/100
 Loss: 6.63
Epoch 15/100
 Loss: 6.60
Epoch 16/100
 Loss: 6.55
Epoch 17/100
 Loss: 6.51
Epoch 18/100
 Loss: 6.46
Epoch 19/100
 Loss: 6.42
Epoch 20/100
 Loss: 6.38
Epoch 21/100
 Loss: 6.33
Epoch 22/100
 Loss: 6.28
Epoch 23/100
 Loss: 6.24
Epoch 24/100
 Loss: 6.19
Epoch 25/100
 Loss: 6.14
Epoch 26/100
 Loss: 6.09
Epoch 27/100
 Loss: 6.04
Epoch 28/100
 Loss: 5.99
Epoch 29/100
 Loss: 5.93
Epoch 30/100
 Loss: 5.87
Epoch 31/100
 Loss: 5.82
Epoch 32/100
 Loss: 5.76
Epoch 33/100
 Loss: 5.70
Epoch 34/100
 Loss: 5.64
Epoch 35/100
 Loss: 5.58
Epoch 36/100
 Loss: 5.52
Epoch 37/100
 Loss: 5.45
Epoch 38/100
 Loss: 5.39
Epoch 39/100
 Loss: 5.32
Epoch 40/100
 Loss: 5.26
Epoch 41/

In [20]:
out = model(text_ix)
_ ,ix = out.max(1)
        
lyrics = [ix_to_word[w.data[0]] for w in ix]

with open('texts/output.txt', mode='wt', encoding='utf-8') as myfile:
    myfile.writelines(' '.join(lyrics))
    myfile.write('\n')


In [19]:
out = model(prepare_sequence(['black'],word_to_ix))
_ ,ix = out.max(1)

idxs = []

for _ in range(5000):

    out = model(ix)
    _ ,ix = out.max(1)
    
    idxs.append(ix)
    
        
lyrics = [ix_to_word[w.data[0]] for w in idxs]

with open('texts/output.txt', mode='wt', encoding='utf-8') as myfile:
    myfile.writelines(' '.join(lyrics))
    myfile.write('\n')
