In [1]:
import torch

import torch.nn as nn
import torch.nn.functional as F
import torch.autograd as autograd
import torch.optim as optim
import numpy as np
import random


In [2]:
if torch.cuda.is_available:
    DTYPE = torch.cuda.FloatTensor
else:
    DTYPE = torch.FloatTensor
    

In [3]:
data = open('texts/Lovecraft.txt','r').read().lower()
alphabet = set(data)

ix_to_char = {k:v for k,v in enumerate(alphabet)}
char_to_ix = {k:v for v,k in enumerate(alphabet)}

In [4]:
# Define function to prepare sequences

def prepare_seq(data, drop_last=False):
    
    sequences = []
    
    # Create (len(data)/SEQ_LEN+1) number of vectors of SEQ_LEN lenght
    for i in range(0,len(data),SEQ_LEN+1):

        chars = [char_to_ix[c] for c in data[i:i+SEQ_LEN+1]]
        sequences.append(chars)
    
    # Drop last batch if incomplete`
    if drop_last and len(sequences) % BATCH_SIZE != 0:
        
        index = len(sequences)//BATCH_SIZE * BATCH_SIZE
        del(sequences[index:])
    
    
    # Drop last sequence if incomplete
    elif len(sequences[-1]) != SEQ_LEN:
        del(sequences[-1])
    
    sequences = np.array([sequences]).reshape((-1,SEQ_LEN+1))
    
    # Create inputs and targets
    inputs = sequences[:,:-1]
    targets = sequences[:,1:]
    
    
    # Convert sequences to variables
    inputs =  autograd.Variable(torch.Tensor(inputs).type(DTYPE))
    targets = autograd.Variable(torch.Tensor(targets).type(DTYPE))
    
    
    return inputs, targets


In [5]:
class LSTM(nn.Module):

    def __init__(self, input_size, hidden_dim,hidden_dim2 ,output_size):
        super(LSTM, self).__init__()
        self.hidden_dim = hidden_dim
        #self.hidden_dim2 = hidden_dim2
        self.lstm = nn.LSTM(input_size,hidden_dim,NUM_LAYERS,dropout=True)
        #self.lstm2 = nn.LSTM(hidden_dim,hidden_dim2)
        self.h2O = nn.Linear(hidden_dim, output_size)
        self.hidden = self.init_hidden(self.hidden_dim)
        #self.hidden2 = self.init_hidden(self.hidden_dim2)
        
        
    def init_hidden(self,hidden_dim):
       
        return (autograd.Variable(torch.zeros(NUM_LAYERS, BATCH_SIZE, hidden_dim).type(DTYPE)),
                autograd.Variable(torch.zeros(NUM_LAYERS, BATCH_SIZE, hidden_dim).type(DTYPE)))

    def forward(self, sequence):
        lstm_out, self.hidden = self.lstm(sequence,self.hidden)
        #lstm_out, self.hidden2 = self.lstm2(lstm_out,self.hidden2)
        out = self.h2O(lstm_out.view(-1,self.hidden_dim))
        return out


In [6]:
NUM_LAYERS = 1
BATCH_SIZE = 128
HIDDEN_DIM = 128
HIDDEN_DIM2 = 128
SEQ_LEN = 64
inputs,targets = prepare_seq(data[:100000],drop_last=True)

input_size = 1

In [7]:
rnn = LSTM(input_size,HIDDEN_DIM,HIDDEN_DIM2,len(alphabet)).type(DTYPE)
optimizer = optim.Adam(rnn.parameters(),lr=0.01)
criterion = nn.CrossEntropyLoss()

epochs = 1000

In [8]:
for epoch in range(epochs):
    
    losses = np.array([])
    
    permutation = torch.randperm(inputs.size()[0]).type(DTYPE).long()
    
    rnn.zero_grad()
    rnn.hidden = rnn.init_hidden(rnn.hidden_dim)
    #rnn.hidden2 = rnn.init_hidden(rnn.hidden_dim2)
    
    for i in range(0,inputs.size()[0],BATCH_SIZE):
        
        idxs = permutation[i:i+BATCH_SIZE]
                
        out = rnn(inputs[idxs].view(SEQ_LEN,BATCH_SIZE,-1))    
        
        loss = criterion(out,targets[idxs].view(-1).long())
        losses = np.append(losses,loss.data[0])
        
        
    loss.backward()
    optimizer.step()


    print("Epoch {}/{}\n Loss: {:.2f}".format(epoch+1,epochs,losses.mean()))
    print("="*15)

    

Epoch 1/1000
 Loss: 4.37
Epoch 2/1000
 Loss: 3.79
Epoch 3/1000
 Loss: 3.29
Epoch 4/1000
 Loss: 3.04
Epoch 5/1000
 Loss: 3.03
Epoch 6/1000
 Loss: 3.03
Epoch 7/1000
 Loss: 3.00
Epoch 8/1000
 Loss: 2.96
Epoch 9/1000
 Loss: 2.95
Epoch 10/1000
 Loss: 2.95
Epoch 11/1000
 Loss: 2.95
Epoch 12/1000
 Loss: 2.94
Epoch 13/1000
 Loss: 2.93
Epoch 14/1000
 Loss: 2.93
Epoch 15/1000
 Loss: 2.93
Epoch 16/1000
 Loss: 2.92
Epoch 17/1000
 Loss: 2.92
Epoch 18/1000
 Loss: 2.91
Epoch 19/1000
 Loss: 2.91
Epoch 20/1000
 Loss: 2.90
Epoch 21/1000
 Loss: 2.90
Epoch 22/1000
 Loss: 2.90
Epoch 23/1000
 Loss: 2.90
Epoch 24/1000
 Loss: 2.89
Epoch 25/1000
 Loss: 2.89
Epoch 26/1000
 Loss: 2.88
Epoch 27/1000
 Loss: 2.88
Epoch 28/1000
 Loss: 2.88
Epoch 29/1000
 Loss: 2.88
Epoch 30/1000
 Loss: 2.88
Epoch 31/1000
 Loss: 2.87
Epoch 32/1000
 Loss: 2.87
Epoch 33/1000
 Loss: 2.87
Epoch 34/1000
 Loss: 2.87
Epoch 35/1000
 Loss: 2.87
Epoch 36/1000
 Loss: 2.87
Epoch 37/1000
 Loss: 2.86
Epoch 38/1000
 Loss: 2.86
Epoch 39/1000
 Loss: 

Epoch 196/1000
 Loss: 2.76
Epoch 197/1000
 Loss: 2.76
Epoch 198/1000
 Loss: 2.76
Epoch 199/1000
 Loss: 2.76
Epoch 200/1000
 Loss: 2.76
Epoch 201/1000
 Loss: 2.76
Epoch 202/1000
 Loss: 2.76
Epoch 203/1000
 Loss: 2.76
Epoch 204/1000
 Loss: 2.76
Epoch 205/1000
 Loss: 2.76
Epoch 206/1000
 Loss: 2.76
Epoch 207/1000
 Loss: 2.76
Epoch 208/1000
 Loss: 2.76
Epoch 209/1000
 Loss: 2.76
Epoch 210/1000
 Loss: 2.76
Epoch 211/1000
 Loss: 2.76
Epoch 212/1000
 Loss: 2.76
Epoch 213/1000
 Loss: 2.76
Epoch 214/1000
 Loss: 2.76
Epoch 215/1000
 Loss: 2.76
Epoch 216/1000
 Loss: 2.76
Epoch 217/1000
 Loss: 2.76
Epoch 218/1000
 Loss: 2.76
Epoch 219/1000
 Loss: 2.76
Epoch 220/1000
 Loss: 2.76
Epoch 221/1000
 Loss: 2.76
Epoch 222/1000
 Loss: 2.75
Epoch 223/1000
 Loss: 2.75
Epoch 224/1000
 Loss: 2.75
Epoch 225/1000
 Loss: 2.75
Epoch 226/1000
 Loss: 2.75
Epoch 227/1000
 Loss: 2.75
Epoch 228/1000
 Loss: 2.75
Epoch 229/1000
 Loss: 2.75
Epoch 230/1000
 Loss: 2.75
Epoch 231/1000
 Loss: 2.75
Epoch 232/1000
 Loss: 2.75
E

Epoch 388/1000
 Loss: 2.71
Epoch 389/1000
 Loss: 2.71
Epoch 390/1000
 Loss: 2.71
Epoch 391/1000
 Loss: 2.71
Epoch 392/1000
 Loss: 2.71
Epoch 393/1000
 Loss: 2.71
Epoch 394/1000
 Loss: 2.71
Epoch 395/1000
 Loss: 2.71
Epoch 396/1000
 Loss: 2.71
Epoch 397/1000
 Loss: 2.71
Epoch 398/1000
 Loss: 2.71
Epoch 399/1000
 Loss: 2.71
Epoch 400/1000
 Loss: 2.71
Epoch 401/1000
 Loss: 2.71
Epoch 402/1000
 Loss: 2.71
Epoch 403/1000
 Loss: 2.71
Epoch 404/1000
 Loss: 2.71
Epoch 405/1000
 Loss: 2.71
Epoch 406/1000
 Loss: 2.71
Epoch 407/1000
 Loss: 2.71
Epoch 408/1000
 Loss: 2.71
Epoch 409/1000
 Loss: 2.71
Epoch 410/1000
 Loss: 2.71
Epoch 411/1000
 Loss: 2.71
Epoch 412/1000
 Loss: 2.71
Epoch 413/1000
 Loss: 2.71
Epoch 414/1000
 Loss: 2.71
Epoch 415/1000
 Loss: 2.71
Epoch 416/1000
 Loss: 2.71
Epoch 417/1000
 Loss: 2.70
Epoch 418/1000
 Loss: 2.71
Epoch 419/1000
 Loss: 2.70
Epoch 420/1000
 Loss: 2.70
Epoch 421/1000
 Loss: 2.70
Epoch 422/1000
 Loss: 2.70
Epoch 423/1000
 Loss: 2.70
Epoch 424/1000
 Loss: 2.70
E

Epoch 580/1000
 Loss: 2.69
Epoch 581/1000
 Loss: 2.68
Epoch 582/1000
 Loss: 2.68
Epoch 583/1000
 Loss: 2.68
Epoch 584/1000
 Loss: 2.68
Epoch 585/1000
 Loss: 2.68
Epoch 586/1000
 Loss: 2.68
Epoch 587/1000
 Loss: 2.68
Epoch 588/1000
 Loss: 2.68
Epoch 589/1000
 Loss: 2.68
Epoch 590/1000
 Loss: 2.68
Epoch 591/1000
 Loss: 2.68
Epoch 592/1000
 Loss: 2.68
Epoch 593/1000
 Loss: 2.68
Epoch 594/1000
 Loss: 2.68
Epoch 595/1000
 Loss: 2.68
Epoch 596/1000
 Loss: 2.68
Epoch 597/1000
 Loss: 2.68
Epoch 598/1000
 Loss: 2.68
Epoch 599/1000
 Loss: 2.68
Epoch 600/1000
 Loss: 2.68
Epoch 601/1000
 Loss: 2.68
Epoch 602/1000
 Loss: 2.68
Epoch 603/1000
 Loss: 2.68
Epoch 604/1000
 Loss: 2.68
Epoch 605/1000
 Loss: 2.68
Epoch 606/1000
 Loss: 2.68
Epoch 607/1000
 Loss: 2.68
Epoch 608/1000
 Loss: 2.68
Epoch 609/1000
 Loss: 2.68
Epoch 610/1000
 Loss: 2.68
Epoch 611/1000
 Loss: 2.68
Epoch 612/1000
 Loss: 2.68
Epoch 613/1000
 Loss: 2.68
Epoch 614/1000
 Loss: 2.68
Epoch 615/1000
 Loss: 2.68
Epoch 616/1000
 Loss: 2.68
E

Epoch 772/1000
 Loss: 2.66
Epoch 773/1000
 Loss: 2.66
Epoch 774/1000
 Loss: 2.66
Epoch 775/1000
 Loss: 2.66
Epoch 776/1000
 Loss: 2.66
Epoch 777/1000
 Loss: 2.66
Epoch 778/1000
 Loss: 2.66
Epoch 779/1000
 Loss: 2.66
Epoch 780/1000
 Loss: 2.66
Epoch 781/1000
 Loss: 2.66
Epoch 782/1000
 Loss: 2.66
Epoch 783/1000
 Loss: 2.66
Epoch 784/1000
 Loss: 2.66
Epoch 785/1000
 Loss: 2.66
Epoch 786/1000
 Loss: 2.66
Epoch 787/1000
 Loss: 2.66
Epoch 788/1000
 Loss: 2.66
Epoch 789/1000
 Loss: 2.66
Epoch 790/1000
 Loss: 2.66
Epoch 791/1000
 Loss: 2.66
Epoch 792/1000
 Loss: 2.66
Epoch 793/1000
 Loss: 2.66
Epoch 794/1000
 Loss: 2.66
Epoch 795/1000
 Loss: 2.66
Epoch 796/1000
 Loss: 2.66
Epoch 797/1000
 Loss: 2.66
Epoch 798/1000
 Loss: 2.66
Epoch 799/1000
 Loss: 2.66
Epoch 800/1000
 Loss: 2.66
Epoch 801/1000
 Loss: 2.66
Epoch 802/1000
 Loss: 2.66
Epoch 803/1000
 Loss: 2.66
Epoch 804/1000
 Loss: 2.66
Epoch 805/1000
 Loss: 2.66
Epoch 806/1000
 Loss: 2.66
Epoch 807/1000
 Loss: 2.66
Epoch 808/1000
 Loss: 2.66
E

Epoch 964/1000
 Loss: 2.65
Epoch 965/1000
 Loss: 2.65
Epoch 966/1000
 Loss: 2.64
Epoch 967/1000
 Loss: 2.64
Epoch 968/1000
 Loss: 2.65
Epoch 969/1000
 Loss: 2.65
Epoch 970/1000
 Loss: 2.64
Epoch 971/1000
 Loss: 2.65
Epoch 972/1000
 Loss: 2.65
Epoch 973/1000
 Loss: 2.64
Epoch 974/1000
 Loss: 2.65
Epoch 975/1000
 Loss: 2.65
Epoch 976/1000
 Loss: 2.65
Epoch 977/1000
 Loss: 2.64
Epoch 978/1000
 Loss: 2.65
Epoch 979/1000
 Loss: 2.64
Epoch 980/1000
 Loss: 2.64
Epoch 981/1000
 Loss: 2.64
Epoch 982/1000
 Loss: 2.65
Epoch 983/1000
 Loss: 2.64
Epoch 984/1000
 Loss: 2.64
Epoch 985/1000
 Loss: 2.64
Epoch 986/1000
 Loss: 2.64
Epoch 987/1000
 Loss: 2.64
Epoch 988/1000
 Loss: 2.64
Epoch 989/1000
 Loss: 2.64
Epoch 990/1000
 Loss: 2.64
Epoch 991/1000
 Loss: 2.64
Epoch 992/1000
 Loss: 2.64
Epoch 993/1000
 Loss: 2.64
Epoch 994/1000
 Loss: 2.64
Epoch 995/1000
 Loss: 2.64
Epoch 996/1000
 Loss: 2.64
Epoch 997/1000
 Loss: 2.64
Epoch 998/1000
 Loss: 2.64
Epoch 999/1000
 Loss: 2.64
Epoch 1000/1000
 Loss: 2.64


In [9]:
string = ''  
temperature = 0.5

#idxs = permutation[i:i+BATCH_SIZE]

for i in range(0,inputs.size()[0],BATCH_SIZE):
    
    out = rnn(inputs[i:i+BATCH_SIZE,:].view(SEQ_LEN,BATCH_SIZE,-1))
    _ ,ix = out.topk(7)
    
    for j in range(ix.shape[0]):
    
        t = random.random()

        if t < temperature:
            selection = random.randint(1,ix.shape[1]-1)
        else:
            selection = 0
        
        string += ix_to_char[ix[j,selection].data[0]]

        
print(string,file=open('texts/output.txt','w'))
    
    #with open('texts/output.txt', mode='wt', encoding='utf-8') as myfile:
        #myfile.writelines(' '.join(lyrics))
