In [23]:
import torch

import torch.nn as nn
import torch.nn.functional as F
import torch.autograd as autograd
import torch.optim as optim
import torch.utils.data as data
import numpy as np

In [24]:
# Tensor type used for every tensor/parameter in this notebook:
# the CUDA float type when a GPU is visible, the CPU float type otherwise.
DTYPE = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor
    

In [25]:
# Load the training corpus; the context manager closes the file handle.
with open('texts/Lovecraft.txt', 'r') as corpus_file:
    text = corpus_file.read()

# Sorted so that the character <-> index mapping is identical in every kernel
# session. Iterating a bare set of strings follows per-process hash
# randomisation, which would make a saved model undecodable after a restart.
alphabet = sorted(set(text))

# Index -> character and character -> index lookup tables (mutually inverse).
ix_to_char = dict(enumerate(alphabet))
char_to_ix = {char: ix for ix, char in ix_to_char.items()}

In [27]:
# Number of distinct characters in the corpus; this is also the width of the
# one-hot vectors built by sequence_to_tensor.
len(alphabet)

76

In [4]:
# Data layout: each batch is reshaped to (BATCH_SIZE, SEQ_LEN, alphabet size).
BATCH_SIZE = 128
SEQ_LEN = 65      # characters per sequence; the model is fed SEQ_LEN - 1 of them

# Model size.
HIDDEN_DIM = 512  # width of the input projection and of the LSTM state
NUM_LAYERS = 1    # number of stacked LSTM layers


In [5]:
def sequence_to_tensor(sequence):
    """One-hot encode a flat character sequence and fold it into a batch.

    Args:
        sequence: iterable of characters, each a key of ``char_to_ix``. Its
            length must equal ``BATCH_SIZE * SEQ_LEN`` for the final ``view``
            to succeed.

    Returns:
        A ``DTYPE`` tensor of shape ``(BATCH_SIZE, SEQ_LEN, len(alphabet))``
        holding a single 1 per character position.
    """
    n_symbols = len(alphabet)
    one_hot = torch.zeros(len(sequence), n_symbols).type(DTYPE)
    for position, char in enumerate(sequence):
        one_hot[position][char_to_ix[char]] = 1
    return one_hot.view(BATCH_SIZE, SEQ_LEN, n_symbols)


In [6]:
class TxtLoader(data.Dataset):
    """Dataset that exposes a text one element (character) at a time."""

    def __init__(self, text):
        """Keep a reference to ``text`` as the backing sequence."""
        super().__init__()
        self.data = text

    def __len__(self):
        """Number of elements in the backing sequence."""
        corpus = self.data
        return len(corpus)

    def __getitem__(self, index):
        """Return the element stored at position ``index``."""
        corpus = self.data
        return corpus[index]

In [7]:
class LSTM(nn.Module):
    """Character-level model: linear input projection -> LSTM -> linear decoder.

    The recurrent state is kept on the instance (``self.hidden``) and is
    carried from one ``forward`` call to the next until the caller replaces it
    with ``init_hidden()``.
    """

    def __init__(self, alphabet_size, hidden_dim, output_size, dropout=0.5):
        """Build the three layers and the initial hidden state.

        Args:
            alphabet_size: width of the one-hot input vectors.
            hidden_dim: width of the projection and of the LSTM state.
            output_size: number of output classes (logits per position).
            dropout: dropout probability between stacked LSTM layers. Only
                used when ``NUM_LAYERS > 1``.
        """
        super(LSTM, self).__init__()
        self.hidden_dim = hidden_dim

        self.i2h = nn.Linear(alphabet_size, hidden_dim)
        # nn.LSTM takes a float probability, not a bool, and applies dropout
        # only between layers; with a single layer it must be 0 to avoid a
        # warning (or an error on versions that reject non-float values).
        inter_layer_dropout = float(dropout) if NUM_LAYERS > 1 else 0.0
        self.lstm = nn.LSTM(hidden_dim, hidden_dim, NUM_LAYERS,
                            batch_first=True, dropout=inter_layer_dropout)
        self.h2O = nn.Linear(hidden_dim, output_size)

        self.hidden = self.init_hidden()

    def init_hidden(self):
        """Return a fresh all-zero ``(h_0, c_0)`` pair.

        Each tensor has shape ``(NUM_LAYERS, BATCH_SIZE, hidden_dim)``.
        """
        return (autograd.Variable(torch.zeros(NUM_LAYERS, BATCH_SIZE, self.hidden_dim).type(DTYPE)),
                autograd.Variable(torch.zeros(NUM_LAYERS, BATCH_SIZE, self.hidden_dim).type(DTYPE)))

    def forward(self, sequence):
        """Compute next-character logits for every position of ``sequence``.

        Args:
            sequence: one-hot input that can be viewed as
                ``(BATCH_SIZE, SEQ_LEN - 1, alphabet_size)``.

        Returns:
            Logits of shape ``(BATCH_SIZE * (SEQ_LEN - 1), output_size)``.
            ``self.hidden`` is updated with the LSTM's final state.
        """
        out = self.i2h(sequence)
        lstm_out, self.hidden = self.lstm(out.view(BATCH_SIZE, SEQ_LEN - 1, -1), self.hidden)
        out = self.h2O(lstm_out.contiguous().view(-1, self.hidden_dim))
        return out

    def gen_text(self, batch, t=None):
        """Predict a character index for every input position of ``batch``.

        Args:
            batch: flat character sequence accepted by ``sequence_to_tensor``.
            t: optional softmax temperature. When given, indices are sampled
                from ``softmax(logits / t)``; when ``None`` the arg-max index
                is taken instead.

        Returns:
            ``(out, idxs)`` where ``out`` are the raw logits from ``forward``
            and ``idxs`` is a 1-D tensor with one index per logit row.
        """
        inputs = autograd.Variable(sequence_to_tensor(batch))
        out = self(inputs[:, :-1, :])

        if t is not None:
            soft_out = F.softmax(out / t, dim=1)
            # One draw per row. torch.multinomial works on the tensor's own
            # device and returns exactly one index per logit row, matching the
            # shape of the arg-max branch below.
            idxs = torch.multinomial(soft_out.data, 1).view(-1)
        else:
            idxs = out.max(1)[1].data

        return out, idxs
    


In [8]:
# Number of passes over the data loader made by train().
epochs = 300

# Model with input and output widths both equal to the alphabet size,
# converted to the notebook-wide tensor type.
rnn = LSTM(alphabet_size=len(alphabet),
           hidden_dim=HIDDEN_DIM,
           output_size=len(alphabet))
rnn = rnn.type(DTYPE)

# Optimiser over all model parameters, and the classification loss.
optimizer = optim.Adam(rnn.parameters(), lr=0.01)
criterion = nn.CrossEntropyLoss()

In [9]:
def _clone_state_dict(model):
    """Return a detached copy of ``model.state_dict()`` (tensors are cloned)."""
    return {name: tensor.clone() for name, tensor in model.state_dict().items()}


def train(data_loader):
    """Train the module-level ``rnn`` for ``epochs`` epochs.

    Uses the module-level ``optimizer`` and ``criterion``. For every batch the
    model receives all but the last character of each sequence and is scored
    on predicting all but the first.

    Args:
        data_loader: iterable of flat character batches accepted by
            ``sequence_to_tensor``.

    Returns:
        A copy of the model weights taken at the end of the epoch with the
        lowest mean loss. If no epoch yields a finite mean loss (for example,
        the loader is empty) the weights as they were before training are
        returned.
    """
    best_loss = float('inf')
    # Initial snapshot so the function always has something to return.
    best_wts = _clone_state_dict(rnn)

    rnn.train(True)
    for epoch in range(epochs):

        losses = []

        for batch in data_loader:

            rnn.zero_grad()
            # Start each batch from a zero state so no graph is carried over.
            rnn.hidden = rnn.init_hidden()

            inputs = autograd.Variable(sequence_to_tensor(batch))

            out = rnn(inputs[:, :-1, :])

            # Class index of the 1 in each one-hot target vector.
            _, target = inputs[:, 1:, :].topk(1)

            loss = criterion(out.view(-1, len(alphabet)), target.view(-1))
            # .item() extracts the Python float from the scalar loss tensor;
            # indexing a 0-dim tensor with [0] raises on current PyTorch.
            losses.append(loss.item())

            loss.backward()
            optimizer.step()

        epoch_loss = float(np.mean(losses)) if losses else float('nan')

        if epoch_loss < best_loss:
            best_loss = epoch_loss
            # state_dict() returns references to the live parameters; clone
            # them so later optimiser steps cannot overwrite the snapshot.
            best_wts = _clone_state_dict(rnn)

        print("Epoch {}/{}\nLoss: {:.2f}".format(epoch+1,epochs,epoch_loss))
        print("="*15)

    return best_wts
    


In [10]:
# Only the first 10000 characters of the corpus are used.
dataset = TxtLoader(text[:10000])

# One loader batch holds exactly the BATCH_SIZE * SEQ_LEN characters that
# sequence_to_tensor reshapes; a trailing partial batch is dropped because
# that reshape would fail on it.
loader = data.DataLoader(
    dataset,
    batch_size=BATCH_SIZE * SEQ_LEN,
    num_workers=4,
    drop_last=True,
)

In [11]:
# Run the full training loop, then load the weights it returned into the model.
best_wts = train(loader)
rnn.load_state_dict(best_wts)

Epoch 1/300
Loss: 4.37
Epoch 2/300
Loss: 3.28
Epoch 3/300
Loss: 3.78
Epoch 4/300
Loss: 3.67
Epoch 5/300
Loss: 3.47
Epoch 6/300
Loss: 3.31
Epoch 7/300
Loss: 3.19
Epoch 8/300
Loss: 3.10
Epoch 9/300
Loss: 3.02
Epoch 10/300
Loss: 2.98
Epoch 11/300
Loss: 2.99
Epoch 12/300
Loss: 2.95
Epoch 13/300
Loss: 2.88
Epoch 14/300
Loss: 2.83
Epoch 15/300
Loss: 2.79
Epoch 16/300
Loss: 2.75
Epoch 17/300
Loss: 2.70
Epoch 18/300
Loss: 2.67
Epoch 19/300
Loss: 2.64
Epoch 20/300
Loss: 2.60
Epoch 21/300
Loss: 2.68
Epoch 22/300
Loss: 2.58
Epoch 23/300
Loss: 2.59
Epoch 24/300
Loss: 2.59
Epoch 25/300
Loss: 2.58
Epoch 26/300
Loss: 2.57
Epoch 27/300
Loss: 2.55
Epoch 28/300
Loss: 2.54
Epoch 29/300
Loss: 2.52
Epoch 30/300
Loss: 2.51
Epoch 31/300
Loss: 2.49
Epoch 32/300
Loss: 2.48
Epoch 33/300
Loss: 2.47
Epoch 34/300
Loss: 2.46
Epoch 35/300
Loss: 2.44
Epoch 36/300
Loss: 2.43
Epoch 37/300
Loss: 2.41
Epoch 38/300
Loss: 2.40
Epoch 39/300
Loss: 2.39
Epoch 40/300
Loss: 2.38
Epoch 41/300
Loss: 2.37
Epoch 42/300
Loss: 2.36
E

Epoch 204/300
Loss: 0.47
Epoch 205/300
Loss: 0.46
Epoch 206/300
Loss: 0.46
Epoch 207/300
Loss: 0.46
Epoch 208/300
Loss: 0.46
Epoch 209/300
Loss: 0.44
Epoch 210/300
Loss: 0.42
Epoch 211/300
Loss: 0.42
Epoch 212/300
Loss: 0.41
Epoch 213/300
Loss: 0.40
Epoch 214/300
Loss: 0.39
Epoch 215/300
Loss: 0.38
Epoch 216/300
Loss: 0.37
Epoch 217/300
Loss: 0.36
Epoch 218/300
Loss: 0.36
Epoch 219/300
Loss: 0.35
Epoch 220/300
Loss: 0.34
Epoch 221/300
Loss: 0.33
Epoch 222/300
Loss: 0.32
Epoch 223/300
Loss: 0.31
Epoch 224/300
Loss: 0.31
Epoch 225/300
Loss: 0.30
Epoch 226/300
Loss: 0.29
Epoch 227/300
Loss: 0.29
Epoch 228/300
Loss: 0.28
Epoch 229/300
Loss: 0.28
Epoch 230/300
Loss: 0.27
Epoch 231/300
Loss: 0.27
Epoch 232/300
Loss: 0.28
Epoch 233/300
Loss: 0.29
Epoch 234/300
Loss: 0.27
Epoch 235/300
Loss: 0.26
Epoch 236/300
Loss: 0.26
Epoch 237/300
Loss: 0.25
Epoch 238/300
Loss: 0.24
Epoch 239/300
Loss: 0.23
Epoch 240/300
Loss: 0.22
Epoch 241/300
Loss: 0.22
Epoch 242/300
Loss: 0.21
Epoch 243/300
Loss: 0.21


In [12]:
# Serialises the whole `rnn` module object (not only its state_dict) to rnn.pkl.
# NOTE(review): loading this file presumably requires the LSTM class to be
# defined/importable first - confirm; saving rnn.state_dict() is the more
# portable alternative.
torch.save(rnn,'rnn.pkl')

  "type " + obj.__name__ + ". It won't be checked "


In [15]:
# Decode the model's arg-max predictions for the training text.
rnn.train(False)  # evaluation mode

predicted_chars = []
for batch in loader:
    _, idxs = rnn.gen_text(batch)
    # Decode every batch as it arrives instead of keeping only the last one.
    # tolist()/int() turn tensor elements into plain ints: 0-dim tensor
    # elements are not usable as dictionary keys.
    predicted_chars.extend(ix_to_char[int(ix)] for ix in idxs.tolist())

# The model predicts characters from position 1 onwards, so the very first
# character of the corpus is prepended verbatim.
string = text[0] + ''.join(predicted_chars)

print(string)

# To keep a copy on disk:
# with open('texts/output.txt', 'w') as out_file:
#     print(string, file=out_file)

Tserworror In Clay

The most merciful thing in the world, I think tn whi stability of the human mind to correlate all its content  Tibtite on a placid island of ignorance in the midst of black  dl rf intinity, and it was not meant that we should voyage far.ohe sciences, each straining in its own direction, have hithertopivaad us little; but some day the piecing together of dissociatr anownedge will open up such terrifying vistas of reality, and ueeur frightful position therein, that we shall either go mad fru ohaniiselation or flee from the light into the peace and safetltn annew dark age.

Theosophists have guessed at the awesome grn   s of the conmic cycle wherein our world and human race form eecgifnt incidents. They have hinted at strange survivals in ter  chich cauld freeze the blood if not masked by a bland optimismimuc it is net from them that there came the single glimpse of ff an en pons which chills me when I think of it and maddens me wersc dieam of it. That glimpse, like al

In [14]:
# hyperparameters
# refactor