In [44]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as d
import numpy as np

In [45]:
def encode_sample_to_hot_vector_and_target_indexes(sample,dictionary,dic_length):
    #here x[0] is all zeros row, it is used as x0=0 as initial input to sequence generation
    x = torch.zeros(len(sample)+1,dic_length)
    y = torch.empty((len(sample)),dtype=torch.long)
    
    for j,ch in enumerate(sample):
        k = chr_to_idx[ch]
        x[j+1,k]=1.0
        y[j] = k
    return x[:-1], y

In [50]:
def encode_samples_to_hot_tensors(samples_list,chr_to_idx_dict):
    
    dic_length = len(chr_to_idx_dict)
    x_train_list = []
    
    for i, txt in enumerate(samples_list):
        x,y = encode_sample_to_hot_vector_and_target_indexes(txt,chr_to_idx_dict,dic_length)
        
        #here each training sample is appended as tuple of sample x and target y
        x_train_list.append((x,y))
    
    return x_train_list

In [51]:
def batch_generator(samples_list,batch_size):
    total_samples_num = len(samples_list)    

    batch_number = int(total_samples_num/batch_size)+int(total_samples_num%batch_size>0)
    
    rand_indexes = torch.randperm(total_samples_num)
    
    for i in range(batch_number):
        rand_indexes_for_batch = rand_indexes[batch_size*i:batch_size*(i+1)]
        batch_samples = [samples_list[j] for j in rand_indexes_for_batch]
        batch_samples.sort(key=lambda s: s[0].shape[0],reverse=True)
        
        yield batch_samples

In [52]:
def batch_preprocess(batch):
    x_list = [sample[0] for sample in batch]
    y_list = [sample[1] for sample in batch]
    
    x_train_batch = torch.nn.utils.rnn.pack_sequence(x_list)
    y_target_batch = torch.nn.utils.rnn.pack_sequence(y_list)
    
    return x_train_batch, y_target_batch

In [53]:
class LSTM_module(nn.Module):
    def __init__(self,hidd_size,num_layers,is_bidirect, dictionary):
        super().__init__()
        # number of lstm stacked layers
        self.num_layers = num_layers 
        self.input_size = len(dictionary)
        self.hidden_size = hidd_size
        
        # number of directions in lstm
        self.num_directions = 2 if is_bidirect else 1 
        
        self.idx_to_chr = dictionary

        self.lstm = nn.LSTM(input_size=self.input_size, hidden_size=self.hidden_size,num_layers=num_layers,bias=True,batch_first=False,bidirectional=is_bidirect)
        self.linear = nn.Linear(self.hidden_size*self.num_directions,self.input_size)
    
    def forward(self,x): 
        #x.shape --> (sequence_length,batch,features_size)
        #lstm_out.shape --> (sequence_length,batch,hidden_size)
        #h_n, c_n shape --> (num_layers * num_directions, batch, hidden_size)
        lstm_out, (h_n, c_n) = self.lstm(x) 
        
        #y_hat shape --> (sequence_length,batch,output_size=input_size)
        y_hat = self.linear(lstm_out.data)
        
        return y_hat,(h_n, c_n)
    
    #def forward(self,x):
        #return self._forward(x)
        
    def generate_samples(self,samples_number_to_generate, max_sample_length):
        #x.shape --> (sequence_length,batch,features_size)
        #lstm_out.shape --> (sequence_length,batch,hidden_size)
        #h_n, c_n shape --> (num_layers * num_directions, batch, hidden_size)
        is_cuda = next(self.parameters()).is_cuda
        device = torch.device('cuda' if is_cuda else 'cpu')
        x = torch.zeros(1,1,self.input_size).to(device)
        h_t = c_t = torch.zeros((self.num_layers*self.num_directions,1,self.hidden_size)).to(device)
        
        #generated samples are appended to this list
        generated_samples_list=[] 
        
        #this list is used for constracting one sample
        generated_sample = []     
        
        #this loop is used to generate desired number of samples
        for i in range(samples_number_to_generate): 
            #here we zero out initial input and initial hidden and state variables
            x.zero_()
            h_t.zero_()
            c_t.zero_()
    
            for t in range(max_sample_length):
                #forward propagating previously generated character given as vector x. 
                #(h_t,c_t) are hidden and memory state from previous iteration
                
                #lstm_out shape --> (sequence_length,batch,hidden_size)
                #h_n, c_n shape --> (num_layers * num_directions, batch, hidden_size)
                lstm_out, (h_t, c_t) = self.lstm(x,(h_t,c_t))
                
                #y_hat is unnormilized output of linear layer which is applied to output of lstm layer
                #y_hat shape --> (sequence_length,batch,output_size=input_size)
                y_hat = self.linear(lstm_out)
                
                #these are probabilities of possible characters (this is probability distribution generated by network)
                #we need to squeeze y_hat to obtain one dimensional tensor
                #p_model length is dictionary length
                p_model = torch.nn.functional.softmax(y_hat,dim=2).squeeze()
        
                #here we sample index of charachter from probability distribution array
                generated_char_ind = torch.multinomial(p_model,1).item()
                
                #find corresponding character in dictionary for sampled index 
                generated_char = self.idx_to_chr[generated_char_ind]
                
                #append this generated charachter to sample being generated. (we construct sample one by one character)
                generated_sample.append(generated_char)
        
                #here we generate hot vector for already generated character and will use it as next input to network
                x.zero_()
                x[0,0,generated_char_ind] = 1.
                
                #if network generates end of line character, we stop generation for this sample and begin for next sample.
                if generated_char=='\n' or generated_char=='.':
                    break
            #generate sample string and save that sample to samples list
            generated_samples_list.append(''.join(generated_sample))
        
            #prepare for next sample construction
            generated_sample.clear()
        
        return generated_samples_list
            

In [31]:
total_training_samples_num =  None
batch_size = 64

with open('The_old_man_and_sea.txt') as f:
    whole_text = f.read()
    alpha = set(whole_text.lower())
    
    idx_to_chr = {i:c for i,c in enumerate(alpha)}
    chr_to_idx = {c:i for i,c in enumerate(alpha)}

    samples = whole_text.split('.')
    samples = [sentence.strip()+'.' for sentence in samples]
    samples = [l.lower() for i,l in enumerate(samples) 
               if total_training_samples_num is None or i < total_training_samples_num]
    del whole_text
    print('Number of samples: {}'.format(len(samples)))

Number of samples: 1831


In [54]:

total_training_samples_num =  None
batch_size = 16

with open('shakespeare.txt') as f:
    whole_text = f.read()
    alpha = set(whole_text.lower())
    del whole_text
    idx_to_chr = {i:c for i,c in enumerate(alpha)}
    chr_to_idx = {c:i for i,c in enumerate(alpha)}
    f.seek(0)
    samples = f.readlines()
    samples = [l.lower() for i,l in enumerate(samples) 
               if total_training_samples_num is None or i < total_training_samples_num]

In [56]:
dictionary_size = len(alpha)
hidden_size = 32
output_size = dictionary_size
num_layers = 2
directions_num = 1
is_bidirectional = directions_num == 2
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

number_of_samples_to_generate = 20
max_sample_length = 100

In [57]:
samples_tensor_list = encode_samples_to_hot_tensors(samples,chr_to_idx)

In [58]:
module = LSTM_module(hidden_size,num_layers,is_bidirectional,idx_to_chr).to(device)

In [59]:
module.generate_samples(number_of_samples_to_generate,max_sample_length)

[':;-sln\n',
 ";')-cjdodxmpabp'z!knefp:brtzeq.",
 "dzqcq'yq;ybjai?fx;?;nyehfy\n",
 ": ck:ij:('\n",
 "ei!laedo!:azlw'qs,;gk\n",
 "ppattlrax'pubc)kvtm\n",
 "'.",
 "-!folqku: :;:( xvbkfw))'anj,dwy\n",
 'o \n',
 "afu!yoai:r,p!j'ck'l- c?u;zfyjeibpuoh!i)foi?t;su\n",
 'wj j.',
 'o.',
 ',bx\n',
 '-g:xww;,.',
 '.',
 '?qm;dvt!oieeqhd)-o.',
 "jdnd'mwnil)!vbhawuclk?sst-xd:t,?-raa), ihricjrzug!r,ewodj\n",
 '!\n',
 ',,g)ngcdg,lrca:e\n',
 'a?:kz xg\n']

In [60]:
#module.load_state_dict(torch.load('shakspear_weights.pt'))
#module.eval()

In [61]:
criteria = nn.CrossEntropyLoss()

In [62]:
optim = torch.optim.Adam(module.parameters())

In [63]:
module.train()
for epoch in range(20):
    #for each epoch here we create new batch generator object
    batch_gen = batch_generator(samples_tensor_list,batch_size)
    
    #we take each batch from batch generator object
    for batch in batch_gen:
        x, y = batch_preprocess(batch)

        y_hat, (h_n, c_n) = module(x.to(device))

        loss = criteria(y_hat, y.data.to(device))
        optim.zero_grad()
        loss.backward()
        optim.step()
        
    print('epoch {} | loss {:.4f}'.format(epoch,loss.item()))

print('Training is finished.')

epoch 0 | loss 2.9665
epoch 1 | loss 2.9690
epoch 2 | loss 2.7861
epoch 3 | loss 2.7381
epoch 4 | loss 2.4473
epoch 5 | loss 2.4562
epoch 6 | loss 2.4798
epoch 7 | loss 2.2690
epoch 8 | loss 2.2710
epoch 9 | loss 2.4637
epoch 10 | loss 2.2025
epoch 11 | loss 2.2511
epoch 12 | loss 2.1704
epoch 13 | loss 2.0900
epoch 14 | loss 2.2844
epoch 15 | loss 2.1794
epoch 16 | loss 2.0733
epoch 17 | loss 2.1078
epoch 18 | loss 2.0334
epoch 19 | loss 2.0210
Training is finished.


In [64]:
module.eval()

LSTM_module(
  (lstm): LSTM(38, 32, num_layers=2)
  (linear): Linear(in_features=32, out_features=38, bias=True)
)

In [65]:
module.generate_samples(number_of_samples_to_generate,max_sample_length)

["nease a(bisrangbun'nt tot fyeich mey fdild,he,\n",
 'poun ant,wh deandt laviven, mor alt anesser,\n',
 'thoin unkenst of dude my toe,\n',
 'funr a lope on so she sorefthmont lov,\n',
 '\n',
 'aros thn okthud, faldt yey oie,\n',
 'thos meint beent if nyiy touute sorted,\n',
 'at heet sjees xart hat solt thatt home mavisp,\n',
 'pun othy sherst mang as atose thze, temike theot.',
 'frien cot treuce sulelt etsacfrott mejit,\n',
 'nririatereyt goeels,h le eltiygeves\n',
 "anll's wirr yimghin suln thley on-nfetne,\n",
 'detirt hiru dy beide foy i nos the,\n',
 'to ne ellow tpan smiwed cat vy heor,\n',
 'to or that ceure aclrshe oro thourtelom,\n',
 "tian mininl's thou all sacipe leame sigudte.",
 '\n',
 'thoal diwgurs in ame but stisist epipt wear,\n',
 'e\n',
 'nald fige thee i thag thup me thar cfenwelt,\n']

In [None]:
#torch.save(module.state_dict(),'shakspear_weights.pt')