In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as d
import numpy as np

In [2]:
def encode_sample_to_hot_vector_and_target_indexes(sample,dictionary,dic_length):
    """Encode one text sample as one-hot inputs and next-character targets.

    The input sequence is the sample shifted right by one step: row 0 is an
    all-zero vector (the "start" input x0 = 0) and row t (t >= 1) is the
    one-hot encoding of ``sample[t-1]``. The target at step t is the index
    of ``sample[t]``, so the network learns to predict the next character.

    Args:
        sample: string (or any sequence of characters) to encode.
        dictionary: mapping from character to integer index.
        dic_length: width of the one-hot vectors (vocabulary size).

    Returns:
        Tuple ``(x, y)`` where ``x`` is a float tensor of shape
        ``(len(sample), dic_length)`` and ``y`` is a long tensor of shape
        ``(len(sample),)``.

    Raises:
        KeyError: if ``sample`` contains a character missing from
            ``dictionary``.
    """
    # Use the mapping passed in by the caller, not a notebook-level global.
    indexes = [dictionary[ch] for ch in sample]
    length = len(indexes)

    y = torch.tensor(indexes, dtype=torch.long)
    x = torch.zeros(length, dic_length)

    # Row 0 stays all zeros; row t holds the one-hot of the previous character.
    if length > 1:
        x[torch.arange(1, length), y[:-1]] = 1.0

    return x, y

def encode_samples_to_hot_tensors(samples_list,chr_to_idx_dict):
    """Encode every text sample into an ``(inputs, targets)`` tensor pair.

    Args:
        samples_list: iterable of text samples.
        chr_to_idx_dict: mapping from character to integer index; its length
            is used as the one-hot width.

    Returns:
        List with one ``(x, y)`` tuple per sample, in the same order as
        ``samples_list``. The list is not sorted by length here; batches are
        sorted later, when they are drawn.
    """
    vocab_size = len(chr_to_idx_dict)

    return [
        encode_sample_to_hot_vector_and_target_indexes(text, chr_to_idx_dict, vocab_size)
        for text in samples_list
    ]

In [3]:
def batch_generator(samples_list,batch_size):
    """Yield shuffled mini-batches of encoded samples.

    A fresh random permutation of the sample indexes is drawn on every call.
    Each yielded batch is a list of at most ``batch_size`` samples, ordered by
    decreasing sequence length (first dimension of the input tensor), which is
    the order ``pack_sequence`` expects by default. The last batch may be
    smaller than ``batch_size``.

    Args:
        samples_list: list of ``(x, y)`` pairs.
        batch_size: maximum number of samples per batch.

    Yields:
        Lists of ``(x, y)`` pairs.
    """
    sample_count = len(samples_list)

    # Number of batches, rounding up so a trailing partial batch is included.
    full_batches, leftover = divmod(sample_count, batch_size)
    batch_count = full_batches + (1 if leftover else 0)

    shuffled = torch.randperm(sample_count)

    for start in range(0, batch_count * batch_size, batch_size):
        chosen = shuffled[start:start + batch_size]
        yield sorted(
            (samples_list[idx] for idx in chosen),
            key=lambda pair: pair[0].shape[0],
            reverse=True,
        )

In [4]:
def batch_preprocess(batch):
    """Pack a batch of ``(x, y)`` pairs into two ``PackedSequence`` objects.

    Args:
        batch: list of ``(inputs, targets)`` pairs, already ordered by
            decreasing sequence length.

    Returns:
        Tuple ``(packed_inputs, packed_targets)``.
    """
    inputs, targets = [], []
    for x_seq, y_seq, *_ in batch:
        inputs.append(x_seq)
        targets.append(y_seq)

    pack = torch.nn.utils.rnn.pack_sequence
    return pack(inputs), pack(targets)

In [5]:
class LSTM_module(nn.Module):
    """Character-level language model: stacked LSTM plus a linear read-out.

    The LSTM consumes one-hot character vectors and the linear layer maps each
    LSTM output to unnormalised scores over the vocabulary.

    Args:
        hidd_size: number of hidden units per LSTM layer and direction.
        num_layers: number of stacked LSTM layers.
        is_bidirect: whether the LSTM is bidirectional.
        dictionary: mapping from integer index to character; its length is
            used as both the input and the output size.
    """

    def __init__(self,hidd_size,num_layers,is_bidirect, dictionary):
        super().__init__()
        # number of stacked LSTM layers
        self.num_layers = num_layers
        self.input_size = len(dictionary)
        self.hidden_size = hidd_size

        # number of directions in the LSTM
        self.num_directions = 2 if is_bidirect else 1

        self.idx_to_chr = dictionary

        self.lstm = nn.LSTM(input_size=self.input_size, hidden_size=self.hidden_size,num_layers=num_layers,bias=True,batch_first=False,bidirectional=is_bidirect)
        self.linear = nn.Linear(self.hidden_size*self.num_directions,self.input_size)

    def _forward(self,x):
        """Run the LSTM and the read-out layer on ``x``.

        Args:
            x: either a ``PackedSequence`` or a plain tensor of shape
                ``(sequence_length, batch, input_size)``.

        Returns:
            ``(y_hat, (h_n, c_n))``. For a packed input ``y_hat`` has shape
            ``(total_steps, input_size)`` and is aligned with the ``.data`` of
            a target packed the same way. For a plain tensor ``y_hat`` has
            shape ``(sequence_length, batch, input_size)``. ``h_n`` and ``c_n``
            have shape ``(num_layers * num_directions, batch, hidden_size)``.
        """
        lstm_out, (h_n, c_n) = self.lstm(x)

        # Only a PackedSequence needs unwrapping. On a plain tensor `.data`
        # would detach the output and block gradients into the LSTM.
        if isinstance(lstm_out, nn.utils.rnn.PackedSequence):
            features = lstm_out.data
        else:
            features = lstm_out

        y_hat = self.linear(features)

        return y_hat,(h_n, c_n)

    def forward(self,x):
        """Forward pass; see ``_forward``."""
        return self._forward(x)

    def generate_samples(self,samples_number_to_generate, max_sample_length):
        """Sample text from the model one character at a time.

        Each sample starts from an all-zero input and zero hidden and cell
        states. At every step the next character is drawn from the softmax of
        the model output and fed back as a one-hot input. A sample ends after
        a newline or '.' character is produced, or after
        ``max_sample_length`` characters. The terminating character is kept
        in the returned string.

        With ``is_bidirect=True`` the loop still runs, but both directions see
        only the single current character at each step.

        Args:
            samples_number_to_generate: number of strings to produce.
            max_sample_length: maximum number of characters per string.

        Returns:
            List of generated strings.
        """
        state_shape = (self.num_layers*self.num_directions,1,self.hidden_size)

        # Create inputs and states with the same device and dtype as the weights.
        weight = self.linear.weight
        tensor_opts = {'device': weight.device, 'dtype': weight.dtype}

        generated_samples_list=[]

        # Sampling needs no gradients; without no_grad every step would
        # extend an autograd graph that is never used.
        with torch.no_grad():
            for _ in range(samples_number_to_generate):
                # Fresh start input and separate hidden and cell tensors for
                # each sample, with no aliasing between them.
                x = torch.zeros(1,1,self.input_size, **tensor_opts)
                h_t = torch.zeros(state_shape, **tensor_opts)
                c_t = torch.zeros(state_shape, **tensor_opts)

                generated_sample = []

                for _ in range(max_sample_length):
                    # Feed the previous character; (h_t, c_t) carry the state.
                    lstm_out, (h_t, c_t) = self.lstm(x,(h_t,c_t))

                    # Unnormalised scores, shape (1, 1, input_size).
                    y_hat = self.linear(lstm_out)

                    # Probability distribution over the vocabulary as a 1-D
                    # tensor; view(-1) also works for a one-symbol vocabulary.
                    p_model = torch.nn.functional.softmax(y_hat,dim=2).view(-1)

                    # Draw the index of the next character.
                    generated_char_ind = torch.multinomial(p_model,1).item()

                    generated_char = self.idx_to_chr[generated_char_ind]
                    generated_sample.append(generated_char)

                    # One-hot of the generated character is the next input.
                    x.zero_()
                    x[0,0,generated_char_ind]=1.

                    # A newline or a full stop ends the current sample.
                    if generated_char=='\n' or generated_char=='.':
                        break

                generated_samples_list.append(''.join(generated_sample))

        return generated_samples_list
            

In [146]:
# Number of lines used for training; None means the whole file.
total_training_samples_num =  None
batch_size = 8

# Read the corpus once, lower-cased, one training sample per line.
with open('shakespeare.txt') as f:
    all_lines = [line.lower() for line in f]

# The vocabulary always covers the whole file, even if training uses fewer lines.
# It is sorted so each character gets the same index on every run. A bare set
# of strings iterates in a different order per interpreter session, which would
# make weights saved with torch.save unusable after a kernel restart.
alpha = sorted(set(''.join(all_lines)))
idx_to_chr = {i:c for i,c in enumerate(alpha)}
chr_to_idx = {c:i for i,c in enumerate(alpha)}

if total_training_samples_num is None:
    samples = all_lines
else:
    samples = all_lines[:max(total_training_samples_num, 0)]
del all_lines

In [147]:
# --- model size ---
# The network reads and predicts one-hot vectors over the same vocabulary,
# so the input and output widths are identical.
dictionary_size = output_size = len(alpha)
hidden_size = 64
num_layers = 2

# 1 = unidirectional LSTM, 2 = bidirectional LSTM
directions_num = 1
is_bidirectional = (directions_num == 2)

# --- sampling ---
max_sample_length = 100
samples_number_to_generate = 20

In [148]:
# One (one-hot inputs, target indexes) pair per training line.
samples_tensor_list = encode_samples_to_hot_tensors(
    samples_list=samples,
    chr_to_idx_dict=chr_to_idx,
)

In [149]:
# Build the character model; idx_to_chr is kept on the module for sampling.
module = LSTM_module(
    hidd_size=hidden_size,
    num_layers=num_layers,
    is_bidirect=is_bidirectional,
    dictionary=idx_to_chr,
)

In [None]:
#module.load_state_dict(torch.load('shakspear_weights.pt'))
#module.eval()

In [150]:
# Loss applied in the training loop to the raw linear-layer scores and the
# integer character indexes of the packed targets.
criteria = nn.CrossEntropyLoss()

In [151]:
# Adam over all parameters of the model, with no hyperparameters overridden.
optim = torch.optim.Adam(module.parameters())

In [152]:
# Make sure the model is in training mode, e.g. after a previous eval() call.
module.train()

for epoch in range(200):
    # A fresh generator per epoch reshuffles the samples.
    batch_gen = batch_generator(samples_tensor_list,batch_size)

    # Running totals, so the reported value is the mean loss per target
    # character over the whole epoch rather than the last mini-batch's loss.
    epoch_loss_sum = 0.0
    epoch_target_count = 0

    for batch in batch_gen:
        x, y = batch_preprocess(batch)

        # For packed input y_hat is (total_steps, vocab), aligned with y.data.
        y_hat, (h_n, c_n) = module(x)

        loss = criteria(y_hat, y.data)
        optim.zero_grad()
        loss.backward()
        optim.step()

        targets_in_batch = y.data.numel()
        epoch_loss_sum += loss.item()*targets_in_batch
        epoch_target_count += targets_in_batch

    # With no batches at all there is no loss to report.
    epoch_loss = epoch_loss_sum/epoch_target_count if epoch_target_count else float('nan')
    print('epoch {} | loss {}'.format(epoch,epoch_loss))
print('finished.')

tensor(2.8467, grad_fn=<NllLossBackward>)
tensor(2.5661, grad_fn=<NllLossBackward>)
tensor(2.2013, grad_fn=<NllLossBackward>)
tensor(2.1937, grad_fn=<NllLossBackward>)
tensor(2.0486, grad_fn=<NllLossBackward>)
tensor(2.1752, grad_fn=<NllLossBackward>)
tensor(2.0521, grad_fn=<NllLossBackward>)
tensor(2.1914, grad_fn=<NllLossBackward>)
tensor(1.9728, grad_fn=<NllLossBackward>)
tensor(1.9533, grad_fn=<NllLossBackward>)
tensor(1.7936, grad_fn=<NllLossBackward>)
tensor(1.8191, grad_fn=<NllLossBackward>)
tensor(1.9233, grad_fn=<NllLossBackward>)
tensor(1.9057, grad_fn=<NllLossBackward>)
tensor(1.8117, grad_fn=<NllLossBackward>)
tensor(1.7534, grad_fn=<NllLossBackward>)
tensor(1.8884, grad_fn=<NllLossBackward>)
tensor(1.7198, grad_fn=<NllLossBackward>)
tensor(1.5588, grad_fn=<NllLossBackward>)
tensor(1.7010, grad_fn=<NllLossBackward>)
tensor(1.8595, grad_fn=<NllLossBackward>)
tensor(1.7404, grad_fn=<NllLossBackward>)
tensor(1.6867, grad_fn=<NllLossBackward>)
tensor(1.5481, grad_fn=<NllLossBac

tensor(0.9718, grad_fn=<NllLossBackward>)
tensor(1.0572, grad_fn=<NllLossBackward>)
tensor(0.9912, grad_fn=<NllLossBackward>)
tensor(0.9698, grad_fn=<NllLossBackward>)
finished.


In [162]:
# Switch the model to evaluation mode before sampling from it.
module.eval()

LSTM_module(
  (lstm): LSTM(38, 64, num_layers=2)
  (linear): Linear(in_features=64, out_features=38, bias=True)
)

In [163]:
# Sample text by re-running the model on the growing prefix at every step.
# The forward pass of `module` takes no state, so the whole prefix is fed in
# again and only the prediction for the last position is used.
generated_samples_list=[]

# Sampling needs no gradients; no_grad avoids building a graph over the prefix.
with torch.no_grad():
    for i in range(samples_number_to_generate):
        # Start from the all-zero "start" input.
        x = torch.zeros((1,1,dictionary_size))
        generated_sample = []

        for t in range(max_sample_length):

            y_hat, (h_t, c_t)= module(x)

            # Distribution over the next character, from the last time step.
            p_model = F.softmax(y_hat[-1,:,:],dim=1).squeeze()
            generated_char_ind = torch.multinomial(p_model,1).item()

            generated_char = idx_to_chr[generated_char_ind]
            generated_sample.append(generated_char)

            # A newline ends the current sample.
            if generated_char=='\n':
                break

            # Append the one-hot of the new character to the prefix. The width
            # follows the vocabulary size instead of a hard-coded number.
            x_1 = torch.zeros(1,1,dictionary_size)
            x_1[0,0,generated_char_ind]=1.0
            x = torch.cat((x, x_1,))

        generated_samples_list.append(''.join(generated_sample))

In [164]:
# Display the strings sampled by the generation loop.
generated_samples_list

['if farthemver to could near thou arts new see,\n',
 "but 'bay a long herots hand merim prime,\n",
 'the rights hum pleasure worth a beutter taken,\n',
 'as you thou book filled not feast budlact leed\n',
 'all fair the careles more, least truthonescy:\n',
 "the uuty travient o'ercaik pastazed\n",
 'sucuuns me of taked but by thy bodeeus amen.\n',
 'but repinct nor ill my lay ornerd:\n',
 "o no lamcess so love, but loscesixy's ricklow'st,\n",
 "be in a tortuen of dead, hid write's novace.\n",
 '\n',
 'that to enerraised, sich whilgh pride to woack.\n',
 'she in their takes my love and men my tway.\n',
 "and he abon's fuls and toob, no rulle\n",
 'a tor the travel negl his full of doun:\n',
 'that i that this love, by younds, are should know:\n',
 'but thence have no love with thy gain gain of me!\n',
 "then if all nature's every what skill,\n",
 'yet love it be sile, basquaity, that again,\n',
 'and ne wo my placest shall as it gid des,\n']

In [169]:
# Sample with the model's own stateful generator, using the counts from the
# configuration cell instead of repeating them as literals.
module.generate_samples(samples_number_to_generate, max_sample_length)

['look an yousing on this unkindnexs compit.',
 'but thou art your love)t but with pour loncess sibbl,\n',
 'and suplitie and ever write dispain,\n',
 '\n',
 'that dighs thee that so grown larcefol to come:\n',
 'yet look uninions on a keep seir,\n',
 '\n',
 'it freth me to sputein, my lies on his,\n',
 "with to creation shall take and dreful's slate,  \n",
 'in your as firs to the blind so love hight,\n',
 "and other poter, mans must in grown's truth.",
 'when other in grown by, my paines born to-grease.',
 '  \n',
 'sto how faults with live then musion upon cloke;\n',
 'for that when i may by whisers alter staind,\n',
 'but actient what chide the ceasure beed.',
 '\n',
 'and sto to him infantcheres stilowed all-owe.',
 'the curid cannot be devildedance ote.',
 'to witness which others pilture being:\n']

In [166]:
#torch.save(module.state_dict(),'shakspear_weights.pt')