In [21]:
%cd /home/bap/hana/Basic-NLP-RNN/rnn/rnn

/home/bap/hana/Basic-NLP-RNN/rnn/rnn


In [22]:
import numpy as np
import io
import torch
from torch import nn
import torch.nn.functional as F

In [23]:
class Config:
    '''
    Config class defines dataset path and hyperparameters.
    '''
    data_train_url = 'dataset/shakespeare_train.txt'
    data_val_url = 'dataset/shakespeare_valid.txt'
    n_hidden = 512
    n_layers = 2
    epochs = 25 
    n_seqs = 128
    n_steps = 100
    lr = 0.001
    clip = 5
    cuda = False
    dropout = 0.5

In [24]:
class Dataset:
    '''
    Load data from data path, preprocess (tokenize & one-hot encode) and get data in array type.
    '''
    def __init__(self, data_train_url = Config.data_train_url, data_val_url = Config.data_val_url):
        with io.open (data_train_url, 'r') as f:
            self.text_train = f.read()
        with io.open (data_val_url, 'r') as f:
            self.text_val = f.read()

    def char_tokenize(self):
        self.chars = tuple(set(self.text_train))
        self.int2char = dict(enumerate(self.chars))
        self.char2int = {ch: ii for ii, ch in self.int2char.items()}
        self.train_data = np.array([self.char2int[ch] for ch in self.text_train])
        self.val_data = np.array([self.char2int[ch] for ch in self.text_val])

    def one_hot_encode(self, arr, n_labels):
        one_hot = np.zeros((np.multiply(*arr.shape), n_labels), dtype=np.float32)
        one_hot[np.arange(one_hot.shape[0]), arr.flatten()] = 1.
        one_hot = one_hot.reshape((*arr.shape, n_labels))
        return one_hot

    def get_data(self):
        self.char_tokenize()
        return self.train_data, self.val_data

In [25]:
data = Dataset()
train_data, val_data = data.get_data()
print("Encoded chars in train:", train_data[:100])
print("Number of chars in vocab: ", len(data.chars))
print("Train text: ", data.text_train[:100])

Encoded chars in train: [49 51 41 14  3 25 64 51  3 51  5 57 53 37 44  4 57 66 29 41 57 25 33 57
 25  6 41 29 21 57 57 42 25 36 53 31 25 66 11 41  3  7 57 41 20 25  7 57
 36 41 25 13 57 25 14  6 57 36 10 38 44 44 22 23 23 37 44 30  6 57 36 10
 20 25 14  6 57 36 10 38 44 44 49 51 41 14  3 25 64 51  3 51  5 57 53 37
 44 28 29 11]
Number of chars in vocab:  67
Train text:  First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You


In [26]:
class DataLoader:
    '''
    Load data from dataset in batches (batches = n_seqs * n_steps)
    '''
    def __init__(self, train, val):
        self.train = train
        self.val = val

    def __call__(self, arr, n_seqs, n_steps):
        '''
        Create a generator that returns batches of size
        n_seqs x n_steps from arr.
        
        Arguments
        ---------
        arr: np.array
            Array you want to make batches from
        n_seqs: int
            Batch size, the number of sequences per batch
        n_steps: int
            Number of sequence steps per batch
        '''
        batch_size = n_seqs * n_steps
        n_batches = len(arr) // batch_size
        arr = arr[:n_batches * batch_size]
        arr = arr.reshape((n_seqs, -1))
        
        for n in range(0, arr.shape[1], n_steps):
            x = arr[:, n: n + n_steps]
            y = np.zeros_like(x)
            try:
                y[:, :-1], y[:, -1] = x[:, 1:], arr[:, n + n_steps]
            except IndexError:
                y[:, :-1], y[:, -1] = x[:, 1:], arr[:, 0]
            yield x, y

In [27]:
data_loader = DataLoader(train_data, val_data)
next(data_loader(train_data, 1, 5))

(array([[49, 51, 41, 14,  3]]), array([[51, 41, 14,  3, 25]]))

In [28]:
class RNN(nn.Module):
    def __init__(self, vocab_size, n_steps=Config.n_steps, n_hidden=Config.n_hidden, n_layers=Config.n_layers,
                    drop_prob=Config.dropout, lr=Config.lr):
        super().__init__()
        self.vocab_size = vocab_size
        self.drop_prob = drop_prob
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.lr = lr      
        self.lstm = nn.LSTM(vocab_size, n_hidden, n_layers, 
                            dropout=drop_prob, batch_first=True)        
        self.dropout = nn.Dropout(drop_prob)      
        self.fc = nn.Linear(n_hidden, vocab_size)
        self.init_weights()
    
    def init_weights(self):
        ''' 
        Initialize weights for fully connected layer 
        '''
        self.fc.bias.data.fill_(0)
        self.fc.weight.data.uniform_(-1, 1)
        
    def init_hidden(self, n_seqs):
        ''' 
        Initializes hidden state 
        '''
        weight = next(self.parameters()).data
        return (weight.new(self.n_layers, n_seqs, self.n_hidden).zero_(),
                weight.new(self.n_layers, n_seqs, self.n_hidden).zero_())

    def forward(self, x, hc):
        ''' 
        Forward pass through the network. 
        These inputs are x, and the hidden/cell state `hc`. 
        '''
        x, (h, c) = self.lstm(x, hc)
        x = self.dropout(x)
        x = x.reshape(x.size()[0] * x.size()[1], self.n_hidden)
        x = self.fc(x)
        return x, (h, c)

In [29]:
def train(net, train_data, val_data, epochs=Config.epochs, n_seqs=Config.n_seqs, 
          n_steps=Config.n_steps, lr=Config.lr, clip=Config.clip, cuda=Config.cuda):
    ''' 
        Training a network 
    
        Arguments
        ----------------
        net: RNN network
        train_data: text data to train the network
        val_data: text data to validate the network
        epochs: Number of epochs to train
        n_seqs: Number of mini-sequences per mini-batch, aka batch size
        n_steps: Number of character steps per mini-batch
        lr: learning rate
        clip: gradient clipping
        cuda: Train with CUDA on a GPU
    '''
    net.train()
    opt = torch.optim.Adam(net.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    # Early stopping
    the_last_loss = 100
    patience = 10
    trigger_times = 0
    isStopped = False
    if cuda:
        net.cuda()
    
    counter = 0
    for e in range(epochs):
        h = net.init_hidden(n_seqs)
        if isStopped:
            break
        for x, y in data_loader(train_data, n_seqs, n_steps):
            counter += 1
            
            # One-hot encode our data and make them Torch tensors
            x = data.one_hot_encode(x, net.vocab_size)
            inputs, targets = torch.from_numpy(x), torch.from_numpy(y)
            
            if cuda:
                inputs, targets = inputs.cuda(), targets.cuda()
            h = tuple([each.data for each in h])

            net.zero_grad()
            
            output, h = net.forward(inputs, h)
            loss = criterion(output, targets.view(n_seqs*n_steps))

            loss.backward()
            
            # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
            nn.utils.clip_grad_norm_(net.parameters(), clip)

            opt.step()
            
            if counter % 10 == 0:
                
                val_h = net.init_hidden(n_seqs)
                val_losses = []
                for x, y in data_loader(val_data, n_seqs, n_steps):
                    x = data.one_hot_encode(x, net.vocab_size)
                    inputs, targets = torch.from_numpy(x), torch.from_numpy(y)
                    
                    if cuda:
                        inputs, targets = inputs.cuda(), targets.cuda()
                    val_h = tuple([each.data for each in val_h])

                    output, val_h = net.forward(inputs, val_h)
                    val_loss = criterion(output, targets.view(n_seqs*n_steps))
                
                    val_losses.append(val_loss.item())
                
                print("Epoch: {}/{}...".format(e+1, epochs),
                      "Step: {}...".format(counter),
                      "Loss: {:.4f}...".format(loss.item()),
                      "Val Loss: {:.4f}".format(np.mean(val_losses)))

                the_current_loss = np.mean(val_losses)
                if the_current_loss > the_last_loss:
                    trigger_times += 1
                    print('trigger times: ', trigger_times)
                    if trigger_times >= patience:
                        print('Early stopping! at epoch {0}'.format(e))
                        isStopped = True
                        break

                else:
                    print('trigger times: 0')
                    trigger_times = 0
                    the_last_loss = the_current_loss
                    if not isStopped:
                        with open('models/rnn.net', 'wb') as f:
                            torch.save(net.state_dict(), f)
                        print('Validation loss {:.6f}.  Saving model ...'.format(the_current_loss))

In [30]:
# define and print the net
net = RNN(vocab_size=len(data.chars))
print(net)

RNN(
  (lstm): LSTM(67, 512, num_layers=2, batch_first=True, dropout=0.5)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc): Linear(in_features=512, out_features=67, bias=True)
)


In [31]:
train(net=net, train_data=train_data, val_data=val_data, epochs=1, n_seqs=128, n_steps=100, lr=0.001)

Epoch: 1/1... Step: 10... Loss: 3.4359... Val Loss: 3.4042
trigger times: 0
Validation loss 3.404189.  Saving model ...
Epoch: 1/1... Step: 20... Loss: 3.3170... Val Loss: 3.2866
trigger times: 0
Validation loss 3.286617.  Saving model ...
Epoch: 1/1... Step: 30... Loss: 3.1480... Val Loss: 3.1415
trigger times: 0
Validation loss 3.141513.  Saving model ...
Epoch: 1/1... Step: 40... Loss: 2.9886... Val Loss: 2.9680
trigger times: 0
Validation loss 2.967990.  Saving model ...
Epoch: 1/1... Step: 50... Loss: 2.7885... Val Loss: 2.7829
trigger times: 0
Validation loss 2.782930.  Saving model ...
Epoch: 1/1... Step: 60... Loss: 2.6836... Val Loss: 2.6688
trigger times: 0
Validation loss 2.668850.  Saving model ...
Epoch: 1/1... Step: 70... Loss: 2.5884... Val Loss: 2.5943
trigger times: 0
Validation loss 2.594312.  Saving model ...
Epoch: 1/1... Step: 80... Loss: 2.5724... Val Loss: 2.5370
trigger times: 0
Validation loss 2.536999.  Saving model ...
Epoch: 1/1... Step: 90... Loss: 2.4969..

In [32]:
def predict(net, char, h=None, cuda=False, top_k=None):
        ''' Given a character, predict the next character.
            Returns the predicted character and the hidden state.
        '''
        if cuda:
            net.cuda()
        else:
            net.cpu()
        
        if h is None:
            h = net.init_hidden(1)
        
        x = np.array([[data.char2int[char]]])
        x = data.one_hot_encode(x, len(data.chars))
        inputs = torch.from_numpy(x)
        if cuda:
            inputs = inputs.cuda()
        
        h = tuple([each.data for each in h])
        out, h = net.forward(inputs, h)

        p = F.softmax(out, dim=1).data
        if cuda:
            p = p.cpu()
        
        if top_k is None:
            top_ch = np.arange(len(data.chars))
        else:
            p, top_ch = p.topk(top_k)
            top_ch = top_ch.numpy().squeeze()
        
        p = p.numpy().squeeze()
        char = np.random.choice(top_ch, p=p/p.sum())
            
        return data.int2char[char], h

In [33]:
def sample(net, size, prime='The', top_k=None, cuda=False):
    '''
    Generate the next `size` characters from given `prime`
    '''
    if cuda:
        net.cuda()
    else:
        net.cpu()

    net.eval()
    
    # Run through the prime characters
    chars = [ch for ch in prime]
    h = net.init_hidden(1)
    for ch in prime:
        char, h = predict(net, ch, h, cuda=cuda, top_k=top_k)

    chars.append(char)
    
    # Pass in the previous character and get a new one
    for ii in range(size):
        char, h = predict(net, chars[-1], h, cuda=cuda, top_k=top_k)
        chars.append(char)

    return ''.join(chars)

In [36]:
print(sample(net, 1000, prime='Juliet', top_k=5, cuda=False))

Juliet as the sand, I hive bear that the sto dow me her her hound a that's a deat thou stor more my thourer a mayedst, to he will shall beand shild him be manter as is thy leed
I he sall to be math to the manter the conteres.

BOTTRES:
And tay to here.

LAOD IUS ERANA:
That she live sear thou will the tond somer ant may him then that have hung to mont thou hather'd, and thou to bettred.'
Thould the make have to be ang of thee her asteres mint,
And thy wall heal the mand that the thang as the that
Aspils, I as it took to he mist to his some sairs me that sore and and a mady fill and so me thou his beed, and stay, I'll say and a the cone of hear shere her, well thou had so ard here then all ast and the camser with my soon a may thou wollds the hiss as trat with shat an my to dast he sound and the send the mise of the foor he wand whome so has shill, sere a tour ore tressings, tore all ance stind and thy ford and where hast as ance at the wing in and the case of his hear and me hear that 