In [9]:
%cd /home/bap/hana/Basic-NLP-RNN/rnn/rnn

/home/bap/hana/Basic-NLP-RNN/rnn/rnn


In [10]:
import numpy as np
import io
import torch
from torch import nn
import torch.nn.functional as F

In [11]:
class Config:
    '''
    Config class defines dataset path and hyperparameters.
    '''
    data_train_url = 'data/shakespeare_train.txt'
    data_val_url = 'data/shakespeare_valid.txt'
    n_hidden = 512
    n_layers = 2
    epochs = 25 
    n_seqs = 128
    n_steps = 100
    lr = 0.001
    clip = 5
    cuda = False
    dropout = 0.5

In [12]:
class Dataset:
    '''
    Load data from data path, preprocess (tokenize & one-hot encode) and get data in array type.
    '''
    def __init__(self, data_train_url = Config.data_train_url, data_val_url = Config.data_val_url):
        with io.open (data_train_url, 'r') as f:
            self.text_train = f.read()
        with io.open (data_val_url, 'r') as f:
            self.text_val = f.read()

    def char_tokenize(self):
        self.chars = tuple(set(self.text_train))
        self.int2char = dict(enumerate(self.chars))
        self.char2int = {ch: ii for ii, ch in self.int2char.items()}
        self.train_data = np.array([self.char2int[ch] for ch in self.text_train])
        self.val_data = np.array([self.char2int[ch] for ch in self.text_val])

    def one_hot_encode(self, arr, n_labels):
        one_hot = np.zeros((np.multiply(*arr.shape), n_labels), dtype=np.float32)
        one_hot[np.arange(one_hot.shape[0]), arr.flatten()] = 1.
        one_hot = one_hot.reshape((*arr.shape, n_labels))
        return one_hot

    def get_data(self):
        self.char_tokenize()
        return self.train_data, self.val_data

In [13]:
data = Dataset()
train_data, val_data = data.get_data()
print("Encoded chars in train:", train_data[:100])
print("Number of chars in vocab: ", len(data.chars))
print("Train text: ", data.text_train[:100])

Encoded chars in train: [12 34 36 56 37 40 47 34 37 34 25 38 13 30 49 26 38  4 52 36 38 40 44 38
 40 17 36 52  1 38 38  6 40 42 13 51 40  4 60 36 37 63 38 36 61 40 63 38
 42 36 40  0 38 40 56 17 38 42 15 58 49 49 53 19 19 30 49 21 17 38 42 15
 61 40 56 17 38 42 15 58 49 49 12 34 36 56 37 40 47 34 37 34 25 38 13 30
 49 46 52 60]
Number of chars in vocab:  67
Train text:  First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You


In [14]:
class DataLoader:
    '''
    Load data from dataset in batches (batches = n_seqs * n_steps)
    '''
    def __init__(self, train, val):
        self.train = train
        self.val = val

    def __call__(self, arr, n_seqs, n_steps):
        '''
        Create a generator that returns batches of size
        n_seqs x n_steps from arr.
        
        Arguments
        ---------
        arr: np.array
            Array you want to make batches from
        n_seqs: int
            Batch size, the number of sequences per batch
        n_steps: int
            Number of sequence steps per batch
        '''
        batch_size = n_seqs * n_steps
        n_batches = len(arr) // batch_size
        arr = arr[:n_batches * batch_size]
        arr = arr.reshape((n_seqs, -1))
        
        for n in range(0, arr.shape[1], n_steps):
            x = arr[:, n: n + n_steps]
            y = np.zeros_like(x)
            try:
                y[:, :-1], y[:, -1] = x[:, 1:], arr[:, n + n_steps]
            except IndexError:
                y[:, :-1], y[:, -1] = x[:, 1:], arr[:, 0]
            yield x, y

In [15]:
data_loader = DataLoader(train_data, val_data)
next(data_loader(train_data, 1, 5))

(array([[12, 34, 36, 56, 37]]), array([[34, 36, 56, 37, 40]]))

In [16]:
class RNN(nn.Module):
    def __init__(self, vocab_size, n_steps=Config.n_steps, n_hidden=Config.n_hidden, n_layers=Config.n_layers,
                    drop_prob=Config.dropout, lr=Config.lr):
        super().__init__()
        self.vocab_size = vocab_size
        self.drop_prob = drop_prob
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.lr = lr      
        self.lstm = nn.LSTM(vocab_size, n_hidden, n_layers, 
                            dropout=drop_prob, batch_first=True)        
        self.dropout = nn.Dropout(drop_prob)      
        self.fc = nn.Linear(n_hidden, vocab_size)
        self.init_weights()
    
    def init_weights(self):
        ''' 
        Initialize weights for fully connected layer 
        '''
        self.fc.bias.data.fill_(0)
        self.fc.weight.data.uniform_(-1, 1)
        
    def init_hidden(self, n_seqs):
        ''' 
        Initializes hidden state 
        '''
        weight = next(self.parameters()).data
        return (weight.new(self.n_layers, n_seqs, self.n_hidden).zero_(),
                weight.new(self.n_layers, n_seqs, self.n_hidden).zero_())

    def forward(self, x, hc):
        ''' 
        Forward pass through the network. 
        These inputs are x, and the hidden/cell state `hc`. 
        '''
        x, (h, c) = self.lstm(x, hc)
        x = self.dropout(x)
        x = x.reshape(x.size()[0] * x.size()[1], self.n_hidden)
        x = self.fc(x)
        return x, (h, c)

In [17]:
def train(net, train_data, val_data, epochs=Config.epochs, n_seqs=Config.n_seqs, 
          n_steps=Config.n_steps, lr=Config.lr, clip=Config.clip, cuda=Config.cuda):
    ''' 
        Training a network 
    
        Arguments
        ----------------
        net: RNN network
        train_data: text data to train the network
        val_data: text data to validate the network
        epochs: Number of epochs to train
        n_seqs: Number of mini-sequences per mini-batch, aka batch size
        n_steps: Number of character steps per mini-batch
        lr: learning rate
        clip: gradient clipping
        cuda: Train with CUDA on a GPU
    '''
    net.train()
    opt = torch.optim.Adam(net.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    # Early stopping
    the_last_loss = 100
    patience = 10
    trigger_times = 0
    isStopped = False
    if cuda:
        net.cuda()
    
    counter = 0
    for e in range(epochs):
        h = net.init_hidden(n_seqs)
        if isStopped:
            break
        for x, y in data_loader(train_data, n_seqs, n_steps):
            counter += 1
            
            # One-hot encode our data and make them Torch tensors
            x = data.one_hot_encode(x, net.vocab_size)
            inputs, targets = torch.from_numpy(x), torch.from_numpy(y)
            
            if cuda:
                inputs, targets = inputs.cuda(), targets.cuda()
            h = tuple([each.data for each in h])

            net.zero_grad()
            
            output, h = net.forward(inputs, h)
            loss = criterion(output, targets.view(n_seqs*n_steps))

            loss.backward()
            
            # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
            nn.utils.clip_grad_norm_(net.parameters(), clip)

            opt.step()
            
            if counter % 10 == 0:
                
                val_h = net.init_hidden(n_seqs)
                val_losses = []
                for x, y in data_loader(val_data, n_seqs, n_steps):
                    x = data.one_hot_encode(x, net.vocab_size)
                    inputs, targets = torch.from_numpy(x), torch.from_numpy(y)
                    
                    if cuda:
                        inputs, targets = inputs.cuda(), targets.cuda()
                    val_h = tuple([each.data for each in val_h])

                    output, val_h = net.forward(inputs, val_h)
                    val_loss = criterion(output, targets.view(n_seqs*n_steps))
                
                    val_losses.append(val_loss.item())
                
                print("Epoch: {}/{}...".format(e+1, epochs),
                      "Step: {}...".format(counter),
                      "Loss: {:.4f}...".format(loss.item()),
                      "Val Loss: {:.4f}".format(np.mean(val_losses)))

                the_current_loss = np.mean(val_losses)
                if the_current_loss > the_last_loss:
                    trigger_times += 1
                    print('trigger times: ', trigger_times)
                    if trigger_times >= patience:
                        print('Early stopping! at epoch {0}'.format(e))
                        isStopped = True
                        break

                else:
                    print('trigger times: 0')
                    trigger_times = 0
                    the_last_loss = the_current_loss
                    if not isStopped:
                        with open('models/rnn.net', 'wb') as f:
                            torch.save({'tokens': data.chars, 'state_dict': net.state_dict()}, f)
                        print('Validation loss {:.6f}.  Saving model ...'.format(the_current_loss))

In [18]:
# define and print the net
net = RNN(vocab_size=len(data.chars))
print(net)

RNN(
  (lstm): LSTM(67, 512, num_layers=2, batch_first=True, dropout=0.5)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc): Linear(in_features=512, out_features=67, bias=True)
)


In [19]:
train(net=net, train_data=train_data, val_data=val_data, epochs=1, n_seqs=128, n_steps=100, lr=0.001)

Epoch: 1/1... Step: 10... Loss: 3.4488... Val Loss: 3.4144
trigger times: 0
Validation loss 3.414395.  Saving model ...
Epoch: 1/1... Step: 20... Loss: 3.3175... Val Loss: 3.2808
trigger times: 0
Validation loss 3.280830.  Saving model ...
Epoch: 1/1... Step: 30... Loss: 3.1552... Val Loss: 3.1335
trigger times: 0
Validation loss 3.133543.  Saving model ...
Epoch: 1/1... Step: 40... Loss: 2.9713... Val Loss: 2.9507
trigger times: 0
Validation loss 2.950736.  Saving model ...
Epoch: 1/1... Step: 50... Loss: 2.7752... Val Loss: 2.8395
trigger times: 0
Validation loss 2.839549.  Saving model ...
Epoch: 1/1... Step: 60... Loss: 2.6827... Val Loss: 2.6619
trigger times: 0
Validation loss 2.661886.  Saving model ...
Epoch: 1/1... Step: 70... Loss: 2.5794... Val Loss: 2.5902
trigger times: 0
Validation loss 2.590239.  Saving model ...
Epoch: 1/1... Step: 80... Loss: 2.5564... Val Loss: 2.5312
trigger times: 0
Validation loss 2.531220.  Saving model ...
Epoch: 1/1... Step: 90... Loss: 2.4934..

In [20]:
def predict(net, char, h=None, cuda=False, top_k=None):
        ''' Given a character, predict the next character.
            Returns the predicted character and the hidden state.
        '''
        if cuda:
            net.cuda()
        else:
            net.cpu()
        
        if h is None:
            h = net.init_hidden(1)
        
        x = np.array([[data.char2int[char]]])
        x = data.one_hot_encode(x, len(data.chars))
        inputs = torch.from_numpy(x)
        if cuda:
            inputs = inputs.cuda()
        
        h = tuple([each.data for each in h])
        out, h = net.forward(inputs, h)

        p = F.softmax(out, dim=1).data
        if cuda:
            p = p.cpu()
        
        if top_k is None:
            top_ch = np.arange(len(data.chars))
        else:
            p, top_ch = p.topk(top_k)
            top_ch = top_ch.numpy().squeeze()
        
        p = p.numpy().squeeze()
        char = np.random.choice(top_ch, p=p/p.sum())
            
        return data.int2char[char], h

In [21]:
def sample(net, size, prime='The', top_k=None, cuda=False):
    '''
    Generate the next `size` characters from given `prime`
    '''
    if cuda:
        net.cuda()
    else:
        net.cpu()

    net.eval()
    
    # Run through the prime characters
    chars = [ch for ch in prime]
    h = net.init_hidden(1)
    for ch in prime:
        char, h = predict(net, ch, h, cuda=cuda, top_k=top_k)

    chars.append(char)
    
    # Pass in the previous character and get a new one
    for ii in range(size):
        char, h = predict(net, chars[-1], h, cuda=cuda, top_k=top_k)
        chars.append(char)

    return ''.join(chars)

In [22]:
print(sample(net, 1000, prime='Juliet', top_k=5, cuda=False))

Juliet that has worther sellen will so hourst an of to this shis firness all the were will he sang thou and the wath stronget the craitings of him a dout al tither at all the theest an meresting morthan thee she his dorstad is sheart, that were as and my head.

LARDICHES:
What, then' wat ard ale mone the masine, and hin mant me hearte men of in and stor mandes of thou and sentlenstelf me his ant he will be all and have and thou has ar a sore, a done an a truce. 
And so the cartang to most, wish thou hast, a to may still tither whine me hat her with the canes of thes for his the mean then mist me hear and the stale her to that she seall my hearther hather to be sor and stie hate have wather we troung theng weathing on tees to my lever, and the son the shand then whone stall me this her will him have so with my tood she lead stee to sored, wild my faired thay war the wear hand thank the tous mant have sone, my toush him sto mat here whan a mear man then my lead hows bore merte of a strac

In [23]:
# Here we have loaded in a model that trained over 1 epoch `rnn_1_epoch.net`
with open('models/rnn.net', 'rb') as f:
    state_dict = torch.load(f, map_location=torch.device('cpu'))
    
loaded = RNN(vocab_size=len(data.chars))
loaded.load_state_dict(state_dict)

<All keys matched successfully>

In [24]:
# Change cuda to True if you are using GPU!
print(sample(loaded, 1000, cuda=False, top_k=5, prime="Juliet"))

Juliet to the were a dorting of asting
I as the parine mose stink on thin stien the wire.

PARIUS:
Ay, ar your and and a sond, be then make wer in and mertand of hearter our of hin.

PINARO:
Then well, and the parcond, than that and the pastays, this.

TINO LEONA:
I wis my leavens.
I he hat an ther ard and that in the wanter, sing is thy serteded, the pearions and sich and by that, this the wert thim all thou would this sing ald
Wert it has sto bady then ard that sendes and sell beat
Whall homs and maten.

SENDOLER:
Wire to sor mather that is the the this so my toon the pords, then this the the pantere thise all with hear of all an thing as in atester,
And with he be sere the mert alled buld tay the thou standss
Whate were to mear hearter,
I will sen thou shall the the stant,
What when way, that sand and be a somere this were the tone of mowed.

LOUTHAMER:
And you shat that her, all a men to bear, but harest an well beer to me hin the breat this.

PARTIS:
How she sere thour als and and