In [2]:
import numpy as np
import io
import torch
import torch.nn as nn
import torch.nn.functional as F

In [4]:
class Config:
    '''
    Config class defines dataset path and hyperparameters.
    '''
    data_train_url = 'data/shakespeare_train.txt'
    data_val_url = 'data/shakespeare_valid.txt'
    n_hidden = 512
    n_layers = 2
    epochs = 25 
    n_seqs = 128
    n_steps = 100
    lr = 0.001
    clip = 5
    cuda = False
    dropout = 0.5

In [5]:
class Dataset:
    '''
    Load data from data path, preprocess (tokenize & one-hot encode) and get data in array type.
    '''
    def __init__(self, data_train_url = Config.data_train_url, data_val_url = Config.data_val_url):
        with io.open (data_train_url, 'r') as f:
            self.text_train = f.read()
        with io.open (data_val_url, 'r') as f:
            self.text_val = f.read()

    def char_tokenize(self):
        self.chars = tuple(set(self.text_train))
        self.int2char = dict(enumerate(self.chars))
        self.char2int = {ch: ii for ii, ch in self.int2char.items()}
        self.train_data = np.array([self.char2int[ch] for ch in self.text_train])
        self.val_data = np.array([self.char2int[ch] for ch in self.text_val])

    def one_hot_encode(self, arr, n_labels):
        one_hot = np.zeros((np.multiply(*arr.shape), n_labels), dtype=np.float32)
        one_hot[np.arange(one_hot.shape[0]), arr.flatten()] = 1.
        one_hot = one_hot.reshape((*arr.shape, n_labels))
        return one_hot

    def get_data(self):
        self.char_tokenize()
        return self.train_data, self.val_data

In [6]:
data = Dataset()
train_data, val_data = data.get_data()
print("Encoded chars in train:", train_data[:100])
print("Number of chars in vocab: ", len(data.chars))
print("Train text: ", data.text_train[:100])

[52 35 64  7 58 24 11 35 58 35 60 29 36 22 20  1 29 65 28 64 29 24 39 29
 24 21 64 28 41 29 29 59 24 51 36 38 24 65 54 64 58  4 29 64 55 24  4 29
 51 64 24 31 29 24  7 21 29 51 42 30 20 20 32 23 23 22 20 40 21 29 51 42
 55 24  7 21 29 51 42 30 20 20 52 35 64  7 58 24 11 35 58 35 60 29 36 22
 20 13 28 54]


'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou'

In [7]:
class DataLoader:
    '''
    Load data from dataset in batches (batches = n_seqs * n_steps)
    '''
    def __init__(self, train, val):
        self.train = train
        self.val = val

    def __call__(self, arr, n_seqs, n_steps):
        '''
        Create a generator that returns batches of size
        n_seqs x n_steps from arr.
        
        Arguments
        ---------
        arr: np.array
            Array you want to make batches from
        n_seqs: int
            Batch size, the number of sequences per batch
        n_steps: int
            Number of sequence steps per batch
        '''
        batch_size = n_seqs * n_steps
        n_batches = len(arr) // batch_size
        arr = arr[:n_batches * batch_size]
        arr = arr.reshape((n_seqs, -1))
        
        for n in range(0, arr.shape[1], n_steps):
            x = arr[:, n: n + n_steps]
            y = np.zeros_like(x)
            try:
                y[:, :-1], y[:, -1] = x[:, 1:], arr[:, n + n_steps]
            except IndexError:
                y[:, :-1], y[:, -1] = x[:, 1:], arr[:, 0]
            yield x, y

In [8]:
data_loader = DataLoader(train_data, val_data)

In [9]:
class RNN(nn.Module):
    def __init__(self, vocab_size, n_steps=Config.n_steps, n_hidden=Config.n_hidden, n_layers=Config.n_layers,
                    drop_prob=Config.dropout, lr=Config.lr):
        super().__init__()
        self.vocab_size = vocab_size
        self.drop_prob = drop_prob
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.lr = lr      
        self.lstm = nn.LSTM(vocab_size, n_hidden, n_layers, 
                            dropout=drop_prob, batch_first=True)        
        self.dropout = nn.Dropout(drop_prob)      
        self.fc = nn.Linear(n_hidden, vocab_size)
        self.init_weights()
    
    def init_weights(self):
        ''' 
        Initialize weights for fully connected layer 
        '''
        self.fc.bias.data.fill_(0)
        self.fc.weight.data.uniform_(-1, 1)
        
    def init_hidden(self, n_seqs):
        ''' 
        Initializes hidden state 
        '''
        weight = next(self.parameters()).data
        return (weight.new(self.n_layers, n_seqs, self.n_hidden).zero_(),
                weight.new(self.n_layers, n_seqs, self.n_hidden).zero_())

    def forward(self, x, hc):
        ''' 
        Forward pass through the network. 
        These inputs are x, and the hidden/cell state `hc`. 
        '''
        x, (h, c) = self.lstm(x, hc)
        x = self.dropout(x)
        x = x.reshape(x.size()[0] * x.size()[1], self.n_hidden)
        x = self.fc(x)
        return x, (h, c)

In [20]:
def train(net, train_data, val_data, epochs=Config.epochs, n_seqs=Config.n_seqs, 
          n_steps=Config.n_steps, lr=Config.lr, clip=Config.clip, cuda=Config.cuda):
    ''' 
        Training a network 
    
        Arguments
        ----------------
        net: RNN network
        train_data: text data to train the network
        val_data: text data to validate the network
        epochs: Number of epochs to train
        n_seqs: Number of mini-sequences per mini-batch, aka batch size
        n_steps: Number of character steps per mini-batch
        lr: learning rate
        clip: gradient clipping
        cuda: Train with CUDA on a GPU
    '''
    net.train()
    opt = torch.optim.Adam(net.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    # Early stopping
    the_last_loss = 100
    patience = 10
    trigger_times = 0
    isStopped = False
    if cuda:
        net.cuda()
    
    counter = 0
    for e in range(epochs):
        h = net.init_hidden(n_seqs)
        if isStopped:
            break
        for x, y in data_loader(train_data, n_seqs, n_steps):
            counter += 1
            
            # One-hot encode our data and make them Torch tensors
            x = data.one_hot_encode(x, net.vocab_size)
            inputs, targets = torch.from_numpy(x), torch.from_numpy(y)
            
            if cuda:
                inputs, targets = inputs.cuda(), targets.cuda()
            h = tuple([each.data for each in h])

            net.zero_grad()
            
            output, h = net.forward(inputs, h)
            loss = criterion(output, targets.view(n_seqs*n_steps))

            loss.backward()
            
            # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
            nn.utils.clip_grad_norm_(net.parameters(), clip)

            opt.step()
            
            if counter % 10 == 0:
                
                val_h = net.init_hidden(n_seqs)
                val_losses = []
                for x, y in data_loader(val_data, n_seqs, n_steps):
                    x = data.one_hot_encode(x, net.vocab_size)
                    inputs, targets = torch.from_numpy(x), torch.from_numpy(y)
                    
                    if cuda:
                        inputs, targets = inputs.cuda(), targets.cuda()
                    val_h = tuple([each.data for each in val_h])

                    output, val_h = net.forward(inputs, val_h)
                    val_loss = criterion(output, targets.view(n_seqs*n_steps))
                
                    val_losses.append(val_loss.item())
                
                print("Epoch: {}/{}...".format(e+1, epochs),
                      "Step: {}...".format(counter),
                      "Loss: {:.4f}...".format(loss.item()),
                      "Val Loss: {:.4f}".format(np.mean(val_losses)))

                the_current_loss = np.mean(val_losses)
                if the_current_loss > the_last_loss:
                    trigger_times += 1
                    print('trigger times: ', trigger_times)
                    if trigger_times >= patience:
                        print('Early stopping! at epoch {0}'.format(e))
                        isStopped = True
                        break

                else:
                    print('trigger times: 0')
                    trigger_times = 0
                    the_last_loss = the_current_loss
                    if not isStopped:
                        with open('models/rnn.net', 'wb') as f:
                            torch.save(net.state_dict(), f)
                        print('Validation loss {:.6f}.  Saving model ...'.format(the_current_loss))

In [21]:
if 'net' in locals():
    del net

In [22]:
len(data.chars)

67

In [23]:
# define and print the net
net = RNN(input_size=len(data.chars))
print(net)

RNN(
  (lstm): LSTM(67, 512, num_layers=2, batch_first=True, dropout=0.5)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc): Linear(in_features=512, out_features=67, bias=True)
)


In [24]:
train(net=net, train_data=train_data, val_data=val_data, epochs=25, n_seqs=128, n_steps=100, lr=0.001)

Epoch: 1/25... Step: 10... Loss: 3.4277... Val Loss: 3.3981
trigger times: 0
Validation loss 3.398069.  Saving model ...
Epoch: 1/25... Step: 20... Loss: 3.2862... Val Loss: 3.2549
trigger times: 0
Validation loss 3.254935.  Saving model ...
Epoch: 1/25... Step: 30... Loss: 3.1153... Val Loss: 3.0989
trigger times: 0
Validation loss 3.098858.  Saving model ...
Epoch: 1/25... Step: 40... Loss: 2.9333... Val Loss: 2.9153
trigger times: 0
Validation loss 2.915315.  Saving model ...
Epoch: 1/25... Step: 50... Loss: 2.7370... Val Loss: 2.7431
trigger times: 0
Validation loss 2.743079.  Saving model ...
Epoch: 1/25... Step: 60... Loss: 2.6336... Val Loss: 2.6223
trigger times: 0
Validation loss 2.622278.  Saving model ...
Epoch: 1/25... Step: 70... Loss: 2.5378... Val Loss: 2.5502
trigger times: 0
Validation loss 2.550151.  Saving model ...
Epoch: 1/25... Step: 80... Loss: 2.5215... Val Loss: 2.4971
trigger times: 0
Validation loss 2.497054.  Saving model ...
Epoch: 1/25... Step: 90... Loss:

In [25]:
def predict(net, char, h=None, cuda=False, top_k=None):
        ''' Given a character, predict the next character.
            Returns the predicted character and the hidden state.
        '''
        if cuda:
            net.cuda()
        else:
            net.cpu()
        
        if h is None:
            h = net.init_hidden(1)
        
        x = np.array([[data.char2int[char]]])
        x = data.one_hot_encode(x, len(data.chars))
        inputs = torch.from_numpy(x)
        if cuda:
            inputs = inputs.cuda()
        
        h = tuple([each.data for each in h])
        out, h = net.forward(inputs, h)

        p = F.softmax(out, dim=1).data
        if cuda:
            p = p.cpu()
        
        if top_k is None:
            top_ch = np.arange(len(data.chars))
        else:
            p, top_ch = p.topk(top_k)
            top_ch = top_ch.numpy().squeeze()
        
        p = p.numpy().squeeze()
        char = np.random.choice(top_ch, p=p/p.sum())
            
        return data.int2char[char], h

In [26]:
def sample(net, size, prime='The', top_k=None, cuda=False):
    '''
    Generate the next `size` characters from given `prime`
    '''
    if cuda:
        net.cuda()
    else:
        net.cpu()

    net.eval()
    
    # Run through the prime characters
    chars = [ch for ch in prime]
    h = net.init_hidden(1)
    for ch in prime:
        char, h = predict(net, ch, h, cuda=cuda, top_k=top_k)

    chars.append(char)
    
    # Pass in the previous character and get a new one
    for ii in range(size):
        char, h = predict(net, chars[-1], h, cuda=cuda, top_k=top_k)
        chars.append(char)

    return ''.join(chars)

In [29]:
print(sample(net, 500, prime='Juliet', top_k=5, cuda=False))

Juliet to blows,
When such a sheeting to the compost to be all
this is men on the conclidience; and shall spoke that,
And, that I will deay to his head to this with.

PETRUCHIO:
That thou hast been the corrol of your lady
To take their part another strumpets.

CASSIUS:
What, she was shopen.

SICINIUS:
I have send me true: and he did see thy learn.
That, we another will, a winded to them the down, and
with you, sir, the come and see a that would stay.

CAPULET:
I'll take the temple. The was wants, a she


In [30]:
# Here we have loaded in a model that trained over 1 epoch `rnn_1_epoch.net`
with open('rnn.net', 'rb') as f:
    state_dict = torch.load(f)
    
loaded = RNN(input_size=len(data.chars))
loaded.load_state_dict(state_dict)

<All keys matched successfully>

In [31]:
# Change cuda to True if you are using GPU!
print(sample(loaded, 1000, cuda=True, top_k=5, prime="Juliet"))

Juliet of my like,
The words with a traches; but all my house
With some mighty and such a man,
If stands in tentience to the countery
To this me sense of sected sight.

MARINA:
Why, she hath better stand all as the present serve.

Second Gentleman:
I have this sorrow strongs thou, shall to stand them:
The show with my belessiones, as thou art sent
We will be send. He woulds here is there the most the
stope, and thanks that humble manys by sufficed with
And tell my husband she is not the stain,
As it before thee so thou and the some of here,
And better that were something to the service.

CRESSIDA:
What hath me be this time? the man to mildent
How much shall help here in mine the chill were,
The wantom to her truth, word to secure the course
Of the caure is made a man that spiril there and make
The change they will the matter of my stang.

SUFFOLK:
We will not been short on thy both a morn
That I am steal. What is myself, make a stain to thy hand,
With honest said and sease their brothe