In [1]:
import numpy as np
import torch
from torch import nn
import torch.nn.functional as F
import os

In [2]:
with open('data/1342.txt','r') as f:
    text = f.read()

In [3]:
text[:100]

'The Project Gutenberg EBook of Pride and Prejudice, by Jane Austen\n\nThis eBook is for the use of any'

In [5]:
chars = tuple(set(text))
int2char = dict(enumerate(chars))
char2int = {c:i for i, c in int2char.items()}

encoded = np.array([char2int[ch] for ch in text])

In [13]:
def one_hot_encoder(arr, n_labels):
    one_hot = np.zeros((np.multiply(*arr.shape), n_labels), dtype=np.float32)
    one_hot[np.arange(one_hot.shape[0]), arr.flatten()] = 1. 
    one_hot = one_hot.reshape((*arr.shape, n_labels))
    return one_hot

In [19]:
test_seq = np.array([[3,5,1]])
one_hot=one_hot_encoder(test_seq, 8)

In [22]:
print(test_seq)

[[3 5 1]]


In [23]:
test_seq.shape

(1, 3)

In [24]:
print(one_hot)

[[[0. 0. 0. 1. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 1. 0. 0.]
  [0. 1. 0. 0. 0. 0. 0. 0.]]]


In [28]:
def get_batches(arr, batch_size, seq_length):
    '''Create a generator that returns batches of size
       batch_size x seq_length from arr
       
       Arguments
       ---------
       arr: Array you want to make batches from
       batch_size: Batch size, the number of sequences per batch
       seq_length: Number of encoded chars in a sequence
    '''
    
    batch_size_total = batch_size * seq_length
    n_batches = len(arr) // batch_size_total
    
    arr = arr[:n_batches * batch_size_total]
    arr = arr.reshape((batch_size, -1))
    
    for n in range(0, arr.shape[1], seq_length):
        x = arr[:, n:n+seq_length]
        y = np.zeros_like(x) 
        try:
            y[:, :-1], y[:, -1] = x[:,1:], arr[:, n+seq_length]
        except IndexError:
            y[:, :-1], y[:, -1] = x[:,1:], arr[:,0] 
        yield x, y 
        

In [29]:
batches = get_batches(encoded, 8, 50)
x, y = next(batches)

In [68]:
# check if GPU is available
train_on_gpu = torch.cuda.is_available()
if(train_on_gpu):
    print('Training on GPU!')
else: 
    print('No GPU available, training on CPU; consider making n_epochs very small.')

No GPU available, training on CPU; consider making n_epochs very small.


In [72]:
class CharRNN(nn.Module):
    def __init__(self, tokens, n_hidden=612, n_layers=4, drop_prob=0.5, lr=0.001):
        super().__init__()
        self.drop_prob = drop_prob
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.lr = lr
        
        self.chars = tokens
        self.int2char = dict(enumerate(self.chars))
        self.char2int = {ch:ii for ii, ch in self.int2char.items()}
        
        self.lstm = nn.LSTM(len(self.chars), n_hidden, n_layers, 
                            dropout=drop_prob, batch_first=True)
        self.dropout = nn.Dropout(drop_prob)
        
        self.fc = nn.Linear(n_hidden, len(self.chars))
        
    def forward(self, x, hidden):
        r_output, hidden = self.lstm(x,hidden)
        
        out = self.dropout(r_output)
        
        out = out.contiguous().view(-1, self.n_hidden)
        
        out = self.fc(out)
        
        return out, hidden
    
    def init_hidden(self, batch_size):
        weight = next(self.parameters()).data
        
        if (train_on_gpu):
            hidden = (weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda(),
                  weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda())
        else:
            hidden = (weight.new(self.n_layers, batch_size, self.n_hidden).zero_(),
                      weight.new(self.n_layers, batch_size, self.n_hidden).zero_())
        
        return hidden

In [79]:
def train(net, data, epochs=10, batch_size=10, seq_length=50, lr=0.001, clip=5, val_frac=0.1, print_every=10):
    ''' Training a network 
    
        Arguments
        ---------
        
        net: CharRNN network
        data: text data to train the network
        epochs: Number of epochs to train
        batch_size: Number of mini-sequences per mini-batch, aka batch size
        seq_length: Number of character steps per mini-batch
        lr: learning rate
        clip: gradient clipping
        val_frac: Fraction of data to hold out for validation
        print_every: Number of steps for printing training and validation loss
    
    '''
    net.train()
    
    opt = torch.optim.Adam(net.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    
    # create training and validation data
    val_idx = int(len(data)*(1-val_frac))
    data, val_data = data[:val_idx], data[val_idx:]
    
    if(train_on_gpu):
        net.cuda()
    
    counter = 0
    n_chars = len(net.chars)
    for e in range(epochs):
        # initialize hidden state
        h = net.init_hidden(batch_size)
        
        for x, y in get_batches(data, batch_size, seq_length):
            counter += 1
            
            # One-hot encode our data and make them Torch tensors
            x = one_hot_encoder(x, n_chars)
            inputs, targets = torch.from_numpy(x), torch.from_numpy(y)
            
            if(train_on_gpu):
                inputs, targets = inputs.cuda(), targets.cuda()

            # Creating new variables for the hidden state, otherwise
            # we'd backprop through the entire training history
            h = tuple([each.data for each in h])

            # zero accumulated gradients
            net.zero_grad()
            
            # get the output from the model
            output, h = net(inputs, h)
            
            # calculate the loss and perform backprop
            loss = criterion(output, targets.view(batch_size*seq_length))
            loss.backward()
            # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
            nn.utils.clip_grad_norm_(net.parameters(), clip)
            opt.step()
            
            # loss stats
            if counter % print_every == 0:
                # Get validation loss
                val_h = net.init_hidden(batch_size)
                val_losses = []
                net.eval()
                for x, y in get_batches(val_data, batch_size, seq_length):
                    # One-hot encode our data and make them Torch tensors
                    x = one_hot_encoder(x, n_chars)
                    x, y = torch.from_numpy(x), torch.from_numpy(y)
                    
                    # Creating new variables for the hidden state, otherwise
                    # we'd backprop through the entire training history
                    val_h = tuple([each.data for each in val_h])
                    
                    inputs, targets = x, y
                    if(train_on_gpu):
                        inputs, targets = inputs.cuda(), targets.cuda()

                    output, val_h = net(inputs, val_h)
                    val_loss = criterion(output, targets.view(batch_size*seq_length))
                
                    val_losses.append(val_loss.item())
                
                net.train() # reset to train mode after iterationg through validation data
                
                print("Epoch: {}/{}...".format(e+1, epochs),
                      "Step: {}...".format(counter),
                      "Loss: {:.4f}...".format(loss.item()),
                      "Val Loss: {:.4f}".format(np.mean(val_losses)))
                

In [80]:
# define and print the net
n_hidden=512
n_layers=4

net = CharRNN(chars, n_hidden, n_layers)
print(net)

CharRNN(
  (lstm): LSTM(84, 512, num_layers=4, batch_first=True, dropout=0.5)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc): Linear(in_features=512, out_features=84, bias=True)
)


In [None]:
batch_size = 64
seq_length = 160 #max length verses
n_epochs = 50 # start smaller if you are just testing initial behavior

# train the model
train(net, encoded, epochs=n_epochs, batch_size=batch_size, seq_length=seq_length, lr=0.001, print_every=10)

Epoch: 1/50... Step: 10... Loss: 3.2197... Val Loss: 3.1700
Epoch: 1/50... Step: 20... Loss: 3.1528... Val Loss: 3.1524
Epoch: 1/50... Step: 30... Loss: 3.1478... Val Loss: 3.1461
Epoch: 1/50... Step: 40... Loss: 3.1166... Val Loss: 3.1438
Epoch: 1/50... Step: 50... Loss: 3.1226... Val Loss: 3.1427
Epoch: 1/50... Step: 60... Loss: 3.1236... Val Loss: 3.1462
Epoch: 2/50... Step: 70... Loss: 3.1229... Val Loss: 3.1427
Epoch: 2/50... Step: 80... Loss: 3.0889... Val Loss: 3.1446
Epoch: 2/50... Step: 90... Loss: 3.1094... Val Loss: 3.1425
Epoch: 2/50... Step: 100... Loss: 3.1075... Val Loss: 3.1433
Epoch: 2/50... Step: 110... Loss: 3.1118... Val Loss: 3.1434
Epoch: 2/50... Step: 120... Loss: 3.1011... Val Loss: 3.1454
Epoch: 3/50... Step: 130... Loss: 3.1083... Val Loss: 3.1431
Epoch: 3/50... Step: 140... Loss: 3.0909... Val Loss: 3.1450
Epoch: 3/50... Step: 150... Loss: 3.1083... Val Loss: 3.1425
Epoch: 3/50... Step: 160... Loss: 3.1029... Val Loss: 3.1441
Epoch: 3/50... Step: 170... Loss:

In [55]:
x

array([[  0,   1,   2,   3,   4,   5],
       [ 60,  61,  62,  63,  64,  65],
       [120, 121, 122, 123, 124, 125],
       [180, 181, 182, 183, 184, 185],
       [240, 241, 242, 243, 244, 245]])

In [56]:
y

array([[  1,   2,   3,   4,   5,   6],
       [ 61,  62,  63,  64,  65,  66],
       [121, 122, 123, 124, 125, 126],
       [181, 182, 183, 184, 185, 186],
       [241, 242, 243, 244, 245, 246]])

In [57]:
x, y = next(batches)

In [58]:
x

array([[  6,   7,   8,   9,  10,  11],
       [ 66,  67,  68,  69,  70,  71],
       [126, 127, 128, 129, 130, 131],
       [186, 187, 188, 189, 190, 191],
       [246, 247, 248, 249, 250, 251]])

In [59]:
y

array([[  7,   8,   9,  10,  11,  12],
       [ 67,  68,  69,  70,  71,  72],
       [127, 128, 129, 130, 131, 132],
       [187, 188, 189, 190, 191, 192],
       [247, 248, 249, 250, 251, 252]])

In [67]:
x, y = next(batches)
print(x,y)

[[ 54  55  56  57  58  59]
 [114 115 116 117 118 119]
 [174 175 176 177 178 179]
 [234 235 236 237 238 239]
 [294 295 296 297 298 299]] [[ 55  56  57  58  59   0]
 [115 116 117 118 119  60]
 [175 176 177 178 179 120]
 [235 236 237 238 239 180]
 [295 296 297 298 299 240]]
