In [None]:
# importing libraries

import numpy as np
import torch
from torch import nn
import torch.nn.functional as F

In [None]:
# load data

# Read the whole training corpus into memory as a single string.
# The encoding is stated explicitly so decoding does not depend on the
# platform's default locale encoding.
with open('drive/My Drive/style_transfer/input.txt', 'r', encoding='utf-8') as f:
    text = f.read()

In [None]:
# info about the data: show the first 50 characters and the Python type
preview = text[:50]
print(preview)
print("type = ", type(text))

First Citizen:
Before we proceed any further, hear
type =  <class 'str'>


In [None]:
# pre-processing data


# one-hot encoding
def one_hot_encode(arr, n_labels):
    """One-hot encode an array of integer class indices.

    Args:
        arr: Array-like of integer indices with any number of dimensions.
            Every value is used as a column index into a row of length
            ``n_labels``.
        n_labels: Length of each one-hot vector (number of classes).

    Returns:
        A ``float32`` NumPy array of shape ``(*arr.shape, n_labels)`` that is
        1.0 at the position given by each index and 0.0 elsewhere.
    """
    arr = np.asarray(arr)

    # arr.size is the total element count for any rank. The former
    # np.multiply(*arr.shape) only worked when arr had exactly two dimensions.
    one_hot = np.zeros((arr.size, n_labels), dtype=np.float32)

    # One row per element: switch on the column selected by that element.
    one_hot[np.arange(arr.size), arr.ravel()] = 1.

    # Restore the input's shape, with the one-hot axis appended last.
    return one_hot.reshape((*arr.shape, n_labels))

def get_batches(arr, batch_size, seq_length):
    """
    Generator yielding (input, target) mini-batches from an encoded sequence.

    The sequence is trimmed to a whole number of batches (leftover trailing
    elements are dropped) and laid out as ``batch_size`` rows. Each yielded
    ``x`` is a ``(batch_size, seq_length)`` window of that layout and ``y`` is
    the same window shifted one step to the left. For the final window the
    last target column wraps around to the first column of the layout.
    """
    chars_per_batch = batch_size * seq_length
    full_batches = len(arr) // chars_per_batch

    # Keep only what fills complete batches, then lay it out row-wise.
    grid = arr[:full_batches * chars_per_batch].reshape((batch_size, -1))
    width = grid.shape[1]

    for start in range(0, width, seq_length):
        stop = start + seq_length

        # The features
        x = grid[:, start:stop]

        # The targets: x shifted by one position
        y = np.zeros_like(x)
        y[:, :-1] = x[:, 1:]
        if stop < width:
            y[:, -1] = grid[:, stop]
        else:
            # Last window: no following column exists, so wrap to column 0.
            y[:, -1] = grid[:, 0]

        yield x, y


In [None]:
# Tuple of the unique characters in the text. It is sorted because plain
# set iteration order for strings can change between interpreter runs
# (hash randomization), which would silently change the character <-> integer
# mapping after a kernel restart.
unique_characters = tuple(sorted(set(text)))

# assign an integer value to each character, and build the reverse lookup
int2char = dict(enumerate(unique_characters))
char2int = {ch: ii for ii, ch in int2char.items()}

# replace each character with its corresponding integer value
encoded = np.array([char2int[ch] for ch in text])

In [None]:
# Show the vocabulary, both lookup tables, and a peek at the encoded text.
summary_items = (
    ("unique characters = ", unique_characters),
    ("Number of unique characters = ", len(unique_characters)),
    ("int2char = ", int2char),
    ("char2int = ", char2int),
    ("encoded text = ", encoded[:50]),
    ("input shape = ", encoded.shape),
)
for label, value in summary_items:
    print(label, value, '\n')

unique characters =  ('X', 'U', '$', 'N', 'j', 'C', 'F', 'b', 'W', 'd', 'R', 'p', 'c', '.', 'S', 'Y', 'Q', 'y', 'l', 's', ' ', 'B', ',', 'O', 'V', 'f', 'A', 'T', 'I', 'P', 'G', 'Z', 'x', 'J', 'g', '&', 'r', 'K', '\n', '-', 'a', 't', 'z', 'u', 'D', "'", 'v', 'h', 'q', '3', 'w', 'm', 'E', 'o', 'n', 'L', 'e', '!', 'H', '?', 'k', 'M', 'i', ';', ':') 

Number of unique characters =  65 

int2char =  {0: 'X', 1: 'U', 2: '$', 3: 'N', 4: 'j', 5: 'C', 6: 'F', 7: 'b', 8: 'W', 9: 'd', 10: 'R', 11: 'p', 12: 'c', 13: '.', 14: 'S', 15: 'Y', 16: 'Q', 17: 'y', 18: 'l', 19: 's', 20: ' ', 21: 'B', 22: ',', 23: 'O', 24: 'V', 25: 'f', 26: 'A', 27: 'T', 28: 'I', 29: 'P', 30: 'G', 31: 'Z', 32: 'x', 33: 'J', 34: 'g', 35: '&', 36: 'r', 37: 'K', 38: '\n', 39: '-', 40: 'a', 41: 't', 42: 'z', 43: 'u', 44: 'D', 45: "'", 46: 'v', 47: 'h', 48: 'q', 49: '3', 50: 'w', 51: 'm', 52: 'E', 53: 'o', 54: 'n', 55: 'L', 56: 'e', 57: '!', 58: 'H', 59: '?', 60: 'k', 61: 'M', 62: 'i', 63: ';', 64: ':'} 

char2int =  {'X': 0, 'U

In [None]:
# training

# Detect CUDA once; later cells read this flag to decide device placement.
train_on_gpu = torch.cuda.is_available()
print('Training on GPU!' if train_on_gpu else 'Training on CPU')

Training on GPU!


In [None]:
# model

class CharRNN(nn.Module):
    """Character-level language model: stacked LSTM -> dropout -> linear.

    The LSTM consumes one-hot vectors of length ``len(tokens)`` (batch-first)
    and the linear layer maps each hidden state to one score per token.
    """

    def __init__(self, tokens, n_hidden=256, n_layers=2, drop_prob=0.5, lr=0.001):
        """Build the layers and the character lookup tables.

        Args:
            tokens: Sequence of the distinct characters in the vocabulary; a
                character's position is its integer code.
            n_hidden: Size of the LSTM hidden state.
            n_layers: Number of stacked LSTM layers.
            drop_prob: Dropout probability, used both between LSTM layers and
                on the LSTM output before the linear layer.
            lr: Learning rate; only stored on the instance, not used by this
                class itself.
        """
        super().__init__()
        self.drop_prob = drop_prob
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.lr = lr

        # Vocabulary and the two lookup tables derived from it.
        self.chars = tokens
        self.int2char = dict(enumerate(self.chars))
        self.char2int = {ch: ii for ii, ch in self.int2char.items()}

        # lstm layer
        self.lstm = nn.LSTM(len(self.chars), n_hidden, n_layers, dropout=drop_prob, batch_first=True)

        self.dropout = nn.Dropout(drop_prob)

        self.fc = nn.Linear(n_hidden, len(self.chars))

    def forward(self, x, hidden):
        """Run the network on a batch of sequences.

        Args:
            x: Input tensor passed to the batch-first LSTM.
            hidden: ``(hidden_state, cell_state)`` tuple to start from.

        Returns:
            ``(out, hidden)``: ``out`` has one row per (batch, time-step) pair
            and ``len(self.chars)`` columns of unnormalised scores; ``hidden``
            is the LSTM state after the last step.
        """
        # output, hidden = lstm(input, initial_hidden)
        r_output, hidden = self.lstm(x, hidden)

        out = self.dropout(r_output)

        # Stack all time steps of all sequences as rows so the linear layer
        # can be applied to every step at once.
        out = out.contiguous().view(-1, self.n_hidden)

        out = self.fc(out)

        return out, hidden

    def init_hidden(self, batch_size):
        """Create a zeroed ``(hidden_state, cell_state)`` tuple.

        Both tensors have shape ``(n_layers, batch_size, n_hidden)`` and are
        allocated with the dtype and device of the model's own parameters, so
        the state always matches wherever the model currently lives instead
        of depending on a module-level GPU flag.
        """
        param = next(self.parameters())
        shape = (self.n_layers, batch_size, self.n_hidden)

        hidden = (torch.zeros(shape, dtype=param.dtype, device=param.device),
                  torch.zeros(shape, dtype=param.dtype, device=param.device))

        return hidden

In [None]:
# Model size used for this run.
n_hidden = 512
n_layers = 2

net = CharRNN(tokens=unique_characters, n_hidden=n_hidden, n_layers=n_layers)
print(net)

CharRNN(
  (lstm): LSTM(65, 512, num_layers=2, batch_first=True, dropout=0.5)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc): Linear(in_features=512, out_features=65, bias=True)
)


In [None]:
# training

def train(net, data, epochs=10, batch_size=10, seq_length=50, lr=0.001, clip=5, val_frac=0.1, print_every=10):
    """Train ``net`` on encoded text and periodically report validation loss.

    Args:
        net: Model exposing ``chars``, ``init_hidden(batch_size)`` and a
            ``forward(inputs, hidden)`` returning ``(output, hidden)``.
        data: Encoded sequence to learn from (assumed to be a 1-D NumPy
            integer array, as consumed by ``get_batches`` -- TODO confirm).
        epochs: Number of passes over the training split.
        batch_size: Number of sequences per mini-batch.
        seq_length: Number of time steps per sequence.
        lr: Learning rate for the Adam optimizer.
        clip: Maximum gradient norm passed to ``clip_grad_norm_``.
        val_frac: Fraction of ``data`` (taken from the end) held out for
            validation.
        print_every: Report training/validation loss every this many steps.

    Side effects: moves ``net`` to the GPU when ``train_on_gpu`` is set,
    updates its weights in place, prints progress, and leaves ``net`` in
    training mode.
    """
    # dropout included
    net.train()

    opt = torch.optim.Adam(net.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    # splitting of data into train and validation
    val_idx = int(len(data)*(1-val_frac))
    data, val_data = data[:val_idx], data[val_idx:]

    # move the network to GPU if available
    if train_on_gpu:
        net.cuda()

    counter = 0
    n_chars = len(net.chars)

    for e in range(epochs):
        # fresh zero state at the start of every epoch
        h = net.init_hidden(batch_size)

        for x, y in get_batches(data, batch_size, seq_length):
            counter += 1

            x = one_hot_encode(x, n_chars)
            inputs, targets = torch.from_numpy(x), torch.from_numpy(y)

            if train_on_gpu:
                inputs, targets = inputs.cuda(), targets.cuda()

            # Detach the carried-over state so backprop stops at the batch
            # boundary instead of reaching back through earlier batches.
            h = tuple(each.detach() for each in h)

            # zero out the gradients
            net.zero_grad()

            output, h = net(inputs, h)

            loss = criterion(output, targets.view(batch_size*seq_length).long())
            loss.backward()

            # clipping the gradient
            nn.utils.clip_grad_norm_(net.parameters(), clip)
            opt.step()

            if counter % print_every == 0:
                val_h = net.init_hidden(batch_size)
                val_losses = []
                net.eval()

                # Validation never calls backward(), so skip building the
                # autograd graph.
                with torch.no_grad():
                    for val_x, val_y in get_batches(val_data, batch_size, seq_length):
                        val_x = one_hot_encode(val_x, n_chars)
                        val_inputs, val_targets = torch.from_numpy(val_x), torch.from_numpy(val_y)

                        if train_on_gpu:
                            val_inputs, val_targets = val_inputs.cuda(), val_targets.cuda()

                        val_output, val_h = net(val_inputs, val_h)
                        val_loss = criterion(val_output, val_targets.view(batch_size*seq_length).long())

                        val_losses.append(val_loss.item())

                net.train()

                # A validation split too small for a single batch yields no
                # losses; report nan without the NumPy empty-mean warning.
                mean_val_loss = np.mean(val_losses) if val_losses else float('nan')

                print("Epoch: {}/{}...".format(e+1, epochs),
                      "Step: {}...".format(counter),
                      "Loss: {:.4f}...".format(loss.item()),
                      "Val Loss: {:.4f}".format(mean_val_loss))


In [None]:
# Hyper-parameters for this training run.
batch_size, seq_length, n_epochs = 128, 100, 20

train(
    net,
    encoded,
    epochs=n_epochs,
    batch_size=batch_size,
    seq_length=seq_length,
    lr=0.001,
    print_every=10,
)

Epoch: 1/20... Step: 10... Loss: 3.4156... Val Loss: 3.4125
Epoch: 1/20... Step: 20... Loss: 3.3230... Val Loss: 3.3606
Epoch: 1/20... Step: 30... Loss: 3.3451... Val Loss: 3.3460
Epoch: 1/20... Step: 40... Loss: 3.3562... Val Loss: 3.3408
Epoch: 1/20... Step: 50... Loss: 3.3366... Val Loss: 3.3393
Epoch: 1/20... Step: 60... Loss: 3.3265... Val Loss: 3.3368
Epoch: 1/20... Step: 70... Loss: 3.3017... Val Loss: 3.3327
Epoch: 2/20... Step: 80... Loss: 3.3026... Val Loss: 3.3166
Epoch: 2/20... Step: 90... Loss: 3.2995... Val Loss: 3.3134
Epoch: 2/20... Step: 100... Loss: 3.2698... Val Loss: 3.2735
Epoch: 2/20... Step: 110... Loss: 3.1991... Val Loss: 3.1668
Epoch: 2/20... Step: 120... Loss: 3.0787... Val Loss: 3.0362
Epoch: 2/20... Step: 130... Loss: 2.9640... Val Loss: 2.9159
Epoch: 2/20... Step: 140... Loss: 2.8795... Val Loss: 2.8224
Epoch: 2/20... Step: 150... Loss: 2.7676... Val Loss: 2.7112
Epoch: 3/20... Step: 160... Loss: 2.6843... Val Loss: 2.6276
Epoch: 3/20... Step: 170... Loss:

In [None]:
def predict(net, char, h=None, top_k=None):
    """
    Feed one character to the network and sample the next one.

    Args:
        net: Model exposing ``chars``, ``char2int``, ``int2char``,
            ``init_hidden`` and a ``forward(inputs, hidden)``.
        char: Input character; must be a key of ``net.char2int``.
        h: Hidden-state tuple to continue from. When ``None`` a fresh state
            for a batch of one is requested from ``net.init_hidden(1)``.
        top_k: If given, sample only among the ``top_k`` most probable
            characters; otherwise sample from the full distribution.

    Returns:
        ``(next_char, h)``: the sampled character and the updated hidden state.
    """
    device = next(net.parameters()).device
    n_chars = len(net.chars)

    # The default h=None used to crash when iterated; start from a fresh state.
    if h is None:
        h = net.init_hidden(1)
    h = tuple(each.detach() for each in h)

    # One-hot input of shape (batch=1, steps=1, n_chars), created directly on
    # the device the model lives on.
    inputs = torch.zeros((1, 1, n_chars), dtype=torch.float32, device=device)
    inputs[0, 0, net.char2int[char]] = 1.0

    # Pure inference: no gradients are needed.
    with torch.no_grad():
        out, h = net(inputs, h)
        p = F.softmax(out, dim=1).cpu()

    if top_k is None:
        top_ch = np.arange(n_chars)
    else:
        p, top_ch = p.topk(top_k)
        # reshape(-1) keeps a 1-D array even for top_k == 1, where squeeze()
        # would produce a 0-d array that np.random.choice rejects.
        top_ch = top_ch.numpy().reshape(-1)

    p = p.numpy().reshape(-1).astype(np.float64)
    char = np.random.choice(top_ch, p=p/p.sum())

    return net.int2char[int(char)], h


def sample(net, size, prime='The', top_k=None):
    """Generate text by repeatedly sampling the next character from ``net``.

    Args:
        net: Trained model, passed through to ``predict``.
        size: Number of characters to generate after the first predicted one.
        prime: Non-empty seed string; the network is stepped through it to
            build up a hidden state before generation starts.
        top_k: Forwarded to ``predict`` to restrict sampling to the most
            probable characters.

    Returns:
        ``prime`` followed by ``size + 1`` generated characters.

    Raises:
        ValueError: If ``prime`` is empty (there would be no character to
            predict from).

    Side effects: moves ``net`` to the GPU or CPU according to
    ``train_on_gpu`` and leaves it in evaluation mode.
    """
    if not prime:
        raise ValueError("prime must contain at least one character")

    if train_on_gpu:
        net.cuda()
    else:
        net.cpu()

    net.eval()

    chars = list(prime)
    h = net.init_hidden(1)

    # Generation is inference only, so gradient tracking is switched off.
    with torch.no_grad():
        # Warm up the hidden state on the seed text; only the prediction
        # after the last seed character is kept.
        for ch in prime:
            char, h = predict(net, ch, h, top_k=top_k)

        chars.append(char)

        # Feed each generated character back in to get the next one.
        for _ in range(size):
            char, h = predict(net, chars[-1], h, top_k=top_k)
            chars.append(char)

    return ''.join(chars)

In [None]:
# Generate 1000 characters seeded with 'Anna', sampling from the 5 most likely characters each step.
generated_text = sample(net, 1000, prime='Anna', top_k=5)
print(generated_text)

Annart in his coutizents.

SAMPSSN:
Why? his night, to the muntre so by a man and sigh,
That he, all, the crouns a sent at and served
In another for the seast will both to him
The plaster and true to the bark will strike
That many a sea in so to make and word.

GLOUCESTER:
Now what, and to be mothed is the son,
Which taken me that with the down time, and see
The bang and shall be therefore wass he ware.

LARTIUS:
I have bathine, with thy seat,
A precares words:--if you are a subjects
Will but he would had to be pastod and
And sell these thanks, and so thou have ston to see,
And so my son, and boy a such me breath
That we well stand to the side of this land;
And she shall seath a musines of all.

GLOUCESTER:
I am a state there of thy craisor,
This shall be strongs; but we shall spain thou wert.

ROMEO:
Hark me the cretting that who thou art now here
At the poor poor straight and have being here
By hang in strength to thee; fear to-time
With them or him with his, which is her last;
The c