text-generation-with-PyTorch

An implementation of a text generation algorithm with PyTorch.

This repo demonstrates a method of text generation with PyTorch. The model used to generate text is a Recurrent Neural Network (RNN). The training data is split into chunks and passed to the model; after training, the weights are saved and a generation function is created. The repo contains a Jupyter Notebook that lets you run the cells and experiment with the model. The notebook also serves as a tutorial on how to create a text generation model in PyTorch, as it explains the logic behind each code block.

First, the data is imported and preprocessed. It is then split into chunks.

import unidecode
import string
import random
import torch
import torch.nn as nn
from torch.autograd import Variable
import time, math
from torch.optim.lr_scheduler import ReduceLROnPlateau

def text_preprocess(text_file):
    # get all printable characters
    print_charas = string.printable
    num_charas = len(print_charas)

    # print(print_charas)

    text_data = unidecode.unidecode(open(text_file).read())

    # figure out the length of the file
    # this is important as we will be splitting the text file up into random chunks

    len_text = len(text_data)
    print(len_text)

    return print_charas, num_charas, text_data, len_text

printable, num_charas, text_file, text_len = text_preprocess("___.txt")

# specify the length of the chunk

chunk_len = 200

def get_rand_chunk():
    # select a random starting point from the beginning
    # of the file until the end minus chunk len
    start_idx = random.randint(0, text_len - chunk_len)
    # specify the end point
    end_idx = start_idx + chunk_len + 1
    return text_file[start_idx:end_idx]

# check to see if function works
print(get_rand_chunk())

The model is then defined, and the text chunks are transformed into tensors.

# inherit from nn.Module
class RNN(nn.Module):
    # initialize the class with chosen arguments
    def __init__(self, in_size, hidden_size, out_size, num_layers=1, drop_prob=0.5):
        # make sure to inherit from the base RNN class
        super(RNN, self).__init__()
        self.in_size = in_size
        self.hidden_size = hidden_size
        self.out_size = out_size
        self.num_layers = num_layers

        # define the layers
        self.encoder = nn.Embedding(in_size, hidden_size)
        # hidden size is both input and output here, since we aren't changing the size
        # of the input between layers; the GRU also takes the number of layers
        self.gru = nn.GRU(hidden_size, hidden_size, num_layers)
        # the output layer is a linear layer; it takes the hidden size and returns the output size
        self.decoder = nn.Linear(hidden_size, out_size)
        self.dropout = nn.Dropout(drop_prob)

    # now we have to define the forward training pass

    def forward(self, input, hidden):
        # reshape the character indices and pass them through the embedding layer
        input = self.encoder(input.view(1, -1))
        input = self.dropout(input)
        # reshape to (seq_len=1, batch=1, hidden_size) before the GRU
        output, hidden = self.gru(input.view(1, 1, -1), hidden)
        # flatten the GRU output and map it to scores over the character set
        output = self.decoder(output.view(1, -1))
        return output, hidden

    # time to define a function to zero states on creation
    def init_hidden(self):
        # return a variable full of all zeroes
        return Variable(torch.zeros(self.num_layers, 1, self.hidden_size))
    
def chunk_tensor(input):
    # initialize the tensor with length of input string
    tensor = torch.zeros(len(input)).long()
    # for every character in the input string
    for i in range(len(input)):
        # get the index of the character from the list of printable characters
        tensor[i] = printable.index(input[i])
    tensor = Variable(tensor)
    return tensor
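
As a quick sanity check (not in the original notebook), chunk_tensor can be inspected on a short string, and a single character can be pushed through a small, hypothetical RNN instance to confirm the tensor shapes:

# 'a' maps to index 10 because the first ten entries of string.printable are the digits
print(chunk_tensor("abc"))   # tensor([10, 11, 12])

# hypothetical small model used only to check shapes
test_rnn = RNN(num_charas, 32, num_charas)
hidden = test_rnn.init_hidden()
out, hidden = test_rnn(chunk_tensor("a")[0], hidden)
print(out.shape)   # torch.Size([1, num_charas])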

Some functions are then created to evaluate the model, get the training time, and carry out the training.

# define our priming string, the length of text we want to predict, and the temperature
def model_evaluate(prime_str='A', predict_len=100, temp=0.8):

    # start off by initializing the hidden state to zeros
    hidden = decoder.init_hidden()
    prime_input = chunk_tensor(prime_str)
    predicted = prime_str

    # need to build up hidden state, start off with the priming string
    for c in range(len(prime_str) - 1):
        # return the current hidden state
        _, hidden = decoder(prime_input[c], hidden)

    # use the last character of the priming string as the first input for generation
    inp = prime_input[-1]

    for p in range(predict_len):
        # use the decoder and get output and hidden values
        output, hidden = decoder(inp, hidden)

        # sample from the network's output as a multinomial distribution:
        # flatten the output, scale it by the temperature, and exponentiate
        # (lower temperatures make the distribution sharper)
        output_dist = output.data.view(-1).div(temp).exp()
        # draw one sample and convert the index to a plain Python int
        top_i = torch.multinomial(output_dist, 1)[0].item()

        # append the predicted character to the string and use that
        # character as the next input into the network
        predicted_char = printable[top_i]
        predicted += predicted_char
        inp = chunk_tensor(predicted_char)

    return predicted
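
To make the effect of the temperature parameter concrete, here is a small illustration (not part of the notebook) of how dividing the output by the temperature before exponentiating reshapes the sampling distribution:

logits = torch.tensor([2.0, 1.0, 0.1])
for temp in (0.5, 0.8, 1.5):
    dist = logits.div(temp).exp()
    dist = dist / dist.sum()
    # lower temperatures concentrate probability on the highest-scoring character,
    # higher temperatures flatten the distribution and make sampling more random
    print(temp, dist)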

def get_time(passed):
    sec = time.time() - passed
    # round down to nearest minute
    minute = math.floor(sec/60)
    sec -= minute * 60
    return '%dm %ds' % (minute, sec)

def train_loop(input_data, target):

    # initialize hidden state
    hidden = decoder.init_hidden()
    # zero the gradients at the start
    decoder.zero_grad()
    # set loss to zero
    loss = 0

    # for every character in the chunk length
    # get the output of the model and the hidden state
    for char in range(chunk_len):
        output, hidden = decoder(input_data[char], hidden)
        # update the loss, need to unsqueeze it
        loss += criterion(output, target[char].unsqueeze(0))

    # do the backprop
    loss.backward()
    # do a step of optimization
    decoder_optimizer.step()

    # return the average per-character loss for this chunk
    return loss.item() / chunk_len
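
The training loop calls create_training_set(), which does not appear in the notebook excerpt above. A minimal sketch, assuming it pairs a random chunk with its one-character-shifted target (get_rand_chunk returns chunk_len + 1 characters, so both tensors come out chunk_len long):

def create_training_set():
    # grab a random chunk of text and encode it as character indices
    chunk = get_rand_chunk()
    # the input is every character except the last;
    # the target is the same sequence shifted one character ahead
    input_data = chunk_tensor(chunk[:-1])
    target = chunk_tensor(chunk[1:])
    return input_data, target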

Some important components, like the optimizer and the loss function, are defined, and then the model is trained.

# now we have to declare the training parameters
num_epochs = 2000
print_delay = 100
plot_delay = 100

hidden_size = 100
num_layers = 2
learning_rate = 0.002

# instantiate the RNN; it will act as our decoder
decoder = RNN(num_charas, hidden_size, num_charas, num_layers, drop_prob=0.2)
# choose the optimizer
decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=learning_rate)

# declare the criterion we will use to calculate the loss
criterion = nn.CrossEntropyLoss()
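
ReduceLROnPlateau is imported at the top of the notebook but never attached to the optimizer; one possible way to wire it up (a sketch, not part of the original training run) would be:

# hypothetical: lower the learning rate when the loss stops improving
scheduler = ReduceLROnPlateau(decoder_optimizer, mode='min', factor=0.5, patience=10)
# inside the training loop, after computing the loss for an epoch:
# scheduler.step(loss)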

def train_and_generate(num_epochs, print_delay, plot_delay):

    start = time.time()
    total_loss = []
    avg_loss = 0

    # loop over the epochs, training on one random chunk per epoch
    for epoch in range(1, num_epochs + 1):
        loss = train_loop(*create_training_set())
        avg_loss += loss

        # every print_delay epochs, report progress and show a sample of generated text
        if epoch % print_delay == 0:
            # print the elapsed training time
            print("Current running time: " + get_time(start))
            print("Epoch: {}, Percent complete: {}%, Loss: {}".format(epoch, epoch / num_epochs * 100, loss))
            # generate a 200-character sample primed with 'a' to monitor progress
            print(model_evaluate('a', 200), '\n')

        if epoch % plot_delay == 0:
            total_loss.append(avg_loss / plot_delay)
            avg_loss = 0

    torch.save(decoder.state_dict(), "./textgen_model_1.pth")
    
train_and_generate(num_epochs, print_delay, plot_delay)

Finally, text can be generated after loading the saved model.

def text_gen(prime_str='A', predict_len=100, temp=0.8):
    decoder = RNN(num_charas, hidden_size, num_charas, num_layers, drop_prob=0.2)
    decoder.load_state_dict(torch.load("./textgen_model_1.pth"))
    # switch to evaluation mode so dropout is disabled during generation
    decoder.eval()

    # start off by initializing the hidden state to zeros
    hidden = decoder.init_hidden()
    prime_input = chunk_tensor(prime_str)
    predicted = prime_str

    # need to build up hidden state, start off with the priming string
    for c in range(len(prime_str) - 1):
        # return the current hidden state
        _, hidden = decoder(prime_input[c], hidden)

    # use the last character of the priming string as the first input for generation
    inp = prime_input[-1]

    for p in range(predict_len):
        # use the decoder and get output and hidden values
        output, hidden = decoder(inp, hidden)

        # sample from the network's output as a multinomial distribution:
        # flatten the output, scale it by the temperature, and exponentiate
        # (lower temperatures make the distribution sharper)
        output_dist = output.data.view(-1).div(temp).exp()
        # draw one sample and convert the index to a plain Python int
        top_i = torch.multinomial(output_dist, 1)[0].item()

        # append the predicted character to the string and use that
        # character as the next input into the network
        predicted_char = printable[top_i]
        predicted += predicted_char
        inp = chunk_tensor(predicted_char)

    print("Generated text is:")
    print(predicted)
    
text_gen()
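
The defaults can be overridden; for example (hypothetical values), a longer priming string, a longer sample, and a lower temperature give more conservative output:

text_gen(prime_str="The ", predict_len=300, temp=0.5)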

Thanks to Sean Robertson for the basis for this project.
