# Character RNN Sequence Generation Tutorial

This follows the tutorial here: https://pytorch.org/tutorials/intermediate/char_rnn_generation_tutorial.html

In [1]:
%matplotlib inline
from io import open
import glob
import os
import unicodedata
import string
import torch
import torch.nn as nn
import numpy as np
import torch.optim as optim
import torch.nn.functional as F
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
plt.style.use('dark_background')

### Helper functions reused from the previous tutorial

In [4]:
# Paths of the name lists; each file's base name (minus extension) is later
# used as a category name.
files = glob.glob("/home/clarencenhuang/data/pytorch-tutorials/names/*.txt")
# Allowed alphabet: ASCII letters plus space and a few punctuation marks.
all_letters = string.ascii_letters + " .,;'"
# Size of the alphabet; used as the width of the one-hot letter tensors.
n_letters = len(all_letters)

# remove weird characters (combining accents and anything not in all_letters)
def unicode_to_ascii(s):
    """Return ``s`` reduced to the characters listed in ``all_letters``.

    The string is NFD-normalized first, so an accented letter is split into
    its base letter followed by combining marks. Combining marks (Unicode
    category ``Mn``) and every character outside ``all_letters`` are dropped.
    """
    kept = []
    for ch in unicodedata.normalize('NFD', s):
        if unicodedata.category(ch) == 'Mn':
            continue
        if ch in all_letters:
            kept.append(ch)
    return ''.join(kept)

# parse text into lines and remove weird characters
def read_lines(fname):
    """Read ``fname`` as UTF-8 and return its lines cleaned by ``unicode_to_ascii``.

    Whitespace at the very start and end of the file is stripped before the
    text is split on ``'\\n'``, so a trailing newline does not produce an
    extra empty entry.
    """
    with open(fname, encoding='utf8') as fh:
        raw_text = fh.read()
    return [unicode_to_ascii(entry) for entry in raw_text.strip().split('\n')]

# parse and create mapping of category names to index
# Map each category (a file's base name without its extension) to the list of
# cleaned lines read from that file.
category_lines = {}
for fname in files:
    category = os.path.splitext(os.path.basename(fname))[0]
    category_lines[category] = read_lines(fname)
n_categories = len(category_lines)

# Sort the category names once and derive both lookup directions from that
# single ordering, so the indices never depend on the order glob returned
# the files in.
all_categories = sorted(category_lines)
idx2cat = dict(enumerate(all_categories))
cat2idx = {c: i for i, c in idx2cat.items()}

# Letter <-> index lookups follow the character order of all_letters.
idx_to_letter = dict(enumerate(all_letters))
letter_to_idx = {c: i for i, c in idx_to_letter.items()}

# one-hot character encoding
def letter2tensor(l):
    """Return a ``(1, n_letters)`` one-hot tensor for the single character ``l``.

    The column given by ``letter_to_idx[l]`` is set to 1 and all others stay 0.
    A character missing from ``letter_to_idx`` raises ``KeyError``.
    """
    one_hot = torch.zeros(1, n_letters)
    column = letter_to_idx[l]
    one_hot[0, column] = 1
    return one_hot

def line2tensor(line):
    """Encode ``line`` as a one-hot tensor of shape ``(len(line), 1, n_letters)``.

    Row ``i`` holds a single 1 in the column ``letter_to_idx[line[i]]``; the
    middle dimension always has size 1. An empty ``line`` yields a tensor with
    zero rows.

    Raises:
        KeyError: if ``line`` contains a character missing from
            ``letter_to_idx``.
    """
    t = torch.zeros(len(line), 1, n_letters)
    # Set each one-hot entry in place instead of building a temporary
    # (1, n_letters) tensor per character and copying it into the row.
    for pos, ch in enumerate(line):
        t[pos, 0, letter_to_idx[ch]] = 1
    return t

def category_from_output(o):
    """Return ``(category_name, category_index)`` for the top-scoring entry of ``o``.

    The index comes from ``o.topk(1)`` and is converted with ``.item()``, so
    ``o`` must be such that the top-1 result holds exactly one element.
    """
    _, top_index = o.topk(1)
    idx = top_index.item()
    return idx2cat[idx], idx

# get training example
def random_training_sample():
    """Draw one random ``(category, line, category_tensor, line_tensor)`` example.

    A category is picked with ``np.random.choice`` from ``all_categories``,
    then a line is picked the same way from that category's lines. The
    category is returned both by name and as a one-element ``LongTensor``
    holding its index; the line is returned both as text and as the tensor
    produced by ``line2tensor``.
    """
    category = np.random.choice(all_categories)
    line = np.random.choice(category_lines[category])
    return (
        category,
        line,
        torch.LongTensor([cat2idx[category]]),
        line2tensor(line),
    )