# Charactor-level RNN for wiki

    modified from https://pytorch.org/tutorials/intermediate/char_rnn_generation_tutorial.html

In [1]:
from __future__ import unicode_literals, print_function, division
from io import open
import glob
import os
import unicodedata
import string

import time
import math


In [2]:
# parameter
keywords_dict ={}
keywords_dict['hidden_size'] = 100
keywords_dict['seq_length'] = 25
keywords_dict['learning_rate'] = 0.001


In [3]:
data = open('data/wiki.txt', 'r').read() # should be simple plain text file
chars = list(set(data))
data_size, vocab_size = len(data), len(chars)
print('data has %d characters, %d unique.' % (data_size, vocab_size))
char_to_ix = { ch:i for i,ch in enumerate(chars) }
ix_to_char = { i:ch for i,ch in enumerate(chars) }

data has 999922 characters, 189 unique.


In [4]:
keywords_dict['all_letters'] = ''.join(sorted(chars))
keywords_dict['n_letters'] = len(chars)


In [5]:
import torch
import torch.nn as nn

class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.Wxh = nn.Linear(input_size, hidden_size)
        self.Whh = nn.Linear(hidden_size, hidden_size)
        self.Why = nn.Linear(hidden_size, output_size)
        self.act = nn.Tanh()
        self.dropout = nn.Dropout(0.1)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, _input, hidden):
        # np.tanh(np.dot(Wxh, xs[t]) + np.dot(Whh, hs[t-1]) + bh)
        hidden = self.Wxh(_input).add(self.Whh(hidden))
        hidden = self.act(hidden)
        
        output = self.Why(hidden)
        
        # output
        output = self.dropout(output)
        output = self.softmax(output)
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, self.hidden_size)

In [6]:
import random

# Random item from a list
# def randomChoice(l):
#     _a = l[random.randint(0, len(l) - 1)]
#     print(_a)
#     return _a

In [7]:
# One-hot matrix of first to last letters (not including EOS) for input
def inputTensor(line, keywords_dict):
    all_letters = keywords_dict['all_letters']
    n_letters = keywords_dict['n_letters']
    tensor = torch.zeros(len(line), 1, n_letters)
    for li in range(len(line)):
        letter = line[li]
#         print(letter)
        tensor[li][0][all_letters.find(letter)] = 1
    return tensor

# LongTensor of second letter to end (EOS) for target
def targetTensor(line, keywords_dict):
    all_letters = keywords_dict['all_letters']
    n_letters = keywords_dict['n_letters']
    letter_indexes = [all_letters.find(line[li]) for li in range(1, len(line))]
    letter_indexes.append(n_letters - 1) # EOS
    return torch.LongTensor(letter_indexes)

def createTrainingExample(line, keywords_dict):
#     category, line = randomTrainingPair()
#     line = randomChoice(keywords_dict['name_vector'])
#     category_tensor = categoryTensor(category)
    input_line_tensor = inputTensor(line[:-1], keywords_dict)
#     print(input_line_tensor)
    target_line_tensor = targetTensor(line[1:], keywords_dict)
#     print(target_line_tensor)

#     return category_tensor, input_line_tensor, target_line_tensor
    return input_line_tensor, target_line_tensor

In [10]:
def running(rnn,keywords_dict):
    seq_length = keywords_dict['seq_length']

    print_every = 5000
    plot_every = 500
    start = time.time()
    
    optimizer = torch.optim.Adam(rnn.parameters(), lr=keywords_dict['learning_rate'])
    all_losses = []
    total_loss = 0 # Reset every plot_every iters
    loss_fun = nn.NLLLoss()

    _iter, p = 0, 0
    epoch = 0
    while epoch < 5:
        if p + seq_length +1 >= len(data):
            epoch +=1
            p = 0
        input_line_tensor, target_line_tensor = createTrainingExample(data[p:p+seq_length+1], keywords_dict)
#         print(input_line_tensor)
        target_line_tensor.unsqueeze_(-1)
        hidden = rnn.initHidden()
        rnn.zero_grad()

        loss = 0

        for i in range(input_line_tensor.size(0)):
            output, hidden = rnn(input_line_tensor[i], hidden)

            l = loss_fun(output, target_line_tensor[i])
            loss += l
            
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

#         for para in rnn.parameters():
#             para.data.add_(-learning_rate, para.grad.data)

        p += seq_length # move data pointer
        _iter += 1 # iteration counter

        total_loss += loss

        if _iter % print_every == 0:
            print('%s (%d %d%%) %.4f' % (timeSince(start), _iter, _iter / (1000000/25) * 100, loss))

        if _iter % plot_every == 0:
            all_losses.append(total_loss / plot_every)
            total_loss = 0
    return rnn, all_losses

rnn = RNN(keywords_dict['n_letters'], keywords_dict['hidden_size'], keywords_dict['n_letters'])
rnn, all_losses = running(rnn, keywords_dict)

1m 26s (5000 12%) 96.6311
2m 50s (10000 25%) 77.7080
4m 12s (15000 37%) 63.3996
5m 34s (20000 50%) 76.1646
6m 55s (25000 62%) 82.0567
8m 19s (30000 75%) 77.4184
9m 51s (35000 87%) 71.0473


In [None]:
rnn, all_losses = running(rnn, keywords_dict)

1m 25s (5000 12%) 97.3801
3m 6s (10000 25%) 75.7617


In [11]:
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

plt.figure()
plt.plot(all_losses)

[<matplotlib.lines.Line2D at 0x117db7828>]

In [13]:
max_length = 100

# Sample from a category and starting letter
def sample(rnn, start_letter='<', max_length):
    with torch.no_grad():  # no need to track history in sampling
#         category_tensor = categoryTensor(category)
        _input = inputTensor(start_letter)
        hidden = rnn.initHidden()

        output_name = start_letter

        for i in range(max_length):
            output, hidden = rnn(_input[0], hidden)
            topv, topi = output.topk(1)
            topi = topi[0][0]
            letter = all_letters[topi]
            outputs += letter
            _input = inputTensor(letter)

        return outputs


tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]])

In [13]:
from torch import FloatTensor
from torch.autograd import Variable


# Define the leaf nodes
a = Variable(FloatTensor([4]))

weights = [Variable(FloatTensor([i]), requires_grad=True) for i in (2, 5, 9, 7)]

# unpack the weights for nicer assignment
w1, w2, w3, w4 = weights

b = w1 * a
c = w2 * a
d = w3 * b + w4 * c
L = (10 - d)

L.backward()

for index, weight in enumerate(weights, start=1):
    gradient, *_ = weight.grad.data
    print("Gradient of w", index , "w.r.t to L: ", gradient)

Gradient of w 1 w.r.t to L:  tensor(-36.)
Gradient of w 2 w.r.t to L:  tensor(-28.)
Gradient of w 3 w.r.t to L:  tensor(-8.)
Gradient of w 4 w.r.t to L:  tensor(-20.)


AttributeError: 'Tensor' object has no attribute 'items'

In [8]:
x = torch.tensor([1, 2, 3, 4])

In [9]:
x.unsqueeze_(-1)

tensor([[1],
        [2],
        [3],
        [4]])