# Charactor-level RNN for wiki

    modified from https://pytorch.org/tutorials/intermediate/char_rnn_generation_tutorial.html

In [1]:
from __future__ import unicode_literals, print_function, division
from io import open
import glob
import os
import unicodedata
import string

# Plus EOS marker
keywords_dict = {}
keywords_dict['all_letters'] = string.ascii_letters + " .,;'-"
keywords_dict['n_letters'] = len(keywords_dict['all_letters']) + 1

def findFiles(path): return glob.glob(path)

# Turn a Unicode string to plain ASCII, thanks to https://stackoverflow.com/a/518232/2809427
def unicodeToAscii(s, keywords_dict):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
        and c in keywords_dict['all_letters']
    )

# Read a file and split into lines
def readLines(filename, keywords_dict):
    lines = open(filename, encoding='utf-8').read().strip().split('\n')
    return [unicodeToAscii(line, keywords_dict) for line in lines]

# Build the category_lines dictionary, a list of lines per category
name_list = []
for filename in findFiles('data/names/*.txt'):
    category = os.path.splitext(os.path.basename(filename))[0]
    name_list.extend(readLines(filename, keywords_dict))
    
keywords_dict['name_vector'] = name_list

In [4]:
data = open('data/wiki.txt', 'r').read() # should be simple plain text file
chars = list(set(data))
data_size, vocab_size = len(data), len(chars)
print('data has %d characters, %d unique.' % (data_size, vocab_size))
char_to_ix = { ch:i for i,ch in enumerate(chars) }
ix_to_char = { i:ch for i,ch in enumerate(chars) }

data has 999922 characters, 189 unique.


In [None]:
keywords_dict['all_letters'] = chars
keywords_dict['n_letters'] = len(chars)


In [7]:
import torch
import torch.nn as nn

class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.i2h = nn.Linear(input_size + hidden_size, hidden_size, bias=False)
        self.act = nn.Tanh()
        self.dropout = nn.Dropout(0.1)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, _input, hidden):
        input_combined = torch.cat((_input, hidden), 1)
        hidden = self.i2h(input_combined)
        output = self.act(hidden)
        output = self.dropout(output)
        print('dropout layer')
        print(output)
        output = self.softmax(output)
        print('softmax layer')
        print(output)
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, self.hidden_size)

In [3]:
import random

# Random item from a list
def randomChoice(l):
    _a = l[random.randint(0, len(l) - 1)]
    print(_a)
    return _a

In [4]:
# One-hot matrix of first to last letters (not including EOS) for input
def inputTensor(line, keywords_dict):
    all_letters = keywords_dict['all_letters']
    n_letters = keywords_dict['n_letters']
    tensor = torch.zeros(len(line), 1, n_letters)
    for li in range(len(line)):
        letter = line[li]
        print(letter)
        tensor[li][0][all_letters.find(letter)] = 1
    return tensor

# LongTensor of second letter to end (EOS) for target
def targetTensor(line, keywords_dict):
    all_letters = keywords_dict['all_letters']
    n_letters = keywords_dict['n_letters']
    letter_indexes = [all_letters.find(line[li]) for li in range(1, len(line))]
    letter_indexes.append(n_letters - 1) # EOS
    return torch.LongTensor(letter_indexes)

def randomTrainingExample(keywords_dict):
#     category, line = randomTrainingPair()
    line = randomChoice(keywords_dict['name_vector'])
#     category_tensor = categoryTensor(category)
    input_line_tensor = inputTensor(line, keywords_dict)
    target_line_tensor = targetTensor(line, keywords_dict)
#     return category_tensor, input_line_tensor, target_line_tensor
    return input_line_tensor, target_line_tensor

In [28]:
a.shape

torch.Size([10, 1, 59])

In [30]:
b

tensor([ 8, 11,  8, 12, 14, 21,  8,  2,  7, 58])

In [29]:
keywords_dict['all_letters']

"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ .,;'-"

In [5]:
keywords_dict['learning_rate'] = 0.005

def train(rnn, keywords_dict):
    input_line_tensor, target_line_tensor = randomTrainingExample(keywords_dict)
    learning_rate = keywords_dict['learning_rate']
    target_line_tensor.unsqueeze_(-1)
    hidden = rnn.initHidden()

    rnn.zero_grad()
    criterion = nn.NLLLoss()

    loss = 0

    for i in range(input_line_tensor.size(0)):
#         output, hidden = rnn(category_tensor, input_line_tensor[i], hidden)
        output, hidden = rnn(input_line_tensor[i], hidden)

        l = criterion(output, target_line_tensor[i])
        print(l)
        loss += l

    loss.backward()

    for p in rnn.parameters():
        print('Waha')
        print(p)
        p.data.add_(-learning_rate, p.grad.data)

    return output, loss.item() / input_line_tensor.size(0)

import time
import math

def timeSince(since):
    now = time.time()
    s = now - since
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)

In [8]:
def running(keywords_dict):
    n_letters = keywords_dict['n_letters']
    rnn = RNN(n_letters, 128, n_letters)

    n_iters = 10
    print_every = 5000
    plot_every = 500
    all_losses = []
    total_loss = 0 # Reset every plot_every iters

    start = time.time()

    for iter in range(1, n_iters + 1):
        output, loss = train(rnn, keywords_dict)
        total_loss += loss

        if iter % print_every == 0:
            print('%s (%d %d%%) %.4f' % (timeSince(start), iter, iter / n_iters * 100, loss))

        if iter % plot_every == 0:
            all_losses.append(total_loss / plot_every)
            total_loss = 0
    return all_losses
all_losses = running(keywords_dict)

Grodensky
G
r
o
d
e
n
s
k
y
dropout layer
tensor([[-0.0447,  0.0556,  0.0420, -0.0133,  0.0772,  0.0543, -0.0232,  0.0385,
          0.0624,  0.0619,  0.0000,  0.0806, -0.0650, -0.0386, -0.0737,  0.0106,
         -0.0062, -0.0661, -0.0675,  0.0546,  0.0066, -0.0468, -0.0358, -0.0656,
          0.0000,  0.0277, -0.0541,  0.0272,  0.0401, -0.0292, -0.0052,  0.0000,
         -0.0000, -0.0431, -0.0000,  0.0222,  0.0554, -0.0000,  0.0000, -0.0429,
         -0.0636, -0.0520,  0.0360, -0.0360, -0.0026, -0.0045, -0.0687,  0.0453,
          0.0324, -0.0565,  0.0348, -0.0000, -0.0000,  0.0690,  0.0066,  0.0271,
         -0.0334,  0.0732, -0.0024,  0.0057, -0.0655,  0.0806, -0.0625, -0.0733,
         -0.0000, -0.0734, -0.0067, -0.0000, -0.0156, -0.0062,  0.0421, -0.0803,
          0.0163, -0.0427,  0.0000,  0.0000, -0.0492,  0.0779,  0.0608,  0.0593,
          0.0435, -0.0533, -0.0523,  0.0535, -0.0056,  0.0437,  0.0646,  0.0516,
          0.0406,  0.0537, -0.0000, -0.0000,  0.0305,  0.0351, -0.0

In [None]:
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

plt.figure()
plt.plot(all_losses)

In [9]:
m.

tensor([[ 0.0458,  0.0702, -0.6224,  ..., -0.6503,  0.2250, -0.4297],
        [-0.0448, -0.0574, -0.0772,  ...,  0.4211, -0.6901, -0.0221],
        [ 0.2992, -0.3145, -0.3766,  ...,  0.7665, -0.8241, -0.7168],
        ...,
        [-0.0884,  1.0293, -0.6515,  ..., -0.4935, -0.0707,  1.5453],
        [-0.1213,  0.3463,  0.0164,  ...,  0.5920,  0.1916,  0.8694],
        [ 0.0660, -0.2973, -0.1265,  ..., -0.3561, -0.5762, -0.9287]],
       grad_fn=<AddmmBackward>)

In [13]:
# torch.zeros(10, 1, 20)

tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]])

In [13]:
from torch import FloatTensor
from torch.autograd import Variable


# Define the leaf nodes
a = Variable(FloatTensor([4]))

weights = [Variable(FloatTensor([i]), requires_grad=True) for i in (2, 5, 9, 7)]

# unpack the weights for nicer assignment
w1, w2, w3, w4 = weights

b = w1 * a
c = w2 * a
d = w3 * b + w4 * c
L = (10 - d)

L.backward()

for index, weight in enumerate(weights, start=1):
    gradient, *_ = weight.grad.data
    print("Gradient of w", index , "w.r.t to L: ", gradient)

Gradient of w 1 w.r.t to L:  tensor(-36.)
Gradient of w 2 w.r.t to L:  tensor(-28.)
Gradient of w 3 w.r.t to L:  tensor(-8.)
Gradient of w 4 w.r.t to L:  tensor(-20.)


AttributeError: 'Tensor' object has no attribute 'items'