## 简单的字符神经网络，给不同的姓氏分类

###  读取数据

In [1]:
import torch
from io import open
import os
import unicodedata
import string

In [2]:
all_letters = string.ascii_letters + ".,:''"
n_letters = len(all_letters)

In [3]:
def unicode2ascii(s):
    """
    转换unicode编码的字符为纯ascii字符串
    """
    return ''.join(
        c for c in unicodedata.normalize("NFD", s)
        if unicodedata.category(c) != 'Mn'
        and c in all_letters
    )


print(unicode2ascii('Ślusàrski'))

Slusarski


In [4]:
def lines2ascii(file):
    result = []
    for line in file.readlines():
        result.append(unicode2ascii(line).strip())
    return result


def read_data(root):
    """
    读取名字和语言数据为一个字典
    """
    result = {}
    for file_name in os.listdir(root):
        if str(file_name).endswith('.txt'):
            lan = file_name.replace('.txt', '')
            with open(os.path.join(root, file_name), encoding='UTF-8') as file:
                result[lan] = lines2ascii(file)
    return result


data = read_data('./data/names')

In [5]:
all_categories = list(data.keys())
n_category = len(all_categories)

### 将字符数据映射为向量数据，使用one hot向量表示

In [6]:
def char2Vec(c):
    """
    映射字符为向量数据，输入为字符，输出为宽度与 all_letters长度相同的向量
    """
    vec = torch.zeros(len(all_letters))
    vec[all_letters.index(c)] = 1
    return vec
print(char2Vec('d'))

tensor([0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0.])


In [7]:
def str2Vec(name):
    """
    将字符串转换为vec表示，形状为 len*1*n_letter
    """
    vec = torch.zeros(len(name), 1, len(all_letters))
    for i in range(len(name)):
        vec[i][0][all_letters.index(name[i])] = 1
    return vec


print(str2Vec('ab'))

tensor([[[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0.]],

        [[0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0.]]])


### 构建RNN

In [8]:
import torch.nn as nn

In [9]:
class Rnn(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        """
        自定义rnn运行所需的函数
        """
        super(Rnn, self).__init__()

        self.hidden_size = hidden_size

        self.i2h = nn.Linear(input_size + hidden_size, hidden_size)
        self.i2o = nn.Linear(input_size + hidden_size, output_size)

        self.softmax = nn.Softmax(dim=1)

    def forward(self, data, hidden):
        """
        rnn运行的数据流，如何使用上述函数
        """
        combine = torch.cat((data, hidden), 1)
        new_hidden = self.i2h(combine)
        output = self.i2o(combine)
        output = self.softmax(output)
        return output, hidden

    def init_hidden(self):
        return torch.randn(1, self.hidden_size)


n_hidden = 1024
rnn = Rnn(n_letters, n_hidden, n_category).cuda()

> 测试rnn运行单步


In [10]:
hidden = torch.randn(1, n_hidden)
name = "alex"
vec_name = str2Vec(name)
output, new_hidden = rnn(vec_name[0].cuda(), hidden.cuda())
print(output)

tensor([[0.0238, 0.1010, 0.0646, 0.0419, 0.0472, 0.0306, 0.1092, 0.0207, 0.0529,
         0.0483, 0.0778, 0.0928, 0.0216, 0.0468, 0.0646, 0.0418, 0.0529, 0.0614]],
       device='cuda:0', grad_fn=<SoftmaxBackward>)


###  训练网络

In [11]:
def get_category(output):
    """
    获取最高概率的输出并转换为字符串
    """
    top_n, top_i = output.topk(1)
    category_i = top_i[0].item()
    return all_categories[category_i], category_i


print(get_category(output))

('German', 6)


In [12]:
import random

In [13]:
def random_chioce(data):
    """
    在列表中随机选择一个条目返回
    """
    return data[random.randint(0, len(data)-1)]


def get_vec_label(lan):
    """
    将标签从语言转换为one hot向量
    """
    vec = torch.tensor([all_categories.index(lan)], dtype=torch.long)
    return vec


def get_train_examples(train_data):
    """
    在训练数据中随机选择1训练条目
    """

    lan = random_chioce(all_categories)
    name = random_chioce(train_data[lan])
    lan_vec = get_vec_label(lan)
    name_vec = str2Vec(name)

    return name_vec, lan_vec


print(get_train_examples(data))

(tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 

In [14]:
loss_func = nn.NLLLoss()
lr = 0.001


def train_step(name_vec, lan_vec):
    """
    单步训练，连续输入一串字符，更新梯度，并计算损失
    """
    hidden = rnn.init_hidden().cuda()
    "梯度清零"
    rnn.zero_grad()

    "调用rnn预测"
    for i in range(name_vec.size()[0]):
        output, hidden = rnn(name_vec[i], hidden)

    "计算损失"
    loss = loss_func(output, lan_vec)
    "从损失函数开始反向传播梯度"
    loss.backward()

    "将rnn中的参数手动梯度下降"

    for p in rnn.parameters():
        if p.grad is not None:
            p.data.add_(-lr, p.grad.data)

    return output, loss.item()

In [None]:
import time

all_losses = []


def train():
    """
    训练rnn，并保存loss
    """
    n_iters = 200000
    plot_every = 1000
    print_every = 5000

    cur_loss = 0
    start_time = time.time()
    for i in range(n_iters):
        "获取数据并预测，计算loss， 梯度下降"
        name_vec, lan_vec = get_train_examples(data)
        output, loss = train_step(name_vec.cuda(), lan_vec.cuda())
        cur_loss += loss
        "打印"
        if i % print_every == 0:
            print(time.time() - start_time)
            print(loss)
            print("   ")
        if i % plot_every == 0:
            all_losses.append(cur_loss / plot_every)
            cur_loss = 0


train()

0.005002021789550781
-0.027097128331661224
   


	add_(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add_(Tensor other, *, Number alpha) (Triggered internally at  ..\torch\csrc\utils\python_arg_parser.cpp:882.)


11.072862386703491
-0.04156447574496269
   
22.382862091064453
-0.03699120879173279
   
33.604862213134766
-0.08055519312620163
   
44.87786245346069
-0.014233062975108624
   
56.07986235618591
-0.05075738579034805
   
67.4078619480133
-0.05364670604467392
   
79.06386256217957
-0.02655201032757759
   
90.31086277961731
-0.07197605818510056
   
101.97886300086975
-0.022878160700201988
   
113.30786275863647
-0.04617423564195633
   
124.66286206245422
-0.029771044850349426
   
136.1118619441986
-0.019967248663306236
   
147.34486198425293
-0.038532987236976624
   
159.1305160522461
-0.02304541878402233
   
170.39151692390442
-0.029797576367855072
   
181.71751713752747
-0.06152763217687607
   
193.16251635551453
-0.07196660339832306
   
204.65251684188843
-0.02809833735227585
   
216.00151586532593
-0.03534039482474327
   
227.2915165424347
-0.03030012734234333
   
238.85451579093933
-0.06169125437736511
   
250.26251673698425
-0.03010309860110283
   
262.1205167770386
-0.09700215607881

In [None]:
import matplotlib.pyplot as plt

plt.figure()
plt.plot(all_losses)