In [26]:
import torch
import torch.nn as nn

class RNNModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, n_layers=1, dropout=0.5):
        super(RNNModel, self).__init__()
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.rnn = nn.LSTM(hidden_size, hidden_size, n_layers, dropout=dropout, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
    
    def forward(self, input, hidden):
        embedded = self.embedding(input)
        output, hidden = self.rnn(embedded, hidden)
        output = self.fc(output)
        return output, hidden

    def init_hidden(self, batch_size):
        return (torch.zeros(self.n_layers, batch_size, self.hidden_size).to(device),
                torch.zeros(self.n_layers, batch_size, self.hidden_size).to(device))


In [27]:
import string
char_to_num = {char: i for i, char in enumerate(string.ascii_lowercase, 1)}

# 字符到数字的映射
char_to_num = {char: i for i, char in enumerate(string.ascii_lowercase, 1)}
char_to_num['<pad>'] = 0  # 填充符
char_to_num['-'] = 27  # -
char_to_num['<eos>'] = 28  # 结束符
char_to_num['<bos>'] = 29  # 开始符（如果使用）

num_to_char = {i: j for j, i in char_to_num.items()}

def tokenize(text, bidirectional=False):
    # 先全部变成小写，把大写加进来可能会出现
    lowercase_text = text.lower()
    # 27用来表示结束，如果需要可以在头上加上0，这样就可以随意给出一段文字然后来生成开始和结尾
    if bidirectional == False:
        numbers = [char_to_num[char] for char in lowercase_text if char in char_to_num] + [28]
    else:
        numbers = [29] + [char_to_num[char] for char in lowercase_text if char in char_to_num] 
        numbers.reverse()
    return numbers

def reverse_tokenize(numbers, bidirectional=False):
    # 转换数字到字符，直到遇到28
    characters = []
    if bidirectional == False:
        for number in numbers:
            if number == 28:
                break
            characters.append(num_to_char[number])
    else:
        numbers.reverse()
        for number in numbers[1:]:
            characters.append(num_to_char[number])


    # 将字符列表转换为字符串
    return ''.join(characters)


In [28]:
batch_size = 32
learning_rate = 0.01
input_size = 30
hidden_size = 100
output_size = 30
n_layers = 1
dropout = 0.5
n_epochs = 10

model = RNNModel(input_size, hidden_size, output_size, n_layers, dropout)
loss_function = nn.CrossEntropyLoss(ignore_index=char_to_num['<pad>'])
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [29]:
from torch.utils.data import DataLoader
import torch.nn.functional as F
import numpy as np
from torch.nn.utils.rnn import pad_sequence

# 读取和处理数据
def load_data(filename, bidirectional=False):
    names = []
    with open(filename, 'r', encoding='utf-8') as file:
        for line in file:
            name = line.strip()
            name = tokenize(name, bidirectional=bidirectional)
            names.append(torch.tensor(name, dtype=torch.long))
    return names
   

names = load_data('names.txt', True)
f_names = load_data('female.txt', True)
m_names = load_data('male.txt', True)
dataset = f_names + m_names 


lengths = [len(sequence) for sequence in dataset]
padded_dataset = pad_sequence([torch.tensor(sequence, dtype=torch.long) for sequence in dataset], \
                              batch_first=True, padding_value=0)
data_loader = DataLoader(padded_dataset, batch_size=batch_size, shuffle=True)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device = torch.device("cpu")

model = model.to(device)


  padded_dataset = pad_sequence([torch.tensor(sequence, dtype=torch.long) for sequence in dataset], \


In [30]:
model.train()
for epoch in range(n_epochs):
    for batch in data_loader:
        hidden = model.init_hidden(batch.shape[0])
        input, target = batch[:, :-1], batch[:, 1:]    # 移除最后一个字符作为输入，第二个字符到最后一个字符作为目标
        input, target = input.to(device), target.to(device)
        optimizer.zero_grad()
        output, hidden = model(input, hidden)
        hidden = tuple([each.data for each in hidden])  # 分离隐藏状态
        # 计算损失
        loss = loss_function(output.reshape(-1, output_size), target.reshape(-1))
        loss.backward()
        optimizer.step()

        print('Epoch: {}/{}.............'.format(epoch, n_epochs), end=' ')
        print("Loss: {:.4f}".format(loss.item()))
torch.save(model.state_dict(), 'model_state_dict_forward.pth')

Epoch: 0/10............. Loss: 3.3829
Epoch: 0/10............. Loss: 3.2194
Epoch: 0/10............. Loss: 3.0447
Epoch: 0/10............. Loss: 2.8498
Epoch: 0/10............. Loss: 2.8141
Epoch: 0/10............. Loss: 2.6708
Epoch: 0/10............. Loss: 2.7281
Epoch: 0/10............. Loss: 2.7563
Epoch: 0/10............. Loss: 2.5910
Epoch: 0/10............. Loss: 2.4944
Epoch: 0/10............. Loss: 2.5389
Epoch: 0/10............. Loss: 2.4469
Epoch: 0/10............. Loss: 2.5362
Epoch: 0/10............. Loss: 2.2998
Epoch: 0/10............. Loss: 2.5105
Epoch: 0/10............. Loss: 2.3086
Epoch: 0/10............. Loss: 2.3746
Epoch: 0/10............. Loss: 2.4037
Epoch: 0/10............. Loss: 2.3495
Epoch: 0/10............. Loss: 2.3603
Epoch: 0/10............. Loss: 2.3230
Epoch: 0/10............. Loss: 2.4193
Epoch: 0/10............. Loss: 2.2986
Epoch: 0/10............. Loss: 2.2246
Epoch: 0/10............. Loss: 2.3400
Epoch: 0/10............. Loss: 2.3420
Epoch: 0/10.

In [31]:
import torch.nn.functional as F

model.load_state_dict(torch.load('model_state_dict_forward.pth'))
model.eval()  # 设置模型为评估模式

text = "nry" 
text_tokenize = [tokenize(text, bidirectional=True)[:-1]]

texts_tokenize = torch.tensor(text_tokenize, dtype=torch.long).to(device)#



# 通过模型运行数据
def test(texts_tokenize):
    update = torch.LongTensor(1, texts_tokenize.shape[1] + 1)  # 确保update是长整型
    hidden = model.init_hidden(1)
    output, hidden = model(texts_tokenize, hidden)

    probabilities = F.softmax(output, dim=-1)
    top_probabilities, top_indices = torch.topk(probabilities, 5)
    char_list = [num_to_char[number] for number in top_indices[:, -1].squeeze().tolist()]
    prob_list = []
    for item1, item2 in zip(char_list, top_probabilities[:, -1].squeeze().tolist()):
        item2 = "{:.3f}".format(item2)
        prob_list.append(f"{item1}: {item2}")
    print(prob_list)
    max_values, max_indices = torch.max(output, dim=-1)
    max_indices = max_indices.long()  # 确保max_indices是长整型

    if int(max_indices[:, -1]) == 29:
        return torch.cat([texts_tokenize[0], max_indices[:, -1]], dim=0)
    else:
        update[0] = torch.cat([texts_tokenize[0], max_indices[:, -1]], dim=0)
        
        return test(update)


name_b = reverse_tokenize(test(texts_tokenize=texts_tokenize).squeeze().tolist(), bidirectional=True)

print(name_b)


['e: 0.563', 'a: 0.184', 'i: 0.104', 'u: 0.048', 'y: 0.044']
['h: 0.460', '<bos>: 0.101', 'l: 0.050', 'd: 0.050', 'v: 0.049']
['<bos>: 0.896', 't: 0.038', 's: 0.031', 'c: 0.012', 'g: 0.011']
henry


下面的部分是双向的

In [41]:
import torch.nn.functional as F

model_b = RNNModel(input_size, hidden_size, output_size, n_layers, dropout)
model_b.load_state_dict(torch.load('model_state_dict_forward.pth'))
model_b.eval()  # 设置模型为评估模式

model_e = RNNModel(input_size-1, hidden_size, output_size-1, n_layers, dropout)
model_e.load_state_dict(torch.load('model_state_dict.pth'))
model_e.eval()  # 设置模型为评估模式

text = "err" 

text_tokenize = [tokenize(text, bidirectional=True)[:-1]]
texts_tokenize = torch.tensor(text_tokenize, dtype=torch.long).to(device)#

# 通过模型运行数据
def test_b(texts_tokenize):
    update = torch.LongTensor(1, texts_tokenize.shape[1] + 1)  # 确保update是长整型
    hidden = model_b.init_hidden(1)
    output, hidden = model_b(texts_tokenize, hidden)

    probabilities = F.softmax(output, dim=-1)
    top_probabilities, top_indices = torch.topk(probabilities, 5)
    char_list = [num_to_char[number] for number in top_indices[:, -1].squeeze().tolist()]
    prob_list = []
    for item1, item2 in zip(char_list, top_probabilities[:, -1].squeeze().tolist()):
        item2 = "{:.3f}".format(item2)
        prob_list.append(f"{item1}: {item2}")
    print(prob_list)
    max_values, max_indices = torch.max(output, dim=-1)
    max_indices = max_indices.long()  # 确保max_indices是长整型
    if int(max_indices[:, -1]) == 29:
        return torch.cat([texts_tokenize[0], max_indices[:, -1]], dim=0)
    else:
        update[0] = torch.cat([texts_tokenize[0], max_indices[:, -1]], dim=0)
        
        return test_b(update)

def test_e(texts_tokenize):
    update = torch.LongTensor(1, texts_tokenize.shape[1] + 1)  # 确保update是长整型
    hidden = model_e.init_hidden(1)
    output, hidden = model_e(texts_tokenize, hidden)

    probabilities = F.softmax(output, dim=-1)
    top_probabilities, top_indices = torch.topk(probabilities, 5)
    char_list = [num_to_char[number] for number in top_indices[:, -1].squeeze().tolist()]
    prob_list = []
    for item1, item2 in zip(char_list, top_probabilities[:, -1].squeeze().tolist()):
        item2 = "{:.3f}".format(item2)
        prob_list.append(f"{item1}: {item2}")
    print(prob_list)
    
    max_values, max_indices = torch.max(output, dim=-1)
    max_indices = max_indices.long()  # 确保max_indices是长整型

    if int(max_indices[:, -1]) == 28:
        return torch.cat([texts_tokenize[0], max_indices[:, -1]], dim=0)
    else:
        update[0] = torch.cat([texts_tokenize[0], max_indices[:, -1]], dim=0)
        
        return test_e(update)

name_b = reverse_tokenize(test_b(texts_tokenize=texts_tokenize).squeeze().tolist(), bidirectional=True)

print("向前生成的结果：",name_b)

text_tokenize = [tokenize(name_b)[:-1]]
texts_tokenize = torch.tensor(text_tokenize, dtype=torch.long).to(device)
name_all = reverse_tokenize(test_e(texts_tokenize=texts_tokenize).squeeze().tolist())
print("全部的生成结果",name_all)

['t: 0.517', 'h: 0.112', 'i: 0.073', 'g: 0.068', 'p: 0.037']
['s: 0.283', 'x: 0.271', '<bos>: 0.126', 'e: 0.080', 'i: 0.070']
['e: 0.256', '<bos>: 0.182', 'u: 0.165', 'i: 0.082', 'b: 0.059']
['v: 0.244', '<bos>: 0.187', 'b: 0.098', 'w: 0.059', 'r: 0.058']
['e: 0.500', '<bos>: 0.321', 'l: 0.049', 'r: 0.046', 'i: 0.028']
['<bos>: 0.334', 'h: 0.127', 'l: 0.120', 'r: 0.069', 'n: 0.059']
向前生成的结果： evesterr
['<eos>: 0.684', 'e: 0.238', 'a: 0.048', 'o: 0.014', 'i: 0.008']
全部的生成结果 evesterr
