In [10]:
# Import libraries
import torch
from torch import nn
import numpy as np

text = ['hey how are you', 'good i am fine', 'have a nice day']  # the three training sentences

# Build the vocabulary: ''.join(text) concatenates the sentences and set() keeps
# each distinct character once.  The characters are sorted because iterating a
# bare set of strings is not stable between interpreter runs, which would give
# every run a different character <-> integer mapping.
chars = sorted(set(''.join(text)))

# Integer -> character lookup table
int2char = dict(enumerate(chars))

# Character -> integer lookup table (the inverse of int2char)
char2int = {char: ind for ind, char in int2char.items()}

# Length of the longest sentence
maxlen = len(max(text, key=len))

# Padding: right-pad every sentence with spaces up to maxlen so all sequences
# have the same length.  The space character is already part of the vocabulary
# because the sentences themselves contain spaces.
text = [sentence.ljust(maxlen) for sentence in text]

# Input sequences drop the last character and target sequences drop the first,
# so target[t] is the character that follows input[t].  Both are stored as
# lists of vocabulary indices.
input_seq = [[char2int[character] for character in sentence[:-1]] for sentence in text]
target_seq = [[char2int[character] for character in sentence[1:]] for sentence in text]

dict_size = len(char2int)   # vocabulary size
seq_len = maxlen - 1        # length of every input / target sequence
batch_size = len(text)      # all sentences are processed as a single batch


def one_hot_encode(sequence, dict_size, seq_len, batch_size):
    """One-hot encode a batch of integer sequences.

    Args:
        sequence: indexable batch where ``sequence[i][u]`` is the vocabulary
            index of position ``u`` in sample ``i``.
        dict_size: size of the one-hot axis (number of vocabulary entries).
        seq_len: number of positions encoded per sample.
        batch_size: number of samples encoded.

    Returns:
        A float32 NumPy array of shape ``(batch_size, seq_len, dict_size)``
        holding 1 at ``[i, u, sequence[i][u]]`` and 0 everywhere else.
    """
    # Start from an all-zero array of the final shape.
    encoded = np.zeros((batch_size, seq_len, dict_size), dtype=np.float32)

    # Visit every (sample, position) pair and switch on the matching index.
    positions = ((row, step) for row in range(batch_size) for step in range(seq_len))
    for row, step in positions:
        encoded[row, step, sequence[row][step]] = 1
    return encoded
# One-hot encode the integer inputs.
# Input shape --> (Batch Size, Sequence Length, One-Hot Encoding Size)
input_seq = one_hot_encode(input_seq, dict_size, seq_len, batch_size)

input_seq = torch.from_numpy(input_seq)
# The targets are class indices, so build an integer (long) tensor directly.
# torch.Tensor(...) is the legacy constructor and produces a floating-point
# tensor, which then has to be cast back before it can be used as class labels.
target_seq = torch.tensor(target_seq, dtype=torch.long)
#-------------------------------------------------------------------------------------------------------------------------
# torch.cuda.is_available() returns True if a GPU is available, else False
is_cuda = torch.cuda.is_available()

# Select the GPU when one is available, otherwise the CPU. `device` is used later in the code.
device = torch.device("cuda" if is_cuda else "cpu")
if is_cuda:
    print("GPU is available")
else:
    print("GPU not available, CPU used")


class Model(nn.Module):
    """LSTM followed by a fully connected layer applied to every time step.

    Args:
        input_size: number of features per time step of the input.
        output_size: number of values produced per time step.
        hidden_dim: size of the LSTM hidden state.
        n_layers: number of stacked LSTM layers.
    """

    def __init__(self, input_size, output_size, hidden_dim, n_layers):
        super().__init__()

        # Kept so init_hidden() and forward() can size their tensors
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers

        # Recurrent layer; batch_first=True means inputs are (batch, seq, feature)
        self.lstm = nn.LSTM(input_size, hidden_dim, n_layers, batch_first=True)
        # Fully connected layer mapping each hidden vector to output_size values
        self.fc = nn.Linear(hidden_dim, output_size)

    def forward(self, x):
        """Run the network on a batch.

        Args:
            x: tensor of shape (batch, seq_len, input_size).

        Returns:
            Tuple ``(out, hidden)``: ``out`` has shape
            (batch * seq_len, output_size) and ``hidden`` is the final LSTM
            hidden state of shape (n_layers, batch, hidden_dim).  The final
            cell state is not returned.
        """
        batch_size = x.size(0)

        # Every call starts from a fresh all-zero hidden and cell state
        hidden, cell = self.init_hidden(batch_size)
        out, (hidden, cell) = self.lstm(x, (hidden, cell))

        # Flatten (batch, seq_len, hidden_dim) to (batch * seq_len, hidden_dim)
        # so the linear layer is applied to every time step
        out = out.contiguous().view(-1, self.hidden_dim)
        out = self.fc(out)

        return out, hidden

    def init_hidden(self, batch_size):
        """Return zero-filled ``(hidden, cell)`` states for ``batch_size`` samples.

        Both tensors have shape (n_layers, batch_size, hidden_dim) and are
        created on the same device and with the same dtype as the model's
        parameters, so the model keeps working after ``.to(device)`` or a
        dtype cast such as ``.double()``.
        """
        reference = next(self.parameters())
        shape = (self.n_layers, batch_size, self.hidden_dim)
        hidden = torch.zeros(shape, device=reference.device, dtype=reference.dtype)
        cell = torch.zeros(shape, device=reference.device, dtype=reference.dtype)
        return hidden, cell
# Instantiate the model with hyperparameters
model = Model(input_size=dict_size, output_size=dict_size, hidden_dim=12, n_layers=1)
# The model stays on its default device here.  Moving it with model.to(device)
# requires every tensor fed to it, including the initial LSTM states, to live
# on that same device.

# Define hyperparameters
n_epochs = 1000
lr = 0.01

# Define Loss, Optimizer
criterion = nn.CrossEntropyLoss()  # cross-entropy over the vocabulary
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

# Tensor.to() returns a new tensor instead of moving the tensor in place, so
# its result must be kept.  The training tensors are placed once, on whatever
# device the model's weights are on, rather than on every epoch.
model_device = next(model.parameters()).device
train_inputs = input_seq.to(model_device)
# Flatten the targets to one class index per (sample, time step), matching the
# flattened model output.
train_targets = target_seq.view(-1).long().to(model_device)

# Training Run
model.train()  # make sure the model is in training mode even if this cell is re-run after sampling
for epoch in range(1, n_epochs + 1):
    optimizer.zero_grad()  # Clears existing gradients from previous epoch
    output, hidden = model(train_inputs)
    loss = criterion(output, train_targets)
    loss.backward()  # Does backpropagation and calculates gradients
    optimizer.step()  # Updates the weights accordingly

    if epoch % 10 == 0:
        print('Epoch: {}/{}.............'.format(epoch, n_epochs), end=' ')
        print("Loss: {:.4f}".format(loss.item()))


def predict(model, character):
    """Predict the character that follows a sequence of characters.

    Args:
        model: trained ``nn.Module``; called with a one-hot tensor of shape
            (1, len(character), dict_size) and expected to return
            ``(out, hidden)``.
        character: sequence of characters (string or list); each one must be
            a key of ``char2int``.

    Returns:
        Tuple ``(next_char, hidden)``: the most probable next character and
        the hidden state returned by the model.
    """
    # One-hot encode the input as a batch containing a single sequence
    indices = np.array([[char2int[c] for c in character]])
    encoded = one_hot_encode(indices, dict_size, indices.shape[1], 1)

    # Tensor.to() returns a new tensor rather than moving it in place, so keep
    # the result; the input follows whatever device the model's weights are on.
    model_device = next(model.parameters()).device
    inputs = torch.from_numpy(encoded).to(model_device)

    # Inference only: no autograd graph is needed
    with torch.no_grad():
        out, hidden = model(inputs)
        # out[-1] is the last row of the model output, i.e. the scores for the
        # final position of the (single) input sequence
        prob = nn.functional.softmax(out[-1], dim=0)

    # Taking the class with the highest probability score from the output
    char_ind = torch.max(prob, dim=0)[1].item()

    return int2char[char_ind], hidden


def sample(model, out_len, start='hey'):
    """Generate text by repeatedly predicting the next character.

    Args:
        model: trained model; switched to evaluation mode before generating.
        out_len: desired total length of the returned string, including
            the starting characters.
        start: starting text; it is lower-cased before use.

    Returns:
        The lower-cased ``start`` followed by the predicted characters.  When
        ``out_len`` does not exceed ``len(start)``, nothing is predicted and
        the lower-cased ``start`` is returned unchanged.
    """
    model.eval()  # eval mode

    # Seed the output with the individual characters of the starting text
    generated = list(start.lower())
    remaining = out_len - len(generated)

    # Feed everything generated so far back in and append each new prediction
    for _ in range(remaining):
        next_char, _hidden = predict(model, generated)
        generated.append(next_char)

    return ''.join(generated)

GPU is available
Epoch: 10/1000............. Loss: 2.5562
Epoch: 20/1000............. Loss: 2.3940
Epoch: 30/1000............. Loss: 2.2708
Epoch: 40/1000............. Loss: 2.0492
Epoch: 50/1000............. Loss: 1.7420
Epoch: 60/1000............. Loss: 1.3912
Epoch: 70/1000............. Loss: 1.0747
Epoch: 80/1000............. Loss: 0.8069
Epoch: 90/1000............. Loss: 0.5891
Epoch: 100/1000............. Loss: 0.4163
Epoch: 110/1000............. Loss: 0.2980
Epoch: 120/1000............. Loss: 0.2192
Epoch: 130/1000............. Loss: 0.1680
Epoch: 140/1000............. Loss: 0.1344
Epoch: 150/1000............. Loss: 0.1119
Epoch: 160/1000............. Loss: 0.0966
Epoch: 170/1000............. Loss: 0.0859
Epoch: 180/1000............. Loss: 0.0780
Epoch: 190/1000............. Loss: 0.0720
Epoch: 200/1000............. Loss: 0.0673
Epoch: 210/1000............. Loss: 0.0635
Epoch: 220/1000............. Loss: 0.0604
Epoch: 230/1000............. Loss: 0.0578
Epoch: 240/1000...........

In [11]:
# Generate 15 characters in total, starting from the prompt 'good'
print(sample(model, out_len=15, start='good'))

good i am fine 
