In [5]:
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as Data
from torch.utils.data import Dataset, DataLoader

In [6]:
sentence_list = ['i say hello', 'you eat egg', 'he can sing', 'she will go', 'i say ok']

words = ' '.join(sentence_list).split(' ')
all_words = list(set(words))
print(all_words)
all_words_len = len(all_words)
print(all_words_len)

['ok', 'he', 'hello', 'i', 'will', 'eat', 'can', 'say', 'sing', 'she', 'egg', 'go', 'you']
13


In [7]:
# 构建i2v，v2i
index2word = {k: v for k, v in enumerate(all_words)}
word2index = {v: k for k, v in enumerate(all_words)}
print(index2word)
print(word2index)

{0: 'ok', 1: 'he', 2: 'hello', 3: 'i', 4: 'will', 5: 'eat', 6: 'can', 7: 'say', 8: 'sing', 9: 'she', 10: 'egg', 11: 'go', 12: 'you'}
{'ok': 0, 'he': 1, 'hello': 2, 'i': 3, 'will': 4, 'eat': 5, 'can': 6, 'say': 7, 'sing': 8, 'she': 9, 'egg': 10, 'go': 11, 'you': 12}


In [8]:
## 构建x(输入值)和y(预测值)，并把x(输入值)和y(预测值)转为one-hot编码
def make_data(sentence_list):
    input_batch = []
    output_batch = []
    for sentence in sentence_list:
        # 将句子切分为单词,然后再将前两个单词作为x(输入值),最后一个单词作为y(预测值)
        word = sentence.split(' ')
        input = [word2index[n] for n in word[:-1]]
        output = word2index[word[-1]]
        # np.eye(all_words_len)[input]可以理解为,将每个单词的编号转为 one-hot编码
        # 输入值转为one-hot编码,输出值不用转,因为相当于13分类问题(一共有13种不同的单词)
        input_batch.append(np.eye(all_words_len)[input])
        output_batch.append(output)
    print(input_batch)
    print(output_batch)
    return torch.Tensor(input_batch), torch.LongTensor(output_batch)


In [9]:
input_batch, output_batch = make_data(sentence_list)
print('-------')
print(input_batch, output_batch)

[array([[0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.]]), array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.]]), array([[0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.]]), array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.]]), array([[0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.]])]
[2, 10, 8, 11, 0]
-------
tensor([[[0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
         [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 

  app.launch_new_instance()


In [10]:
## 构建dataset和dataloader
dataset = Data.TensorDataset(input_batch, output_batch)
dataloader = Data.DataLoader(dataset=dataset,
                             batch_size=2,
                             drop_last=True,
                             shuffle=False)
#shuffle是否打乱顺序，默认为False表示不打乱顺序
# drop_last表示是否舍弃最后一个，
#举例：本次总的数据是五个，每次运送两个数据,最后一句话,也就是'i say ok'不参加训练

In [11]:
for x, y in dataloader:
    print(x, x.shape)
    # [2, 2, 13]
    # 第一个2代表2个数据,也就是batch_size,
    # 第二个2表示x(输入值)中单词的个数,本次是由前两个单词,来预测第三个单词
    # 13 表示每个单词被编码为13维
    print(y, y.shape)
    print('----')
    # 可以发现是两个一组进行输出,因为batch_size=2
    ### 此时数据处理部分内容就已经完成,接下来构建RNN网络,将数据用dataloader送入到网络中

tensor([[[0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
         [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.]]]) torch.Size([2, 2, 13])
tensor([ 2, 10]) torch.Size([2])
----
tensor([[[0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
         [0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.]]]) torch.Size([2, 2, 13])
tensor([ 8, 11]) torch.Size([2])
----


<img src="./out.png">

In [12]:
####构建RNN网络
class MyRNN(nn.Module):
    def __init__(self):
        super(MyRNN, self).__init__()
        #all_words_len=13,也就是sentence_list中,总共有13种不同的单词,
        # 也就预测最后一个单词,总共有13中不同的结果,也就是13分类问题
        self.rnn = nn.RNN(input_size=all_words_len, hidden_size=6)
        self.fc = nn.Linear(in_features=6, out_features=all_words_len)

    #图片中使out,ht=forward(x,h0)
    # 也就是 out,hidden = self.rnn(x,hidden)
    # 这里采用input代表x，h_i代表h0(hidden)
    # 就会变成out,ht=self.rnn(input,h_0) # rnn中数据放在第一个位置
    def forward(self, input, h_0i):
        input = input.transpose(0, 1)
        # input是dataloader中的x数据,x数据的格式是
        # RNN的前向传播,需要传入两个参数
        out, h_0i = self.rnn(input, h_0i)
        out = out[-1]
        model = self.fc(out)
        return model


<img src="./rnn.png">

In [13]:
#实例化对象
model = MyRNN()

In [14]:
print(model)

MyRNN(
  (rnn): RNN(13, 6)
  (fc): Linear(in_features=6, out_features=13, bias=True)
)


In [15]:
## 定义损失函数和优化器## 固定步骤
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

In [16]:
for epoch in range(20):
    for x, y in dataloader:
        h_0i = torch.randn(1, x.shape[0], 6)
        print(h_0i, h_0i.shape)  # torch.Size([1, 2, 6])
        pred = model(x, h_0i)
        loss = loss_fn(pred, y)
        ########
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

tensor([[[ 0.1244, -0.1414, -0.1988, -1.5842, -1.6437, -2.9828],
         [-1.3623,  1.9778,  0.5775,  0.8948,  0.4620,  0.3091]]]) torch.Size([1, 2, 6])
tensor([[[ 0.8851,  2.0004,  2.0992,  1.6781, -0.3298,  1.6590],
         [-1.0266, -0.8749, -0.4643,  1.1039,  0.2930,  0.8305]]]) torch.Size([1, 2, 6])
tensor([[[ 0.7311,  0.8735, -0.6060, -0.2150, -1.6210, -0.5501],
         [ 1.1710,  0.9384,  1.8335,  0.7064,  1.0579,  0.7806]]]) torch.Size([1, 2, 6])
tensor([[[-0.6574,  1.6693, -0.9780,  1.2261,  0.5174, -0.0141],
         [-0.4231,  0.5941, -0.3629,  0.5353, -0.6747,  2.7047]]]) torch.Size([1, 2, 6])
tensor([[[ 0.1894,  0.2971,  0.9485, -1.6390,  0.0634, -0.5142],
         [ 1.1432, -0.3977,  0.0327, -0.4695, -0.5886,  0.6575]]]) torch.Size([1, 2, 6])
tensor([[[ 0.9998, -0.4463, -0.1910,  0.1717,  0.2496,  0.5408],
         [ 1.1981, -1.3899,  0.3856, -1.1947,  0.6418,  1.3522]]]) torch.Size([1, 2, 6])
tensor([[[ 0.3168, -0.1536, -1.6091,  0.1001, -1.3394, -0.4360],
         [-

In [17]:
print(h_0i.shape)

torch.Size([1, 2, 6])


In [18]:
# ##test
# input = [sen.split()[:2] for sen in sentences]
# print(input)
## 构建测试集
input11 = list(['i say hello', 'he can sing'])
print(input11, type(input11))

['i say hello', 'he can sing'] <class 'list'>


In [19]:
hidden = torch.randn(1, 2, 6)

print(hidden.shape)  # torch.Size([1, 2, 6])
print(hidden)

torch.Size([1, 2, 6])
tensor([[[-0.8430,  0.5531,  0.2834, -1.0967,  1.4543, -0.0440],
         [ 0.5925, -1.1021,  0.2076, -1.2035, -0.1570, -0.2859]]])


In [20]:
input_batch11, output_batch11 = make_data(input11)
print(input_batch11, output_batch11)

[array([[0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.]]), array([[0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.]])]
[2, 8]
tensor([[[0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.]],

        [[0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.]]]) tensor([2, 8])


In [21]:
# input_batch11=input_batch11.toTensor()

predict = model(input_batch11, hidden)
print(predict, predict.shape)

tensor([[-1.3168, -1.4875,  1.2412, -0.9217, -1.6271, -1.3971, -0.6312, -1.6123,
          0.1451, -1.0168,  0.1530,  0.6816, -1.4392],
        [-1.2642, -1.2769,  0.7724, -1.0384, -1.3931, -1.1378, -0.5871, -1.1868,
          0.8707, -0.9279, -0.3834,  0.1157, -1.3438]],
       grad_fn=<AddmmBackward0>) torch.Size([2, 13])


In [22]:
predict = predict.data.max(1, keepdim=True)[1]
print(predict)

tensor([[2],
        [8]])


In [23]:
print([index2word[n.item()] for n in predict.squeeze()])

['hello', 'sing']
