In [1]:
import torch
import torch.nn as nn

In [2]:
rnn = nn.RNN(100,20)     # 定义input_size 和 hidden_size（100 指的是用一个长度为 100 的向量表示一个单词，20 指的是 hidden_size）

print(rnn._parameters.keys())

print(rnn.weight_ih_l0.shape)
print(rnn.weight_hh_l0.shape)
print(rnn.bias_ih_l0.shape)
print(rnn.bias_hh_l0.shape)

odict_keys(['weight_ih_l0', 'weight_hh_l0', 'bias_ih_l0', 'bias_hh_l0'])
torch.Size([20, 100])
torch.Size([20, 20])
torch.Size([20])
torch.Size([20])


In [3]:
import torch
import torch.nn as nn

rnn = nn.RNN(input_size=100, hidden_size=20, num_layers=1)
x = torch.randn(10, 3, 100)
out, h_t = rnn(x, torch.zeros(1, 3, 20))
print(out.shape) # [10, 3, 20]
print(h_t.shape) # [1, 3, 20]

torch.Size([10, 3, 20])
torch.Size([1, 3, 20])


In [4]:
import torch
import torch.nn as nn

rnn = nn.RNN(input_size=100, hidden_size=20, num_layers=4)
x = torch.randn(10, 3, 100)
out, h_t = rnn(x)
print(out.shape)
print(h_t.shape)

torch.Size([10, 3, 20])
torch.Size([4, 3, 20])


In [5]:
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as Data

dtype = torch.FloatTensor

In [6]:
sentences = ["i like dog", "i love coffee", "i hate milk"]
word_list = " ".join(sentences).split()
vocab = list(set(word_list))
word2idx = {w:i for i, w in enumerate(vocab)}
idx2word = {i:w for i, w in enumerate(vocab)}
n_class = len(vocab)

In [7]:
word2idx

{'hate': 0, 'milk': 1, 'dog': 2, 'i': 3, 'coffee': 4, 'like': 5, 'love': 6}

In [8]:
vocab

['hate', 'milk', 'dog', 'i', 'coffee', 'like', 'love']

In [9]:
n_class

7

In [10]:
input_batch = []
target_batch = []

for sen in sentences:
    word = sen.split()
    input = [word2idx[n] for n in word[:-1]]    # 除了最后一个单词，返回的是id
    target = word2idx[word[-1]]       # 最后一个单词当作预测值，返回的是id
    input_batch.append(np.eye(n_class)[input])
    # print(input)
    print(np.eye(n_class)[input])
    target_batch.append(target)
print(input_batch)
print(target_batch)

[[0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0.]]
[[0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1.]]
[[0. 0. 0. 1. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0.]]
[array([[0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0.]]), array([[0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1.]]), array([[0., 0., 0., 1., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0.]])]
[2, 4, 1]


In [11]:
batch_size = 2
n_step = 2
n_hidden = 5

def make_data(sentences):
    input_batch = []
    target_batch = []

    for sen in sentences:
        word = sen.split()
        input = [word2idx[n] for n in word[:-1]]    # 除了最后一个单词
        target = word2idx[word[-1]]       # 最后一个单词当作预测值
        input_batch.append(np.eye(n_class)[input])     # np.eye，这里只有一个参数，表示行、列相等。对角线的地方为1，其余的地方为0。
        target_batch.append(target)

    return input_batch, target_batch

input_batch, target_batch = make_data(sentences)

# torch.FloatTensor是32位浮点类型数据，torch.LongTensor是64位整型
input_batch = torch.Tensor(input_batch)
target_batch = torch.LongTensor(target_batch)
# print(input_batch)
# print(target_batch)

dataset = Data.TensorDataset(input_batch, target_batch)
print(list(dataset))
dataloader = Data.DataLoader(dataset, batch_size, True)
print(list(dataloader))

[(tensor([[0., 0., 0., 1., 0., 0., 0.],
        [0., 0., 0., 0., 0., 1., 0.]]), tensor(2)), (tensor([[0., 0., 0., 1., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 1.]]), tensor(4)), (tensor([[0., 0., 0., 1., 0., 0., 0.],
        [1., 0., 0., 0., 0., 0., 0.]]), tensor(1))]
[[tensor([[[0., 0., 0., 1., 0., 0., 0.],
         [1., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 1., 0., 0., 0.],
         [0., 0., 0., 0., 0., 1., 0.]]]), tensor([1, 2])], [tensor([[[0., 0., 0., 1., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 1.]]]), tensor([4])]]


In [12]:
class TextRNN(nn.Module):
    def __init__(self):
        super(TextRNN, self).__init__()
        self.rnn = nn.RNN(input_size=n_class, hidden_size=n_hidden)
        self.fc = nn.Linear(n_hidden, n_class)
    def forward(self, hidden, X):
        # 最初：输入的X为[batch_size, n_step, n_class]
        X = X.transpose(0,1)     # np.transpose是替换轴，之后变成 [n_step, batch_size, n_class]
        out, hidden = self.rnn(X, hidden)
        out = out[-1]
        model = self.fc(out)
        return model

model = TextRNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [18]:
epoch = 5000

for e in range(epoch):
    for x, y in dataloader:
        hidden = torch.zeros(1, x.shape[0], n_hidden)     # hidden : [num_layers * num_directions, batch, hidden_size]
        pred = model(hidden, x)      # X : [batch_size, n_step, n_class]
        loss = criterion(pred, y)
        # if (e + 1) % 1000 == 0:
        #     print('Epoch:', '%04d' % (e + 1), 'cost =', '{:.6f}'.format(loss))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

Epoch: 1000 cost = 0.000000
Epoch: 1000 cost = 0.000000
Epoch: 2000 cost = 0.000000
Epoch: 2000 cost = 0.000000
Epoch: 3000 cost = 0.000000
Epoch: 3000 cost = 0.000000
Epoch: 4000 cost = 0.000000
Epoch: 4000 cost = 0.000000
Epoch: 5000 cost = 0.000000
Epoch: 5000 cost = 0.000000


In [14]:
input = [sen.split()[:2] for sen in sentences]
print(input)

[['i', 'like'], ['i', 'love'], ['i', 'hate']]


In [15]:
hidden = torch.zeros(1, len(input), n_hidden)
print(hidden)
predict = model(hidden, input_batch).data.max(1, keepdim=True)[1]
print(predict)
print([sen.split()[:2] for sen in sentences], '->', [idx2word[n.item()] for n in predict.squeeze()])

tensor([[[0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.]]])
tensor([[2],
        [4],
        [1]])
[['i', 'like'], ['i', 'love'], ['i', 'hate']] -> ['dog', 'coffee', 'milk']
