In [25]:
# code by Tae Hwan Jung @graykode, modify by wmathor
import torch
import torch.nn as nn
import torch.optim as optimizer
import torch.utils.data as Data

dtype = torch.FloatTensor

In [26]:
sentences = ['i like cat', 'i love coffee', 'i hate milk']
sentences_list = " ".join(sentences).split() #从左到右运行 ['i', 'like', 'cat', 'i', 'love'. 'coffee',...]
vocab = list(set(sentences_list))
word2idx = {w:i for i, w in enumerate(vocab)}
idx2word = {i:w for i, w in enumerate(vocab)}

V = len(vocab)

In [27]:
def make_data(sentences):
  input_data = []
  target_data = []
  for sen in sentences:
    sen = sen.split() # ['i', 'like', 'cat']
    input_tmp = [word2idx[w] for w in sen[:-1]]
    target_tmp = word2idx[sen[-1]]

    input_data.append(input_tmp)
    target_data.append(target_tmp)
  return input_data, target_data

In [28]:
input_data, target_data = make_data(sentences)
input_data, target_data = torch.LongTensor(input_data), torch.LongTensor(target_data)
dataset = Data.TensorDataset(input_data, target_data)
loader = Data.DataLoader(dataset, 2, True) #要tensor, batchsize,shuffle

In [29]:
# parameters
m = 2
n_step = 2
n_hidden = 10

In [30]:
class NNLM(nn.Module):
  def __init__(self):
    super(NNLM, self).__init__() #self转成NNLM的爸爸的类型nn.Module
    self.C = nn.Embedding(V, m) #词向量索引表
    # 下面这个维度是对着forward里面的公式填的
    self.H = nn.Parameter(torch.randn(n_step * m, n_hidden).type(dtype)) #tanh(X*H+d)
    self.d = nn.Parameter(torch.randn(n_hidden).type(dtype)) #隐藏层
    
    self.b = nn.Parameter(torch.randn(V).type(dtype))
    self.U = nn.Parameter(torch.randn(n_hidden, V).type(dtype)) #输出层 输出V个单词每个的概率，取max的那个
    self.W = nn.Parameter(torch.randn(n_step * m, V).type(dtype)) #residue操作的对输入乘

  def forward(self, X):
    '''
    X : [batch_size, n_step] 这里（打包2句, 看历史2个单词）所以:[2,2]
    '''
    X = self.C(X) # 用C这个索引表一照以后，多了m这维，自己定的： [batch_size, n_step, m]
    # print(X)
    X = X.view(-1, n_step * m) # [batch_szie, n_step * m] 
    hidden_out = torch.tanh(self.d + torch.mm(X, self.H)) # [batch_size, n_hidden]
    output = self.b + torch.mm(X, self.W) + torch.mm(hidden_out, self.U) #nn.CrossEntropyLoss中已经实现了softmax功能，因此在分类任务的最后一层fc后不需要加入softmax激活函数
    return output
model = NNLM()
optim = optimizer.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss() 

In [31]:
for epoch in range(5000): #一个epoch看完所有batches
  for batch_x, batch_y in loader:
    pred = model(batch_x)
    loss = criterion(pred, batch_y)

    if (epoch + 1) % 1000 == 0:
      print(epoch + 1, loss.item())
    
    optim.zero_grad() #三件套，优化器迭代的初始化设成0
    loss.backward()
    optim.step() #更新parameter

1000 0.008951702155172825
1000 0.025303568691015244
2000 0.002477166010066867
2000 0.002058174693956971
3000 0.0004532400635071099
3000 0.0010829067323356867
4000 0.00027020866400562227
4000 0.00011598391574807465
5000 5.781473373644985e-05
5000 0.00011276562872808427


In [32]:
# Pred
pred = model(input_data).max(1, keepdim=True)[1] #model(input_data)三句话，每句话输出V个单词的概率值，取max,输出[0]是值，[1]是indices
print([idx2word[idx.item()] for idx in pred.squeeze()]) #要加list[] 用indices去找word

['cat', 'coffee', 'milk']


In [42]:
pred=model(input_data).max(1)[1]
print([idx.item() for idx in pred])
print([idx for idx in pred])
print([idx2word[idx.item()] for idx in pred])

[2, 0, 3]
[tensor(2), tensor(0), tensor(3)]
['cat', 'coffee', 'milk']
