In [1]:
%config IPCompleter.greedy=True

In [2]:
!pip install --upgrade pip -i https://pypi.tuna.tsinghua.edu.cn/simple

Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple
[0m

In [None]:
!pip install xeus-python -i https://pypi.tuna.tsinghua.edu.cn/simple

In [15]:
import torch
import torch.nn as nn
import torch.optim as optim

device = torch.device('mps')

In [16]:
class NNLM(nn.Module):
    def __init__(self):
        super(NNLM, self).__init__()
        self.C = nn.Embedding(n_class, m).to(device)
        self.H = nn.Linear(n_step * m, n_hidden, bias=False).to(device)
        self.d = nn.Parameter(torch.ones(n_hidden)).to(device)
        self.U = nn.Linear(n_hidden, n_class, bias=False).to(device)
        self.W = nn.Linear(n_step * m, n_class, bias=False).to(device)
        self.b = nn.Parameter(torch.ones(n_class)).to(device)
        
    def forward(self, X):
        X = self.C(X)
        X = X.view(-1, n_step * m).to(device)
        tanh = torch.tanh(self.d + self.H(X)).to(device)
        output = self.b + self.W(X) + self.U(tanh)
        return output
        

In [17]:
def make_batch():
    input_batch = []
    target_batch = []
    for sen in sentences:
        word = sen.split()
        input = [word_dic[n] for n in word[:-1]] # create 1~n-1 as input
        target = word_dic[word[-1]]
        
        input_batch.append(input)
        target_batch.append(target)
    return input_batch, target_batch

In [20]:
if __name__ == '__main__':
    n_step = 2 # number of steps, n-1 in paper
    n_hidden = 2 # number of hidden size, h in paper
    m = 2 # embedding size, m in paper

    sentences = ["i like dog", "i love coffee", "i hate milk"]
    print(f'sentences:{sentences}')
    word_list = " ".join(sentences).split()
    word_list = list(set(word_list))
    print(f'word_list:{word_list}')
    word_dic = {w: i for i, w in enumerate(word_list)}
    print(f'word_dic:{word_dic}')
    number_dic = {i: w for i, w in enumerate(word_list)}
    print(f'number_dic:{number_dic}')
    n_class = len(word_dic) # number of vocabulary
    print(f'n_class:{n_class}')
    
    model = NNLM()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    input_batch, target_batch = make_batch()
    input_batch = torch.LongTensor(input_batch)
    target_batch = torch.LongTensor(target_batch)
    input_batch = input_batch.to(device)
    target_batch = target_batch.to(device)


    # Training
    for epoch in range(5000):
        optimizer.zero_grad()
        output = model(input_batch)
        
        loss = criterion(output, target_batch)    
        if (epoch + 1) % 100 == 0:
            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))
            # predict
            print(f'model(input_batch):{model(input_batch)}')
            print(f'model(input_batch).data:{model(input_batch).data}')
            predicts = model(input_batch).data.max(1, keepdim=True)
            predict = predicts[1]
            print(f'predicts:{predicts}')
            print(f'predict.squeeze():{predict.squeeze()}')
            # Test
            print([sen.split()[:2] for sen in sentences], '->', [number_dic[n.item()] for n in predict.squeeze()])
        loss.backward()
        optimizer.step()
    # predict
    print(f'model(input_batch):{model(input_batch)}')
    print(f'model(input_batch).data:{model(input_batch).data}')
    predicts = model(input_batch).data.max(1, keepdim=True)
    predict = predicts[1]
    print(f'predicts:{predicts}')
    print(f'predict.squeeze():{predict.squeeze()}')
    # Test
    print([sen.split()[:2] for sen in sentences], '->', [number_dic[n.item()] for n in predict.squeeze()])

sentences:['i like dog', 'i love coffee', 'i hate milk']
word_list:['love', 'i', 'milk', 'like', 'hate', 'coffee', 'dog']
word_dic:{'love': 0, 'i': 1, 'milk': 2, 'like': 3, 'hate': 4, 'coffee': 5, 'dog': 6}
number_dic:{0: 'love', 1: 'i', 2: 'milk', 3: 'like', 4: 'hate', 5: 'coffee', 6: 'dog'}
n_class:7
Epoch: 0100 cost = 1.273930
model(input_batch):tensor([[ 1.1218, -0.1900,  1.3820,  0.9369,  1.4035,  0.7825,  2.0141],
        [ 1.8702, -0.3945,  2.0753,  1.8660,  1.1304,  1.8358,  1.9410],
        [ 1.8971, -1.1549,  3.0418,  1.1515,  0.2822,  2.5120,  1.4523]],
       device='mps:0', grad_fn=<AddBackward0>)
model(input_batch).data:tensor([[ 1.1218, -0.1900,  1.3820,  0.9369,  1.4035,  0.7825,  2.0141],
        [ 1.8702, -0.3945,  2.0753,  1.8660,  1.1304,  1.8358,  1.9410],
        [ 1.8971, -1.1549,  3.0418,  1.1515,  0.2822,  2.5120,  1.4523]],
       device='mps:0')
predicts:torch.return_types.max(
values=tensor([[2.0141],
        [2.0753],
        [3.0418]], device='mps:0'),
ind