## NNLM

![](https://user-images.githubusercontent.com/36406676/59292395-f511d080-8cb7-11e9-9681-f68e68b5bf38.PNG)

## 1. Import packages

In [2]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable

In [4]:
dtype = torch.FloatTensor

## 2. Prepare Dataset

In [5]:
sentences = [ "i like dog", "i love coffee", "i hate milk"]

In [6]:
word_list = " ".join(sentences).split()
word_list = list(set(word_list))
word_dict = {w: i for i, w in enumerate(word_list)}
number_dict = {i: w for i, w in enumerate(word_list)}
n_class = len(word_dict) # number of Vocabulary


In [7]:
word_list

['coffee', 'dog', 'i', 'like', 'hate', 'milk', 'love']

In [8]:
word_dict

{'coffee': 0, 'dog': 1, 'i': 2, 'like': 3, 'hate': 4, 'milk': 5, 'love': 6}

In [9]:
number_dict

{0: 'coffee', 1: 'dog', 2: 'i', 3: 'like', 4: 'hate', 5: 'milk', 6: 'love'}

In [16]:
n_class

7

In [12]:
def make_batch(sentences):
    input_batch = []
    target_batch = []

    for sen in sentences:
        word = sen.split()
        input = [word_dict[n] for n in word[:-1]]
        target = word_dict[word[-1]]

        input_batch.append(input)
        target_batch.append(target)

    return input_batch, target_batch

In [13]:
input_batch, target_batch = make_batch(sentences)

In [19]:
input_batch = Variable(torch.LongTensor(input_batch))

In [20]:
input_batch.size()

torch.Size([3, 2])

In [24]:
C = nn.Embedding(n_class,embedding_size)

In [29]:
X = C(input_batch)

In [30]:
X

tensor([[[ 0.2694,  2.0215],
         [ 0.8497, -1.8132]],

        [[ 0.2694,  2.0215],
         [-1.1701, -0.2151]],

        [[ 0.2694,  2.0215],
         [-0.9068,  1.2369]]], grad_fn=<EmbeddingBackward>)

In [32]:
X.view(-1,seq_len*embedding_size) ## concat

tensor([[ 0.2694,  2.0215,  0.8497, -1.8132],
        [ 0.2694,  2.0215, -1.1701, -0.2151],
        [ 0.2694,  2.0215, -0.9068,  1.2369]], grad_fn=<ViewBackward>)

In [33]:
X = X.view(-1,seq_len*embedding_size) 

In [34]:
X

tensor([[ 0.2694,  2.0215,  0.8497, -1.8132],
        [ 0.2694,  2.0215, -1.1701, -0.2151],
        [ 0.2694,  2.0215, -0.9068,  1.2369]], grad_fn=<ViewBackward>)

## 2. Set Parameters

In [23]:
seq_len = 2    # n-1 in paper"
n_hidden = 2  # h in paper
embedding_size = 2         # m in paper

## 3. Model & optimizer

In [35]:
class NNLM(nn.Module):
    def __init__(self):
        super(NNLM,self).__init__()
        self.C = nn.Embedding(n_class,embedding_size) # C : [vocab_size, embedding_size]
        self.H = nn.Parameter(torch.randn(seq_len*embedding_size,n_hidden).type(dtype))
        self.W = nn.Parameter(torch.randn(seq_len*embedding_size,n_class).type(dtype))
        self.d = nn.Parameter(torch.randn(n_hidden).type(dtype))
        self.U = nn.Parameter(torch.randn(n_hidden,n_class).type(dtype))
        self.b = nn.Parameter(torch.randn(n_class).type(dtype))
        
    def forward(self,X): # X [batch_size,seq_len]
        X = self.C(X) # X : [batch_size,seq_len,embedding_size]
        X = X.view(-1, seq_len * embedding_size) # [batch_size, seq_len* embeddding_size] ## 단어들 concat
        tanh = torch.tanh(self.d + torch.mm(X,self.H)) # [batch_size,n_hidden]
        output = self.b + torch.mm(X,self.W) + torch.mm(tanh,self.U) # [batch_size,n_class]
        return output
    
model = NNLM()

In [37]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = 0.001)

In [38]:
input_batch, target_batch = make_batch(sentences)
input_batch = Variable(torch.LongTensor(input_batch))
target_batch = Variable(torch.LongTensor(target_batch))


## 4 Training

In [39]:
for epoch in range(5000):
    optimizer.zero_grad()
    output = model(input_batch)
    
    # output : [batch_size, n_class], target_batch : [batch_size] (Longtensor, not one-hot)
    loss = criterion(output,target_batch)
    if (epoch +1)%1000 == 0:
        print('Epoch:','%04d' % (epoch+1), 'cost=', '{:.6f}'.format(loss))
        
    loss.backward()
    optimizer.step()

Epoch: 1000 cost= 0.469860
Epoch: 2000 cost= 0.058831
Epoch: 3000 cost= 0.008295
Epoch: 4000 cost= 0.003182
Epoch: 5000 cost= 0.001508


## 5. Predict

In [40]:
predict = model(input_batch).data.max(1,keepdim=True)[1]

## 6. Test

In [41]:
print([sen.split()[:2] for sen in sentences], '->', [number_dict[n.item()] for n in predict.squeeze()])

[['i', 'like'], ['i', 'love'], ['i', 'hate']] -> ['dog', 'coffee', 'milk']
