In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import torch
import torch.nn.functional as F
import torch.nn as nn
import torch.optim as optim

In [2]:
print(torch.__version__)

1.2.0


In [3]:
with open('datasets.txt', 'r', encoding='utf-8') as f:
    datasets = f.readlines()
    datasets = ''.join(datasets).split('\n')

In [6]:
class NGram:
    def __init__(self, target, n=2):
        self.sep = ''
        if self._is_sentence(target):
            target = target.strip().split(' ')
            self.sep = ' '
        self.text = target
        self.n = n
    
    def _is_sentence(self, target):
        if len(target.strip().split(' ')) > 1:
            return True
        else:
            return False
        
    def get(self):
        # zip 구현은 n만큼 객체가 있어야해서 귀찮음
        """List Comprehension"""
        return [self.sep.join(self.text[i:i+self.n])
                for i in range(len(self.text)-(self.n-1))]

In [7]:
NGram(' '.join(docs[150]), 2).get()

['닭 쫓던', '쫓던 개', '개 지붕만', '지붕만 쳐다본다']

In [106]:
class NPLM(nn.Module):
    
    """
    Neural Probabilistic Language Model 
    Implemenataion with Pytorch!!
    End to End Model
    """
    
    def __init__(self,
                 VOCAB_SIZE, 
                 FEATURE_VECTOR_SIZE,
                 WINDOW_SIZE=2,
                 HIDDEN_SIZE=32):
        super(NPLM, self).__init__()
        self.WINDOW_SIZE = WINDOW_SIZE
        self.VOCAB_SIZE = VOCAB_SIZE
        self.HIDDEN_SIZE = HIDDEN_SIZE
        # projection layer (linear) - lookup
        self.proj_layer = nn.Linear(VOCAB_SIZE, FEATURE_VECTOR_SIZE, bias=False)
        # Hidden layer
        self.hidden_layer = nn.Linear(FEATURE_VECTOR_SIZE * (WINDOW_SIZE-1), 
                                      HIDDEN_SIZE, bias=True)
        # Output layer
        self.output_layer = nn.Bilinear(FEATURE_VECTOR_SIZE * (WINDOW_SIZE-1),
                                        HIDDEN_SIZE,
                                        VOCAB_SIZE,
                                        bias=True)
        
    def forward(self, x):
        # look-up
        x = self.proj_layer(x)
        # flatten
        err_msg = "입력한 window size와 input의 n이 다릅니다."
        assert (self.WINDOW_SIZE-1) == x.size(0), err_msg
        x = x.flatten()
        Hx = torch.tanh(self.hidden_layer(x))
        output = torch.softmax(self.output_layer(x, Hx), dim=0)
        return output
    
    def train(self, train_generator):
        # Create your optimizer and criterion
        optimizer = optim.Adam(self.parameters(), lr=0.01)
        criterion = nn.CrossEntropyLoss()
        
        # in your training loop:
        optimizer.zero_grad()  # zero the gradient buffers
        for input_vec, target_vec in train_generator:
            output_vec = self(input_vec)
            output_vec = output_vec.reshape([1, output_vec.size(0)]
            loss = criterion(output_vec, target_vec)
            loss.backward()
            optimizer.step()
        print('Done.')
            
    def predict(self, x):
        return self.forward(x)

SyntaxError: invalid syntax (<ipython-input-106-ed8f598ae42f>, line 50)

In [107]:
class gen_dataset:
    
    """
    Generate N-Gram Datasets
    """
    
    def __init__(self, datasets, n=2):
        self.n = n
        self.datasets = datasets
        docs = [data.split(' ') for data in datasets]
        tokens = [t for doc in docs for t in doc]
        uniq_tokens = list(set(tokens))
        self.token2ix = {token:vec.argmax() for token, vec in 
            zip(uniq_tokens, np.eye(len(uniq_tokens)))}
        self.ix2token = {vec.argmax():token for token, vec in 
                    zip(uniq_tokens, np.eye(len(uniq_tokens)))}
        self.token2vec = {token:vec for token, vec in 
                    zip(uniq_tokens, np.eye(len(uniq_tokens)))}
        
    def get_generator(self):
        for doc in self.datasets:
            if isinstance(doc, str):
                x = doc.strip().split(' ')
            x = list(map(lambda t: self.token2vec[t], x))
            x = [torch.from_numpy(np.array(x[i:i+self.n])).float()
                 for i in range(len(x) - (self.n-1))]
            for i in x:
                input_vec = i[:-1]
                target_vec = torch.LongTensor([i[-1].argmax()])
                yield input_vec, target_vec

In [108]:
net = NPLM(len(uniq_tokens), 100, 3)

In [109]:
train_data = gen_dataset(datasets, n=3)
generator = train_data.get_generator()

In [110]:
generator

<generator object gen_dataset.get_generator at 0x000001F1089D9DB0>

In [111]:
net

NPLM(
  (proj_layer): Linear(in_features=1008, out_features=100, bias=False)
  (hidden_layer): Linear(in_features=200, out_features=32, bias=True)
  (output_layer): Bilinear(in1_features=200, in2_features=32, out_features=1008, bias=True)
)

In [112]:
params = list(net.parameters())
print(len(params))
print(params[0].size())

5
torch.Size([100, 1008])


In [113]:
params

[Parameter containing:
 tensor([[ 0.0120, -0.0098,  0.0119,  ..., -0.0147,  0.0241,  0.0048],
         [-0.0083, -0.0167,  0.0222,  ..., -0.0259, -0.0223, -0.0244],
         [ 0.0196, -0.0231, -0.0113,  ...,  0.0133,  0.0118,  0.0214],
         ...,
         [ 0.0269,  0.0172,  0.0030,  ..., -0.0280, -0.0281, -0.0115],
         [-0.0219, -0.0017, -0.0014,  ...,  0.0162,  0.0219,  0.0019],
         [-0.0185, -0.0126,  0.0252,  ..., -0.0171,  0.0030, -0.0187]],
        requires_grad=True), Parameter containing:
 tensor([[ 0.0483, -0.0542, -0.0143,  ..., -0.0076, -0.0691, -0.0602],
         [-0.0521,  0.0232,  0.0646,  ..., -0.0415, -0.0524,  0.0350],
         [-0.0597,  0.0071, -0.0537,  ...,  0.0466, -0.0412,  0.0438],
         ...,
         [ 0.0552,  0.0435, -0.0378,  ..., -0.0251, -0.0098,  0.0594],
         [-0.0499,  0.0446,  0.0160,  ..., -0.0186, -0.0102,  0.0625],
         [-0.0525,  0.0234, -0.0160,  ..., -0.0511, -0.0266,  0.0329]],
        requires_grad=True), Parameter conta

In [114]:
net = net.float()

In [115]:
net.train(generator)

RuntimeError: expected device cpu and dtype Float but got device cpu and dtype Long

In [92]:
output

tensor([[0.8982, 0.8050, 0.6393, 0.9983, 0.5731, 0.0469, 0.5560, 0.1476, 0.8404,
         0.5544]])

In [93]:
target

tensor([1])

In [90]:
a.reshape([1, a.size(0)])

tensor([[0.0009, 0.0009, 0.0010,  ..., 0.0010, 0.0009, 0.0010]],
       grad_fn=<AsStridedBackward>)

In [104]:
criterion = nn.CrossEntropyLoss()
criterion(a.reshape([1, a.size(0)]), torch.LongTensor([b.argmax()]))

tensor(6.9158, grad_fn=<NllLossBackward>)

tensor(669)

In [214]:
outputs = net(x[0][:-1])

In [221]:
x[0][-1].argmax()

tensor(611)

In [222]:
outputs.argmax()

tensor(582)

In [38]:
inputs.argmax(dim=1)

tensor([634, 550, 259, 465])

In [51]:
params[0].data[:, (634, 550, 259, 465)].T

tensor([[ 0.0163,  0.0080, -0.0241,  0.0136,  0.0069,  0.0193,  0.0315, -0.0069,
          0.0127,  0.0154,  0.0088, -0.0205, -0.0042,  0.0089, -0.0159,  0.0020,
          0.0220,  0.0237,  0.0160, -0.0091, -0.0063,  0.0231, -0.0018,  0.0316,
         -0.0234,  0.0001, -0.0205, -0.0156,  0.0072,  0.0112, -0.0093, -0.0216,
          0.0305, -0.0071,  0.0112,  0.0172, -0.0053,  0.0184, -0.0093,  0.0234,
          0.0038,  0.0249,  0.0088,  0.0060, -0.0043,  0.0076,  0.0200, -0.0235,
          0.0247, -0.0255, -0.0071, -0.0025,  0.0132, -0.0248, -0.0055,  0.0059,
         -0.0270, -0.0022,  0.0309, -0.0256, -0.0132, -0.0182, -0.0303, -0.0048,
         -0.0084, -0.0089,  0.0169,  0.0060, -0.0037,  0.0281, -0.0128,  0.0133,
         -0.0129,  0.0196, -0.0231, -0.0070,  0.0289,  0.0052,  0.0251, -0.0084,
         -0.0255,  0.0271, -0.0269, -0.0199,  0.0065,  0.0041, -0.0147,  0.0312,
         -0.0155, -0.0090, -0.0319,  0.0019, -0.0048,  0.0304, -0.0142, -0.0200,
         -0.0109, -0.0167,  

In [96]:
[*outputs]

[tensor([ 0.0163,  0.0080, -0.0241,  0.0136,  0.0069,  0.0193,  0.0315, -0.0069,
          0.0127,  0.0154,  0.0088, -0.0205, -0.0042,  0.0089, -0.0159,  0.0020,
          0.0220,  0.0237,  0.0160, -0.0091, -0.0063,  0.0231, -0.0018,  0.0316,
         -0.0234,  0.0001, -0.0205, -0.0156,  0.0072,  0.0112, -0.0093, -0.0216,
          0.0305, -0.0071,  0.0112,  0.0172, -0.0053,  0.0184, -0.0093,  0.0234,
          0.0038,  0.0249,  0.0088,  0.0060, -0.0043,  0.0076,  0.0200, -0.0235,
          0.0247, -0.0255, -0.0071, -0.0025,  0.0132, -0.0248, -0.0055,  0.0059,
         -0.0270, -0.0022,  0.0309, -0.0256, -0.0132, -0.0182, -0.0303, -0.0048,
         -0.0084, -0.0089,  0.0169,  0.0060, -0.0037,  0.0281, -0.0128,  0.0133,
         -0.0129,  0.0196, -0.0231, -0.0070,  0.0289,  0.0052,  0.0251, -0.0084,
         -0.0255,  0.0271, -0.0269, -0.0199,  0.0065,  0.0041, -0.0147,  0.0312,
         -0.0155, -0.0090, -0.0319,  0.0019, -0.0048,  0.0304, -0.0142, -0.0200,
         -0.0109, -0.0167,  

In [109]:
net.zero_grad()

In [110]:
params = list(net.parameters())
params

[Parameter containing:
 tensor([[ 0.0146, -0.0159,  0.0211,  ...,  0.0264,  0.0148,  0.0240],
         [-0.0049, -0.0193, -0.0209,  ..., -0.0087, -0.0274, -0.0091],
         [ 0.0309,  0.0227, -0.0100,  ..., -0.0041,  0.0278, -0.0281],
         ...,
         [ 0.0066,  0.0188,  0.0010,  ..., -0.0053,  0.0271,  0.0142],
         [-0.0197,  0.0185,  0.0274,  ...,  0.0155,  0.0085, -0.0012],
         [ 0.0251, -0.0215, -0.0268,  ...,  0.0038,  0.0298,  0.0173]],
        requires_grad=True), Parameter containing:
 tensor([[-0.0759,  0.0057,  0.0638,  ...,  0.0636, -0.0993,  0.0426],
         [-0.0357,  0.0384, -0.0809,  ...,  0.0799, -0.0884,  0.0217],
         [-0.0756,  0.0711, -0.0133,  ...,  0.0607,  0.0302, -0.0936],
         ...,
         [-0.0474, -0.0217, -0.0249,  ...,  0.0639,  0.0803, -0.0315],
         [ 0.0881,  0.0205,  0.0734,  ..., -0.0998, -0.0073,  0.0608],
         [ 0.0960, -0.0905,  0.0543,  ...,  0.0480,  0.0858,  0.0599]],
        requires_grad=True), Parameter conta

In [131]:
F.softmax(outputs, dim=1).argmax(dim=1)

tensor([781, 781, 781, 781, 781])