In [1]:
import numpy as np
import torch
import torch.nn as nn

def word_hot_encoding(sentences, word_dict=None):
    """One-hot encode every word of every sentence.

    Parameters
    ----------
    sentences : sequence of str
        Whitespace-separated sentences. Words are lower-cased before lookup.
        All sentences are expected to contain the same number of words so the
        per-sentence matrices can be stacked into a single array.
    word_dict : dict of str -> int, optional
        Word-to-index mapping to encode with; its indices are expected to be
        ``0 .. len(word_dict) - 1``. When omitted, the vocabulary is built from
        ``sentences`` themselves via ``list(set(...))``. The order of that
        vocabulary can change between Python processes (string hashing is
        randomised), so pass an explicit mapping whenever the column order
        must be reproducible, e.g. when reusing a saved vocabulary.

    Returns
    -------
    numpy.ndarray
        For equally long sentences, a float array of shape
        ``(len(sentences), words_per_sentence, vocab_size)`` in which
        ``result[s, p]`` is the one-hot vector of word ``p`` of sentence ``s``.

    Raises
    ------
    KeyError
        If ``word_dict`` is given and a word of ``sentences`` is missing from it.
    """
    if word_dict is None:
        # Keep this expression as-is: callers that rebuild the vocabulary with
        # the same expression rely on getting the same index assignment.
        word_list = list(set(" ".join(sentences).lower().split()))
        word_dict = {w: i for i, w in enumerate(word_list)}

    # Row i of the identity matrix is the one-hot vector for index i; build it
    # once instead of once per sentence.
    identity = np.eye(len(word_dict))

    s_array = []
    for s in sentences:
        indices = [word_dict[word] for word in s.lower().split()]
        # Fancy indexing selects one identity row per word.
        s_array.append(identity[indices])

    return np.array(s_array)

class Network(torch.nn.Module):
    """Small RNN classifier: one ``nn.RNN`` layer followed by a linear read-out.

    Parameters
    ----------
    vocab_size : int, default 9
        Width of each input vector and number of output logits.
    hidden_size : int, default 5
        Size of the RNN hidden state.
    """

    def __init__(self, vocab_size=9, hidden_size=5):
        super().__init__()
        # batch_first=True: inputs are laid out as (batch, seq_len, vocab_size).
        self.rnn = nn.RNN(vocab_size, hidden_size, batch_first=True)
        # Kept as a Sequential so the parameter names (seq.0.*) stay unchanged.
        self.seq = nn.Sequential(
            nn.Linear(hidden_size, vocab_size),
        )

    def forward(self, x):
        """Return logits computed from the RNN output at the last time step.

        ``x`` is a batched tensor of shape ``(batch, seq_len, vocab_size)``;
        the result has shape ``(batch, vocab_size)``.
        """
        # The final hidden state returned by the RNN is not needed here.
        outputs, _ = self.rnn(x)
        # Only the last time step feeds the classifier.
        return self.seq(outputs[:, -1, :])

# Seed PyTorch so the weight initialisation is repeatable.
# NOTE(review): the vocabulary order below can still differ between kernel
# sessions, so runs are only guaranteed identical within one session.
torch.manual_seed(0)

F = Network()
loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(F.parameters(), lr=0.3)


sentences = ["I like dog", "I love coffee", "I hate milk", "You like cat", "You love milk", "You hate coffee"]
# Built with the same expression word_hot_encoding uses internally, so these
# indices line up with the one-hot columns it returns.
word_list = list(set(" ".join(sentences).lower().split()))
word_dict = {w: i for i, w in enumerate(word_list)}
number_dict = {i: w for i, w in enumerate(word_list)}

print(word_list)
print(word_dict)

s_array = word_hot_encoding(sentences)
# Fail early if the vocabulary does not fit the network's fixed input width.
assert s_array.shape[-1] == F.rnn.input_size, "vocabulary size does not match the network input size"

# The first two words of each sentence are the input; the third is the target.
x = torch.tensor(s_array[:, :2, :], dtype=torch.float)
t = torch.tensor(s_array[:, 2, :], dtype=torch.long)
# CrossEntropyLoss is given class indices, so convert the one-hot targets once
# instead of on every training step.
target_idx = t.argmax(dim=1)

epoch = 200
for e in range(epoch):
    loss_sum = 0.0
    for b in range(x.shape[0]):
        # One sentence per optimisation step (batch size 1).
        y = F(x[b:b+1])

        loss = loss_function(y, target_idx[b:b+1])
        # .item() adds a plain float; adding the tensor itself would keep every
        # step's autograd history alive until the end of the epoch.
        loss_sum += loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if (e + 1) % 10 == 0:
        print("epoch {} | loss {}".format(e + 1, loss_sum))


['love', 'like', 'dog', 'you', 'cat', 'hate', 'milk', 'coffee', 'i']
{'love': 0, 'like': 1, 'dog': 2, 'you': 3, 'cat': 4, 'hate': 5, 'milk': 6, 'coffee': 7, 'i': 8}
epoch 10 | loss 6.80874490737915
epoch 20 | loss 3.9165430068969727
epoch 30 | loss 1.118427038192749
epoch 40 | loss 0.3450333774089813
epoch 50 | loss 0.19696640968322754
epoch 60 | loss 0.1341795176267624
epoch 70 | loss 0.100531205534935
epoch 80 | loss 0.07996776700019836
epoch 90 | loss 0.06623402237892151
epoch 100 | loss 0.05645953118801117
epoch 110 | loss 0.049168433994054794
epoch 120 | loss 0.04352869838476181
epoch 130 | loss 0.03904113173484802
epoch 140 | loss 0.03538741543889046
epoch 150 | loss 0.032356277108192444
epoch 160 | loss 0.029801812022924423
epoch 170 | loss 0.02762022614479065
epoch 180 | loss 0.025735339149832726
epoch 190 | loss 0.024091092869639397
epoch 200 | loss 0.022643905133008957


In [2]:
# Predict the third word of every training sentence from its first two words
# (x holds the first two positions of each encoded sentence).
with torch.no_grad():  # inference only: no autograd graph is needed
    result = F(x)
result_arg = torch.argmax(result, dim=1)
# Map each predicted class index back to its word.
for idx in result_arg.tolist():
    print(number_dict[idx])


dog
coffee
milk
cat
milk
coffee


In [3]:
import pickle

# Persist the word -> index vocabulary to disk.
# NOTE(review): word_dict comes from list(set(...)), whose order may differ
# between kernel sessions, so the saved mapping only matches a model trained
# in the same session -- confirm before reusing it elsewhere.
with open("test.pickle", "wb") as vocab_file:
    pickle.dump(word_dict, vocab_file)

In [4]:
# Read the pickled vocabulary back and show it.
# pickle can execute arbitrary code while loading, so only open files that
# this notebook (or another trusted source) wrote.
with open("test.pickle", "rb") as vocab_file:
    A = pickle.load(vocab_file)

print(A)

{'hate': 0, 'cat': 1, 'dog': 2, 'coffee': 3, 'milk': 4, 'i': 5, 'you': 6, 'love': 7, 'like': 8}
