In [2]:
import numpy as np
import torch
import torch.nn as nn

def word_hot_encoding(sentences, word_dict=None):
    """One-hot encode every word of every sentence.

    Each sentence is lower-cased and split on whitespace, and every word is
    replaced by the identity-matrix row selected by its vocabulary index.

    Args:
        sentences: Sequence of strings. Every sentence must contain the same
            number of words so the result can be a single rectangular array.
        word_dict: Optional mapping ``word -> integer index``. When omitted,
            a vocabulary is built from ``sentences`` itself. Pass the mapping
            you also use for decoding to guarantee both sides agree.

    Returns:
        ``numpy.ndarray`` of float64 with shape
        ``(len(sentences), words_per_sentence, vocab_size)``.

    Raises:
        KeyError: If a word is missing from the supplied ``word_dict``.
        ValueError: If the sentences do not all have the same word count.
    """
    if word_dict is None:
        # The index order comes from set iteration, which is only stable within
        # one interpreter session; the expression is kept as-is so that any
        # vocabulary a caller builds the same way lines up with this one.
        word_list = list(set(" ".join(sentences).lower().split()))
        word_dict = {w: i for i, w in enumerate(word_list)}

    # One column per index; also correct if a supplied mapping has gaps.
    vocab_size = max(word_dict.values(), default=-1) + 1
    identity = np.eye(vocab_size)  # built once, rows are copied out below

    encoded = []
    for sentence in sentences:
        indices = [word_dict[word] for word in sentence.lower().split()]
        encoded.append(identity[indices])

    if len({block.shape[0] for block in encoded}) > 1:
        raise ValueError(
            "all sentences must contain the same number of words "
            "to be stacked into one array"
        )

    return np.array(encoded)

class Network(torch.nn.Module):
    """Single-layer RNN followed by a linear layer that scores the next word.

    Args:
        vocab_size: Length of each one-hot input vector and number of output
            logits. Defaults to 9, the value that was previously hard-coded.
        hidden_size: Width of the RNN hidden state. Defaults to 5.
    """

    def __init__(self, vocab_size=9, hidden_size=5):
        super().__init__()
        # Layers are created in the same order and under the same attribute
        # names as before, so state_dict keys are unchanged.
        self.rnn = nn.RNN(vocab_size, hidden_size, batch_first=True)
        self.seq = nn.Sequential(
            nn.Linear(hidden_size, vocab_size),
        )

    def forward(self, x):
        """Return logits of shape ``(batch, vocab_size)``.

        ``x`` is a ``(batch, seq_len, vocab_size)`` float tensor
        (``batch_first=True``). Only the RNN output at the last time step is
        passed to the linear layer.
        """
        outputs, _ = self.rnn(x)
        return self.seq(outputs[:, -1, :])

# Model, loss and optimiser for next-word prediction.
F = Network()
loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(F.parameters(), lr=0.5)


sentences = ["I like dog", "I love coffee", "I hate milk", "You like cat", "You love milk", "You hate coffee"]

# Vocabulary used for decoding predictions. word_hot_encoding builds its
# vocabulary from the same sentences, which is why the indices agree.
# NOTE: set iteration order and the initial weights are not seeded, so the
# index assignment and the exact loss values differ between kernel sessions.
word_list = list(set(" ".join(sentences).lower().split()))
word_dict = {w: i for i, w in enumerate(word_list)}
number_dict = dict(enumerate(word_list))

print(word_list)
print(word_dict)

s_array = word_hot_encoding(sentences)

# First two words are the input sequence, the third word is the target.
x = torch.tensor(s_array[:, :2, :], dtype=torch.float)
t = torch.tensor(s_array[:, 2, :], dtype=torch.long)

# CrossEntropyLoss is given class indices here, so convert the one-hot targets
# once instead of on every training step.
target_index = t.argmax(dim=1)

epoch = 200
for e in range(epoch):
    loss_sum = 0.0
    for b in range(x.shape[0]):
        # One sentence per step (batch size 1).
        y = F(x[b:b+1])

        loss = loss_function(y, target_index[b:b+1])
        # .item() yields a plain Python number, so the running total does not
        # keep every step's autograd graph alive.
        loss_sum += loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if (e + 1) % 10 == 0:
        print("epoch {} | loss {}".format(e + 1, loss_sum))


['hate', 'cat', 'dog', 'coffee', 'milk', 'i', 'you', 'love', 'like']
{'hate': 0, 'cat': 1, 'dog': 2, 'coffee': 3, 'milk': 4, 'i': 5, 'you': 6, 'love': 7, 'like': 8}
epoch 10 | loss 6.712777137756348
epoch 20 | loss 3.2906317710876465
epoch 30 | loss 0.23388373851776123
epoch 40 | loss 0.12028659880161285
epoch 50 | loss 0.08005672693252563
epoch 60 | loss 0.05977782607078552
epoch 70 | loss 0.04763994365930557
epoch 80 | loss 0.039583541452884674
epoch 90 | loss 0.03385429456830025
epoch 100 | loss 0.02957289107143879
epoch 110 | loss 0.026254232972860336
epoch 120 | loss 0.02360546588897705
epoch 130 | loss 0.021443430334329605
epoch 140 | loss 0.019644642248749733
epoch 150 | loss 0.018125299364328384
epoch 160 | loss 0.016824405640363693
epoch 170 | loss 0.015698116272687912
epoch 180 | loss 0.01471323799341917
epoch 190 | loss 0.013845104724168777
epoch 200 | loss 0.013073905371129513


In [5]:
# Predict the third word of each training sentence from its first two words.
# For reference, the sentences are: "I like dog", "I love coffee",
# "I hate milk", "You like cat", "You love milk", "You hate coffee".

with torch.no_grad():  # inference only, so skip building the autograd graph
    result = F(x)

# Highest-scoring class per sentence, decoded back to its word.
result_arg = torch.argmax(result, dim=1)
for i in result_arg:
    print(number_dict[i.item()])


dog
coffee
milk
cat
milk
coffee


In [3]:
import pickle

# Persist the word -> index mapping to "test.pickle" in the working directory.
with open("test.pickle", "wb") as vocab_file:
    pickle.dump(word_dict, vocab_file)

In [4]:
# Read the mapping back to check the round trip.
# pickle.load can execute arbitrary code, so only use it on trusted files.
with open("test.pickle", "rb") as vocab_file:
    A = pickle.load(vocab_file)

print(A)

{'hate': 0, 'cat': 1, 'dog': 2, 'coffee': 3, 'milk': 4, 'i': 5, 'you': 6, 'love': 7, 'like': 8}
