In [9]:
import pandas as pd
import numpy as np

In [2]:
from practice_7_data import train_data, test_data

In [3]:
train_data

{'good': True,
 'bad': False,
 'happy': True,
 'sad': False,
 'not good': False,
 'not bad': True,
 'not happy': False,
 'not sad': True,
 'very good': True,
 'very bad': False,
 'very happy': True,
 'very sad': False,
 'i am happy': True,
 'this is good': True,
 'i am bad': False,
 'this is bad': False,
 'i am sad': False,
 'this is sad': False,
 'i am not happy': False,
 'this is not good': False,
 'i am not bad': True,
 'this is not sad': True,
 'i am very happy': True,
 'this is very good': True,
 'i am very bad': False,
 'this is very sad': False,
 'this is very happy': True,
 'i am good not bad': True,
 'this is good not bad': True,
 'i am bad not good': False,
 'i am good and happy': True,
 'this is not good and not happy': False,
 'i am not at all good': False,
 'i am not at all bad': True,
 'i am not at all happy': False,
 'this is not at all sad': True,
 'this is not at all happy': False,
 'i am good right now': True,
 'i am bad right now': False,
 'this is bad right now': Fa

In [10]:
class Converting:
    def __init__(self):
        pass

    def fit(self, data: dict):
        self.uniq_words = list(set(word for sen in data.keys() for word in sen.split(' '))) # ищем уникальные слова
        self.idx_words = {w: i for i, w in enumerate(self.uniq_words)} # задаем индексы словам
    
    def transform(self, text: str): # конвертируем слова в onehot векторы
        input_vectors = []
        for w in text.split(' '):
            v = np.zeros(len(self.uniq_words))
            v[self.idx_words[w]] = 1
            input_vectors.append(v)
        return input_vectors

In [11]:
conv = Converting()
conv.fit(train_data)
size_uniq_words = len(conv.uniq_words)

In [12]:
phrase = 'i am good'
conv_phrase = conv.transform(phrase)

In [13]:
conv_phrase

[array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
        0.]),
 array([0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0.]),
 array([0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0.])]

In [14]:
class RNN:
    def __init__(self, input_size, hidden_size, output_size):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        self.Wih = np.random.randn(hidden_size, input_size) / 1000
        self.Whh = np.random.randn(hidden_size, hidden_size) / 1000
        self.Who = np.random.randn(output_size, hidden_size) / 1000
        
        self.bh = np.zeros((hidden_size, 1))
        self.by = np.zeros((output_size, 1))

    def forward(self, inputs):
        h = np.zeros((self.hidden_size, 1)) # h(0) присваиваем нулевой вектор, так как нету h(t-1) 

        self.last_inputs = inputs
        self.h_dict = {0: h}

        for i, x in enumerate(inputs):
            x = x.reshape(-1, 1)
            h = np.tanh(self.Wih @ x + self.Whh @ h + self.bh)
            self.h_dict[i + 1] = h

        y = self.Who @ h + self.by
        y = np.exp(y) / np.sum(np.exp(y)) 

        return y
    
    def fit(self, inputs, target, lr=1e-5):
        out = self.forward(inputs)

        # вычисляем градиенты слоя softmax
        dl_dy = out.copy()
        dl_dy[target] -= 1

        # вычисляем градиенты Why и by
        cnt_words_in_sent = len(self.last_inputs)
        d_Why = dl_dy @ self.h_dict[cnt_words_in_sent].T # используем только последний h, который использовался для предсказания
        d_by = dl_dy 

        # вычисляем градиенты Whh, Wxh и bh
        d_Whh = np.zeros(self.Whh.shape)
        d_Wih = np.zeros(self.Wih.shape)
        d_bh = np.zeros(self.bh.shape)

        dl_dh = self.Who.T @ dl_dy # считаем градиент для последнего h

        # проходимся по всем h и считаем их градиенты
        for t in reversed(range(cnt_words_in_sent)):
            # dL/dht + dht` 
            temp = ((1 - self.h_dict[t + 1] ** 2) * dl_dh)

            d_bh += temp
            d_Whh += temp @ self.h_dict[t].T
            d_Wih += temp @ self.last_inputs[t].reshape(-1, 1).T
            dl_dh = self.Whh @ temp

        for d in [d_Wih, d_Whh, d_Why, d_bh, d_by]:
            np.clip(d, -1, 1, out=d)

        self.Wih -= lr * d_Wih
        self.Whh -= lr * d_Whh
        self.Who -= lr * d_Why        
        self.bh -= lr * d_bh
        self.by -= lr * d_by

In [20]:
model = RNN(size_uniq_words, hidden_size=64, output_size=2)

In [21]:
def modelTraining(data, fitting=False):
    items = list(data.items())
    np.random.shuffle(items)

    loss = 0
    num_correct = 0

    for x, y in items:
        inputs = conv.transform(x)
        target = int(y) 

        out = model.forward(inputs)

        loss -= np.log(out[target])
        num_correct += int(np.argmax(out) == target) 

        if fitting:
            model.fit(inputs, target, lr=1e-2)
    
    return loss / len(data), num_correct / len(data)

In [22]:
for epoch in range(900):
    train_loss, train_acc = modelTraining(train_data, fitting=True)

    if epoch % 100 == 99:
        print(f"epoch {epoch + 1}")
        print(f"TRAIN   loss: {train_loss}, accuracy: {train_acc}")
        
        test_loss, test_acc = modelTraining(test_data, fitting=False)
        print(f"TEST    loss: {test_loss}, accuracy: {test_acc}")

epoch 100
TRAIN   loss: [0.689691], accuracy: 0.5517241379310345
TEST    loss: [0.69787137], accuracy: 0.5
epoch 200
TRAIN   loss: [0.68322787], accuracy: 0.5172413793103449
TEST    loss: [0.69806186], accuracy: 0.5
epoch 300
TRAIN   loss: [0.66933651], accuracy: 0.5862068965517241
TEST    loss: [0.7235277], accuracy: 0.4
epoch 400
TRAIN   loss: [0.40472524], accuracy: 0.8793103448275862
TEST    loss: [0.50870393], accuracy: 0.8
epoch 500
TRAIN   loss: [0.45303855], accuracy: 0.8103448275862069
TEST    loss: [0.4436334], accuracy: 0.8
epoch 600
TRAIN   loss: [0.58526739], accuracy: 0.6551724137931034
TEST    loss: [0.61174401], accuracy: 0.7
epoch 700
TRAIN   loss: [0.37506232], accuracy: 0.8103448275862069
TEST    loss: [0.59979132], accuracy: 0.7
epoch 800
TRAIN   loss: [0.33129611], accuracy: 0.8793103448275862
TEST    loss: [0.35805558], accuracy: 0.85
epoch 900
TRAIN   loss: [0.07433673], accuracy: 0.9655172413793104
TEST    loss: [0.24360683], accuracy: 0.95


In [23]:
test_phrase = "i am bad"
conv_test_phrase = conv.transform(test_phrase)
probs = model.forward(conv_test_phrase)
['False', 'True'][np.argmax(probs)]

'False'

In [33]:
with open("output_8.txt", 'w', encoding='utf-8') as f:
    for x, y in test_data.items():
        inputs = conv.transform(x)
        target = int(y)
        f.write(f"Входная строка: {x}, true label: {y}\n")
        out = model.forward(inputs)
        error_str = " (Ошибка)" if np.argmax(out) != target else ""
        f.write(f"Предсказанное значение: {['False', 'True'][np.argmax(out)]} {error_str}\n")
                