<a href="https://colab.research.google.com/github/archyyu/encoder-related/blob/main/RNN_encoder.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import requests
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import math
import pandas as pd

In [3]:
# Download the chat dataset straight from GitHub into a DataFrame (needs network
# access). The cells below read its 'message' and 'sentiment' columns.
df = pd.read_csv('https://raw.githubusercontent.com/archyyu/publicResource/main/chat_dataset.csv')

In [4]:
# Hyperparameters
seed = 0               # RNG seed, applied below so runs are repeatable
hidden_size = 100      # width of the recurrent hidden state
embedding_dim = 100    # size of each word-embedding vector
seq_length = 25        # not referenced by the cells visible in this notebook
learning_rate = 1e-1   # step size handed to the optimizer
batch_size = 20        # number of examples get_batch() returns per call
dropout = 0.1          # probability passed to nn.Dropout when the model is built
eval_iters = 200       # not referenced by the cells visible in this notebook
head_size = 20         # not referenced by the cells visible in this notebook

# Seed torch's global generator: it drives both the weight initialisation and
# the torch.randint call used to pick batches, so without this every
# Restart & Run All produces a different model and loss curve.
torch.manual_seed(seed)

In [5]:
# Turn the raw DataFrame into integer-encoded, equal-length training data.
# Produces: data/targets (raw columns), the label and word lookup tables,
# lines (padded token lists), and X / Y (token-id rows and label ids).

pad = ''  # token used to right-pad short messages up to the longest one

# Read both columns in one go rather than iterating over the frame row by row.
data = df['message'].tolist()
targets = df['sentiment'].tolist()

# Label <-> id tables. Sorting the distinct labels makes the ids independent of
# the order in which rows appear in the file.
targetset = sorted(set(targets))
sentiment_to_index = {s: i for i, s in enumerate(targetset)}
index_to_sentiment = {i: s for i, s in enumerate(targetset)}

# Tokenise on single spaces.
lines = [message.split(' ') for message in data]

# Vocabulary = every distinct token plus the padding token. split(' ') emits ''
# for doubled, leading or trailing spaces, and '' is also the pad token, so the
# pad is appended only when it is missing. Appending unconditionally would give
# the vocabulary a duplicate entry and leave one embedding row unused.
dataset = sorted({token for line in lines for token in line})
if pad not in dataset:
  dataset.append(pad)
vocab_size = len(dataset)
word_to_index = {w: i for i, w in enumerate(dataset)}
index_to_word = {i: w for i, w in enumerate(dataset)}

# Right-pad every message (in place) to the length of the longest one so the
# rows can later be stacked into a rectangular tensor.
max_line = max(len(line) for line in lines)
for line in lines:
  line.extend([pad] * (max_line - len(line)))

# Integer encodings consumed by get_batch().
X = [[word_to_index[word] for word in line] for line in lines]
Y = [sentiment_to_index[label] for label in targets]

# Cheap invariants: one label per message, and every row has the same length.
assert len(X) == len(Y)
assert all(len(row) == max_line for row in X)

In [8]:
def get_batch():
  """Return one randomly positioned mini-batch of training examples.

  A random start offset is drawn and `batch_size` consecutive rows are taken
  from the module-level `X` (token-id rows) and `Y` (label ids).

  Returns:
    A pair `(inputs, targets)` of integer tensors: `inputs` holds the selected
    rows of `X` (one row per example) and `targets` the matching entries of
    `Y` (one label id per example).

  Raises:
    ValueError: if `X` holds fewer than `batch_size` examples.
  """
  n_examples = len(X)
  if n_examples < batch_size:
    raise ValueError(
        f'need at least batch_size={batch_size} examples, got {n_examples}')

  # torch.randint's upper bound is exclusive. The "+ 1" makes the last valid
  # start offset reachable; without it the final example could never be part of
  # a batch, and len(X) == batch_size would raise instead of returning the
  # whole data set.
  start = torch.randint(n_examples - batch_size + 1, [1]).item()
  stop = start + batch_size

  # Slicing the nested lists and converting once yields the same tensors as
  # building one tensor per example and stacking them.
  return torch.tensor(X[start:stop]), torch.tensor(Y[start:stop])

In [47]:
class Attention(nn.Module):
  """Attention pooling that collapses a sequence of states into one vector.

  Every time step is scored with `v . tanh(W h_t + b)`; the scores are
  normalised with a softmax along the time axis and used as weights for a sum
  of the input states.
  """

  def __init__(self, hidden_size):
    """Create the scoring layer and the learnable scoring vector.

    Args:
      hidden_size: size of the last dimension of the states to be pooled.
    """
    super().__init__()
    self.attn = nn.Linear(hidden_size, hidden_size)
    self.v = nn.Parameter(torch.rand(hidden_size))

  def forward(self, encoder_outputs):
    """Pool `encoder_outputs` over its time axis.

    Args:
      encoder_outputs: tensor indexed as (batch, time, hidden_size).

    Returns:
      Tensor of shape (batch, hidden_size): the attention-weighted sum of the
      states of each batch element.
    """
    energy = torch.tanh(self.attn(encoder_outputs))
    # Dot product with v removes the hidden axis: one score per time step.
    attention_scores = torch.matmul(energy, self.v)
    attention_weights = torch.softmax(attention_scores, dim=1)
    # Broadcast the weights over the hidden axis and sum out the time axis.
    return torch.sum(attention_weights.unsqueeze(2) * encoder_outputs, dim=1)

class ManillaRNN(nn.Module):
  """Single-layer tanh RNN encoder with attention pooling and a linear head.

  Token ids are embedded and fed step by step through the recurrence
  `h_t = tanh(i2h(e_t) + h2h(h_{t-1}) + hb2)`. All hidden states are pooled by
  `Attention` and the pooled vector is mapped to class scores by `o2o`.
  """

  def __init__(self, vocab_size, embedding_dim, hidden_size, output_size):
    """Build the layers.

    Args:
      vocab_size: number of rows in the embedding table.
      embedding_dim: size of each embedding vector.
      hidden_size: width of the recurrent hidden state.
      output_size: number of scores produced per input sequence.
    """
    super().__init__()
    self.hidden_size = hidden_size
    self.embedding = nn.Embedding(vocab_size, embedding_dim)
    self.i2h = nn.Linear(embedding_dim, hidden_size)
    self.h2h = nn.Linear(hidden_size, hidden_size)
    # h2o, ob and dropout do not contribute to the value forward() returns.
    # They stay registered, in the original order, so the parameter set and
    # state_dict keys of existing checkpoints are unchanged.
    self.h2o = nn.Linear(hidden_size, output_size)
    self.hb2 = nn.Parameter(torch.zeros(1, hidden_size))
    self.ob = nn.Parameter(torch.zeros(1, output_size))
    self.dropout = nn.Dropout(dropout)
    self.o2o = nn.Linear(hidden_size, output_size)
    self.att = Attention(hidden_size)

  def forward(self, x, targets=None):
    """Compute class scores for a batch of token-id sequences.

    Args:
      x: integer tensor indexed as (batch, time) holding token ids.
      targets: unused; accepted (and optional) so existing call sites that
        pass the labels or `None` keep working.

    Returns:
      Tensor of shape (batch, output_size) with unnormalised class scores.
    """
    # Allocate the initial state from a parameter so it inherits the model's
    # device and dtype, and size it to the actual batch.
    h = self.hb2.new_zeros(x.size(0), self.hidden_size)
    states = []
    for step in range(x.size(1)):
      token_vectors = self.embedding(x[:, step])
      h = torch.tanh(self.i2h(token_vectors) + self.h2h(h) + self.hb2)
      states.append(h)

    # (batch, time, hidden) -> attention pooling -> linear classification head.
    return self.o2o(self.att(torch.stack(states, dim=1)))


# Cross-entropy between the model's class scores and the integer label ids.
criterion = nn.CrossEntropyLoss()

# Size the output layer from the label table instead of a hard-coded 3, so the
# head always has exactly one score per sentiment id that can appear in Y.
model = ManillaRNN(vocab_size, embedding_dim, hidden_size, len(sentiment_to_index))
optimizer = optim.Adagrad(model.parameters(), lr=learning_rate)

In [50]:
# Train with one randomly positioned mini-batch per iteration.
n_iters = 10000
log_every = 200  # print the current batch loss every this many iterations

# Make sure the model is in training mode even if an earlier cell switched it
# to eval mode.
model.train()
for i in range(n_iters):
  # The batch labels get their own name: unpacking into `targets` would
  # overwrite the module-level list of sentiment labels built during data prep.
  inputs, labels = get_batch()
  predicts = model(inputs, labels)
  loss = criterion(predicts, labels)
  optimizer.zero_grad(set_to_none=True)
  loss.backward()
  optimizer.step()

  if i % log_every == 0:
    print(f'i {i}, loss:{loss.item()}')

i 0, loss:1.139782190322876
i 200, loss:0.10178257524967194
i 400, loss:0.015617896802723408
i 600, loss:0.01490145456045866
i 800, loss:0.3505648970603943
i 1000, loss:0.17916826903820038
i 1200, loss:0.0029538448434323072
i 1400, loss:0.0021805320866405964
i 1600, loss:0.09440372884273529
i 1800, loss:0.028152640908956528
i 2000, loss:0.059670109301805496
i 2200, loss:0.2503451704978943
i 2400, loss:0.001076431362889707
i 2600, loss:0.17417410016059875
i 2800, loss:0.062417738139629364
i 3000, loss:0.0005595135735347867
i 3200, loss:0.0003851846850011498
i 3400, loss:0.10458846390247345
i 3600, loss:0.0010935300961136818
i 3800, loss:0.0011113245273008943
i 4000, loss:0.059941161423921585
i 4200, loss:0.1989223062992096
i 4400, loss:0.14887848496437073
i 4600, loss:0.046296652406454086
i 4800, loss:0.0002931767958216369


KeyboardInterrupt: 

In [56]:
# Classify one hand-written sentence with the trained model.
hhh = "it is a normal of time"
words = hhh.split(' ')

# The vocabulary has no unknown-word entry, so fail with a message that names
# the offending words instead of a bare KeyError on the first one.
unknown = [word for word in words if word not in word_to_index]
if unknown:
  raise KeyError(f'words not in the training vocabulary: {unknown}')

# Training rows are right-padded to max_line, so pad the query the same way to
# give the model input shaped like what it was trained on. Sentences longer
# than max_line are left as they are.
words = words + [pad] * (max_line - len(words))
hgg = torch.tensor([word_to_index[word] for word in words])
hgg = hgg.unsqueeze(0)  # add the batch dimension: (1, time)

# Inference only: switch to eval mode and skip autograd bookkeeping, then put
# the model back into whatever mode it was in.
was_training = model.training
model.eval()
try:
  with torch.no_grad():
    pred = model(hgg, None)
finally:
  model.train(was_training)

print(index_to_sentiment[torch.argmax(pred).item()])

negative
