<a href="https://colab.research.google.com/github/archyyu/RNN-GPT/blob/main/studyRNN3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import requests
# Corpus to train on. The commented-out URLs below are alternative datasets;
# uncomment exactly one to switch.
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
#url = "https://raw.githubusercontent.com/archyyu/publicResource/main/google.dev.en"
#url = "https://raw.githubusercontent.com/archyyu/publicResource/main/KDE4.en-es.en"
#url = "https://raw.githubusercontent.com/archyyu/publicResource/main/js"
# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url, timeout=30)
# Fail loudly on 4xx/5xx instead of silently training on an HTTP error page.
response.raise_for_status()
# One entry per line, newline characters stripped. If the text ends with a
# newline the last entry is an empty string.
data = response.text.split('\n')

In [None]:
# Vocabulary: every distinct character of the corpus in sorted order, followed
# by the two marker characters ('\n' and '#') that are wrapped around each line.
chars = sorted(set(''.join(data)))
# Only add a marker if it is not already part of the vocabulary. An
# unconditional append would create a duplicate entry (e.g. when the corpus
# itself contains '#', or when this cell is re-run after the lines have been
# framed), inflating len(chars) and leaving an id that c2i never maps to.
chars += [marker for marker in ('\n', '#') if marker not in chars]
# id -> character and character -> id lookup tables.
i2c = dict(enumerate(chars))
c2i = {c: i for i, c in i2c.items()}

def encode(s):
  """Return the list of integer ids for the characters of ``s``.

  Each character is looked up in ``c2i``; a character missing from the
  vocabulary raises ``KeyError``.
  """
  return [c2i[ch] for ch in s]

def decode(l):
  """Return the string spelled by the integer ids in ``l``.

  Each id is looked up in ``i2c``; an unknown id raises ``KeyError``.
  """
  return ''.join(i2c[idx] for idx in l)

In [None]:
import torch
import torch.nn

# Frame every line with a leading '#' and a trailing '\n'. The slice
# assignment rewrites the existing list in place.
data[:] = ['#' + text + '\n' for text in data]

# Integer-encoded version of every framed line.
datalist = [encode(text) for text in data]

In [None]:
# Number of lines in the corpus; the training loop uses it to size the run
# and to wrap the line index.
n_lines = len(data)

In [None]:
def lineTensor(line):
  """One-hot encode the string ``line``.

  Returns a tensor of shape ``(len(line), 1, len(chars))`` where, for each
  position, the entry at the character's vocabulary id is 1 and all other
  entries are 0.
  """
  char_ids = encode(line)
  one_hot = torch.zeros((len(char_ids), 1, len(chars)))
  for pos, char_id in enumerate(char_ids):
    one_hot[pos][0][char_id] = 1
  return one_hot

def gen_input_target_pair(i):
  """Build the (input, target) training pair for line ``i`` of ``datalist``.

  The input is the one-hot encoding of every character except the last,
  shaped ``(len(line) - 1, 1, len(chars))``. The target is a ``LongTensor``
  holding the ids of every character except the first, i.e. the "next
  character" for each input position.
  """
  char_ids = datalist[i]
  n_steps = len(char_ids) - 1

  input_tensor = torch.zeros((n_steps, 1, len(chars)))
  for pos, char_id in enumerate(char_ids[:-1]):
    input_tensor[pos][0][char_id] = 1

  target_tensor = torch.LongTensor(char_ids[1:])
  return input_tensor, target_tensor


In [None]:
import torch
import torch.nn as nn

class RNN(nn.Module):
    """Single-step recurrent cell built from three linear layers.

    Each call consumes one input row plus the previous hidden state and
    returns log-probabilities over the output classes together with the new
    hidden state.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size

        joint_size = input_size + hidden_size
        # Both projections read the concatenated [input, hidden] vector.
        self.i2h = nn.Linear(joint_size, hidden_size)
        self.i2o = nn.Linear(joint_size, output_size)
        # Mixes the new hidden state with the preliminary output.
        self.o2o = nn.Linear(hidden_size + output_size, output_size)
        self.dropout = nn.Dropout(0.1)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        """Advance one step; return ``(log_probs, new_hidden)``.

        ``input`` and ``hidden`` are concatenated along dim 1, so they must
        agree in their first dimension.
        """
        joint = torch.cat((input, hidden), 1)
        next_hidden = self.i2h(joint)
        preliminary = self.i2o(joint)
        mixed = self.o2o(torch.cat((next_hidden, preliminary), 1))
        # Dropout is applied to the scores before normalisation.
        log_probs = self.softmax(self.dropout(mixed))
        return log_probs, next_hidden

    def initHidden(self):
        """Return an all-zero hidden state of shape ``(1, hidden_size)``."""
        return torch.zeros(1, self.hidden_size)

In [None]:
# Width of the recurrent hidden state.
n_hidden = 128
# Input and output sizes both equal the vocabulary size: the network reads a
# one-hot character and scores every character as the possible next one.
rnn = RNN(len(chars), n_hidden, len(chars))
# RNN.forward ends in LogSoftmax, so negative log-likelihood is the matching loss.
criterion = nn.NLLLoss()

# Step size of the manual gradient-descent update performed in train().
learning_rate = 0.0005

def train(input_line_tensor, target_line_tensor):
  """Run one gradient-descent step on a single line.

  Args:
    input_line_tensor: one-hot inputs, one row per time step; each
      ``input_line_tensor[i]`` is fed to ``rnn`` together with the hidden state.
    target_line_tensor: 1-D tensor of target class ids, one per time step.
      It is not modified.

  Returns:
    ``(output, mean_loss)``: the network output of the last time step and
    the summed loss divided by the number of time steps.
  """
  # Out-of-place unsqueeze: each targets[i] becomes a length-1 tensor to pair
  # with the (1, n_classes) output, without mutating the caller's tensor.
  targets = target_line_tensor.unsqueeze(-1)
  hidden = rnn.initHidden()
  rnn.zero_grad()

  loss = torch.Tensor([0]) # you can also just simply use ``loss = 0``

  for i in range(input_line_tensor.size(0)):
    output, hidden = rnn(input_line_tensor[i], hidden)
    loss += criterion(output, targets[i])

  loss.backward()

  # Plain SGD update. no_grad() keeps the update itself out of the autograd
  # graph; parameters that received no gradient are skipped.
  with torch.no_grad():
    for p in rnn.parameters():
      if p.grad is not None:
        p.add_(p.grad, alpha=-learning_rate)

  return output, loss.item() / input_line_tensor.size(0)

In [None]:
import random

# Ten passes' worth of single-line updates.
n_iters = n_lines * 10
print_every = 1000
plot_every = 500
all_losses = []  # mean loss of each ``plot_every``-sized window
total_loss = 0 # Reset every ``plot_every`` steps

# The counter is named ``step`` rather than ``iter`` so the builtin ``iter``
# is not shadowed for the rest of the notebook session.
for step in range(1, n_iters + 1):
  # Walk through the corpus line by line, wrapping around at the end.
  input_tensor, target_tensor = gen_input_target_pair(step % n_lines)
  output, loss = train(input_tensor, target_tensor)
  total_loss += loss

  if step % print_every == 0:
    # Step number, percentage of the run completed, loss of this step.
    print('(%d %d%%) %.4f' % (step, step / n_iters * 100, loss))

  if step % plot_every == 0:
    all_losses.append(total_loss / plot_every)
    total_loss = 0

In [None]:
# Generate text from the trained network, starting from a given letter
def sample(start_letter=' ', len=100):
  """Print 10 generated lines, each seeded with ``start_letter``.

  At every step the highest-scoring character is appended and fed back in
  as the next input. Generation of a line stops after ``len`` characters or
  as soon as a newline is produced.

  Args:
    start_letter: seed text; only its first character is fed to the network.
    len: maximum number of characters generated per line. The name shadows
      the builtin inside this function and is kept for backward compatibility.

  NOTE: the module's train/eval mode is left untouched. If ``rnn`` is still
  in training mode its dropout layer stays active, which is then the only
  source of variation between the 10 lines.
  """
  with torch.no_grad():  # no need to track history in sampling

    for _ in range(10):
      step_input = lineTensor(start_letter)
      hidden = rnn.initHidden()
      pieces = [start_letter]
      for _ in range(len):
        # Carry the hidden state forward. Discarding it would restart the
        # network from a zero state on every character.
        output, hidden = rnn(step_input[0], hidden)
        best_id = output.topk(1)[1][0][0].item()
        letter = i2c[best_id]
        pieces.append(letter)
        if letter == '\n':
          break
        step_input = lineTensor(letter)
      print(''.join(pieces))


sample('#', 100)


#                                                                                                    
#                                                                                                    
#                                                                                                    
#                                                                                                    
#                                                                                                    
#                                                                                                    
#                                                                                                    
#                                                                                                    
#                                                                                                    
#                                                                                 