In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

ModuleNotFoundError: No module named 'torch'

## RNN with a simple example

In [None]:
# Hyperparameters for the one-hot character RNN trained in this section.
EPOCHS = 100          # number of optimisation passes over the training string
LEARNING_RATE = 0.01  # step size handed to the Adam optimizer
HIDDEN_DIM = 35       # width of the recurrent hidden state

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# Training text for the toy RNN. Every character must be present in `chars`
# (defined in the next cell), otherwise the one-hot encoder raises ValueError.
# NOTE(review): the later `import string` cell rebinds this name to the stdlib
# module, so cells that read `string` as text must run before that import.
string = "hello pytorch and data analytics."

In [None]:
# Vocabulary: lowercase letters, space and period, followed by "0" and "1",
# whose slots (second-to-last and last) are used as start/end markers by
# string_to_onehot.
chars = "abcdefghijklmnopqrstuvwxyz .01"
char_list = list(chars)
n_letters = len(char_list)
n_letters

In [None]:
def string_to_onehot(string):
    """Encode `string` as a matrix of one-hot rows framed by start/end markers.

    Row 0 is the start marker (second-to-last vocabulary slot set), the final
    row is the end marker (last slot set), and the rows in between are the
    one-hot encodings of the characters of `string`, in order.

    Args:
        string: text whose characters all occur in the global `char_list`.

    Returns:
        Integer ndarray of shape (len(string) + 2, n_letters).

    Raises:
        ValueError: if a character of `string` is not in `char_list`.
    """
    # Allocate the whole matrix once instead of re-copying it with np.vstack
    # for every character (which is quadratic in the string length).
    onehot = np.zeros(shape=(len(string) + 2, n_letters), dtype=int)
    onehot[0, -2] = 1
    onehot[-1, -1] = 1

    for row, ch in enumerate(string, start=1):
        onehot[row, char_list.index(ch)] = 1
    return onehot

In [None]:
# Expect 6 rows: start marker, "d", "a", "t", "a", end marker.
string_to_onehot("data")

In [None]:
def onehot_to_string(onehot):
    """Return the vocabulary character at the arg-max position of `onehot`.

    Args:
        onehot: tensor of scores over the vocabulary; the arg-max is taken
            over the flattened tensor.

    Returns:
        The single-character string `char_list[argmax]`.
    """
    # Tensor.numpy() refuses CUDA tensors and tensors that require grad, so
    # detach and copy to host memory before converting.
    onehot_value = onehot.detach().cpu().numpy()
    return char_list[onehot_value.argmax()]

In [None]:
class RNN(nn.Module):
    """Minimal hand-rolled recurrent cell.

    One step computes
        hidden' = tanh(input2hidden(input) + hidden2hidden(hidden))
        output  = hidden2output(hidden')
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Create the three linear maps and the tanh non-linearity.

        Args:
            input_size: number of features of each input row.
            hidden_size: width of the hidden state.
            output_size: number of features of each output row.
        """
        super(RNN, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.input2hidden = nn.Linear(input_size, hidden_size)
        self.hidden2hidden = nn.Linear(hidden_size, hidden_size)
        self.hidden2output = nn.Linear(hidden_size, output_size)
        self.act_fn = nn.Tanh()

    def forward(self, input, hidden):
        """Advance the cell by one step.

        Args:
            input: tensor whose last dimension is `input_size`.
            hidden: previous hidden state, last dimension `hidden_size`.

        Returns:
            (output, hidden): the output for this step and the new hidden state.
        """
        hidden = self.act_fn(self.input2hidden(input) + self.hidden2hidden(hidden))
        output = self.hidden2output(hidden)
        return output, hidden

    def init_hidden(self):
        """Return an all-zero hidden state of shape (1, hidden_size).

        The state is created with the device and dtype of the module's
        weights, so it can be fed to `forward` after `.to(device)` or
        `.double()` without a manual transfer.
        """
        weight = self.hidden2hidden.weight
        return torch.zeros(1, self.hidden_size, device=weight.device, dtype=weight.dtype)

In [None]:
# Input and output widths both equal the vocabulary size: one-hot row in,
# one score per vocabulary character out.
rnn = RNN(n_letters, HIDDEN_DIM, n_letters).to(device)

In [None]:
# Mean-squared error between the raw network output and the one-hot row of the
# next character; Adam updates all parameters of `rnn`.
loss_func = nn.MSELoss().to(device)
optimizer_rnn = torch.optim.Adam(rnn.parameters(), lr = LEARNING_RATE)

In [None]:
# The method is referenced, not called: the cell displays the bound method's
# repr (which embeds the module summary), not the parameter tensors.
rnn.parameters

In [None]:
# Encode the training string once and keep it on the training device.
one_hot = torch.from_numpy(string_to_onehot(string)).float().to(device)

for i in range(EPOCHS):
    optimizer_rnn.zero_grad()
    # The model lives on `device`; make sure the initial state does too.
    hidden = rnn.init_hidden().to(device)
    total_loss = 0

    # Teacher forcing: feed row j and score the prediction against row j + 1.
    for j in range(one_hot.size(0) - 1):
        input_ = one_hot[j:j+1, :]
        target = one_hot[j+1]
        # Call the module (not .forward) so registered hooks are honoured.
        output, hidden = rnn(input_, hidden)
        total_loss = total_loss + loss_func(output.view(-1), target.view(-1))

    # One optimizer step per pass over the whole string.
    total_loss.backward()
    optimizer_rnn.step()

    if i % 50 == 0:
        # .item() prints a plain float instead of a graph-attached tensor.
        print(total_loss.item())

In [None]:
# One-hot start marker (second-to-last vocabulary slot), matching the first row
# produced by string_to_onehot.
start_tkn = torch.zeros(1, n_letters)
start_tkn[:, -2] = 1

with torch.no_grad():
    # Keep the state on the same device as the model.
    hidden = rnn.init_hidden().to(device)
    input_ = start_tkn.to(device)
    output_string = ""

    for i in range(len(string)):
        output, hidden = rnn(input_, hidden)
        # Copy to host memory first: the decoder converts the tensor to NumPy,
        # which is not possible for CUDA tensors.
        output_string += onehot_to_string(output.cpu())
        # The raw network output (not a re-encoded one-hot row) is fed back
        # as the next input.
        input_ = output

print(output_string)

## RNN and LSTM

In [None]:
!wget https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/tinyshakespeare/input.txt -P ./data

In [None]:
!pip install unidecode

In [None]:
import re
import unidecode
import random
import string
import time, math

import torch
import torch.nn as nn
import torch.nn.functional as F

In [None]:
# Hyperparameters for the Shakespeare character models. EPOCHS, HIDDEN_DIM and
# LEARNING_RATE rebind the constants of the first section.

# -- data --
CHUNK_LEN = 250        # characters per training chunk
BATCH_SIZE = 1         # batch dimension of the initial hidden state

# -- architecture --
EMBEDDING = 70         # size of each character embedding
HIDDEN_DIM = 100       # recurrent hidden-state width
NUM_LAYERS = 1         # stacked recurrent layers

# -- optimisation --
EPOCHS = 1000          # training iterations (one random chunk each)
LEARNING_RATE = 0.004  # Adam step size

In [None]:
# Vocabulary for the Shakespeare models: every character in the stdlib's
# string.printable; a character's index in this string is its class id.
characters = string.printable
n_characters = len(characters)
characters

In [None]:
# Read the corpus through a context manager so the file handle is closed, with
# an explicit encoding instead of the platform default; unidecode then
# transliterates the text to ASCII.
with open('./data/input.txt', encoding='utf-8') as corpus_file:
    text_file = unidecode.unidecode(corpus_file.read())
len_text_file = len(text_file)
len_text_file

In [None]:
def random_chunk(text=None, chunk_len=None):
    """Return a random contiguous slice of exactly `chunk_len + 1` characters.

    The extra character lets callers split the chunk into an input sequence
    (all but the last character) and a target sequence (all but the first).

    Args:
        text: corpus to sample from; defaults to the global `text_file`.
        chunk_len: number of input characters; defaults to the global
            `CHUNK_LEN`.

    Returns:
        A substring of `text` of length `chunk_len + 1`.

    Raises:
        ValueError: if `text` has fewer than `chunk_len + 1` characters.
    """
    if text is None:
        text = text_file
    if chunk_len is None:
        chunk_len = CHUNK_LEN

    # random.randint is inclusive on both ends, so the largest admissible start
    # must still leave chunk_len + 1 characters; otherwise the slice comes back
    # one character short.
    last_start = len(text) - chunk_len - 1
    if last_start < 0:
        raise ValueError(
            "text has %d characters, need at least %d" % (len(text), chunk_len + 1)
        )
    start_index = random.randint(0, last_start)
    end_index = start_index + chunk_len + 1
    return text[start_index : end_index]

# Show one sampled chunk; the output differs on every run (no seed is set).
print(random_chunk())

In [None]:
def character_to_tensor(string):
    """Map each character of `string` to its index in the global `characters`.

    Returns:
        1-D int64 tensor with one vocabulary index per character.

    Raises:
        ValueError: if a character is not contained in `characters`.
    """
    indices = [characters.index(symbol) for symbol in string]
    return torch.tensor(indices, dtype=torch.long)

print(character_to_tensor('ABCde'))

In [None]:
def random_training_set():
    """Sample one chunk and return (input, target) index tensors.

    The target is the input shifted by one character: input covers the chunk
    without its last character, target the chunk without its first.
    """
    sampled = random_chunk()
    return character_to_tensor(sampled[:-1]), character_to_tensor(sampled[1:])

In [None]:
# Display one (input, target) pair of index tensors.
random_training_set()

### Make RNN model

In [None]:
class EN_RNN_DE(nn.Module):
    """Character model: embedding encoder -> nn.RNN -> linear decoder.

    `forward` handles one character per call: its reshapes flatten the input
    to a single time step and the RNN output to a single row.
    """

    def __init__(self, input_size, embedding_size, hidden_size, output_size, num_layers):
        """Build the three sub-modules.

        Args:
            input_size: number of distinct input symbols (embedding rows).
            embedding_size: dimension of each symbol embedding.
            hidden_size: RNN hidden-state width.
            output_size: number of output scores per step.
            num_layers: number of stacked RNN layers.
        """
        super(EN_RNN_DE, self).__init__()

        self.input_size = input_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers

        self.encoder = nn.Embedding(self.input_size, self.embedding_size)
        self.rnn = nn.RNN(self.embedding_size, self.hidden_size, self.num_layers)
        self.decoder = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, hidden):
        """Run one character through the network.

        Args:
            input: integer tensor holding a single symbol index.
            hidden: hidden state of shape (num_layers, 1, hidden_size).

        Returns:
            (de_output, hidden): scores of shape (1, output_size) and the
            updated hidden state.
        """
        # Index tensors are usually built on the CPU; move them to wherever the
        # weights live (a no-op when they are already there).
        input = input.to(self.encoder.weight.device)
        en_output = self.encoder(input.view(1, -1))
        output, hidden = self.rnn(en_output, hidden)
        de_output = self.decoder(output.view(1, -1))
        return de_output, hidden

    def init_hidden(self, batch_size=None):
        """Return a zero hidden state on the module's device.

        Args:
            batch_size: batch dimension of the state; defaults to the global
                `BATCH_SIZE`.

        Returns:
            Tensor of shape (num_layers, batch_size, hidden_size).
        """
        if batch_size is None:
            batch_size = BATCH_SIZE
        weight = self.decoder.weight
        hidden = torch.zeros(self.num_layers, batch_size, self.hidden_size,
                             device=weight.device, dtype=weight.dtype)
        return hidden

In [None]:
# Input and output sizes both equal the vocabulary size: character index in,
# one score per vocabulary character out.
model = EN_RNN_DE(n_characters, EMBEDDING, HIDDEN_DIM, n_characters, NUM_LAYERS).to(device)

In [None]:
# Smoke test: push one character through the untrained model and print the
# tensor shapes. Input and state are moved to `device` because the model was.
inp = character_to_tensor("A").to(device)
print(inp.size())
hidden = model.init_hidden().to(device)
print(hidden.size())
out, hidden = model(inp, hidden)
print(hidden.size())
print(out.size())

In [None]:
# The method is referenced, not called: the cell displays the bound method's
# repr (which embeds the module summary), not the parameter tensors.
model.parameters

In [None]:
# Adam over the RNN model's parameters; cross-entropy between the per-character
# scores and the index of the next character.
# NOTE(review): this rebinds `loss_func`, replacing the MSELoss of the first section.
optimizer_model = torch.optim.Adam(model.parameters(), lr = LEARNING_RATE)
loss_func = nn.CrossEntropyLoss()

In [None]:
for i in range(EPOCHS):
    # Fresh random chunk per iteration. Everything the model touches is kept on
    # `device`; the previous .type(torch.LongTensor) / FloatTensor casts
    # silently moved tensors back to the CPU.
    inputs, targets = random_training_set()
    inputs = inputs.to(device)
    targets = targets.to(device)
    hidden = model.init_hidden().to(device)

    loss = torch.zeros(1, device=device)
    optimizer_model.zero_grad()

    # One step per (input, target) pair; the hidden state is carried through
    # the whole chunk.
    for x, target_index in zip(inputs, targets):
        y_ = target_index.unsqueeze(0)
        y, hidden = model(x, hidden)
        loss = loss + loss_func(y, y_)

    loss.backward()
    optimizer_model.step()

    if i % 100 == 0:
        # Average over the number of steps actually summed.
        print(loss.item() / len(inputs))

In [None]:
start_string = "b"
n_generated = 300   # number of characters to sample
temperature = 0.8   # scores are divided by this before sampling

char_tensor = character_to_tensor(start_string).to(device)
hidden = model.init_hidden().to(device)

print(start_string, end="")

# Sampling needs no gradients.
with torch.no_grad():
    for i in range(n_generated):
        output, hidden = model(char_tensor, hidden)

        # softmax(scores / T) is the normalised form of exp(scores / T): the
        # same sampling distribution, but without overflow for large scores.
        output_dist = torch.softmax(output.view(-1) / temperature, dim=0)
        top_i = torch.multinomial(output_dist, 1)[0].item()
        predicted_char = characters[top_i]

        print(predicted_char, end="")

        # Feed the sampled character back in as the next input.
        char_tensor = character_to_tensor(predicted_char).to(device)

### Make LSTM model

In [None]:
class EN_LSTM_DE(nn.Module):
    """Character model: embedding encoder -> nn.LSTM -> linear decoder.

    `forward` handles one character per call: its reshapes flatten the input
    to a single time step and the LSTM output to a single row.
    """

    def __init__(self, input_size, embedding_size, hidden_size, output_size, num_layers):
        """Build the three sub-modules.

        Args:
            input_size: number of distinct input symbols (embedding rows).
            embedding_size: dimension of each symbol embedding.
            hidden_size: LSTM hidden/cell-state width.
            output_size: number of output scores per step.
            num_layers: number of stacked LSTM layers.
        """
        super(EN_LSTM_DE, self).__init__()

        self.input_size = input_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers

        self.encoder = nn.Embedding(self.input_size, self.embedding_size)
        self.lstm = nn.LSTM(self.embedding_size, self.hidden_size, self.num_layers)
        self.decoder = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, hidden, cell):
        """Run one character through the network.

        Args:
            input: integer tensor holding a single symbol index.
            hidden: hidden state of shape (num_layers, 1, hidden_size).
            cell: cell state of the same shape as `hidden`.

        Returns:
            (de_output, hidden, cell): scores of shape (1, output_size) and the
            updated hidden and cell states.
        """
        # Index tensors are usually built on the CPU; move them to wherever the
        # weights live (a no-op when they are already there).
        input = input.to(self.encoder.weight.device)
        en_output = self.encoder(input.view(1, -1))
        output, (hidden, cell) = self.lstm(en_output, (hidden, cell))
        de_output = self.decoder(output.view(1, -1))
        return de_output, hidden, cell

    def init_hidden(self, batch_size=None):
        """Return zero (hidden, cell) states on the module's device.

        Args:
            batch_size: batch dimension of the states; defaults to the global
                `BATCH_SIZE`.

        Returns:
            Two tensors of shape (num_layers, batch_size, hidden_size).
        """
        if batch_size is None:
            batch_size = BATCH_SIZE
        weight = self.decoder.weight
        shape = (self.num_layers, batch_size, self.hidden_size)
        hidden = torch.zeros(*shape, device=weight.device, dtype=weight.dtype)
        cell = torch.zeros(*shape, device=weight.device, dtype=weight.dtype)
        return hidden, cell

In [None]:
# Same sizes as the RNN model above, so the two can be compared directly.
model_LSTM = EN_LSTM_DE(n_characters, EMBEDDING, HIDDEN_DIM, n_characters, NUM_LAYERS).to(device)

In [None]:
# The method is referenced, not called: the cell displays the bound method's
# repr (which embeds the module summary), not the parameter tensors.
model_LSTM.parameters

In [None]:
# Smoke test: push one character through the untrained LSTM and print shapes.
# Input and states are moved to `device` because the model was.
sample_input = character_to_tensor("A").to(device)
print(sample_input)

hidden, cell = model_LSTM.init_hidden()
hidden, cell = hidden.to(device), cell.to(device)
print(hidden.size())

output, hidden, cell = model_LSTM(sample_input, hidden, cell)
print(output.size())

In [None]:
# Separate Adam optimizer for the LSTM's parameters; `loss_func` is rebound to
# a fresh CrossEntropyLoss instance.
optimizer_lstm = torch.optim.Adam(model_LSTM.parameters(), lr = LEARNING_RATE)
loss_func = nn.CrossEntropyLoss()

In [None]:
for i in range(EPOCHS):
    # Fresh random chunk per iteration. Everything the model touches is kept on
    # `device`; the previous .type(torch.LongTensor) / FloatTensor casts
    # silently moved tensors back to the CPU.
    inputs, targets = random_training_set()
    inputs = inputs.to(device)
    targets = targets.to(device)
    hidden, cell = model_LSTM.init_hidden()
    hidden, cell = hidden.to(device), cell.to(device)

    loss = torch.zeros(1, device=device)
    optimizer_lstm.zero_grad()

    # One step per (input, target) pair; hidden and cell state are carried
    # through the whole chunk.
    for x, target_index in zip(inputs, targets):
        y_ = target_index.unsqueeze(0)
        y, hidden, cell = model_LSTM(x, hidden, cell)
        loss = loss + loss_func(y, y_)

    loss.backward()
    optimizer_lstm.step()

    if i % 100 == 0:
        # Average over the number of steps actually summed.
        print(loss.item() / len(inputs))

In [None]:
start_string = "b"
n_generated = 300   # number of characters to sample
temperature = 0.8   # scores are divided by this before sampling

char_tensor = character_to_tensor(start_string).to(device)
hidden, cell = model_LSTM.init_hidden()
hidden, cell = hidden.to(device), cell.to(device)

print(start_string, end="")

# Sampling needs no gradients.
with torch.no_grad():
    for i in range(n_generated):
        output, hidden, cell = model_LSTM(char_tensor, hidden, cell)

        # softmax(scores / T) is the normalised form of exp(scores / T): the
        # same sampling distribution, but without overflow for large scores.
        output_dist = torch.softmax(output.view(-1) / temperature, dim=0)
        top_i = torch.multinomial(output_dist, 1)[0].item()
        predicted_char = characters[top_i]

        print(predicted_char, end="")

        # Feed the sampled character back in as the next input.
        char_tensor = character_to_tensor(predicted_char).to(device)