In [1]:
import torch
from torch import nn
import torch.nn.functional as F
import numpy as np
import pandas as pd
from torch.autograd import Variable

In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
data = open('kafka.txt', 'r').read()

In [4]:
data = data.split()
data = data[:-1]

In [5]:
char_vocab = {x:i for i,x in enumerate(set(data))}
index_vocab = {v:k for k,v in char_vocab.items()}
data_index = [char_vocab[x] for x in data]

In [6]:
inputs = torch.tensor(data_index)
labels = data_index[1:]
labels.append(data_index[0])
labels = torch.tensor(labels)

In [7]:
sequence_len = 5
input_size = 10
num_layers = 1
hidden_size = 1024
batch_size = 20
num_classes = len(char_vocab)

In [8]:
inputs = inputs.view(batch_size, -1)
labels = labels.view(batch_size, -1)
num_batches = inputs.size(1) // sequence_len

In [9]:
class RNNToy(nn.Module):
    def __init__(self):
        super(RNNToy, self).__init__()
        self.embedding = nn.Embedding(len(char_vocab), 10)
        self.cell = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)
        self.linear = nn.Linear(hidden_size, num_classes)

    def forward(self, x, h):
        emb = self.embedding(x)
        out, _ = self.cell(emb, h)
        out = self.linear(out.reshape(-1, hidden_size))
        return out

def detach(states):
    return [state.detach() for state in states] 

In [14]:
model = RNNToy().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001/2*2)

In [16]:
num_of_epoh = 45
for epoh in range(num_of_epoh):
    states = (torch.zeros(num_layers, batch_size, hidden_size).to(device),
                 torch.zeros(num_layers, batch_size, hidden_size).to(device))
    avg_loss = 0
    for i in range(0, inputs.size(1)-sequence_len, sequence_len):
        batch_in = inputs[:, i: i+sequence_len].to(device)
        batch_label = labels[:, i: i+sequence_len].to(device)
        states = detach(states)
        output = model(batch_in, states)
        optimizer.zero_grad()
        loss = criterion(output, batch_label.reshape(-1))
        loss.backward()
        optimizer.step()
        avg_loss += loss.item()
        step = (i+1) // sequence_len
    print("[Epoh : {}/{}] loss={}".format(epoh+1, num_of_epoh, avg_loss/num_batches))

[Epoh : 1/45] loss=7.02563604927063
[Epoh : 2/45] loss=6.242861427307129
[Epoh : 3/45] loss=5.77207360458374
[Epoh : 4/45] loss=5.239101715087891
[Epoh : 5/45] loss=4.594894106864929
[Epoh : 6/45] loss=3.9215585660934447
[Epoh : 7/45] loss=3.3588982458114622
[Epoh : 8/45] loss=2.952076895713806
[Epoh : 9/45] loss=2.6245068140029906
[Epoh : 10/45] loss=2.3117921476364134
[Epoh : 11/45] loss=2.0391876759529115
[Epoh : 12/45] loss=1.8614677233695984
[Epoh : 13/45] loss=1.7154134640693663
[Epoh : 14/45] loss=1.591266420841217
[Epoh : 15/45] loss=1.485338463306427
[Epoh : 16/45] loss=1.3864935803413392
[Epoh : 17/45] loss=1.302882091999054
[Epoh : 18/45] loss=1.2280837464332581
[Epoh : 19/45] loss=1.1650268371105195
[Epoh : 20/45] loss=1.1125070986747743
[Epoh : 21/45] loss=1.0726917366981505
[Epoh : 22/45] loss=1.0447366874217987
[Epoh : 23/45] loss=1.017480136871338
[Epoh : 24/45] loss=0.9974478094577789
[Epoh : 25/45] loss=0.9765981900691986
[Epoh : 26/45] loss=0.9599587173461914
[Epoh :

In [15]:
# model.load_state_dict(torch.load('model.ckpt', map_location=device))
# model.eval()

RNNToy(
  (embedding): Embedding(4565, 10)
  (cell): LSTM(10, 1024, batch_first=True)
  (linear): Linear(in_features=1024, out_features=4565, bias=True)
)

In [92]:
output = None
predected_sent_index = data_index[:1]
predected_sent_index_2 = data_index[:1]
print("Input to Model: {}".format(' '.join(data[:1])))
print("\nExpected Output:\n{}".format(' '.join(data[:50])))
with torch.no_grad():
    states = (torch.zeros(num_layers, 1, hidden_size).to(device),
            torch.zeros(num_layers, 1, hidden_size).to(device))
    prob = torch.ones(len(char_vocab))
    input = torch.tensor(char_vocab["One"]).reshape(1,1).to(device)
    for i in range(50):
        # Forward propagate RNN 
        output = model(input, states)
        prob = output.exp()
        word_id = torch.multinomial(prob, num_samples=1).item()
        input.fill_(word_id)
        predected_sent_index.append(word_id)
        predected_sent_index_2.extend(torch.max(output, 1)[1].tolist())
print("\nPredicted Output after Sampling:\n{}".format(' '.join([index_vocab[x] for x in predected_sent_index])))
print("\nPredicted Output without Sampling:\n{}".format(' '.join([index_vocab[x] for x in predected_sent_index_2])))

Input to Model: One

Expected Output:
One morning, when Gregor Samsa woke from troubled dreams, he found himself transformed in his bed into a horrible vermin. He lay on his armour-like back, and if he lifted his head a little he could see his brown belly, slightly domed and divided by arches into stiff sections. The

Predicted Output after Sampling:
One floor; shock defend breadth dressed, 1.E.6. electronically PURPOSE. below. charge dressed, equipment. building old renamed. mistrust going insistent butcher's barricade consisting explaining crawling going consisting inedible fog convulsive thus below. sharply must below. permitted dressed, stuck dressed, "get shame, straight seen", before? going explaining search shock explaining hugged mistrust I've

Predicted Output without Sampling:
One floor; us explaining breadth dressed, explaining electronically dressed, came, Any dressed, explaining building old hostile, mistrust morning explaining slumber barricade months, itself crawling us 