In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.distributions import Categorical
import numpy as np
import pandas as pd
import ast
import math

In [3]:
class RNN(nn.Module):
    def __init__(self, input_size, output_size, hidden_size, num_layers):
        super(RNN, self).__init__()
        self.embedding = nn.Embedding(input_size, input_size)
        self.rnn = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers)
        self.decoder = nn.Linear(hidden_size, output_size)
    
    def forward(self, input_seq, hidden_state):
        embedding = self.embedding(input_seq)
        output, hidden_state = self.rnn(embedding, hidden_state)
        output = self.decoder(output)
        return output, (hidden_state[0].detach(), hidden_state[1].detach())
    

In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [5]:

hidden_size = 512   # size of hidden state
seq_len = 100       # length of LSTM sequence
num_layers = 3      # num of layers in LSTM layer stack
lr = 0.002          # learning rate
epochs = 3          # max number of epochs
op_seq_len = 9    # total num of characters in output test sequence


In [101]:
a = 218560
bin(a)
f'{6:32b}'
def int_to_binary(num, width):
    binary = bin(num)[2:].zfill(width)
    if num >= 0:
        return "1" + binary 
    else:
        return "0" + abs(binary)

int_to_binary(-2, 16)


'00000000000000b10'

In [47]:
binary = bin(11)
binary

'0b1011'

In [43]:
binary_repr(2).split()

['10']

In [40]:


# load the text file

data = pd.read_csv('data/preprocessed-3-rnn.csv')
data = data[data.columns[0]].apply(ast.literal_eval)
# data = data.apply(lambda seq: [np.binary_repr(num) for num in seq])
# data = data[data.columns[0]].str.split(',').to_numpy()
# data = [[int(num) for num in row] for row in data]

# data = data[data.columns[0]]
# data = [[int(num) for num in row] for row in data]
# # print(type(data[data.columns[0]].str.cat(sep = '')))
# chars = sorted(list(set(data)))
# # print(chars)
# data_size, vocab_size = len(data), len(chars)
# print("----------------------------------------")
# print("Data has {} characters, {} unique".format(data_size, vocab_size))
# print("----------------------------------------")
data

0        [1, 2, 1, 5, 5, 1, 11, 16, 7, 1, 23, 44, 30, 9...
1        [1, 8, 25, 83, 274, 2275, 132224, 1060067, 331...
2        [1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1, 1, 5, ...
3        [840, 1320, 1680, 2520, 3192, 3432, 4920, 5208...
4        [1, 2, 7, 27, 113, 483, 2138, 9681, 44374, 205...
                               ...                        
73794    [1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, ...
73795    [0, 0, 4, 1198, 1829388, 23796035743, 21429675...
73796    [0, 1, 9, 85, 801, 7549, 71145, 670501, 631908...
73797    [2, 3, 3, 4, 6, 4, 5, 10, 10, 5, 6, 15, 20, 15...
73798             [5, 7, 179, 229, 439, 557, 6113, 223999]
Name: NumSequence, Length: 73799, dtype: object

In [65]:
# char to index and index to char maps
char_to_ix = { ch:i for i,ch in enumerate(chars) }
ix_to_char = { i:ch for i,ch in enumerate(chars) }

# convert data from chars to indices
data = list(data)
for i, ch in enumerate(data):
    data[i] = char_to_ix[ch]

# data tensor on device
data = torch.tensor(data).to(device)
data = torch.unsqueeze(data, dim=1)

# model instance
rnn = RNN(vocab_size, vocab_size, hidden_size, num_layers).to(device)

# loss function and optimizer
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(rnn.parameters(), lr=lr)


In [62]:
# training loop
for i_epoch in range(1, epochs+1):
    
    # random starting point (1st 100 chars) from data to begin
    data_ptr = np.random.randint(100)
    n = 0
    running_loss = 0
    hidden_state = None
    
    while True:
        print(data_ptr)
        input_seq = data[data_ptr : data_ptr+seq_len]
        target_seq = data[data_ptr+1 : data_ptr+seq_len+1]
        
        # forward pass
        output, hidden_state = rnn(input_seq, hidden_state)
        
        # compute loss
        loss = loss_fn(torch.squeeze(output), torch.squeeze(target_seq))
        running_loss += loss.item()
        
        # compute gradients and take optimizer step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        # update the data pointer
        data_ptr += seq_len
        n +=1
        
        # if at end of data : break
        if data_ptr + seq_len + 1 > data_size:
            break
        
    # print loss and save weights after every epoch
    print("Epoch: {0} \t Loss: {1:.8f}".format(i_epoch, running_loss/n))
    
    # sample / generate a text sequence after every epoch
    data_ptr = 0
    hidden_state = None
    
    # random character from data to begin
    rand_index = np.random.randint(data_size-1)
    input_seq = data[rand_index : rand_index+1]
    
    print("----------------------------------------")
    while True:
        # forward pass
        output, hidden_state = rnn(input_seq, hidden_state)
        
        # construct categorical distribution and sample a character
        output = F.softmax(torch.squeeze(output), dim=0)
        dist = Categorical(output)
        index = dist.sample()
        
        # print the sampled character
        print(ix_to_char[index.item()], end='')
        
        # next input is current output
        input_seq[0][0] = index.item()
        data_ptr += 1
        
        if data_ptr > op_seq_len:
            break
        
    print("\n----------------------------------------")

66


TypeError: embedding(): argument 'indices' (position 2) must be Tensor, not list