In [49]:
'''
Aryaman Pandya 
Sequential Machine Learning 
Building a Vanilla RNN 
Model and trainer implementation 
Following https://github.com/rasbt/deeplearning-models/blob/master/pytorch_ipynb/rnn/rnn_bi_multilayer_lstm_own_csv_agnews.ipynb
implementation minus the memory unit for now 
'''
import torch 
import pandas as pd
import numpy as np
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator
import plotly
from torchtext.datasets import AG_NEWS
from torch import nn
from torch.utils.data import Dataset, DataLoader

#Class definition of Vanilla RNN 
class VanillaRNN(nn.Module): 
    
    def __init__(self, vocab_size, embed_size, hidden_size, output_len, num_layers) -> None:
        super(VanillaRNN, self).__init__()
        
        self.encoder = nn.Embedding(vocab_size, embed_size, padding_idx=0)
        self.hidden_size = hidden_size 
        self.output_len = output_len 
        
        self.rnn = nn.RNN(input_size=embed_size, hidden_size=hidden_size, num_layers=num_layers, dropout=0.5,
                                batch_first=True, bidirectional=True) #graph module to compute next hidden state 
        
        self.hidden2label = nn.Linear(2*hidden_size, 2)
        self.softmax = nn.LogSoftmax(dim=1)
        self.dropoutLayer = nn.Dropout(p=0.5)

    def forward(self, x, x_len):
        embedded = self.encoder(x)
        x_packed = nn.utils.rnn.pack_padded_sequence(embedded, x_len, batch_first=True, enforce_sorted=False)
        output, hidden = self.rnn(x_packed)  # Pass the initial hidden state 'h' to the RNN
        
        
        # Flatten the output tensor to match the linear layer input size
        output = output.contiguous().view(-1, 2 * self.hidden_size)
        
        # Apply dropout to the concatenated hidden state
        hidden = self.dropoutLayer(hidden)
        
        # Linear layer and softmax
        label_space = self.hidden2label(hidden)
        output_probs = self.softmax(label_space)
        
        return output_probs

    def init_h(self):
        return torch.zeros(1, self.hidden_size)

In [50]:
def yield_tokens(data_iter, tokenizer):
    for _, text in data_iter:
        yield tokenizer(text)

In [51]:
train_iter = (AG_NEWS(split="train"))
tokenizer = get_tokenizer("basic_english")
def yield_tokens(data_iter):
    for _, text in data_iter:
        yield tokenizer(text)

VOCAB_SIZE = 5000
vocab = build_vocab_from_iterator(yield_tokens(train_iter), specials=["<unk>"], max_tokens=VOCAB_SIZE)
vocab.set_default_index(vocab["<unk>"])
#train_loader = DataLoader(train_iter, batch_size = 8, shuffle = True, collate_fn = collate_batch)

In [52]:
vocab_size = len(vocab)

In [53]:
print(vocab_size)

5000


In [54]:
text_pipeline = lambda x: vocab(tokenizer(x))
label_pipeline = lambda x: int(x) - 1

In [55]:
vocab.lookup_tokens([4999])

['wreckage']

In [56]:
from torch.nn.utils.rnn import pad_sequence

def collate_batch(batch):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    label_list, text_list, lengths = [], [], []
    
    # Sort the batch in the descending order
    batch.sort(key=lambda x: len(x[1]), reverse=True)
    
    for _label, _text in batch:
        label_list.append(label_pipeline(_label))
        processed_text = torch.tensor(text_pipeline(_text), dtype=torch.int64)
        text_list.append(processed_text)
        lengths.append(processed_text.size(0))
        
    label_list = torch.tensor(label_list, dtype=torch.int64)
    lengths = torch.tensor(lengths, dtype=torch.int64)
    
    # Pad sequences
    text_list = pad_sequence(text_list, batch_first=True)
    
    return label_list.to(device), text_list.to(device), lengths

In [57]:
train_loader = DataLoader(train_iter, batch_size = 8, shuffle = True, collate_fn = collate_batch)

In [58]:
batch = next(iter(train_loader))

# Inspect the shape of the input data
input_data = batch[1]  # Assuming the input data is the first element of the batch
input_shape = input_data.shape[0]

In [59]:
input_shape

8

In [60]:
print(batch[0])
print(batch[1])
batch[2]

tensor([1, 2, 2, 2, 1, 0, 0, 1], device='cuda:0')
tensor([[1880,    3, 4016,    3,    0,  109,    0,   13,   31,   14,   31,   15,
            0,   46,   24,    2, 3030,    8,  256,    0, 1882,   72,  256, 3561,
            3, 1888,  276,    0, 4016,  643,    0,    0,    0,    6,    0, 2731,
            3, 1979,    7,    2,  781,  620,    6,    2,  351,  221,   16, 1201,
         1082,    1, 2078, 1880,  212,   32,  313, 1189,   18,    5, 1859,    3,
         2705,   13,  260,   14,  146,   38,    0,    0,    6, 1068,    3,    0,
          515,    0,    8,  748,  157, 3451,    1],
        [ 272, 1197,    4,  977,  680, 1113,  739,   13,   27,   14,   27,   15,
            2,  272,  439,    4,    5,  977,    0,    0,    2,  739,   10,   55,
           34,    5,    0,    6,    2,   51,    1,    9,    1,    0,    4,  145,
         1895,  931, 1271,   68,  477,    0,    0,   88,  159,    8,    5,  651,
         3445,    7,    2,  347,    1,    0,    0,    0,    0,    0,    0,    0,
       

tensor([79, 53, 49, 41, 38, 44, 35, 28])

In [61]:
LEARNING_RATE = 1e-3
BATCH_SIZE = 128
NUM_EPOCHS = 50
DROPOUT = 0.5
DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

EMBEDDING_DIM = 128
BIDIRECTIONAL = True
HIDDEN_DIM = 256
NUM_LAYERS = 2
OUTPUT_DIM = 4

In [62]:
model = VanillaRNN(vocab_size, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM, NUM_LAYERS)
model = model.to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [63]:
def train(model, train_loader, loss_function, optim, epochs):
    losses = [] #group losses for loss visualization 
    
    for epoch in range(epochs):
        print("Epoch %d / %d" % (epoch+1, epochs))
        print("-"*10)
        model.train()
    
        for i, batch_data in enumerate(train_loader):
            
            (y, x, x_size) = batch_data
            print("Labels: {}, data: {}, x_size.cpu(): {}".format(batch_data[0], x.shape,x_size.cpu()))
            h_s = model.init_h() #initialize hidden state 

            logits = model(x, x_size.cpu())
            loss = loss_function(logits, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            losses.append(loss)

            if(i % 10):
              print("Step: {}/{}, current Epoch loss: {:.4f}".format(i, len(train_loader), loss))  

In [64]:
train(model, train_loader, torch.nn.functional.cross_entropy, optimizer, NUM_EPOCHS)

Epoch 1 / 50
----------
Labels: tensor([3, 2, 2, 0, 1, 0, 2, 0], device='cuda:0'), data: torch.Size([8, 60]), x_size.cpu(): tensor([60, 54, 51, 43, 40, 45, 34, 25])




AttributeError: 'PackedSequence' object has no attribute 'contiguous'