The hidden cell containing the current and prior state is calculated with the following formula:

$$h(t) = \tanh(W_h h_{t-1} + W_x x_t + B_h)$$

$$y(t) = W_y h_t + B_y$$

Tanh is the hyperbolic tangent function, which is defined as $\tanh(x) = \dfrac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$.

At each network block, the weights $W_x$ are applied to the numeric word-vector input, the weights $W_h$ are applied to the previous hidden state, and the weights $W_y$ are applied to the final state. The tanh activation function is applied to the hidden layer to produce values between $-1$ and $1$.

In [None]:
import torch
import torchtext
from torchinfo import summary
from torchnlp import *
# load_dataset() returns four values, unpacked in order as the training set,
# the test set, the class names and the vocabulary.
train_dataset, test_dataset, classes, vocab = load_dataset()
# Number of vocabulary entries; passed to the classifiers below as vocab_size.
vocab_size = len(vocab)

In [None]:
class RNNClassifier(torch.nn.Module):
    """Sequence classifier: embedding -> simple RNN -> linear layer.

    The RNN outputs of all time steps are averaged along the sequence
    dimension and the averaged vector is projected to ``num_class`` scores.
    """

    def __init__(self, vocab_size, embed_dim, hidden_dim, num_class):
        """Build the layers.

        Args:
            vocab_size: Number of rows in the embedding table.
            embed_dim: Size of each embedding vector (RNN input size).
            hidden_dim: Size of the RNN hidden state.
            num_class: Number of output scores produced by the linear layer.
        """
        super().__init__()
        self.hidden_dim = hidden_dim
        self.embedding = torch.nn.Embedding(vocab_size, embed_dim)
        # batch_first=True: the RNN consumes (batch, seq, feature) tensors.
        self.rnn = torch.nn.RNN(embed_dim, hidden_dim, batch_first=True)
        self.fc = torch.nn.Linear(hidden_dim, num_class)

    def forward(self, x):
        """Return class scores for a batch of token-index sequences.

        Args:
            x: Integer tensor of token indices, batch dimension first.

        Returns:
            Tensor with one row of ``num_class`` scores per sequence.
        """
        embedded = self.embedding(x)
        # nn.RNN returns (per-step outputs, final hidden state); only the
        # per-step outputs are used here.
        outputs, _ = self.rnn(embedded)
        # Average over the sequence dimension (dim 1 because batch_first=True).
        # NOTE(review): if the batch is padded, padding steps are averaged in
        # as well - confirm this is intended.
        return self.fc(outputs.mean(dim=1))

In [None]:
# Shuffled training batches of 16 samples; each batch is assembled by the
# padify collate function.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    shuffle=True,
    collate_fn=padify,
    batch_size=16,
)

# 64-dimensional embeddings, 32 hidden units, one output score per class.
net = RNNClassifier(
    vocab_size=vocab_size,
    embed_dim=64,
    hidden_dim=32,
    num_class=len(classes),
)
net = net.to(device)

train_epoch(net, train_loader, lr=0.001)

In [None]:
# Show the class names so that predicted indices can be read back as labels.
print(f'class map: {classes}')

# Test batches use the same batch size, collate function and shuffling as the
# training loader.
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    shuffle=True,
    collate_fn=padify,
    batch_size=16,
)

In [None]:
# Show the prediction for one sample of the first test batch.
net.eval()

# Position, inside the batch, of the sample that is displayed. The batch
# counter must not be used for this: it counts batches, not samples.
sample_idx = 0

with torch.no_grad():
    for target, data in test_loader:

        # Map the token indices of the chosen sample back to words and drop
        # the unknown-word placeholder before printing.
        word_lookup = [vocab.itos[w] for w in data[sample_idx]]
        unknow_vals = {'<unk>'}
        word_lookup = [ele for ele in word_lookup if ele not in unknow_vals]
        print('Input text:\n {}\n'.format(word_lookup))

        data, target = data.to(device), target.to(device)
        pred = net(data)

        # Use the same sample for the raw index, the actual label and the
        # predicted label so the three printed lines always agree.
        predicted = torch.argmax(pred[sample_idx])
        actual = target[sample_idx]
        print(predicted)
        print("Actual:\nvalue={}, class_name= {}\n".format(actual, classes[actual]))
        print("Predicted:\nvalue={}, class_name= {}\n".format(predicted, classes[predicted]))
        # Only the first batch is inspected.
        break

In [None]:
class LSTMClassifier(torch.nn.Module):
    """Sequence classifier: embedding -> LSTM -> linear layer.

    The final hidden state of the LSTM is projected to ``num_class`` scores.
    """

    def __init__(self, vocab_size, embed_dim, hidden_dim, num_class):
        """Build the layers.

        Args:
            vocab_size: Number of rows in the embedding table.
            embed_dim: Size of each embedding vector (LSTM input size).
            hidden_dim: Size of the LSTM hidden state.
            num_class: Number of output scores produced by the linear layer.
        """
        super().__init__()
        self.hidden_dim = hidden_dim
        self.embedding = torch.nn.Embedding(vocab_size, embed_dim)
        # Replace the default embedding initialisation with normally
        # distributed values shifted by -0.5.
        # NOTE(review): randn_like samples a normal distribution; if uniform
        # values centred on zero were intended, rand_like would be needed -
        # confirm.
        self.embedding.weight.data = torch.randn_like(self.embedding.weight.data)-0.5
        # batch_first=True: the LSTM consumes (batch, seq, feature) tensors.
        self.rnn = torch.nn.LSTM(embed_dim, hidden_dim, batch_first=True)
        self.fc = torch.nn.Linear(hidden_dim, num_class)

    def forward(self, x):
        """Return class scores for a batch of token-index sequences.

        Args:
            x: Integer tensor of token indices, batch dimension first.

        Returns:
            Tensor with one row of ``num_class`` scores per sequence.
        """
        embedded = self.embedding(x)
        # nn.LSTM returns (per-step outputs, (hidden state, cell state));
        # only the hidden state is needed.
        _, (h, _) = self.rnn(embedded)
        # h[-1] selects the last entry along the first dimension of the
        # hidden state (the only one for this single-layer LSTM).
        # NOTE(review): without packing, this state is taken after any
        # padding steps at the end of a sequence - confirm this is acceptable.
        return self.fc(h[-1])

In [None]:
# Same sizes as the RNN model: 64-dimensional embeddings, 32 hidden units.
net = LSTMClassifier(
    vocab_size=vocab_size,
    embed_dim=64,
    hidden_dim=32,
    num_class=len(classes),
)
net = net.to(device)

train_epoch(net, train_loader, lr=0.001)

In [None]:
def pad_length(b):
    """Collate a minibatch into labels, zero-padded features and lengths.

    Args:
        b: Sequence of samples; element 0 of each sample is its label and
            element 1 is the text handed to ``encode``.

    Returns:
        Tuple of three tensors: the labels shifted down by one, the encoded
        sequences right-padded with zeros to the longest one in the batch,
        and the original (unpadded) length of every sequence.
    """
    # Turn every text into its vectorized sequence.
    encoded = [encode(sample[1]) for sample in b]

    # Individual lengths, and the longest one, which sets the padded width.
    len_seq = [len(tokens) for tokens in encoded]
    longest = max(len_seq)

    labels = torch.LongTensor([sample[0] - 1 for sample in b])

    padded_rows = []
    for tokens in encoded:
        row = torch.tensor(tokens)
        missing = longest - len(tokens)
        # Pad only on the right of the last dimension, with zeros.
        padded_rows.append(
            torch.nn.functional.pad(row, (0, missing), mode='constant', value=0)
        )

    return labels, torch.stack(padded_rows), torch.tensor(len_seq)

# Loaders whose batches also carry the sequence lengths (see pad_length).
train_loader_len = torch.utils.data.DataLoader(
    train_dataset,
    shuffle=True,
    collate_fn=pad_length,
    batch_size=16,
)
test_loader_len = torch.utils.data.DataLoader(
    test_dataset,
    shuffle=True,
    collate_fn=pad_length,
    batch_size=16,
)

In [None]:
class LSTMPackClassifier(torch.nn.Module):
    """LSTM sequence classifier that packs padded batches before the LSTM.

    The embedded batch is converted to a packed sequence using the true
    sequence lengths, and the final hidden state is projected to
    ``num_class`` scores.
    """

    def __init__(self, vocab_size, embed_dim, hidden_dim, num_class):
        """Build the layers.

        Args:
            vocab_size: Number of rows in the embedding table.
            embed_dim: Size of each embedding vector (LSTM input size).
            hidden_dim: Size of the LSTM hidden state.
            num_class: Number of output scores produced by the linear layer.
        """
        super().__init__()
        self.hidden_dim = hidden_dim
        self.embedding = torch.nn.Embedding(vocab_size, embed_dim)
        # Replace the default embedding initialisation with normally
        # distributed values shifted by -0.5.
        # NOTE(review): randn_like samples a normal distribution; if uniform
        # values centred on zero were intended, rand_like would be needed -
        # confirm.
        self.embedding.weight.data = torch.randn_like(self.embedding.weight.data)-0.5
        # batch_first=True: the LSTM consumes (batch, seq, feature) tensors.
        self.rnn = torch.nn.LSTM(embed_dim, hidden_dim, batch_first=True)
        self.fc = torch.nn.Linear(hidden_dim, num_class)

    def forward(self, x, lengths):
        """Return class scores for a padded batch of token-index sequences.

        Args:
            x: Integer tensor of padded token indices, batch dimension first.
            lengths: Unpadded length of every sequence in ``x``; handed to
                ``pack_padded_sequence`` unchanged.

        Returns:
            Tensor with one row of ``num_class`` scores per sequence.
        """
        embedded = self.embedding(x)
        # enforce_sorted=False lets the batch arrive in any length order.
        packed = torch.nn.utils.rnn.pack_padded_sequence(
            embedded, lengths, batch_first=True, enforce_sorted=False
        )
        # Only the hidden state is needed; per-step outputs and the cell
        # state are discarded.
        _, (h, _) = self.rnn(packed)
        return self.fc(h[-1])

In [None]:
# Packed-sequence model with the same sizes as the earlier classifiers.
net = LSTMPackClassifier(
    vocab_size=vocab_size,
    embed_dim=64,
    hidden_dim=32,
    num_class=len(classes),
)
net = net.to(device)

# Train on the loader that also yields sequence lengths.
train_epoch_emb(net, train_loader_len, lr=0.001, use_pack_sequence=True)


In [None]:
# Inspect the packed-sequence model on the first test batch only.
net.eval()

with torch.no_grad():
    for label, text, off in test_loader_len:

        # Inputs and labels go to the model's device; the lengths are moved
        # to the CPU before being handed to the model.
        label = label.to(device)
        text = text.to(device)
        off = off.to('cpu')
        print(f'off value: {off}')

        scores = net(text, off)
        print(f'target {label}')

        # Highest-scoring class index for every sample in the batch.
        y = scores.argmax(dim=1)
        print(f'pred: {y}')

        # Report the first sample of the batch in readable form.
        first_pred = y[0]
        first_label = label[0]
        print("Predicted:\nvalue={}, class_name= {}\n".format(first_pred, classes[first_pred]))
        print("Target:\nvalue={}, class_name= {}\n".format(first_label, classes[first_label]))
        break
     