# LSTMs Lab

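### Introduction

In this lab we build a text classifier with PyTorch and torchtext. We load the TREC question dataset, represent tokens with pretrained GloVe vectors, feed them through a multi-layer bidirectional LSTM, and train the model to assign each question to one of six classes.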

### Loading our Data

In [1]:
import torch
from torchtext import data
from torchtext import datasets
# Fix the random seed so that runs are repeatable.
SEED = 12
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

# include_lengths=True makes each batch's `text` a (token_ids, lengths) pair,
# which the model's forward(text, document_lengths) consumes.
TEXT = data.Field(tokenize = 'spacy', include_lengths = True)
# This is a multi-class problem trained with CrossEntropyLoss, which expects
# integer class indices as targets. Leaving `dtype` unset keeps the field's
# default integer dtype instead of forcing float labels.
LABEL = data.LabelField()



In [4]:
from torchtext import datasets

# Load the TREC train and test splits, processing text with TEXT and labels with LABEL.
train_data, test_data = datasets.TREC.splits(TEXT, LABEL)

downloading train_5500.label


train_5500.label: 100%|██████████| 336k/336k [00:00<00:00, 2.60MB/s]
TREC_10.label: 100%|██████████| 23.4k/23.4k [00:00<00:00, 1.10MB/s]


downloading TREC_10.label


In [5]:
# Build the text vocabulary from the training split only, capped at 25,000
# entries, and attach the pretrained "glove.6B.100d" vectors to it.
# NOTE(review): unk_init presumably initialises tokens that have no pretrained
# vector by sampling from a normal distribution -- confirm against torchtext docs.
TEXT.build_vocab(train_data, 
                 max_size = 25_000, 
                 vectors = "glove.6B.100d", 
                 unk_init = torch.Tensor.normal_)

.vector_cache/glove.6B.zip: 862MB [06:50, 2.10MB/s]                               
100%|█████████▉| 399999/400000 [00:28<00:00, 14197.02it/s]


In [6]:
# Build the label vocabulary from the training split.
LABEL.build_vocab(train_data)

Next, let's group our examples into batches with a `BucketIterator`, sorting the examples within each batch.

In [9]:
# Prefer the GPU when one is visible to PyTorch; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# One iterator per split; batches are created directly on `device`.
train_iterator, test_iterator = data.BucketIterator.splits(
    (train_data, test_data),
    device = device,
    batch_size = 64,
    sort_within_batch = True,
)



In [10]:
# Grab the `text` attribute of the first training batch and stop iterating;
# it is used further down to try out a forward pass of the model.
for batch in train_iterator:
    first_batch = batch.text
    break



### Building our Layers

Let's start by initializing the layers of our LSTM model.

In [22]:
import torch.nn as nn
import torch.nn.functional as F
class LSTM(nn.Module):
    """Multi-layer bidirectional LSTM classifier over padded token-index batches.

    Args:
        vocab_size: number of rows in the embedding table.
        embedding_dim: size of each token embedding.
        hidden_dim: hidden size of the LSTM, per direction.
        output_dim: number of output classes.
        n_layers: number of stacked LSTM layers.
        dropout: dropout probability applied between LSTM layers.
        pad_idx: index of the padding token; its embedding row receives no gradient.

    The defaults reproduce the values this notebook originally hard-coded, so
    ``LSTM()`` builds the same architecture as before.
    """

    def __init__(self, vocab_size=9343, embedding_dim=100, hidden_dim=256,
                 output_dim=6, n_layers=4, dropout=.5, pad_idx=1):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim,
                                      padding_idx=pad_idx)
        self.lstm_layer = nn.LSTM(embedding_dim, hidden_dim,
                                  num_layers=n_layers, bidirectional=True,
                                  dropout=dropout)
        # Forward and backward final states are concatenated, hence 2 * hidden_dim.
        self.fc = nn.Linear(hidden_dim * 2, output_dim)

    def forward(self, text, document_lengths):
        """Return per-class log-probabilities for a batch.

        Args:
            text: token indices laid out as (sequence length, batch size),
                since the LSTM is built with the default ``batch_first=False``.
            document_lengths: unpadded length of each sequence in the batch.

        Returns:
            Tensor of shape (batch size, output_dim) holding log-softmax scores.
        """
        embedded_batch = self.embedding(text)  # (seq_len, batch, embedding_dim)

        # pack_padded_sequence needs the lengths on the CPU, even when the
        # rest of the batch lives on the GPU.
        if isinstance(document_lengths, torch.Tensor):
            document_lengths = document_lengths.cpu()

        packed_embedded = nn.utils.rnn.pack_padded_sequence(
            embedded_batch, document_lengths, enforce_sorted=False)
        packed_output, (hidden, cell) = self.lstm_layer(packed_embedded)

        # `hidden` stacks (layer, direction) pairs; the last two entries are the
        # top layer's forward and backward final states.
        last_forward = hidden[-2, :, :]
        last_backward = hidden[-1, :, :]
        combined_hidden = torch.cat((last_forward, last_backward), dim=1)

        output_layer = self.fc(combined_hidden)
        return F.log_softmax(output_layer, dim=1)

In [15]:
# Build the model on the same device the iterators place their batches on, so
# that the forward pass on `first_batch` further down does not mix CPU
# parameters with GPU inputs.
lstm = LSTM().to(device)

Then let's copy over our embeddings from the vocab object.

In [16]:
# Rows of the embedding table that belong to the special <unk> and <pad> tokens.
UNK_IDX = TEXT.vocab.stoi[TEXT.unk_token]
PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]

# Overwrite the freshly initialised embedding weights with the vocab's vectors.
pretrained_embeddings = TEXT.vocab.vectors
lstm.embedding.weight.data.copy_(pretrained_embeddings)

# Zero out the rows for the two special tokens.
lstm.embedding.weight.data[PAD_IDX] = torch.zeros(100)
lstm.embedding.weight.data[UNK_IDX] = torch.zeros(100)

Now, let's pass our first batch through the model.

In [17]:
# `first_batch` is unpacked into the model's (text, document_lengths) arguments.
predictions = lstm(*first_batch)

In [19]:
# Sanity check: one row per example in the batch, one column per class.
predictions.shape

torch.Size([64, 6])

Finally, let's initialize our optimizer and loss function and train our model.

In [24]:
import torch.optim as optim

# Place the model on the target device first, then hand its parameters to Adam.
lstm = lstm.to(device)
optimizer = optim.Adam(lstm.parameters())

# Cross-entropy criterion, moved to the same device as the model.
c_e_loss = nn.CrossEntropyLoss()
c_e_loss = c_e_loss.to(device)

In [None]:
# Make sure dropout is active while training.
lstm.train()

for epoch in range(7):
    for batch in train_iterator:
        # batch.text is a (token_ids, lengths) pair.
        text, text_lengths = batch.text

        optimizer.zero_grad()
        # Use `device` rather than a hard-coded .cuda() so the loop also runs on
        # CPU-only machines. Lengths stay on the CPU because the model passes
        # them to pack_padded_sequence, which requires CPU lengths.
        preds = lstm(text.to(device), text_lengths.cpu())
        # CrossEntropyLoss expects integer class indices as targets.
        loss = c_e_loss(preds, batch.label.long().to(device))
        loss.backward()
        optimizer.step()
    # Loss of the last batch in this epoch.
    print(f"epoch {epoch}: loss {loss.item():.4f}")

In [None]:
def binary_accuracy(preds, y):
    """Return the fraction of predictions that match `y`.

    `preds` holds raw scores; each one is squashed with a sigmoid and rounded
    to the nearest integer (0 or 1) before being compared with `y`.
    """
    probabilities = torch.sigmoid(preds)
    hits = torch.eq(probabilities.round(), y).float()  # float so the division is fractional
    return hits.sum() / len(hits)

In [None]:
def categorical_accuracy(preds, y):
    """Return the fraction of rows whose highest-scoring class equals the label.

    Args:
        preds: scores of shape (batch size, number of classes).
        y: class index of each example, shape (batch size,).

    Returns:
        A one-element float tensor holding the accuracy, on the same device
        as the inputs.
    """
    max_preds = preds.argmax(dim = 1)  # index of the highest score in each row
    correct = max_preds.eq(y)
    # Build the divisor on the same device as the counts; a CPU-only divisor
    # fails when the predictions and labels live on the GPU.
    batch_size = torch.FloatTensor([y.shape[0]]).to(correct.device)
    return correct.sum() / batch_size

In [25]:
# model.eval()
    
# with torch.no_grad():
#     for batch in iterator:
#         predictions = model(batch.text)
#         loss = criterion(predictions, batch.label)
#         acc = categorical_accuracy(predictions, batch.label)
#         epoch_loss += loss.item()
#         epoch_acc += acc.item()

In [None]:
# Switch the trained model (`lstm`, the only model defined in this notebook)
# to evaluation mode so that dropout is disabled.
lstm.eval()

accuracies = []      # per-batch accuracy
batch_lengths = []   # number of examples in each batch
with torch.no_grad():
    for batch in test_iterator:
        # batch.text is a (token_ids, lengths) pair; the model passes the
        # lengths to pack_padded_sequence, which requires them on the CPU.
        text, text_lengths = batch.text
        outputs = lstm(text, text_lengths.cpu())
        labels = batch.label
        # The model emits one score per class, so the predicted class is the
        # argmax over classes rather than a rounded sigmoid. Tensors are moved
        # to the CPU before scoring so the metric runs on either device.
        accuracy = categorical_accuracy(outputs.cpu(), labels.cpu())
        accuracies.append(accuracy.item())
        batch_lengths.append(len(outputs))