In [None]:
# -*- coding: utf-8 -*-
"""BIDirectionalLSTM.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1FOR5cQKH3qu2Q9fvNyOYvyNZ0WW8UeMR
"""

!pip install torchtext --quiet

import torch
import torch.nn as nn
import torch.optim as optim
from torchtext.datasets import IMDB
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator
from torch.utils.data import DataLoader
from torch.nn.utils.rnn import pad_sequence

# Tokenizer built from torchtext's 'basic_english' preset; shared by
# yield_tokens (vocabulary building) and text_pipeline (encoding).
tokenizer = get_tokenizer('basic_english')

def yield_tokens(data_iter):
    """Lazily yield the tokenized text of every item in *data_iter*.

    Each item of ``data_iter`` is unpacked as a ``(label, text)`` pair; the
    label is ignored and the text is run through the module-level
    ``tokenizer``.
    """
    yield from (tokenizer(review) for _label, review in data_iter)

# Build the vocabulary from the tokenized IMDB training reviews, reserving
# entries for the padding and unknown-word tokens.
train_iter = IMDB(split='train')
vocab = build_vocab_from_iterator(yield_tokens(train_iter), specials=["<pad>", "<unk>"])
# Tokens that are not in the vocabulary are looked up as "<unk>".
vocab.set_default_index(vocab["<unk>"])


def text_pipeline(x):
    """Tokenize the raw string *x* and return its list of vocabulary indices."""
    return vocab(tokenizer(x))


def label_pipeline(x):
    """Map a raw IMDB label to a class index: 1 for positive, 0 otherwise.

    Older torchtext releases yield the strings 'neg' / 'pos', while
    torchtext >= 0.12 yields the integers 1 (negative) / 2 (positive).
    Checking only for 'pos' would silently turn every integer label into
    class 0, so both encodings are accepted here.
    """
    return 1 if x in ('pos', 2) else 0

def collate_batch(batch):
    """Turn a list of ``(label, text)`` samples into model-ready tensors.

    Returns a pair ``(padded, labels)``:
      * ``padded`` - the int64 token-id sequences, padded with the
        ``<pad>`` index to a common length, batch dimension first;
      * ``labels`` - a 1-D tensor holding one class index per sample.
    """
    encoded_pairs = [
        (
            torch.tensor(text_pipeline(review), dtype=torch.int64),
            torch.tensor(label_pipeline(sentiment), dtype=torch.int64),
        )
        for sentiment, review in batch
    ]
    sequences = [pair[0] for pair in encoded_pairs]
    targets = [pair[1] for pair in encoded_pairs]

    pad_id = vocab['<pad>']
    padded = pad_sequence(sequences, batch_first=True, padding_value=pad_id)
    return padded, torch.tensor(targets)

# Create a fresh training iterator; the one created earlier was already
# iterated once while building the vocabulary.
train_iter = IMDB(split='train')
# list(...) materializes every (label, text) sample up front; collate_batch
# then converts each shuffled batch of 32 samples into padded tensors.
train_loader = DataLoader(list(train_iter), batch_size=32, shuffle=True, collate_fn=collate_batch)

class BiLSTMClassifier(nn.Module):
    """Single-layer bidirectional LSTM text classifier.

    Token ids are embedded, run through a bidirectional LSTM, and the final
    hidden states of the two directions are concatenated and projected to
    ``output_dim`` logits.

    Args:
        vocab_size: number of rows in the embedding table.
        embed_dim: size of each token embedding.
        hidden_dim: LSTM hidden size per direction.
        output_dim: number of output logits (classes).
        pad_idx: index of the padding token. When omitted, the module-level
            ``vocab['<pad>']`` is used, as before.
    """

    def __init__(self, vocab_size, embed_dim, hidden_dim, output_dim, pad_idx=None):
        super().__init__()
        if pad_idx is None:
            pad_idx = vocab['<pad>']
        # Remembered so forward() can work out the true sequence lengths.
        self.pad_idx = pad_idx
        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=pad_idx)
        self.lstm = nn.LSTM(embed_dim, hidden_dim, bidirectional=True, batch_first=True)
        self.fc = nn.Linear(hidden_dim * 2, output_dim)

    def forward(self, x):
        """Return logits of shape [batch, output_dim] for token ids ``x``.

        ``x`` is a [batch, time] integer tensor that is assumed to be
        right-padded with ``pad_idx`` (which is what ``pad_sequence``
        produces).
        """
        embedded = self.embedding(x)

        # Without packing, the LSTM also steps over the padding: the forward
        # direction's "final" state is taken after the pad tokens and the
        # backward direction starts on them, so the logits would depend on
        # how much padding the rest of the batch forced onto a sequence.
        # clamp(min=1) keeps an all-padding row from producing a zero length,
        # which pack_padded_sequence rejects. Lengths must live on the CPU.
        lengths = (x != self.pad_idx).sum(dim=1).clamp(min=1).cpu()
        packed = nn.utils.rnn.pack_padded_sequence(
            embedded, lengths, batch_first=True, enforce_sorted=False
        )

        _, (hidden, _) = self.lstm(packed)
        # Concatenate final forward and backward hidden states
        final_hidden = torch.cat((hidden[-2, :, :], hidden[-1, :, :]), dim=1)
        return self.fc(final_hidden)

# Run on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Two output logits (output_dim=2), trained with cross-entropy against the
# integer class labels produced by collate_batch.
model = BiLSTMClassifier(len(vocab), embed_dim=100, hidden_dim=128, output_dim=2).to(device)
criterion = nn.CrossEntropyLoss()
# Adam is used with its default hyperparameters (none are overridden here).
optimizer = optim.Adam(model.parameters())

# Training loop
for epoch in range(5):
    # Accumulates the SUM of the per-batch losses for this epoch (not a mean).
    total_loss = 0
    model.train()
    for x_batch, y_batch in train_loader:
        # Move the batch to the same device as the model.
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        # Clear gradients left over from the previous step before backprop.
        optimizer.zero_grad()
        predictions = model(x_batch)
        loss = criterion(predictions, y_batch)
        loss.backward()
        optimizer.step()
        # .item() extracts a Python float so no autograd graph is retained.
        total_loss += loss.item()
    print(f"Epoch {epoch+1}: Loss = {total_loss:.4f}")

def predict_sentiment(model, text):
    """Classify a single raw review string as "Positive" or "Negative".

    The text is encoded with the module-level ``text_pipeline``, wrapped in a
    batch of size one on ``device``, and scored by ``model``. Class index 1
    is reported as "Positive"; anything else as "Negative".

    Note: the model is switched to eval mode and left that way.
    """
    model.eval()
    with torch.no_grad():
        token_ids = torch.tensor(text_pipeline(text), dtype=torch.int64)
        single_batch = token_ids.unsqueeze(0).to(device)
        logits = model(single_batch)
        predicted_class = logits.argmax(1).item()

    if predicted_class == 1:
        return "Positive"
    return "Negative"

# Quick check: classify one hand-written review with the trained model.
print(predict_sentiment(model, "This movie was incredibly entertaining and emotional."))

import torch
import torch.nn as nn

class Attention(nn.Module):
    """Additive-free attention pooling over the time dimension.

    A single linear layer scores every time step; the scores are normalized
    with a softmax over time and used to form a weighted sum of the inputs.

    Args:
        hidden_dim: size H of the last dimension of the inputs.
    """

    def __init__(self, hidden_dim):
        super().__init__()
        # A single-layer feedforward network to compute attention scores
        self.attn = nn.Linear(hidden_dim, 1)

    def forward(self, encoder_outputs, mask=None):
        """Pool ``encoder_outputs`` into one context vector per batch item.

        Args:
            encoder_outputs: tensor of shape [B, T, H]
                (B = batch size, T = time steps, H = hidden size).
            mask: optional [B, T] tensor; truthy entries mark real time
                steps, falsy entries (e.g. padding) are excluded from the
                softmax and receive weight 0. Every row must keep at least
                one real time step, otherwise its weights become NaN.
                ``None`` (the default) attends over all time steps.

        Returns:
            ``(context, weights)`` where ``context`` is [B, H] and
            ``weights`` is [B, T, 1] and sums to 1 over T.
        """
        # Raw attention scores → [B, T, 1]
        scores = self.attn(encoder_outputs)

        if mask is not None:
            # -inf scores become exactly 0 after the softmax.
            excluded = ~mask.to(torch.bool).unsqueeze(-1)
            scores = scores.masked_fill(excluded, float('-inf'))

        # Normalize over the time dimension.
        weights = torch.softmax(scores, dim=1)

        # Compute weighted sum → context vector [B, H]
        context = torch.sum(weights * encoder_outputs, dim=1)

        return context, weights

class TransformerBlock(nn.Module):
    """Self-attention block followed by a position-wise feed-forward network.

    Each of the two sub-layers is wrapped in a residual connection, and layer
    normalization is applied after each residual addition.

    Args:
        embed_dim: size of the last dimension of the input and output.
        num_heads: number of attention heads.
        ff_hidden_dim: width of the hidden layer of the feed-forward network.
    """

    def __init__(self, embed_dim, num_heads, ff_hidden_dim):
        super().__init__()
        self.attn = nn.MultiheadAttention(
            embed_dim=embed_dim,
            num_heads=num_heads,
            batch_first=True,
        )
        feed_forward_layers = [
            nn.Linear(in_features=embed_dim, out_features=ff_hidden_dim),
            nn.ReLU(),
            nn.Linear(in_features=ff_hidden_dim, out_features=embed_dim),
        ]
        self.ff = nn.Sequential(*feed_forward_layers)
        self.norm1 = nn.LayerNorm(embed_dim)
        self.norm2 = nn.LayerNorm(embed_dim)

    def forward(self, x):
        """Apply the block to ``x`` (batch first) and return a same-shaped tensor."""
        # Self-attention: the input serves as query, key and value.
        attended = self.attn(query=x, key=x, value=x)[0]
        after_attention = self.norm1(x + attended)

        # Feed-forward sub-layer with its own residual connection.
        transformed = self.ff(after_attention)
        return self.norm2(after_attention + transformed)

