# In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator
from torch.utils.data import DataLoader, Dataset

# Sample toy dataset
data = [
    ("I love this movie", 1),
    ("This film was amazing", 1),
    ("I hate this movie", 0),
    ("This film was terrible", 0),
]

# Tokenization & Vocabulary
tokenizer = get_tokenizer("basic_english")

def yield_tokens(data):
    for text, _ in data:
        yield tokenizer(text)

vocab = build_vocab_from_iterator(yield_tokens(data), specials=["<pad>", "<unk>"])
vocab.set_default_index(vocab["<unk>"])

# Encode text
def encode(text):
    return vocab(tokenizer(text))

# Dataset
class TextDataset(Dataset):
    def __init__(self, data, max_len=10):
        self.data = data
        self.max_len = max_len

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        text, label = self.data[idx]
        token_ids = encode(text)
        # Pad or truncate
        token_ids = token_ids[:self.max_len] + [vocab["<pad>"]] * (self.max_len - len(token_ids))
        return torch.tensor(token_ids), torch.tensor(label)

# Model
class LSTMClassifier(nn.Module):
    def __init__(self, vocab_size, embed_dim, hidden_dim, output_dim):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=vocab["<pad>"])
        self.lstm = nn.LSTM(embed_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)
    
    def forward(self, x):
        embedded = self.embedding(x)
        _, (hn, _) = self.lstm(embedded)
        out = self.fc(hn[-1])
        return out

# Setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
dataset = TextDataset(data)
dataloader = DataLoader(dataset, batch_size=2, shuffle=True)

model = LSTMClassifier(len(vocab), embed_dim=64, hidden_dim=128, output_dim=2).to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Training loop
for epoch in range(5):
    for inputs, labels in dataloader:
        inputs, labels = inputs.to(device), labels.to(device)
        preds = model(inputs)
        loss = loss_fn(preds, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    print(f"Epoch {epoch+1}: Loss = {loss.item():.4f}")


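# Cell output: ModuleNotFoundError: No module named 'torch'
# (PyTorch is not installed in this environment; install torch and torchtext before running.)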