<a href="https://colab.research.google.com/github/anshulsinghkamboj-ml/nlp-/blob/main/lstm_gru_rnn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [21]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader,Dataset


In [22]:
# Toy corpus: six short English sentences that make up the whole dataset.
# Each entry is paired by position with the entry of `labels`.
texts = [
    "I love this movie",
    "This film was great",
    "What a wonderful experience really",  # five tokens: the longest sentence
    "I hated this movie",
    "This film was terrible",
    "What a horrible experience",
]

In [23]:

# One integer class id per sentence in `texts`, in the same order: the first
# three sentences (love / great / wonderful) get 1, the last three
# (hated / terrible / horrible) get 0.
labels = [1, 1, 1, 0, 0, 0]

In [24]:
def tokenize(sentence):
  """Lowercase `sentence` and split it on whitespace into a list of tokens."""
  lowered = sentence.lower()
  tokens = lowered.split()
  return tokens

In [25]:
# Build the vocabulary: index 0 is reserved for the padding token, and every
# new lowercase token gets the next free index in order of first appearance.
word2idx = {'<pad>': 0}

for sentence in texts:
  for word in tokenize(sentence):
    # len(word2idx) is evaluated before the insertion, so it is always the
    # next unused index; already-known words are left untouched.
    word2idx.setdefault(word, len(word2idx))

# Next unused index once every sentence has been scanned.
idx = len(word2idx)

# Number of distinct ids, padding token included.
vocab_size = len(word2idx)

# Token count of the longest sentence; `encode` pads shorter ones up to it.
max_len = max(len(tokenize(sentence)) for sentence in texts)


In [26]:
# Display the token -> index mapping built in the previous cell.
word2idx

{'<pad>': 0,
 'i': 1,
 'love': 2,
 'this': 3,
 'movie': 4,
 'film': 5,
 'was': 6,
 'great': 7,
 'what': 8,
 'a': 9,
 'wonderful': 10,
 'experience': 11,
 'really': 12,
 'hated': 13,
 'terrible': 14,
 'horrible': 15}

In [27]:
def encode(sentence):
  """Convert a sentence into a fixed-length tensor of vocabulary ids.

  The sentence is tokenized with `tokenize`, each token is looked up in
  `word2idx`, and the result is forced to exactly `max_len` entries: longer
  inputs are truncated, shorter ones are right-padded with the `<pad>` id.

  Args:
    sentence: raw text string.

  Returns:
    A 1-D `torch.long` tensor of length `max_len`.

  Raises:
    KeyError: if one of the first `max_len` tokens is missing from `word2idx`.
  """
  pad_id = word2idx['<pad>']
  # Truncate first: without this an over-long sentence produced a tensor
  # longer than `max_len`, which breaks `torch.stack` over a batch.
  tokens = tokenize(sentence)[:max_len]
  ids = [word2idx[w] for w in tokens]
  ids += [pad_id] * (max_len - len(ids))
  # Explicit dtype keeps the result integer-typed even when `ids` is empty.
  return torch.tensor(ids, dtype=torch.long)

In [28]:
# Encode every sentence, then stack the equal-length id tensors row by row
# into a single (num_sentences, max_len) matrix and display it.
encoded_texts = list(map(encode, texts))
encoded = torch.stack(encoded_texts, dim=0)
encoded

tensor([[ 1,  2,  3,  4,  0],
        [ 3,  5,  6,  7,  0],
        [ 8,  9, 10, 11, 12],
        [ 1, 13,  3,  4,  0],
        [ 3,  5,  6, 14,  0],
        [ 8,  9, 15, 11,  0]])

In [29]:
# Display the per-sentence id tensors (the unstacked form of `encoded`).
encoded_texts

[tensor([1, 2, 3, 4, 0]),
 tensor([3, 5, 6, 7, 0]),
 tensor([ 8,  9, 10, 11, 12]),
 tensor([ 1, 13,  3,  4,  0]),
 tensor([ 3,  5,  6, 14,  0]),
 tensor([ 8,  9, 15, 11,  0])]

In [30]:
# Convert the labels to an int64 tensor, the dtype `nn.CrossEntropyLoss`
# expects for class-index targets. `torch.as_tensor` is used instead of
# `torch.tensor` so that re-running this cell, when `labels` is already a
# tensor, returns it unchanged instead of copy-constructing it with a warning.
labels = torch.as_tensor(labels, dtype=torch.long)

In [31]:
class TextDataset(Dataset):
  """Map-style dataset pairing each input with the label at the same index."""

  def __init__(self, X, y):
    # Stored as given: X must support len() and indexing, y must support indexing.
    self.X, self.y = X, y

  def __len__(self):
    """Number of samples, taken from the length of X."""
    n_samples = len(self.X)
    return n_samples

  def __getitem__(self, idx):
    """Return the (input, label) pair stored at position `idx`."""
    sample = self.X[idx]
    target = self.y[idx]
    return sample, target

# Wrap the per-sentence id tensors and their labels so they can be batched.
dataset = TextDataset(encoded_texts, labels)

# Mini-batches of two samples; shuffle=True reorders the samples on every pass.
loader = DataLoader(dataset, batch_size=2, shuffle=True)


In [32]:
class SeqModel(nn.Module):
    """Sentence classifier: embedding -> recurrent layer -> linear head.

    The vocabulary size is read from the notebook-level `vocab_size`.

    Args:
        model_type: "rnn", "lstm" or "gru"; selects the recurrent layer.
        embed_dim: size of each token embedding (default 50).
        hidden_dim: size of the recurrent hidden state (default 64).
        pad_idx: token id used for right-padding (default 0, the `<pad>` id).

    Raises:
        ValueError: if `model_type` is not one of the supported names.
    """

    # Maps the public model-type names to the recurrent layer classes.
    _RNN_LAYERS = {"rnn": nn.RNN, "lstm": nn.LSTM, "gru": nn.GRU}

    def __init__(self, model_type="rnn", embed_dim=50, hidden_dim=64, pad_idx=0):
        super().__init__()

        if model_type not in self._RNN_LAYERS:
            raise ValueError("Unknown model type")

        # padding_idx keeps the pad embedding at zero and out of the gradient.
        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=pad_idx)
        self.rnn = self._RNN_LAYERS[model_type](embed_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, 2)
        self.model_type = model_type
        self.pad_idx = pad_idx

    def forward(self, x):
        """Return class logits of shape (batch, 2) for a (batch, seq_len) id tensor.

        Rows are assumed to be right-padded with `pad_idx`. The sequences are
        packed with their real lengths so the hidden state used for
        classification is the one after the last real token, not after the
        trailing padding.
        """
        # Real (non-pad) length of each row. Clamp to 1 because packing rejects
        # zero-length sequences; lengths must live on the CPU.
        lengths = (x != self.pad_idx).sum(dim=1).clamp(min=1).cpu()
        embedded = self.embedding(x)
        # enforce_sorted=False lets rows come in any length order; the returned
        # hidden state is restored to the original batch order.
        packed = nn.utils.rnn.pack_padded_sequence(
            embedded, lengths, batch_first=True, enforce_sorted=False
        )
        _, state = self.rnn(packed)
        # nn.LSTM returns (h_n, c_n); nn.RNN and nn.GRU return h_n directly.
        h = state[0] if self.model_type == "lstm" else state
        h = h[-1]   # last layer's hidden state
        return self.fc(h)




In [34]:
def train_model(model_type, epochs=20, lr=0.01, seed=42):
    """Train a `SeqModel` of the given type and print its accuracy.

    Training iterates over the notebook-level `loader`. Accuracy is then
    measured on the notebook-level `encoded` / `labels`, i.e. on the same
    sentences the model was trained on, so it is a training accuracy.

    Args:
        model_type: "rnn", "lstm" or "gru", forwarded to `SeqModel`.
        epochs: number of passes over `loader` (default 20).
        lr: learning rate given to Adam (default 0.01).
        seed: value passed to `torch.manual_seed` before the model is built so
            weight initialisation and batch shuffling are repeatable; pass
            None to leave the global RNG untouched.

    Returns:
        None; the accuracy is printed.
    """
    if seed is not None:
        torch.manual_seed(seed)

    model = SeqModel(model_type)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    model.train()
    for _ in range(epochs):
        for X_batch, y_batch in loader:
            optimizer.zero_grad()
            logits = model(X_batch)
            loss = criterion(logits, y_batch)
            loss.backward()
            optimizer.step()

    # accuracy (evaluation mode, no gradient tracking)
    model.eval()
    with torch.no_grad():
        logits = model(encoded)
        preds = torch.argmax(logits, dim=1)
        acc = (preds == labels).float().mean().item()

    print(f"{model_type.upper()} Accuracy: {acc:.2f}")

# -------------------------------------------------------------
# Train one model per recurrent architecture and print its accuracy
# -------------------------------------------------------------
for model_type in ("rnn", "lstm", "gru"):
    train_model(model_type)

RNN Accuracy: 1.00
LSTM Accuracy: 1.00
GRU Accuracy: 1.00
