<a href="https://colab.research.google.com/github/ankuj/teaching/blob/main/nlp_lab_day_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


<br>
RNN Practical — Intro to Recurrent Neural Networks<br>
Topics: Motivation, Basics, Architectures (One-to-Many, Many-to-One, etc.), Shared Parameters<br>
Instructions: Complete each task by filling in the "Your answer here" sections.<br>


In [3]:
import numpy as np

------------------------------<br>
Task 1: RNN Architectures <br>
------------------------------

In [None]:
def task1_architectures():
    """
    Task: name the RNN architecture that fits each scenario below, choosing
    from One-to-One, One-to-Many, Many-to-One and Many-to-Many.

    Scenarios:
      a) sentiment analysis: a whole sentence in, a single label out
      b) music generation: a single start token in, an output sequence out
      c) named entity recognition: one tag out for every word in the sentence
      d) machine translation: source sentence in, target sentence out

    The function holds the written answers only; it computes nothing and
    returns None.
    """
    # Answers:
    #   a) Many-to-One
    #   b) One-to-Many
    #   c) Many-to-Many
    #   d) Many-to-Many (encoder-decoder)
    return None

------------------------------<br>
Task 2: Shared Parameters <br>
------------------------------

In [1]:
def task2_shared_parameters(d=4, h=3, T=10):
    """
    Count the trainable parameters of a single vanilla RNN cell.

    An RNN reuses the same weights at every time step (parameter sharing), so
    the total depends only on the input size and the hidden size, never on the
    sequence length:

        W_xh : h * d   (input  -> hidden)
        W_hh : h * h   (hidden -> hidden)
        b_h  : h       (hidden bias)

    Args:
        d: input size.
        h: hidden size.
        T: sequence length. Accepted so the example from the task statement
           can be passed in full, but it does not change the result because
           the parameters are shared across all T steps.

    Returns:
        The total parameter count, i.e. h*d + h*h + h (24 for the defaults).
    """
    input_to_hidden = d * h
    hidden_to_hidden = h * h
    hidden_bias = h
    return input_to_hidden + hidden_to_hidden + hidden_bias

print(task2_shared_parameters())

24


------------------------------<br>
Task 3: Manual Forward Pass <br>
------------------------------

In [14]:
from os import X_OK
def task3_manual_forward_pass(x_seq):
    """
    Unroll a tiny vanilla RNN by hand and collect every hidden state.

    The weights are fixed inside the function (input size 2, hidden size 2).
    Starting from h_0 = 0, each step applies
        h_t = tanh(W_hh @ h_{t-1} + W_xh @ x_t + b_h)

    Args:
        x_seq: sequence of length-2 input vectors, one per time step.

    Returns:
        A list with len(x_seq) + 1 arrays: the initial zero state followed by
        one hidden state per input vector.
    """
    # Example input (T=3):
    #   [np.array([0.5, -1.0]), np.array([1.0, 0.0]), np.array([-0.5, 0.5])]
    W_xh = np.array([[0.6, -0.2],
                     [0.1,  0.5]])
    W_hh = np.array([[0.3, 0.4],
                     [-0.2, 0.2]])
    b_h = np.array([0.0, 0.1])

    state = np.zeros(2)
    states = [state]

    for x_t in x_seq:
        # Recurrent contribution first, then the input contribution, then the bias.
        state = np.tanh(W_hh @ state + W_xh @ x_t + b_h)
        states.append(state)

    return states



------------------------------<br>
Task 4: NumPy RNN Cell Implementation <br>
------------------------------

In [19]:
def task4_numpy_rnn_cell():
    """
    Run a simple Many-to-One RNN classifier in NumPy on a toy dataset.

    For every toy sequence the final hidden state h_T is taken from
    task3_manual_forward_pass, a linear readout y = W_hy @ h_T + b_y is
    applied, and the readout is turned into class probabilities with softmax.

    W_hy is drawn from np.random.randn on every call and is never trained, so
    the probabilities change from run to run unless the NumPy seed is fixed
    by the caller.

    Returns:
        A list with one length-2 probability vector (np.ndarray) per toy
        sequence. The predicted class of a sequence is np.argmax of its vector.
    """

    # Toy dataset: four sequences of three 2-d inputs each.
    # Intended class labels, in order: 1, 0, 1, 0 (not used, since the
    # readout below is untrained).
    toy_sequences = [
        [np.array([1.0, 0.5]), np.array([0.2, 0.1]), np.array([0.3, -0.1])],
        [np.array([-0.5, -0.4]), np.array([0.1, -0.2]), np.array([-0.3, -0.1])],
        [np.array([0.8, 0.2]), np.array([0.5, 0.4]), np.array([0.1, 0.2])],
        [np.array([-0.6, -0.2]), np.array([-0.4, -0.3]), np.array([0.0, -0.1])],
    ]
    b_y = np.array([0.0, 0.1])

    hidden_size = 2
    num_classes = 2

    # Random, untrained readout weights of shape (num_classes, hidden_size).
    W_hy = np.random.randn(num_classes, hidden_size)

    y_s = []
    for sequence in toy_sequences:
        # Many-to-One: only the last hidden state feeds the readout.
        h_T = task3_manual_forward_pass(sequence)[-1]
        logits = W_hy @ h_T + b_y
        # Subtracting the max leaves softmax unchanged mathematically but
        # keeps np.exp from overflowing on large logits.
        exp_shifted = np.exp(logits - np.max(logits))
        y_s.append(exp_shifted / np.sum(exp_shifted))
    return y_s


print(task4_numpy_rnn_cell())

[array([0.61516023, 0.38483977]), array([0.41120963, 0.58879037]), array([0.5667828, 0.4332172]), array([0.43510336, 0.56489664])]


In [28]:
"""
Goal:
- Introduction to tensors in PyTorch
- Build a simple RNN-based classifier

Dataset:
- We will classify short sequences of numbers as "increasing" or "decreasing"
  Example:
    [1, 2, 3, 4] → Label: 1 (increasing)
    [5, 3, 1, 0] → Label: 0 (decreasing)

----------------------------------------------------
"""

import torch
import torch.nn as nn
import torch.optim as optim

# ====================================================
# STEP 1: Create a Tiny Synthetic Dataset
# ====================================================

def generate_data(num_samples=100, seq_len=4):
    """
    Build a random dataset of monotone sequences with binary labels.

    Each sample is seq_len uniform random numbers from torch.rand, sorted
    either ascending (label 1) or descending (label 0); the direction is
    chosen by a separate torch.rand draw compared against 0.5.

    Returns:
        (X, y) where X has shape (num_samples, seq_len, 1) and y is a 1-d
        tensor holding one 0/1 label per sample.
    """
    sequences, targets = [], []
    for _ in range(num_samples):
        # Draw the direction first, then the values (one coin flip per sample).
        ascending = torch.rand(1).item() > 0.5
        values = torch.rand(seq_len).sort(descending=not ascending).values
        # Add a trailing feature axis: (seq_len,) -> (seq_len, 1)
        sequences.append(values[:, None])
        targets.append(1 if ascending else 0)
    return torch.stack(sequences), torch.tensor(targets)

# Build the training set with generate_data's default arguments.
X, y = generate_data()
# X shape → (num_samples=100, seq_len=4, input_size=1)
# y shape → (num_samples=100,), one 0/1 label per sequence

# ====================================================
# STEP 2: Define a Simple RNN Classifier
# ====================================================

class RNNClassifier(nn.Module):
    """Sequence classifier: one nn.RNN layer followed by a linear readout."""

    def __init__(self, input_size=1, hidden_size=8, num_classes=2):
        super().__init__()
        self.hidden_size = hidden_size
        # batch_first=True: inputs are laid out as (batch, seq_len, input_size).
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        # Maps the hidden vector of the last time step to class logits.
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return class logits computed from the RNN output at the final time step."""
        per_step_outputs, _final_hidden = self.rnn(x)
        last_step = per_step_outputs[:, -1, :]
        return self.fc(last_step)


# Instantiate the classifier with its default sizes and show its layers.
model = RNNClassifier()
print(model)

# ====================================================
# STEP 3: Train the Model
# ====================================================

# Cross-entropy between the model's logits and the integer labels in y.
criterion = nn.CrossEntropyLoss()
# Adam over every parameter of the model, learning rate 0.01.
optimizer = optim.Adam(model.parameters(), lr=0.01)

num_epochs = 100
for epoch in range(num_epochs):
    # One full-batch update per epoch: the whole of X goes through the model at once.
    optimizer.zero_grad()          # clear gradients left over from the previous step
    outputs = model(X)             # forward pass → logits
    loss = criterion(outputs, y)
    loss.backward()                # backpropagate through the RNN
    optimizer.step()               # apply the parameter update

    # Report the loss every 10th epoch.
    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


# ====================================================
# STEP 4: Test the Model on New Data
# ====================================================

# Draw 10 fresh random sequences that were not part of the training batch.
test_X, test_y = generate_data(num_samples=10)

print("\nPredictions vs Actual:")
# Inference only: under no_grad() the outputs carry no autograd graph.
with torch.no_grad():
    test_outputs = model(test_X)
    # Predicted class = index of the largest logit in each row.
    predicted = test_outputs.argmax(dim=1)

    for seq, pred, actual in zip(test_X, predicted, test_y):
        print(f"Sequence: {seq.squeeze().tolist()}, Predicted: {pred.item()}, Actual: {actual.item()}")

RNNClassifier(
  (rnn): RNN(1, 8, batch_first=True)
  (fc): Linear(in_features=8, out_features=2, bias=True)
)
Epoch [10/100], Loss: 0.5799
Epoch [20/100], Loss: 0.3751
Epoch [30/100], Loss: 0.1004
Epoch [40/100], Loss: 0.0373
Epoch [50/100], Loss: 0.0230
Epoch [60/100], Loss: 0.0170
Epoch [70/100], Loss: 0.0140
Epoch [80/100], Loss: 0.0119
Epoch [90/100], Loss: 0.0104
Epoch [100/100], Loss: 0.0093

Predictions vs Actual:
Sequence: [0.09198939800262451, 0.3211410641670227, 0.5555521249771118, 0.9068019390106201], Predicted: 1, Actual: 1
Sequence: [0.29357975721359253, 0.42326390743255615, 0.4981306195259094, 0.9300925731658936], Predicted: 1, Actual: 1
Sequence: [0.9191019535064697, 0.6401392817497253, 0.6161629557609558, 0.2959928512573242], Predicted: 0, Actual: 0
Sequence: [0.20755892992019653, 0.3255099058151245, 0.6439730525016785, 0.6946362257003784], Predicted: 1, Actual: 1
Sequence: [0.03713345527648926, 0.19796228408813477, 0.3342568278312683, 0.5729206800460815], Predicted: 1