<a href="https://colab.research.google.com/github/ankuj/teaching/blob/main/nlp_lab_day_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


<br>
RNN Practical — Intro to Recurrent Neural Networks<br>
Topics: Motivation, Basics, Architectures (One-to-Many, Many-to-One, etc.), Shared Parameters<br>
Instructions: Complete each task by filling in the "Your answer here" sections.<br>


In [1]:
import numpy as np

In [None]:
def task_zero():


------------------------------<br>
Task 1: RNN Architectures <br>
------------------------------

In [2]:
def task1_architectures():
    """
    Identify the correct RNN architecture (One-to-One, One-to-Many, Many-to-One, Many-to-Many)
    for the following scenarios:
    a) Sentiment analysis of a sentence -> single label
    b) Music generation from a single start token -> output sequence
    c) Named entity recognition: tag each word in a sentence
    d) Machine translation: source sentence -> target sentence
    """
    # Your answer here:
    # a) Many-to-One
    # b) One-to-Many
    # c) Many-to-Many
    # d) Many-to-Many (encoder-decoder)

------------------------------<br>
Task 2: Shared Parameters <br>
------------------------------

In [6]:
def task2_shared_parameters():
    """
    Explain shared parameters in an RNN.
    Compute parameter counts for an example:
      input size d=4, hidden size h=3, sequence length T=10
    """
    # Your answer here:

    d = 4
    h = 3
    T = 10

    num_params_Wxh = d * h
    num_params_Whh = h * h
    num_params_bh = h

    total_params = num_params_Wxh + num_params_Whh + num_params_bh

    print(f"Input size (d): {d}")
    print(f"Hidden size (h): {h}")
    print(f"Number of parameters in W_xh: {num_params_Wxh}")
    print(f"Number of parameters in W_hh: {num_params_Whh}")
    print(f"Number of parameters in b_h: {num_params_bh}")
    print(f"Total number of shared parameters: {total_params}")

task2_shared_parameters()

Input size (d): 4
Hidden size (h): 3
Number of parameters in W_xh: 12
Number of parameters in W_hh: 9
Number of parameters in b_h: 3
Total number of shared parameters: 24


------------------------------<br>
Task 3: Manual Forward Pass <br>
------------------------------

In [8]:
def task3_manual_forward_pass():
    """
    Compute hidden states manually for a small RNN using np.tanh.
    Input sequence length T=3, input size=2, hidden size=2
    """
    x_seq = [np.array([0.5, -1.0]),
             np.array([1.0, 0.0]),
             np.array([-0.5, 0.5])]
    h_prev = np.zeros(2)
    W_xh = np.array([[0.6, -0.2],
                     [0.1,  0.5]])
    W_hh = np.array([[0.3, 0.4],
                     [-0.2, 0.2]])
    b_h = np.array([0.0,0.1])
    h_list = []
    for x in x_seq:
        h_next = np.tanh(np.dot(W_xh, x) + np.dot(W_hh, h_prev) + b_h)
        h_list.append(h_next)
        h_prev = h_next

    for i, h in enumerate(h_list):
        print(f"Hidden state at time step {i+1}: {h}")

task3_manual_forward_pass()

Hidden state at time step 1: [ 0.46211716 -0.33637554]
Hidden state at time step 2: [0.53994993 0.04027965]
Hidden state at time step 3: [-0.21833125  0.19743869]


------------------------------<br>
Task 4: NumPy RNN Cell Implementation <br>
------------------------------

In [10]:
def task4_numpy_rnn_cell():
    """
    Implement a simple Many-to-One RNN in NumPy.
    Use rnn_forward to compute h_T, then compute a readout: y = W_hy h_T + b_y
    Predict class = argmax(y)
    """

    # Toy dataset
    toy_sequences = [
        [np.array([1.0,0.5]), np.array([0.2,0.1]), np.array([0.3,-0.1])],
        [np.array([-0.5,-0.4]), np.array([0.1,-0.2]), np.array([-0.3,-0.1])],
        [np.array([0.8,0.2]), np.array([0.5,0.4]), np.array([0.1,0.2])],
        [np.array([-0.6,-0.2]), np.array([-0.4,-0.3]), np.array([0.0,-0.1])]
    ]
    labels = np.array([1,0,1,0])


    input_size = 2
    hidden_size = 3
    output_size = 2

    W_xh = np.random.randn(hidden_size, input_size) * 0.01
    W_hh = np.random.randn(hidden_size, hidden_size) * 0.01
    b_h = np.zeros(hidden_size)
    W_hy = np.random.randn(output_size, hidden_size) * 0.01
    b_y = np.zeros(output_size)

    def rnn_forward(sequence, h_prev, W_xh, W_hh, b_h):
        h = h_prev
        for x in sequence:
            h = np.tanh(np.dot(W_xh, x) + np.dot(W_hh, h) + b_h)
        return h

    predictions = []
    for seq in toy_sequences:
        h_T = rnn_forward(seq, np.zeros(hidden_size), W_xh, W_hh, b_h)
        y = np.dot(W_hy, h_T) + b_y
        predicted_class = np.argmax(y)
        predictions.append(predicted_class)

    print("Toy Sequences:")
    for i, seq in enumerate(toy_sequences):
        print(f"Sequence {i+1}: {seq}")
    print("\nLabels:", labels)
    print("\nPredictions:", predictions)

task4_numpy_rnn_cell()

Toy Sequences:
Sequence 1: [array([1. , 0.5]), array([0.2, 0.1]), array([ 0.3, -0.1])]
Sequence 2: [array([-0.5, -0.4]), array([ 0.1, -0.2]), array([-0.3, -0.1])]
Sequence 3: [array([0.8, 0.2]), array([0.5, 0.4]), array([0.1, 0.2])]
Sequence 4: [array([-0.6, -0.2]), array([-0.4, -0.3]), array([ 0. , -0.1])]

Labels: [1 0 1 0]

Predictions: [np.int64(1), np.int64(0), np.int64(1), np.int64(0)]


In [13]:
"""
Goal:
- Introduction to tensors in PyTorch
- Build a simple RNN-based classifier

Dataset:
- We will classify short sequences of numbers as "increasing" or "decreasing"
  Example:
    [1, 2, 3, 4] → Label: 1 (increasing)
    [5, 3, 1, 0] → Label: 0 (decreasing)

----------------------------------------------------
"""

import torch
import torch.nn as nn
import torch.optim as optim

# ====================================================
# STEP 1: Create a Tiny Synthetic Dataset
# ====================================================

def generate_data(num_samples=100, seq_len=4):
    X = []
    y = []
    for _ in range(num_samples):
        if torch.rand(1).item() > 0.5:
            seq = torch.sort(torch.rand(seq_len))[0]   # Increasing
            label = 1
        else:
            seq = torch.sort(torch.rand(seq_len), descending=True)[0]  # Decreasing
            label = 0
        X.append(seq.unsqueeze(-1))  # Shape: (seq_len, input_size=1)
        y.append(label)
    return torch.stack(X), torch.tensor(y)

X, y = generate_data()
# X shape → (batch_size=100, seq_len=4, input_size=1)
# y shape → (batch_size=100)

# ====================================================
# STEP 2: Define a Simple RNN Classifier
# ====================================================

class RNNClassifier(nn.Module):
    def __init__(self, input_size=1, hidden_size=8, num_classes=2):
        super().__init__()
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)
        self.softmax = nn.Softmax(dim=1)
        self.hidden_size = hidden_size


    def forward(self, x):
        batch_size = x.size(0)
        h0 = torch.zeros(1, batch_size, self.hidden_size)

        output, hn = self.rnn(x, h0)
        last_timestep_output = output[:, -1, :]
        logits = self.fc(last_timestep_output)
        return logits


model = RNNClassifier()
print(model)

# ====================================================
# STEP 3: Train the Model
# ====================================================

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

for epoch in range(20):
    optimizer.zero_grad()
    outputs = model(X)
    loss = criterion(outputs, y)
    loss.backward()
    optimizer.step()
    print(f'Epoch [{epoch+1}/20], Loss: {loss.item():.4f}')



# ====================================================
# STEP 4: Test the Model on New Data
# ====================================================

test_X, test_y = generate_data(num_samples=10)
with torch.no_grad():
    test_outputs = model(test_X)
    _, predicted = torch.max(test_outputs, 1)
    accuracy = (predicted == test_y).sum().item() / len(test_y)
    print(f'Test Accuracy: {accuracy * 100:.2f}%')

print("\nPredictions vs Actual:")
for i in range(len(test_X)):
    print(f"Sequence: {test_X[i].squeeze().tolist()}, Actual: {test_y[i].item()}, Predicted: {predicted[i].item()}")

RNNClassifier(
  (rnn): RNN(1, 8, batch_first=True)
  (fc): Linear(in_features=8, out_features=2, bias=True)
  (softmax): Softmax(dim=1)
)
Epoch [1/20], Loss: 0.7020
Epoch [2/20], Loss: 0.6912
Epoch [3/20], Loss: 0.6848
Epoch [4/20], Loss: 0.6800
Epoch [5/20], Loss: 0.6749
Epoch [6/20], Loss: 0.6690
Epoch [7/20], Loss: 0.6619
Epoch [8/20], Loss: 0.6539
Epoch [9/20], Loss: 0.6452
Epoch [10/20], Loss: 0.6362
Epoch [11/20], Loss: 0.6273
Epoch [12/20], Loss: 0.6182
Epoch [13/20], Loss: 0.6081
Epoch [14/20], Loss: 0.5961
Epoch [15/20], Loss: 0.5817
Epoch [16/20], Loss: 0.5650
Epoch [17/20], Loss: 0.5461
Epoch [18/20], Loss: 0.5250
Epoch [19/20], Loss: 0.5020
Epoch [20/20], Loss: 0.4768
Test Accuracy: 100.00%

Predictions vs Actual:
Sequence: [0.03545117378234863, 0.3732271194458008, 0.4970362186431885, 0.598281741142273], Actual: 1, Predicted: 1
Sequence: [0.22570812702178955, 0.4948967695236206, 0.8147835731506348, 0.8266753554344177], Actual: 1, Predicted: 1
Sequence: [0.9910620450973511,