<a href="https://colab.research.google.com/github/aarohishaiva/CNN_final-Project/blob/main/Sequence_to_Sequence_Modeling.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [30]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torch.utils.data import Dataset
from torch.nn.utils.rnn import pad_sequence  # Import pad_sequence for handling variable length sequences

# Hyperparameters
input_dim = 1  # features per time step fed to the encoder
hidden_dim = 128  # width of the RNN hidden state
output_dim = 1  # size of each predicted step
learning_rate = 0.001  # step size for the optimizer
num_epochs = 20  # number of passes over the data loader
batch_size = 32  # sequences per mini-batch
num_samples = 1000  # number of synthetic (sequence, reversed sequence) pairs
max_sequence_length = 10  # inclusive upper bound on a sequence's random length

class SyntheticDataset(Dataset):
    """Randomly generated sequence-reversal pairs.

    Every sample pairs a 1-D array of standard-normal values, whose length is
    drawn with ``np.random.randint(1, max_sequence_length + 1)``, with the same
    array in reverse order. All samples are generated eagerly in ``__init__``
    from NumPy's global random state.

    Args:
        num_samples: Number of pairs to generate (also the reported length).
        max_sequence_length: Largest sequence length that may be drawn.
    """

    def __init__(self, num_samples, max_sequence_length):
        super().__init__()
        self.num_samples = num_samples
        self.max_sequence_length = max_sequence_length

        def draw_pair():
            # Draw the length first and the values second, once per sample.
            length = np.random.randint(1, max_sequence_length + 1)
            forward = np.random.randn(length)
            # Copy so the reversed target owns its memory instead of being a view.
            return forward.copy(), forward[::-1].copy()

        self.data = [draw_pair() for _ in range(num_samples)]

    def __len__(self):
        """Return the number of samples requested at construction."""
        return self.num_samples

    def __getitem__(self, index):
        """Return the ``(sequence, reversed_sequence)`` pair at ``index`` as tensors."""
        source, reversed_source = self.data[index]
        as_tensors = (torch.tensor(source), torch.tensor(reversed_source))
        return as_tensors

# Define collate function to pad sequences in each batch
def collate_fn(batch):
    """Zero-pad a list of ``(input, target)`` tensor pairs into two batch tensors.

    Args:
        batch: Sequence of pairs; element 0 of each pair is the input tensor
            and element 1 is the target tensor.

    Returns:
        ``(padded_inputs, padded_targets)``, each batch-first and padded with 0
        up to the longest sequence in its own column.
    """

    def pad_column(position):
        # Gather one side of every pair and right-pad it with zeros.
        column = [sample[position] for sample in batch]
        return pad_sequence(column, batch_first=True, padding_value=0)

    return pad_column(0), pad_column(1)

# Generate the synthetic reversal pairs, then batch them with shuffling and
# per-batch zero padding (handled by collate_fn).
dataset = SyntheticDataset(
    num_samples=num_samples,
    max_sequence_length=max_sequence_length,
)
loader = torch.utils.data.DataLoader(
    dataset,
    batch_size=batch_size,
    shuffle=True,
    collate_fn=collate_fn,
)

class Seq2SeqWithAttention(nn.Module):
    """RNN encoder and RNNCell decoder joined by a learned attention layer.

    The encoder reads the whole batch-first input. At every decoding step the
    current decoder state is scored against each encoder output, the scores
    are softmax-normalised over the time axis, and the resulting context
    vector is concatenated with the input at that step to drive the decoder.

    Args:
        input_dim: Feature size of each input time step.
        hidden_dim: Hidden size shared by encoder and decoder.
        output_dim: Feature size of each predicted time step.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.hidden_dim = hidden_dim

        # Encoder (Simple RNN for demonstration)
        self.encoder = nn.RNN(input_dim, hidden_dim, batch_first=True)

        # Decoder (Simple RNN for demonstration); its input is [context ; x_t]
        self.decoder = nn.RNNCell(input_dim + hidden_dim, hidden_dim)

        # Output layer
        self.fc = nn.Linear(hidden_dim, output_dim)

        # Attention mechanism: one score per (encoder output, decoder state) pair
        self.attention = nn.Linear(hidden_dim * 2, 1)
        # Normalise over dim 1 (time); dim 0 would normalise across the batch.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Run the model on ``x`` of shape ``(batch, seq_len, input_dim)``.

        Returns:
            Tensor of shape ``(batch, seq_len, output_dim)``, one prediction
            per input time step.
        """
        # nn.RNN returns (outputs, h_n); there is no cell state to unpack.
        encoder_outputs, _ = self.encoder(x)
        batch_size, seq_len, _ = encoder_outputs.size()

        # nn.RNNCell expects a 2-D (batch, hidden) state.
        decoder_hidden = torch.zeros(
            batch_size, self.hidden_dim, device=x.device, dtype=encoder_outputs.dtype
        )

        outputs = []

        for i in range(seq_len):
            # Tile the decoder state along the time axis so it lines up with
            # the batch-first encoder outputs: (batch, seq_len, hidden).
            decoder_hidden_expanded = decoder_hidden.unsqueeze(1).expand(-1, seq_len, -1)
            concat_input = torch.cat((encoder_outputs, decoder_hidden_expanded), dim=2)
            attention_scores = self.attention(concat_input)
            attention_weights = self.softmax(attention_scores.squeeze(-1))

            # Context vector: attention-weighted sum of encoder outputs
            context_vector = torch.sum(attention_weights.unsqueeze(-1) * encoder_outputs, dim=1)

            # Decode step
            decoder_input = torch.cat((context_vector, x[:, i, :]), dim=1)
            decoder_hidden = self.decoder(decoder_input, decoder_hidden)
            outputs.append(self.fc(decoder_hidden))

        return torch.stack(outputs, dim=1)

In [33]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torch.utils.data import Dataset
from torch.nn.utils.rnn import pad_sequence  # Import pad_sequence for handling variable length sequences

# Hyperparameters
input_dim = 1  # features per time step fed to the encoder
hidden_dim = 128  # width of the RNN hidden state
output_dim = 1  # size of each predicted step
learning_rate = 0.001  # step size for the optimizer
num_epochs = 20  # number of passes over the data loader
batch_size = 32  # sequences per mini-batch
num_samples = 1000  # number of synthetic (sequence, reversed sequence) pairs
max_sequence_length = 10  # inclusive upper bound on a sequence's random length

class SyntheticDataset(Dataset):
    """In-memory dataset of random sequences paired with their reversals.

    Sequence lengths come from ``np.random.randint(1, max_sequence_length + 1)``
    and values from ``np.random.randn``; both use NumPy's global random state
    and are drawn once, at construction time.

    Args:
        num_samples: Number of pairs to generate (also the reported length).
        max_sequence_length: Largest sequence length that may be drawn.
    """

    def __init__(self, num_samples, max_sequence_length):
        super().__init__()
        self.num_samples = num_samples
        self.max_sequence_length = max_sequence_length
        self.data = [self._random_pair(max_sequence_length) for _ in range(num_samples)]

    @staticmethod
    def _random_pair(longest):
        """Draw one sequence and return it with an independent reversed copy."""
        values = np.random.randn(np.random.randint(1, longest + 1))
        # Both arrays are copies, so neither shares memory with the other.
        return values.copy(), values[::-1].copy()

    def __len__(self):
        """Return the number of samples requested at construction."""
        return self.num_samples

    def __getitem__(self, index):
        """Return the pair stored at ``index`` converted to tensors."""
        source, reversed_source = self.data[index]
        return torch.tensor(source), torch.tensor(reversed_source)

# Define collate function to pad sequences in each batch
def collate_fn(batch):
    """Combine ``(input, target)`` tensor pairs into two zero-padded batches.

    Args:
        batch: Sequence of pairs; index 0 holds the input tensor and index 1
            the target tensor of each sample.

    Returns:
        A 2-tuple ``(padded_inputs, padded_targets)`` of batch-first tensors,
        each right-padded with 0 to the longest sequence in its column.
    """
    padded = []
    for position in (0, 1):
        # Collect one side of every pair, then pad that column as a unit.
        column = [sample[position] for sample in batch]
        padded.append(pad_sequence(column, batch_first=True, padding_value=0))
    padded_inputs, padded_targets = padded
    return padded_inputs, padded_targets

# Generate the synthetic reversal pairs, then batch them with shuffling and
# per-batch zero padding (handled by collate_fn).
dataset = SyntheticDataset(
    num_samples=num_samples,
    max_sequence_length=max_sequence_length,
)
loader = torch.utils.data.DataLoader(
    dataset,
    batch_size=batch_size,
    shuffle=True,
    collate_fn=collate_fn,
)

class Seq2SeqWithAttention(nn.Module):
    """RNN encoder and RNNCell decoder joined by a learned attention layer.

    For each decoding step the decoder state is concatenated with every
    encoder output, scored by a linear layer, and softmax-normalised over the
    time axis. The weighted sum of encoder outputs (the context vector) is
    concatenated with the input at that step and fed to the decoder cell.

    Args:
        input_dim: Feature size of each input time step.
        hidden_dim: Hidden size shared by encoder and decoder.
        output_dim: Feature size of each predicted time step.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.hidden_dim = hidden_dim

        # Encoder (Simple RNN for demonstration)
        self.encoder = nn.RNN(input_dim, hidden_dim, batch_first=True)

        # Decoder (Simple RNN for demonstration); its input is [context ; x_t]
        self.decoder = nn.RNNCell(input_dim + hidden_dim, hidden_dim)

        # Output layer
        self.fc = nn.Linear(hidden_dim, output_dim)

        # Attention mechanism
        self.attention = nn.Linear(hidden_dim * 2, 1)
        # Attention weights must sum to 1 over time (dim 1), not over the batch.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Run the model on ``x`` of shape ``(batch, seq_len, input_dim)``.

        Returns:
            Tensor of shape ``(batch, seq_len, output_dim)``, one prediction
            per input time step.
        """
        # Pass an explicit zero initial hidden state to the encoder
        encoder_hidden = torch.zeros(
            1, x.size(0), self.hidden_dim, device=x.device, dtype=x.dtype
        )
        encoder_outputs, encoder_hidden = self.encoder(x, encoder_hidden)
        batch_size, seq_len, _ = encoder_outputs.size()

        # nn.RNNCell takes a 2-D (batch, hidden) state, without a leading layer axis.
        decoder_hidden = torch.zeros(
            batch_size, self.hidden_dim, device=x.device, dtype=encoder_outputs.dtype
        )

        outputs = []

        for i in range(seq_len):
            # Insert the time axis at dim 1 so the tiled state is
            # (batch, seq_len, hidden), matching the batch-first encoder outputs.
            decoder_hidden_expanded = decoder_hidden.unsqueeze(1).expand(-1, seq_len, -1)
            concat_input = torch.cat((encoder_outputs, decoder_hidden_expanded), dim=2)
            attention_weights = self.softmax(self.attention(concat_input).squeeze(-1))

            # Context vector: attention-weighted sum of encoder outputs
            context_vector = torch.sum(attention_weights.unsqueeze(-1) * encoder_outputs, dim=1)

            # Decode step
            decoder_hidden = self.decoder(
                torch.cat((context_vector, x[:, i, :]), dim=1), decoder_hidden
            )
            outputs.append(self.fc(decoder_hidden))

        return torch.stack(outputs, dim=1)

In [35]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torch.utils.data import Dataset
from torch.nn.utils.rnn import pad_sequence  # Import pad_sequence for handling variable length sequences

# Hyperparameters
input_dim = 1  # features per time step fed to the encoder
hidden_dim = 128  # width of the RNN hidden state
output_dim = 1  # size of each predicted step
learning_rate = 0.001  # step size passed to the Adam optimizer
num_epochs = 20  # number of passes over the data loader
batch_size = 32  # sequences per mini-batch
num_samples = 1000  # number of synthetic (sequence, reversed sequence) pairs
max_sequence_length = 10  # inclusive upper bound on a sequence's random length

class SyntheticDataset(Dataset):
    """Randomly generated sequence-reversal pairs.

    Every sample pairs a 1-D array of standard-normal values, whose length is
    drawn with ``np.random.randint(1, max_sequence_length + 1)``, with the same
    array in reverse order. All samples are generated eagerly in ``__init__``
    from NumPy's global random state.

    Args:
        num_samples: Number of pairs to generate (also the reported length).
        max_sequence_length: Largest sequence length that may be drawn.
    """

    def __init__(self, num_samples, max_sequence_length):
        super().__init__()
        self.num_samples = num_samples
        self.max_sequence_length = max_sequence_length

        def draw_pair():
            # Draw the length first and the values second, once per sample.
            length = np.random.randint(1, max_sequence_length + 1)
            forward = np.random.randn(length)
            # Copy so the reversed target owns its memory instead of being a view.
            return forward.copy(), forward[::-1].copy()

        self.data = [draw_pair() for _ in range(num_samples)]

    def __len__(self):
        """Return the number of samples requested at construction."""
        return self.num_samples

    def __getitem__(self, index):
        """Return the ``(sequence, reversed_sequence)`` pair at ``index`` as tensors."""
        source, reversed_source = self.data[index]
        as_tensors = (torch.tensor(source), torch.tensor(reversed_source))
        return as_tensors

# Define collate function to pad sequences in each batch
def collate_fn(batch):
    """Zero-pad a list of ``(input, target)`` tensor pairs into two batch tensors.

    Args:
        batch: Sequence of pairs; element 0 of each pair is the input tensor
            and element 1 is the target tensor.

    Returns:
        ``(padded_inputs, padded_targets)``, each batch-first and padded with 0
        up to the longest sequence in its own column.
    """

    def pad_column(position):
        # Gather one side of every pair and right-pad it with zeros.
        column = [sample[position] for sample in batch]
        return pad_sequence(column, batch_first=True, padding_value=0)

    return pad_column(0), pad_column(1)

# Generate the synthetic reversal pairs, then batch them with shuffling and
# per-batch zero padding (handled by collate_fn).
dataset = SyntheticDataset(
    num_samples=num_samples,
    max_sequence_length=max_sequence_length,
)
loader = torch.utils.data.DataLoader(
    dataset,
    batch_size=batch_size,
    shuffle=True,
    collate_fn=collate_fn,
)

class Seq2SeqWithAttention(nn.Module):
    """RNN encoder and RNNCell decoder joined by a learned attention layer.

    The encoder reads the whole batch-first input. At each decoding step the
    decoder state is scored against every encoder output, the scores are
    softmax-normalised over time, and the resulting context vector is
    concatenated with the input at that step to update the decoder state.

    Args:
        input_dim: Feature size of each input time step.
        hidden_dim: Hidden size shared by encoder and decoder.
        output_dim: Feature size of each predicted time step.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.hidden_dim = hidden_dim

        # Sub-modules are created in a fixed order: encoder, decoder cell,
        # output projection, attention scorer.
        self.encoder = nn.RNN(input_dim, hidden_dim, batch_first=True)
        self.decoder = nn.RNNCell(input_dim + hidden_dim, hidden_dim)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.attention = nn.Linear(hidden_dim * 2, 1)
        # Normalise attention scores along the sequence-length dimension.
        self.softmax = nn.Softmax(dim=1)

    def _context(self, memory, state):
        """Return the attention-weighted sum of ``memory`` for decoder ``state``."""
        steps = memory.size(1)
        # Tile the (batch, hidden) state across time to pair it with each output.
        tiled_state = state.unsqueeze(1).expand(-1, steps, -1)
        scores = self.attention(torch.cat((memory, tiled_state), dim=2))
        weights = self.softmax(scores.squeeze(-1))
        return torch.sum(weights.unsqueeze(-1) * memory, dim=1)

    def forward(self, x):
        """Run the model on ``x`` of shape ``(batch, seq_len, input_dim)``.

        Returns:
            Tensor of shape ``(batch, seq_len, output_dim)``.
        """
        # Encode with an explicit all-zero initial hidden state.
        initial_state = torch.zeros(1, x.size(0), self.hidden_dim, device=x.device)
        memory, _ = self.encoder(x, initial_state)
        n_rows = memory.size(0)

        # The decoder cell starts from zeros and keeps a 2-D (batch, hidden) state.
        state = torch.zeros(n_rows, self.hidden_dim, device=x.device)

        predictions = []
        # unbind(dim=1) yields one (batch, input_dim) slice per time step.
        for step_input in x.unbind(dim=1):
            context = self._context(memory, state)
            state = self.decoder(torch.cat((context, step_input), dim=1), state)
            predictions.append(self.fc(state))

        return torch.stack(predictions, dim=1)

# Instantiate the model, loss function, and optimizer
model = Seq2SeqWithAttention(input_dim, hidden_dim, output_dim)
criterion = nn.MSELoss()  # mean squared error between prediction and target
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop: one optimizer step per mini-batch; the mean batch loss is
# printed at the end of every epoch.
for epoch in range(num_epochs):
    epoch_loss = 0  # running sum of per-batch losses for this epoch
    for batch_idx, (input_seq, target_seq) in enumerate(loader):
        optimizer.zero_grad()  # clear gradients left over from the previous step

        # Add a trailing feature axis and cast to float32 before the forward pass.
        output = model(input_seq.unsqueeze(-1).float())
        # NOTE(review): the batches are zero-padded by collate_fn and the loss is
        # taken over the full padded tensors, so padded positions count too.
        loss = criterion(output, target_seq.unsqueeze(-1).float())
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss / len(loader):.4f}')

# Example usage after training
# You can feed input sequences to the trained model to generate predictions
# For example:
# test_input = torch.tensor([1, 2, 3, 4, 5]).unsqueeze(0)  # Example input sequence
# predicted_output = model(test_input.unsqueeze(-1).float())
# print(predicted_output)


Epoch [1/20], Loss: 0.4822
Epoch [2/20], Loss: 0.4560
Epoch [3/20], Loss: 0.4499
Epoch [4/20], Loss: 0.4583
Epoch [5/20], Loss: 0.4568
Epoch [6/20], Loss: 0.4520
Epoch [7/20], Loss: 0.4479
Epoch [8/20], Loss: 0.4542
Epoch [9/20], Loss: 0.4481
Epoch [10/20], Loss: 0.4483
Epoch [11/20], Loss: 0.4465
Epoch [12/20], Loss: 0.4449
Epoch [13/20], Loss: 0.4477
Epoch [14/20], Loss: 0.4445
Epoch [15/20], Loss: 0.4393
Epoch [16/20], Loss: 0.4340
Epoch [17/20], Loss: 0.4347
Epoch [18/20], Loss: 0.4332
Epoch [19/20], Loss: 0.4418
Epoch [20/20], Loss: 0.4308


In [36]:
# Example usage after training
# You can feed input sequences to the trained model to generate predictions
test_input = torch.tensor([[1.0, 2.0, 3.0, 4.0, 5.0]])  # Example input sequence
# Inference only: disable autograd so no graph is built and the printed
# result carries no grad_fn.
with torch.no_grad():
    predicted_output = model(test_input.unsqueeze(-1).float())
print(predicted_output)


tensor([[[4.5180],
         [3.6839],
         [3.3605],
         [3.1781],
         [3.0666]]], grad_fn=<StackBackward0>)
