In [None]:
#One-to-One (1-to-1) – Sentiment Classification for a Single Word
#Classify the sentiment of a single word embedding (Positive / Negative)
import torch
import torch.nn as nn

# Define One-to-One model
class OneToOneModel(nn.Module):
    """Single linear layer that maps one input vector to class logits.

    Args:
        input_dim: Size of the last dimension of the input (default 100).
        num_classes: Number of output logits (default 2: positive / negative).
    """

    # NOTE: the constructor must be spelled `__init__` (double underscores).
    # With `_init_` Python never calls it, so `self.fc` is never registered
    # and `forward` fails with AttributeError.
    def __init__(self, input_dim: int = 100, num_classes: int = 2) -> None:
        super().__init__()
        self.fc = nn.Linear(input_dim, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return raw (un-normalised) logits; the last dimension has size `num_classes`."""
        return self.fc(x)

# A random 100-dimensional vector stands in for a real word embedding
# (batch of one).
x = torch.randn(1, 100)

# Build the classifier, then run a single forward pass to obtain the logits.
model = OneToOneModel()
output = model(x)

# The index of the largest logit is the predicted class.
predicted_class = output.argmax().item()
print("Predicted Class:", predicted_class)

In [None]:
# One-to-Many (1-to-Many) – Image Captioning
# Task: generate a sequence (a caption) from a single image.
# This uses:
#   - a CNN (ResNet18) as the encoder
#   - an LSTM as the decoder
#
# Code: CNN + LSTM for captioning

import torch
import torch.nn as nn
import torchvision.models as models

# Load a pretrained ResNet18 and drop its final fully connected layer so the
# network returns pooled image features instead of class logits.
# NOTE(review): `pretrained=True` is deprecated in torchvision >= 0.13 in favour
# of `weights=models.ResNet18_Weights.DEFAULT`; it is kept here so that older
# torchvision installs keep working.
cnn = models.resnet18(pretrained=True)
modules = list(cnn.children())[:-1]  # Remove final FC layer
cnn = nn.Sequential(*modules)

# Freeze CNN parameters to avoid training them
for param in cnn.parameters():
    param.requires_grad = False

# Freezing the weights is not enough: in training mode the BatchNorm layers
# still normalise with per-batch statistics and keep updating their running
# averages. Switch the encoder to eval mode so the extracted features are
# deterministic and use the pretrained statistics.
cnn.eval()

# Decoder for Captioning
class CaptionDecoder(nn.Module):
    """LSTM decoder that turns an image feature plus caption tokens into vocabulary logits.

    The image feature is fed to the LSTM as the first time step, followed by
    the embedded caption tokens.

    Args:
        embed_size: Dimension of the token embeddings; the image feature passed
            to `forward` must have this same size.
        hidden_size: Hidden state size of the LSTM.
        vocab_size: Number of tokens in the vocabulary (embedding rows and
            output logits).
    """

    # NOTE: the constructor must be spelled `__init__`. With `_init_` the
    # layers are never created and `CaptionDecoder(a, b, c)` raises TypeError.
    def __init__(self, embed_size: int, hidden_size: int, vocab_size: int) -> None:
        super().__init__()
        self.embed = nn.Embedding(vocab_size, embed_size)
        self.lstm = nn.LSTM(embed_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, vocab_size)

    def forward(self, features: torch.Tensor, captions: torch.Tensor) -> torch.Tensor:
        """Compute per-step vocabulary logits.

        Args:
            features: Image features of shape [batch_size, embed_size].
            captions: Integer token IDs of shape [batch_size, caption_len].

        Returns:
            Logits of shape [batch_size, caption_len + 1, vocab_size]; the extra
            step comes from the image feature prepended to the sequence.
        """
        embeddings = self.embed(captions)  # [batch_size, caption_len, embed_size]
        features = features.unsqueeze(1)   # [batch_size, 1, embed_size]
        # The image feature acts as the first "token" of the input sequence.
        inputs = torch.cat((features, embeddings), dim=1)
        hiddens, _ = self.lstm(inputs)     # [batch_size, caption_len + 1, hidden_size]
        outputs = self.fc(hiddens)
        return outputs

# Hyperparameters used throughout the captioning demo
vocab_size = 5000
embed_size = 256
hidden_size = 512

decoder = CaptionDecoder(embed_size, hidden_size, vocab_size)

# A random tensor stands in for one RGB image of 224x224 pixels (batch of 1)
image = torch.randn(1, 3, 224, 224)

# Run the encoder without tracking gradients, then collapse the trailing
# spatial dimensions so each image becomes a flat feature vector.
with torch.no_grad():
    features = cnn(image)                             # [1, 512, 1, 1]
    features = features.view(features.shape[0], -1)   # [1, 512]

# Linear map from the 512-d CNN feature to the decoder's embedding size
feature_projector = nn.Linear(in_features=512, out_features=embed_size)
projected_features = feature_projector(features)  # [1, 256]

# Random token IDs play the role of the input caption: batch_size=1, length=10
caption_input = torch.randint(low=0, high=vocab_size, size=(1, 10))

# Decode into one row of vocabulary logits per time step
outputs = decoder(projected_features, caption_input)
print("Output shape (logits):", outputs.shape)  # [1, 11, vocab_size]

In [None]:
#1. Many-to-One: Sentiment Classification (IMDB-style)
import torch
import torch.nn as nn

class ManyToOneLSTM(nn.Module):
    """Sequence classifier: embeds token IDs, runs an LSTM, classifies from the final hidden state.

    Args:
        vocab_size: Number of rows in the embedding table.
        embed_dim: Dimension of each token embedding.
        hidden_dim: Hidden state size of the LSTM.
        output_dim: Number of output logits per sequence.
    """

    # NOTE: the constructor must be spelled `__init__`. With `_init_` the
    # layers are never created and construction with arguments raises TypeError.
    def __init__(self, vocab_size: int, embed_dim: int, hidden_dim: int, output_dim: int) -> None:
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.lstm = nn.LSTM(embed_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Map integer token IDs of shape (batch_size, seq_len) to logits of shape (batch_size, output_dim)."""
        x = self.embedding(x)            # (batch_size, seq_len, embed_dim)
        # Only the final hidden state is needed; per-step outputs and the
        # cell state are discarded.
        _, (hidden, _) = self.lstm(x)    # hidden: (1, batch_size, hidden_dim)
        return self.fc(hidden[-1])       # (batch_size, output_dim)

# Example: binary classification of a batch of random token sequences
vocab_size = 5000
model = ManyToOneLSTM(vocab_size, 128, 256, 2)

# batch_size=4, seq_len=20
x = torch.randint(low=0, high=vocab_size, size=(4, 20))

out = model(x)
print("Output shape:", out.shape)  # (4, 2)

In [None]:
# 2. Aligned Many-to-Many: POS Tagging
class AlignedManyToManyLSTM(nn.Module):
    """Sequence tagger: produces one row of tag logits for every input token.

    Args:
        vocab_size: Number of rows in the embedding table.
        embed_dim: Dimension of each token embedding.
        hidden_dim: Hidden state size of the LSTM.
        tag_size: Number of tag logits produced per token.
    """

    # NOTE: the constructor must be spelled `__init__`. With `_init_` the
    # layers are never created and construction with arguments raises TypeError.
    def __init__(self, vocab_size: int, embed_dim: int, hidden_dim: int, tag_size: int) -> None:
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.lstm = nn.LSTM(embed_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, tag_size)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Map integer token IDs of shape (batch, seq_len) to logits of shape (batch, seq_len, tag_size)."""
        x = self.embedding(x)              # (batch, seq_len, embed_dim)
        # Keep the output at every time step so each token gets its own prediction.
        lstm_out, _ = self.lstm(x)         # (batch, seq_len, hidden_dim)
        return self.fc(lstm_out)           # (batch, seq_len, tag_size)

# Example: tag every token in a batch of random sequences
vocab_size = 5000
tag_size = 10
model = AlignedManyToManyLSTM(vocab_size, 128, 256, tag_size)

# Batch of 2 sequences, each of length 15
x = torch.randint(low=0, high=vocab_size, size=(2, 15))

out = model(x)
print("Output shape:", out.shape)  # (2, 15, 10)