# Generative AI for Machine Translation

Implement a basic Statistical Machine Translation (SMT) model that uses word-by-word translation with a
dictionary lookup approach

In [1]:
def build_translation_dictionary():
    """Build a basic translation dictionary."""
    return {
        'the': 'le',
        'cat': 'chat',
        'is': 'est',
        'on': 'sur',
        'mat': 'tapis',
        'dog': 'chien',
        'walks': 'marche',
        'in': 'dans',
        'park': 'parc'
    }

def word_by_word_translate(sentence, translation_dict):
    """Translate a sentence word by word using the given dictionary.

    Args:
        sentence (str): The input sentence in the source language.
        translation_dict (dict): A dictionary with word translations.

    Returns:
        str: The translated sentence.
    """
    words = sentence.lower().split()
    translated_words = [translation_dict.get(word, f"[{word}]") for word in words]
    return ' '.join(translated_words)

if __name__ == "__main__":
    # Build the translation dictionary
    translation_dict = build_translation_dictionary()

    # Input sentence to translate
    source_sentence = "The cat is on the mat"

    # Perform word-by-word translation
    translated_sentence = word_by_word_translate(source_sentence, translation_dict)

    print("Source Sentence:", source_sentence)
    print("Translated Sentence:", translated_sentence)


Source Sentence: The cat is on the mat
Translated Sentence: le chat est sur le tapis


 Implement an Attention mechanism in a Neural Machine Translation (NMT) model using PyTorch

In [2]:
!pip install torch
!pip install torchtext

Collecting torchtext
  Downloading torchtext-0.18.0-cp310-cp310-manylinux1_x86_64.whl.metadata (7.9 kB)
Downloading torchtext-0.18.0-cp310-cp310-manylinux1_x86_64.whl (2.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m27.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torchtext
Successfully installed torchtext-0.18.0


In [3]:
import torch
import torch.nn as nn
import torch.optim as optim

# Define the Encoder
class Encoder(nn.Module):
    def __init__(self, input_dim, embed_dim, hidden_dim, num_layers, dropout):
        super(Encoder, self).__init__()
        self.embedding = nn.Embedding(input_dim, embed_dim)
        self.rnn = nn.GRU(embed_dim, hidden_dim, num_layers, dropout=dropout, batch_first=True)
        self.dropout = nn.Dropout(dropout)

    def forward(self, src):
        embedded = self.dropout(self.embedding(src))
        outputs, hidden = self.rnn(embedded)
        return outputs, hidden

# Define Attention Mechanism
class Attention(nn.Module):
    def __init__(self, hidden_dim):
        super(Attention, self).__init__()
        self.attn = nn.Linear(hidden_dim * 2, hidden_dim)
        self.v = nn.Parameter(torch.rand(hidden_dim))

    def forward(self, hidden, encoder_outputs):
        seq_len = encoder_outputs.size(1)
        hidden = hidden.unsqueeze(1).repeat(1, seq_len, 1)
        energy = torch.tanh(self.attn(torch.cat((hidden, encoder_outputs), dim=2)))
        attention = torch.sum(self.v * energy, dim=2)
        return torch.softmax(attention, dim=1)

# Define the Decoder with Attention
class Decoder(nn.Module):
    def __init__(self, output_dim, embed_dim, hidden_dim, num_layers, dropout, attention):
        super(Decoder, self).__init__()
        self.output_dim = output_dim
        self.attention = attention
        self.embedding = nn.Embedding(output_dim, embed_dim)
        self.rnn = nn.GRU(hidden_dim + embed_dim, hidden_dim, num_layers, dropout=dropout, batch_first=True)
        self.fc_out = nn.Linear(hidden_dim * 2, output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, tgt, hidden, encoder_outputs):
        tgt = tgt.unsqueeze(1)
        embedded = self.dropout(self.embedding(tgt))
        a = self.attention(hidden[-1], encoder_outputs)
        a = a.unsqueeze(1)
        weighted = torch.bmm(a, encoder_outputs)
        rnn_input = torch.cat((embedded, weighted), dim=2)
        output, hidden = self.rnn(rnn_input, hidden)
        prediction = self.fc_out(torch.cat((output.squeeze(1), weighted.squeeze(1)), dim=1))
        return prediction, hidden

# Define the Seq2Seq Model
class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder, device):
        super(Seq2Seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device

    def forward(self, src, tgt, teacher_forcing_ratio=0.5):
        batch_size = src.size(0)
        tgt_len = tgt.size(1)
        tgt_vocab_size = self.decoder.output_dim

        outputs = torch.zeros(batch_size, tgt_len, tgt_vocab_size).to(self.device)

        encoder_outputs, hidden = self.encoder(src)
        input = tgt[:, 0]

        for t in range(1, tgt_len):
            output, hidden = self.decoder(input, hidden, encoder_outputs)
            outputs[:, t, :] = output
            top1 = output.argmax(1)
            input = tgt[:, t] if torch.rand(1).item() < teacher_forcing_ratio else top1

        return outputs

# Hyperparameters and Initialization
INPUT_DIM = 100  # Size of source vocabulary
OUTPUT_DIM = 100  # Size of target vocabulary
EMBED_DIM = 256
HIDDEN_DIM = 512
NUM_LAYERS = 2
DROPOUT = 0.5
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

encoder = Encoder(INPUT_DIM, EMBED_DIM, HIDDEN_DIM, NUM_LAYERS, DROPOUT)
attention = Attention(HIDDEN_DIM)
decoder = Decoder(OUTPUT_DIM, EMBED_DIM, HIDDEN_DIM, NUM_LAYERS, DROPOUT, attention)

model = Seq2Seq(encoder, decoder, DEVICE).to(DEVICE)

# Loss and Optimizer
criterion = nn.CrossEntropyLoss(ignore_index=0)  # Assume <pad> token is index 0
optimizer = optim.Adam(model.parameters())

# Training Loop (Example)
for epoch in range(10):  # Assume 10 epochs
    model.train()
    src = torch.randint(1, INPUT_DIM, (32, 20)).to(DEVICE)  # Batch of 32, sequence length 20
    tgt = torch.randint(1, OUTPUT_DIM, (32, 20)).to(DEVICE)

    optimizer.zero_grad()
    output = model(src, tgt)
    output_dim = output.shape[-1]

    loss = criterion(output[:, 1:].reshape(-1, output_dim), tgt[:, 1:].reshape(-1))
    loss.backward()
    optimizer.step()

    print(f'Epoch {epoch+1}, Loss: {loss.item()}')


Epoch 1, Loss: 4.603466987609863
Epoch 2, Loss: 4.604619979858398
Epoch 3, Loss: 4.606863975524902
Epoch 4, Loss: 4.613286018371582
Epoch 5, Loss: 4.616055011749268
Epoch 6, Loss: 4.608440399169922
Epoch 7, Loss: 4.605021953582764
Epoch 8, Loss: 4.61676549911499
Epoch 9, Loss: 4.605087757110596
Epoch 10, Loss: 4.602208137512207


 Use a pre-trained GPT model to perform machine translation from English to French

In [1]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Load pre-trained model and tokenizer
model_name = "Helsinki-NLP/opus-mt-en-fr"  # A pre-trained English-to-French translation model
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

def translate_text(text, tokenizer, model, max_length=50):
    # Tokenize input text
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=max_length)

    # Generate translation using the model
    outputs = model.generate(
        inputs["input_ids"],
        max_length=max_length,
        num_beams=4,
        early_stopping=True
    )

    # Decode and return the translation
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Example English text
english_text = "The cat is on the mat."

# Translate to French
french_translation = translate_text(english_text, tokenizer, model)
print("English:", english_text)
print("French:", french_translation)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.42k [00:00<?, ?B/s]

source.spm:   0%|          | 0.00/778k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/802k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.34M [00:00<?, ?B/s]



pytorch_model.bin:   0%|          | 0.00/301M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

English: The cat is on the mat.
French: Le chat est sur le tapis.


Generate a short poem using GPT-2 for a specific theme (e.g., "Nature")

In [2]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Load pre-trained GPT-2 model and tokenizer
model_name = "gpt2"  # You can use a fine-tuned GPT-2 model for poetry
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)

def generate_poem(prompt, max_length=50, temperature=0.7, top_k=50):
    # Tokenize input prompt
    inputs = tokenizer(prompt, return_tensors="pt")

    # Generate poem
    outputs = model.generate(
        inputs["input_ids"],
        max_length=max_length,
        temperature=temperature,
        top_k=top_k,
        top_p=0.9,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id
    )

    # Decode generated text
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Define the theme and prompt for the poem
theme = "Nature"
prompt = f"A gentle ode to {theme},"

# Generate the poem
poem = generate_poem(prompt)
print(poem)


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


A gentle ode to Nature, and a gentle ode to the world.

The first thing I did was to write a poem. I had a lot of time to write it, and I was going to write it in a way that


 Implement a basic reinforcement learning setup for text generation using PyTorch's reward function

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.distributions import Categorical

# Define the model (simple RNN as a text generator)
class TextGenerator(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim):
        super(TextGenerator, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.rnn = nn.RNN(embedding_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, vocab_size)

    def forward(self, x, hidden):
        x = self.embedding(x)
        out, hidden = self.rnn(x, hidden)
        out = self.fc(out)
        return out, hidden

# Reward function: Encourage diversity and penalize repetition
def reward_function(sequence):
    unique_tokens = len(set(sequence.tolist()))
    return unique_tokens / len(sequence)  # Ratio of unique tokens to sequence length

# Generate a sequence and calculate rewards
def generate_sequence_and_reward(model, vocab_size, max_length, start_token):
    sequence = [start_token]
    hidden = None

    for _ in range(max_length - 1):
        input_seq = torch.tensor([sequence[-1]]).unsqueeze(0)  # Shape: (1, 1)
        logits, hidden = model(input_seq, hidden)
        probs = torch.softmax(logits[:, -1, :], dim=-1)
        dist = Categorical(probs)
        next_token = dist.sample().item()
        sequence.append(next_token)

        if next_token == vocab_size - 1:  # Assume the last token is the "end of sequence" token
            break

    reward = reward_function(torch.tensor(sequence))
    return sequence, reward

# Training loop
def train_model(model, vocab_size, num_epochs, max_length, start_token, lr=0.01):
    optimizer = optim.Adam(model.parameters(), lr=lr)
    for epoch in range(num_epochs):
        optimizer.zero_grad()

        # Generate a sequence and calculate reward
        sequence, reward = generate_sequence_and_reward(model, vocab_size, max_length, start_token)

        # Calculate policy gradient loss
        loss = 0
        hidden = None
        for i in range(len(sequence) - 1):
            input_seq = torch.tensor([sequence[i]]).unsqueeze(0)  # Shape: (1, 1)
            logits, hidden = model(input_seq, hidden)
            probs = torch.softmax(logits[:, -1, :], dim=-1)
            dist = Categorical(probs)
            log_prob = dist.log_prob(torch.tensor(sequence[i + 1]))
            loss -= log_prob * reward  # Reinforce with reward scaling

        loss.backward()
        optimizer.step()

        print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {loss.item()}, Reward: {reward}")

# Parameters
vocab_size = 20  # Example vocabulary size
embedding_dim = 32
hidden_dim = 64
max_length = 10
start_token = 0  # Assume 0 is the "start token"
num_epochs = 50

# Model and training
model = TextGenerator(vocab_size, embedding_dim, hidden_dim)
train_model(model, vocab_size, num_epochs, max_length, start_token)


Epoch 1/50, Loss: 18.832626342773438, Reward: 0.7
Epoch 2/50, Loss: 11.649974822998047, Reward: 1.0
Epoch 3/50, Loss: 22.032333374023438, Reward: 0.8
Epoch 4/50, Loss: 3.0354645252227783, Reward: 1.0
Epoch 5/50, Loss: 16.268957138061523, Reward: 0.6
Epoch 6/50, Loss: 12.185673713684082, Reward: 0.5
Epoch 7/50, Loss: 18.954010009765625, Reward: 0.7
Epoch 8/50, Loss: 20.55805206298828, Reward: 0.8
Epoch 9/50, Loss: 19.25259780883789, Reward: 0.8
Epoch 10/50, Loss: 15.265751838684082, Reward: 0.7
Epoch 11/50, Loss: 5.140851020812988, Reward: 0.75
Epoch 12/50, Loss: 2.6577494144439697, Reward: 1.0
Epoch 13/50, Loss: 9.931920051574707, Reward: 1.0
Epoch 14/50, Loss: 19.302051544189453, Reward: 0.8888888888888888
Epoch 15/50, Loss: 4.253350257873535, Reward: 0.75
Epoch 16/50, Loss: 3.7889621257781982, Reward: 0.75
Epoch 17/50, Loss: 19.48383140563965, Reward: 0.8888888888888888
Epoch 18/50, Loss: 16.646665573120117, Reward: 0.7
Epoch 19/50, Loss: 2.6951406002044678, Reward: 0.75
Epoch 20/50,

 Create a simple multimodal generative model that generates an image caption given an image

In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models, transforms
from PIL import Image
import numpy as np

# Parameters
embedding_dim = 256
hidden_dim = 512
vocab_size = 1000  # Example vocabulary size
max_caption_length = 20  # Maximum length of captions

# Define the Encoder (CNN for image features)
class ImageEncoder(nn.Module):
    def __init__(self):
        super(ImageEncoder, self).__init__()
        # Pretrained ResNet-18 as the feature extractor
        resnet = models.resnet18(pretrained=True)
        self.feature_extractor = nn.Sequential(*list(resnet.children())[:-1])  # Remove the classifier
        self.fc = nn.Linear(resnet.fc.in_features, embedding_dim)

    def forward(self, x):
        features = self.feature_extractor(x).squeeze()  # Shape: (batch_size, resnet.fc.in_features)
        features = self.fc(features)  # Shape: (batch_size, embedding_dim)
        return features

# Define the Decoder (RNN for caption generation)
class CaptionDecoder(nn.Module):
    def __init__(self, embedding_dim, hidden_dim, vocab_size):
        super(CaptionDecoder, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.rnn = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, vocab_size)

    def forward(self, features, captions):
        embeddings = self.embedding(captions)  # Shape: (batch_size, seq_length, embedding_dim)
        inputs = torch.cat((features.unsqueeze(1), embeddings), dim=1)  # Append image features as the first input
        outputs, _ = self.rnn(inputs)
        outputs = self.fc(outputs)
        return outputs

# Combine Encoder and Decoder
class ImageCaptioningModel(nn.Module):
    def __init__(self, embedding_dim, hidden_dim, vocab_size):
        super(ImageCaptioningModel, self).__init__()
        self.encoder = ImageEncoder()
        self.decoder = CaptionDecoder(embedding_dim, hidden_dim, vocab_size)

    def forward(self, images, captions):
        features = self.encoder(images)
        outputs = self.decoder(features, captions)
        return outputs

# Helper function to preprocess images
def preprocess_image(image_path):
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    image = Image.open(image_path).convert("RGB")
    return transform(image).unsqueeze(0)  # Add batch dimension

# Training loop (dummy dataset for illustration)
def train_model(model, data_loader, criterion, optimizer, num_epochs=5):
    for epoch in range(num_epochs):
        for images, captions in data_loader:
            optimizer.zero_grad()
            outputs = model(images, captions[:, :-1])  # Exclude <end> token for inputs

            # Align dimensions of outputs and targets
            outputs = outputs[:, :-1, :]  # Remove last token prediction to match target
            loss = criterion(
                outputs.contiguous().view(-1, vocab_size),
                captions[:, 1:].contiguous().view(-1)  # Target is shifted
            )
            loss.backward()
            optimizer.step()

        print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {loss.item()}")


# Instantiate the model
model = ImageCaptioningModel(embedding_dim, hidden_dim, vocab_size)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Dummy data (replace with real dataset for actual use)
dummy_images = torch.randn(10, 3, 224, 224)  # 10 random images
dummy_captions = torch.randint(0, vocab_size, (10, max_caption_length))  # Random captions
dummy_data_loader = [(dummy_images, dummy_captions)]

# Train the model
train_model(model, dummy_data_loader, criterion, optimizer, num_epochs=5)

# Generate captions for a new image
def generate_caption(model, image_path, max_length=20):
    model.eval()
    image = preprocess_image(image_path)
    with torch.no_grad():
        features = model.encoder(image)
        caption = []
        input_token = torch.tensor([0])  # Assume <start> token is 0
        hidden = None

        for _ in range(max_length):
            input_token = input_token.unsqueeze(0)  # Add batch dimension
            embeddings = model.decoder.embedding(input_token)
            inputs = torch.cat((features.unsqueeze(1), embeddings), dim=1)
            outputs, hidden = model.decoder.rnn(inputs, hidden)
            logits = model.decoder.fc(outputs.squeeze(1))
            next_token = logits.argmax(dim=1).item()
            caption.append(next_token)
            if next_token == 1:  # Assume <end> token is 1
                break
            input_token = torch.tensor([next_token])

    return caption

# Example usage
# Replace "image.jpg" with the path to your image file
# caption = generate_caption(model, "image.jpg")
# print("Generated Caption:", caption)


Epoch 1/5, Loss: 6.91055154800415
Epoch 2/5, Loss: 6.750436305999756
Epoch 3/5, Loss: 6.591473579406738
Epoch 4/5, Loss: 6.425230026245117
Epoch 5/5, Loss: 6.239101886749268
