Trying to make pretrained encoder, decoder architecture. 

In [24]:
import torch
import torch.nn as nn
import torch.optim as optim
from transformers import MarianMTModel, MarianTokenizer
import os

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [25]:
model = MarianMTModel.from_pretrained("Helsinki-NLP/opus-mt-en-ROMANCE").to(device)
tokenizer = MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-ROMANCE")
encoder = model.model.encoder  # âœ… this works now


In [26]:
for param in encoder.parameters():
    param.requires_grad = False

# Custom GRU decoder
class CustomDecoder(nn.Module):
    def __init__(self, input_size, hidden_size, vocab_size):
        super().__init__()
        self.gru = nn.GRU(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, vocab_size)

    def forward(self, encoder_outputs, hidden_state=None):
        output, hidden = self.gru(encoder_outputs, hidden_state)
        return self.fc(output), hidden


In [27]:
hidden_size = 512
vocab_size = tokenizer.vocab_size
languages = ["en", "fr", "es"]

# Language decoders & optimizers
decoders = {
    lang: CustomDecoder(512, hidden_size, vocab_size).to(device) for lang in languages
}
optimizers = {
    lang: optim.Adam(dec.parameters(), lr=1e-4) for lang, dec in decoders.items()
}
criterion = nn.CrossEntropyLoss()


In [28]:
def load_data(file_path):
    with open(file_path, "r", encoding="utf-8") as f:
        return [line.strip() for line in f if line.strip()]

datasets = {
    "en": load_data("en.txt"),
    "fr": load_data("fr.txt"),
    "es": load_data("es.txt"),
}


In [None]:
def train_step(sentence, lang):
    inputs = tokenizer(sentence, return_tensors="pt", padding=True, truncation=True).to(device)
    input_ids = inputs["input_ids"]

    with torch.no_grad():
        encoder_outputs = encoder(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"]
        ).last_hidden_state
        
    decoder = decoders[lang]
    decoder.train()
    logits, _ = decoder(encoder_outputs)

    target = input_ids.view(-1)
    logits = logits.view(-1, logits.size(-1))

    loss = criterion(logits, target)
    optimizers[lang].zero_grad()
    loss.backward()
    optimizers[lang].step()

    return loss.item()


In [21]:
epochs = 5
for epoch in range(epochs):
    print(f"\nEpoch {epoch + 1}")
    for lang in languages:
        total_loss = 0
        for sentence in datasets[lang]:
            total_loss += train_step(sentence, lang)
        print(f"{lang} Avg Loss: {total_loss / len(datasets[lang]):.4f}")



Epoch 1


AttributeError: 'MarianModel' object has no attribute 'model'