In [6]:

!pip install transformers sentencepiece torch gradio -q


In [2]:

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
import gradio as gr
import torch

# Hub identifier of the NLLB-200 checkpoint shared by the tokenizer and the model.
checkpoint = "facebook/nllb-200-distilled-600M"
# Tokenizer first, then the seq2seq model, both resolved from the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(pretrained_model_name_or_path=checkpoint)


In [3]:
# Human-readable language name -> language code handed to the translation pipeline
# as `src_lang` / `tgt_lang`. Insertion order is kept as listed.
language_map = dict(
    English="eng_Latn",
    Hindi="hin_Deva",
    Sanskrit="san_Deva",  # Sanskrit support
    Spanish="spa_Latn",
    French="fra_Latn",
    German="deu_Latn",
    Tamil="tam_Taml",
    Chinese="zho_Hans",
    Japanese="jpn_Jpan",
)


In [4]:

# Translation pipelines already built by translate(), keyed by
# (src_code, tgt_code, max_length). Re-running this cell resets the cache.
_translator_cache = {}


def _get_translator(src_code, tgt_code, max_length):
    """Return a translation pipeline for one language pair, reusing a cached one when valid."""
    key = (src_code, tgt_code, max_length)
    cached = _translator_cache.get(key)
    # Only reuse a pipeline that wraps the *current* global model/tokenizer, so
    # re-running the model-loading cell never leaves a stale pipeline in use.
    if cached is not None and cached.model is model and cached.tokenizer is tokenizer:
        return cached

    translator = pipeline(
        "translation",
        model=model,
        tokenizer=tokenizer,
        src_lang=src_code,
        tgt_lang=tgt_code,
        max_length=max_length,
        # First CUDA device when available, otherwise CPU (-1).
        device=0 if torch.cuda.is_available() else -1
    )
    _translator_cache[key] = translator
    return translator


def translate(text, source_lang, target_lang, max_length=400):
    """Translate `text` from `source_lang` into `target_lang`.

    Args:
        text: Text to translate. ``None``, an empty string, or a
            whitespace-only string yields ``""`` without invoking the model.
        source_lang: A key of `language_map` (e.g. "English"). Values not found
            in `language_map` are passed through unchanged as the source code.
        target_lang: Same convention as `source_lang`, for the output language.
        max_length: `max_length` setting forwarded to the translation pipeline
            (defaults to 400, the previously hard-coded value).

    Returns:
        The ``'translation_text'`` field of the pipeline's first result.
    """
    # Nothing to translate: skip pipeline construction and generation entirely.
    if text is None or (isinstance(text, str) and not text.strip()):
        return ""

    src_code = language_map.get(source_lang, source_lang)
    tgt_code = language_map.get(target_lang, target_lang)

    # Building a pipeline per call is wasteful; reuse one per language pair.
    translator = _get_translator(src_code, tgt_code, max_length)
    result = translator(text)
    return result[0]['translation_text']


In [5]:
# Step 5: manual smoke test — translate one English sentence into Sanskrit.
source_lang = "English"
target_lang = "Sanskrit"
text = "The sun rises in the east."

translated = translate(text, source_lang, target_lang)

# Show the input and its translation, one per line.
print(
    f"Original ({source_lang}): {text}",
    f"Translated ({target_lang}): {translated}",
    sep="\n",
)


Device set to use cpu


Original (English): The sun rises in the east.
Translated (Sanskrit): सूर्योदयः पूर्वं भवति।
