In [4]:
from transformers import MarianMTModel, MarianTokenizer

# Supported language codes mapped to their display names.
# Insertion order is the order in which the codes are listed to the user.
# NOTE(review): membership here does not by itself guarantee that a
# Helsinki-NLP checkpoint exists for every source/target pair -- confirm.
LANGUAGES = {
    "en": "English",
    "fr": "French",
    "de": "German",
    "es": "Spanish",
    "it": "Italian",
    "ru": "Russian",
    "zh": "Chinese",
    "ja": "Japanese",
    "ko": "Korean",
    "ar": "Arabic",
    "pt": "Portuguese",
    "tr": "Turkish",
    "nl": "Dutch",
    "sv": "Swedish",
    "no": "Norwegian",
    "da": "Danish",
    "fi": "Finnish",
    "el": "Greek",
    "he": "Hebrew",
    "pl": "Polish",
    "ro": "Romanian",
    "bg": "Bulgarian",
    "hu": "Hungarian",
    "cs": "Czech",
    "sk": "Slovak",
    "sl": "Slovenian",
    "hr": "Croatian",
    "sr": "Serbian",
    "uk": "Ukrainian",
    "hi": "Hindi",
    "bn": "Bengali",
    "ta": "Tamil",
    "ml": "Malayalam",
    "te": "Telugu",
    "ur": "Urdu",
    "fa": "Persian",
    "id": "Indonesian",
    "ms": "Malay",
    "th": "Thai",
    "vi": "Vietnamese",
    "sw": "Swahili",
    "am": "Amharic",
    "et": "Estonian",
    "lv": "Latvian",
    "lt": "Lithuanian",
    "is": "Icelandic",
    "mk": "Macedonian",
    "sq": "Albanian",
    "ka": "Georgian",
    "hy": "Armenian",
    "az": "Azerbaijani",
    "kk": "Kazakh",
    "uz": "Uzbek",
    "mn": "Mongolian",
    "km": "Khmer",
    "lo": "Lao",
    "my": "Burmese",
    "ne": "Nepali",
    "si": "Sinhala",
}

# Resolve a language pair to its Helsinki-NLP checkpoint and load it
def load_model(source_lang, target_lang):
    """Load the MarianMT model and tokenizer for one translation direction.

    The checkpoint name is built as
    ``Helsinki-NLP/opus-mt-<source_lang>-<target_lang>`` and handed to
    ``from_pretrained`` for both the tokenizer and the model.

    Args:
        source_lang: Language code placed first in the checkpoint name.
        target_lang: Language code placed second in the checkpoint name.

    Returns:
        A ``(model, tokenizer)`` tuple -- note the model comes first.
    """
    checkpoint = "Helsinki-NLP/opus-mt-{}-{}".format(source_lang, target_lang)
    # Tokenizer is fetched before the model, as in the original load order.
    marian_tokenizer = MarianTokenizer.from_pretrained(checkpoint)
    marian_model = MarianMTModel.from_pretrained(checkpoint)
    return marian_model, marian_tokenizer

# Function to perform translation
def translate_text(text, model, tokenizer):
    """Translate ``text`` using the supplied model and tokenizer.

    Args:
        text: The text to translate. It is passed to ``tokenizer`` with
            padding and truncation enabled and ``return_tensors="pt"``.
        model: Object whose ``generate(**inputs)`` produces output sequences.
        tokenizer: Callable that encodes ``text`` into keyword inputs for
            ``model.generate`` and offers ``decode`` for the result.

    Returns:
        The decoded first generated sequence with special tokens skipped.
        An empty or whitespace-only string yields ``""`` without invoking
        the model.
    """
    # Nothing to translate: skip the model call rather than letting it
    # generate from an input that carries no content.
    if isinstance(text, str) and not text.strip():
        return ""

    encoded = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    generated = model.generate(**encoded)
    # Only the first generated sequence is decoded and returned.
    return tokenizer.decode(generated[0], skip_special_tokens=True)

# Print the language table, five "code: name" entries per row
def display_language_choices():
    """Print every entry of ``LANGUAGES`` as ``code: name``, five per line.

    Entries on a line are separated by ``" | "``; the final line holds
    whatever remains when the total is not a multiple of five.
    """
    print("\nAvailable Languages (Choose by Code):")
    entries = [f"{code}: {name}" for code, name in LANGUAGES.items()]
    per_row = 5
    for start in range(0, len(entries), per_row):
        print(" | ".join(entries[start:start + per_row]))

# Main function for user input
def main():
    """Run one interactive translation: pick languages, enter text, print result.

    The function prints the language table, reads a source and a target
    code, validates them against ``LANGUAGES``, loads the matching model,
    reads the text to translate and prints the translation. It returns
    early (after printing a message) when the codes are invalid or equal,
    when the model cannot be loaded, or when no text is entered.
    """
    # Display available languages in a compact format
    display_language_choices()

    # Keys in LANGUAGES are lowercase, so fold case before validating;
    # otherwise an input such as "EN" would be rejected.
    source_lang = input("\nChoose source language code (e.g., 'en' for English): ").strip().lower()
    target_lang = input("Choose target language code (e.g., 'fr' for French): ").strip().lower()

    # Validate language choices
    if source_lang not in LANGUAGES or target_lang not in LANGUAGES:
        print("Invalid language code. Please choose from the available languages.")
        return

    # Same code on both sides would build a "xx-xx" checkpoint name.
    if source_lang == target_lang:
        print("Source and target languages must be different.")
        return

    # Load the MarianMT model and tokenizer. A valid pair of codes does not
    # imply that a checkpoint can be loaded; transformers reports a missing
    # or unreachable checkpoint as OSError.
    try:
        model, tokenizer = load_model(source_lang, target_lang)
    except OSError as err:
        print(
            f"Could not load a translation model for "
            f"{LANGUAGES[source_lang]} -> {LANGUAGES[target_lang]}: {err}"
        )
        return

    # Get input text from user
    text = input(f"\nEnter text in {LANGUAGES[source_lang]} to translate to {LANGUAGES[target_lang]}: ")
    if not text.strip():
        print("No text entered. Nothing to translate.")
        return

    # Perform translation
    translated_text = translate_text(text, model, tokenizer)

    # Output the translated text
    print(f"\nTranslated Text: {translated_text}")

# Start the interactive translator only when this module is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()



Available Languages (Choose by Code):
en: English | fr: French | de: German | es: Spanish | it: Italian
ru: Russian | zh: Chinese | ja: Japanese | ko: Korean | ar: Arabic
pt: Portuguese | tr: Turkish | nl: Dutch | sv: Swedish | no: Norwegian
da: Danish | fi: Finnish | el: Greek | he: Hebrew | pl: Polish
ro: Romanian | bg: Bulgarian | hu: Hungarian | cs: Czech | sk: Slovak
sl: Slovenian | hr: Croatian | sr: Serbian | uk: Ukrainian | hi: Hindi
bn: Bengali | ta: Tamil | ml: Malayalam | te: Telugu | ur: Urdu
fa: Persian | id: Indonesian | ms: Malay | th: Thai | vi: Vietnamese
sw: Swahili | am: Amharic | et: Estonian | lv: Latvian | lt: Lithuanian
is: Icelandic | mk: Macedonian | sq: Albanian | ka: Georgian | hy: Armenian
az: Azerbaijani | kk: Kazakh | uz: Uzbek | mn: Mongolian | km: Khmer
lo: Lao | my: Burmese | ne: Nepali | si: Sinhala

Choose source language code (e.g., 'en' for English): en
Choose target language code (e.g., 'fr' for French): hi


tokenizer_config.json:   0%|          | 0.00/44.0 [00:00<?, ?B/s]

source.spm:   0%|          | 0.00/812k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/1.07M [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.10M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.39k [00:00<?, ?B/s]



pytorch_model.bin:   0%|          | 0.00/306M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]


Enter text in English to translate to Hindi: where are you from and why you came here?

Translated Text: तुम कहाँ से हो और तुम यहाँ क्यों आए हो?
