In [4]:
!pip install transformers langdetect

Collecting langdetect
  Downloading langdetect-1.0.9.tar.gz (981 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/981.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m972.8/981.5 kB[0m [31m29.1 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m981.5/981.5 kB[0m [31m19.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: langdetect
  Building wheel for langdetect (setup.py) ... [?25l[?25hdone
  Created wheel for langdetect: filename=langdetect-1.0.9-py3-none-any.whl size=993221 sha256=f272faeb301d72222aba4213a305441070007a4b5f2781427a6c03d7cb7a7826
  Stored in directory: /root/.cache/pip/wheels/95/03/7d/59ea870c70ce4e5a370638b5462a7711ab78fba2f655d05106
Successfully built langdetect
Installing collected packages: langdetect
Successfully installed langdetect-1.0.9


In [5]:
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
from langdetect import detect
import re

# تحميل النموذج والـ tokenizer مرة واحدة فقط
model = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_418M")
tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_418M")

# قائمة بأكواد اللغات والأسماء
language_options = {
    "English": "en",
    "French": "fr",
    "Spanish": "es",
    "German": "de",
    "Arabic": "ar",
}

# دالة لترجمة الجمل
def translate_sentence(sentence, tgt_lang):
    encoded_text = tokenizer(sentence, return_tensors="pt")
    generated_tokens = model.generate(encoded_text["input_ids"], forced_bos_token_id=tokenizer.lang_code_to_id[tgt_lang])
    translation = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
    return translation

# دالة لترجمة المقال كاملاً
def translate_article(article, tgt_lang):
    paragraphs = re.split(r'([\r\n]+)', article)
    for i, p in enumerate(paragraphs):
        if len(p.strip()) == 0:
            continue
        paragraphs[i] = translate_paragraph(p, tgt_lang)
    return ''.join(paragraphs)

# دالة لترجمة الفقرات
def translate_paragraph(paragraph, tgt_lang):
    sentences = []
    cursor = 0
    for i, c in enumerate(paragraph):
        if c == '.':
            sentences.append(paragraph[cursor:i + 1])
            cursor = i + 1
    if paragraph and paragraph[-1] != '.':
        sentences.append(paragraph[cursor:])
    return ' '.join(translate_sentence(s, tgt_lang) for s in sentences)

# دالة الكشف عن اللغة والترجمة إلى لغات متعددة
def detect_and_translate_multiple(text, tgt_langs):
    detected_lang = detect(text)
    src_lang_code = detected_lang if detected_lang in language_options.values() else "en"
    tokenizer.src_lang = src_lang_code

    translations = {}
    for tgt_lang in tgt_langs:
        translations[tgt_lang] = translate_article(text, language_options[tgt_lang])

    return "\n\n".join([f"{lang}: {translations[lang]}" for lang in translations])

# دالة لقراءة محتوى الملف النصي فقط
def translate_file_in_chunks(file_path, tgt_langs, chunk_size=2000):
    translations = []
    try:
        # قراءة الملف النصي فقط
        with open(file_path, "r", encoding="utf-8") as f:
            content = f.read()

        # ترجمة المحتوى
        translation = detect_and_translate_multiple(content, tgt_langs)
        translations.append(translation)

        return "\n\n".join(translations)
    except Exception as e:
        return f"Error during file translation: {str(e)}"


# مثال لاستخدام الكود بدون واجهة Gradio:

# 1. ترجمة النص المدخل يدويًا
input_text = "Hello, how are you?"
target_languages = ["French", "German", "Arabic"]

translated_text = detect_and_translate_multiple(input_text, target_languages)
print("Translated Text:")
print(translated_text)

# 2. ترجمة محتوى ملف نصي
file_path = ""
translated_file_content = translate_file_in_chunks(file_path, target_languages)
print("Translated File Content:")
print(translated_file_content)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/908 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.94G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/233 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/298 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/3.71M [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/1.14k [00:00<?, ?B/s]



Translated Text:
French: Bonjour, comment vous êtes-vous?

German: Hallo, wie bist du?

Arabic: مرحبا، كيف حالك؟
Translated File Content:
Error during file translation: [Errno 2] No such file or directory: ''


In [2]:
!pip install gradio

Collecting gradio
  Downloading gradio-4.44.0-py3-none-any.whl.metadata (15 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0 (from gradio)
  Downloading fastapi-0.114.1-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.3.0 (from gradio)
  Downloading gradio_client-1.3.0-py3-none-any.whl.metadata (7.1 kB)
Collecting httpx>=0.24.1 (from gradio)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting orjson~=3.0 (from gradio)
  Downloading orjson-3.10.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (50 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.4/50.4 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.9 (from g

In [6]:
import gradio as gr
with gr.Blocks() as interface:
    gr.Markdown("## Text Translation with Auto Language Detection (Multiple Outputs)")

    # مكونات الإدخال: مربع نص، قائمة منسدلة متعددة الاختيارات للغات الهدف
    with gr.Row(): #Fixed indentation
        input_text = gr.Textbox(label="Input Text")
        file_input = gr.File(label="Upload Text File (.txt)")  # دعم فقط لملفات TXT

    tgt_langs = gr.CheckboxGroup(list(language_options.keys()), label="Target Languages", value=["English"])

    # مكون الإخراج: نتيجة الترجمة
    output_text = gr.Textbox(label="Translated Text")

    # زر الترجمة للنص المُدخل يدويًا
    translate_button = gr.Button("Translate Text")

    # زر الترجمة للملف المُحمل
    translate_file_button = gr.Button("Translate File")

    # تحديد ما يحدث عند الضغط على زر الترجمة للنص المُدخل
    translate_button.click(
        detect_and_translate_multiple,
        inputs=[input_text, tgt_langs],
        outputs=output_text
    )

    # تحديد ما يحدث عند الضغط على زر الترجمة للملف المُحمل
    translate_file_button.click(
        translate_file_in_chunks,
        inputs=[file_input, tgt_langs],
        outputs=output_text
    )

# تشغيل الواجهة
interface.launch()


Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://3c2f7b775c05d3c1b5.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


