<a href="https://colab.research.google.com/github/erniesg/gallery/blob/main/text_translation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the third-party packages imported by the next cell: the Google Cloud
# Translate client, requests, the OpenAI SDK and python-docx (imported as `docx`).
!pip install google-cloud-translate requests openai python-docx

Collecting gradio
  Downloading gradio-4.20.1-py3-none-any.whl (17.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.0/17.0 MB[0m [31m61.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting openai
  Downloading openai-1.13.3-py3-none-any.whl (227 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m227.4/227.4 kB[0m [31m34.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting python-docx
  Downloading python_docx-1.1.0-py3-none-any.whl (239 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m239.6/239.6 kB[0m [31m28.5 MB/s[0m eta [36m0:00:00[0m
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl (15 kB)
Collecting fastapi (from gradio)
  Downloading fastapi-0.110.0-py3-none-any.whl (92 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.1/92.1 kB[0m [31m16.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting ffmpy (from gradio)
  Downloading ffmpy-0.3.2.tar.gz (5.5 kB)
  Prep

In [None]:
from google.colab import output, userdata, widgets
from IPython.display import display, HTML
import ipywidgets as widgets
import json
import sys
import uuid
import requests
import hashlib
import time
from openai import OpenAI
import asyncio
from google.cloud import translate_v2 as translate
import docx
from io import BytesIO
import base64
import time

# Endpoint that `do_request` posts Youdao translation requests to.
YOUDAO_URL = 'https://openapi.youdao.com/api'
# Credentials and settings are read from the Colab `userdata` store rather
# than being hard-coded in the notebook.
# APP_KEY and APP_SECRET are concatenated into the signed string built in `connect`.
APP_KEY = userdata.get('YOUDAO_APP_KEY')
APP_SECRET = userdata.get('YOUDAO_APP_SECRET')
# Sent as the `vocabId` field of every Youdao request (see `connect`).
VOCAB_ID = userdata.get('VOCAB_ID')
OPENAI_API_KEY = userdata.get('OPENAI_API_KEY')
# Passed as the `key` parameter of Google Translate REST calls.
GOOGLE_TRANSLATE_API_KEY = userdata.get('GOOGLE_TRANSLATE_API_KEY')

# Initialize the OpenAI client
# (module-level; `translate_with_openai_gpt4` uses it for every call).
client = OpenAI(api_key=OPENAI_API_KEY)

def translate_with_openai_gpt4(text, prompt_language):
    """
    Ask the OpenAI chat-completions API to translate ``text``.

    Args:
        text: Source text to translate.
        prompt_language: Name of the target language as it should appear in
            the prompt (for example "Malay").

    Returns:
        The message content of the first choice in the API response.
    """
    system_message = {
        "role": "system",
        "content": "You are a highly intelligent translator well-versed in Southeast Asian art.",
    }
    user_message = {
        "role": "user",
        "content": f"Translate this text into {prompt_language} for a gallery in Singapore: {text}",
    }

    completion = client.chat.completions.create(
        model="gpt-4-0125-preview",
        messages=[system_message, user_message],
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

def translate_with_google(text, target_lang, timeout=30):
    """
    Translate text using the Google Translate REST API (v2).

    The text is sent in the body of a POST request instead of a GET query
    string, so long paragraphs are not rejected because of URL length limits,
    and the call is bounded by ``timeout`` so a stalled connection cannot hang
    the notebook indefinitely.

    Args:
        text: Source text to translate.
        target_lang: Target language code understood by the API (e.g. "ta").
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The translated text, or the string "Translation failed." when the
        service answers with a non-200 status code.

    Raises:
        requests.RequestException: On network errors or timeouts.
        KeyError, IndexError: If a 200 response lacks the expected fields.
    """
    url = "https://translation.googleapis.com/language/translate/v2"
    payload = {
        "q": text,
        "target": target_lang,
        "format": "text",
    }
    # The API key stays in the query string; only the payload moves to the body.
    response = requests.post(
        url,
        params={"key": GOOGLE_TRANSLATE_API_KEY},
        json=payload,
        timeout=timeout,
    )
    if response.status_code != 200:
        print(f"Failed to translate with Google Translate: {response.text}")
        return "Translation failed."

    result = response.json()
    return result['data']['translations'][0]['translatedText']

def encrypt(signStr):
    """Return the hexadecimal SHA-256 digest of ``signStr`` encoded as UTF-8."""
    encoded = signStr.encode('utf-8')
    return hashlib.sha256(encoded).hexdigest()


def truncate(q):
    """
    Shorten ``q`` to the form used when building the request signature.

    Inputs of up to 20 characters are returned unchanged. Longer inputs
    become their first 10 characters, followed by the total length in
    decimal, followed by their last 10 characters. ``None`` is passed through.
    """
    if q is None:
        return None

    length = len(q)
    if length <= 20:
        return q

    head = q[:10]
    tail = q[length - 10:]
    return head + str(length) + tail


def do_request(data, timeout=30):
    """
    POST form-encoded ``data`` to the Youdao endpoint and return the response.

    Args:
        data: Mapping of form fields to send.
        timeout: Seconds to wait for the request. Without a timeout,
            ``requests`` may block forever on a stalled connection, which
            would freeze the notebook UI.

    Returns:
        The ``requests.Response`` object from the POST.

    Raises:
        requests.RequestException: On network errors or when the timeout expires.
    """
    headers = {'Content-Type': 'application/x-www-form-urlencoded'}
    return requests.post(YOUDAO_URL, data=data, headers=headers, timeout=timeout)

def _translate_with_youdao(q, target_lang):
    """Translate English text ``q`` with a signed (v3) Youdao API request."""
    curtime = str(int(time.time()))
    salt = str(uuid.uuid1())
    data = {
        'from': 'en',
        'to': target_lang,
        'signType': 'v3',
        'curtime': curtime,
        'salt': salt,
        'appKey': APP_KEY,
        'q': q,
        'vocabId': VOCAB_ID
    }
    # The signature hashes the key, the shortened input, salt, timestamp and secret.
    sign_str = APP_KEY + truncate(q) + salt + curtime + APP_SECRET
    data['sign'] = encrypt(sign_str)

    response = do_request(data)
    if response.status_code != 200:
        print(f"Failed to translate with Youdao: HTTP {response.status_code}")
        return "Translation failed."

    response_json = response.json()
    # Youdao reports API-level errors inside a 200 response via `errorCode`
    # ("0" means success), so the status code alone is not enough.
    error_code = str(response_json.get('errorCode', '0'))
    translations = response_json.get('translation')
    if error_code != '0' or not translations:
        print(f"Failed to translate with Youdao: errorCode={error_code}")
        return "Translation failed."
    return translations[0]


def connect(q, target_lang):
    """
    Translate ``q`` with the service assigned to ``target_lang``.

    Routing: 'zh-CHS' -> Youdao, 'ms' -> OpenAI GPT-4, 'ta' -> Google Translate.

    Args:
        q: Text to translate. ``None`` or blank text is never sent to a
            service, because there is nothing to translate and the services
            either reject it or invent text.
        target_lang: One of 'zh-CHS', 'ms' or 'ta'.

    Returns:
        The translated text; "Unsupported language." for any other language
        code; "Translation failed." if the chosen service fails; the input
        unchanged ("" for ``None``) when it is blank.
    """
    if target_lang not in ('zh-CHS', 'ms', 'ta'):
        return "Unsupported language."

    # Blank input (e.g. an empty DOCX paragraph) is returned as-is.
    if q is None or not q.strip():
        return q or ""

    if target_lang == 'zh-CHS':
        # Use Youdao for Mandarin translations
        try:
            return _translate_with_youdao(q, target_lang)
        except Exception as e:
            print(f"Failed to translate with Youdao: {e}")
            return "Translation failed."

    if target_lang == 'ms':
        # Use OpenAI for Malay translations
        try:
            return translate_with_openai_gpt4(q, "Malay")
        except Exception as e:
            print(f"Failed to translate with OpenAI: {e}")
            return "Translation failed."

    # Remaining supported language: use Google Translate for Tamil translations
    try:
        return translate_with_google(q, "ta")
    except Exception as e:
        print(f"Failed to translate with Google Translate: {e}")
        return "Translation failed."

In [None]:
# Setup widgets for the free-text translation UI
text = widgets.Textarea(
    value='',
    placeholder='Type something',
    description='Text for Translation:',
    disabled=False,
    layout=widgets.Layout(width='100%', height='200px')
)

language_dropdown = widgets.Dropdown(
    options=[('Mandarin', 'zh-CHS'), ('Malay', 'ms'), ('Tamil', 'ta')],
    value='zh-CHS',
    description='Target Language:',
)

translate_button = widgets.Button(description="Translate")
progress_bar = widgets.IntProgress(value=0, min=0, max=1, description='Processing:', bar_style='info', orientation='horizontal')
# `visible` is not a trait of IntProgress, so passing or assigning it has no
# effect on the rendered widget; visibility is controlled through the layout.
progress_bar.layout.visibility = 'hidden'
output_area = widgets.Output()

# Event Handlers
def on_translate_button_clicked(b):
    """
    Translate the text area's content and print the result in the output area.

    Args:
        b: The button instance passed by the click callback (unused).
    """
    # Disable the button and show the progress bar
    translate_button.disabled = True
    progress_bar.layout.visibility = 'visible'
    progress_bar.value = 0

    output_area.clear_output()
    try:
        with output_area:
            # Execute the translation
            translated_text = connect(text.value, language_dropdown.value)

            # Display translated text
            print(translated_text)
            progress_bar.value = 1
    finally:
        # Re-enable the button and hide the progress bar even if the
        # translation raised, so the UI never gets stuck in a disabled state.
        translate_button.disabled = False
        progress_bar.layout.visibility = 'hidden'

translate_button.on_click(on_translate_button_clicked)

# Display UI components
display(language_dropdown, text, translate_button, progress_bar, output_area)

Dropdown(description='Target Language:', options=(('Mandarin', 'zh-CHS'), ('Malay', 'ms'), ('Tamil', 'ta')), v…

Textarea(value='', description='Text for Translation:', layout=Layout(height='200px', width='100%'), placehold…

Button(description='Translate', style=ButtonStyle())

IntProgress(value=0, bar_style='info', description='Processing:', max=1)

Output()

In [None]:
def extract_and_print_paragraphs(file_content):
    """
    Read the paragraphs of a DOCX file and print a short preview of each.

    Args:
        file_content: Raw bytes of the DOCX file.

    Returns:
        A list with the text of every paragraph, in document order.
    """
    document = docx.Document(BytesIO(file_content))
    texts = [paragraph.text for paragraph in document.paragraphs]

    for number, content in enumerate(texts, start=1):
        # Previews are capped at 50 characters, with an ellipsis when cut.
        if len(content) > 50:
            preview = content[:50] + '...'
        else:
            preview = content
        print(f"Paragraph {number} (truncated):", preview)

    return texts

def replace_text_in_docx(file_content, translated_paragraphs):
    """
    Return a copy of a DOCX file with its paragraph text replaced.

    Paragraphs are paired with ``translated_paragraphs`` by position; pairing
    stops at the shorter of the two sequences.

    Paragraphs whose original text is blank are left untouched. Clearing them
    would delete any non-text run content they hold (such as inline
    pictures), and there is nothing to translate in them anyway.

    Args:
        file_content: Raw bytes of the original DOCX file.
        translated_paragraphs: Replacement text for each paragraph, in
            document order.

    Returns:
        The bytes of the modified DOCX file.
    """
    doc = docx.Document(BytesIO(file_content))
    for para, new_text in zip(doc.paragraphs, translated_paragraphs):
        if not para.text.strip():
            continue
        # NOTE: clear() removes the paragraph's existing runs, so run-level
        # formatting of the original text is not carried over.
        para.clear()
        para.add_run(new_text)

    output_stream = BytesIO()
    doc.save(output_stream)
    # getvalue() returns the whole buffer regardless of the stream position.
    return output_stream.getvalue()

# UI Components for the DOCX workflow: language picker, file uploader,
# action button, progress bar and an output area for messages.
language_dropdown = widgets.Dropdown(
    options=[('Mandarin', 'zh-CHS'), ('Malay', 'ms'), ('Tamil', 'ta')],
    value='zh-CHS',
    description='Language:',
)

uploader = widgets.FileUpload(accept='.docx', multiple=False, description='Upload DOCX')

translate_button = widgets.Button(description="Translate & Download")

# The bar is created hidden; `max` starts at 1 and is reassigned by the click
# handler once the number of paragraphs is known.
# Valid bar_style values: 'success', 'info', 'warning', 'danger' or ''.
progress_bar = widgets.IntProgress(
    value=0,
    min=0,
    max=1,
    description='Processing:',
    bar_style='info',
    orientation='horizontal',
    layout=widgets.Layout(visibility='hidden'),
)

output_area = widgets.Output()

# Event Handlers
def on_translate_button_clicked(b):
    """
    Translate the uploaded DOCX paragraph by paragraph and offer it for download.

    Progress is shown on ``progress_bar``; messages and the download link are
    written to ``output_area``.

    Args:
        b: The button instance passed by the click callback (unused).
    """
    import html  # local import: only needed to escape the download file name

    translate_button.disabled = True
    progress_bar.layout.visibility = 'visible'
    progress_bar.value = 0  # Reset progress bar
    output_area.clear_output()

    try:
        with output_area:
            uploads = uploader.value
            if not uploads:
                print("Please upload a .docx file first.")
                return

            # ipywidgets 7 exposes uploads as {name: {'metadata', 'content'}};
            # ipywidgets 8 exposes a tuple of dicts with 'name' and 'content'.
            if isinstance(uploads, dict):
                uploaded_file = next(iter(uploads.values()))
                file_name = uploaded_file['metadata']['name']
            else:
                uploaded_file = uploads[0]
                file_name = uploaded_file['name']
            # bytes() also accepts the memoryview that ipywidgets 8 provides.
            file_content = bytes(uploaded_file['content'])
            target_lang = language_dropdown.value

            # Display processing message
            print("Processing document...")

            paragraphs = extract_and_print_paragraphs(file_content)
            progress_bar.max = len(paragraphs)

            translated_paragraphs = []
            for para in paragraphs:
                # Blank paragraphs are kept as-is instead of being sent to a
                # translation service, which has nothing to translate.
                if para.strip():
                    translated_paragraphs.append(connect(para, target_lang))
                else:
                    translated_paragraphs.append(para)
                progress_bar.value += 1  # Update progress bar after each paragraph

            new_doc_content = replace_text_in_docx(file_content, translated_paragraphs)

            # Insert the language code before the extension; names without a
            # dot simply get the code appended.
            stem, dot, extension = file_name.rpartition('.')
            if dot:
                new_file_name = f"{stem}_{target_lang}.{extension}"
            else:
                new_file_name = f"{file_name}_{target_lang}"

            b64 = base64.b64encode(new_doc_content).decode()
            # The file name comes from the upload, so escape it before
            # placing it inside an HTML attribute.
            safe_name = html.escape(new_file_name, quote=True)
            href = f'<a href="data:application/octet-stream;base64,{b64}" download="{safe_name}">Download Translated Document</a>'
            display(HTML(href))
    finally:
        # Always restore the UI, even if reading or translating the file failed.
        translate_button.disabled = False
        progress_bar.layout.visibility = 'hidden'

# Run the handler defined above whenever the button is clicked.
translate_button.on_click(on_translate_button_clicked)

# Display UI
# (widgets are rendered in the order given here)
display(language_dropdown, uploader, translate_button, progress_bar, output_area)

Dropdown(description='Language:', options=(('Mandarin', 'zh-CHS'), ('Malay', 'ms'), ('Tamil', 'ta')), value='z…

FileUpload(value={}, accept='.docx', description='Upload DOCX')

Button(description='Translate & Download', style=ButtonStyle())

IntProgress(value=0, bar_style='info', description='Processing:', layout=Layout(visibility='hidden'), max=1)

Output()