In [33]:
from pptx import Presentation
from pptx.enum.lang import MSO_LANGUAGE_ID

from genai.credentials import Credentials
from genai.schemas import GenerateParams
from genai.model import Model

import os
from dotenv import load_dotenv

In [188]:
# --- LLM client configuration -------------------------------------------
# Pull GENAI_KEY / GENAI_API from a local .env file (if any) into the
# process environment before reading them.
load_dotenv()

# Alternative model that was tried previously: "ibm/granite-13b-chat-v1"
MODEL_ID = "meta-llama/llama-2-70b-chat"

api_key = os.environ.get("GENAI_KEY")
api_endpoint = os.environ.get("GENAI_API")
creds = Credentials(api_key, api_endpoint)

# Generation settings shared by every translation request.
# NOTE(review): the stop sequence below is a literal backslash-n pair, not
# two newline characters -- confirm that is what the service expects.
params = GenerateParams(
    decoding_method="greedy",
    min_new_tokens=1,
    max_new_tokens=50,
    temperature=0.05,
    top_k=50,
    top_p=1,
    stop_sequences=["\\n\\n"],
    stream=False,
)

llm = Model(model=MODEL_ID, credentials=creds, params=params)

In [207]:
def buildprompt(text):
    """Build the English -> Traditional Chinese translation prompt.

    Args:
        text: The English source text; it is interpolated after the
            ``english:`` label.

    Returns:
        A multi-line prompt string. Every line after the first is indented
        by four spaces, and the prompt ends with ``traditional chinese:``
        so the model's completion is the translation.
    """
    # NOTE(review): the source text is what sits between <<SYS>> and
    # <</SYS>> here, while the instructions sit outside -- confirm this
    # layout is intended for the target model.
    prompt_lines = [
        "[INST]be a translator, be concise.",
        "return the translated content only.",
        "dont output note.",
        "keep the time format.",
        "keep watsonx, watson.",
        "please help translate following english to traditional chinese.",
        "<<SYS>>",
        f"english:{text}",
        "<</SYS>>",
        "[/INST]",
        "traditional chinese:",
    ]
    # Joining with newline + four spaces reproduces the continuation-line
    # indentation of the prompt.
    return "\n    ".join(prompt_lines)

In [211]:
# Deck to translate; path is relative to the notebook's working directory.
input_file_path = "sample.pptx"

sourcelang = MSO_LANGUAGE_ID.ENGLISH_US
targetlang = MSO_LANGUAGE_ID.CHINESE_HONG_KONG_SAR


def _has_content(text):
    """Return True when ``text`` contains something other than whitespace."""
    return bool(text and text.strip())


def _translate(text):
    """Translate ``text`` with the configured ``llm``.

    Wraps ``text`` with ``buildprompt``, sends the single prompt to
    ``llm.generate`` and returns the ``generated_text`` of the first result.
    """
    response = llm.generate([buildprompt(text)])
    return response[0].generated_text


presentation = Presentation(input_file_path)
number_of_slides = len(presentation.slides)

for slide_number, slide in enumerate(presentation.slides, start=1):
    print(f"Slide {slide_number} of {number_of_slides}")

    # Translate speaker notes.
    if slide.has_notes_slide:
        notes_frame = slide.notes_slide.notes_text_frame
        # python-pptx returns None when the notes slide has no notes
        # placeholder, so guard before touching .text.
        if notes_frame is not None and _has_content(notes_frame.text):
            notes_frame.text = _translate(notes_frame.text)

    # Translate the text carried by the slide's shapes.
    # NOTE(review): only the shapes yielded by slide.shapes are visited;
    # confirm whether shapes nested inside groups need translating too.
    for shape in slide.shapes:
        if shape.has_table:
            for cell in shape.table.iter_cells():
                # Blank cells are left untouched: no LLM call, and no risk
                # of the model inventing text for an empty cell.
                if _has_content(cell.text):
                    cell.text = _translate(cell.text)

        if shape.has_text_frame:
            for paragraph in shape.text_frame.paragraphs:
                for paragraph_run in paragraph.runs:
                    # Whitespace-only runs (spacers) keep their text as is.
                    if _has_content(paragraph_run.text):
                        paragraph_run.text = _translate(paragraph_run.text)
                    # Tag every run with the target proofing language.
                    paragraph_run.font.language_id = targetlang

Slide 1 of 17
Slide 2 of 17
Slide 3 of 17
Slide 4 of 17
Slide 5 of 17
Slide 6 of 17
Slide 7 of 17
Slide 8 of 17
Slide 9 of 17
Slide 10 of 17
Slide 11 of 17
Slide 12 of 17
Slide 13 of 17
Slide 14 of 17
Slide 15 of 17
Slide 16 of 17
Slide 17 of 17


In [212]:
# Write the translated deck next to the input as "<name>-tw<ext>".
# os.path.splitext only splits off the final extension, so a ".pptx"
# elsewhere in the path is left alone, and an input whose extension is not
# exactly ".pptx" (e.g. ".PPTX") still gets a distinct output name instead
# of overwriting the source file.
language_code = 'tw'
path_root, path_ext = os.path.splitext(input_file_path)
output_file_path = '{root}-{language_code}{ext}'.format(
        root=path_root, language_code=language_code, ext=path_ext)

presentation.save(output_file_path)