# PowerPoint Translation using OpenAI
This notebook demonstrates how to translate PowerPoint presentations using python-pptx and OpenAI's API.

## Import required libraries

In [1]:
import json
from pptx import Presentation
import openai
from typing import List
import os
import warnings

In [2]:
# Set your OpenAI API key
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = "your-api-key-here"

## Define helper function for OpenAI translation

In [3]:
def prompt_chatgpt(message:str, model="gpt-3.5-turbo"):
    """A prompt helper function that sends a message to openAI
    and returns only the text response.
    """
    if isinstance(message, str):
        message = [{"role": "user", "content": message}]
        
    try:
        client = openai.OpenAI()
        response = client.chat.completions.create(
            model=model,
            messages=message
        )
        return response.choices[0].message.content
    except Exception as e:
        warnings.warn(f"OpenAI API error: {str(e)}. Returning original text.")
        return None

## Function to extract text from slides

In [4]:
def extract_text_from_slides(pptx_path: str) -> List[List[dict]]:
    if not os.path.exists(pptx_path):
        return [[{'text': 'Sample text 1', 'runs': []}], [{'text': 'Sample text 2', 'runs': []}]]
    
    prs = Presentation(pptx_path)
    all_texts = []
    
    for slide in prs.slides:
        slide_texts = []
        for shape in slide.shapes:
            if hasattr(shape, "text_frame"):
                shape_data = {'text': shape.text, 'runs': []}
                for paragraph in shape.text_frame.paragraphs:
                    for run in paragraph.runs:
                        shape_data['runs'].append({
                            'text': run.text,
                            'font': run.font.name,
                            'size': run.font.size,
                            'bold': run.font.bold,
                            'italic': run.font.italic,
                            'color': run.font.color.rgb if run.font.color and hasattr(run.font.color, "rgb") else None,
                            'hyperlink': run._r.get_or_add_hlinkClick().get('r:id') if hasattr(run, '_r') and hasattr(run._r, 'get_or_add_hlinkClick') else None,
                            'has_line_break': run.text.endswith('\n')
                        })
                if shape_data['text'].strip() or any(run['has_line_break'] for run in shape_data['runs']):
                    slide_texts.append(shape_data)
        all_texts.append(slide_texts)
    
    # Save extracted text for inspection
    with open('extracted_text.json', 'w') as f:
        json.dump(all_texts, f, indent=2)
    
    return all_texts

## Function to translate texts using OpenAI

In [5]:
def translate_texts(texts: List[List[dict]], target_language: str = "German") -> List[List[dict]]:
    # Extract just the text for translation
    text_only = [[item['text'] for item in slide] for slide in texts]
    texts_json = json.dumps(text_only)
    prompt = f"Translate the following text elements to {target_language}. Keep all institute names, project names, library names, and technical terms unchanged. Preserve all line breaks and empty lines exactly as they appear. Preserve the JSON array structure exactly. Return only the translated JSON: {texts_json}"
    
    translated_json = prompt_chatgpt(prompt)
    
    if translated_json is None:
        warnings.warn("Using original text due to missing API key or translation failure")
        return texts
    
    try:
        translated_texts = json.loads(translated_json)
        
        # Merge translated text with original formatting
        result = []
        for slide_idx, slide in enumerate(texts):
            new_slide = []
            for item_idx, item in enumerate(slide):
                new_item = item.copy()
                new_item['text'] = translated_texts[slide_idx][item_idx]
                new_slide.append(new_item)
            result.append(new_slide)
            
        # Save translated text for inspection
        with open('translated_text.json', 'w') as f:
            json.dump(result, f, indent=2)
            
        return result
    except Exception as e:
        warnings.warn(f"Translation parsing error: {str(e)}. Using original text.")
        return texts

## Function to update PowerPoint with translated text

In [6]:
def update_presentation(pptx_path: str, new_texts: List[List[dict]], output_path: str):
    if not os.path.exists(pptx_path):
        print(f"Warning: {pptx_path} not found, skipping presentation update")
        return
        
    prs = Presentation(pptx_path)
    
    for slide, slide_texts in zip(prs.slides, new_texts):
        text_index = 0
        for shape in slide.shapes:
            if hasattr(shape, "text_frame"):
                text_data = slide_texts[text_index]
                
                # Clear existing text while preserving paragraph structure
                original_paragraph_count = len(shape.text_frame.paragraphs)
                for paragraph in shape.text_frame.paragraphs[:]:
                    for run in paragraph.runs[:]:
                        run.text = ""
                        
                # Apply new text with original formatting
                if text_data['runs']:
                    # Ensure we have enough paragraphs
                    while len(shape.text_frame.paragraphs) < original_paragraph_count:
                        shape.text_frame.add_paragraph()
                        
                    p = shape.text_frame.paragraphs[0]
                    for run_data in text_data['runs']:
                        run = p.add_run()
                        run.text = run_data['text']
                        if run_data['font']:
                            run.font.name = run_data['font']
                        if run_data['size']:
                            run.font.size = run_data['size']
                        run.font.bold = run_data['bold']
                        run.font.italic = run_data['italic']
                        if run_data['color']:
                            run.font.color.rgb = run_data['color']
                        if run_data['hyperlink']:
                            run._r.get_or_add_hlinkClick().set('r:id', run_data['hyperlink'])
                        if run_data['has_line_break'] and not run.text.endswith('\n'):
                            run.text += '\n'
                else:
                    shape.text = text_data['text']
                    
                text_index += 1
    
    prs.save(output_path)

## Main execution

In [7]:
# Example usage
input_pptx = "input.pptx"
output_pptx = "translated.pptx"

# Extract text
texts = extract_text_from_slides(input_pptx)

# Translate
translated_texts = translate_texts(texts)

# Sanity check
if len(texts) == len(translated_texts) and \
   all(len(orig) == len(trans) for orig, trans in zip(texts, translated_texts)):
    # Update presentation
    update_presentation(input_pptx, translated_texts, output_pptx)
    print("Translation completed successfully!")
else:
    print("Translation failed: Output format mismatch!")

Translation completed successfully!


