In [1]:
#!pip install gradio
#!pip install openai

In [2]:
import gradio as gr
import openai
import docx
import spacy
import os
import json
import warnings
warnings.filterwarnings('ignore')
# Take the OpenAI API key from the OPENAI_API_KEY environment variable so the
# secret does not have to be written into the notebook source. The original
# placeholder string is kept as the fallback when the variable is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY", "OPEN_API_KEY")


def split_document_into_segments(document_text, max_tokens_per_segment):
    """Group the paragraphs of *document_text* into size-limited segments.

    The text is split on newlines and consecutive paragraphs are packed
    into one segment for as long as the running length stays below the
    limit. Every paragraph is stored with a trailing newline.

    Args:
        document_text: The full document as a single string.
        max_tokens_per_segment: Upper bound for a segment. Despite the
            name, the comparison uses ``len()``, i.e. characters, not
            model tokens.

    Returns:
        A list of non-empty segment strings in document order. A single
        paragraph longer than the limit is not split; it becomes a
        segment of its own.
    """
    segments = []
    current_segment = ""

    for paragraph in document_text.split("\n"):
        if len(current_segment) + len(paragraph) < max_tokens_per_segment:
            current_segment += paragraph + "\n"
        else:
            # Only flush when something has been collected: if the very
            # first paragraph is already over the limit there is nothing
            # to flush yet, and an empty segment must not be emitted.
            if current_segment:
                segments.append(current_segment)
            current_segment = paragraph + "\n"

    if current_segment:
        segments.append(current_segment)

    return segments

def translate_document_segment(segment, target_language):
    """Translate one text segment with the OpenAI completion endpoint.

    Args:
        segment: The text to translate.
        target_language: Name of the language inserted into the prompt.

    Returns:
        The text of the first returned choice, with surrounding
        whitespace stripped.
    """
    request = {
        "engine": "text-davinci-003",
        "prompt": f"Translate the following segment to {target_language}:\n{segment}",
        # Adjust the value based on your plan's token limit
        "max_tokens": 2000,
    }
    completion = openai.Completion.create(**request)
    first_choice = completion.choices[0]
    return first_choice.text.strip()

def document_translation_interface(docx_file, target_language):
    """Translate a .docx document and write the result to a text file.

    Args:
        docx_file: The document to translate. It is handed to
            ``extract_text_from_docx`` and from there to
            ``docx.Document``, so a path string (which is what the
            Textbox in the UI supplies) is accepted.
        target_language: Name of the language to translate into.

    Returns:
        The name of the UTF-8 text file that holds the translation
        (``"translated_document.txt"`` in the working directory; it is
        overwritten on every call).
    """
    # A .docx file is a zip archive, so its raw bytes cannot be decoded as
    # UTF-8 text; read the paragraphs through python-docx instead.
    document_text = extract_text_from_docx(docx_file)

    max_tokens_per_segment = 2000  # Adjust the value based on your plan's token limit

    segments = split_document_into_segments(document_text, max_tokens_per_segment)

    # Segments that contain only whitespace carry nothing to translate, so
    # they are not sent to the API.
    translated_segments = [
        translate_document_segment(segment, target_language)
        for segment in segments
        if segment.strip()
    ]

    translated_document = "\n".join(translated_segments)

    # Save the translated document to a fixed-name file in the working directory
    translated_filename = "translated_document.txt"
    with open(translated_filename, "w", encoding="utf-8") as file:
        file.write(translated_document)

    return translated_filename

# Load the spaCy English pipeline "en_core_web_sm" once, at module level.
# find_names() passes document text through it and reads the resulting
# entities to pick out those labelled "PERSON".
nlp = spacy.load("en_core_web_sm")
def extract_text_from_docx(docx_path):
    """Return the text of a .docx document, one paragraph per line.

    Args:
        docx_path: Value passed straight to ``docx.Document``.

    Returns:
        The text of every paragraph in document order, each followed by
        a newline, concatenated into one string.
    """
    paragraphs = docx.Document(docx_path).paragraphs
    return "".join(f"{paragraph.text}\n" for paragraph in paragraphs)

# Collect the person names mentioned in a .docx document
def find_names(docx_path):
    """Return the PERSON entities spaCy detects in a .docx document.

    Args:
        docx_path: Document location, forwarded to
            ``extract_text_from_docx``.

    Returns:
        A list with the text of every entity labelled ``"PERSON"``, in
        the order the pipeline reports them. Repeated mentions are kept.
    """
    # Run the extracted text through the module-level spaCy pipeline
    parsed = nlp(extract_text_from_docx(docx_path))

    names = []
    for entity in parsed.ents:
        # Keep only entities recognised as people
        if entity.label_ == "PERSON":
            names.append(entity.text)
    return names
def replace_names_in(docx_path, json_replacements):
    """Replace names in a .docx document and write the result to a file.

    Args:
        docx_path: Document location, forwarded to
            ``extract_text_from_docx``.
        json_replacements: JSON object, as a string, that maps each old
            name to its replacement, e.g. ``{"Alice": "Bob"}``.

    Returns:
        The name of the UTF-8 text file holding the updated text
        (``"text.txt"`` in the working directory), or a string starting
        with ``"Error:"`` when *json_replacements* is not a JSON object
        whose values are all strings.
    """
    import re  # local import: only this function needs it

    # Load the JSON data
    try:
        replacements = json.loads(json_replacements)
    except (json.JSONDecodeError, TypeError) as e:
        return f"Error: Invalid JSON format - {str(e)}"

    # Valid JSON is not necessarily an object (it may be a list, a number,
    # ...); anything else has no old-name -> new-name pairs to iterate.
    if not isinstance(replacements, dict):
        return (
            "Error: Invalid JSON format - expected an object mapping "
            "old names to new names"
        )
    for old_name, new_name in replacements.items():
        if not isinstance(new_name, str):
            return (
                "Error: Invalid JSON format - replacement for "
                f"{old_name!r} must be a string"
            )

    # An empty key would match between every pair of characters; ignore it.
    mapping = {old: new for old, new in replacements.items() if old}

    text = extract_text_from_docx(docx_path)
    if mapping:
        # Replace everything in a single pass so that the output of one
        # replacement is never rewritten by another (e.g. swapping two
        # names). Longer names come first in the alternation so that
        # "John Smith" wins over "John" at the same position.
        ordered = sorted(mapping, key=len, reverse=True)
        pattern = re.compile("|".join(re.escape(old) for old in ordered))
        text = pattern.sub(lambda match: mapping[match.group(0)], text)

    updated_filename = "text.txt"
    with open(updated_filename, "w", encoding="utf-8") as file:
        file.write(text)

    return updated_filename
with gr.Blocks() as demo:
    # --- Translation: document path + target language -> translated file ---
    docx_path = gr.Textbox(label="Enter Document Path")
    target_language = gr.Radio(
        ["Spanish", "French", "German"],
        label="Target Language",
    )
    translated_document = gr.File(label="translated Document")
    translate_btn = gr.Button('Translate')
    translate_btn.click(
        fn=document_translation_interface,
        inputs=[docx_path, target_language],
        outputs=translated_document,
        api_name="Translate",
    )

    # --- Name finder: document path -> names shown in a textbox ---
    doc_path = gr.Textbox(label="doc_path")
    name = gr.Textbox(label="name")
    greet_btn = gr.Button("find_names")
    greet_btn.click(
        fn=find_names,
        inputs=doc_path,
        outputs=name,
        api_name="find_names",
    )

    # --- Name replacement: document path + JSON mapping -> updated file ---
    # doc_path is rebound to a new Textbox here; the find_names handler
    # above was already registered with the earlier one.
    doc_path = gr.Textbox(label="doc_path")
    replacement = gr.Textbox(label="New Names (JSON format)")
    updated_document = gr.File(label="Updated Document")
    replace_btn = gr.Button("replace_names")
    replace_btn.click(
        fn=replace_names_in,
        inputs=[doc_path, replacement],
        outputs=updated_document,
        api_name="replace_names",
    )


demo.launch()


Running on local URL:  http://127.0.0.1:7887

To create a public link, set `share=True` in `launch()`.


