In [1]:
# the module for extracting text from a given PDF file
from extract_text import PDF_to_Text
# the module for ChatGPT translation via API call
from translator import Translator

In [2]:
def text_extractor(file_path):
    """Extract the text contents of a PDF file and split them into paragraphs.

    Parameters
    ----------
    file_path : str
        the absolute file path where the PDF file is located

    Returns
    -------
    tuple
        ``(doc, para_file_path)`` -- the two values produced by
        ``PDF_to_Text.iterate()``. Going by the original notes, ``doc`` is the
        last paragraph of the PDF file and ``para_file_path`` is the path
        where the paragraph txt files are stored (assumption -- confirm
        against the ``extract_text`` module).

    Raises
    ------
    Exception
        Any error raised while extracting is printed and then re-raised.
        Previously the error was only printed and ``None`` was returned
        implicitly, so the caller's ``doc, para_file_path = ...`` unpacking
        failed with an unrelated ``TypeError`` that hid the real cause.
    """
    try:
        # initiate the class:
        t_extractor = PDF_to_Text(file_path)
        # create a directory to store the extracted text
        t_extractor.create_directory()
        # read the PDF file, then collect all of its text; the return values
        # are not used here -- presumably both calls set up state on the
        # extractor that iterate() relies on (TODO confirm)
        t_extractor.read_pdf()
        t_extractor.get_txt()
        # break down the document into paragraph level
        doc, para_file_path = t_extractor.iterate()

        return doc, para_file_path

    except Exception as err:
        # keep the short message visible in the notebook output ...
        print(err)
        # ... but let the failure propagate instead of returning None
        raise

def ChatGPT_translation(path, language):
    """Execute the API calls that make ChatGPT translate the paragraph files.

    Parameters
    ----------
    path : str
        the location where the text files to translate are stored
    language : str
        the language you want ChatGPT to translate into

    Returns
    -------
    tuple
        ``(doc, t_results, total_tokens)`` where ``doc`` is the first value
        returned by ``Translator.iterate()``, ``t_results`` is the return
        value of ``Translator.merge_files()`` and ``total_tokens`` is the
        return value of ``Translator.price_calculation()``. Going by the
        original notes, ``doc`` is the pandas DataFrame with the API log and
        ``t_results`` is the merged translation text (assumption -- confirm
        against the ``translator`` module).

    Raises
    ------
    Exception
        Any error raised while translating is printed and then re-raised.
        Previously the error was only printed and ``None`` was returned
        implicitly, so the caller's three-way unpacking failed with an
        unrelated ``TypeError`` that hid the real cause.
    """
    try:
        # initiate the class
        t = Translator(path, language)
        # create a directory for storing translation results
        t.create_directory()
        # get the file list from the paragraph text files; the return value is
        # not used here -- presumably the call sets up state on the translator
        # that iterate() relies on (TODO confirm)
        t.get_metadata()
        # ask ChatGPT to translate each paragraph txt file via API call; only
        # the first of the two returned values is handed back to the caller
        doc, _ = t.iterate()
        # merge all the translation result files into a final txt file
        t_results = t.merge_files()
        # showing the cumulative price & tokens for this PDF translation
        total_tokens = t.price_calculation()

        return doc, t_results, total_tokens

    except Exception as err:
        # keep the short message visible in the notebook output ...
        print(err)
        # ... but let the failure propagate instead of returning None
        raise

In [3]:
if __name__ == '__main__':

    # Absolute path of the PDF file to translate.
    # NOTE(review): this is a machine-specific location -- change it before
    # running the notebook anywhere else.
    file_path = '/Users/belleshen/Documents/translator_ChatGPT/translator_ChatGPT/Sci. Rep. 7, 161 (2017).pdf'
    # The language you would like ChatGPT to translate into.
    language = 'traditional chinese'

    # Step 1: extract the PDF text and break it down to paragraph level.
    doc, para_file_path = text_extractor(file_path)
    # Step 2: translate the paragraph files; note that `doc` is rebound here
    # to the first value returned by the translation step.
    doc, t_results, total_tokens = ChatGPT_translation(para_file_path, language)

the begin words you type in: The local electronic and atomic structures
the end words you type in: least-squares-fitting computer program
finishing extracting


100%|██████████| 2/2 [00:39<00:00, 19.99s/it]


Here is the summary of cost for this API call:
total tokens: 1228, price: $0.002456 (US)



