## Step 1. Import packages and define the ChatGPT parser

In [1]:
import os
import time
from tqdm import tqdm
from utils import get_split_list, read_specific_rows, create_txt_file
from selenium.webdriver.remote.webdriver import By
import undetected_chromedriver as uc
from selenium.webdriver.common.keys import Keys

class gptParser:
    """Thin wrapper that drives a ChatGPT web page through a webdriver.

    The instance holds the webdriver it was given, opens the ChatGPT url on
    construction, and exposes helpers to send a message, read a response
    from the page, start a new chat and shut the browser down.
    """

    def __init__(self,
                 driver,
                 gpt_url: str = 'https://chat.openai.com/'):
        """Store the webdriver and navigate it to ChatGPT.

        Args:
            driver: An already-started webdriver instance, e.g. the object
                returned by ``gptParser.get_driver``.
            gpt_url (str, optional): The url of ChatGPT.
        """
        self.driver = driver
        self.driver.get(gpt_url)

    @staticmethod
    def get_driver(driver_path: str = None):
        """Create an ``undetected_chromedriver`` Chrome instance.

        Args:
            driver_path (str, optional): The path of the chromedriver
                executable. When ``None`` the argument is not passed to
                ``uc.Chrome`` at all.

        Returns:
            The ``uc.Chrome`` instance.
        """
        options = uc.ChromeOptions()
        if driver_path is None:
            return uc.Chrome(options=options)
        return uc.Chrome(options=options, executable_path=driver_path)

    def __call__(self, msg: str):
        """Type ``msg`` into the first ``<textarea>`` on the page and press Return.

        Raises:
            IndexError: If the page currently contains no ``<textarea>``.
        """
        # NOTE(review): if ``msg`` contains newline characters the page may
        # treat each one as Return and submit early -- confirm with callers.
        input_field = self.driver.find_elements(By.TAG_NAME, 'textarea')[0]
        input_field.send_keys(msg)
        input_field.send_keys(Keys.RETURN)

    def read_respond(self):
        """Return the text of the second-to-last ``<p>`` element on the page.

        Returns:
            The element text, or ``None`` when the lookup fails (for example
            fewer than two ``<p>`` elements, or a webdriver error).
        """
        try:
            paragraphs = self.driver.find_elements(By.TAG_NAME, 'p')
            return paragraphs[-2].text
        except Exception:
            # Best-effort read: ordinary failures are reported as ``None``.
            # ``Exception`` (rather than a bare ``except``) lets
            # KeyboardInterrupt / SystemExit propagate so a long run can
            # still be stopped by the user.
            return None

    def new_chat(self):
        """Click the link whose text is exactly "New chat"."""
        self.driver.find_element(By.XPATH, '//a[text()="New chat"]').click()

    def close(self):
        """Quit the underlying webdriver."""
        self.driver.quit()

## Step 2. Login to ChatGPT

In [2]:
# Path handed to gptParser.get_driver as the chromedriver executable.
driver_path = 'chromedriver.exe'
# Start the browser first, then wrap it; constructing gptParser opens the
# ChatGPT page in that browser.
driver = gptParser.get_driver(driver_path=driver_path)
gpt_parser = gptParser(driver=driver)

## Step 3. Convert the .pdf file to .txt (set `file_name` to the PDF file you want to parse)

In [7]:
# Collected responses, appended to by the translation loop in Step 4.
result = list()
# Name of the PDF to parse (change this for a different document); Step 4
# reads the text from f'text/{file_name}.txt'.
file_name = '3432212'
# Consecutive entries of split_list are used in Step 4 as the start/end
# arguments of read_specific_rows for each page.
split_list = get_split_list(file_name)

## Step 4. Start translation

In [8]:
# Send each page of the text file to ChatGPT and collect the responses.
#
# A page is the row range [split_list[i], split_list[i+1]) of the text file,
# so there are len(split_list) - 1 pages. For every page the query is sent,
# the loop waits for the answer, then reads it back from the web page.
#
# Outcomes per attempt:
#   * response is None             -> report the error and skip the page
#   * response equals the last one -> assume the page has not updated yet and
#                                     try again, at most
#                                     MAX_SAME_RESPONSE_RETRIES times
#   * anything else                -> store it and move to the next page
MAX_SAME_RESPONSE_RETRIES = 3

print("Start Translating...")
total_pages = len(split_list) - 1
i = 0
same_response_retries = 0
while i < total_pages:
    print(f"Current page: [{i}/{total_pages}]")
    query = read_specific_rows(f'text/{file_name}.txt', split_list[i], split_list[i+1])

    try:
        gpt_parser(query) # send the query
        time.sleep(15)  # give ChatGPT time to answer before reading the page
        response = gpt_parser.read_respond()
    except Exception as e:
        response = None
        print(e)

    # Check for failure before anything else so that None is never stored in
    # `result`, not even as the very first entry.
    if response is None: # error occurred
        print(f"Error occured, number {i}")
        i += 1
        same_response_retries = 0
        continue

    if result and response == result[-1]: # same response as the previous page
        if same_response_retries < MAX_SAME_RESPONSE_RETRIES:
            same_response_retries += 1
            print("Same response, try again...")
            continue # try again
        # Bounded retries: give up on this page instead of looping forever.
        print(f"Same response after {MAX_SAME_RESPONSE_RETRIES} retries, skipping number {i}")
        i += 1
        same_response_retries = 0
        continue

    # new response
    result.append(response)
    i += 1
    same_response_retries = 0

Start Translating...
Current page: [0/114]
Current page: [1/114]
Current page: [2/114]


## Step 5. Write the translation to the "results" folder

In [5]:
# Make sure the "results" folder exists; exist_ok avoids a separate
# existence check and is a no-op when the folder is already there.
os.makedirs('results', exist_ok=True)
# Keep the output path in its own variable: rebinding `file_name` here would
# break the f'text/{file_name}.txt' input path if Step 4 is re-run afterwards.
output_path = f'results/{file_name}_translated.txt'
create_txt_file(result, output_path)