# Docs翻译

In [1]:
import os
import json
from glob import glob
from pathlib import Path
import openai  
import re 
from concurrent.futures import ThreadPoolExecutor, as_completed

In [2]:
# IPython shell escape: print the kernel's current working directory.
!pwd

/root/Dropbox/jupyter-ny/jwst


In [12]:
# OpenAI API key. Read from the OPENAI_API_KEY environment variable so that
# no secret has to be stored in the notebook; an empty string is used when
# the variable is not set (same value the notebook previously hard-coded).
API_KEY = os.environ.get("OPENAI_API_KEY", "")

# Chat-completion settings used by process_rst().
model = "gpt-4o-mini"
temperature = 0.2
max_tokens = 16384

# Root directory that is searched recursively for .rst files.
ROOT_DIR = "/root/Dropbox/jupyter-ny/jwst/"

MAX_WORKERS = 8  # Maximum number of worker threads in the pool; adjustable.
SEPARATOR = "\n   \n"  # Appended to each request; replies are cut at its first occurrence.
TIMEOUT = 300  # Passed to the OpenAI client as its request timeout.

# Build the OpenAI client exactly once, with the timeout applied.
# (A second, timeout-less client used to be created first and then shadowed.)
client = openai.OpenAI(api_key=API_KEY, timeout=TIMEOUT)


In [13]:
def process_rst(content):
    """Translate one RST document with the chat-completions API.

    The document is sent as the user message followed by ``SEPARATOR``. The
    reply is passed through ``clean_output`` and everything from the first
    ``SEPARATOR`` onwards is discarded.

    Args:
        content: Full text of the source RST document.

    Returns:
        The translated text (the part of the cleaned reply that precedes the
        first ``SEPARATOR``).

    Raises:
        RuntimeError: If the model reply contains no text.
        Exception: Any error raised while calling the API is logged and
            re-raised. Returning the untranslated input instead would let the
            caller save it as a finished translation, which is then skipped
            on every later run.
    """
    system_prompt = (
        "作为天文学文档翻译专家，精通JWST望远镜数据处理，请：\n"
        "1. 精准翻译RST文档内容\n"
        "2. 严格保留所有RST格式标记，包括但不限于：\n"
        "- 章节标记 (##, **, etc)\n"
        "- 指令块 (.. directive::)\n"
        "- 引用标记 (:ref:, `link`_)\n"
        "- 图片引用和缩进\n"
        "3. 维持中英术语对照\n"
        "4. 使用流畅的中文母语\n"
    )

    try:
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": f"{content}\n{SEPARATOR}"}
            ],
            temperature=temperature,
            max_tokens=max_tokens
        )
        choice = response.choices[0]
        reply = choice.message.content

        # A missing or blank reply is a failed translation, not an empty one.
        if not reply or not reply.strip():
            raise RuntimeError("模型未返回任何译文")

        # finish_reason == "length" means the reply stopped at the token
        # limit; keep the partial text but make the truncation visible.
        if getattr(choice, "finish_reason", None) == "length":
            print("警告: 译文因达到 max_tokens 上限而被截断")

        processed = clean_output(reply)
        return processed.split(SEPARATOR)[0]
    except Exception as e:
        print(f"处理异常: {str(e)}")
        # Propagate so the caller records a failure instead of writing the
        # untranslated source to the *_cn.rst file.
        raise

def process_file(file_path):
    """Translate a single RST file and save the result beside it.

    The file is read as UTF-8, passed to ``process_rst``, and the returned
    text is written as UTF-8 to ``<stem>_cn.rst`` in the same directory.

    Args:
        file_path: Path of the source ``.rst`` file.

    Returns:
        ``True`` when the translated file was written, ``False`` when any
        exception occurred (the error is printed, not raised).
    """
    try:
        source = Path(file_path)

        # Load the source document.
        original_text = source.read_text(encoding="utf-8")

        # Translate it.
        translated_text = process_rst(original_text)

        # Store the translation next to the source as "<stem>_cn.rst".
        target = source.parent / f"{source.stem}_cn.rst"
        with target.open("w", encoding="utf-8") as out:
            out.write(translated_text)
    except Exception as exc:
        print(f"文件处理失败: {file_path} - {exc}")
        return False
    return True
    
def clean_output(text):
    """Normalise raw model output.

    Currently this only removes leading and trailing whitespace; further
    clean-up rules can be added here.

    Args:
        text: Raw text returned by the model.

    Returns:
        ``text`` without leading or trailing whitespace.
    """
    return text.strip()

def main():
    """Translate every not-yet-translated ``.rst`` file under ``ROOT_DIR``.

    Steps:
      1. Recursively collect ``*.rst`` files below ``ROOT_DIR``, leaving out
         files whose stem ends with ``_cn`` or ``_translated`` (translation
         output).
      2. Drop sources that already have a sibling ``<stem>_cn.rst``.
      3. Run ``process_file`` on the remaining files in a thread pool of
         ``MAX_WORKERS`` workers and print a per-file result and a summary.

    Returns:
        None. Progress and results are reported with ``print``.
    """
    # Stem suffixes that mark a file as translation output, not a source.
    translated_suffixes = ("_cn", "_translated")

    # Look at the file stem only. A substring test on the whole path would
    # also exclude real sources whose directory or file name merely contains
    # "_cn" (e.g. "docs_cnn/model.rst").
    rst_files = [
        f for f in glob(f"{ROOT_DIR}/**/*.rst", recursive=True)
        if not Path(f).stem.endswith(translated_suffixes)
    ]

    # Keep only the sources that do not have a translation yet.
    files_to_process = []
    for rst in rst_files:
        file_path_obj = Path(rst)
        translated_file = file_path_obj.parent / f"{file_path_obj.stem}_cn.rst"
        if not translated_file.exists():
            files_to_process.append(rst)
        else:
            print(f"跳过已存在的翻译: {translated_file}")

    print(f"发现 {len(rst_files)} 个文件，需要处理 {len(files_to_process)} 个")

    if not files_to_process:
        print("没有需要翻译的新文件")
        return

    success_count = 0
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        # Map each future back to its file so results can be reported by name.
        futures = {executor.submit(process_file, f): f for f in files_to_process}

        for future in as_completed(futures):
            file_name = futures[future]
            try:
                if future.result():
                    success_count += 1
                    print(f"成功处理: {file_name}")
                else:
                    print(f"处理失败: {file_name}")
            except Exception as e:
                # process_file normally reports failure via its return value;
                # this guards against anything that still escapes the worker.
                print(f"系统错误: {file_name} - {str(e)}")

    print(f"处理完成 | 成功: {success_count} | 失败: {len(files_to_process)-success_count}")

In [14]:
# Entry point: start the batch translation when this cell/script is executed directly.
if __name__ == "__main__":
    main()

跳过已存在的翻译: /root/Dropbox/jupyter-ny/jwst/docs/github_setup_cn.rst
跳过已存在的翻译: /root/Dropbox/jupyter-ny/jwst/docs/github_workflow_cn.rst
跳过已存在的翻译: /root/Dropbox/jupyter-ny/jwst/docs/data_files_cn.rst
跳过已存在的翻译: /root/Dropbox/jupyter-ny/jwst/docs/notebooks_cn.rst
跳过已存在的翻译: /root/Dropbox/jupyter-ny/jwst/docs/github_pr_cn.rst
跳过已存在的翻译: /root/Dropbox/jupyter-ny/jwst/docs/submitting_notebooks_cn.rst
跳过已存在的翻译: /root/Dropbox/jupyter-ny/jwst/docs/stsci_notebook_leads_cn.rst
跳过已存在的翻译: /root/Dropbox/jupyter-ny/jwst/docs/notebook_development_workflow_cn.rst
跳过已存在的翻译: /root/Dropbox/jupyter-ny/jwst/docs/requirements_cn.rst
发现 12 个文件，需要处理 3 个
成功处理: /root/Dropbox/jupyter-ny/jwst/install.rst
成功处理: /root/Dropbox/jupyter-ny/jwst/index.rst
成功处理: /root/Dropbox/jupyter-ny/jwst/CONTRIBUTING.rst
处理完成 | 成功: 3 | 失败: 0
