In [10]:
from langchain_anthropic import ChatAnthropic
from langchain_core.output_parsers import StrOutputParser
from concurrent.futures import ThreadPoolExecutor
import pandas as pd
from tqdm import tqdm

# Initialize the Claude chat model and pipe it into a string output parser;
# the rest of this script calls `chain.invoke(prompt)` and treats the result
# as a plain string.
llm = ChatAnthropic(
    model="claude-3-5-sonnet-20241022",
    max_tokens=8192,
)
chain = llm | StrOutputParser()

def translate_batch(texts):
    """Translate a batch of English texts into Traditional Chinese.

    All texts are sent to the model in a single prompt, and the model is
    asked to separate the translations with ``<split>``. The response is
    then split back into one translation per input text.

    Args:
        texts: List of source texts. Entries may be empty strings.

    Returns:
        A list with exactly one entry per input text, in input order.
        An empty input yields an empty list; a batch containing only
        blank entries is returned unchanged without calling the model.
    """
    if not texts:
        return []

    # Nothing to translate: skip the API call and keep blank entries as-is,
    # rather than asking the model to "translate" empty input.
    if not any(str(text).strip() for text in texts):
        return list(texts)

    # Combine the texts into one numbered batch request.
    batch_prompt = "\n".join([
        f"Text {i+1}: {text}" for i, text in enumerate(texts)
    ])

    prompt = \
    f"""請將以下文本從英文翻譯成繁體中文。只需要輸出翻譯結果，每個翻譯之間插入<split>方便我之後分割，
    若提供內容的是類似json格式，請只翻譯value的部分，其他部份請保持原樣，若為空json請保持空json
    不需要覆述開頭的Text，若為空行請保持空行，請確保你有生成 TEXT -1 個<split>用於正確對應翻譯數量。 
    
    {batch_prompt}"""

    response = chain.invoke(prompt)

    # Parse the response; strip every segment so the newlines the model puts
    # around each <split> marker do not leak into the translations.
    translations = [part.strip() for part in response.strip().split('<split>')]

    # Tolerate a trailing separator ("a<split>b<split>"), which would
    # otherwise produce a spurious empty segment at the end.
    while len(translations) > len(texts) and not translations[-1]:
        translations.pop()

    if len(translations) == len(texts):
        return translations

    if len(texts) == 1:
        # A single text cannot be misaligned: any stray separators are part
        # of its translation, so merge the pieces back together.
        return ["\n".join(part for part in translations if part)]

    # The model returned the wrong number of segments, so positions can no
    # longer be trusted. Re-translate one text at a time to guarantee that
    # every output lines up with its input.
    return [translate_batch([text])[0] for text in texts]

def process_dataframe_parallel(df, column, num_workers=4, batch_size=10):
    """Translate one DataFrame column using parallel batch requests.

    Missing values in the column are replaced with empty strings, the
    values are cut into consecutive batches of ``batch_size`` items, and
    each batch is passed to ``translate_batch`` on a thread pool while a
    tqdm progress bar tracks completed batches.

    Args:
        df: DataFrame holding the column to translate.
        column: Name of the column to translate.
        num_workers: Maximum number of worker threads.
        batch_size: Number of texts sent per ``translate_batch`` call.

    Returns:
        A flat list of all batch results concatenated in batch order.
    """
    values = df[column].fillna('').tolist()

    # Slice the values into consecutive fixed-size batches.
    chunks = []
    for start in range(0, len(values), batch_size):
        chunks.append(values[start:start + batch_size])

    with ThreadPoolExecutor(max_workers=num_workers) as pool:
        # pool.map yields results in submission order; wrapping it in tqdm
        # shows progress as each batch result is consumed.
        translated_chunks = list(tqdm(
            pool.map(translate_batch, chunks),
            total=len(chunks),
            desc="翻譯進度"
        ))

    # Flatten the per-batch lists into a single list.
    return [item for chunk in translated_chunks for item in chunk]

# Load the source data from CSV; the 'Parameter Details' column is read below.
file_path = './proxmox_api_translated.csv'
data = pd.read_csv(file_path)

# Run the translation and store the result in a new 'Parameter_Details_zh'
# column. The returned list must contain one entry per row for the column
# assignment to succeed.
# NOTE(review): the progress message says "Description", but the column being
# translated is 'Parameter Details' — confirm which wording is intended.
print("開始翻譯 Description...")
data['Parameter_Details_zh'] = process_dataframe_parallel(data, 'Parameter Details')

# Save the result to a separate CSV file, without writing the row index.
translated_file_path = 'proxmox_api_translated2.csv'
data.to_csv(translated_file_path, index=False)
print(f"翻譯完成，結果已儲存至: {translated_file_path}")

開始翻譯 Description...


翻譯進度: 100%|██████████| 61/61 [05:30<00:00,  5.42s/it]

翻譯完成，結果已儲存至: proxmox_api_translated2.csv



