In [None]:
# ==============================================================
#  0. 环境安装
# ============================================================== 
!pip install --quiet openai chardet

# ==============================================================
#  1. 引入库 & 配置 OpenAI Key + LLM 型号
# ============================================================== 
import os, json, time, datetime, glob, re
import openai, chardet

openai.api_key = "your-default-api-key"   # ← 改成你的 key
MODEL_NAME     = "chatgpt-4o-latest"                             # ← 换模型直接改这里
TEMPERATURE    = 0                                               # 可调

# ==============================================================
#  2. 用户输入：A/B 两个文件夹 + 章节范围
# ============================================================== 
FOLDER_A = "/content/translate_back_ZH/《超级惊悚直播》作者_宇文长弓"                                   # ← 文件夹 A
FOLDER_B = "/content/novels_chapters/《超级惊悚直播》作者_宇文长弓"                                   # ← 文件夹 B
CHAPTER_RANGE = list(range(2, 5))                                # ← 章节号列表，如 [1,3,5] 或 range

# ==============================================================
#  3. 工具：读取 TXT（自动探测编码） & 按章节查找文件
# ============================================================== 
def read_txt(path):
    raw = open(path, "rb").read()
    enc = chardet.detect(raw)["encoding"] or "utf-8"
    return raw.decode(enc, errors="ignore")

CHAP_PATTERNS = [
    r"[_-](\d{1,4})[_-]",                 # 001_
    r"第\s*(\d{1,4})\s*章",               # 第3章
    r"^\s*(\d{1,4})[._\s]",               # 003. or 3_
]

def find_chapter_file(folder: str, chap_num: int):
    """在 folder 中寻找与章节号匹配的 txt 文件；若有多个返回第一个"""
    chap_num_str = str(chap_num)
    for path in glob.glob(os.path.join(folder, "**/*.txt"), recursive=True):
        name = os.path.basename(path)
        for pat in CHAP_PATTERNS:
            m = re.search(pat, name)
            if m and str(int(m.group(1))) == chap_num_str:
                return path
    return None

# ==============================================================
#  4. 构造 GPT 评估 Prompt（五维度）
# ============================================================== 
PROMPT_TEMPLATE = """
你是一位专业文学编辑，将对比两篇中文文本并给出 5 个 0-1 评分，1 表示极度相似，0 表示完全不同。

**评分维度**
1. semantic_similarity 整体语义/主题
2. plot_similarity   情节、剧情、事件发展
3. character_similarity 人物出场数量、人物名字与性格
4. background_similarity 环境、场景与世界观背景
5. style_similarity   文字语言风格

**严格返回** *只包含 JSON*，格式如下（不要有其它文字）：
{{
  "semantic_similarity": <0-1>,
  "plot_similarity": <0-1>,
  "character_similarity": <0-1>,
  "background_similarity": <0-1>,
  "style_similarity": <0-1>
}}

请阅读【文本A】与【文本B】并打分。

【文本A】
{text_a}

【文本B】
{text_b}
"""

# ==============================================================
#  5. 调用 GPT 并解析
# ============================================================== 
EXPECTED_KEYS = [
    "semantic_similarity",
    "plot_similarity",
    "character_similarity",
    "background_similarity",
    "style_similarity",
]

def call_gpt(prompt):
    resp = openai.chat.completions.create(
        model=MODEL_NAME,
        messages=[{"role": "user", "content": prompt}],
        temperature=TEMPERATURE
    )
    return resp.choices[0].message.content.strip()

def judge_pair(text_a, text_b):
    prompt = PROMPT_TEMPLATE.format(text_a=text_a, text_b=text_b)
    for attempt in range(2):
        reply = call_gpt(prompt)
        try:
            scores = json.loads(reply)
            if all(key in scores for key in EXPECTED_KEYS):
                return {k: float(scores[k]) for k in EXPECTED_KEYS}
            raise ValueError("缺键")
        except Exception:
            if attempt == 0:
                print("⚠️ GPT 输出格式异常，重试一次…")
                time.sleep(1)
            else:
                raise RuntimeError(f"GPT 输出无法解析为 JSON：\n{reply}")

# ==============================================================
#  6. 主流程：逐章节对比
# ============================================================== 
results = []
for chap in CHAPTER_RANGE:
    print(f"\n🚩 章节 {chap}")
    path_a = find_chapter_file(FOLDER_A, chap)
    path_b = find_chapter_file(FOLDER_B, chap)

    if not path_a or not path_b:
        raise FileNotFoundError(
            f"章节 {chap} 缺少对应文件："
            f"{'A ❌' if not path_a else 'A ✅'} "
            f"{'B ❌' if not path_b else 'B ✅'}"
        )

    text_a = read_txt(path_a)
    text_b = read_txt(path_b)

    scores = judge_pair(text_a, text_b)

    print(f"文件A: {path_a}")
    print(f"文件B: {path_b}")
    for k, v in scores.items():
        print(f"  {k:<22}: {v:.3f}")

    results.append({
        "chapter": chap,
        "model": MODEL_NAME,
        "file_a": path_a,
        "file_b": path_b,
        "scores": scores,
    })

# ==============================================================
#  7. 保存汇总 JSON
# ============================================================== 
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
out_dir = "/content/similarity_result"
os.makedirs(out_dir, exist_ok=True)
out_path = os.path.join(out_dir, f"folder_similarity_{timestamp}.json")

with open(out_path, "w", encoding="utf-8") as f:
    json.dump({"generated_at": datetime.datetime.now().isoformat(),
               "model": MODEL_NAME,
               "folder_a": FOLDER_A,
               "folder_b": FOLDER_B,
               "chapters": results},
              f, ensure_ascii=False, indent=2)

print(f"\n✅ 全部完成，结果已保存到: {out_path}")

In [1]:
# ==============================================================
# Combined Script: Evaluate Expanded Chapters vs Original
# Based on Random Seed Selection
# ==============================================================

# ==============================================================
#  0. Environment Installation (if needed)
# ==============================================================
# !pip install --quiet openai chardet tqdm google-generativeai # Install all potential dependencies

# ==============================================================
#  1. Import Libraries
# ==============================================================
import os
import json
import time
import datetime
import glob
import re
import logging
import unicodedata
from pathlib import Path
from tqdm.auto import tqdm

# Use OpenAI for evaluation as per Script 1
import openai
import chardet

# ==============================================================
#  2. Configuration
# ==============================================================

# --- Evaluation LLM Configuration (from Script 1) ---
# ⚠️ IMPORTANT: Replace with your actual OpenAI API Key
OPENAI_API_KEY = "your-default-api-key"
EVALUATION_MODEL_NAME = "gpt-4o" # Changed to standard name, was "chatgpt-4o-latest" which might be custom
EVALUATION_TEMPERATURE = 0

# --- Directory Configuration ---
# Root directory containing original chapter subdirectories (one per book)
ORIGINAL_CHAPTERS_ROOT = Path("/content/novels_chapters")
# Root directory where Script 2 saved the expanded chapters (one subdir per book)
EXPANDED_CHAPTERS_ROOT = Path("/content/1000_word_chapters_expanded")
# Directory containing the chapter selection files (e.g., BookName_randomseed.txt)
SEED_DIR = Path("/content/randomseed")
# Root directory to save the evaluation results (will create subdirs per book)
EVALUATION_OUTPUT_ROOT = Path("/content/evaluation_results_randomseed")

# --- Logging Configuration ---
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

# --- OpenAI Client Initialization ---
try:
    openai.api_key = OPENAI_API_KEY
    # Optional: Test connection or list models
    # openai.models.list()
    logging.info(f"OpenAI API configured for evaluation model: {EVALUATION_MODEL_NAME}")
except Exception as e:
    logging.error(f"Failed to configure OpenAI API: {e}")
    # Consider exiting if API key is essential and fails configuration
    # exit() # Uncomment to exit if OpenAI config fails

# ==============================================================
#  3. Utility Functions
# ==============================================================

def read_txt(path: Path) -> str:
    """Reads a text file with automatic encoding detection."""
    try:
        raw = path.read_bytes()
        enc = chardet.detect(raw)["encoding"] or "utf-8"
        return raw.decode(enc, errors="ignore")
    except FileNotFoundError:
        logging.error(f"File not found during read: {path}")
        return ""
    except Exception as e:
        logging.error(f"Error reading file {path}: {e}")
        return ""

# --- Chapter Number Extraction Patterns (Adapted from Script 1) ---
# Added pattern for "第001章.txt" format used by expansion script
CHAP_PATTERNS = [
    re.compile(r"[_-](\d{1,4})[_-]"),        # Matches _001_ or -1- etc.
    re.compile(r"第\s*(\d{1,4})\s*章"),     # Matches 第 3 章 or 第003章
    re.compile(r"^\s*(\d{1,4})[._\s]"),      # Matches 003. or 3_ at the start
    re.compile(r"^第(\d{1,4})章\.txt$"),    # Matches 第001章.txt (exact filename)
]

def find_chapter_file(folder: Path, chap_num: int) -> Path | None:
    """
    Finds a TXT file matching the chapter number within a folder.
    Handles various naming conventions including '第XXX章.txt'.
    """
    chap_num_str = str(chap_num)
    if not folder.is_dir():
        logging.warning(f"Attempted to search non-existent folder: {folder}")
        return None

    # Prioritize exact match for expanded chapter format if applicable
    exact_match_name = f"第{chap_num:03d}章.txt"
    exact_path = folder / exact_match_name
    if exact_path.is_file():
        # logging.debug(f"Found exact match: {exact_path}")
        return exact_path

    # Search recursively using patterns if exact match failed or wasn't applicable
    try:
        # Using iterdir for potentially better performance on very large directories than glob(**/*)
        for item in folder.rglob("*.txt"): # Recursive glob
             if item.is_file():
                name = item.name
                for pat in CHAP_PATTERNS:
                    m = pat.search(name)
                    # Compare as integers to handle leading zeros (e.g., '001' == '1')
                    if m and int(m.group(1)) == chap_num:
                        # logging.debug(f"Found pattern match ({pat.pattern}): {item}")
                        return item
    except Exception as e:
        logging.error(f"Error searching for chapter {chap_num} in {folder}: {e}")

    logging.warning(f"Chapter {chap_num} file not found in {folder}")
    return None


# --- Read Seed Chapters Function (from Script 2) ---
def read_seed_chapters(book_name: str, seed_dir: Path) -> list[int]:
    """Reads the list of chapter numbers from the _randomseed.txt file for a given book."""
    def normalize(name: str) -> str:
        # Normalize book names for robust matching
        name = unicodedata.normalize("NFKC", name)
        name = name.lower()
        name = name.replace("_utf8", "")
        name = re.sub(r"[《》]", "", name) # Remove book title marks
        name = re.sub(r"\.summary$", "", name) # Remove potential suffixes
        # Remove various separators and whitespace more aggressively
        name = re.sub(r"[\s:：()（）【】\[\]“”\"',.!.?？\-·•_]+", "", name)
        return name.strip()

    norm_target = normalize(book_name)
    if not norm_target:
        logging.warning(f"Normalized book name is empty for '{book_name}', cannot match seed file.")
        return []

    seed_file_found = None
    try:
        for file in seed_dir.glob("*_randomseed.txt"):
            # Extract candidate book name from filename
            candidate = file.stem.replace("_randomseed", "")
            if normalize(candidate) == norm_target:
                seed_file_found = file
                break # Stop after finding the first match

        if seed_file_found:
            logging.info(f"📌 Found seed file for '{book_name}': {seed_file_found.name}")
            lines = seed_file_found.read_text(encoding="utf-8").splitlines()
            # Filter for digits, convert to int, remove duplicates, and sort
            chapters = sorted(list(set(int(x.strip()) for x in lines if x.strip().isdigit())))
            if not chapters:
                 logging.warning(f"Seed file {seed_file_found.name} for '{book_name}' contains no valid chapter numbers.")
            return chapters
        else:
            logging.warning(f"⚠️ No matching seed file found in {seed_dir} for book: '{book_name}' (normalized: '{norm_target}')")
            return []
    except FileNotFoundError:
         logging.error(f"Seed directory not found: {seed_dir}")
         return []
    except Exception as e:
        logging.error(f"Error reading seed file for '{book_name}': {e}")
        return []


# ==============================================================
#  4. Evaluation Prompt & Logic (from Script 1)
# ==============================================================
PROMPT_TEMPLATE = """
你是一位专业文学编辑，将对比两篇中文文本并给出 5 个 0-1 评分，1 表示极度相似，0 表示完全不同。

**评分维度**
1. semantic_similarity 整体语义/主题
2. plot_similarity   情节、剧情、事件发展
3. character_similarity 人物出场数量、人物名字与性格
4. background_similarity 环境、场景与世界观背景
5. style_similarity   文字语言风格

**严格返回** *只包含 JSON*，格式如下（不要有其它文字）：
{{
  "semantic_similarity": <0-1>,
  "plot_similarity": <0-1>,
  "character_similarity": <0-1>,
  "background_similarity": <0-1>,
  "style_similarity": <0-1>
}}

请阅读【文本A】(原文)与【文本B】(AI生成)并打分。

【文本A】
{text_a}

【文本B】
{text_b}
"""

EXPECTED_KEYS = [
    "semantic_similarity",
    "plot_similarity",
    "character_similarity",
    "background_similarity",
    "style_similarity",
]

def call_gpt(prompt):
    """Calls the configured OpenAI model."""
    try:
        # Ensure API key is set before calling
        if not openai.api_key:
             raise ValueError("OpenAI API key not configured.")

        resp = openai.chat.completions.create(
            model=EVALUATION_MODEL_NAME,
            messages=[{"role": "user", "content": prompt}],
            temperature=EVALUATION_TEMPERATURE,
            response_format={"type": "json_object"} # Enforce JSON output if model supports it
        )
        return resp.choices[0].message.content.strip()
    except openai.AuthenticationError as e:
         logging.error(f"OpenAI Authentication Error: Please check your API key. {e}")
         raise # Re-raise after logging
    except openai.RateLimitError as e:
         logging.error(f"OpenAI Rate Limit Error: {e}. Waiting and retrying might be needed.")
         raise # Re-raise for potential handling upstream
    except Exception as e:
        logging.error(f"Error calling OpenAI API: {e}")
        raise # Re-raise other unexpected errors


def judge_pair(text_a, text_b, chap_num):
    """Evaluates a pair of texts using the LLM, handles retries and parsing."""
    prompt = PROMPT_TEMPLATE.format(text_a=text_a, text_b=text_b)
    max_retries = 2
    for attempt in range(max_retries):
        try:
            reply = call_gpt(prompt)
            scores = json.loads(reply) # GPT should return valid JSON now
            # Validate keys and values
            if all(key in scores for key in EXPECTED_KEYS) and \
               all(isinstance(scores[key], (int, float)) and 0 <= scores[key] <= 1 for key in EXPECTED_KEYS):
                return {k: float(scores[k]) for k in EXPECTED_KEYS}
            else:
                 logging.warning(f"Chapter {chap_num}, Attempt {attempt+1}: GPT JSON missing keys or invalid values: {scores}")
                 raise ValueError("缺键或值无效")

        except json.JSONDecodeError as e:
            logging.warning(f"Chapter {chap_num}, Attempt {attempt+1}: GPT output is not valid JSON: {e}. Reply: '{reply[:200]}...'")
            if attempt < max_retries - 1: time.sleep(2 ** attempt) # Exponential backoff
            else: raise RuntimeError(f"Chapter {chap_num}: GPT output failed JSON parsing after {max_retries} attempts. Last reply:\n{reply}") from e
        except Exception as e:
            logging.warning(f"Chapter {chap_num}, Attempt {attempt+1}: Error during GPT call or processing: {e}")
            if attempt < max_retries - 1: time.sleep(2 ** attempt)
            else: raise RuntimeError(f"Chapter {chap_num}: Evaluation failed after {max_retries} attempts.") from e


# ==============================================================
#  5. Main Evaluation Loop
# ==============================================================
def main():
    logging.info("Starting evaluation process...")
    EVALUATION_OUTPUT_ROOT.mkdir(parents=True, exist_ok=True)

    # Iterate through book directories in the original chapters root
    original_book_dirs = [d for d in ORIGINAL_CHAPTERS_ROOT.iterdir() if d.is_dir()]
    if not original_book_dirs:
        logging.error(f"No book directories found in ORIGINAL_CHAPTERS_ROOT: {ORIGINAL_CHAPTERS_ROOT}")
        return

    logging.info(f"Found {len(original_book_dirs)} potential books in {ORIGINAL_CHAPTERS_ROOT}.")

    for original_book_dir in tqdm(original_book_dirs, desc="📚 Evaluating Books"):
        book_name = original_book_dir.name
        logging.info(f"\n{'='*10} Processing Book: {book_name} {'='*10}")

        # Find corresponding directory for expanded chapters
        expanded_book_dir = EXPANDED_CHAPTERS_ROOT / book_name
        if not expanded_book_dir.is_dir():
            logging.warning(f"Expanded chapter directory not found for '{book_name}' at {expanded_book_dir}. Skipping book.")
            continue

        # Get chapters to evaluate for this book from the seed file
        chapters_to_evaluate = read_seed_chapters(book_name, SEED_DIR)
        if not chapters_to_evaluate:
            logging.warning(f"No chapters selected from seed file for '{book_name}'. Skipping book.")
            continue

        logging.info(f"Selected chapters for '{book_name}': {chapters_to_evaluate}")

        book_results = []
        chapters_evaluated_count = 0
        chapters_failed_count = 0

        for chap_num in tqdm(chapters_to_evaluate, desc=f"   Evaluating Chapters", leave=False):
            logging.info(f"--- Chapter {chap_num} ---")

            # Find original and expanded files
            path_original = find_chapter_file(original_book_dir, chap_num)
            path_expanded = find_chapter_file(expanded_book_dir, chap_num)

            if not path_original:
                logging.error(f"Chapter {chap_num}: Original file NOT FOUND in {original_book_dir}")
                chapters_failed_count += 1
                continue
            if not path_expanded:
                logging.error(f"Chapter {chap_num}: Expanded file NOT FOUND in {expanded_book_dir}")
                chapters_failed_count += 1
                continue

            # Read file contents
            text_original = read_txt(path_original)
            text_expanded = read_txt(path_expanded)

            if not text_original or not text_expanded:
                logging.error(f"Chapter {chap_num}: Failed to read content from one or both files ({path_original.name}, {path_expanded.name})")
                chapters_failed_count += 1
                continue

            # Perform evaluation
            try:
                scores = judge_pair(text_original, text_expanded, chap_num)
                logging.info(f"Chapter {chap_num}: Evaluation successful.")
                for k, v in scores.items():
                     logging.info(f"  {k:<22}: {v:.3f}")

                book_results.append({
                    "chapter": chap_num,
                    "original_file": str(path_original.relative_to(ORIGINAL_CHAPTERS_ROOT)), # Store relative path
                    "expanded_file": str(path_expanded.relative_to(EXPANDED_CHAPTERS_ROOT)), # Store relative path
                    "scores": scores,
                })
                chapters_evaluated_count += 1

            except Exception as e:
                logging.error(f"Chapter {chap_num}: Evaluation failed. Error: {e}")
                chapters_failed_count += 1
                # Add a failure entry? Optional.
                book_results.append({
                    "chapter": chap_num,
                    "original_file": str(path_original.relative_to(ORIGINAL_CHAPTERS_ROOT)),
                    "expanded_file": str(path_expanded.relative_to(EXPANDED_CHAPTERS_ROOT)),
                    "scores": None, # Indicate failure
                    "error": str(e)
                })

            time.sleep(1) # Add a small delay between API calls

        # Save results for the current book
        if book_results: # Only save if some results were generated
            book_output_dir = EVALUATION_OUTPUT_ROOT / book_name
            book_output_dir.mkdir(parents=True, exist_ok=True)
            # Use timestamp for uniqueness within the book's folder if script is run multiple times
            timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
            output_json_path = book_output_dir / f"evaluation_{timestamp}.json"

            output_data = {
                "evaluation_run_at": datetime.datetime.now().isoformat(),
                "book_name": book_name,
                "evaluation_model": EVALUATION_MODEL_NAME,
                "original_chapters_root": str(ORIGINAL_CHAPTERS_ROOT),
                "expanded_chapters_root": str(EXPANDED_CHAPTERS_ROOT),
                "seed_file_used": f"(Logic based on matching '{book_name}' in {SEED_DIR})",
                "chapters_evaluated_count": chapters_evaluated_count,
                "chapters_failed_or_skipped": chapters_failed_count,
                "evaluation_details": book_results
            }

            try:
                with open(output_json_path, "w", encoding="utf-8") as f:
                    json.dump(output_data, f, ensure_ascii=False, indent=2)
                logging.info(f"✅ Evaluation results for '{book_name}' saved to: {output_json_path}")
            except Exception as e:
                logging.error(f"Failed to save evaluation results for '{book_name}' to {output_json_path}: {e}")
        else:
            logging.warning(f"No evaluation results generated for book '{book_name}'. Nothing saved.")

    logging.info("\n🎉 Evaluation process finished for all books.")

# ==============================================================
#  6. Run the Main Function
# ==============================================================
if __name__ == "__main__":
    main()

2025-05-01 19:17:03,450 - INFO - OpenAI API configured for evaluation model: gpt-4o
2025-05-01 19:17:03,452 - INFO - Starting evaluation process...
2025-05-01 19:17:03,455 - INFO - Found 40 potential books in /content/novels_chapters.


📚 Evaluating Books:   0%|          | 0/40 [00:00<?, ?it/s]

2025-05-01 19:17:03,465 - INFO - 
2025-05-01 19:17:03,467 - INFO - 
2025-05-01 19:17:03,469 - INFO - 
2025-05-01 19:17:03,470 - INFO - 
2025-05-01 19:17:03,472 - INFO - 
2025-05-01 19:17:03,473 - INFO - 
2025-05-01 19:17:03,475 - INFO - 
2025-05-01 19:17:03,476 - INFO - 
2025-05-01 19:17:03,477 - INFO - 
2025-05-01 19:17:03,479 - INFO - 
2025-05-01 19:17:03,481 - INFO - 
2025-05-01 19:17:03,482 - INFO - 
2025-05-01 19:17:03,484 - INFO - 
2025-05-01 19:17:03,485 - INFO - 
2025-05-01 19:17:03,487 - INFO - 
2025-05-01 19:17:03,489 - INFO - 
2025-05-01 19:17:03,490 - INFO - 
2025-05-01 19:17:03,492 - INFO - 
2025-05-01 19:17:03,493 - INFO - 
2025-05-01 19:17:03,495 - INFO - 
2025-05-01 19:17:03,496 - INFO - 
2025-05-01 19:17:03,498 - INFO - 
2025-05-01 19:17:03,499 - INFO - 
2025-05-01 19:17:03,501 - INFO - 
2025-05-01 19:17:03,502 - INFO - 
2025-05-01 19:17:03,504 - INFO - 
2025-05-01 19:17:03,505 - INFO - 
2025-05-01 19:17:03,507 - INFO - 
2025-05-01 19:17:03,509 - INFO - 
2025-05-01 19:

In [3]:
# ==============================================================
# Combined Script: Evaluate Expanded Chapters vs Original
# Based on Random Seed Selection (v3 - Lenient File Matching)
# ==============================================================

# ==============================================================
#  0. Environment Installation (if needed)
# ==============================================================
# !pip install --quiet openai chardet tqdm google-generativeai

# ==============================================================
#  1. Import Libraries
# ==============================================================
import os
import json
import time
import datetime
import glob
import re
import logging
import unicodedata
from pathlib import Path
from tqdm.auto import tqdm

# Use OpenAI for evaluation as per Script 1
import openai
import chardet

# ==============================================================
#  2. Configuration
# ==============================================================

# --- Evaluation LLM Configuration (from Script 1) ---
# ⚠️ IMPORTANT: Replace with your actual OpenAI API Key
OPENAI_API_KEY = "your-default-api-key" # <<<--- REPLACE THIS
EVALUATION_MODEL_NAME = "gpt-4o"
EVALUATION_TEMPERATURE = 0

# --- Directory Configuration ---
ORIGINAL_CHAPTERS_ROOT = Path("/content/novels_chapters")
EXPANDED_CHAPTERS_ROOT = Path("/content/1000_word_chapters_expanded")
SEED_DIR = Path("/content/randomseed")
EVALUATION_OUTPUT_ROOT = Path("/content/evaluation_results_randomseed")

# --- Logging Configuration ---
# Set level to DEBUG to see detailed file matching logs, INFO for less noise
LOGGING_LEVEL = logging.INFO
logging.basicConfig(level=LOGGING_LEVEL, format="%(asctime)s - %(levelname)s - %(message)s")

# --- OpenAI Client Initialization ---
try:
    if not OPENAI_API_KEY or "REPLACE" in OPENAI_API_KEY or len(OPENAI_API_KEY) < 15: # Basic checks
         raise ValueError("OpenAI API key is missing, a placeholder, or too short. Please replace it.")
    openai.api_key = OPENAI_API_KEY
    logging.info(f"OpenAI API configured for evaluation model: {EVALUATION_MODEL_NAME}")
except Exception as e:
    logging.error(f"Failed to configure OpenAI API: {e}. Please check the API key.")
    # raise SystemExit("Exiting due to OpenAI configuration error.") from e

# ==============================================================
#  3. Utility Functions (Updated find_chapter_file)
# ==============================================================

def read_txt(path: Path) -> str:
    """Reads a text file with automatic encoding detection."""
    try:
        raw = path.read_bytes()
        detected = chardet.detect(raw)
        enc = detected['encoding'] if detected else None
        confidence = detected['confidence'] if detected else 0

        # If low confidence or common ambiguous encodings, try UTF-8 first
        if confidence < 0.9 or (enc and enc.lower() in ['ascii', 'windows-1252']):
            try:
                decoded_text = raw.decode('utf-8', errors='strict')
                logging.debug(f"Decoded {path.name} as UTF-8 (low confidence/ambiguous initial detection: {enc} conf={confidence:.2f})")
                return decoded_text
            except UnicodeDecodeError:
                logging.debug(f"Strict UTF-8 decoding failed for {path.name}, falling back to detected/common.")
                # Fall through to try detected encoding or others

        if not enc: # If no encoding detected, default to utf-8 with fallback
             enc = 'utf-8'
             logging.warning(f"No encoding detected for {path.name}, defaulting to {enc}.")

        common_encodings = list(dict.fromkeys([enc, 'utf-8', 'gbk', 'gb2312'])) # Unique, prioritize detected
        decoded_text = None

        for encoding in common_encodings:
             if not encoding: continue # Skip if encoding is None
             try:
                 decoded_text = raw.decode(encoding, errors='strict')
                 logging.debug(f"Successfully decoded {path.name} with {encoding}")
                 break
             except (UnicodeDecodeError, TypeError, LookupError): # LookupError for unknown codec
                 logging.debug(f"Strict decoding with {encoding} failed for {path.name}")
                 continue

        if decoded_text is None:
             # Fallback with error ignoring
             final_encoding = 'utf-8' # Default fallback encoding
             logging.warning(f"Could not decode {path.name} strictly with common encodings, using fallback {final_encoding} with errors='ignore'.")
             decoded_text = raw.decode(final_encoding, errors='ignore')

        return decoded_text.strip() # Strip whitespace from final result

    except FileNotFoundError:
        logging.error(f"File not found during read: {path}")
        return ""
    except Exception as e:
        logging.error(f"Error reading file {path}: {e}", exc_info=True)
        return ""


# --- Chapter Number Extraction Patterns (Reordered and Expanded for Leniency) ---
CHAP_PATTERNS = [
    # Most Specific First (often used by generators or clean sources)
    re.compile(r"^第(\d{1,5})章\.txt$"),           # Exact name: 第001章.txt
    re.compile(r"^(\d{1,5})\.txt$"),               # Exact name: 001.txt
    # Common Prefixes/Formats with separators
    re.compile(r"^(\d{1,5})[_\s.-].*?\.txt$", re.IGNORECASE),# Starts with num + separator: 001_..., 001 ..., 001-...
    # Common Keywords
    re.compile(r"第\s*(\d{1,5})\s*章", re.IGNORECASE), # Contains "第 X 章"
    re.compile(r"chapter\s*(\d{1,5})", re.IGNORECASE), # Contains "Chapter X"
    re.compile(r"chap\s*(\d{1,5})", re.IGNORECASE),    # Contains "Chap X"
    # Separated Numbers (less specific)
    re.compile(r"[_-](\d{1,5})[_-]"),             # Contains _001_, -1-
    # General Number (not part of a larger number) - Last resort, potentially risky
    re.compile(r"(?<!\d)(\d{1,5})(?!\d)"),       # Matches number not surrounded by other digits
]

def find_chapter_file(folder: Path, chap_num: int) -> Path | None:
    """
    Finds a TXT file matching the chapter number within a folder (recursive).
    Uses a broader set of patterns for more lenient matching.
    """
    if not folder.is_dir():
        logging.warning(f"Attempted to search non-existent folder: {folder}")
        return None

    possible_matches = []
    files_checked_count = 0
    try:
        # Iterate recursively using rglob
        for item in folder.rglob("*.txt"):
            files_checked_count += 1
            if item.is_file():
                name = item.name
                logging.log(logging.DEBUG - 1, f"Checking file: {name} for chapter {chap_num}") # Use lower level for very verbose

                for i, pat in enumerate(CHAP_PATTERNS):
                    # Use search to find pattern anywhere in the name
                    match = pat.search(name)
                    if match:
                        try:
                            # Try to extract the first capturing group
                            if match.groups(): # Check if there are any capturing groups
                                extracted_num_str = match.group(1)
                                if extracted_num_str and extracted_num_str.isdigit():
                                    extracted_num = int(extracted_num_str)
                                    if extracted_num == chap_num:
                                        logging.debug(f"Found potential match for chap {chap_num} in '{name}' using pattern #{i+1} ({pat.pattern}) -> {item}")
                                        # Store the match and the pattern index (lower index is better/more specific)
                                        possible_matches.append({"path": item, "pattern_index": i})
                                        # Break from pattern loop for this file once a match is found
                                        break
                                # else: # Group 1 wasn't digits
                                #    logging.log(logging.DEBUG - 1, f"Pattern {pat.pattern} group 1 '{extracted_num_str}' not digits in {name}")
                            # else: # Pattern matched but had no capturing group (e.g., if pattern was just `第\d+章`) - should not happen with current patterns
                            #     logging.log(logging.DEBUG - 1, f"Pattern {pat.pattern} matched {name} but had no capturing groups")

                        except (ValueError, IndexError) as e:
                             logging.debug(f"Error processing match for pattern {pat.pattern} on {name}: {e}")
                             # Continue to next pattern for this file
                             continue
                        except Exception as e_inner:
                             # Catch other potential errors during match processing
                             logging.warning(f"Unexpected error processing match for pattern {pat.pattern} on {name}: {e_inner}")
                             continue

    except PermissionError as e:
         logging.error(f"Permission denied while searching in {folder}: {e}")
         return None # Cannot search here
    except Exception as e:
        logging.error(f"Error during file search for chapter {chap_num} in {folder}: {e}", exc_info=True)
        # Continue execution if possible, but log the error

    logging.debug(f"Checked {files_checked_count} files in {folder} for chapter {chap_num}. Found {len(possible_matches)} potential matches.")

    if not possible_matches:
        logging.warning(f"Chapter {chap_num} file not found in {folder} using lenient patterns.")
        return None
    elif len(possible_matches) == 1:
        # Exactly one match found
        logging.info(f"Found unique match for chapter {chap_num}: '{possible_matches[0]['path'].name}' in {folder} (pattern index {possible_matches[0]['pattern_index']})")
        return possible_matches[0]['path']
    else:
        # Multiple files matched the chapter number with different patterns or names
        # Prioritize the match found with the most specific pattern (lowest index)
        possible_matches.sort(key=lambda x: x['pattern_index'])
        best_match = possible_matches[0]
        logging.warning(f"Multiple files found for chapter {chap_num} in {folder}. Prioritizing match '{best_match['path'].name}' found with pattern index {best_match['pattern_index']}. All found: {[p['path'].name for p in possible_matches]}")
        return best_match['path']


# --- Read Seed Chapters Function (from Script 2 - unchanged is fine) ---
def read_seed_chapters(book_name: str, seed_dir: Path) -> list[int]:
    """Reads the list of chapter numbers from the _randomseed.txt file for a given book."""
    def normalize(name: str) -> str:
        name = str(name); name = unicodedata.normalize("NFKC", name); name = name.lower()
        name = name.replace("_utf8", ""); name = re.sub(r"[《》]", "", name)
        name = re.sub(r"\.summary$", "", name)
        name = re.sub(r"[\s:：()（）【】\[\]“”\"',.!.?？\-·•_]+", "", name)
        return name.strip()
    norm_target = normalize(book_name)
    if not norm_target: logging.warning(f"Normalized book name is empty for '{book_name}', cannot match seed file."); return []
    seed_file_found = None
    try:
        if not seed_dir.is_dir(): logging.error(f"Seed directory does not exist: {seed_dir}"); return []
        for file in seed_dir.glob("*_randomseed.txt"):
            candidate = file.stem.replace("_randomseed", "")
            if normalize(candidate) == norm_target:
                seed_file_found = file; logging.info(f"Found matching seed file: {file.name} for book '{book_name}'"); break
        if seed_file_found:
            lines = seed_file_found.read_text(encoding="utf-8").splitlines()
            chapters = sorted(list(set(int(x.strip()) for x in lines if x.strip().isdigit())))
            if not chapters: logging.warning(f"Seed file {seed_file_found.name} for '{book_name}' contains no valid chapter numbers.")
            return chapters
        else:
            checked_files = [f.name for f in seed_dir.glob("*_randomseed.txt")]
            logging.warning(f"⚠️ No matching seed file found in {seed_dir} for book: '{book_name}' (normalized: '{norm_target}'). Checked: {checked_files if checked_files else 'No seed files found.'}")
            return []
    except Exception as e: logging.error(f"Error reading seed file for '{book_name}': {e}", exc_info=True); return []


# --- Evaluation Prompt & Logic (Using lenient prompt from previous step) ---
PROMPT_TEMPLATE = """
你是一位文学评论员，任务是对比两篇中文文本，侧重于它们核心内容的一致性，进行相对宽松的评估。请给出 5 个 0-1 评分，其中 1 表示核心内容高度一致，0 表示核心内容完全不同。

**评估要求：**
* 请更注重**整体情节走向、主要人物的核心行为与命运、以及故事的核心主题**是否相似。
* 对于**写作风格、具体措辞、次要角色的细节、非关键性的场景或背景描述**的差异可以适当宽容。
* 目标是判断【文本B】(AI生成)是否在**核心故事层面上**大致复现了【文本A】(原文)的内容，即使表达方式有所不同。

**评分维度 (宽松解读)**
1. semantic_similarity  核心主题/意义相似度 (Focus on core message/theme)
2. plot_similarity      主要情节/核心事件发展相似度 (Focus on main plot points & progression)
3. character_similarity 主要人物塑造/作用相似度 (Focus on key characters' roles & arcs)
4. background_similarity 关键背景/场景设定相似度 (Focus on essential setting elements)
5. style_similarity      整体风格感受相似度 (Overall stylistic feel similarity - allow more variance)

**严格返回** *只包含 JSON*，格式如下（不要有其它文字）：
{{
  "semantic_similarity": <0-1 'float'>,
  "plot_similarity": <0-1 'float'>,
  "character_similarity": <0-1 'float'>,
  "background_similarity": <0-1 'float'>,
  "style_similarity": <0-1 'float'>
}}

请阅读【文本A】(原文)与【文本B】(AI生成)并进行宽松评估打分。

【文本A】
{text_a}

【文本B】
{text_b}
"""

EXPECTED_KEYS = [
    "semantic_similarity", "plot_similarity", "character_similarity",
    "background_similarity", "style_similarity",
]

# --- call_gpt and judge_pair functions (no changes needed from previous complete block) ---
def call_gpt(prompt):
    try:
        if not openai.api_key: raise ValueError("OpenAI API key not configured.")
        logging.debug(f"Calling OpenAI model {EVALUATION_MODEL_NAME} with temperature {EVALUATION_TEMPERATURE}")
        resp = openai.chat.completions.create(
            model=EVALUATION_MODEL_NAME,
            messages=[{"role": "user", "content": prompt}],
            temperature=EVALUATION_TEMPERATURE,
            response_format={"type": "json_object"}
        )
        return resp.choices[0].message.content.strip()
    # ... (Keep existing detailed error handling for OpenAI API calls) ...
    except openai.AuthenticationError as e: logging.error(f"OpenAI Authentication Error: {e}"); raise
    except openai.RateLimitError as e: logging.error(f"OpenAI Rate Limit Error: {e}"); raise
    except openai.APITimeoutError as e: logging.error(f"OpenAI API Timeout Error: {e}"); raise
    except openai.APIConnectionError as e: logging.error(f"OpenAI API Connection Error: {e}"); raise
    except openai.APIError as e: logging.error(f"OpenAI API Error: Status={e.status_code}, Message={e.message}"); raise
    except Exception as e: logging.error(f"Unexpected error calling OpenAI API: {e}", exc_info=True); raise

def judge_pair(text_a, text_b, chap_num):
    prompt = PROMPT_TEMPLATE.format(text_a=text_a, text_b=text_b)
    max_retries = 3; last_error = None
    for attempt in range(max_retries):
        try:
            logging.info(f"Attempt {attempt+1}/{max_retries} to evaluate chapter {chap_num}...")
            reply = call_gpt(prompt)
            logging.debug(f"Raw reply from GPT chapter {chap_num} attempt {attempt+1}: '{reply[:300]}...'")
            scores = json.loads(reply)
            missing_keys = [k for k in EXPECTED_KEYS if k not in scores]
            invalid_values = []
            for k in EXPECTED_KEYS:
                 if k not in missing_keys:
                     v = scores.get(k)
                     if not isinstance(v, (int, float)) or not (0 <= v <= 1): invalid_values.append(f"{k}={v} (type: {type(v)})")
            if not missing_keys and not invalid_values: return {k: float(scores[k]) for k in EXPECTED_KEYS}
            else:
                 error_msg = []; full_error = ""
                 if missing_keys: error_msg.append(f"Missing keys: {missing_keys}")
                 if invalid_values: error_msg.append(f"Invalid values: {invalid_values}")
                 if error_msg: full_error = ". ".join(error_msg)
                 else: full_error = "Unknown validation issue"
                 logging.warning(f"Chapter {chap_num}, Attempt {attempt+1}: GPT JSON validation failed: {full_error}. Scores: {scores}")
                 last_error = ValueError(f"GPT JSON validation failed: {full_error}")
        except json.JSONDecodeError as e:
            logging.warning(f"Chapter {chap_num}, Attempt {attempt+1}: GPT output is not valid JSON: {e}. Reply: '{reply[:300]}...'")
            last_error = e
        except Exception as e:
            logging.warning(f"Chapter {chap_num}, Attempt {attempt+1}: Error during GPT call/processing: {e}", exc_info=True)
            last_error = e
            if isinstance(e, (openai.RateLimitError, openai.APITimeoutError, openai.APIConnectionError)):
                 wait_time = 3 ** (attempt + 1); logging.info(f"Waiting {wait_time}s before next attempt..."); time.sleep(wait_time); continue
        if attempt < max_retries - 1:
            wait_time = 2 ** attempt; logging.info(f"Waiting {wait_time}s before retrying chapter {chap_num}..."); time.sleep(wait_time)
    raise RuntimeError(f"Chapter {chap_num}: Evaluation failed after {max_retries} attempts. Last error: {last_error}") from last_error


# ==============================================================
# ==============================================================
#  5. Main Evaluation Loop (Using Normalization for Directory Matching)
# ==============================================================
def main():
    """Main function to orchestrate the evaluation process."""
    logging.info("Starting evaluation process...")
    start_time = time.time()
    EVALUATION_OUTPUT_ROOT.mkdir(parents=True, exist_ok=True)

    # --- Make normalize function accessible ---
    # (Ensure the normalize function definition from read_seed_chapters is accessible here,
    # either by defining it globally or copying it inside main if needed)
    def normalize(name: str) -> str:
        name = str(name); name = unicodedata.normalize("NFKC", name); name = name.lower()
        name = name.replace("_utf8", ""); name = re.sub(r"[《》]", "", name)
        name = re.sub(r"\.summary$", "", name) # Also remove .summary if present
        # Updated regex to handle more punctuation including full-width colon
        name = re.sub(r"[\s:：()（）【】\[\]“”\"',.!.?？\-·•_]+", "", name)
        return name.strip()
    # -----------------------------------------

    # --- Basic Path Validation ---
    if not ORIGINAL_CHAPTERS_ROOT.is_dir(): logging.error(f"CRITICAL: Original chapters root not found: {ORIGINAL_CHAPTERS_ROOT}"); return
    if not EXPANDED_CHAPTERS_ROOT.is_dir(): logging.error(f"CRITICAL: Expanded chapters root not found: {EXPANDED_CHAPTERS_ROOT}"); return
    if not SEED_DIR.is_dir(): logging.error(f"CRITICAL: Seed directory not found: {SEED_DIR}"); return
    # --- End Path Validation ---

    original_book_dirs = sorted([d for d in ORIGINAL_CHAPTERS_ROOT.iterdir() if d.is_dir()])
    if not original_book_dirs: logging.error(f"No book directories found in {ORIGINAL_CHAPTERS_ROOT}"); return

    logging.info(f"Found {len(original_book_dirs)} potential books in {ORIGINAL_CHAPTERS_ROOT}.")
    overall_evaluated_count = 0; overall_failed_count = 0; books_processed_count = 0

    # Pre-normalize expanded directory names for efficient lookup
    # Assumes EXPANDED_CHAPTERS_ROOT contains DIRECTORIES, not files like '.summary'
    # Please verify this structure!
    logging.info(f"Scanning {EXPANDED_CHAPTERS_ROOT} for expanded book directories...")
    expanded_dir_map = {}
    try:
        for exp_dir in EXPANDED_CHAPTERS_ROOT.iterdir():
            if exp_dir.is_dir(): # Only consider directories
                normalized_exp_name = normalize(exp_dir.name)
                if normalized_exp_name in expanded_dir_map:
                     logging.warning(f"Duplicate normalized name '{normalized_exp_name}' found in {EXPANDED_CHAPTERS_ROOT}. Using first found: {expanded_dir_map[normalized_exp_name]}. Skipping {exp_dir}")
                else:
                     expanded_dir_map[normalized_exp_name] = exp_dir
    except FileNotFoundError:
         logging.error(f"Expanded chapters root directory not found during scan: {EXPANDED_CHAPTERS_ROOT}")
         return
    except Exception as e:
         logging.error(f"Error scanning expanded directories: {e}", exc_info=True)
         return # Stop if we can't scan expanded dirs

    logging.info(f"Found {len(expanded_dir_map)} potential expanded book directories.")


    for original_book_dir in tqdm(original_book_dirs, desc="📚 Evaluating Books"):
        book_name_original = original_book_dir.name
        logging.info(f"\n{'='*15} Processing Book: {book_name_original} {'='*15}")

        # --- FIX: Use normalization to find matching expanded directory ---
        normalized_original_name = normalize(book_name_original)
        expanded_book_dir = expanded_dir_map.get(normalized_original_name) # Lookup in pre-scanned map
        # --- END FIX ---

        if not expanded_book_dir:
            # Log the normalized name searched for, helps debugging
            logging.warning(f"No matching expanded directory found for '{book_name_original}' (normalized: '{normalized_original_name}') in pre-scanned map from {EXPANDED_CHAPTERS_ROOT}. Skipping book.")
            continue
        else:
             # Check if the found path is actually a directory (should be due to pre-scan)
             if not expanded_book_dir.is_dir():
                  logging.error(f"Matched path '{expanded_book_dir}' for book '{book_name_original}' exists but is not a directory. Skipping.")
                  continue
             logging.info(f"Found corresponding expanded directory using normalization: {expanded_book_dir}")

        # Get chapters using the original name for seed file lookup
        chapters_to_evaluate = read_seed_chapters(book_name_original, SEED_DIR)
        if not chapters_to_evaluate:
            logging.warning(f"No chapters selected from seed file for '{book_name_original}'. Skipping book evaluation.")
            continue

        logging.info(f"Selected chapters for '{book_name_original}': {chapters_to_evaluate}")
        books_processed_count += 1
        book_results = []; chapters_evaluated_count = 0; chapters_failed_count = 0

        # --- Chapter Loop ---
        for chap_num in tqdm(chapters_to_evaluate, desc=f"   Chapters", leave=False):
            # ... (The rest of the chapter loop: find files, read, evaluate, save results)
            # ... (Should use the correctly found 'expanded_book_dir')
            logging.info(f"--- Chapter {chap_num} ---")
            path_original = find_chapter_file(original_book_dir, chap_num)
            path_expanded = find_chapter_file(expanded_book_dir, chap_num) # Use the found expanded_book_dir

            result_entry = {
                "chapter": chap_num,
                "original_file": str(path_original.relative_to(ORIGINAL_CHAPTERS_ROOT)) if path_original else "NOT_FOUND",
                "expanded_file": str(path_expanded.relative_to(EXPANDED_CHAPTERS_ROOT)) if path_expanded else "NOT_FOUND",
                "scores": None, "error": None
            }

            if not path_original or not path_expanded:
                 error_msg = f"Input file(s) missing: {'Original ' if not path_original else ''}{'Expanded' if not path_expanded else ''}"
                 logging.error(f"Chapter {chap_num}: {error_msg}")
                 result_entry["error"] = error_msg
                 book_results.append(result_entry)
                 chapters_failed_count += 1
                 continue

            # ... (Read files, check length, evaluate, append results - Keep this logic) ...
            logging.debug(f"Reading original: {path_original}")
            text_original = read_txt(path_original)
            logging.debug(f"Reading expanded: {path_expanded}")
            text_expanded = read_txt(path_expanded)

            if not text_original or not text_expanded:
                error_msg = f"Failed to read content: {'Original ' if not text_original else ''}{'Expanded' if not text_expanded else ''}"
                logging.error(f"Chapter {chap_num}: {error_msg} ({path_original.name}, {path_expanded.name})")
                result_entry["error"] = error_msg
                book_results.append(result_entry)
                chapters_failed_count += 1
                continue

            try:
                if len(text_original) < 50 or len(text_expanded) < 50:
                     short_err_msg = f"Text too short (Orig: {len(text_original)}, Exp: {len(text_expanded)})"
                     logging.warning(f"Chapter {chap_num}: {short_err_msg}. Skipping evaluation.")
                     raise ValueError(short_err_msg)

                scores = judge_pair(text_original, text_expanded, chap_num)
                logging.info(f"Chapter {chap_num}: Evaluation successful.")
                for k, v in scores.items(): logging.info(f"  {k:<22}: {v:.3f}")
                result_entry["scores"] = scores
                book_results.append(result_entry)
                chapters_evaluated_count += 1

            except Exception as e:
                logging.error(f"Chapter {chap_num}: Evaluation failed. Error: {e}")
                result_entry["error"] = repr(e)
                book_results.append(result_entry)
                chapters_failed_count += 1

            time.sleep(0.5) # Short delay

        # --- Save results for the current book ---
        overall_evaluated_count += chapters_evaluated_count
        overall_failed_count += chapters_failed_count
        if book_results:
            # Use the original name for the output directory for clarity? Or normalized? Let's use original.
            book_output_dir = EVALUATION_OUTPUT_ROOT / original_book_dir.name # Use original name for output dir
            book_output_dir.mkdir(parents=True, exist_ok=True)
            timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
            output_json_path = book_output_dir / f"evaluation_{timestamp}.json"
            output_data = {
                "evaluation_run_at": datetime.datetime.now().isoformat(),
                "book_name_original": book_name_original,
                "matched_expanded_dir_name": expanded_book_dir.name, # Store the actual matched dir name
                "evaluation_model": EVALUATION_MODEL_NAME,
                "original_chapters_root": str(ORIGINAL_CHAPTERS_ROOT),
                "expanded_chapters_root": str(EXPANDED_CHAPTERS_ROOT),
                "seed_file_used": f"(Logic based on matching '{book_name_original}' in {SEED_DIR})",
                "chapters_evaluated_count": chapters_evaluated_count,
                "chapters_failed_or_skipped_count": chapters_failed_count,
                "evaluation_details": book_results
            }
            try:
                with open(output_json_path, "w", encoding="utf-8") as f: json.dump(output_data, f, ensure_ascii=False, indent=2)
                logging.info(f"✅ Evaluation results for '{book_name_original}' saved to: {output_json_path}")
            except Exception as e: logging.error(f"Failed to save results for '{book_name_original}' to {output_json_path}: {e}", exc_info=True)
        else: logging.warning(f"No evaluation results processed for book '{book_name_original}'. Nothing saved.")
        # --- End Book Loop ---

    # --- End of all books ---
    end_time = time.time(); total_time = end_time - start_time
    logging.info(f"\n{'='*20} Evaluation Summary {'='*20}")
    logging.info(f"Processed {books_processed_count} books where matching expanded directory and seed chapters were found.")
    logging.info(f"Total chapters successfully evaluated: {overall_evaluated_count}")
    logging.info(f"Total chapters failed or skipped: {overall_failed_count}")
    logging.info(f"Total execution time: {total_time:.2f} seconds ({total_time/60:.2f} minutes)")
    logging.info(f"Evaluation results saved in subdirectories under: {EVALUATION_OUTPUT_ROOT}")
    logging.info("🎉 Evaluation process finished.")

# ==============================================================
#  6. Run the Main Function
# ==============================================================
if __name__ == "__main__":
    main()

2025-05-01 20:32:52,406 - INFO - OpenAI API configured for evaluation model: gpt-4o
2025-05-01 20:32:52,412 - INFO - Starting evaluation process...
2025-05-01 20:32:52,414 - INFO - Found 40 potential books in /content/novels_chapters.
2025-05-01 20:32:52,416 - INFO - Scanning /content/1000_word_chapters_expanded for expanded book directories...
2025-05-01 20:32:52,419 - INFO - Found 33 potential expanded book directories.


📚 Evaluating Books:   0%|          | 0/40 [00:00<?, ?it/s]

2025-05-01 20:32:52,434 - INFO - 
2025-05-01 20:32:52,435 - INFO - Found corresponding expanded directory using normalization: /content/1000_word_chapters_expanded/上品寒士（校对版全本）作者：贼道三痴_utf8.summary
2025-05-01 20:32:52,439 - INFO - Found matching seed file: 《上品寒士》（校对版全本）作者：贼道三痴_utf8_randomseed.txt for book '《上品寒士》(校对版全本)作者_贼道三痴_utf8'
2025-05-01 20:32:52,441 - INFO - Selected chapters for '《上品寒士》(校对版全本)作者_贼道三痴_utf8': [8, 34, 63]


   Chapters:   0%|          | 0/3 [00:00<?, ?it/s]

2025-05-01 20:32:52,456 - INFO - --- Chapter 8 ---
2025-05-01 20:32:52,477 - INFO - Found unique match for chapter 8: '008_第八章调戏.txt' in /content/novels_chapters/《上品寒士》(校对版全本)作者_贼道三痴_utf8 (pattern index 2)
2025-05-01 20:32:52,479 - INFO - Found unique match for chapter 8: '第008章.txt' in /content/1000_word_chapters_expanded/上品寒士（校对版全本）作者：贼道三痴_utf8.summary (pattern index 0)
2025-05-01 20:32:52,559 - INFO - Attempt 1/3 to evaluate chapter 8...
2025-05-01 20:32:54,825 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-05-01 20:32:54,830 - INFO - Chapter 8: Evaluation successful.
2025-05-01 20:32:54,831 - INFO -   semantic_similarity   : 0.300
2025-05-01 20:32:54,832 - INFO -   plot_similarity       : 0.200
2025-05-01 20:32:54,833 - INFO -   character_similarity  : 0.300
2025-05-01 20:32:54,834 - INFO -   background_similarity : 0.200
2025-05-01 20:32:54,836 - INFO -   style_similarity      : 0.100
2025-05-01 20:32:55,340 - INFO - --- Chapter 34 --

   Chapters:   0%|          | 0/8 [00:00<?, ?it/s]

2025-05-01 20:32:59,140 - INFO - --- Chapter 38 ---
2025-05-01 20:32:59,174 - INFO - Found unique match for chapter 38: '038_第十九章三骑冲阵(下).txt' in /content/novels_chapters/《人道天堂》(校对版全本)作者_荆柯守_utf8 (pattern index 2)
2025-05-01 20:32:59,177 - INFO - Found unique match for chapter 38: '第038章.txt' in /content/1000_word_chapters_expanded/人道天堂（校对版全本）作者：荆柯守_utf8.summary (pattern index 0)
2025-05-01 20:32:59,258 - INFO - Attempt 1/3 to evaluate chapter 38...
2025-05-01 20:33:00,452 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-05-01 20:33:00,456 - INFO - Chapter 38: Evaluation successful.
2025-05-01 20:33:00,457 - INFO -   semantic_similarity   : 0.200
2025-05-01 20:33:00,458 - INFO -   plot_similarity       : 0.100
2025-05-01 20:33:00,459 - INFO -   character_similarity  : 0.200
2025-05-01 20:33:00,460 - INFO -   background_similarity : 0.100
2025-05-01 20:33:00,461 - INFO -   style_similarity      : 0.100
2025-05-01 20:33:00,965 - INFO - --- Chap

   Chapters:   0%|          | 0/8 [00:00<?, ?it/s]

2025-05-01 20:33:19,477 - INFO - --- Chapter 1 ---
2025-05-01 20:33:19,505 - INFO - Found unique match for chapter 1: '001_第一章高考放榜.txt' in /content/novels_chapters/《全球进化》(精校版全本)作者_咬狗_utf8 (pattern index 2)
2025-05-01 20:33:19,507 - INFO - Found unique match for chapter 1: '第001章.txt' in /content/1000_word_chapters_expanded/全球进化（精校版全本）作者：咬狗_utf8.summary (pattern index 0)
2025-05-01 20:33:19,575 - INFO - Attempt 1/3 to evaluate chapter 1...
2025-05-01 20:33:27,737 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-05-01 20:33:27,749 - INFO - Chapter 1: Evaluation successful.
2025-05-01 20:33:27,751 - INFO -   semantic_similarity   : 0.200
2025-05-01 20:33:27,752 - INFO -   plot_similarity       : 0.100
2025-05-01 20:33:27,754 - INFO -   character_similarity  : 0.300
2025-05-01 20:33:27,755 - INFO -   background_similarity : 0.200
2025-05-01 20:33:27,756 - INFO -   style_similarity      : 0.100
2025-05-01 20:33:28,260 - INFO - --- Chapter 17 ---


   Chapters:   0%|          | 0/8 [00:00<?, ?it/s]

2025-05-01 20:33:41,427 - INFO - --- Chapter 8 ---
2025-05-01 20:33:41,453 - INFO - Found unique match for chapter 8: '第008章.txt' in /content/1000_word_chapters_expanded/反正我是超能力者（校对版全本）作者：吃书妖_utf8.summary (pattern index 0)
2025-05-01 20:33:41,545 - INFO - Attempt 1/3 to evaluate chapter 8...
2025-05-01 20:33:43,001 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-05-01 20:33:43,016 - INFO - Chapter 8: Evaluation successful.
2025-05-01 20:33:43,017 - INFO -   semantic_similarity   : 0.200
2025-05-01 20:33:43,019 - INFO -   plot_similarity       : 0.100
2025-05-01 20:33:43,020 - INFO -   character_similarity  : 0.100
2025-05-01 20:33:43,022 - INFO -   background_similarity : 0.100
2025-05-01 20:33:43,023 - INFO -   style_similarity      : 0.200
2025-05-01 20:33:43,527 - INFO - --- Chapter 15 ---
2025-05-01 20:33:43,552 - INFO - Found unique match for chapter 15: '第015章.txt' in /content/1000_word_chapters_expanded/反正我是超能力者（校对版全本）作者：吃书妖_utf8.sum

   Chapters:   0%|          | 0/8 [00:00<?, ?it/s]

2025-05-01 20:33:58,002 - INFO - --- Chapter 8 ---
2025-05-01 20:33:58,029 - INFO - Found unique match for chapter 8: '008_第八章独自在家的时候要乖乖的.txt' in /content/novels_chapters/《回到过去变成猫》(精校版全本)作者_陈词懒调_utf8 (pattern index 2)
2025-05-01 20:33:58,035 - INFO - Found unique match for chapter 8: '第008章.txt' in /content/1000_word_chapters_expanded/回到过去变成猫（精校版全本）作者：陈词懒调_utf8.summary (pattern index 0)
2025-05-01 20:33:58,116 - INFO - Attempt 1/3 to evaluate chapter 8...
2025-05-01 20:34:00,880 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-05-01 20:34:00,890 - INFO - Chapter 8: Evaluation successful.
2025-05-01 20:34:00,892 - INFO -   semantic_similarity   : 0.800
2025-05-01 20:34:00,894 - INFO -   plot_similarity       : 0.700
2025-05-01 20:34:00,895 - INFO -   character_similarity  : 0.600
2025-05-01 20:34:00,897 - INFO -   background_similarity : 0.500
2025-05-01 20:34:00,898 - INFO -   style_similarity      : 0.400
2025-05-01 20:34:01,402 - INFO - --

   Chapters:   0%|          | 0/8 [00:00<?, ?it/s]

2025-05-01 20:34:15,399 - INFO - --- Chapter 70 ---
2025-05-01 20:34:15,433 - INFO - Found unique match for chapter 70: '070_第70章我不认识人家.txt' in /content/novels_chapters/《大画家》(校对版全本)作者_醛石_utf8 (pattern index 2)
2025-05-01 20:34:15,453 - INFO - Found unique match for chapter 70: '第070章.txt' in /content/1000_word_chapters_expanded/大画家（校对版全本）作者：醛石_utf8.summary (pattern index 0)
2025-05-01 20:34:15,529 - INFO - Attempt 1/3 to evaluate chapter 70...
2025-05-01 20:34:16,542 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-05-01 20:34:16,545 - INFO - Chapter 70: Evaluation successful.
2025-05-01 20:34:16,546 - INFO -   semantic_similarity   : 0.500
2025-05-01 20:34:16,547 - INFO -   plot_similarity       : 0.300
2025-05-01 20:34:16,548 - INFO -   character_similarity  : 0.400
2025-05-01 20:34:16,549 - INFO -   background_similarity : 0.600
2025-05-01 20:34:16,549 - INFO -   style_similarity      : 0.200
2025-05-01 20:34:17,053 - INFO - --- Chapter 8

   Chapters:   0%|          | 0/8 [00:00<?, ?it/s]

2025-05-01 20:34:33,385 - INFO - --- Chapter 20 ---
2025-05-01 20:34:33,420 - INFO - Found unique match for chapter 20: '020_第二十章玉碎.txt' in /content/novels_chapters/《天可汗》(校对版全本)作者_西风紧_utf8 (pattern index 2)
2025-05-01 20:34:33,422 - INFO - Found unique match for chapter 20: '第020章.txt' in /content/1000_word_chapters_expanded/天可汗（校对版全本）作者：西风紧_utf8.summary (pattern index 0)
2025-05-01 20:34:33,493 - INFO - Attempt 1/3 to evaluate chapter 20...
2025-05-01 20:34:35,440 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-05-01 20:34:35,445 - INFO - Chapter 20: Evaluation successful.
2025-05-01 20:34:35,446 - INFO -   semantic_similarity   : 0.300
2025-05-01 20:34:35,447 - INFO -   plot_similarity       : 0.200
2025-05-01 20:34:35,448 - INFO -   character_similarity  : 0.300
2025-05-01 20:34:35,449 - INFO -   background_similarity : 0.200
2025-05-01 20:34:35,451 - INFO -   style_similarity      : 0.100
2025-05-01 20:34:35,954 - INFO - --- Chapter 28 

   Chapters:   0%|          | 0/10 [00:00<?, ?it/s]

2025-05-01 20:34:49,719 - INFO - --- Chapter 1 ---
2025-05-01 20:34:49,736 - INFO - Found unique match for chapter 1: '001_第一章要认清这个世道.txt' in /content/novels_chapters/《奋斗在新明朝》(校对版全本)作者_随轻风去_utf8 (pattern index 2)
2025-05-01 20:34:49,762 - INFO - Found unique match for chapter 1: '第001章.txt' in /content/1000_word_chapters_expanded/奋斗在新明朝（校对版全本）作者：随轻风去_utf8.summary (pattern index 0)
2025-05-01 20:34:49,826 - INFO - Attempt 1/3 to evaluate chapter 1...
2025-05-01 20:34:51,864 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-05-01 20:34:51,872 - INFO - Chapter 1: Evaluation successful.
2025-05-01 20:34:51,874 - INFO -   semantic_similarity   : 0.800
2025-05-01 20:34:51,876 - INFO -   plot_similarity       : 0.700
2025-05-01 20:34:51,877 - INFO -   character_similarity  : 0.600
2025-05-01 20:34:51,878 - INFO -   background_similarity : 0.700
2025-05-01 20:34:51,879 - INFO -   style_similarity      : 0.500
2025-05-01 20:34:52,383 - INFO - --- Chap

   Chapters:   0%|          | 0/8 [00:00<?, ?it/s]

2025-05-01 20:35:18,373 - INFO - --- Chapter 18 ---
2025-05-01 20:35:18,412 - INFO - Found unique match for chapter 18: '第018章.txt' in /content/1000_word_chapters_expanded/崩坏世界的传奇大冒险（精校版全本）作者：国王陛下_utf8.summary (pattern index 0)
2025-05-01 20:35:18,486 - INFO - Attempt 1/3 to evaluate chapter 18...
2025-05-01 20:35:20,424 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-05-01 20:35:20,428 - INFO - Chapter 18: Evaluation successful.
2025-05-01 20:35:20,429 - INFO -   semantic_similarity   : 0.300
2025-05-01 20:35:20,430 - INFO -   plot_similarity       : 0.200
2025-05-01 20:35:20,430 - INFO -   character_similarity  : 0.300
2025-05-01 20:35:20,430 - INFO -   background_similarity : 0.200
2025-05-01 20:35:20,431 - INFO -   style_similarity      : 0.100
2025-05-01 20:35:20,933 - INFO - --- Chapter 43 ---
2025-05-01 20:35:20,975 - INFO - Found unique match for chapter 43: '第043章.txt' in /content/1000_word_chapters_expanded/崩坏世界的传奇大冒险（精校版全本）作者：国王陛

   Chapters:   0%|          | 0/8 [00:00<?, ?it/s]

2025-05-01 20:35:37,358 - INFO - --- Chapter 8 ---
2025-05-01 20:35:37,368 - INFO - Found unique match for chapter 8: '008_第二卷蜃楼城之夏.txt' in /content/novels_chapters/《搜神记》(精校版全本)作者_树下野狐_utf8 (pattern index 2)
2025-05-01 20:35:37,395 - INFO - Found unique match for chapter 8: '第008章.txt' in /content/1000_word_chapters_expanded/搜神记（精校版全本）作者：树下野狐_utf8.summary (pattern index 0)
2025-05-01 20:35:37,445 - ERROR - Chapter 8: Failed to read content: Original  (008_第二卷蜃楼城之夏.txt, 第008章.txt)
2025-05-01 20:35:37,447 - INFO - --- Chapter 10 ---
2025-05-01 20:35:37,455 - INFO - Found unique match for chapter 10: '010_第二章咫尺天涯.txt' in /content/novels_chapters/《搜神记》(精校版全本)作者_树下野狐_utf8 (pattern index 2)
2025-05-01 20:35:37,458 - INFO - Found unique match for chapter 10: '第010章.txt' in /content/1000_word_chapters_expanded/搜神记（精校版全本）作者：树下野狐_utf8.summary (pattern index 0)
2025-05-01 20:35:37,562 - INFO - Attempt 1/3 to evaluate chapter 10...
2025-05-01 20:35:39,208 - INFO - HTTP Request: POST https://api.op

   Chapters:   0%|          | 0/8 [00:00<?, ?it/s]

2025-05-01 20:35:56,044 - INFO - --- Chapter 47 ---
2025-05-01 20:35:56,078 - INFO - Found unique match for chapter 47: '047_第47章买房.txt' in /content/novels_chapters/《未来天王》(校对版全本)作者_陈词懒调_utf8 (pattern index 2)
2025-05-01 20:35:56,096 - INFO - Found unique match for chapter 47: '第047章.txt' in /content/1000_word_chapters_expanded/未来天王（校对版全本）作者：陈词懒调_utf8.summary (pattern index 0)
2025-05-01 20:35:56,167 - INFO - Attempt 1/3 to evaluate chapter 47...
2025-05-01 20:35:57,202 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-05-01 20:35:57,206 - INFO - Chapter 47: Evaluation successful.
2025-05-01 20:35:57,207 - INFO -   semantic_similarity   : 0.500
2025-05-01 20:35:57,208 - INFO -   plot_similarity       : 0.300
2025-05-01 20:35:57,210 - INFO -   character_similarity  : 0.400
2025-05-01 20:35:57,211 - INFO -   background_similarity : 0.200
2025-05-01 20:35:57,212 - INFO -   style_similarity      : 0.300
2025-05-01 20:35:57,716 - INFO - --- Chapter

   Chapters:   0%|          | 0/8 [00:00<?, ?it/s]

2025-05-01 20:36:19,735 - INFO - --- Chapter 34 ---
2025-05-01 20:36:19,772 - INFO - Found unique match for chapter 34: '034_第三十四章少主.txt' in /content/novels_chapters/《武林半侠传》(校对版全本)作者_文抄公_utf8 (pattern index 2)
2025-05-01 20:36:19,775 - INFO - Found unique match for chapter 34: '第034章.txt' in /content/1000_word_chapters_expanded/武林半侠传（校对版全本）作者：文抄公_utf8.summary (pattern index 0)
2025-05-01 20:36:19,844 - INFO - Attempt 1/3 to evaluate chapter 34...
2025-05-01 20:36:22,679 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-05-01 20:36:22,689 - INFO - Chapter 34: Evaluation successful.
2025-05-01 20:36:22,691 - INFO -   semantic_similarity   : 0.300
2025-05-01 20:36:22,692 - INFO -   plot_similarity       : 0.200
2025-05-01 20:36:22,694 - INFO -   character_similarity  : 0.200
2025-05-01 20:36:22,695 - INFO -   background_similarity : 0.300
2025-05-01 20:36:22,696 - INFO -   style_similarity      : 0.100
2025-05-01 20:36:23,199 - INFO - --- Chapte

   Chapters:   0%|          | 0/8 [00:00<?, ?it/s]

2025-05-01 20:36:41,726 - INFO - --- Chapter 18 ---
2025-05-01 20:36:41,762 - INFO - Found unique match for chapter 18: '018_第015回入魔惑财色,走火冲夹脊.txt' in /content/novels_chapters/《神游》(校对版全本)作者_徐公子胜治_utf8 (pattern index 2)
2025-05-01 20:36:41,770 - INFO - Found unique match for chapter 18: '第018章.txt' in /content/1000_word_chapters_expanded/神游（校对版全本）作者：徐公子胜治_utf8.summary (pattern index 0)
2025-05-01 20:36:41,855 - INFO - Attempt 1/3 to evaluate chapter 18...
2025-05-01 20:36:43,079 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-05-01 20:36:43,086 - INFO - Chapter 18: Evaluation successful.
2025-05-01 20:36:43,087 - INFO -   semantic_similarity   : 0.500
2025-05-01 20:36:43,088 - INFO -   plot_similarity       : 0.300
2025-05-01 20:36:43,090 - INFO -   character_similarity  : 0.400
2025-05-01 20:36:43,091 - INFO -   background_similarity : 0.200
2025-05-01 20:36:43,093 - INFO -   style_similarity      : 0.300
2025-05-01 20:36:43,597 - INFO - ---

   Chapters:   0%|          | 0/8 [00:00<?, ?it/s]

2025-05-01 20:36:52,576 - INFO - --- Chapter 14 ---
2025-05-01 20:36:52,601 - INFO - Found unique match for chapter 14: '014_第一节接下命令后,黄石把募兵、练兵的任务和亲兵们交待了一番,就穿着崭新的军服赶回柳河,鲜衣怒马正好衣锦还乡,在很大程度上黄石已经把柳河当作自己第二个家乡了。.txt' in /content/novels_chapters/《窃明》(校对版全本)作者_大爆炸(灰熊猫)_utf8 (pattern index 2)
2025-05-01 20:36:52,604 - INFO - Found unique match for chapter 14: '第014章.txt' in /content/1000_word_chapters_expanded/窃明（校对版全本）作者：大爆炸(灰熊猫)_utf8.summary (pattern index 0)
2025-05-01 20:36:52,671 - INFO - Attempt 1/3 to evaluate chapter 14...
2025-05-01 20:36:53,810 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-05-01 20:36:53,813 - INFO - Chapter 14: Evaluation successful.
2025-05-01 20:36:53,815 - INFO -   semantic_similarity   : 0.500
2025-05-01 20:36:53,816 - INFO -   plot_similarity       : 0.400
2025-05-01 20:36:53,817 - INFO -   character_similarity  : 0.600
2025-05-01 20:36:53,818 - INFO -   background_similarity : 0.300
2025-05-01 20:36:53,819 - INFO - 

   Chapters:   0%|          | 0/8 [00:00<?, ?it/s]

2025-05-01 20:37:15,728 - INFO - --- Chapter 9 ---
2025-05-01 20:37:15,749 - INFO - Found unique match for chapter 9: '第009章.txt' in /content/1000_word_chapters_expanded/老子是癞蛤蟆（校对版全本） 作者：烽火戏诸侯_utf8.summary (pattern index 0)
2025-05-01 20:37:15,785 - INFO - Attempt 1/3 to evaluate chapter 9...
2025-05-01 20:37:18,272 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-05-01 20:37:18,274 - INFO - Chapter 9: Evaluation successful.
2025-05-01 20:37:18,275 - INFO -   semantic_similarity   : 0.200
2025-05-01 20:37:18,275 - INFO -   plot_similarity       : 0.100
2025-05-01 20:37:18,276 - INFO -   character_similarity  : 0.300
2025-05-01 20:37:18,276 - INFO -   background_similarity : 0.200
2025-05-01 20:37:18,277 - INFO -   style_similarity      : 0.100
2025-05-01 20:37:18,780 - INFO - --- Chapter 21 ---
2025-05-01 20:37:18,808 - INFO - Found unique match for chapter 21: '第021章.txt' in /content/1000_word_chapters_expanded/老子是癞蛤蟆（校对版全本） 作者：烽火戏诸侯_utf8.s

   Chapters:   0%|          | 0/8 [00:00<?, ?it/s]

2025-05-01 20:37:33,270 - INFO - --- Chapter 15 ---
2025-05-01 20:37:33,309 - INFO - Found unique match for chapter 15: '015_第十五章梦醒来还是梦.txt' in /content/novels_chapters/《肆虐韩娱》(校对版全本)作者_姬叉_utf8 (pattern index 2)
2025-05-01 20:37:33,317 - INFO - Found unique match for chapter 15: '第015章.txt' in /content/1000_word_chapters_expanded/肆虐韩娱（校对版全本）作者：姬叉_utf8.summary (pattern index 0)
2025-05-01 20:37:33,396 - INFO - Attempt 1/3 to evaluate chapter 15...
2025-05-01 20:37:34,879 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-05-01 20:37:34,881 - INFO - Chapter 15: Evaluation successful.
2025-05-01 20:37:34,882 - INFO -   semantic_similarity   : 0.500
2025-05-01 20:37:34,883 - INFO -   plot_similarity       : 0.300
2025-05-01 20:37:34,884 - INFO -   character_similarity  : 0.400
2025-05-01 20:37:34,885 - INFO -   background_similarity : 0.200
2025-05-01 20:37:34,885 - INFO -   style_similarity      : 0.300
2025-05-01 20:37:35,389 - INFO - --- Chapter

   Chapters:   0%|          | 0/8 [00:00<?, ?it/s]

2025-05-01 20:37:52,114 - INFO - --- Chapter 27 ---
2025-05-01 20:37:52,129 - INFO - Found unique match for chapter 27: '027_第九回荆棘妖怪.txt' in /content/novels_chapters/《蜀山》(精校版全本)作者_流浪的蛤蟆_utf8 (pattern index 2)
2025-05-01 20:37:52,131 - INFO - Found unique match for chapter 27: '第027章.txt' in /content/1000_word_chapters_expanded/蜀山（精校版全本）作者：流浪的蛤蟆_utf8.summary (pattern index 0)
2025-05-01 20:37:52,172 - INFO - Attempt 1/3 to evaluate chapter 27...
2025-05-01 20:37:53,600 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-05-01 20:37:53,614 - INFO - Chapter 27: Evaluation successful.
2025-05-01 20:37:53,615 - INFO -   semantic_similarity   : 0.200
2025-05-01 20:37:53,617 - INFO -   plot_similarity       : 0.100
2025-05-01 20:37:53,618 - INFO -   character_similarity  : 0.100
2025-05-01 20:37:53,619 - INFO -   background_similarity : 0.200
2025-05-01 20:37:53,621 - INFO -   style_similarity      : 0.300
2025-05-01 20:37:54,126 - INFO - --- Chapter 

   Chapters:   0%|          | 0/8 [00:00<?, ?it/s]

2025-05-01 20:38:07,614 - INFO - --- Chapter 44 ---
2025-05-01 20:38:07,631 - INFO - Found unique match for chapter 44: '044_第四十四章金铃.txt' in /content/novels_chapters/《诛仙》(校对版全本)作者_萧鼎_utf8 (pattern index 2)
2025-05-01 20:38:07,637 - INFO - Found unique match for chapter 44: '第044章.txt' in /content/1000_word_chapters_expanded/诛仙（校对版全本）作者：萧鼎_utf8.summary (pattern index 0)
2025-05-01 20:38:07,721 - INFO - Attempt 1/3 to evaluate chapter 44...
2025-05-01 20:38:08,890 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-05-01 20:38:08,896 - INFO - Chapter 44: Evaluation successful.
2025-05-01 20:38:08,897 - INFO -   semantic_similarity   : 0.300
2025-05-01 20:38:08,898 - INFO -   plot_similarity       : 0.200
2025-05-01 20:38:08,899 - INFO -   character_similarity  : 0.400
2025-05-01 20:38:08,900 - INFO -   background_similarity : 0.300
2025-05-01 20:38:08,902 - INFO -   style_similarity      : 0.200
2025-05-01 20:38:09,405 - INFO - --- Chapter 51 ---

   Chapters:   0%|          | 0/8 [00:00<?, ?it/s]

2025-05-01 20:38:26,341 - INFO - --- Chapter 8 ---
2025-05-01 20:38:26,365 - INFO - Found unique match for chapter 8: '008_第七章更早的顾客.txt' in /content/novels_chapters/《贩罪》(精校版全本)作者_三天两觉_utf8 (pattern index 2)
2025-05-01 20:38:26,367 - INFO - Found unique match for chapter 8: '第008章.txt' in /content/1000_word_chapters_expanded/贩罪（精校版全本）作者：三天两觉_utf8.summary (pattern index 0)
2025-05-01 20:38:26,435 - INFO - Attempt 1/3 to evaluate chapter 8...
2025-05-01 20:38:27,863 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-05-01 20:38:27,866 - INFO - Chapter 8: Evaluation successful.
2025-05-01 20:38:27,867 - INFO -   semantic_similarity   : 0.700
2025-05-01 20:38:27,868 - INFO -   plot_similarity       : 0.600
2025-05-01 20:38:27,869 - INFO -   character_similarity  : 0.500
2025-05-01 20:38:27,871 - INFO -   background_similarity : 0.400
2025-05-01 20:38:27,872 - INFO -   style_similarity      : 0.300
2025-05-01 20:38:28,377 - INFO - --- Chapter 28 ---

   Chapters:   0%|          | 0/8 [00:00<?, ?it/s]

2025-05-01 20:38:43,253 - INFO - --- Chapter 82 ---
2025-05-01 20:38:43,263 - INFO - Found unique match for chapter 82: '082_第八十二章又见故人.txt' in /content/novels_chapters/《道缘浮图》(校对版全本)作者_烟雨江南_utf8 (pattern index 2)
2025-05-01 20:38:43,266 - INFO - Found unique match for chapter 82: '第082章.txt' in /content/1000_word_chapters_expanded/道缘浮图（校对版全本）作者：烟雨江南_utf8.summary (pattern index 0)
2025-05-01 20:38:43,345 - INFO - Attempt 1/3 to evaluate chapter 82...
2025-05-01 20:38:46,575 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-05-01 20:38:46,579 - INFO - Chapter 82: Evaluation successful.
2025-05-01 20:38:46,581 - INFO -   semantic_similarity   : 0.300
2025-05-01 20:38:46,582 - INFO -   plot_similarity       : 0.200
2025-05-01 20:38:46,584 - INFO -   character_similarity  : 0.300
2025-05-01 20:38:46,585 - INFO -   background_similarity : 0.200
2025-05-01 20:38:46,587 - INFO -   style_similarity      : 0.100
2025-05-01 20:38:47,091 - INFO - --- Chap

   Chapters:   0%|          | 0/8 [00:00<?, ?it/s]

2025-05-01 20:39:08,546 - INFO - --- Chapter 5 ---
2025-05-01 20:39:08,572 - INFO - Found unique match for chapter 5: '第005章.txt' in /content/1000_word_chapters_expanded/重活了（精校版全本）作者： 尝谕_utf8.summary (pattern index 0)
2025-05-01 20:39:08,664 - INFO - Attempt 1/3 to evaluate chapter 5...
2025-05-01 20:39:09,707 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-05-01 20:39:09,714 - INFO - Chapter 5: Evaluation successful.
2025-05-01 20:39:09,715 - INFO -   semantic_similarity   : 0.200
2025-05-01 20:39:09,716 - INFO -   plot_similarity       : 0.100
2025-05-01 20:39:09,718 - INFO -   character_similarity  : 0.200
2025-05-01 20:39:09,719 - INFO -   background_similarity : 0.100
2025-05-01 20:39:09,720 - INFO -   style_similarity      : 0.300
2025-05-01 20:39:10,224 - INFO - --- Chapter 33 ---
2025-05-01 20:39:10,247 - INFO - Found unique match for chapter 33: '第033章.txt' in /content/1000_word_chapters_expanded/重活了（精校版全本）作者： 尝谕_utf8.summary (patt

   Chapters:   0%|          | 0/8 [00:00<?, ?it/s]

2025-05-01 20:39:26,732 - INFO - --- Chapter 12 ---
2025-05-01 20:39:26,772 - INFO - Found unique match for chapter 12: '012_第十章我话说完,谁想上前_.txt' in /content/novels_chapters/《重生之出人头地》(校对版全本)作者_闹闹不爱闹_utf8 (pattern index 2)
2025-05-01 20:39:26,774 - INFO - Found unique match for chapter 12: '第012章.txt' in /content/1000_word_chapters_expanded/重生之出人头地（校对版全本）作者：闹闹不爱闹_utf8.summary (pattern index 0)
2025-05-01 20:39:26,845 - INFO - Attempt 1/3 to evaluate chapter 12...
2025-05-01 20:39:29,052 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-05-01 20:39:29,056 - INFO - Chapter 12: Evaluation successful.
2025-05-01 20:39:29,057 - INFO -   semantic_similarity   : 0.200
2025-05-01 20:39:29,059 - INFO -   plot_similarity       : 0.100
2025-05-01 20:39:29,060 - INFO -   character_similarity  : 0.200
2025-05-01 20:39:29,062 - INFO -   background_similarity : 0.300
2025-05-01 20:39:29,063 - INFO -   style_similarity      : 0.100
2025-05-01 20:39:29,567 - INF

   Chapters:   0%|          | 0/8 [00:00<?, ?it/s]

2025-05-01 20:39:47,605 - INFO - --- Chapter 38 ---
2025-05-01 20:39:47,627 - INFO - Found unique match for chapter 38: '038_第三十七章偷窥.txt' in /content/novels_chapters/《陈二狗的妖孽人生》(校对版全本)作者_烽火戏诸侯_utf8 (pattern index 2)
2025-05-01 20:39:47,630 - INFO - Found unique match for chapter 38: '第038章.txt' in /content/1000_word_chapters_expanded/陈二狗的妖孽人生（校对版全本）作者：烽火戏诸侯_utf8.summary (pattern index 0)
2025-05-01 20:39:47,698 - INFO - Attempt 1/3 to evaluate chapter 38...
2025-05-01 20:39:49,490 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-05-01 20:39:49,506 - INFO - Chapter 38: Evaluation successful.
2025-05-01 20:39:49,507 - INFO -   semantic_similarity   : 0.300
2025-05-01 20:39:49,509 - INFO -   plot_similarity       : 0.200
2025-05-01 20:39:49,511 - INFO -   character_similarity  : 0.200
2025-05-01 20:39:49,512 - INFO -   background_similarity : 0.100
2025-05-01 20:39:49,514 - INFO -   style_similarity      : 0.100
2025-05-01 20:39:50,018 - INFO - 

   Chapters:   0%|          | 0/8 [00:00<?, ?it/s]

2025-05-01 20:40:05,688 - INFO - --- Chapter 3 ---
2025-05-01 20:40:05,698 - INFO - Found unique match for chapter 3: '003_第三章蒙目棋.txt' in /content/novels_chapters/《雅骚》(校对版全本)作者_贼道三痴_utf8 (pattern index 2)
2025-05-01 20:40:05,700 - INFO - Found unique match for chapter 3: '第003章.txt' in /content/1000_word_chapters_expanded/雅骚（校对版全本）作者：贼道三痴_utf8.summary (pattern index 0)
2025-05-01 20:40:05,730 - INFO - Attempt 1/3 to evaluate chapter 3...
2025-05-01 20:40:06,855 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-05-01 20:40:06,856 - INFO - Chapter 3: Evaluation successful.
2025-05-01 20:40:06,857 - INFO -   semantic_similarity   : 0.800
2025-05-01 20:40:06,858 - INFO -   plot_similarity       : 0.700
2025-05-01 20:40:06,859 - INFO -   character_similarity  : 0.900
2025-05-01 20:40:06,860 - INFO -   background_similarity : 0.800
2025-05-01 20:40:06,861 - INFO -   style_similarity      : 0.600
2025-05-01 20:40:07,363 - INFO - --- Chapter 18 ---
2

   Chapters:   0%|          | 0/8 [00:00<?, ?it/s]

2025-05-01 20:40:21,717 - INFO - --- Chapter 22 ---
2025-05-01 20:40:21,754 - INFO - Found unique match for chapter 22: '022_第十五章龙雀.txt' in /content/novels_chapters/《食物链顶端的男人》(校对版全本)作者_熊狼狗_utf8 (pattern index 2)
2025-05-01 20:40:21,756 - INFO - Found unique match for chapter 22: '第022章.txt' in /content/1000_word_chapters_expanded/食物链顶端的男人（校对版全本）作者：熊狼狗_utf8.summary (pattern index 0)
2025-05-01 20:40:21,829 - INFO - Attempt 1/3 to evaluate chapter 22...
2025-05-01 20:40:23,358 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-05-01 20:40:23,364 - INFO - Chapter 22: Evaluation successful.
2025-05-01 20:40:23,365 - INFO -   semantic_similarity   : 0.700
2025-05-01 20:40:23,366 - INFO -   plot_similarity       : 0.600
2025-05-01 20:40:23,368 - INFO -   character_similarity  : 0.500
2025-05-01 20:40:23,369 - INFO -   background_similarity : 0.400
2025-05-01 20:40:23,371 - INFO -   style_similarity      : 0.300
2025-05-01 20:40:23,875 - INFO - --- C

   Chapters:   0%|          | 0/8 [00:00<?, ?it/s]

2025-05-01 20:40:37,641 - INFO - --- Chapter 46 ---
2025-05-01 20:40:37,718 - INFO - Found unique match for chapter 46: '046_第一节话说.txt' in /content/novels_chapters/《高手寂寞2》(校对版全本)作者_兰帝魅晨_utf8 (pattern index 2)
2025-05-01 20:40:37,720 - INFO - Found unique match for chapter 46: '第046章.txt' in /content/1000_word_chapters_expanded/高手寂寞2（校对版全本）作者：兰帝魅晨_utf8.summary (pattern index 0)
2025-05-01 20:40:37,781 - INFO - Attempt 1/3 to evaluate chapter 46...
2025-05-01 20:40:39,170 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-05-01 20:40:39,178 - INFO - Chapter 46: Evaluation successful.
2025-05-01 20:40:39,179 - INFO -   semantic_similarity   : 0.300
2025-05-01 20:40:39,180 - INFO -   plot_similarity       : 0.200
2025-05-01 20:40:39,181 - INFO -   character_similarity  : 0.200
2025-05-01 20:40:39,182 - INFO -   background_similarity : 0.100
2025-05-01 20:40:39,183 - INFO -   style_similarity      : 0.100
2025-05-01 20:40:39,687 - INFO - --- Chapte

   Chapters:   0%|          | 0/8 [00:00<?, ?it/s]

2025-05-01 20:40:52,509 - INFO - --- Chapter 17 ---
2025-05-01 20:40:52,551 - INFO - Found unique match for chapter 17: '第017章.txt' in /content/1000_word_chapters_expanded/黑龙法典（校对版全本）作者：欢声_utf8.summary (pattern index 0)
2025-05-01 20:40:52,621 - INFO - Attempt 1/3 to evaluate chapter 17...
2025-05-01 20:40:53,588 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-05-01 20:40:53,598 - INFO - Chapter 17: Evaluation successful.
2025-05-01 20:40:53,599 - INFO -   semantic_similarity   : 0.700
2025-05-01 20:40:53,601 - INFO -   plot_similarity       : 0.600
2025-05-01 20:40:53,602 - INFO -   character_similarity  : 0.500
2025-05-01 20:40:53,603 - INFO -   background_similarity : 0.400
2025-05-01 20:40:53,604 - INFO -   style_similarity      : 0.300
2025-05-01 20:40:54,107 - INFO - --- Chapter 56 ---
2025-05-01 20:40:54,151 - INFO - Found unique match for chapter 56: '第056章.txt' in /content/1000_word_chapters_expanded/黑龙法典（校对版全本）作者：欢声_utf8.summary (

   Chapters:   0%|          | 0/8 [00:00<?, ?it/s]

2025-05-01 20:41:12,888 - INFO - --- Chapter 9 ---
2025-05-01 20:41:12,972 - INFO - Found unique match for chapter 9: '009_第9章陈少荣.txt' in /content/novels_chapters/八零喜事_当家肥妻大翻身 (pattern index 2)
2025-05-01 20:41:12,974 - INFO - Found unique match for chapter 9: '第009章.txt' in /content/1000_word_chapters_expanded/八零喜事：当家肥妻大翻身.summary (pattern index 0)
2025-05-01 20:41:13,040 - INFO - Attempt 1/3 to evaluate chapter 9...
2025-05-01 20:41:15,279 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-05-01 20:41:15,286 - INFO - Chapter 9: Evaluation successful.
2025-05-01 20:41:15,287 - INFO -   semantic_similarity   : 0.500
2025-05-01 20:41:15,289 - INFO -   plot_similarity       : 0.400
2025-05-01 20:41:15,290 - INFO -   character_similarity  : 0.600
2025-05-01 20:41:15,292 - INFO -   background_similarity : 0.300
2025-05-01 20:41:15,293 - INFO -   style_similarity      : 0.200
2025-05-01 20:41:15,797 - INFO - --- Chapter 34 ---
2025-05-01 20:41:15,8

   Chapters:   0%|          | 0/8 [00:00<?, ?it/s]

2025-05-01 20:41:31,827 - INFO - --- Chapter 18 ---
2025-05-01 20:41:31,894 - INFO - Found unique match for chapter 18: '018_第18章又寻短见.txt' in /content/novels_chapters/八零年代好时光 (pattern index 2)
2025-05-01 20:41:31,896 - INFO - Found unique match for chapter 18: '第018章.txt' in /content/1000_word_chapters_expanded/八零年代好时光.summary (pattern index 0)
2025-05-01 20:41:31,943 - INFO - Attempt 1/3 to evaluate chapter 18...
2025-05-01 20:41:32,908 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-05-01 20:41:32,911 - INFO - Chapter 18: Evaluation successful.
2025-05-01 20:41:32,912 - INFO -   semantic_similarity   : 0.500
2025-05-01 20:41:32,913 - INFO -   plot_similarity       : 0.300
2025-05-01 20:41:32,914 - INFO -   character_similarity  : 0.400
2025-05-01 20:41:32,915 - INFO -   background_similarity : 0.200
2025-05-01 20:41:32,915 - INFO -   style_similarity      : 0.300
2025-05-01 20:41:33,419 - INFO - --- Chapter 63 ---
2025-05-01 20:41:33,488 

   Chapters:   0%|          | 0/8 [00:00<?, ?it/s]

2025-05-01 20:41:47,262 - INFO - --- Chapter 11 ---
2025-05-01 20:41:47,290 - INFO - Found unique match for chapter 11: '011_第11章翻脸不认人.txt' in /content/novels_chapters/八零福星俏媳妇 (pattern index 2)
2025-05-01 20:41:47,291 - INFO - Found unique match for chapter 11: '第011章.txt' in /content/1000_word_chapters_expanded/八零福星俏媳妇.summary (pattern index 0)
2025-05-01 20:41:47,318 - INFO - Attempt 1/3 to evaluate chapter 11...
2025-05-01 20:41:48,259 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-05-01 20:41:48,266 - INFO - Chapter 11: Evaluation successful.
2025-05-01 20:41:48,267 - INFO -   semantic_similarity   : 0.500
2025-05-01 20:41:48,267 - INFO -   plot_similarity       : 0.300
2025-05-01 20:41:48,268 - INFO -   character_similarity  : 0.400
2025-05-01 20:41:48,268 - INFO -   background_similarity : 0.600
2025-05-01 20:41:48,269 - INFO -   style_similarity      : 0.200
2025-05-01 20:41:48,773 - INFO - --- Chapter 33 ---
2025-05-01 20:41:48,823

   Chapters:   0%|          | 0/8 [00:00<?, ?it/s]

2025-05-01 20:42:00,751 - INFO - --- Chapter 19 ---
2025-05-01 20:42:00,828 - INFO - Found unique match for chapter 19: '019_第19章为难.txt' in /content/novels_chapters/国宴大厨在八零 (pattern index 2)
2025-05-01 20:42:00,829 - INFO - Found unique match for chapter 19: '第019章.txt' in /content/1000_word_chapters_expanded/国宴大厨在八零.summary (pattern index 0)
2025-05-01 20:42:00,869 - INFO - Attempt 1/3 to evaluate chapter 19...
2025-05-01 20:42:01,991 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-05-01 20:42:01,995 - INFO - Chapter 19: Evaluation successful.
2025-05-01 20:42:01,995 - INFO -   semantic_similarity   : 0.200
2025-05-01 20:42:01,996 - INFO -   plot_similarity       : 0.100
2025-05-01 20:42:01,997 - INFO -   character_similarity  : 0.200
2025-05-01 20:42:01,997 - INFO -   background_similarity : 0.100
2025-05-01 20:42:01,998 - INFO -   style_similarity      : 0.300
2025-05-01 20:42:02,501 - INFO - --- Chapter 29 ---
2025-05-01 20:42:02,580 - 

   Chapters:   0%|          | 0/8 [00:00<?, ?it/s]

2025-05-01 20:42:15,081 - INFO - --- Chapter 11 ---
2025-05-01 20:42:15,139 - INFO - Found unique match for chapter 11: '011_第11章你们出多少彩礼_.txt' in /content/novels_chapters/重生八零_毒妻不好惹 (pattern index 2)
2025-05-01 20:42:15,225 - INFO - Attempt 1/3 to evaluate chapter 11...
2025-05-01 20:42:16,545 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-05-01 20:42:16,549 - INFO - Chapter 11: Evaluation successful.
2025-05-01 20:42:16,550 - INFO -   semantic_similarity   : 0.200
2025-05-01 20:42:16,551 - INFO -   plot_similarity       : 0.100
2025-05-01 20:42:16,552 - INFO -   character_similarity  : 0.200
2025-05-01 20:42:16,552 - INFO -   background_similarity : 0.100
2025-05-01 20:42:16,553 - INFO -   style_similarity      : 0.100
2025-05-01 20:42:17,056 - INFO - --- Chapter 37 ---
2025-05-01 20:42:17,092 - INFO - Found unique match for chapter 37: '037_第37章不可能!应该是她认错了人!.txt' in /content/novels_chapters/重生八零_毒妻不好惹 (pattern index 2)
2025-05-01 20:42:1

In [None]:
!pip install sentence-transformers bert-score torch transformers chardet tqdm pandas

In [7]:
# ==============================================================
# Script: Calculate Cosine Similarity & BERTScore between folders
# - Uses robust file matching based on seed files
# - Saves individual JSON output per chapter pair
# ==============================================================

# ==============================================================
#  1. Import Libraries
# ==============================================================
import os
import json
import time
import datetime
import re
import logging
import unicodedata
from pathlib import Path
from tqdm import tqdm # Use standard tqdm
import chardet

# --- Sentence Transformers ---
from sentence_transformers import SentenceTransformer, util

# --- BERT Score ---
try:
    from bert_score import score as calculate_bert_score
except ImportError:
    logging.error("bert_score library not found. Please install it: pip install bert-score")
    calculate_bert_score = None # Set to None if import fails

# ==============================================================
#  2. Configuration
# ==============================================================

# --- Input/Output Folders ---
# ⚠️ MODIFY THESE PATHS
FOLDER_A = Path("/content/novels_chapters") # Example: Folder with original chapters
FOLDER_B = Path("/content/1000_word_chapters_expanded") # Example: Folder with generated/comparison chapters
SEED_DIR = Path("/content/randomseed") # Folder with _randomseed.txt files
OUTPUT_ROOT = Path("/content/bertscore_randomseed/1000_word_chapters_expanded_evaluation") # Main output folder

# --- Model Configuration ---
SBERT_MODEL_NAME = "shibing624/text2vec-base-chinese"
BERTSCORE_MODEL_TYPE = "bert-base-chinese"
BERTSCORE_LANG = "zh"

# --- Logging ---
LOGGING_LEVEL = logging.INFO # Set to DEBUG for more verbose file matching logs
logging.basicConfig(level=LOGGING_LEVEL, format="%(asctime)s - %(levelname)s - %(message)s")

# ==============================================================
#  3. Utility Functions (Adapted from OpenAI evaluation script)
# ==============================================================

def read_txt(path: Path) -> str:
    """Reads a text file with automatic encoding detection."""
    try:
        raw = path.read_bytes()
        detected = chardet.detect(raw); enc = detected['encoding'] if detected else None
        confidence = detected['confidence'] if detected else 0
        if confidence < 0.9 or (enc and enc.lower() in ['ascii', 'windows-1252']):
            try: decoded_text = raw.decode('utf-8', errors='strict'); return decoded_text.strip()
            except UnicodeDecodeError: logging.debug(f"Strict UTF-8 failed for {path.name}, fallback.")
        if not enc: enc = 'utf-8'; logging.warning(f"No encoding detected for {path.name}, defaulting to {enc}.")
        common_encodings = list(dict.fromkeys([enc, 'utf-8', 'gbk', 'gb2312']))
        decoded_text = None
        for encoding in common_encodings:
             if not encoding: continue
             try: decoded_text = raw.decode(encoding, errors='strict'); break
             except (UnicodeDecodeError, TypeError, LookupError): continue
        if decoded_text is None:
             final_encoding = 'utf-8'; logging.warning(f"Strict decoding failed, using fallback {final_encoding} with errors='ignore'.")
             decoded_text = raw.decode(final_encoding, errors='ignore')
        return decoded_text.strip()
    except FileNotFoundError: logging.error(f"File not found: {path}"); return ""
    except Exception as e: logging.error(f"Error reading file {path}: {e}", exc_info=True); return ""

# --- Chapter Number Extraction Patterns (for find_chapter_file) ---
CHAP_PATTERNS = [
    re.compile(r"^第(\d{1,5})章\.txt$"), re.compile(r"^(\d{1,5})\.txt$"),
    re.compile(r"^(\d{1,5})[_\s.-].*?\.txt$", re.IGNORECASE),
    re.compile(r"第\s*(\d{1,5})\s*章", re.IGNORECASE),
    re.compile(r"chapter\s*(\d{1,5})", re.IGNORECASE), re.compile(r"chap\s*(\d{1,5})", re.IGNORECASE),
    re.compile(r"[_-](\d{1,5})[_-]"), re.compile(r"(?<!\d)(\d{1,5})(?!\d)"),
]

def find_chapter_file(folder: Path, chap_num: int) -> Path | None:
    """Finds a TXT file matching the chapter number within a folder (recursive)."""
    if not folder.is_dir(): logging.warning(f"Search folder doesn't exist: {folder}"); return None
    possible_matches = []
    try:
        for item in folder.rglob("*.txt"):
            if item.is_file():
                name = item.name
                for i, pat in enumerate(CHAP_PATTERNS):
                    match = pat.search(name)
                    if match and match.groups():
                        try:
                            extracted_num = int(match.group(1))
                            if extracted_num == chap_num:
                                possible_matches.append({"path": item, "pattern_index": i}); break
                        except (ValueError, IndexError): continue
    except Exception as e: logging.error(f"Error searching {folder} for chap {chap_num}: {e}", exc_info=True)

    if not possible_matches: logging.warning(f"Chapter {chap_num} not found in {folder}."); return None
    if len(possible_matches) > 1:
        possible_matches.sort(key=lambda x: x['pattern_index'])
        best_match = possible_matches[0]
        logging.warning(f"Multiple matches for chap {chap_num} in {folder}. Using '{best_match['path'].name}' (pattern index {best_match['pattern_index']}). All: {[p['path'].name for p in possible_matches]}")
        return best_match['path']
    return possible_matches[0]['path']

# --- Normalize Function (for directory/seed matching) ---
def normalize(name: str) -> str:
    """Normalizes names for robust matching."""
    name = str(name); name = unicodedata.normalize("NFKC", name); name = name.lower()
    name = name.replace("_utf8", ""); name = re.sub(r"[《》]", "", name)
    name = re.sub(r"\.(summary|txt)$", "", name) # Remove common extensions/suffixes
    name = re.sub(r"[\s:：()（）【】\[\]“”\"',.!.?？\-·•_]+", "", name) # Aggressive removal
    return name.strip()

# --- Read Seed Chapters Function ---
def read_seed_chapters(book_name: str, seed_dir: Path) -> list[int]:
    """Reads the list of chapter numbers from the _randomseed.txt file."""
    norm_target = normalize(book_name)
    if not norm_target: logging.warning(f"Normalized book name is empty: '{book_name}'"); return []
    seed_file_found = None
    try:
        if not seed_dir.is_dir(): logging.error(f"Seed directory missing: {seed_dir}"); return []
        for file in seed_dir.glob("*_randomseed.txt"):
            candidate = file.stem.replace("_randomseed", "")
            if normalize(candidate) == norm_target:
                seed_file_found = file; break
        if seed_file_found:
            logging.info(f"Found seed file: {seed_file_found.name} for book '{book_name}'")
            lines = seed_file_found.read_text(encoding="utf-8").splitlines()
            chapters = sorted(list(set(int(x.strip()) for x in lines if x.strip().isdigit())))
            if not chapters: logging.warning(f"Seed file {seed_file_found.name} is empty or has no digits.")
            return chapters
        else: logging.warning(f"No matching seed file in {seed_dir} for book: '{book_name}' (normalized: '{norm_target}')"); return []
    except Exception as e: logging.error(f"Error reading seed file for '{book_name}': {e}", exc_info=True); return []

# ==============================================================
# 4. Main Calculation Function
# ==============================================================

def calculate_and_save_similarities(folder_a: Path, folder_b: Path, seed_dir: Path, output_root: Path):
    """
    Calculates Cosine Similarity & BERTScore using robust file matching
    and saves results to individual JSON files per chapter.
    """
    logging.info("Starting similarity calculations with robust matching...")
    start_time = time.time()
    output_root.mkdir(parents=True, exist_ok=True)

    # --- Path Validation ---
    if not folder_a.is_dir(): logging.error(f"CRITICAL: Folder A not found: {folder_a}"); return
    if not folder_b.is_dir(): logging.error(f"CRITICAL: Folder B not found: {folder_b}"); return
    if not seed_dir.is_dir(): logging.error(f"CRITICAL: Seed directory not found: {seed_dir}"); return
    if calculate_bert_score is None: logging.error("CRITICAL: bert_score library not loaded."); return
    # --- End Path Validation ---

    # --- Load SBERT Model ---
    try:
        logging.info(f"Loading Sentence Transformer model: {SBERT_MODEL_NAME}...")
        sbert_model = SentenceTransformer(SBERT_MODEL_NAME)
        logging.info("Sentence Transformer model loaded.")
    except Exception as e:
        logging.error(f"Failed to load SBERT model '{SBERT_MODEL_NAME}': {e}", exc_info=True); return

    # --- Pre-scan Folder B directories ---
    logging.info(f"Scanning {folder_b} for comparison book directories...")
    folder_b_dir_map = {}
    try:
        for b_dir in folder_b.iterdir():
            if b_dir.is_dir():
                normalized_b_name = normalize(b_dir.name)
                if normalized_b_name in folder_b_dir_map:
                     logging.warning(f"Duplicate normalized name '{normalized_b_name}' in {folder_b}. Using first found: {folder_b_dir_map[normalized_b_name]}. Skipping {b_dir}")
                else: folder_b_dir_map[normalized_b_name] = b_dir
    except Exception as e: logging.error(f"Error scanning Folder B directories: {e}", exc_info=True); return
    logging.info(f"Found {len(folder_b_dir_map)} potential comparison book directories.")

    # --- Iterate through books in Folder A ---
    folder_a_book_dirs = sorted([d for d in folder_a.iterdir() if d.is_dir()])
    if not folder_a_book_dirs: logging.error(f"No book directories found in {folder_a}"); return
    logging.info(f"Found {len(folder_a_book_dirs)} potential books in {folder_a}.")

    overall_chapters_processed = 0
    overall_chapters_error = 0
    books_processed_count = 0

    for book_dir_a in tqdm(folder_a_book_dirs, desc="📚 Processing Books"):
        book_name_original = book_dir_a.name
        logging.info(f"\n{'='*15} Processing Book: {book_name_original} {'='*15}")

        # Find corresponding Folder B directory using normalization
        normalized_a_name = normalize(book_name_original)
        book_dir_b = folder_b_dir_map.get(normalized_a_name)

        if not book_dir_b:
            logging.warning(f"No matching comparison directory found for '{book_name_original}' (normalized: '{normalized_a_name}') in pre-scanned map from {folder_b}. Skipping book.")
            continue
        elif not book_dir_b.is_dir(): # Should not happen with pre-scan logic but double-check
             logging.error(f"Matched path '{book_dir_b}' for book '{book_name_original}' is not a directory. Skipping.")
             continue
        else:
             logging.info(f"Found corresponding comparison directory: {book_dir_b}")

        # Get chapters to compare for this book from seed file
        chapters_to_compare = read_seed_chapters(book_name_original, seed_dir)
        if not chapters_to_compare:
            logging.warning(f"No chapters selected from seed file for '{book_name_original}'. Skipping book.")
            continue

        logging.info(f"Selected chapters for '{book_name_original}': {chapters_to_compare}")
        books_processed_count += 1

        # Create output directory for this book's results
        # Use original name for output directory clarity
        book_output_dir = output_root / book_name_original
        book_output_dir.mkdir(parents=True, exist_ok=True)

        # --- Process Chapters for this Book ---
        book_chapters_processed = 0
        book_chapters_error = 0
        for chap_num in tqdm(chapters_to_compare, desc=f"   Chapters", leave=False):
            logging.info(f"--- Chapter {chap_num} ---")

            path_a = find_chapter_file(book_dir_a, chap_num)
            path_b = find_chapter_file(book_dir_b, chap_num) # Search within the matched folder_b subdir

            # Initialize result dict
            result_data = {
                "book_name": book_name_original,
                "chapter": chap_num,
                "timestamp_utc": datetime.datetime.now(datetime.timezone.utc).isoformat(),
                "file_a": str(path_a.relative_to(folder_a)) if path_a else "NOT_FOUND",
                "file_b": str(path_b.relative_to(folder_b)) if path_b else "NOT_FOUND",
                "cosine_similarity": None,
                "bert_score_f1": None,
                "bert_score_precision": None,
                "bert_score_recall": None,
                "error": None
            }

            if not path_a or not path_b:
                error_msg = f"Input file(s) missing: {'File A ' if not path_a else ''}{'File B' if not path_b else ''}"
                logging.error(f"Chapter {chap_num}: {error_msg}")
                result_data["error"] = error_msg
                book_chapters_error += 1
            else:
                text_a = read_txt(path_a)
                text_b = read_txt(path_b)

                if not text_a or not text_b:
                    error_msg = f"Failed to read content: {'File A ' if not text_a else ''}{'File B' if not text_b else ''}"
                    logging.error(f"Chapter {chap_num}: {error_msg}")
                    result_data["error"] = error_msg
                    book_chapters_error += 1
                else:
                    # --- Calculate Similarities ---
                    try:
                        # Cosine Similarity
                        logging.debug(f"Calculating Cosine Sim for Chapter {chap_num}")
                        emb1 = sbert_model.encode(text_a, normalize_embeddings=True, show_progress_bar=False)
                        emb2 = sbert_model.encode(text_b, normalize_embeddings=True, show_progress_bar=False)
                        cosine_sim = util.cos_sim(emb1, emb2).item()
                        result_data["cosine_similarity"] = cosine_sim
                        logging.info(f"  Cosine Similarity: {cosine_sim:.4f}")

                        # BERTScore
                        logging.debug(f"Calculating BERTScore for Chapter {chap_num}")
                        P, R, F1 = calculate_bert_score([text_a], [text_b],
                                                        model_type=BERTSCORE_MODEL_TYPE,
                                                        lang=BERTSCORE_LANG,
                                                        rescale_with_baseline=True,
                                                        verbose=False)
                        result_data["bert_score_precision"] = P.item()
                        result_data["bert_score_recall"] = R.item()
                        result_data["bert_score_f1"] = F1.item()
                        logging.info(f"  BERTScore: P={P.item():.4f} R={R.item():.4f} F1={F1.item():.4f}")

                        book_chapters_processed += 1

                    except Exception as e:
                        logging.error(f"Chapter {chap_num}: Error during similarity calculation: {e}", exc_info=True)
                        result_data["error"] = repr(e)
                        book_chapters_error += 1

            # --- Save individual JSON for this chapter ---
            # Use a consistent filename format
            output_json_filename = f"chapter_{chap_num:05d}_similarity.json" # Pad chapter number
            output_json_path = book_output_dir / output_json_filename
            try:
                with open(output_json_path, "w", encoding="utf-8") as f:
                    json.dump(result_data, f, ensure_ascii=False, indent=2)
                logging.debug(f"Saved chapter result to {output_json_path}")
            except Exception as e:
                logging.error(f"Failed to save result for Chapter {chap_num} to {output_json_path}: {e}")
                # Increment error count again? Or rely on the initial calculation error count?
                # Let's assume if we got here, calculation might have finished but saving failed.
                if result_data["error"] is None: # If no calculation error previously recorded
                      book_chapters_error +=1 # Count saving error
                      book_chapters_processed -=1 # Decrement success count if saving failed

            time.sleep(0.1) # Small delay optional

        # --- End Chapter Loop for Book ---
        logging.info(f"Finished book '{book_name_original}'. Processed: {book_chapters_processed}, Errors/Skipped: {book_chapters_error}")
        overall_chapters_processed += book_chapters_processed
        overall_chapters_error += book_chapters_error

    # --- End of all books ---
    end_time = time.time(); total_time = end_time - start_time
    logging.info(f"\n{'='*20} Calculation Summary {'='*20}")
    logging.info(f"Processed {books_processed_count} books (found matching comparison dir and seed chapters).")
    logging.info(f"Total chapters successfully processed (both scores): {overall_chapters_processed}")
    logging.info(f"Total chapters failed or skipped (missing files/read errors/calc errors): {overall_chapters_error}")
    logging.info(f"Total execution time: {total_time:.2f} seconds ({total_time/60:.2f} minutes)")
    logging.info(f"Individual chapter results saved in subdirectories under: {output_root}")
    logging.info("🎉 Similarity calculation finished.")


# ==============================================================
# 5. Run the Script
# ==============================================================
if __name__ == "__main__":
    # --- Ensure models can be loaded before main loop ---
    try:
        logging.info("Pre-loading models...")
        _ = SentenceTransformer(SBERT_MODEL_NAME)
        # BERTScore model loading is usually deferred until first calculation, which is fine.
        logging.info("SBERT model loaded successfully.")
    except Exception as e:
        logging.error(f"CRITICAL: Failed to load SBERT model '{SBERT_MODEL_NAME}' during pre-check: {e}", exc_info=True)
        # Decide whether to exit if models can't load
        # raise SystemExit("Exiting due to model loading error.")

    # --- Run Main Calculation ---
    calculate_and_save_similarities(FOLDER_A, FOLDER_B, SEED_DIR, OUTPUT_ROOT)

2025-05-01 21:58:41,082 - INFO - Pre-loading models...
2025-05-01 21:58:41,084 - INFO - Use pytorch device_name: cuda:0
2025-05-01 21:58:41,085 - INFO - Load pretrained SentenceTransformer: shibing624/text2vec-base-chinese
2025-05-01 21:58:42,006 - INFO - SBERT model loaded successfully.
2025-05-01 21:58:42,007 - INFO - Starting similarity calculations with robust matching...
2025-05-01 21:58:42,007 - INFO - Loading Sentence Transformer model: shibing624/text2vec-base-chinese...
2025-05-01 21:58:42,008 - INFO - Use pytorch device_name: cuda:0
2025-05-01 21:58:42,009 - INFO - Load pretrained SentenceTransformer: shibing624/text2vec-base-chinese
2025-05-01 21:58:42,522 - INFO - Sentence Transformer model loaded.
2025-05-01 21:58:42,523 - INFO - Scanning /content/1000_word_chapters_expanded for comparison book directories...
2025-05-01 21:58:42,524 - INFO - Found 33 potential comparison book directories.
2025-05-01 21:58:42,525 - INFO - Found 40 potential books in /content/novels_chapters

In [10]:
# # ==============================================================
# # Merged Script: Calculate Cosine Similarity, BERTScore & LLM Eval
# # - Uses robust file matching based on seed files
# # - Calls OpenAI API for qualitative evaluation
# # - Saves combined results (Cosine, BERTScore, LLM) to individual JSON per chapter
# # ==============================================================

# # ==============================================================
# #  0. Environment Installation (if needed)
# # ==============================================================
# # !pip install --quiet sentence-transformers bert-score torch torchvision torchaudio openai chardet tqdm pandas # Ensure all dependencies are installed

# # ==============================================================
# #  1. Import Libraries
# # ==============================================================
# import os
# import json
# import time
# import datetime
# import re
# import logging
# import unicodedata
# from pathlib import Path
# from tqdm import tqdm # Use standard tqdm
# import chardet

# # --- Sentence Transformers ---
# from sentence_transformers import SentenceTransformer, util

# # --- BERT Score ---
# try:
#     from bert_score import score as calculate_bert_score
# except ImportError:
#     logging.error("bert_score library not found. Please install it: pip install bert-score")
#     calculate_bert_score = None # Set to None if import fails

# # --- OpenAI for LLM Evaluation ---
# try:
#     import openai
# except ImportError:
#     logging.error("openai library not found. Please install it: pip install openai")
#     openai = None # Set to None if import fails

# # ==============================================================
# #  2. Configuration
# # ==============================================================

# # --- Input/Output Folders ---
# # ⚠️ MODIFY THESE PATHS
# FOLDER_A = Path("/content/novels_chapters") # Folder with original chapters (Script 1's FOLDER_A / Script 2's ORIGINAL_CHAPTERS_ROOT)
# FOLDER_B = Path("/content/1000_word_chapters_expanded") # Folder with generated/comparison chapters (Script 1's FOLDER_B / Script 2's EXPANDED_CHAPTERS_ROOT)
# SEED_DIR = Path("/content/randomseed") # Folder with _randomseed.txt files
# OUTPUT_ROOT = Path("/content/combined_evaluation_results/1000_word_chapters_expanded") # Main output folder for combined results

# # --- Similarity Model Configuration ---
# SBERT_MODEL_NAME = "shibing624/text2vec-base-chinese"
# BERTSCORE_MODEL_TYPE = "bert-base-chinese" # Make sure this matches the language/domain
# BERTSCORE_LANG = "zh" # Set to your language code (e.g., "en", "zh")

# # --- LLM Evaluation Configuration ---
# # ⚠️ IMPORTANT SECURITY WARNING:
# # Do NOT hardcode your API key directly in the script in production or shared environments.
# # Use environment variables (os.getenv('OPENAI_API_KEY')) or a secure secrets management system.
# OPENAI_API_KEY = "your-default-api-key" # <-- REPLACE THIS DANGEROUSLY! Best practice: os.getenv("OPENAI_API_KEY")
# EVALUATION_MODEL_NAME = "gpt-4o" # Or your preferred OpenAI model
# EVALUATION_TEMPERATURE = 0.0
# LLM_EVALUATION_TIMEOUT = 120 # Timeout for OpenAI API call in seconds
# LLM_MAX_RETRIES = 2 # Number of retries for LLM call failures

# # --- LLM Prompt Configuration ---
# PROMPT_TEMPLATE = """
# 你是一位专业文学编辑，将对比两篇中文文本并给出 5 个 0-1 评分，1 表示极度相似，0 表示完全不同。

# **评分维度**
# 1. semantic_similarity 整体语义/主题
# 2. plot_similarity   情节、剧情、事件发展
# 3. character_similarity 人物出场数量、人物名字与性格
# 4. background_similarity 环境、场景与世界观背景
# 5. style_similarity   文字语言风格

# **严格返回** *只包含 JSON*，格式如下（不要有其它文字）：
# {{
#   "semantic_similarity": <0-1>,
#   "plot_similarity": <0-1>,
#   "character_similarity": <0-1>,
#   "background_similarity": <0-1>,
#   "style_similarity": <0-1>
# }}

# 请阅读【文本A】(原文)与【文本B】(AI生成)并打分。

# 【文本A】
# {text_a}

# 【文本B】
# {text_b}
# """

# EXPECTED_LLM_KEYS = [
#     "semantic_similarity",
#     "plot_similarity",
#     "character_similarity",
#     "background_similarity",
#     "style_similarity",
# ]

# # --- Logging ---
# LOGGING_LEVEL = logging.INFO # Set to DEBUG for more verbose file matching/API logs
# logging.basicConfig(level=LOGGING_LEVEL, format="%(asctime)s - %(levelname)s - %(message)s")

# # ==============================================================
# #  3. Utility Functions
# # ==============================================================

# def read_txt(path: Path) -> str:
#     """Reads a text file with automatic encoding detection."""
#     try:
#         raw = path.read_bytes()
#         detected = chardet.detect(raw); enc = detected['encoding'] if detected else None
#         confidence = detected['confidence'] if detected else 0

#         # If confidence is low or encoding looks like standard Latin, try UTF-8 first rigorously
#         if confidence < 0.9 or (enc and enc.lower() in ['ascii', 'windows-1252']):
#             try:
#                 decoded_text = raw.decode('utf-8', errors='strict')
#                 logging.debug(f"Read {path.name} successfully with strict UTF-8.")
#                 return decoded_text.strip()
#             except UnicodeDecodeError:
#                 logging.debug(f"Strict UTF-8 failed for {path.name}, proceeding with detection/fallback.")
#                 # Fall through to use detected encoding or other fallbacks

#         # If no encoding detected, default to UTF-8
#         if not enc:
#             enc = 'utf-8'
#             logging.warning(f"No encoding detected for {path.name}, defaulting to {enc}.")

#         # Create a list of encodings to try: detected, utf-8, common Chinese encodings
#         common_encodings = list(dict.fromkeys([enc, 'utf-8', 'gbk', 'gb18030', 'gb2312'])) # Added gb18030
#         decoded_text = None
#         for encoding in common_encodings:
#             if not encoding: continue # Skip if encoding is None
#             try:
#                 decoded_text = raw.decode(encoding, errors='strict')
#                 logging.debug(f"Read {path.name} successfully with strict {encoding}.")
#                 break # Success
#             except (UnicodeDecodeError, TypeError, LookupError):
#                 logging.debug(f"Strict decoding with {encoding} failed for {path.name}.")
#                 continue # Try next encoding

#         # If all strict decoding failed, use fallback with error ignoring
#         if decoded_text is None:
#             final_encoding = 'utf-8' # Default fallback
#             logging.warning(f"Strict decoding failed with all attempted encodings for {path.name}. Using fallback {final_encoding} with errors='ignore'.")
#             decoded_text = raw.decode(final_encoding, errors='ignore')

#         return decoded_text.strip()

#     except FileNotFoundError:
#         logging.error(f"File not found: {path}")
#         return ""
#     except Exception as e:
#         logging.error(f"Error reading file {path}: {e}", exc_info=True)
#         return ""

# # --- Chapter Number Extraction Patterns (Comprehensive) ---
# CHAP_PATTERNS = [
#     re.compile(r"^第(\d{1,5})章\.txt$"),             # Matches "第123章.txt" exactly
#     re.compile(r"^(\d{1,5})\.txt$"),                 # Matches "123.txt" exactly
#     re.compile(r"^(\d{1,5})[_\s.-].*?\.txt$", re.IGNORECASE), # Matches "123_abc.txt", "123 def.txt" etc. at start
#     re.compile(r"第\s*(\d{1,5})\s*章", re.IGNORECASE), # Matches "第 123 章", "第123章" anywhere
#     re.compile(r"chapter\s*(\d{1,5})", re.IGNORECASE),# Matches "chapter 123", "Chapter123"
#     re.compile(r"chap\s*(\d{1,5})", re.IGNORECASE),   # Matches "chap 123", "Chap123"
#     re.compile(r"[_-](\d{1,5})[_-]"),                # Matches "-123-", "_123_"
#     re.compile(r"(?<!\d)(\d{1,5})(?!\d)"),           # Matches standalone number "123" (last resort)
# ]

# def find_chapter_file(folder: Path, chap_num: int) -> Path | None:
#     """Finds a TXT file matching the chapter number within a folder (recursive). Prioritizes more specific patterns."""
#     if not folder.is_dir():
#         logging.warning(f"Search folder doesn't exist or is not a directory: {folder}")
#         return None

#     possible_matches = []
#     try:
#         # Use rglob for recursive search
#         for item in folder.rglob("*.txt"):
#             if item.is_file():
#                 name = item.name
#                 for i, pat in enumerate(CHAP_PATTERNS):
#                     match = pat.search(name)
#                     # Check if match found and captured group exists
#                     if match and match.groups():
#                         try:
#                             extracted_num = int(match.group(1))
#                             if extracted_num == chap_num:
#                                 # Store path and pattern index (lower index = more specific pattern)
#                                 possible_matches.append({"path": item, "pattern_index": i})
#                                 # logging.debug(f"Potential match for {chap_num}: {item.name} (pattern index {i})")
#                                 break # Found a match with this pattern, move to next file
#                         except (ValueError, IndexError):
#                             # Should not happen if regex has a capturing group, but safety first
#                             logging.debug(f"Could not extract number from match object for pattern {pat.pattern} on file {name}")
#                             continue
#     except Exception as e:
#         logging.error(f"Error searching {folder} for chapter {chap_num}: {e}", exc_info=True)
#         return None # Return None on search error

#     if not possible_matches:
#         logging.warning(f"Chapter {chap_num} file not found in {folder}")
#         return None

#     # If multiple matches, sort by pattern index (lower is better) and maybe filename length?
#     possible_matches.sort(key=lambda x: (x['pattern_index'], len(x['path'].name)))
#     best_match = possible_matches[0]

#     if len(possible_matches) > 1:
#         logging.warning(f"Multiple file matches for chapter {chap_num} in {folder}. "
#                         f"Using '{best_match['path'].name}' (best pattern index {best_match['pattern_index']}). "
#                         f"All matches (path, pattern index): {[(p['path'].name, p['pattern_index']) for p in possible_matches]}")

#     logging.debug(f"Found chapter {chap_num} file: {best_match['path']}")
#     return best_match['path']


# def normalize(name: str) -> str:
#     """Normalizes directory/book names for robust matching."""
#     try:
#         name = str(name)
#         name = unicodedata.normalize("NFKC", name) # Normalize Unicode characters
#         name = name.lower() # Convert to lowercase
#         # Remove common suffixes and prefixes often added to folder names
#         name = name.replace("_utf8", "").replace(".utf8", "")
#         name = name.replace("_校对版全本", "").replace("（校对版全本）", "")
#         name = re.sub(r"[《》【】]", "", name) # Remove brackets often used in titles
#         name = re.sub(r"\.(summary|txt|eval)$", "", name, flags=re.IGNORECASE) # Remove common file extensions or suffixes if they appear in dir name
#         # Aggressively remove whitespace and various punctuation/separators
#         name = re.sub(r"[\s:：()（）“”\"',.!?;?？\-·•_#]+", "", name)
#         return name.strip()
#     except Exception as e:
#         logging.error(f"Error normalizing name '{name}': {e}")
#         return "" # Return empty string on error


# def read_seed_chapters(book_name: str, seed_dir: Path) -> list[int]:
#     """Reads the list of chapter numbers from the _randomseed.txt file for a given book."""
#     norm_target = normalize(book_name)
#     if not norm_target:
#         logging.warning(f"Normalized book name is empty for '{book_name}', cannot match seed file.")
#         return []

#     seed_file_found = None
#     try:
#         if not seed_dir.is_dir():
#             logging.error(f"Seed directory missing or not a directory: {seed_dir}")
#             return []

#         logging.debug(f"Searching for seed file matching normalized name '{norm_target}' in {seed_dir}")
#         # Iterate through potential seed files
#         for file in seed_dir.glob("*_randomseed.txt"):
#             # Extract candidate book name from filename, remove suffix
#             candidate_name = file.stem.replace("_randomseed", "")
#             norm_candidate = normalize(candidate_name)
#             logging.debug(f"Comparing '{norm_candidate}' (from {file.name}) with target '{norm_target}'")
#             if norm_candidate == norm_target:
#                 seed_file_found = file
#                 logging.info(f"Found matching seed file: {seed_file_found.name} for book '{book_name}'")
#                 break # Stop after finding the first match

#         if seed_file_found:
#             lines = seed_file_found.read_text(encoding="utf-8").splitlines()
#             # Filter for digits, convert to int, remove duplicates, and sort
#             chapters = sorted(list(set(int(x.strip()) for x in lines if x.strip().isdigit())))
#             if not chapters:
#                 logging.warning(f"Seed file {seed_file_found.name} for '{book_name}' contains no valid chapter numbers.")
#             return chapters
#         else:
#             logging.warning(f"⚠️ No matching seed file found in {seed_dir} for book: '{book_name}' (normalized target: '{norm_target}')")
#             return []

#     except FileNotFoundError: # Should be caught by is_dir check, but belt-and-suspenders
#          logging.error(f"Seed directory not found: {seed_dir}")
#          return []
#     except Exception as e:
#         logging.error(f"Error reading seed file for '{book_name}': {e}", exc_info=True)
#         return []

# # --- LLM Evaluation Functions ---

# openai_client = None # Global client instance

# def initialize_openai_client():
#     """Initializes the OpenAI client."""
#     global openai_client
#     if openai is None:
#         logging.error("OpenAI library not imported. Cannot initialize client.")
#         return False
#     if not OPENAI_API_KEY or OPENAI_API_KEY == "YOUR_OPENAI_API_KEY_HERE":
#         logging.error("OpenAI API Key is missing or still set to placeholder. Please configure OPENAI_API_KEY.")
#         return False
#     try:
#         openai_client = openai.OpenAI(api_key=OPENAI_API_KEY, timeout=LLM_EVALUATION_TIMEOUT)
#         # Test connection by listing models (optional, can increase startup time)
#         # openai_client.models.list()
#         logging.info(f"OpenAI client initialized successfully for model {EVALUATION_MODEL_NAME}.")
#         return True
#     except Exception as e:
#         logging.error(f"Failed to initialize OpenAI client: {e}", exc_info=True)
#         openai_client = None
#         return False

# def call_llm_evaluator(prompt: str, chap_num: int) -> dict | None:
#     """Calls the configured OpenAI model with retries and JSON parsing."""
#     if openai_client is None:
#         logging.error(f"Chapter {chap_num}: OpenAI client not initialized. Skipping LLM evaluation.")
#         return None

#     last_exception = None
#     for attempt in range(LLM_MAX_RETRIES + 1): # Try LLM_MAX_RETRIES times
#         try:
#             logging.debug(f"Chapter {chap_num}, Attempt {attempt+1}: Calling LLM ({EVALUATION_MODEL_NAME})...")
#             resp = openai_client.chat.completions.create(
#                 model=EVALUATION_MODEL_NAME,
#                 messages=[{"role": "user", "content": prompt}],
#                 temperature=EVALUATION_TEMPERATURE,
#                 response_format={"type": "json_object"} # Request JSON output
#             )
#             reply_content = resp.choices[0].message.content.strip()
#             logging.debug(f"Chapter {chap_num}, Attempt {attempt+1}: LLM raw response: {reply_content[:500]}...") # Log snippet

#             scores = json.loads(reply_content)

#             # Validate keys and values
#             if not all(key in scores for key in EXPECTED_LLM_KEYS):
#                  missing_keys = [k for k in EXPECTED_LLM_KEYS if k not in scores]
#                  raise ValueError(f"LLM JSON response missing expected keys: {missing_keys}")
#             if not all(isinstance(scores[key], (int, float)) and 0 <= scores[key] <= 1 for key in EXPECTED_LLM_KEYS):
#                  invalid_values = {k: scores[k] for k in EXPECTED_LLM_KEYS if not (isinstance(scores[k], (int, float)) and 0 <= scores[k] <= 1)}
#                  raise ValueError(f"LLM JSON response has invalid values (not 0-1 float/int): {invalid_values}")

#             logging.debug(f"Chapter {chap_num}, Attempt {attempt+1}: LLM evaluation successful and parsed.")
#             # Convert all scores to float for consistency
#             return {k: float(scores[k]) for k in EXPECTED_LLM_KEYS}

#         except json.JSONDecodeError as e:
#             last_exception = e
#             logging.warning(f"Chapter {chap_num}, Attempt {attempt+1}: LLM output failed JSON parsing: {e}. Reply snippet: '{reply_content[:200]}...'")
#         except openai.APITimeoutError as e:
#              last_exception = e
#              logging.warning(f"Chapter {chap_num}, Attempt {attempt+1}: OpenAI API call timed out ({LLM_EVALUATION_TIMEOUT}s).")
#         except openai.RateLimitError as e:
#              last_exception = e
#              logging.warning(f"Chapter {chap_num}, Attempt {attempt+1}: OpenAI Rate Limit Error: {e}. Consider adding delays or checking your plan.")
#         except openai.APIConnectionError as e:
#              last_exception = e
#              logging.error(f"Chapter {chap_num}, Attempt {attempt+1}: OpenAI API connection error: {e}.")
#         except openai.AuthenticationError as e:
#              last_exception = e
#              logging.error(f"Chapter {chap_num}: OpenAI Authentication Error. Check API Key. Evaluation stopped for this chapter. Error: {e}")
#              # Authentication errors are critical, likely won't succeed on retry
#              return {"error": f"OpenAI Authentication Error: {e}"}
#         except Exception as e: # Catch other potential errors (like ValueError from validation)
#             last_exception = e
#             logging.error(f"Chapter {chap_num}, Attempt {attempt+1}: Error during LLM call or processing: {e}", exc_info=True) # Include traceback for unexpected errors

#         # If not the last attempt, wait before retrying
#         if attempt < LLM_MAX_RETRIES:
#             wait_time = 2 ** attempt # Exponential backoff
#             logging.info(f"Chapter {chap_num}: Retrying LLM call in {wait_time} seconds...")
#             time.sleep(wait_time)

#     # If all retries failed
#     logging.error(f"Chapter {chap_num}: LLM evaluation failed after {LLM_MAX_RETRIES + 1} attempts.")
#     error_message = f"LLM evaluation failed after retries. Last error: {repr(last_exception)}"
#     return {"error": error_message} # Return dict indicating error

# # ==============================================================
# # 4. Main Calculation Function
# # ==============================================================

# def calculate_and_save_evaluations(folder_a: Path, folder_b: Path, seed_dir: Path, output_root: Path):
#     """
#     Calculates Cosine Similarity, BERTScore, and LLM Evaluation using robust
#     file matching and saves combined results to individual JSON files per chapter.
#     """
#     logging.info("🚀 Starting combined evaluation process...")
#     start_time_overall = time.time()
#     output_root.mkdir(parents=True, exist_ok=True)

#     # --- Essential Library Checks ---
#     if calculate_bert_score is None:
#         logging.error("CRITICAL: bert_score library not loaded. Cannot calculate BERTScore.")
#         # Decide whether to proceed without BERTScore or exit
#         # return # Uncomment to exit if BERTScore is mandatory
#     if openai is None:
#         logging.error("CRITICAL: openai library not loaded. Cannot perform LLM evaluation.")
#         # return # Uncomment to exit if LLM evaluation is mandatory

#     # --- Path Validation ---
#     if not folder_a.is_dir(): logging.error(f"CRITICAL: Folder A not found: {folder_a}"); return
#     if not folder_b.is_dir(): logging.error(f"CRITICAL: Folder B not found: {folder_b}"); return
#     if not seed_dir.is_dir(): logging.error(f"CRITICAL: Seed directory not found: {seed_dir}"); return

#     # --- Initialize OpenAI Client ---
#     if not initialize_openai_client():
#         logging.error("CRITICAL: Failed to initialize OpenAI client. LLM evaluations will be skipped.")
#         # Depending on requirements, you might want to exit here:
#         # return

#     # --- Load SBERT Model ---
#     sbert_model = None
#     try:
#         logging.info(f"Loading Sentence Transformer model: {SBERT_MODEL_NAME}...")
#         sbert_model = SentenceTransformer(SBERT_MODEL_NAME)
#         logging.info("✅ Sentence Transformer model loaded.")
#     except Exception as e:
#         logging.error(f"CRITICAL: Failed to load SBERT model '{SBERT_MODEL_NAME}': {e}", exc_info=True)
#         # Decide whether to proceed without Cosine Similarity or exit
#         # return # Uncomment to exit if SBERT model is mandatory

#     # --- Pre-scan Folder B directories for efficient matching ---
#     logging.info(f"Scanning {folder_b} for comparison book directories...")
#     folder_b_dir_map = {}
#     try:
#         for b_dir in folder_b.iterdir():
#             if b_dir.is_dir():
#                 normalized_b_name = normalize(b_dir.name)
#                 if normalized_b_name: # Only add if normalization produced a non-empty string
#                     if normalized_b_name in folder_b_dir_map:
#                          logging.warning(f"Duplicate normalized name '{normalized_b_name}' in {folder_b}. Using first found: '{folder_b_dir_map[normalized_b_name].name}'. Skipping dir '{b_dir.name}'")
#                     else:
#                         folder_b_dir_map[normalized_b_name] = b_dir
#                         logging.debug(f"Mapped normalized '{normalized_b_name}' to '{b_dir.name}'")
#                 else:
#                      logging.warning(f"Skipping directory '{b_dir.name}' in Folder B as its normalized name is empty.")
#     except Exception as e:
#         logging.error(f"Error scanning Folder B directories: {e}", exc_info=True)
#         return # Critical error during setup
#     logging.info(f"Found {len(folder_b_dir_map)} potential comparison book directories in {folder_b}.")

#     # --- Iterate through books in Folder A ---
#     folder_a_book_dirs = sorted([d for d in folder_a.iterdir() if d.is_dir()])
#     if not folder_a_book_dirs:
#         logging.error(f"No book directories found in {folder_a}. Exiting.")
#         return
#     logging.info(f"Found {len(folder_a_book_dirs)} potential books in {folder_a} to process.")

#     # --- Overall Counters ---
#     overall_chapters_processed_success = 0
#     overall_chapters_processed_partial = 0 # Succeeded some metrics but not all
#     overall_chapters_failed = 0
#     books_processed_count = 0
#     books_skipped_count = 0

#     # --- Main Book Loop ---
#     for book_dir_a in tqdm(folder_a_book_dirs, desc="📚 Processing Books"):
#         book_name_original = book_dir_a.name
#         logging.info(f"\n{'='*15} Processing Book: {book_name_original} {'='*15}")

#         # Find corresponding Folder B directory using normalization
#         normalized_a_name = normalize(book_name_original)
#         if not normalized_a_name:
#              logging.warning(f"Skipping book '{book_name_original}' as its normalized name is empty.")
#              books_skipped_count += 1
#              continue

#         book_dir_b = folder_b_dir_map.get(normalized_a_name)

#         if not book_dir_b:
#             logging.warning(f"No matching comparison directory found for '{book_name_original}' (normalized: '{normalized_a_name}') in pre-scanned map from {folder_b}. Skipping book.")
#             books_skipped_count += 1
#             continue
#         # Check again if it's actually a directory (should be, due to pre-scan, but safety first)
#         elif not book_dir_b.is_dir():
#               logging.error(f"Matched path '{book_dir_b}' for book '{book_name_original}' is not a directory. Skipping book.")
#               books_skipped_count += 1
#               continue
#         else:
#               logging.info(f"Found corresponding comparison directory: {book_dir_b.name}")

#         # Get chapters to compare for this book from seed file
#         chapters_to_compare = read_seed_chapters(book_name_original, seed_dir)
#         if not chapters_to_compare:
#             logging.warning(f"No chapters selected from seed file (or seed file not found/empty) for '{book_name_original}'. Skipping book.")
#             books_skipped_count += 1
#             continue

#         logging.info(f"Selected {len(chapters_to_compare)} chapters for '{book_name_original}': {chapters_to_compare[:10]}...") # Show first few
#         books_processed_count += 1

#         # Create output directory for this book's results
#         # Use original name for output directory clarity
#         book_output_dir = output_root / book_name_original
#         book_output_dir.mkdir(parents=True, exist_ok=True)

#         # --- Process Chapters for this Book ---
#         book_chapters_success = 0
#         book_chapters_partial = 0
#         book_chapters_failed = 0

#         for chap_num in tqdm(chapters_to_compare, desc=f"  ├── Chapters ({book_name_original})", leave=False):
#             logging.info(f"--- Evaluating Chapter {chap_num} ---")
#             chapter_start_time = time.time()
#             calculation_error = None # Store first critical error for this chapter
#             llm_eval_result = None # Store LLM result or error dict
#             cosine_sim = None
#             bert_p, bert_r, bert_f1 = None, None, None

#             path_a = find_chapter_file(book_dir_a, chap_num)
#             path_b = find_chapter_file(book_dir_b, chap_num) # Search within the matched folder_b subdir

#             # Initialize result dict structure
#             result_data = {
#                 "book_name": book_name_original,
#                 "chapter": chap_num,
#                 "timestamp_utc": datetime.datetime.now(datetime.timezone.utc).isoformat(),
#                 "file_a": str(path_a.relative_to(FOLDER_A.parent)) if path_a else "NOT_FOUND", # Use parent for full relative path
#                 "file_b": str(path_b.relative_to(FOLDER_B.parent)) if path_b else "NOT_FOUND",
#                 "cosine_similarity": None,
#                 "bert_score_f1": None,
#                 "bert_score_precision": None,
#                 "bert_score_recall": None,
#                 "llm_evaluation": None, # Placeholder for LLM scores dict or error message
#                 "llm_model_used": EVALUATION_MODEL_NAME if openai_client else "N/A", # Record model used or N/A
#                 "metrics_calculated": [], # List successful metrics
#                 "error": None # Overall error status for the chapter
#             }

#             # 1. Check if files were found
#             if not path_a or not path_b:
#                 error_msg = f"Input file(s) missing: {'File A ' if not path_a else ''}{'File B' if not path_b else ''}"
#                 logging.error(f"Chapter {chap_num}: {error_msg}")
#                 result_data["error"] = error_msg
#                 book_chapters_failed += 1
#             else:
#                 # 2. Read file contents
#                 logging.debug(f"Reading File A: {path_a.name}")
#                 text_a = read_txt(path_a)
#                 logging.debug(f"Reading File B: {path_b.name}")
#                 text_b = read_txt(path_b)

#                 if not text_a or not text_b:
#                     error_msg = f"Failed to read content or content empty: {'File A ' if not text_a else ''}{'File B ' if not text_b else ''}"
#                     logging.error(f"Chapter {chap_num}: {error_msg}")
#                     result_data["error"] = error_msg
#                     book_chapters_failed += 1
#                 else:
#                     # --- Start Calculations ---
#                     metrics_ok = []
#                     calculation_step_error = None # Track error within this block

#                     # 3. Calculate Cosine Similarity (if model loaded)
#                     if sbert_model:
#                         try:
#                             logging.debug(f"Calculating Cosine Sim for Chapter {chap_num}")
#                             # Consider adding batching if many chapters, but individual encoding is fine here
#                             emb1 = sbert_model.encode(text_a, normalize_embeddings=True, show_progress_bar=False)
#                             emb2 = sbert_model.encode(text_b, normalize_embeddings=True, show_progress_bar=False)
#                             # Ensure embeddings are 1D if encode returns single vectors
#                             if emb1.ndim > 1: emb1 = emb1.flatten()
#                             if emb2.ndim > 1: emb2 = emb2.flatten()
#                             cosine_sim = util.cos_sim(emb1, emb2).item()
#                             result_data["cosine_similarity"] = cosine_sim
#                             metrics_ok.append("cosine")
#                             logging.info(f"  ✅ Cosine Similarity: {cosine_sim:.4f}")
#                         except Exception as e:
#                             logging.error(f"Chapter {chap_num}: Error during Cosine Similarity calculation: {e}", exc_info=LOGGING_LEVEL <= logging.DEBUG)
#                             calculation_step_error = f"Cosine Sim failed: {repr(e)}"

#                     # 4. Calculate BERTScore (if library loaded)
#                     if calculate_bert_score:
#                          try:
#                              logging.debug(f"Calculating BERTScore for Chapter {chap_num} (Model: {BERTSCORE_MODEL_TYPE}, Lang: {BERTSCORE_LANG})")
#                              # Ensure texts are not empty strings, bert-score might handle this but safety first
#                              if text_a and text_b:
#                                  # Note: BERTScore can be slow, especially without GPU
#                                  # Using lists as required by bert_score input format
#                                  P, R, F1 = calculate_bert_score([text_a], [text_b],
#                                                                  model_type=BERTSCORE_MODEL_TYPE,
#                                                                  lang=BERTSCORE_LANG,
#                                                                  rescale_with_baseline=True, # Good practice
#                                                                  verbose=False) # Set verbose=True for more bert-score internal logging
#                                  bert_p = P.item()
#                                  bert_r = R.item()
#                                  bert_f1 = F1.item()
#                                  result_data["bert_score_precision"] = bert_p
#                                  result_data["bert_score_recall"] = bert_r
#                                  result_data["bert_score_f1"] = bert_f1
#                                  metrics_ok.append("bertscore")
#                                  logging.info(f"  ✅ BERTScore: P={bert_p:.4f} R={bert_r:.4f} F1={bert_f1:.4f}")
#                              else:
#                                  raise ValueError("Input text for BERTScore is empty.")
#                          except Exception as e:
#                              logging.error(f"Chapter {chap_num}: Error during BERTScore calculation: {e}", exc_info=LOGGING_LEVEL <= logging.DEBUG)
#                              # Append error message if another didn't occur yet
#                              if not calculation_step_error: calculation_step_error = f"BERTScore failed: {repr(e)}"
#                              else: calculation_step_error += f"; BERTScore failed: {repr(e)}"

#                     # 5. Perform LLM Evaluation (if client initialized)
#                     if openai_client:
#                          try:
#                              logging.debug(f"Performing LLM evaluation for Chapter {chap_num}")
#                              prompt = PROMPT_TEMPLATE.format(text_a=text_a[:20000], text_b=text_b[:20000]) # Truncate input texts if they are very long to avoid excessive token usage/cost
#                              if len(text_a) > 20000 or len(text_b) > 20000:
#                                  logging.warning(f"Chapter {chap_num}: Input text truncated to 20000 chars for LLM evaluation.")

#                              llm_eval_result = call_llm_evaluator(prompt, chap_num)

#                              # Check if LLM call returned scores or an error dictionary
#                              if llm_eval_result and "error" not in llm_eval_result:
#                                  result_data["llm_evaluation"] = llm_eval_result
#                                  metrics_ok.append("llm")
#                                  logging.info("  ✅ LLM Evaluation: Success")
#                                  # Optional: Log individual LLM scores if needed
#                                  # for k, v in llm_eval_result.items(): logging.info(f"     {k}: {v:.3f}")
#                              else:
#                                  # LLM call failed or returned an error structure
#                                  error_detail = llm_eval_result.get("error", "Unknown LLM evaluation error") if llm_eval_result else "LLM evaluation returned None"
#                                  logging.error(f"Chapter {chap_num}: LLM evaluation failed. Details: {error_detail}")
#                                  result_data["llm_evaluation"] = {"error": error_detail} # Store error info
#                                  if not calculation_step_error: calculation_step_error = f"LLM Eval failed: {error_detail}"
#                                  else: calculation_step_error += f"; LLM Eval failed: {error_detail}"

#                          except Exception as e:
#                              # Catch unexpected errors during prompt formatting or function call itself
#                              logging.error(f"Chapter {chap_num}: Unexpected error during LLM evaluation step: {e}", exc_info=True)
#                              error_detail = f"Unexpected LLM step error: {repr(e)}"
#                              result_data["llm_evaluation"] = {"error": error_detail}
#                              if not calculation_step_error: calculation_step_error = error_detail
#                              else: calculation_step_error += f"; {error_detail}"

#                     # --- End Calculations ---
#                     result_data["metrics_calculated"] = metrics_ok
#                     if calculation_step_error:
#                         result_data["error"] = calculation_step_error # Record calculation errors
#                         if metrics_ok: # Some metrics succeeded, some failed
#                             book_chapters_partial += 1
#                         else: # All attempted metrics failed
#                             book_chapters_failed += 1
#                     elif not metrics_ok: # No metrics were even attempted (e.g., models didn't load)
#                          result_data["error"] = "No metrics were attempted (check model loading status)."
#                          book_chapters_failed += 1
#                     else: # All attempted metrics succeeded
#                          book_chapters_success += 1


#             # 6. Save individual JSON for this chapter
#             output_json_filename = f"chapter_{chap_num:05d}_evaluation.json" # Pad chapter number
#             output_json_path = book_output_dir / output_json_filename
#             try:
#                 with open(output_json_path, "w", encoding="utf-8") as f:
#                     json.dump(result_data, f, ensure_ascii=False, indent=2)
#                 logging.debug(f"Saved chapter result to {output_json_path}")
#             except Exception as e:
#                 logging.error(f"CRITICAL: Failed to save result for Chapter {chap_num} to {output_json_path}: {e}")
#                 # If saving failed, we should count it as an error, potentially downgrading from success/partial
#                 if result_data["error"] is None: # Only count as new error if no previous error recorded
#                     result_data["error"] = f"Failed to save JSON: {repr(e)}" # Update error status
#                     if book_chapters_success > 0: book_chapters_success -= 1 # Was success, now failed
#                     elif book_chapters_partial > 0: book_chapters_partial -= 1 # Was partial, now failed
#                     book_chapters_failed += 1 # Increment failure count due to save error

#             chapter_duration = time.time() - chapter_start_time
#             logging.info(f"--- Chapter {chap_num} finished in {chapter_duration:.2f} seconds. Status: {'Success' if result_data['error'] is None else 'Partial' if result_data.get('metrics_calculated') else 'Failed'} ---")
#             # Optional small delay to avoid hitting API limits aggressively or overloading system
#             # time.sleep(0.5) # Adjust as needed

#         # --- End Chapter Loop for Book ---
#         logging.info(f"Finished book '{book_name_original}'. Success: {book_chapters_success}, Partial: {book_chapters_partial}, Failed/Skipped: {book_chapters_failed}")
#         overall_chapters_processed_success += book_chapters_success
#         overall_chapters_processed_partial += book_chapters_partial
#         overall_chapters_failed += book_chapters_failed

#     # --- End of all books ---
#     end_time_overall = time.time()
#     total_time_overall = end_time_overall - start_time_overall
#     logging.info(f"\n{'='*20} Overall Evaluation Summary {'='*20}")
#     logging.info(f"Processed {books_processed_count} books (found matching comparison dir and seed chapters).")
#     logging.info(f"Skipped {books_skipped_count} books (no matching dir, empty seed, or normalization issues).")
#     logging.info(f"Total chapters fully processed (all metrics attempted succeeded): {overall_chapters_processed_success}")
#     logging.info(f"Total chapters partially processed (some metrics failed): {overall_chapters_processed_partial}")
#     logging.info(f"Total chapters failed or skipped (missing files/read errors/all metrics failed/save errors): {overall_chapters_failed}")
#     logging.info(f"Total execution time: {total_time_overall:.2f} seconds ({total_time_overall/60:.2f} minutes)")
#     logging.info(f"Individual chapter results saved in subdirectories under: {output_root}")
#     logging.info("🎉 Combined evaluation finished.")

# # ==============================================================
# # 5. Run the Script
# # ==============================================================
# if __name__ == "__main__":
#     # --- Run Main Calculation ---
#     calculate_and_save_evaluations(FOLDER_A, FOLDER_B, SEED_DIR, OUTPUT_ROOT)

2025-05-01 22:34:51,776 - INFO - 🚀 Starting combined evaluation process...
2025-05-01 22:34:51,820 - INFO - OpenAI client initialized successfully for model gpt-4o.
2025-05-01 22:34:51,821 - INFO - Loading Sentence Transformer model: shibing624/text2vec-base-chinese...
2025-05-01 22:34:51,824 - INFO - Use pytorch device_name: cuda:0
2025-05-01 22:34:51,825 - INFO - Load pretrained SentenceTransformer: shibing624/text2vec-base-chinese
2025-05-01 22:34:52,762 - INFO - ✅ Sentence Transformer model loaded.
2025-05-01 22:34:52,763 - INFO - Scanning /content/1000_word_chapters_expanded for comparison book directories...
2025-05-01 22:34:52,765 - INFO - Found 33 potential comparison book directories in /content/1000_word_chapters_expanded.
2025-05-01 22:34:52,767 - INFO - Found 40 potential books in /content/novels_chapters to process.
2025-05-01 22:34:52,770 - INFO -    | 0/40 [00:00<?, ?it/s]
2025-05-01 22:34:52,771 - INFO - Found corresponding comparison directory: 上品寒士（校对版全本）作者：贼道三痴_utf8.

In [9]:
!rm -rf /content/combined_evaluation_results/1000_word_chapters_expanded

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [11]:
!zip -r /content/json_results.zip /content/json_results

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  adding: content/json_results/ (stored 0%)
  adding: content/json_results/50000_json/ (stored 0%)
  adding: content/json_results/50000_json/《奋斗在新明朝》(校对版全本)作者_随轻风去_utf8/ (stored 0%)
  adding: content/json_results/50000_json/《奋斗在新明朝》(校对版全本)作者_随轻风去_utf8/001-010/ (stored 0%)
  adding: content/json_results/50000_json/《奋斗在新明朝》(校对版全本)作者_随轻风去_utf8/001-010/001_第一章要认清这个世道_processed.txt (deflated 40%)
  adding: content/json_results/50000_json/《奋斗在新明朝》(校对版全本)作者_随轻风去_utf8/001-010/002_第二章观音庵里斗淫尼_processed.txt (deflated 43%)
  adding: content/json_results/50000_json/《奋斗在新明朝》(校对版全本)作者_随轻风去_utf8/001-010/003_第三章桃色话题人物_processed.txt (deflated 44%)
  adding: content/json_results/50000_json/《奋斗在新明朝》(校对版全本)作者_随轻风去_utf8/001-010/004_第四章不抄枉作穿越人_processed.txt (deflated 49%)
  adding: content/json_results/50000_json/《奋斗在新明朝》(校对版全本)作者_随轻风去_utf8/001-010/005_第五章剽窃的后果_processed.txt (deflated 42%)
  adding: content/json_results/50000_json/《奋斗在新明朝》(校对版全本)作者_随轻风去_utf8/001-010/006_第六章人生弱智如初见_processed.txt (deflated 40%

IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)



In [12]:
!rm -rf /content/combined_evaluation_results/1000_word_chapters_expanded

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [20]:
# ==============================================================
# Merged Script: Calculate Cosine Similarity, BERTScore & LLM Eval
# - Uses robust file matching based on seed files
# - Calls OpenAI API for DETAILED qualitative evaluation
#   (Props, Characters, Scenes extraction + 5 similarity scores)
# - Saves combined results (Cosine, BERTScore, Full LLM Eval) to individual JSON per chapter
# ==============================================================

# ==============================================================
#  0. Environment Installation (if needed)
# ==============================================================
# !pip install --quiet sentence-transformers bert-score torch torchvision torchaudio openai chardet tqdm pandas # Ensure all dependencies are installed

# ==============================================================
#  1. Import Libraries
# ==============================================================
import os
import json
import time
import datetime
import re
import logging
import unicodedata
from pathlib import Path
from tqdm import tqdm # Use standard tqdm
import chardet

# --- Sentence Transformers ---
from sentence_transformers import SentenceTransformer, util

# --- BERT Score ---
try:
    from bert_score import score as calculate_bert_score
except ImportError:
    logging.error("bert_score library not found. Please install it: pip install bert-score")
    calculate_bert_score = None # Set to None if import fails

# --- OpenAI for LLM Evaluation ---
try:
    import openai
except ImportError:
    logging.error("openai library not found. Please install it: pip install openai")
    openai = None # Set to None if import fails

# ==============================================================
#  2. Configuration
# ==============================================================

# --- Input/Output Folders ---
# ⚠️ MODIFY THESE PATHS AS NEEDED
FOLDER_A = Path("/content/novels_normalized") # Folder with original chapters
FOLDER_B = Path("/content/output/100000_novel_1000_100000_generated_gemini") # Folder with generated/comparison chapters
SEED_DIR = Path("/content/randomseed") # Folder with _randomseed.txt files
OUTPUT_ROOT = Path("/content/combined_evaluation_results/100000_novel_1000_100000_generated_gemini_evaluation") # Main output folder for combined results

# --- Similarity Model Configuration ---
SBERT_MODEL_NAME = "shibing624/text2vec-base-chinese"
BERTSCORE_MODEL_TYPE = "bert-base-chinese" # Make sure this matches the language/domain
BERTSCORE_LANG = "zh" # Set to your language code (e.g., "en", "zh")

# --- LLM Evaluation Configuration ---
# ⚠️ IMPORTANT SECURITY WARNING:
# Hardcoding API keys is a major security risk!
# Use environment variables (os.getenv('OPENAI_API_KEY')) or a secure secrets management system in production.
OPENAI_API_KEY = "your-default-api-key" # <-- PASTE YOUR KEY HERE or use os.getenv("OPENAI_API_KEY")
EVALUATION_MODEL_NAME = "gpt-4o"
EVALUATION_TEMPERATURE = 0.0 # Set to 0 for deterministic scoring
LLM_EVALUATION_TIMEOUT = 180 # Increased timeout slightly for more complex task
LLM_MAX_RETRIES = 2 # Number of retries for LLM call failures

# --- LLM Score Keys (for validation) ---
# These are the keys expected WITHIN the "scores" sub-dictionary of the LLM response
EXPECTED_LLM_KEYS = [
    "semantic_similarity",
    "plot_similarity",
    "character_similarity",
    "background_similarity",
    "style_similarity",
]

# --- Logging ---
LOGGING_LEVEL = logging.INFO # Set to DEBUG for more verbose file matching/API logs
logging.basicConfig(level=LOGGING_LEVEL, format="%(asctime)s - %(levelname)s - %(message)s")

# ==============================================================
#  3. Utility Functions (Including LLM Helpers from Script 1)
# ==============================================================

def read_txt(path: Path) -> str:
    """Reads a text file with automatic encoding detection."""
    # (Same implementation as before)
    try:
        raw = path.read_bytes()
        detected = chardet.detect(raw); enc = detected['encoding'] if detected else None
        confidence = detected['confidence'] if detected else 0
        if confidence < 0.9 or (enc and enc.lower() in ['ascii', 'windows-1252']):
            try:
                decoded_text = raw.decode('utf-8', errors='strict')
                logging.debug(f"Read {path.name} successfully with strict UTF-8.")
                return decoded_text.strip()
            except UnicodeDecodeError:
                logging.debug(f"Strict UTF-8 failed for {path.name}, proceeding with detection/fallback.")
        if not enc: enc = 'utf-8'; logging.warning(f"No encoding detected for {path.name}, defaulting to {enc}.")
        common_encodings = list(dict.fromkeys([enc, 'utf-8', 'gbk', 'gb18030', 'gb2312']))
        decoded_text = None
        for encoding in common_encodings:
            if not encoding: continue
            try:
                decoded_text = raw.decode(encoding, errors='strict')
                logging.debug(f"Read {path.name} successfully with strict {encoding}.")
                break
            except (UnicodeDecodeError, TypeError, LookupError):
                logging.debug(f"Strict decoding with {encoding} failed for {path.name}.")
                continue
        if decoded_text is None:
            final_encoding = 'utf-8'
            logging.warning(f"Strict decoding failed for {path.name}. Using fallback {final_encoding} with errors='ignore'.")
            decoded_text = raw.decode(final_encoding, errors='ignore')
        return decoded_text.strip()
    except FileNotFoundError: logging.error(f"File not found: {path}"); return ""
    except Exception as e: logging.error(f"Error reading file {path}: {e}", exc_info=True); return ""

# --- Chapter Number Extraction Patterns (Comprehensive) ---
# (Same implementation as before)
CHAP_PATTERNS = [
    re.compile(r"^第(\d{1,5})章\.txt$"), re.compile(r"^(\d{1,5})\.txt$"),
    re.compile(r"^(\d{1,5})[_\s.-].*?\.txt$", re.IGNORECASE),
    re.compile(r"第\s*(\d{1,5})\s*章", re.IGNORECASE),
    re.compile(r"chapter\s*(\d{1,5})", re.IGNORECASE),
    re.compile(r"chap\s*(\d{1,5})", re.IGNORECASE),
    re.compile(r"[_-](\d{1,5})[_-]"), re.compile(r"(?<!\d)(\d{1,5})(?!\d)"),
]

def find_chapter_file(folder: Path, chap_num: int) -> Path | None:
    """Finds a TXT file matching the chapter number within a folder (recursive)."""
    # (Same implementation as before)
    if not folder.is_dir(): logging.warning(f"Search folder doesn't exist: {folder}"); return None
    possible_matches = []
    try:
        for item in folder.rglob("*.txt"):
            if item.is_file():
                name = item.name
                for i, pat in enumerate(CHAP_PATTERNS):
                    match = pat.search(name)
                    if match and match.groups():
                        try:
                            extracted_num = int(match.group(1))
                            if extracted_num == chap_num:
                                possible_matches.append({"path": item, "pattern_index": i})
                                break
                        except (ValueError, IndexError): continue
    except Exception as e: logging.error(f"Error searching {folder} for chapter {chap_num}: {e}", exc_info=True); return None
    if not possible_matches: logging.warning(f"Chapter {chap_num} file not found in {folder}"); return None
    possible_matches.sort(key=lambda x: (x['pattern_index'], len(x['path'].name)))
    best_match = possible_matches[0]
    if len(possible_matches) > 1: logging.warning(f"Multiple file matches for chapter {chap_num} in {folder}. Using '{best_match['path'].name}'.")
    logging.debug(f"Found chapter {chap_num} file: {best_match['path']}")
    return best_match['path']

def normalize(name: str) -> str:
    """Normalizes directory/book names for robust matching."""
    # (Same implementation as before)
    try:
        name = str(name); name = unicodedata.normalize("NFKC", name); name = name.lower()
        name = name.replace("_utf8", "").replace(".utf8", "")
        name = name.replace("_校对版全本", "").replace("（校对版全本）", "")
        name = re.sub(r"[《》【】]", "", name)
        name = re.sub(r"\.(summary|txt|eval)$", "", name, flags=re.IGNORECASE)
        name = re.sub(r"[\s:：()（）“”\"',.!?;?？\-·•_#]+", "", name)
        return name.strip()
    except Exception as e: logging.error(f"Error normalizing name '{name}': {e}"); return ""

def read_seed_chapters(book_name: str, seed_dir: Path) -> list[int]:
    """Reads the list of chapter numbers from the _randomseed.txt file for a given book."""
    # (Same implementation as before)
    norm_target = normalize(book_name)
    if not norm_target: logging.warning(f"Normalized book name is empty for '{book_name}', cannot match seed file."); return []
    seed_file_found = None
    try:
        if not seed_dir.is_dir(): logging.error(f"Seed directory missing: {seed_dir}"); return []
        logging.debug(f"Searching for seed file matching '{norm_target}' in {seed_dir}")
        for file in seed_dir.glob("*_randomseed.txt"):
            candidate_name = file.stem.replace("_randomseed", "")
            norm_candidate = normalize(candidate_name)
            logging.debug(f"Comparing '{norm_candidate}' (from {file.name}) with target '{norm_target}'")
            if norm_candidate == norm_target:
                seed_file_found = file; logging.info(f"Found matching seed file: {seed_file_found.name} for book '{book_name}'"); break
        if seed_file_found:
            lines = seed_file_found.read_text(encoding="utf-8").splitlines()
            chapters = sorted(list(set(int(x.strip()) for x in lines if x.strip().isdigit())))
            if not chapters: logging.warning(f"Seed file {seed_file_found.name} for '{book_name}' contains no valid chapter numbers.")
            return chapters
        else:
            logging.warning(f"⚠️ No matching seed file found in {seed_dir} for book: '{book_name}' (normalized target: '{norm_target}')")
            return []
    except FileNotFoundError: logging.error(f"Seed directory not found: {seed_dir}"); return []
    except Exception as e: logging.error(f"Error reading seed file for '{book_name}': {e}", exc_info=True); return []

# --- LLM Helper Functions (Incorporating logic from Script 1) ---

def build_prompt(text_a: str, text_b: str) -> str:
    """
    Constructs the detailed prompt for the LLM, asking for element
    extraction (props, characters, scenes) and 5 similarity scores.
    """
    prompt = f"""
你是一位专业中文小说编辑，请你阅读【文本A】与【文本B】，完成以下任务：

1. 分别提取文本A与文本B中的：
   - 出现道具列表（如：剑、玉、令牌等）
   - 出现人物名称列表
   - 出现场景/环境名称列表
   * 提取时请尽量精确，去除通用词语 (例如: '人', '地方')，只保留具体名称。
   * 如果某一项在文本中没有出现，请返回空列表 `[]`。

2. 分别统计每类元素的数量（去重后），并输出每类的元素列表与数量。

3. 接着对比两段文本内容，按照以下 5 个维度进行 0-1 评分（1 表示非常相似，0 表示完全不同）：
   - semantic_similarity   整体语义/主题
   - plot_similarity       情节、事件发展
   - character_similarity  人物名称、数量与设定（综合考虑）
   - background_similarity 场景与世界设定
   - style_similarity      语言风格与表达方式
   * 评分请基于文本内容，给出客观评估。

**请严格输出以下 JSON 格式，不要包含 markdown ```json ... ``` 标记，直接输出 JSON 对象：**

{{
  "text_a": {{
    "props": ["道具A1", "道具A2", ...],
    "props_count": <整数>,
    "characters": ["人物A1", "人物A2", ...],
    "characters_count": <整数>,
    "scenes": ["场景A1", "场景A2", ...],
    "scenes_count": <整数>
  }},
  "text_b": {{
    "props": ["道具B1", "道具B2", ...],
    "props_count": <整数>,
    "characters": ["人物B1", "人物B2", ...],
    "characters_count": <整数>,
    "scenes": ["场景B1", "场景B2", ...],
    "scenes_count": <整数>
  }},
  "scores": {{
    "semantic_similarity": <0.0-1.0>,
    "plot_similarity": <0.0-1.0>,
    "character_similarity": <0.0-1.0>,
    "background_similarity": <0.0-1.0>,
    "style_similarity": <0.0-1.0>
  }}
}}

【文本A】
{text_a}

【文本B】
{text_b}
    """
    return prompt

def validate_scores(score_dict: dict) -> bool:
    """
    Validates the 'scores' sub-dictionary within the LLM response.
    Checks if all expected keys are present and values are numbers between 0 and 1.
    """
    if not isinstance(score_dict, dict):
        logging.warning("Score validation failed: input is not a dictionary.")
        return False
    for key in EXPECTED_LLM_KEYS:
        val = score_dict.get(key)
        if not isinstance(val, (int, float)):
            logging.warning(f"Score validation failed: Key '{key}' is missing or not a number (value: {val}).")
            return False
        if not (0 <= val <= 1):
            logging.warning(f"Score validation failed: Key '{key}' value {val} is outside the [0, 1] range.")
            return False
    return True

# --- Modified LLM Call Function ---
openai_client = None # Global client instance

def initialize_openai_client():
    """Initializes the OpenAI client."""
    global openai_client
    if openai is None: logging.error("OpenAI library not imported."); return False
    if not OPENAI_API_KEY or OPENAI_API_KEY.startswith("sk-proj-"): # Basic check
        logging.warning("OpenAI API Key might be missing or using placeholder. Ensure it's correctly set.")
        # Proceeding, but check key if errors occur
    try:
        openai_client = openai.OpenAI(api_key=OPENAI_API_KEY, timeout=LLM_EVALUATION_TIMEOUT)
        logging.info(f"OpenAI client initialized for model {EVALUATION_MODEL_NAME}.")
        # Optional: Test connection here if needed
        # openai_client.models.list()
        return True
    except Exception as e:
        logging.error(f"Failed to initialize OpenAI client: {e}", exc_info=True)
        openai_client = None
        return False

def call_llm_evaluator_detailed(prompt: str, chap_num: int) -> dict | None:
    """
    Calls the configured OpenAI model with retries, expects the detailed
    JSON structure (extractions + scores), validates it, and returns the full parsed dict.
    """
    if openai_client is None:
        logging.error(f"Chapter {chap_num}: OpenAI client not initialized. Skipping LLM evaluation.")
        return {"error": "OpenAI client not initialized"}

    last_exception = None
    reply_content = "[No response received]"
    for attempt in range(LLM_MAX_RETRIES + 1):
        try:
            logging.debug(f"Chapter {chap_num}, Attempt {attempt+1}: Calling LLM for detailed eval ({EVALUATION_MODEL_NAME})...")
            resp = openai_client.chat.completions.create(
                model=EVALUATION_MODEL_NAME,
                messages=[{"role": "user", "content": prompt}],
                temperature=EVALUATION_TEMPERATURE,
                response_format={"type": "json_object"} # Request JSON output
            )
            reply_content = resp.choices[0].message.content.strip()
            logging.debug(f"Chapter {chap_num}, Attempt {attempt+1}: LLM raw response: {reply_content[:500]}...")

            # Parse the JSON response
            parsed_llm_output = json.loads(reply_content)

            # --- Validate Structure ---
            if not isinstance(parsed_llm_output, dict):
                raise ValueError("LLM response is not a JSON dictionary.")

            required_keys = ["text_a", "text_b", "scores"]
            if not all(key in parsed_llm_output for key in required_keys):
                missing = [k for k in required_keys if k not in parsed_llm_output]
                raise ValueError(f"LLM JSON response missing top-level keys: {missing}")

            # --- Validate Sub-structures (Basic checks) ---
            for key in ["text_a", "text_b"]:
                 sub_dict = parsed_llm_output.get(key)
                 if not isinstance(sub_dict, dict):
                     raise ValueError(f"LLM JSON response key '{key}' is not a dictionary.")
                 # Could add more checks here for props, characters, scenes keys if needed
                 required_sub_keys = ["props", "props_count", "characters", "characters_count", "scenes", "scenes_count"]
                 if not all(sub_key in sub_dict for sub_key in required_sub_keys):
                      missing_sub = [sk for sk in required_sub_keys if sk not in sub_dict]
                      raise ValueError(f"LLM JSON response key '{key}' is missing sub-keys: {missing_sub}")

            # --- Validate Scores ---
            scores_dict = parsed_llm_output.get("scores", {})
            if not validate_scores(scores_dict): # Use the dedicated validation function
                 # validate_scores logs the specific failure reason
                 raise ValueError("LLM JSON response 'scores' validation failed (see previous warning log).")

            logging.debug(f"Chapter {chap_num}, Attempt {attempt+1}: Detailed LLM evaluation successful and validated.")
            # Return the entire parsed and validated dictionary
            return parsed_llm_output

        except json.JSONDecodeError as e:
            last_exception = e
            logging.warning(f"Chapter {chap_num}, Attempt {attempt+1}: LLM output failed JSON parsing: {e}. Reply snippet: '{reply_content[:200]}...'")
        except ValueError as e: # Catch validation errors raised above
             last_exception = e
             logging.warning(f"Chapter {chap_num}, Attempt {attempt+1}: LLM response validation failed: {e}. Reply snippet: '{reply_content[:200]}...'")
        except openai.APITimeoutError as e:
             last_exception = e
             logging.warning(f"Chapter {chap_num}, Attempt {attempt+1}: OpenAI API call timed out ({LLM_EVALUATION_TIMEOUT}s).")
        except openai.RateLimitError as e:
             last_exception = e
             wait_time = 5 * (2 ** attempt)
             logging.warning(f"Chapter {chap_num}, Attempt {attempt+1}: OpenAI Rate Limit Error: {e}. Waiting {wait_time}s...")
             time.sleep(wait_time)
             continue
        except openai.APIConnectionError as e:
             last_exception = e
             logging.error(f"Chapter {chap_num}, Attempt {attempt+1}: OpenAI API connection error: {e}.")
        except openai.AuthenticationError as e:
             last_exception = e
             logging.error(f"Chapter {chap_num}: OpenAI Authentication Error: {e}. Check API Key.")
             return {"error": f"OpenAI Authentication Error: {e}"} # Fatal
        except openai.BadRequestError as e:
             last_exception = e
             logging.error(f"Chapter {chap_num}, Attempt {attempt+1}: OpenAI Bad Request Error (400): {e}. Input: '{prompt[:200]}...'")
             return {"error": f"OpenAI Bad Request Error: {e}"} # Likely fatal for this input
        except Exception as e:
            last_exception = e
            logging.error(f"Chapter {chap_num}, Attempt {attempt+1}: Unexpected error during LLM call/processing: {e}", exc_info=True)

        if attempt < LLM_MAX_RETRIES:
            wait_time = 2 ** attempt
            logging.info(f"Chapter {chap_num}: Retrying detailed LLM call in {wait_time} seconds...")
            time.sleep(wait_time)

    logging.error(f"Chapter {chap_num}: Detailed LLM evaluation failed after {LLM_MAX_RETRIES + 1} attempts.")
    error_message = f"Detailed LLM evaluation failed. Last error: {repr(last_exception)}. Last response snippet: '{reply_content[:200]}...'"
    return {"error": error_message}

# ==============================================================
# 4. Main Calculation Function (Modified to use detailed LLM eval)
# ==============================================================

def calculate_and_save_evaluations(folder_a: Path, folder_b: Path, seed_dir: Path, output_root: Path):
    """
    Calculates Cosine Similarity, BERTScore, and DETAILED LLM Evaluation,
    saving combined results to individual JSON files per chapter.
    """
    logging.info("🚀 Starting combined evaluation process (with detailed LLM eval)...")
    start_time_overall = time.time()
    output_root.mkdir(parents=True, exist_ok=True)

    # --- Essential Library Checks ---
    if calculate_bert_score is None: logging.error("CRITICAL: bert_score library not loaded.")
    if openai is None: logging.error("CRITICAL: openai library not loaded."); return

    # --- Path Validation ---
    if not folder_a.is_dir(): logging.error(f"CRITICAL: Folder A not found: {folder_a}"); return
    if not folder_b.is_dir(): logging.error(f"CRITICAL: Folder B not found: {folder_b}"); return
    if not seed_dir.is_dir(): logging.error(f"CRITICAL: Seed directory not found: {seed_dir}"); return

    # --- Initialize OpenAI Client ---
    if not initialize_openai_client():
        logging.error("CRITICAL: Failed to initialize OpenAI client. LLM evaluations cannot proceed.")
        return # Exit if LLM Eval is mandatory

    # --- Load SBERT Model ---
    sbert_model = None
    try:
        logging.info(f"Loading Sentence Transformer model: {SBERT_MODEL_NAME}...")
        sbert_model = SentenceTransformer(SBERT_MODEL_NAME)
        logging.info("✅ Sentence Transformer model loaded.")
    except Exception as e:
        logging.error(f"CRITICAL: Failed to load SBERT model '{SBERT_MODEL_NAME}': {e}", exc_info=True)
        # Optionally return if SBERT is critical

    # --- Pre-scan Folder B directories ---
    # (Same implementation as before)
    logging.info(f"Scanning {folder_b} for comparison book directories...")
    folder_b_dir_map = {}
    try:
        for b_dir in folder_b.iterdir():
            if b_dir.is_dir():
                normalized_b_name = normalize(b_dir.name)
                if normalized_b_name:
                    if normalized_b_name in folder_b_dir_map: logging.warning(f"Duplicate normalized name '{normalized_b_name}' in {folder_b}. Using first: '{folder_b_dir_map[normalized_b_name].name}'. Skipping '{b_dir.name}'")
                    else: folder_b_dir_map[normalized_b_name] = b_dir; logging.debug(f"Mapped '{normalized_b_name}' to '{b_dir.name}'")
                else: logging.warning(f"Skipping directory '{b_dir.name}' in Folder B (empty normalized name).")
    except Exception as e: logging.error(f"Error scanning Folder B: {e}", exc_info=True); return
    logging.info(f"Found {len(folder_b_dir_map)} potential comparison book directories in {folder_b}.")

    # --- Iterate through books in Folder A ---
    # (Same implementation as before)
    folder_a_book_dirs = sorted([d for d in folder_a.iterdir() if d.is_dir()])
    if not folder_a_book_dirs: logging.error(f"No book directories found in {folder_a}."); return
    logging.info(f"Found {len(folder_a_book_dirs)} potential books in {folder_a} to process.")

    # --- Overall Counters ---
    overall_chapters_processed_success = 0
    overall_chapters_processed_partial = 0
    overall_chapters_failed = 0
    books_processed_count = 0
    books_skipped_count = 0

    # --- Main Book Loop ---
    for book_dir_a in tqdm(folder_a_book_dirs, desc="📚 Processing Books"):
        book_name_original = book_dir_a.name
        logging.info(f"\n{'='*15} Processing Book: {book_name_original} {'='*15}")

        normalized_a_name = normalize(book_name_original)
        if not normalized_a_name: logging.warning(f"Skipping book '{book_name_original}' (empty normalized name)."); books_skipped_count += 1; continue

        book_dir_b = folder_b_dir_map.get(normalized_a_name)
        if not book_dir_b: logging.warning(f"No matching comparison dir for '{book_name_original}' (normalized: '{normalized_a_name}'). Skipping."); books_skipped_count += 1; continue
        elif not book_dir_b.is_dir(): logging.error(f"Matched path '{book_dir_b}' is not a directory. Skipping."); books_skipped_count += 1; continue
        else: logging.info(f"Found corresponding comparison directory: {book_dir_b.name}")

        chapters_to_compare = read_seed_chapters(book_name_original, seed_dir)
        if not chapters_to_compare: logging.warning(f"No chapters selected from seed file for '{book_name_original}'. Skipping."); books_skipped_count += 1; continue

        logging.info(f"Selected {len(chapters_to_compare)} chapters for '{book_name_original}'.")
        books_processed_count += 1
        book_output_dir = output_root / book_name_original
        book_output_dir.mkdir(parents=True, exist_ok=True)

        # --- Process Chapters for this Book ---
        book_chapters_success = 0; book_chapters_partial = 0; book_chapters_failed = 0
        for chap_num in tqdm(chapters_to_compare, desc=f"  ├── Chapters ({book_name_original})", leave=False):
            logging.info(f"--- Evaluating Chapter {chap_num} ---")
            chapter_start_time = time.time()

            path_a = find_chapter_file(book_dir_a, chap_num)
            path_b = find_chapter_file(book_dir_b, chap_num)

            # Initialize result dict structure - Added llm_detailed_evaluation
            result_data = {
                "book_name": book_name_original,
                "chapter": chap_num,
                "timestamp_utc": datetime.datetime.now(datetime.timezone.utc).isoformat(),
                "file_a": str(path_a.relative_to(FOLDER_A.parent)) if path_a else "NOT_FOUND",
                "file_b": str(path_b.relative_to(FOLDER_B.parent)) if path_b else "NOT_FOUND",
                "cosine_similarity": None,
                "bert_score_f1": None,
                "bert_score_precision": None,
                "bert_score_recall": None,
                "llm_detailed_evaluation": None, # <--- STORE THE FULL LLM RESULT HERE
                "llm_model_used": EVALUATION_MODEL_NAME if openai_client else "N/A",
                "metrics_calculated": [],
                "error": None
            }

            # 1. Check files and read contents
            if not path_a or not path_b: error_msg = f"Input file(s) missing: {'A ' if not path_a else ''}{'B' if not path_b else ''}"
            else: text_a = read_txt(path_a); text_b = read_txt(path_b); error_msg = None
            if not error_msg and (not text_a or not text_b): error_msg = f"Content empty/read failed: {'A ' if not text_a else ''}{'B' if not text_b else ''}"

            if error_msg:
                logging.error(f"Chapter {chap_num}: {error_msg}")
                result_data["error"] = error_msg
                book_chapters_failed += 1
            else:
                # --- Start Calculations ---
                metrics_ok = []
                calculation_step_error = None

                # 2. Calculate Cosine Similarity
                if sbert_model:
                    try:
                        # (Same SBERT calculation logic as before)
                        emb1 = sbert_model.encode(text_a, normalize_embeddings=True, show_progress_bar=False)
                        emb2 = sbert_model.encode(text_b, normalize_embeddings=True, show_progress_bar=False)
                        if emb1.ndim > 1: emb1 = emb1.flatten()
                        if emb2.ndim > 1: emb2 = emb2.flatten()
                        cosine_sim = util.cos_sim(emb1, emb2).item()
                        result_data["cosine_similarity"] = round(float(cosine_sim), 6)
                        metrics_ok.append("cosine")
                        logging.info(f"  ✅ Cosine Similarity: {cosine_sim:.4f}")
                    except Exception as e:
                        logging.error(f"Chapter {chap_num}: Error during Cosine Sim: {e}", exc_info=LOGGING_LEVEL <= logging.DEBUG)
                        calculation_step_error = f"Cosine Sim failed: {repr(e)}"

                # 3. Calculate BERTScore
                if calculate_bert_score:
                     try:
                        # (Same BERTScore calculation logic as before)
                         if text_a and text_b:
                             P, R, F1 = calculate_bert_score([text_a], [text_b], model_type=BERTSCORE_MODEL_TYPE, lang=BERTSCORE_LANG, rescale_with_baseline=True, verbose=False, batch_size=1)
                             result_data["bert_score_precision"] = round(float(P.item()), 6)
                             result_data["bert_score_recall"] = round(float(R.item()), 6)
                             result_data["bert_score_f1"] = round(float(F1.item()), 6)
                             metrics_ok.append("bertscore")
                             logging.info(f"  ✅ BERTScore: P={P.item():.4f} R={R.item():.4f} F1={F1.item():.4f}")
                         else: raise ValueError("Input text for BERTScore is empty.")
                     except Exception as e:
                         logging.error(f"Chapter {chap_num}: Error during BERTScore: {e}", exc_info=LOGGING_LEVEL <= logging.DEBUG)
                         if not calculation_step_error: calculation_step_error = f"BERTScore failed: {repr(e)}"
                         else: calculation_step_error += f"; BERTScore failed: {repr(e)}"

                # 4. Perform DETAILED LLM Evaluation
                if openai_client:
                     try:
                         logging.debug(f"Performing detailed LLM evaluation for Chapter {chap_num}")
                         # Truncate input texts if needed (adjust limit as necessary)
                         MAX_LLM_INPUT_CHARS = 30000
                         truncated = False
                         text_a_llm = text_a[:MAX_LLM_INPUT_CHARS]; text_b_llm = text_b[:MAX_LLM_INPUT_CHARS]
                         if len(text_a) > MAX_LLM_INPUT_CHARS or len(text_b) > MAX_LLM_INPUT_CHARS:
                             truncated = True
                             logging.warning(f"Chapter {chap_num}: Input text truncated to {MAX_LLM_INPUT_CHARS} chars for LLM.")

                         # Use the new build_prompt function
                         prompt = build_prompt(text_a=text_a_llm, text_b=text_b_llm)

                         # Use the modified evaluator function
                         llm_eval_result = call_llm_evaluator_detailed(prompt, chap_num) # Renamed for clarity

                         # Store the *entire* result (or error)
                         result_data["llm_detailed_evaluation"] = llm_eval_result

                         # Check for success/error
                         if llm_eval_result and "error" not in llm_eval_result:
                             metrics_ok.append("llm_detailed") # Indicate detailed eval success
                             logging.info("  ✅ Detailed LLM Evaluation: Success (Extraction + Scoring)")
                         else:
                             error_detail = llm_eval_result.get("error", "Unknown LLM error") if llm_eval_result else "LLM returned None"
                             logging.error(f"Chapter {chap_num}: Detailed LLM evaluation failed. Details: {error_detail}")
                             # Append to overall step error
                             if not calculation_step_error: calculation_step_error = f"LLM Eval failed: {error_detail}"
                             else: calculation_step_error += f"; LLM Eval failed: {error_detail}"

                     except Exception as e:
                         logging.error(f"Chapter {chap_num}: Unexpected error during LLM step: {e}", exc_info=True)
                         error_detail = f"Unexpected LLM step error: {repr(e)}"
                         result_data["llm_detailed_evaluation"] = {"error": error_detail}
                         if not calculation_step_error: calculation_step_error = error_detail
                         else: calculation_step_error += f"; {error_detail}"

                # --- End Calculations ---
                result_data["metrics_calculated"] = sorted(metrics_ok)
                if calculation_step_error:
                    result_data["error"] = calculation_step_error
                    if metrics_ok: book_chapters_partial += 1
                    else: book_chapters_failed += 1
                elif not metrics_ok:
                     result_data["error"] = "No metrics attempted (check model/library loading)."
                     book_chapters_failed += 1
                else: book_chapters_success += 1

            # 5. Save individual JSON for this chapter
            output_json_filename = f"chapter_{chap_num:05d}_evaluation.json"
            output_json_path = book_output_dir / output_json_filename
            try:
                with open(output_json_path, "w", encoding="utf-8") as f:
                    json.dump(result_data, f, ensure_ascii=False, indent=2, default=str)
                logging.debug(f"Saved chapter result to {output_json_path}")
            except Exception as e:
                logging.error(f"CRITICAL: Failed to save result for Chapter {chap_num} to {output_json_path}: {e}")
                if result_data["error"] is None: # Downgrade status if saving failed
                    result_data["error"] = f"Failed to save JSON: {repr(e)}"
                    if book_chapters_success > 0: book_chapters_success -= 1
                    elif book_chapters_partial > 0: book_chapters_partial -= 1
                    book_chapters_failed += 1
                elif not result_data["error"].startswith("Failed to save JSON"):
                     result_data["error"] += f"; Failed to save JSON: {repr(e)}"

            # Determine final status string
            # (Same status logic as before)
            if result_data["error"] and not result_data.get("metrics_calculated"): status_str = "Failed (Setup/Read Error)"
            elif result_data["error"]: status_str = "Partial (Calculation/Save Error)"
            else: status_str = "Success"
            chapter_duration = time.time() - chapter_start_time
            logging.info(f"--- Chapter {chap_num} finished in {chapter_duration:.2f} seconds. Status: {status_str} ---")
            # time.sleep(0.2) # Optional delay

        # --- End Chapter Loop for Book ---
        logging.info(f"Finished book '{book_name_original}'. Success: {book_chapters_success}, Partial: {book_chapters_partial}, Failed/Skipped: {book_chapters_failed}")
        overall_chapters_processed_success += book_chapters_success
        overall_chapters_processed_partial += book_chapters_partial
        overall_chapters_failed += book_chapters_failed

    # --- End of all books ---
    # (Same summary logging as before)
    end_time_overall = time.time()
    total_time_overall = end_time_overall - start_time_overall
    logging.info(f"\n{'='*20} Overall Evaluation Summary {'='*20}")
    logging.info(f"Processed {books_processed_count} books.")
    logging.info(f"Skipped {books_skipped_count} books.")
    logging.info(f"Total chapters fully processed: {overall_chapters_processed_success}")
    logging.info(f"Total chapters partially processed: {overall_chapters_processed_partial}")
    logging.info(f"Total chapters failed/skipped: {overall_chapters_failed}")
    logging.info(f"Total execution time: {total_time_overall:.2f} seconds ({total_time_overall/60:.2f} minutes)")
    logging.info(f"Individual chapter results saved in subdirectories under: {output_root}")
    logging.info("🎉 Combined evaluation finished.")


# ==============================================================
# 5. Run the Script
# ==============================================================
if __name__ == "__main__":
    # --- Run Main Calculation ---
    calculate_and_save_evaluations(FOLDER_A, FOLDER_B, SEED_DIR, OUTPUT_ROOT)

2025-05-03 16:50:36,176 - INFO - 🚀 Starting combined evaluation process (with detailed LLM eval)...
2025-05-03 16:50:36,209 - INFO - OpenAI client initialized for model gpt-4o.
2025-05-03 16:50:36,209 - INFO - Loading Sentence Transformer model: shibing624/text2vec-base-chinese...
2025-05-03 16:50:36,210 - INFO - Use pytorch device_name: cuda:0
2025-05-03 16:50:36,210 - INFO - Load pretrained SentenceTransformer: shibing624/text2vec-base-chinese
2025-05-03 16:50:36,752 - INFO - ✅ Sentence Transformer model loaded.
2025-05-03 16:50:36,753 - INFO - Scanning /content/output/100000_novel_1000_100000_generated_gemini for comparison book directories...
2025-05-03 16:50:36,755 - INFO - Found 40 potential comparison book directories in /content/output/100000_novel_1000_100000_generated_gemini.
2025-05-03 16:50:36,757 - INFO - Found 40 potential books in /content/novels_normalized to process.
2025-05-03 16:50:36,760 - INFO -    | 0/40 [00:00<?, ?it/s]
2025-05-03 16:50:36,762 - INFO - Found corr

In [27]:
# ==============================================================
# Merged Script: Calculate Cosine Similarity, BERTScore & LLM Eval
# - Uses robust file matching based on seed files
# - Calls OpenAI API for DETAILED qualitative evaluation
#   (Props, Characters, Scenes extraction + 5 similarity scores)
# - Saves combined results (Cosine, BERTScore, Full LLM Eval) to individual JSON per chapter
# ==============================================================

# ==============================================================
#  0. Environment Installation (if needed)
# ==============================================================
# !pip install --quiet sentence-transformers bert-score torch torchvision torchaudio openai chardet tqdm pandas # Ensure all dependencies are installed

# ==============================================================
#  1. Import Libraries
# ==============================================================
import os
import json
import time
import datetime
import re
import logging
import unicodedata
from pathlib import Path
from tqdm import tqdm # Use standard tqdm
import chardet

# --- Sentence Transformers ---
from sentence_transformers import SentenceTransformer, util

# --- BERT Score ---
try:
    from bert_score import score as calculate_bert_score
except ImportError:
    logging.error("bert_score library not found. Please install it: pip install bert-score")
    calculate_bert_score = None # Set to None if import fails

# --- OpenAI for LLM Evaluation ---
try:
    import openai
except ImportError:
    logging.error("openai library not found. Please install it: pip install openai")
    openai = None # Set to None if import fails

# ==============================================================
#  2. Configuration
# ==============================================================

# --- Input/Output Folders ---
# ⚠️ MODIFY THESE PATHS AS NEEDED
FOLDER_A = Path("/content/novels_normalized") # Folder with original chapters
FOLDER_B = Path("/content/10000_word_chapters_expanded") # Folder with generated/comparison chapters
SEED_DIR = Path("/content/randomseed") # Folder with _randomseed.txt files
OUTPUT_ROOT = Path("/content/combined_evaluation_results/10000_word_chapters_expanded_evaluation") # Main output folder for combined results

# --- Similarity Model Configuration ---
SBERT_MODEL_NAME = "shibing624/text2vec-base-chinese"
BERTSCORE_MODEL_TYPE = "bert-base-chinese" # Make sure this matches the language/domain
BERTSCORE_LANG = "zh" # Set to your language code (e.g., "en", "zh")

# --- LLM Evaluation Configuration ---
# ⚠️ IMPORTANT SECURITY WARNING:
# Hardcoding API keys is a major security risk!
# Use environment variables (os.getenv('OPENAI_API_KEY')) or a secure secrets management system in production.
OPENAI_API_KEY = "your-default-api-key" # <-- PASTE YOUR KEY HERE or use os.getenv("OPENAI_API_KEY")
EVALUATION_MODEL_NAME = "gpt-4o"
EVALUATION_TEMPERATURE = 0.0 # Set to 0 for deterministic scoring
LLM_EVALUATION_TIMEOUT = 180 # Increased timeout slightly for more complex task
LLM_MAX_RETRIES = 2 # Number of retries for LLM call failures

# --- LLM Score Keys (for validation) ---
# These are the keys expected WITHIN the "scores" sub-dictionary of the LLM response
EXPECTED_LLM_KEYS = [
    "semantic_similarity",
    "plot_similarity",
    "character_similarity",
    "background_similarity",
    "style_similarity",
]

# --- Logging ---
LOGGING_LEVEL = logging.INFO # Set to DEBUG for more verbose file matching/API logs
logging.basicConfig(level=LOGGING_LEVEL, format="%(asctime)s - %(levelname)s - %(message)s")

# ==============================================================
#  3. Utility Functions (Including LLM Helpers from Script 1)
# ==============================================================

def read_txt(path: Path) -> str:
    """Reads a text file with automatic encoding detection."""
    # (Same implementation as before)
    try:
        raw = path.read_bytes()
        detected = chardet.detect(raw); enc = detected['encoding'] if detected else None
        confidence = detected['confidence'] if detected else 0
        if confidence < 0.9 or (enc and enc.lower() in ['ascii', 'windows-1252']):
            try:
                decoded_text = raw.decode('utf-8', errors='strict')
                logging.debug(f"Read {path.name} successfully with strict UTF-8.")
                return decoded_text.strip()
            except UnicodeDecodeError:
                logging.debug(f"Strict UTF-8 failed for {path.name}, proceeding with detection/fallback.")
        if not enc: enc = 'utf-8'; logging.warning(f"No encoding detected for {path.name}, defaulting to {enc}.")
        common_encodings = list(dict.fromkeys([enc, 'utf-8', 'gbk', 'gb18030', 'gb2312']))
        decoded_text = None
        for encoding in common_encodings:
            if not encoding: continue
            try:
                decoded_text = raw.decode(encoding, errors='strict')
                logging.debug(f"Read {path.name} successfully with strict {encoding}.")
                break
            except (UnicodeDecodeError, TypeError, LookupError):
                logging.debug(f"Strict decoding with {encoding} failed for {path.name}.")
                continue
        if decoded_text is None:
            final_encoding = 'utf-8'
            logging.warning(f"Strict decoding failed for {path.name}. Using fallback {final_encoding} with errors='ignore'.")
            decoded_text = raw.decode(final_encoding, errors='ignore')
        return decoded_text.strip()
    except FileNotFoundError: logging.error(f"File not found: {path}"); return ""
    except Exception as e: logging.error(f"Error reading file {path}: {e}", exc_info=True); return ""

# --- Chapter Number Extraction Patterns (Comprehensive) ---
# (Same implementation as before)
CHAP_PATTERNS = [
    re.compile(r"^第(\d{1,5})章\.txt$"), re.compile(r"^(\d{1,5})\.txt$"),
    re.compile(r"^(\d{1,5})[_\s.-].*?\.txt$", re.IGNORECASE),
    re.compile(r"第\s*(\d{1,5})\s*章", re.IGNORECASE),
    re.compile(r"chapter\s*(\d{1,5})", re.IGNORECASE),
    re.compile(r"chap\s*(\d{1,5})", re.IGNORECASE),
    re.compile(r"[_-](\d{1,5})[_-]"), re.compile(r"(?<!\d)(\d{1,5})(?!\d)"),
]

def find_chapter_file(folder: Path, chap_num: int) -> Path | None:
    """Finds a TXT file matching the chapter number within a folder (recursive)."""
    # (Same implementation as before)
    if not folder.is_dir(): logging.warning(f"Search folder doesn't exist: {folder}"); return None
    possible_matches = []
    try:
        for item in folder.rglob("*.txt"):
            if item.is_file():
                name = item.name
                for i, pat in enumerate(CHAP_PATTERNS):
                    match = pat.search(name)
                    if match and match.groups():
                        try:
                            extracted_num = int(match.group(1))
                            if extracted_num == chap_num:
                                possible_matches.append({"path": item, "pattern_index": i})
                                break
                        except (ValueError, IndexError): continue
    except Exception as e: logging.error(f"Error searching {folder} for chapter {chap_num}: {e}", exc_info=True); return None
    if not possible_matches: logging.warning(f"Chapter {chap_num} file not found in {folder}"); return None
    possible_matches.sort(key=lambda x: (x['pattern_index'], len(x['path'].name)))
    best_match = possible_matches[0]
    if len(possible_matches) > 1: logging.warning(f"Multiple file matches for chapter {chap_num} in {folder}. Using '{best_match['path'].name}'.")
    logging.debug(f"Found chapter {chap_num} file: {best_match['path']}")
    return best_match['path']

def normalize(name: str) -> str:
    """Normalizes directory/book names for robust matching."""
    # (Same implementation as before)
    try:
        name = str(name); name = unicodedata.normalize("NFKC", name); name = name.lower()
        name = name.replace("_utf8", "").replace(".utf8", "")
        name = name.replace("_校对版全本", "").replace("（校对版全本）", "")
        name = re.sub(r"[《》【】]", "", name)
        name = re.sub(r"\.(summary|txt|eval)$", "", name, flags=re.IGNORECASE)
        name = re.sub(r"[\s:：()（）“”\"',.!?;?？\-·•_#]+", "", name)
        return name.strip()
    except Exception as e: logging.error(f"Error normalizing name '{name}': {e}"); return ""

def read_seed_chapters(book_name: str, seed_dir: Path) -> list[int]:
    """Reads the list of chapter numbers from the _randomseed.txt file for a given book."""
    # (Same implementation as before)
    norm_target = normalize(book_name)
    if not norm_target: logging.warning(f"Normalized book name is empty for '{book_name}', cannot match seed file."); return []
    seed_file_found = None
    try:
        if not seed_dir.is_dir(): logging.error(f"Seed directory missing: {seed_dir}"); return []
        logging.debug(f"Searching for seed file matching '{norm_target}' in {seed_dir}")
        for file in seed_dir.glob("*_randomseed.txt"):
            candidate_name = file.stem.replace("_randomseed", "")
            norm_candidate = normalize(candidate_name)
            logging.debug(f"Comparing '{norm_candidate}' (from {file.name}) with target '{norm_target}'")
            if norm_candidate == norm_target:
                seed_file_found = file; logging.info(f"Found matching seed file: {seed_file_found.name} for book '{book_name}'"); break
        if seed_file_found:
            lines = seed_file_found.read_text(encoding="utf-8").splitlines()
            chapters = sorted(list(set(int(x.strip()) for x in lines if x.strip().isdigit())))
            if not chapters: logging.warning(f"Seed file {seed_file_found.name} for '{book_name}' contains no valid chapter numbers.")
            return chapters
        else:
            logging.warning(f"⚠️ No matching seed file found in {seed_dir} for book: '{book_name}' (normalized target: '{norm_target}')")
            return []
    except FileNotFoundError: logging.error(f"Seed directory not found: {seed_dir}"); return []
    except Exception as e: logging.error(f"Error reading seed file for '{book_name}': {e}", exc_info=True); return []

# --- LLM Helper Functions (Incorporating logic from Script 1) ---

def build_prompt(text_a: str, text_b: str) -> str:
    """
    Constructs the detailed prompt for the LLM, asking for element
    extraction (props, characters, scenes) and 5 similarity scores.
    """
    prompt = f"""
你是一位专业中文小说编辑，请你阅读【文本A】与【文本B】，完成以下任务：

1. 分别提取文本A与文本B中的：
   - 出现道具列表（如：剑、玉、令牌等）
   - 出现人物名称列表
   - 出现场景/环境名称列表
   * 提取时请尽量精确，去除通用词语 (例如: '人', '地方')，只保留具体名称。
   * 如果某一项在文本中没有出现，请返回空列表 `[]`。

2. 分别统计每类元素的数量（去重后），并输出每类的元素列表与数量。

3. 接着对比两段文本内容，按照以下 5 个维度进行 0-1 评分（1 表示非常相似，0 表示完全不同）：
   - semantic_similarity   整体语义/主题
   - plot_similarity       情节、事件发展
   - character_similarity  人物名称、数量与设定（综合考虑）
   - background_similarity 场景与世界设定
   - style_similarity      语言风格与表达方式
   * 评分请基于文本内容，给出客观评估。

**请严格输出以下 JSON 格式，不要包含 markdown ```json ... ``` 标记，直接输出 JSON 对象：**

{{
  "text_a": {{
    "props": ["道具A1", "道具A2", ...],
    "props_count": <整数>,
    "characters": ["人物A1", "人物A2", ...],
    "characters_count": <整数>,
    "scenes": ["场景A1", "场景A2", ...],
    "scenes_count": <整数>
  }},
  "text_b": {{
    "props": ["道具B1", "道具B2", ...],
    "props_count": <整数>,
    "characters": ["人物B1", "人物B2", ...],
    "characters_count": <整数>,
    "scenes": ["场景B1", "场景B2", ...],
    "scenes_count": <整数>
  }},
  "scores": {{
    "semantic_similarity": <0.0-1.0>,
    "plot_similarity": <0.0-1.0>,
    "character_similarity": <0.0-1.0>,
    "background_similarity": <0.0-1.0>,
    "style_similarity": <0.0-1.0>
  }}
}}

【文本A】
{text_a}

【文本B】
{text_b}
    """
    return prompt

def validate_scores(score_dict: dict) -> bool:
    """
    Validates the 'scores' sub-dictionary within the LLM response.
    Checks if all expected keys are present and values are numbers between 0 and 1.
    """
    if not isinstance(score_dict, dict):
        logging.warning("Score validation failed: input is not a dictionary.")
        return False
    for key in EXPECTED_LLM_KEYS:
        val = score_dict.get(key)
        if not isinstance(val, (int, float)):
            logging.warning(f"Score validation failed: Key '{key}' is missing or not a number (value: {val}).")
            return False
        if not (0 <= val <= 1):
            logging.warning(f"Score validation failed: Key '{key}' value {val} is outside the [0, 1] range.")
            return False
    return True

# --- Modified LLM Call Function ---
openai_client = None # Global client instance

def initialize_openai_client():
    """Initializes the OpenAI client."""
    global openai_client
    if openai is None: logging.error("OpenAI library not imported."); return False
    if not OPENAI_API_KEY or OPENAI_API_KEY.startswith("sk-proj-"): # Basic check
        logging.warning("OpenAI API Key might be missing or using placeholder. Ensure it's correctly set.")
        # Proceeding, but check key if errors occur
    try:
        openai_client = openai.OpenAI(api_key=OPENAI_API_KEY, timeout=LLM_EVALUATION_TIMEOUT)
        logging.info(f"OpenAI client initialized for model {EVALUATION_MODEL_NAME}.")
        # Optional: Test connection here if needed
        # openai_client.models.list()
        return True
    except Exception as e:
        logging.error(f"Failed to initialize OpenAI client: {e}", exc_info=True)
        openai_client = None
        return False

def call_llm_evaluator_detailed(prompt: str, chap_num: int) -> dict | None:
    """
    Calls the configured OpenAI model with retries, expects the detailed
    JSON structure (extractions + scores), validates it, and returns the full parsed dict.
    """
    if openai_client is None:
        logging.error(f"Chapter {chap_num}: OpenAI client not initialized. Skipping LLM evaluation.")
        return {"error": "OpenAI client not initialized"}

    last_exception = None
    reply_content = "[No response received]"
    for attempt in range(LLM_MAX_RETRIES + 1):
        try:
            logging.debug(f"Chapter {chap_num}, Attempt {attempt+1}: Calling LLM for detailed eval ({EVALUATION_MODEL_NAME})...")
            resp = openai_client.chat.completions.create(
                model=EVALUATION_MODEL_NAME,
                messages=[{"role": "user", "content": prompt}],
                temperature=EVALUATION_TEMPERATURE,
                response_format={"type": "json_object"} # Request JSON output
            )
            reply_content = resp.choices[0].message.content.strip()
            logging.debug(f"Chapter {chap_num}, Attempt {attempt+1}: LLM raw response: {reply_content[:500]}...")

            # Parse the JSON response
            parsed_llm_output = json.loads(reply_content)

            # --- Validate Structure ---
            if not isinstance(parsed_llm_output, dict):
                raise ValueError("LLM response is not a JSON dictionary.")

            required_keys = ["text_a", "text_b", "scores"]
            if not all(key in parsed_llm_output for key in required_keys):
                missing = [k for k in required_keys if k not in parsed_llm_output]
                raise ValueError(f"LLM JSON response missing top-level keys: {missing}")

            # --- Validate Sub-structures (Basic checks) ---
            for key in ["text_a", "text_b"]:
                 sub_dict = parsed_llm_output.get(key)
                 if not isinstance(sub_dict, dict):
                     raise ValueError(f"LLM JSON response key '{key}' is not a dictionary.")
                 # Could add more checks here for props, characters, scenes keys if needed
                 required_sub_keys = ["props", "props_count", "characters", "characters_count", "scenes", "scenes_count"]
                 if not all(sub_key in sub_dict for sub_key in required_sub_keys):
                      missing_sub = [sk for sk in required_sub_keys if sk not in sub_dict]
                      raise ValueError(f"LLM JSON response key '{key}' is missing sub-keys: {missing_sub}")

            # --- Validate Scores ---
            scores_dict = parsed_llm_output.get("scores", {})
            if not validate_scores(scores_dict): # Use the dedicated validation function
                 # validate_scores logs the specific failure reason
                 raise ValueError("LLM JSON response 'scores' validation failed (see previous warning log).")

            logging.debug(f"Chapter {chap_num}, Attempt {attempt+1}: Detailed LLM evaluation successful and validated.")
            # Return the entire parsed and validated dictionary
            return parsed_llm_output

        except json.JSONDecodeError as e:
            last_exception = e
            logging.warning(f"Chapter {chap_num}, Attempt {attempt+1}: LLM output failed JSON parsing: {e}. Reply snippet: '{reply_content[:200]}...'")
        except ValueError as e: # Catch validation errors raised above
             last_exception = e
             logging.warning(f"Chapter {chap_num}, Attempt {attempt+1}: LLM response validation failed: {e}. Reply snippet: '{reply_content[:200]}...'")
        except openai.APITimeoutError as e:
             last_exception = e
             logging.warning(f"Chapter {chap_num}, Attempt {attempt+1}: OpenAI API call timed out ({LLM_EVALUATION_TIMEOUT}s).")
        except openai.RateLimitError as e:
             last_exception = e
             wait_time = 5 * (2 ** attempt)
             logging.warning(f"Chapter {chap_num}, Attempt {attempt+1}: OpenAI Rate Limit Error: {e}. Waiting {wait_time}s...")
             time.sleep(wait_time)
             continue
        except openai.APIConnectionError as e:
             last_exception = e
             logging.error(f"Chapter {chap_num}, Attempt {attempt+1}: OpenAI API connection error: {e}.")
        except openai.AuthenticationError as e:
             last_exception = e
             logging.error(f"Chapter {chap_num}: OpenAI Authentication Error: {e}. Check API Key.")
             return {"error": f"OpenAI Authentication Error: {e}"} # Fatal
        except openai.BadRequestError as e:
             last_exception = e
             logging.error(f"Chapter {chap_num}, Attempt {attempt+1}: OpenAI Bad Request Error (400): {e}. Input: '{prompt[:200]}...'")
             return {"error": f"OpenAI Bad Request Error: {e}"} # Likely fatal for this input
        except Exception as e:
            last_exception = e
            logging.error(f"Chapter {chap_num}, Attempt {attempt+1}: Unexpected error during LLM call/processing: {e}", exc_info=True)

        if attempt < LLM_MAX_RETRIES:
            wait_time = 2 ** attempt
            logging.info(f"Chapter {chap_num}: Retrying detailed LLM call in {wait_time} seconds...")
            time.sleep(wait_time)

    logging.error(f"Chapter {chap_num}: Detailed LLM evaluation failed after {LLM_MAX_RETRIES + 1} attempts.")
    error_message = f"Detailed LLM evaluation failed. Last error: {repr(last_exception)}. Last response snippet: '{reply_content[:200]}...'"
    return {"error": error_message}

# ==============================================================
# 4. Main Calculation Function (Modified to use detailed LLM eval)
# ==============================================================

def calculate_and_save_evaluations(folder_a: Path, folder_b: Path, seed_dir: Path, output_root: Path):
    """
    Calculates Cosine Similarity, BERTScore, and DETAILED LLM Evaluation,
    saving combined results to individual JSON files per chapter.
    """
    logging.info("🚀 Starting combined evaluation process (with detailed LLM eval)...")
    start_time_overall = time.time()
    output_root.mkdir(parents=True, exist_ok=True)

    # --- Essential Library Checks ---
    if calculate_bert_score is None: logging.error("CRITICAL: bert_score library not loaded.")
    if openai is None: logging.error("CRITICAL: openai library not loaded."); return

    # --- Path Validation ---
    if not folder_a.is_dir(): logging.error(f"CRITICAL: Folder A not found: {folder_a}"); return
    if not folder_b.is_dir(): logging.error(f"CRITICAL: Folder B not found: {folder_b}"); return
    if not seed_dir.is_dir(): logging.error(f"CRITICAL: Seed directory not found: {seed_dir}"); return

    # --- Initialize OpenAI Client ---
    if not initialize_openai_client():
        logging.error("CRITICAL: Failed to initialize OpenAI client. LLM evaluations cannot proceed.")
        return # Exit if LLM Eval is mandatory

    # --- Load SBERT Model ---
    sbert_model = None
    try:
        logging.info(f"Loading Sentence Transformer model: {SBERT_MODEL_NAME}...")
        sbert_model = SentenceTransformer(SBERT_MODEL_NAME)
        logging.info("✅ Sentence Transformer model loaded.")
    except Exception as e:
        logging.error(f"CRITICAL: Failed to load SBERT model '{SBERT_MODEL_NAME}': {e}", exc_info=True)
        # Optionally return if SBERT is critical

    # --- Pre-scan Folder B directories ---
    # (Same implementation as before)
    logging.info(f"Scanning {folder_b} for comparison book directories...")
    folder_b_dir_map = {}
    try:
        for b_dir in folder_b.iterdir():
            if b_dir.is_dir():
                normalized_b_name = normalize(b_dir.name)
                if normalized_b_name:
                    if normalized_b_name in folder_b_dir_map: logging.warning(f"Duplicate normalized name '{normalized_b_name}' in {folder_b}. Using first: '{folder_b_dir_map[normalized_b_name].name}'. Skipping '{b_dir.name}'")
                    else: folder_b_dir_map[normalized_b_name] = b_dir; logging.debug(f"Mapped '{normalized_b_name}' to '{b_dir.name}'")
                else: logging.warning(f"Skipping directory '{b_dir.name}' in Folder B (empty normalized name).")
    except Exception as e: logging.error(f"Error scanning Folder B: {e}", exc_info=True); return
    logging.info(f"Found {len(folder_b_dir_map)} potential comparison book directories in {folder_b}.")

    # --- Iterate through books in Folder A ---
    # (Same implementation as before)
    folder_a_book_dirs = sorted([d for d in folder_a.iterdir() if d.is_dir()])
    if not folder_a_book_dirs: logging.error(f"No book directories found in {folder_a}."); return
    logging.info(f"Found {len(folder_a_book_dirs)} potential books in {folder_a} to process.")

    # --- Overall Counters ---
    overall_chapters_processed_success = 0
    overall_chapters_processed_partial = 0
    overall_chapters_failed = 0
    books_processed_count = 0
    books_skipped_count = 0

        # --- Main Book Loop ---
    import difflib
    
    # --- Pre-scan Folder B directories ---
    logging.info(f"Scanning {folder_b} for comparison book directories...")
    folder_b_dirs = [b_dir for b_dir in folder_b.iterdir() if b_dir.is_dir()]
    folder_b_names = [b.name for b in folder_b_dirs]
    logging.info(f"Found {len(folder_b_dirs)} comparison directories in {folder_b}.")
    
    # --- Main Book Loop ---
    for book_dir_a in tqdm(folder_a_book_dirs, desc="📚 Processing Books"):
        book_name_original = book_dir_a.name
        logging.info(f"\n{'='*15} Processing Book: {book_name_original} {'='*15}")
    
        # ✅ 尝试模糊匹配对应目录名（不再 normalize）
        matched_name = difflib.get_close_matches(book_name_original, folder_b_names, n=1, cutoff=0.8)
        if matched_name:
            book_dir_b = folder_b / matched_name[0]
            logging.info(f"✅ Matched comparison directory: {book_dir_b.name}")
        else:
            logging.warning(f"No matching comparison dir for '{book_name_original}'. Skipping.")
            books_skipped_count += 1
            continue
    
        # 验证是否是目录
        if not book_dir_b.is_dir():
            logging.error(f"Matched path '{book_dir_b}' is not a directory. Skipping.")
            books_skipped_count += 1
            continue
    
        # 读取随机种子章节
        chapters_to_compare = read_seed_chapters(book_name_original, seed_dir)
        if not chapters_to_compare:
            logging.warning(f"No chapters selected from seed file for '{book_name_original}'. Skipping.")
            books_skipped_count += 1
            continue
        
        book_output_dir = output_root / book_name_original
        book_output_dir.mkdir(parents=True, exist_ok=True)
        
        # === Skip already processed chapters ===
        chapters_remaining = []
        for chap_num in chapters_to_compare:
            chapter_file = book_output_dir / f"第{chap_num:03d}章.txt"
            if chapter_file.exists():
                logging.info(f"⏭️ Chapter {chap_num} already exists. Skipping.")
            else:
                chapters_remaining.append(chap_num)
        
        if not chapters_remaining:
            logging.info(f"✅ All chapters already processed for '{book_name_original}'. Skipping.")
            books_skipped_count += 1
            continue
        
        logging.info(f"Selected {len(chapters_remaining)} chapters for '{book_name_original}'")
        books_processed_count += 1
        
        # 👉 使用 chapters_remaining 继续后续处理
        for chap_num in chapters_remaining:
    
            logging.info(f"Selected {len(chapters_to_compare)} chapters for '{book_name_original}'.")
            books_processed_count += 1
            book_output_dir = output_root / book_name_original
            book_output_dir.mkdir(parents=True, exist_ok=True)
            
            # --- Process Chapters for this Book ---
            book_chapters_success = 0; book_chapters_partial = 0; book_chapters_failed = 0
            for chap_num in tqdm(chapters_to_compare, desc=f"  ├── Chapters ({book_name_original})", leave=False):
                logging.info(f"--- Evaluating Chapter {chap_num} ---")
                chapter_start_time = time.time()
    
                path_a = find_chapter_file(book_dir_a, chap_num)
                path_b = find_chapter_file(book_dir_b, chap_num)
    
                # Initialize result dict structure - Added llm_detailed_evaluation
                result_data = {
                    "book_name": book_name_original,
                    "chapter": chap_num,
                    "timestamp_utc": datetime.datetime.now(datetime.timezone.utc).isoformat(),
                    "file_a": str(path_a.relative_to(FOLDER_A.parent)) if path_a else "NOT_FOUND",
                    "file_b": str(path_b.relative_to(FOLDER_B.parent)) if path_b else "NOT_FOUND",
                    "cosine_similarity": None,
                    "bert_score_f1": None,
                    "bert_score_precision": None,
                    "bert_score_recall": None,
                    "llm_detailed_evaluation": None, # <--- STORE THE FULL LLM RESULT HERE
                    "llm_model_used": EVALUATION_MODEL_NAME if openai_client else "N/A",
                    "metrics_calculated": [],
                    "error": None
                }
    
                # 1. Check files and read contents
                if not path_a or not path_b: error_msg = f"Input file(s) missing: {'A ' if not path_a else ''}{'B' if not path_b else ''}"
                else: text_a = read_txt(path_a); text_b = read_txt(path_b); error_msg = None
                if not error_msg and (not text_a or not text_b): error_msg = f"Content empty/read failed: {'A ' if not text_a else ''}{'B' if not text_b else ''}"
    
                if error_msg:
                    logging.error(f"Chapter {chap_num}: {error_msg}")
                    result_data["error"] = error_msg
                    book_chapters_failed += 1
                else:
                    # --- Start Calculations ---
                    metrics_ok = []
                    calculation_step_error = None
    
                    # 2. Calculate Cosine Similarity
                    if sbert_model:
                        try:
                            # (Same SBERT calculation logic as before)
                            emb1 = sbert_model.encode(text_a, normalize_embeddings=True, show_progress_bar=False)
                            emb2 = sbert_model.encode(text_b, normalize_embeddings=True, show_progress_bar=False)
                            if emb1.ndim > 1: emb1 = emb1.flatten()
                            if emb2.ndim > 1: emb2 = emb2.flatten()
                            cosine_sim = util.cos_sim(emb1, emb2).item()
                            result_data["cosine_similarity"] = round(float(cosine_sim), 6)
                            metrics_ok.append("cosine")
                            logging.info(f"  ✅ Cosine Similarity: {cosine_sim:.4f}")
                        except Exception as e:
                            logging.error(f"Chapter {chap_num}: Error during Cosine Sim: {e}", exc_info=LOGGING_LEVEL <= logging.DEBUG)
                            calculation_step_error = f"Cosine Sim failed: {repr(e)}"
    
                    # 3. Calculate BERTScore
                    if calculate_bert_score:
                         try:
                            # (Same BERTScore calculation logic as before)
                             if text_a and text_b:
                                 P, R, F1 = calculate_bert_score([text_a], [text_b], model_type=BERTSCORE_MODEL_TYPE, lang=BERTSCORE_LANG, rescale_with_baseline=True, verbose=False, batch_size=1)
                                 result_data["bert_score_precision"] = round(float(P.item()), 6)
                                 result_data["bert_score_recall"] = round(float(R.item()), 6)
                                 result_data["bert_score_f1"] = round(float(F1.item()), 6)
                                 metrics_ok.append("bertscore")
                                 logging.info(f"  ✅ BERTScore: P={P.item():.4f} R={R.item():.4f} F1={F1.item():.4f}")
                             else: raise ValueError("Input text for BERTScore is empty.")
                         except Exception as e:
                             logging.error(f"Chapter {chap_num}: Error during BERTScore: {e}", exc_info=LOGGING_LEVEL <= logging.DEBUG)
                             if not calculation_step_error: calculation_step_error = f"BERTScore failed: {repr(e)}"
                             else: calculation_step_error += f"; BERTScore failed: {repr(e)}"
    
                    # 4. Perform DETAILED LLM Evaluation
                    if openai_client:
                         try:
                             logging.debug(f"Performing detailed LLM evaluation for Chapter {chap_num}")
                             # Truncate input texts if needed (adjust limit as necessary)
                             MAX_LLM_INPUT_CHARS = 30000
                             truncated = False
                             text_a_llm = text_a[:MAX_LLM_INPUT_CHARS]; text_b_llm = text_b[:MAX_LLM_INPUT_CHARS]
                             if len(text_a) > MAX_LLM_INPUT_CHARS or len(text_b) > MAX_LLM_INPUT_CHARS:
                                 truncated = True
                                 logging.warning(f"Chapter {chap_num}: Input text truncated to {MAX_LLM_INPUT_CHARS} chars for LLM.")
    
                             # Use the new build_prompt function
                             prompt = build_prompt(text_a=text_a_llm, text_b=text_b_llm)
    
                             # Use the modified evaluator function
                             llm_eval_result = call_llm_evaluator_detailed(prompt, chap_num) # Renamed for clarity
    
                             # Store the *entire* result (or error)
                             result_data["llm_detailed_evaluation"] = llm_eval_result
    
                             # Check for success/error
                             if llm_eval_result and "error" not in llm_eval_result:
                                 metrics_ok.append("llm_detailed") # Indicate detailed eval success
                                 logging.info("  ✅ Detailed LLM Evaluation: Success (Extraction + Scoring)")
                             else:
                                 error_detail = llm_eval_result.get("error", "Unknown LLM error") if llm_eval_result else "LLM returned None"
                                 logging.error(f"Chapter {chap_num}: Detailed LLM evaluation failed. Details: {error_detail}")
                                 # Append to overall step error
                                 if not calculation_step_error: calculation_step_error = f"LLM Eval failed: {error_detail}"
                                 else: calculation_step_error += f"; LLM Eval failed: {error_detail}"
    
                         except Exception as e:
                             logging.error(f"Chapter {chap_num}: Unexpected error during LLM step: {e}", exc_info=True)
                             error_detail = f"Unexpected LLM step error: {repr(e)}"
                             result_data["llm_detailed_evaluation"] = {"error": error_detail}
                             if not calculation_step_error: calculation_step_error = error_detail
                             else: calculation_step_error += f"; {error_detail}"
    
                    # --- End Calculations ---
                    result_data["metrics_calculated"] = sorted(metrics_ok)
                    if calculation_step_error:
                        result_data["error"] = calculation_step_error
                        if metrics_ok: book_chapters_partial += 1
                        else: book_chapters_failed += 1
                    elif not metrics_ok:
                         result_data["error"] = "No metrics attempted (check model/library loading)."
                         book_chapters_failed += 1
                    else: book_chapters_success += 1
    
                # 5. Save individual JSON for this chapter
                output_json_filename = f"chapter_{chap_num:05d}_evaluation.json"
                output_json_path = book_output_dir / output_json_filename
                try:
                    with open(output_json_path, "w", encoding="utf-8") as f:
                        json.dump(result_data, f, ensure_ascii=False, indent=2, default=str)
                    logging.debug(f"Saved chapter result to {output_json_path}")
                except Exception as e:
                    logging.error(f"CRITICAL: Failed to save result for Chapter {chap_num} to {output_json_path}: {e}")
                    if result_data["error"] is None: # Downgrade status if saving failed
                        result_data["error"] = f"Failed to save JSON: {repr(e)}"
                        if book_chapters_success > 0: book_chapters_success -= 1
                        elif book_chapters_partial > 0: book_chapters_partial -= 1
                        book_chapters_failed += 1
                    elif not result_data["error"].startswith("Failed to save JSON"):
                         result_data["error"] += f"; Failed to save JSON: {repr(e)}"
    
                # Determine final status string
                # (Same status logic as before)
                if result_data["error"] and not result_data.get("metrics_calculated"): status_str = "Failed (Setup/Read Error)"
                elif result_data["error"]: status_str = "Partial (Calculation/Save Error)"
                else: status_str = "Success"
                chapter_duration = time.time() - chapter_start_time
                logging.info(f"--- Chapter {chap_num} finished in {chapter_duration:.2f} seconds. Status: {status_str} ---")
                # time.sleep(0.2) # Optional delay
    
            # --- End Chapter Loop for Book ---
            logging.info(f"Finished book '{book_name_original}'. Success: {book_chapters_success}, Partial: {book_chapters_partial}, Failed/Skipped: {book_chapters_failed}")
            overall_chapters_processed_success += book_chapters_success
            overall_chapters_processed_partial += book_chapters_partial
            overall_chapters_failed += book_chapters_failed

    # --- End of all books ---
    # (Same summary logging as before)
    end_time_overall = time.time()
    total_time_overall = end_time_overall - start_time_overall
    logging.info(f"\n{'='*20} Overall Evaluation Summary {'='*20}")
    logging.info(f"Processed {books_processed_count} books.")
    logging.info(f"Skipped {books_skipped_count} books.")
    logging.info(f"Total chapters fully processed: {overall_chapters_processed_success}")
    logging.info(f"Total chapters partially processed: {overall_chapters_processed_partial}")
    logging.info(f"Total chapters failed/skipped: {overall_chapters_failed}")
    logging.info(f"Total execution time: {total_time_overall:.2f} seconds ({total_time_overall/60:.2f} minutes)")
    logging.info(f"Individual chapter results saved in subdirectories under: {output_root}")
    logging.info("🎉 Combined evaluation finished.")


# ==============================================================
# 5. Run the Script
# ==============================================================
if __name__ == "__main__":
    # --- Run Main Calculation ---
    calculate_and_save_evaluations(FOLDER_A, FOLDER_B, SEED_DIR, OUTPUT_ROOT)

2025-05-04 03:45:10,477 - INFO - 🚀 Starting combined evaluation process (with detailed LLM eval)...
2025-05-04 03:45:10,539 - INFO - OpenAI client initialized for model gpt-4o.
2025-05-04 03:45:10,539 - INFO - Loading Sentence Transformer model: shibing624/text2vec-base-chinese...
2025-05-04 03:45:10,540 - INFO - Use pytorch device_name: cuda:0
2025-05-04 03:45:10,541 - INFO - Load pretrained SentenceTransformer: shibing624/text2vec-base-chinese
2025-05-04 03:45:11,110 - INFO - ✅ Sentence Transformer model loaded.
2025-05-04 03:45:11,111 - INFO - Scanning /content/10000_word_chapters_expanded for comparison book directories...
2025-05-04 03:45:11,113 - INFO - Found 40 potential comparison book directories in /content/10000_word_chapters_expanded.
2025-05-04 03:45:11,113 - INFO - Found 40 potential books in /content/novels_normalized to process.
2025-05-04 03:45:11,114 - INFO - Scanning /content/10000_word_chapters_expanded for comparison book directories...
2025-05-04 03:45:11,115 - IN

KeyboardInterrupt: 