# In [None]:
"""
FCLT Batch Update - Optimized for Performance
Improvements:
1. Use pathlib for efficient path handling
2. Detect encoding by reading only a sample (not entire file)
3. Skip rewriting files that contain no double quotes (avoids needless writes)
4. Batch file discovery with glob pattern
5. Pre-compile regex for repeated use
"""
import codecs
import os
import re
from functools import lru_cache
from pathlib import Path

# Folder (UNC network share) that holds the report files to process.
FOLDER_PATH = Path(
    r'\\apac.wdpr.disney.com\Corp\CNKC\Finance\Controllership\FINANCIAL REPORTING\FY_ 2025\Audits\Deloitte\CPL\CIP and FAIA\LTP\In sys report'
)

# Matches "CY" followed by digits, case-insensitively; compiled once at
# import time so it is not rebuilt on every use.
CY_PATTERN = re.compile(r'CY(\d+)', flags=re.I)

# Candidate file encodings, tried in this order (the original author ordered
# them by expected frequency to keep the number of attempts low).
ENCODINGS = (
    'utf-8',
    'gbk',
    'gb2312',
    'latin-1',
)

# Number of leading bytes sampled for encoding detection (8 KiB).
SAMPLE_SIZE = 8 * 1024


def detect_encoding(file_path: Path) -> str | None:
    """Guess a file's text encoding from its leading bytes.

    At most ``SAMPLE_SIZE`` bytes are read from the start of the file, and
    each candidate in ``ENCODINGS`` is tried in order; the first candidate
    that decodes the sample is returned.

    Args:
        file_path: File to inspect.

    Returns:
        The name of the first encoding that decodes the sample, or ``None``
        if the file cannot be read or no candidate fits.
    """
    try:
        with file_path.open('rb') as stream:
            # Read only the sample instead of loading the whole file and
            # slicing it afterwards.
            sample = stream.read(SAMPLE_SIZE)
            # Peek one more byte to learn whether the sample was cut short.
            is_truncated = bool(stream.read(1))
    except OSError:
        return None

    for enc in ENCODINGS:
        try:
            # A truncated sample may end in the middle of a multi-byte
            # character. Decoding with final=False accepts such a dangling
            # prefix, so a valid file is not rejected just because of where
            # the sample boundary fell. A complete file must decode fully.
            decoder = codecs.getincrementaldecoder(enc)()
            decoder.decode(sample, final=not is_truncated)
        except (UnicodeDecodeError, LookupError):
            continue
        return enc
    return None


@lru_cache(maxsize=256)
def extract_cy_number(filename: str) -> int:
    """Return the integer that follows the first "CY" in *filename*.

    Returns 0 when the name contains no "CY<digits>" token. Results are
    memoised per filename by ``lru_cache``.
    """
    found = CY_PATTERN.search(filename)
    if found is None:
        return 0
    digits = found.group(1)
    return int(digits)


def process_file(file_path: Path) -> tuple[bool, str]:
    """Remove every double-quote character from one text file, in place.

    The encoding is detected with ``detect_encoding``; the file is then read,
    stripped of ``"`` characters, and written back with the same encoding.
    Files that contain no double quotes are left untouched.

    Args:
        file_path: File to rewrite.

    Returns:
        A ``(success, message)`` tuple. ``success`` is ``False`` when the
        encoding cannot be detected or when reading/writing raises; the
        message describes the outcome.
    """
    encoding = detect_encoding(file_path)
    if encoding is None:
        return False, "无法识别文件编码"

    try:
        # newline='' turns off newline translation, so the file's original
        # line endings (LF, CRLF or CR) survive the read/write round trip
        # instead of being rewritten to the platform default.
        with file_path.open('r', encoding=encoding, newline='') as src:
            content = src.read()

        # Skip the write entirely when there is nothing to remove.
        if '"' not in content:
            return True, "无需修改（不含引号）"

        with file_path.open('w', encoding=encoding, newline='') as dst:
            dst.write(content.replace('"', ''))
        return True, "引号已替换"

    except Exception as e:
        # Per-file boundary: report the failure to the caller instead of
        # raising. The encoding was detected from a sample only, so decoding
        # the full file can still fail here.
        return False, f"处理失败: {e}"


def main():
    """Process every matching report file and print a per-file summary.

    Collects regular files in ``FOLDER_PATH`` whose names match
    ``*forecast*.txt``, orders them by the number returned by
    ``extract_cy_number``, runs ``process_file`` on each, and prints the
    result of every file followed by overall success/failure totals.
    """
    # Gather and order the candidates in one step; sorted() is stable, so
    # files with the same CY number keep the order glob produced.
    candidates = sorted(
        (entry for entry in FOLDER_PATH.glob('*forecast*.txt') if entry.is_file()),
        key=lambda entry: extract_cy_number(entry.name),
    )

    if not candidates:
        print(f"在 {FOLDER_PATH} 中未找到匹配的文件")
        return

    print(f"找到 {len(candidates)} 个文件待处理\n")

    # Outcome counters keyed by the success flag.
    tally = {True: 0, False: 0}

    for target in candidates:
        print(f'正在处理: {target.name}')
        ok, note = process_file(target)

        mark = '✓' if ok else '✗'
        print(f'  {mark} {target.name} - {note}')
        tally[bool(ok)] += 1

    print(f'\n处理完成! 成功: {tally[True]}, 失败: {tally[False]}')


# Run the batch job as soon as this cell/module is executed.
main()
