In [None]:
import pandas as pd
import numpy as np
import ast
import re
from collections import Counter
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from collections import defaultdict
import platform
import glob
import os
import ast

# Choose a Chinese-capable font list for the current operating system.
# Matplotlib walks 'font.sans-serif' in order, so a font that actually
# contains CJK glyphs has to come before any Latin-only font.
system = platform.system()
if system == 'Darwin':  # macOS
    plt.rcParams['font.sans-serif'] = ['Arial Unicode MS', 'Heiti TC', 'PingFang HK', 'Apple Color Emoji']
elif system == 'Windows':
    plt.rcParams['font.sans-serif'] = ['SimHei', 'Microsoft YaHei', 'SimSun']
else:  # Linux and everything else
    # DejaVu Sans has no CJK glyphs; listing it first makes Chinese labels
    # render as empty boxes on Matplotlib versions without font fallback,
    # so it is kept only as the last-resort entry.
    plt.rcParams['font.sans-serif'] = ['WenQuanYi Micro Hei', 'WenQuanYi Zen Hei', 'DejaVu Sans']

# Draw negative numbers with an ASCII hyphen instead of the Unicode minus
# sign, which many CJK fonts do not provide.
plt.rcParams['axes.unicode_minus'] = False

print(f"当前操作系统: {system}")
print(f"当前字体设置: {plt.rcParams['font.sans-serif']}")


In [1]:
import os
import glob
import pandas as pd
import ast

def find_file(pattern):
    """Return the path of the first file matching a glob pattern.

    Matches are sorted so the choice is deterministic when several files
    match; ``glob.glob`` itself returns results in arbitrary order.
    Files whose name starts with ``~$`` are ignored: Office applications
    create such lock files next to an open workbook, and they match
    patterns like ``*.xlsx`` without being readable workbooks.

    Parameters:
    - pattern: glob pattern, as accepted by ``glob.glob``.

    Returns:
    The lexicographically smallest matching path.

    Raises:
    FileNotFoundError: if no usable file matches the pattern.
    """
    candidates = sorted(
        path for path in glob.glob(pattern)
        if not os.path.basename(path).startswith('~$')
    )
    if not candidates:
        raise FileNotFoundError(f"未找到匹配 {pattern} 的文件")
    return candidates[0]

def _parse_review_indices(raw_indices):
    """Normalize one ``Review_Indices`` cell into a list of integer row positions."""
    # A string cell is parsed as a Python literal, e.g. "[0, 3, 7]".
    if isinstance(raw_indices, str):
        raw_indices = ast.literal_eval(raw_indices)

    # Accept any literal sequence, not just lists; a lone scalar becomes a
    # one-element list.
    if isinstance(raw_indices, (list, tuple, set)):
        candidates = list(raw_indices)
    else:
        candidates = [raw_indices]

    positions = []
    for candidate in candidates:
        try:
            position = int(candidate)
        except (TypeError, ValueError, OverflowError):
            # None, NaN (empty cell), infinities and other non-numeric values.
            continue
        # Keeps 3 and 3.0, drops 2.5 and numeric-looking strings.
        if position == candidate:
            positions.append(position)
    return positions


def merge_analysis_with_original(analysis_pattern, original_pattern, analysis_column, output_dir, output_prefix):
    """
    Merge one analysis result column back into the original data and save it.

    Every row of the analysis file carries a ``Review_Indices`` cell listing
    row positions of the original data; the row's ``analysis_column`` value is
    written to each of those positions. Rows of the original data that are
    never referenced keep ``None``. If a position is referenced by several
    analysis rows, the last one wins. Positions outside the original data
    and non-integer entries are ignored.

    Parameters:
    - analysis_pattern: glob pattern of the analysis result workbook
    - original_pattern: glob pattern of the original data workbook
    - analysis_column: name of the analysis column to copy into the original data
    - output_dir: directory the merged workbook is written to (created if missing)
    - output_prefix: prefix of the output file name

    Returns:
    Path of the written ``{output_prefix}_{original base name}.xlsx`` file.

    Raises:
    FileNotFoundError: if either pattern matches no file.
    KeyError: if the analysis workbook lacks ``Review_Indices`` or
        ``analysis_column``.
    """
    # Locate the input workbooks.
    analysis_file = find_file(analysis_pattern)
    original_file = find_file(original_pattern)

    # Load both workbooks.
    analysis_df = pd.read_excel(analysis_file)
    original_df = pd.read_excel(original_file)

    # Fail early with an actionable message instead of a bare KeyError from
    # inside the row loop (column names are case-sensitive).
    missing_columns = [
        column for column in ('Review_Indices', analysis_column)
        if column not in analysis_df.columns
    ]
    if missing_columns:
        raise KeyError(
            f"分析结果文件 {analysis_file} 缺少列 {missing_columns}，"
            f"现有列: {list(analysis_df.columns)}"
        )

    # Work on a copy so the loaded original data stays untouched.
    result_df = original_df.copy()
    result_df[analysis_column] = None
    row_count = len(result_df)

    for _, row in analysis_df.iterrows():
        value = row[analysis_column]
        for position in _parse_review_indices(row['Review_Indices']):
            # read_excel without index_col yields a default RangeIndex, so
            # the label used by .at equals the row position.
            if 0 <= position < row_count:
                result_df.at[position, analysis_column] = value

    # Build the output path from the original workbook's base name.
    file_name = os.path.basename(original_file)
    base_name = os.path.splitext(file_name)[0]
    output_file = os.path.join(output_dir, f"{output_prefix}_{base_name}.xlsx")

    # Make sure the output directory exists before writing.
    os.makedirs(output_dir, exist_ok=True)

    result_df.to_excel(output_file, index=False)
    print(f"已保存结果到 {output_file}")

    return output_file

def main():
    """Merge the matches, defect and needs analysis results into the original data.

    Each job reads its refined analysis workbook, copies one result column
    onto the original data and writes the merged workbook back into the
    analysis directory it came from.
    """
    # The same original workbook is the merge target for every job.
    original_data_pattern = os.path.join('./Data/', '*_校对.xlsx')

    # (analysis directory, analysis file glob, column to merge, output prefix)
    # Column names are case-sensitive.
    merge_jobs = [
        ('生成结果/matches_analysis/', 'refined_*_matches_analysis.xlsx', "Match", "merged_matches"),
        ('生成结果/defect_analysis/', 'refined_*_defects_analysis.xlsx', "Defect", "merged_defect"),
        ('生成结果/needs_analysis/', 'refined_*_needs_analysis.xlsx', "Need", "merged_needs"),
    ]

    for analysis_dir, analysis_glob, column_name, prefix in merge_jobs:
        merge_analysis_with_original(
            os.path.join(analysis_dir, analysis_glob),
            original_data_pattern,
            column_name,
            analysis_dir,
            prefix,
        )

# Run the three merges when this cell (or the file as a script) is executed
# directly; importing the code as a module does not trigger them.
if __name__ == "__main__":
    main()


已保存结果到 生成结果/matches_analysis/merged_matches_双竹2025-01-10之后VOC数据.xlsx
已保存结果到 生成结果/defect_analysis/merged_defect_双竹2025-01-10之后VOC数据.xlsx


KeyError: 'Needs'