In [None]:
import pandas as pd
import numpy as np
import os
from sklearn.cluster import KMeans
from scipy.stats import norm
import statsmodels.formula.api as smf
import warnings
warnings.filterwarnings('ignore', category=UserWarning)
# --- 1. 风险模型函数 (基于新的M4模型) ---
def calculate_r_late(t_days):
    """Return the late-detection risk R_late for a test performed on day ``t_days``.

    The curve is piecewise:
      * days [70, 84]:   linear, ``(2/14) * (t - 70)``
      * days (84, 189]:  linear, ``2 + (4/105) * (t - 84)``
      * days (189, 210]: exponential, ``0.2225 * exp(0.0243 * t)``
    Any other input (including NaN) yields ``np.inf``.
    """
    # Written as a negated range test so NaN also falls into the "infinite" case.
    if not (70 <= t_days <= 210):
        return np.inf
    if t_days <= 84:
        return (2/14) * (t_days - 70)
    if t_days <= 189:
        return 2 + (4/105) * (t_days - 84)
    return 0.2225 * np.exp(0.0243 * t_days)

def calculate_time_value_weight(t_days):
    """Return the time-value weight W(t) for day ``t_days``.

    W(t) is a decreasing logistic curve centred on day 154, multiplied by 800.
    The original note states that the coefficients 0.01728 and 0.16344 already
    include a factor of 0.18.
    """
    logistic_denominator = 1 + np.exp(0.1 * (t_days - 154))
    unscaled_weight = 0.01728 + 0.16344 / logistic_denominator
    return 800 * unscaled_weight

def get_individual_p_fail(t_days, bmi, fe_params, random_effects_subject, resid_std):
    """Return one subject's probability of a failed test on day ``t_days``.

    The predicted concentration combines the fixed effects with the subject's
    random intercept (key ``'Group'``) and random slope on gestational week
    (key ``'孕周'``), plus the BMI and quadratic-week fixed-effect terms.
    Failure is a concentration below 0.04, evaluated under a normal
    distribution with standard deviation ``resid_std``.

    Args:
        t_days: Gestational age in days (converted to weeks internally).
        bmi: The subject's BMI.
        fe_params: Mapping of fixed-effect coefficients.
        random_effects_subject: Mapping with the subject's ``'Group'`` and
            ``'孕周'`` random effects.
        resid_std: Residual standard deviation of the fitted model.
    """
    weeks = t_days / 7.0

    # Subject-specific intercept and linear slope.
    subject_intercept = fe_params['Intercept'] + random_effects_subject['Group']
    subject_slope = fe_params['孕周'] + random_effects_subject['孕周']

    bmi_term = fe_params["Q('孕妇BMI')"] * bmi
    quadratic_term = fe_params["I(孕周 ** 2)"] * (weeks**2)
    predicted = subject_intercept + subject_slope * weeks + bmi_term + quadratic_term

    return norm.cdf((0.04 - predicted) / resid_std)

def calculate_risk_for_group(t_days, group_data, fe_params, random_effects, resid_std):
    """Return the expected total risk of testing a whole group on day ``t_days``.

    The result is ``R_late(t) + mean(P_fail) * W(t) * (R_late(t + 14) - R_late(t))``,
    where the mean failure probability is taken over every row of
    ``group_data`` (columns ``'孕妇代码'`` and ``'孕妇BMI'`` are read).
    Subjects absent from ``random_effects`` (e.g. rows excluded from fitting
    as outliers) are given zero random effects.
    """
    late_risk_now = calculate_r_late(t_days)
    # Extra late-detection risk incurred if a failed test is repeated 14 days later.
    retest_penalty = calculate_r_late(t_days + 14) - late_risk_now

    fail_probabilities = [
        get_individual_p_fail(
            t_days,
            record['孕妇BMI'],
            fe_params,
            random_effects.get(record['孕妇代码'], pd.Series({'Group': 0, '孕周': 0})),
            resid_std,
        )
        for _, record in group_data.iterrows()
    ]

    expected_fail_risk = (
        np.mean(fail_probabilities) * calculate_time_value_weight(t_days) * retest_penalty
    )
    return late_risk_now + expected_fail_risk

# --- 2. 主流程 ---
if __name__ == '__main__':
    # Main optimisation script.
    #   1. Fit the mixed-effects model on IQR-filtered rows only.
    #   2. For each K, cluster the FULL dataset on BMI, find each cluster's
    #      risk-minimising test day, and compute the size-weighted mean risk.
    #   3. Keep the K with the lowest weighted risk and write the result files.
    # Cluster labels and risk curves are cached per K so the winning scheme is
    # exported from exactly the values used during the search, without
    # refitting KMeans or recomputing the curves.
    warnings.filterwarnings("ignore", category=UserWarning)
    # --- calibrate / apply separation ---
    INPUT_FILE_PATH = '../../Data/0/男胎_预处理后数据.csv'
    RESULT_DIR = 'Result'
    os.makedirs(RESULT_DIR, exist_ok=True)

    print("加载数据...")
    df_for_analysis = pd.read_csv(INPUT_FILE_PATH)
    print(f"加载完成，使用完整的 {len(df_for_analysis)} 行预处理数据集进行分析。")

    print("\n步骤1: 创建用于模型拟合的干净数据集...")
    outlier_cols = ['Y染色体浓度', '孕周', '孕妇BMI']  # the age column is no longer filtered
    masks = []
    for col in outlier_cols:
        # Tukey fences: flag values beyond 1.5 * IQR from the quartiles.
        q1 = df_for_analysis[col].quantile(0.25)
        q3 = df_for_analysis[col].quantile(0.75)
        iqr = q3 - q1
        masks.append((df_for_analysis[col] < q1 - 1.5 * iqr) |
                     (df_for_analysis[col] > q3 + 1.5 * iqr))
    df_for_fitting = df_for_analysis[~pd.concat(masks, axis=1).any(axis=1)].copy()
    print(f"用于模型拟合的干净数据集共 {len(df_for_fitting)} 行。")

    print("\n步骤2: 在干净数据集上拟合混合效应模型以获得稳定参数...")
    model_formula = "Q('Y染色体浓度') ~ 孕周 + Q('孕妇BMI') + I(孕周**2)"
    mixed_model = smf.mixedlm(model_formula, df_for_fitting, groups=df_for_fitting["孕妇代码"], re_formula="~孕周")
    results = mixed_model.fit()
    print("模型拟合完成。")

    fe_params = results.fe_params
    random_effects = results.random_effects
    resid_std = np.sqrt(results.scale)

    # --- two-level optimisation: outer loop over K, inner search over test day ---
    print("\n--- 开始执行双层优化流程 ---")
    t_grid = np.arange(70, 197)
    all_results = []
    k_risks = {}
    K_RANGE = range(2, 9)  # candidate numbers of clusters
    labels_by_k = {}       # k -> cluster labels for every row
    risk_curves_by_k = {}  # (k, cluster id) -> risk at each day of t_grid

    for k in K_RANGE:
        print(f"\n------ 正在处理 K = {k} 的情况 ---")
        kmeans = KMeans(n_clusters=k, random_state=42, n_init=10)
        labels_by_k[k] = kmeans.fit_predict(df_for_analysis[['孕妇BMI']])
        df_for_analysis['cluster'] = labels_by_k[k]

        total_weighted_risk = 0

        for i in range(k):
            group_df = df_for_analysis[df_for_analysis['cluster'] == i]

            risks = [calculate_risk_for_group(t, group_df, fe_params, random_effects, resid_std) for t in t_grid]
            risk_curves_by_k[(k, i)] = risks

            min_risk_index = np.argmin(risks)
            best_t = t_grid[min_risk_index]
            min_risk = risks[min_risk_index]

            total_weighted_risk += min_risk * len(group_df)

            bmi_low = group_df['孕妇BMI'].min()
            bmi_high = group_df['孕妇BMI'].max()
            all_results.append({
                'K': k, '群组ID': i, '孕妇数量': len(group_df),
                'BMI范围': f"[{bmi_low:.2f}, {bmi_high:.2f}]",
                '平均BMI': group_df['孕妇BMI'].mean(),
                '最佳时点_天': best_t,
                '最小风险': min_risk
            })
            print(f"  - 分组 {i}: N={len(group_df)}, BMI范围=[{bmi_low:.2f}, {bmi_high:.2f}], "
                  f"最优时点 t*={best_t} 天 (约{best_t/7:.1f}周), 最小期望风险={min_risk:.4f}")

        k_risks[k] = total_weighted_risk / len(df_for_analysis)
        print(f"--- K = {k} 的加权平均总风险为: {k_risks[k]:.6f} ---")

    # --- pick the best scheme and save it ---
    best_k = min(k_risks, key=k_risks.get)
    print(f"\n--- 优化流程结束 ---\n最优分组数 K* = {best_k}，其加权平均总风险最低，为 {k_risks[best_k]:.6f}")

    all_results_df = pd.DataFrame(all_results)
    best_k_results_df = all_results_df[all_results_df['K'] == best_k].copy()

    # Name the groups 群组1..群组K in order of increasing mean BMI.
    ids_by_bmi = best_k_results_df.sort_values('平均BMI')['群组ID'].values
    name_map = {cluster_id: f'群组{rank}' for rank, cluster_id in enumerate(ids_by_bmi, start=1)}
    best_k_results_df['群组名称'] = best_k_results_df['群组ID'].map(name_map)
    best_k_results_df['最佳时点_周'] = best_k_results_df['最佳时点_天'] / 7

    final_cols = ['群组名称', 'BMI范围', '孕妇数量', '平均BMI', '最佳时点_天', '最佳时点_周', '最小风险']
    print("\n最终最优方案详情:")
    print(best_k_results_df.sort_values('平均BMI')[final_cols].to_string(index=False))

    best_k_results_df.to_csv(os.path.join(RESULT_DIR, '问题二_最终分组优化方案.csv'), index=False, encoding='utf-8-sig')

    k_risks_df = pd.DataFrame(list(k_risks.items()), columns=['K值', '加权平均总风险'])
    k_risks_df.to_csv(os.path.join(RESULT_DIR, '问题二_不同K值总风险对比.csv'), index=False, encoding='utf-8-sig')

    # Restore the labels of the winning K (the loop left the labels of the last K).
    df_for_analysis['cluster'] = labels_by_k[best_k]
    df_for_analysis.to_csv(os.path.join(RESULT_DIR, '问题二_带最优分组标签的数据.csv'), index=False, encoding='utf-8-sig')

    risk_curves_data = [
        {'K': best_k, '群组名称': name_map[i], '天数': t, '期望风险': risk}
        for i in range(best_k)
        for t, risk in zip(t_grid, risk_curves_by_k[(best_k, i)])
    ]

    pd.DataFrame(risk_curves_data).to_csv(os.path.join(RESULT_DIR, '问题二_最优方案风险曲线数据.csv'), index=False, encoding='utf-8-sig')

    print("\n所有计算与数据保存已完成。")

加载数据...
加载完成，使用完整的 998 行预处理数据集进行分析。

步骤1: 创建用于模型拟合的干净数据集...
用于模型拟合的干净数据集共 962 行。

步骤2: 在干净数据集上拟合混合效应模型以获得稳定参数...
模型拟合完成。

--- 开始执行双层优化流程 ---

------ 正在处理 K = 2 的情况 ---
  - 分组 0: N=362, BMI范围=[32.94, 46.88], 最优时点 t*=175 天 (约25.0周), 最小期望风险=5.9411
  - 分组 1: N=636, BMI范围=[20.70, 32.93], 最优时点 t*=167 天 (约23.9周), 最小期望风险=5.5142
--- K = 2 的加权平均总风险为: 5.669056 ---

------ 正在处理 K = 3 的情况 ---
  - 分组 0: N=416, BMI范围=[31.53, 35.44], 最优时点 t*=170 天 (约24.3周), 最小期望风险=5.7249
  - 分组 1: N=132, BMI范围=[35.49, 46.88], 最优时点 t*=175 天 (约25.0周), 最小期望风险=6.2665
  - 分组 2: N=450, BMI范围=[20.70, 31.48], 最优时点 t*=165 天 (约23.6周), 最小期望风险=5.4044
--- K = 3 的加权平均总风险为: 5.652034 ---

------ 正在处理 K = 4 的情况 ---
  - 分组 0: N=383, BMI范围=[20.70, 31.00], 最优时点 t*=165 天 (约23.6周), 最小期望风险=5.4261
  - 分组 1: N=372, BMI范围=[31.02, 33.89], 最优时点 t*=171 天 (约24.4周), 最小期望风险=5.7551
  - 分组 2: N=43, BMI范围=[38.22, 46.88], 最优时点 t*=175 天 (约25.0周), 最小期望风险=5.8428


In [None]:
import pandas as pd
import numpy as np
import os
from sklearn.cluster import KMeans
from scipy.stats import norm
import statsmodels.formula.api as smf
import warnings
from itertools import product

# --- 1. 风险模型函数 (可接受误差参数) ---
N_SIMULATIONS = 500  # 蒙特卡洛模拟次数

def calculate_r_late(t_days):
    """Return the late-detection risk R_late for a test on day ``t_days``.

    Linear on days [70, 84] and (84, 189], exponential on (189, 210];
    ``np.inf`` for any day outside [70, 210].
    """
    # The three ranges are disjoint, so the order of the checks is irrelevant.
    if 189 < t_days <= 210:
        return 0.2225 * np.exp(0.0243 * t_days)
    if 84 < t_days <= 189:
        return 2 + (4/105) * (t_days - 84)
    if 70 <= t_days <= 84:
        return (2/14) * (t_days - 70)
    return np.inf

def calculate_time_value_weight(t_days, scale=800.0):
    """Return the time-value weight W(t) for day ``t_days``.

    W(t) is a decreasing logistic curve centred on day 154, multiplied by
    ``scale``. The default of 800 matches the weight used by the main
    optimisation script in this notebook, so the robustness analysis
    evaluates the same risk model it is meant to stress-test. Pass
    ``scale=1.0`` for the unscaled curve.

    Args:
        t_days: Gestational age in days.
        scale: Multiplicative factor applied to the logistic weight.
    """
    return scale * (0.01728 + 0.16344 / (1 + np.exp(0.1 * (t_days - 154))))

def get_individual_p_fail_with_error(t_days, bmi, fe_params, random_effects_subject, error_std):
    """Return one subject's failure probability under Gaussian measurement error.

    The measured concentration is modelled as the subject's predicted value
    plus ``N(0, error_std**2)`` noise; failure means a measured value below
    0.04. The probability is computed in closed form,
    ``Phi((0.04 - y_hat) / error_std)``. This is the quantity an unseeded
    Monte Carlo draw would only approximate, so results are reproducible
    between runs.

    Args:
        t_days: Gestational age in days (converted to weeks internally).
        bmi: The subject's BMI.
        fe_params: Mapping of fixed-effect coefficients.
        random_effects_subject: Mapping with the subject's ``'Group'``
            (intercept) and ``'孕周'`` (slope) random effects.
        error_std: Standard deviation of the measurement error; 0 means the
            prediction is taken as exact.

    Returns:
        A float in [0, 1].

    Raises:
        ValueError: If ``error_std`` is negative.
    """
    if error_std < 0:
        raise ValueError("error_std must be non-negative")

    t_weeks = t_days / 7.0
    individual_intercept = fe_params['Intercept'] + random_effects_subject['Group']
    individual_slope = fe_params['孕周'] + random_effects_subject['孕周']

    y_hat_individual = (individual_intercept +
                        individual_slope * t_weeks +
                        fe_params["Q('孕妇BMI')"] * bmi +
                        fe_params["I(孕周 ** 2)"] * (t_weeks**2))

    if error_std == 0:
        # No noise: the test fails exactly when the prediction is below threshold.
        return float(y_hat_individual < 0.04)
    return float(norm.cdf((0.04 - y_hat_individual) / error_std))

def calculate_risk_for_group(t_days, group_data, fe_params, random_effects, delta_t, error_std):
    """Return the expected total risk of testing a whole group on day ``t_days``.

    Computes ``R_late(t) + mean(P_fail) * W(t) * (R_late(t + delta_t) - R_late(t))``.
    ``P_fail`` is evaluated for every row of ``group_data`` with measurement
    error ``error_std``; subjects missing from ``random_effects`` are given
    zero random effects.

    Args:
        t_days: Candidate test day.
        group_data: DataFrame with columns ``'孕妇代码'`` and ``'孕妇BMI'``.
        fe_params: Fixed-effect coefficients of the fitted model.
        random_effects: Mapping from subject id to that subject's random effects.
        delta_t: Delay in days before a failed test is repeated.
        error_std: Measurement-error standard deviation passed to the
            per-subject failure probability.
    """
    late_risk_now = calculate_r_late(t_days)
    retest_penalty = calculate_r_late(t_days + delta_t) - late_risk_now

    fail_probabilities = [
        get_individual_p_fail_with_error(
            t_days,
            record['孕妇BMI'],
            fe_params,
            random_effects.get(record['孕妇代码'], pd.Series({'Group': 0, '孕周': 0})),
            error_std,
        )
        for _, record in group_data.iterrows()
    ]

    expected_fail_risk = (
        np.mean(fail_probabilities) * calculate_time_value_weight(t_days) * retest_penalty
    )
    return late_risk_now + expected_fail_risk

# --- 2. 主分析流程 ---
if __name__ == '__main__':
    # Robustness analysis: with the BMI clustering fixed at the best K, re-run
    # the per-cluster search for the optimal test day while varying
    #   (a) the retest delay delta_t, and
    #   (b) the measurement-error level (multiples of the residual std).
    warnings.filterwarnings("ignore", category=UserWarning)
    INPUT_FILE_PATH = '../../Data/0/男胎_预处理后数据.csv'
    RESULT_DIR = 'Result'
    os.makedirs(RESULT_DIR, exist_ok=True)

    print("加载数据并拟合模型...")
    df_for_analysis = pd.read_csv(INPUT_FILE_PATH)

    outlier_cols = ['Y染色体浓度', '孕周', '孕妇BMI']
    masks = []
    for col in outlier_cols:
        # Tukey fences: flag values beyond 1.5 * IQR from the quartiles.
        q1 = df_for_analysis[col].quantile(0.25)
        q3 = df_for_analysis[col].quantile(0.75)
        iqr = q3 - q1
        masks.append((df_for_analysis[col] < q1 - 1.5 * iqr) |
                     (df_for_analysis[col] > q3 + 1.5 * iqr))
    df_for_fitting = df_for_analysis[~pd.concat(masks, axis=1).any(axis=1)].copy()

    model_formula = "Q('Y染色体浓度') ~ 孕周 + Q('孕妇BMI') + I(孕周**2)"
    mixed_model = smf.mixedlm(model_formula, df_for_fitting, groups=df_for_fitting["孕妇代码"], re_formula="~孕周")
    results = mixed_model.fit()
    fe_params, random_effects, resid_std = results.fe_params, results.random_effects, np.sqrt(results.scale)

    # Read the best K from the main optimisation output; fall back to 7 if absent.
    try:
        k_risks_df = pd.read_csv(os.path.join(RESULT_DIR, '问题二_不同K值总风险对比.csv'))
        # Select the single cell rather than the whole row: a row mixing the
        # integer K column with the float risk column is upcast to float, and
        # range() / KMeans(n_clusters=...) both require a true int.
        BEST_K = int(k_risks_df.loc[k_risks_df['加权平均总风险'].idxmin(), 'K值'])
    except FileNotFoundError:
        BEST_K = 7
        print(f"警告: 未找到主优化结果，默认使用 K={BEST_K} 进行稳健性分析。")

    print(f"\n固定最优分组 K={BEST_K} 进行稳健性分析。")
    kmeans = KMeans(n_clusters=BEST_K, random_state=42, n_init=10)
    df_for_analysis['cluster'] = kmeans.fit_predict(df_for_analysis[['孕妇BMI']])

    delta_t_values = [14, 21, 28]
    error_multipliers = [0.0, 0.5, 1.0, 1.5]
    t_grid = np.arange(70, 197)
    robustness_results = []

    print("\n--- 开始执行稳健性分析计算 ---")

    print("\n分析机会成本时间延迟 Δt...")
    for delta_t in delta_t_values:
        print(f"  正在分析: Δt = {delta_t} 天")
        for i in range(BEST_K):
            group_df = df_for_analysis[df_for_analysis['cluster'] == i]
            # NOTE(review): this sweep runs with error_std = 0.0 (no noise),
            # not with resid_std -- confirm that is the intended baseline.
            risks = [calculate_risk_for_group(t, group_df, fe_params, random_effects, delta_t, 0.0) for t in t_grid]
            best_t = t_grid[np.argmin(risks)]
            robustness_results.append({'analysis_type': 'delta_t', 'param_value': delta_t, 'cluster_id': i, 'best_t_days': best_t, 'mean_bmi': group_df['孕妇BMI'].mean()})

    print("\n分析检测误差的影响...")
    for multiplier in error_multipliers:
        error_std = resid_std * multiplier
        print(f"  正在分析: 误差水平 = {multiplier}σ (σ_error={error_std:.4f})")
        for i in range(BEST_K):
            group_df = df_for_analysis[df_for_analysis['cluster'] == i]
            risks = [calculate_risk_for_group(t, group_df, fe_params, random_effects, 14, error_std) for t in t_grid]
            best_t = t_grid[np.argmin(risks)]
            robustness_results.append({'analysis_type': 'error', 'param_value': multiplier, 'cluster_id': i, 'best_t_days': best_t, 'mean_bmi': group_df['孕妇BMI'].mean()})

    results_df = pd.DataFrame(robustness_results)
    # Name clusters 群组1..群组K in order of increasing mean BMI.
    mean_bmi_per_cluster = results_df.groupby('cluster_id')['mean_bmi'].first().sort_values().index
    name_map = {cluster_id: f'群组{i+1}' for i, cluster_id in enumerate(mean_bmi_per_cluster)}
    results_df['group_name'] = results_df['cluster_id'].map(name_map)

    SAVE_PATH = os.path.join(RESULT_DIR, '问题二_稳健性分析结果.csv')
    results_df.to_csv(SAVE_PATH, index=False, encoding='utf-8-sig')

    print(f"\n--- 稳健性分析计算完成 ---\n结果已保存至: {SAVE_PATH}")

In [None]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
from scipy.stats import norm
import statsmodels.formula.api as smf
from sklearn.cluster import KMeans
import warnings

# --- 1. 绘图风格与中文支持设置 ---
def setup_plot_style():
    """Configure matplotlib for the report figures and return the colour palette.

    ``plt.style.use('default')`` resets rcParams to their defaults, so it must
    run BEFORE the CJK font and minus-sign settings; otherwise those settings
    are discarded and Chinese labels lose the SimHei font.

    Returns:
        dict mapping colour names (``'blue'``, ``'teal'``, ...) to hex codes.
    """
    academic_colors = {
        'blue': '#0c5da5', 'teal': '#00b9a9', 'orange': '#f7931e',
        'red': '#d52b1e', 'purple': '#8f5b9f', 'gray': '#808080',
        'green': '#5a8e00', 'brown': '#a0522d'
    }
    # Reset first, then layer every customisation on top.
    plt.style.use('default')
    plt.rcParams.update({
        'font.sans-serif': ['SimHei'],   # font used for the Chinese labels
        'axes.unicode_minus': False,     # use the ASCII hyphen for minus signs
        'axes.grid': True, 'grid.color': '#cccccc', 'grid.linestyle': '--',
        'grid.linewidth': 0.5, 'axes.edgecolor': 'black', 'axes.linewidth': 1.5,
        'axes.labelsize': 18, 'axes.titlesize': 20, 'xtick.labelsize': 16,
        'ytick.labelsize': 16, 'legend.fontsize': 14, 'figure.figsize': [12, 8],
        'figure.dpi': 300,
    })
    return academic_colors

# --- 2. 核心绘图函数 ---
def plot_risk_curves_for_best_k(risk_curves_df, summary_df, colors, result_dir):
    """Figure 1: expected-risk curve of every group in the best grouping scheme.

    One line is drawn per row of ``summary_df`` (x axis in weeks), with a
    marker at that group's optimal time and minimum risk. The colour is chosen
    by the row's index label modulo the palette size. The figure is saved as a
    PNG in ``result_dir``.
    """
    fig, ax = plt.subplots()
    palette = list(colors.values())

    for row_label, group_row in summary_df.iterrows():
        line_color = palette[row_label % len(palette)]
        name = group_row['群组名称']
        curve = risk_curves_df.loc[risk_curves_df['群组名称'] == name]
        legend_text = f"{name} (BMI: {group_row['BMI范围']})"

        ax.plot(curve['天数']/7, curve['期望风险'],
                label=legend_text, color=line_color, lw=2.5)
        # Mark the group's optimum on its curve.
        ax.plot(group_row['最佳时点_周'], group_row['最小风险'], 'o',
                color=line_color, markersize=12,
                markeredgecolor='black', markeredgewidth=1.5)

    ax.set_xlabel('检测孕周')
    ax.set_ylabel('总期望风险')
    ax.legend(title='BMI 分组')
    ax.xaxis.set_major_locator(mticker.MultipleLocator(2))
    ax.xaxis.set_minor_locator(mticker.MultipleLocator(1))
    ax.set_xlim(left=10)
    plt.tight_layout()
    plt.savefig(os.path.join(result_dir, '图1_最优分组方案期望风险曲线.png'))
    plt.close()
    print("图1: 最优分组方案期望风险曲线 已保存。")

def plot_risk_composition(risk_curves_df, summary_df, colors, result_dir):
    """Figure 2: split one group's total expected risk into R_late and R_fail.

    The representative group is the first row of ``summary_df``. R_late is
    recomputed on days 70..196, and R_fail is the total risk minus R_late,
    floored at zero. The figure is skipped (with a printed warning) when the
    group's curve is missing or does not have one point per grid day.
    """
    def calculate_r_late(t_days):
        # Local copy of the piecewise late-detection risk curve.
        if 70 <= t_days <= 84:
            return (2/14) * (t_days - 70)
        if 84 < t_days <= 189:
            return 2 + (4/105) * (t_days - 84)
        if 189 < t_days <= 210:
            return 0.2225 * np.exp(0.0243 * t_days)
        return np.inf

    t_grid_days = np.arange(70, 197)
    weeks_axis = t_grid_days / 7
    r_late_curve = np.array([calculate_r_late(day) for day in t_grid_days])

    rep_group_name = summary_df['群组名称'].iloc[0]  # first group of the (sorted) summary
    group_rows = risk_curves_df[risk_curves_df['群组名称'] == rep_group_name]
    total_risk_df = group_rows.sort_values('天数')
    if total_risk_df.empty or len(total_risk_df) != len(t_grid_days):
        print("警告: 风险曲线数据异常，跳过风险构成图绘制。")
        return

    total_risk_curve = total_risk_df['期望风险'].values
    residual = total_risk_curve - r_late_curve
    r_fail_curve = np.where(residual < 0, 0, residual)

    fig, ax = plt.subplots()
    ax.plot(weeks_axis, total_risk_curve, label='总期望风险 (R_total)', color=colors['red'], lw=3)
    ax.plot(weeks_axis, r_late_curve, label='过晚检测风险 (R_late)', color=colors['blue'], lw=2, linestyle='--')
    ax.plot(weeks_axis, r_fail_curve, label='检测失败风险 (R_fail)', color=colors['teal'], lw=2, linestyle=':')

    # Highlight the minimum of the total-risk curve.
    argmin_idx = np.argmin(total_risk_curve)
    best_t_weeks = t_grid_days[argmin_idx] / 7
    lowest_risk = total_risk_curve[argmin_idx]
    ax.axvline(x=best_t_weeks, color=colors['gray'], linestyle='-.', lw=1.5,
               label=f'最优时点 (约{best_t_weeks:.1f}周)')
    ax.plot(best_t_weeks, lowest_risk, 'o', color=colors['red'], markersize=12,
            markeredgecolor='black', markeredgewidth=1.5)

    ax.set_xlabel('检测孕周')
    ax.set_ylabel('期望风险')
    ax.legend()
    ax.set_xlim(10, 28)
    ax.set_ylim(bottom=0)
    ax.xaxis.set_major_locator(mticker.MultipleLocator(2))
    plt.tight_layout()
    plt.savefig(os.path.join(result_dir, '图2_期望风险构成分析.png'))
    plt.close()
    print("图2: 期望风险构成分析 已保存。")

def plot_bmi_clusters(labeled_data_df, colors, result_dir):
    """Figure 3: scatter of BMI against gestational week, coloured by group.

    Groups come from the ``'group_name'`` column and are drawn in order of
    increasing mean BMI. The number of distinct group names is used in the
    output file name.
    """
    fig, ax = plt.subplots()
    palette = list(colors.values())

    groups_by_bmi = list(labeled_data_df.groupby('group_name'))
    groups_by_bmi.sort(key=lambda item: item[1]['孕妇BMI'].mean())

    for position, (name, members) in enumerate(groups_by_bmi):
        ax.scatter(members['孕妇BMI'], members['孕周'],
                   color=palette[position % len(palette)],
                   alpha=0.6, label=name, s=50)

    ax.set_xlabel('孕妇BMI')
    ax.set_ylabel('孕周')
    ax.legend(title='BMI 分组')
    best_k = labeled_data_df['group_name'].nunique()
    plt.tight_layout()
    plt.savefig(os.path.join(result_dir, f'图3_最优分组数K{best_k}_BMI聚类结果.png'))
    plt.close()
    print(f"图3: 最优分组数K={best_k}的BMI聚类结果 已保存。")

def plot_k_risks(k_risks_df, colors, result_dir):
    """Figure 4: bar chart of the weighted mean total risk for each K.

    All bars are drawn in blue; the bar with the lowest risk is drawn over in
    red. Each bar is annotated with its value to five decimals.
    """
    fig, ax = plt.subplots(figsize=(10, 7))
    k_values = k_risks_df['K值']
    risk_values = k_risks_df['加权平均总风险']

    bars = ax.bar(k_values, risk_values, color=colors['blue'], width=0.6)
    winner = k_risks_df.loc[risk_values.idxmin()]
    ax.bar(winner['K值'], winner['加权平均总风险'], color=colors['red'], width=0.6)

    ax.set_xlabel('分组数量 (K)')
    ax.set_ylabel('加权平均总风险')
    ax.set_xticks(k_values)
    for rect in bars:
        height = rect.get_height()
        centre_x = rect.get_x() + rect.get_width()/2.0
        ax.text(centre_x, height, f'{height:.5f}', va='bottom', ha='center', fontsize=14)
    plt.tight_layout()
    plt.savefig(os.path.join(result_dir, '图4_不同K值总风险对比.png'))
    plt.close()
    print("图4: 不同K值总风险对比 已保存。")

def plot_30_individual_trends(colors, result_dir):
    """Figure 5: probability that Y concentration reaches 0.04, per subject, over time.

    Refits the mixed-effects model on IQR-filtered rows, samples up to 30
    DISTINCT subjects (one row per ``'孕妇代码'``, so repeated measurements
    cannot put the same woman in the sample twice), and plots each subject's
    pass probability ``1 - Phi((0.04 - y_hat) / resid_std)`` for days 0..196
    together with the sample mean. Subjects excluded from fitting get zero
    random effects. For each subject the BMI of her first row in the file is
    used.

    Args:
        colors: Palette dict providing at least ``'blue'`` and ``'red'``.
        result_dir: Directory the PNG is written to.
    """
    print("\n正在为图5执行模型拟合与计算...")
    df_for_analysis = pd.read_csv('../../Data/0/男胎_预处理后数据.csv')

    outlier_cols = ['Y染色体浓度', '孕周', '孕妇BMI']
    masks = []
    for col in outlier_cols:
        # Tukey fences: flag values beyond 1.5 * IQR from the quartiles.
        q1 = df_for_analysis[col].quantile(0.25)
        q3 = df_for_analysis[col].quantile(0.75)
        iqr = q3 - q1
        masks.append((df_for_analysis[col] < q1 - 1.5 * iqr) |
                     (df_for_analysis[col] > q3 + 1.5 * iqr))
    df_for_fitting = df_for_analysis[~pd.concat(masks, axis=1).any(axis=1)].copy()

    model_formula = "Q('Y染色体浓度') ~ 孕周 + Q('孕妇BMI') + I(孕周**2)"
    mixed_model = smf.mixedlm(model_formula, df_for_fitting, groups=df_for_fitting["孕妇代码"], re_formula="~孕周")
    results = mixed_model.fit()
    fe_params, random_effects = results.fe_params, results.random_effects
    resid_std = np.sqrt(results.scale)

    # One row per subject, and never ask for more subjects than exist.
    unique_subjects = df_for_analysis.drop_duplicates(subset='孕妇代码')
    n_samples = min(30, len(unique_subjects))
    sampled_individuals = unique_subjects.sample(n=n_samples, random_state=42)

    # Loop-invariant quantities.
    t_grid = np.arange(0, 197)
    t_weeks = t_grid / 7.0
    zero_effects = pd.Series({'Group': 0, '孕周': 0})
    all_curves = []

    for _, row in sampled_individuals.iterrows():
        random_effects_subject = random_effects.get(row['孕妇代码'], zero_effects)
        intercept = fe_params['Intercept'] + random_effects_subject['Group']
        slope = fe_params['孕周'] + random_effects_subject['孕周']
        y_hat = intercept + slope * t_weeks + fe_params["Q('孕妇BMI')"] * row['孕妇BMI'] + fe_params["I(孕周 ** 2)"] * (t_weeks**2)
        z = (0.04 - y_hat) / resid_std
        all_curves.append(1 - norm.cdf(z))

    fig, ax = plt.subplots(figsize=(14, 9))
    for curve in all_curves:
        ax.plot(t_grid/7, curve, color=colors['blue'], lw=0.8, alpha=0.4)
    # The label reports the actual sample size (30 whenever enough subjects exist).
    ax.plot(t_grid/7, np.mean(all_curves, axis=0), color=colors['red'], lw=3, label=f'{n_samples}个样本平均趋势')
    ax.axhline(y=0.95, color='black', linestyle='--', lw=1.5, label='95% 可靠性阈值')
    ax.axvline(x=10, color='gray', linestyle=':', lw=1.5, label='10周 (临床窗口起点)')

    ax.set_xlabel('检测孕周')
    ax.set_ylabel('Y染色体浓度达标概率')
    ax.legend()
    ax.yaxis.set_major_formatter(mticker.PercentFormatter(xmax=1.0))
    ax.xaxis.set_major_locator(mticker.MultipleLocator(4))
    ax.xaxis.set_minor_locator(mticker.MultipleLocator(1))
    ax.set_xlim(0, 28)
    ax.set_ylim(0, 1.05)
    plt.tight_layout()
    plt.savefig(os.path.join(result_dir, '图5_30个孕妇个体概率趋势.png'))
    plt.close()
    print("图5: 30个孕妇个体概率趋势 已保存。")

def plot_sensitivity_delta_t(results_df, colors, result_dir):
    """Figure 6: sensitivity of each group's optimal test week to the retest delay.

    Uses the rows of ``results_df`` with ``analysis_type == 'delta_t'`` and
    draws one line per group (x: delta_t in days, y: optimal time in weeks).

    When ``group_name`` is a Categorical, groups are drawn in category order
    rather than order of appearance, so colours and legend entries follow the
    ordering chosen by the caller. Rows with a missing group name are ignored.
    """
    df_subset = results_df[results_df['analysis_type'] == 'delta_t'].copy()
    fig, ax = plt.subplots()
    color_keys = list(colors.keys())

    names = df_subset['group_name'].dropna()
    if isinstance(names.dtype, pd.CategoricalDtype):
        # Series.unique() ignores category order; walk the categories instead.
        present = set(names.unique())
        group_order = [cat for cat in names.cat.categories if cat in present]
    else:
        group_order = list(names.unique())

    for i, group_name in enumerate(group_order):
        group_data = df_subset[df_subset['group_name'] == group_name].sort_values('param_value')
        color = colors[color_keys[i % len(color_keys)]]
        ax.plot(group_data['param_value'], group_data['best_t_days'] / 7, marker='o', markersize=10, linestyle='-', lw=2.5, label=group_name, color=color)

    ax.set_xlabel('机会成本时间延迟 Δt (天)')
    ax.set_ylabel('最优NIPT时点 (周)')
    # Legend sits outside the axes; tight_layout leaves room for it on the right.
    ax.legend(title='BMI 分组', bbox_to_anchor=(1.05, 1), loc='upper left')
    ax.set_xticks([14, 21, 28])
    plt.tight_layout(rect=[0, 0, 0.85, 1])
    plt.savefig(os.path.join(result_dir, '图6_敏感性分析_delta_t.png'))
    plt.close()
    print("图6: 机会成本时间延迟 Δt 敏感性分析图 已保存。")

def plot_measurement_error_impact(results_df, colors, result_dir):
    """Figure 7: effect of the measurement-error level on each group's optimal week.

    Uses the rows of ``results_df`` with ``analysis_type == 'error'`` and
    draws one line per group (x: error level as a multiple of the residual
    std, y: optimal time in weeks).

    When ``group_name`` is a Categorical, groups are drawn in category order
    rather than order of appearance, so colours and legend entries follow the
    ordering chosen by the caller. Rows with a missing group name are ignored.
    """
    df_subset = results_df[results_df['analysis_type'] == 'error'].copy()
    fig, ax = plt.subplots()
    color_keys = list(colors.keys())

    names = df_subset['group_name'].dropna()
    if isinstance(names.dtype, pd.CategoricalDtype):
        # Series.unique() ignores category order; walk the categories instead.
        present = set(names.unique())
        group_order = [cat for cat in names.cat.categories if cat in present]
    else:
        group_order = list(names.unique())

    for i, group_name in enumerate(group_order):
        group_data = df_subset[df_subset['group_name'] == group_name].sort_values('param_value')
        color = colors[color_keys[i % len(color_keys)]]
        ax.plot(group_data['param_value'], group_data['best_t_days'] / 7, marker='s', markersize=10, linestyle='--', lw=2.5, label=group_name, color=color)

    ax.set_xlabel('检测误差水平 (残差标准误σ的倍数)')
    ax.set_ylabel('最优NIPT时点 (周)')
    # Legend sits outside the axes; tight_layout leaves room for it on the right.
    ax.legend(title='BMI 分组', bbox_to_anchor=(1.05, 1), loc='upper left')
    ax.set_xticks([0.0, 0.5, 1.0, 1.5])
    ax.set_xticklabels(['0σ (无误差)', '0.5σ', '1.0σ', '1.5σ'])
    plt.tight_layout(rect=[0, 0, 0.85, 1])
    plt.savefig(os.path.join(result_dir, '图7_检测误差影响分析.png'))
    plt.close()
    print("图7: 检测误差影响分析图 已保存。")

# --- 3. 主流程 ---
if __name__ == '__main__':
    # Visualisation script: loads the CSVs written by the optimisation and
    # robustness scripts and renders figures 1-7 into RESULT_DIR.
    warnings.filterwarnings("ignore", category=UserWarning)
    RESULT_DIR = 'Result'

    try:
        summary_df = pd.read_csv(os.path.join(RESULT_DIR, '问题二_最终分组优化方案.csv'))
        risk_curves_df = pd.read_csv(os.path.join(RESULT_DIR, '问题二_最优方案风险曲线数据.csv'))
        labeled_data_df = pd.read_csv(os.path.join(RESULT_DIR, '问题二_带最优分组标签的数据.csv'))
        k_risks_df = pd.read_csv(os.path.join(RESULT_DIR, '问题二_不同K值总风险对比.csv'))
        robustness_df = pd.read_csv(os.path.join(RESULT_DIR, '问题二_稳健性分析结果.csv'))
        print("所有结果文件加载成功。")
    except FileNotFoundError as e:
        print(f"错误: 必需的CSV文件未找到: {e.filename}\n请先成功运行所有计算脚本。")
        # exit() is a helper installed by the `site` module for interactive
        # use and reports success; raise SystemExit with a non-zero status so
        # the failure is visible to whatever launched the script.
        raise SystemExit(1)

    colors = setup_plot_style()

    # Re-number the groups 群组1..群组K in order of increasing mean BMI and
    # apply the same renaming to the risk-curve table.
    summary_df_sorted = summary_df.sort_values('平均BMI').reset_index(drop=True)
    original_names = summary_df_sorted['群组名称'].tolist()
    ordered_group_names = [f'群组{i+1}' for i in range(len(summary_df_sorted))]
    summary_df_sorted['群组名称'] = ordered_group_names

    original_to_new_name_map = dict(zip(original_names, ordered_group_names))
    risk_curves_df['群组名称'] = risk_curves_df['群组名称'].map(original_to_new_name_map)

    # Re-cluster the labelled data with the same KMeans settings as the
    # optimisation script and name the clusters by increasing mean BMI.
    best_k = len(summary_df)
    kmeans = KMeans(n_clusters=best_k, random_state=42, n_init=10)
    labeled_data_df['cluster'] = kmeans.fit_predict(labeled_data_df[['孕妇BMI']])
    mean_bmi_per_cluster = labeled_data_df.groupby('cluster')['孕妇BMI'].mean().sort_values().index
    cluster_to_name_map = {cluster_id: f'群组{i+1}' for i, cluster_id in enumerate(mean_bmi_per_cluster)}
    labeled_data_df['group_name'] = labeled_data_df['cluster'].map(cluster_to_name_map)

    plot_risk_curves_for_best_k(risk_curves_df, summary_df_sorted, colors, RESULT_DIR)
    plot_risk_composition(risk_curves_df, summary_df_sorted, colors, RESULT_DIR)
    plot_bmi_clusters(labeled_data_df, colors, RESULT_DIR)
    plot_k_risks(k_risks_df, colors, RESULT_DIR)
    plot_30_individual_trends(colors, RESULT_DIR)

    # Ordered categorical so the sensitivity plots can follow the BMI ordering.
    robustness_df['group_name'] = pd.Categorical(robustness_df['group_name'], categories=ordered_group_names, ordered=True)
    plot_sensitivity_delta_t(robustness_df, colors, RESULT_DIR)
    plot_measurement_error_impact(robustness_df, colors, RESULT_DIR)

    print("\n所有可视化图表已生成完毕。")