In [1]:
import pandas as pd
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline as ImbPipeline
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.impute import SimpleImputer # 用于处理问题4模型中的NaN
import os

# 设置 joblib 临时文件夹，避免 UnicodeEncodeError
# 请确保 'C:/temp_joblib' 文件夹存在且可写
os.environ['JOBLIB_TEMP_FOLDER'] = 'C:/temp_joblib'

print("--- 问题5：多目标治疗策略优化 ---")

# --- 1. 数据加载和预处理 (与问题2/3/4相同，确保环境一致性) ---
github_raw_url = 'https://raw.githubusercontent.com/Astraeushub/Astraeushub/main/%E9%99%84%E4%BB%B61.xlsx'
try:
    df = pd.read_excel(github_raw_url)
    print("数据加载成功！")
except Exception as e:
    print(f"错误：从GitHub加载文件失败。\n错误信息：{e}")
    exit()

# 删除多余的 'Unnamed' 列
unnamed_cols = [col for col in df.columns if 'Unnamed:' in col]
df.drop(columns=unnamed_cols, inplace=True)

# 清洗列名
df.rename(columns={
    '妊娠时间（周数）': '妊娠时间',
    '整晚睡眠时间（时：分：秒）': '整晚睡眠时间',
    '婴儿年龄（月）': '婴儿年龄',
    '婴儿行为特征': '婴儿行为特征'
}, inplace=True)
df.columns = df.columns.str.strip()

# 处理 '整晚睡眠时间' 列
def convert_time_to_hours(time_str):
    if pd.isna(time_str):
        return np.nan
    try:
        if isinstance(time_str, (int, float)):
            return float(time_str)
        parts = str(time_str).split(':')
        hours = int(parts[0])
        minutes = int(parts[1])
        return hours + minutes / 60
    except Exception:
        return np.nan

df['整晚睡眠时间'] = df['整晚睡眠时间'].apply(convert_time_to_hours)

# --- 2. 重新训练问题2和问题4的最佳模型 (确保模型可用) ---

# --- 2.1 问题2模型 (婴儿行为特征预测) ---
# 训练集：'婴儿行为特征' 不为空的数据
df_train_P2 = df.dropna(subset=['婴儿行为特征']).copy()
# 特征列
features_P2 = [
    '母亲年龄', '婚姻状况', '教育程度', '妊娠时间', '分娩方式',
    'CBTS', 'EPDS', 'HADS', '婴儿性别', '婴儿年龄'
]
target_P2 = '婴儿行为特征'
X_train_P2 = df_train_P2[features_P2]
y_train_P2 = df_train_P2[target_P2]

numerical_features_P2 = ['母亲年龄', '妊娠时间', 'CBTS', 'EPDS', 'HADS', '婴儿年龄']
categorical_features_P2 = ['婚姻状况', '教育程度', '分娩方式', '婴儿性别']

preprocessor_P2 = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_features_P2),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features_P2)
    ],
    remainder='passthrough'
)
pipeline_P2 = ImbPipeline(steps=[('preprocessor', preprocessor_P2),
                              ('smote', SMOTE(random_state=42)),
                              ('classifier', RandomForestClassifier(random_state=42))])
param_grid_P2 = { # 使用问题2中找到的最佳参数
    'classifier__n_estimators': [200],
    'classifier__max_depth': [None],
    'classifier__min_samples_split': [5],
    'classifier__min_samples_leaf': [1]
}
cv_P2 = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
grid_search_P2 = GridSearchCV(pipeline_P2, param_grid_P2, cv=cv_P2, scoring='f1_weighted', n_jobs=-1, verbose=0)
grid_search_P2.fit(X_train_P2, y_train_P2)
best_model_P2 = grid_search_P2.best_estimator_
print("问题2的最佳模型已重新训练并加载。")

# --- 2.2 问题4模型 (婴儿综合睡眠质量预测) ---
# 创建“综合睡眠质量”标签 (与问题4相同)
df_with_sleep_data = df.dropna(subset=['整晚睡眠时间', '睡醒次数', '入睡方式']).copy()
def assign_comprehensive_sleep_quality(row):
    score = 0
    if row['整晚睡眠时间'] > 11.5: score += 2
    elif 10.5 <= row['整晚睡眠时间'] <= 11.5: score += 1
    elif row['整晚睡眠时间'] < 9.5: score -= 1
    if row['睡醒次数'] == 0: score += 2
    elif row['睡醒次数'] == 1: score += 1
    elif row['睡醒次数'] > 2: score -= 1
    if row['入睡方式'] == 4: score += 0.5
    elif row['入睡方式'] == 1: score -= 0.5
    if score >= 3.5: return '优'
    elif score >= 1.5: return '良'
    elif score >= -0.5: return '中'
    else: return '差'
df_with_sleep_data['综合睡眠质量'] = df_with_sleep_data.apply(assign_comprehensive_sleep_quality, axis=1)

X_train_P4 = df_with_sleep_data.drop(columns=['综合睡眠质量', '婴儿行为特征', '编号']).copy()
y_train_P4 = df_with_sleep_data['综合睡眠质量'].copy()

# 特征列
features_P4 = X_train_P4.columns.tolist()
numerical_features_P4 = [
    '母亲年龄', '妊娠时间', 'CBTS', 'EPDS', 'HADS', '婴儿年龄',
    '整晚睡眠时间', '睡醒次数'
]
categorical_features_P4 = [
    '婚姻状况', '教育程度', '分娩方式', '婴儿性别', '入睡方式'
]

# 预处理器，包含 SimpleImputer 处理 NaN
preprocessor_P4_imputed = ColumnTransformer(
    transformers=[
        ('num', Pipeline(steps=[('imputer', SimpleImputer(strategy='mean')), ('scaler', StandardScaler())]), numerical_features_P4),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features_P4)
    ],
    remainder='passthrough'
)
pipeline_P4_imputed = ImbPipeline(steps=[('preprocessor', preprocessor_P4_imputed),
                                         ('smote', SMOTE(random_state=42)),
                                         ('classifier', RandomForestClassifier(random_state=42))])
param_grid_P4 = { # 使用问题4中找到的最佳参数
    'classifier__n_estimators': [200],
    'classifier__max_depth': [10],
    'classifier__min_samples_split': [5],
    'classifier__min_samples_leaf': [2]
}
cv_P4 = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
grid_search_P4 = GridSearchCV(pipeline_P4_imputed, param_grid_P4, cv=cv_P4, scoring='f1_weighted', n_jobs=-1, verbose=0)
grid_search_P4.fit(X_train_P4, y_train_P4)
best_model_P4 = grid_search_P4.best_estimator_
print("问题4的最佳模型已重新训练并加载。")
print("-" * 60)

# --- 3. 建立线性治疗费用模型 (与问题3相同) ---
def calculate_linear_cost_params(s1, c1, s2, c2):
    if s1 == s2: raise ValueError("s1 and s2 cannot be the same for linear model.")
    m = (c2 - c1) / (s2 - s1)
    c = c1 - m * s1
    return m, c

m_cbts, c_cbts = calculate_linear_cost_params(0, 200, 3, 2812)
m_epds, c_epds = calculate_linear_cost_params(0, 500, 2, 1890)
m_hads, c_hads = calculate_linear_cost_params(0, 300, 5, 12500)

def get_cost_at_score_linear(score, m, c):
    return m * max(0, score) + c

def calculate_total_treatment_cost_linear(current_cbts, target_cbts,
                                          current_epds, target_epds,
                                          current_hads, target_hads):
    cost_cbts = get_cost_at_score_linear(current_cbts, m_cbts, c_cbts) - get_cost_at_score_linear(target_cbts, m_cbts, c_cbts)
    cost_epds = get_cost_at_score_linear(current_epds, m_epds, c_epds) - get_cost_at_score_linear(target_epds, m_epds, c_epds)
    cost_hads = get_cost_at_score_linear(current_hads, m_hads, c_hads) - get_cost_at_score_linear(target_hads, m_hads, c_hads)
    
    cost_cbts = max(0, cost_cbts)
    cost_epds = max(0, cost_epds)
    cost_hads = max(0, cost_hads)
    
    return cost_cbts + cost_epds + cost_hads

print("线性治疗费用模型已加载。")
print("-" * 60)

# --- 4. 提取编号238婴儿的当前数据 ---
infant_id_238 = 238
# 从 df_train_P2 获取，因为它包含了所有训练集特征，包括原始睡眠指标
infant_238_data = df_train_P2[df_train_P2['编号'] == float(infant_id_238)].iloc[0]

current_cbts = infant_238_data['CBTS']
current_epds = infant_238_data['EPDS']
current_hads = infant_238_data['HADS']
current_behavior = infant_238_data['婴儿行为特征'] # 原始行为特征
current_sleep_time = infant_238_data['整晚睡眠时间']
current_wake_times = infant_238_data['睡醒次数']
current_sleep_mode = infant_238_data['入睡方式']


print(f"编号 {infant_id_238} 婴儿的当前数据：")
print(f"  当前行为特征: {current_behavior}")
print(f"  当前CBTS得分: {current_cbts}, EPDS得分: {current_epds}, HADS得分: {current_hads}")
print(f"  当前整晚睡眠时间: {current_sleep_time}, 睡醒次数: {current_wake_times}, 入睡方式: {current_sleep_mode}")
print("-" * 60)

# --- 5. 定义优化目标和搜索空间并执行搜索 ---
# 目标1: 行为特征 -> 中等型 且 睡眠质量 -> 优
min_cost_zhongdeng_you = float('inf')
best_scores_zhongdeng_you = {}
# 目标2: 行为特征 -> 安静型 且 睡眠质量 -> 优
min_cost_anjing_you = float('inf')
best_scores_anjing_you = {}

solutions_zhongdeng_you = []
solutions_anjing_you = []

# 获取除CBTS, EPDS, HADS外的其他不变特征
# 确保这里包含所有P2和P4模型可能用到的特征
all_base_features = infant_238_data.loc[features_P4].drop(labels=['CBTS', 'EPDS', 'HADS'])
all_base_features_dict = all_base_features.to_dict()

print("开始搜索最优治疗方案（多目标，可能需要较长时间，请耐心等待）...")
# 迭代所有可能的CBTS, EPDS, HADS分数组合
for cbts_s in range(int(current_cbts) + 1):
    for epds_s in range(int(current_epds) + 1):
        for hads_s in range(int(current_hads) + 1):
            # 计算治疗费用
            cost = calculate_total_treatment_cost_linear(current_cbts, cbts_s,
                                                          current_epds, epds_s,
                                                          current_hads, hads_s)

            # 构建用于行为预测的特征向量 (P2模型)
            predict_data_point_P2 = all_base_features_dict.copy()
            predict_data_point_P2['CBTS'] = cbts_s
            predict_data_point_P2['EPDS'] = epds_s
            predict_data_point_P2['HADS'] = hads_s
            
            predict_df_P2 = pd.DataFrame([predict_data_point_P2], columns=features_P2)
            predicted_behavior = best_model_P2.predict(predict_df_P2)[0]

            # 构建用于睡眠质量预测的特征向量 (P4模型)
            # P4模型需要所有特征，包括原始睡眠指标
            predict_data_point_P4 = all_base_features_dict.copy()
            predict_data_point_P4['CBTS'] = cbts_s
            predict_data_point_P4['EPDS'] = epds_s
            predict_data_point_P4['HADS'] = hads_s
            
            predict_df_P4 = pd.DataFrame([predict_data_point_P4], columns=features_P4)
            predicted_sleep_quality = best_model_P4.predict(predict_df_P4)[0]

            # 检查是否达到目标1: 行为中等型 且 睡眠优
            if predicted_behavior == '中等型' and predicted_sleep_quality == '优':
                if cost < min_cost_zhongdeng_you:
                    min_cost_zhongdeng_you = cost
                    best_scores_zhongdeng_you = {'CBTS': cbts_s, 'EPDS': epds_s, 'HADS': hads_s}
                solutions_zhongdeng_you.append({'cost': cost, 'CBTS': cbts_s, 'EPDS': epds_s, 'HADS': hads_s})

            # 检查是否达到目标2: 行为安静型 且 睡眠优
            if predicted_behavior == '安静型' and predicted_sleep_quality == '优':
                if cost < min_cost_anjing_you:
                    min_cost_anjing_you = cost
                    best_scores_anjing_you = {'CBTS': cbts_s, 'EPDS': epds_s, 'HADS': hads_s}
                solutions_anjing_you.append({'cost': cost, 'CBTS': cbts_s, 'EPDS': epds_s, 'HADS': hads_s})

print("\n搜索完成。")
print("-" * 60)

# --- 6. 输出结果 ---

print("\n--- 目标1: 将婴儿行为特征从矛盾型变为中等型 且 睡眠质量评级为优 ---")
if min_cost_zhongdeng_you != float('inf'):
    print(f"最少花费治疗费用: {min_cost_zhongdeng_you:.2f} 元")
    print(f"对应的CBTS、EPDS、HADS目标分数: {best_scores_zhongdeng_you}")
    print(f"  (原始CBTS: {current_cbts}, EPDS: {current_epds}, HADS: {current_hads})")
else:
    print("未找到能同时满足行为中等型且睡眠优的方案。")

print("\n--- 目标2: 将婴儿行为特征从矛盾型变为安静型 且 睡眠质量评级为优 ---")
if min_cost_anjing_you != float('inf'):
    print(f"最少花费治疗费用: {min_cost_anjing_you:.2f} 元")
    print(f"对应的CBTS、EPDS、HADS目标分数: {best_scores_anjing_you}")
    print(f"  (原始CBTS: {current_cbts}, EPDS: {current_epds}, HADS: {current_hads})")
else:
    print("未找到能同时满足行为安静型且睡眠优的方案。")

print("\n--- 详细方案列表（按费用升序，仅显示前5个最低成本方案） ---")
print("\n行为中等型且睡眠优方案：")
solutions_zhongdeng_you_sorted = sorted(solutions_zhongdeng_you, key=lambda x: x['cost'])
if solutions_zhongdeng_you_sorted:
    for s in solutions_zhongdeng_you_sorted[:5]:
        print(f"  费用: {s['cost']:.2f}, CBTS: {s['CBTS']}, EPDS: {s['EPDS']}, HADS: {s['HADS']}")
else:
    print("  无有效方案。")

print("\n行为安静型且睡眠优方案：")
solutions_anjing_you_sorted = sorted(solutions_anjing_you, key=lambda x: x['cost'])
if solutions_anjing_you_sorted:
    for s in solutions_anjing_you_sorted[:5]:
        print(f"  费用: {s['cost']:.2f}, CBTS: {s['CBTS']}, EPDS: {s['EPDS']}, HADS: {s['HADS']}")
else:
    print("  无有效方案。")


--- 问题5：多目标治疗策略优化 ---
数据加载成功！




问题2的最佳模型已重新训练并加载。




问题4的最佳模型已重新训练并加载。
------------------------------------------------------------
线性治疗费用模型已加载。
------------------------------------------------------------
编号 238 婴儿的当前数据：
  当前行为特征: 矛盾型
  当前CBTS得分: 15, EPDS得分: 22, HADS得分: 18
  当前整晚睡眠时间: 9.0, 睡醒次数: 2.0, 入睡方式: 3.0
------------------------------------------------------------
开始搜索最优治疗方案（多目标，可能需要较长时间，请耐心等待）...

搜索完成。
------------------------------------------------------------

--- 目标1: 将婴儿行为特征从矛盾型变为中等型 且 睡眠质量评级为优 ---
未找到能同时满足行为中等型且睡眠优的方案。

--- 目标2: 将婴儿行为特征从矛盾型变为安静型 且 睡眠质量评级为优 ---
未找到能同时满足行为安静型且睡眠优的方案。

--- 详细方案列表（按费用升序，仅显示前5个最低成本方案） ---

行为中等型且睡眠优方案：
  无有效方案。

行为安静型且睡眠优方案：
  无有效方案。
