In [None]:
# -*- coding: utf-8 -*-
# 文件名: solve_q2_mpga.py
# 功能: 针对问题二，采用多种群遗传算法(MPGA)和夏普比率目标函数进行求解。
# 版本: Q2_MPGA_v1

import pandas as pd
import numpy as np
import os
import time
import re
import random
import copy
from pathlib import Path
from collections import defaultdict
from tqdm import tqdm

# --- 1. Core model parameters ---

# (1) Genetic-algorithm parameters
POP_SIZE_PER_SUBPOP = 50       # number of individuals in each sub-population
NUM_POPULATIONS = 5            # number of sub-populations
MAX_GEN = 200                  # maximum number of generations
CX_PROB = 0.8                  # crossover probability
MUT_PROB = 0.2                 # mutation probability
TOURNAMENT_SIZE = 3            # tournament-selection size
ELITISM_SIZE = 5               # elites carried over per sub-population

# (2) Multi-population specific parameters
MIGRATION_INTERVAL = 25        # number of generations between migrations
MIGRATION_SIZE = 3             # elite individuals moved per migration

# (3) Problem-2 specific parameters
NUM_SCENARIOS = 100            # number of Monte Carlo scenarios
YEARS = list(range(2024, 2031)) # planning years (2024 through 2030)

# --- 2. 数据加载与情景生成 ---
def load_and_prepare_data(data_path):
    """Read the two Excel attachments and assemble the model parameters.

    Args:
        data_path: Directory holding ``附件1.xlsx`` and ``附件2.xlsx``. It is
            combined with the file names using ``/``, so it must support
            that operator (e.g. ``pathlib.Path``).

    Returns:
        dict with the following entries:
            I_plots: sorted list of plot names.
            P_area / P_plot_type: plot name -> area / plot type.
            J_crops: sorted list of distinct crop names.
            P_crop_type: crop name -> crop type.
            J_bean: crops whose type string contains '豆'.
            P_past: plot -> {season: crop planted in 2023 or None}.
            P_yield_base / P_cost_base / P_price_base:
                (crop, plot type) -> per-mu yield / per-mu cost / unit price.
            P_demand_base: crop -> total 2023 production (area * yield).
            S_suitability: (plot, crop, season) -> 1 if plantable else 0.

    Raises:
        Exception: any failure is printed and then re-raised unchanged.
    """
    try:
        print("（1）正在读取Excel文件...")
        path_f1 = data_path / '附件1.xlsx'
        path_f2 = data_path / '附件2.xlsx'

        plots_df = pd.read_excel(path_f1, sheet_name='乡村的现有耕地')
        crops_info_df = pd.read_excel(path_f1, sheet_name='乡村种植的农作物')
        stats_df = pd.read_excel(path_f2, sheet_name='2023年统计的相关数据')
        past_planting_df = pd.read_excel(path_f2, sheet_name='2023年的农作物种植情况')

        # Header cells may carry stray whitespace; normalise before any lookup.
        for df in (plots_df, crops_info_df, stats_df, past_planting_df):
            df.columns = df.columns.str.strip()

        params = {}
        params['I_plots'] = sorted(plots_df['地块名称'].tolist())
        params['P_area'] = dict(zip(plots_df['地块名称'], plots_df['地块面积/亩']))
        params['P_plot_type'] = dict(zip(plots_df['地块名称'], plots_df['地块类型']))

        params['J_crops'] = sorted(crops_info_df['作物名称'].dropna().unique().tolist())
        params['P_crop_type'] = dict(zip(crops_info_df['作物名称'], crops_info_df['作物类型']))
        params['J_bean'] = [
            j for j, ctype in params['P_crop_type'].items()
            if isinstance(ctype, str) and '豆' in ctype
        ]

        # 2023 planting history, used later to enforce rotation rules.
        params['P_past'] = {i: {1: None, 2: None} for i in params['I_plots']}
        for _, row in past_planting_df.iterrows():
            plot, crop = row['种植地块'], row['作物名称']
            # NOTE(review): the value is stored under whatever the sheet holds
            # in '种植季节' (1 when the column is absent) -- confirm the sheet
            # really uses the integers 1 and 2 for seasons.
            season = row.get('种植季节', 1)
            if plot in params['P_past']:  # same keys as I_plots, O(1) lookup
                params['P_past'][plot][season] = crop

        def clean_and_convert_price(value):
            """Turn a price cell into a float; 'a-b' ranges become their midpoint."""
            if isinstance(value, str) and any(c in value for c in '-–—'):
                parts = re.split(r'[-–—]', value.strip())
                try:
                    return (float(parts[0]) + float(parts[1])) / 2
                except (ValueError, IndexError):
                    return np.nan
            return pd.to_numeric(value, errors='coerce')

        stats_df['销售单价/(元/斤)'] = stats_df['销售单价/(元/斤)'].apply(clean_and_convert_price)
        # Rows lacking any of the three economic figures cannot be evaluated.
        stats_df = stats_df.dropna(subset=['亩产量/斤', '种植成本/(元/亩)', '销售单价/(元/斤)'])

        params['P_yield_base'], params['P_cost_base'], params['P_price_base'] = {}, {}, {}
        for _, row in stats_df.iterrows():
            key = (row['作物名称'], row['地块类型'])
            params['P_cost_base'][key] = row['种植成本/(元/亩)']
            params['P_yield_base'][key] = row['亩产量/斤']
            params['P_price_base'][key] = row['销售单价/(元/斤)']

        # Baseline demand = what was actually produced in 2023.  Iterating the
        # official crop list (not the planting sheet) guarantees every crop
        # gets an entry, defaulting to 0.
        params['P_demand_base'] = {j: 0 for j in params['J_crops']}
        merged_df = pd.merge(
            past_planting_df, plots_df,
            left_on='种植地块', right_on='地块名称', how='left',
        )

        for crop in params['J_crops']:
            crop_plantings = merged_df[merged_df['作物名称'] == crop]
            total_yield = 0
            for _, row in crop_plantings.iterrows():
                # Resolve the fallback lazily: evaluating P_area[plot] as the
                # default argument of row.get() raised KeyError for plots that
                # are absent from the plot sheet even when the planted area
                # was available.  A blank area cell also falls back to the
                # whole plot instead of turning the total into NaN.
                area = row.get('种植面积/亩')
                if pd.isna(area):
                    area = params['P_area'].get(row['种植地块'], 0)
                yield_val = params['P_yield_base'].get((crop, row['地块类型']), 0)
                total_yield += area * yield_val
            params['P_demand_base'][crop] = total_yield

        # Suitability matrix: which crop may go on which plot in which season.
        params['S_suitability'] = {}
        restricted_veg = ['大白菜', '白萝卜', '红萝卜']
        dry_land_types = ['平旱地', '梯田', '山坡地']
        bean_crops = set(params['J_bean'])
        for i in params['I_plots']:
            plot_t = params['P_plot_type'].get(i, '')
            for j in params['J_crops']:
                crop_t = str(params['P_crop_type'].get(j, ''))
                is_veg = '蔬菜' in crop_t
                is_restricted = j in restricted_veg
                for k in [1, 2]:
                    suitable = 0
                    # NOTE(review): every crop typed '粮食' qualifies for dry
                    # land here; confirm that is intended for all such crops.
                    if plot_t in dry_land_types and ('粮食' in crop_t or j in bean_crops) and k == 1:
                        suitable = 1
                    elif plot_t == '水浇地':
                        if '水稻' in j and k == 1:
                            suitable = 1
                        elif is_veg:
                            # Ordinary vegetables in season 1, the three
                            # restricted vegetables only in season 2.
                            if not is_restricted and k == 1:
                                suitable = 1
                            elif is_restricted and k == 2:
                                suitable = 1
                    elif plot_t == '普通大棚':
                        if is_veg and not is_restricted and k == 1:
                            suitable = 1
                        elif '食用菌' in crop_t and k == 2:
                            suitable = 1
                    elif plot_t == '智慧大棚' and is_veg and not is_restricted:
                        suitable = 1
                    params['S_suitability'][(i, j, k)] = suitable

        print(" -> 基础数据参数准备完成。")
        return params
    except Exception as e:
        print(f"错误: 加载数据失败: {e}")
        raise

def generate_scenarios(params, num_scenarios):
    """Draw ``num_scenarios`` random futures for the Monte Carlo evaluation.

    Each scenario is a dict with the tables ``P_demand``, ``P_yield``,
    ``P_cost`` and ``P_price``, every one keyed by year.  2024 is a copy of
    the baseline values; 2025-2030 are drawn year by year:

    * demand: previous year times (1 + g), g ~ U(0.05, 0.10) for wheat and
      maize, g ~ U(-0.05, 0.05) for every other crop;
    * yield: baseline value times (1 + U(-0.10, 0.10));
    * cost: previous year times (1 + N(0.05, 0.01));
    * price: unchanged for grain types, previous year times
      (1 + N(0.05, 0.02)) for vegetables, a 5% drop for morel and a
      U(0.01, 0.05) drop for the other edible fungi.

    Args:
        params: parameter dict providing ``J_crops``, ``P_crop_type`` and the
            four ``P_*_base`` tables.
        num_scenarios: number of scenarios to generate.

    Returns:
        list of scenario dicts.
    """
    print(f"（2）正在生成 {num_scenarios} 个未来情景...")

    crop_names = params['J_crops']
    type_of_crop = params['P_crop_type']
    demand_base = params['P_demand_base']
    yield_base = params['P_yield_base']
    cost_base = params['P_cost_base']
    price_base = params['P_price_base']

    def draw_demand_growth(crop):
        # Wheat and maize grow steadily; everything else fluctuates around 0.
        if crop in ['小麦', '玉米']:
            return np.random.uniform(0.05, 0.10)
        return np.random.uniform(-0.05, 0.05)

    def next_price(key, previous):
        crop, _ = key
        category = str(type_of_crop.get(crop, ''))
        if '粮食' in category:
            return previous  # grain prices are treated as stable
        if '蔬菜' in category:
            return previous * (1 + np.random.normal(0.05, 0.02))
        if '食用菌' in category:
            if crop == '羊肚菌':
                drop = 0.05
            else:
                drop = np.random.uniform(0.01, 0.05)
            return previous * (1 - drop)
        return previous

    scenarios = []
    for _ in tqdm(range(num_scenarios), desc="生成情景"):
        demand = {year: {} for year in YEARS}
        yields = {year: {} for year in YEARS}
        costs = {year: {} for year in YEARS}
        prices = {year: {} for year in YEARS}

        # The first planning year starts from the baseline figures.
        demand[2024].update((crop, demand_base[crop]) for crop in crop_names)
        yields[2024].update(yield_base)
        costs[2024].update(cost_base)
        prices[2024].update(price_base)

        # The order of the four passes fixes the order of the random draws.
        for year in range(2025, 2031):
            previous_year = year - 1

            for crop in crop_names:
                demand[year][crop] = demand[previous_year][crop] * (1 + draw_demand_growth(crop))

            for key, reference in yield_base.items():
                yields[year][key] = reference * (1 + np.random.uniform(-0.10, 0.10))

            for key, last_cost in costs[previous_year].items():
                costs[year][key] = last_cost * (1 + np.random.normal(0.05, 0.01))

            for key, last_price in prices[previous_year].items():
                prices[year][key] = next_price(key, last_price)

        scenarios.append({
            'P_demand': demand,
            'P_yield': yields,
            'P_cost': costs,
            'P_price': prices,
        })

    print(" -> 情景生成完毕。")
    return scenarios

# --- 3. 遗传算法核心函数 (与Q1兼容, 但适应度函数重写) ---

def create_initial_solution(params):
    """Create one random planting plan and pass it through ``repair_solution``.

    The plan is ``plan[year][season][plot] -> crop or None``.  Every slot
    that has at least one suitable crop receives a uniformly random one;
    slots without any suitable crop stay ``None``.

    Args:
        params: parameter dict providing ``I_plots``, ``J_crops`` and
            ``S_suitability``.

    Returns:
        The value returned by ``repair_solution`` for the random plan.
    """
    plots = params['I_plots']
    seasons = (1, 2)
    suitability = params['S_suitability']

    # Suitability does not depend on the year, so list the candidates once.
    candidates = {
        (plot, season): [
            crop for crop in params['J_crops']
            if suitability.get((plot, crop, season), 0) == 1
        ]
        for plot in plots
        for season in seasons
    }

    plan = {
        year: {season: dict.fromkeys(plots) for season in seasons}
        for year in YEARS
    }
    for year in YEARS:
        for plot in plots:
            for season in seasons:
                options = candidates[(plot, season)]
                if options:
                    plan[year][season][plot] = random.choice(options)
    return repair_solution(plan, params)

def repair_solution(solution, params):
    """Repair replanting and bean-rotation violations of a plan in place.

    Two passes are made over every plot:

    1. Replanting: a crop that was also grown on the plot in the previous
       year (either season; 2023 comes from ``P_past``) is replaced by a
       random suitable crop not grown there last year, or by ``None`` when
       no such crop exists.
    2. Beans: every window of three consecutive years, starting at 2023,
       must contain a crop from ``J_bean``.  If it does not, a bean is
       written into season 1 of a random plan year of that window.

    Args:
        solution: plan as ``solution[year][season][plot] -> crop or None``.
            It is modified in place.
        params: parameter dict providing ``I_plots``, ``J_crops``,
            ``J_bean``, ``P_past`` and ``S_suitability``.

    Returns:
        The same ``solution`` object, after repair.
    """
    suitability = params['S_suitability']
    bean_crops = set(params['J_bean'])

    def crops_grown(year, plot):
        """Return the set of crops on ``plot`` in ``year`` (empty if unknown)."""
        if year == 2023:
            by_season = params['P_past'].get(plot, {})
            found = (by_season.get(k) for k in (1, 2))
        else:
            by_season = solution.get(year, {})
            found = (by_season.get(k, {}).get(plot) for k in (1, 2))
        return {crop for crop in found if crop}

    # Pass 1: no crop may follow itself on the same plot.
    for i in params['I_plots']:
        for y in YEARS:
            crops_last_year = crops_grown(y - 1, i)
            for k in (1, 2):
                crop_this_season = solution[y][k][i]
                if crop_this_season and crop_this_season in crops_last_year:
                    possible_replacements = [
                        j for j in params['J_crops']
                        if suitability.get((i, j, k), 0) == 1 and j not in crops_last_year
                    ]
                    solution[y][k][i] = (
                        random.choice(possible_replacements) if possible_replacements else None
                    )

    # Pass 2: at least one bean crop in every three-year window.
    all_years = [2023] + YEARS
    for i in params['I_plots']:
        for idx in range(len(all_years) - 2):
            window = all_years[idx:idx + 3]
            if any(crops_grown(y_win, i) & bean_crops for y_win in window):
                continue

            y_fix = random.choice([y for y in window if y > 2023])
            k_fix = 1
            crops_last_year = crops_grown(y_fix - 1, i)
            crops_next_year = crops_grown(y_fix + 1, i)
            possible_beans = [
                b for b in params['J_bean']
                if suitability.get((i, b, k_fix), 0) == 1 and b not in crops_last_year
            ]
            # The inserted bean must not clash with the following year
            # either; otherwise this pass would re-create the replanting
            # violation that pass 1 has just removed.  When every candidate
            # clashes, keep the bean requirement and accept the clash.
            conflict_free = [b for b in possible_beans if b not in crops_next_year]
            pool = conflict_free or possible_beans
            if pool:
                solution[y_fix][k_fix][i] = random.choice(pool)
    return solution

def crossover(p1, p2, params):
    """Plot-wise uniform crossover of two planting plans.

    The child starts as a deep copy of ``p1``.  Each plot is then taken
    from ``p2`` with probability 0.5, together with its whole schedule
    (every year and both seasons).

    Args:
        p1: first parent plan, ``plan[year][season][plot]``.
        p2: second parent plan with the same layout.
        params: parameter dict providing ``I_plots``.

    Returns:
        A new plan; neither parent is modified.
    """
    offspring = copy.deepcopy(p1)
    # One coin flip per plot, in plot order.
    inherited_from_p2 = [plot for plot in params['I_plots'] if random.random() < 0.5]
    for plot in inherited_from_p2:
        for year in YEARS:
            for season in (1, 2):
                offspring[year][season][plot] = p2[year][season][plot]
    return offspring

def mutate(solution, params):
    """Return a mutated deep copy of ``solution``.

    Between one and five random (year, plot, season) slots are selected;
    each selected slot that has at least one suitable crop is overwritten
    with a random suitable crop.  The input plan is left untouched.

    Args:
        solution: plan as ``solution[year][season][plot]``.
        params: parameter dict providing ``I_plots``, ``J_crops`` and
            ``S_suitability``.

    Returns:
        The mutated copy.
    """
    mutant = copy.deepcopy(solution)
    suitability = params['S_suitability']
    n_changes = random.randint(1, 5)  # several changes per call for a stronger mutation
    for _ in range(n_changes):
        year = random.choice(YEARS)
        plot = random.choice(params['I_plots'])
        season = random.choice([1, 2])
        options = [
            crop for crop in params['J_crops']
            if suitability.get((plot, crop, season), 0) == 1
        ]
        if not options:
            continue
        mutant[year][season][plot] = random.choice(options)
    return mutant

def evaluate_fitness_q2(solution, params, scenarios):
    """Fitness for problem 2: mean scenario profit divided by its spread.

    For every scenario the plan's profit is computed as follows:

    * cost is ``area * cost`` summed over every planted slot; slots whose
      (crop, plot type) pair has no cost entry are skipped entirely;
    * production is pooled per crop over all years;
    * the price of a crop is the mean of its positive prices over all
      years and plot types of the scenario;
    * production up to the crop's demand summed over ``YEARS`` sells at
      that price, the surplus at half of it.

    Args:
        solution: plan as ``solution[year][season][plot] -> crop or None``.
        params: parameter dict providing ``I_plots``, ``P_plot_type`` and
            ``P_area``.
        scenarios: list of scenario dicts with ``P_cost``, ``P_yield``,
            ``P_price`` and ``P_demand`` tables keyed by year.

    Returns:
        ``mean(profit) / std(profit)`` over the scenarios.  When the spread
        is below 1e-6 the mean profit itself is returned if it is positive
        and ``-1e9`` otherwise; ``-1e9`` is also returned when no scenario
        yields a valid (non-NaN) profit.
    """
    plot_type_of = params['P_plot_type']
    area_of = params['P_area']

    profits = []
    for scenario in scenarios:
        total_revenue, total_cost = 0, 0

        total_production_by_crop = defaultdict(float)
        for y in YEARS:
            cost_table = scenario['P_cost'][y]
            yield_table = scenario['P_yield'][y]
            for i in params['I_plots']:
                plot_type = plot_type_of[i]
                area = area_of[i]
                for k in [1, 2]:
                    crop = solution[y][k][i]
                    if not crop:
                        continue
                    key = (crop, plot_type)
                    cost = cost_table.get(key, 9e9)
                    if cost > 1e9:  # sentinel: no data for this crop/plot pair
                        continue
                    total_cost += area * cost
                    total_production_by_crop[crop] += area * yield_table.get(key, 0)

        # Group the positive prices of the planted crops in ONE pass over the
        # price table.  Rescanning the whole table for every crop dominated
        # the run time; the per-crop lists keep the same order, so the means
        # are numerically identical.
        prices_by_crop = defaultdict(list)
        for y in YEARS:
            for (c, _plot_type), p in scenario['P_price'][y].items():
                if c in total_production_by_crop and p > 0:
                    prices_by_crop[c].append(p)

        for crop, production in total_production_by_crop.items():
            total_demand_7_years = sum(scenario['P_demand'][y].get(crop, 0) for y in YEARS)

            all_prices = prices_by_crop.get(crop)
            price = np.mean(all_prices) if all_prices else 0

            # Revenue is only counted for crops with a usable price.
            if price > 0:
                normal_qty = min(production, total_demand_7_years)
                over_qty = production - normal_qty
                total_revenue += (normal_qty * price) + (over_qty * price * 0.5)

        profits.append(total_revenue - total_cost)

    # Drop scenarios whose profit turned into NaN along the way.
    valid_profits = [p for p in profits if not np.isnan(p)]
    if not valid_profits:
        return -1e9

    expected_profit = np.mean(valid_profits)
    risk = np.std(valid_profits)

    # Avoid dividing by a (near-)zero spread.
    if risk < 1e-6:
        return expected_profit if expected_profit > 0 else -1e9
    return expected_profit / risk

# --- 4. 多种群遗传算法(MPGA)运行器 ---
def _tournament_select(pop, fitnesses, k):
    """Return the fittest of ``k`` individuals drawn at random with replacement."""
    best = random.randrange(len(pop))
    for _ in range(k - 1):
        idx = random.randrange(len(pop))
        if fitnesses[idx] > fitnesses[best]:
            best = idx
    return pop[best]


def _breed_next_generation(pop, fitnesses, params):
    """Build the next generation of one sub-population (elitism + offspring)."""
    ranking = np.argsort(fitnesses)  # ascending, the best individuals come last
    # Slice from an explicit start index: ``ranking[-0:]`` would select all.
    n_elite = max(0, min(ELITISM_SIZE, len(ranking)))
    next_pop = [pop[idx] for idx in ranking[len(ranking) - n_elite:]]

    while len(next_pop) < POP_SIZE_PER_SUBPOP:
        p1 = _tournament_select(pop, fitnesses, TOURNAMENT_SIZE)
        p2 = _tournament_select(pop, fitnesses, TOURNAMENT_SIZE)
        child = crossover(p1, p2, params) if random.random() < CX_PROB else copy.deepcopy(p1)
        if random.random() < MUT_PROB:
            child = mutate(child, params)
        next_pop.append(repair_solution(child, params))
    return next_pop


def _migrate_ring(populations, all_fitnesses):
    """Copy each sub-population's best individuals over the next one's worst.

    ``populations`` and ``all_fitnesses`` must be aligned index by index;
    both are updated in place so that they stay aligned afterwards.
    """
    # Snapshot every emigrant first, so that a population which has already
    # received immigrants still sends out its own best individuals.
    outgoing = []
    for pop, fitnesses in zip(populations, all_fitnesses):
        ranking = np.argsort(fitnesses)
        n_move = max(0, min(MIGRATION_SIZE, len(ranking)))
        outgoing.append([
            (copy.deepcopy(pop[idx]), fitnesses[idx])
            for idx in ranking[len(ranking) - n_move:]
        ])

    for source, migrants in enumerate(outgoing):
        target = (source + 1) % len(populations)  # ring topology
        worst_slots = np.argsort(all_fitnesses[target])[:len(migrants)]
        for slot, (migrant, fitness) in zip(worst_slots, migrants):
            populations[target][slot] = migrant
            all_fitnesses[target][slot] = fitness


def run_mpga(params, scenarios):
    """Run the multi-population genetic algorithm (MPGA).

    Every generation all sub-populations are evaluated, the best individual
    seen so far is recorded, migration is performed every
    ``MIGRATION_INTERVAL`` generations (never in generation 0), and each
    sub-population then breeds its successor.

    Migration runs on the freshly evaluated populations, before breeding.
    Previously it ran after breeding while still using the fitness ranking
    of the parent generation, so the ranked indices pointed at unrelated
    individuals of the new population.

    Args:
        params: parameter dict produced by ``load_and_prepare_data``.
        scenarios: scenario list produced by ``generate_scenarios``.

    Returns:
        ``(best_solution, best_fitness)`` over all generations and
        sub-populations.
    """
    print("\n--- 开始执行多种群遗传算法 (MPGA) ---")

    # (1) Independent random sub-populations.
    populations = [
        [create_initial_solution(params) for _ in range(POP_SIZE_PER_SUBPOP)]
        for _ in range(NUM_POPULATIONS)
    ]
    best_solution_overall, best_fitness_overall = None, -np.inf

    # (2) Evolution loop.
    for gen in tqdm(range(MAX_GEN), desc="MPGA进化中"):
        # (2.1) Evaluate every individual of every sub-population.
        all_fitnesses = [
            [evaluate_fitness_q2(sol, params, scenarios) for sol in pop]
            for pop in populations
        ]

        for pop, fitnesses in zip(populations, all_fitnesses):
            best_idx = int(np.argmax(fitnesses))
            if fitnesses[best_idx] > best_fitness_overall:
                best_fitness_overall = fitnesses[best_idx]
                best_solution_overall = copy.deepcopy(pop[best_idx])

        # (2.2) Migration between the evaluated sub-populations.
        if gen > 0 and gen % MIGRATION_INTERVAL == 0:
            _migrate_ring(populations, all_fitnesses)

        # (2.3) Each sub-population breeds its next generation.
        populations = [
            _breed_next_generation(pop, fitnesses, params)
            for pop, fitnesses in zip(populations, all_fitnesses)
        ]

    print(f"\n--- MPGA 优化完成 ---")
    return best_solution_overall, best_fitness_overall
# --- 5. 主程序 ---
if __name__ == '__main__':
    # Entry point: load data, generate scenarios, run the MPGA and export the
    # best plan to Result/result2.xlsx next to this script.
    try:
        # (1) Resolve paths relative to the script; fall back to the current
        # working directory when __file__ is undefined (e.g. in a notebook).
        script_dir = Path(__file__).parent if "__file__" in locals() else Path.cwd()

        # Input data lives in ../Data relative to the script directory.
        data_path = script_dir / '..' / 'Data'

        # Results are written to a Result folder inside the script directory.
        output_dir = script_dir / 'Result'
        output_dir.mkdir(parents=True, exist_ok=True)

        print(f"脚本运行目录: {script_dir}")
        print(f"数据读取路径: {data_path}")
        print(f"结果输出路径: {output_dir}")

        # (2) Load the base data.
        base_params = load_and_prepare_data(data_path)

        # (3) Generate the uncertainty scenarios.
        scenarios = generate_scenarios(base_params, NUM_SCENARIOS)

        # (4) Solve with the MPGA.
        best_solution, best_fitness = run_mpga(base_params, scenarios)

        print("\n求解完成。")
        print(f" -> 最优方案的夏普比率: {best_fitness:.4f}")

        # (5) Flatten the best plan into rows and save it to Excel.
        if best_solution:
            output_list = []
            for y in sorted(best_solution.keys()):
                # Index with [y]: the plan is a dict, so the former attribute
                # access ``best_solution.y`` raised AttributeError and the
                # result file was never written.
                for k in sorted(best_solution[y].keys()):
                    for i in sorted(best_solution[y][k].keys()):
                        crop = best_solution[y][k][i]
                        if crop:
                            output_list.append({
                                '年份': y, '季节': k, '地块编号': i, '作物名称': crop,
                                '种植面积（亩）': base_params['P_area'][i]
                            })
            result_df = pd.DataFrame(output_list)
            file_path = output_dir / 'result2.xlsx'
            result_df.to_excel(file_path, index=False)
            print(f"最优方案已保存至: {file_path}")
        else:
            print("未能找到有效解。")

    except Exception as e:
        # Top-level boundary: report the failure together with its traceback.
        print(f"\n程序主流程发生错误: {e}")
        import traceback
        traceback.print_exc()

脚本运行目录: c:\Users\86185\Desktop\2024C\4问题二
数据读取路径: c:\Users\86185\Desktop\2024C\4问题二\..\Data
结果输出路径: c:\Users\86185\Desktop\2024C\4问题二\Result
（1）正在读取Excel文件...
 -> 基础数据参数准备完成。
（2）正在生成 100 个未来情景...


生成情景: 100%|██████████| 100/100 [00:00<00:00, 147.83it/s]


 -> 情景生成完毕。

--- 开始执行多种群遗传算法 (MPGA) ---


MPGA进化中:   2%|▎         | 5/200 [09:34<6:20:57, 117.22s/it]