In [1]:
import pickle
import numpy as np
import pandas as pd
import networkx as nx
import geopandas as gpd
import time
from multiprocessing import cpu_count
from concurrent.futures import ProcessPoolExecutor

# load data (保持你的原始加载方式)
with open('data/foot_traffic/G.pkl', 'rb') as f:
    G = pickle.load(f)

od_base_df = pd.read_csv('data/foot_traffic/od_results_with_W.csv')
od_top5_df = pd.read_pickle('data/foot_traffic/od_top5_shortest_paths.pkl')
tree_gdf = gpd.read_file('data/final/tree_sites_all.geojson')
tree_ids = tree_gdf['original_index'].tolist()
S = dict(zip(tree_ids, tree_gdf['unique_shade_area'].tolist()))

overlap_df = pd.read_csv('data/final/tree_pairwise_overlap.csv')
O = {(int(r['i']), int(r['j'])): float(r['overlap_area']) for _, r in overlap_df.iterrows()}
O.update({(j, i): a for (i, j), a in O.items()})  # symmetric

shade_edge_df = pd.read_csv('data/final/shade_on_edge.csv')
shade_on_edge = {}
for _, r in shade_edge_df.iterrows():
    t, e, L = int(r['site_id']), int(r['edge_id']), float(r['shade_length'])
    shade_on_edge.setdefault(t, {})[e] = L

overlap_edge_df = pd.read_csv('data/final/overlap_shade_on_edge.csv')
overlap_shade_on_edge = {}
for _, r in overlap_edge_df.iterrows():
    i, j, e, L = int(r['site_i']), int(r['site_j']), int(r['edge_id']), float(r['overlap_shade_length'])
    key = (i, j) if i < j else (j, i)
    overlap_shade_on_edge.setdefault(key, {})[e] = L

edge_id_map = {data["edge_id"]: (u, v) for u, v, data in G.edges(data=True)}

# 保持你的原始函数不变
def compute_Z_shade(selected):
    sel = list(selected)
    shade = sum(S[i] for i in sel)
    overlap = sum(O.get((sel[i], sel[j]), 0.0) for i in range(len(sel)) for j in range(i+1, len(sel)))
    return shade - overlap

def compute_edge_shade(selected):
    edge_shade = {}
    for t in selected:
        for e, L in shade_on_edge.get(t, {}).items():
            edge_shade[e] = edge_shade.get(e, 0.0) + L
    sel = list(selected)
    for i in range(len(sel)):
        for j in range(i+1, len(sel)):
            key = (sel[i], sel[j]) if sel[i] < sel[j] else (sel[j], sel[i])
            for e, L in overlap_shade_on_edge.get(key, {}).items():
                edge_shade[e] = edge_shade.get(e, 0.0) - L
    return edge_shade

def compute_Z_coolwalk(selected, alpha=2.0):
    edge_shade = compute_edge_shade(selected)
    edge_lambda = {}
    for eid, (u, v) in edge_id_map.items():
        l = G[u][v]['length']
        shaded = min(edge_shade.get(eid, 0), l)
        lambda_e = alpha * max(l - shaded, 0) + shaded
        edge_lambda[eid] = max(lambda_e, 0.01)
    total = 0
    for _, row in od_top5_df.iterrows():
        lengths = []
        for i in range(1, 6):
            path = row[f'top_{i}_shortest']
            if not path:
                lengths.append(np.inf)
            else:
                lengths.append(sum(edge_lambda.get(eid, 1e6) for eid in path))
        best_len = min(lengths)
        o, d = int(row.origin_node), int(row.dest_node)
        traffic_row = od_base_df.query("origin_node == @o and dest_node == @d")
        if not traffic_row.empty:
            weight = float(traffic_row['traffic_o'].values[0]) * float(traffic_row['W'].values[0])
            total += best_len * weight
    full_shade = np.sum(od_base_df['path_length'] * od_base_df['traffic_o'] * od_base_df['W'])
    no_shade = alpha * full_shade
    return (no_shade - total) / (no_shade - full_shade)

# 数据预处理和采样策略
def stratified_od_sampling(od_base_df, od_top5_df, sample_ratio=0.15):
    """对OD对进行分层采样以减少计算量"""
    print(f"原始OD对数量: {len(od_base_df)}")
    
    # 计算权重并分层
    od_with_weight = od_base_df.copy()
    od_with_weight['weight'] = od_with_weight['traffic_o'] * od_with_weight['W']
    
    # 分成高中低权重三层
    q33 = od_with_weight['weight'].quantile(0.33)
    q67 = od_with_weight['weight'].quantile(0.67)
    
    high_weight = od_with_weight[od_with_weight['weight'] > q67]
    mid_weight = od_with_weight[(od_with_weight['weight'] > q33) & 
                               (od_with_weight['weight'] <= q67)]
    low_weight = od_with_weight[od_with_weight['weight'] <= q33]
    
    # 分层采样 (高权重采样更多)
    n_high = int(len(high_weight) * 0.6)  # 高权重采样60%
    n_mid = int(len(mid_weight) * 0.2)    # 中权重采样20%  
    n_low = int(len(low_weight) * 0.05)   # 低权重采样5%
    
    sampled_high = high_weight.sample(n=min(n_high, len(high_weight)), random_state=42)
    sampled_mid = mid_weight.sample(n=min(n_mid, len(mid_weight)), random_state=42)
    sampled_low = low_weight.sample(n=min(n_low, len(low_weight)), random_state=42)
    
    sampled_od_base = pd.concat([sampled_high, sampled_mid, sampled_low])
    
    # 对应的top5路径
    sampled_pairs = set(zip(sampled_od_base['origin_node'], sampled_od_base['dest_node']))
    od_top5_pairs = list(zip(od_top5_df['origin_node'], od_top5_df['dest_node']))
    mask = [pair in sampled_pairs for pair in od_top5_pairs]
    sampled_od_top5 = od_top5_df[mask].reset_index(drop=True)
    
    print(f"采样后OD对数量: {len(sampled_od_base)}")
    print(f"采样后top5路径数量: {len(sampled_od_top5)}")
    
    return sampled_od_base, sampled_od_top5

def prefilter_trees(tree_ids, S, shade_on_edge, top_k=30000):
    """预筛选树木候选"""
    print(f"原始树木数量: {len(tree_ids)}")
    
    # 计算每个树的综合得分
    tree_scores = []
    for tid in tree_ids:
        shade_area = S.get(tid, 0)
        edge_count = len(shade_on_edge.get(tid, {}))
        total_shade_length = sum(shade_on_edge.get(tid, {}).values())
        
        # 综合得分：遮阴面积 + 影响边数 + 总遮阴长度
        score = shade_area + edge_count * 100 + total_shade_length * 10
        tree_scores.append((tid, score))
    
    # 按得分排序，选择top k
    tree_scores.sort(key=lambda x: x[1], reverse=True)
    filtered_tree_ids = [tid for tid, _ in tree_scores[:top_k]]
    
    print(f"预筛选后树木数量: {len(filtered_tree_ids)}")
    return filtered_tree_ids

# 修改后的compute_Z_coolwalk，支持采样数据
def compute_Z_coolwalk_sampled(selected, od_base_sampled, od_top5_sampled, alpha=2.0):
    edge_shade = compute_edge_shade(selected)
    edge_lambda = {}
    for eid, (u, v) in edge_id_map.items():
        l = G[u][v]['length']
        shaded = min(edge_shade.get(eid, 0), l)
        lambda_e = alpha * max(l - shaded, 0) + shaded
        edge_lambda[eid] = max(lambda_e, 0.01)
    
    total = 0
    for _, row in od_top5_sampled.iterrows():
        lengths = []
        for i in range(1, 6):
            path = row[f'top_{i}_shortest']
            if not path:
                lengths.append(np.inf)
            else:
                lengths.append(sum(edge_lambda.get(eid, 1e6) for eid in path))
        best_len = min(lengths)
        o, d = int(row.origin_node), int(row.dest_node)
        traffic_row = od_base_sampled.query("origin_node == @o and dest_node == @d")
        if not traffic_row.empty:
            weight = float(traffic_row['traffic_o'].values[0]) * float(traffic_row['W'].values[0])
            total += best_len * weight
    
    full_shade = np.sum(od_base_sampled['path_length'] * od_base_sampled['traffic_o'] * od_base_sampled['W'])
    no_shade = alpha * full_shade
    return (no_shade - total) / (no_shade - full_shade)

# 修改后的TreeParetoProblem类
from pymoo.core.problem import ElementwiseProblem

class TreeParetoProblemSampled(ElementwiseProblem):
    def __init__(self, tree_ids, S, O, shade_on_edge, overlap_shade_on_edge, 
                 G, edge_id_map, od_top5_sampled, od_base_sampled, N, alpha=2.0):
        super().__init__(n_var=len(tree_ids), n_obj=2, n_constr=1, xl=0, xu=1, vtype=int)
        self.tree_ids = tree_ids
        self.S = S
        self.O = O
        self.shade_on_edge = shade_on_edge
        self.overlap_shade_on_edge = overlap_shade_on_edge
        self.G = G
        self.edge_id_map = edge_id_map
        self.od_top5_sampled = od_top5_sampled
        self.od_base_sampled = od_base_sampled
        self.N = N
        self.alpha = alpha

    def _evaluate(self, x, out, *args, **kwargs):
        sel = [self.tree_ids[i] for i, xi in enumerate(x) if xi == 1]
        out["G"] = [abs(len(sel) - self.N)]
        if len(sel) != self.N:
            out["F"] = [1e6, 1e6]
            return
        z_shade = -compute_Z_shade(sel)
        z_coolwalk = -compute_Z_coolwalk_sampled(sel, self.od_base_sampled, self.od_top5_sampled, self.alpha)
        out["F"] = [z_shade, z_coolwalk]

# 保持你的采样类不变
from pymoo.core.sampling import Sampling

class FixedSumBinarySampling(Sampling):
    def __init__(self, N):
        super().__init__()
        self.N = N
    def _do(self, problem, n_samples, **kwargs):
        X = []
        for _ in range(n_samples):
            arr = np.zeros(problem.n_var, dtype=int)
            idx = np.random.choice(problem.n_var, self.N, replace=False)
            arr[idx] = 1
            X.append(arr)
        return np.array(X)

# 分层优化策略
def hierarchical_optimization(original_tree_ids, S, O, shade_on_edge, overlap_shade_on_edge,
                            G, edge_id_map, od_top5_sampled, od_base_sampled, 
                            N_final=3000, alpha=2.0):
    """分层优化：逐步缩小候选集合"""
    
    # 第一层：预筛选
    print("\n=== 第一层优化 ===")
    level1_candidates = prefilter_trees(original_tree_ids, S, shade_on_edge, top_k=20000)
    level1_select = min(N_final * 4, len(level1_candidates)//2)  # 选择目标的4倍
    
    print(f"从{len(level1_candidates)}个候选中选择{level1_select}个")
    level1_solutions = run_optimization(
        level1_candidates, S, O, shade_on_edge, overlap_shade_on_edge,
        G, edge_id_map, od_top5_sampled, od_base_sampled,
        N_select=level1_select, pop_size=50, n_gen=20, alpha=alpha
    )
    
    # 收集第一层的所有解
    level2_candidates = set()
    for sol in level1_solutions:
        level2_candidates.update(sol['tree_ids'])
    level2_candidates = list(level2_candidates)
    
    # 第二层：精细化
    print(f"\n=== 第二层优化 ===")
    level2_select = min(N_final * 2, len(level2_candidates)//2)
    print(f"从{len(level2_candidates)}个候选中选择{level2_select}个")
    level2_solutions = run_optimization(
        level2_candidates, S, O, shade_on_edge, overlap_shade_on_edge,
        G, edge_id_map, od_top5_sampled, od_base_sampled,
        N_select=level2_select, pop_size=100, n_gen=50, alpha=alpha
    )
    
    # 收集第二层的所有解
    level3_candidates = set()
    for sol in level2_solutions:
        level3_candidates.update(sol['tree_ids'])
    level3_candidates = list(level3_candidates)
    
    # 第三层：最终优化
    print(f"\n=== 第三层优化 ===")
    print(f"从{len(level3_candidates)}个候选中选择{N_final}个")
    final_solutions = run_optimization(
        level3_candidates, S, O, shade_on_edge, overlap_shade_on_edge,
        G, edge_id_map, od_top5_sampled, od_base_sampled,
        N_select=N_final, pop_size=200, n_gen=100, alpha=alpha
    )
    
    return final_solutions

def run_optimization(tree_candidates, S, O, shade_on_edge, overlap_shade_on_edge,
                    G, edge_id_map, od_top5_sampled, od_base_sampled,
                    N_select, pop_size=100, n_gen=50, alpha=2.0):
    """运行单次优化"""
    
    from pymoo.algorithms.moo.nsga2 import NSGA2
    from pymoo.optimize import minimize
    from pymoo.termination import get_termination
    from pymoo.operators.crossover.pntx import TwoPointCrossover as BinaryTwoPointCrossover
    from pymoo.operators.mutation.bitflip import BitflipMutation as BinaryBitflipMutation
    
    problem = TreeParetoProblemSampled(
        tree_ids=tree_candidates,
        S=S, O=O,
        shade_on_edge=shade_on_edge, 
        overlap_shade_on_edge=overlap_shade_on_edge,
        G=G, edge_id_map=edge_id_map,
        od_top5_sampled=od_top5_sampled, 
        od_base_sampled=od_base_sampled,
        N=N_select, alpha=alpha
    )
    
    algorithm = NSGA2(
        pop_size=pop_size,
        sampling=FixedSumBinarySampling(N_select),
        crossover=BinaryTwoPointCrossover(),
        mutation=BinaryBitflipMutation(),
        eliminate_duplicates=True
    )
    
    termination = get_termination("n_gen", n_gen)
    
    t0 = time.time()
    res = minimize(problem, algorithm, termination, seed=42, verbose=True)
    t1 = time.time()
    print(f"优化耗时: {t1-t0:.2f}秒")
    
    solutions = []
    for i, x in enumerate(res.X):
        sel = [tree_candidates[j] for j, xi in enumerate(x) if xi == 1]
        Z_shade = -res.F[i, 0]
        Z_coolwalk = -res.F[i, 1]
        solutions.append({
            "tree_ids": sel,
            "Z_shade": Z_shade,
            "Z_coolwalk": Z_coolwalk
        })
    
    return solutions


In [2]:

# =================== 主程序 ===================
if __name__ == "__main__":
    print("开始大规模树木选择优化...")
    
    # 设置参数
    N_SELECT = 3000
    ALPHA = 2.0
    
    # 第一步：OD对采样
    print("\n=== 步骤1: OD对采样 ===")
    sampled_od_base, sampled_od_top5 = stratified_od_sampling(od_base_df, od_top5_df, sample_ratio=0.15)
    
    # 第二步：分层优化
    print("\n=== 步骤2: 分层优化 ===")
    total_start = time.time()
    
    final_solutions = hierarchical_optimization(
        tree_ids, S, O, shade_on_edge, overlap_shade_on_edge,
        G, edge_id_map, sampled_od_top5, sampled_od_base,
        N_final=N_SELECT, alpha=ALPHA
    )
    
    total_end = time.time()
    
    print(f"\n=== 优化完成 ===")
    print(f"总耗时: {total_end-total_start:.2f}秒")
    print(f"找到 {len(final_solutions)} 个Pareto最优解")
    
    # 第三步：结果分析和保存
    print("\n=== 结果分析 ===")
    for i, sol in enumerate(final_solutions[:5]):  # 显示前5个解
        print(f"解 {i+1}: Z_shade={sol['Z_shade']:.4f}, Z_coolwalk={sol['Z_coolwalk']:.4f}")
    
    # 保存结果
    print("\n=== 保存结果 ===")
    results_data = {
        'pareto_solutions': final_solutions,
        'parameters': {
            'N_SELECT': N_SELECT,
            'ALPHA': ALPHA,
            'n_original_trees': len(tree_ids),
            'n_original_od_pairs': len(od_base_df),
            'n_sampled_od_pairs': len(sampled_od_base),
            'total_time': total_end-total_start
        }
    }
    
    with open('large_scale_pareto_solutions.pkl', 'wb') as f:
        pickle.dump(results_data, f)
    
    print("结果已保存到 'large_scale_pareto_solutions.pkl'")
    print("优化完成！")

开始大规模树木选择优化...

=== 步骤1: OD对采样 ===
原始OD对数量: 1162489
采样后OD对数量: 328403
采样后top5路径数量: 328403

=== 步骤2: 分层优化 ===

=== 第一层优化 ===
原始树木数量: 667221
预筛选后树木数量: 20000
从20000个候选中选择10000个


KeyboardInterrupt: 