In [None]:
import igraph as ig
import pandas as pd
from pathlib import Path
from src.config import INTERIM_DATA_DIR

# Locations of the pickled graph structures
electric_graph_path = INTERIM_DATA_DIR / "Texas7k_Gas/electric_graph.pkl"
gas_graph_path = INTERIM_DATA_DIR / "Texas7k_Gas/gas_graph.pkl"
merged_graph_path = INTERIM_DATA_DIR / "Texas7k_Gas/merged_graph.pkl"

# Load the graphs in the same order as the paths above: electric, gas, merged
electric_graph, gas_graph, merged_graph = (
    ig.Graph.Read_Pickle(pickle_path)
    for pickle_path in (electric_graph_path, gas_graph_path, merged_graph_path)
)

In [None]:
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import display, Markdown

# Text for the description column of the attribute tables, keyed by vertex
# attribute name. Attributes not listed here get an empty description.
_ATTR_DESCRIPTIONS = {
    "name": "节点名称或标识符",
    "label": "节点描述性标签",
    "latitude": "节点纬度坐标",
    "longitude": "节点经度坐标",
    "substation_id": "关联变电站ID",
    "degree": "节点连接边的数量",
    "is_electric_node": "是否为电力节点",
    "is_gas_node": "是否为天然气节点",
    # Electric attributes
    "base_kv": "基准电压(千伏)",
    "bus_type": "母线类型(1=PQ负荷节点, 2=PV发电节点, 3=平衡节点, 4=孤立节点)",
    "pd": "有功负荷(MW)",
    "qd": "无功负荷(MVar)",
    "has_generator": "是否有发电机",
    "total_gen_capacity": "节点发电机总容量(MW)",
    "gen_pg": "发电机有功出力(MW)",
    "gen_qg": "发电机无功出力(MVar)",
    "gen_pmax": "发电机最大有功出力(MW)",
    "gen_pmin": "发电机最小有功出力(MW)",
    "gen_type": "发电机类型",
    "gen_fuel": "发电机燃料类型",
    # Gas attributes
    "node_type": "天然气节点类型",
    "gas_load_p": "天然气节点压力",
    "gas_load_qf": "天然气流量负荷",
    "qf_max": "最大流量",
    "qf_min": "最小流量",
    "is_slack": "是否为天然气网络平衡节点",
}

# Display labels for generator type codes; unknown codes are shown verbatim.
_GEN_TYPE_LABELS = {
    "CT": "燃气轮机 (Combustion Turbine)",
    "WT": "风力发电机 (Wind Turbine)",
    "GT": "燃气透平机 (Gas Turbine)",
    "ST": "蒸汽轮机 (Steam Turbine)",
    "CA": "联合循环机组 (Combined Cycle)",
    "UN": "未知类型 (Unknown)",
    "IC": "内燃机 (Internal Combustion)",
    "PV": "光伏发电 (Photovoltaic)",
    "HY": "水力发电 (Hydro)",
    "CS": "集中式太阳能 (Concentrated Solar)",
    "BA": "电池储能 (Battery)",
}

# Display labels for gas node types; unknown types are shown verbatim.
_GAS_NODE_TYPE_LABELS = {
    "Junction": "管道连接点",
    "Gas Load": "天然气负荷点",
    "Processing Plant": "天然气处理厂",
    "Electric Load": "电力负荷点(耦合点)",
    "Storage Reservoir": "天然气储存库",
    "Import/Export Point": "进出口点",
    "Trading Hub": "交易中心",
    "Processing Plant - Slack": "平衡节点处理厂",
}


def _attr_value(attrs, name, default=None):
    """Return ``attrs[name]``, or *default* when the key is absent or its value is None."""
    value = attrs.get(name)
    return default if value is None else value


def _coverage(nodes, name):
    """Return the fraction of *nodes* whose attribute *name* is set (not None); 0 if empty."""
    if not nodes:
        return 0
    return sum(1 for _, attrs in nodes if attrs.get(name) is not None) / len(nodes)


def _sample_table(nodes, columns, sample_size):
    """Build Markdown table lines showing *columns* for the first *sample_size* nodes."""
    lines = [
        "| 节点ID | " + " | ".join(columns) + " |",
        "|" + "-" * 8 + "|" + "|".join(["-" * 12] * len(columns)) + "|",
    ]
    for index, attrs in nodes[:sample_size]:
        cells = [str(index)]
        cells.extend(str(_attr_value(attrs, column, "")) for column in columns)
        lines.append("| " + " | ".join(cells) + " |")
    return lines


def _distribution_rows(counts, labels, total):
    """Build Markdown rows (label, count, share of *total*), most frequent first."""
    return [
        f"| {labels.get(key, key)} | {count} | {count/total*100:.1f}% |"
        for key, count in counts.most_common()
    ]


def _render_markdown(text):
    """Render *text* as Markdown via IPython, or print it when IPython is not installed."""
    try:
        from IPython.display import Markdown, display
    except ImportError:
        print(text)
    else:
        display(Markdown(text))


def analyze_network_attributes(graph, output_file=None, sample_size: int = 5) -> str:
    """
    Summarise the vertex attributes of a graph as a Markdown report.

    The report contains a network overview, attribute coverage tables
    (common / electric-only / gas-only), sample rows for each node family and
    the distribution of generator types and gas node types. It is displayed
    (rendered through IPython when available, printed otherwise), optionally
    written to disk, and returned.

    Parameters
    ----------
    graph : igraph.Graph
        Graph to analyse. Only ``graph.vs``, ``graph.vcount()``,
        ``graph.ecount()`` and, per vertex, ``index`` and ``attributes()``
        are used. Vertices whose ``is_electric_node`` / ``is_gas_node``
        attribute is truthy form the electric / gas node groups.
    output_file : str or Path, optional
        Where to save the Markdown report (UTF-8). Missing parent
        directories are created. Nothing is written when falsy.
    sample_size : int
        Maximum number of sample rows shown per node family.

    Returns
    -------
    str
        The full Markdown report.
    """
    from collections import Counter

    # Snapshot each vertex's attribute dict once instead of rebuilding it for
    # every attribute lookup.
    nodes = [(v.index, v.attributes()) for v in graph.vs]
    electric_nodes = [
        node for node in nodes if _attr_value(node[1], "is_electric_node", False)
    ]
    gas_nodes = [node for node in nodes if _attr_value(node[1], "is_gas_node", False)]

    # --- 1. Overview -------------------------------------------------------
    md_content = [
        "# 电力-天然气融合网络节点属性分析\n",
        "## 1. 网络概述\n",
        f"- **总节点数**: {graph.vcount()} 个",
        f"- **总边数**: {graph.ecount()} 个",
        f"- **电力节点数**: {len(electric_nodes)} 个",
        f"- **天然气节点数**: {len(gas_nodes)} 个",
        "",
    ]

    # --- 2. Attribute classification by coverage ---------------------------
    all_attributes = set()
    for _, attrs in nodes:
        all_attributes.update(attrs)

    electric_coverage = {}
    gas_coverage = {}
    common_attrs = []
    electric_attrs = []
    gas_attrs = []
    for attr in sorted(all_attributes):
        electric_share = electric_coverage[attr] = _coverage(electric_nodes, attr)
        gas_share = gas_coverage[attr] = _coverage(gas_nodes, attr)
        if electric_share > 0.5 and gas_share <= 0.5:
            electric_attrs.append(attr)
        elif gas_share > 0.5 and electric_share <= 0.5:
            gas_attrs.append(attr)
        else:
            # Well covered on both sides, or poorly covered on both: the
            # low-coverage attributes are deliberately listed as common too.
            common_attrs.append(attr)

    md_content.append("## 2. 节点属性分类\n")

    md_content.append("### 2.1 通用属性\n")
    md_content.append("| 属性名 | 电力节点覆盖率 | 天然气节点覆盖率 | 说明 |")
    md_content.append("|--------|--------------|----------------|------|")
    for attr in common_attrs:
        desc = _ATTR_DESCRIPTIONS.get(attr, "")
        md_content.append(
            f"| {attr} | {electric_coverage[attr]*100:.1f}% | {gas_coverage[attr]*100:.1f}% | {desc} |"
        )

    md_content.append("\n### 2.2 电力节点特有属性\n")
    md_content.append("| 属性名 | 电力节点覆盖率 | 说明 |")
    md_content.append("|--------|--------------|------|")
    for attr in electric_attrs:
        desc = _ATTR_DESCRIPTIONS.get(attr, "")
        md_content.append(f"| {attr} | {electric_coverage[attr]*100:.1f}% | {desc} |")

    md_content.append("\n### 2.3 天然气节点特有属性\n")
    md_content.append("| 属性名 | 天然气节点覆盖率 | 说明 |")
    md_content.append("|--------|----------------|------|")
    for attr in gas_attrs:
        desc = _ATTR_DESCRIPTIONS.get(attr, "")
        md_content.append(f"| {attr} | {gas_coverage[attr]*100:.1f}% | {desc} |")

    # --- 3. Sample rows ----------------------------------------------------
    md_content.append("\n## 3. 节点样本分析\n")

    md_content.append("### 3.1 电力节点样本\n")
    if electric_nodes:
        key_electric_attrs = ["name", "bus_type", "base_kv", "pd", "qd", "has_generator"]
        # Generator columns are only worth showing if some node has a generator.
        if any(_attr_value(attrs, "has_generator", False) for _, attrs in electric_nodes):
            key_electric_attrs.extend(["gen_type", "gen_pmax"])
        md_content.extend(_sample_table(electric_nodes, key_electric_attrs, sample_size))

    md_content.append("\n### 3.2 天然气节点样本\n")
    if gas_nodes:
        key_gas_attrs = ["name", "node_type", "gas_load_p", "gas_load_qf", "is_slack"]
        md_content.extend(_sample_table(gas_nodes, key_gas_attrs, sample_size))

    # --- 4. Type distributions --------------------------------------------
    md_content.append("\n## 4. 节点类型分析\n")

    gen_types = Counter(
        _attr_value(attrs, "gen_type", "Unknown")
        for _, attrs in electric_nodes
        if _attr_value(attrs, "has_generator", False)
    )
    if gen_types:
        md_content.append("### 4.1 发电机类型分布\n")
        md_content.append("| 发电机类型 | 数量 | 占比 |")
        md_content.append("|-----------|------|------|")
        md_content.extend(
            _distribution_rows(gen_types, _GEN_TYPE_LABELS, sum(gen_types.values()))
        )

    if gas_nodes:
        node_types = Counter(
            _attr_value(attrs, "node_type", "Unknown") for _, attrs in gas_nodes
        )
        md_content.append("\n### 4.2 天然气节点类型分布\n")
        md_content.append("| 节点类型 | 数量 | 占比 |")
        md_content.append("|---------|------|------|")
        md_content.extend(
            _distribution_rows(node_types, _GAS_NODE_TYPE_LABELS, len(gas_nodes))
        )

    report = "\n".join(md_content)

    # Show the result
    _render_markdown(report)

    # Save as a Markdown file
    if output_file:
        target = Path(output_file)
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(report, encoding="utf-8")
        print(f"分析结果已保存至: {output_file}")

    return report

# Run the analysis on the merged graph and save the report
output_path = Path("/Volumes/Work/DevSpace/PowerGridCompoundRisk/reports/network_analysis.md")
analyze_network_attributes(merged_graph, output_file=output_path)