In [16]:
import os

import sys
import csv
import json
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
from pathlib import Path
from scipy.linalg import eigh

from networkx.algorithms.community import greedy_modularity_communities
from networkx.drawing.layout import *
from graph import network

# Absolute path of the current working directory. This equals the notebook's
# folder only when the kernel was started from that folder.
notebook_path = os.path.abspath(os.curdir)

In [17]:
# Make sure the project root (the parent directory) is on the Python path.
# NOTE(review): on a fresh kernel the imports cell runs `from graph import network`
# before this line executes -- confirm that import does not rely on this append.
sys.path.append("..")

# Input and output locations, all resolved relative to the parent of the
# current working directory.
root_path = Path.cwd().parent / "graphs_json"
results_csv = Path.cwd().parent / "new_result" / "anomaly_results.csv"
excel_file = Path.cwd().parent / "new_result" / "SR_results.xlsx"

In [18]:
def load_data(filepath):
    """Build an undirected graph from a node-link style JSON file.

    The JSON document must contain a ``"nodes"`` list whose entries provide
    ``id``, ``latitude``, ``longitude``, ``location`` and ``country``, and a
    ``"links"`` list whose entries provide ``source``, ``target`` and ``length``.

    Args:
        filepath: Path of the JSON file to read.

    Returns:
        tuple: ``(G, user, pos)`` where

        * ``G`` is an ``nx.Graph``; nodes carry ``location`` and ``country``
          attributes, edges carry a ``length`` attribute.
        * ``user`` is a list holding the id of the first node listed in the
          file, or an empty list when the file has no nodes.
        * ``pos`` maps each node id to ``(longitude, latitude)``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If a required field is missing from the document.
        ValueError: If an edge endpoint cannot be converted to ``int``.
    """
    # JSON is UTF-8 by specification; do not depend on the platform's default
    # text encoding (which differs between operating systems and locales).
    with open(filepath, "r", encoding="utf-8") as f:
        data = json.load(f)

    G = nx.Graph()
    pos = {}

    # Nodes: keep the descriptive attributes on the graph and the coordinates
    # in a separate layout dictionary.
    for node in data["nodes"]:
        node_id = node["id"]
        x, y = node["latitude"], node["longitude"]
        G.add_node(node_id, location=node["location"], country=node["country"])
        pos[node_id] = (y, x)  # stored as (longitude, latitude)

    # Edges.
    # NOTE(review): endpoints are cast to int while node ids are used as-is.
    # If ids are stored as strings in the JSON, edges would attach to separate
    # integer nodes -- confirm that node ids are integers in the input files.
    for edge in data["links"]:
        source = int(edge["source"])
        target = int(edge["target"])
        G.add_edge(source, target, length=edge["length"])

    # The first node listed in the file; an empty file yields an empty list
    # instead of raising IndexError.
    nodes = data["nodes"]
    user = [nodes[0]["id"]] if nodes else []

    return G, user, pos

In [19]:
def calculate_topology_features(G):
    """Compute a dictionary of topology metrics for an undirected graph.

    Args:
        G: An undirected ``networkx`` graph (connectivity checks used here are
            only defined for undirected graphs).

    Returns:
        dict: Metrics in this insertion order (callers use the key order as
        column order):

        * ``n``, ``m``, ``avg_degree`` -- node count, edge count, ``2m/n``.
        * ``diameter`` -- graph diameter; for a disconnected graph, the largest
          diameter among its connected components.
        * ``density`` -- ``nx.density``.
        * ``avg_shortest_path`` -- average shortest path length, or
          ``float('inf')`` when the graph is not connected.
        * ``avg_clustering`` -- mean of the per-node clustering coefficients.
        * ``max_edge_betweenness``, ``max_node_betweenness`` -- maxima of the
          edge / node betweenness centralities.
        * ``global_efficiency`` -- mean of ``1/d(u, v)`` over all ordered node
          pairs, unreachable pairs contributing 0.
        * ``spectral_radius`` -- largest absolute adjacency eigenvalue.
        * ``algebraic_connectivity`` -- second-smallest eigenvalue of the
          *normalized* Laplacian (0 when ``n < 2``).
        * ``WSD`` -- ``sum((1 - lambda_i) ** 4)`` over the normalized Laplacian
          eigenvalues.
        * ``node_connectivity``, ``edge_connectivity`` -- 0 when the graph is
          not connected.
    """
    features = {}

    # Basic metrics
    n = G.number_of_nodes()
    m = G.number_of_edges()
    features["n"] = n
    features["m"] = m
    features["avg_degree"] = 2 * m / n if n > 0 else 0

    # nx.is_connected raises on a graph without nodes, so an empty graph is
    # reported as "not connected" without calling it.
    is_connected = n > 0 and nx.is_connected(G)

    # Diameter (per component for disconnected graphs)
    if is_connected:
        features["diameter"] = nx.diameter(G)
    else:
        # connected_components yields sets of nodes, not graphs; nx.diameter
        # needs a graph, so take the induced subgraph of each component.
        features["diameter"] = max(
            (nx.diameter(G.subgraph(nodes)) for nodes in nx.connected_components(G)),
            default=0,
        )

    # Network density
    features["density"] = nx.density(G)

    # Average shortest path length (defined for connected graphs only)
    features["avg_shortest_path"] = (
        nx.average_shortest_path_length(G) if is_connected else float("inf")
    )

    # Clustering coefficient (guard against division by zero on an empty graph)
    clustering = nx.clustering(G)
    features["avg_clustering"] = (
        sum(clustering.values()) / len(clustering) if clustering else 0
    )

    # Betweenness centrality
    edge_betweenness = nx.edge_betweenness_centrality(G)
    features["max_edge_betweenness"] = max(edge_betweenness.values(), default=0)

    node_betweenness = nx.betweenness_centrality(G)
    features["max_node_betweenness"] = max(node_betweenness.values(), default=0)

    # Global efficiency: unreachable pairs are absent from the distance table
    # and therefore contribute nothing to the numerator.
    shortest_paths = dict(nx.shortest_path_length(G))
    inverse_distances = [
        1 / shortest_paths[u][v]
        for u in G
        for v in G
        if u != v and v in shortest_paths[u]
    ]
    features["global_efficiency"] = (
        sum(inverse_distances) / (n * (n - 1)) if n > 1 else 0
    )

    # Spectral features
    if n > 0:
        # For an undirected graph both matrices are real symmetric, so use
        # eigvalsh: it returns real eigenvalues in ascending order. The general
        # eigvals solver may return complex values with round-off imaginary
        # parts, which cannot be sorted and do not belong in the output.
        adjacency = nx.adjacency_matrix(G).toarray()
        adjacency_eigenvalues = np.linalg.eigvalsh(adjacency)
        features["spectral_radius"] = float(np.max(np.abs(adjacency_eigenvalues)))

        laplacian = nx.normalized_laplacian_matrix(G).toarray()
        laplacian_eigenvalues = np.linalg.eigvalsh(laplacian)
        features["algebraic_connectivity"] = (
            float(laplacian_eigenvalues[1]) if n >= 2 else 0
        )

        # Weighted Spectral Distribution (WSD) with exponent 4
        features["WSD"] = float(np.sum((1 - laplacian_eigenvalues) ** 4))
    else:
        # The matrix constructors reject a graph without nodes; an empty
        # spectrum gives zero for every spectral quantity.
        features["spectral_radius"] = 0
        features["algebraic_connectivity"] = 0
        features["WSD"] = 0

    # K-connectivity
    features["node_connectivity"] = nx.node_connectivity(G) if is_connected else 0
    features["edge_connectivity"] = nx.edge_connectivity(G) if is_connected else 0

    return features

In [20]:
# Aggregate, for every topology JSON under `root_path`, the per-protocol averages
# found in its `<stem>_sr_details.csv` result file together with the topology
# features of the graph, then write one Excel sheet per topology class
# (one class per sub-folder of `root_path`).

required_keys = ["MPC_protocol", "MPG_protocol", "SP_protocol"]


def _average_protocol_rates(csv_path, keys):
    """Average the `keys` columns of one details CSV.

    Returns `(row_count, averages)`. Only rows whose required columns all parse
    as floats are counted, so a malformed row can neither inflate the
    denominator nor leave a partial sum behind. Keys that are absent from the
    header average to 0. `averages` is empty when no valid row exists.
    """
    totals = dict.fromkeys(keys, 0)
    row_count = 0

    with open(csv_path, "r", encoding="utf-8", newline="") as handle:
        reader = csv.reader(handle)
        headers = next(reader, None)
        if headers is None:  # completely empty file
            return 0, {}

        key_indices = {key: headers.index(key) for key in keys if key in headers}

        for row in reader:
            try:
                # Parse the whole row first; commit to the totals only if
                # every required value is valid.
                parsed = {key: float(row[index]) for key, index in key_indices.items()}
            except (IndexError, ValueError) as e:
                print(f"行 {reader.line_num} 数据格式错误: {str(e)}")
                continue
            row_count += 1
            for key, value in parsed.items():
                totals[key] += value

    if row_count == 0:
        return 0, {}
    return row_count, {key: total / row_count for key, total in totals.items()}


def _improvement_ratio(value, baseline):
    """Relative improvement of `value` over `baseline` in percent (inf if baseline is 0)."""
    return ((value - baseline) / baseline) * 100 if baseline != 0 else float("inf")


results_dict = {}
column_names = []

subfolders = sorted(sf for sf in root_path.iterdir() if sf.is_dir())

for subfolder in subfolders:
    files = sorted(f for f in subfolder.iterdir() if f.is_file())

    for file in files:
        # graphs_json/<class>/<name>.json -> new_result/<class>/<name>/<name>_sr_details.csv
        the_result_path = Path.cwd().parent.joinpath("new_result", *file.parts[-2:])
        new_path = the_result_path.with_suffix('').joinpath(f"{file.stem}_sr_details.csv")

        try:
            combination_count, average_protocol_ers = _average_protocol_rates(new_path, required_keys)
            if combination_count == 0:
                # Without this guard the row would be built from the previous
                # file's averages (or fail with NameError on the first file).
                print(f"文件 {new_path} 没有有效数据行，跳过")
                continue

            improve_ratio_mpc_mpg = _improvement_ratio(
                average_protocol_ers["MPC_protocol"], average_protocol_ers["MPG_protocol"]
            )
            improve_ratio_mpc_sp = _improvement_ratio(
                average_protocol_ers["MPC_protocol"], average_protocol_ers["SP_protocol"]
            )

            class_name = new_path.parent.parent.name

            # Only the graph is needed here; indexing avoids rebinding `_`,
            # which IPython uses for the last cell output.
            G = load_data(file)[0]
            features_dict = calculate_topology_features(G)

            # The feature keys of the first successful file define the columns.
            if not column_names:
                column_names = list(features_dict.keys())

            row_data = [
                file.stem,
                combination_count,
                *average_protocol_ers.values(),
                improve_ratio_mpc_mpg,
                improve_ratio_mpc_sp,
                *features_dict.values(),
            ]
            results_dict.setdefault(class_name, []).append(row_data)

        except FileNotFoundError:
            print(f"文件 {new_path} 未找到，跳过")
        except Exception as e:
            # Deliberate best effort: one bad topology must not abort the batch.
            print(f"处理 {file.name} 失败: {str(e)}")

if results_dict:
    columns = ["topology_name", "combination_count", "mpc_avg", "mpg_avg", "sp_avg",
               "improve_ratio_mpc_mpg", "improve_ratio_mpc_sp"] + column_names

    with pd.ExcelWriter(excel_file) as writer:
        for class_name, rows in results_dict.items():
            df = pd.DataFrame(rows, columns=columns)
            # A per-sheet "Average" summary row was previously sketched here as
            # commented-out code; it is not produced by this cell.
            df.to_excel(writer, sheet_name=class_name, index=False)
else:
    print("没有数据可以写入")

