In [None]:
import os

import sys
import csv
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
from pathlib import Path

from networkx.algorithms.community import greedy_modularity_communities
from networkx.drawing.layout import *
from graph import network

# Absolute path of the current working directory (presumably the folder the
# notebook is launched from -- the value is a directory, not the notebook file).
notebook_path = os.path.abspath(os.curdir)

In [None]:
# Let Python resolve imports relative to the parent of the working directory.
sys.path.append("..")

# Directory that is scanned for topology subfolders.
root_path = Path.cwd().parent / "graphs_json"

# Workbook that receives the aggregated summary.
excel_file = Path.cwd().parent / "new_result" / "ratioresults_for_classic_clustering.xlsx"

In [None]:
"""
Aggregate protocol success ratios (ER) from CSV files and generate a summary Excel file.

This script:
    - Iterates over topology subfolders and result files
    - Reads each protocol's success ratio from *_sr_details.csv
    - Calculates average ERs and improvement ratios (MPC vs MPG and SP)
    - Groups results by topology class (from folder name)
    - Writes per-class summaries and overall averages to an Excel workbook
"""


# Columns every *_sr_details.csv is expected to provide, in output order.
_REQUIRED_KEYS = ("MPC_protocol", "MPG_protocol", "SP_protocol")


def _improvement_percent(value, baseline):
    """Return the relative gain of *value* over *baseline* in percent.

    A zero baseline yields ``float("inf")`` instead of dividing by zero.
    """
    if baseline == 0:
        return float("inf")
    return ((value - baseline) / baseline) * 100


def _average_protocol_ers(csv_path):
    """Average the required protocol columns of one ``*_sr_details.csv`` file.

    Returns:
        A ``(row_count, averages)`` tuple. ``row_count`` is the number of data
        rows that parsed cleanly; ``averages`` maps every key of
        ``_REQUIRED_KEYS`` to its mean over those rows (an empty dict when
        ``row_count`` is 0). A required column that is absent from the header
        is reported and averaged as 0.

    Raises:
        FileNotFoundError: if *csv_path* does not exist.
        ValueError: if the file has no header row.
    """
    with open(csv_path, "r", encoding="utf-8", newline="") as f:
        reader = csv.reader(f)
        headers = next(reader, None)
        if headers is None:
            raise ValueError("CSV file is empty (no header row)")

        key_indices = {key: headers.index(key) for key in _REQUIRED_KEYS if key in headers}
        missing = [key for key in _REQUIRED_KEYS if key not in key_indices]
        if missing:
            print(f"Missing column(s) {missing} in {csv_path}; treating them as 0.")

        totals = dict.fromkeys(_REQUIRED_KEYS, 0.0)
        row_count = 0
        for row in reader:
            try:
                # Parse the whole row before touching the totals, so that a
                # malformed row contributes nothing (no partial sums, no count).
                values = {key: float(row[index]) for key, index in key_indices.items()}
            except (IndexError, ValueError) as e:
                print(f"Row {reader.line_num} has formatting error: {e}")
                continue

            row_count += 1
            for key, value in values.items():
                totals[key] += value

    if row_count == 0:
        return 0, {}
    return row_count, {key: total / row_count for key, total in totals.items()}


# class name (topology subfolder name) -> list of per-file summary rows
results_dict = {}
result_root = Path.cwd().parent.joinpath("new_result")
subfolders = sorted(sf for sf in root_path.iterdir() if sf.is_dir())

for subfolder in subfolders:
    files = sorted(f for f in subfolder.iterdir() if f.is_file())

    for file in files:
        # Results for <root>/<class>/<name>.<ext> are stored in
        # new_result/<class>/<name>/<name>_sr_details.csv
        new_path = result_root.joinpath(
            subfolder.name, file.stem, f"{file.stem}_sr_details.csv")

        try:
            combination_count, average_protocol_ers = _average_protocol_ers(new_path)
        except FileNotFoundError:
            print(f"File not found: {new_path}, skipping.")
            continue
        except (OSError, ValueError, csv.Error) as e:
            print(f"Failed to process {file.name}: {e}")
            continue

        if combination_count == 0:
            # Without this guard the row would be built from undefined values
            # or from the averages of the previously processed file.
            print(f"No valid data rows in {new_path}, skipping.")
            continue

        improve_ratio_mpc_mpg = _improvement_percent(
            average_protocol_ers["MPC_protocol"], average_protocol_ers["MPG_protocol"])
        improve_ratio_mpc_sp = _improvement_percent(
            average_protocol_ers["MPC_protocol"], average_protocol_ers["SP_protocol"])

        class_name = subfolder.name
        row_data = [file.stem, combination_count] + list(average_protocol_ers.values()) + [
            improve_ratio_mpc_mpg, improve_ratio_mpc_sp]
        results_dict.setdefault(class_name, []).append(row_data)

# Export: one worksheet per topology class, each closed by an "Average" row.
if not results_dict:
    print("No data available to write.")
else:
    column_names = [
        "topology_name", "combination_count",
        "mpc_avg", "mpg_avg", "sp_avg",
        "improve_ratio_mpc_mpg", "improve_ratio_mpc_sp",
    ]
    # Every column after the name and the count is averaged over the files.
    averaged_columns = column_names[2:]

    with pd.ExcelWriter(excel_file) as writer:
        for class_name, rows in results_dict.items():
            per_file = pd.DataFrame(rows, columns=column_names)

            # Column means across all files of this class
            column_means = [per_file[name].mean() for name in averaged_columns]
            summary = pd.DataFrame(
                [["Average", "", *column_means]], columns=per_file.columns)

            # The sheet is named after the topology class
            df = pd.concat([per_file, summary], ignore_index=True)
            df.to_excel(writer, sheet_name=class_name, index=False)
