In [5]:
import numpy as np
import sys
import pandas as pd
import datetime
import os
import glob
import ast
import random
import csv
import copy  

For each value of p (`p_e`) there are 100 graphs in total: the first 50 are entirely dense systems (not discussed in the paper), and the remaining 50 are the split-brain-like systems with N=100 that are treated in the paper.

In [6]:
# Directory holding the GNN estimation outputs; the result workbooks
# produced by this notebook are written to the same directory.
save_folder = "result/"

In [7]:
def calculate_statistics(dfs, column, mask, threshold=0.6, decimals=3):
    """Aggregate per-element label estimates across the selected models.

    For every row, the list-valued cells of ``column`` coming from the
    selected frames are stacked and reduced element-wise.

    Parameters
    ----------
    dfs : sequence of pandas.DataFrame
        One frame per model. Each must contain ``column``; its cells are
        either numeric sequences or their string representation (strings
        are parsed with ``ast.literal_eval``).
    column : str
        Name of the column to aggregate.
    mask : sequence of bool
        One flag per entry of ``dfs``; only frames flagged True contribute.
    threshold : float, default 0.6
        An element is labelled 1 when its mean is strictly greater than
        this value, otherwise 0.
    decimals : int, default 3
        Number of decimals kept in the reported mean / variance / std.

    Returns
    -------
    pandas.DataFrame
        Four list-valued columns, indexed like the input frames:
        'Estimated class label average', 'Estimated class label (binary)',
        'Estimated class label variance', 'Estimated class label std'.

    Raises
    ------
    ValueError
        If ``mask`` selects no model at all (there is nothing to average).

    Notes
    -----
    The parsed, masked frame is also published as the module-level name
    ``combined``. This side effect is retained for backward compatibility.
    """
    global combined

    def _to_array(cell):
        # Cells read back from a spreadsheet arrive as text such as "[1, 0, 1]".
        if isinstance(cell, str):
            cell = ast.literal_eval(cell)
        return np.array(cell, dtype=float)

    # Apply the mask first so that excluded models are never parsed.
    keep = np.asarray(mask, dtype=bool)
    selected = pd.concat([df[column] for df in dfs], axis=1).loc[:, keep]

    if selected.shape[1] == 0:
        raise ValueError(
            "calculate_statistics: the mask selects no model, "
            "so no statistics can be computed for column %r" % (column,)
        )

    # DataFrame.map was introduced in pandas 2.1; older releases only
    # provide the equivalent DataFrame.applymap.
    if hasattr(pd.DataFrame, "map"):
        combined = selected.map(_to_array)
    else:
        combined = selected.applymap(_to_array)

    # One 2-D array per row: axis 0 runs over the models, axis 1 over the
    # elements of the label vector.
    stacks = [np.stack(list(row)) for row in combined.to_numpy()]

    means = [stack.mean(axis=0).tolist() for stack in stacks]
    variances = [stack.var(axis=0).tolist() for stack in stacks]
    stds = [stack.std(axis=0).tolist() for stack in stacks]

    # Thresholding uses the unrounded means.
    binaries = [[1 if val > threshold else 0 for val in row] for row in means]

    def _rounded(rows):
        return [[round(val, decimals) for val in row] for row in rows]

    def _as_series(rows):
        return pd.Series(rows, index=combined.index, dtype=object)

    return pd.DataFrame({
        'Estimated class label average': _as_series(_rounded(means)),
        'Estimated class label (binary)': _as_series(binaries),
        'Estimated class label variance': _as_series(_rounded(variances)),
        'Estimated class label std': _as_series(_rounded(stds))
    })

In [8]:
# How many pre-trained GNN models were evaluated, i.e. how many output
# files are expected for every value of p (100 in our study).
model_num = 100

# System size: the number of nodes in each system (100 in our study).
N = 100

# Half and a quarter of the system size, as integers.
N2 = N // 2
N4 = N // 4

In [10]:
# Aggregate the per-model estimation outputs for every p_e and write
#   * one workbook per p_e with the row-wise statistics (all models / masked), and
#   * two summary workbooks with one line per p_e.

# Index label of the first split-brain-like graph: rows 0-49 hold the dense
# graphs, rows 50 onwards the split-brain-like ones.
SPLIT_START = 50

BINARY_LABEL_COLUMN = 'Estimated class label (binary)'

SUMMARY_COLUMNS = ['p_e', 'split_brain_ratio(ideal left MC)', 'split_brain_ratio(left MC)',
                   'split_brain_ratio(ideal right MC)', 'split_brain_ratio(right MC)',
                   'split_brain_ratio(ideal left-right MC)', 'split_brain_ratio(left-right MC)',
                   'Mean_MC_size (non split)', 'Mean_MC_size (split-brain-like)',
                   'Mean_Phi average (non split)', 'Mean_Phi average (split-brain-like)',
                   'Mean_Phi std (non split)', 'Mean_Phi std (split-brain-like)']


def _phi_statistics(phi_per_model):
    """Row-wise mean, variance and std of Phi across the model columns."""
    return pd.DataFrame({
        'Estimated Phi average': phi_per_model.mean(axis=1),
        'Estimated Phi variance': phi_per_model.var(axis=1),
        'Estimated Phi std': phi_per_model.std(axis=1)
    })


def _summary_values(result):
    """Return the twelve summary values (everything after p_e) for one result frame."""
    labels = result[BINARY_LABEL_COLUMN]
    dense_labels = labels.loc[:SPLIT_START - 1]
    split_labels = labels.loc[SPLIT_START:]

    all_on, all_off = [1] * N2, [0] * N2

    # Number of split-brain-like rows matching each pattern:
    #   ideal: one half entirely 1 and the other half entirely 0
    #   loose: at least N4 ones in one half and the other half entirely 0
    ideal_left = split_labels.apply(lambda x: x[:N2] == all_on and x[N2:] == all_off).sum()
    left = split_labels.apply(lambda x: sum(x[:N2]) >= N4 and x[N2:] == all_off).sum()
    ideal_right = split_labels.apply(lambda x: x[N2:] == all_on and x[:N2] == all_off).sum()
    right = split_labels.apply(lambda x: sum(x[N2:]) >= N4 and x[:N2] == all_off).sum()

    # Ratios are taken relative to half of all rows.
    half_rows = len(result) / 2.0
    ratio_ideal_left = ideal_left / half_rows
    ratio_left = left / half_rows
    ratio_ideal_right = ideal_right / half_rows
    ratio_right = right / half_rows

    def _mean_fraction_of_ones(series):
        return series.apply(lambda x: sum(x) / len(x)).mean()

    return [ratio_ideal_left, ratio_left, ratio_ideal_right, ratio_right,
            ratio_ideal_left + ratio_ideal_right, ratio_left + ratio_right,
            _mean_fraction_of_ones(dense_labels), _mean_fraction_of_ones(split_labels),
            result.loc[:SPLIT_START - 1, 'Estimated Phi average'].mean(),
            result.loc[SPLIT_START:, 'Estimated Phi average'].mean(),
            result.loc[:SPLIT_START - 1, 'Estimated Phi std'].mean(),
            result.loc[SPLIT_START:, 'Estimated Phi std'].mean()]


whole_statistics = []
whole_statistics_mask = []

for p_e in [0.02]:  #[0.0, 0.0004, 0.002, 0.004, 0.006, 0.008, 0.01, 0.012, 0.014, 0.016, 0.018, 0.02,
#           0.04, 0.06, 0.08, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4]:

    # glob returns paths in no particular order; sort so that the model
    # (column) order is the same on every run.
    file_paths = sorted(glob.glob(os.path.join(save_folder, 'final_test*TO*p=%.4lf.xlsx' %(p_e))))

    # Fail fast, before reading any workbook, and exit with a non-zero status.
    if len(file_paths) != model_num:
        sys.exit('The number of files seems wrong! Please confirm it. '
                 '(found %d, expected %d for p=%.4f)' % (len(file_paths), model_num, p_e))

    dataframes = [pd.read_excel(file_path, index_col=0) for file_path in file_paths]

    for file_path, df in zip(file_paths, dataframes):
        print(f"DataFrame from file {file_path}:")
        print(df)
        print("\n")

    target_column = 'EstimatePhi'

    column_data = [df[target_column] for df in dataframes]

    # Create a mask where a model that estimates a negative Phi value at least once is False, and a proper model is True
    df_concat = pd.concat(column_data, axis=1)
    mask = (df_concat >= 0).all(axis=0)

    mask_all_true = [True] * len(dataframes)

    # Blank out the rejected models; the pandas reductions skip NaN.
    # A plain boolean array is used because all columns share the same label.
    df_concat_mask = df_concat.copy()
    df_concat_mask.loc[:, (~mask).to_numpy()] = np.nan

    result1 = _phi_statistics(df_concat)
    print('non-masked statistics', result1)

    result1_mask = _phi_statistics(df_concat_mask)
    print('masked statistics', result1_mask)

    result2 = calculate_statistics(dataframes, 'Est_label', mask_all_true)
    result2_mask = calculate_statistics(dataframes, 'Est_label', list(mask))

    print('non-masked statistics(label)', result2)
    # Fixed: this line used to print the non-masked frame a second time.
    print('masked statistics(label)', result2_mask)

    all_result = pd.concat([result1, result2], axis=1)
    all_result_mask = pd.concat([result1_mask, result2_mask], axis=1)

    all_result.to_excel(save_folder+'final_test_result_p=%.4lf.xlsx' %(p_e))
    all_result_mask.to_excel(save_folder+'final_test_result_MASK_p=%.4lf.xlsx' %(p_e))

    whole_statistics.append([p_e] + _summary_values(all_result))

    # The masked summary additionally records how many models passed the mask.
    whole_statistics_mask.append([p_e] + _summary_values(all_result_mask) + [mask.sum()])


pd.DataFrame(whole_statistics, columns=SUMMARY_COLUMNS).to_excel(save_folder+'whole_statistics.xlsx')

pd.DataFrame(whole_statistics_mask,
             columns=SUMMARY_COLUMNS + ['effective model']).to_excel(save_folder+'whole_statistics_MASK.xlsx')

DataFrame from file result\final_test_0_0_result_2024-10-15_21-27-26_TO_2024-12-24_04-18-23_N=100_type9_p=0.0200.xlsx:
    EstimatePhi  TruePhi        MSEPhi      MAEPhi   MAERPhi  \
0     -0.767502     -100   9847.088867   99.232498  0.992325   
1     -0.480240     -100   9904.182617   99.519760  0.995198   
2     -0.694129     -100   9861.656250   99.305870  0.993059   
3     -0.342893     -100   9931.538086   99.657104  0.996571   
4     -0.143399     -100   9971.339844   99.856598  0.998566   
..          ...      ...           ...         ...       ...   
95     0.030454     -100  10006.091797  100.030457  1.000305   
96     0.677381     -100  10135.935547  100.677383  1.006774   
97     0.719905     -100  10144.499023  100.719902  1.007199   
98     0.197777     -100  10039.594727  100.197777  1.001978   
99     1.324670     -100  10266.688477  101.324669  1.013247   

                                           True_label  \
0   [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...  

In [22]:
# Deliberately stop execution here; inside a notebook this surfaces as a SystemExit.
sys.exit()

SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
