In [None]:
import os
import time
import csv

import networkx as nx
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import scipy as sp          
from mpl_toolkits.mplot3d import Axes3D
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from matplotlib.lines import Line2D
import matplotlib as mpl
from igraph import Graph
from scipy.sparse.linalg import eigsh
from scipy.linalg import eigh, eigvals

from utils import *
%matplotlib inline

In [None]:
# Compute the GP spectral index for every graph catalogued in
# real_graphs_data.csv and store one row per graph (None where the graph
# could not be located or processed).
df_info = pd.read_csv("real_graphs_data.csv")

# On-disk folder name -> short category label used in the output table.
category_map = {
    'Collaboration networks': 'Collaboration',
    'Social networks': 'Social',
    'Miscellaneous Networks': 'Miscellaneous',
    'Biological Networks': 'Biological'
}

all_results = []

for idx, row in df_info.iterrows():
    graph_name = row['graph_name']
    category_str = row['category']
    n = int(row['num_nodes'])
    category = category_map.get(category_str, "Unknown")

    # Probe the supported edge-list extensions in priority order and keep
    # the first file that exists.
    base_dir = "Networks"
    candidate_paths = [
        os.path.join(base_dir, category_str, f"{graph_name}{ext}")
        for ext in ('.txt', '.edges', '.csv')
    ]
    file_path = next(
        (path for path in candidate_paths if os.path.exists(path)),
        None
    )

    if file_path is None:
        print(f"File not found for {graph_name}")
        all_results.append({
            'graph': graph_name,
            'category': category,
            'spectral_index_GP': None
        })
        continue

    try:
        G = nx.Graph()

        with open(file_path, 'r', encoding='utf-8') as f:
            for raw_line in f:
                line = raw_line.strip()
                # Blank lines and '#' comments carry no edge.
                if not line or line.startswith('#'):
                    continue

                # Whitespace-split tokens are never empty, so the first two
                # fields are the endpoints; extra columns are ignored.
                parts = line.split()
                if len(parts) >= 2:
                    G.add_edge(parts[0], parts[1])

        # Warn (but carry on) when the catalogue disagrees with the file.
        if G.number_of_nodes() != n:
            print(
                f"Node count mismatch for {graph_name}: "
                f"CSV={n}, Actual={G.number_of_nodes()}"
            )

        P = stochastic_matrix_calculator(G)
        spectral_index = estrada_index_using_p(G, P, k_max=10)

        rounded_index = (
            None if np.isnan(spectral_index) else round(spectral_index, 6)
        )
        all_results.append({
            'graph': graph_name,
            'category': category,
            'spectral_index_GP': rounded_index
        })

        print(
            f"{graph_name} | Nodes: {n} | "
            f"SpectralIndex: {spectral_index:.6f}"
        )

    except Exception as e:
        # Best effort: keep the batch running and record the failure as None.
        print(f"Error processing {graph_name}: {e}")
        all_results.append({
            'graph': graph_name,
            'category': category,
            'spectral_index_GP': None
        })


final_df = pd.DataFrame(all_results)
output_file = "total_real_networks_E,E-L_filled.csv"
final_df.to_csv(output_file, index=False)

print(f"Done! Results saved to: {os.path.abspath(output_file)}")
print("Preview:")
print(final_df.head(10))

In [None]:
# Compute adjacency energy and Laplacian energy for every graph catalogued
# in real_graphs_data.csv; failures are recorded with None values.
df_info = pd.read_csv("real_graphs_data.csv")

# On-disk folder name -> short category label used in the output table.
category_map = {
    'Collaboration networks': 'Collaboration',
    'Social networks': 'Social',
    'Miscellaneous Networks': 'Miscellaneous',
    'Biological Networks': 'Biological'
}

all_results = []

for idx, row in df_info.iterrows():
    graph_name = row['graph_name']
    category_str = row['category']
    n = int(row['num_nodes'])
    m = int(row['num_edges'])
    category = category_map.get(category_str, "Unknown")

    # Locate the edge list: first existing file among the known extensions.
    base_dir = "Networks"
    file_path = None
    for ext in ('.txt', '.edges', '.csv'):
        candidate = os.path.join(base_dir, category_str, f"{graph_name}{ext}")
        if os.path.exists(candidate):
            file_path = candidate
            break

    if file_path is None:
        print(f"File not found for {graph_name}")
        all_results.append({
            'graph': graph_name,
            'category': category,
            'energy': None,
            'laplacian_energy': None
        })
        continue

    try:
        G = nx.Graph()
        with open(file_path, 'r', encoding='utf-8') as f:
            for raw_line in f:
                line = raw_line.strip()
                if line and not line.startswith('#'):
                    # First two whitespace-separated fields are the endpoints.
                    parts = line.split()
                    if len(parts) >= 2:
                        G.add_edge(parts[0], parts[1])

        # Report (without aborting) any disagreement with the catalogue.
        sizes_match = G.number_of_nodes() == n and G.number_of_edges() == m
        if not sizes_match:
            print(f"Count mismatch for {graph_name}: CSV=({n}, {m}), Actual=({G.number_of_nodes()}, {G.number_of_edges()})")

        energy, laplacian_energy = compute_energies(G)

        energy_out = None if np.isnan(energy) else round(energy, 6)
        laplacian_out = None if np.isnan(laplacian_energy) else round(laplacian_energy, 6)
        all_results.append({
            'graph': graph_name,
            'category': category,
            'energy': energy_out,
            'laplacian_energy': laplacian_out
        })

        print(f"{graph_name} | Nodes: {n} | Energy: {energy:.4f} | LapEnergy: {laplacian_energy:.4f}")

    except Exception as e:
        # Best effort: keep the batch running and record the failure as None.
        print(f"Error processing {graph_name}: {e}")
        all_results.append({
            'graph': graph_name,
            'category': category,
            'energy': None,
            'laplacian_energy': None
        })

final_df = pd.DataFrame(all_results)
output_file = "real_networks_energies_.csv"
final_df.to_csv(output_file, index=False)

print(f"Done! Results saved to: {os.path.abspath(output_file)}")
print("Summary:")
print(final_df[['graph', 'category', 'energy', 'laplacian_energy']].head(10))

In [None]:
def compute_energies(G, dense_threshold=500, k_fraction=0.0002):
    """Return ``(energy, laplacian_energy)`` for the graph ``G``.

    * energy: sum of ``|lambda_i|`` over adjacency eigenvalues.
    * laplacian_energy: sum of ``|mu_i - 2m/n|`` over Laplacian eigenvalues.

    Graphs with at most ``dense_threshold`` nodes use a full dense
    eigendecomposition, so both values are exact. Larger graphs use
    ``eigsh`` with only ``k = max(1, min(n - 1, round(k_fraction * n)))``
    eigenvalues (largest magnitude for the adjacency matrix, largest
    algebraic for the Laplacian). In that case the returned numbers are
    *truncated* sums, not the full energies, and are not directly comparable
    with the exact values of small graphs.

    Parameters
    ----------
    G : graph object accepted by networkx
        Must provide ``number_of_nodes()`` and ``number_of_edges()``.
    dense_threshold : int, default 500
        Largest node count handled with the dense solver.
    k_fraction : float, default 0.0002
        Fraction of the spectrum requested from the sparse solver.

    Returns
    -------
    tuple of float
        ``(energy, laplacian_energy)``; a component is ``nan`` when the graph
        is empty or the corresponding eigen-solve raised.
    """
    n = G.number_of_nodes()
    m = G.number_of_edges()

    # No spectrum to speak of for an empty graph.
    if n == 0:
        return np.nan, np.nan

    # eigsh requires 1 <= k < n, hence the clamp.
    k_dynamic = max(1, min(n - 1, int(round(k_fraction * n))))
    use_dense = n <= dense_threshold

    # The two energies are computed independently so that a failure in one
    # does not discard the other.
    try:
        if use_dense:
            A_dense = nx.to_numpy_array(G, dtype=float)
            eigs_adj = eigh(A_dense, eigvals_only=True)
        else:
            A_sparse = nx.to_scipy_sparse_array(G, format='csr', dtype=float)
            eigs_adj = eigsh(A_sparse, k=k_dynamic, which='LM', return_eigenvectors=False)
        energy = np.sum(np.abs(eigs_adj))
    except Exception as e:
        print(f"Adjacency energy failed (n={n}, k={k_dynamic}): {e}")
        energy = np.nan

    try:
        # Average degree: the reference level in the Laplacian energy.
        mu = (2 * m) / n
        if use_dense:
            L_dense = nx.laplacian_matrix(G).astype(float).toarray()
            eigs_lap = eigh(L_dense, eigvals_only=True)
        else:
            L_sparse = nx.laplacian_matrix(G).tocsr().astype(float)
            eigs_lap = eigsh(L_sparse, k=k_dynamic, which='LA', return_eigenvectors=False)
        laplacian_energy = np.sum(np.abs(eigs_lap - mu))
    except Exception as e:
        print(f"Laplacian energy failed (n={n}, k={k_dynamic}): {e}")
        laplacian_energy = np.nan

    return energy, laplacian_energy



# Short category label -> folder holding that category's edge lists.
categories = {
    'Collaboration': 'Networks/Collaboration networks',
    'Social': 'Networks/Social networks',
    'Miscellaneous': 'Networks/Miscellaneous Networks',
    'Biological': 'Networks/Biological Networks'
}

RESULT_COLUMNS = ['graph', 'category', 'energy', 'laplacian_energy']

# One DataFrame per processed category; concatenated at the end.
all_results = []

for category, path in categories.items():
    print(f"Processing {category} networks...")
    if not os.path.exists(path):
        print(f"Path not found: {path}")
        continue

    networks = read_real_networks(path)
    print(f"Found {len(networks)} networks.")

    start = time.time()
    category_results = []

    for key in tqdm(networks, desc=f"{category}"):
        G = networks[key]
        if not isinstance(G, nx.Graph):
            print(f"Skipping non-Graph object: {key}")
            continue

        energy, laplacian_energy = compute_energies(G)

        # NaN marks a failed computation; store it as None in the table.
        category_results.append({
            'graph': key,
            'category': category,
            'energy': round(energy, 6) if not np.isnan(energy) else None,
            'laplacian_energy': round(laplacian_energy, 6) if not np.isnan(laplacian_energy) else None
        })

    df = pd.DataFrame(category_results)
    all_results.append(df)
    elapsed = time.time() - start
    print(f"Done in {elapsed:.2f} seconds.")

# pd.concat raises ValueError on an empty list (no category folder found);
# fall back to an empty, correctly-shaped table instead of crashing.
if all_results:
    final_df = pd.concat(all_results, ignore_index=True)
else:
    final_df = pd.DataFrame(columns=RESULT_COLUMNS)

output_file = "real_networks_energies.csv"
final_df.to_csv(output_file, index=False)

print(f"Results saved to: {os.path.abspath(output_file)}")
print(final_df.head())

In [None]:
# Walk GRAPH_DIR and write one catalogue row per edge-list file:
# graph name, containing folder (used as category), node count, edge count.
GRAPH_DIR = "Networks"
OUTPUT_CSV = "real_graphs_data.csv"

with open(OUTPUT_CSV, mode='w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(["graph_name", "category", "num_nodes", "num_edges"])

    for root, dirs, files in os.walk(GRAPH_DIR):
        for file in files:
            # Only files with a recognised edge-list extension are catalogued.
            if not file.endswith((".txt", ".edges", ".csv")):
                continue

            file_path = os.path.join(root, file)
            graph_name, _extension = os.path.splitext(file)
            category = os.path.basename(root)

            try:
                G = nx.read_edgelist(file_path, nodetype=str, comments='#')
                num_nodes, num_edges = G.number_of_nodes(), G.number_of_edges()

                writer.writerow([graph_name, category, num_nodes, num_edges])
                print(f"Processed: {graph_name} (Category: {category}) → Nodes: {num_nodes}, Edges: {num_edges}")

            except Exception as e:
                # Unreadable files are reported and skipped.
                print(f"Error reading {file_path}: {e}")

print(f"Data saved to {OUTPUT_CSV}")

# ---------------------------

In [None]:
# Load the collaboration networks via read_real_networks (defined outside this
# cell; presumably reads every edge list under PATH — confirm in utils).
PATH = 'Networks/Collaboration networks'
collab_networks = read_real_networks(PATH)
# The result is used as a mapping: list its keys and print one known entry.
print(collab_networks.keys())
print(collab_networks['CA-CondMat'])

In [None]:
# Display the whole name -> graph mapping.
collab_networks

In [None]:
# Display only the graph objects of the mapping.
collab_networks.values()

In [None]:
# Compute the per-graph metrics (Gini, H, m2, Q, estrada) for every
# collaboration network and save them as a table.
collab_values = {}
start = time.time()
q_max = 0  # largest Q seen across the collaboration networks

for key in tqdm(collab_networks):
    print(key)
    G = collab_networks[key]

    H, _ = calculate_H_geometric(G=G)
    m2 = est_moment(G, 10000, 2)[1]
    estrada = estrada_index(G)
    _, _, Q = synchronizability_calculator(G, for_real_networks=True)
    gini = calculate_gini(G)

    q_max = max(q_max, Q)

    collab_values[key] = {
        'Gini': round(gini, 3),
        'H': round(H, 3),
        'm2': round(m2, 3),
        'Q': round(Q, 3),
        'estrada': round(estrada, 3),
    }

# Rows = graphs, columns = metrics; the graph name becomes a regular column.
collab_df = pd.DataFrame(collab_values).T
collab_df = collab_df.reset_index(drop=False).rename(columns={'index':'graph'})
collab_df['category'] = 'Collaboration'
end = time.time()
elapsed = end - start

# Make sure the target folder exists so the computed results are not lost,
# and write without the positional index: otherwise the reload picks it up
# as a spurious 'Unnamed: 0' column.
os.makedirs("SavedNetworks", exist_ok=True)
collab_df.to_csv("SavedNetworks/collab_df.csv", index=False)
print(f'Time taken: {elapsed:.6f} seconds')
print("Q_max :", q_max)

In [None]:
# Reload the table stored at SavedNetworks/collab_df.csv.
collab_df = pd.read_csv("SavedNetworks/collab_df.csv")

In [None]:
# Q_norm is the natural logarithm of Q.
# NOTE(review): a Q of 0 (or below) would yield -inf/NaN here — confirm Q is always positive.
collab_df["Q_norm"] = (np.log(collab_df["Q"]))
collab_df.to_csv('SavedNetworks/collab_df.csv', index=False)
collab_df

In [None]:
# Add the degree coefficient of variation (std / mean of the degree
# sequence) to the saved collaboration table.
collab_df = pd.read_csv("SavedNetworks/collab_df.csv")

cv_values = dict()
for key in tqdm(collab_networks, desc="Computing CV"):
    G = collab_networks[key]
    degrees = np.array([deg for _node, deg in G.degree()])
    mean_degree = degrees.mean()
    # A zero mean degree would divide by zero; record NaN instead.
    cv = np.nan if mean_degree == 0 else degrees.std() / mean_degree
    cv_values[key] = round(cv, 3)

# Graphs absent from cv_values get NaN through the map.
collab_df["CV"] = collab_df["graph"].map(cv_values)
collab_df.to_csv("SavedNetworks/collab_df.csv", index=False)
collab_df


In [None]:
# Load the social networks via read_real_networks (defined outside this cell).
PATH = 'Networks/Social networks'
social_networks = read_real_networks(PATH)
# The result is used as a mapping: list its keys and print one known entry.
print(social_networks.keys())
print(social_networks['feather-lastfm-social'])


In [None]:
# Compute the per-graph metrics (Gini, H, m2, Q, estrada) for the social
# networks, skipping the ones listed in excluded_keys.
start = time.time()
social_values = dict()
excluded_keys = ['soc-twitter-follows', 'soc-youtube']

for key in tqdm(social_networks):
    if key in excluded_keys:
        continue

    G = social_networks[key]
    print(key,": ", G)

    H, _ = calculate_H_geometric(G=G)
    estrada = estrada_index(G)
    m2 = est_moment(G, 10000, 2)[1]
    _, _, Q = synchronizability_calculator(G, for_real_networks=True)
    gini = calculate_gini(G)

    social_values[key] = {
        'Gini': round(gini, 3),
        'H': round(H, 3),
        'm2': round(m2, 3),
        'Q': round(Q, 3),
        'estrada': round(estrada, 3),
    }

# Rows = graphs, columns = metrics; the graph name becomes a regular column.
social_df = pd.DataFrame(social_values).T
social_df = social_df.reset_index(drop=False).rename(columns={'index':'graph'})
social_df['category'] = 'Social'
end = time.time()
elapsed = end - start

# Ensure the folder exists and write without the positional index so a later
# read_csv does not produce an 'Unnamed: 0' column.
os.makedirs("SavedNetworks", exist_ok=True)
social_df.to_csv("SavedNetworks/social_df.csv", index=False)
print(f"Elapsed time:{elapsed: .2f} seconds")

In [None]:
# Q_norm is the natural logarithm of Q.
# NOTE(review): a Q of 0 (or below) would yield -inf/NaN here — confirm Q is always positive.
social_df["Q_norm"] = (np.log(social_df["Q"]))
social_df.to_csv('SavedNetworks/social_df.csv', index=False)
social_df

In [None]:
# Write without the positional index; with it, the next read_csv would
# surface an extra 'Unnamed: 0' column.
social_df.to_csv("SavedNetworks/social_df.csv", index=False)

In [None]:
# Reload the table stored at SavedNetworks/social_df.csv.
social_df = pd.read_csv('SavedNetworks/social_df.csv')

In [None]:
# Add the degree coefficient of variation (std / mean of the degree
# sequence) to the saved social table.
social_df = pd.read_csv('SavedNetworks/social_df.csv')

cv_values = dict()
for key in tqdm(social_networks, desc="Computing CV (Social)"):
    G = social_networks[key]
    degrees = np.array([deg for _node, deg in G.degree()])
    mean_degree = degrees.mean()
    # A zero mean degree would divide by zero; record NaN instead.
    cv = np.nan if mean_degree == 0 else degrees.std() / mean_degree
    cv_values[key] = round(cv, 3)

# Only graphs present in the table pick up a value; extra keys are ignored.
social_df["CV"] = social_df["graph"].map(cv_values)
social_df.to_csv("SavedNetworks/social_df.csv", index=False)
social_df


In [None]:
# Load the miscellaneous networks via read_real_networks (defined outside this cell).
PATH = 'Networks/Miscellaneous Networks'
miscellaneous_networks = read_real_networks(PATH)
# The result is used as a mapping: list its keys and print one known entry.
print(miscellaneous_networks.keys())
print(miscellaneous_networks['Karate_Club_33node'])

In [None]:
# Compute the per-graph metrics (Gini, H, m2, Q, estrada) for every
# miscellaneous network and save them as a table.
miscellaneous_values = dict()

for key in miscellaneous_networks:
    G = miscellaneous_networks[key]
    print(key,': ', G)

    H, _ = calculate_H_geometric(G=G)
    m2 = est_moment(G, 10000, 2)[1]
    _, _, Q = synchronizability_calculator(G, for_real_networks=True)
    estrada = estrada_index(G)
    gini = calculate_gini(G)

    miscellaneous_values[key] = {
        'Gini': round(gini, 3),
        'H': round(H, 3),
        'm2': round(m2, 3),
        'Q': round(Q, 3),
        'estrada': round(estrada, 3),
    }

# Rows = graphs, columns = metrics; the graph name becomes a regular column.
miscellaneous_df = pd.DataFrame(miscellaneous_values).T
miscellaneous_df = miscellaneous_df.reset_index(drop=False).rename(columns={'index':'graph'})
# Tag the category BEFORE saving so the file on disk already carries it.
miscellaneous_df['category'] = 'Miscellaneous'
miscellaneous_df.to_csv('SavedNetworks/miscellaneous_df.csv', index=False)

In [None]:
# Q_norm is the natural logarithm of Q.
# NOTE(review): a Q of 0 (or below) would yield -inf/NaN here — confirm Q is always positive.
miscellaneous_df['Q_norm'] = (np.log(miscellaneous_df["Q"]))
miscellaneous_df.to_csv('SavedNetworks/miscellaneous_df.csv', index=False)

In [None]:
# Reload the table stored at SavedNetworks/miscellaneous_df.csv and display it.
miscellaneous_df = pd.read_csv('SavedNetworks/miscellaneous_df.csv')
miscellaneous_df

In [None]:
# Add the degree coefficient of variation (std / mean of the degree
# sequence) to the saved miscellaneous table.
miscellaneous_df = pd.read_csv('SavedNetworks/miscellaneous_df.csv')

cv_values = dict()
for key in tqdm(miscellaneous_networks, desc="Computing CV (Miscellaneous)"):
    G = miscellaneous_networks[key]
    degrees = np.array([deg for _node, deg in G.degree()])
    mean_degree = degrees.mean()
    # A zero mean degree would divide by zero; record NaN instead.
    cv = np.nan if mean_degree == 0 else degrees.std() / mean_degree
    cv_values[key] = round(cv, 3)

miscellaneous_df["CV"] = miscellaneous_df["graph"].map(cv_values)
miscellaneous_df.to_csv("SavedNetworks/miscellaneous_df.csv", index=False)
miscellaneous_df


In [None]:
# Load the biological networks via read_real_networks (defined outside this cell).
PATH = 'Networks/Biological Networks'
biological_networks = read_real_networks(PATH)
# The result is used as a mapping: list its keys.
print(biological_networks.keys())


In [None]:
# Compute the per-graph metrics (Gini, H, m2, Q, estrada) for the biological
# networks, skipping the ones listed in excluded_keys.
biological_values = dict()
excluded_keys = ['bio-grid-human']

for key in biological_networks:
    if key in excluded_keys:
        continue

    G = biological_networks[key]
    print(key,':', G)

    H, _ = calculate_H_geometric(G=G)
    m2 = est_moment(G, 10000, 2)[1]
    _, _, Q = synchronizability_calculator(G, for_real_networks=True)
    estrada = estrada_index(G)
    gini = calculate_gini(G)

    biological_values[key] = {
        'Gini': round(gini, 3),
        'H': round(H, 3),
        'm2': round(m2, 3),
        'Q': round(Q, 3),
        'estrada': round(estrada, 3),
    }

# Rows = graphs, columns = metrics; the graph name becomes a regular column.
biological_df = pd.DataFrame(biological_values).T
biological_df = biological_df.reset_index(drop=False).rename(columns={'index':'graph'})
# Tag the category BEFORE saving so the file on disk already carries it.
biological_df['category'] = 'Biological'
biological_df.to_csv('SavedNetworks/biological_df.csv', index=False)

In [None]:
# Q_norm is the natural logarithm of Q.
# NOTE(review): a Q of 0 (or below) would yield -inf/NaN here — confirm Q is always positive.
biological_df['Q_norm'] = (np.log(biological_df['Q']))
biological_df.to_csv('SavedNetworks/biological_df.csv', index=False)

In [None]:
# Reload the table stored at SavedNetworks/biological_df.csv and display it.
biological_df = pd.read_csv('SavedNetworks/biological_df.csv')
biological_df

In [None]:
# Add the degree coefficient of variation (std / mean of the degree
# sequence) to the saved biological table.
biological_df = pd.read_csv('SavedNetworks/biological_df.csv')

cv_values = dict()
for key in tqdm(biological_networks, desc="Computing CV (Biological)"):
    G = biological_networks[key]
    degrees = np.array([deg for _node, deg in G.degree()])
    mean_degree = degrees.mean()
    # A zero mean degree would divide by zero; record NaN instead.
    cv = np.nan if mean_degree == 0 else degrees.std() / mean_degree
    cv_values[key] = round(cv, 3)

# Only graphs present in the table pick up a value; extra keys are ignored.
biological_df["CV"] = biological_df["graph"].map(cv_values)
biological_df.to_csv("SavedNetworks/biological_df.csv", index=False)
biological_df

In [None]:
# Stack the four per-category tables into one and renumber the rows 0..N-1.
combined_df = pd.concat([biological_df, collab_df, miscellaneous_df, social_df]).reset_index(drop=True)
combined_df

In [None]:
# Drop the 'CA-HepTh' row; .copy() detaches the result from the original frame.
combined_df = combined_df[combined_df['graph'] != 'CA-HepTh'].copy()

In [None]:
# Keep only the analysis columns, in this order; any other column
# is discarded by the selection below.
allowed_cols = [
    "graph", "Gini", "H", "m2", "Q",
    "estrada", "CV", "Q_norm", "category"
]

# Raises KeyError if any listed column is missing from combined_df.
combined_df = combined_df[allowed_cols]

combined_df

In [None]:
# Persist the combined table (without the positional index).
combined_df.to_csv('SavedNetworks/real_networks_df.csv', index=False)

In [None]:
# Display the combined table.
combined_df

In [None]:
# Reload the table stored at SavedNetworks/real_networks_df.csv.
combined_df = pd.read_csv('SavedNetworks/real_networks_df.csv')

In [None]:
# Save the row-index -> graph-name lookup; index=True writes the index as the first CSV column.
mapping = combined_df[['graph']]
mapping.to_csv('SavedNetworks/real_graphs_annotation.csv', index=True)
print(mapping)

In [None]:
# Print the row-index -> graph-name lookup (no file is written here).
mapping = combined_df[['graph']]
print(mapping)

In [None]:
# Grouped bar chart of H and m2 per network, with networks identified by
# their row index on the x axis.

# Global matplotlib style; fonttype 42 embeds TrueType fonts in PDF/PS output.
mpl.rcParams.update({
    'font.family': 'sans-serif',
    'font.sans-serif': ['Arial', 'Helvetica', 'DejaVu Sans'],
    'axes.linewidth': 0.8,
    'xtick.major.width': 0.8,
    'ytick.major.width': 0.8,
    'xtick.major.size': 4,
    'ytick.major.size': 4,
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
})


# Expose the positional index as a column so it can serve as the x variable.
combined_df_reset = combined_df.reset_index(drop=True)
combined_df_reset['index'] = combined_df_reset.index  


# Long format: one row per (network, metric) pair, as seaborn's hue expects.
df_long = pd.melt(
    combined_df_reset,
    id_vars=['index', 'category'],  
    value_vars=['H', 'm2'],
    var_name='Metric',
    value_name='Value'
)


plt.figure(figsize=(20, 10))
palette = {'H': '#D62728', 'm2': '#1F77B4'} 

ax = sns.barplot(
    data=df_long,
    x='index',
    y='Value',
    hue='Metric',
    palette=palette,
    edgecolor='black',
    linewidth=0.35,
    saturation=0.95
)


# Axis cosmetics: labels, tick sizes, dotted horizontal grid drawn below the bars.
ax.set_xlabel('Network Index', fontsize=30)
ax.set_ylabel('Value of Indices', fontsize=30)
ax.tick_params(axis='x', labelsize=12)
ax.tick_params(axis='y', labelsize=16)
ax.grid(axis='y', color='gray', linestyle=':', linewidth=0.8, alpha=0.6)
ax.set_axisbelow(True)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)



legend = ax.legend(title='Metric', title_fontsize=16, fontsize=14, frameon=False, loc='upper right')
legend.get_title().set_weight('semibold')


ax.set_title('Real-World Networks', fontsize=35, fontweight='semibold', pad=15)
plt.tight_layout(pad=2.0)
plt.show()

In [None]:
# Grouped bar chart of H and m2 per network, labelled by (shortened) graph
# name and saved to Figures/real_worlds_networks_h_m2.png.

# Global matplotlib style; fonttype 42 embeds TrueType fonts in PDF/PS output.
mpl.rcParams.update({
    'font.family': 'Times New Roman',
    'font.weight': 'bold',
    'font.size': 11,
    'axes.linewidth': 0.8,
    'xtick.major.width': 0.8,
    'ytick.major.width': 0.8,
    'xtick.major.size': 4,
    'ytick.major.size': 4,
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
})



# Raw graph name -> display name. Names not listed here are left unchanged
# by .replace(); most entries map a name to itself and only the ones with a
# size suffix (e.g. "_64node") are actually shortened.
name_map = {
    "alpha": "alpha",
    "beta": "beta",
    "bio-dmela": "bio-dmela",
    "bio-grid-yeast": "bio-grid-yeast",
    "delta": "delta",
    "Dyslexia_Cortex_64node": "Dyslexia_Cortex",
    "Florida ecosystem dry_128node_2106edge": "Florida ecosystem dry",
    "Little Rock Lake_182node": "Little Rock Lake",
    "Silwood Park_153node": "Silwood Park",
    "Typical_Cortex_64node": "Typical_Cortex",
    "Ythan Estuary_134node": "Ythan Estuary",
    "CA-AstroPh": "CA-AstroPh",
    "CA-cit-HepPh": "CA-cit-HepPh",
    "CA-CondMat": "CA-CondMat",
    "CA-Erdos992": "CA-Erdos992",
    "CA-GrQc": "CA-GrQc",
    "CA-sandi_auths": "CA-sandi_auths",
    "Celegans_Metabolic_453node": "Celegans_Metabolic",
    "Dolphins_63node": "Dolphins",
    "Karate_Club_33node": "Karate_Club",
    "Polbooks_241node": "Polbooks",
    "feather-deezer-social": "feather-deezer-social",
    "feather-lastfm-social": "feather-lastfm-social",
    "musae_facebook": "musae_facebook",
    "musae_github": "musae_github",
}


combined_df_reset = combined_df.reset_index(drop=True)

combined_df_reset['graph'] = combined_df_reset['graph'].replace(name_map)

# Long format: one row per (network, metric) pair, as seaborn's hue expects.
df_long = pd.melt(
    combined_df_reset,
    id_vars='graph',
    value_vars=['H', 'm2'],
    var_name='Metric',
    value_name='Value'
)


plt.figure(figsize=(20, 10))

palette = {
    'H': '#D62728',   
    'm2': '#1F77B4',  
}

ax = sns.barplot(
    data=df_long,
    x='graph',
    y='Value',
    hue='Metric',
    palette=palette,
    edgecolor='black',
    linewidth=0.35,
    saturation=0.95
)


# Vertical, bold tick labels so the long graph names do not overlap.
ax.set_xticklabels(
    ax.get_xticklabels(),
    rotation=90,
    ha='right',
    fontweight='bold'
)


ax.set_ylabel('Value of Indices', fontsize=25, fontweight='bold', labelpad=20)
ax.set_xlabel('')
ax.set_title('Real-World Networks', fontsize=25, fontweight='bold', pad=15)

ax.tick_params(axis='x', labelsize=23)
ax.tick_params(axis='y', labelsize=23)

ax.grid(
    axis='y',
    color='gray',
    linestyle=':',
    linewidth=0.8,
    alpha=0.6
)
ax.set_axisbelow(True)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

legend = ax.legend(
    title='Metric',
    title_fontsize=25,
    fontsize=23,
    frameon=False,
    # loc='upper right',
)
legend.get_title().set_weight('semibold')

plt.tight_layout(pad=2.0)
# Save before show(); the Figures/ folder is not created here and must already exist.
plt.savefig("Figures/real_worlds_networks_h_m2.png", dpi=500, bbox_inches='tight')
plt.show()

In [None]:
# Remove the 'Unnamed: 0' column if present; errors='ignore' makes this a no-op otherwise.
combined_df = combined_df.drop(columns=['Unnamed: 0'], errors='ignore')

In [None]:
# Display the combined table.
combined_df

In [None]:
# The combined table without its 'graph' and 'category' columns.
# NOTE(review): despite the name, no correlation is computed here — confirm
# whether a `.corr()` call was intended.
corr_matrix = combined_df.drop(columns=['graph', 'category'])
corr_matrix


In [None]:
# Inspect the column dtypes of the combined table.
combined_df.dtypes

In [None]:
# 3D scatter of (H, m2, Q_norm) coloured by network category, saved to
# Figures/real.png.
mpl.rcParams.update({
    'font.family': 'Times New Roman',
    'axes.linewidth': 0.8,
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
})



# One colour-blind-friendly colour per distinct (non-null) category.
categories = combined_df['category'].dropna().unique()
palette = sns.color_palette("colorblind", n_colors=len(categories))
color_dict = {cat: palette[i] for i, cat in enumerate(categories)}

fig = plt.figure(figsize=(25, 25))
# fig = plt.figure(figsize=(16, 16))

ax = fig.add_subplot(111, projection='3d')


xs = combined_df['H']
ys = combined_df['m2']
zs = combined_df['Q_norm']
colors = combined_df['category'].map(color_dict)

scatter = ax.scatter(
    xs, ys, zs,
    c=colors,
    s=120,
    edgecolor='black',
    linewidth=0.35,
    alpha=0.92
)

ax.set_xlabel('H-Index',
              fontfamily='Times New Roman',
              weight='bold',
              fontsize=25,
              labelpad=20)

ax.set_ylabel('m2-Index',
              fontfamily='Times New Roman',
              weight='bold',
              fontsize=25,
              labelpad=20)

ax.set_zlabel('Ln(Q)',
              fontfamily='Times New Roman',
              weight='bold',
              fontsize=25,
              labelpad=20) 


ax.tick_params(axis='x')
ax.tick_params(axis='y')
ax.tick_params(axis='z')

# Tick-label styling is applied per label object on each axis.
for tick in ax.get_xticklabels():
    tick.set_fontsize(15)
    tick.set_fontweight('normal') 

for tick in ax.get_yticklabels():
    tick.set_fontsize(15)
    tick.set_fontweight('normal')  

for tick in ax.get_zticklabels():
    tick.set_fontsize(16)
    tick.set_fontweight('normal') 

# Hand-built legend: one round marker per category, in the category colour.
legend_elements = [
    Line2D([0], [0], marker='o', color='w', label=cat,
           markerfacecolor=color_dict[cat], markersize=10,
           markeredgecolor='black', markeredgewidth=0.5)
    for cat in categories
]

legend = ax.legend(
    handles=legend_elements,
    title='',
    fontsize=25,
    frameon=False,
    # loc='upper left',
    ncol=2,
    bbox_to_anchor=(0.6, 0.8)
)
legend.get_title().set_weight('semibold')

ax.view_init(elev=20, azim=-60)
ax.grid(True, color='gray', linestyle=':', alpha=0.4)

plt.tight_layout(pad=1.2)
# left=1, right=0 draws the x axis reversed (1 on the left, 0 on the right).
# NOTE(review): presumably intentional for the viewing angle — confirm.
ax.set_xlim(right=0, left=1)
ax.set_ylim(bottom=0) 
# The Figures/ folder is not created here and must already exist.
plt.savefig('Figures/real.png', facecolor='white', dpi=500, bbox_inches='tight')
plt.show()

In [None]:
# Cluster the networks in standardised (H, m2, Q_norm) space with KMeans and
# show the clusters in a 3D scatter with numbered points.
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans


mpl.rcParams.update({
    'font.family': 'sans-serif',
    'font.sans-serif': ['Arial', 'Helvetica', 'DejaVu Sans'],
    'axes.linewidth': 0.8,
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
    'font.size': 12
})

# 'florida_bay' is left out of the clustering.
results_df = combined_df[combined_df['graph'] != 'florida_bay'].reset_index(drop=True)

# Standardise so that no single feature dominates the Euclidean distance.
scaler = StandardScaler()
scaled_features = scaler.fit_transform(results_df[['H', 'm2', 'Q_norm']])

n_clusters = 3
kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
labels = kmeans.fit_predict(scaled_features)

# KMeans cluster ids are arbitrary, so mapping id 0/1/2 straight to
# "High/Low/Medium" attaches the names to whichever cluster happens to get
# that id. Rank the clusters by their centroid on H (feature column 0)
# instead and name them from highest to lowest.
# NOTE(review): assumes a larger H means a more heterogeneous network —
# confirm against calculate_H_geometric.
semantic_labels = ['High heterogeneity', 'Medium heterogeneity', 'Low heterogeneity']
clusters_by_h = np.argsort(kmeans.cluster_centers_[:, 0])[::-1]
cluster_to_label = {
    int(cluster_id): name
    for cluster_id, name in zip(clusters_by_h, semantic_labels)
}
results_df = results_df.copy()
# From here on 'category' holds the cluster name, not the network category.
results_df['category'] = [cluster_to_label[int(l)] for l in labels]

categories = [cat for cat in semantic_labels if cat in results_df['category'].values]
palette = sns.color_palette("colorblind", n_colors=len(categories))
color_dict = {cat: palette[i] for i, cat in enumerate(categories)}

xs = results_df['H']
ys = results_df['m2']
zs = results_df['Q_norm']
colors = results_df['category'].map(color_dict)

def tight_limits(data, pad_frac=0.05):
    """Return (low, high) axis limits: the data range padded by pad_frac on each side."""
    d_min, d_max = data.min(), data.max()
    # Degenerate range: open a small fixed window around the single value.
    if d_min == d_max:
        return d_min - 0.1, d_max + 0.1
    padding = (d_max - d_min) * pad_frac
    return d_min - padding, d_max + padding

x_min, x_max = tight_limits(xs)
y_min, y_max = tight_limits(ys)
z_min, z_max = tight_limits(zs)

fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(111, projection='3d')

ax.scatter(xs, ys, zs, c=colors, s=200, edgecolor='black', linewidth=0.5, alpha=0.95)

# Number each point 1..N; the lookup printed at the end uses the same numbers.
for i in range(len(results_df)):
    ax.text(xs.iloc[i], ys.iloc[i], zs.iloc[i], f'{i + 1}', fontsize=5, fontweight='bold',
            color='white', ha='center', va='center', zorder=10)

ax.set_xlim(x_min, x_max)
ax.set_ylim(y_min, y_max)
ax.set_zlim(z_min, z_max)

ax.set_xlabel('H', fontsize=16, labelpad=10)
ax.set_ylabel('m2', fontsize=16, labelpad=10)
# The z data is Q_norm, so label the axis to match the column name.
# NOTE(review): 'Ln(Q)' assumes Q_norm holds ln(Q) — confirm where it is computed.
ax.set_zlabel('Ln(Q)', fontsize=16, labelpad=10)

ax.tick_params(axis='x', labelsize=12)
ax.tick_params(axis='y', labelsize=12)
ax.tick_params(axis='z', labelsize=12)

legend_elements = [
    Line2D([0], [0], marker='o', color='w', label=cat,
           markerfacecolor=color_dict[cat], markersize=10,
           markeredgecolor='black', markeredgewidth=0.6)
    for cat in categories
]

legend = ax.legend(handles=legend_elements, title='Heterogeneity', title_fontsize=15,
                   fontsize=13, frameon=False, loc='upper left', bbox_to_anchor=(0.02, 0.98))
legend.get_title().set_weight('semibold')

ax.view_init(elev=20, azim=-60)
ax.grid(True, color='gray', linestyle=':', alpha=0.3)
# These override the tight x limits and the lower y limit set above:
# the x axis runs reversed from 1 to 0 and the y axis starts at 0.
ax.set_xlim(left=1, right=0)
ax.set_ylim(bottom=0) 
plt.tight_layout(pad=1.0)
plt.show()

# Shift the index to 1-based so the printed numbers match the labels drawn
# on the plot (which use i + 1).
annotation_map = results_df['graph'].copy()
annotation_map.index = annotation_map.index + 1
print("Mapping of Annotation Numbers to Graph Names:")
print(annotation_map)

In [None]:
from scipy.stats import linregress, skew, kurtosis


# `font` is not read anywhere in this cell; it is kept because it is a
# notebook-level name that other cells may rely on.
font = {'family': 'times new roman', 'weight': 'bold', 'size': 14}

# Work on a copy so the derived log column is not added to combined_df.
df = combined_df.copy()

# Least-squares fit of Q_norm against ln(H), plus shape statistics of the residuals.
df['H_new'] = np.log(df['H'])
slope, intercept, r_value, p_value, std_err = linregress(df['H_new'], df['Q_norm'])
residuals = df['Q_norm'] - (slope * df['H_new'] + intercept)
skew_res = skew(residuals)
kurt_res = kurtosis(residuals)
r_squared = r_value ** 2

# A single figure holds the scatter and the fitted line.
plt.figure(figsize=(9, 6))
plt.scatter(df['H_new'], df['Q_norm'], color='black', s=50)
plt.plot(df['H_new'], slope * df['H_new'] + intercept, 'r-')

plt.xlabel('Ln(H-Index)', fontfamily='Times New Roman', weight='bold', size=12)
# NOTE(review): the y data is df['Q_norm'] as-is (no log is taken in this cell)
# while the label reads 'Ln(Q-Index)' - confirm Q_norm is already on a log scale.
plt.ylabel('Ln(Q-Index)', fontfamily='Times New Roman', weight='bold', size=12)
plt.xlim(-5, 0)
plt.ylim(0, 12)
plt.title('Real-World Networks', weight='bold')
plt.grid(True, alpha=0.3)

plt.xticks(fontfamily='Times New Roman', weight='bold')
plt.yticks(fontfamily='Times New Roman', weight='bold')

# Fit summary shown in a box in the upper-left corner (axes coordinates).
textstr = f'Slope = {slope:.3f}\nIntercept = {intercept:.3f}\nSkewness = {skew_res:.3f}\nKurtosis = {kurt_res:.3f}\nR² = {r_squared:.3f}'
props = dict(boxstyle='round', facecolor='white', alpha=0.8)
plt.text(0.03, 0.97, textstr, transform=plt.gca().transAxes, fontsize=12,
         verticalalignment='top', bbox=props)

plt.tight_layout()

# savefig does not create missing directories, so make sure the target exists.
os.makedirs('Figures', exist_ok=True)
plt.savefig('Figures/q_h.png', facecolor='white', dpi=1000, bbox_inches='tight')

plt.show()

In [None]:
# `font` is not read anywhere in this cell; it is kept because it is a
# notebook-level name that other cells may rely on.
font = {'family': 'times new roman', 'weight': 'bold', 'size': 14}

# Work on a copy so the derived log column is not added to combined_df.
df = combined_df.copy()

# Least-squares fit of Q_norm against ln(m2), plus shape statistics of the residuals.
df['m2_new'] = np.log(df['m2'])
slope, intercept, r_value, p_value, std_err = linregress(df['m2_new'], df['Q_norm'])
residuals = df['Q_norm'] - (slope * df['m2_new'] + intercept)
skew_res = skew(residuals)
kurt_res = kurtosis(residuals)
r_squared = r_value ** 2

# A single figure holds the scatter and the fitted line.
plt.figure(figsize=(9, 6))
plt.scatter(df['m2_new'], df['Q_norm'], color='black', s=50)
plt.plot(df['m2_new'], slope * df['m2_new'] + intercept, 'r-')

plt.xlabel('Ln(m2-Index)', fontfamily='Times New Roman', weight='bold', size=12)
# NOTE(review): the y data is df['Q_norm'] as-is (no log is taken in this cell)
# while the label reads 'Ln(Q-Index)' - confirm Q_norm is already on a log scale.
plt.ylabel('Ln(Q-Index)', fontfamily='Times New Roman', weight='bold', size=12)
plt.xlim(-5, -1)
plt.ylim(0, 12)
plt.title('Real-World Networks', weight='bold')
plt.grid(True, alpha=0.3)

plt.xticks(fontfamily='Times New Roman', weight='bold')
plt.yticks(fontfamily='Times New Roman', weight='bold')

# Fit summary shown in a box in the upper-left corner (axes coordinates).
textstr = f'Slope = {slope:.3f}\nIntercept = {intercept:.3f}\nSkewness = {skew_res:.3f}\nKurtosis = {kurt_res:.3f}\nR² = {r_squared:.3f}'
props = dict(boxstyle='round', facecolor='white', alpha=0.8)
plt.text(0.03, 0.97, textstr, transform=plt.gca().transAxes, fontsize=12,
         verticalalignment='top', bbox=props)

# Lay the figure out before writing it to disk so the saved file and the
# displayed figure agree.
plt.tight_layout()

# savefig does not create missing directories, so make sure the target exists.
os.makedirs('Figures', exist_ok=True)
plt.savefig('Figures/q_m2.png', facecolor='white', dpi=1000, bbox_inches='tight')

plt.show()

In [None]:
# `font` is not read anywhere in this cell; it is kept because it is a
# notebook-level name that other cells may rely on.
font = {'family': 'times new roman', 'weight': 'bold', 'size': 14}

# Work on a copy so the derived log columns are not added to combined_df.
df = combined_df.copy()

df['m2_new'] = np.log(df['m2'])
df['H_new'] = np.log(df['H'])

# Least-squares fit of ln(H) against ln(m2), plus shape statistics of the residuals.
slope, intercept, r_value, p_value, std_err = linregress(df['m2_new'], df['H_new'])
r_squared = r_value ** 2
residuals = df['H_new'] - (slope * df['m2_new'] + intercept)

skew_res = skew(residuals)
kurt_res = kurtosis(residuals)

# A single figure holds the scatter and the fitted line.
plt.figure(figsize=(8, 6))
plt.scatter(df['m2_new'], df['H_new'], color='black', s=50)
plt.plot(df['m2_new'], slope * df['m2_new'] + intercept, 'r-')

plt.xlabel('Ln(m2-Index)', fontfamily='Times New Roman', weight='bold', size=12)
plt.ylabel('Ln(H-Index)', fontfamily='Times New Roman', weight='bold', size=12)
plt.xlim(-5, -1)
plt.ylim(-4, 0)
plt.title('Real-World Networks', weight='bold')
plt.grid(True, alpha=0.3)

plt.xticks(fontfamily='Times New Roman', weight='bold')
plt.yticks(fontfamily='Times New Roman', weight='bold')

# Fit summary box; its top edge sits at 35% of the axes height here, lower
# than the 0.97 used by the other regression cells.
textstr = f'Slope = {slope:.3f}\nIntercept = {intercept:.3f}\nSkewness = {skew_res:.3f}\nKurtosis = {kurt_res:.3f}\nR² = {r_squared:.3f}'
props = dict(boxstyle='round', facecolor='white', alpha=0.8)
plt.text(0.03, 0.35, textstr, transform=plt.gca().transAxes, fontsize=12,
         verticalalignment='top', bbox=props)

# savefig does not create missing directories, so make sure the target exists.
os.makedirs('Figures', exist_ok=True)
plt.savefig('Figures/h_m2.png', facecolor='white', dpi=1000, bbox_inches='tight')

plt.show()

In [None]:
# `font` is not read anywhere in this cell; it is kept because it is a
# notebook-level name that other cells may rely on.
font = {'family': 'times new roman', 'weight': 'bold', 'size': 14}

# Work on a copy so the derived log columns are not added to combined_df.
df = combined_df.copy()

df['estrada_new'] = np.log(df['estrada'])
df['H_new'] = np.log(df['H'])

# Least-squares fit of ln(H) against ln(estrada), plus shape statistics of the residuals.
slope, intercept, r_value, p_value, std_err = linregress(df['estrada_new'], df['H_new'])

residuals = df['H_new'] - (slope * df['estrada_new'] + intercept)

skew_res = skew(residuals)
kurt_res = kurtosis(residuals)
r_squared = r_value ** 2

# A single figure holds the scatter and the fitted line.
plt.figure(figsize=(8, 6))
plt.scatter(df['estrada_new'], df['H_new'], color='black', s=50)
plt.plot(df['estrada_new'], slope * df['estrada_new'] + intercept, 'r-')

plt.xlabel('Ln(Estrada-Index)', fontfamily='Times New Roman', weight='bold', size=12)
plt.ylabel('Ln(H-Index)', fontfamily='Times New Roman', weight='bold', size=12)
plt.xlim(-5, 0)
plt.ylim(-4, 0)
plt.title('Real-World Networks', weight='bold')
plt.grid(True, alpha=0.3)

plt.xticks(fontfamily='Times New Roman', weight='bold')
plt.yticks(fontfamily='Times New Roman', weight='bold')

# Fit summary shown in a box in the upper-left corner (axes coordinates).
textstr = f'Slope = {slope:.3f}\nIntercept = {intercept:.3f}\nSkewness = {skew_res:.3f}\nKurtosis = {kurt_res:.3f}\nR² = {r_squared:.3f}'

props = dict(boxstyle='round', facecolor='white', alpha=0.8)
plt.text(0.03, 0.97, textstr, transform=plt.gca().transAxes, fontsize=12,
         verticalalignment='top', bbox=props)

# savefig does not create missing directories, so make sure the target exists.
os.makedirs('Figures', exist_ok=True)
plt.savefig('Figures/h_estrada.png', facecolor='white', dpi=1000, bbox_inches='tight')

plt.show()

In [None]:
# `font` is not read anywhere in this cell; it is kept because it is a
# notebook-level name that other cells may rely on.
font = {'family': 'times new roman', 'weight': 'bold', 'size': 14}

# Work on a copy so the derived log columns are not added to combined_df.
df = combined_df.copy()

df['gini_new'] = np.log(df['Gini'])
df['H_new'] = np.log(df['H'])

# Least-squares fit of ln(H) against ln(Gini), plus shape statistics of the residuals.
slope, intercept, r_value, p_value, std_err = linregress(df['gini_new'], df['H_new'])

residuals = df['H_new'] - (slope * df['gini_new'] + intercept)

skew_res = skew(residuals)
kurt_res = kurtosis(residuals)
r_squared = r_value ** 2

# A single figure holds the scatter and the fitted line.
plt.figure(figsize=(8, 6))
plt.scatter(df['gini_new'], df['H_new'], color='black', s=50)
plt.plot(df['gini_new'], slope * df['gini_new'] + intercept, 'r-')

plt.xlabel('Ln(Gini-Index)', fontfamily='Times New Roman', weight='bold', size=12)
# The plotted and regressed y variable is H_new = ln(H), so label it as such.
plt.ylabel('Ln(H-Index)', fontfamily='Times New Roman', weight='bold', size=12)
plt.title('Real-World Networks', weight='bold')
plt.grid(True, alpha=0.3)
plt.xticks(fontfamily='Times New Roman', weight='bold')
plt.yticks(fontfamily='Times New Roman', weight='bold')
plt.xlim(-3, 0)

# Fit summary shown in a box in the upper-left corner (axes coordinates).
textstr = f'Slope = {slope:.3f}\nIntercept = {intercept:.3f}\nSkewness = {skew_res:.3f}\nKurtosis = {kurt_res:.3f}\nR² = {r_squared:.3f}'

props = dict(boxstyle='round', facecolor='white', alpha=0.8)
plt.text(0.03, 0.97, textstr, transform=plt.gca().transAxes, fontsize=12,
         verticalalignment='top', bbox=props)

# savefig does not create missing directories, so make sure the target exists.
os.makedirs('Figures', exist_ok=True)
plt.savefig('Figures/h_gini.png', facecolor='white', dpi=1000, bbox_inches='tight')

plt.show()

In [None]:
# `font` is not read anywhere in this cell; it is kept because it is a
# notebook-level name that other cells may rely on.
font = {'family': 'times new roman', 'weight': 'bold', 'size': 14}

# Work on a copy so the derived log columns are not added to combined_df.
df = combined_df.copy()

df['gini_new'] = np.log(df['Gini'])
df['estrada_new'] = np.log(df['estrada'])

# Least-squares fit of ln(estrada) against ln(Gini), plus shape statistics of the residuals.
slope, intercept, r_value, p_value, std_err = linregress(df['gini_new'], df['estrada_new'])

residuals = df['estrada_new'] - (slope * df['gini_new'] + intercept)
r_squared = r_value ** 2
skew_res = skew(residuals)
kurt_res = kurtosis(residuals)

# A single figure holds the scatter and the fitted line.
plt.figure(figsize=(8, 6))
plt.scatter(df['gini_new'], df['estrada_new'], color='black', s=50)
plt.plot(df['gini_new'], slope * df['gini_new'] + intercept, 'r-')

# This figure uses larger fonts (20/25/15) than the other regression cells.
plt.xlabel('Ln(Gini-Index)', fontfamily='Times New Roman', weight='bold', size=20)
plt.ylabel('Ln(Estrada-Index)', fontfamily='Times New Roman', weight='bold', size=20)

plt.title('Real-World Networks', weight='bold', size=25)
plt.grid(True, alpha=0.3)
plt.xticks(fontfamily='Times New Roman', weight='bold', size=15)
plt.yticks(fontfamily='Times New Roman', weight='bold', size=15)
plt.xlim(-3, 0)
plt.ylim(-6, 0)

# Fit summary shown in a box in the upper-left corner (axes coordinates).
textstr = f'Slope = {slope:.3f}\nIntercept = {intercept:.3f}\nSkewness = {skew_res:.3f}\nKurtosis = {kurt_res:.3f}\nR² = {r_squared:.3f}'

props = dict(boxstyle='round', facecolor='white', alpha=0.8)
plt.text(0.03, 0.97, textstr, transform=plt.gca().transAxes, fontsize=15,
         verticalalignment='top', bbox=props)

# savefig does not create missing directories, so make sure the target exists.
os.makedirs('Figures', exist_ok=True)
plt.savefig('Figures/estrada_gini.png', facecolor='white', dpi=1000, bbox_inches='tight')

plt.show()