In [2]:
import numpy as np
from scipy.stats import gmean

# Heterophilic dataset mean values (only mean, ignore std dev)
# Keys are method names. Each list holds 16 scores, one per dataset, so a given
# position refers to the same dataset in every list. np.nan marks a missing
# result (MoG at positions 5, 7, 9; NeuralSp. at positions 5, 7).
hetero = {
    "GSAINT-E": [46.49, 69.19, 66.27, 54.51, 57.40, 68.35, 42.91, 63.47, 42.44, 62.80, 30.53, 79.84, 97.08, 57.11, 78.59, 45.75],
    "DropEdge": [43.24, 54.95, 48.36, 60.03, 59.00, 65.87, 46.00, 61.40, 48.60, 64.14, 34.64, 80.00, 97.00, 50.60, 78.40, 43.87],
    "MoG": [42.16, 57.30, 53.33, 55.75, 56.78, np.nan, 39.27, np.nan, 27.67, np.nan, 27.74, 80.00, 97.04, 53.25, 78.49, 41.18],
    "SparseGAT": [51.35, 66.66, 56.86, 55.69, 49.00, 65.00, 41.18, 53.77, 29.50, 57.57, 25.05, 80.00, 97.08, 60.60, 78.20, 44.23],
    "NeuralSp.": [72.43, 84.44, 52.83, 58.54, 56.85, np.nan, 44.91, np.nan, 38.24, 57.56, 27.85, 80.10, 97.02, 60.52, 78.16, 47.05],
    "SGS-GNN": [74.59, 76.22, 76.08, 64.15, 72.75, 75.65, 64.69, 69.15, 52.35, 73.80, 33.88, 80.00, 97.05, 62.37, 79.98, 50.15],
}

# Identify indices (datasets) to exclude due to any np.nan in any method
def get_valid_indices(data):
    """Return the positions at which every method has a non-NaN value.

    Parameters
    ----------
    data : dict
        Maps a method name to a sequence of numeric scores. All sequences are
        expected to have the same length; the length of the first one decides
        how many positions are scanned.

    Returns
    -------
    list of int
        Ascending positions ``i`` such that ``values[i]`` is not NaN for every
        method. An empty ``data`` mapping yields an empty list.

    Raises
    ------
    IndexError
        If a later sequence is shorter than the first one.
    """
    if not data:
        # An empty mapping has no first row to size the scan from.
        return []

    columns = list(data.values())
    num_entries = len(columns[0])
    return [
        i
        for i in range(num_entries)
        if not any(np.isnan(column[i]) for column in columns)
    ]

# Filter data using valid indices
def filter_data(data, valid_indices):
    """Keep only the selected positions of every method's score list.

    Parameters
    ----------
    data : dict
        Maps a method name to an indexable sequence of scores.
    valid_indices : iterable of int
        Positions to keep, in the order they should appear in the output.

    Returns
    -------
    dict
        A new mapping with the same keys (same order) whose values are lists
        containing ``values[i]`` for each ``i`` in ``valid_indices``.
    """
    return {
        name: [scores[idx] for idx in valid_indices]
        for name, scores in data.items()
    }

# Compute geometric mean per method
def compute_gm(data):
    """Compute each method's geometric mean, rounded to two decimals.

    Parameters
    ----------
    data : dict
        Maps a method name to a sequence of numeric scores.

    Returns
    -------
    dict
        Same keys (same order) mapped to ``round(gmean(values), 2)``.
    """
    return {name: round(gmean(scores), 2) for name, scores in data.items()}

# Drop every dataset position that has a missing value in any method, then take
# the geometric mean (rounded to 2 decimals) of the remaining scores per method.
valid_hetero_indices = get_valid_indices(hetero)
filtered_hetero = filter_data(hetero, valid_hetero_indices)
hetero_gm = compute_gm(filtered_hetero)

# Homophilic dataset mean values
# Keys are method names. Each list holds 11 scores, one per dataset, so a given
# position refers to the same dataset in every list. np.nan marks a missing
# result (MoG at positions 2, 4, 8).
homo = {
    "GSAINT-E": [57.63, 81.19, 90.37, 87.62, 85.29, 76.44, 94.09, 93.63, 96.23, 86.75, 80.19],
    "DropEdge": [65.09, 87.20, 60.65, 86.00, 84.80, 76.47, 93.30, 80.47, 97.28, 77.40, 80.19],
    # NOTE(review): positions 3-6 of the MoG row (83.84, nan, 78.43, 72.88) repeat
    # exactly at positions 7-10 — possibly a copy-paste error; verify against the
    # source table.
    "MoG": [67.26, 72.37, np.nan, 83.84, np.nan, 78.43, 72.88, 83.84, np.nan, 78.43, 72.88],
    "SparseGAT": [61.07, 84.68, 88.91, 75.30, 80.60, 75.70, 92.35, 95.30, 95.96, 58.75, 79.26],
    "NeuralSp.": [56.68, 73.39, 75.32, 73.97, 79.30, 75.79, 94.36, 94.16, 96.38, 65.40, 73.03],
    "SGS-GNN": [65.58, 80.37, 90.97, 87.52, 83.99, 76.94, 94.25, 93.99, 96.27, 86.78, 81.49],
}

# Homophilic scores: keep only dataset positions where no method is NaN, then
# take the geometric mean (rounded to 2 decimals) per method.
valid_homo_indices = get_valid_indices(homo)
filtered_homo = filter_data(homo, valid_homo_indices)
homo_gm = compute_gm(filtered_homo)

# Report both groups; each dict maps method name -> rounded geometric mean.
print("Heterophilic GM:", hetero_gm)
print("Homophilic GM:", homo_gm)


Heterophilic GM: {'GSAINT-E': 56.44, 'DropEdge': 55.04, 'MoG': 51.15, 'SparseGAT': 53.04, 'NeuralSp.': 58.25, 'SGS-GNN': 65.99}
Homophilic GM: {'GSAINT-E': 81.36, 'DropEdge': 80.36, 'MoG': 76.04, 'SparseGAT': 76.78, 'NeuralSp.': 74.89, 'SGS-GNN': 82.87}


In [7]:
# Show only the rounded geometric means; the order follows the method order in `homo`.
homo_gm.values()

dict_values([81.36, 80.36, 76.04, 76.78, 74.89, 82.87])

In [10]:
import numpy as np
from scipy.stats import gmean

# Heterophilic datasets mean values only (excluding std dev)
# Keys are the row labels being compared. Every list has 21 entries and none is
# NaN, so the geometric mean can be taken over each full list directly.
hetero_data = {
    "Org. Graph": [43.78, 61.62, 51.76, 61.35, 61.83, 73.23, 44.25, 65.13, 48.38, 68.71, 28.42, 79.56, 97.05, 64.43, 79.03, 46.72, 80.80, 62.05, 39.05, 35.38, 93.15],
    "Random":     [49.19, 55.14, 61.96, 54.92, 57.49, 68.52, 43.08, 63.36, 42.40, 63.40, 32.37, 79.73, 97.07, 57.98, 78.59, 45.70, 81.99, 60.30, 36.96, 34.57, 93.15],
    "Edge":       [46.49, 69.19, 66.27, 54.51, 57.40, 68.35, 42.91, 63.47, 42.44, 62.80, 30.53, 79.84, 97.08, 57.11, 78.59, 45.75, 81.60, 60.17, 37.06, 34.48, 93.15],
    "ER":         [43.78, 61.08, 58.82, 59.69, 50.60, 70.74, 58.18, 63.89, 43.86, 62.14, 32.03, 80.02, 97.02, 59.78, 78.10, 44.39, 82.25, 58.76, 36.62, 33.13, 93.15],
    "SGS-GNN":    [74.59, 76.22, 76.08, 64.15, 72.75, 75.65, 64.69, 69.15, 52.35, 73.80, 33.88, 80.00, 97.05, 62.37, 79.98, 50.15, 82.59, 60.49, 38.42, 35.41, 93.15]
}

# Homophilic datasets mean values only (excluding std dev)
# Keys are the row labels being compared. Every list has 12 entries and none is
# NaN, so the geometric mean can be taken over each full list directly.
homo_data = {
    "Org. Graph": [67.29, 83.92, 90.19, 86.73, 86.29, 80.28, 92.79, 92.41, 96.08, 91.44, 80.07, 91.43],
    "Random":     [61.20, 81.00, 90.34, 87.58, 85.39, 75.82, 94.07, 93.54, 96.20, 86.38, 80.10, 91.39],
    "Edge":       [57.63, 81.19, 90.37, 87.62, 85.29, 76.44, 94.09, 93.63, 96.23, 86.75, 80.19, 91.35],
    "ER":         [66.90, 81.81, 89.87, 87.70, 85.63, 75.90, 93.77, 93.42, 96.22, 86.24, 80.32, 91.00],
    "SGS-GNN":    [65.58, 80.37, 90.97, 87.52, 83.99, 76.94, 94.25, 93.99, 96.27, 86.78, 81.49, 91.45]
}

# Compute geometric mean for each method per group
def compute_gmeans(data):
    """Compute the unrounded geometric mean of each method's scores.

    Parameters
    ----------
    data : dict
        Maps a method name to a sequence of numeric scores.

    Returns
    -------
    dict
        Same keys (same order) mapped to ``gmean(values)``.
    """
    result = {}
    for name, scores in data.items():
        result[name] = gmean(scores)
    return result

# Unrounded geometric mean of each row, taken over all of its entries.
hetero_gmeans = compute_gmeans(hetero_data)
homo_gmeans = compute_gmeans(homo_data)

# Bare last expression so the notebook displays the heterophilic values.
hetero_gmeans.values()


dict_values([58.41804207188218, 57.24974542028695, 57.62596113418608, 57.67988822703311, 64.7866445550317])

In [9]:
# NOTE(review): this cell is numbered In [9], lower than the In [10] cell that
# defines homo_gmeans — presumably run out of order; re-run top to bottom to
# confirm the displayed values.
homo_gmeans.values()

dict_values([86.21939204744186, 84.67853836131316, 84.3717504144447, 85.32732446976283, 85.3560596882435])