In [1]:
import sys
sys.path.insert(0, '../src/')
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import scipy.sparse as sp
from scipy.sparse import load_npz
import graph_statistics
import utils

from evaluation import (Evaluation, tabular_from_statistics, df_from_tabular, compute_original_statistics,
                        boxplot, df_from_dataset, errorbar_plot)

#### True graph

In [2]:
# Load the observed CORA-ML adjacency matrix from its sparse .npz file.
_A_obs = load_npz('../data/datasets/CORA_ML.npz')

# Split parameters handed to utils.train_val_test_split_adjacency
# (presumably the validation/test edge fractions and the RNG seed -- confirm against utils).
val_share = 0.1
test_share = 0.05
seed = 481516234

train_ones, val_ones, val_zeros, test_ones, test_zeros = utils.train_val_test_split_adjacency(
    _A_obs, val_share, test_share, seed, undirected=True, connected=True, asserts=False)

# Build the training adjacency matrix from the (row, col) index pairs in train_ones.
# The shape is pinned to that of _A_obs: without it scipy infers the size from the largest
# index present, so the node count could silently shrink if the highest-numbered nodes
# had no training edge.
train_graph = sp.csr_matrix(
    (np.ones(len(train_ones)), (train_ones[:, 0], train_ones[:, 1])),
    shape=_A_obs.shape,
)

# The training graph must be symmetric (undirected). The comparison stays sparse so no
# dense N x N arrays are materialised just for the check.
assert (train_graph != train_graph.T).nnz == 0

#### Statistic Functions

In [19]:
# Statistics computed for each graph: display label -> function from graph_statistics.
# The insertion order of this dict is kept as-is; entries that are commented out are
# currently disabled and left here so they can be switched back on.
statistic_fns = {
    'Assortativity': graph_statistics.assortativity,
    # 'Average Degree': graph_statistics.average_degree,
    # 'Claw Count': graph_statistics.claw_count,
    'Clustering Coefficient': graph_statistics.clustering_coefficient,
    # 'Characteristic Path Length': graph_statistics.compute_cpl,
    'Edge Distribution Entropy': graph_statistics.edge_distribution_entropy,
    # 'Gini': graph_statistics.gini,
    # 'Intra com.': intra_community_density,
    # 'Inter com.': inter_community_density,
    # 'LCC Size': graph_statistics.LCC,
    'Max Degree': graph_statistics.max_degree,
    # 'Min Degree': graph_statistics.min_degree,
    # 'Num Connected Components': graph_statistics.num_connected_components,
    'Power Law α': graph_statistics.power_law_alpha,
    'Spectral Gap': graph_statistics.spectral_gap,
    'Square Count': graph_statistics.square_count,
    'Triangle Count': graph_statistics.triangle_count,
    'Wedge Count': graph_statistics.wedge_count,
}

#### Evaluate 

In [8]:
# Display the contents of the overlap.pickle stored with one NetGAN baseline experiment.
overlap_file = '../logs/CORA-ML/baseline_NetGAN_eo_52/Experiment_0/sampled_graphs/overlap.pickle'
utils.load_dict(overlap_file)

{1: 0.541500810850656}

In [20]:
# Run df_from_dataset over the CORA-ML log directory with the statistics selected above,
# using the training graph as the original graph. Requires the cells defining
# `train_graph` and `statistic_fns` to have been executed first.
df, evals = df_from_dataset(
    path_to_dataset='../logs/CORA-ML/',
    statistic_fns=statistic_fns,
    target_overlap=0.50,
    original_graph=train_graph,
    max_trials=None,
)

# Last expression of the cell: render the resulting table.
df

Unnamed: 0,Assortativity,Clustering Coefficient,Edge Distribution Entropy,Max Degree,Power Law α,Spectral Gap,Square Count,Triangle Count,Wedge Count,Edge Overlap (%),ROC-AUC Score,Average Precision,Time (s)
CORA-ML,-0.076264,0.002771,0.940665,238.0,1.855065,0.006114,14268.0,2802.0,101747.0,1.0,,,
baseline_FS,-0.084367,0.001449,0.955512,174.0,1.788657,0.024225,1806.0,660.0,75297.0,0.502285,0.7879,0.846239,38.628556
baseline_FT,-0.058367,0.002553,0.964817,139.4,1.769471,0.029537,1617.4,557.8,56878.4,0.565119,0.709044,0.798294,33.409426
baseline_FA,-0.041861,0.002999,0.974958,121.4,1.722574,0.03791,1128.0,443.6,47898.8,0.52549,0.560806,0.648291,32.277159
Ours,-0.070188,0.002213,0.949905,203.6,1.823047,0.039909,6880.2,1396.4,82621.6,0.529677,0.938313,0.944477,12.420809
baseline_conf,-0.050796,0.000642,,226.7,1.85631,0.07179,3010.55,572.9,97430.7,0.50328,,,0.111448
baseline_NetGAN_sample_many,-0.070948,0.002008,0.953051,219.0,1.798262,0.037843,5554.8,1461.2,83917.0,0.538847,0.950744,0.957105,2127.321929
baseline_FM,-0.042582,0.002966,0.97532,122.4,1.720652,0.047959,1134.8,436.6,47649.8,0.52552,0.557462,0.64716,47.815948


#### Selected statistics (rows and columns ordered for the paper)

In [21]:
# Row order used for the paper table (index labels of `df`).
row_order = [
    'CORA-ML',
    'baseline_conf',
    'baseline_FA',
    'baseline_FT',
    'baseline_FS',
    'baseline_FM',
    'baseline_NetGAN_sample_many',
    'Ours',
]

# Columns shown in the paper table, in display order.
paper_columns = [
    'Max Degree',
    'Assortativity',
    'Edge Distribution Entropy',
    'Wedge Count',
    'Triangle Count',
    'Square Count',
    'Power Law α',
    'Clustering Coefficient',
    'Spectral Gap',
    'ROC-AUC Score',
    'Time (s)',
    'Edge Overlap (%)',
]

# Reorder the rows, then display only the selected columns as the cell output.
df_for_paper = df.reindex(row_order)
df_for_paper[paper_columns]

Unnamed: 0,Max Degree,Assortativity,Edge Distribution Entropy,Wedge Count,Triangle Count,Square Count,Power Law α,Clustering Coefficient,Spectral Gap,ROC-AUC Score,Time (s),Edge Overlap (%)
CORA-ML,238.0,-0.076264,0.940665,101747.0,2802.0,14268.0,1.855065,0.002771,0.006114,,,1.0
baseline_conf,226.7,-0.050796,,97430.7,572.9,3010.55,1.85631,0.000642,0.07179,,0.111448,0.50328
baseline_FA,121.4,-0.041861,0.974958,47898.8,443.6,1128.0,1.722574,0.002999,0.03791,0.560806,32.277159,0.52549
baseline_FT,139.4,-0.058367,0.964817,56878.4,557.8,1617.4,1.769471,0.002553,0.029537,0.709044,33.409426,0.565119
baseline_FS,174.0,-0.084367,0.955512,75297.0,660.0,1806.0,1.788657,0.001449,0.024225,0.7879,38.628556,0.502285
baseline_FM,122.4,-0.042582,0.97532,47649.8,436.6,1134.8,1.720652,0.002966,0.047959,0.557462,47.815948,0.52552
baseline_NetGAN_sample_many,219.0,-0.070948,0.953051,83917.0,1461.2,5554.8,1.798262,0.002008,0.037843,0.950744,2127.321929,0.538847
Ours,203.6,-0.070188,0.949905,82621.6,1396.4,6880.2,1.823047,0.002213,0.039909,0.938313,12.420809,0.529677
