In [1]:
# Put '../src/' at the front of the module search path before the project-local
# imports below (presumably graph_statistics, utils and evaluation live there -- TODO confirm).
import sys
sys.path.insert(0, '../src/')
# NOTE(review): this silences every warning for the whole kernel session, including
# deprecation and numerical warnings raised by the libraries used below.
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import scipy.sparse as sp
from scipy.sparse import load_npz
import graph_statistics
import utils

# Evaluation helpers; only df_from_dataset is called in the cells visible in this notebook.
from evaluation import (Evaluation, tabular_from_statistics, df_from_tabular, compute_original_statistics,
                        boxplot, df_from_dataset, errorbar_plot)

#### True graph

In [2]:
# Load the observed adjacency matrix from disk (scipy sparse .npz format).
_A_obs = load_npz('../data/datasets/CORA_ML.npz')

# Split configuration handed to the splitter below
# (presumably the fractions of edges held out for validation and test -- TODO confirm).
val_share = 0.1
test_share = 0.05
seed = 481516234

# Only `train_ones` is used further down in this cell; the validation/test
# edge and non-edge sets are unpacked but not referenced here.
train_ones, val_ones, val_zeros, test_ones, test_zeros = utils.train_val_test_split_adjacency(
    _A_obs, val_share, test_share, seed, undirected=True, connected=True, asserts=False)

# Build the training graph as a CSR matrix with a 1.0 entry at every (row, col)
# pair listed in `train_ones`.
train_graph = sp.csr_matrix((np.ones(len(train_ones)), (train_ones[:, 0], train_ones[:, 1])))

# The training graph must be symmetric. The comparison is done on the sparse
# matrices so that no dense N x N array has to be materialised just for the check.
assert (train_graph != train_graph.T).nnz == 0, 'train_graph is not symmetric'

#### Statistic Functions

In [4]:
# Statistics to evaluate: maps the label used as the column name in the results
# table to the function from `graph_statistics` that computes it.
statistic_fns = {
    'Assortativity': graph_statistics.assortativity,
    'Clustering Coefficient': graph_statistics.clustering_coefficient,
    'Max Degree': graph_statistics.max_degree,
    'Power Law α': graph_statistics.power_law_alpha,
    'Spectral Gap': graph_statistics.spectral_gap,
    'Square Count': graph_statistics.square_count,
    'Triangle Count': graph_statistics.triangle_count,
}

# Currently disabled statistics (move an entry into the dict above to enable it):
#   'Average Degree': graph_statistics.average_degree,
#   'Claw Count': graph_statistics.claw_count,
#   'Characteristic Path Length': graph_statistics.compute_cpl,
#   'Edge Distribution Entropy': graph_statistics.edge_distribution_entropy,
#   'Gini': graph_statistics.gini,
#   'Intra com.': intra_community_density,
#   'Inter com.': inter_community_density,
#   'LCC Size': graph_statistics.LCC,
#   'Min Degree': graph_statistics.min_degree,
#   'Num Connected Components': graph_statistics.num_connected_components,
#   'Wedge Count': graph_statistics.wedge_count,

#### Evaluate 

In [5]:
# Build the results table for the logged runs under '../logs/CORA-ML/', using the
# training graph as the reference graph.
# NOTE(review): target_overlap=0.50 presumably selects generated graphs at ~50% edge
# overlap with the reference, and max_trials=None presumably means "no cap" -- confirm
# against evaluation.df_from_dataset.
df, evals = df_from_dataset(
    original_graph=train_graph,
    path_to_dataset='../logs/CORA-ML/',
    statistic_fns=statistic_fns,
    target_overlap=0.50,
    max_trials=None,
)

# Display the full table as the cell output.
df

Unnamed: 0,Assortativity,Clustering Coefficient,Max Degree,Power Law α,Spectral Gap,Square Count,Triangle Count,Edge Overlap (%),ROC-AUC Score,Average Precision,Time (s)
CORA-ML,-0.076264,0.002771,238.0,1.855065,0.006114,14268.0,2802.0,1.0,,,
baseline_FS,-0.084191,0.001622,167.2,1.788639,0.02315,1942.4,691.4,0.520212,0.799069,0.853881,38.14351
baseline_conf,-0.053171,0.000688,228.25,1.856362,0.06139,3110.95,623.35,0.523411,,,0.10393


#### Table of selected statistics

In [6]:
# Rows of the table in presentation order. An alternative, longer ordering that
# covers more methods is kept here for reference:
#   'CORA-ML', 'baseline_conf', 'baseline_FA', 'baseline_FT', 'baseline_FS',
#   'baseline_FM', 'baseline_NetGAN_sample_many', 'Ours'
df_for_paper = df.reindex(index=['CORA-ML', 'baseline_conf', 'baseline_FS'])

# Show only the selected columns, in the order they should appear.
df_for_paper.loc[:, [
    'Max Degree',
    'Assortativity',
    'Triangle Count',
    'Square Count',
    'Power Law α',
    'Clustering Coefficient',
    'Spectral Gap',
    'ROC-AUC Score',
    'Time (s)',
    'Edge Overlap (%)',
]]

Unnamed: 0,Max Degree,Assortativity,Triangle Count,Square Count,Power Law α,Clustering Coefficient,Spectral Gap,ROC-AUC Score,Time (s),Edge Overlap (%)
CORA-ML,238.0,-0.076264,2802.0,14268.0,1.855065,0.002771,0.006114,,,1.0
baseline_conf,228.25,-0.053171,623.35,3110.95,1.856362,0.000688,0.06139,,0.10393,0.523411
baseline_FS,167.2,-0.084191,691.4,1942.4,1.788639,0.001622,0.02315,0.799069,38.14351,0.520212
