In [1]:
import sys
sys.path.insert(0, '../src/')

import os
import abc
import pickle
import warnings
warnings.filterwarnings('ignore')

import time

#import tensorflow as tf
import torch
device = 'cpu'
dtype = torch.float32

import scipy.sparse as sp
import numpy as np
from matplotlib import pyplot as plt
from scipy.sparse import save_npz, load_npz, csr_matrix
from sklearn.metrics import roc_auc_score, average_precision_score
import time
import pandas as pd
%matplotlib inline

# from netgan.netgan import *
# from netgan import utils

from net.utils import *
from net import utils_netgan as utils
import net.net as net
from net.sparse_graph_statistics import *

In [2]:
class Evaluation(object):
    def __init__(self, experiment_root, statistic_fns):
        self.experiment_root = experiment_root
        self.statistic_fns = statistic_fns
        
    def _load_timing(self, experiment):
        return self._load(experiment, 'timing')
    
    def _load_overlap(self, experiment):
        return self._load(experiment, 'overlap')
    
    def _load_ROC_AUC():
        return self._load(experiment, 'ROC-AUC')
    
    def _load_avg_precision():
        return self._load(experiment, 'avg_precision')
    
    def _load(self, experiment, name):
        filename = os.path.join(self.experiment_root,
                                f'Experiment_{experiment}/sampled_graphs/{name}.pickle')
        with open(filename, 'rb') as handle:
            timing = pickle.load(handle)
        return timing
    
    def compute_statistics(self):
        # parse experiment root folder
        num_experiments = len(os.listdir(self.experiment_root))
        # load overlaps and timings
        overlaps = [self._load_overlap(i) for i in range(num_experiments)]
        timings = [self._load_timing(i) for i in range(num_experiments)]
        
        steps = max(timings[0].keys())
        step_len = int(np.log10(steps))+1
        invoke_every = steps // len(timings[0].keys())
        
        statistics = {name: np.zeros([num_experiments, steps//invoke_every]) for name in self.statistic_fns.keys()}
        statistics['Edge Overlap (%)'] = np.zeros([num_experiments, steps//invoke_every])
        statistics['Time (s)'] = np.zeros([num_experiments, steps//invoke_every])
        for step in range(invoke_every, steps, invoke_every):
            for experiment in range(num_experiments):
                # load sparse graph
                graph_name = f'graph_{step:0{step_len}d}.npz'
                graph_path = os.path.join(self.experiment_root,
                                          f'Experiment_{experiment}/sampled_graphs/{graph_name}')
                graph = load_npz(graph_path)
                # compute statistics
                statistics['Edge Overlap (%)'][experiment, step//invoke_every] = overlaps[experiment][step]
                statistics['Time (s)'][experiment, step//invoke_every] = timings[experiment][step]
                for name, statistic_fn in self.statistic_fns.items():
                    #print(name)
                    statistics[name][experiment, step//invoke_every] = statistic_fn(graph)
                    
        self.statistics = statistics
        self.steps = steps
        self.invoke_every = invoke_every
        #self.statistics_mean = {name:stats.mean(axis=0) for name, stats in self.statistics.items()}
        #self.statistics_std = {name:stats.std(axis=0) for name, stats in self.statistics.items()}
    
    def aggregate_statistics(self, num_bins, start=0, end=1):
        # binning
        overlaps = self.statistics['Edge Overlap (%)']
        lin = np.linspace(start, end, num_bins+1)
        statistics_mean = {name:np.zeros(num_bins) for name in self.statistics.keys()}
        statistics_std = {name:np.zeros(num_bins) for name in self.statistics.keys()}
        for idx, (start, end) in enumerate(zip(lin[:-1], lin[1:])):
            args = np.argwhere(np.logical_and(start<overlaps, overlaps<=end))
            for name, statistic in self.statistics.items():
                statistics_mean[name][idx] = statistic[args[:,0], args[:,1]].mean()
                statistics_std[name][idx] = statistic[args[:,0], args[:,1]].std()
        
        self.statistics_mean = statistics_mean
        self.statistics_std = statistics_std
                
    def export_statistics(self):
        pass
    def plot_statistics(self):
        pass

In [3]:
eval_ours = Evaluation(experiment_root='../logs/experiments_CORA-ML/',
                       statistic_fns={'Assortativity':s_statistics_assortativity,
                                      'Average Degree':s_statistics_average_degree,
                                      'Claw Count':s_statistics_claw_count,
                                      'Clustering Coefficient':s_statistics_clustering_coefficient,
                                      #'Characteristic Path Length':s_statistics_compute_cpl,
                                      'Edge Distribution Entropy':s_statistics_edge_distribution_entropy,
                                      'Gini':s_statistics_gini,
                                      'LCC Size':s_statistics_LCC,
                                      'Max Degree':s_statistics_max_degree,
                                      'Min Degree':s_statistics_min_degree,
                                      'Num Connected Components':s_statistics_num_connected_components,
                                      'Power Law α':s_statistics_power_law_alpha,
                                      'Spectral Gap':s_statistics_spectral_gap,
                                      'Square Count':s_statistics_square_count,
                                      'Triangle Count':s_statistics_triangle_count,
                                      'Wedge Count':s_statistics_wedge_count,
                                     })

In [4]:
eval_ours.compute_statistics()

In [5]:
eval_ours.statistics

{'Assortativity': array([[ 0.        ,  0.00435352, -0.03473832, -0.09767079, -0.07726152,
         -0.07090277, -0.07359809, -0.08652701, -0.08209418, -0.08260024,
         -0.07985274, -0.08132368, -0.08078135, -0.08307026, -0.07795887,
         -0.08074784, -0.08017273, -0.07771085, -0.07594585, -0.07846241],
        [ 0.        , -0.0187318 ,  0.01892586, -0.08628063, -0.06648247,
         -0.07293834, -0.0716047 , -0.07110334, -0.07507697, -0.07987052,
         -0.08005604, -0.07926036, -0.08002776, -0.08131844, -0.08470377,
         -0.07548903, -0.07578538, -0.0792658 , -0.07670507, -0.07579294]]),
 'Average Degree': array([[0.        , 4.82775801, 4.82775801, 4.82775801, 4.82775801,
         4.82775801, 4.82775801, 4.82775801, 4.82775801, 4.82775801,
         4.82775801, 4.82775801, 4.82775801, 4.82775801, 4.82775801,
         4.82775801, 4.82775801, 4.82775801, 4.82775801, 4.82775801],
        [0.        , 4.82775801, 4.82775801, 4.82775801, 4.82775801,
         4.82775801, 4.

In [19]:
eval_ours.aggregate_statistics(num_bins=10)

In [8]:
eval_ours.statistics_mean

{'Assortativity': array([-0.00754768,         nan,         nan, -0.09197571, -0.071872  ,
        -0.07192055, -0.07570828, -0.08001684, -0.07899615,         nan]),
 'Average Degree': array([4.82775801,        nan,        nan, 4.82775801, 4.82775801,
        4.82775801, 4.82775801, 4.82775801, 4.82775801,        nan]),
 'Claw Count': array([  68684.75,        nan,        nan, 1401516.  ,  956529.5 ,
        1824513.5 , 2064388.75, 2388310.75, 2687748.  ,        nan]),
 'Clustering Coefficient': array([0.00181306,        nan,        nan, 0.00182722, 0.00270477,
        0.00214525, 0.00244825, 0.00255388, 0.00270357,        nan]),
 'Edge Distribution Entropy': array([0.98391462,        nan,        nan, 0.95180479, 0.95810932,
        0.95068629, 0.94575939, 0.94098013, 0.93824027,        nan]),
 'Gini': array([0.27451433,        nan,        nan, 0.44473661, 0.42301819,
        0.44908369, 0.47057084, 0.48770301, 0.4956553 ,        nan]),
 'LCC Size': array([2810.    ,       nan,       na

In [23]:
def tabular_from_statistics(EO_criterion, statistics):
    tabular_mean = {}
    tabular_std = {}
    for model_name, (statistics_mean, statistics_std) in statistics.items():
        tabular_mean[model_name] = {}
        tabular_std[model_name] = {}
        # find matching EO
        overlap = statistics_mean['Edge Overlap (%)']
        arg = np.argwhere(overlap>EO_criterion).min()
        for statistic_name in statistics_mean.keys():
            tabular_mean[model_name][statistic_name] = statistics_mean[statistic_name][arg]
            tabular_std[model_name][statistic_name] = statistics_std[statistic_name][arg]
    return (tabular_mean, tabular_std)

In [24]:
tabular_from_statistics(EO_criterion=0.5,
                        statistics={'ours': (eval_ours.statistics_mean, eval_ours.statistics_std)})

({'ours': {'Assortativity': -0.07192055463410299,
   'Average Degree': 4.8277580071174375,
   'Claw Count': 1824513.5,
   'Clustering Coefficient': 0.002145254657692784,
   'Edge Distribution Entropy': 0.9506862873668398,
   'Gini': 0.44908369416318694,
   'LCC Size': 2800.5,
   'Max Degree': 201.0,
   'Min Degree': 1.0,
   'Num Connected Components': 4.5,
   'Power Law α': 1.820583287869216,
   'Spectral Gap': 0.050009915028284585,
   'Square Count': 6297.5,
   'Triangle Count': 1304.0,
   'Wedge Count': 81247.0,
   'Edge Overlap (%)': 0.5482087571870853,
   'Time (s)': 14.381987571716309}},
 {'ours': {'Assortativity': 0.0010177807011411064,
   'Average Degree': 0.0,
   'Claw Count': 38365.5,
   'Clustering Coefficient': 5.333134973773139e-05,
   'Edge Distribution Entropy': 0.00022682679880031698,
   'Gini': 0.0006105645105017876,
   'LCC Size': 5.5,
   'Max Degree': 1.0,
   'Min Degree': 0.0,
   'Num Connected Components': 1.5,
   'Power Law α': 1.1357265897848912e-05,
   'Spectral 

In [12]:
a = np.arange(10)[::-1]

In [14]:
a.sort()

In [15]:
a

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [38]:
print('%')

%


In [25]:
help(load_npz)

Help on function load_npz in module scipy.sparse._matrix_io:

load_npz(file)
    Load a sparse matrix from a file using ``.npz`` format.
    
    Parameters
    ----------
    file : str or file-like object
        Either the file name (string) or an open file (file-like object)
        where the data will be loaded.
    
    Returns
    -------
    result : csc_matrix, csr_matrix, bsr_matrix, dia_matrix or coo_matrix
        A sparse matrix containing the loaded data.
    
    Raises
    ------
    IOError
        If the input file does not exist or cannot be read.
    
    See Also
    --------
    scipy.sparse.save_npz: Save a sparse matrix to a file using ``.npz`` format.
    numpy.load: Load several arrays from a ``.npz`` archive.
    
    Examples
    --------
    Store sparse matrix to disk, and load it again:
    
    >>> import scipy.sparse
    >>> sparse_matrix = scipy.sparse.csc_matrix(np.array([[0, 0, 3], [4, 0, 0]]))
    >>> sparse_matrix
    <2x3 sparse matrix of type '<c

In [19]:
dct = {1:'a', 2:'b', 5:'e'}

In [21]:
max(dct.keys())

5

In [45]:
np.ones([3,3]) > 0#np.zeros([3]) 

array([[ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True]])

In [5]:
a = np.zeros([3,3])
0<a

array([[False, False, False],
       [False, False, False],
       [False, False, False]])

In [6]:
a<=1

array([[ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True]])

In [7]:
np.logical_and(0<a, a<=1)

array([[False, False, False],
       [False, False, False],
       [False, False, False]])

In [8]:
a[np.argwhere(np.logical_and(0<a, a<=1))[:,0],np.argwhere(np.logical_and(0<a, a<=1))[:,1]]

array([], dtype=float64)

In [32]:
os.listdir('../logs/experiments_CORA-ML/Experiment_0/sampled_graphs/')

['graph_090.npz',
 'graph_080.npz',
 'graph_100.npz',
 'overlap.pickle',
 'graph_060.npz',
 'graph_015.npz',
 'graph_075.npz',
 'graph_005.npz',
 'graph_020.npz',
 'graph_070.npz',
 'graph_065.npz',
 'graph_010.npz',
 'graph_040.npz',
 'graph_095.npz',
 'graph_050.npz',
 'graph_035.npz',
 'timing.pickle',
 'graph_025.npz',
 'graph_085.npz',
 'graph_055.npz',
 'graph_045.npz',
 'graph_030.npz']

In [2]:
def timing(f):
    def g(*args, **kwargs):
        start = time.time()
        y = f(*args, **kwargs)
        g.last_time = time.time() - start
        return y
    return g

In [5]:
@timing
def loop(iters):
    x = 0
    for i in range(iters):
        x += i
    return x

In [9]:
loop(100)
loop.last_time

1.3828277587890625e-05

In [12]:
def f(*args, **kwargs):
    y = 0
    for x in args:
        y += x
    return y

In [16]:
lst = [1,2,3,4,5,6]
f(*lst)

21

In [None]:
f(lst[0], lst[1])