In [19]:
import sys
sys.path.insert(0, '../src/')
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import scipy.sparse as sp
from scipy.sparse import load_npz
import graph_statistics
import utils
from evaluation import (Evaluation, tabular_from_statistics, df_from_tabular, compute_original_statistics,
                        boxplot, make_comparison_df, comparison_tabular_from_df)

In [20]:
_A_obs = load_npz('../data/datasets/CORA_ML.npz')

val_share = 0.1
test_share = 0.05
seed = 481516234

train_ones, val_ones, val_zeros, test_ones, test_zeros = utils.train_val_test_split_adjacency(_A_obs, val_share, test_share, seed, undirected=True, connected=True, asserts=False)
train_graph = sp.csr_matrix((np.ones(len(train_ones)),(train_ones[:,0], train_ones[:,1])))

# Test graph statistics

In [23]:
import networkx as nx
import scipy.sparse as sp
import numpy as np
from scipy.sparse.csgraph import connected_components, minimum_spanning_tree
import warnings
from matplotlib import pyplot as plt
import igraph
import powerlaw
from numba import jit


def load_npz(file_name):
    """Load a SparseGraph from a Numpy binary file.
    Parameters
    ----------
    file_name : str
        Name of the file to load.
    Returns
    -------
    sparse_graph : SparseGraph
        Graph in sparse matrix format.
    """
    if not file_name.endswith('.npz'):
        file_name += '.npz'
    with np.load(file_name, allow_pickle=True) as loader:
        loader = dict(loader)['arr_0'].item()
        adj_matrix = sp.csr_matrix((loader['adj_data'], loader['adj_indices'],
                                              loader['adj_indptr']), shape=loader['adj_shape'])

        if 'attr_data' in loader:
            attr_matrix = sp.csr_matrix((loader['attr_data'], loader['attr_indices'],
                                                   loader['attr_indptr']), shape=loader['attr_shape'])
        else:
            attr_matrix = None

        labels = loader.get('labels')

    return adj_matrix, attr_matrix, labels

_A_obs, _X_obs, _z_obs = load_npz('../data/cora_ml.npz')

In [14]:
def statistics_cluster_props(A, Z_obs):
    def get_blocks(A_in, Z_obs, normalize=True):
        block = Z_obs.T.dot(A_in.dot(Z_obs))
        counts = np.sum(Z_obs, axis=0)
        blocks_outer = counts[:,None].dot(counts[None,:])
        if normalize:
            blocks_outer = np.multiply(block, 1/blocks_outer)
        return blocks_outer
    
    in_blocks = get_blocks(A, Z_obs)
    diag_mean = np.multiply(in_blocks, np.eye(in_blocks.shape[0])).mean()
    offdiag_mean = np.multiply(in_blocks, 1-np.eye(in_blocks.shape[0])).mean() 
    return diag_mean, offdiag_mean

# if Z_obs is not None:
#     # inter- and intra-community density
#     intra, inter = statistics_cluster_props(A, Z_obs)
#     statistics['intra_community_density'] = intra
#     statistics['inter_community_density'] = inter

In [None]:
def community_densities(A_in, class_memberships):
    classes = set(class_memberships)
    for c in classes:
    
    rows, cols = A_in.nonzero()
    num_classes = len(classes)
    class_edges = np.zeros((num_classes, num_classes))
    for l in range(len(rows)):
        if rows[l]<=cols[l]:
            class_edges[rows[l], cols[l]] += 1
    

In [None]:
np.argsort()

In [48]:
A = np.eye(5)

In [49]:
np.diag(A)

array([1., 1., 1., 1., 1.])

In [42]:
class_set = set(_z_obs)

In [45]:
class_set

{0, 1, 2, 3, 4, 5, 6}

In [44]:
len(class_set)

7

In [34]:
_A_obs = _A_obs + _A_obs.T
_A_obs[_A_obs > 1] = 1

In [37]:
set(_z_obs)

{0, 1, 2, 3, 4, 5, 6}

In [46]:
_A_obs.shape

(2995, 2995)

In [24]:
_z_obs.shape

(2995,)

In [21]:
train_graph

<2810x2810 sparse matrix of type '<class 'numpy.float64'>'
	with 13566 stored elements in Compressed Sparse Row format>

In [35]:
statistics_cluster_props(A=_A_obs, Z_obs=_z_obs)

IndexError: invalid index to scalar variable.