In [1]:
from scipy.sparse.linalg import eigs

import networkx as nx
import scipy.sparse as sp
import numpy as np
from scipy.sparse.csgraph import connected_components, minimum_spanning_tree
from scipy.sparse.linalg import eigs
import warnings
from matplotlib import pyplot as plt
import igraph
import powerlaw
from numba import jit

import sys
sys.path.insert(0, '../src/')

import warnings
warnings.filterwarnings('ignore')

#'import tensorflow as tf
import torch
import scipy.sparse as sp
import numpy as np
from matplotlib import pyplot as plt
from sklearn.metrics import roc_auc_score, average_precision_score
import time
import pandas as pd
%matplotlib inline

# from netgan.netgan import *
# from netgan import utils

from net.utils import *
from net import utils_netgan as utils
import net.net as net

In [2]:
_A_obs, _X_obs, _z_obs = utils.load_npz('../data/cora_ml.npz')
_A_obs = _A_obs + _A_obs.T
_A_obs[_A_obs > 1] = 1
lcc = utils.largest_connected_components(_A_obs)
_A_obs = _A_obs[lcc,:][:,lcc]
_N = _A_obs.shape[0]

val_share = 0.1
test_share = 0.05
seed = 481516234

train_ones, val_ones, val_zeros, test_ones, test_zeros = utils.train_val_test_split_adjacency(_A_obs, val_share, test_share, seed, undirected=True, connected=True, asserts=True)

train_graph = sp.coo_matrix((np.ones(len(train_ones)),(train_ones[:,0], train_ones[:,1]))).tocsr()

Selecting 1 largest connected components


# Vanilla statistics

In [330]:
def edge_overlap(A, B):
    """
    Compute edge overlap between input graphs A and B, i.e. how many edges in A are also present in graph B. Assumes
    that both graphs contain the same number of edges.

    Parameters
    ----------
    A: sparse matrix or np.array of shape (N,N).
       First input adjacency matrix.
    B: sparse matrix or np.array of shape (N,N).
       Second input adjacency matrix.

    Returns
    -------
    float, the edge overlap.
    """

    return ((A == B) & (A == 1)).sum()

def squares(g):
    """
    Count the number of squares for each node
    Parameters
    ----------
    g: igraph Graph object
       The input graph.

    Returns
    -------
    List with N entries (N is number of nodes) that give the number of squares a node is part of.
    """

    cliques = g.cliques(min=4, max=4)
    result = [0] * g.vcount()
    for i, j, k, l in cliques:
        result[i] += 1
        result[j] += 1
        result[k] += 1
        result[l] += 1
    return result

def statistics_degrees(A_in):
    """
    Compute min, max, mean degree

    Parameters
    ----------
    A_in: sparse matrix or np.array
          The input adjacency matrix.
    Returns
    -------
    d_max. d_min, d_mean
    """

    degrees = A_in.sum(axis=0)
    return np.max(degrees), np.min(degrees), np.mean(degrees)


def statistics_LCC(A_in):
    """
    Compute the size of the largest connected component (LCC)

    Parameters
    ----------
    A_in: sparse matrix or np.array
          The input adjacency matrix.
    Returns
    -------
    Size of LCC

    """

    unique, counts = np.unique(connected_components(A_in)[1], return_counts=True)
    LCC = np.where(connected_components(A_in)[1] == np.argmax(counts))[0]
    return LCC


def statistics_wedge_count(A_in):
    """
    Compute the wedge count of the input graph

    Parameters
    ----------
    A_in: sparse matrix or np.array
          The input adjacency matrix.

    Returns
    -------
    The wedge count.
    """

    degrees = A_in.sum(axis=0)
    return float(np.sum(np.array([0.5 * x * (x - 1) for x in degrees])))


def statistics_claw_count(A_in):
    """
    Compute the claw count of the input graph

    Parameters
    ----------
    A_in: sparse matrix or np.array
          The input adjacency matrix.

    Returns
    -------
    Claw count
    """

    degrees = A_in.sum(axis=0)
    return float(np.sum(np.array([1 / 6. * x * (x - 1) * (x - 2) for x in degrees])))


def statistics_triangle_count(A_in):
    """
    Compute the triangle count of the input graph

    Parameters
    ----------
    A_in: sparse matrix or np.array
          The input adjacency matrix.
    Returns
    -------
    Triangle count
    """

    A_graph = nx.from_numpy_matrix(A_in)
    triangles = nx.triangles(A_graph)
    t = np.sum(list(triangles.values())) / 3
    return int(t)


def statistics_square_count(A_in):
    """
    Compute the square count of the input graph

    Parameters
    ----------
    A_in: sparse matrix or np.array
          The input adjacency matrix.
    Returns
    -------
    Square count
    """

    A_igraph = igraph.Graph.Adjacency((A_in > 0).tolist()).as_undirected()
    return int(np.sum(squares(A_igraph)) / 4)


def statistics_power_law_alpha(A_in):
    """
    Compute the power law coefficient of the degree distribution of the input graph

    Parameters
    ----------
    A_in: sparse matrix or np.array
          The input adjacency matrix.

    Returns
    -------
    Power law coefficient
    """

    degrees = A_in.sum(axis=0)
    return powerlaw.Fit(degrees, xmin=max(np.min(degrees),1), verbose=False).power_law.alpha


def statistics_gini(A_in):
    """
    Compute the Gini coefficient of the degree distribution of the input graph

    Parameters
    ----------
    A_in: sparse matrix or np.array
          The input adjacency matrix.

    Returns
    -------
    Gini coefficient
    """

    n = A_in.shape[0]
    degrees = A_in.sum(axis=0)
    degrees_sorted = np.sort(degrees)
    G = (2 * np.sum(np.array([i * degrees_sorted[i] for i in range(len(degrees))]))) / (n * np.sum(degrees)) - (
                                                                                                               n + 1) / n
    return float(G)


def statistics_edge_distribution_entropy(A_in):
    """
    Compute the relative edge distribution entropy of the input graph.

    Parameters
    ----------
    A_in: sparse matrix or np.array
          The input adjacency matrix.

    Returns
    -------
    Rel. edge distribution entropy
    """

    degrees = A_in.sum(axis=0)
    m = 0.5 * np.sum(np.square(A_in))
    n = A_in.shape[0]

    H_er = 1 / np.log(n) * np.sum(-degrees / (2 * float(m)) * np.log((degrees+.0001) / (2 * float(m))))
    return H_er

def statistics_cluster_props(A, Z_obs):
    def get_blocks(A_in, Z_obs, normalize=True):
        block = Z_obs.T.dot(A_in.dot(Z_obs))
        counts = np.sum(Z_obs, axis=0)
        blocks_outer = counts[:,None].dot(counts[None,:])
        if normalize:
            blocks_outer = np.multiply(block, 1/blocks_outer)
        return blocks_outer
    
    in_blocks = get_blocks(A, Z_obs)
    diag_mean = np.multiply(in_blocks, np.eye(in_blocks.shape[0])).mean()
    offdiag_mean = np.multiply(in_blocks, 1-np.eye(in_blocks.shape[0])).mean() 
    return diag_mean, offdiag_mean

def statistics_compute_cpl(A):
    """Compute characteristic path length."""
    P = sp.csgraph.shortest_path(sp.csr_matrix(A))
    return P[((1 - np.isinf(P)) * (1 - np.eye(P.shape[0]))).astype(np.bool)].mean()

def statistics_smallest_eigvals_of_LCC(A):
    """Computes few smallest eigenvalues of graph Laplacian, restricted to largest connected component."""
    G = nx.from_numpy_matrix(A)
    Gc = G.subgraph(max(nx.connected_components(G), key=len))
    L = nx.normalized_laplacian_matrix(Gc)
    vals, vecs = eigs(L, k=2, sigma=-0.0001)
    return np.real(vals)


def compute_graph_statistics(A_in, Z_obs=None):
    """

    Parameters
    ----------
    A_in: sparse matrix
          The input adjacency matrix.
    Z_obs: np.matrix [N, K], where K is the number of classes.
          Matrix whose rows are one-hot vectors indicating the class membership of the respective node.
          
    Returns
    -------
    Dictionary containing the following statistics:
             * Maximum, minimum, mean degree of nodes
             * Size of the largest connected component (LCC)
             * Wedge count
             * Claw count
             * Triangle count
             * Square count
             * Power law exponent
             * Gini coefficient
             * Relative edge distribution entropy
             * Assortativity
             * Clustering coefficient
             * Number of connected components
             * Intra- and inter-community density (if Z_obs is passed)
             * Characteristic path length
    """

    A = A_in.copy()

    assert ((A == A.T).all())
    A_graph = nx.from_numpy_matrix(A).to_undirected()

    statistics = {}

    d_max, d_min, d_mean = statistics_degrees(A)

    # Degree statistics
    statistics['d_max'] = d_max
    statistics['d_min'] = d_min
    statistics['d'] = d_mean

    # largest connected component
    LCC = statistics_LCC(A)

    statistics['LCC'] = LCC.shape[0]
    # wedge count
    statistics['wedge_count'] = statistics_wedge_count(A)

    # claw count
    statistics['claw_count'] = statistics_claw_count(A)

    # triangle count
    statistics['triangle_count'] = statistics_triangle_count(A)

    # Square count
    statistics['square_count'] = statistics_square_count(A)
    # power law exponent
    statistics['power_law_exp'] = statistics_power_law_alpha(A)
    # gini coefficient
    statistics['gini'] = statistics_gini(A)
    # Relative edge distribution entropy
    statistics['rel_edge_distr_entropy'] = statistics_edge_distribution_entropy(A)
    # Assortativity
    statistics['assortativity'] = nx.degree_assortativity_coefficient(A_graph)
    # Clustering coefficient
    statistics['clustering_coefficient'] = 3 * statistics['triangle_count'] / statistics['claw_count']    
    # Number of connected components
    statistics['n_components'] = connected_components(A)[0]
    if Z_obs is not None:
        # inter- and intra-community density
        intra, inter = statistics_cluster_props(A, Z_obs)
        statistics['intra_community_density'] = intra
        statistics['inter_community_density'] = inter
    statistics['cpl'] = statistics_compute_cpl(A)
    # Spectral gap of largest connected component
    eigvals = statistics_smallest_eigvals_of_LCC(A)
    statistics['spectral_gap'] = eigvals[1] - eigvals[0]
    return statistics

# Sparse statistics

In [27]:
def s_val_performance(scores_matrix, val_ones, val_zeros):
    """ Compute the ROC-AUC score and average precision of a graph (link prediction performance)."""
    edge_scores = np.append(scores_matrix[tuple(val_ones.T)].A1, 
                            scores_matrix[tuple(val_zeros.T)].A1)
    actual_labels_val = np.append(np.ones(len(val_ones)), np.zeros(len(val_zeros)))
    
    roc_auc = roc_auc_score(actual_labels_val, edge_scores)
    avg_prec = average_precision_score(actual_labels_val, edge_scores)
    return roc_auc, avg_prec

def s_edge_overlap(A, B):
    """
    Compute edge overlap between input graphs A and B, i.e. how many edges in A are also present in graph B. Assumes
    that both graphs contain the same number of edges.

    Parameters
    ----------
    A: sparse matrix or np.array of shape (N,N).
       First input adjacency matrix.
    B: sparse matrix or np.array of shape (N,N).
       Second input adjacency matrix.

    Returns
    -------
    float, the edge overlap.
    """

    return A.multiply(B).sum() / 2


def s_statistics_max_degree(A_in):
    """Compute max degree."""
    degrees = A_in.sum(axis=-1)
    return np.max(degrees)

def s_statistics_min_degree(A_in):
    """Compute min degree."""
    degrees = A_in.sum(axis=-1)
    return np.min(degrees)

def s_statistics_average_degree(A_in):
    """Compute average degree."""
    degrees = A_in.sum(axis=-1)
    return np.mean(degrees)


def s_statistics_LCC(A_in):
    """
    Compute the size of the largest connected component (LCC)

    Parameters
    ----------
    A_in: sparse matrix or np.array
          The input adjacency matrix.
    Returns
    -------
    Size of LCC

    """
    G = nx.from_scipy_sparse_matrix(A_in)
    return max([len(c) for c in nx.connected_components(G)])

def s_statistics_num_connected_components(A_in):
    """Compute the number of connected components."""
    G = nx.from_scipy_sparse_matrix(A_in)
    return len(list(nx.connected_components(G)))


def s_statistics_wedge_count(A_in):
    """
    Compute the wedge count of the input graph

    Parameters
    ----------
    A_in: sparse matrix or np.array
          The input adjacency matrix.

    Returns
    -------
    The wedge count.
    """

    degrees = np.array(A_in.sum(axis=-1))
    return 0.5 * np.dot(degrees.T, degrees-1).reshape([])


def s_statistics_claw_count(A_in):
    """
    Compute the claw count of the input graph

    Parameters
    ----------
    A_in: sparse matrix or np.array
          The input adjacency matrix.

    Returns
    -------
    Claw count
    """

    degrees = np.array(A_in.sum(axis=-1))
    return 1/6 * np.sum(degrees * (degrees-1) * (degrees-2))


def s_statistics_triangle_count(A_in):
    """
    Compute the triangle count of the input graph

    Parameters
    ----------
    A_in: sparse matrix or np.array
          The input adjacency matrix.
    Returns
    -------
    Triangle count
    """

    A_graph = nx.from_scipy_sparse_matrix(A_in)
    triangles = nx.triangles(A_graph)
    t = np.sum(list(triangles.values())) / 3
    return int(t)


def s_statistics_square_count(A_in):
    """
    Compute the square count of the input graph

    Parameters
    ----------
    A_in: sparse matrix or np.array
          The input adjacency matrix.
    Returns
    -------
    Square count
    """

    A_squared = A_in @ A_in
    common_neighbors = sp.triu(A_squared, k=1).tocsr()
    num_common_neighbors = np.array(common_neighbors[common_neighbors.nonzero()]).reshape(-1)
    return np.dot(num_common_neighbors, num_common_neighbors-1) / 4


def s_statistics_power_law_alpha(A_in):
    """
    Compute the power law coefficient of the degree distribution of the input graph

    Parameters
    ----------
    A_in: sparse matrix or np.array
          The input adjacency matrix.

    Returns
    -------
    Power law coefficient
    """

    degrees = np.array(A_in.sum(axis=-1)).flatten()
    return powerlaw.Fit(degrees, xmin=max(np.min(degrees),1), verbose=False).power_law.alpha


def s_statistics_gini(A_in):
    """
    Compute the Gini coefficient of the degree distribution of the input graph

    Parameters
    ----------
    A_in: sparse matrix or np.array
          The input adjacency matrix.

    Returns
    -------
    Gini coefficient
    """
    N = A_in.shape[0]
    degrees_sorted = np.sort(np.array(A_in.sum(axis=-1)).flatten())
    return 2 * np.dot(degrees_sorted, np.arange(1, N+1)) / (N * np.sum(degrees_sorted)) - (N+1) / N


def s_statistics_edge_distribution_entropy(A_in):
    """
    Compute the relative edge distribution entropy of the input graph.

    Parameters
    ----------
    A_in: sparse matrix or np.array
          The input adjacency matrix.

    Returns
    -------
    Rel. edge distribution entropy
    """
    N = A_in.shape[0]
    degrees = np.array(A_in.sum(axis=-1)).flatten()
    degrees /= degrees.sum()
    return -np.dot(np.log(degrees), degrees) / np.log(N)


def s_statistics_compute_cpl(A_in):
    """Compute characteristic path length."""
    P = sp.csgraph.shortest_path(A_in)
    return P[((1 - np.isinf(P)) * (1 - np.eye(P.shape[0]))).astype(np.bool)].mean()

def s_statistics_smallest_eigvals_of_LCC(A):
    """Computes few smallest eigenvalues of graph Laplacian, restricted to largest connected component."""
    G = nx.from_scipy_sparse_matrix(A)
    Gc = G.subgraph(max(nx.connected_components(G), key=len))
    L = nx.normalized_laplacian_matrix(Gc)
    vals, vecs = eigs(L, k=2, sigma=-0.0001)
    return np.real(vals)

def s_statistics_spectral_gap(A_in):
    """ Compute spectral gap."""
    eigvals = s_statistics_smallest_eigvals_of_LCC(A_in)
    return eigvals[1] - eigvals[0]

def s_statistics_assortativity(A_in):
    """Compute assortativity."""
    G = nx.from_scipy_sparse_matrix(A_in)
    return nx.degree_assortativity_coefficient(G)

def s_statistics_clustering_coefficient(A_in):
    """Compute clustering coefficient."""
    return 3 * s_statistics_triangle_count(A_in) / s_statistics_claw_count(A_in)


def s_compute_graph_statistics(A):
    """

    Parameters
    ----------
    A_in: sparse matrix
          The input adjacency matrix.
          
    Returns
    -------
    Dictionary containing the following statistics:
             * Maximum, minimum, mean degree of nodes
             * Size of the largest connected component (LCC)
             * Wedge count
             * Claw count
             * Triangle count
             * Square count
             * Power law exponent
             * Gini coefficient
             * Relative edge distribution entropy
             * Assortativity
             * Clustering coefficient
             * Number of connected components
             * Intra- and inter-community density (if Z_obs is passed)
             * Characteristic path length
    """

    statistics = {}

    # Degree statistics
    statistics['d_max'] = s_statistics_max_degree(A)
    statistics['d_min'] = s_statistics_min_degree(A)
    statistics['d'] = s_statistics_average_degree(A)
    # largest connected component
    statistics['LCC'] = s_statistics_LCC(A)
    # wedge count
    statistics['wedge_count'] = s_statistics_wedge_count(A)
    # claw count
    statistics['claw_count'] = s_statistics_claw_count(A)
    # triangle count
    statistics['triangle_count'] = s_statistics_triangle_count(A)
    # Square count
    statistics['square_count'] = s_statistics_square_count(A)
    # power law exponent
    statistics['power_law_exp'] = s_statistics_power_law_alpha(A)
    # gini coefficient
    statistics['gini'] = s_statistics_gini(A)
    # Relative edge distribution entropy
    statistics['rel_edge_distr_entropy'] = s_statistics_edge_distribution_entropy(A)
    # Assortativity
    statistics['assortativity'] = s_statistics_assortativity(A)
    # Clustering coefficient
    statistics['clustering_coefficient'] = s_statistics_clustering_coefficient(A)
    # Number of connected components
    statistics['n_components'] = s_statistics_num_connected_components(A)
    # Characteristic path length
    statistics['cpl'] = s_statistics_compute_cpl(A)
    # Spectral gap of largest connected component
    statistics['spectral_gap'] = s_statistics_spectral_gap(A)
    return statistics

In [3]:
def configuration_model(A, B=None, EO=None):
    """Given two graphs A and B with same amount of edges, generates new graph by keeping overlapping edges,
       and rewiring remaining edges such that degrees of nodes in A are preserved. Self-loops and multiple 
       edges are removed. If B is None, draws the percentage EO of edges from A."""
    configuration_graph = np.zeros_like(A)
    if B is not None:
        configuration_graph = A * B
    else:
        B = np.triu(A, k=1)
        B /= B.sum()
        nonzero_ixs = B.nonzero()
        edges_from_A = np.random.choice(a=len(nonzero_ixs[0]), size=int(EO * A.sum() / 2), replace=False, 
                                        p=B[nonzero_ixs])
        configuration_graph[nonzero_ixs[0][edges_from_A], nonzero_ixs[1][edges_from_A]] = 1
        configuration_graph = configuration_graph + configuration_graph.T
    degrees = (A.sum(axis=-1) - configuration_graph.sum(axis=-1)).astype(int)
    stubs = np.zeros(degrees.sum())
    counter = 0
    for i in degrees.nonzero()[0]:
        stubs[counter: counter+degrees[i]] = i * np.ones(degrees[i])
        counter += degrees[i]
    np.random.shuffle(stubs)
    stubs = stubs.reshape(-1, 2).astype(int)
    configuration_graph[stubs[:, 0], stubs[:, 1]] = 1
    configuration_graph[stubs[:, 1], stubs[:, 0]] = 1  
    np.fill_diagonal(configuration_graph, 0)
    return configuration_graph

In [4]:
def time_wrapper(f, *args):
    start = time.time()
    print(f(*args))
    print(time.time()-start)
    return

In [6]:
A = train_graph

In [7]:
B = configuration_model(A.toarray(), EO=0.52)
B = sp.csr_matrix(B)

In [8]:
print(type(A), type(B))

<class 'scipy.sparse.csr.csr_matrix'> <class 'scipy.sparse.csr.csr_matrix'>


In [62]:
# Vanilla
time_wrapper(edge_overlap, A.toarray(), B)

# Sparse
time_wrapper(s_edge_overlap, A, B)

7096
0.05903506278991699
3548.0
0.0008366107940673828


In [67]:
# Vanilla
time_wrapper(statistics_triangle_count, A.toarray())

# Sparse
time_wrapper(s_statistics_triangle_count, A)

2802
0.16067862510681152
2802
0.18396568298339844


In [98]:
# Vanilla
time_wrapper(statistics_degrees, A.toarray())

# Sparse
time_wrapper(statistics_degrees, A)

(238.0, 1.0, 4.8277580071174375)
0.016636133193969727
(238.0, 1.0, 4.8277580071174375)
0.0006220340728759766


In [120]:
# Vanilla
time_wrapper(statistics_wedge_count, A.toarray())

# Sparse
time_wrapper(s_statistics_wedge_count, A)

101747.0
0.017568111419677734
101747.0
0.001300811767578125


In [125]:
# Vanilla
time_wrapper(statistics_claw_count, A.toarray())

# Sparse
time_wrapper(s_statistics_claw_count, A)

3033514.0
0.022715091705322266
3033514.0
0.0005564689636230469


In [120]:
# Vanilla
time_wrapper(statistics_wedge_count, A.toarray())

# Sparse
time_wrapper(s_statistics_wedge_count, A)

101747.0
0.017568111419677734
101747.0
0.001300811767578125


In [229]:
# Vanilla
time_wrapper(statistics_square_count, A.toarray())

# Sparse
time_wrapper(s_statistics_square_count, A)

457
0.7692854404449463
14269.0
0.01426386833190918


### Test if square count is correct

In [251]:
expec = 0
for i in range(100):
    ER_graph = nx.erdos_renyi_graph(100, 0.5)
    ER_graph = nx.to_scipy_sparse_matrix(ER_graph)
    expec += s_statistics_square_count(ER_graph) / 100
expec

734486.0700000001

In [252]:
expec = 0
for i in range(100):
    ER_graph = nx.erdos_renyi_graph(100, 0.5)
    ER_graph = nx.to_scipy_sparse_matrix(ER_graph)
    expec += statistics_square_count(ER_graph.toarray()) / 100
expec

61401.939999999995

Expected number of squares

In [237]:
from scipy.special import binom

binom(100, 4) * 3*2*1 / 2**5

735229.6875

Expected number of 4-cliques

In [254]:
binom(100, 4) / 2 ** (binom(4, 2))

61269.140625

#### Interpretation: we compute actual square count, but they compute 4-cliques count

In [270]:
# Vanilla
time_wrapper(statistics_LCC, A.toarray())

# Sparse
time_wrapper(s_statistics_LCC, A)

[   0    1    2 ... 2807 2808 2809]
0.4540128707885742
2810
0.2728407382965088


In [291]:
# Vanilla
time_wrapper(statistics_gini, A.toarray())

# Sparse
time_wrapper(s_statistics_gini, A)

0.4818118959889852
0.01648235321044922
0.48252363976122714
0.0006756782531738281


In [295]:
# Vanilla
time_wrapper(statistics_edge_distribution_entropy, A.toarray())

# Sparse
time_wrapper(s_statistics_edge_distribution_entropy, A)

0.9406731398068211
0.04870343208312988
0.94067574800982
0.0010118484497070312


In [303]:
# Vanilla
time_wrapper(statistics_compute_cpl, A.toarray())

# Sparse
time_wrapper(s_statistics_compute_cpl, A)

5.630006245811316
3.6048834323883057
5.630006245811316
3.485039472579956


# Test all statistics at once (except edge overlap)

In [328]:
# Vanilla
time_wrapper(compute_graph_statistics, A.toarray())

# Sparse
time_wrapper(s_compute_graph_statistics, A)

{'d_max': 238.0, 'd_min': 1.0, 'd': 4.828113879003559, 'LCC': 2810, 'wedge_count': 101749.0, 'claw_count': 3033515.0, 'triangle_count': 2802, 'square_count': 457, 'power_law_exp': 1.854959374066405, 'gini': 0.4818118959889852, 'rel_edge_distr_entropy': 0.9406731398068211, 'assortativity': -0.07625760260188129, 'clustering_coefficient': 0.002771042833149004, 'n_components': 1, 'spectral_gap': 0.00611441079539339}
1.9164848327636719
{'d_max': 238.0, 'd_min': 1.0, 'd': 4.828113879003559, 'LCC': 2810, 'wedge_count': 101749.0, 'claw_count': 3033515.0, 'triangle_count': 2802, 'square_count': 14269.0, 'power_law_exp': 1.854959374066405, 'gini': 0.48252363976122714, 'rel_edge_distr_entropy': 0.94067574800982, 'assortativity': -0.07625760260188129, 'clustering_coefficient': 0.002771042833149004, 'n_components': 1, 'spectral_gap': 0.0061144107953933855}
1.1001887321472168


ROC-AUC and average precision

In [29]:
print(s_val_performance(_A_obs, val_ones=val_ones, val_zeros=val_zeros))
print(s_val_performance(train_graph, val_ones=val_ones, val_zeros=val_zeros))

(1.0, 1.0)