In [None]:
import sys; sys.path.append('..')

In [None]:
from itertools import combinations
import pickle
from collections import defaultdict
from fractions import Fraction
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
from util import step_plot, ds_names, ds_names_short, colors_full, linestyles, colors_group
import networkx as nx
from scipy.stats import pearsonr
import seaborn as sns
from collections import deque

In [None]:
# For every dataset, load the unweighted clique expansion graph, take its
# largest connected component (as a set of nodes) and pickle that node set to
# ../data/largest_connected_component/<ds_name>.pkl.
p_CE = Path('../data/clique_expansion')
p_LCC = Path('../data/largest_connected_component')
p_LCC.mkdir(exist_ok=True)
for ds_name in ds_names:
    p_CE_ds = p_CE / ds_name
    with open(p_CE_ds / 'CE_U.pkl', 'rb') as f:
        CE_U = pickle.load(f)

    # Component with the most nodes; on ties, max() keeps the first one yielded.
    LCC = max(nx.connected_components(CE_U), key=len)

    with open(p_LCC / '{}.pkl'.format(ds_name), 'wb') as f:
        pickle.dump(LCC, f)

    # One summary line per dataset: total node count vs. LCC node count.
    summary_parts = (
        'Dataset {},'.format(ds_name),
        'total number of nodes = {},'.format(CE_U.number_of_nodes()),
        'number of nodes in the LCC = {}'.format(len(LCC)),
    )
    print(*summary_parts)

In [None]:
from scipy.sparse import csr_matrix

# Compute and cache eigenvector-type centralities for each selected dataset.
# Files written under ../data/eigenvector_centrality/<ds_name>/:
#   EC_CE_U.pkl            eigenvector centrality of the unweighted clique expansion
#   EC_CE_W.pkl            eigenvector centrality of the weighted clique expansion
#   hyperEC_<type>_V.pkl   node scores from the nonlinear iteration defined below
#   hyperEC_<type>_E.pkl   hyperedge scores from the same iteration
# The pickles read here are assumed to be trusted outputs of this project;
# pickle.load must not be pointed at untrusted files.
p_CE = Path('../data/clique_expansion')
p_EC = Path('../data/eigenvector_centrality')
p_incident = Path('../data/incident')
p_EC.mkdir(exist_ok=True)
centrality_types = ['linear', 'log-exp', 'max']
max_iter = 1000
tol = 1e-6


def _identity(xx):
    """Return the argument unchanged (the linear map)."""
    return xx


def _pow_10(xx):
    """Raise every entry to the 10th power."""
    return xx ** 10


def _pow_tenth(xx):
    """Raise every entry to the power 0.1 (inverse of ``_pow_10`` on positives)."""
    return xx ** 0.1


# (f, g, phi, psi) for each centrality type. All four act element-wise on
# NumPy arrays directly, so no np.vectorize wrapper (a per-element Python
# loop) is needed.
# NOTE(review): 'max' presumably approximates a maximum through a power of 10
# followed by a 10th root -- confirm against the method's reference.
hyper_ec_maps = {
    'linear': (_identity, _identity, _identity, _identity),
    'log-exp': (_identity, np.sqrt, np.log, np.exp),
    'max': (_identity, _identity, _pow_10, _pow_tenth),
}


def _build_incidence_matrix(v2edges, node_list, m):
    """Return the boolean node-by-hyperedge incidence matrix in CSR format.

    Row ``i`` corresponds to ``node_list[i]``. The entries of ``v2edges[v]``
    are used directly as column indices, so they must be integers in
    ``range(m)``.
    """
    row_list = []
    col_list = []
    for i_v, node in enumerate(node_list):
        for e in v2edges[node]:
            row_list.append(i_v)
            col_list.append(e)
    data = np.ones(len(row_list), dtype=bool)
    return csr_matrix((data, (row_list, col_list)), shape=(len(node_list), m), dtype=bool)


def _hyper_eigenvector_centrality(B, f, g, phi, psi, max_iter, tol):
    """Run the nonlinear fixed-point iteration on incidence matrix ``B``.

    Starting from uniform vectors, each step computes
    ``u = sqrt(x0 * g(B f(y0)))`` and ``v = sqrt(y0 * psi(B^T phi(x0)))``,
    rescales both to unit 2-norm, and stops as soon as the sum of the two
    relative changes falls below ``tol``.

    Returns ``(x, y, diff, converged)``: the node vector, the hyperedge
    vector, the last measured change, and whether ``tol`` was reached within
    ``max_iter`` steps. ``x``, ``y`` and ``diff`` are ``None`` if
    ``max_iter`` is 0.
    """
    n_rows, n_cols = B.shape
    Bt = B.T  # hoisted: the transpose does not change between iterations
    x0 = np.ones(n_rows) / n_rows
    y0 = np.ones(n_cols) / n_cols
    diff = x = y = None
    converged = False
    for _ in range(max_iter):
        u = np.sqrt(x0 * g(B.dot(f(y0))))
        v = np.sqrt(y0 * psi(Bt.dot(phi(x0))))
        x = u / np.linalg.norm(u)
        y = v / np.linalg.norm(v)
        diff = np.linalg.norm(x - x0) / np.linalg.norm(x) + np.linalg.norm(y - y0) / np.linalg.norm(y)
        if diff < tol:
            converged = True
            break
        # x and y are rebound to fresh arrays every step, so no copy is needed.
        x0, y0 = x, y
    return x, y, diff, converged


# NOTE(review): only ds_names[4:] is processed here, whereas the other cells
# loop over all of ds_names. This looks like a leftover from resuming an
# interrupted run -- confirm before relying on a fresh Restart & Run All.
for ds_name in ds_names[4:]:
    print(ds_name)
    p_CE_ds = p_CE / ds_name
    p_EC_ds = p_EC / ds_name
    p_EC_ds.mkdir(exist_ok=True)
    with (p_CE_ds / 'CE_U.pkl').open('rb') as fh:
        CE_U = pickle.load(fh)
    with (p_CE_ds / 'CE_W.pkl').open('rb') as fh:
        CE_W = pickle.load(fh)

    # Use the configured iteration budget here too; previously this call fell
    # back to networkx's default max_iter while the weighted call did not.
    EC_CE_U = nx.eigenvector_centrality(CE_U, max_iter=max_iter, tol=tol)
    with (p_EC_ds / 'EC_CE_U.pkl').open('wb') as fh:
        pickle.dump(EC_CE_U, fh)
    print('Eigen-centrality of unweighted clique expansion done!')

    # Collapse CE_W into a simple graph whose 'weight' counts how many times
    # each node pair is yielded by CE_W.edges().
    CE_W_conv = nx.Graph()
    for v1, v2 in CE_W.edges():
        if CE_W_conv.has_edge(v1, v2):
            CE_W_conv[v1][v2]['weight'] += 1
        else:
            CE_W_conv.add_edge(v1, v2, weight=1)
    EC_CE_W = nx.eigenvector_centrality(CE_W_conv, weight='weight', max_iter=max_iter, tol=tol)
    with (p_EC_ds / 'EC_CE_W.pkl').open('wb') as fh:
        pickle.dump(EC_CE_W, fh)
    print('Eigen-centrality of weighted clique expansion done!')

    p_incident_ds = p_incident / ds_name
    with (p_incident_ds / 'i2edges.pkl').open('rb') as fh:
        i2edges = pickle.load(fh)
    with (p_incident_ds / 'v2edges.pkl').open('rb') as fh:
        v2edges = pickle.load(fh)

    n, m = len(v2edges), len(i2edges)
    node_list = sorted(v2edges.keys())
    B = _build_incidence_matrix(v2edges, node_list, m)

    for centrality_type in centrality_types:
        f, g, phi, psi = hyper_ec_maps[centrality_type]
        x, y, diff, converged = _hyper_eigenvector_centrality(B, f, g, phi, psi, max_iter, tol)
        print(ds_name, centrality_type, diff)
        if not converged:
            # Make a silent non-convergence visible; results are still saved.
            print('Warning: hyper-eigen-centrality ({}) for {} stopped after {} iterations '
                  'without reaching tol = {}'.format(centrality_type, ds_name, max_iter, tol))
        centrality_dict_V = dict(zip(node_list, x))
        centrality_dict_E = dict(zip(range(m), y))

        with (p_EC_ds / 'hyperEC_{}_V.pkl'.format(centrality_type)).open('wb') as fh:
            pickle.dump(centrality_dict_V, fh)
        with (p_EC_ds / 'hyperEC_{}_E.pkl'.format(centrality_type)).open('wb') as fh:
            pickle.dump(centrality_dict_E, fh)
        print('Hyper-eigen-centrality ({}) done!'.format(centrality_type))

In [None]:
# Dump each dataset to plain-text files under ../data_cpp:
#   <ds>_edges.txt     one line per value of i2edges, members tab-separated
#   <ds>_degrees.txt   "<node>\t<number of incident edges>" per node
#   <ds>_incident.txt  "<node>\t<comma-separated incident edge ids>" per node
#   <ds>_cc.txt        one node of the largest connected component per line
p_data_cpp = Path('../data_cpp')
p_data_cpp.mkdir(exist_ok=True)
p_incident = Path('../data/incident')
p_LCC = Path('../data/largest_connected_component')

for ds_name in ds_names:
    print(ds_name)
    p_incident_ds = p_incident / ds_name
    with open(p_incident_ds / 'i2edges.pkl', 'rb') as f:
        i2edges = pickle.load(f)
    with open(p_incident_ds / 'v2edges.pkl', 'rb') as f:
        v2edges = pickle.load(f)

    # Hyperedges are written in the iteration order of the i2edges dict.
    with (p_data_cpp / '{}_edges.txt'.format(ds_name)).open('w') as f:
        f.writelines('\t'.join(map(str, members)) + '\n' for members in i2edges.values())

    with (p_data_cpp / '{}_degrees.txt'.format(ds_name)).open('w') as f:
        f.writelines('{}\t{}\n'.format(node, len(incident)) for node, incident in v2edges.items())

    with (p_data_cpp / '{}_incident.txt'.format(ds_name)).open('w') as f:
        f.writelines(
            str(node) + '\t' + ','.join(map(str, incident)) + '\n'
            for node, incident in v2edges.items()
        )

    # Node collection pickled earlier under ../data/largest_connected_component.
    with open(p_LCC / '{}.pkl'.format(ds_name), 'rb') as f:
        LCC = pickle.load(f)

    with (p_data_cpp / '{}_cc.txt'.format(ds_name)).open('w') as f:
        f.writelines('{}\n'.format(node) for node in LCC)
