In [None]:
import sys; sys.path.append('..')

In [None]:
from itertools import combinations
import pickle
from collections import defaultdict
from fractions import Fraction
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
from util import step_plot, ds_names, ds_names_short, colors_full, linestyles, colors_group
import networkx as nx
from scipy.stats import pearsonr
import seaborn as sns
from collections import deque

In [None]:
# Input data directories (relative to this notebook).
p_incident = Path('../data/incident')
p_core = Path('../data/core')
p_size = Path('../data/size')
# Output directories for cached results and figures; created if missing.
p_dumps = Path('dumps')
p_dumps.mkdir(exist_ok=True)
p_figures = Path('figures')
p_figures.mkdir(exist_ok=True)

plt.rcParams.update({'font.size': 30, 'text.usetex': False, 'lines.linewidth': 7})
plt.clf()

# For every dataset directory under `p_core`, compute for each threshold t the
# density (#hyperedges / #nodes) of the maximum-coreness node set, divided by
# the density of the whole hypergraph, and cache the resulting dict on disk.
for p_core_ds in p_core.iterdir():
    ds_name = p_core_ds.name
    print(ds_name)
    p_incident_ds = p_incident / ds_name
    # i2edges[i_e] -> members of hyperedge i_e; v2edges[v] -> ids of edges incident to v.
    with open(p_incident_ds / 'i2edges.pkl', 'rb') as f:
        i2edges = pickle.load(f)
    print('i2edges loading done!')
    with open(p_incident_ds / 'v2edges.pkl', 'rb') as f:
        v2edges = pickle.load(f)
    print('v2edges loading done!')
    n, m = len(v2edges), len(i2edges)
    overall_density = Fraction(m, n)

    t2max_core_relative_density = dict()
    for c_t in tqdm(list(p_core_ds.glob('c*.pkl'))):
        # File names look like 'c_<num>-<den>.pkl'; drop the 2-char prefix and
        # the extension, then turn '-' back into '/' to recover the fraction t.
        t = Fraction(c_t.stem[2:].replace('-', '/'))
        with c_t.open('rb') as f:
            v2c_t = pickle.load(f)

        # Nodes attaining the largest coreness value for this t.
        top_coreness = max(v2c_t.values())
        core_nodes = [v for v, coreness in v2c_t.items() if coreness == top_coreness]

        # Number of incidences each hyperedge has with those nodes.
        hits_per_edge = defaultdict(int)
        for v in core_nodes:
            for i_e in v2edges[v]:
                hits_per_edge[i_e] += 1

        # An edge counts when at least a t-fraction of it, and at least 2 of
        # its incidences, fall inside the node set.
        n_core_edges = sum(
            1
            for i_e, hits in hits_per_edge.items()
            if hits >= max(t * len(i2edges[i_e]), 2)
        )
        t2max_core_relative_density[t] = Fraction(n_core_edges, len(core_nodes)) / overall_density

    p_dumps_ds = p_dumps / ds_name
    p_dumps_ds.mkdir(exist_ok=True)
    with open(p_dumps_ds / 't2max_core_relative_density.pkl', 'wb') as f:
        pickle.dump(t2max_core_relative_density, f)

In [None]:
plt.rcParams.update({'font.size': 20, 'text.usetex': False, 'lines.linewidth': 3})
plt.clf()

# Draw one curve per dataset from the cached {t: relative density} dicts.
# `enumerate` gives the position in `ds_names` directly, which is also the
# index into the parallel style lists (short names, colors, line styles).
for ds_index, ds_name in enumerate(ds_names):
    p_dumps_ds = p_dumps / ds_name
    with (p_dumps_ds / 't2max_core_relative_density.pkl').open('rb') as f:
        t2max_core_relative_density = pickle.load(f)
    # `step_plot` comes from the project's `util` module; presumably it draws
    # the dict as a step function of t -- confirm against util.py.
    step_plot(
        t2max_core_relative_density,
        label=ds_names_short[ds_index],
        color=colors_full[ds_index],
        linestyle=linestyles[ds_index],
    )

plt.yscale('log')
plt.xlabel('t')
plt.ylabel('relative density of\nmax. hypercore')
# Only `prop` is passed: matplotlib ignores `fontsize` whenever `prop` is
# given, so the former `fontsize=30` had no effect and only obscured which
# size is actually used (11).
plt.legend(prop={'size': 11}, bbox_to_anchor=(1.0, 0.5), loc='center left')
plt.savefig('relative_density.png', bbox_inches='tight')
plt.savefig('relative_density.pdf', bbox_inches='tight')
plt.show()

In [None]:
# Thresholds at which every dataset's relative-density curve is sampled.
t_samples = np.linspace(0.005, 0.995, 100)
ds2MCRD_list = dict()
ds_num = len(ds_names)
# Dataset (by short name) left out of the averages and of the heatmap.
excluded_ds_short = 'tags-SO'

for p_dumps_ds in p_dumps.iterdir():
    ds_name = p_dumps_ds.name
    # Skip anything in the dump directory that is not a known dataset (e.g.
    # '.DS_Store', '.ipynb_checkpoints'); previously such an entry aborted the
    # cell with a ValueError from `ds_names.index`.
    if ds_name not in ds_names:
        continue
    ds_index = ds_names.index(ds_name)
    with (p_dumps_ds / 't2max_core_relative_density.pkl').open('rb') as f:
        t2max_core_relative_density = pickle.load(f)
    # For each sample, use the smallest available t that is >= the sample.
    t_chosen = [min(t for t in t2max_core_relative_density if t >= t_sample) for t_sample in t_samples]
    ds2MCRD_list[ds_index] = [t2max_core_relative_density[t] for t in t_chosen]

# Pairwise distance between datasets: root-mean-square difference of the
# log relative densities over the sampled thresholds.
# NOTE(review): a relative density of 0 would give log(0) = -inf here --
# confirm this cannot occur for the cached data.
RDMD_matrix = np.zeros((ds_num, ds_num))
dist_total = []
dist_within = []
dist_cross = []
excluded_index = ds_names_short.index(excluded_ds_short)
for i_1, i_2 in tqdm(list(combinations(ds2MCRD_list, 2))):
    MCRD_list_1 = np.array(ds2MCRD_list[i_1], dtype=float)
    MCRD_list_2 = np.array(ds2MCRD_list[i_2], dtype=float)
    RDMD = np.sqrt(np.mean(np.square(np.log(MCRD_list_1) - np.log(MCRD_list_2))))
    RDMD_matrix[i_1][i_2] = RDMD_matrix[i_2][i_1] = RDMD
    # The excluded dataset stays in the matrix but not in the summary statistics.
    if i_1 == excluded_index or i_2 == excluded_index:
        continue
    # Two datasets belong to the same domain when their names share the
    # prefix before the first '-'.
    if ds_names[i_1].split('-')[0] == ds_names[i_2].split('-')[0]:
        dist_within.append(RDMD)
    else:
        dist_cross.append(RDMD)
    dist_total.append(RDMD)

p_RDMD = Path('RDMD.pkl')
with p_RDMD.open('wb') as f:
    pickle.dump(RDMD_matrix, f)

plt.rcParams.update({'font.size': 15})
plt.clf()
# Heatmap over all datasets except the excluded one.
ds_names_short_new = [ds for ds in ds_names_short if ds != excluded_ds_short]
indices_new = [i for i, ds in enumerate(ds_names_short) if ds != excluded_ds_short]
ax = sns.heatmap(RDMD_matrix[np.ix_(indices_new,indices_new)], xticklabels=False, yticklabels=ds_names_short_new, cmap='RdBu')
# Color each row label with the dataset's group color.
for ticklabel in ax.get_yticklabels():
    ticklabel_text = ticklabel.get_text()
    ds_index = ds_names_short.index(ticklabel_text)
    tickcolor = colors_group[ds_index]
    ticklabel.set_color(tickcolor)
ax.figure.savefig('RDMD.png', bbox_inches='tight')
ax.figure.savefig('RDMD.pdf', bbox_inches='tight')
plt.show()

print('Global average: {:.3f}'.format(np.mean(dist_total)))
print('Within-domain average: {:.3f}'.format(np.mean(dist_within)))
print('Cross-domain average: {:.3f}'.format(np.mean(dist_cross)))

In [None]:
# Configuration for the vertex-cover experiment.
# Input data directories (relative to this notebook).
p_incident, p_core, p_size = (
    Path('../data') / sub for sub in ('incident', 'core', 'size')
)
# Output directory for the per-threshold coverage curves; created if missing.
p_vtx_cover = Path('vertex_cover')
p_vtx_cover.mkdir(exist_ok=True)
# Coverage thresholds to evaluate.
t_samples = [0.6, 0.7, 0.8, 0.9, 1.0]
# Number of nodes selected by each strategy.
K = 100

In [None]:
def _count_uncovered_hits(chosen_nodes, covered_edges, v2edges):
    """Count, per not-yet-covered hyperedge, its incidences with the chosen nodes.

    Returns a defaultdict(int) mapping edge id -> count; edges already in
    `covered_edges` are left out.
    """
    i_e2n_cover_e = defaultdict(int)
    for v in chosen_nodes:
        for i_e in v2edges[v]:
            if i_e not in covered_edges:
                i_e2n_cover_e[i_e] += 1
    return i_e2n_cover_e


def _cover_curve_by_ranking(ranked_nodes, v2edges, i2edges, t, n_steps):
    """Pick nodes in the given order and track how many hyperedges get covered.

    A hyperedge counts as covered once its incidences with the chosen nodes
    reach `t * len(edge)`.

    Args:
        ranked_nodes: nodes in the order in which they are picked.
        v2edges: node -> ids of incident hyperedges.
        i2edges: hyperedge id -> members of that hyperedge.
        t: coverage threshold (fraction of a hyperedge).
        n_steps: number of nodes to pick.

    Returns:
        dict mapping #chosen nodes -> #covered hyperedges.

    Raises:
        IndexError: if `ranked_nodes` has fewer than `n_steps` entries.
    """
    queue = deque(ranked_nodes)
    chosen_nodes = set()
    covered_edges = set()
    n_chosen2m_covered = dict()
    for _ in range(n_steps):
        is_seed = not chosen_nodes
        chosen_nodes.add(queue.popleft())
        # NOTE(review): coverage is not evaluated right after the very first
        # pick (kept from the original), so the entry for 1 chosen node is
        # always 0; anything the first node covers is picked up at step 2.
        if not is_seed:
            i_e2n_cover_e = _count_uncovered_hits(chosen_nodes, covered_edges, v2edges)
            for i_e, n_cover_e in i_e2n_cover_e.items():
                if n_cover_e >= t * len(i2edges[i_e]):
                    covered_edges.add(i_e)
        n_chosen2m_covered[len(chosen_nodes)] = len(covered_edges)
    return n_chosen2m_covered


def _cover_curve_greedy(nodes_by_degree, v2edges, i2edges, t, n_steps):
    """Greedily pick the node that newly covers the most hyperedges.

    The first pick is the head of `nodes_by_degree`. Afterwards, every
    uncovered hyperedge that is one incidence short of the threshold
    `t * len(edge)` is credited to each of its not-yet-chosen members, and the
    member with the most credited edges is picked. When no node would cover
    anything new, the next node from `nodes_by_degree` is picked instead.

    Args:
        nodes_by_degree: fallback pick order (callers pass degree-descending).
        v2edges: node -> ids of incident hyperedges.
        i2edges: hyperedge id -> members of that hyperedge.
        t: coverage threshold (fraction of a hyperedge).
        n_steps: number of nodes to pick.

    Returns:
        dict mapping #chosen nodes -> #covered hyperedges.

    Raises:
        IndexError: if the fallback order runs out of nodes.
    """
    queue = deque(nodes_by_degree)
    chosen_nodes = set()
    covered_edges = set()
    n_chosen2m_covered = dict()
    for _ in range(n_steps):
        if not chosen_nodes:
            # Seed pick; as in the ranking strategies, nothing is marked
            # covered at this step.
            chosen_nodes.add(queue.popleft())
        else:
            i_e2n_cover_e = _count_uncovered_hits(chosen_nodes, covered_edges, v2edges)
            v2newly_covered_E = defaultdict(set)
            # For t > 0.5 only edges touching a chosen node are scanned; for
            # t <= 0.5 every edge is scanned.
            # NOTE(review): iterating `i2edges` directly assumes it is a dict
            # keyed by edge id -- confirm (unused for the t values run here).
            candidate_edges = i2edges if t <= 0.5 else i_e2n_cover_e
            for i_e in candidate_edges:
                if i_e in covered_edges:
                    continue
                n_cover_e = i_e2n_cover_e[i_e]
                e = i2edges[i_e]
                if n_cover_e + 1 >= t * len(e):  # one more node to cover
                    for v in e:
                        if v in chosen_nodes:
                            continue
                        v2newly_covered_E[v].add(i_e)
            if not v2newly_covered_E:
                chosen_nodes.add(queue.popleft())
            else:
                chosen_node = max(v2newly_covered_E, key=lambda v: len(v2newly_covered_E[v]))
                chosen_nodes.add(chosen_node)
                # Keep the fallback order free of already-chosen nodes.
                queue.remove(chosen_node)
                covered_edges.update(v2newly_covered_E[chosen_node])
        n_chosen2m_covered[len(chosen_nodes)] = len(covered_edges)
    return n_chosen2m_covered


# For every threshold and dataset, run the three node-selection strategies
# (hypercoreness ranking, degree ranking, greedy) for K picks each and pickle
# the resulting {#chosen nodes: #covered hyperedges} curves.
for t_sample in tqdm(t_samples, ncols=50):
    p_vtx_cover_t = p_vtx_cover / str(t_sample)
    p_vtx_cover_t.mkdir(exist_ok=True)
    for p_core_ds in p_core.iterdir():
        ds_name = p_core_ds.name
        p_incident_ds = p_incident / ds_name
        with (p_incident_ds / 'i2edges.pkl').open('rb') as f:
            i2edges = pickle.load(f)
        with (p_incident_ds / 'v2edges.pkl').open('rb') as f:
            v2edges = pickle.load(f)
        nodes = list(v2edges.keys())

        # t-hypercoreness
        # Use the coreness file for the smallest available t that is >= t_sample.
        possible_t = [Fraction(c_t.name[2:-4].replace('-', '/')) for c_t in p_core_ds.glob('c*.pkl')]
        t_chosen = min(t for t in possible_t if t >= t_sample) if t_sample > 0.5 else min(possible_t)
        with (p_core_ds / 'c_{}.pkl'.format(str(t_chosen).replace('/', '-'))).open('rb') as f:
            v2c_t = pickle.load(f)
        # Highest coreness first; ties broken by higher degree.
        nodes_by_coreness = sorted(nodes, key=lambda v: (v2c_t[v], len(v2edges[v])), reverse=True)
        n_chosen2m_covered = _cover_curve_by_ranking(nodes_by_coreness, v2edges, i2edges, t_sample, K)
        p_vtx_cover_t_ds = p_vtx_cover_t / ds_name
        p_vtx_cover_t_ds.mkdir(exist_ok=True)
        with (p_vtx_cover_t_ds / 'coreness.pkl').open('wb') as f:
            pickle.dump(n_chosen2m_covered, f)

        # degree
        # The same ordering also serves as the greedy strategy's fallback order.
        nodes_by_degree = sorted(nodes, key=lambda v: len(v2edges[v]), reverse=True)
        n_chosen2m_covered = _cover_curve_by_ranking(nodes_by_degree, v2edges, i2edges, t_sample, K)
        with (p_vtx_cover_t_ds / 'degree.pkl').open('wb') as f:
            pickle.dump(n_chosen2m_covered, f)

        # greedy
        n_chosen2m_covered = _cover_curve_greedy(nodes_by_degree, v2edges, i2edges, t_sample, K)
        with (p_vtx_cover_t_ds / 'greedy.pkl').open('wb') as f:
            pickle.dump(n_chosen2m_covered, f)

In [None]:
p_figures = Path('figures')
# Make sure the output directory exists even if no earlier cell created it.
p_figures.mkdir(exist_ok=True)
plt.rcParams.update({'font.size': 25, 'text.usetex': False, 'lines.linewidth': 3})
plt.clf()

# x-axis: number of chosen nodes. The upper end follows K (the number of picks
# stored in each curve) instead of repeating the literal 100; the first few
# picks (< 5) are not plotted.
n_chosen_range = range(5, K + 1)

# One figure per threshold: coverage of the hypercoreness and greedy
# strategies relative to the degree strategy, averaged over all datasets.
for t_sample in tqdm(t_samples, ncols=50):
    p_vtx_cover_t = p_vtx_cover / str(t_sample)
    m_coreness_r_list = []
    m_greedy_r_list = []
    for p_vtx_cover_t_ds in p_vtx_cover_t.iterdir():
        # Load the {#chosen nodes: #covered hyperedges} curve of each strategy.
        method2m = dict()
        for method in ('coreness', 'degree', 'greedy'):
            with (p_vtx_cover_t_ds / '{}.pkl'.format(method)).open('rb') as f:
                n_chosen2m_covered = pickle.load(f)
            method2m[method] = np.array([n_chosen2m_covered[n] for n in n_chosen_range])
        m_coreness = method2m['coreness']
        m_degree = method2m['degree']
        m_greedy = method2m['greedy']
        # NOTE(review): a zero in m_degree yields inf/nan in the ratios (and in
        # the means below) -- confirm this cannot occur for the data at hand.
        m_coreness_r = m_coreness / m_degree
        m_greedy_r = m_greedy / m_degree
        m_coreness_r_list.append(m_coreness_r)
        m_greedy_r_list.append(m_greedy_r)
    m_coreness_r_mean = np.mean(m_coreness_r_list, axis=0)
    m_greedy_r_mean = np.mean(m_greedy_r_list, axis=0)

    plt.clf()
    # NOTE(review): labels are set but no legend is drawn -- confirm intentional.
    plt.plot(n_chosen_range, m_coreness_r_mean, label='$t_c$-hypercoreness', marker='.')
    # The degree strategy is the baseline, hence a constant 1.
    plt.plot(n_chosen_range, np.ones_like(m_coreness_r_mean), label='degree', marker='^')
    plt.plot(n_chosen_range, m_greedy_r_mean, label='greedy', marker='v')
    plt.xlabel('# chosen nodes')
    plt.ylabel('rel. # ' + r'$t_c$-covered' +'\nhyperedges')
    plt.title(r'$t_c = {}$'.format(t_sample))
    plt.subplots_adjust(left=0.25, right=0.95, bottom=0.25)
    plt.savefig((p_figures /'{}.png'.format(t_sample)))
    plt.savefig((p_figures /'{}.pdf'.format(t_sample)))
    plt.show()

In [None]:
p_figures = Path('figures')
plt.rcParams.update({'font.size': 25, 'text.usetex': False, 'lines.linewidth': 3})
plt.clf()

# x-axis: number of chosen nodes. The upper end follows K (the number of picks
# stored in each curve) instead of repeating the literal 100; the first few
# picks (< 5) are not plotted.
n_chosen_range = range(5, K + 1)

# One figure per (threshold, dataset): absolute number of covered hyperedges
# for each of the three node-selection strategies.
for t_sample in tqdm(t_samples, ncols=50):
    p_vtx_cover_t = p_vtx_cover / str(t_sample)
    for p_vtx_cover_t_ds in p_vtx_cover_t.iterdir():
        ds_name = p_vtx_cover_t_ds.name
        # Load the {#chosen nodes: #covered hyperedges} curve of each strategy.
        method2m = dict()
        for method in ('coreness', 'degree', 'greedy'):
            with (p_vtx_cover_t_ds / '{}.pkl'.format(method)).open('rb') as f:
                n_chosen2m_covered = pickle.load(f)
            method2m[method] = np.array([n_chosen2m_covered[n] for n in n_chosen_range])
        m_coreness = method2m['coreness']
        m_degree = method2m['degree']
        m_greedy = method2m['greedy']
        plt.clf()
        # NOTE(review): labels are set but no legend is drawn -- confirm intentional.
        plt.plot(n_chosen_range, m_coreness, label='$t_c$-hypercoreness', marker='.')
        plt.plot(n_chosen_range, m_degree, label='degree', marker='^')
        plt.plot(n_chosen_range, m_greedy, label='greedy', marker='v')
        plt.xlabel('# chosen nodes')
        plt.ylabel('# $t_c$-covered hyperedges')
        # Only the 't_c = ...' part is math. Keeping the dataset name outside
        # '$...$' stops mathtext from typesetting it as a formula ('-' as a
        # minus sign, '_' as a subscript, italic letters).
        plt.title('{}, $t_c = {}$'.format(ds_names_short[ds_names.index(ds_name)], t_sample))
        p_figures_ds = p_figures / ds_name
        # parents=True: also create 'figures/' when no earlier cell has done so.
        p_figures_ds.mkdir(parents=True, exist_ok=True)
        plt.savefig((p_figures_ds /'{}.png'.format(t_sample)), bbox_inches='tight')
        plt.savefig((p_figures_ds /'{}.pdf'.format(t_sample)), bbox_inches='tight')
        plt.show()