In [None]:
import sys; sys.path.append('..')

In [None]:
import pickle
from collections import Counter
from fractions import Fraction
from pathlib import Path
import matplotlib.colors as colors
import matplotlib.pyplot as plt
import numpy as np
import powerlaw
from scipy.stats import linregress
from tqdm import tqdm
from util import ds_names, ds_names_short, colors_group, step_plot

In [None]:
# --- Paths -----------------------------------------------------------------
# Per-dataset inputs are read from `p_core / <dataset>`; figures and pickled
# summaries are written under `p_figures` and `p_dumps` respectively.
p_core = Path('../data/core')
p_figures = Path('figures')
p_figures.mkdir(exist_ok=True)
p_dumps = Path('dumps')
# The dumps root must exist before the per-dataset sub-directories are
# created with a non-recursive mkdir in the loop below.
p_dumps.mkdir(exist_ok=True)

# --- Plot styling ----------------------------------------------------------
# Non-uniform colour boundaries: values are binned into the intervals between
# consecutive entries, so the colour scale is much finer around 0 than at the
# extremes. `norm` and `colormap` are consumed by the heatmap cell.
bounds = np.array([-100, -10, -1, 0., 1, 100, 10000.])
norm = colors.BoundaryNorm(boundaries=bounds, ncolors=256)
# NOTE: 'text.usetex': True requires a working LaTeX installation.
plt.rcParams.update({'font.size': 30, 'text.usetex': True, 'lines.linewidth': 7})
colormap = plt.get_cmap('RdBu_r')
# For every dataset: load each per-t pickle, fit a straight line to the
# log-log cumulative count curve, compare heavy-tailed fits against an
# exponential with `powerlaw`, save one figure per t, then dump and plot the
# per-t summaries.
for ds_name in ds_names:
    p_core_ds = p_core / ds_name
    # parents=True so this cell also works when the root output directories
    # have not been created yet.
    p_figures_ds = p_figures / ds_name
    p_figures_ds.mkdir(parents=True, exist_ok=True)
    p_dumps_ds = p_dumps / ds_name
    p_dumps_ds.mkdir(parents=True, exist_ok=True)
    # N is set once per dataset to the size of the first mapping loaded; it is
    # not read again in this cell (kept in case other cells rely on it).
    N = None
    t2lll_max = dict()  # t -> max log-likelihood ratio vs. exponential
    t2plR2 = dict()     # t -> R^2 of the log-log linear regression
    for c_t in tqdm(list(p_core_ds.glob('c*.pkl'))):
        # File names look like 'c_<num>-<den>.pkl' (or 'c_<int>.pkl'):
        # strip the 2-char prefix and the '.pkl' suffix, and read '-' as '/'.
        t = Fraction(c_t.name[2:-4].replace('-', '/'))
        # NOTE: pickle.load can execute arbitrary code; these files are
        # presumably produced by this project's own pipeline.
        with c_t.open('rb') as f:
            v2c_t = pickle.load(f)
        k_max = max(v2c_t.values())
        if N is None:
            N = len(v2c_t)

        # cnt_rem[k] = number of entries remaining after removing those whose
        # value is in 1..k-1. Assumes all values are integers >= 1, so that
        # this equals "# entries with value >= k" -- TODO confirm.
        cnt_c_t = Counter(v2c_t.values())
        cnt_rem = dict()
        cnt_rem[1] = len(v2c_t)
        cur_cnt = len(v2c_t)
        for k in range(1, k_max + 1):
            cur_cnt -= cnt_c_t[k]
            cnt_rem[k + 1] = cur_cnt
            if not cur_cnt:
                break

        # Linear regression in log-log space; a straight line there
        # corresponds to a power law in the original coordinates.
        X = list(range(1, k_max + 1))
        Y = [cnt_rem[x] for x in X]
        slope, intercept, r_value, _, _ = linregress(np.log10(X), np.log10(Y))
        r2 = r_value ** 2

        plt.clf()
        plt.loglog(X, Y)
        # Overlay the fitted line y = 10^intercept * x^slope.
        X = np.linspace(1, k_max, 10000)
        Y = [x ** float(slope) * 10 ** float(intercept) for x in X]
        plt.plot(X, Y, 'r--', linewidth=7)
        plt.xlabel('k')
        # '\\#' yields a literal backslash-hash (LaTeX-escaped '#') without
        # relying on an invalid Python escape sequence.
        plt.ylabel('\\# nodes with\n' + r'$t$-hypercoreness $\geq k$')
        plt.title('$t = {t}$, '.format(t=t) + r'{$\mathbf{R^2 = \underline{%.2f}}$}' % r2)
        plt.savefig(p_figures_ds / c_t.name[2:].replace('pkl', 'png'), bbox_inches='tight')
        plt.savefig(p_figures_ds / c_t.name[2:].replace('pkl', 'pdf'), bbox_inches='tight')
        t2plR2[t] = r2

        # Compare power-law and log-normal fits against an exponential and
        # keep the larger of the two log-likelihood ratios.
        pl_fit = powerlaw.Fit(list(v2c_t.values()), verbose=False)
        lll_pl, _ = pl_fit.loglikelihood_ratio('power_law', 'exponential')
        lll_ln, _ = pl_fit.loglikelihood_ratio('lognormal', 'exponential')
        lll_max = max(lll_pl, lll_ln)
        t2lll_max[t] = lll_max

    # Persist the per-t summaries; the heatmap cell reloads t2lll_max.pkl.
    with (p_dumps_ds / 't2plR2.pkl').open('wb') as f:
        pickle.dump(t2plR2, f)
    with (p_dumps_ds / 't2lll_max.pkl').open('wb') as f:
        pickle.dump(t2lll_max, f)

    plt.clf()
    step_plot(t2lll_max)
    plt.xlabel('t')
    plt.ylabel('max. log-likelihood')
    # NOTE(review): a log axis cannot display non-positive values, while the
    # heatmap norm in this notebook has negative boundaries; consider
    # 'symlog' if the ratios can be <= 0.
    plt.yscale('log')
    plt.savefig(p_figures_ds / 'lll_max.png', bbox_inches='tight')
    plt.savefig(p_figures_ds / 'lll_max.pdf', bbox_inches='tight')
    plt.show()

    plt.clf()
    step_plot(t2plR2)
    plt.xlabel('t')
    plt.ylabel('$R^2$ of power-law fitting')
    plt.savefig(p_figures_ds / 'plr2.png', bbox_inches='tight')
    plt.savefig(p_figures_ds / 'plr2.pdf', bbox_inches='tight')
    plt.show()


In [None]:
# Build a (dataset x sampled-t) matrix of the dumped max log-likelihood ratios.
S = 5  # number of equal steps in [0, 1]; yields S + 1 sample points
# Float sample points, used only for tick labels in the plotting code below.
t_samples = [i / S for i in range(S + 1)]
# Exact thresholds for selecting keys. The dumped keys are Fractions, and
# Fraction-vs-float comparison is exact: the float 0.2 is slightly larger than
# 1/5, so `Fraction(1, 5) >= 0.2` is False and a key equal to the sample point
# would be skipped. Comparing against Fraction(i, S) avoids that.
t_thresholds = [Fraction(i, S) for i in range(S + 1)]
lll_max_matrix = np.zeros((len(ds_names), len(t_samples)))
plt.rcParams.update({'font.size': 15, 'text.usetex': False, 'lines.linewidth': 7})
for ds_index, ds_name in enumerate(ds_names):
    p_dumps_ds = p_dumps / ds_name
    with (p_dumps_ds / 't2lll_max.pkl').open('rb') as f:
        t2lll_max = pickle.load(f)
    # For each sample point pick the smallest available t that is >= it.
    # Raises ValueError (empty min) if a dataset has no t at or above a
    # sample point.
    t_chosen = [min(t for t in t2lll_max if t >= t_threshold) for t_threshold in t_thresholds]
    for i, t in enumerate(t_chosen):
        lll_max_matrix[ds_index][i] = t2lll_max[t]

# Heatmap of lll_max_matrix: one row per dataset, one column per sampled t.
plt.clf()
# Stand-alone mappable that carries the shared colormap/norm for the colorbar.
sm = plt.cm.ScalarMappable(cmap=colormap, norm=norm)
sm.set_array([])  # public equivalent of assigning the private `_A` attribute
plt.imshow(lll_max_matrix, cmap=colormap, norm=norm, aspect='auto')
ax = plt.gca()
ax.set_yticks(list(range(len(ds_names))))
ax.set_yticklabels(ds_names_short)
ax.set_xticks(list(range(S+1)))
ax.set_xticklabels([str(t_sample) for t_sample in t_samples])
# Colour every dataset label with the colour stored at the same index in
# colors_group (index looked up through the short name).
for ticklabel in ax.get_yticklabels():
    ticklabel_text = ticklabel.get_text()
    ds_index = ds_names_short.index(ticklabel_text)
    tickcolor = colors_group[ds_index]
    ticklabel.set_color(tickcolor)
# `sm` is not attached to any Axes, so tell Matplotlib explicitly which Axes
# the colorbar should take its space from.
plt.colorbar(sm, ax=ax)
plt.xlabel('t')
plt.savefig('lll_max_matrix.png', bbox_inches='tight')
plt.savefig('lll_max_matrix.pdf', bbox_inches='tight')
plt.show()

