In [3]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [4]:
import project_path

In [5]:
import glob
import os

from pathlib import Path
from itertools import product

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import matplotlib.patches as patches

from src.plotting import make_result_heatmap, add_lines

In [6]:
# Input CSV with the benchmark scores and the directory that receives the figures.
# Both are written relative to the working directory and resolved to absolute paths.
# NOTE(review): the input file name says "SMB" while the output directory says
# "SBM" — confirm the spelling matches the file on disk.
path_in = Path('../benchmarks/benchmark_SMB.csv').resolve()
path_out = Path('../plots/benchmarks/SBM').resolve()

# Create the output directory together with any missing parents; an already
# existing directory is not an error.
path_out.mkdir(parents=True, exist_ok=True)

In [7]:
# Load the benchmark results; index_col=False tells pandas not to use any CSV
# column as the index, so the frame gets a default integer index.
# NOTE(review): the rendered frame shows an "Unnamed: 0" column, presumably the
# index that was written along with the CSV — index_col=0 would absorb it;
# confirm nothing relies on that column before changing it.
df = pd.read_csv(filepath_or_buffer=path_in, index_col=False)

# Bare last expression: rich display of the (truncated) frame.
df

Unnamed: 0,p,q,score,method
0,0.05,0.05,-0.001430,Spectral Custering
1,0.05,0.05,0.000000,Affinity Propagation
2,0.05,0.05,-0.002177,K-means
3,0.05,0.05,0.002011,Agglomerative Complete Clustering
4,0.05,0.05,0.025708,Agglomerative Average Clustering
...,...,...,...,...
2395,0.92,0.92,0.000000,Affinity Propagation
2396,0.92,0.92,-0.004937,K-means
2397,0.92,0.92,0.000201,Agglomerative Complete Clustering
2398,0.92,0.92,-0.001412,Agglomerative Average Clustering


In [8]:
# Build the boolean "theory" grid that is later overlaid on the result heatmaps.
block_size = 100
xs = np.arange(1, 21)

# Axis labels: log(block_size) / block_size scaled by 1..20, rounded to two decimals.
i = (np.log(block_size) / block_size * xs).round(2)

idx = np.arange(len(xs))

# mask[a - 1, b - 1] is True exactly when a >= b and
# |sqrt(a) - sqrt(b)| >= sqrt(2); every other cell stays False.
# Broadcasting a column against a row evaluates all (a, b) pairs at once.
sqrt_xs = np.sqrt(xs)
root_gap = np.abs(sqrt_xs[:, None] - sqrt_xs[None, :])
mask = (xs[:, None] >= xs[None, :]) & (root_gap >= np.sqrt(2))

# Label both axes with `i`, transpose, then order rows descending and columns
# ascending by label before extracting the raw boolean array.
labelled = pd.DataFrame(mask, index=i, columns=i)
transposed = labelled.T
rows_descending = transposed.sort_index(ascending=False)
theory_df = rows_descending.sort_index(axis=1, ascending=True)
values_theory = theory_df.to_numpy()

In [10]:
# Render one heatmap per clustering method and save each as a PDF in `path_out`.
#
# Group by the plain column label rather than a one-element list: with
# ['method'] pandas treats the grouper as multi-key, which makes pandas 1.5 emit
# a FutureWarning on iteration and pandas 2.x yield 1-tuple group keys.
experiments = [group for _, group in df.groupby('method')]
columns = ['p', 'q', 'score']
x_column, y_column, values_column = columns

for experiment in experiments:

    # All rows of a group share the same method label, so the first one names it.
    method = experiment['method'].iloc[0]
    name = f"benchmark_sbm_{method}"

    data = experiment[columns]
    fig, ax = plt.subplots(figsize=(15, 10))

    # Project helpers from src.plotting: draw the score heatmap over (p, q), then
    # overlay lines derived from the theory grid.
    # NOTE(review): exact drawing behaviour lives in src.plotting — not visible here.
    make_result_heatmap(data, ax, x_column=x_column, y_column=y_column, values_column=values_column)
    add_lines(values_theory, ax)
    # Keep ticks and gridlines beneath the plotted artists.
    ax.set_axisbelow(True)

    fig.tight_layout()

    # Same target file as before, built with Path operators instead of string
    # formatting; savefig accepts path-like objects.
    fig.savefig(path_out / f'{name}.pdf')
    # Close explicitly so figures do not accumulate in memory across iterations.
    plt.close(fig)