In [45]:
%load_ext autoreload
%autoreload 2\
%matplotlib inline

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [46]:
import project_path

In [47]:
import glob
import os

from pathlib import Path
from itertools import product

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import matplotlib.patches as patches

from src.plotting import make_result_heatmap, add_lines

In [48]:
# Overview of the recorded experiment ids and their descriptions.
# The first CSV column holds the saved row index, so it is read as the index
# (consistent with how the result files are loaded further down) instead of
# showing up as a spurious "Unnamed: 0" data column.
pd.read_csv('../experiments.csv', index_col=0)

Unnamed: 0,id,Type of experiment
0,0,Local experiments
1,1587919855,p vs q algorithms for SBM with 50 cuts
2,1587918440,p vs q algorithms for SBM with 50 cuts for 4 ...
3,1587804052,p vs q algorithms for SBM with 50 cuts for 5 ...
4,1587703699,KnnBlobs experiments with different k and 50 ...
5,1586946592,SBM experiments
6,1587621343,KnnBlobs with 100 cuts testing different a
7,1587629564,KL vs MF algorithms on SBM
8,1587632158,SBM with fixed p and q and different numbers ...
9,1587632375,KnnBlobs with fixed k and different numbers o...


In [50]:
# Id of the experiment to plot. It selects the ../output/<id> and ../plots/<id>
# directories and decides which of the id-guarded plotting cells below runs.
experiment_name = '1590392249'

In [51]:
# Results are read from ../output/<experiment_name> and figures are written to
# ../plots/<experiment_name>; both are resolved to absolute paths.
path_in = Path('../output/{}'.format(experiment_name)).resolve()
path_out = Path('../plots/{}'.format(experiment_name)).resolve()

# Create the plot directory (including missing parents); no error if it exists.
path_out.mkdir(parents=True, exist_ok=True)

In [52]:
# Load every result file found anywhere below `path_in` into one DataFrame.
# The per-file frames are gathered in a list and concatenated once: growing a
# frame with `DataFrame.append` inside the loop re-copies all rows on every
# iteration, and that method was removed in pandas 2.0.
result_frames = [
    pd.read_csv(os.path.join(subdir, file), index_col=0)
    for subdir, _dirs, files in os.walk(path_in)
    for file in files
]

# `pd.concat` raises on an empty list, so fall back to an empty frame when the
# directory holds no result files. `ignore_index=True` yields a fresh 0..n-1 index.
full_df = pd.concat(result_frames, ignore_index=True) if result_frames else pd.DataFrame()

In [54]:
# Show the combined results table as the cell output.
full_df

Unnamed: 0,seed,dataset_name,preprocessing_name,agreement,percentile_orders,unique_id,dataset_type,block_sizes,p,q,nb_cuts,lb_f,Adjusted Rand Score
0,60,sbm,fid_mat,50,100,1590392249,graph,"[100, 100]",0.55,0.69,50,0.4,-0.003963
1,80,sbm,fid_mat,50,100,1590392249,graph,"[100, 100]",0.14,0.92,50,0.4,-0.006556
2,70,sbm,fid_mat,50,100,1590392249,graph,"[100, 100]",0.41,0.28,50,0.3,0.431297
3,40,sbm,fid_mat,50,100,1590392249,graph,"[100, 100]",0.32,0.51,50,0.2,0.000000
4,100,sbm,fid_mat,50,100,1590392249,graph,"[100, 100]",0.28,0.87,50,0.3,-0.000197
...,...,...,...,...,...,...,...,...,...,...,...,...,...
11989,100,sbm,fid_mat,50,100,1590392249,graph,"[100, 100]",0.92,0.14,50,0.2,0.000603
11990,40,sbm,fid_mat,50,100,1590392249,graph,"[100, 100]",0.60,0.83,50,0.2,0.000000
11991,10,sbm,fid_mat,50,100,1590392249,graph,"[100, 100]",0.28,0.05,50,0.4,1.000000
11992,10,sbm,fid_mat,50,100,1590392249,graph,"[100, 100]",0.23,0.83,50,0.3,0.000121


# SBM

### Plot SBM for 2 blocks and different agreement values `a` (id = 1587639002)

In [23]:
if experiment_name == '1587639002':

    # Keep only the 'fid_mat' preprocessing runs. This rebinds the notebook-level
    # `full_df`, so the narrowed frame is what any later cell sees.
    is_fid_mat = full_df['preprocessing_name'] == 'fid_mat'
    full_df = full_df[is_fid_mat]

    # Columns handed to the heatmap as x_column / y_column / values_column.
    columns = ['q', 'agreement', 'homogeneity']
    x_column, y_column, values_column = columns

    # One figure per (block_sizes, lb_f) combination.
    experiments = [group for _, group in full_df.groupby(['block_sizes', 'lb_f'])]
    for experiment in experiments:
        # Every row of a group shares the grouping values; read them from the first row.
        block_sizes = experiment['block_sizes'].iloc[0]
        lb_f = experiment['lb_f'].iloc[0]

        title = f"SBM with {block_sizes} blocks using p = 0.3, nb_cuts = 100, lb_f = {lb_f}"
        name = f"block_sizes_{block_sizes}_lb_f_{lb_f}"

        data = experiment[columns]

        fig, ax = plt.subplots(figsize=(15, 10))
        # NOTE(review): `title` is passed positionally here, while the cell for
        # experiment 1590392249 calls make_result_heatmap(data, ax, ...) without
        # it — confirm which form matches the current signature in src.plotting.
        make_result_heatmap(data, title, ax, x_column=x_column, y_column=y_column, values_column=values_column)
        fig.tight_layout()

        figure_file = f'{path_out / name}.svg'
        fig.savefig(figure_file)
        # Close the figure so open figures do not pile up across iterations.
        plt.close(fig)

### Plot SBM for 2 blocks with theoretical bounds (id = 1590392249)

In [55]:
# Boolean mask of the region where |sqrt(a) - sqrt(b)| >= sqrt(2) holds, laid out
# on the same (rounded) probability grid log(block_size) / block_size * k, k = 1..20.
block_size = 100
xs = np.arange(1, 21)
i = (np.log(block_size) / block_size * xs).round(2)

# Not referenced again in this cell; kept so the notebook namespace is unchanged.
idx = np.arange(xs.size)

# Broadcast a down the rows and b across the columns so that mask[a-1, b-1]
# covers every (a, b) pair at once. Only the a >= b half can be True.
a_values = xs[:, np.newaxis]
b_values = xs[np.newaxis, :]
sqrt_gap = np.abs(np.sqrt(a_values) - np.sqrt(b_values))
mask = (a_values >= b_values) & (sqrt_gap >= np.sqrt(2))

# Label both axes with the probability grid, transpose, then order the rows
# descending and the columns ascending before extracting the raw array.
theory_df = pd.DataFrame(mask, index=i, columns=i).T
theory_df = theory_df.sort_index(ascending=False)
theory_df = theory_df.sort_index(axis=1, ascending=True)
values_theory = theory_df.to_numpy()

In [58]:
if experiment_name == '1590392249':
    # Columns handed to the heatmap as x_column / y_column / values_column.
    columns = ['p', 'q', 'Adjusted Rand Score']
    x_column, y_column, values_column = columns

    # One figure (and one CSV) per lb_f value.
    experiments = [group for _, group in full_df.groupby(['lb_f'])]
    for experiment in experiments:
        lb_f = experiment['lb_f'].iloc[0]
        name = f"SBM_theory_lb_f_{lb_f}"

        # Average the score over all rows that share the same (p, q) pair.
        selected = experiment[columns]
        data = selected.groupby(['p', 'q'], as_index=False).mean()

        # NOTE(review): this relative path lands in the current working directory,
        # not in `path_out` where the figures go — confirm that is intended.
        data.to_csv(f'{name}.csv', index=False)

        fig, ax = plt.subplots(figsize=(15, 10))
        make_result_heatmap(data, ax, x_column=x_column, y_column=y_column, values_column=values_column)
        # Overlay derived from the theoretical mask computed in the previous cell
        # (presumably the boundary lines — see add_lines in src.plotting).
        add_lines(values_theory, ax)
        ax.set_axisbelow(True)
        fig.tight_layout()

        figure_file = f'{path_out / name}.pdf'
        fig.savefig(figure_file)
        # Close the figure so open figures do not pile up across iterations.
        plt.close(fig)

### Plot SBM for 2/3 blocks and unbalanced blocks (id = 1587919855)

In [76]:
if experiment_name == '1587919855':

    # Keep only the 'fid_mat' preprocessing runs. This rebinds the notebook-level
    # `full_df`, so the narrowed frame is what any later cell sees.
    is_fid_mat = full_df['preprocessing_name'] == 'fid_mat'
    full_df = full_df[is_fid_mat]

    # One figure per (block_sizes, lb_f, agreement) combination.
    experiments = [group for _, group in full_df.groupby(['block_sizes', 'lb_f', 'agreement'])]
    for experiment in experiments:
        # Every row of a group shares the grouping values; read them from the first row.
        block_sizes = experiment['block_sizes'].iloc[0]
        lb_f = experiment['lb_f'].iloc[0]
        agreement = experiment['agreement'].iloc[0]
        # The title shows the agreement as an integer; the file name keeps the raw value.
        agreement_as_int = experiment['agreement'].astype(int).iloc[0]

        title = f"SBM with {block_sizes} blocks using nb_cuts = 50, lb_f = {lb_f} and a = {agreement_as_int}"
        name = f"block_sizes_{block_sizes}_a_{agreement}_lb_f_{lb_f}"

        data = experiment[['p', 'q', 'homogeneity']]

        fig, ax = plt.subplots(figsize=(16.18, 10))
        # NOTE(review): `title` is passed positionally here, while the cell for
        # experiment 1590392249 calls make_result_heatmap(data, ax, ...) without
        # it — confirm which form matches the current signature in src.plotting.
        make_result_heatmap(data, title, ax)
        fig.tight_layout()

        figure_file = f'{path_out / name}.pdf'
        fig.savefig(figure_file)
        # Close the figure so open figures do not pile up across iterations.
        plt.close(fig)

### Plot SBM for 4 blocks (id = 1587918440)

In [27]:
if experiment_name == '1587918440':

    # Keep only the 'fid_mat' preprocessing runs. This rebinds the notebook-level
    # `full_df`, so the narrowed frame is what any later cell sees.
    is_fid_mat = full_df['preprocessing_name'] == 'fid_mat'
    full_df = full_df[is_fid_mat]

    # One figure per (block_sizes, lb_f, agreement) combination.
    experiments = [group for _, group in full_df.groupby(['block_sizes', 'lb_f', 'agreement'])]
    for experiment in experiments:
        # Every row of a group shares the grouping values; read them from the first row.
        block_sizes = experiment['block_sizes'].iloc[0]
        lb_f = experiment['lb_f'].iloc[0]
        agreement = experiment['agreement'].iloc[0]
        # The title shows the agreement as an integer; the file name keeps the raw value.
        agreement_as_int = experiment['agreement'].astype(int).iloc[0]

        title = f"SBM with {block_sizes} blocks using nb_cuts = 50, lb_f = {lb_f} and a = {agreement_as_int}"
        name = f"block_sizes_{block_sizes}_a_{agreement}_lb_f_{lb_f}"

        data = experiment[['p', 'q', 'homogeneity']]

        fig, ax = plt.subplots(figsize=(15, 10))
        # NOTE(review): `title` is passed positionally here, while the cell for
        # experiment 1590392249 calls make_result_heatmap(data, ax, ...) without
        # it — confirm which form matches the current signature in src.plotting.
        make_result_heatmap(data, title, ax)
        fig.tight_layout()

        figure_file = f'{path_out / name}.svg'
        fig.savefig(figure_file)
        # Close the figure so open figures do not pile up across iterations.
        plt.close(fig)