### Allow relative imports

In [1]:
# Notebook setup: load the autoreload extension in mode 2 and render
# matplotlib figures inline.
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import project_path

In [18]:
import glob
import os

from pathlib import Path
from itertools import product

import numpy as np
import pandas as pd

from altair_saver import save

import matplotlib.pyplot as plt
import matplotlib.patches as patches

from src.plotting import make_homogeneity_plot, make_full_plot, graphic_settings, make_result_heatmap, add_lines

In [6]:
# Display the experiment registry (experiment id and its description).
pd.read_csv('../experiments.csv')

Unnamed: 0,id,Type of experiment
0,0,Local experiments
1,1587919855,p vs q algorithms for SBM with 50 cuts
2,1587918440,p vs q algorithms for SBM with 50 cuts for 4 ...
3,1587804052,p vs q algorithms for SBM with 50 cuts for 5 ...
4,1587703699,KnnBlobs experiments with different k and 50 ...
5,1586946592,SBM experiments
6,1587639002,SBM with 100 cuts fixed p and q testing diffe...
7,1587621343,KnnBlobs with 100 cuts testing different a
8,1587629564,KL vs MF algorithms on SBM
9,1587632158,SBM with fixed p and q and different numbers ...


In [7]:
# Id of the experiment to plot: it selects the ../output/<id> and ../plots/<id>
# folders and decides which of the `if experiment_name == ...` cells run.
experiment_name = '1588572854'

In [8]:
# Resolve the experiment's input folder and plot folder to absolute paths,
# then make sure the plot folder exists.
path_in, path_out = (
    Path(raw).resolve()
    for raw in (f'../output/{experiment_name}', f'../plots/{experiment_name}')
)
path_out.mkdir(parents=True, exist_ok=True)

In [9]:
# Read every result file below the experiment's output folder (first column
# is the index) and stack them into one frame with a fresh 0..n-1 index.
# The frames are collected first and concatenated once: DataFrame.append was
# removed in pandas 2.0 and copied the accumulated frame on every call.
result_frames = [
    pd.read_csv(Path(subdir) / file, index_col=0)
    for subdir, dirs, files in os.walk(path_in)
    for file in files
]
if result_frames:
    full_df = pd.concat(result_frames).reset_index(drop=True)
else:
    # pd.concat refuses an empty list; keep the previous outcome (empty frame).
    full_df = pd.DataFrame()

In [10]:
# Inspect the concatenated results of all runs of this experiment.
full_df

Unnamed: 0,agreement,block_sizes,completeness,dataset_name,dataset_type,homogeneity,lb_f,max_order,nb_cuts,order_best,p,percentile_orders,preprocessing_name,q,seed,unique_id,v_measure_score
0,50.0,"[100, 100]",0.000122,sbm,graph,1.218091e-04,0.4,3373.0,50.0,3233.0,0.05,100.0,fid_mat,0.69,42.0,1588572854,1.217795e-04
1,50.0,"[100, 100]",1.000000,sbm,graph,1.000000e+00,0.4,1163.0,50.0,512.0,0.18,100.0,fid_mat,0.05,42.0,1588572854,1.000000e+00
2,50.0,"[100, 100]",0.000096,sbm,graph,8.507747e-05,0.3,3979.0,50.0,3307.0,0.28,100.0,karnig_lin,0.60,42.0,1588572854,9.015704e-05
3,50.0,"[100, 100]",1.000000,sbm,graph,1.000000e+00,0.3,5024.0,50.0,1403.0,0.87,100.0,fid_mat,0.14,42.0,1588572854,1.000000e+00
4,50.0,"[100, 100]",1.000000,sbm,graph,1.000000e+00,0.4,1991.0,50.0,1403.0,0.64,100.0,karnig_lin,0.14,42.0,1588572854,1.000000e+00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2388,50.0,"[100, 100]",1.000000,sbm,graph,1.281371e-15,0.2,2865.0,50.0,1940.0,0.05,100.0,fid_mat,0.60,42.0,1588572854,2.562741e-15
2389,50.0,"[100, 100]",1.000000,sbm,graph,1.000000e+00,0.2,5941.0,50.0,2798.0,0.92,100.0,fid_mat,0.28,42.0,1588572854,1.000000e+00
2390,50.0,"[100, 100]",1.000000,sbm,graph,1.281371e-15,0.2,4950.0,50.0,3431.0,0.32,100.0,karnig_lin,0.74,42.0,1588572854,2.562741e-15
2391,50.0,"[100, 100]",1.000000,sbm,graph,1.281371e-15,0.2,4743.0,50.0,3461.0,0.74,100.0,fid_mat,0.46,42.0,1588572854,2.562741e-15


In [14]:
# Theoretical grid used by the heatmap overlay below.
# For integer multipliers a, b in 1..20, a cell is True when a >= b and
# |sqrt(a) - sqrt(b)| >= sqrt(2). Axis labels are log(n)/n * multiplier,
# rounded to two decimals, with n = block_size.
block_size = 100
xs = np.arange(1, 21)
i = (np.log(block_size) / block_size * np.arange(1, 21)).round(2)

idx = np.arange(len(xs))

# Broadcast the pairwise comparison: rows follow a, columns follow b.
roots = np.sqrt(xs)
root_gap = np.abs(roots[:, None] - roots[None, :])
mask = (xs[:, None] >= xs[None, :]) & (root_gap >= np.sqrt(2))

# Transpose so rows follow b and columns follow a, then order rows
# descending and columns ascending.
theory_df = pd.DataFrame(mask, columns=i, index=i).T
theory_df = theory_df.sort_index(ascending=False)
theory_df = theory_df.sort_index(axis=1, ascending=True)
values_theory = theory_df.to_numpy()

In [25]:
# One matplotlib heatmap of homogeneity over (p, q) per (block_sizes, lb_f)
# combination, restricted to the 'fid_mat' preprocessing, with the theory
# grid drawn on top. Each figure is written as SVG and closed again.
full_df = full_df[full_df['preprocessing_name'] == 'fid_mat']
for experiment in (group for _key, group in full_df.groupby(['block_sizes', 'lb_f'])):
    block_sizes = experiment['block_sizes'].iloc[0]
    lb_f = experiment['lb_f'].iloc[0]
    agreement = experiment['agreement'].astype(int).iloc[0]

    title = f"SBM with {block_sizes} blocks using nb_cuts = 50, lb_f = {lb_f} and a = {agreement}"
    name = f"block_sizes_{block_sizes}_lb_f_{lb_f}"

    data = experiment[['p', 'q', 'homogeneity']]

    fig, ax = plt.subplots(figsize=(15, 10))
    make_result_heatmap(data, title, ax)
    add_lines(values_theory, ax)
    fig.tight_layout()

    fig.savefig(f'{path_out / name}.svg')
    # Release the figure so that looping over many groups does not pile up open figures.
    plt.close(fig)

# Questionnaire

In [101]:
# Questionnaire experiment: one homogeneity chart per number of mindsets.
if experiment_name == '1587978724':
    for experiment in (group for _key, group in full_df.groupby(['nb_mindsets'])):
        data = experiment
        # Best order as a fraction of the maximal order, two decimals.
        data['order'] = (data['order_best'] / data['max_order']).round(2)

        nb_mindsets = data['nb_mindsets'].iloc[0]
        name = f"nb_mindsets_{nb_mindsets}"
        title = f"Number mindsets: {nb_mindsets}"

        chart = graphic_settings(
            make_homogeneity_plot(data, title, x_axis='nb_features', y_axis='range_answers', facet_on='preprocessing_name')
        )
        save(chart, f'{path_out / name}.svg')

# SBM

### Plot SBM for 2 blocks id = 1588572854

In [133]:
import altair as alt

# Long-format version of the theory grid: one row per (p, q) pair with a
# boolean `values` column, ready for Altair.
# p and q are log(n)/n * multiplier rounded to two decimals (n = block_size);
# `values` is True when a >= b and |sqrt(a) - sqrt(b)| >= sqrt(2).
# The rows are collected under their own name so this cell no longer
# overwrites the `i` grid and the boolean `mask` array of the theory cell.
block_size = 100
xs = np.arange(1, 21)
scale = np.log(block_size) / block_size

recovery_rows = []
for a, b in product(xs, xs):
    p = (scale * a).round(2)
    q = (scale * b).round(2)
    recoverable = bool(a >= b and np.abs(np.sqrt(a) - np.sqrt(b)) >= np.sqrt(2))
    recovery_rows.append([p, q, recoverable])
df = pd.DataFrame(recovery_rows, columns=['p', 'q', 'values'])
df

Unnamed: 0,p,q,values
0,0.05,0.05,False
1,0.05,0.09,False
2,0.05,0.14,False
3,0.05,0.18,False
4,0.05,0.23,False
...,...,...,...
395,0.92,0.74,False
396,0.92,0.78,False
397,0.92,0.83,False
398,0.92,0.87,False


In [160]:
# Heatmap of the expected-recovery table `df`: p on x (ascending), q on y
# (descending), colour taken from the `values` column on a fixed 0..1 scale.
p_axis = alt.X('p', type='ordinal', sort=alt.EncodingSortField(field='p', order='ascending'), axis=alt.Axis(grid=True))
q_axis = alt.Y('q', type='ordinal', sort=alt.EncodingSortField(field='q', order='descending'), axis=alt.Axis(grid=True))
recovery_colour = alt.Color('values', type='quantitative', scale=alt.Scale(domain=[0, 1]))

chart = graphic_settings(
    alt.Chart(df, width=800, height=300)
    .mark_rect()
    .encode(p_axis, q_axis, recovery_colour)
    .properties(title='Expected recovery')
)
chart

In [150]:
# Preview for the 2-block SBM experiment: the measured homogeneity chart
# layered over the expected-recovery grid `df`. Only the first
# (block_sizes, lb_f) group is shown and nothing is written to disk.
if experiment_name == '1588572854':
    full_df = full_df[full_df['preprocessing_name'] == 'fid_mat']

    for experiment in (group for _key, group in full_df.groupby(['block_sizes', 'lb_f'])):
        data = experiment
        # Best order as a fraction of the maximal order, two decimals.
        data['order'] = (data['order_best'] / data['max_order']).round(2)

        block_sizes = data['block_sizes'].iloc[0]
        lb_f = data['lb_f'].iloc[0]
        agreement = data['agreement'].iloc[0]
        name = f"block_sizes_{block_sizes}_lb_f_{lb_f}"
        title = f"SBM with {block_sizes} blocks using nb_cuts = 50, lb_f = {lb_f} and a = {agreement}"

        chart = make_homogeneity_plot(data, title, x_axis='p', y_axis='q', facet_on='preprocessing_name')

        # Expected-recovery grid on the same p/q axes, drawn underneath the results.
        chart2 = alt.Chart(df, width=800, height=300).mark_rect().encode(
            alt.X('p', type='ordinal', sort=alt.EncodingSortField(field='p', order='ascending'), axis=alt.Axis(grid=True)),
            alt.Y('q', type='ordinal', sort=alt.EncodingSortField(field='q', order='descending'), axis=alt.Axis(grid=True)),
            alt.Color('values', type='nominal'),
        )

        chart = graphic_settings(chart2 + chart)
        chart.show()

        # Preview only: stop after the first group.
        # TODO: to export every group instead, remove the `break` and call
        # save(chart, f'{path_out / name}.svg') here.
        break

Displaying chart at http://localhost:16584/


### Plot SBM for 2/3 blocks and unbalanced id = 1587919855

In [103]:
# SBM with 2/3 blocks and unbalanced blocks: one homogeneity chart per
# (block_sizes, lb_f, agreement) setting, 'fid_mat' preprocessing only.
if experiment_name == '1587919855':
    full_df = full_df[full_df['preprocessing_name'] == 'fid_mat']
    for experiment in (group for _key, group in full_df.groupby(['block_sizes', 'lb_f', 'agreement'])):
        data = experiment
        # Best order as a fraction of the maximal order, two decimals.
        data['order'] = (data['order_best'] / data['max_order']).round(2)

        block_sizes = data['block_sizes'].iloc[0]
        lb_f = data['lb_f'].iloc[0]
        agreement = data['agreement'].iloc[0]
        name = f"block_sizes_{block_sizes}_a_{agreement}_lb_f_{lb_f}"
        title = f"SBM with {block_sizes} blocks using nb_cuts = 50, lb_f = {lb_f} and a = {agreement}"

        chart = graphic_settings(
            make_homogeneity_plot(data, title, x_axis='p', y_axis='q', facet_on='preprocessing_name')
        )
        save(chart, f'{path_out / name}.svg')

### Plot SBM for 4 blocks id = 1587918440

In [104]:
# SBM with 4 blocks: one homogeneity chart per (lb_f, agreement) setting,
# 'fid_mat' preprocessing only.
if experiment_name == '1587918440':

    full_df = full_df[full_df['preprocessing_name'] == 'fid_mat']

    for experiment in (group for _key, group in full_df.groupby(['lb_f', 'agreement'])):
        data = experiment
        # Best order as a fraction of the maximal order, two decimals.
        data['order'] = (data['order_best'] / data['max_order']).round(2)

        # A first name/title pair used to be built here and was overwritten
        # right away; it also called `.iloc(0)` instead of `.iloc[0]`.
        # Only the effective pair is kept.
        block_sizes = data['block_sizes'].iloc[0]
        lb_f = data['lb_f'].iloc[0]
        agreement = data['agreement'].iloc[0]
        name = f"block_sizes_{block_sizes}_a_{agreement}_lb_f_{lb_f}"
        title = f"SBM with {block_sizes} blocks using nb_cuts = 50, lb_f = {lb_f} and a = {agreement}"

        chart = make_homogeneity_plot(data, title, x_axis='p', y_axis='q', facet_on='preprocessing_name')
        chart = graphic_settings(chart)

        save(chart, f'{path_out / name}.svg')

### Plot SBM for 5 blocks id = 1587804052

In [105]:
# SBM with 5 blocks: one full plot per lb_f value (all preprocessings,
# no graphic_settings pass).
if experiment_name == '1587804052':
    for experiment in (group for _key, group in full_df.groupby(['lb_f'])):
        data = experiment
        # Best order as a fraction of the maximal order, two decimals.
        data['order'] = (data['order_best'] / data['max_order']).round(2)

        block_sizes = data['block_sizes'].iloc[0]
        lb_f = data['lb_f'].iloc[0]
        agreement = data['agreement'].iloc[0]
        name = f"block_sizes_{block_sizes}_lb_f_{lb_f}"
        title = f"Block sizes: {block_sizes} with lb_f: {lb_f} for 50 cuts and a = {agreement}"

        chart = make_full_plot(data, title, x_axis='p', y_axis='q', facet_on='preprocessing_name')
        save(chart, f'{path_out / name}.svg')

# Mindsets

### Plot Mindsets with variable nb question id = 1587726047

In [106]:
# Mindsets with a variable number of questions: one homogeneity chart
# (nb_questions vs noise) per mindset_sizes value.
if experiment_name == '1587726047':
    for experiment in (group for _key, group in full_df.groupby(['mindset_sizes'])):
        data = experiment
        # Best order as a fraction of the maximal order, two decimals.
        data['order'] = (data['order_best'] / data['max_order']).round(2)

        mindset_sizes = data['mindset_sizes'].iloc[0]
        name = f"mindset_sizes_{mindset_sizes}"
        title = f"Mindset sizes: {mindset_sizes} without useless questions"

        chart = graphic_settings(
            make_homogeneity_plot(data, title, x_axis='nb_questions', y_axis='noise', facet_on='preprocessing_name')
        )
        save(chart, f'{path_out / name}.svg')

### Plot Mindsets useless vs noise for 50 questions id = 1587723598

In [110]:
# Mindsets, useless questions vs noise: one homogeneity chart
# (nb_useless vs noise) per mindset_sizes value.
if experiment_name == '1587723598':
    for experiment in (group for _key, group in full_df.groupby(['mindset_sizes'])):
        data = experiment
        # Best order as a fraction of the maximal order, two decimals.
        data['order'] = (data['order_best'] / data['max_order']).round(2)

        mindset_sizes = data['mindset_sizes'].iloc[0]
        name = f"mindset_sizes_{mindset_sizes}"
        title = f"{mindset_sizes} mindsets sizes with 50 usefull questions plus variable number of useless questions"

        chart = graphic_settings(
            make_homogeneity_plot(data, title, x_axis='nb_useless', y_axis='noise', facet_on='preprocessing_name')
        )
        save(chart, f'{path_out / name}.svg')

### Plot Mindsets with fixed nb question id = 1587716525

In [82]:
# Mindsets with a fixed number of questions: one homogeneity chart
# (agreement vs noise) per mindset_sizes value.
if experiment_name == '1587716525':

    for experiment in (group for _key, group in full_df.groupby(['mindset_sizes'])):
        data = experiment
        # Best order as a fraction of the maximal order, two decimals.
        data['order'] = (data['order_best'] / data['max_order']).round(2)

        mindset_sizes = data['mindset_sizes'].iloc[0]
        name = f"mindset_sizes_{mindset_sizes}"
        title = f"Mindset sizes: {mindset_sizes} with 50 cuts and 50 usefull questions"

        chart = graphic_settings(
            make_homogeneity_plot(data, title, x_axis='agreement', y_axis='noise', facet_on='preprocessing_name')
        )

        save(chart, f'{path_out / name}.svg')

### Plot KnnBlobs with variable k for 50 cuts id = 1587703699

In [19]:
# KnnBlobs with variable k (50 cuts): per blob_centers group, a v-measure
# heatmap over (k, lb_f) stacked on a text grid of order_best, both faceted
# by preprocessing.
if experiment_name == '1587703699':
    for experiment in (group for _key, group in full_df.groupby(['blob_centers'])):
        data = experiment
        blob_centers = data['blob_centers'].iloc[0]
        name = f"centers_{blob_centers}"
        title = f"block sizes: {blob_centers} with 50 cuts"

        # Both panels share the same data, size and x/y encodings.
        grid = alt.Chart(data, width=400, height=150).encode(
            alt.X('k', type='ordinal', sort=alt.EncodingSortField(field='k', order='ascending'), axis=alt.Axis(grid=True)),
            alt.Y('lb_f', type='ordinal', sort=alt.EncodingSortField(field='lb_f', order='ascending'), axis=alt.Axis(grid=True)),
        )

        v_measure_chart = grid.mark_rect().encode(
            alt.Color('v_measure_score', type='quantitative', title='v-measure score',scale=alt.Scale(domain=[0, 1])),
        ).facet(
            facet=alt.Facet('preprocessing_name:N', title=None),
            title=title
        )

        text = grid.mark_text().encode(
            alt.Text('order_best'),
        ).facet(
            facet=alt.Facet('preprocessing_name:N', title=None),
            title='order of the best v-measure'
        )

        chart = graphic_settings(alt.vconcat(v_measure_chart, text))

        save(chart, f'{path_out / name}.svg')

### Plot SBM with variable p and q (no 4 blocks) id = 1587641195

In [13]:
# SBM with variable p and q, 4-block runs excluded: per (block_sizes, lb_f)
# group, a v-measure heatmap over (p, q) stacked on a text grid of
# order_best, both faceted by preprocessing.
if experiment_name == '1587641195':
    full_df = full_df[full_df['block_sizes'] != '[100, 100, 100, 100]']
    for experiment in (group for _key, group in full_df.groupby(['block_sizes', 'lb_f'])):
        data = experiment
        block_sizes = data['block_sizes'].iloc[0]
        lb_f = data['lb_f'].iloc[0]
        name = f"block_sizes_{block_sizes}_lbf_{lb_f}"
        title = f"block sizes: {block_sizes} with lb_f: {lb_f}"

        # Both panels share the same data, size and x/y encodings.
        grid = alt.Chart(data, width=400, height=150).encode(
            alt.X('p', type='ordinal', sort=alt.EncodingSortField(field='p', order='ascending'), axis=alt.Axis(grid=True)),
            alt.Y('q', type='ordinal', sort=alt.EncodingSortField(field='q', order='descending'), axis=alt.Axis(grid=True)),
        )

        v_measure_chart = grid.mark_rect().encode(
            alt.Color('v_measure_score', type='quantitative', title='v-measure score',scale=alt.Scale(domain=[0, 1])),
        ).facet(
            facet=alt.Facet('preprocessing_name:N', title=None),
            title=title
        )

        text = grid.mark_text().encode(
            alt.Text('order_best'),
        ).facet(
            facet=alt.Facet('preprocessing_name:N', title=None),
            title='order of the best v-measure'
        )

        chart = graphic_settings(alt.vconcat(v_measure_chart, text))

        save(chart, f'{path_out / name}.svg')

### Plot SBM with fixed p and q and different numbers of cuts id = 1587632158

In [60]:
# SBM with fixed p and q and a varying number of cuts: per block_sizes
# group, a v-measure heatmap over (nb_cuts, lb_f) stacked on a text grid of
# order_best, both faceted by preprocessing.
if experiment_name == '1587632158':
    for experiment in (group for _key, group in full_df.groupby(['block_sizes'])):
        data = experiment
        block_sizes = data['block_sizes'].iloc[0]
        name = f"block_sizes_{block_sizes}"
        title = f"block sizes: {block_sizes} with p=.3 and q=.1"

        # Both panels share the same data, size and x/y encodings.
        grid = alt.Chart(data, width=400, height=150).encode(
            alt.X('nb_cuts', type='ordinal', sort=alt.EncodingSortField(field='nb_cuts', order='ascending'), axis=alt.Axis(grid=True)),
            alt.Y('lb_f', type='ordinal', sort=alt.EncodingSortField(field='lb_f', order='descending'), axis=alt.Axis(grid=True)),
        )

        v_measure_chart = grid.mark_rect().encode(
            alt.Color('v_measure_score', type='quantitative', title='v-measure score',scale=alt.Scale(domain=[0, 1])),
        ).facet(
            facet=alt.Facet('preprocessing_name:N', title=None),
            title=title
        )

        text = grid.mark_text().encode(
            alt.Text('order_best'),
        ).facet(
            facet=alt.Facet('preprocessing_name:N', title=None),
            title='order of the best v-measure'
        )

        chart = graphic_settings(alt.vconcat(v_measure_chart, text))

        save(chart, f'{path_out / name}.svg')

### Plot KnnBlobs with fixed k and different numbers of cuts id = 1587632375

In [51]:
# KnnBlobs with fixed k and a varying number of cuts: per blob_centers
# group, a v-measure heatmap over (nb_cuts, lb_f) stacked on a text grid of
# order_best, both faceted by preprocessing.
if experiment_name == '1587632375':
    for experiment in (group for _key, group in full_df.groupby(['blob_centers'])):
        data = experiment
        blob_centers = data['blob_centers'].iloc[0]
        name = f"centers_{blob_centers}"
        title = f"block sizes: {blob_centers} with k=15"

        # Both panels share the same data, size and x/y encodings.
        grid = alt.Chart(data, width=400, height=150).encode(
            alt.X('nb_cuts', type='ordinal', sort=alt.EncodingSortField(field='nb_cuts', order='ascending'), axis=alt.Axis(grid=True)),
            alt.Y('lb_f', type='ordinal', sort=alt.EncodingSortField(field='lb_f', order='ascending'), axis=alt.Axis(grid=True)),
        )

        v_measure_chart = grid.mark_rect().encode(
            alt.Color('v_measure_score', type='quantitative', title='v-measure score',scale=alt.Scale(domain=[0, 1])),
        ).facet(
            facet=alt.Facet('preprocessing_name:N', title=None),
            title=title
        )

        text = grid.mark_text().encode(
            alt.Text('order_best'),
        ).facet(
            facet=alt.Facet('preprocessing_name:N', title=None),
            title='order of the best v-measure'
        )

        chart = graphic_settings(alt.vconcat(v_measure_chart, text))

        save(chart, f'{path_out / name}.svg')

### Plot different a for Knn id = 1587621343

In [12]:
# KnnBlobs with different agreement values: per blob_centers group, a
# v-measure heatmap over (agreement, lb_f) stacked on a text grid of
# order_best, both faceted by preprocessing.
if experiment_name == '1587621343':
    for experiment in (group for _key, group in full_df.groupby(['blob_centers'])):
        data = experiment
        blob_centers = data['blob_centers'].iloc[0]
        name = f"centers_{blob_centers}"
        title = f"block sizes: {blob_centers} with k=15"

        # Both panels share the same data, size and x/y encodings.
        grid = alt.Chart(data, width=400, height=150).encode(
            alt.X('agreement', type='ordinal', sort=alt.EncodingSortField(field='agreement', order='ascending'), axis=alt.Axis(grid=True)),
            alt.Y('lb_f', type='ordinal', sort=alt.EncodingSortField(field='lb_f', order='ascending'), axis=alt.Axis(grid=True)),
        )

        v_measure_chart = grid.mark_rect().encode(
            alt.Color('v_measure_score', type='quantitative', title='v-measure score',scale=alt.Scale(domain=[0, 1])),
        ).facet(
            facet=alt.Facet('preprocessing_name:N', title=None),
            title=title
        )

        text = grid.mark_text().encode(
            alt.Text('order_best'),
        ).facet(
            facet=alt.Facet('preprocessing_name:N', title=None),
            title='order of the best v-measure'
        )

        chart = graphic_settings(alt.vconcat(v_measure_chart, text))

        save(chart, f'{path_out / name}.svg')

### Plot different a for SBM id = 1587616865

In [36]:
# SBM with different agreement values: per block_sizes group, a v-measure
# heatmap over (agreement, lb_f) stacked on a text grid of order_best, both
# faceted by preprocessing.
if experiment_name == '1587616865':

    # order_best becomes the row-wise maximum of order_max and order_best.
    full_df['order_best'] = full_df[['order_max', 'order_best']].max(axis=1)

    for experiment in (group for _key, group in full_df.groupby(['block_sizes'])):
        data = experiment

        block_sizes = data['block_sizes'].iloc[0]
        name = f"block_sizes_{block_sizes}"
        title = f"block sizes: {block_sizes} with p=.3 and q=.1"

        # Both panels share the same data, size and x/y encodings.
        grid = alt.Chart(data, width=400, height=150).encode(
            alt.X('agreement', type='ordinal', sort=alt.EncodingSortField(field='agreement', order='ascending'), axis=alt.Axis(grid=True)),
            alt.Y('lb_f', type='ordinal', sort=alt.EncodingSortField(field='lb_f', order='descending'), axis=alt.Axis(grid=True)),
        )

        v_measure_chart = grid.mark_rect().encode(
            alt.Color('v_measure_score', type='quantitative', title='v-measure score',scale=alt.Scale(domain=[0, 1])),
        ).facet(
            facet=alt.Facet('preprocessing_name:N', title=None),
            title=title
        )

        text = grid.mark_text().encode(
            alt.Text('order_best'),
        ).facet(
            facet=alt.Facet('preprocessing_name:N', title=None),
            title='order of the best v-measure'
        )

        chart = graphic_settings(alt.vconcat(v_measure_chart, text))

        save(chart, f'{path_out / name}.svg')

In [43]:
# One figure per block_sizes group: v-measure heatmaps over (p, q), faceted
# by preprocessing and stacked vertically for 100, 200 and 300 cuts.
# NOTE(review): unlike the cells before it, this cell is not guarded by an
# `experiment_name` check and runs for whatever experiment is loaded.
for experiment in (group for _key, group in full_df.groupby(['block_sizes'])):
    # Labels must come from the group being plotted. `data` is only assigned
    # further down, so reading it here would pick up the previous group (or a
    # frame left over from another cell) and mislabel the figure and its file.
    block_sizes = experiment['block_sizes'].iloc[0]
    agreement = experiment['agreement'].iloc[0]
    name = f"block_sizes_{block_sizes}_a_{agreement}"
    title = f"block sizes: {block_sizes} with agreement: {agreement}"

    panels = []
    for cuts in (100, 200, 300):
        data = experiment[experiment.nb_cuts == cuts]
        if data.empty:
            # No runs with this many cuts: skip the panel instead of failing on .iloc[0].
            continue
        panels.append(
            alt.Chart(data, width=400, height=150).mark_rect().encode(
                alt.X('p', type='ordinal', sort=alt.EncodingSortField(field='p', order='ascending'), axis=alt.Axis(grid=True)),
                alt.Y('q', type='ordinal', sort=alt.EncodingSortField(field='q', order='descending'), axis=alt.Axis(grid=True)),
                alt.Color('v_measure_score', type='quantitative', title='v-measure score',scale=alt.Scale(domain=[0, 1])),
            ).facet(
                facet=alt.Facet('preprocessing_name:N', title=None),
                title=f'nb cuts: {data.nb_cuts.iloc[0]}'
            )
        )
    if not panels:
        # Nothing to draw for this group.
        continue

    chart = alt.vconcat(*panels).properties(
        title=title
    )

    # Same look as the other figures: Courier fonts, grey titles, no grid lines, no view border.
    chart = chart.configure_title(
            fontSize=10,
            font='Courier',
            anchor='middle',
            color='gray'
        ).configure_axis(
            gridOpacity = 0.0,

            labelFont='Courier',
            labelColor='black',

            titleFont='Courier',
            titleColor='gray',
            grid=False
        ).configure_axisX(
            labelAngle=0,
        ).configure_axisY(
            titleAngle=0,
            titlePadding=10,
        ).configure_legend(
            labelFont='Courier',
            labelColor='black',

            titleFont='Courier',
            titleColor='gray',
            titleAnchor='middle'
        ).configure_view(strokeOpacity=0)
    save(chart, f'{path_out / name}.svg')

### Plot comparison between preprocessing methods id = 1586947660

In [15]:
# Comparison between preprocessing methods: per (block_sizes, agreement)
# group, a v-measure heatmap over (lb_f, nb_cuts) stacked on a text grid of
# order_max, both faceted by preprocessing.
# NOTE(review): this cell is not guarded by an `experiment_name` check.
for experiment in (group for _key, group in full_df.groupby(['block_sizes', 'agreement'])):
    data = experiment

    block_sizes = data['block_sizes'].iloc[0]
    agreement = data['agreement'].iloc[0]
    name = f"block_sizes_{block_sizes}_a_{agreement}"
    title = f"block sizes: {block_sizes} with agreement: {agreement}"

    # Both panels share the same data, size and x/y encodings.
    grid = alt.Chart(data, width=400, height=150).encode(
        alt.Y('nb_cuts', type='ordinal', sort=alt.EncodingSortField(field='nb_cuts', order='descending'), axis=alt.Axis(grid=True)),
        alt.X('lb_f', type='ordinal', sort=alt.EncodingSortField(field='lb_f', order='ascending'), axis=alt.Axis(grid=True)),
    )

    v_measure_chart = grid.mark_rect().encode(
        alt.Color('v_measure_score', type='quantitative', title='v-measure score',scale=alt.Scale(domain=[0, 1])),
    ).facet(
        facet=alt.Facet('preprocessing_name:N', title=None),
        title=title
    )

    text = grid.mark_text().encode(
        alt.Text('order_max'),
    ).facet(
        facet=alt.Facet('preprocessing_name:N', title=None),
        title='max order'
    )

    stacked = alt.vconcat(v_measure_chart, text)

    # Figure styling: Courier fonts, grey titles, no grid lines, no view border.
    chart = stacked.configure_title(
            fontSize=10,
            font='Courier',
            anchor='middle',
            color='gray'
        ).configure_axis(
            gridOpacity = 0.0,

            labelFont='Courier',
            labelColor='black',

            titleFont='Courier',
            titleColor='gray',
            grid=False
        ).configure_axisX(
            labelAngle=0,
        ).configure_axisY(
            titleAngle=0,
            titlePadding=30,
        ).configure_legend(
            labelFont='Courier',
            labelColor='black',

            titleFont='Courier',
            titleColor='gray',
            titleAnchor='middle'
        ).configure_view(strokeOpacity=0)
    save(chart, f'{path_out / name}.svg')