In [1]:
import glob
import os

from pathlib import Path


import pandas as pd

import altair as alt
from altair.expr import datum
from altair_saver import save

In [2]:
# Show the index of available experiment runs (columns: `id`, `Type of experiment`).
# The `id` values listed here are what `experiment_name` is set to in the next cell.
pd.read_csv('../experiments.csv')

Unnamed: 0,id,Type of experiment
0,0,Local experiments
1,1586947660,KL vs MF algorithms
2,1586939612,KnnBlobs experiments
3,1586946592,SBM experiments


In [3]:
# Id of the experiment run to plot; it selects the sub-directory used under
# ../output (results to read) and ../plots (figures to write).
experiment_name = '1586946592'

In [4]:
# Absolute input directory holding the result files of the selected experiment.
path_in = Path(f'../output/{experiment_name}').resolve()
# Absolute output directory where the generated figures are saved.
path_out = Path(f'../plots/{experiment_name}').resolve()
# Create the output directory (and missing parents); no error if it already exists.
path_out.mkdir(parents=True, exist_ok=True)

In [5]:
# Load every result file found (recursively) under `path_in` into one DataFrame.
#
# The frames are collected in a list and concatenated once at the end:
# `DataFrame.append` was removed in pandas 2.0, and calling it inside the loop
# re-copied the whole accumulated frame on every iteration.
frames = []
for subdir, dirs, files in os.walk(path_in):
    # Sorting `dirs` in place makes os.walk descend in a stable order, and sorting
    # `files` does the same within a directory, so the row order of `full_df` does
    # not depend on the filesystem's listing order.
    dirs.sort()
    for file in sorted(files):
        # index_col=0: use the first CSV column as the row index.
        frames.append(pd.read_csv(Path(subdir) / file, index_col=0))

# pd.concat raises ValueError on an empty list; fall back to an empty frame, which
# is what the previous append-based loop produced when no file was found.
full_df = pd.concat(frames) if frames else pd.DataFrame()

In [46]:
# Display the concatenated results table built from the files under `path_in`.
full_df

Unnamed: 0,agreement,block_sizes,completeness,dataset_name,dataset_type,homogeneity,lb_f,nb_cuts,order_max,p,percentile_orders,preprocessing_name,q,seed,unique_id,v_measure_score
0,50.0,"[100, 100]",1.000000,sbm,graph,1.281371e-15,0.2,100.0,2200.0,0.05,100.0,fid_mat,0.70,42.0,1586946592,2.562741e-15
0,50.0,"[100, 100]",1.000000,sbm,graph,1.000000e+00,0.2,100.0,302.0,0.30,100.0,fid_mat,0.03,42.0,1586946592,1.000000e+00
0,40.0,"[80, 100]",1.000000,sbm,graph,1.000000e+00,0.2,300.0,81.0,0.80,100.0,karnig_lin,0.01,42.0,1586946592,1.000000e+00
0,45.0,"[90, 100]",1.000000,sbm,graph,1.604922e-16,0.2,100.0,2514.0,0.60,100.0,karnig_lin,0.40,42.0,1586946592,3.209843e-16
0,25.0,"[50, 100]",0.910697,sbm,graph,8.968977e-01,0.2,300.0,3088.0,1.00,100.0,karnig_lin,0.40,42.0,1586946592,9.037446e-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,50.0,"[100, 100, 100, 100, 100]",0.931126,sbm,graph,9.306204e-01,0.2,100.0,2612.0,0.20,100.0,karnig_lin,0.05,42.0,1586946592,9.308731e-01
0,50.0,"[100, 100, 100, 100, 100]",1.000000,sbm,graph,1.000000e+00,0.2,300.0,1207.0,0.20,100.0,karnig_lin,0.02,42.0,1586946592,1.000000e+00
0,50.0,"[100, 100, 100, 100, 100]",0.000346,sbm,graph,1.083066e-04,0.2,200.0,30465.0,0.30,100.0,fid_mat,0.90,42.0,1586946592,1.650220e-04
0,50.0,"[100, 100, 100, 100, 100]",1.000000,sbm,graph,1.000000e+00,0.2,100.0,3136.0,0.80,100.0,karnig_lin,0.05,42.0,1586946592,1.000000e+00


In [43]:
# For every experiment group, draw one heatmap of the v-measure score over the
# (p, q) grid per `nb_cuts` value, stack the heatmaps vertically and save the
# result as an SVG in `path_out`.

# `nb_cuts` values that get their own heatmap panel, in top-to-bottom order.
NB_CUTS_PANELS = (100, 200, 300)


def _v_measure_heatmap(data):
    """Return a faceted (p, q) heatmap of `v_measure_score`.

    Parameters
    ----------
    data : pandas.DataFrame
        Non-empty rows sharing a single `nb_cuts` value. The columns read are
        `p`, `q`, `v_measure_score`, `preprocessing_name` and `nb_cuts`.

    Returns
    -------
    A faceted Altair chart with one facet per `preprocessing_name`, titled with
    the `nb_cuts` value of the first row of `data`.
    """
    return alt.Chart(data, width=400, height=150).mark_rect().encode(
        alt.X('p', type='ordinal', sort=alt.EncodingSortField(field='p', order='ascending'), axis=alt.Axis(grid=True)),
        alt.Y('q', type='ordinal', sort=alt.EncodingSortField(field='q', order='descending'), axis=alt.Axis(grid=True)),
        alt.Color('v_measure_score', type='quantitative', title='v-measure score', scale=alt.Scale(domain=[0, 1])),
    ).facet(
        facet=alt.Facet('preprocessing_name:N', title=None),
        title=f'nb cuts: {data.nb_cuts.iloc[0]}'
    )


# Group by both keys that appear in the file name and title, so that each saved
# figure really corresponds to one (block_sizes, agreement) combination.
# Note: groupby drops rows whose key is NaN.
experiments = [group for _, group in full_df.groupby(['block_sizes', 'agreement'])]
for experiment in experiments:
    # Label the figure from the group being plotted. The previous version read
    # these values from `data` before assigning it, which raised a NameError on a
    # fresh kernel and otherwise used the subset left over from the previous
    # iteration.
    block_sizes = experiment['block_sizes'].iloc[0]
    agreement = experiment['agreement'].iloc[0]
    name = f"block_sizes_{block_sizes}_a_{agreement}"
    title = f"block sizes: {block_sizes} with agreement: {agreement}"

    # One panel per nb_cuts value; values with no rows are skipped because an
    # empty subset has no first row to build the panel title from.
    panels = [
        _v_measure_heatmap(experiment[experiment.nb_cuts == nb_cuts])
        for nb_cuts in NB_CUTS_PANELS
        if (experiment.nb_cuts == nb_cuts).any()
    ]
    if not panels:
        # Nothing to draw for this group.
        continue

    chart = alt.vconcat(*panels).properties(
        title=title
    )

    chart = chart.configure_title(
            fontSize=10,
            font='Courier',
            anchor='middle',
            color='gray'
        ).configure_axis(
            gridOpacity = 0.0,

            labelFont='Courier',
            labelColor='black',

            titleFont='Courier',
            titleColor='gray',
            grid=False
        ).configure_axisX(
            labelAngle=0,
        ).configure_axisY(
            titleAngle=0,
            titlePadding=10,
        ).configure_legend(
            labelFont='Courier',
            labelColor='black',

            titleFont='Courier',
            titleColor='gray',
            titleAnchor='middle'
        ).configure_view(strokeOpacity=0)
    save(chart, f'{path_out / name}.svg')

### Plot comparison between preprocessing methods (experiment id = 1586947660)

In [15]:
# For every (block_sizes, agreement) group: a v-measure heatmap over the
# lb_f x nb_cuts grid stacked above a text grid printing `order_max`, both
# faceted by preprocessing method, saved as one SVG in `path_out`.

# Theme settings applied to every figure produced by this cell.
title_style = dict(fontSize=10, font='Courier', anchor='middle', color='gray')
axis_style = dict(
    gridOpacity=0.0,
    labelFont='Courier',
    labelColor='black',
    titleFont='Courier',
    titleColor='gray',
    grid=False,
)
legend_style = dict(
    labelFont='Courier',
    labelColor='black',
    titleFont='Courier',
    titleColor='gray',
    titleAnchor='middle',
)


def _grid_axes():
    """Build fresh ordinal channels: Y = nb_cuts (descending), X = lb_f (ascending)."""
    y_channel = alt.Y(
        'nb_cuts',
        type='ordinal',
        sort=alt.EncodingSortField(field='nb_cuts', order='descending'),
        axis=alt.Axis(grid=True),
    )
    x_channel = alt.X(
        'lb_f',
        type='ordinal',
        sort=alt.EncodingSortField(field='lb_f', order='ascending'),
        axis=alt.Axis(grid=True),
    )
    return y_channel, x_channel


grouped = full_df.groupby(['block_sizes', 'agreement'])
experiments = [frame for _key, frame in grouped]
for data in experiments:
    sizes = data['block_sizes'].iloc[0]
    agreement = data['agreement'].iloc[0]
    name = f"block_sizes_{sizes}_a_{agreement}"
    title = f"block sizes: {sizes} with agreement: {agreement}"

    # Upper panel: cell colour encodes the v-measure score on a fixed [0, 1] scale.
    heat_y, heat_x = _grid_axes()
    score_color = alt.Color(
        'v_measure_score',
        type='quantitative',
        title='v-measure score',
        scale=alt.Scale(domain=[0, 1]),
    )
    v_measure_chart = (
        alt.Chart(data, width=400, height=150)
        .mark_rect()
        .encode(heat_y, heat_x, score_color)
        .facet(facet=alt.Facet('preprocessing_name:N', title=None), title=title)
    )

    # Lower panel: same grid, each cell labelled with its `order_max` value.
    text_y, text_x = _grid_axes()
    text = (
        alt.Chart(data, width=400, height=150)
        .mark_text()
        .encode(text_y, text_x, alt.Text('order_max'))
        .facet(facet=alt.Facet('preprocessing_name:N', title=None), title='max order')
    )

    stacked = alt.vconcat(v_measure_chart, text)
    chart = (
        stacked
        .configure_title(**title_style)
        .configure_axis(**axis_style)
        .configure_axisX(labelAngle=0)
        .configure_axisY(titleAngle=0, titlePadding=30)
        .configure_legend(**legend_style)
        .configure_view(strokeOpacity=0)
    )
    save(chart, f'{path_out / name}.svg')