In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import project_path

In [4]:
import glob
import os

from pathlib import Path
from itertools import product

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import matplotlib.patches as patches

from src.plotting import make_result_heatmap, add_lines

In [5]:
# Display the experiment registry: one row per experiment id together with a
# short description of what that run was testing.
pd.read_csv('../experiments.csv')

Unnamed: 0,id,Type of experiment
0,0,Local experiments
1,1587919855,p vs q algorithms for SBM with 50 cuts
2,1587918440,p vs q algorithms for SBM with 50 cuts for 4 ...
3,1587804052,p vs q algorithms for SBM with 50 cuts for 5 ...
4,1587703699,KnnBlobs experiments with different k and 50 ...
5,1586946592,SBM experiments
6,1587621343,KnnBlobs with 100 cuts testing different a
7,1587629564,KL vs MF algorithms on SBM
8,1587632158,SBM with fixed p and q and different numbers ...
9,1587632375,KnnBlobs with fixed k and different numbers o...


In [6]:
# Id of the experiment run to plot. It selects the ../output/<id> folder that
# is read, the ../plots/<id> folder that is written, and which of the guarded
# plotting cells below actually runs.
# NOTE(review): this id is not among the rows of experiments.csv displayed
# above -- confirm the registry is up to date.
experiment_name = '1589869223'

In [7]:
# Input results live under ../output/<experiment id>, figures are written under
# ../plots/<experiment id>. Both are turned into absolute paths, and the plot
# folder (including missing parents) is created if it is not there yet.
path_in = Path('..', 'output', experiment_name).resolve()
path_out = Path('..', 'plots', experiment_name).resolve()
path_out.mkdir(parents=True, exist_ok=True)

In [8]:
# Gather every result file found below `path_in` (recursively) into one frame.
#
# The per-file frames are collected in a list and concatenated once at the end:
# `DataFrame.append` was deprecated in pandas 1.4 and removed in pandas 2.0,
# and appending inside the loop re-copies all accumulated rows on each pass.
result_frames = []
for subdir, dirs, files in os.walk(path_in):
    for file in files:
        # Every file encountered is parsed as CSV; its first column is used
        # as the (per-file) index.
        current_df = pd.read_csv(Path(subdir) / file, index_col=0)
        result_frames.append(current_df)

# `ignore_index=True` renumbers the rows 0..N-1, which is what the previous
# `reset_index(drop=True)` did. `pd.concat` rejects an empty list, so an empty
# output folder falls back to an empty frame as before.
full_df = pd.concat(result_frames, ignore_index=True) if result_frames else pd.DataFrame()

In [9]:
# Preview the combined results, leaving out rows in which every column is NaN.
# The filtered frame is only displayed, not assigned, so `full_df` is unchanged
# and rows with some missing values (e.g. a NaN `ars`) are still shown.
full_df.dropna(how='all')

Unnamed: 0,agreement,ars,dataset_name,dataset_type,max_order,mindset_sizes,nb_questions,nb_useless,noise,order_best,percentile_orders,preprocessing_name,seed,unique_id
0,65.0,0.446449,mindsets,discrete,5.29,"[100, 100]",20.0,20.0,0.18,5.03,100.0,features,42.0,1.589869e+09
1,85.0,0.000000,mindsets,discrete,4.93,"[100, 100]",20.0,20.0,0.39,4.90,100.0,features,42.0,1.589869e+09
2,40.0,,mindsets,discrete,6.50,"[100, 100]",20.0,10.0,0.42,,100.0,features,42.0,1.589869e+09
3,85.0,0.057766,mindsets,discrete,5.59,"[100, 100]",20.0,15.0,0.42,5.55,100.0,features,42.0,1.589869e+09
4,25.0,0.030029,mindsets,discrete,9.63,"[100, 100]",20.0,0.0,0.42,9.58,100.0,features,42.0,1.589869e+09
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15995,50.0,1.000000,mindsets,discrete,5.80,"[100, 100]",20.0,20.0,0.05,4.80,100.0,features,42.0,1.589869e+09
15996,80.0,0.000000,mindsets,discrete,9.59,"[100, 100]",20.0,0.0,0.45,9.52,100.0,features,42.0,1.589869e+09
15997,50.0,0.001965,mindsets,discrete,4.93,"[100, 100]",20.0,20.0,0.39,4.90,100.0,features,42.0,1.589869e+09
15998,30.0,0.003457,mindsets,discrete,4.91,"[100, 100]",20.0,20.0,0.42,4.88,100.0,features,42.0,1.589869e+09


# Mindsets

In [10]:
# Parameter grid for the theoretical bound: 20 noise levels evenly spaced in
# [0, 0.5] (rounded to two decimals) and the values 20, 25, ..., 95 for `a`.
n = 200
noises = np.linspace(0, 0.5, 20).round(2)
As = 5 * np.arange(4, 20)

# mask[i, j] is True exactly when As[j] / n <= (1 - 3 * noises[i]) / 2.
# The whole grid is evaluated at once by broadcasting the noise levels down
# the rows and the `a` values across the columns.
mask = (As[np.newaxis, :] / n) <= ((1 - 3 * noises[:, np.newaxis]) / 2)

# Label the grid, then order rows by decreasing noise and columns by
# increasing `a` before extracting the plain boolean array.
theory_df = (
    pd.DataFrame(mask, index=noises, columns=As)
    .sort_index(ascending=False)
    .sort_index(axis=1, ascending=True)
)
values_theory = theory_df.to_numpy()

### Plot Mindsets for 2 blocks with theoretical bounds id = 1589304428

In [50]:
# Heatmaps for experiment 1589304428: one SVG per value of `nb_questions`,
# showing `homogeneity` over the (agreement, noise) grid with the theoretical
# bound (`values_theory`) drawn on top.
if experiment_name == '1589304428':
    experiments = [x for _, x in full_df.groupby(['nb_questions'])]
    for experiment in experiments:

        fig, ax = plt.subplots(figsize=(15, 10))

        title = f"Mindset with {experiment['mindset_sizes'].iloc[0]} mindsets with {experiment['nb_questions'].iloc[0]} questions"
        name = f"mindset_sizes_{experiment['mindset_sizes'].iloc[0]}_questions_{experiment['nb_questions'].iloc[0]}"

        columns = ['agreement', 'noise', 'homogeneity']
        data = experiment[columns]

        # Pass the axes directly after the data, matching the other
        # `make_result_heatmap` call in this notebook (the cell guarded by
        # experiment id 1589869223). The title is therefore applied on the
        # axes here instead of being handed to the helper.
        # NOTE(review): the helper's signature lives in src.plotting and is
        # not visible from this notebook -- confirm the argument order.
        make_result_heatmap(data, ax, x_column=columns[0], y_column=columns[1], values_column=columns[2])
        ax.set_title(title)
        add_lines(values_theory, ax, left=False)

        ax.set_axisbelow(True)
        fig.tight_layout()

        fig.savefig(f'{path_out / name}.svg')
        # Close each figure after saving so figures do not pile up in memory.
        plt.close(fig)

### Plot Mindsets for 2 blocks with theoretical bounds id = 1589869223

In [73]:
# Scratch inspection: split one experiment into its (agreement, noise) cells
# and display the third cell to see the individual rows it contains.
# NOTE(review): `experiment` and `columns` are not defined in this cell; they
# are whatever the most recently executed plotting loop left behind, so this
# cell depends on execution order and on which guarded plotting cell ran.
runs_per_cell = experiment[columns].groupby(['agreement', 'noise'], as_index=False)
a = [x for _, x in runs_per_cell]
a[2]

Unnamed: 0,agreement,noise,homogeneity
313,20.0,0.05,0.249273
8183,20.0,0.05,0.249273
8295,20.0,0.05,0.249273
8582,20.0,0.05,0.249273
11671,20.0,0.05,0.249273
11683,20.0,0.05,0.249273
13572,20.0,0.05,0.249273
14725,20.0,0.05,0.249273
15441,20.0,0.05,0.249273
15536,20.0,0.05,0.249273


In [13]:
# Heatmaps for experiment 1589869223: one PDF per (nb_questions, nb_useless)
# combination, showing the mean `ars` over the (agreement, noise) grid with
# the theoretical bound (`values_theory`) drawn on top.
if experiment_name == '1589869223':
    experiments = [group for _, group in full_df.groupby(['nb_questions', 'nb_useless'])]
    for experiment in experiments:
        # Replace every NaN in this group by 0 (this covers missing `ars`
        # values) so those rows take part in the average below.
        experiment = experiment.fillna(0)

        # Labels are taken from the first row of the group.
        sizes = experiment['mindset_sizes'].iloc[0]
        n_questions = experiment['nb_questions'].iloc[0]
        n_useless = experiment['nb_useless'].iloc[0]

        # `title` is built but not handed to any plotting call in this cell.
        title = f"Mindset with {sizes} mindsets with {n_questions} questions and {n_useless} useless questions"
        name = f"mindset_sizes_{sizes}_questions_{n_questions}_useless_{n_useless}"

        # Average the repeated rows that share the same (agreement, noise).
        columns = ['agreement', 'noise', 'ars']
        x_col, y_col, value_col = columns
        data = experiment[columns].groupby([x_col, y_col], as_index=False).mean()

        fig, ax = plt.subplots(figsize=(15, 10))
        make_result_heatmap(data, ax, x_column=x_col, y_column=y_col, values_column=value_col)
        add_lines(values_theory, ax, left=False)

        ax.set_axisbelow(True)
        fig.tight_layout()

        # The suffix is appended as text because `name` itself contains dots.
        fig.savefig(str(path_out / name) + '.pdf')
        plt.close(fig)