In [28]:
import os
import pandas as pd

# Root directory that is globbed for the per-experiment result folders.
res_path = '/home/gabi/diplomka/results/'

# Keys under which the baseline tables are stored; paired positionally with
# the baseline CSV file names inside `process_res_directory`.
base_names = [
    'labeled_train',
    'labeled_valid_unseen_networks-val_loss',
    'labeled_valid_unseen_images-val_loss',
    'labeled_valid_unseen_images-test_loss',
    'labeled_valid_unseen_images_nets-test_loss',
]

def process_res_directory(dir_path: str, with_baselines=True):
    """Load the loss and metric tables stored in one result directory.

    Reads ``loss.csv`` and ``metrics.csv`` from ``dir_path`` (the first CSV
    column is used as the index), joins them column-wise, turns the index into
    a regular column and prepends an ``exp_name`` column.

    Parameters
    ----------
    dir_path : str
        Result directory. The experiment name is the name of its parent
        directory; a trailing path separator is optional.
    with_baselines : bool, default True
        When true, the baseline CSV files located in ``dir_path`` are read
        as well.

    Returns
    -------
    tuple
        ``(exp_name, res_df)`` when ``with_baselines`` is false, otherwise
        ``(exp_name, res_df, baselines)`` where ``baselines`` maps every entry
        of the module-level ``base_names`` to the DataFrame read from the
        baseline file at the same position.
    """
    loss_df = pd.read_csv(os.path.join(dir_path, 'loss.csv'), index_col=0)
    metrics_df = pd.read_csv(os.path.join(dir_path, 'metrics.csv'), index_col=0)

    res_df = pd.concat([loss_df, metrics_df], axis=1).reset_index()

    # Name of the parent directory. normpath strips a trailing separator, so
    # 'exp/run/' and 'exp/run' both yield 'exp'; the previous
    # split('/')[-3] lookup was only correct when the trailing slash was present.
    exp_name = os.path.basename(os.path.dirname(os.path.normpath(dir_path)))
    res_df.insert(0, 'exp_name', exp_name)

    if not with_baselines:
        return exp_name, res_df

    base_files = ['train_long_baseline.csv', 'valid_long_baseline.csv',
                  'test_small_split_baseline.csv', 'test_train_long_baseline.csv',
                  'test_valid_long_baseline.csv']
    # base_names and base_files are matched by position.
    baselines = {
        name: pd.read_csv(os.path.join(dir_path, baseline), index_col=0)
        for name, baseline in zip(base_names, base_files)
    }

    return exp_name, res_df, baselines

In [29]:
import glob

dfs = []
baseline_dict = {}
# glob returns matches in arbitrary order; sorting keeps the row order of
# `dfs` and the directory used for the baselines stable between runs.
in_dirs = sorted(glob.glob(os.path.join(res_path, 'with*/*/')))
if not in_dirs:
    raise FileNotFoundError(f"no result directories match 'with*/*/' under {res_path}")

# Collect one results frame per run directory, then stack them.
for dir_p in in_dirs:
    _, res = process_res_directory(dir_p, with_baselines=False)
    dfs.append(res)

dfs = pd.concat(dfs)

# The baseline tables are read from the first directory only.
_, _, base = process_res_directory(in_dirs[0])

In [110]:
base

{'labeled_train':   loss_name      mean       std       min       max    median
 0       MSE  1.000185  0.289896  0.314191  2.707990  0.962370
 1        L1  0.765579  0.106959  0.412319  1.209878  0.761163
 2     Huber  0.407250  0.089773  0.151409  0.813649  0.401320,
 'labeled_valid_unseen_networks-val_loss':   loss_name      mean       std       min       max    median
 0       MSE  0.834597  1.126022  0.037128  6.101653  0.304573
 1        L1  0.710611  0.525986  0.105278  2.441400  0.511811
 2     Huber  0.359268  0.425021  0.018274  1.943842  0.148049,
 'labeled_valid_unseen_images-val_loss':   loss_name      mean       std       min       max    median
 0       MSE  0.958540  0.337582  0.292087  2.829140  0.905116
 1        L1  0.725408  0.108752  0.441126  1.138974  0.717286
 2     Huber  0.375167  0.093793  0.142421  0.734999  0.367203,
 'labeled_valid_unseen_images-test_loss':   loss_name      mean       std       min       max    median
 0       MSE  1.007835  0.288054  0.30

In [30]:
dfs.columns

Index(['exp_name', 'index', 'labeled_total', 'labeled_unlabeled',
       'labeled_labeled', 'unlabeled_total', 'unlabeled_unlabeled',
       'unlabeled_labeled', 'reference_total', 'reference_unlabeled',
       'reference_labeled', 'labeled_valid_unseen_networks-val_loss',
       'labeled_valid_unseen_networks-MSE', 'labeled_valid_unseen_networks-L1',
       'labeled_valid_unseen_networks-val_loss_min',
       'labeled_valid_unseen_networks-val_loss_max',
       'labeled_valid_unseen_networks-val_loss_std',
       'labeled_valid_unseen_networks-val_loss_median',
       'labeled_valid_unseen_images-val_loss',
       'labeled_valid_unseen_images-MSE', 'labeled_valid_unseen_images-L1',
       'labeled_valid_unseen_images-val_loss_min',
       'labeled_valid_unseen_images-val_loss_max',
       'labeled_valid_unseen_images-val_loss_std',
       'labeled_valid_unseen_images-val_loss_median', 'labeled_acc_ops_val',
       'labeled_mean_corr_adj_val', 'labeled_mean_fal_pos_adj_val',
       'la

In [35]:
# Columns of `dfs` that are plotted against a baseline. The plotting loop zips
# this list with `base_keys`, `sample_list` and `titles`, so the order matters.
shadow_columns = [
    'labeled_labeled',                          # zipped with 'labeled_train'
    'labeled_valid_unseen_networks-val_loss',
    'labeled_valid_unseen_images-val_loss',
    # TODO: add the test-set columns
]

In [34]:
# Names of the baseline tables, in the insertion order of `base`.
base_keys = [key for key in base]
base_keys

['labeled_train',
 'labeled_valid_unseen_networks-val_loss',
 'labeled_valid_unseen_images-val_loss',
 'labeled_valid_unseen_images-test_loss',
 'labeled_valid_unseen_images_nets-test_loss']

In [36]:
base['labeled_train']

Unnamed: 0,loss_name,mean,std,min,max,median
0,MSE,1.000185,0.289896,0.314191,2.70799,0.96237
1,L1,0.765579,0.106959,0.412319,1.209878,0.761163
2,Huber,0.40725,0.089773,0.151409,0.813649,0.40132


In [38]:
(dfs['labeled_valid_unseen_networks-L1'] == dfs['labeled_valid_unseen_networks-val_loss']).all()

True

In [43]:
import numpy as np

# Sample counts per data split, as hard-coded by the author.
train_samples = 608_000
val_samples = 77_000
test_samples = 122_000
test_train_samples = 1_094_000
test_val_samples = 154_000

# Zipped positionally with the baseline keys in the plotting loop.
sample_list = [
    train_samples,
    val_samples,
    test_samples,
    test_train_samples,
    test_val_samples,
]

def ci_95(std, sample_size):
    """Return the half-width of a 95% confidence interval for a mean.

    Evaluates ``1.96 * std / sqrt(sample_size)``; works element-wise when the
    arguments are NumPy arrays.
    """
    z_95 = 1.96  # multiplier used for the 95% interval
    root_n = np.sqrt(sample_size)
    return z_95 * std / root_n

In [111]:
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

titles = ['Train loss', 'Validation loss (unseen networks)',
         'Validation loss (unseen images)',
         'Test loss (unseen images)',
         'Test loss (unseen images and networks)']

# Not read anywhere in this cell; kept at its final value in case another cell
# relies on it.
use_stats = ''

save_path = '/home/gabi/diplomka/master-thesis/img/info-losses/'
# makedirs also creates missing parent directories and does not fail when the
# directory already exists (os.mkdir does neither).
os.makedirs(save_path, exist_ok=True)

# zip stops at the shortest input, so only the entries of `shadow_columns`
# are plotted even though the other lists are longer.
for run_column, ref_columns, sample_s, title in zip(shadow_columns, base_keys, sample_list, titles):
    plt.figure(figsize=(7,5))
    stats_title = ''

    # Draw the mean curve and, when the column exists, the median curve.
    for stats in [run_column, run_column + '_median']:

        if stats not in dfs.columns:
            continue

        is_median = 'median' in stats

        run_data = dfs[['exp_name', 'index', stats]]
        print(run_data.columns)

        # Row 1 of the displayed baseline tables is the L1 loss.
        ref = base[ref_columns].iloc[1]

        if is_median:
            stats_title += f', median ref: {ref["median"].round(2)}'
        else:
            stats_title += f', mean ref: {ref["mean"].round(2)}'

        sns.lineplot(data=run_data, x='index', y=stats, label='Median' if is_median else 'Mean')

    # TODO: draw the reference mean with its ci_95(ref['std'], sample_s) band
    # as a horizontal line / filled region.

    plt.title(f"{title}{stats_title}", fontsize=13)
    plt.xlabel('Epoch', fontsize=12)
    plt.ylabel('Loss', fontsize=12)
    plt.tight_layout()
    plt.savefig(os.path.join(save_path, f"{run_column}.png"), dpi=500)
    plt.show()

<IPython.core.display.Javascript object>

Index(['exp_name', 'index', 'labeled_labeled'], dtype='object')


<IPython.core.display.Javascript object>

Index(['exp_name', 'index', 'labeled_valid_unseen_networks-val_loss'], dtype='object')
Index(['exp_name', 'index', 'labeled_valid_unseen_networks-val_loss_median'], dtype='object')


<IPython.core.display.Javascript object>

Index(['exp_name', 'index', 'labeled_valid_unseen_images-val_loss'], dtype='object')
Index(['exp_name', 'index', 'labeled_valid_unseen_images-val_loss_median'], dtype='object')


In [49]:
# This is enough for now; the test set will be shown as box plots. Next: the same plots for reconstruction accuracy and train loss.

Unnamed: 0,exp_name,index,labeled_labeled
0,with_ref_1,1,0.253823
1,with_ref_1,2,0.201816
2,with_ref_1,3,0.193596
3,with_ref_1,4,0.190007
4,with_ref_1,5,0.187761
...,...,...,...
25,with_ref_9,26,0.172180
26,with_ref_9,27,0.172187
27,with_ref_9,28,0.172097
28,with_ref_9,29,0.172164


In [84]:
dfs.columns

Index(['exp_name', 'index', 'labeled_total', 'labeled_unlabeled',
       'labeled_labeled', 'unlabeled_total', 'unlabeled_unlabeled',
       'unlabeled_labeled', 'reference_total', 'reference_unlabeled',
       'reference_labeled', 'labeled_valid_unseen_networks-val_loss',
       'labeled_valid_unseen_networks-MSE', 'labeled_valid_unseen_networks-L1',
       'labeled_valid_unseen_networks-val_loss_min',
       'labeled_valid_unseen_networks-val_loss_max',
       'labeled_valid_unseen_networks-val_loss_std',
       'labeled_valid_unseen_networks-val_loss_median',
       'labeled_valid_unseen_images-val_loss',
       'labeled_valid_unseen_images-MSE', 'labeled_valid_unseen_images-L1',
       'labeled_valid_unseen_images-val_loss_min',
       'labeled_valid_unseen_images-val_loss_max',
       'labeled_valid_unseen_images-val_loss_std',
       'labeled_valid_unseen_images-val_loss_median', 'labeled_acc_ops_val',
       'labeled_mean_corr_adj_val', 'labeled_mean_fal_pos_adj_val',
       'la

In [113]:
stats_list = ['unlabeled', 'acc_ops_val', 'acc_adj_val', 'validity', 'uniqueness']
titles = ['VAE loss', 'Operation reconstruction accuracy', 'Adjacency reconstruction accuracy',
         'Validity', 'Uniqueness']
%matplotlib notebook

save_path = '/home/gabi/diplomka/master-thesis/img/info-arch-comparison/'
if not os.path.exists(save_path):
    os.mkdir(save_path)


for title, column in zip(titles, stats_list):
    plt.figure(figsize=(7,5))

    label_name = f"labeled_{column}" if column not in ['validity', 'uniqueness'] else f"unlabeled_{column}"
    ref_name = f"reference_{column}"
    
    run_data = dfs[['exp_name', 'index', label_name, ref_name, 'unlabeled_unlabeled']]
    print(run_data.columns)

    info_nas_label = 'Info-NAS' if column != 'unlabeled' else 'Info-NAS unlabeled batches'
    arch2vec_label = 'arch2vec' if column != 'unlabeled' else 'arch2vec all batches'
    sns.lineplot(data=run_data, x='index', y=label_name, label=info_nas_label)
    sns.lineplot(data=run_data, x='index', y=ref_name, label=arch2vec_label)
    
    if column == 'unlabeled':
        sns.lineplot(data=run_data, x='index', y='unlabeled_unlabeled', label='Info-NAS unlabeled batches')
    #plt.hlines(ref_mean, 1, 30, colors='r')
    #plt.fill_between(np.arange(1, 31), ref_mean + ref_ci, ref_mean - ref_ci)
    
    plt.title(f"{title}", fontsize=14)
    plt.xlabel('Epoch', fontsize=12)
    plt.ylabel(f'Mean value', fontsize=12)
    plt.tight_layout()
    plt.savefig(os.path.join(save_path, f"{column}.png"), dpi=500)
    plt.show()

<IPython.core.display.Javascript object>

Index(['exp_name', 'index', 'labeled_unlabeled', 'reference_unlabeled',
       'unlabeled_unlabeled'],
      dtype='object')


<IPython.core.display.Javascript object>

Index(['exp_name', 'index', 'labeled_acc_ops_val', 'reference_acc_ops_val',
       'unlabeled_unlabeled'],
      dtype='object')


<IPython.core.display.Javascript object>

Index(['exp_name', 'index', 'labeled_acc_adj_val', 'reference_acc_adj_val',
       'unlabeled_unlabeled'],
      dtype='object')


<IPython.core.display.Javascript object>

Index(['exp_name', 'index', 'unlabeled_validity', 'reference_validity',
       'unlabeled_unlabeled'],
      dtype='object')


<IPython.core.display.Javascript object>

Index(['exp_name', 'index', 'unlabeled_uniqueness', 'reference_uniqueness',
       'unlabeled_unlabeled'],
      dtype='object')
