Composition of the dataset of patterns for the IMGEP-OGL experiments. (Fig. 12, Supplementary Material)

In [None]:
# Default print properties of the figure.
# Every pixel quantity below is scaled by `multiplier`.
multiplier = 2

# pixels per centimetre, used to turn print sizes given in cm into pixels
pixel_cm_ration = 36.5


def _cm_to_px(length_cm):
    """Convert a length in cm to whole pixels and apply the global multiplier."""
    return int(length_cm * pixel_cm_ration) * multiplier


# figure sizes in pixels: full width (13.95 cm), half width and default height (3.5 cm)
width_full = _cm_to_px(13.95)
width_half = _cm_to_px(13.95/2)
height_default_1 = _cm_to_px(3.5)

# margins in pixel
top_margin = right_margin = 0 * multiplier
left_margin = 35 * multiplier
bottom_margin = 25 * multiplier

# text
font_family = 'Times New Roman'
font_size = 10 * multiplier

# width of the plotted lines
line_width = 2 * multiplier

In [None]:
# Define and load data
import autodisc as ad
import ipywidgets
import plotly
import numpy as np
import collections
# Initialise plotly's offline notebook mode so that figures can be rendered inside the notebook.
# NOTE(review): according to the plotly documentation, connected=True loads plotly.js from a CDN
# instead of embedding it, so rendering needs internet access - confirm this is intended.
plotly.offline.init_notebook_mode(connected=True)

# Named filter expressions, keyed by a human-readable label (insertion order is kept).
# A single condition is a (field, operator, value) triple; conditions are combined into
# nested (condition, 'and', condition) tuples. The 'none' entry is an empty list.
_not_dead = ('classifier_dead.data', '==', False)
_not_diverging = ('classifier_diverging.data', '==', 0)
_is_animal = ('classifier_animal.data', '==', True)
_is_not_animal = ('classifier_animal.data', '==', False)
_alive_non_animal = (_not_dead, 'and', _is_not_animal)

data_filters = collections.OrderedDict([
    ('none', []),
    ('non dead', _not_dead),
    ('animals (non div)', (_not_diverging, 'and', _is_animal)),
    ('non animals (non div)', (_alive_non_animal, 'and', _not_diverging)),
    ('animals (all)', _is_animal),
    ('non animals (all)', _alive_non_animal),
])

# Experiments that can be plotted, grouped under a list name.
# Each entry holds the experiment id, the directory with its results, a display name
# and a flag marking the default experiment.
org_experiment_definitions = dict()

org_experiment_definitions['main_paper'] = [
     dict(id = '302',
          directory = '../experiments/experiment_000302',
          name = 'IMGEP-OGL',
          is_default = True),
]

# ids of the repetitions that are evaluated for every experiment
repetition_ids = list(range(10))

# define names and load the data
# format of the experiment label; supported placeholders: <id>, <name>
experiment_name_format = '<name>' # <id>, <name>

# key into org_experiment_definitions selecting the experiments to load
# (experiment_definitions and experiment_statistics are created where they are filled,
#  so no placeholder initialisation is needed here)
current_experiment_list = 'main_paper'

# Translate the selected original definitions into the definitions used for loading:
# the directory (and the optional default flag) is copied, and the 'id' becomes the
# label generated from experiment_name_format.
experiment_definitions = []
for source_def in org_experiment_definitions[current_experiment_list]:
    # values available to the placeholders of the name format; <name> only if given
    placeholders = {'id': source_def['id']}
    if 'name' in source_def:
        placeholders['name'] = source_def['name']

    resolved_def = {'directory': source_def['directory']}
    if 'is_default' in source_def:
        resolved_def['is_default'] = source_def['is_default']
    resolved_def['id'] = ad.gui.jupyter.misc.replace_str_from_dict(experiment_name_format, placeholders)

    experiment_definitions.append(resolved_def)

# Load the statistics of every experiment, keyed by the experiment's label ('id').
experiment_statistics = {
    loaded_def['id']: ad.gui.jupyter.misc.load_statistics(loaded_def['directory'])
    for loaded_def in experiment_definitions
}
       

In [None]:
# Config
# Default plot configuration: output format, plotly layout, trace colours and the
# line style of each mean trace.
default_config = {
    'plotly_format': 'svg',

    'layout': {
        'xaxis': {'title': 'steps of training'},
        'yaxis': {'title': 'dataset size'},
        'font': {
            'family': font_family,
            'size': font_size,
        },
        'updatemenus': [],

        # figure size in pixels (print size in cm converted with the pixel/cm ratio)
        'width': width_full,
        'height': height_default_1,

        # margins in pixels: left, right, bottom, top
        'margin': {
            'l': left_margin,
            'r': right_margin,
            'b': bottom_margin,
            't': top_margin,
        },

        # legend anchored by its top-left corner at x=0.02, y=1
        'legend': {
            'xanchor': 'left',
            'yanchor': 'top',
            'y': 1,
            'x': 0.02,
        },
    },

    # colour per trace, in trace order
    'default_colors': [
        'rgb(204,121,167)',
        'rgb(230,159,0)',
        'rgb(0,158,115)',
        'rgb(0,0,0)',
        'rgb(240,228,66)',
        'rgb(213,94,0)',
        'rgb(0,0,0)',
        'rgb(86,180,233)',
        'rgb(0,158,115)',
        'rgb(240,228,66)',
        'rgb(0,114,178)',
        'rgb(213,94,0)',
        'rgb(204,121,167)',
    ],

    # properties shared by all mean traces
    'default_mean_trace': {'line': {'width': line_width}},

    # dash style per mean trace, in trace order
    'mean_traces': [
        {'line': {'dash': dash_style}}
        for dash_style in (
            'solid', 'dot', 'dash', 'dashdot', 'solid', 'longdashdot', 'longdash',
            'solid', 'dash', 'dashdot', 'dot', 'longdash', 'longdashdot',
        )
    ],
}

In [None]:
import copy
import os
import autodisc

# number of training stages for which a summary file is read
n_stages = 50
#n_steps = [i*100 for i in range(1,51)]

# Work on a copy so that the figure-specific entries added below do not modify default_config.
config = copy.deepcopy(default_config)
config['mean_labels'] = ['all', 'animals', 'non-animals']

# x-axis values: stage numbers starting at 1
x_values = list(range(1, n_stages + 1))
config['default_trace'] = dict(
        x = x_values,
        )

# x values of the std trace: forward followed by reversed
config['default_std_trace'] = dict(
        x = x_values + x_values[::-1],
        )

# Maps the first field of a summary line to the row of `data` its count is added to.
#   row 0: all (train + valid), row 1: animals (stable + diverging), row 2: non-animals
summary_key_to_row = {
    'n_train_dataset: ': 0,
    'n_valid_dataset: ': 0,
    'n_stable_animals: ': 1,
    'n_diverging_animals: ': 1,
    'n_non_animals: ': 2,
}

for exp_def in experiment_definitions:
    # counts per (category, repetition, stage)
    data = np.zeros((3, len(repetition_ids), n_stages), dtype='float')

    for stage_idx in range(n_stages):
        # The array is indexed by the position of the repetition, the file by its id,
        # so repetition_ids does not have to be 0..n-1.
        for repetition_pos, repetition_id in enumerate(repetition_ids):

            training_summary_filename = os.path.join(
                exp_def['directory'],
                'repetition_{:06d}/trained_representation/stages_summary/stage_{:06d}/summary.csv'.format(repetition_id, stage_idx))

            with open(training_summary_filename, 'r') as f:
                for line in f:
                    # lines are tab separated: the key is field 0, the count is field 3
                    fields = line.rstrip().split('\t')
                    row = summary_key_to_row.get(fields[0])
                    if row is not None:
                        data[row, repetition_pos, stage_idx] += float(fields[3])

    # one (repetitions x stages) array per category, in the order of config['mean_labels']
    data = [data[idx] for idx in range(3)]
    fig = autodisc.gui.jupyter.plotly_meanstd_scatter(data=data, config=config)
