# Checking that the experiments are OK

In [235]:
import os
from tabulate import tabulate
import pandas as pd
import yaml

## Checking the datasets

In [236]:
# Datasets and reducer models whose experiment directories are checked.
datasets = ['kuhar', 'motionsense', 'uci', 'wisdm', 'realworld_thigh', 'realworld_waist']
models = ['umap', 'ae', 'tae', 'convae', 'convtae']
# Human-readable description of each check; the keys match the result columns
# filled in by the experiment scan.
criteria = {
    'C1': 'Does the experiment route exists?',
    'C2': 'Checking base config: 1 model, 1 dataset',
    'C3': 'Check exploration config: latent dimension and percentage',
    'C4': 'Did the experiment finished?',
    'C4.1': 'Did the experiment finished with over 1000 iterations?',
    'C4.2': 'Did the experiment finished without errors?',

}
# Percentage values substituted into the `p{percentage}` suffix of the experiment name.
percentages = [25, 50, 75, 100, 200]
# Directory-name pattern of an experiment, e.g. 'umap_kuhar_p25'.
experiment_name_template = '{model}_{dataset}_p{percentage}'

### Helper functions

In [259]:
def check_base_config_file(route, dataset):
    """Check that a base config references exactly one dataset: the expected one.

    The YAML file at ``route`` must contain the keys ``reducer_dataset``,
    ``train_dataset`` and ``test_dataset``. Each must hold exactly one entry,
    and the part of that entry before the first ``'.'`` must equal ``dataset``.

    Args:
        route: Path to the ``base_config.yaml`` file.
        dataset: Dataset name the experiment is expected to use.

    Returns:
        True when all three entries pass; False otherwise, including when the
        file cannot be read or parsed (the error is printed in that case).
    """
    try:
        with open(route, 'r') as config_file:
            config = yaml.load(config_file, Loader=yaml.FullLoader)
        # Look up all three entries up front so that a missing key is reported
        # before any length or name comparison is attempted.
        entries = [
            config[key]
            for key in ('reducer_dataset', 'train_dataset', 'test_dataset')
        ]
        # Exactly one reducer, train and test dataset is allowed.
        if any(len(entry) != 1 for entry in entries):
            return False
        # Only the text before the first dot is compared with the dataset name.
        names = [entry[0].split('.')[0] for entry in entries]
        return all(name == dataset for name in names)
    except Exception as error:
        print(error)
        print('Error reading config files')
    return False

def check_exploration_config_file(route, reducer, percentage):
    """Check the tuned dimensionality range in an exploration config.

    The YAML file at ``route`` must define
    ``search_space[<param>]['tune_parameters']`` as a two-element sequence
    ``[2, int(360 * percentage / 100) + 1]``, where ``<param>`` is
    ``'umap_ncomp'`` for the ``'umap'`` reducer and ``'latent_dim'`` for every
    other reducer.

    Args:
        route: Path to the ``exploration_config.yaml`` file.
        reducer: Model name; only ``'umap'`` selects the ``umap_ncomp`` key.
        percentage: Percentage used to derive the expected upper bound.
            The base value 360 is hard-coded; presumably the input feature
            dimension - TODO confirm.

    Returns:
        True when the range matches. False when it does not match, or when
        the file is missing, unreadable, malformed, or lacks the expected
        keys; a message is printed for the read and missing-key cases.
    """
    parameter_name = 'umap_ncomp' if reducer == 'umap' else 'latent_dim'

    # A missing or unparsable file is a failed check, not a notebook crash;
    # this mirrors the error handling of check_base_config_file.
    try:
        with open(route, 'r') as f:
            exploration_config = yaml.load(f, Loader=yaml.FullLoader)
    except (OSError, yaml.YAMLError) as e:
        print(e)
        print('Error reading config files')
        return False

    # KeyError: a level of the expected nesting is absent.
    # TypeError: a level is not a mapping (e.g. an empty YAML document -> None).
    try:
        tune_parameters = exploration_config['search_space'][parameter_name]['tune_parameters']
    except (KeyError, TypeError):
        print('Not found tune_parameters')
        return False

    if not isinstance(tune_parameters, (list, tuple)):
        return False

    expected_upper = int(360 * percentage / 100) + 1
    return list(tune_parameters) == [2, expected_upper]

def check_4_1(route):
    """Criterion C4.1: does the CSV at ``route`` hold at least 1000 rows?

    Each data row presumably corresponds to one experiment iteration
    (TODO confirm). Note that exactly 1000 rows passes the check.

    Args:
        route: Path to the experiment's ``data.csv`` file.

    Returns:
        True when the file loads and has 1000 or more data rows; False when it
        has fewer or cannot be read at all (the error is silently ignored).
    """
    try:
        row_count = len(pd.read_csv(route))
    except Exception:
        return False
    return row_count >= 1000

def check_4_2(route):
    """Criterion C4.2: did the experiment finish without logged errors?

    The check passes when the errors CSV at ``route`` contains no data rows.
    A completely empty (zero-byte) file is treated the same as a header-only
    file: both contain no error rows.

    Args:
        route: Path to the experiment's ``callback_errors.csv`` file.

    Returns:
        True when the file has no error rows; False when it has at least one
        row or cannot be read.
        NOTE(review): a missing file still counts as a failure - confirm
        whether the experiment runner always creates this file.
    """
    try:
        errors = pd.read_csv(route)
    except pd.errors.EmptyDataError:
        # pandas raises on a file with no header and no rows; that is an
        # empty error log, not a failed read.
        return True
    except Exception:
        return False
    return len(errors) == 0

### Do all experiments exist?

In [260]:
# Build one result row per (dataset, model, percentage) experiment.
data = []
for dataset in datasets:
    for model in models:
        for percentage in percentages:
            experiment_name = experiment_name_template.format(model=model, dataset=dataset, percentage=percentage)
            # Every model except umap lives in a 'P10_'-prefixed directory.
            if model != 'umap':
                experiment_name = 'P10_' + experiment_name
            experiment_route = '../../experiments/' + experiment_name + '/'

            # Derive every path on each iteration. Previously these names were
            # only assigned when the directory existed, so a missing experiment
            # either raised NameError or silently reused the previous
            # experiment's paths and results.
            base_config_route = experiment_route + 'base_config.yaml'
            exploration_config_route = experiment_route + 'exploration_config.yaml'
            data_route = experiment_route + 'data.csv'

            # os.path.exists is False for anything inside a missing directory,
            # so no separate guard on the experiment directory is needed.
            does_experiment_route_exist = os.path.exists(experiment_route)
            does_base_config_route_exist = os.path.exists(base_config_route)
            does_exploration_config_route_exist = os.path.exists(exploration_config_route)
            does_data_exist = os.path.exists(data_route)

            new_object = {
                'model': model,
                'dataset': dataset,
                'perc': percentage,
                'route': experiment_route,
                'C1': does_experiment_route_exist,
                # Config checks only run on files that exist; a missing file
                # fails the criterion without opening anything.
                'C2': does_base_config_route_exist
                      and check_base_config_file(base_config_route, dataset),
                'C3': does_exploration_config_route_exist
                      and check_exploration_config_file(exploration_config_route, model, percentage),
                'C4': does_data_exist,
                'C4.1': check_4_1(data_route),
                'C4.2': check_4_2(experiment_route + 'callback_errors.csv'),
            }
            data.append(new_object)


In [250]:
# Columns that identify an experiment; report cells prepend them to the criteria columns.
base_columns = ['model', 'dataset', 'perc', 'route']

In [247]:
# Keep only the experiments that failed criterion C1 (directory not found).
df = pd.DataFrame(data).loc[lambda frame: frame['C1'] == False]
df.shape

(0, 8)

In [248]:
# Render the filtered frame as a psql-style text table, using the column names as headers.
print(tabulate(df, headers='keys', tablefmt='psql'))

+---------+-----------+--------+---------+------+------+------+------+
| model   | dataset   | perc   | route   | C1   | C2   | C3   | C4   |
|---------+-----------+--------+---------+------+------+------+------|
+---------+-----------+--------+---------+------+------+------+------+


### Asserting values in experiments

In [None]:
# NOTE(review): same statement as the previous table cell; it prints whatever `df` currently holds.
print(tabulate(df, headers='keys', tablefmt='psql'))

+---------+-----------+--------+---------+---------------+----------------------+-------------------+
| model   | dataset   | perc   | route   | base_config   | exploration_config   | does_data_exist   |
|---------+-----------+--------+---------+---------------+----------------------+-------------------|
+---------+-----------+--------+---------+---------------+----------------------+-------------------+


### How many experiments are left per model?

In [254]:
# Show the description of criterion C4, which the next table reports on.
print(criteria['C4'])

Did the experiment finished?


In [263]:
# Completion report: identifying columns plus the three C4 criteria, one row per experiment.
completion_columns = base_columns + ['C4', 'C4.1', 'C4.2']
df = pd.DataFrame(data)[completion_columns]
print(df.shape)
print(tabulate(df, headers='keys', tablefmt='psql'))

(150, 7)
+-----+---------+-----------------+--------+-----------------------------------------------------+-------+--------+--------+
|     | model   | dataset         |   perc | route                                               | C4    | C4.1   | C4.2   |
|-----+---------+-----------------+--------+-----------------------------------------------------+-------+--------+--------|
|   0 | umap    | kuhar           |     25 | ../../experiments/umap_kuhar_p25/                   | True  | True   | False  |
|   1 | umap    | kuhar           |     50 | ../../experiments/umap_kuhar_p50/                   | True  | True   | False  |
|   2 | umap    | kuhar           |     75 | ../../experiments/umap_kuhar_p75/                   | True  | True   | False  |
|   3 | umap    | kuhar           |    100 | ../../experiments/umap_kuhar_p100/                  | True  | True   | False  |
|   4 | umap    | kuhar           |    200 | ../../experiments/umap_kuhar_p200/                  | True  | True   | 