# Checking the experiments are ok

In [141]:
import os
from tabulate import tabulate
import pandas as pd
import yaml

## Checking the datasets

In [142]:
datasets = ['kuhar', 'motionsense', 'uci', 'wisdm', 'realworld_thigh', 'realworld_waist']
models = ['umap', 'ae', 'tae', 'convae', 'convtae']
percentages = [25, 50, 75, 100, 200]
experiment_name_template = '{model}_{dataset}_p{percentage}'

### Helper functions

In [143]:
def check_base_config_file(route, dataset):
    try:
        # Read base config
        with open(route, 'r') as f:
            base_config = yaml.load(f, Loader=yaml.FullLoader)
            # Only 1 reducer, train and test dataset
            reducer_datasets = base_config['reducer_dataset']
            train_datasets = base_config['train_dataset']
            test_datasets = base_config['test_dataset']
            if len(reducer_datasets) != 1:
                return False
            if len(train_datasets) != 1:
                return False
            if len(test_datasets) != 1:
                return False
            reducer_dataset = reducer_datasets[0].split('.')[0]
            train_dataset = train_datasets[0].split('.')[0]
            test_dataset = test_datasets[0].split('.')[0]
            # Check if reducer, train and test dataset are the same as the ones in the experiment name
            if reducer_dataset != dataset:
                return False
            if train_dataset != dataset:
                return False
            if test_dataset != dataset:
                return False
            return True
            # print(reducer_dataset, base_config['train_dataset'], base_config['test_dataset'], dataset)
    except Exception as e:
        print(e)
        print('Error reading config files')
    return False

def check_exploration_config_file(route, reducer, percentage):
    try:
        # Read exploration config
        with open(route, 'r') as f:
            exploration_config = yaml.load(f, Loader=yaml.FullLoader)
            if reducer == 'umap':
                tune_parameters = exploration_config['search_space']['umap_ncomp']['tune_parameters']
            else:
                tune_parameters = exploration_config['search_space']['latent_dim']['tune_parameters']
            # print(tune_parameters)
            if len(tune_parameters) != 2:
                return False
            if tune_parameters[0] != 2:
                return False
            if tune_parameters[1] != int(360*percentage/100)+1:
                return False
            return True
    except UnboundLocalError as e:
        # print(e.with_traceback())
        print('Not found tune_parameters')
    return False

### Do all experiments exist?

In [144]:
data = []
for dataset in datasets:
    for model in models:
        for percentage in percentages:
            experiment_route = experiment_name_template.format(model=model, dataset=dataset, percentage=percentage)
            if model != 'umap':
                experiment_route = 'P10_' + experiment_route
            experiment_route = '../../experiments/' + experiment_route + '/'
            does_experiment_route_exist = os.path.exists(experiment_route)
            does_base_config_route_exist = False
            does_exploration_config_route_exist = False
            if does_experiment_route_exist:
                base_config_route = experiment_route + 'base_config.yaml'
                exploration_config_route = experiment_route + 'exploration_config.yaml'
                does_base_config_route_exist = os.path.exists(base_config_route)
                does_exploration_config_route_exist = os.path.exists(exploration_config_route)
            
            new_object = {
                'model': model,
                'dataset': dataset,
                'percentage': percentage,
                'route': experiment_route,
                'exists': does_experiment_route_exist,
                'base_config': check_base_config_file(base_config_route, dataset),
                'exploration_config': check_exploration_config_file(exploration_config_route, model, percentage)
            }
            data.append(new_object)


In [145]:
df = pd.DataFrame(data)
df = df[df['exists'] == False]
df.shape

(0, 7)

In [146]:
print(tabulate(df, headers='keys', tablefmt='psql'))

+---------+-----------+--------------+---------+----------+---------------+----------------------+
| model   | dataset   | percentage   | route   | exists   | base_config   | exploration_config   |
|---------+-----------+--------------+---------+----------+---------------+----------------------|
+---------+-----------+--------------+---------+----------+---------------+----------------------+


### Asserting values in experiments

In [147]:
df = pd.DataFrame(data)
df = df[(df['base_config'] == False) | (df['exploration_config'] == False)]
df.shape

(0, 7)

In [148]:
print(tabulate(df, headers='keys', tablefmt='psql'))

+---------+-----------+--------------+---------+----------+---------------+----------------------+
| model   | dataset   | percentage   | route   | exists   | base_config   | exploration_config   |
|---------+-----------+--------------+---------+----------+---------------+----------------------|
+---------+-----------+--------------+---------+----------+---------------+----------------------+


In [149]:
alpha = np.mean(y) - 

SyntaxError: invalid syntax (1639551396.py, line 1)

In [None]:
beta = np.sum((x - np.mean(x)) * (y - np.mean(y))) / np.sum((x - np.mean(x)) ** 2)
print(beta)

0.7


In [None]:
beta = np.sum(x*y) / np.sum(x**2)
beta

0.6727272727272727

In [None]:
alpha = np.mean(y) - beta * np.mean(x)
alpha

-0.018181818181818077

In [None]:
import numpy as np
hd = 360
ls = 20
numHL = 2
dimensions = np.linspace(hd, ls, numHL+1).round().astype(int)
dimensions[::-1]

array([ 20, 190, 360])