# Experiment Analysis

This notebook is used to analyze the results of certain experiments which can be found in the pycomex experiment archive located at the ``RESULTS_PATH`` directory.

In [1]:
import os
import time
import json
from collections import defaultdict

import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display, Latex
from rich.pretty import pprint
from pycomex.utils import is_experiment_archive
from pycomex.utils import render_string_table
from pycomex.functional.experiment import Experiment

# This will be the path to the directory in which the notebook is located.
PATH: str = os.getcwd()
# This will have to be the path to the pycomex "results" directory containing the 
# experiment archives of interest.
# HAS TO BE CHANGED, if the analysis is not located in the same directory as the results
# folder.
RESULTS_PATH: str = os.path.join(PATH, 'results')

  import pkg_resources


In [2]:
# This is a data structure which maps each possible encoding method to a dictionary of the
# relevant hyperparameters and the corresponding values which are lists of values to
# be used for the hyperparameter sweep.
ENCODING_PARAMETER_MAP: dict[str, dict[str, list]] = {
    'gnn': {},
    'fp': {
        'FINGERPRINT_SIZE': [1024, 2048, 4096, 8192],
        'FINGERPRINT_RADIUS': [1, 2, 3],
    },
    'hdc': {
        'EMBEDDING_SIZE': [1024, 2048, 4096, 8192],
        'NUM_LAYERS': [1, 2, 3],
    },
}

# This is a data structure which maps each possible model name to a dictionary of the 
# relevant hyperparameters and the corresponding values which are lists of values to 
# be used for the hyperparameter sweep.
MODEL_PARAMETER_MAP: dict[str, dict[str, list]] = {
    'random_forest': {
        'RF_NUM_ESTIMATORS': [10, 100, 500],
        'RF_MAX_DEPTH': [None, 10, 20],
        'RF_MAX_FEATURES': [None, 'sqrt', 'log2'],
    },
    'grad_boost': {
        'GB_NUM_ESTIMATORS': [10, 100, 300],
        'GB_MAX_DEPTH': [2, 3, 4],
        'GB_LEARNING_RATE': [0.01, 0.1, 0.2],
    },
    'k_neighbors': {
        'KN_NUM_NEIGHBORS': [3, 5, 10],
        'KN_WEIGHTS': ['uniform', 'distance'],
    },
    'linear': {
        'LN_ALPHA': [0.0001, 0.001, 0.01],
        'LN_FIT_INTERCEPT': [True, False],
        'LN_L1_RATIO': [0.0, 0.1, 0.5],
    },
    'neural_net': {
        'NN_HIDDEN_LAYER_SIZES': [
            (10, 10),
            (50, 50),
            (100, 100),
        ],
        'NN_ALPHA': [0.0001, 0.001, 0.01],
        'NN_LEARNING_RATE_INIT': [0.0001, 0.001, 0.01],
    },
    'neural_net2': {
        'NN_HIDDEN_LAYER_SIZES': [
            (10, 10),
            (50, 50),
            (100, 100),
        ],
        'NN_ALPHA': [0.0001, 0.001, 0.01],
        'NN_LEARNING_RATE_INIT': [0.0001, 0.001, 0.01],
    },
    'gcn': {
        'CONV_UNITS': [
            (64, 64, 64),
            (128, 128, 128),
            (256, 256, 256),  
        ],
        'LEARNING_RATE': [0.001, 0.0001],
        'EPOCHS': [150],
    },
    'gin': {
        'CONV_UNITS': [
            (64, 64, 64),
            (128, 128, 128),
            (256, 256, 256),  
        ],
        'LEARNING_RATE': [0.001, 0.0001],
        'EPOCHS': [150],
    },
    'gatv2': {
        'CONV_UNITS': [
            (64, 64, 64),
            (128, 128, 128),
            (256, 256, 256),  
        ],
        'LEARNING_RATE': [0.001, 0.0001],
        'EPOCHS': [150],
    },
}

# This is a data structure which maps each encoding method to a list of model names
# that are compatible with that encoding method. This is used to filter the model
# names when generating the SLURM jobs, so that only the models that are compatible
# with the encoding method are used in the hyperparameter sweep.
ENCODING_MODEL_MAP = {
    'gnn': ['gcn', 'gatv2', 'gin'],
    'fp': ['random_forest', 'grad_boost', 'k_neighbors', 'linear', 'neural_net'],
    'hdc': ['random_forest', 'grad_boost', 'k_neighbors', 'linear', 'neural_net'],
}

In [3]:

# Filter which experiments will be loaded based on their name and/or parameters.
def select_experiment(experiment_name: str,
                      experiment_metadata: dict,
                      experiment_parameters: dict
                      ) -> bool:
    
    return '__PREFIX__' in experiment_parameters and 'ex_00_c' in experiment_parameters['__PREFIX__']


# Assign a unique key to the experiment based on its data / parameters etc.
# Later on the experiments will be sorted based on this key which will be the 
# key of a dictionary and the values will be a list of all the experiments with 
# that same key.
def sort_experiment(experiment: Experiment) -> tuple:
    *_, encoding, dataset = experiment.metadata['name'].split('__')
    
    if 'NOTE' in experiment.parameters:
        dataset = experiment.parameters.get('NOTE', dataset)
        
    if 'FINGERPRINT_TYPE' in experiment.parameters:
        encoding = experiment.parameters['FINGERPRINT_TYPE']
    
    method = experiment.parameters['MODELS'][0]
    
    return (encoding, dataset, method)

### Experiment Discovery

The following cell will first discover all of the previously executed experiment runs which have been archived in the given results directory. It will then print the number of experiments found.

In [4]:
# This list will contain the paths to the individual expeirment *namespaces* which in 
# turn contain the actual individual experiment archives.
experiment_namespace_paths: list[str] = [
    path
    for file_name in os.listdir(RESULTS_PATH)
    if os.path.isdir(path := os.path.join(RESULTS_PATH, file_name))
]

# Subsequently, this list will contain the paths to the individual experiment archives
# folders.
experiment_paths: list[str] = []
for namespace_path in experiment_namespace_paths:
    for dirpath, dirnames, filenames in os.walk(namespace_path):
        if is_experiment_archive(dirpath):
            experiment_paths.append(dirpath)
            dirnames.clear() # Prevents further recursion into subdirectories
        
print(f'✅ found {len(experiment_paths)} experiment archives in {len(experiment_namespace_paths)} namespaces')
pprint(experiment_paths, max_length=3)

✅ found 23448 experiment archives in 32 namespaces


### Experiment Loading

The following cell will then load the experiment runs into memory selectively based on the previously defined ``select_experiment`` function. Only experiments for which the function returns ``True`` will be loaded into memory. The loaded experiments will actually be restored from the archive by loading the parameters as well as the data back into a ``pycomex.experiment.Experiment`` object.

In [5]:
# This list will be populated with the actual Experiment instances which will 
# be loaded from the experiment archive folders.
experiments: list[Experiment] = []

experiments_error: list[str] = []

print('Loading experiments from archives...')
time_start: float = time.time()
for experiment_path in experiment_paths:
    
    experiment_identifier: str = os.path.basename(experiment_path)
    
    experiment_data_path = os.path.join(experiment_path, Experiment.DATA_FILE_NAME)
    if not os.path.exists(experiment_data_path):
        print(f'   ⚠️ Skipping experiment "{experiment_identifier}" - no data found')
        continue
    
    experiment_meta_path = os.path.join(experiment_path, Experiment.METADATA_FILE_NAME)
    if not os.path.exists(experiment_meta_path):
        print(f'   ⚠️ Skipping experiment "{experiment_identifier}" - no metadata found')
        continue
    
    with open(experiment_meta_path) as file:
        content = file.read()
        # This will contain the experiment metadata as a dictionary which we can now use to
        # filter for instance.
        experiment_metadata: dict = json.loads(content)
        
    if 'parameters' not in experiment_metadata:
        print(f'   ⚠️ Skipping experiment "{experiment_identifier}" - no parameters found')
        continue
    
    experiment_parameters: dict = {
        param: info['value']
        for param, info in experiment_metadata['parameters'].items()
        if 'value' in info
    }
    
    # Here we apply the filter to determine whether or not the experiment should be included
    # in the analysis.
    condition: bool = select_experiment(
        experiment_name=experiment_metadata['name'],
        experiment_metadata=experiment_metadata,
        experiment_parameters=experiment_parameters
    )
    
    if condition:
        try:
            print(f'   > included experiment "{experiment_identifier}"')
            experiment: Experiment = Experiment.load(experiment_path)
            experiments.append(experiment)
        except Exception as e:
            experiments_error.append(experiment_path)
            print(f'   ⚠️ Failed to load experiment "{experiment_identifier}" - Exception: {e}')
            
time_end: float = time.time()
duration: float = time_end - time_start
print(f'✅ Loaded {len(experiments)} experiments ({len(experiments_error)} errors) in {duration:.2f} seconds')

Loading experiments from archives...
   ⚠️ Skipping experiment "debug" - no data found
   > included experiment "ex_00_c__19_08_2025__08_12__WUiO"


  from .autonotebook import tqdm as notebook_tqdm


   > included experiment "ex_00_c__16_08_2025__02_41__p5LA"
   > included experiment "ex_00_c__18_08_2025__23_27__SPbv"
   > included experiment "ex_00_c__15_08_2025__22_27__kcqT"
   > included experiment "ex_00_c__15_08_2025__17_48__jTPs"
   > included experiment "ex_00_c__19_08_2025__08_17__25Ln"
   > included experiment "ex_00_c__19_08_2025__01_56__1DOt"
   > included experiment "ex_00_c__15_08_2025__22_55__JFsp"
   > included experiment "ex_00_c__16_08_2025__12_24__6WpX"
   > included experiment "ex_00_c__18_08_2025__19_21__TJID"
   > included experiment "ex_00_c__16_08_2025__11_12__5Vd1"
   > included experiment "ex_00_c__16_08_2025__05_50__tDFM"
   > included experiment "ex_00_c__18_08_2025__17_51__dzP3"
   > included experiment "ex_00_c__17_08_2025__09_57__8AN0"
   > included experiment "ex_00_c__18_08_2025__17_00__8Czd"
   > included experiment "ex_00_c__16_08_2025__08_03__i66R"
   > included experiment "ex_00_c__19_08_2025__03_53__6R9Q"
   > included experiment "ex_00_c__15_08

In [6]:
example_experiment: Experiment = experiments[0] if experiments else None
pprint(example_experiment.data, max_length=10)
pprint(example_experiment.metadata)
pprint(example_experiment.parameters)

### Experiment Sorting

The following cell will sort the - currently still unsorted - experiment list by a custom criteria defined in the ``sort_experiments`` function. This is done by putting all the experiments into the same list for which this function returns the same index tuple. The result is the ``key_experiment_map`` dictionary data structure which maps the index tuple to a list of experiments.

In [7]:
# This will be a dictionary mapping the unique key of the experiment to a list of
# experiments which share that key. This will allow us to group experiments based on
# their parameters or other attributes.
key_experiment_map: dict[tuple, list[Experiment]] = defaultdict(list)

for experiment in experiments:
    key: tuple = sort_experiment(experiment)
    key_experiment_map[key].append(experiment)
    
pprint(key_experiment_map, max_length=3)

In [8]:
experiment_1 = key_experiment_map.get(('rdkit', 'qm9_gap', 'neural_net'), [])[0]
print(experiment_1.parameters['TARGET_INDEX'])

experiment_1 = key_experiment_map.get(('rdkit', 'qm9_energy', 'neural_net'), [])[0]
print(experiment_1.parameters['TARGET_INDEX'])

7
10


### Experiment Result Table

The following cell illustrates how to create a table from the aggregated results of many loaded experiments, which is a common use case of the analysis.

In [9]:
column_names = ['Key', 'Runtime']
rows: list[list] = []

for key, exps in key_experiment_map.items():
    
    row = [
        str(key),
        [exp.metadata['duration'] for exp in exps]
    ]
    rows.append(row)
    
string = render_string_table(column_names, rows, reduce_func=lambda x: f'{np.mean(x):.2f} ± {np.std(x):.2f}' if isinstance(x, list) else str(x))
print(string)

+-------------------------------------------+-----------------+
|                    Key                    |     Runtime     |
+-------------------------------------------+-----------------+
|      ('hdc', 'clogp', 'neural_net2')      | 170.96 ± 106.89 |
|       ('hdc', 'clogp', 'grad_boost')      | 118.19 ± 108.16 |
|     ('hdc', 'clogp', 'random_forest')     | 266.14 ± 399.86 |
|      ('hdc', 'clogp', 'k_neighbors')      | 111.12 ± 110.30 |
|       ('hdc', 'clogp', 'neural_net')      | 186.54 ± 151.79 |
|      ('hdc', 'qm9_gap', 'neural_net')     | 284.81 ± 256.09 |
|   ('hdc', 'qm9_energy', 'random_forest')  | 405.84 ± 655.82 |
|     ('hdc', 'qm9_gap', 'neural_net2')     | 166.21 ± 127.80 |
|     ('hdc', 'qm9_gap', 'k_neighbors')     |  84.00 ± 99.63  |
|    ('hdc', 'qm9_energy', 'neural_net')    | 251.05 ± 252.53 |
|    ('hdc', 'qm9_gap', 'random_forest')    | 385.93 ± 611.78 |
|    ('hdc', 'qm9_energy', 'neural_net2')   |  120.67 ± 35.71 |
|    ('hdc', 'qm9_energy', 'grad_boost')

## Selecting Best Parameters

The following section will iterate over all of the loaded experiment results and select the best parameters for each experiment based on the evaluation metrics. The best parameters for each combination are then collected into one data structure which may then server as a lookup table for the best parameters for subsequent experiments.

In [10]:
# This data structure will contain the best experiment for each key consisting 
# of a tuple of (encoding, dataset, method). The keys will be tuples and the values
# will be the best Experiment instance for that key.
key_best_experiment_map: dict[tuple, Experiment] = dict()

# This data structure will contain the best hyperparameter config for each case.
# The keys will be tuples consisting of (encoding, dataset, method) and the values 
# will be dictionaries containing the best hyperparameters for that case.
key_best_parameters_map: dict[tuple, dict] = defaultdict(dict)


for (encoding, dataset, method), exps in key_experiment_map.items():
    
    ## -- Metric Selection --
    # We need to use different metrics for regression or classification tasks.
    if exps[0].parameters['DATASET_TYPE'] == 'regression':
        metric = 'r2'
    else:
        metric = 'f1'
    
    # This list will contain the tuples (experiment, value) where the value is the 
    # evaluation metric value for the experiment. Later on we can use a max() function 
    # on this list to find the best experiment for each case.
    experiment_value_tuples: list[tuple[Experiment, float]] = [
        (experiment, experiment.data['metrics'][f'test_{method}'][metric])
        for experiment in exps
        if 'metrics' in experiment.data
    ]
    
    print(f'{encoding} - {dataset} - {method}')
    best_experiment, best_value = max(experiment_value_tuples, key=lambda x: x[1])
    print(f' > best experiment ({best_value:.2f})')
    
    best_experiment.data['value'] = best_value
    key_best_experiment_map[(encoding, dataset, method)] = best_experiment
    

for (encoding, dataset, method), best_experiment in key_best_experiment_map.items():
    
    # -- Select Relevant Parameters --
    # Only a subset of the experiment parameters were actually used in the hyperparameter sweep.
    # We will collect the relevant parameters for this case based on the encoding and method.
    relevant_parameters: list[str] = [
        *list(ENCODING_PARAMETER_MAP.get(encoding, {}).keys()),
        *list(MODEL_PARAMETER_MAP.get(method, {}).keys())
    ]
    
    # Then we collect the values to these parameters from the best experiment.
    best_experiment_parameters: dict[str, any] = {
        param: best_experiment.parameters[param]
        for param in relevant_parameters
        if param in best_experiment.parameters
    }
    
    # Collect the best hyperparameters for this case.
    key_best_parameters_map[(encoding, dataset, method)] = best_experiment_parameters
    
pprint(key_best_parameters_map)

hdc - clogp - neural_net2
 > best experiment (0.99)
hdc - clogp - grad_boost
 > best experiment (0.89)
hdc - clogp - random_forest
 > best experiment (0.94)
hdc - clogp - k_neighbors
 > best experiment (0.92)
hdc - clogp - neural_net
 > best experiment (0.99)
hdc - qm9_gap - neural_net
 > best experiment (0.93)
hdc - qm9_energy - random_forest
 > best experiment (0.75)
hdc - qm9_gap - neural_net2
 > best experiment (0.93)
hdc - qm9_gap - k_neighbors
 > best experiment (0.84)
hdc - qm9_energy - neural_net
 > best experiment (1.00)
hdc - qm9_gap - random_forest
 > best experiment (0.88)
hdc - qm9_energy - neural_net2
 > best experiment (1.00)
hdc - qm9_energy - grad_boost
 > best experiment (0.64)
hdc - qm9_energy - k_neighbors
 > best experiment (0.61)
hdc - qm9_gap - grad_boost
 > best experiment (0.80)
hdc - aqsoldb - neural_net
 > best experiment (0.83)
hdc - aqsoldb - random_forest
 > best experiment (0.82)
hdc - aqsoldb - k_neighbors
 > best experiment (0.80)
hdc - aqsoldb - neural

Exporting this data structure to a JSON file so we can use it later on to inform the subsequent experiments.

In [11]:
import json

parameter_map_path: str = os.path.join(PATH, 'experiment_best_parameters_map.json')
with open(parameter_map_path, 'w') as file:
    json.dump(list(key_best_parameters_map.items()), file, indent=4)
    
if os.path.exists(parameter_map_path):
    print(f'✅ Saved best hyperparameters to {parameter_map_path}')

✅ Saved best hyperparameters to /media/ssd2/Programming/graph_hdc/graph_hdc/experiments/fingerprints/experiment_best_parameters_map.json


Visualizing the hyperparameter results in a table.

In [12]:
from prettytable import PrettyTable

table = PrettyTable()
table.field_names = ['Dataset', 'Encoding', 'Method', 'Best Value', 'Hyperparameters']

keys = sorted(key_best_experiment_map.keys(), key=lambda x: (x[1], x[0], x[2]))

for (encoding, dataset, method) in keys:
    best_experiment = key_best_experiment_map[(encoding, dataset, method)]
    best_parameters = key_best_parameters_map[(encoding, dataset, method)]
    best_value = best_experiment.data['value']
    
    parameters_string = '\n'.join(
        f'{param}: {value}'
        for param, value in best_parameters.items()
    )
    
    table.add_row([
        dataset,
        encoding,
        method,
        f'{best_value:.2f}',
        parameters_string,
    ])
    table.add_divider()
    
print(table.get_string())

+------------+----------+---------------+------------+-----------------------------------+
|  Dataset   | Encoding |     Method    | Best Value |          Hyperparameters          |
+------------+----------+---------------+------------+-----------------------------------+
|  aqsoldb   |   atom   |   grad_boost  |    0.61    |       GB_NUM_ESTIMATORS: 300      |
|            |          |               |            |          GB_MAX_DEPTH: 4          |
|            |          |               |            |       GB_LEARNING_RATE: 0.01      |
+------------+----------+---------------+------------+-----------------------------------+
|  aqsoldb   |   atom   |  k_neighbors  |    0.78    |        KN_NUM_NEIGHBORS: 5        |
|            |          |               |            |        KN_WEIGHTS: distance       |
+------------+----------+---------------+------------+-----------------------------------+
|  aqsoldb   |   atom   |   neural_net  |    0.85    |  NN_HIDDEN_LAYER_SIZES: [50, 50]  |