In [1]:
import os
import time
import numpy as np
from rich.pretty import pprint
from typing import List, Dict, Tuple

from pycomex.functional.experiment import Experiment

In [2]:
PATH = os.getcwd()
RESULTS_PATH = os.path.join(PATH, 'results')

In [3]:
# ~ finding experiments

print('traversing experiment namespaces...')
experiment_namespace_paths: List[str] = [
    path
    for file in os.listdir(RESULTS_PATH)
    if os.path.isdir(path := os.path.join(RESULTS_PATH, file))
]
pprint(experiment_namespace_paths)

print('traversing experiment paths...')
experiment_paths: List[str] = [
    path
    for folder_path in experiment_namespace_paths
    for file in os.listdir(folder_path)
    if os.path.isdir(path := os.path.join(folder_path, file))
]
pprint(experiment_paths)

traversing experiment namespaces...


traversing experiment paths...


In [4]:
# ~ loading experiments
# Now that we have the paths to all the experiment archive folders, we can now actually 
# load them back into memory
experiments: List[Experiment] = []

time_start = time.time()
for path in experiment_paths:
    
    experiment_data_path = os.path.join(path, 'experiment_data.json')
    if not os.path.exists(experiment_data_path):
        print(f'no experiment data found at {experiment_data_path}')
        continue
    
    experiment = Experiment.load(path)
    experiments.append(experiment)
    
time_end = time.time()
    
print(f'loaded {len(experiments)} experiments in {time_end - time_start:.1f} seconds')


no experiment data found at /media/ssd/Programming/graph_hdc/graph_hdc/experiments/fingerprints/results/predict_molecules__gnn__qm9_smiles/ex_01_a__10_01_2025__20_18__l2MR/experiment_data.json
no experiment data found at /media/ssd/Programming/graph_hdc/graph_hdc/experiments/fingerprints/results/predict_molecules__gnn__qm9_smiles/ex_01_a__10_01_2025__21_18__xmQY/experiment_data.json
no experiment data found at /media/ssd/Programming/graph_hdc/graph_hdc/experiments/fingerprints/results/predict_molecules__gnn__qm9_smiles/ex_01_a__10_01_2025__19_17__K6PZ/experiment_data.json
loaded 51 experiments in 3.8 seconds


In [5]:
print('example experiment data:')
pprint(experiments[0].data, max_length=10)


example experiment data:


In [11]:
# ~ figuring out the datasets and the methods
# Now based on all the loaded data we are going to figure out the unique names of the datasets and the 
# methods that were used in the experiments

# This will be a list of all the unique dataset name strings
datasets: list[str] = list(set([
    experiment.metadata['name'].strip('.py').split('__')[2]
    for experiment in experiments
]))

from typing import Tuple

def get_methods(experiment: Experiment) -> List[Tuple[str, str]]:
    methods = []
    _, base, _ = experiment.metadata['name'].strip('.py').split('__')
    for method in experiment.parameters['MODELS']:
        methods.append((base, method))

    return methods

# This will be a list of all the unique method name strings
methods: list[str] = list(set([
    method
    for experiment in experiments
    for method in get_methods(experiment)
]))

print('datasets:')
pprint(datasets)

print('methods:')
pprint(methods)

datasets:


methods:


In [8]:
from collections import defaultdict

# Then we want to build a mapping that bundles all the experiments belonging to a certain method
method_experiment_map: Dict[str, List[Experiment]] = defaultdict(list)

for experiment in experiments:
    for method in get_methods(experiment):
        method_experiment_map[method].append(experiment)
    
pprint(method_experiment_map, max_length=3)

In [12]:
# ~ Retrieving the actual values
# Now that we have the supporting data structure we can construct the actual values that we need to 
# plot the results and also to generate the latex table with the results.

column_names = ['Base', 'Model'] + [dataset.replace('_', ' ') for dataset in datasets]
# This list will contain the individual rows of the dataset that we will later use to generate the
# latex table
rows: List[str] = []

for (base, method), experiments in method_experiment_map.items():
    row = [base.replace('_', ' '), method.replace('_', ' ')]
    
    for dataset in datasets:
        values = []
        for experiment in experiments:
            if experiment.metadata['name'].strip('.py').split('__')[2] == dataset and 'metrics' in experiment.data:
                if '__' in method:
                    _, _method = method.split('__')
                else:
                    _method = method
                
                key = f'test_{_method}'
                if key in experiment.data['metrics']:
                    metrics = experiment.data['metrics'][key]
                    if 'r2' in metrics:
                        values.append(metrics['r2'])
                    if 'f1' in metrics:
                        values.append(metrics['f1'])
                    
        row.append(values)
        
    rows.append(row)
        
pprint(rows, max_length=5)

In [13]:
# ~ Generating the latex table

from graph_hdc.utils import render_latex
from graph_hdc.utils import latex_table

# ~ rendering latex

tex_content, tex_table = latex_table(
    column_names=column_names,
    rows=rows
)
print(tex_table)

tex_path = os.path.join(PATH, '_results_ex01.tex')
with open(tex_path, 'w') as file:
    file.write(tex_table)

pdf_path = os.path.join(PATH, '_results_ex01.pdf')
render_latex({'content': tex_table}, pdf_path)

\begin{tabular}{ cccccccc }
% -- table header --
\toprule
Base &
Model &
qm9 smiles &
bace &
ames &
bbb &
clog &
aqsoldb \\

\midrule
% -- table content --
% row 1
gnn &
gcn &
$nan {\color{gray} \pm \mathsmaller{ nan } }$ &
$0.73 {\color{gray} \pm \mathsmaller{ 0.01 } }$ &
$0.75 {\color{gray} \pm \mathsmaller{ 0.01 } }$ &
$0.85 {\color{gray} \pm \mathsmaller{ 0.01 } }$ &
$0.99 {\color{gray} \pm \mathsmaller{ 0.00 } }$ &
$0.83 {\color{gray} \pm \mathsmaller{ 0.01 } }$ 
\\
% row 2
gnn &
gin &
$nan {\color{gray} \pm \mathsmaller{ nan } }$ &
$0.75 {\color{gray} \pm \mathsmaller{ 0.03 } }$ &
$0.81 {\color{gray} \pm \mathsmaller{ 0.03 } }$ &
$0.84 {\color{gray} \pm \mathsmaller{ 0.01 } }$ &
$0.99 {\color{gray} \pm \mathsmaller{ 0.00 } }$ &
$0.84 {\color{gray} \pm \mathsmaller{ 0.01 } }$ 
\\
% row 3
gnn &
gatv2 &
$nan {\color{gray} \pm \mathsmaller{ nan } }$ &
$0.76 {\color{gray} \pm \mathsmaller{ 0.01 } }$ &
$0.79 {\color{gray} \pm \mathsmaller{ 0.00 } }$ &
$0.83 {\color{gray} \pm \mathsmall