In [None]:
from datetime import datetime

import os
from pathlib import Path
import subprocess
import warnings
import re
import pickle
import pandas as pd
import functools

import jax
import jax.numpy as jnp

from src.config.core import Config
from src.config.sampler import Sampler
from src.config.data import DatasetType
import src.dataset as ds
from src.models.tabular import FCN
import src.training.utils as train_utils
import src.inference.utils as inf_utils
import src.visualization as viz
from src.config.data import Task
from src.inference.evaluation import evaluate_bde

from matplotlib import pyplot as plt
import numpy as np

### Config template

In [None]:
# Template configuration for the bike-sharing regression experiments.
# `get_config` starts from this template and replaces the experiment name,
# the sampler section, the RNG seed and the saving directory.
CONFIG_DICT = dict(
    saving_dir='results/',
    experiment_name='bike',
    data=dict(
        path='data/bikesharing.data',
        source='local',
        data_type='tabular',
        task='regr',
        target_column=None,
        target_len=1,
        features=None,
        datapoint_limit=None,
        normalize=True,
        train_split=0.8,
        valid_split=0.0,
        test_split=0.2,
    ),
    model=dict(
        model='FCN',
        hidden_structure=[16, 16, 16, 2],
        activation='relu',
        use_bias=True,
    ),
    training=dict(
        # Meaningless placeholder: warmstart is switched off.
        warmstart=dict(
            include=False,
            optimizer_config=dict(name='sgd', parameters={}),
        ),
        sampler=dict(
            name='sgld',
            warmup_steps=0,
            n_chains=4,
            n_samples=24000,  # total steps
            batch_size=512,
            step_size_init=2.0e-6,  # step_size_explore
            n_thinning=1,
            keep_warmup=False,
            optimizer_name='sgd',
            prior_config=dict(name='StandardNormal'),
            scheduler_config=dict(
                name='Cyclical',
                n_samples_per_cycle=200,
                parameters=dict(n_cycles=4),
            ),
        ),
    ),
    rng=1446,
    logging=False,
)

In [None]:
def get_config(
        exp_name: str = 'bike',
        n_chains: int = 4,
        n_cycles: int = 4,
        n_steps_per_cycle: int = 2000,
        n_samples_per_cycle: int = 200,
        n_thinning: int = 1,
        optimizer_name: str = 'adam',
        scheduler_name: str = 'Cyclical',
        step_size_init: float = 2.0e-6,
        step_size_sampling: float | None = None,
        seed: int = 0
    ) -> Path:
    """Build an experiment config from ``CONFIG_DICT`` and write it to YAML.

    The template is deep-copied, its experiment name, sampler section, RNG
    seed and saving directory are replaced, and the result is serialized to
    ``experiments/csgld/<exp_name>.yaml`` (an existing file is overwritten).

    Args:
        exp_name: Experiment name; also determines the YAML file location and
            may contain sub-directories (e.g. ``'bike8/run'``).
        n_chains: Value stored as the sampler's ``n_chains``.
        n_cycles: Number of cycles; forwarded to the scheduler parameters.
        n_steps_per_cycle: Steps per cycle. The sampler's ``n_samples``
            (total steps) is ``n_cycles * n_steps_per_cycle``.
        n_samples_per_cycle: Value stored as the scheduler's
            ``n_samples_per_cycle``.
        n_thinning: Value stored as the sampler's ``n_thinning``.
        optimizer_name: Value stored as the sampler's ``optimizer_name``.
        scheduler_name: Value stored as the scheduler's ``name``.
        step_size_init: Value stored as the sampler's ``step_size_init``.
        step_size_sampling: Value stored in the scheduler parameters as
            ``step_size_sampling`` (``None`` is passed through unchanged).
        seed: Value stored under the top-level ``rng`` key.

    Returns:
        Path of the YAML file that was written.
    """
    # Local import keeps this cell self-contained.
    import copy

    n_samples = n_cycles * n_steps_per_cycle

    # Deep copy: a shallow ``dict.copy()`` would share the nested 'training'
    # dict with CONFIG_DICT, so assigning the sampler below would silently
    # overwrite the template on every call.
    new_config_dict = copy.deepcopy(CONFIG_DICT)
    new_config_dict['experiment_name'] = exp_name
    new_config_dict['training']['sampler'] = {
        'name': 'sgld',
        'warmup_steps': 0,
        'keep_warmup': False,
        'n_chains': n_chains,
        'n_samples': n_samples,  # total steps
        'batch_size': 512,
        'step_size_init': step_size_init,  # step_size_explore
        'n_thinning': n_thinning,
        'optimizer_name': optimizer_name,
        'prior_config': {
            'name': 'StandardNormal'
        },
        'scheduler_config': {
            'name': scheduler_name,
            'n_samples_per_cycle': n_samples_per_cycle,
            'parameters': {
                'n_cycles': n_cycles,
                'step_size_sampling': step_size_sampling
            }
        }
    }
    new_config_dict['rng'] = seed
    new_config_dict['saving_dir'] = 'results/'

    config_path = Path('experiments/csgld') / f'{exp_name}.yaml'
    # exp_name may contain sub-directories, so create the full parent chain.
    config_path.parent.mkdir(parents=True, exist_ok=True)
    Config.from_dict(new_config_dict).to_yaml(config_path)

    return config_path

### Chains/Cycles

In [None]:
# Sweep values 2, 4, ..., 12: used as the number of chains in the parallel
# sweep and as the number of cycles in the sequential sweep.
chains_cycles = list(range(2, 13, 2))
# Base RNG seeds; the sweeps add their sweep index to each of these.
seeds = [
    0, 42, 221, 476, 1453,
    1644, 1840, 1973, 2025, 2100,
]

### Constant Schedule

In [None]:
# `get_config` preset for the constant-schedule experiments: only the
# experiment name, chain/cycle counts and seed remain to be supplied.
# 5500 steps per cycle with 500 samples per cycle presumably corresponds to
# the "5000+500" tag used in the experiment names.
get_config_constant = functools.partial(
    get_config,
    **dict(
        n_steps_per_cycle=5500,
        n_samples_per_cycle=500,
        n_thinning=10,
        optimizer_name='adam',
        scheduler_name='Constant',
        step_size_init=0.01,
        step_size_sampling=1.0e-8,
    ),
)

In [None]:
# Parallel sweep (constant schedule): n chains with a single cycle each.
config_paths_p = []
for i, n in enumerate(chains_cycles):
    for seed in seeds:
        run_seed = seed + i  # base seed shifted by the sweep index
        exp_name = f'bike8/{n}x1_constant_5000+500_seed{run_seed}'
        config_path = get_config_constant(
            exp_name=exp_name,
            n_chains=n,
            n_cycles=1,
            seed=run_seed,
        )
        config_paths_p.append(config_path)

        # Skip runs whose evaluation metrics already exist so that the cell
        # can be re-executed without repeating finished experiments.
        result_path = Path('results') / exp_name / "eval_metrics.pkl"
        if result_path.exists():
            continue

        print("=" * 50)
        print(f'Running training for config: {config_path}')
        completed = subprocess.run(['python', 'train.py', '-c', str(config_path), '-d', '12'])
        # Surface failed runs instead of silently ignoring the exit code;
        # a warning (not an exception) keeps the rest of the sweep going.
        if completed.returncode != 0:
            warnings.warn(
                f'train.py exited with code {completed.returncode} for config {config_path}'
            )

In [None]:
# Sequential sweep (constant schedule): a single chain running n cycles.
config_paths_c = []
# Not used in this cell; kept in case other cells read it.
max_cycles = int(np.max(chains_cycles))
for i, n in enumerate(chains_cycles):
    for seed in seeds:
        run_seed = seed + i  # base seed shifted by the sweep index
        exp_name = f'bike8/1x{n}_constant_5000+500_seed{run_seed}'
        config_path = get_config_constant(
            exp_name=exp_name,
            n_chains=1,
            n_cycles=n,
            seed=run_seed,
        )
        # Record the config path, mirroring what the parallel sweep does with
        # config_paths_p (the list was previously declared but never filled).
        config_paths_c.append(config_path)

        # Skip runs whose evaluation metrics already exist so that the cell
        # can be re-executed without repeating finished experiments.
        result_path = Path('results') / exp_name / "eval_metrics.pkl"
        if result_path.exists():
            continue

        print("=" * 50)
        print(f'Running training for config: {config_path}')
        completed = subprocess.run(['python', 'train.py', '-c', str(config_path), '-d', '12'])
        # Surface failed runs instead of silently ignoring the exit code;
        # a warning (not an exception) keeps the rest of the sweep going.
        if completed.returncode != 0:
            warnings.warn(
                f'train.py exited with code {completed.returncode} for config {config_path}'
            )

In [None]:
# Mixed sweep (constant schedule): (n_chains, n_cycles) pairs whose product
# is always 12.
parallel_configs = [(2, 6), (3, 4), (4, 3), (6, 2)]
for i, (n_chains, n_cycles) in enumerate(parallel_configs):
    for seed in seeds:
        run_seed = seed + i  # base seed shifted by the sweep index
        exp_name = f'bike8/{n_chains}x{n_cycles}_constant_5000+500_10_seed{run_seed}'
        config_path = get_config_constant(
            exp_name=exp_name,
            n_chains=n_chains,
            n_cycles=n_cycles,
            seed=run_seed,
        )

        # Skip runs whose evaluation metrics already exist so that the cell
        # can be re-executed without repeating finished experiments.
        result_path = Path('results') / exp_name / "eval_metrics.pkl"
        if result_path.exists():
            continue

        print("=" * 50)
        print(f'Running training for config: {config_path}')
        completed = subprocess.run(['python', 'train.py', '-c', str(config_path), '-d', '12'])
        # Surface failed runs instead of silently ignoring the exit code;
        # a warning (not an exception) keeps the rest of the sweep going.
        if completed.returncode != 0:
            warnings.warn(
                f'train.py exited with code {completed.returncode} for config {config_path}'
            )

### Cyclical Schedule

In [None]:
# `get_config` preset for the cyclical-schedule experiments; no
# `step_size_sampling` is set here, so `get_config`'s default applies.
get_config_cyclical = functools.partial(
    get_config,
    **dict(
        n_steps_per_cycle=12000,
        n_samples_per_cycle=800,
        n_thinning=10,
        optimizer_name='sgd',
        scheduler_name='Cyclical',
        step_size_init=1.0e-6,
    ),
)

# Sweep values 2, 4, 6, 8 for the cyclical experiments (number of chains in
# the parallel sweep, number of cycles in the sequential sweep).
cyclical_chains_cycles = list(range(2, 9, 2))

In [None]:
# Parallel sweep (cyclical schedule): n chains with a single cycle each.
# NOTE: this rebinds `config_paths_p`, replacing the list built by the
# constant-schedule parallel sweep.
config_paths_p = []
for i, n in enumerate(cyclical_chains_cycles):
    for seed in seeds:
        run_seed = seed + i  # base seed shifted by the sweep index
        exp_name = f'bike12/parallel_cyclical_{n}_seed{run_seed}'
        config_path = get_config_cyclical(
            exp_name=exp_name,
            n_chains=n,
            n_cycles=1,
            seed=run_seed,
        )
        config_paths_p.append(config_path)

        # Skip runs whose evaluation metrics already exist so that the cell
        # can be re-executed without repeating finished experiments.
        result_path = Path('results') / exp_name / "eval_metrics.pkl"
        if result_path.exists():
            continue

        print("=" * 50)
        print(f'Running training for config: {config_path}')
        completed = subprocess.run(['python', 'train.py', '-c', str(config_path), '-d', '12'])
        # Surface failed runs instead of silently ignoring the exit code;
        # a warning (not an exception) keeps the rest of the sweep going.
        if completed.returncode != 0:
            warnings.warn(
                f'train.py exited with code {completed.returncode} for config {config_path}'
            )

In [None]:
# Sequential sweep (cyclical schedule): a single chain running n cycles.
config_paths_s = []
for i, n in enumerate(cyclical_chains_cycles):
    for seed in seeds:
        run_seed = seed + i  # base seed shifted by the sweep index
        exp_name = f'bike11/sequential_cyclical_{n}_seed{run_seed}'
        config_path = get_config_cyclical(
            exp_name=exp_name,
            n_chains=1,
            n_cycles=n,
            seed=run_seed,
        )
        # Record the config path, mirroring what the parallel sweep does with
        # config_paths_p (the list was previously declared but never filled).
        config_paths_s.append(config_path)

        # Skip runs whose evaluation metrics already exist so that the cell
        # can be re-executed without repeating finished experiments.
        result_path = Path('results') / exp_name / "eval_metrics.pkl"
        if result_path.exists():
            continue

        print("=" * 50)
        print(f'Running training for config: {config_path}')
        completed = subprocess.run(['python', 'train.py', '-c', str(config_path), '-d', '12'])
        # Surface failed runs instead of silently ignoring the exit code;
        # a warning (not an exception) keeps the rest of the sweep going.
        if completed.returncode != 0:
            warnings.warn(
                f'train.py exited with code {completed.returncode} for config {config_path}'
            )

### Cycle Length Ablation

In [None]:
# `get_config` preset for the cycle-length ablation: constant schedule with
# the cycle length (`n_steps_per_cycle`, `n_samples_per_cycle`) left open so
# that each run can supply its own.
get_config_constant_different_length = functools.partial(
    get_config,
    **dict(
        n_thinning=10,
        optimizer_name='adam',
        scheduler_name='Constant',
        step_size_init=0.01,
        step_size_sampling=1.0e-8,
    ),
)

In [None]:
# Cycle-length ablation: one chain, 8 cycles, with 2000..8000 (step 1000)
# exploration steps per cycle followed by a fixed number of sampling steps.
exploration_steps = np.arange(2000, 8001, 1000)
sampling_steps = 500
for i, n in enumerate(exploration_steps):
    for seed in seeds:
        run_seed = seed + i  # base seed shifted by the sweep index
        exp_name = f'bike13/1x8_constant_{n}+{sampling_steps}_seed{run_seed}'
        config_path = get_config_constant_different_length(
            exp_name=exp_name,
            n_chains=1,
            n_cycles=8,
            # Cast the NumPy integer to a plain int before it goes into the config.
            n_steps_per_cycle=int(n + sampling_steps),
            n_samples_per_cycle=sampling_steps,
            seed=run_seed,
        )

        # Skip runs whose evaluation metrics already exist so that the cell
        # can be re-executed without repeating finished experiments.
        result_path = Path('results') / exp_name / "eval_metrics.pkl"
        if result_path.exists():
            continue

        print("=" * 50)
        print(f'Running training for config: {config_path}')
        completed = subprocess.run(['python', 'train.py', '-c', str(config_path), '-d', '12'])
        # Surface failed runs instead of silently ignoring the exit code;
        # a warning (not an exception) keeps the rest of the sweep going.
        if completed.returncode != 0:
            warnings.warn(
                f'train.py exited with code {completed.returncode} for config {config_path}'
            )

Running training for config: experiments/csgld/bike13/1x8_constant_2000+500_seed0.yaml
2025-07-03 13:42:45,731 - __main__ - INFO - Loaded 1 Experiment(s)
2025-07-03 13:42:46,315 - datasets - INFO - PyTorch version 2.2.2+cpu available.
2025-07-03 13:42:46,316 - datasets - INFO - JAX version 0.4.28 available.
2025-07-03 13:42:46,730 - __main__ - INFO - > Running experiment: bike13/1x8_constant_2000+500_seed0
2025-07-03 13:42:46,743 - jax._src.xla_bridge - INFO - Unable to initialize backend 'cuda': 
2025-07-03 13:42:46,743 - jax._src.xla_bridge - INFO - Unable to initialize backend 'rocm': module 'jaxlib.xla_extension' has no attribute 'GpuAllocatorConfig'
2025-07-03 13:42:46,744 - jax._src.xla_bridge - INFO - Unable to initialize backend 'tpu': INTERNAL: Failed to open libtpu.so: libtpu.so: cannot open shared object file: No such file or directory
2025-07-03 13:42:46,798 - src.training.trainer - INFO - > Setting up directories...


Sampling: 100%|██████████| 20000/20000 [00:24<00:00, 803.40it/s]


Running training for config: experiments/csgld/bike13/1x8_constant_2000+500_seed42.yaml
2025-07-03 13:43:32,884 - __main__ - INFO - Loaded 1 Experiment(s)
2025-07-03 13:43:33,487 - datasets - INFO - PyTorch version 2.2.2+cpu available.
2025-07-03 13:43:33,487 - datasets - INFO - JAX version 0.4.28 available.
2025-07-03 13:43:33,921 - __main__ - INFO - > Running experiment: bike13/1x8_constant_2000+500_seed42
2025-07-03 13:43:33,934 - jax._src.xla_bridge - INFO - Unable to initialize backend 'cuda': 
2025-07-03 13:43:33,934 - jax._src.xla_bridge - INFO - Unable to initialize backend 'rocm': module 'jaxlib.xla_extension' has no attribute 'GpuAllocatorConfig'
2025-07-03 13:43:33,935 - jax._src.xla_bridge - INFO - Unable to initialize backend 'tpu': INTERNAL: Failed to open libtpu.so: libtpu.so: cannot open shared object file: No such file or directory
2025-07-03 13:43:33,991 - src.training.trainer - INFO - > Setting up directories...


Sampling: 100%|██████████| 20000/20000 [00:25<00:00, 799.48it/s]


Running training for config: experiments/csgld/bike13/1x8_constant_2000+500_seed221.yaml
2025-07-03 13:44:20,284 - __main__ - INFO - Loaded 1 Experiment(s)
2025-07-03 13:44:20,867 - datasets - INFO - PyTorch version 2.2.2+cpu available.
2025-07-03 13:44:20,867 - datasets - INFO - JAX version 0.4.28 available.
2025-07-03 13:44:21,250 - __main__ - INFO - > Running experiment: bike13/1x8_constant_2000+500_seed221
2025-07-03 13:44:21,258 - jax._src.xla_bridge - INFO - Unable to initialize backend 'cuda': 
2025-07-03 13:44:21,259 - jax._src.xla_bridge - INFO - Unable to initialize backend 'rocm': module 'jaxlib.xla_extension' has no attribute 'GpuAllocatorConfig'
2025-07-03 13:44:21,259 - jax._src.xla_bridge - INFO - Unable to initialize backend 'tpu': INTERNAL: Failed to open libtpu.so: libtpu.so: cannot open shared object file: No such file or directory
2025-07-03 13:44:21,305 - src.training.trainer - INFO - > Setting up directories...


Sampling: 100%|██████████| 20000/20000 [00:24<00:00, 801.03it/s]


Running training for config: experiments/csgld/bike13/1x8_constant_2000+500_seed476.yaml
2025-07-03 13:45:07,614 - __main__ - INFO - Loaded 1 Experiment(s)
2025-07-03 13:45:08,172 - datasets - INFO - PyTorch version 2.2.2+cpu available.
2025-07-03 13:45:08,173 - datasets - INFO - JAX version 0.4.28 available.
2025-07-03 13:45:08,598 - __main__ - INFO - > Running experiment: bike13/1x8_constant_2000+500_seed476
2025-07-03 13:45:08,608 - jax._src.xla_bridge - INFO - Unable to initialize backend 'cuda': 
2025-07-03 13:45:08,608 - jax._src.xla_bridge - INFO - Unable to initialize backend 'rocm': module 'jaxlib.xla_extension' has no attribute 'GpuAllocatorConfig'
2025-07-03 13:45:08,609 - jax._src.xla_bridge - INFO - Unable to initialize backend 'tpu': INTERNAL: Failed to open libtpu.so: libtpu.so: cannot open shared object file: No such file or directory
2025-07-03 13:45:08,665 - src.training.trainer - INFO - > Setting up directories...


Sampling: 100%|██████████| 20000/20000 [00:25<00:00, 793.87it/s]


Running training for config: experiments/csgld/bike13/1x8_constant_2000+500_seed1453.yaml
2025-07-03 13:45:55,637 - __main__ - INFO - Loaded 1 Experiment(s)
2025-07-03 13:45:56,260 - datasets - INFO - PyTorch version 2.2.2+cpu available.
2025-07-03 13:45:56,261 - datasets - INFO - JAX version 0.4.28 available.
2025-07-03 13:45:56,704 - __main__ - INFO - > Running experiment: bike13/1x8_constant_2000+500_seed1453
2025-07-03 13:45:56,714 - jax._src.xla_bridge - INFO - Unable to initialize backend 'cuda': 
2025-07-03 13:45:56,714 - jax._src.xla_bridge - INFO - Unable to initialize backend 'rocm': module 'jaxlib.xla_extension' has no attribute 'GpuAllocatorConfig'
2025-07-03 13:45:56,714 - jax._src.xla_bridge - INFO - Unable to initialize backend 'tpu': INTERNAL: Failed to open libtpu.so: libtpu.so: cannot open shared object file: No such file or directory
2025-07-03 13:45:56,760 - src.training.trainer - INFO - > Setting up directories...


Sampling: 100%|██████████| 20000/20000 [00:25<00:00, 784.45it/s]


Running training for config: experiments/csgld/bike13/1x8_constant_2000+500_seed1644.yaml
2025-07-03 13:46:43,768 - __main__ - INFO - Loaded 1 Experiment(s)
2025-07-03 13:46:44,341 - datasets - INFO - PyTorch version 2.2.2+cpu available.
2025-07-03 13:46:44,342 - datasets - INFO - JAX version 0.4.28 available.
2025-07-03 13:46:44,748 - __main__ - INFO - > Running experiment: bike13/1x8_constant_2000+500_seed1644
2025-07-03 13:46:44,758 - jax._src.xla_bridge - INFO - Unable to initialize backend 'cuda': 
2025-07-03 13:46:44,758 - jax._src.xla_bridge - INFO - Unable to initialize backend 'rocm': module 'jaxlib.xla_extension' has no attribute 'GpuAllocatorConfig'
2025-07-03 13:46:44,759 - jax._src.xla_bridge - INFO - Unable to initialize backend 'tpu': INTERNAL: Failed to open libtpu.so: libtpu.so: cannot open shared object file: No such file or directory
2025-07-03 13:46:44,803 - src.training.trainer - INFO - > Setting up directories...


Sampling: 100%|██████████| 20000/20000 [00:25<00:00, 794.43it/s]


Running training for config: experiments/csgld/bike13/1x8_constant_2000+500_seed1840.yaml
2025-07-03 13:47:32,256 - __main__ - INFO - Loaded 1 Experiment(s)
2025-07-03 13:47:32,822 - datasets - INFO - PyTorch version 2.2.2+cpu available.
2025-07-03 13:47:32,823 - datasets - INFO - JAX version 0.4.28 available.
2025-07-03 13:47:33,229 - __main__ - INFO - > Running experiment: bike13/1x8_constant_2000+500_seed1840
2025-07-03 13:47:33,239 - jax._src.xla_bridge - INFO - Unable to initialize backend 'cuda': 
2025-07-03 13:47:33,239 - jax._src.xla_bridge - INFO - Unable to initialize backend 'rocm': module 'jaxlib.xla_extension' has no attribute 'GpuAllocatorConfig'
2025-07-03 13:47:33,239 - jax._src.xla_bridge - INFO - Unable to initialize backend 'tpu': INTERNAL: Failed to open libtpu.so: libtpu.so: cannot open shared object file: No such file or directory
2025-07-03 13:47:33,288 - src.training.trainer - INFO - > Setting up directories...


Sampling: 100%|██████████| 20000/20000 [00:24<00:00, 810.55it/s]


Running training for config: experiments/csgld/bike13/1x8_constant_2000+500_seed1973.yaml
2025-07-03 13:48:20,833 - __main__ - INFO - Loaded 1 Experiment(s)
2025-07-03 13:48:21,362 - datasets - INFO - PyTorch version 2.2.2+cpu available.
2025-07-03 13:48:21,363 - datasets - INFO - JAX version 0.4.28 available.
2025-07-03 13:48:21,745 - __main__ - INFO - > Running experiment: bike13/1x8_constant_2000+500_seed1973
2025-07-03 13:48:21,754 - jax._src.xla_bridge - INFO - Unable to initialize backend 'cuda': 
2025-07-03 13:48:21,754 - jax._src.xla_bridge - INFO - Unable to initialize backend 'rocm': module 'jaxlib.xla_extension' has no attribute 'GpuAllocatorConfig'
2025-07-03 13:48:21,755 - jax._src.xla_bridge - INFO - Unable to initialize backend 'tpu': INTERNAL: Failed to open libtpu.so: libtpu.so: cannot open shared object file: No such file or directory
2025-07-03 13:48:21,799 - src.training.trainer - INFO - > Setting up directories...


Sampling: 100%|██████████| 20000/20000 [00:25<00:00, 784.96it/s]


Running training for config: experiments/csgld/bike13/1x8_constant_2000+500_seed2025.yaml
2025-07-03 13:49:08,878 - __main__ - INFO - Loaded 1 Experiment(s)
2025-07-03 13:49:09,488 - datasets - INFO - PyTorch version 2.2.2+cpu available.
2025-07-03 13:49:09,489 - datasets - INFO - JAX version 0.4.28 available.
2025-07-03 13:49:09,943 - __main__ - INFO - > Running experiment: bike13/1x8_constant_2000+500_seed2025
2025-07-03 13:49:09,953 - jax._src.xla_bridge - INFO - Unable to initialize backend 'cuda': 
2025-07-03 13:49:09,953 - jax._src.xla_bridge - INFO - Unable to initialize backend 'rocm': module 'jaxlib.xla_extension' has no attribute 'GpuAllocatorConfig'
2025-07-03 13:49:09,953 - jax._src.xla_bridge - INFO - Unable to initialize backend 'tpu': INTERNAL: Failed to open libtpu.so: libtpu.so: cannot open shared object file: No such file or directory
2025-07-03 13:49:10,005 - src.training.trainer - INFO - > Setting up directories...


Sampling: 100%|██████████| 20000/20000 [00:24<00:00, 801.27it/s]


Running training for config: experiments/csgld/bike13/1x8_constant_2000+500_seed2100.yaml
2025-07-03 13:49:56,146 - __main__ - INFO - Loaded 1 Experiment(s)
2025-07-03 13:49:56,693 - datasets - INFO - PyTorch version 2.2.2+cpu available.
2025-07-03 13:49:56,694 - datasets - INFO - JAX version 0.4.28 available.
2025-07-03 13:49:57,074 - __main__ - INFO - > Running experiment: bike13/1x8_constant_2000+500_seed2100
2025-07-03 13:49:57,084 - jax._src.xla_bridge - INFO - Unable to initialize backend 'cuda': 
2025-07-03 13:49:57,084 - jax._src.xla_bridge - INFO - Unable to initialize backend 'rocm': module 'jaxlib.xla_extension' has no attribute 'GpuAllocatorConfig'
2025-07-03 13:49:57,085 - jax._src.xla_bridge - INFO - Unable to initialize backend 'tpu': INTERNAL: Failed to open libtpu.so: libtpu.so: cannot open shared object file: No such file or directory
2025-07-03 13:49:57,129 - src.training.trainer - INFO - > Setting up directories...


Sampling: 100%|██████████| 20000/20000 [00:25<00:00, 771.63it/s]


Running training for config: experiments/csgld/bike13/1x8_constant_3000+500_seed1.yaml
2025-07-03 13:50:46,013 - __main__ - INFO - Loaded 1 Experiment(s)
2025-07-03 13:50:46,574 - datasets - INFO - PyTorch version 2.2.2+cpu available.
2025-07-03 13:50:46,575 - datasets - INFO - JAX version 0.4.28 available.
2025-07-03 13:50:46,959 - __main__ - INFO - > Running experiment: bike13/1x8_constant_3000+500_seed1
2025-07-03 13:50:46,969 - jax._src.xla_bridge - INFO - Unable to initialize backend 'cuda': 
2025-07-03 13:50:46,969 - jax._src.xla_bridge - INFO - Unable to initialize backend 'rocm': module 'jaxlib.xla_extension' has no attribute 'GpuAllocatorConfig'
2025-07-03 13:50:46,969 - jax._src.xla_bridge - INFO - Unable to initialize backend 'tpu': INTERNAL: Failed to open libtpu.so: libtpu.so: cannot open shared object file: No such file or directory
2025-07-03 13:50:47,015 - src.training.trainer - INFO - > Setting up directories...


Sampling: 100%|██████████| 28000/28000 [00:37<00:00, 748.91it/s]


Running training for config: experiments/csgld/bike13/1x8_constant_3000+500_seed43.yaml
2025-07-03 13:51:46,150 - __main__ - INFO - Loaded 1 Experiment(s)
2025-07-03 13:51:46,754 - datasets - INFO - PyTorch version 2.2.2+cpu available.
2025-07-03 13:51:46,755 - datasets - INFO - JAX version 0.4.28 available.
2025-07-03 13:51:47,150 - __main__ - INFO - > Running experiment: bike13/1x8_constant_3000+500_seed43
2025-07-03 13:51:47,160 - jax._src.xla_bridge - INFO - Unable to initialize backend 'cuda': 
2025-07-03 13:51:47,160 - jax._src.xla_bridge - INFO - Unable to initialize backend 'rocm': module 'jaxlib.xla_extension' has no attribute 'GpuAllocatorConfig'
2025-07-03 13:51:47,160 - jax._src.xla_bridge - INFO - Unable to initialize backend 'tpu': INTERNAL: Failed to open libtpu.so: libtpu.so: cannot open shared object file: No such file or directory
2025-07-03 13:51:47,206 - src.training.trainer - INFO - > Setting up directories...


Sampling: 100%|██████████| 28000/28000 [00:35<00:00, 796.81it/s]


Running training for config: experiments/csgld/bike13/1x8_constant_3000+500_seed222.yaml
2025-07-03 13:52:43,998 - __main__ - INFO - Loaded 1 Experiment(s)
2025-07-03 13:52:44,539 - datasets - INFO - PyTorch version 2.2.2+cpu available.
2025-07-03 13:52:44,540 - datasets - INFO - JAX version 0.4.28 available.
2025-07-03 13:52:44,921 - __main__ - INFO - > Running experiment: bike13/1x8_constant_3000+500_seed222
2025-07-03 13:52:44,931 - jax._src.xla_bridge - INFO - Unable to initialize backend 'cuda': 
2025-07-03 13:52:44,931 - jax._src.xla_bridge - INFO - Unable to initialize backend 'rocm': module 'jaxlib.xla_extension' has no attribute 'GpuAllocatorConfig'
2025-07-03 13:52:44,932 - jax._src.xla_bridge - INFO - Unable to initialize backend 'tpu': INTERNAL: Failed to open libtpu.so: libtpu.so: cannot open shared object file: No such file or directory
2025-07-03 13:52:44,976 - src.training.trainer - INFO - > Setting up directories...


Sampling: 100%|██████████| 28000/28000 [00:35<00:00, 786.34it/s]


Running training for config: experiments/csgld/bike13/1x8_constant_3000+500_seed477.yaml
2025-07-03 13:53:42,531 - __main__ - INFO - Loaded 1 Experiment(s)
2025-07-03 13:53:43,095 - datasets - INFO - PyTorch version 2.2.2+cpu available.
2025-07-03 13:53:43,096 - datasets - INFO - JAX version 0.4.28 available.
2025-07-03 13:53:43,552 - __main__ - INFO - > Running experiment: bike13/1x8_constant_3000+500_seed477
2025-07-03 13:53:43,562 - jax._src.xla_bridge - INFO - Unable to initialize backend 'cuda': 
2025-07-03 13:53:43,563 - jax._src.xla_bridge - INFO - Unable to initialize backend 'rocm': module 'jaxlib.xla_extension' has no attribute 'GpuAllocatorConfig'
2025-07-03 13:53:43,563 - jax._src.xla_bridge - INFO - Unable to initialize backend 'tpu': INTERNAL: Failed to open libtpu.so: libtpu.so: cannot open shared object file: No such file or directory
2025-07-03 13:53:43,614 - src.training.trainer - INFO - > Setting up directories...


Sampling: 100%|██████████| 28000/28000 [00:36<00:00, 771.61it/s]


Running training for config: experiments/csgld/bike13/1x8_constant_3000+500_seed1454.yaml
2025-07-03 13:54:41,702 - __main__ - INFO - Loaded 1 Experiment(s)
2025-07-03 13:54:42,246 - datasets - INFO - PyTorch version 2.2.2+cpu available.
2025-07-03 13:54:42,247 - datasets - INFO - JAX version 0.4.28 available.
2025-07-03 13:54:42,624 - __main__ - INFO - > Running experiment: bike13/1x8_constant_3000+500_seed1454
2025-07-03 13:54:42,634 - jax._src.xla_bridge - INFO - Unable to initialize backend 'cuda': 
2025-07-03 13:54:42,634 - jax._src.xla_bridge - INFO - Unable to initialize backend 'rocm': module 'jaxlib.xla_extension' has no attribute 'GpuAllocatorConfig'
2025-07-03 13:54:42,634 - jax._src.xla_bridge - INFO - Unable to initialize backend 'tpu': INTERNAL: Failed to open libtpu.so: libtpu.so: cannot open shared object file: No such file or directory
2025-07-03 13:54:42,679 - src.training.trainer - INFO - > Setting up directories...


Sampling: 100%|██████████| 28000/28000 [00:36<00:00, 775.49it/s]


Running training for config: experiments/csgld/bike13/1x8_constant_3000+500_seed1645.yaml
2025-07-03 13:55:40,691 - __main__ - INFO - Loaded 1 Experiment(s)
2025-07-03 13:55:41,384 - datasets - INFO - PyTorch version 2.2.2+cpu available.
2025-07-03 13:55:41,385 - datasets - INFO - JAX version 0.4.28 available.
2025-07-03 13:55:41,844 - __main__ - INFO - > Running experiment: bike13/1x8_constant_3000+500_seed1645
2025-07-03 13:55:41,854 - jax._src.xla_bridge - INFO - Unable to initialize backend 'cuda': 
2025-07-03 13:55:41,854 - jax._src.xla_bridge - INFO - Unable to initialize backend 'rocm': module 'jaxlib.xla_extension' has no attribute 'GpuAllocatorConfig'
2025-07-03 13:55:41,855 - jax._src.xla_bridge - INFO - Unable to initialize backend 'tpu': INTERNAL: Failed to open libtpu.so: libtpu.so: cannot open shared object file: No such file or directory
2025-07-03 13:55:41,901 - src.training.trainer - INFO - > Setting up directories...


Sampling: 100%|██████████| 28000/28000 [00:35<00:00, 793.38it/s]


Running training for config: experiments/csgld/bike13/1x8_constant_3000+500_seed1841.yaml
2025-07-03 13:56:39,077 - __main__ - INFO - Loaded 1 Experiment(s)
2025-07-03 13:56:39,625 - datasets - INFO - PyTorch version 2.2.2+cpu available.
2025-07-03 13:56:39,625 - datasets - INFO - JAX version 0.4.28 available.
2025-07-03 13:56:40,029 - __main__ - INFO - > Running experiment: bike13/1x8_constant_3000+500_seed1841
2025-07-03 13:56:40,038 - jax._src.xla_bridge - INFO - Unable to initialize backend 'cuda': 
2025-07-03 13:56:40,038 - jax._src.xla_bridge - INFO - Unable to initialize backend 'rocm': module 'jaxlib.xla_extension' has no attribute 'GpuAllocatorConfig'
2025-07-03 13:56:40,039 - jax._src.xla_bridge - INFO - Unable to initialize backend 'tpu': INTERNAL: Failed to open libtpu.so: libtpu.so: cannot open shared object file: No such file or directory
2025-07-03 13:56:40,085 - src.training.trainer - INFO - > Setting up directories...


Sampling: 100%|██████████| 28000/28000 [00:34<00:00, 804.56it/s]


Running training for config: experiments/csgld/bike13/1x8_constant_3000+500_seed1974.yaml
2025-07-03 13:57:37,839 - __main__ - INFO - Loaded 1 Experiment(s)
2025-07-03 13:57:38,567 - datasets - INFO - PyTorch version 2.2.2+cpu available.
2025-07-03 13:57:38,567 - datasets - INFO - JAX version 0.4.28 available.
2025-07-03 13:57:39,087 - __main__ - INFO - > Running experiment: bike13/1x8_constant_3000+500_seed1974
2025-07-03 13:57:39,097 - jax._src.xla_bridge - INFO - Unable to initialize backend 'cuda': 
2025-07-03 13:57:39,097 - jax._src.xla_bridge - INFO - Unable to initialize backend 'rocm': module 'jaxlib.xla_extension' has no attribute 'GpuAllocatorConfig'
2025-07-03 13:57:39,098 - jax._src.xla_bridge - INFO - Unable to initialize backend 'tpu': INTERNAL: Failed to open libtpu.so: libtpu.so: cannot open shared object file: No such file or directory
2025-07-03 13:57:39,145 - src.training.trainer - INFO - > Setting up directories...


Sampling: 100%|██████████| 28000/28000 [00:35<00:00, 796.89it/s]


Running training for config: experiments/csgld/bike13/1x8_constant_3000+500_seed2026.yaml
2025-07-03 13:58:36,688 - __main__ - INFO - Loaded 1 Experiment(s)
2025-07-03 13:58:37,238 - datasets - INFO - PyTorch version 2.2.2+cpu available.
2025-07-03 13:58:37,239 - datasets - INFO - JAX version 0.4.28 available.
2025-07-03 13:58:37,696 - __main__ - INFO - > Running experiment: bike13/1x8_constant_3000+500_seed2026
2025-07-03 13:58:37,708 - jax._src.xla_bridge - INFO - Unable to initialize backend 'cuda': 
2025-07-03 13:58:37,708 - jax._src.xla_bridge - INFO - Unable to initialize backend 'rocm': module 'jaxlib.xla_extension' has no attribute 'GpuAllocatorConfig'
2025-07-03 13:58:37,709 - jax._src.xla_bridge - INFO - Unable to initialize backend 'tpu': INTERNAL: Failed to open libtpu.so: libtpu.so: cannot open shared object file: No such file or directory
2025-07-03 13:58:37,763 - src.training.trainer - INFO - > Setting up directories...


Sampling: 100%|██████████| 28000/28000 [00:34<00:00, 817.68it/s]


Running training for config: experiments/csgld/bike13/1x8_constant_3000+500_seed2101.yaml
2025-07-03 13:59:32,871 - __main__ - INFO - Loaded 1 Experiment(s)
2025-07-03 13:59:33,415 - datasets - INFO - PyTorch version 2.2.2+cpu available.
2025-07-03 13:59:33,416 - datasets - INFO - JAX version 0.4.28 available.
2025-07-03 13:59:33,810 - __main__ - INFO - > Running experiment: bike13/1x8_constant_3000+500_seed2101
2025-07-03 13:59:33,820 - jax._src.xla_bridge - INFO - Unable to initialize backend 'cuda': 
2025-07-03 13:59:33,820 - jax._src.xla_bridge - INFO - Unable to initialize backend 'rocm': module 'jaxlib.xla_extension' has no attribute 'GpuAllocatorConfig'
2025-07-03 13:59:33,821 - jax._src.xla_bridge - INFO - Unable to initialize backend 'tpu': INTERNAL: Failed to open libtpu.so: libtpu.so: cannot open shared object file: No such file or directory
2025-07-03 13:59:33,868 - src.training.trainer - INFO - > Setting up directories...


Sampling: 100%|██████████| 28000/28000 [00:34<00:00, 815.16it/s]


Running training for config: experiments/csgld/bike13/1x8_constant_4000+500_seed2.yaml
2025-07-03 14:00:30,807 - __main__ - INFO - Loaded 1 Experiment(s)
2025-07-03 14:00:31,507 - datasets - INFO - PyTorch version 2.2.2+cpu available.
2025-07-03 14:00:31,508 - datasets - INFO - JAX version 0.4.28 available.
2025-07-03 14:00:31,927 - __main__ - INFO - > Running experiment: bike13/1x8_constant_4000+500_seed2
2025-07-03 14:00:31,937 - jax._src.xla_bridge - INFO - Unable to initialize backend 'cuda': 
2025-07-03 14:00:31,937 - jax._src.xla_bridge - INFO - Unable to initialize backend 'rocm': module 'jaxlib.xla_extension' has no attribute 'GpuAllocatorConfig'
2025-07-03 14:00:31,937 - jax._src.xla_bridge - INFO - Unable to initialize backend 'tpu': INTERNAL: Failed to open libtpu.so: libtpu.so: cannot open shared object file: No such file or directory
2025-07-03 14:00:31,984 - src.training.trainer - INFO - > Setting up directories...


Sampling: 100%|██████████| 36000/36000 [00:44<00:00, 814.85it/s]


Running training for config: experiments/csgld/bike13/1x8_constant_4000+500_seed44.yaml
2025-07-03 14:01:38,857 - __main__ - INFO - Loaded 1 Experiment(s)
2025-07-03 14:01:39,513 - datasets - INFO - PyTorch version 2.2.2+cpu available.
2025-07-03 14:01:39,514 - datasets - INFO - JAX version 0.4.28 available.
2025-07-03 14:01:39,978 - __main__ - INFO - > Running experiment: bike13/1x8_constant_4000+500_seed44
2025-07-03 14:01:39,988 - jax._src.xla_bridge - INFO - Unable to initialize backend 'cuda': 
2025-07-03 14:01:39,988 - jax._src.xla_bridge - INFO - Unable to initialize backend 'rocm': module 'jaxlib.xla_extension' has no attribute 'GpuAllocatorConfig'
2025-07-03 14:01:39,988 - jax._src.xla_bridge - INFO - Unable to initialize backend 'tpu': INTERNAL: Failed to open libtpu.so: libtpu.so: cannot open shared object file: No such file or directory
2025-07-03 14:01:40,035 - src.training.trainer - INFO - > Setting up directories...


Sampling: 100%|██████████| 36000/36000 [00:43<00:00, 820.31it/s]


Running training for config: experiments/csgld/bike13/1x8_constant_4000+500_seed223.yaml
2025-07-03 14:02:44,629 - __main__ - INFO - Loaded 1 Experiment(s)
2025-07-03 14:02:45,169 - datasets - INFO - PyTorch version 2.2.2+cpu available.
2025-07-03 14:02:45,170 - datasets - INFO - JAX version 0.4.28 available.
2025-07-03 14:02:45,556 - __main__ - INFO - > Running experiment: bike13/1x8_constant_4000+500_seed223
2025-07-03 14:02:45,565 - jax._src.xla_bridge - INFO - Unable to initialize backend 'cuda': 
2025-07-03 14:02:45,565 - jax._src.xla_bridge - INFO - Unable to initialize backend 'rocm': module 'jaxlib.xla_extension' has no attribute 'GpuAllocatorConfig'
2025-07-03 14:02:45,565 - jax._src.xla_bridge - INFO - Unable to initialize backend 'tpu': INTERNAL: Failed to open libtpu.so: libtpu.so: cannot open shared object file: No such file or directory
2025-07-03 14:02:45,609 - src.training.trainer - INFO - > Setting up directories...


Sampling: 100%|██████████| 36000/36000 [00:44<00:00, 808.49it/s]


Running training for config: experiments/csgld/bike13/1x8_constant_4000+500_seed478.yaml
2025-07-03 14:03:51,408 - __main__ - INFO - Loaded 1 Experiment(s)
2025-07-03 14:03:52,005 - datasets - INFO - PyTorch version 2.2.2+cpu available.
2025-07-03 14:03:52,006 - datasets - INFO - JAX version 0.4.28 available.
2025-07-03 14:03:52,455 - __main__ - INFO - > Running experiment: bike13/1x8_constant_4000+500_seed478
2025-07-03 14:03:52,465 - jax._src.xla_bridge - INFO - Unable to initialize backend 'cuda': 
2025-07-03 14:03:52,465 - jax._src.xla_bridge - INFO - Unable to initialize backend 'rocm': module 'jaxlib.xla_extension' has no attribute 'GpuAllocatorConfig'
2025-07-03 14:03:52,465 - jax._src.xla_bridge - INFO - Unable to initialize backend 'tpu': INTERNAL: Failed to open libtpu.so: libtpu.so: cannot open shared object file: No such file or directory
2025-07-03 14:03:52,516 - src.training.trainer - INFO - > Setting up directories...


Sampling: 100%|██████████| 36000/36000 [00:46<00:00, 776.84it/s]


Running training for config: experiments/csgld/bike13/1x8_constant_4000+500_seed1455.yaml
2025-07-03 14:04:59,833 - __main__ - INFO - Loaded 1 Experiment(s)
2025-07-03 14:05:00,369 - datasets - INFO - PyTorch version 2.2.2+cpu available.
2025-07-03 14:05:00,370 - datasets - INFO - JAX version 0.4.28 available.
2025-07-03 14:05:00,743 - __main__ - INFO - > Running experiment: bike13/1x8_constant_4000+500_seed1455
2025-07-03 14:05:00,753 - jax._src.xla_bridge - INFO - Unable to initialize backend 'cuda': 
2025-07-03 14:05:00,753 - jax._src.xla_bridge - INFO - Unable to initialize backend 'rocm': module 'jaxlib.xla_extension' has no attribute 'GpuAllocatorConfig'
2025-07-03 14:05:00,753 - jax._src.xla_bridge - INFO - Unable to initialize backend 'tpu': INTERNAL: Failed to open libtpu.so: libtpu.so: cannot open shared object file: No such file or directory
2025-07-03 14:05:00,797 - src.training.trainer - INFO - > Setting up directories...


Sampling: 100%|██████████| 36000/36000 [00:42<00:00, 839.92it/s]


Running training for config: experiments/csgld/bike13/1x8_constant_4000+500_seed1646.yaml
2025-07-03 14:06:05,026 - __main__ - INFO - Loaded 1 Experiment(s)
2025-07-03 14:06:05,617 - datasets - INFO - PyTorch version 2.2.2+cpu available.
2025-07-03 14:06:05,618 - datasets - INFO - JAX version 0.4.28 available.
2025-07-03 14:06:05,997 - __main__ - INFO - > Running experiment: bike13/1x8_constant_4000+500_seed1646
2025-07-03 14:06:06,006 - jax._src.xla_bridge - INFO - Unable to initialize backend 'cuda': 
2025-07-03 14:06:06,006 - jax._src.xla_bridge - INFO - Unable to initialize backend 'rocm': module 'jaxlib.xla_extension' has no attribute 'GpuAllocatorConfig'
2025-07-03 14:06:06,007 - jax._src.xla_bridge - INFO - Unable to initialize backend 'tpu': INTERNAL: Failed to open libtpu.so: libtpu.so: cannot open shared object file: No such file or directory
2025-07-03 14:06:06,052 - src.training.trainer - INFO - > Setting up directories...


Sampling: 100%|██████████| 36000/36000 [00:44<00:00, 802.46it/s]


Running training for config: experiments/csgld/bike13/1x8_constant_4000+500_seed1842.yaml
2025-07-03 14:07:12,627 - __main__ - INFO - Loaded 1 Experiment(s)
2025-07-03 14:07:13,195 - datasets - INFO - PyTorch version 2.2.2+cpu available.
2025-07-03 14:07:13,195 - datasets - INFO - JAX version 0.4.28 available.
2025-07-03 14:07:13,591 - __main__ - INFO - > Running experiment: bike13/1x8_constant_4000+500_seed1842
2025-07-03 14:07:13,601 - jax._src.xla_bridge - INFO - Unable to initialize backend 'cuda': 
2025-07-03 14:07:13,601 - jax._src.xla_bridge - INFO - Unable to initialize backend 'rocm': module 'jaxlib.xla_extension' has no attribute 'GpuAllocatorConfig'
2025-07-03 14:07:13,601 - jax._src.xla_bridge - INFO - Unable to initialize backend 'tpu': INTERNAL: Failed to open libtpu.so: libtpu.so: cannot open shared object file: No such file or directory
2025-07-03 14:07:13,652 - src.training.trainer - INFO - > Setting up directories...


Sampling: 100%|██████████| 36000/36000 [00:44<00:00, 815.20it/s]


Running training for config: experiments/csgld/bike13/1x8_constant_4000+500_seed1975.yaml
2025-07-03 14:08:19,565 - __main__ - INFO - Loaded 1 Experiment(s)
2025-07-03 14:08:20,232 - datasets - INFO - PyTorch version 2.2.2+cpu available.
2025-07-03 14:08:20,232 - datasets - INFO - JAX version 0.4.28 available.
2025-07-03 14:08:20,671 - __main__ - INFO - > Running experiment: bike13/1x8_constant_4000+500_seed1975
2025-07-03 14:08:20,681 - jax._src.xla_bridge - INFO - Unable to initialize backend 'cuda': 
2025-07-03 14:08:20,681 - jax._src.xla_bridge - INFO - Unable to initialize backend 'rocm': module 'jaxlib.xla_extension' has no attribute 'GpuAllocatorConfig'
2025-07-03 14:08:20,682 - jax._src.xla_bridge - INFO - Unable to initialize backend 'tpu': INTERNAL: Failed to open libtpu.so: libtpu.so: cannot open shared object file: No such file or directory
2025-07-03 14:08:20,726 - src.training.trainer - INFO - > Setting up directories...


Sampling:  55%|█████▌    | 19959/36000 [00:25<00:19, 841.28it/s]

In [None]:
# Sweep over exploration lengths for the constant-schedule configuration.
#
# For every exploration length (2000..8000 in steps of 1000) and every base
# seed, a config is built with `get_config_constant_different_length` and
# `train.py` is launched on it in a subprocess, unless the run's
# `eval_metrics.pkl` already exists under `results/`.
#
# NOTE: `seeds` is not assigned in this cell; it must already be defined in the
# kernel by an earlier cell before this one is run.
# NOTE(review): the experiment name contains "8x1" while `n_chains` and the
# `-d` argument are both 12 -- confirm which is intended. The name is left
# unchanged here so existing result folders keep matching.
exploration_steps = np.arange(2000, 8001, 1000)
sampling_steps = 500
for i, n in enumerate(exploration_steps):
    for seed in seeds:
        # The sweep index is added to the base seed, so each exploration
        # length runs with its own shifted set of seeds.
        run_seed = seed + i
        exp_name = f'bike10/8x1_constant_{n}+{sampling_steps}_10_seed{run_seed}'
        # The config is built before the "already done" check below, so it is
        # (re)generated even for runs that end up being skipped.
        config_path = get_config_constant_different_length(
            exp_name=exp_name,
            n_chains=12,
            n_cycles=1,
            n_steps_per_cycle=int(n),  # plain int instead of a numpy integer
            n_samples_per_cycle=sampling_steps,
            seed=run_seed
        )
        # Skip runs whose evaluation metrics are already on disk.
        result_path = Path('results') / exp_name / "eval_metrics.pkl"
        if result_path.exists():
            continue
        print("=" * 50)
        print(f'Running training for config: {config_path}')
        completed = subprocess.run(['python', 'train.py', '-c', str(config_path), '-d', '12'])
        # Report a failed run explicitly instead of silently moving on; the
        # sweep itself keeps going so one bad run does not block the rest.
        if completed.returncode != 0:
            print(f'Training FAILED (exit code {completed.returncode}) for config: {config_path}')

In [None]:
# Variant of `get_config` with the cyclical-schedule settings pre-bound:
# thinning of 10, the 'sgd' optimizer, the 'Cyclical' scheduler, and the two
# step sizes below. Any remaining arguments are passed when the partial is
# called; keywords bound here can still be overridden at call time.
get_config_cyclical_different_length = functools.partial(
    get_config,
    **{
        'n_thinning': 10,
        'optimizer_name': 'sgd',
        'scheduler_name': 'Cyclical',
        'step_size_init': 0.01,
        'step_size_sampling': 2.0e-6,
    },
)