In [None]:
%cd ..
import copy
import functools
import subprocess
import sys
from pathlib import Path

import numpy as np

from src.config.core import Config

### Config template

In [None]:
# Baseline experiment configuration; get_config() derives per-run variants from it.
CONFIG_DICT = dict(
    saving_dir='results/',
    experiment_name='bike',
    data=dict(
        path='data/bikesharing.data',
        source='local',
        data_type='tabular',
        task='regr',
        target_column=None,
        target_len=1,
        features=None,
        datapoint_limit=None,
        normalize=True,
        train_split=0.8,
        valid_split=0.0,
        test_split=0.2,
    ),
    model=dict(
        model='FCN',
        hidden_structure=[16, 16, 16, 2],
        activation='relu',
        use_bias=True,
    ),
    training=dict(
        # meaningless placeholder
        warmstart=dict(
            include=False,
            optimizer_config=dict(name="sgd", parameters={}),
        ),
        sampler=dict(
            name='sgld',
            warmup_steps=0,
            n_chains=4,
            n_samples=24000,  # total steps
            batch_size=512,
            step_size_init=2.0e-6,  # step_size_explore
            n_thinning=1,
            keep_warmup=False,
            optimizer_name='sgd',
            prior_config=dict(name='StandardNormal'),
            scheduler_config=dict(
                name='cosine',
                n_samples_per_cycle=200,
                parameters=dict(n_cycles=4),
            ),
        ),
    ),
    rng=1446,
    logging=False,
)

In [None]:
def get_config(
        exp_name: str = 'bike',
        n_chains: int = 4,
        n_cycles: int = 4,
        n_steps_per_cycle: int = 2000,
        n_samples_per_cycle: int = 200,
        n_thinning: int = 1,
        optimizer_name: str = 'adam',
        scheduler_name: str = 'cosine',
        step_size_init: float = 2.0e-6,
        step_size_sampling: float | None = None,
        seed: int = 0
    ):
    n_samples = n_cycles * n_steps_per_cycle

    new_config_dict = CONFIG_DICT.copy()
    new_config_dict['experiment_name'] = exp_name
    new_config_dict['training']['sampler'] = {
        'name': 'sgld',
        'warmup_steps': 0,
        'keep_warmup': False,
        'n_chains': n_chains,
        'n_samples': n_samples,  # total steps
        'batch_size': 512,
        'step_size_init': step_size_init,  # step_size_explore
        'n_thinning': n_thinning,
        'optimizer_name': optimizer_name,
        'prior_config': {
            'name': 'StandardNormal'
        },
        'scheduler_config': {
            'name': scheduler_name,
            'n_samples_per_cycle': n_samples_per_cycle,
            'parameters': {
                'n_cycles': n_cycles,
                'step_size_sampling': step_size_sampling
            }
        }
    }
    new_config_dict['rng'] = seed
    new_config_dict['saving_dir'] = f'results/'
    config_path = Path('experiments/csgld') / f'{exp_name}.yaml'
    # if config_path.exists():
    #     warnings.warn(f"Config file {config_path} already exists. Overwriting it.")
    if not config_path.parent.exists():
        config_path.parent.mkdir(parents=True)
    Config.from_dict(new_config_dict).to_yaml(config_path)

    return config_path

### Chains/Cycles

In [None]:
# Number of chains (parallel runs) or cycles (sequential runs) to sweep over.
chains_cycles = list(range(2, 13, 2))
# Base random seeds for the repeated runs.
seeds = [
    0, 42, 221, 476, 1453,
    1644, 1840, 1973, 2025, 2100,
]

### Constant Schedule

In [None]:
# Config factory for the constant-schedule runs: get_config with the
# schedule-specific arguments pre-bound; callers still pass exp_name,
# n_chains, n_cycles and seed.
get_config_constant = functools.partial(
    get_config,
    **{
        'n_steps_per_cycle': 5500,
        'n_samples_per_cycle': 500,
        'n_thinning': 10,
        'optimizer_name': 'adam',
        'scheduler_name': 'constant',
        'step_size_init': 0.01,
        'step_size_sampling': 1.0e-8,
    },
)

In [None]:
# parallel: n chains with a single cycle each.
# Every config is (re)written, but training is skipped when
# results/<exp_name>/eval_metrics.pkl already exists, so the cell is resumable.
config_paths_p = []
for i, n in enumerate(chains_cycles):
    for seed in seeds:
        exp_name = f'constant/{n}x1_constant_5000+500_seed{seed+i}'
        config_path = get_config_constant(
            exp_name=exp_name,
            n_chains=n,
            n_cycles=1,
            seed=seed+i
        )
        config_paths_p.append(config_path)
        result_path = Path('results') / exp_name / "eval_metrics.pkl"
        if result_path.exists():
            continue
        print("=" * 50)
        print(f'Running training for config: {config_path}')
        # Launch with the kernel's own interpreter rather than whatever
        # 'python' resolves to on PATH.
        completed = subprocess.run(
            [sys.executable or 'python', 'train.py', '-c', str(config_path), '-d', '12']
        )
        # Surface failures instead of silently moving on; keep the sweep going.
        if completed.returncode != 0:
            print(f'WARNING: training exited with code {completed.returncode} for config: {config_path}')

In [None]:
# sequential: a single chain running n cycles.
# Every config is (re)written, but training is skipped when
# results/<exp_name>/eval_metrics.pkl already exists, so the cell is resumable.
config_paths_c = []
max_cycles = int(np.max(chains_cycles))
for i, n in enumerate(chains_cycles):
    for seed in seeds:
        exp_name = f'constant/1x{n}_constant_5000+500_seed{seed+i}'
        config_path = get_config_constant(
            exp_name=exp_name,
            n_chains=1,
            n_cycles=n,
            seed=seed+i
        )
        # Record the path, mirroring config_paths_p in the parallel sweep
        # (the list was previously created but never filled).
        config_paths_c.append(config_path)
        result_path = Path('results') / exp_name / "eval_metrics.pkl"
        if result_path.exists():
            continue
        print("=" * 50)
        print(f'Running training for config: {config_path}')
        # Launch with the kernel's own interpreter rather than whatever
        # 'python' resolves to on PATH.
        completed = subprocess.run(
            [sys.executable or 'python', 'train.py', '-c', str(config_path), '-d', '12']
        )
        # Surface failures instead of silently moving on; keep the sweep going.
        if completed.returncode != 0:
            print(f'WARNING: training exited with code {completed.returncode} for config: {config_path}')

### Cyclical Schedule

In [None]:
# Config factory for the cosine-schedule runs: get_config with the
# schedule-specific arguments pre-bound; callers still pass exp_name,
# n_chains, n_cycles and seed.
get_config_cyclical = functools.partial(
    get_config,
    **{
        'n_steps_per_cycle': 12000,
        'n_samples_per_cycle': 500,
        'n_thinning': 10,
        'optimizer_name': 'sgd',
        'scheduler_name': 'cosine',
        'step_size_init': 2.0e-6,
    },
)

# Chain/cycle counts swept in the cosine-schedule experiments.
cosine_chains_cycles = list(range(2, 9, 2))

In [None]:
# parallel: n chains with a single cycle each.
# Training is skipped when results/<exp_name>/eval_metrics.pkl already exists.
for i, n in enumerate(cosine_chains_cycles):
    for seed in [0, 42, 1973, 2025, 2100]:
        exp_name = f'cosine/{n}x1_cosine_11500+500_seed{seed+i}'
        config_path = get_config_cyclical(
            exp_name=exp_name,
            n_chains=n,
            n_cycles=1,
            seed=seed+i
        )
        result_path = Path('results') / exp_name / "eval_metrics.pkl"
        if result_path.exists():
            continue
        print("=" * 50)
        print(f'Running training for config: {config_path}')
        # Launch with the kernel's own interpreter rather than whatever
        # 'python' resolves to on PATH.
        completed = subprocess.run(
            [sys.executable or 'python', 'train.py', '-c', str(config_path), '-d', '12']
        )
        # Surface failures instead of silently moving on; keep the sweep going.
        if completed.returncode != 0:
            print(f'WARNING: training exited with code {completed.returncode} for config: {config_path}')

In [None]:
# sequential: a single chain running n cycles.
# Training is skipped when results/<exp_name>/eval_metrics.pkl already exists.
for i, n in enumerate(cosine_chains_cycles):
    for seed in [0, 42, 1973, 2025, 2100]:
        exp_name = f'cosine/1x{n}_cosine_11500+500_seed{seed+i}'
        config_path = get_config_cyclical(
            exp_name=exp_name,
            n_chains=1,
            n_cycles=n,
            seed=seed+i
        )
        result_path = Path('results') / exp_name / "eval_metrics.pkl"
        if result_path.exists():
            continue
        print("=" * 50)
        print(f'Running training for config: {config_path}')
        # Launch with the kernel's own interpreter rather than whatever
        # 'python' resolves to on PATH.
        completed = subprocess.run(
            [sys.executable or 'python', 'train.py', '-c', str(config_path), '-d', '12']
        )
        # Surface failures instead of silently moving on; keep the sweep going.
        if completed.returncode != 0:
            print(f'WARNING: training exited with code {completed.returncode} for config: {config_path}')

### Cycle Length Ablation

In [None]:
# Config factory for the cycle-length ablation: same constant-schedule
# settings, but n_steps_per_cycle and n_samples_per_cycle are left unbound so
# each run can set them.
get_config_constant_different_length = functools.partial(
    get_config,
    **{
        'n_thinning': 10,
        'optimizer_name': 'adam',
        'scheduler_name': 'constant',
        'step_size_init': 0.01,
        'step_size_sampling': 1.0e-8,
    },
)

In [None]:
# Sweep the per-cycle exploration budget (2000..8000 in steps of 1000) with a
# fixed number of sampling steps; each cycle runs n + sampling_steps steps.
# Training is skipped when results/<exp_name>/eval_metrics.pkl already exists.
exploration_steps = np.arange(2000, 8001, 1000)
sampling_steps = 500
for i, n in enumerate(exploration_steps):
    for seed in seeds:
        exp_name = f'exploration_budget/1x12_constant_{n}+{sampling_steps}_seed{seed+i}'
        config_path = get_config_constant_different_length(
            exp_name=exp_name,
            n_chains=1,
            n_cycles=12,
            # n is a NumPy integer; convert so a plain int reaches the config.
            n_steps_per_cycle=int(n+sampling_steps),
            n_samples_per_cycle=sampling_steps,
            seed=seed+i
        )
        result_path = Path('results') / exp_name / "eval_metrics.pkl"
        if result_path.exists():
            continue
        print("=" * 50)
        print(f'Running training for config: {config_path}')
        # Launch with the kernel's own interpreter rather than whatever
        # 'python' resolves to on PATH.
        completed = subprocess.run(
            [sys.executable or 'python', 'train.py', '-c', str(config_path), '-d', '12']
        )
        # Surface failures instead of silently moving on; keep the sweep going.
        if completed.returncode != 0:
            print(f'WARNING: training exited with code {completed.returncode} for config: {config_path}')