In [1]:
import os,sys
import numpy as np
from tqdm.notebook import tqdm, trange

In [3]:
# Set the 'machine' environment variable before haloflow.data is imported below.
# NOTE(review): presumably haloflow reads this to pick machine-specific data
# paths — confirm against the haloflow package.
os.environ['machine'] = 'puma'

In [4]:
import haloflow.data as D

In [5]:
import torch
from torch import nn 
from torch.utils.tensorboard.writer import SummaryWriter

In [20]:
import optuna 
from sbi import neural_nets as nn
from sbi import inference as Inference

In [7]:
# Train on the first CUDA GPU when torch can see one; otherwise stay on the CPU.
cuda = torch.cuda.is_available()
if cuda:
    device = "cuda:0"
else:
    device = "cpu"

In [10]:
# Observable set and simulation suite used to select the training data and to
# name the optuna study.
obs = 'mags'
sim = 'TNG50'

# Fail early, and say why, if an unsupported simulation name is configured.
valid_sims = ['TNG50', 'TNG100', 'TNG_ALL', 'Eagle100', 'Simba100']
if sim not in valid_sims:
    raise ValueError("unknown sim %r; expected one of: %s" % (sim, ', '.join(valid_sims)))

In [11]:
# Load the 'train' split for the chosen observable and simulation.
# In Objective, y_train is passed to append_simulations as the first argument
# and x_train as the second.
# NOTE(review): presumably y_train holds the inferred parameters and x_train
# the observables — confirm against haloflow.data.hf2_centrals.
y_train, x_train = D.hf2_centrals('train', obs, sim=sim, version=1) 

In [12]:
# Lower/upper edges of the prior, one entry per inferred parameter (two in total).
# NOTE(review): presumably ordered as [log M*, log M_halo] — confirm against
# the column order returned by D.hf2_centrals.
lower_bounds = torch.tensor([8., 8.]) # training set only includes galaxies with log(Mstar/Msun) > 9
upper_bounds = torch.tensor([14., 15.])

In [13]:
# `Ut` was never imported in this notebook, so this cell only ran thanks to
# leftover kernel state. Import sbi's utilities explicitly so the notebook
# survives Restart & Run All.
# NOTE(review): assumes `Ut` was meant to be sbi.utils (which provides
# BoxUniform with a `device` argument) — confirm.
from sbi import utils as Ut

# Box-uniform prior between lower_bounds and upper_bounds, placed on the
# training device.
prior = Ut.BoxUniform(low=lower_bounds, high=upper_bounds, device=device)

In [14]:
# Number of trials requested from study.optimize.
n_trials    = 1000
# Study name encodes the simulation and observable; it is reused for the output
# sub-directory, the sqlite database, the tensorboard logs and the saved posteriors.
study_name  = 'h2.v1.%s.%s' % (sim, obs) 

# Root directory under which the per-study directory is created.
output_dir = '/xdisk/chhahn/chhahn/haloflow/hf2/npe'

In [15]:
# Number of trials optuna runs concurrently in study.optimize.
n_jobs     = 8

# Create the per-study output directory. os.makedirs with exist_ok=True replaces
# the previous `os.system('mkdir ...')`: no shell is involved (so odd characters
# in the path are harmless), missing parent directories are created, and a real
# failure raises instead of being silently ignored.
os.makedirs(os.path.join(output_dir, study_name), exist_ok=True)

# sqlite database holding the study, so it can be resumed with load_if_exists.
storage    = 'sqlite:///%s/%s/%s.db' % (output_dir, study_name, study_name)
# Passed to the TPE sampler as n_startup_trials.
n_startup_trials = 20

# Hyperparameter search ranges sampled inside Objective.
n_blocks_min, n_blocks_max = 2, 5      # num_blocks of the flow
n_transf_min, n_transf_max = 2, 5      # num_transforms of the flow
n_hidden_min, n_hidden_max = 32, 128   # hidden_features (sampled on a log scale)
n_lr_min, n_lr_max = 5e-6, 1e-3        # learning rate (sampled on a log scale)

In [21]:
def Objective(trial):
    ''' Objective function for optuna: train one NPE and score it.

    Draws the flow architecture (n_blocks, n_transf, n_hidden) and the learning
    rate from the notebook-level search ranges, trains a 'maf' posterior
    estimator on (y_train, x_train), saves the built posterior to
    <output_dir>/<study_name>/<study_name>.<trial number>.pt, and returns the
    negative best validation log-probability so that optuna can minimize it.

    Parameters
    ----------
    trial : optuna trial
        used to suggest hyperparameters; trial.number tags the tensorboard log
        directory and the saved posterior file.

    Returns
    -------
    -1 * best validation log-probability recorded in the trainer summary.
    '''
    # Generate the model
    n_blocks = trial.suggest_int("n_blocks", n_blocks_min, n_blocks_max)
    n_transf = trial.suggest_int("n_transf", n_transf_min,  n_transf_max)
    n_hidden = trial.suggest_int("n_hidden", n_hidden_min, n_hidden_max, log=True)
    lr = trial.suggest_float("lr", n_lr_min, n_lr_max, log=True)
    neural_posterior = nn.posterior_nn('maf',
            hidden_features=n_hidden,
            num_transforms=n_transf,
            num_blocks=n_blocks,
            use_batch_norm=True)

    log_dir = '%s/%s/%s.%i' % (output_dir, study_name, study_name, trial.number)

    # Every trial opens its own tensorboard event file. Using the writer as a
    # context manager guarantees it is flushed and closed when training
    # finishes, fails or is interrupted, instead of leaking one open writer
    # per trial across the whole study.
    with SummaryWriter(log_dir) as writer:
        anpe = Inference.SNPE(prior=prior,
                density_estimator=neural_posterior,
                device=device,
                summary_writer=writer)

        anpe.append_simulations(
                torch.tensor(y_train, dtype=torch.float32).to(device),
                torch.tensor(x_train, dtype=torch.float32).to(device))

        p_theta_x_est = anpe.train(
                training_batch_size=50,
                learning_rate=lr,
                show_train_summary=True)

    # save trained NPE
    qphi    = anpe.build_posterior(p_theta_x_est)
    fqphi   = os.path.join(output_dir, study_name, '%s.%i.pt' % (study_name, trial.number))
    torch.save(qphi, fqphi)

    # train() is called once per trainer, so the first summary entry is this
    # trial's best validation log-probability.
    best_valid_log_prob = anpe._summary['best_validation_log_prob'][0]

    # optuna minimizes, so return the negative log-probability
    return -1*best_valid_log_prob

In [22]:
# TPE sampler configured with the notebook-level number of startup trials.
sampler = optuna.samplers.TPESampler(n_startup_trials=n_startup_trials)

# Create the study in the sqlite storage, or attach to it when a study with
# this name already exists there (load_if_exists=True).
study = optuna.create_study(
    study_name=study_name,
    sampler=sampler,
    storage=storage,
    directions=["minimize"],
    load_if_exists=True,
)

[I 2025-01-22 20:30:14,031] Using an existing study with name 'h2.v1.TNG50.mags' instead of creating a new one.


In [23]:
# Run the hyperparameter search: n_trials calls of Objective, n_jobs at a time.
study.optimize(
    Objective,
    n_trials=n_trials,
    n_jobs=n_jobs,
)
# Report how many trials the study holds.
print("  Number of finished trials: %i" % len(study.trials))

 Training neural network. Epochs trained: 21

[W 2025-01-22 20:31:22,578] Trial 97 failed with parameters: {'n_blocks': 5, 'n_transf': 4, 'n_hidden': 109, 'lr': 6.488423637555843e-05} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/groups/chhahn/haloflow/venv/lib/python3.9/site-packages/optuna/study/_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
  File "/tmp/ipykernel_2350/2617463445.py", line 25, in Objective
    p_theta_x_est = anpe.train(
  File "/groups/chhahn/haloflow/venv/lib/python3.9/site-packages/sbi/inference/trainers/npe/npe_c.py", line 189, in train
    return super().train(**kwargs)
  File "/groups/chhahn/haloflow/venv/lib/python3.9/site-packages/sbi/inference/trainers/npe/npe_base.py", line 375, in train
    clip_grad_norm_(
  File "/groups/chhahn/haloflow/venv/lib/python3.9/site-packages/torch/nn/utils/clip_grad.py", line 30, in _no_grad_wrapper
    return func(*args, **kwargs)
  File "/groups/chhahn/haloflow/venv/lib/python3.9/site-packages/

KeyboardInterrupt: 