## Setup libraries

In [1]:
import sys
from numcosmo_py import nc
from numcosmo_py import ncm

# NOTE(review): overriding the module-level __name__ looks like a workaround
# needed by the NumCosmo bindings — confirm why "NcContext" is required before
# changing or removing it.
__name__ = "NcContext"

# Library initialisation call; presumably it must run before any other NumCosmo
# call — confirm.
ncm.cfg_init()
# Send library log messages to stdout. `write` returns the number of characters
# written, so the `and` means `flush` is only called for non-empty messages.
ncm.cfg_set_log_handler(lambda msg: sys.stdout.write(msg) and sys.stdout.flush())

import numpy as np
import pandas as pd
from astropy.table import Table

## Load cluster catalog

Here we choose a specific cluster to fit. You can choose which cluster by changing `cluster_number`.

In [2]:
# Read the cluster catalog from a FITS file. The path is relative, so the
# notebook has to be run from the directory that contains the file.
cluster_catalog = Table.read("hamana_clusters.fits")

In [None]:
# Row index into `cluster_catalog`; change this to fit a different cluster.
cluster_number = 0
cluster = cluster_catalog[cluster_number]
# The cluster name is what the later cells use to build the per-cluster
# experiment and dataset paths.
print(cluster["name"])

## Running numcosmo CLI app

Below we show a couple of examples of how to run the fit using the numcosmo CLI app. These have to be run in your terminal and won't work in this notebook.

In [None]:
# The lines marked with "$" are shell commands for the numcosmo CLI app. They are
# kept as comments so that this code cell is valid Python and does nothing under
# "Restart & Run All"; copy a command (without the leading "# $ ") into a
# terminal to run it.
# NOTE(review): the Python cells in this notebook load
# clusters/<name>/<name>_experiment.yaml — confirm that the paths used in the
# commands below match your directory layout.

# This will calculate the best fit for the experiment
# $ numcosmo run fit clusters/HWL16a-002_experiment.yaml -p

# This will calculate the Fisher matrix for the experiment
# $ numcosmo run fisher clusters/HWL16a-002_experiment.yaml -p

# This will run the MCMC analysis using the APES sampler with 200 walkers and 500 samples and 12 threads parallelization
# $ numcosmo run mcmc apes clusters/HWL16a-002_experiment.yaml -p --nwalkers 200 --nsamples 500 --parallel threads --nthreads 12

# This will run the MCMC analysis using the APES sampler with 200 walkers and 500 samples and MPI parallelization
# $ numcosmo run mcmc apes clusters/HWL16a-002_experiment.yaml -p --nwalkers 200 --nsamples 500 --parallel mpi

## Load experiment (Gauss)

We start by loading the serialized experiment for the chosen cluster.

In [4]:
# Per-cluster input files: the YAML experiment description and the binary
# dataset that goes with it.
experiment_path = f"clusters/{cluster['name']}/{cluster['name']}_experiment.yaml"
dataset_path = f"clusters/{cluster['name']}/{cluster['name']}_experiment.dataset.gvar"

# A single serializer instance reads both files: the dataset first, then the
# dictionary of experiment objects.
ser = ncm.Serialize.new(ncm.SerializeOpt.CLEAN_DUP)
dataset = ser.from_binfile(dataset_path)
experiment_objects = ser.dict_str_from_yaml_file(experiment_path)

In [None]:
# First (index 0) data object stored in the dataset.
cluster_data = dataset.get_data(0)

# Objects deserialized from the experiment YAML, looked up by key.
likelihood = experiment_objects.get("likelihood")
mset = experiment_objects.get("model-set")

# NOTE(review): presumably builds the free-parameter map that the fit and MCMC
# cells rely on (e.g. for mset.fparams_len()) — confirm.
mset.prepare_fparam_map()

# Handles to individual models of the model set, looked up by name. They are
# not used again in the cells visible in this notebook.
galaxy_redshift = mset.peek_by_name("NcGalaxySDObsRedshift")
galaxy_position = mset.peek_by_name("NcGalaxySDPosition")
galaxy_shape = mset.peek_by_name("NcGalaxySDShape")

We then create a `pandas` `DataFrame` from our `wl_obs` object...

In [None]:
wl_obs = cluster_data.peek_obs()

# Read the observation object column by column: one list of values per column,
# with one entry per row of `wl_obs`.
n_rows = wl_obs.len()
wl_obs_dict = {
    col: [wl_obs.get(col, row) for row in range(n_rows)]
    for col in wl_obs.peek_columns()
}

wl_obs_df = pd.DataFrame(wl_obs_dict)

wl_obs_df

... and plot histograms for our data.

In [None]:
# Columns of `wl_obs_df` to histogram, one panel per column.
hist_columns = [
    "epsilon_obs_1",
    "epsilon_obs_2",
    "sigma_int",
    "sigma_obs",
    "ra",
    "dec",
    "zp",
]
wl_obs_df.hist(column=hist_columns, bins=50, figsize=(14, 10))

We then create a fit object and calculate the best fit and the Fisher matrix. The Fisher matrix in particular can take some time to compute.

In [12]:
# Build the fitter for this experiment: NLopt backend with the "ln-neldermead"
# algorithm, applied to `likelihood` and `mset`. The last argument selects the
# gradient type (forward numerical differences, going by the enum name).
fit = ncm.Fit.factory(
    ncm.FitType.NLOPT,
    "ln-neldermead",
    likelihood,
    mset,
    ncm.FitGradType.NUMDIFF_FORWARD,
)

In [None]:
# Run the fit with the SIMPLE message level.
fit.run(ncm.FitRunMsgs.SIMPLE)
# Print the catalog's own position and log10 of `m200_wmap` for this cluster,
# as a reference to compare against the fitted values.
print(
    f"Original -> ra: {cluster['ra']}, dec: {cluster['dec']}, log10M: {np.log10(cluster['m200_wmap'])}"
)

In [None]:
# Fisher-matrix step; as noted in the text above, this can take a while.
fit.obs_fisher()

In [None]:
# Log the fit information and the covariance.
# NOTE(review): presumably relies on the Fisher step from the previous cell
# having been run — confirm.
fit.log_info()
fit.log_covar()

Finally, we run an MCMC analysis.

In [None]:
# Thread count shared by the function-evaluation pool and the sampler, defined
# once so the two settings cannot drift apart.
nthreads = 12
ncm.func_eval_set_max_threads(nthreads)
ncm.func_eval_log_pool_stats()

# Gaussian transition kernel handed to the sampler as its initial sampler; it
# takes its priors from the model set and a rescaled covariance.
init_sampler = ncm.MSetTransKernGauss.new(0)
init_sampler.set_mset(mset)
init_sampler.set_prior_from_mset()
init_sampler.set_cov_from_rescale(1.0e-1)

nwalkers = 200
# Total number of points requested across all walkers.
total_samples = 100000
# NOTE: despite its name, `stretch` holds an APES walker.
stretch = ncm.FitESMCMCWalkerAPES.new(nwalkers, mset.fparams_len())
esmcmc = ncm.FitESMCMC.new(fit, nwalkers, init_sampler, stretch, ncm.FitRunMsgs.SIMPLE)

esmcmc.set_auto_trim_div(100)
esmcmc.set_max_runs_time(2.0 * 60.0)
# Catalog file for the sampler, stored next to the cluster's experiment files.
esmcmc.set_data_file(
    f"clusters/{cluster['name']}/{cluster['name']}_experiment.python.fits"
)
esmcmc.set_nthreads(nthreads)
esmcmc.start_run()
# Integer division: the run count is a whole number (100000 // 200 == 500), so
# do not pass the float produced by `/`.
esmcmc.run(total_samples // nwalkers)
esmcmc.end_run()

mcat = esmcmc.peek_catalog()

## Load experiment (PDF)

We start by loading the serialized experiment for the chosen cluster.

In [4]:
# Per-cluster input files for the PDF variant: the YAML experiment description
# and the binary dataset that goes with it.
experiment_path = f"clusters/{cluster['name']}/{cluster['name']}_experiment_pdf.yaml"
dataset_path = f"clusters/{cluster['name']}/{cluster['name']}_experiment_pdf.dataset.gvar"

# A single serializer instance reads both files: the dataset first, then the
# dictionary of experiment objects.
ser = ncm.Serialize.new(ncm.SerializeOpt.CLEAN_DUP)
dataset = ser.from_binfile(dataset_path)
experiment_objects = ser.dict_str_from_yaml_file(experiment_path)

In [5]:
# First (index 0) data object stored in the dataset.
cluster_data = dataset.get_data(0)

# Objects deserialized from the experiment YAML, looked up by key.
likelihood = experiment_objects.get("likelihood")
mset = experiment_objects.get("model-set")

# NOTE(review): presumably builds the free-parameter map that the fit and MCMC
# cells rely on (e.g. for mset.fparams_len()) — confirm.
mset.prepare_fparam_map()

# Handles to individual models of the model set, looked up by name. They are
# not used again in the cells visible in this notebook.
galaxy_redshift = mset.peek_by_name("NcGalaxySDObsRedshift")
galaxy_position = mset.peek_by_name("NcGalaxySDPosition")
galaxy_shape = mset.peek_by_name("NcGalaxySDShape")

We then create a `pandas` `DataFrame` from our `wl_obs` object (notice we leave out the P(z) data)...

In [None]:
wl_obs = cluster_data.peek_obs()

# Read the observation object column by column: one list of values per column,
# with one entry per row of `wl_obs`.
n_rows = wl_obs.len()
wl_obs_dict = {
    col: [wl_obs.get(col, row) for row in range(n_rows)]
    for col in wl_obs.peek_columns()
}

wl_obs_df = pd.DataFrame(wl_obs_dict)

wl_obs_df

... and plot histograms for our data (here, `z` is `photo_z_best` and is not used in the analysis).

In [None]:
# Columns of `wl_obs_df` to histogram, one panel per column.
hist_columns = [
    "epsilon_obs_1",
    "epsilon_obs_2",
    "sigma_int",
    "sigma_obs",
    "ra",
    "dec",
    "z",
]
wl_obs_df.hist(column=hist_columns, bins=50, figsize=(14, 10))

We then create a fit object and calculate the best fit and the Fisher matrix. The Fisher matrix in particular can take some time to compute.

In [8]:
# Build the fitter for the PDF experiment: NLopt backend with the
# "ln-neldermead" algorithm, applied to `likelihood` and `mset`. The last
# argument selects the gradient type (forward numerical differences, going by
# the enum name).
fit = ncm.Fit.factory(
    ncm.FitType.NLOPT,
    "ln-neldermead",
    likelihood,
    mset,
    ncm.FitGradType.NUMDIFF_FORWARD,
)

In [None]:
# Run the fit with the SIMPLE message level.
fit.run(ncm.FitRunMsgs.SIMPLE)
# Print the catalog's own position and log10 of `m200_wmap` for this cluster,
# as a reference to compare against the fitted values.
print(
    f"Original -> ra: {cluster['ra']}, dec: {cluster['dec']}, log10M: {np.log10(cluster['m200_wmap'])}"
)

In [None]:
# Fisher-matrix step; as noted in the text above, this can take a while.
fit.obs_fisher()

In [None]:
# Log the fit information and the covariance.
# NOTE(review): presumably relies on the Fisher step from the previous cell
# having been run — confirm.
fit.log_info()
fit.log_covar()

Finally, we run an MCMC analysis.

In [None]:
# Thread count shared by the function-evaluation pool and the sampler, defined
# once so the two settings cannot drift apart.
nthreads = 12
ncm.func_eval_set_max_threads(nthreads)
ncm.func_eval_log_pool_stats()

# Gaussian transition kernel handed to the sampler as its initial sampler; it
# takes its priors from the model set and a rescaled covariance.
init_sampler = ncm.MSetTransKernGauss.new(0)
init_sampler.set_mset(mset)
init_sampler.set_prior_from_mset()
init_sampler.set_cov_from_rescale(1.0e-1)

nwalkers = 200
# Total number of points requested across all walkers.
total_samples = 100000
# NOTE: despite its name, `stretch` holds an APES walker.
stretch = ncm.FitESMCMCWalkerAPES.new(nwalkers, mset.fparams_len())
esmcmc = ncm.FitESMCMC.new(fit, nwalkers, init_sampler, stretch, ncm.FitRunMsgs.SIMPLE)

esmcmc.set_auto_trim_div(100)
esmcmc.set_max_runs_time(2.0 * 60.0)
# The PDF experiment gets its own catalog file. The Gauss section of this
# notebook writes `<name>_experiment.python.fits`; reusing that path here would
# make the two chains collide in a single file.
esmcmc.set_data_file(
    f"clusters/{cluster['name']}/{cluster['name']}_experiment_pdf.python.fits"
)
esmcmc.set_nthreads(nthreads)
esmcmc.start_run()
# Integer division: the run count is a whole number (100000 // 200 == 500), so
# do not pass the float produced by `/`.
esmcmc.run(total_samples // nwalkers)
esmcmc.end_run()

mcat = esmcmc.peek_catalog()