The goal of this notebook is to analyze the simulations in which only one component of the C yield is included, and to combine them to quickly explore (via MCMC) the space of permissible yields.

In [None]:
import matplotlib.pyplot as plt
import matplotlib
import numpy as np

import pandas as pd

In [None]:
import vice

import surp
from surp import subgiants
from surp import gce_math as gcem

import arya
arya.style.set_size((10/3, 10/3))
from arya import COLORS

from surp.yields import calc_y

In [None]:
import warnings

In [None]:
import corner

In [None]:
from scipy.optimize import minimize

In [None]:
np.seterr(divide="ignore")

In [None]:
surp.set_yields()

In [None]:
def find_model(name):
    """
    Load the per-star CSV summary of one perturbation model.

    Reads ``../models/perturbations/<name>/stars.csv`` (first column as the
    index) and adds a ``z_c`` column obtained by passing the ``AG_H`` column
    through ``surp.gce_math.brak_to_abund(..., "c")``.

    :param name: Directory name of the model under ``../models/perturbations/``.
    :return: The stars DataFrame with the extra ``z_c`` column.
    """
    csv_path = "../models/perturbations/" + name + "/stars.csv"

    stars = pd.read_csv(csv_path, index_col=0)
    # presumably converts a bracket abundance into a carbon mass fraction — confirm
    stars["z_c"] = surp.gce_math.brak_to_abund(stars["AG_H"], "c")

    return stars

In [None]:

def median_se(data, n_bootstrap=1000):
    """
    Estimate the standard error of the median by bootstrap resampling.

    NaN entries are dropped before resampling so that the estimate describes
    the same values as the pandas ``"median"`` and ``"count"`` aggregations
    used next to it (both of which skip NaN). Without this, a single NaN in
    the input turns the whole estimate into NaN.

    :param data: 1-D array-like of values (e.g. a pandas Series).
    :param n_bootstrap: Number of bootstrap samples to generate.
    :return: Sample standard deviation (``ddof=1``) of the bootstrap medians,
        or NaN when no non-NaN values are available.
    """
    values = np.asarray(data, dtype=float)
    values = values[~np.isnan(values)]
    if values.size == 0:
        # Nothing to resample; report "unknown" instead of raising.
        return np.nan

    # Resample the data n_bootstrap times in a single vectorized draw
    bootstrap_samples = np.random.choice(
        values, size=(n_bootstrap, values.size), replace=True
    )

    # One median per bootstrap replicate
    medians = np.median(bootstrap_samples, axis=1)

    # The spread of the replicate medians is the standard-error estimate
    return np.std(medians, ddof=1)


In [None]:
def bin_2d(df, x="MG_H_true", y="MG_FE_true", val="z_c"):
    """
    Summarise ``val`` on a fixed 2-D grid of (``x``, ``y``) bins.

    The ``x`` axis uses edges ``np.arange(-1, 0.4, 0.1)`` and the ``y`` axis
    ``np.arange(0, 0.41, 0.05)``. The input frame is not modified: the bin
    labels are attached to a copy, so shared tables can be binned repeatedly
    without picking up stray ``x_bin``/``y_bin`` columns.

    :param df: DataFrame containing the columns named by ``x``, ``y`` and ``val``.
    :param x: Column binned along the first axis.
    :param y: Column binned along the second axis.
    :param val: Column to summarise in each cell.
    :return: DataFrame with one row per grid cell (empty cells included, with
        NaN statistics) and columns ``x_bin``, ``y_bin``, ``med`` (median of
        ``val``), ``err`` (bootstrap error from ``median_se``), ``xmed``,
        ``ymed``, ``counts`` (non-null count), and the bin centres ``x``, ``y``.
    """
    mg_bins = np.arange(-1, 0.4, 0.1)
    mg_fe_bins = np.arange(0, 0.41, 0.05)

    # assign() returns a new frame, leaving the caller's DataFrame untouched
    labelled = df.assign(
        x_bin=pd.cut(df[x], bins=mg_bins, labels=False, include_lowest=True),
        y_bin=pd.cut(df[y], bins=mg_fe_bins, labels=False, include_lowest=True),
    )

    # Rows outside the grid get NaN labels and are dropped by groupby
    grouped = labelled.groupby(["x_bin", "y_bin"])

    results = grouped.agg(
        med=pd.NamedAgg(aggfunc="median", column=val),
        err=pd.NamedAgg(aggfunc=median_se, column=val),
        xmed=pd.NamedAgg(aggfunc="median", column=x),
        ymed=pd.NamedAgg(aggfunc="median", column=y),
        counts=pd.NamedAgg(aggfunc="count", column=val),
    ).reset_index()

    # Create a full grid of all (x_bin, y_bin) combinations so that every
    # model/data table shares the same row layout, even for empty cells
    x_bin_range = range(len(mg_bins) - 1)  # Number of x bins
    y_bin_range = range(len(mg_fe_bins) - 1)  # Number of y bins
    full_grid = pd.MultiIndex.from_product(
        [x_bin_range, y_bin_range], names=["x_bin", "y_bin"]
    )
    full_grid_df = full_grid.to_frame(index=False)

    binned = pd.merge(full_grid_df, results, on=["x_bin", "y_bin"], how="left")

    x_bin_mids = (mg_bins[:-1] + mg_bins[1:]) / 2
    y_bin_mids = (mg_fe_bins[:-1] + mg_fe_bins[1:]) / 2

    binned["x"] = x_bin_mids[binned.x_bin]
    binned["y"] = y_bin_mids[binned.y_bin]

    return binned
            

In [None]:
def bin_mg_fe(df, x="MG_FE_true", val="z_c", n_min=3, m_h="MG_H_true", m_h_0=-0.1, d_m_h=0.05):
    """
    Summarise ``val`` in bins of ``x`` for rows inside a narrow ``m_h`` slice.

    Only rows with ``m_h_0 - d_m_h <= df[m_h] < m_h_0 + d_m_h`` are kept.
    The ``x`` bin edges are ``np.arange(0, 0.36, 0.05)``.

    :param df: Input DataFrame (left unmodified; a filtered copy is binned).
    :param x: Column to bin.
    :param val: Column to summarise per bin.
    :param n_min: Bins with fewer than this many values get ``med`` set to NaN.
    :param m_h: Column used for the slice selection.
    :param m_h_0: Centre of the slice.
    :param d_m_h: Half-width of the slice.
    :return: DataFrame with one row per bin and columns ``x_bin``, ``med``,
        ``xmed``, ``err``, ``counts`` and the bin centre ``x``.
    """
    edges = np.arange(0, 0.36, 0.05)
    centres = (edges[:-1] + edges[1:]) / 2

    in_slice = (df[m_h] < m_h_0 + d_m_h) & (df[m_h] >= m_h_0 - d_m_h)
    selected = df[in_slice].copy()
    selected["x_bin"] = pd.cut(selected[x], bins=edges, labels=False, include_lowest=True)

    summary = (
        selected.groupby(["x_bin"])
        .agg(
            med=pd.NamedAgg(column=val, aggfunc="median"),
            xmed=pd.NamedAgg(column=x, aggfunc="median"),
            err=pd.NamedAgg(column=val, aggfunc=median_se),
            counts=pd.NamedAgg(column=val, aggfunc="count"),
        )
        .reset_index()
    )

    # Left-merge onto the complete list of bins so empty bins still get a row
    every_bin = pd.DataFrame({"x_bin": range(len(edges) - 1)})
    binned = pd.merge(every_bin, summary, on=["x_bin"], how="left")

    binned["x"] = centres[binned.x_bin]

    # Medians from too few values are not trusted
    binned.loc[binned.counts < n_min, "med"] = np.nan

    return binned
            

In [None]:
def bin_mg_h(df, x="MG_H_true", val="z_c", n_min=3):
    """
    Summarise ``val`` in bins of ``x`` for rows where ``high_alpha`` is false.

    The ``x`` bin edges are ``np.arange(-0.5, 0.4, 0.1)``.

    :param df: Input DataFrame with a boolean ``high_alpha`` column (left
        unmodified; a filtered copy is binned).
    :param x: Column to bin.
    :param val: Column to summarise per bin.
    :param n_min: Bins with fewer than this many values get ``med`` set to NaN.
    :return: DataFrame with one row per bin and columns ``x_bin``, ``med``,
        ``xmed``, ``err``, ``counts`` and the bin centre ``x``.
    """
    edges = np.arange(-0.5, 0.4, 0.1)
    centres = (edges[:-1] + edges[1:]) / 2

    selected = df[~df.high_alpha].copy()
    selected["x_bin"] = pd.cut(selected[x], bins=edges, labels=False, include_lowest=True)

    summary = (
        selected.groupby(["x_bin"])
        .agg(
            med=pd.NamedAgg(column=val, aggfunc="median"),
            xmed=pd.NamedAgg(column=x, aggfunc="median"),
            err=pd.NamedAgg(column=val, aggfunc=median_se),
            counts=pd.NamedAgg(column=val, aggfunc="count"),
        )
        .reset_index()
    )

    # Left-merge onto the complete list of bins so empty bins still get a row
    every_bin = pd.DataFrame({"x_bin": range(len(edges) - 1)})
    binned = pd.merge(every_bin, summary, on=["x_bin"], how="left")

    binned["x"] = centres[binned.x_bin]

    # Medians from too few values are not trusted
    binned.loc[binned.counts < n_min, "med"] = np.nan

    return binned
            

In [None]:
eq_correction = 1
def calc_eq_caah(M_H, **kwargs):
    """
    Compute the bracket ratio of the current yields as a function of metallicity.

    ``M_H`` is converted with ``gcem.MH_to_Z``; ``calc_y`` is evaluated for its
    default element and for ``"mg"``; the ratio, multiplied by the module-level
    ``eq_correction``, is passed to ``gcem.abund_ratio_to_brak(..., "C", "MG")``.

    Leftover debug ``print`` calls were removed: they indexed element ``[1]``
    of the yield arrays, so scalar or single-element input failed, and every
    call wrote to stdout.

    :param M_H: Metallicity value(s) accepted by ``gcem.MH_to_Z``.
    :param kwargs: Accepted for call compatibility; currently unused.
    :return: Result of ``gcem.abund_ratio_to_brak`` for the yield ratio.
    """
    Zs = gcem.MH_to_Z(M_H)
    # presumably the carbon yield (calc_y's default element) — confirm
    y_c = calc_y(Zs)
    y_mg = calc_y(Zs, "mg")

    return gcem.abund_ratio_to_brak(y_c / y_mg * eq_correction, "C", "MG")

def plot_eq_caah(M_H=np.linspace(-1, 0.35, 100), **kwargs):
    """
    Draw the curve returned by ``calc_eq_caah`` on the current axes.

    :param M_H: Abscissa values; defaults to 100 points between -1 and 0.35.
    :param kwargs: Extra keyword arguments forwarded to ``plt.plot``. They
        must not repeat ``label``, ``color`` or ``lw``, which are fixed here.
    """
    fixed_style = dict(label="equilibrium", color="k", lw=2)
    plt.plot(M_H, calc_eq_caah(M_H), **fixed_style, **kwargs)
    

In [None]:
def set_yields(alpha_agb=1, y0_cc=1, zeta_cc=0.1):
    """
    Install scaled carbon yields into ``vice``'s AGB and CCSN settings.

    Relies on the module-level objects ``y_agb``, ``y_const`` and
    ``y_piecelin`` being defined before this is called (and, for the CCSN
    yield, before the installed function is evaluated).

    :param alpha_agb: Multiplier applied to ``y_agb``.
    :param y0_cc: Multiplier applied to the constant term ``y_const``.
    :param zeta_cc: Multiplier applied to ``y_piecelin(Z)``.
    """
    def cc_carbon_yield(Z):
        # constant term plus metallicity-dependent term
        return y0_cc*y_const + zeta_cc*y_piecelin(Z)

    vice.yields.agb.settings["c"] = y_agb * alpha_agb
    vice.yields.ccsne.settings["c"] = cc_carbon_yield
    

In [None]:
def add_model(total, model, factor):
    """
    Accumulate one scaled component table into ``total`` in place.

    Only rows where the component has ``counts > 0`` and a finite ``med`` are
    added. For those rows ``med`` is increased by ``factor * model.med``,
    ``counts`` by ``model.counts``, and ``err`` by ``abs(factor) * model.err``.

    The uncertainty of ``factor * med`` is ``|factor| * err``, so the error
    gets the same scaling as the median (it was previously added unscaled).
    Errors are summed linearly, which is a conservative upper bound compared
    with adding them in quadrature.

    :param total: Running-sum DataFrame with ``x_bin``, ``med``, ``counts``,
        ``err`` (and optionally ``y_bin``) columns; modified in place.
    :param model: Component DataFrame with the same row layout as ``total``.
    :param factor: Scalar weight applied to this component.
    :return: None.
    """
    # sanity check: both tables must describe the same bins, row for row
    assert all(model.x_bin == total.x_bin)
    if "y_bin" in model.keys():
        assert all(model.y_bin == total.y_bin)

    # NaN counts (empty bins) compare False and are therefore skipped
    valid = ((model.counts > 0) & np.isfinite(model.med)).to_numpy()
    if not valid.any():
        return

    total.loc[valid, "med"] += factor * model.med.loc[valid]
    total.loc[valid, "counts"] += model.counts.loc[valid]
    total.loc[valid, "err"] += abs(factor) * model.err.loc[valid]


In [None]:
def empty_total_like(models):
    """
    Build zeroed accumulator tables shaped like the given binned tables.

    :param models: Mapping from table key to a binned DataFrame.
    :return: New dict with the same keys; each value is a copy of the input
        frame whose ``med``, ``counts`` and ``err`` columns are reset to
        ``0.``, ``0`` and ``0.``. The input frames are not modified.
    """
    zeroed = {"med": 0., "counts": 0, "err": 0.}
    return {key: frame.assign(**zeroed) for key, frame in models.items()}

In [None]:
def add_models(models, factors):
    """
    Combine binned component models into weighted totals.

    Each entry of ``models`` is a dict of binned tables keyed ``"2d"``,
    ``"mg_h"`` and ``"mg_fe"``. The tables are summed with ``add_model`` using
    the matching entry of ``factors``. Afterwards the bin-centre columns are
    renamed (``x`` -> ``MG_H`` or ``MG_FE``, ``y`` -> ``MG_FE``) and ``C_H`` /
    ``C_MG`` columns are derived from the summed ``med``.

    :param models: Sequence of component dicts; the first one defines the layout.
    :param factors: One scalar weight per component.
    :return: Dict of total tables with the same keys as the components.
    """
    totals = empty_total_like(models[0])

    for component, weight in zip(models, factors):
        for key in totals:
            add_model(totals[key], component[key], weight)

    # (table key, bin-centre renames, fixed [Mg/H] or None to use the MG_H column)
    layouts = (
        ("2d", {"x": "MG_H", "y": "MG_FE"}, None),
        ("mg_h", {"x": "MG_H"}, None),
        # the mg_fe table has no MG_H column; -0.1 matches bin_mg_fe's default m_h_0
        ("mg_fe", {"x": "MG_FE"}, -0.1),
    )

    for key, renames, fixed_mg_h in layouts:
        table = totals[key]
        table.rename(columns=renames, inplace=True)

        table["C_H"] = surp.gce_math.abund_to_brak(table.med, "C")
        mg_h = table.MG_H if fixed_mg_h is None else fixed_mg_h
        table["C_MG"] = table.C_H - mg_h
        # turn +/-inf (e.g. from an empty total of 0) into NaN
        table.loc[~np.isfinite(table.C_MG), "C_MG"] = np.nan

    return totals

In [None]:
def calc_chi2(y1, y2, yerr1, yerr2=0):
    """
    Chi-squared between two sets of values, with a penalty for unusable points.

    The two uncertainties are combined in quadrature. A point is used only if
    both values are finite and the combined uncertainty is positive; every
    other point adds a fixed penalty of 500 instead of its squared residual.

    :param y1: First set of values.
    :param y2: Second set of values.
    :param yerr1: Uncertainty on ``y1``.
    :param yerr2: Uncertainty on ``y2`` (default 0).
    :return: Sum of squared normalised residuals plus ``500`` per excluded point.
    """
    sigma = np.sqrt(yerr1**2 + yerr2**2)
    usable = np.isfinite(y1) & np.isfinite(y2) & (sigma > 0)

    normalised_sq = (y1 - y2)**2 / sigma**2
    n_excluded = np.sum(~usable)

    return np.sum(normalised_sq[usable]) + 500*n_excluded

In [None]:
def log_likelihood(models, subgiants_binned):
    """
    Gaussian log-likelihood of the binned observations given a combined model.

    For both the ``"mg_h"`` and ``"mg_fe"`` tables the model's ``C_MG`` column
    is compared with the observed ``med`` column, using the observed ``err``
    as the uncertainty. The mg_fe term previously compared the model's
    ``MG_FE`` bin centres with the observed values, which made that term
    independent of the fitted parameters.

    Model-side uncertainties are not included in the chi-squared.

    :param models: Dict of model tables as returned by ``add_models``.
    :param subgiants_binned: Dict of observed tables with the same keys and
        row layout.
    :return: ``-chi2 / 2`` summed over both tables.
    """
    chi2_total = 0.0
    for key in ("mg_h", "mg_fe"):
        model = models[key]
        observed = subgiants_binned[key]
        chi2_total += calc_chi2(model.C_MG, observed.med, observed.err)

    return -1/2 * chi2_total
    
    

## Model loading

In [None]:
def bin_model(name):
    """
    Load a model by name and bin it in the three layouts used by the fit.

    :param name: Model name passed to ``find_model``.
    :return: Dict with keys ``"mg_fe"``, ``"mg_h"`` and ``"2d"`` holding the
        outputs of ``bin_mg_fe``, ``bin_mg_h`` and ``bin_2d`` (default arguments).
    """
    stars = find_model(name)

    # Dict values are evaluated in order, so the binning calls run as before
    return {
        "mg_fe": bin_mg_fe(stars),
        "mg_h": bin_mg_h(stars),
        "2d": bin_2d(stars),
    }
    

In [None]:
# Yield building blocks read as globals by set_yields:
#   y_piecelin - metallicity-dependent CC term, evaluated as y_piecelin(Z)
#   y_agb      - AGB carbon yield object, multiplied by alpha_agb
#   y_const    - constant CC term, multiplied by y0_cc
# NOTE(review): the name says "piecelin" but the object is a Quadratic_CC — confirm intent.
y_piecelin = surp.yield_models.Quadratic_CC(y0=0.001, zeta=0.001, A=0.2, Z1=0.016)
y_agb = surp.agb_interpolator.interpolator("c")
y_const = 0.001

In [None]:
set_yields(0, 0, 1)
plot_eq_caah()

In [None]:
empty = bin_model("noextra")

In [None]:
fruity = bin_model("analytic")

In [None]:
cc_piecelin = bin_model("piecelin_m0.2")

In [None]:
cc_const = bin_model("const_cc")

In [None]:
# Bin the observed subgiant sample with the same three binning functions used
# for the models, summarising the C_MG column instead of z_c.
subgiants_binned = {
    "mg_fe": bin_mg_fe(subgiants, x="MG_FE", val="C_MG", m_h="MG_H"),
    "mg_h": bin_mg_h(subgiants, x="MG_H", val="C_MG"),
    "2d": bin_2d(subgiants, x="MG_H", y="MG_FE", val="C_MG"),
}

In [None]:
y0 = 2.02
alpha =  1.45
zeta = 1.96
fiducial = add_models([fruity, cc_const, cc_piecelin], [alpha, y0, zeta])

In [None]:
set_yields(alpha, y0, zeta)

In [None]:
def compare_caah(fiducial):
    """
    Plot model and observed [C/Mg] against [Mg/H] with error bars.

    The model error bar converts the uncertainty on ``med`` to an uncertainty
    on its base-10 logarithm: ``sigma(log10 z) = sigma(z) / (z * ln 10)``.
    The previous version multiplied by ``ln 10`` instead of dividing.

    :param fiducial: Dict of model tables as returned by ``add_models``; the
        ``"mg_h"`` table is plotted. The observations come from the
        module-level ``subgiants_binned``.
    """
    model = fiducial["mg_h"]
    model_err = model.err / model.med / np.log(10)
    plt.errorbar(model.MG_H, model.C_MG, yerr=model_err)

    observed = subgiants_binned["mg_h"]
    plt.errorbar(observed.x, observed.med, yerr=observed.err)

    plt.xlabel("[Mg/H]")
    plt.ylabel("[C/Mg]")

In [None]:
def compare_caafe(fiducial):
    """
    Plot model and observed [C/Mg] against [Mg/Fe] with error bars.

    The model error bar converts the uncertainty on ``med`` to an uncertainty
    on its base-10 logarithm: ``sigma(log10 z) = sigma(z) / (z * ln 10)``.
    The previous version multiplied by ``ln 10`` instead of dividing.

    :param fiducial: Dict of model tables as returned by ``add_models``; the
        ``"mg_fe"`` table is plotted. The observations come from the
        module-level ``subgiants_binned``.
    """
    model = fiducial["mg_fe"]
    model_err = model.err / model.med / np.log(10)
    # Skip bins without a finite model value
    filt = np.isfinite(model.MG_FE) & np.isfinite(model.C_MG)
    plt.errorbar(model.MG_FE[filt], model.C_MG[filt], yerr=model_err[filt])

    observed = subgiants_binned["mg_fe"]
    plt.errorbar(observed.x, observed.med, yerr=observed.err)

    plt.xlabel("[Mg/FE]")
    plt.ylabel("[C/Mg]")

In [None]:
import emcee

In [None]:
components = [fruity, cc_const, cc_piecelin]
labels = [r"$\alpha$", r"$y_0$", r"$\zeta$"]
initial = [1.5, 2.0, 1.9]


In [None]:
def model_from_theta(theta, components):
    """
    Build the combined model for a parameter vector.

    :param theta: One scale factor per entry of ``components``.
    :param components: Sequence of binned component models.
    :return: The totals dict produced by ``add_models``.
    """
    combined = add_models(models=components, factors=theta)
    return combined

In [None]:
def log_likelihood_mc(theta, components, subgiants_binned):
    """
    Log-likelihood of the observations for a three-parameter vector.

    :param theta: Exactly three scale factors (alpha, y0, zeta).
    :param components: Sequence of binned component models.
    :param subgiants_binned: Dict of binned observations.
    :return: Value of ``log_likelihood`` for the combined model.
    """
    # Unpacking enforces that theta has exactly three entries
    _alpha, _y0, _zeta = theta

    return log_likelihood(model_from_theta(theta, components), subgiants_binned)

In [None]:
# Maximum-likelihood estimate: minimise the negative log-likelihood, starting from `initial`.
soln = minimize(lambda *args: -log_likelihood_mc(*args), np.array(initial), args=(components, subgiants_binned))

In [None]:
log_likelihood_mc(initial, components, subgiants_binned)

In [None]:
log_likelihood_mc(soln.x, components, subgiants_binned)

In [None]:
soln

In [None]:
fiducial["mg_fe"]

In [None]:
fiducial = model_from_theta(soln.x, components)
compare_caah(fiducial)

plt.show()
compare_caafe(fiducial)


In [None]:
def log_prior(theta):
    """
    Flat (top-hat) log-prior on the three scale factors.

    Allowed ranges: ``0 <= alpha <= 10``, ``0 <= y0 <= 6``, ``-10 <= zeta <= 10``.

    :param theta: Sequence ``(alpha, y0, zeta)``.
    :return: ``0`` when no parameter lies outside its range, ``-np.inf`` otherwise.
    """
    alpha, y0, zeta = theta
    limits = ((alpha, 0, 10), (y0, 0, 6), (zeta, -10, 10))

    for value, lower, upper in limits:
        if value < lower or value > upper:
            return -np.inf
    return 0

In [None]:
def log_probability(theta, components, subgiants_binned):
    """
    Log-posterior (up to a constant): log-prior plus log-likelihood.

    When the prior is not finite the likelihood is skipped and ``-np.inf`` is
    returned directly. This avoids building the combined model for parameter
    vectors the prior already rejects.

    :param theta: Parameter vector ``(alpha, y0, zeta)``.
    :param components: Sequence of binned component models.
    :param subgiants_binned: Dict of binned observations.
    :return: ``log_prior(theta) + log_likelihood_mc(...)``, or ``-np.inf``
        outside the prior range.
    """
    lp = log_prior(theta)
    if not np.isfinite(lp):
        return -np.inf
    return lp + log_likelihood_mc(theta, components, subgiants_binned)

In [None]:
log_probability(initial, components, subgiants_binned)

In [None]:
# Start 12 walkers in a small Gaussian ball (sigma = 0.05) around `initial`.
pos = initial + 0.05 * np.random.randn(12, len(initial))
nwalkers, ndim = pos.shape
# Extra `args` are forwarded to log_probability after the parameter vector.
sampler = emcee.EnsembleSampler(
    nwalkers, ndim, log_probability, args=(components, subgiants_binned)
)

In [None]:
%timeit model_from_theta(initial, components);

In [None]:
0.031 * 1e3 * 12 * len(initial) / 60

In [None]:
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    sampler.run_mcmc(pos, 1000, progress=True)

In [None]:
flat_samples = sampler.get_chain(discard=100, thin=15, flat=True)

In [None]:
corner.corner(flat_samples, labels=labels);

In [None]:
# Overplot the mg_h model table for 100 randomly chosen posterior samples
# (drawn with replacement) and compare with the binned observations.
inds = np.random.randint(len(flat_samples), size=100)


for i in inds:
    sample = flat_samples[i]
    model = model_from_theta(sample[:len(initial)], components)
    df = model["mg_h"]
    plt.scatter(df.MG_H, df.C_MG, color=arya.COLORS[1], alpha=0.1)

# Binned observations on top
df = subgiants_binned["mg_h"]
plt.errorbar(df.x, df.med, yerr=df.err)
plt.xlabel("[Mg/H]")
plt.ylabel("[C/Mg]")

In [None]:
# Overplot the mg_fe model table for 100 randomly chosen posterior samples
# (drawn with replacement) and compare with the binned observations.
inds = np.random.randint(len(flat_samples), size=100)


for i in inds:
    sample = flat_samples[i]
    model = model_from_theta(sample[:len(initial)], components)
    df = model["mg_fe"]
    plt.scatter(df.MG_FE, df.C_MG, color=arya.COLORS[2], alpha=0.1)

# Binned observations on top
df = subgiants_binned["mg_fe"]
plt.errorbar(df.x, df.med, yerr=df.err)
# The abscissa here is MG_FE, so the label must be [Mg/Fe] (was "[Mg/H]")
plt.xlabel("[Mg/Fe]")
plt.ylabel("[C/Mg]")

In [None]:
# NOTE(review): `df_2` is not defined anywhere in the visible notebook — this cell
# looks stale; confirm where it should come from before running.
plt.scatter(df_2.y, df_2.med, c=df_2.x)

In [None]:
# `subgiants_binned` is a dict of tables; the x/y/med columns live in the "2d" table
obs_2d = subgiants_binned["2d"]
plt.scatter(obs_2d.y, obs_2d.med, c=obs_2d.x)

In [None]:
plt.xlabel("MG/H")
plt.ylabel("Mg/Fe")

# `fiducial` is the dict returned by model_from_theta/add_models; the
# MG_H / MG_FE / med / err columns live in its "2d" table
model_2d = fiducial["2d"]
plt.scatter(model_2d.MG_H, model_2d.MG_FE, c=np.log10(model_2d.med), s=np.log(0.001/model_2d.err))
plt.colorbar(label="log Z(c)")

In [None]:
plt.xlabel("MG/H")
plt.ylabel("Mg/Fe")

# `subgiants_binned` is a dict of tables; the x/y/med columns live in the "2d" table
obs_2d = subgiants_binned["2d"]
plt.scatter(obs_2d.x, obs_2d.y, c=(obs_2d.med))
plt.colorbar(label="log Z(c)")

In [None]:
plt.xlabel("MG/H")
plt.ylabel("Mg/Fe")

# NOTE(review): `df_2` is not defined anywhere in the visible notebook — this cell
# looks stale; confirm where it should come from before running.
plt.scatter(df_2.x, df_2.y, c=(df_2.med))
plt.colorbar(label="log Z(c)")

In [None]:
subgiants_mgh = bin_mg_h(subgiants, x="MG_H", val="C_MG")
# NOTE(review): `fruity` is assigned from bin_model("analytic") earlier, which returns a
# dict of binned tables, while bin_mg_h expects a star table with a `high_alpha` column —
# this call looks stale (perhaps it should use find_model("analytic")); confirm.
fruity_mgh = bin_mg_h(fruity)

In [None]:
# NOTE(review): `fruity` is a dict of binned tables when assigned from bin_model, so the
# attribute access below (MG_H, AG_MG, high_alpha) presumably expects the raw star table
# instead — confirm which object is intended.
plt.scatter(fruity.MG_H, fruity.AG_MG, s=1, alpha=0.3, c=fruity.high_alpha)
plt.scatter(fruity_mgh.x, np.log10(fruity_mgh.med / vice.solar_z("c")) - fruity_mgh.x)

In [None]:
plt.scatter(subgiants_mgh.x, subgiants_mgh.med)
plt.scatter(fruity_mgh.x, np.log10(fruity_mgh.med / vice.solar_z("c")) - fruity_mgh.x)