# alternate star sampling

Does changing where we pull stars from affect our conclusions?

In [None]:
import surp

import arya
import matplotlib.pyplot as plt

In [None]:
from arya import COLORS
from surp import subgiants

In [None]:
# Keyword arguments shared by every arya.medianplot call that draws the
# observed (subgiant) trend: black, median statistic, and a black
# semi-transparent face for whatever `err_kwargs` styles.
data_kwargs = {
    "color": "k",
    "stat": "median",
    "err_kwargs": {"facecolor": "k", "alpha": 0.3},
}

# Keyword arguments shared by every arya.medianplot call that draws a model
# trend: median statistic, no error bar, drawn with the "line" aesthetic.
model_kwargs = {
    "stat": "median",
    "errorbar": None,
    "aes": "line",
}


def zooh_models(models, labels, x="MG_H", y="C_MG", use_true=True,
                sequential=False, filt_ha=True, **kwargs):
    """Plot the median of ``y`` against ``x`` for each model and for the data.

    One ``arya.medianplot`` curve is drawn per entry of ``models`` (styled
    with ``model_kwargs``), followed by one curve for the ``subgiants`` table
    (styled with ``data_kwargs`` and ``zorder=-2``).  The axes are labelled
    with ``to_nice_name(x)`` and ``to_nice_name(y)``.

    Parameters
    ----------
    models : sequence
        Model star tables to plot, one curve each.
    labels : sequence of str
        Legend label for each model; indexed in parallel with ``models``.
    x, y : str
        Column names used for the data curve.
    use_true : bool
        If true, the model curves use the ``x + "_true"`` / ``y + "_true"``
        columns instead of ``x`` / ``y``.
    sequential : bool
        If true, colours come from ``arya.HueMap(clim=(0, len(models)))``;
        otherwise from ``COLORS[i]``.
    filt_ha : bool
        If true, every table (models and data) is first passed through
        ``surp.plots.filter_high_alpha``.
    **kwargs
        Forwarded to every ``arya.medianplot`` call.  ``numbins`` defaults
        to 20 and may be overridden here.
    """
    # A dict literal lets a caller-supplied ``numbins`` replace the default;
    # ``dict(numbins=20, **kwargs)`` raises TypeError on the duplicate keyword.
    kwargs = {"numbins": 20, **kwargs}

    n_models = len(models)
    if sequential:
        hm = arya.HueMap(clim=(0, n_models))
    else:
        def hm(i):
            return COLORS[i]

    suffix = "_true" if use_true else ""
    xm = x + suffix
    ym = y + suffix

    for i, model in enumerate(models):
        name = labels[i]
        df = surp.plots.filter_high_alpha(model) if filt_ha else model
        arya.medianplot(df, xm, ym, label=name, color=hm(i),
                        **model_kwargs, **kwargs)

    df = surp.plots.filter_high_alpha(subgiants) if filt_ha else subgiants
    # zorder=-2 is passed so the data curve is ordered relative to the models.
    arya.medianplot(df, x=x, y=y, zorder=-2, **data_kwargs, **kwargs)
    plt.xlabel(to_nice_name(x))
    plt.ylabel(to_nice_name(y))


    
def zofeo_models(models, labels, x="MG_FE", y="C_MG", use_true=True,
                 sequential=False, mg_0=-0.0, w=0.025, **kwargs):
    """Plot the median of ``y`` against ``x`` after a metallicity selection.

    The ``subgiants`` table and every entry of ``models`` are passed through
    ``surp.plots.filter_metallicity(table, c=mg_0, w=w)`` before plotting.
    The data curve is drawn first (styled with ``data_kwargs``), then one
    curve per model (styled with ``model_kwargs``).  The axes are labelled
    with ``to_nice_name(x)`` and ``to_nice_name(y)``.

    Parameters
    ----------
    models : sequence
        Model star tables to plot, one curve each.
    labels : sequence of str
        Legend label for each model; indexed in parallel with ``models``.
    x, y : str
        Column names used for the data curve.
    use_true : bool
        If true, the model curves use the ``x + "_true"`` / ``y + "_true"``
        columns instead of ``x`` / ``y``.
    sequential : bool
        If true, colours come from ``arya.HueMap(clim=(0, len(models)))``;
        otherwise from ``COLORS[i]``.
    mg_0, w : float
        Passed to ``surp.plots.filter_metallicity`` as ``c`` and ``w``
        (presumably the centre and width of the selection; confirm in surp).
    **kwargs
        Forwarded to every ``arya.medianplot`` call.  ``numbins`` defaults
        to 12 and may be overridden here.
    """
    # A dict literal lets a caller-supplied ``numbins`` replace the default;
    # ``dict(numbins=12, ..., **kwargs)`` raises TypeError on the duplicate.
    kwargs = {"numbins": 12, "x": x, "y": y, **kwargs}

    observed = surp.plots.filter_metallicity(subgiants, c=mg_0, w=w)
    arya.medianplot(observed, **data_kwargs, **kwargs)

    # From here on ``kwargs`` describes the model curves.
    if use_true:
        kwargs["x"] = x + "_true"
        kwargs["y"] = y + "_true"

    n_models = len(models)
    if sequential:
        hm = arya.HueMap(clim=(0, n_models))
    else:
        def hm(i):
            return COLORS[i]

    for i, model in enumerate(models):
        df = surp.plots.filter_metallicity(model, c=mg_0, w=w)
        arya.medianplot(df, label=labels[i], color=hm(i),
                        **model_kwargs, **kwargs)

    plt.xlabel(to_nice_name(x))
    plt.ylabel(to_nice_name(y))

In [None]:
def to_nice_name(apogee_name):
    """Turn a column name such as ``"MG_H"`` into a bracketed label.

    Underscores become slashes, the result is title-cased, and the whole
    string is wrapped in square brackets, e.g. ``"MG_H"`` -> ``"[Mg/H]"``.
    """
    ratio = "/".join(apogee_name.split("_"))
    return "[" + ratio.title() + "]"

## Sampling

In [None]:
# Load the fiducial model from its saved JSON file.
fiducial = surp.ViceModel.from_file("../../models/fiducial/run/model.json")

In [None]:
# The model's `stars_unsampled` table; this is the input handed to
# create_sample below.
stars_unsampled = fiducial.stars_unsampled

In [None]:
stars_unsampled

In [None]:
# Short alias for the sampling routine used throughout this notebook.
create_sample = surp.vice_utils.create_star_sample

In [None]:
# CDF table returned by surp; the cells below use its `R` and `cdf` columns.
cdf = surp.vice_utils.load_cdf()
cdf

In [None]:
# Copy of the CDF with R divided by 1.5: the same cumulative fractions are
# reached at 1.5x smaller R.
cdf_low = cdf.copy()
cdf_low.R /= 1.5

In [None]:
# Copy of the CDF with R multiplied by 1.5: the same cumulative fractions are
# reached at 1.5x larger R.
cdf_high = cdf.copy()
cdf_high.R *= 1.5

In [None]:
# Compare the two rescaled CDFs with the unscaled one.  Each curve is
# labelled so the figure can be read without referring back to the code.
plt.plot(cdf_low.R, cdf_low.cdf, label="R / 1.5")
plt.plot(cdf_high.R, cdf_high.cdf, label="R * 1.5")
plt.plot(cdf.R, cdf.cdf, label="unscaled")
plt.xlabel("R")
plt.ylabel("cdf")
plt.legend()

In [None]:
# Star samples keyed by name: two re-drawn from the unsampled stars with the
# rescaled CDFs, plus the model's own `stars` table as the reference.
samples = {
    "low": create_sample(stars_unsampled, cdf=cdf_low),
    "fiducial": fiducial.stars,
    "high": create_sample(stars_unsampled, cdf=cdf_high),
}


In [None]:
# Distribution of `r_final` in each sample.  Each step histogram is labelled
# with its sample name so the curves can be told apart.
for name, sample in samples.items():
    plt.hist(sample.r_final, histtype="step", label=name)
plt.legend()

In [None]:
samples.keys()

In [None]:
# Parallel lists of sample names and sample tables, in insertion order.
labels = list(samples)
models = list(samples.values())

In [None]:
# Median C_MG against MG_H (the function defaults) for every sample.
zooh_models(models, labels)

In [None]:
# Median C_MG against MG_FE (the function defaults) for every sample.
zofeo_models(models, labels)

In [None]:
cdf

In [None]:
import numpy as np
import pandas as pd

In [None]:
def make_uniform_cdf(low, high, *, r_max=15.5, dr=0.01):
    """Tabulate the CDF of a uniform distribution between ``low`` and ``high``.

    The CDF is 0 below ``low``, 1 above ``high`` and rises linearly in
    between.  It is evaluated on the grid ``np.arange(0, r_max, dr)``.

    Parameters
    ----------
    low, high : float
        Edges of the uniform interval.  If ``high <= low`` the interval is
        degenerate and the CDF is a step from 0 to 1 at ``low``.
    r_max, dr : float, keyword-only
        Upper limit (exclusive) and spacing of the ``R`` grid.

    Returns
    -------
    pandas.DataFrame
        Columns ``R`` (grid points) and ``cdf`` (CDF values at those points).
    """
    R = np.arange(0, r_max, dr)

    if high <= low:
        # The linear ramp would divide by zero when high == low and put a NaN
        # in the table at R == low, so emit the limiting step function.
        cdf = np.where(R < low, 0.0, 1.0)
    else:
        ramp = 1 - (high - R) / (high - low)
        cdf = np.where(R < low, 0, np.where(R > high, 1, ramp))

    return pd.DataFrame(dict(R=R, cdf=cdf))

In [None]:
# Replace the earlier samples with three drawn from narrow uniform CDFs:
# R in [3, 5], [7, 9] and [11, 13].
samples = {
    "low": create_sample(stars_unsampled, cdf=make_uniform_cdf(3, 5)),
    "mid": create_sample(stars_unsampled, cdf=make_uniform_cdf(7, 9)),
    "high": create_sample(stars_unsampled, cdf=make_uniform_cdf(11, 13)),
}


In [None]:
# Add a sample drawn from a much wider uniform CDF, R in [4, 12].
samples["wide"] = create_sample(stars_unsampled, cdf=make_uniform_cdf(4, 12))


In [None]:
# Plot each uniform CDF used for the samples.  The loop variable is not named
# `cdf` so the table returned by surp.vice_utils.load_cdf() is not overwritten
# if earlier cells are re-run afterwards.
for low, high in [(3, 5), (7, 9), (11, 13), (4, 12)]:
    uniform_cdf = make_uniform_cdf(low, high)
    plt.plot(uniform_cdf.R, uniform_cdf.cdf, label=f"{low}-{high}")
plt.legend()

In [None]:
# Parallel lists of sample names and sample tables, in insertion order.
labels = list(samples)
models = list(samples.values())

In [None]:
# Median C_MG against MG_H (the function defaults) for the uniform samples.
zooh_models(models, labels)

In [None]:
# Median C_MG against MG_FE (the function defaults) for the uniform samples.
zofeo_models(models, labels)

In [None]:
# Distribution of `r_final` in each sample.  Each step histogram is labelled
# with its sample name so the curves can be told apart.
for name, sample in samples.items():
    plt.hist(sample.r_final, histtype="step", label=name)
plt.legend()

# Conclusions

caafe is entirely unaffected by the samples. 
caah differs only in the metallicity range sampled, and the samples diverge in the low-metallicity region.