In [None]:
import pandas as pd
from astropy.io import fits
from astropy.table import Table
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np


In [None]:
import surp
from surp import gce_math as gcem
import arya

from surp import subgiants

In [None]:
data_dir = surp._globals.DATA_DIR

In [None]:
def scotts_bin_width(x):
    """Return the histogram bin width from Scott's normal reference rule.

    The width is ``3.49 * std / N**(1/3)``, where both the standard deviation
    and the sample size ``N`` are computed from the finite entries of `x` only
    (NaN and +/-inf are ignored).

    Parameters
    ----------
    x : array-like
        Sample values; may contain non-finite entries.

    Returns
    -------
    float
        The bin width (NaN if `x` has no finite entries).
    """
    x = np.asarray(x)
    finite = x[np.isfinite(x)]
    # N must count only the values that enter the standard deviation; counting
    # the NaNs as well would make the bins artificially narrow.
    return 3.49 * np.std(finite) / np.cbrt(len(finite))

def scotts_bins(x):
    """Return evenly spaced bin edges covering the finite range of `x`.

    The spacing is `scotts_bin_width(x)`. The first edge is the finite minimum
    and the last edge is at or above the finite maximum, so no finite value
    falls outside the binning.

    Parameters
    ----------
    x : array-like
        Sample values; may contain non-finite entries.

    Returns
    -------
    numpy.ndarray
        Monotonically increasing bin edges.
    """
    x = np.asarray(x)
    finite = x[np.isfinite(x)]
    width = scotts_bin_width(finite)
    lo, hi = np.min(finite), np.max(finite)
    # np.arange excludes its stop value; pad by one width so that the largest
    # data values are still inside the last bin.
    return np.arange(lo, hi + width, width)

In [None]:
def hist(x, **kwargs):
    """Draw a 1D histogram of `x` on the current axes.

    Bin edges come from `scotts_bins(x)`; every keyword argument is forwarded
    unchanged to `plt.hist`.
    """
    plt.hist(x, bins=scotts_bins(x), **kwargs)

In [None]:
def hist2d(x, y, bins=None, **kwargs):
    """Draw a log-normalised 2D histogram of `x` against `y` on the current axes.

    Parameters
    ----------
    x, y : array-like
        Sample coordinates.
    bins : optional
        Binning specification passed straight to `plt.hist2d`. When omitted,
        the edges along each axis are chosen with `scotts_bins`.
    **kwargs
        Forwarded to `plt.hist2d`.
    """
    if bins is None:
        bins = (scotts_bins(x), scotts_bins(y))

    # Previously an explicit `bins` argument was ignored and triggered a
    # NameError because the edge arrays were only defined in the default branch.
    plt.hist2d(x, y, norm="log", bins=bins, **kwargs)

In [None]:
def keil_plot(teff, logg, bins=None, **kwargs):
    """Plot a Kiel diagram (log g against T_eff) as a log-normalised 2D histogram.

    Both axes are inverted, a colorbar is added and the axes are labelled.

    Parameters
    ----------
    teff, logg : array-like
        Effective temperatures and surface gravities; NaNs are ignored when
        determining the plotted range.
    bins : int or other, optional
        A scalar is interpreted as the number of evenly spaced bin *edges*
        spanning the NaN-ignoring range of each axis (default 200). Anything
        else is passed to `plt.hist2d` unchanged.
    **kwargs
        Forwarded to `plt.hist2d`.
    """
    if bins is None:
        bins = 200

    if np.isscalar(bins):
        xbins = np.linspace(np.nanmin(teff), np.nanmax(teff), bins)
        ybins = np.linspace(np.nanmin(logg), np.nanmax(logg), bins)
        bins = (xbins, ybins)

    # Previously a user-supplied `bins` raised a NameError because the edge
    # arrays were only defined in the default branch.
    plt.hist2d(teff, logg, norm="log", bins=bins, **kwargs)
    plt.colorbar(label="density")
    plt.gca().invert_xaxis()
    plt.gca().invert_yaxis()
    plt.xlabel(r"$T_{\rm eff}$ / K")
    plt.ylabel(r"$\log g$")

In [None]:
def to_nice_name(apogee_name):
    """Turn a column name such as ``"MG_FE"`` into a bracket label ``"[Mg/Fe]"``.

    Underscores become slashes, each part is title-cased, and the result is
    wrapped in square brackets.
    """
    ratio = apogee_name.replace("_", "/").title()
    return "[" + ratio + "]"

In [None]:
def plot_ab(df, x, y, **kwargs):
    """Plot column `y` against column `x` of `df` as an abundance diagram.

    Draws a 2D histogram via `hist2d` (kwargs forwarded), overlays
    `arya.medianplot` with 25 bins in black, and labels both axes with
    `to_nice_name` of the column names.
    """
    xs, ys = df[x], df[y]
    hist2d(xs, ys, **kwargs)
    arya.medianplot(df, x, y, numbins=25, color="k")

    for set_label, column in ((plt.xlabel, x), (plt.ylabel, y)):
        set_label(to_nice_name(column))

# GALAH DR3

see https://www.galah-survey.org/dr3/using_the_data/.

In [None]:
galah_all = Table.read(f"{data_dir}/GALAH_DR3_main_allstar_v2.fits", format="fits", memmap=True)

In [None]:
# recommended cuts in documentation
mask = galah_all["flag_sp"] == 0
mask &= galah_all["flag_fe_h"] == 0
mask &= galah_all["snr_c3_iraf"] > 30
print(np.sum(mask))

# require measured stellar parameters
mask &= ~np.isnan(galah_all["teff"])
mask &= ~np.isnan(galah_all["logg"])
print(np.sum(mask))

# drop stars whose stellar-parameter uncertainties are large
mask &= galah_all["e_logg"] < 0.5
mask &= galah_all["e_teff"] < 200
print(np.sum(mask))


df = galah_all[mask].to_pandas()

# derived abundance ratios, named with the upper-case X_Y convention used below
df["MG_H"] = df["Mg_fe"] + df["fe_h"]
df["C_MG"] = df["C_fe"] - df["Mg_fe"]
df["C_O"] = df["C_fe"] - df["O_fe"]

df["O_H"] = df["O_fe"] + df["fe_h"]
# [Mg/O] = [Mg/Fe] - [O/Fe]; the previous expression had the operands swapped
# and therefore stored [O/Mg] under this name.
df["MG_O"] = df["Mg_fe"] - df["O_fe"]

df["FE_H"] = df["fe_h"]
df["MG_FE"] = df["Mg_fe"]


df["LOGG"] = df["logg"]
df["TEFF"] = df["teff"]

galah = df

In [None]:
len(galah_all)

In [None]:
len(galah)

In [None]:
# make sure these things exist

# stellar-parameter window: LOGG >= 3.5 and 4500 <= TEFF <= 6500
abund_mask = galah["LOGG"] >= 3.5
abund_mask &= galah["TEFF"] >= 4500
abund_mask &= galah["TEFF"] <= 6500

# the abundance ratios used in the plots below must be measured and unflagged
abund_mask &= ~np.isnan(galah["C_MG"])
abund_mask &= ~np.isnan(galah["MG_FE"])
abund_mask &= ~np.isnan(galah["FE_H"])
abund_mask &= galah["flag_C_fe"] == 0
# NOTE(review): this keeps a star when EITHER the O or the Mg flag is clean.
# C_MG depends on Mg, so `&` (or the Mg flag alone) may have been intended -- confirm.
abund_mask &= (galah["flag_O_fe"] == 0) | (galah["flag_Mg_fe"] == 0)

galah_good = galah[abund_mask]
len(galah_good)

In [None]:
print(len(galah_good))

In [None]:
coords = [
    (3.5, 4800),
    (3.8, 4875),
    (4.0, 5300),
    (3.6, 5600),
    (3.5, 5250),
    (3.5, 4800)
]
y = [c[0] for c in coords]
x = [c[1] for c in coords]

In [None]:
keil_plot(galah["TEFF"], galah["LOGG"])

## error histograms

In [None]:
for key in ["e_logg", "e_teff", "e_fe_h", "e_O_fe", "e_Mg_fe", "e_C_fe",]:
    hist(galah[key], log=True);
    plt.xlabel(key)
    plt.show()

## abundance plots

In [None]:

# Count, per [Fe/H] bin, how many stars exist (h) and how many of them have a
# carbon measurement (Nc).
Nb = 30
bins = np.linspace(-1, 0.6, Nb)
# Deliberately NOT named `hist`: that would rebind the hist() helper defined at
# the top of the notebook and break every later hist(...) call.
fe_h_bin = np.digitize(df["FE_H"], bins)

Nc = np.zeros(Nb)

h = np.zeros(Nb)
for b in range(Nb):
    filt = fe_h_bin == b

    Nc[b] = np.sum(~np.isnan(df["C_fe"][filt]))
    h[b] = np.sum(filt)
    


In [None]:
df["C_meas"] = ~np.isnan(df["C_fe"])

In [None]:
fig, ax = plt.subplots() 
ax_twin = ax.twinx() 

plt.sca(ax)
#plt.plot(bins, Nc/h)
plt.ylabel('fractional completion')
plt.xlabel("[Fe/H]")
arya.medianplot(df, "FE_H", "C_meas", binsize=3000, stat="mean",  errorbar=None, label="completion")

plt.sca(ax_twin)
arya.medianplot(df, "FE_H", "C_MG", binsize=1000, errorbar=None, label="[C/Mg]", color=arya.style.COLORS[1])
plt.ylabel("[C/Mg]")

plt.sca(ax)
plt.legend(loc=2, bbox_to_anchor=(1, 0))
plt.sca(ax_twin)
plt.legend(loc=2, bbox_to_anchor=(1, 1))

In [None]:
plot_ab(galah_good, "fe_h", "Mg_fe")

In [None]:
plot_ab(galah_good, "fe_h", "MG_O")

In [None]:
plot_ab(galah_good, "fe_h", "MG_O")

In [None]:
plot_ab(galah_good, "MG_H", "C_MG")

In [None]:
plot_ab(galah_good, "O_H", "C_O")

In [None]:
plot_ab(galah_good, "MG_FE", "C_MG")

In [None]:
plot_ab(galah_good, "O_fe", "C_O")

In [None]:
# NOTE(review): the GALAH DR4 section further down writes its selection to this
# same path, so after a full run this DR3 output has been overwritten -- confirm
# that is intended.
galah_good.to_csv("galah_dwarfs.csv")

# GALAH DR4

see https://www.galah-survey.org/dr4/using_the_data/

In [None]:
galah_all = Table.read(f"{data_dir}/surveys/galah_dr4_allstar_240705.fits", format="fits", memmap=True)

In [None]:
galah_all.columns

In [None]:
# recommended cuts in documentation
mask = galah_all["flag_sp"] == 0
mask &= galah_all["flag_fe_h"] == 0
mask &= galah_all["snr_px_ccd3"] > 30
print(np.sum(mask))

# require measured stellar parameters
mask &= ~np.isnan(galah_all["teff"])
mask &= ~np.isnan(galah_all["logg"])
print(np.sum(mask))

# drop stars whose stellar-parameter uncertainties are large
mask &= galah_all["e_logg"] < 0.5
mask &= galah_all["e_teff"] < 200
print(np.sum(mask))


df = galah_all[mask].to_pandas()

# derived abundance ratios, named with the upper-case X_Y convention used below;
# the *_ERR columns sum the input uncertainties linearly (not in quadrature)
df["MG_H"] = df["mg_fe"] + df["fe_h"]
df["MG_H_ERR"] = df["e_mg_fe"] + df["e_fe_h"]
df["C_MG"] = df["c_fe"] - df["mg_fe"]
df["C_MG_ERR"] = df["e_c_fe"] + df["e_mg_fe"]

df["C_O"] = df["c_fe"] - df["o_fe"]

df["O_H"] = df["o_fe"] + df["fe_h"]
# [Mg/O] = [Mg/Fe] - [O/Fe]; the previous expression had the operands swapped
# and therefore stored [O/Mg] under this name.
df["MG_O"] = df["mg_fe"] - df["o_fe"]

df["FE_H"] = df["fe_h"]
df["MG_FE"] = df["mg_fe"]
df["MG_FE_ERR"] = df["e_mg_fe"]



df["LOGG"] = df["logg"]
df["TEFF"] = df["teff"]

galah = df

In [None]:
len(galah_all)

In [None]:
len(galah)

In [None]:
# make sure these things exist

# stellar-parameter window: LOGG >= 3.5 and 4500 <= TEFF <= 6500
abund_mask = galah["LOGG"] >= 3.5
abund_mask &= galah["TEFF"] >= 4500
abund_mask &= galah["TEFF"] <= 6500

# the abundance ratios used in the plots below must be measured and unflagged
abund_mask &= ~np.isnan(galah["C_MG"])
abund_mask &= ~np.isnan(galah["MG_FE"])
abund_mask &= ~np.isnan(galah["FE_H"])
abund_mask &= galah["flag_c_fe"] == 0
# NOTE(review): this keeps a star when EITHER the O or the Mg flag is clean.
# C_MG depends on Mg, so `&` (or the Mg flag alone) may have been intended -- confirm.
abund_mask &= (galah["flag_o_fe"] == 0) | (galah["flag_mg_fe"] == 0)

galah_good = galah[abund_mask]
len(galah_good)

In [None]:
print(len(galah_good))

In [None]:
coords = [
    (3.5, 4800),
    (3.8, 4875),
    (4.0, 5300),
    (3.6, 5600),
    (3.5, 5250),
    (3.5, 4800)
]
y = [c[0] for c in coords]
x = [c[1] for c in coords]

In [None]:
keil_plot(galah["TEFF"], galah["LOGG"])

In [None]:
keil_plot(galah_good["TEFF"], galah_good["LOGG"])

## error histograms

In [None]:
for key in ["e_logg", "e_teff", "e_fe_h", "e_o_fe", "e_mg_fe", "e_c_fe",]:
    hist(galah[key], log=True);
    plt.xlabel(key)
    plt.show()

## abundance plots

In [None]:

# Count, per [Fe/H] bin, how many stars exist (h) and how many of them have a
# carbon measurement (Nc).
Nb = 30
bins = np.linspace(-1, 0.6, Nb)
# Deliberately NOT named `hist`: that would rebind the hist() helper defined at
# the top of the notebook and break every later hist(...) call.
fe_h_bin = np.digitize(df["FE_H"], bins)

Nc = np.zeros(Nb)

h = np.zeros(Nb)
for b in range(Nb):
    filt = fe_h_bin == b

    Nc[b] = np.sum(~np.isnan(df["c_fe"][filt]))
    h[b] = np.sum(filt)
    


In [None]:
df["C_meas"] = ~np.isnan(df["c_fe"])

In [None]:
fig, ax = plt.subplots() 
ax_twin = ax.twinx() 

plt.sca(ax)
#plt.plot(bins, Nc/h)
plt.ylabel('fractional completion')
plt.xlabel("[Fe/H]")
arya.medianplot(df, "FE_H", "C_meas", binsize=3000, stat="mean",  errorbar=None, label="completion")

plt.sca(ax_twin)
arya.medianplot(df, "FE_H", "C_MG", binsize=1000, errorbar=None, label="[C/Mg]", color=arya.style.COLORS[1])
plt.ylabel("[C/Mg]")

plt.sca(ax)
plt.legend(loc=2, bbox_to_anchor=(1, 0))
plt.sca(ax_twin)
plt.legend(loc=2, bbox_to_anchor=(1, 1))

In [None]:
# Flag stars above a piecewise-linear boundary in the [Fe/H]-[Mg/Fe] plane:
# slope -0.1 for FE_H > 0 and -0.35 otherwise, both with intercept 0.08.
# `assign` returns a new frame, which avoids writing a column into a slice of
# `galah` (SettingWithCopyWarning).
galah_good = galah_good.assign(
    high_alpha=np.where(
        galah_good.FE_H > 0,
        galah_good.MG_FE > 0.08 - 0.1*galah_good.FE_H,
        galah_good.MG_FE > 0.08 - 0.35*galah_good.FE_H,
    )
)


In [None]:
plt.scatter(galah_good.FE_H, galah_good.MG_FE, c=galah_good.high_alpha, s=1, alpha=0.01)

In [None]:
plot_ab(galah_good, "fe_h", "mg_fe")

In [None]:
plot_ab(galah_good[galah_good.high_alpha], "fe_h", "mg_fe")

In [None]:
plot_ab(galah_good, "fe_h", "MG_O")

In [None]:
plot_ab(galah_good, "fe_h", "MG_O")

In [None]:
plot_ab(galah_good, "MG_H", "C_MG")

In [None]:
plot_ab(galah_good, "O_H", "C_O")

In [None]:
plot_ab(galah_good, "MG_FE", "C_MG")

In [None]:
plot_ab(galah_good, "o_fe", "C_O")

In [None]:
galah_good.to_csv("galah_dwarfs.csv")

# GaiaESO


- DR 5.1
- WG 10 is the low-resolution setup (GIRAFFE); WG 11 is the high-resolution setup (UVES).
- Datamodel https://www.eso.org/qi/catalog/show/411.
- Description
https://www.eso.org/rm/api/v1/public/releaseDescriptions/191

In [None]:
gaiaeso_all = Table.read(f"{data_dir}/surveys/gaiaeso_dr5.1.fits", format="fits", memmap=True)

In [None]:
len(gaiaeso_all) == 114_916

Quality cuts

In [None]:
gaiaeso_all.columns

In [None]:
sflags = pd.Series(gaiaeso_all["SFLAGS"]).str.decode("utf-8")

quality_mask = ~sflags.str.contains("SNR")
quality_mask &= ~sflags.str.contains("SRP")
quality_mask &= ~sflags.str.contains("BIN") # binary
quality_mask &= ~sflags.str.contains("SSA")
quality_mask &= ~sflags.str.contains("IPA")
quality_mask &= ~sflags.str.contains("PSC")
quality_mask &= ~sflags.str.contains("EML") # emission line

print(np.sum(quality_mask))
quality_mask &= gaiaeso_all["E_LOGG"] < 0.5
quality_mask &= gaiaeso_all["E_TEFF"] < 200
print(np.sum(quality_mask))


gaiaeso = gaiaeso_all[quality_mask.values].to_pandas()

While e.g. Mg II and C II are in the dataframe, none of these are successfully measured for these stars.

In [None]:
gaiaeso["MG_H"] = gcem.eps_to_brak(gaiaeso.MG1, "Mg")
gaiaeso["MG_H_ERR"] = gaiaeso.E_MG1

gaiaeso["CI_H"] = gcem.eps_to_brak(gaiaeso.C1, "C")
gaiaeso["CI_H_ERR"] = gaiaeso.E_C1
gaiaeso["CC_H"] = gcem.eps_to_brak(gaiaeso.C_C2, "C")
gaiaeso["CC_H_ERR"] = gaiaeso.E_C_C2

gaiaeso["FE_H"] = gaiaeso["FEH"]
gaiaeso["FE_H_ERR"] = gaiaeso["E_FEH"]


gaiaeso["MG_FE"] = gaiaeso.MG_H - gaiaeso.FE_H
gaiaeso["MG_FE_ERR"] = gaiaeso.MG_H_ERR + gaiaeso.FE_H_ERR
gaiaeso["C_MG"] = gaiaeso.CC_H - gaiaeso.MG_H
gaiaeso["C_MG_ERR"] = gaiaeso.MG_H_ERR + gaiaeso.CC_H_ERR

gaiaeso["CI_MG"] = gaiaeso.CI_H - gaiaeso.MG_H
gaiaeso["CI_MG_ERR"] = gaiaeso.MG_H_ERR + gaiaeso.CI_H_ERR


In [None]:
print(len(gaiaeso_all))
print(len(gaiaeso))

In [None]:
setup = gaiaeso.SETUP.str.decode("utf-8")

In [None]:
setup.unique()

In [None]:
# Select Gaia-ESO stars with measured LOGG/TEFF, LOGG >= 3.5 and a positive
# Mg I abundance uncertainty.
logg = gaiaeso.LOGG
teff = gaiaeso.TEFF

mask = ~np.isnan(logg)
mask &= ~np.isnan(teff)

mask &= logg >= 3.5
# mask &= logg <= 0.004*teff - 15.7
# mask &= logg <= 0.000706*teff + 0.36
# mask &= logg <= -0.0015 * teff + 12.05
# mask &= logg >= 0.0012*teff - 2.8


mask &= gaiaeso.E_MG1 > 0
#mask &= gaiaeso.C_MG_ERR > 0

subgiant_mask = np.copy(mask)


# Take an explicit copy: later cells write columns into `gso_dwarf`, which on a
# slice of `gaiaeso` raises SettingWithCopyWarning and may not stick.
gso_dwarf = gaiaeso[subgiant_mask].copy()

In [None]:
# Build the instrument masks on `gso_dwarf` itself so that they share its index.
# (The notebook-level `setup` series is indexed on all of `gaiaeso`, which made
# the first count below include non-dwarfs and left the boolean indexer
# misaligned with `gso_dwarf`.)
dwarf_setup = gso_dwarf.SETUP.str.decode("utf-8")

uves_filt = dwarf_setup.str.contains("U580") | dwarf_setup.str.contains("U520")
print(np.sum(uves_filt))
uves_filt &= gso_dwarf.REC_WG == b"WG11" # cuts out a few weird stars...
print(np.sum(uves_filt))

giraffe_filt = gso_dwarf.REC_WG == b"WG10"

In [None]:
gso_uv = gso_dwarf[uves_filt]
gso_uv

In [None]:
gso_lr = gso_dwarf[giraffe_filt]
gso_lr

In [None]:
gso_uv.REC_WG.value_counts()

In [None]:
gso_uv.ORIGIN_C_C2.value_counts()

In [None]:
gso_uv.NL_C_C2.value_counts()

In [None]:
gso_uv.NL_C1.value_counts()

In [None]:
136 + 78

In [None]:
gso_uv.NN_C1.value_counts()

In [None]:
gso_uv.NN_C_C2.value_counts()

In [None]:
gso_uv.NN_MG1.value_counts()

In [None]:
gso_lr.REC_WG.value_counts()

In [None]:
gso_lr.NN_C1.value_counts()

In [None]:
keil_plot(gso_uv["TEFF"], gso_uv["LOGG"])


In [None]:
keil_plot(gso_lr["TEFF"], gso_lr["LOGG"])


In [None]:
plt.errorbar(gso_uv.MG_H, gso_uv.C_MG, xerr=gso_uv.MG_H_ERR, yerr=gso_uv.C_MG_ERR, fmt="o", capsize=0, alpha=0.1)

plt.xlabel("[Mg/H]")
plt.ylabel(r"[C$_2$/Mg]")


In [None]:
plt.errorbar(gso_uv.MG_H, gso_uv.CI_MG, xerr=gso_uv.MG_H_ERR, yerr=gso_uv.CI_MG_ERR, fmt="o", capsize=0, alpha=0.1)

plt.xlabel("[Mg/H]")
plt.ylabel(r"[C\,I/Mg]")


In [None]:
plt.scatter(gso_uv.FE_H, gso_uv.MG_FE);
plt.xlabel("[FE/H]")
plt.ylabel("[Mg/Fe]")

Looks okay, no bimodality

## low resolution survey

In [None]:
plt.errorbar(gso_lr.MG_H, gso_lr.CI_MG, xerr=gso_lr.MG_H_ERR, yerr=gso_lr.CI_MG_ERR, fmt="o", capsize=0, alpha=0.1)

plt.xlabel("[Mg/H]")
plt.ylabel(r"[C\,I/Mg]")


In [None]:
# The x values are [Mg/Fe], so the horizontal error bars must use MG_FE_ERR
# (MG_H_ERR was used here before).
plt.errorbar(gso_lr.MG_FE, gso_lr.CI_MG, xerr=gso_lr.MG_FE_ERR, yerr=gso_lr.CI_MG_ERR, fmt="o", capsize=0, alpha=0.1)

plt.xlabel("[Mg/Fe]")
plt.ylabel(r"[C\,I/Mg]")

In [None]:
hist2d(gso_lr.FE_H, gso_lr.MG_FE)
plt.xlabel("[Fe/H]")
plt.ylabel("[Mg/Fe]")

In [None]:
plt.scatter(gso_uv.MG_H, gso_uv.C_MG, alpha=0.1, label="UVES C2")
plt.scatter(gso_uv.MG_H, gso_uv.CI_MG, alpha=0.1, label="UVES CI")
plt.scatter(gso_lr.MG_H, gso_lr.CI_MG, alpha=0.1, label="GIRAFFE CI")
plt.legend()
plt.xlabel("[Mg/H]")
plt.ylabel("[C/Mg]")

In [None]:
filt = np.isnan(gso_dwarf["C_MG"]) & ~np.isnan(gso_dwarf["CI_MG"])

gso_dwarf.loc[filt, "C_MG"] = gso_dwarf["CI_MG"][filt]
gso_dwarf.loc[filt, "C_MG_ERR"] = gso_dwarf["CI_MG_ERR"][filt]

In [None]:
gso_dwarf["high_alpha"] = np.where(gso_dwarf.FE_H > 0, gso_dwarf.MG_FE > 0.00 - 0.1*gso_dwarf.FE_H, gso_dwarf.MG_FE > 0.00 - 0.35*gso_dwarf.FE_H)


In [None]:
plt.scatter(gso_dwarf.FE_H, gso_dwarf.MG_FE, s=1, c=gso_dwarf.high_alpha, alpha=0.01)
plt.xlim(-1, 0.5)
plt.ylim(-0.4, 0.8)

In [None]:
gso_dwarf[gso_dwarf.C_MG_ERR > 0].to_csv("gso_dwarfs.csv")

In [None]:
print(np.sum(gso_lr.CI_MG_ERR > 0) / len(gso_lr))

In [None]:
# Fraction of UVES stars with either carbon indicator measured. Each comparison
# needs its own parentheses: `|` binds tighter than `>`, so the unparenthesised
# form evaluated `0 | (...)` first.
print(np.sum((gso_uv.CI_MG_ERR > 0) | (gso_uv.C_MG_ERR > 0)) / len(gso_uv))

In [None]:
len(gso_uv)

In [None]:
plt.errorbar(gso_uv.CI_H, gso_uv.CC_H - gso_uv.CI_H, xerr=gso_uv.CI_H_ERR, yerr=gso_uv.CC_H_ERR, fmt="o", capsize=0, alpha=0.1)
plt.xlabel("[C I / H]")
plt.ylabel(r"[C$_2$ / C I]")
plt.axhline(0, color="k")

In [None]:
len(gaiaeso_all[gaiaeso_all["REC_WG"] == b"WG11"])

# DESI MWM

Description: MNRAS 533, 1012–1031 (2024) https://ui.adsabs.harvard.edu/abs/2024MNRAS.533.1012K/abstract. 

Data access: https://data.desi.lbl.gov/doc/access/
Data Model: https://desi-mws-edr-datamodel.readthedocs.io/en/latest/

see below, likely not reliable for this kind of work...


> The carbon and magnesium abundances reported by the SP
pipeline are unreliable. The reason carbon abundances are not
trustworthy is because of an issue with the coating of the DESI blue
collimators, which creates a throughput artefact in many spectra
around 4300 Å (see e.g. fig. 26 in DESI Collaboration 2022).
This is the region of the CH absorption band, which is the most
important indicator of carbon abundance in late-type stars at the
resolution of DESI. While the abundances of iron and calcium
show a good correlation with the \[Fe/H\] and \[α/Fe\] parameters
derived by the same pipeline, that is not the case for magnesium.
We, therefore, recommend not using the abundance of carbon or
magnesium included in the EDR MWS VAC.


From https://academic.oup.com/mnras/article/533/1/1012/7724389.
DESI EDR MWS VAC 1027
MNRAS 533, 1012–1031 (2024)


In [None]:
f_desi = fits.open(f"{data_dir}/surveys/mwsall-pix-fuji.fits", memmap=True)

In [None]:
f_desi[2].header

In [None]:
# Read both tables from the same file that was opened above for the header
# check (under `surveys/`, like the other survey catalogues).
desi = Table.read(f"{data_dir}/surveys/mwsall-pix-fuji.fits", format="fits", hdu=2, memmap=True)
desi_rv = Table.read(f"{data_dir}/surveys/mwsall-pix-fuji.fits", format="fits", hdu=1, memmap=True)

# 1-sigma uncertainties from the diagonal of the covariance matrix.
# NOTE(review): the parameter order along the COVAR axes is assumed, not
# verified here. TEFF_ERR was previously also assigned from element [2, 2] and
# then immediately overwritten; only the effective assignment is kept.
desi["FEH_ERR"] = desi["COVAR"][:, 0, 0]**0.5
desi["ALPHAFE_ERR"] = desi["COVAR"][:, 1, 1]**0.5
desi["LOGG_ERR"] = desi["COVAR"][:, 4, 4]**0.5
desi["TEFF_ERR"] = desi["COVAR"][:, 3, 3]**0.5




# this is not well documented, just guessing here
desi["C_H"] = desi["ELEM"][:, 0]
desi["MG_H"] = desi["ELEM"][:, 1]
desi["CA_H"] = desi["ELEM"][:, 2]
desi["FE_H"] = desi["ELEM"][:, 3]


# Use the same element order as for ELEM above (the Ca and Fe indices were
# swapped here before).
desi["C_H_ERR"] = desi["ELEM_ERR"][:, 0]
desi["MG_H_ERR"] = desi["ELEM_ERR"][:, 1]
desi["CA_H_ERR"] = desi["ELEM_ERR"][:, 2]
desi["FE_H_ERR"] = desi["ELEM_ERR"][:, 3]

desi["C_MG"] = desi["C_H"] - desi["MG_H"]
desi["MG_FE"] = desi["MG_H"] - desi["FE_H"]


# filter out the worst of it

filt = desi["LOGG"] > 1
filt &= desi["LOGG"] < 5
filt &= desi["TEFF"] > 2000
filt &= desi["TEFF"] < 10_000
filt &= desi["FEH_ERR"] < 0.1
filt &= desi["TEFF_ERR"] < 100
filt &= desi["LOGG_ERR"] < 0.3

filt &= desi["SUCCESS"] > 0

print(len(desi))
print(np.sum(filt))
# apply the same row mask to both tables so that they stay row-aligned
desi_rv = desi_rv[filt]
desi = desi[filt]

In [None]:
keil_plot(desi["TEFF"], desi["LOGG"])

In [None]:
keil_plot(desi_rv["TEFF"], desi_rv["LOGG"])

Keil diagram looks like nonsense, not a good sign...

## Abundance validation

In [None]:
plt.hist2d(desi["FEH"], desi["FEH"] - desi_rv["FEH"], 100, range=((-3, 1), (-0.5, 0.5)), );

In [None]:
plt.hist2d(desi["ALPHAFE"], desi["ALPHAFE"] - desi_rv["ALPHAFE"], 100, range=((-1, 1), (-1, 1)), );

In [None]:
plt.hist(desi["ELEM_ERR"][:, 3], 100, range=(0, 0.5));

Fe errors (above) do not correspond with abundance versus stellar parameter errors.

In [None]:
plt.hist2d(desi["FEH"], desi["FEH"] - desi["FE_H"], 100, range=((-1, 1), (-1, 1)), );

In [None]:
plt.hist2d(desi["MG_FE"], desi["ALPHAFE"] - desi["MG_FE"], 100, range=((-1, 1), (-1, 1)), );

## Plots

In [None]:
plt.hist2d(desi["FE_H"], desi["MG_FE"], 100, range=((-1, 1), (-1, 1)), );

almost looks believable

In [None]:
plt.hist2d(desi["MG_H"], desi["C_MG"], 100, range=((-1, 1), (-1, 1)), );

this one not so much

# LAMOST (Cycle-StarNet)


https://ui.adsabs.harvard.edu/abs/2023ApJS..266...40W/abstract
On website uses the other version...

In [None]:
# Build the path like every other catalogue in this notebook; plain `+`
# concatenation dropped the separator between data_dir and "surveys".
table = pd.read_csv(f"{data_dir}/surveys/lamost_dr8.csv")

In [None]:
table.columns

In [None]:
qual_filt = table["Flag_quality"] == 0
qual_filt &= table["Flag_logg"] == 0
qual_filt &= table["Flag_Teff"] == 0

In [None]:
keil_plot(table["Teff"][qual_filt], table["logg"][qual_filt])

In [None]:
filt = qual_filt.copy()
filt &= table["logg"] > 3.5
filt &= table["logg"] < 5
filt &= table["Teff"] < 7000
filt &= table["Teff"] > 4000
filt &= table["Flag_FeH"] == 0
filt &= table["Flag_MgFe"] == 0

In [None]:
# Apply the full dwarf selection (`filt` = quality flags + logg/Teff window +
# Fe/Mg flags) built in the previous cell; `qual_filt` alone left that mask
# unused even though the result is exported as "lamost_dwarfs.csv".
lamost = table[filt].copy()

In [None]:
def rename_lamost_columns(col):
    """Map a LAMOST column name onto the upper-case naming used in this notebook.

    Steps, applied in order: strip trailing underscores and upper-case; insert
    an underscore before a trailing ``FE``; rename an exact ``FEH`` to
    ``FE_H``; move a leading ``ERR_`` / ``FLAG_`` prefix to a trailing
    ``_ERR`` / ``_FLAG`` suffix; strip leading underscores.
    """
    name = col.rstrip("_").upper()

    if name.endswith("FE"):
        name = f"{name[:-2]}_FE"
    if name == "FEH":
        name = "FE_H"

    # prefix -> suffix, checked sequentially on the progressively updated name
    for prefix, suffix in (("ERR_", "_ERR"), ("FLAG_", "_FLAG")):
        if name.startswith(prefix):
            name = name[len(prefix):] + suffix

    return name.lstrip("_")

In [None]:
lamost.columns = [rename_lamost_columns(col) for col in lamost.columns]

In [None]:
lamost.columns

In [None]:
lamost["MG_H"] = lamost.FE_H + lamost.MG_FE

lamost["C_MG"] = lamost.C_FE - lamost.MG_FE

In [None]:
lamost_c = lamost[lamost.C_FE_FLAG == 0]

In [None]:
plot_ab(lamost, "FE_H", "MG_FE")

In [None]:
plot_ab(lamost_c, "MG_H", "C_MG")

In [None]:
len(lamost_c) / len(lamost)

In [None]:
plot_ab(lamost_c, "MG_FE", "C_MG")

In [None]:
lamost_c.to_csv("lamost_dwarfs.csv")

# Crossmatching

In [None]:
def crossmatch_test(df1, df2, col, s=1, alpha=0.1, **kwargs):
    """Scatter the difference ``df2[col] - df1[col]`` against ``df1[col]``.

    The two frames are compared row by row (position-wise, after conversion to
    numpy arrays). A horizontal line marks zero difference, and the axis labels
    are derived from `col` with underscores shown as slashes.
    """
    reference = np.array(df1[col])
    delta = np.array(df2[col]) - reference
    label = col.replace("_", "/")

    plt.scatter(reference, delta, s=s, alpha=alpha, **kwargs)
    plt.axhline(0, c="k")
    plt.xlabel(label)
    plt.ylabel("delta " + label)

In [None]:
def crossmatch_test_hist(df1, df2, col, range=None, **kwargs):
    """2D-histogram version of `crossmatch_test`.

    Bins ``df2[col] - df1[col]`` against ``df1[col]`` (row by row) with
    `hist2d`, marks zero difference with a horizontal line and adds a colorbar.
    The `range` argument is accepted but not used by this function; remaining
    keyword arguments are forwarded to `hist2d`.
    """
    reference = np.array(df1[col])
    delta = np.array(df2[col]) - reference
    label = col.replace("_", "/")

    hist2d(reference, delta, **kwargs)
    plt.axhline(0, c="k")
    plt.xlabel(label)
    plt.ylabel("delta " + label)
    plt.colorbar(label="count")

In [None]:
from surp import subgiants
from astropy.coordinates import SkyCoord
from astropy import units as u


In [None]:
def get_crossmatch(ra1, dec1, ra2, dec2, max_sep=1*u.arcsec):
    """Nearest-neighbour sky crossmatch of catalogue 1 against catalogue 2.

    For every position in catalogue 1 the closest position in catalogue 2 is
    found with `SkyCoord.match_to_catalog_sky`; pairs separated by `max_sep`
    or more are discarded.

    Parameters
    ----------
    ra1, dec1 : array-like
        Positions of catalogue 1, interpreted as degrees.
    ra2, dec2 : array-like
        Positions of catalogue 2, interpreted as degrees.
    max_sep : astropy angular quantity, optional
        Maximum on-sky separation for a pair to count as a match.

    Returns
    -------
    idx_l : numpy.ndarray
        Positional indices into catalogue 1 of the rows that have a match.
    idx_r : numpy.ndarray
        Positional indices into catalogue 2 of the matched neighbours; same
        length as `idx_l`, paired element by element.

    Notes
    -----
    Prints the two catalogue sizes and the number of matches. Draws a
    scatter plot of both footprints (shown immediately) and a second one of
    the matched positions (left on the current figure).
    """
    ra1 = np.array(ra1)
    dec1 = np.array(dec1)
    ra2 = np.array(ra2)
    dec2 = np.array(dec2)

    # Convert catalogue positions into SkyCoord objects
    coords1 = SkyCoord(ra=ra1 * u.degree, dec=dec1 * u.degree)
    coords2 = SkyCoord(ra=ra2 * u.degree, dec=dec2 * u.degree)

    # Perform crossmatch (the 3D distance is not needed)
    idx, d2d, _ = coords1.match_to_catalog_sky(coords2)

    sep_constraint = d2d < max_sep

    idx_l = np.where(sep_constraint)[0]  # positions in catalogue 1
    idx_r = idx[sep_constraint]  # positions of the matches in catalogue 2

    print(len(ra1))
    print(len(ra2))
    print(len(idx_l))

    # footprints of the two full catalogues
    plt.scatter(ra2, dec2,  s=0.01, alpha=1, label="2")

    plt.scatter(ra1, dec1,  s=0.01, alpha=1, label="1")
    plt.xlabel("RA")
    plt.ylabel("DEC")
    plt.show()

    # footprint of the matched pairs only
    plt.scatter(ra2[idx_r], dec2[idx_r],
                s=0.01, alpha=1, label="match")
    plt.scatter(ra1[idx_l], dec1[idx_l],
                s=0.01, alpha=1, label="match")
    plt.xlabel("RA")
    plt.ylabel("DEC")

    return idx_l, idx_r

In [None]:
# comes from the notebook APOGEE_minimal.ipynb
apogee = pd.read_csv(f"{data_dir}/apogee_most.csv")

In [None]:
# remove test star (the first row) and renumber the index from zero;
# previously the reset_index() result was discarded, so the index was never reset.
# NOTE: re-running this cell without re-reading the CSV drops another row.
apogee = apogee.iloc[1:].reset_index(drop=True)

## GALAH - APOGEE

In [None]:
idx_l, idx_r = get_crossmatch(galah.ra, galah.dec, apogee.RA, apogee.DEC)

In [None]:
for col in ["LOGG", "TEFF", "FE_H", "MG_FE", "C_MG"]:
    crossmatch_test_hist(galah.iloc[idx_l], apogee.iloc[idx_r], col, alpha=1)
    plt.show()

## Crossmatch GaiaESO

In [None]:
idx_l, idx_r = get_crossmatch(gaiaeso.RA, gaiaeso.DECLINATION, apogee.RA, apogee.DEC, max_sep=10*u.arcsec)


In [None]:
for col in ["LOGG", "TEFF", "FE_H", "MG_FE", "C_MG"]:
    crossmatch_test(gaiaeso.iloc[idx_l], apogee.iloc[idx_r], col, alpha=1)
    plt.show()

## galah - gaiaeso

In [None]:
idx_l, idx_r = get_crossmatch(gaiaeso.RA, gaiaeso.DECLINATION, galah.ra, galah.dec, max_sep=1*u.arcsec)


In [None]:
for col in ["LOGG", "TEFF", "FE_H", "MG_FE", "C_MG"]:
    crossmatch_test(gaiaeso.iloc[idx_l], galah.iloc[idx_r], col, alpha=0.5)
    plt.show()

## LAMOST - APOGEE


In [None]:
filt = apogee.STARFLAG == 0
filt &= apogee.ASPCAPFLAG == 0

In [None]:
idx_l, idx_r = get_crossmatch(lamost.RA, lamost.DEC, apogee[filt].RA, apogee[filt].DEC, max_sep=1*u.arcsec)


In [None]:
for col in ["LOGG", "TEFF", "FE_H", "MG_FE", "C_FE"]:
    crossmatch_test_hist(lamost.iloc[idx_l], apogee[filt].iloc[idx_r], col)
    plt.show()

## Lamost - GALAH

In [None]:
idx_l, idx_r = get_crossmatch(lamost.RA, lamost.DEC, galah.ra, galah.dec, max_sep=1*u.arcsec)


In [None]:
for col in ["LOGG", "TEFF", "FE_H", "MG_FE", "C_MG"]:
    crossmatch_test_hist(lamost.iloc[idx_l], galah.iloc[idx_r], col)
    plt.show()