In [30]:
import pandas as pd
import numpy as np
from alfven_estimates import estimate_alfven, estimate_rossby, mass_class, measured_uncertainties, rad_class

In [31]:
# Read the NASA exoplanet table and pass it through `measured_uncertainties`
# before any derived columns are attached.
full_sample_path = "current-exo-data/nasa_exo.csv"
full_sample = measured_uncertainties(pd.read_csv(full_sample_path))

# V-K colour index: V magnitude minus K magnitude.
full_sample["vk"] = full_sample["sy_vmag"].sub(full_sample["sy_kmag"])

# Class codes from the planet mass and radius columns (helpers come from
# `alfven_estimates`).
for class_column, classify, quantity_column in (
    ("mass_class", mass_class, "pl_bmasse"),
    ("rad_class", rad_class, "pl_rade"),
):
    full_sample[class_column] = classify(full_sample[quantity_column])

# pl data: stellar mass and V-K taken from the planet-level table.
m = full_sample["st_mass"]
vk = full_sample["vk"]
# CHZ PLANET CUT
# NOTE(review): this marker was fused onto the end of the `vk` line; it looks
# like the heading for the CHZ flag cell that follows -- confirm.

In [32]:
# Sanity check: is there already a "db" column in the planet table?
# (`in` on a DataFrame tests membership among its column labels.)
"db" in full_sample

False

In [33]:
# chz flag
# Two planet lists are read; they are labelled "UPR" and "Hill23+" in the
# summary printed at the end of this cell.
exos_habitable = pd.read_csv("current-exo-data/exos_habitable.csv")
exos_hill23 = pd.read_csv("current-exo-data/exos_hill23.csv")

# Union of the planet names appearing in either list.
habitable_pl_names = set(exos_habitable["pl_name"]).union(set(exos_hill23["pl_name"]))

# 1 if the planet name is in the union, else 0.
# Vectorised membership test instead of a row-wise `apply`: same 0/1 int64
# column, but it does not run a Python lambda per row and it still yields a
# well-formed (empty) column when `full_sample` has no rows.
full_sample["habitable"] = full_sample["pl_name"].isin(habitable_pl_names).astype("int64")

print("CHZ - [{}] UPR\nCHZ - [{}] Hill23+\nCHZ - [{}] All".format(len(exos_habitable), len(exos_hill23), len(habitable_pl_names)))

CHZ - [62] UPR
CHZ - [323] Hill23+
CHZ - [333] All


In [34]:
# Planet Class
mclass = full_sample["mass_class"]
rclass = full_sample["rad_class"]

# One entry per report section: (heading, class series, rows), where each row
# pairs the output line template with the class code it counts.
class_report = (
    ("MASS CLASS", mclass, (
        ("EXOS - [{}] No class", -1),
        ("EXOS - [{}] Subterran (0.1 <= M <= 0.5)", 0),
        ("EXOS - [{}] Terran (0.5 < M <= 3.0)", 1),
        ("EXOS - [{}] Superterran (3.0 < M <= 10.0)", 2),
        ("EXOS - [{}] Giant (M > 10.0)", 3),
    )),
    ("RADIUS CLASS", rclass, (
        ("EXOS - [{}] No class", -1),
        ("EXOS - [{}] Subterran (0.4 <= R <= 0.8)", 0),
        ("EXOS - [{}] Terran (0.8 < R <= 1.6)", 1),
        ("EXOS - [{}] Superterran (1.6 < R <= 2.5)", 2),
        ("EXOS - [{}] Giant (R > 2.5)", 3),
    )),
)

for heading, class_codes, rows in class_report:
    print(heading)
    for template, code in rows:
        # Number of planets whose class code equals `code`.
        print(template.format(class_codes[class_codes == code].count()))

MASS CLASS
EXOS - [32] No class
EXOS - [80] Subterran (0.1 <= M <= 0.5)
EXOS - [871] Terran (0.5 < M <= 3.0)
EXOS - [2017] Superterran (3.0 < M <= 10.0)
EXOS - [2496] Giant (M > 10.0)
RADIUS CLASS
EXOS - [38] No class
EXOS - [72] Subterran (0.4 <= R <= 0.8)
EXOS - [1021] Terran (0.8 < R <= 1.6)
EXOS - [1281] Superterran (1.6 < R <= 2.5)
EXOS - [3084] Giant (R > 2.5)


In [35]:
# ORBITAL DATA CUT

# Non-null counts of the orbital columns and their uncertainty columns,
# computed in one pass and looked up by column name below.
orbit_counts = full_sample[
    ["pl_orbsmax", "pl_orbeccen", "pl_orbsmaxerr", "pl_orbeccenerr"]
].count()

print("EXOS - [{}] semi-major axis (a)\nEXOS - [{}] eccentricity (e)".format(
    orbit_counts["pl_orbsmax"],
    orbit_counts["pl_orbeccen"]))
print("EXOS - [{}] uncertainty in semi-major axis (da)\nEXOS - [{}] uncertainty in eccentricity (de)".format(
    orbit_counts["pl_orbsmaxerr"],
    orbit_counts["pl_orbeccenerr"]))

EXOS - [5199] semi-major axis (a)
EXOS - [4739] eccentricity (e)
EXOS - [2594] uncertainty in semi-major axis (da)
EXOS - [1552] uncertainty in eccentricity (de)


In [36]:
# STELLAR/INTERIOR CONVECTION TYPE CUT

# planets with stellar data to calculate tauc
print("EXOS - [{}] M*\nEXOS - [{}] V-K".format(m.count(), vk.count()))
# planets with stellar data satisfying partially- or fully-convective stellar interior constraint from W18+ tauc
# `Series.between` is inclusive at both ends, matching the ranges printed in
# the labels. The earlier centre/half-width form, `np.fabs(m - 0.72) <= 0.64`,
# silently dropped M* == 1.36: in binary floating point 1.36 - 0.72 evaluates
# to 0.6400000000000001, which is greater than 0.64. Missing values compare
# False in both forms, so they remain excluded.
print("EXOS - [{}] 0.08 <= M* <= 1.36".format(m[m.between(0.08, 1.36)].count()))
print("EXOS - [{}] 1.1 <= V-K <= 7.0".format(vk[vk.between(1.1, 7.0)].count()))

EXOS - [5492] M*
EXOS - [5271] V-K
EXOS - [5066] 0.08 <= M* <= 1.36
EXOS - [5133] 1.1 <= V-K <= 7.0


In [37]:
# st data
# Host-level table: keep the first row seen for each hostname and drop the
# planet identifier columns.
full_sample_st = (
    full_sample
    .drop_duplicates(subset="hostname", keep="first")
    .drop(columns=["pl_name", "pl_letter"])
)

m_st = full_sample_st["st_mass"]
vk_st = full_sample_st["vk"]

# The host table must contain exactly one row per distinct hostname.
n_hosts = len(full_sample_st)
assert len(full_sample["hostname"].unique()) == n_hosts
print("HOSTS - [{}]".format(n_hosts))

HOSTS - [4096]


In [38]:
# stars with calculable tauc
print("HOSTS - [{}] M* \nHOSTS - [{}] V-K".format(m_st.count(), vk_st.count()))
# stars satisfying partially- or fully-convective stellar interior constraint from W18+ tauc
# `Series.between` is inclusive at both ends, matching the ranges printed in
# the labels. The earlier `np.fabs(m_st - 0.72) <= 0.64` form excluded hosts
# with M* == 1.36, because 1.36 - 0.72 evaluates to 0.6400000000000001 in
# binary floating point. Missing values compare False in both forms.
print("HOSTS - [{}] 0.08 <= M* <= 1.36".format(m_st[m_st.between(0.08, 1.36)].count()))
print("HOSTS - [{}] 1.1 <= V-K <= 7.0".format(vk_st[vk_st.between(1.1, 7.0)].count()))

HOSTS - [4093] M* 
HOSTS - [3880] V-K
HOSTS - [3707] 0.08 <= M* <= 1.36
HOSTS - [3772] 1.1 <= V-K <= 7.0


In [39]:
# ROTATION PERIOD CUT

hosts_path = "current-exo-data/hosts_prot.csv"
hosts = pd.read_csv(hosts_path)

# Default (inner) joins on hostname: only rows whose host appears in `hosts`
# survive, at planet level and at host level respectively.
prot_sample = full_sample.merge(hosts, on="hostname")
prot_sample_st = full_sample_st.merge(hosts, on="hostname")

# Every rotation period (and uncertainty) in `hosts` must be carried into the
# host-level merge.
for rotation_column in ("Prot", "e_Prot"):
    assert hosts[rotation_column].count() == prot_sample_st[rotation_column].count()

# Number of rotation periods per value of the `db` column.
print(hosts.groupby(by="db")["Prot"].count())

db
arm16      6
lu22       3
mar20     18
mcq13    616
mcq14      9
nasa     372
Name: Prot, dtype: int64


In [40]:
# Non-null counts of the rotation period and its uncertainty, for the
# host-level table first and then the planet-level table.
rotation_columns = ["Prot", "e_Prot"]
host_rotation = prot_sample_st[rotation_columns].count()
exo_rotation = prot_sample[rotation_columns].count()

print("HOSTS (R) - [{}] Prot\nHOSTS (R) - [{}] e_Prot".format(
    host_rotation["Prot"],
    host_rotation["e_Prot"]))
print("EXOS (R) - [{}] Prot\nEXOS (R)- [{}] e_Prot".format(
    exo_rotation["Prot"],
    exo_rotation["e_Prot"]))

HOSTS (R) - [1024] Prot
HOSTS (R) - [929] e_Prot
EXOS (R) - [1540] Prot
EXOS (R)- [1405] e_Prot


In [41]:
# STELLAR/INTERIOR CONVECTION TYPE CUT (ROTATORS)

# Non-null counts of V-K and stellar mass among the rotators, host-level
# first and then planet-level.
host_vk_n = prot_sample_st["vk"].count()
host_mass_n = prot_sample_st["st_mass"].count()
exo_vk_n = prot_sample["vk"].count()
exo_mass_n = prot_sample["st_mass"].count()

print("HOSTS (R) - [{}] V-K\nHOSTS (R)- [{}] M*".format(host_vk_n, host_mass_n))
print("EXOS (R) - [{}] V-K\nEXOS (R) - [{}] M*".format(exo_vk_n, exo_mass_n))

HOSTS (R) - [1022] V-K
HOSTS (R)- [1024] M*
EXOS (R) - [1538] V-K
EXOS (R) - [1540] M*


In [42]:
# Non-null counts of the V, K and stellar-mass uncertainty columns among the
# rotators, host-level first and then planet-level.
uncertainty_columns = ["sy_vmagerr", "sy_kmagerr", "st_masserr"]
host_err_counts = [prot_sample_st[name].count() for name in uncertainty_columns]
exo_err_counts = [prot_sample[name].count() for name in uncertainty_columns]

print("HOSTS (R) - [{}] dV\nHOSTS (R) - [{}] dK\nHOSTS (R) - [{}] dM*".format(*host_err_counts))
print("EXOS (R) - [{}] dV\nEXOS (R) - [{}] dK\nEXOS (R) - [{}] dM*".format(*exo_err_counts))

HOSTS (R) - [1019] dV
HOSTS (R) - [1009] dK
HOSTS (R) - [978] dM*
EXOS (R) - [1532] dV
EXOS (R) - [1516] dK
EXOS (R) - [1457] dM*


In [43]:
# Rows where exactly one of the two magnitude uncertainties (V or K) is
# present: exclusive-or of the two "has a value" masks.
has_dv = prot_sample["sy_vmagerr"].notnull()
has_dk = prot_sample["sy_kmagerr"].notnull()
where_err = has_dv ^ has_dk

# Same selection on the host-level table.
has_dv_st = prot_sample_st["sy_vmagerr"].notnull()
has_dk_st = prot_sample_st["sy_kmagerr"].notnull()
where_err_st = has_dv_st ^ has_dk_st

In [44]:
# For the rows with only one of the two magnitude uncertainties, show how
# many have dV and how many have dK.
magnitude_errors = ["sy_vmagerr", "sy_kmagerr"]
for title, table, one_sided in (
    ("HOSTS", prot_sample_st, where_err_st),
    ("EXOS", prot_sample, where_err),
):
    print(title)
    print(table.loc[one_sided, magnitude_errors].count())

HOSTS
sy_vmagerr    13
sy_kmagerr     3
dtype: int64
EXOS
sy_vmagerr    20
sy_kmagerr     4
dtype: int64


In [45]:
# ORBITAL DATA CUT (ROTATORS)
# Non-null counts of the orbital columns and their uncertainty columns in the
# planet-level rotator table.
rotator_orbit_counts = prot_sample[
    ["pl_orbsmax", "pl_orbeccen", "pl_orbsmaxerr", "pl_orbeccenerr"]
].count()

print("EXOS (R) - [{}] semi-major axis (a)\nEXOS (R) - [{}] eccentricity (e)".format(
    rotator_orbit_counts["pl_orbsmax"],
    rotator_orbit_counts["pl_orbeccen"]))
print("EXOS (R) - [{}] uncertainty in semi-major axis (da)\nEXOS (R) - [{}] uncertainty in eccentricity (de)".format(
    rotator_orbit_counts["pl_orbsmaxerr"],
    rotator_orbit_counts["pl_orbeccenerr"]))

EXOS (R) - [1491] semi-major axis (a)
EXOS (R) - [1458] eccentricity (e)
EXOS (R) - [629] uncertainty in semi-major axis (da)
EXOS (R) - [411] uncertainty in eccentricity (de)


In [46]:
# Run `estimate_rossby` (imported from `alfven_estimates`) on the planet-level
# and host-level rotator tables.
# NOTE(review): the helper is not visible here -- presumably it adds Rossby
# number columns; confirm whether it returns a copy or modifies its input.
ro_sample = estimate_rossby(prot_sample)
ro_sample_st = estimate_rossby(prot_sample_st)

In [47]:
# Run `estimate_alfven` (imported from `alfven_estimates`) on the planet-level
# Rossby sample. The cells below read "MHC" and "dMHC" from the result.
# NOTE(review): the "no stellar rad" / "bad vals" lines shown under this cell
# appear to be printed by the helper itself -- confirm in `alfven_estimates`.
as_sample = estimate_alfven(ro_sample)

no stellar rad: 0

bad vals: 0



In [48]:
# Row masks: planets flagged habitable, and planets with MHC above 1.
where_CHZ = as_sample["habitable"].eq(1)
where_MHC = as_sample["MHC"].gt(1)

# Subsets for each mask, and for the rows satisfying both.
CHZ_sample = as_sample.loc[where_CHZ]
MHC_sample = as_sample.loc[where_MHC]
CHZ_MHC_sample = as_sample.loc[where_CHZ & where_MHC]

In [49]:
# Non-null counts of the planet name, MHC and dMHC in each subset, printed in
# the order CHZ, MHC > 1, then both.
summary_columns = ["pl_name", "MHC", "dMHC"]
for subset in (CHZ_sample, MHC_sample, CHZ_MHC_sample):
    print(subset[summary_columns].count())

pl_name    84
MHC        84
dMHC       53
dtype: int64
pl_name    620
MHC        620
dMHC       529
dtype: int64
pl_name    76
MHC        76
dMHC       48
dtype: int64


In [50]:
# Non-null MHC / dMHC counts per mass class for each subset.
# The label and the table are printed by separate `print` calls. Passing both
# to one `print` put the label on the same line as the table's column header,
# which pushed "MHC  dMHC" out of line with the values beneath it.
for label, subset in (
    ("[CHZ]", CHZ_sample),
    ("\n\n[MHC>1]", MHC_sample),
    ("\n\n[CHZ, MHC>1]", CHZ_MHC_sample),
):
    print(label)
    print(subset.groupby(by="mass_class")[["MHC", "dMHC"]].count())

[CHZ]             MHC  dMHC
mass_class           
0             1     1
1            15    12
2            21    17
3            47    23


[MHC>1]             MHC  dMHC
mass_class           
0             4     4
1            63    58
2           279   270
3           274   197


[CHZ, MHC>1]             MHC  dMHC
mass_class           
1            10     9
2            19    16
3            47    23


In [51]:
# Non-null MHC / dMHC counts per radius class for each subset.
# The label and the table are printed by separate `print` calls. Previously
# the first label shared a line with the table header, and the other two were
# followed by "\n" plus print's separator space, which shifted the header one
# column to the right of the values beneath it.
for label, subset in (
    ("[CHZ]", CHZ_sample),
    ("\n\n[MHC>1]", MHC_sample),
    ("\n\n[CHZ, MHC>1]", CHZ_MHC_sample),
):
    print(label)
    print(subset.groupby(by="rad_class")[["MHC", "dMHC"]].count())

[CHZ]            MHC  dMHC
rad_class           
0            1     1
1           16    13
2           14    13
3           53    26


[MHC>1]
            MHC  dMHC
rad_class           
-1           2     2
 0           4     4
 1          72    67
 2         167   161
 3         375   295


[CHZ, MHC>1]
            MHC  dMHC
rad_class           
1           11    10
2           12    12
3           53    26


In [54]:
# How many rotator planets have a semi-major axis but no eccentricity.
has_a_without_e = prot_sample["pl_orbsmax"].notnull() & prot_sample["pl_orbeccen"].isnull()
prot_sample.loc[has_a_without_e, "pl_orbsmax"].count()

56

In [55]:
# Complementary row masks: eccentricity present / eccentricity missing.
where_ecc = prot_sample["pl_orbeccen"].notna()
where_noecc = ~where_ecc

In [62]:
# Fill "rperi" with 0.0 where the eccentricity is missing and 1.0 where it is
# present, in that order.
# NOTE(review): these look like placeholder values rather than a computed
# quantity -- confirm the intended formula.
for row_mask, fill_value in ((where_noecc, 0.0), (where_ecc, 1.0)):
    prot_sample.loc[row_mask, "rperi"] = fill_value

In [65]:
# Number of rows with a non-null "rperi".
# `.notnull()` returns a boolean mask that itself has no missing entries, so
# calling `.count()` on it always returned the total number of rows. Summing
# the mask counts only the True entries.
prot_sample["rperi"].notnull().sum()

1540