# Fix the most important $(N \geq 3)$ RV systems by hand

In [1]:
import numpy as np
import csv
import matplotlib.pyplot as plt
from scipy import stats
from scipy.optimize import newton

from oviraptor.utils import *
from oviraptor.constants import *

In [2]:
# Root directory of the local Oviraptor project; every catalog path hangs off it.
# (Alternative location kept for reference: 'C:/Users/djhoo/Documents/Oviraptor-master/')
MAINPATH = "/Users/research/projects/oviraptor/"

# Input catalogs, in order: the archive planetary-systems table, the list of
# high-multiplicity RV targets, and the cleaned + cross-matched main catalog.
ps_file, rv_multi_file, oviraptor_file = [
    MAINPATH + relative_path
    for relative_path in (
        "Catalogs/exoarchive_ps_rv_multis_20210531.csv",
        "Catalogs/rv_multis_for_mast.csv",
        "Catalogs/oviraptor_A_cleaned_and_crossmatched.csv",
    )
]

# Read in the RV planetary systems table

In [3]:
# Load the list of high-multiplicity RV systems; only its TARGET column is
# needed, as the set of host names to keep later on.
target_keys, target_vals = read_csv_file(rv_multi_file, k_index=1, v_index=2)
target_column = get_csv_data("TARGET", target_keys, target_vals)

targets = np.array(target_column)

In [4]:
# Read the archive planetary-systems table into a dict holding one NumPy
# array per column, keyed by column name.
keys, vals = read_csv_file(ps_file, k_index=0, v_index=1)
data = {k: np.array(get_csv_data(k, keys, vals)) for k in keys}

# keep one column name handy as a reference key
k0 = list(data)[0]

n_systems = len(np.unique(data["hostname"]))
print("Loaded planetary system data for {0} systems".format(n_systems))

Loaded planetary system data for 82 systems


In [5]:
# The archive filters were set broadly, so drop every row whose host star
# is not in the target list.
keep = np.isin(data["hostname"], targets)
data = {column: values[keep] for column, values in data.items()}

In [6]:
# Display the column names available in `data`.
data.keys()

dict_keys(['hostname', 'default_flag', 'sy_snum', 'sy_pnum', 'discoverymethod', 'rv_flag', 'soltype', 'pl_controv_flag', 'st_refname', 'st_spectype', 'st_teff', 'st_tefferr1', 'st_tefferr2', 'st_tefflim', 'st_rad', 'st_raderr1', 'st_raderr2', 'st_radlim', 'st_mass', 'st_masserr1', 'st_masserr2', 'st_masslim', 'st_met', 'st_meterr1', 'st_meterr2', 'st_metlim', 'st_metratio', 'st_logg', 'st_loggerr1', 'st_loggerr2', 'st_logglim', 'sy_refname', 'rastr', 'ra', 'decstr', 'dec', 'sy_vmag', 'sy_vmagerr1', 'sy_vmagerr2', 'sy_kmag', 'sy_kmagerr1', 'sy_kmagerr2', 'sy_gaiamag', 'sy_gaiamagerr1', 'sy_gaiamagerr2', 'rowupdate', 'pl_pubdate', 'releasedate'])

# Select the most precise $R_{\star}, M_{\star}, \ T_{\rm eff}$ values

In [7]:
# Output table of adopted stellar parameters: one (initially empty) list per
# column.  For each of Teff, radius and mass we keep the value, its two error
# columns and the reference the value was taken from.
rv = {
    column: []
    for column in ['hostname',
                   'st_teff', 'st_tefferr1', 'st_tefferr2', 'st_teff_ref',
                   'st_rad', 'st_raderr1', 'st_raderr2', 'st_rad_ref',
                   'st_mass', 'st_masserr1', 'st_masserr2', 'st_mass_ref']
}

In [8]:
def _most_precise_measurement(data, use, value_key, ref_key="st_refname"):
    """Pick the most precisely measured entry of one stellar parameter.

    Among the rows of ``data`` selected by the boolean mask ``use``, choose
    the row with the smallest ``err1**2 + err2**2``, where the error columns
    are ``value_key + "err1"`` and ``value_key + "err2"``.  Blank error
    entries are treated as NaN and ignored in the comparison.

    Parameters
    ----------
    data : dict
        Column name -> array of strings as read from the catalog.
    use : boolean array
        Mask selecting the rows that belong to one host star.
    value_key : str
        Column of the parameter itself, e.g. ``"st_mass"``.
    ref_key : str
        Column holding the literature reference for each row.

    Returns
    -------
    tuple
        ``(value, err1, err2, ref)`` exactly as stored in ``data``.  If no row
        has a usable error estimate the result is ``("nan", "nan", "nan", "")``.
        The selected value itself may still be blank; that is caught by the
        missing-data check that follows.
    """
    err1_key = value_key + "err1"
    err2_key = value_key + "err2"

    def _as_float(column):
        raw = data[column][use]
        return np.asarray(np.where(raw == "", "nan", raw), dtype="float")

    err_sq = _as_float(err1_key)**2 + _as_float(err2_key)**2

    if np.all(np.isnan(err_sq)):
        return "nan", "nan", "nan", ""

    loc = np.nanargmin(err_sq)
    return (data[value_key][use][loc],
            data[err1_key][use][loc],
            data[err2_key][use][loc],
            data[ref_key][use][loc])


for star in np.unique(data["hostname"]):
    rv["hostname"].append(star)

    use = data["hostname"] == star
    refs = data["st_refname"][use]

    # stars with only one reference are easy
    # this step may select empty values - this will be fixed later
    if len(np.unique(refs)) == 1:
        for k in rv.keys():
            if k != "hostname" and not k.endswith("ref"):
                rv[k].append(data[k][use][0])

        for ref_column in ("st_teff_ref", "st_rad_ref", "st_mass_ref"):
            rv[ref_column].append(refs[0])

    # with multiple references, use the most precise value of each parameter.
    # Each parameter is judged on its own errors; the previous version tested
    # the radius errors when deciding whether a temperature was available.
    else:
        for name in ("st_mass", "st_rad", "st_teff"):
            value, err1, err2, ref = _most_precise_measurement(data, use, name)

            rv[name].append(value)
            rv[name + "err1"].append(err1)
            rv[name + "err2"].append(err2)
            rv[name + "_ref"].append(ref)

# Check for missing data

In [9]:
# Freeze the per-star lists into arrays so they can be compared element-wise.
rv = {column: np.asarray(entries) for column, entries in rv.items()}

# One boolean flag per star and per quantity (Mass, Radius, Temperature).
n_stars = len(rv["hostname"])
bad = {flag: np.zeros(n_stars, dtype="bool") for flag in ("M", "R", "T")}

# A star is flagged for a quantity if ANY of that quantity's columns
# (value, either error, or reference) is blank or the string "nan".
prefix_to_flag = {"st_mass": "M", "st_rad": "R", "st_teff": "T"}

for column, entries in rv.items():
    for prefix, flag in prefix_to_flag.items():
        if column.startswith(prefix):
            bad[flag] |= entries == ""
            bad[flag] |= entries == "nan"


print("\nThe following systems are missing MASS measurements:")
print(np.unique(rv["hostname"][bad["M"]]))

print("\nThe following systems are missing RADIUS measurements:")
print(np.unique(rv["hostname"][bad["R"]]))

print("\nThe following systems are missing TEMPERATURE measurements:")
print(np.unique(rv["hostname"][bad["T"]]))


The following systems are missing MASS measurements:
['GJ 163' 'GJ 180' 'GJ 3138' 'GJ 3293' 'HD 20781' 'HD 27894' 'HD 31527'
 'Wolf 1061']

The following systems are missing RADIUS measurements:
['GJ 163' 'GJ 180' 'HD 20781' 'HD 20794' 'HD 27894' 'HD 31527' 'HD 37124'
 'HD 69830' 'tau Cet']

The following systems are missing TEMPERATURE measurements:
['GJ 163' 'GJ 180' 'HD 20794' 'HD 27894' 'HD 37124' 'tau Cet']


In [10]:
# A star needs a manual entry if it was flagged for mass, radius or temperature.
needs_fix = bad["M"] | bad["R"] | bad["T"]

print("The following systems are missing data and should be manually fixed:\n")
print(rv["hostname"][needs_fix])

The following systems are missing data and should be manually fixed:

['GJ 163' 'GJ 180' 'GJ 3138' 'GJ 3293' 'HD 20781' 'HD 20794' 'HD 27894'
 'HD 31527' 'HD 37124' 'HD 69830' 'Wolf 1061' 'tau Cet']


In [11]:
                    # Hand-entered stellar masses for the systems flagged above as missing data.
                    # Rows are listed in the same (sorted) hostname order as that printed list.
                    # Each row mixes strings and numbers, so np.array stores every entry as a string.
                    # Units are presumably solar masses -- TODO confirm against the cited references.
                    # Name        M_mean    M_err1   M_err2   M_ref
fix_mass = np.array([['GJ 163',     0.40,     0.02,   -0.02,  "Tuomi+ 2013"],
                     ['GJ 180',     0.41,     0.02,   -0.02,  "Schweitzer+ 2019"],
                     ['GJ 3138',    0.68,     0.02,   -0.02,  "Astudillo-Defru+ 2017"],
                     ['GJ 3293',    0.42,     0.04,   -0.04,  "Astudillo-Defru+ 2017"],
                     ['HD 20781',   0.70,     0.05,   -0.05,  "Udry+ 2019"],
                     ['HD 20794',   0.81,     0.02,   -0.01,  "Feng+ 2017"],
                     ['HD 27894',   0.86,     0.06,   -0.06,  "Trevisan+ 2011"],
                     ['HD 31527',   0.96,     0.04,   -0.04,  "Udry+ 2019"],
                     ['HD 37124',   0.80,     0.01,   -0.01,  "Bonfanti+ 2015"],
                     ['HD 69830',   0.86,     0.04,   -0.04,  "Tanner+ 2015"],
                     ['Wolf 1061',  0.29,     0.01,   -0.01,  "Astudillo-Defru+ 2017"],
                     ['tau Cet',    0.78,     0.01,   -0.01,  "Feng+ 2017"]])

In [12]:
                   # Hand-entered stellar radii for the systems flagged above as missing data.
                   # Rows are listed in the same (sorted) hostname order as that printed list.
                   # Each row mixes strings and numbers, so np.array stores every entry as a string.
                   # Units are presumably solar radii -- TODO confirm against the cited references.
                   # Name        R_mean    R_err1   R_err2   R_ref
fix_rad = np.array([['GJ 163',     0.41,     0.01,   -0.01,  "TICv8"],
                    ['GJ 180',     0.41,     0.01,   -0.01,  "Schweitzer+ 2019"],   
                    ['GJ 3138',    0.50,     0.03,   -0.03,  "Astudillo-Defru+ 2017"],
                    ['GJ 3293',    0.40,     0.03,   -0.03,  "Astudillo-Defru+ 2017"],  
                    ['HD 20781',   0.87,     0.05,   -0.05,  "TICv8"],
                    ['HD 20794',   0.90,     0.03,   -0.03,  "Bernkopf+ 2012"],  
                    ['HD 27894',   0.85,     0.05,   -0.05,  "Trevisan+ 2011"],
                    ['HD 31527',   1.08,     0.04,   -0.04,  "TICv8"],
                    ['HD 37124',   0.92,     0.02,   -0.02,  "Bonfanti+ 2015"], 
                    ['HD 69830',   0.91,     0.02,   -0.02,  "Tanner+ 2015"],
                    ['Wolf 1061',  0.31,     0.03,   -0.03,  "Astudillo-Defru+ 2017"],
                    ['tau Cet',    0.79,     0.01,   -0.01,  "Teixeira+ 2009"]])

In [13]:
                    # Hand-entered effective temperatures for the systems flagged above as missing data.
                    # Rows are listed in the same (sorted) hostname order as that printed list.
                    # Each row mixes strings and numbers, so np.array stores every entry as a string.
                    # Units are presumably Kelvin -- TODO confirm against the cited references.
                    # Name        T_mean    T_err1   T_err2   T_ref
fix_teff = np.array([['GJ 163',     3500,      100,    -100,  "Tuomi+ 2013"],
                     ['GJ 180',     3572,       51,     -51,  "Schweitzer+ 2019"],
                     ['GJ 3138',    3717,       49,     -49,  "Astudillo-Defru+ 2017"],
                     ['GJ 3293',    3466,       49,     -49,  "Astudillo-Defru+ 2017"],
                     ['HD 20781',   5256,       29,     -29,  "Udry+ 2019"],
                     ['HD 20794',   5490,       70,     -70,  "Bernkopf+ 2012"],
                     ['HD 27894',   4920,       45,     -45,  "Trevisan+ 2011"],
                     ['HD 31527',   5898,       13,     -13,  "Udry+ 2019"],
                     ['HD 37124',   5763,       22,     -22,  "Bonfanti+ 2015"], 
                     ['HD 69830',   5394,       62,     -62,  "Tanner+ 2015"], 
                     ['Wolf 1061',  3342,       49,     -49,  "Astudillo-Defru+ 2017"],
                     ['tau Cet',    5344,       29,     -29,  "Santos+ 2004"]])

# Mass-luminosity relations

In [14]:
# use Delfosse+ 2000 for GJ 3138 & GJ 3293 & Wolf-1061 (GJ 628)
# see Astudillo-Defru+ 2017 for luminosities

# [value, uncertainty] of the magnitude fed to the V-band relation below
# (presumably an absolute V magnitude -- TODO confirm which star this is for)
V = np.array([11.87, 0.01])


def logMv(x):
    """Return log10 of the stellar mass from the V-band magnitude ``x``.

    Quartic polynomial mass-luminosity relation (Delfosse+ 2000, V band).
    Works element-wise on scalars or NumPy arrays.
    """
    return 1e-3*(0.3 + 1.87*x + 7.6140*x**2 - 1.6980*x**3 + 0.060958*x**4)


def logMk(x):
    """Return log10 of the stellar mass from the K-band magnitude ``x``.

    Quartic polynomial mass-luminosity relation (Delfosse+ 2000, K band).
    Works element-wise on scalars or NumPy arrays.
    """
    # linear coefficient is 6.12 in Delfosse+ 2000 (it was mistyped as 1.62)
    return 1e-3*(1.8 + 6.12*x + 13.205*x**2 - 6.2315*x**3 + 0.37529*x**4)


# mass implied by the V-band relation, and the change in mass when the
# magnitude is lowered by its uncertainty
Mv = 10**(logMv(V[0]))
Mv_err = 10**(logMv(V[0]-V[1])) - Mv

# Fill in the main catalog

In [15]:
def _apply_manual_fixes(catalog, table, columns):
    """Overwrite catalog entries with hand-entered values, matched by hostname.

    Parameters
    ----------
    catalog : dict
        Column name -> NumPy array; must contain a ``"hostname"`` column.
        The columns named in ``columns`` are replaced in place in the dict.
    table : 2-D array of str
        One row per system: hostname first, then one entry per name in
        ``columns`` (same order).
    columns : sequence of str
        Catalog columns that receive table columns 1, 2, ...

    Raises
    ------
    ValueError
        If the table names a system that is not in the catalog.
    """
    names = table[:, 0]

    unknown = np.setdiff1d(names, catalog["hostname"])
    if unknown.size:
        raise ValueError("manual fix table lists unknown systems: {0}".format(list(unknown)))

    for key, new_values in zip(columns, table[:, 1:].T):
        # Go through object dtype so that a replacement string longer than the
        # column's fixed string width is not silently truncated on assignment.
        column = np.asarray(catalog[key]).astype(object)
        for name, value in zip(names, new_values):
            column[catalog["hostname"] == name] = value
        catalog[key] = column.astype(str)


# systems flagged as missing a mass, radius or temperature
needs_fix = rv["hostname"][bad["M"] | bad["R"] | bad["T"]]
replace = np.isin(rv["hostname"], needs_fix)

fix_tables = [
    (fix_mass, ["st_mass", "st_masserr1", "st_masserr2", "st_mass_ref"]),
    (fix_rad,  ["st_rad",  "st_raderr1",  "st_raderr2",  "st_rad_ref"]),
    (fix_teff, ["st_teff", "st_tefferr1", "st_tefferr2", "st_teff_ref"]),
]

# Values are assigned by hostname rather than by position, so the result does
# not depend on the fix tables being typed in the same order as rv["hostname"].
for table, columns in fix_tables:
    unfixed = np.setdiff1d(needs_fix, table[:, 0])
    if unfixed.size:
        raise ValueError("no manual values provided for: {0}".format(list(unfixed)))

    _apply_manual_fixes(rv, table, columns)

# Ensure all uncertainties are non-zero (enforce minimum error floors)

In [16]:
# Convert the adopted values and errors to floats and make sure no
# uncertainty is smaller in magnitude than a minimum floor.
# (value column, upper-error column, lower-error column, floor)
error_floors = [
    ("st_mass", "st_masserr1", "st_masserr2", 0.01),
    ("st_rad",  "st_raderr1",  "st_raderr2",  0.01),
    ("st_teff", "st_tefferr1", "st_tefferr2", 10),
]

for value_key, upper_key, lower_key, floor in error_floors:
    rv[value_key] = np.array(rv[value_key], dtype="float")
    upper = np.array(rv[upper_key], dtype="float")
    lower = np.array(rv[lower_key], dtype="float")

    # upper errors are stored as positive numbers
    upper[upper < floor] = floor

    # lower errors are stored as negative numbers; test the lower errors
    # themselves (masking on the upper errors here would overwrite every
    # lower error with the floor)
    lower[lower > -floor] = -floor

    rv[upper_key] = upper
    rv[lower_key] = lower

## TODO: 
1. Make references hyperlinks
2. Include V_mag data
3. Include discovery instrument/telescope
4. Add code to automatically generate LaTeX table

# Write out the RV stellar catalog

In [17]:
# Set to False to skip (re)writing the RV stellar catalog to disk.
WRITENEW = True
if WRITENEW:
    filepath = MAINPATH + 'Catalogs/rv_multis_stellar_manually_fixed.csv'

    # newline="" lets the csv module control line endings itself; without it
    # extra blank rows can appear on platforms that translate "\n".
    with open(filepath, "w", newline="") as outfile:
        writer = csv.writer(outfile)
        writer.writerow(rv.keys())
        writer.writerows(zip(*rv.values()))

    # only report success when something was actually written
    print("Writing complete!")

Writing complete!


# Reconcile the main catalog with the RV stellar catalog

In [18]:
# Read in the main catalog, one NumPy array per column
keys, vals = read_csv_file(oviraptor_file, k_index=0, v_index=1)
my_catalog = {k: np.array(get_csv_data(k, keys, vals)) for k in keys}

# any column will do for counting rows; use the first one as reference
k0 = list(my_catalog)[0]
n_objects = len(my_catalog[k0])

print('total number of queried objects =', n_objects)

total number of queried objects = 3903


In [19]:
# Copy the adopted stellar values and errors (everything except the hostname
# and the reference columns) into every main-catalog row of the same star.
# NOTE(review): if the my_catalog columns are fixed-width string arrays, the
# assigned numbers are converted to text of that width -- confirm that no
# digits are lost.
columns_to_copy = [k for k in rv.keys() if k != "hostname" and k[-3:] != "ref"]

for star in rv["hostname"]:
    in_catalog = my_catalog["hostname"] == star
    in_rv = rv["hostname"] == star

    for k in columns_to_copy:
        my_catalog[k][in_catalog] = rv[k][in_rv]

In [20]:
# Set to False to skip (re)writing the reconciled main catalog to disk.
WRITENEW = True
if WRITENEW:
    filepath = MAINPATH + 'Catalogs/oviraptor_B_manually_fixed.csv'

    # newline="" lets the csv module control line endings itself; without it
    # extra blank rows can appear on platforms that translate "\n".
    with open(filepath, "w", newline="") as outfile:
        writer = csv.writer(outfile)
        writer.writerow(my_catalog.keys())
        writer.writerows(zip(*my_catalog.values()))

    # only report success when something was actually written
    print("Writing complete!")

Writing complete!
