### Fills in missing values in the exoarchive catalog using more recently-measured values from the Gaia DR2.
### Removes planets that are controversial or have detection methods other than transit, RV, or TTV.
### Outputs the new results in catalogues/oviraptor_crossmatch_catalog_v2.csv.

In [1]:
# IMPORT STATEMENTS AND PRELIMINARIES

import csv
import os

import matplotlib.pyplot as plt
import numpy as np
from astropy.constants import G
from astropy.constants import M_earth
from astropy.constants import M_jup
from astropy.constants import M_sun
from astropy.constants import R_earth
from astropy.constants import R_jup
from scipy import stats
from scipy.optimize import newton

# Earth radius expressed in Jupiter radii (used to convert pl_rade -> pl_radj)
RERJ = float(R_earth/R_jup)

# Plain-float copies of the astropy constants (the .value of each constant)
BIGG = G.value
MSUN = M_sun.value
MEARTH = M_earth.value
MJUPITER = M_jup.value

pi = np.pi

# Root directory of the project checkout; every catalogue path below is built
# as MAINPATH + "catalogues/...".  Set the OVIRAPTOR_MAINPATH environment
# variable to run the notebook on another machine; the original location is
# kept as the default so existing setups behave exactly as before.
MAINPATH = os.environ.get('OVIRAPTOR_MAINPATH',
                          'C:/Users/djhoo/Documents/Oviraptor-master/')

# paths are built by plain string concatenation, so a trailing separator is required
if not MAINPATH.endswith(('/', '\\')):
    MAINPATH += '/'

# Read in exoarchive data

In [2]:
# location of the exoarchive table, relative to the project root
# (the file name suggests a snapshot dated 2020-12-04 -- TODO confirm)
EXOARCHIVE_RELPATH = "catalogues/exoarchive_systems_20201204.csv"
planet_file = MAINPATH + EXOARCHIVE_RELPATH

# convenience function to read in csv file
def read_csv_file(filename, header_row=107):
    '''
    Read a CSV file whose column names sit on a fixed row below a preamble.

    filename   = (string) path to the CSV file
    header_row = (int) zero-based index of the row that holds the column
                 names; defaults to 107, the position this notebook has
                 always used for its exoarchive download

    Returns (keys, values): keys is the header row (list of strings) and
    values is the list of all rows that follow it (each a list of strings).

    Raises IndexError if the file has no row at index header_row.
    '''
    # newline="" lets the csv module do its own line-ending handling, as its
    # documentation requires (otherwise quoted fields with newlines can break)
    with open(filename, newline="") as infile:
        rows = list(csv.reader(infile))

    if not 0 <= header_row < len(rows):
        raise IndexError("header row {0} not found: '{1}' has only {2} rows"
                         .format(header_row, filename, len(rows)))

    return rows[header_row], rows[header_row + 1:]


# load the exoarchive table -- https://exoplanetarchive.ipac.caltech.edu
# csv_keys is the header row, csv_data the list of data rows
csv_keys, csv_data = read_csv_file(filename=planet_file)

In [3]:
# convenience functions to pull data from csv files
def getdata(keyname,keys=csv_keys,data=csv_data):
    '''
    Return one column of a row-oriented table as a list.

    keyname = (string) name of the column to extract; must appear in keys
    keys    = header row; position of keyname here selects the column
    data    = list of rows, each indexed in the same order as keys
    '''
    column = keys.index(keyname)
    return [record[column] for record in data]

In [4]:
# build a {column name: list of values} dictionary, one entry per csv column
data = {column: getdata(column) for column in csv_keys}

In [5]:
def check_lengths(data):
    '''
    Verify that every column stored in data has the same length.

    data = (dict) mapping of key -> sized sequence

    Returns None.  Raises ValueError('inconsistent array lengths') as soon as
    two columns disagree.  An empty dictionary has nothing to compare and
    passes (the previous version raised IndexError on it).
    '''
    distinct_lengths = {len(column) for column in data.values()}

    if len(distinct_lengths) > 1:
        raise ValueError('inconsistent array lengths')

    return None


def convert_to_arrays(data):
    '''
    Return a new dictionary with the same keys as data, where every value
    has been passed through np.asarray.  The input dictionary is not modified.
    '''
    return {name: np.asarray(column) for name, column in data.items()}



# reference column: its length is used below whenever a row count is needed
k0 = list(data)[0]

# lists -> numpy arrays, so that boolean masks can be applied to every column
data = convert_to_arrays(data)

# show which columns are available
print(data.keys())

# only use default (single reference) values: rows whose default_flag is '1'
keep = data['default_flag'] == '1'

for k, column in data.items():
    data[k] = column[keep]

print('total number of queried objects =', len(data[k0]))

# sanity check: every column must still have the same number of rows
check_lengths(data)

dict_keys(['pl_name', 'hostname', 'default_flag', 'sy_snum', 'sy_pnum', 'discoverymethod', 'disc_year', 'disc_facility', 'soltype', 'pl_controv_flag', 'pl_refname', 'pl_orbper', 'pl_orbpererr1', 'pl_orbpererr2', 'pl_orbperlim', 'pl_rade', 'pl_radeerr1', 'pl_radeerr2', 'pl_radelim', 'pl_radj', 'pl_radjerr1', 'pl_radjerr2', 'pl_radjlim', 'pl_bmasse', 'pl_bmasseerr1', 'pl_bmasseerr2', 'pl_bmasselim', 'pl_bmassj', 'pl_bmassjerr1', 'pl_bmassjerr2', 'pl_bmassjlim', 'pl_bmassprov', 'pl_orbeccen', 'pl_orbeccenerr1', 'pl_orbeccenerr2', 'pl_orbeccenlim', 'ttv_flag', 'pl_ratror', 'pl_ratrorerr1', 'pl_ratrorerr2', 'pl_ratrorlim', 'pl_rvamp', 'pl_rvamperr1', 'pl_rvamperr2', 'pl_rvamplim', 'st_refname', 'st_spectype', 'st_teff', 'st_tefferr1', 'st_tefferr2', 'st_tefflim', 'st_rad', 'st_raderr1', 'st_raderr2', 'st_radlim', 'st_mass', 'st_masserr1', 'st_masserr2', 'st_masslim', 'st_met', 'st_meterr1', 'st_meterr2', 'st_metlim', 'st_metratio', 'st_lum', 'st_lumerr1', 'st_lumerr2', 'st_lumlim', 'st_logg

# Remove unwanted objects

In [6]:
# keep only planets found by transit, radial velocity or transit timing variations
discovery = data["discoverymethod"]
keep = (discovery == "Transit") | (discovery == "Radial Velocity") | \
       (discovery == "Transit Timing Variations")

for k, column in data.items():
    data[k] = column[keep]

print("removed {0} objects due to non-relevant DETECTION METHOD".format(np.sum(~keep)))


# drop planets whose controversial flag is set
bad = data["pl_controv_flag"] == "1"

for k, column in data.items():
    data[k] = column[~bad]

print("removed {0} objects flagged as CONTROVERSIAL".format(np.sum(bad)))


# summary of what survived both cuts, broken down by discovery method
print("\nafter cuts, {0} objects remain\n".format(len(data[k0])))

surviving = data["discoverymethod"]
print("{0} TRANSITING planets".format(np.sum(surviving == "Transit")))
print("{0} RADIAL VELOCITY planets".format(np.sum(surviving == "Radial Velocity")))
print("{0} TTV planets".format(np.sum(surviving == "Transit Timing Variations")))

removed 190 objects due to non-relevant DETECTION METHOD
removed 10 objects flagged as CONTROVERSIAL

after cuts, 4107 objects remain

3273 TRANSITING planets
813 RADIAL VELOCITY planets
21 TTV planets


# Check stellar parameter consistency for each system

In [7]:
# working copies (np.array copies its input) of the columns used for the
# per-system consistency pass
npl = np.array(data["sy_pnum"], dtype="int")
starname = np.array(data["hostname"])
detmet = np.array(data["discoverymethod"])

# stellar mass and its two uncertainty columns
Mstar, Mstar_err1, Mstar_err2 = (
    np.array(data[name]) for name in ("st_mass", "st_masserr1", "st_masserr2")
)

# stellar radius and its two uncertainty columns
Rstar, Rstar_err1, Rstar_err2 = (
    np.array(data[name]) for name in ("st_rad", "st_raderr1", "st_raderr2")
)

# one entry per host star
uniquesys = np.unique(starname)

In [8]:
# some planets have no stellar mass/radius given; others have a value for only a single planet in a system
# for multis where one planet has a stellar mass/radius value, broadcast this to all planets in the system

def _broadcast_within_system(values, err1, err2, use):
    '''
    Fill blank entries of one stellar parameter from a sibling planet's entry.

    values, err1, err2 = string arrays (value and its two uncertainty columns),
                         modified in place
    use                = boolean mask selecting the rows of one host star

    Nothing happens unless the selected rows contain a blank ("") and exactly
    one other distinct value; that value is then written to every selected
    row.  The uncertainties are filled the same way when each has exactly one
    non-blank entry; otherwise both uncertainty columns are blanked for the
    system.
    '''
    if not np.any(values[use] == ""):
        return

    distinct = np.unique(values[use])

    # exactly two distinct entries means "" plus a single real value; anything
    # else is all-blank or conflicting and is left untouched
    if len(distinct) != 2:
        return

    values[use] = str(distinct[distinct != ""].item())

    distinct_err1 = np.unique(err1[use])
    distinct_err2 = np.unique(err2[use])

    try:
        # .item() raises ValueError unless exactly one non-blank entry exists
        fill_err1 = str(distinct_err1[distinct_err1 != ""].item())
        fill_err2 = str(distinct_err2[distinct_err2 != ""].item())
    except ValueError:
        fill_err1 = ""
        fill_err2 = ""

    err1[use] = fill_err1
    err2[use] = fill_err2


for s in uniquesys:
    use = starname == s

    # first fix stellar masses, then stellar radii
    _broadcast_within_system(Mstar, Mstar_err1, Mstar_err2, use)
    _broadcast_within_system(Rstar, Rstar_err1, Rstar_err2, use)


# store the repaired columns back in the catalogue
data["st_mass"] = np.copy(Mstar)
data["st_masserr1"] = np.copy(Mstar_err1)
data["st_masserr2"] = np.copy(Mstar_err2)

data["st_rad"] = np.copy(Rstar)
data["st_raderr1"] = np.copy(Rstar_err1)
data["st_raderr2"] = np.copy(Rstar_err2)

In [9]:
# report RV hosts in systems of more than two planets (sy_pnum > 2) that
# still have a blank stellar mass or radius
RV = data["discoverymethod"] == "Radial Velocity"

npl = np.array(data['sy_pnum'][RV], dtype="int")
Mstar = np.array(data['st_mass'][RV])
Rstar = np.array(data['st_rad'][RV])
starname = np.array(data['hostname'][RV])
planetname = np.array(data['pl_name'][RV])

in_multi = npl > 2

print("\n\nMissing MASS")
print(np.unique(starname[(Mstar == '') & in_multi]))

print("\n\nMissing RADIUS")
print(np.unique(starname[(Rstar == '') & in_multi]))



Missing MASS
['GJ 163']


Missing RADIUS
['GJ 163' 'GJ 180' 'GJ 433' 'GJ 667 C' 'HD 133131 A' 'HD 133131 B'
 'HD 136352' 'HD 141399' 'HD 160691' 'HD 20781' 'HD 20794' 'HD 27894'
 'HD 31527' 'HD 37124' 'HD 40307' 'HD 69830' 'tau Cet']


# Manually add in missing stellar radii

In [10]:
# Hand-entered stellar radii.  The four lists are parallel: entry i of
# modified_Rstar / modified_Rerr1 / modified_Rerr2 belongs to modified_stars[i]
# (23 entries each).  The values are written into st_rad / st_raderr1 /
# st_raderr2 by the loop in the next cell; units are presumably solar radii
# to match st_rad -- TODO confirm, and record the literature source of each value.
#
# NOTE(review): three hosts are listed twice with different values --
# 'HD 40307' (0.72 and 0.71), 'HD 136352' (1.012 and 1.02) and
# 'HD 37124' (1.004 and 0.93).  The loop applies entries in order, so the
# later entry silently wins; decide which value is intended and drop the other.
#
# NOTE(review): modified_Rerr2 holds positive numbers, whereas the RV
# amplitude override further down stores its err2 as a negative number
# (-0.53) -- confirm which sign convention downstream code expects.
#
# The 0.35*0.2814 entries look like a fractional uncertainty applied to the
# 0.35 radius of 'GJ 876' -- TODO confirm.
modified_stars = ['HD 40307', 'HD 31527', 'HD 20781', 'GJ 876', \
                  'GJ 163', 'ups And', 'HD 136352', '55 Cnc', 'tau Cet', \
                  'HD 69830', 'HD 40307', 'HD 37124', 'HD 20794', \
                 'HD 160691', 'HD 141399', 'HD 136352', 'GJ 180', 'GJ 9827', \
                  'TRAPPIST-1', 'GJ 433', 'HD 27894', 'HD 181433', \
                 'HD 37124']
modified_Rstar = [0.72, 1.077, 0.86, 0.35, 0.41, 1.48, 1.012, 0.98, \
                  0.793, 0.91, 0.71, 1.004, 0.9, 1.36, 1.46, 1.02, 0.423, \
                  0.579, 0.1192, 0.46, 0.85, 0.81, 0.93]
modified_Rerr1 = [0.060, 0.046, 0.06, 0.35*0.2814, 0.01, 0.087, 0.018,\
                  0.016, 0.004, 0.019, 0.01, 0.046, 0.03, 0.06, 0.15, \
                 0.02, 0.005, 0.018, 0.0013, 0.05, 0.05, 0.07, 0.04]
modified_Rerr2 = [0.048, 0.042, 0.05, 0.35*0.2814, 0.01, 0.087, 0.018,\
                  0.016, 0.004, 0.019, 0.01, 0.05, 0.03, 0.06, 0.15, \
                 0.02, 0.005, 0.018, 0.0013, 0.05, 0.05, 0.05, 0.04]

In [11]:
# write each hand-entered radius (and its two uncertainties) to every row of
# the matching host; entries are applied in list order, so a host that is
# listed twice ends up with its later values
for idx, star in enumerate(modified_stars):
    rows = data['hostname'] == star
    nrows = len(data['hostname'][rows])

    # the numbers are stored into string columns, so numpy converts them on assignment
    data['st_rad'][rows] = [modified_Rstar[idx]]*nrows
    data['st_raderr1'][rows] = [modified_Rerr1[idx]]*nrows
    data['st_raderr2'][rows] = [modified_Rerr2[idx]]*nrows

# Manually add in missing stellar mass

In [12]:
# Hand-entered stellar masses.  The four lists are parallel (16 entries each):
# entry i of modified_Mstar / modified_Merr1 / modified_Merr2 belongs to
# modified_stars[i].  They are written into st_mass / st_masserr1 /
# st_masserr2 by the loop in the next cell; units are presumably solar masses
# to match st_mass -- TODO confirm, and record the literature source of each value.
#
# Note that this rebinds modified_stars, which previously held the host list
# for the radius overrides; the radius loop must have run before this cell.
#
# NOTE(review): modified_Merr2 holds positive numbers, whereas the RV
# amplitude override further down stores its err2 as a negative number
# (-0.53) -- confirm which sign convention downstream code expects.
#
# The 0.25*0.4649 entries look like a fractional uncertainty applied to the
# 0.25 mass of 'Wolf 1061' -- TODO confirm.
modified_stars = ['HD 31527', 'HD 20781', 'Wolf 1061', 'GJ 3293', \
                 'GJ 3138', 'ups And', 'HD 20794', 'HD 160691', \
                 'HD 136352', 'GJ 180', '55 Cnc', 'GJ 163', 'HD 27894', \
                 'GJ 433', 'HD 181433', 'HD 37124']
modified_Mstar = [1.07, 0.9, 0.25, 0.45, 0.62, 1.150, 0.813, 1.10, \
                  0.906, 0.432, 1.015, 0.40, 0.81, 0.46, 0.81, 0.99]
modified_Merr1 = [0.15, 0.12, 0.25*0.4649, 0.02, 0.08, 0.165, 0.018, \
                 0.02, 0.055, 0.005, 0.051, 0.02, 0.11, 0.05, 0.09, 0.14]
modified_Merr2 = [0.13, 0.1, 0.25*0.4649, 0.02, 0.08, 0.144, 0.012, \
                 0.02, 0.047, 0.005, 0.051, 0.02, 0.08, 0.05, 0.10, 0.10]

In [13]:
# write each hand-entered stellar mass (and its two uncertainties) to every
# row of the matching host, in list order
for idx, star in enumerate(modified_stars):
    rows = data['hostname'] == star
    nrows = len(data['hostname'][rows])

    # the numbers are stored into string columns, so numpy converts them on assignment
    data['st_mass'][rows] = [modified_Mstar[idx]]*nrows
    data['st_masserr1'][rows] = [modified_Merr1[idx]]*nrows
    data['st_masserr2'][rows] = [modified_Merr2[idx]]*nrows

# Missing RV semi-amplitudes

In [14]:
# hand-entered RV semi-amplitude for 55 Cnc e, with its two uncertainties
# (err2 is stored as a negative number)
is_55_cnc_e = data['pl_name'] == '55 Cnc e'

data['pl_rvamp'][is_55_cnc_e] = [5.07]
data['pl_rvamperr1'][is_55_cnc_e] = [0.53]
data['pl_rvamperr2'][is_55_cnc_e] = [-0.53]

# Missing planetary mass

In [15]:
# Hand-entered planet masses.  The four lists are parallel (11 entries each):
# entry i of modified_mp / modified_mperr1 / modified_mperr2 belongs to
# modified_planets[i].  They are written into pl_bmasse / pl_bmasseerr1 /
# pl_bmasseerr2 by the loop in the next cell.
#
# Entries multiplied by MJUPITER/MEARTH are rescaled by the Jupiter-to-Earth
# mass ratio; the bare numbers (6.90, 10.2, 11.8, 18.1) are used as given --
# presumably already in Earth masses to match pl_bmasse; TODO confirm and
# record the literature source of each value.
#
# NOTE(review): several planets carry uncertainties of exactly 0, which
# reads as "perfectly known" rather than "unknown" -- confirm how downstream
# code treats a zero uncertainty.
#
# NOTE(review): modified_mperr2 holds non-negative numbers, whereas the RV
# amplitude override above stores its err2 as a negative number (-0.53) --
# confirm which sign convention downstream code expects.
modified_planets = ['HD 160691 b', 'HD 160691 c', 'HD 160691 d', \
                   'HD 160691 e', 'HD 3167 d', 'HD 69830 b', 'HD 69830 c', \
                   'HD 69830 d', 'HD 181433 b', 'HD 181433 c', 'HD 181433 d']
modified_mp = [1.676*MJUPITER/MEARTH, 0.03321*MJUPITER/MEARTH, \
              0.5219*MJUPITER/MEARTH, 1.814*MJUPITER/MEARTH, \
              6.90, 10.2, 11.8, 18.1, 0.0223*MJUPITER/MEARTH, \
              0.674*MJUPITER/MEARTH, 0.612*MJUPITER/MEARTH]
modified_mperr1 = [0, 0, 0, 0, 0.71, 0, 0, 0, 0.0003*MJUPITER/MEARTH, \
                  0.003*MJUPITER/MEARTH, 0.004*MJUPITER/MEARTH]
modified_mperr2 = [0, 0, 0, 0, 0.71, 0, 0, 0, 0.0003*MJUPITER/MEARTH, \
                  0.003*MJUPITER/MEARTH, 0.004*MJUPITER/MEARTH]

In [16]:
# write each hand-entered planet mass (and its two uncertainties) to the row(s)
# of the matching planet, in list order
# NOTE(review): only the pl_bmasse columns are updated here; the pl_bmassj
# columns keep their original contents -- confirm nothing downstream reads them
for idx, planet in enumerate(modified_planets):
    rows = data['pl_name'] == planet
    nrows = len(data['pl_name'][rows])

    # the numbers are stored into string columns, so numpy converts them on assignment
    data['pl_bmasse'][rows] = [modified_mp[idx]]*nrows
    data['pl_bmasseerr1'][rows] = [modified_mperr1[idx]]*nrows
    data['pl_bmasseerr2'][rows] = [modified_mperr2[idx]]*nrows

# Double-check missing data

In [17]:
# Some RV systems still lack a stellar radius after the manual additions.
# That is acceptable: these systems are not part of the final RV sample,
# because once every cut has been applied each of them retains fewer than
# 3 planets.

RV = data["discoverymethod"] == "Radial Velocity"

npl = np.array(data['sy_pnum'][RV], dtype="int")
Mstar = np.array(data['st_mass'][RV])
Rstar = np.array(data['st_rad'][RV])
starname = np.array(data['hostname'][RV])
planetname = np.array(data['pl_name'][RV])

# same report as before the manual additions: hosts with sy_pnum > 2 and a blank value
in_multi = npl > 2

print("\n\nMissing MASS")
print(np.unique(starname[(Mstar == '') & in_multi]))

print("\n\nMissing RADIUS")
print(np.unique(starname[(Rstar == '') & in_multi]))



Missing MASS
[]


Missing RADIUS
['GJ 667 C' 'HD 133131 A' 'HD 133131 B']


# Read in Kepler names

In [18]:
kepnamepath = MAINPATH + "catalogues/kepler_names.txt"

# read the comma-separated Kepler name table: one list of fields per line
raw_kepnames = []

with open(kepnamepath, "r") as infile:
    for line in infile:
        fields = line.split(",")

        # clean the last field: drop the newline, then any backslashes and
        # spaces at either end.  "\\ " is the explicit spelling of the
        # backslash-plus-space character set (the bare "\ " form is an
        # invalid escape sequence and triggers a warning on current Python)
        fields[-1] = fields[-1].strip("\n").strip("\\ ")

        raw_kepnames.append(fields)

# 2-D string array: row 0 is the header, the remaining rows are the entries
raw_kepnames = np.array(raw_kepnames)

In [19]:
# the first row holds the column names; map each name to its column of values
kepnames_header = raw_kepnames[0]
kepnames_rows = raw_kepnames[1:]

kepnames = {name: kepnames_rows[:, col] for col, name in enumerate(kepnames_header)}

# Read in Gaia DR2

In [20]:
gaiapath = MAINPATH + "catalogues/berger_2020_gaia_kepler_tab2_output.txt"

# read the stellar output parameters: fields are separated by "&"
# (apparently a LaTeX-formatted table -- TODO confirm)
raw_gaia_data = []

with open(gaiapath, "r") as infile:
    for line in infile:
        fields = line.split("&")

        # clean the last field: drop the newline, then any backslashes and
        # spaces at either end.  "\\ " is the explicit spelling of the
        # backslash-plus-space character set (the bare "\ " form is an
        # invalid escape sequence and triggers a warning on current Python)
        fields[-1] = fields[-1].strip("\n").strip("\\ ")

        raw_gaia_data.append(fields)

# 2-D string array: row 0 is the header, the remaining rows are the stars
raw_gaia_data = np.array(raw_gaia_data)


# map each column name to its column of values
gaia_stars = {}

for i, k in enumerate(raw_gaia_data[0]):
    gaia_stars[k] = raw_gaia_data[1:,i]

In [21]:
gaiapath = MAINPATH + "catalogues/berger_2020_gaia_kepler_planets.txt"

# output name -> column index in the planets table (the first 32 lines of the
# file are skipped as header)
GAIA_PLANET_COLUMNS = {
    "KIC": 0,
    "radius": 3,
    "radius_err1": 4,
    "radius_err2": 5,
    "sma": 6,
    "sma_err1": 7,
    "sma_err2": 8,
}

# parse the file once instead of once per column; ndmin=2 keeps the result
# two-dimensional even if the table has a single row
gaia_planet_table = np.loadtxt(gaiapath, skiprows=32, dtype="str",
                               usecols=tuple(GAIA_PLANET_COLUMNS.values()),
                               ndmin=2)

gaia_planets = {}

for col, name in enumerate(GAIA_PLANET_COLUMNS):
    gaia_planets[name] = gaia_planet_table[:, col]

# Cross-match Kepler vs. Gaia and combine

In [22]:
gaia_kic = np.asarray(gaia_stars["KIC"], dtype="int")

# host name -> KIC id, built once from the Kepler name table.  A Kepler planet
# name is compared with the host name after dropping its last two characters.
# Later rows overwrite earlier ones, i.e. the last matching row is used.
kepid_by_host = {}
for kname, kepid in zip(kepnames["kepler_name"], kepnames["kepid"]):
    kepid_by_host[str(kname)[:-2]] = kepid

# (catalogue column stem, Gaia column stem) for every stellar parameter copied on a match
STELLAR_COLUMN_STEMS = [("st_teff", "iso_teff"),
                        ("st_rad",  "iso_rad"),
                        ("st_mass", "iso_mass"),
                        ("st_met",  "iso_feh"),
                        ("st_logg", "iso_logg")]

for i in range(len(data[k0])):
    hostname = str(data["hostname"][i])

    if hostname[:3] != "Kep":
        continue

    # a host missing from the name table has no KIC id; skip it instead of
    # reusing the id left over from a previous row
    if hostname not in kepid_by_host:
        continue

    kic = int(kepid_by_host[hostname])
    match = gaia_kic == kic

    # only overwrite when the star is found exactly once in the Gaia table
    if np.sum(match) == 1:
        data["st_refname"][i] = "Berger et al. 2020"

        for exo, iso in STELLAR_COLUMN_STEMS:
            data[exo][i] = gaia_stars[iso][match][0]
            # err1 and err2 are copied to their own columns (err2 used to be
            # written over err1, leaving the catalogue's old err2 in place)
            # NOTE(review): values are copied verbatim -- confirm the Gaia
            # table uses the same sign convention for err2 as the catalogue
            data[exo + "err1"][i] = gaia_stars[iso + "_err1"][match][0]
            data[exo + "err2"][i] = gaia_stars[iso + "_err2"][match][0]
            data[exo + "lim"][i] = "0"

        data["st_metratio"][i] = "[Fe/H]"

In [23]:
gaia_kic = np.asarray(gaia_planets["KIC"], dtype="int")

# not used in this cell; kept in case other cells read it
count = 0

# host name -> KIC id, built once from the Kepler name table.  A Kepler planet
# name is compared with the host name after dropping its last two characters.
# Later rows overwrite earlier ones, i.e. the last matching row is used.
kepid_by_host = {}
for kname, kepid in zip(kepnames["kepler_name"], kepnames["kepid"]):
    kepid_by_host[str(kname)[:-2]] = kepid

for i in range(len(data[k0])):
    hostname = str(data["hostname"][i])

    if hostname[:3] != "Kep":
        continue

    # a host missing from the name table has no KIC id; skip it instead of
    # reusing the id left over from a previous row
    if hostname not in kepid_by_host:
        continue

    kic = int(kepid_by_host[hostname])
    match = gaia_kic == kic

    # NOTE(review): the match is made on the host's KIC id only, so when the
    # Gaia table lists exactly one planet for a host, that radius is written to
    # every catalogue planet of the host -- confirm this is intended
    if np.sum(match) == 1:
        radius = gaia_planets["radius"][match][0]
        radius_err1 = gaia_planets["radius_err1"][match][0]
        radius_err2 = gaia_planets["radius_err2"][match][0]

        # err1 and err2 go to their own columns (err2 used to be written over
        # err1, leaving the catalogue's old err2 in place)
        data["pl_rade"][i] = radius
        data["pl_radeerr1"][i] = radius_err1
        data["pl_radeerr2"][i] = radius_err2
        data["pl_radelim"][i]  = "0"

        # same radius in Jupiter radii, rounded to 3 decimals
        data["pl_radj"][i] = str(np.round(float(radius)*RERJ,3))
        data["pl_radjerr1"][i] = str(np.round(float(radius_err1)*RERJ,3))
        data["pl_radjerr2"][i] = str(np.round(float(radius_err2)*RERJ,3))
        data["pl_radjlim"][i]  = "0"

In [24]:
def check_uniformity(detection_methods):
    '''
    Return True when every entry of detection_methods equals the first
    entry, and False as soon as any entry differs from it.

    The first entry is read by index, so an empty sequence raises IndexError.
    '''
    reference = detection_methods[0]
    return not any(method != reference for method in detection_methods)

# label every planet with its system's discovery method, or 'Mixed' when the
# planets sharing its host were not all discovered the same way
system_labels = []

for host, method in zip(data['hostname'], data['discoverymethod']):
    sibling_methods = data['discoverymethod'][data['hostname'] == host]

    if check_uniformity(sibling_methods):
        system_labels.append(method)
    else:
        system_labels.append('Mixed')

data['system_disc_method'] = np.array(system_labels)

print('Here are the mixed systems:')
print(data['hostname'][data['system_disc_method']=='Mixed'])

Here are the mixed systems:
['CoRoT-20' 'CoRoT-20' 'CoRoT-7' 'CoRoT-7' 'GJ 1132' 'GJ 1132' 'GJ 3473'
 'GJ 3473' 'GJ 357' 'GJ 357' 'GJ 357' 'HAT-P-11' 'HAT-P-11' 'HAT-P-13'
 'HAT-P-13' 'HAT-P-17' 'HAT-P-17' 'HAT-P-44' 'HAT-P-44' 'HATS-59'
 'HATS-59' 'HD 213885' 'HD 213885' 'HD 3167' 'HD 3167' 'HD 3167'
 'HD 39091' 'HD 86226' 'HD 86226' 'KELT-6' 'KELT-6' 'KOI-12' 'KOI-142'
 'KOI-142' 'Kepler-122' 'Kepler-122' 'Kepler-122' 'Kepler-122'
 'Kepler-122' 'Kepler-160' 'Kepler-160' 'Kepler-160' 'Kepler-19'
 'Kepler-19' 'Kepler-19' 'Kepler-20' 'Kepler-20' 'Kepler-20' 'Kepler-20'
 'Kepler-20' 'Kepler-20' 'Kepler-25' 'Kepler-25' 'Kepler-25' 'Kepler-338'
 'Kepler-338' 'Kepler-338' 'Kepler-338' 'Kepler-37' 'Kepler-37'
 'Kepler-37' 'Kepler-37' 'Kepler-407' 'Kepler-407' 'Kepler-411'
 'Kepler-411' 'Kepler-411' 'Kepler-411' 'Kepler-419' 'Kepler-419'
 'Kepler-424' 'Kepler-424' 'Kepler-432' 'Kepler-432' 'KOI-12' 'Kepler-454'
 'Kepler-454' 'Kepler-46' 'Kepler-46' 'Kepler-46' 'Kepler-48' 'Kepler-48'
 'Kepler

# Write out catalogue

In [25]:
# set to False to run the notebook without overwriting the output catalogue
WRITENEW = True
if WRITENEW:
    filepath = MAINPATH + 'catalogues/oviraptor_crossmatch_catalog_v2.csv'

    # newline="" is what the csv module requires for files it writes to;
    # without it every row is followed by a blank line on Windows
    with open(filepath, "w", newline="") as outfile:
        writer = csv.writer(outfile)

        # header row = column names, then one row per planet
        # (zip stops at the shortest column, so all columns must have equal length)
        writer.writerow(data.keys())
        writer.writerows(zip(*data.values()))

    # report success only when a file was actually written
    print("Writing complete!")

Writing complete!
