In [1]:
import numpy as np
import pandas as pd
import rebound
import mr_forecast as mr
import spock
import os
import glob
import random
import matplotlib.pyplot as plt
import matplotlib
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
% matplotlib inline

  from ._conv import register_converters as _register_converters


In [2]:
# Global matplotlib styling: heavy line/axis/patch strokes, size-40 serif
# text, long thick ticks, and extra spacing between subplots.
plt.rcParams.update({
    'lines.linewidth': 6,
    'font.size': 40,
    'font.family': 'serif',
    'font.weight': 500,
    'mathtext.fontset': 'dejavuserif',
    'axes.linewidth': 6,
    'axes.titlepad': 20,
    'patch.linewidth': 6,
    'xtick.major.width': 5,
    'xtick.major.size': 20,
    'xtick.minor.width': 4,
    'xtick.minor.size': 15,
    'ytick.major.width': 5,
    'ytick.major.size': 20,
    'ytick.minor.width': 4,
    'ytick.minor.size': 15,
    'figure.subplot.hspace': 0.3,
    'figure.subplot.wspace': 0.3,
})

In [3]:
def get_exit_times(dir_name):
    """Read the first-snapshot period and last-snapshot time of every archive in a directory.

    Parameters
    ----------
    dir_name : str
        Directory searched (non-recursively) for ``*.bin`` files, each opened
        with ``rebound.SimulationArchive``. File names must contain
        ``system-<identifier>.bin``.

    Returns
    -------
    identifiers : list of str
        Text between ``system-`` and ``.bin`` in each archive's file name.
    periods : numpy.ndarray
        ``particles[1].P`` of the first snapshot of each archive
        (presumably the innermost planet's period -- confirm against the
        code that wrote the archives).
    exit_times : numpy.ndarray
        Simulation time ``t`` of the last snapshot of each archive.

    All three outputs share the order returned by ``glob.glob``.
    """
    archive_list = glob.glob(dir_name + "/*.bin")

    exit_times = np.zeros(len(archive_list))
    periods = np.zeros(len(archive_list))
    identifiers = []

    for i, archive_filename in enumerate(archive_list):
        # Parse the identifier from the file name only: splitting the full
        # path would pick up any 'system-' that appears in a directory name.
        archive_name = os.path.basename(archive_filename)
        identifiers.append(archive_name.split('system-')[1].split('.bin')[0])

        sa = rebound.SimulationArchive(archive_filename)
        periods[i] = sa[0].particles[1].P
        exit_times[i] = sa[-1].t

    return identifiers, periods, exit_times

def get_spock_probs(filename):
    """Load the identifier and probability arrays stored in an ``.npz`` file.

    Parameters
    ----------
    filename : str
        Path to an ``.npz`` archive containing the keys ``'identifier_list'``
        and ``'probs'``.

    Returns
    -------
    identifiers : numpy.ndarray
        Contents of ``'identifier_list'``.
    probs : numpy.ndarray
        Contents of ``'probs'``.
    """
    # The context manager closes the underlying zip file; both arrays are
    # fully read into memory by the key lookups before that happens.
    with np.load(filename) as data:
        identifiers = data['identifier_list']
        probs = data['probs']

    return identifiers, probs

def get_multiplicities(identifiers):
    """Count, for every identifier, how many identifiers share its system id, plus one.

    Parameters
    ----------
    identifiers : sequence of str
        Strings of the form ``'<sysid>-<place>'``; only the integer before
        the first ``-`` is used.

    Returns
    -------
    numpy.ndarray of float
        One value per identifier: the number of entries with the same system
        id, plus one. (Presumably the planet count of the system when every
        adjacent pair appears exactly once -- confirm against the caller.)
    """
    sysids = np.array(
        [int(identifier.split('-')[0]) for identifier in identifiers],
        dtype=np.int64,
    )

    # One pass over the sorted ids replaces the quadratic "compare every id
    # with every other id" loop; `inverse` maps each entry to its count.
    _, inverse, counts = np.unique(sysids, return_inverse=True, return_counts=True)

    # Keep the float dtype callers have always received.
    return counts[inverse].astype(float) + 1

In [4]:
def get_results(sysid, place, dirname):
    """Open the SPOCK probability archive for one (system, place) pair.

    Parameters
    ----------
    sysid, place
        Values formatted with ``str`` into the file name
        ``spock-probs-system-<sysid>-<place>.npz``.
    dirname : str
        Directory containing the file; a trailing path separator is optional.

    Returns
    -------
    The object returned by ``np.load`` for that file. It is not closed here;
    callers should close it or use it as a context manager.
    """
    basename = "spock-probs-system-" + str(sysid) + "-" + str(place) + ".npz"
    # os.path.join gives the same path as plain concatenation when `dirname`
    # ends with a separator, and a correct one when it does not.
    return np.load(os.path.join(dirname, basename))

def get_files(dirname):
    """List the (system id, place) pairs that have a SPOCK probability file.

    Parameters
    ----------
    dirname : str
        Directory containing files named
        ``spock-probs-system-<sysid>-<place>.<ext>``; a trailing path
        separator is optional.

    Returns
    -------
    sys_list : list of int
        System id parsed from each file name.
    place_list : list of int
        Place index parsed from each file name, aligned with ``sys_list``.

    Both lists are in the order returned by ``glob.glob``.
    """
    file_list = glob.glob(os.path.join(dirname, "spock-probs-system-*"))

    sys_list = []
    place_list = []

    for path in file_list:
        # Work on the file name alone so hyphens or dots in the directory
        # part of the path cannot shift the field positions.
        fields = os.path.basename(path).split(".")[0].split("-")
        # fields == ['spock', 'probs', 'system', '<sysid>', '<place>']
        sys_list.append(int(fields[3]))
        place_list.append(int(fields[4]))

    return sys_list, place_list

In [5]:
def get_edges_and_centres(min_val, max_val, N_bins=25):
    """Return evenly spaced bin edges over [min_val, max_val] and the bin midpoints.

    The first returned array has ``N_bins + 1`` edges; the second has
    ``N_bins`` centres, each the average of two neighbouring edges.
    """
    bin_edges = np.linspace(min_val, max_val, N_bins + 1)
    upper, lower = bin_edges[1:], bin_edges[:-1]
    bin_centres = (upper + lower) / 2

    return bin_edges, bin_centres

def get_summary_stats(x_vals, y_vals, z_vals, x_min, x_max, y_min, y_max, N_bins=25):
    """Bin (x, y) points on a regular grid and summarise z within each cell.

    Parameters
    ----------
    x_vals, y_vals, z_vals : numpy.ndarray
        Equal-length arrays; ``z_vals`` is indexed with boolean masks built
        from ``x_vals`` and ``y_vals``.
    x_min, x_max, y_min, y_max : float
        Extent of the grid along each axis.
    N_bins : int, optional
        Number of bins along each axis.

    Returns
    -------
    tuple
        ``(x_grid, y_grid, mean_grid, median_grid, std_grid, count_grid)``,
        each of shape ``(N_bins, N_bins)`` and indexed ``[x_bin, y_bin]``.
        Cells containing no points hold NaN in the mean/median/std grids and
        0 in the count grid.

    Notes
    -----
    Every bin is half-open, ``[lower, upper)``, so points lying exactly on
    ``x_max`` or ``y_max`` are not counted in any cell.
    """
    x_edges, x_centres = get_edges_and_centres(x_min, x_max, N_bins)
    y_edges, y_centres = get_edges_and_centres(y_min, y_max, N_bins)

    x_grid, y_grid = np.meshgrid(x_centres, y_centres, indexing='ij')

    # Start from NaN so empty cells need no reduction over an empty slice
    # (which would yield the same NaN but also a RuntimeWarning per cell).
    mean_grid = np.full((N_bins, N_bins), np.nan)
    median_grid = np.full((N_bins, N_bins), np.nan)
    std_grid = np.full((N_bins, N_bins), np.nan)
    count_grid = np.zeros((N_bins, N_bins))

    # The y-bin membership does not depend on the x bin: compute it once.
    y_masks = [
        np.logical_and(y_vals >= y_edges[j], y_vals < y_edges[j + 1])
        for j in range(N_bins)
    ]

    for i in range(N_bins):
        x_mask = np.logical_and(x_vals >= x_edges[i], x_vals < x_edges[i + 1])
        for j, y_mask in enumerate(y_masks):
            in_cell = np.logical_and(x_mask, y_mask)
            n_in_cell = np.sum(in_cell)
            count_grid[i, j] = n_in_cell
            if n_in_cell == 0:
                continue

            cell_z = z_vals[in_cell]
            mean_grid[i, j] = np.mean(cell_z)
            median_grid[i, j] = np.median(cell_z)
            std_grid[i, j] = np.std(cell_z)

    return (x_grid, y_grid, mean_grid, median_grid, std_grid, count_grid)

In [6]:
def get_sys_info(dirname, sysid, place):
    """Summarise the SPOCK results stored for one adjacent planet pair.

    Parameters
    ----------
    dirname : str
        Directory passed to ``get_results``.
    sysid, place
        Identify the result file; ``place`` is also used as the row index of
        the first planet of the pair (``place + 1`` is the second).

    Returns
    -------
    m_1, m_2 : float
        Median of rows ``place`` and ``place + 1`` of the stored ``'mass'``
        array, divided by ``m_earth``.
    prob_min, prob_msc, prob_mean : float
        Minimum, 5% quantile and mean of ``1 - prob`` over the samples whose
        last-row period exceeds the first planet's period.

    Raises
    ------
    ValueError
        If no sample has a period above the first planet's period.
    """
    m_earth = 3e-6  # presumably one Earth mass in solar masses -- confirm
    year = 2 * np.pi # one year in REBOUND time (in units where G=1)
    day = year / 365 # one day in REBOUND time (in units where G=1)

    with get_results(sysid, place, dirname) as data:
        # Fetch each array once instead of repeating the key lookup for
        # every use.
        periods = data['period']
        masses = data['mass']
        z = 1 - data['prob']

    # The last row holds one period per sample (presumably the inserted
    # test planet -- confirm against the code that wrote the file).
    x = periods[-1] / day

    m_1 = np.median(masses[place]) / m_earth
    m_2 = np.median(masses[place + 1]) / m_earth
    P_1 = periods[place][0] / day
    P_2 = periods[place + 1][0] / day

    # 0 at the first planet's period, 1 at the second planet's period.
    scaled_period = (x - P_1) / (P_2 - P_1)
    not_co_orbital = scaled_period > 0

    if not np.any(not_co_orbital):
        raise ValueError(
            "no samples with a period above the inner planet's for system "
            + str(sysid) + ", place " + str(place)
        )

    selected = z[not_co_orbital]
    prob_min = selected.min()
    prob_msc = np.quantile(selected, 0.05)
    prob_mean = selected.mean()

    return m_1, m_2, prob_min, prob_msc, prob_mean

In [7]:
df = pd.read_csv("cumulative_koi_gaia_bonomo.csv", comment="#")
sys_list, place_list = get_files("spock-probs/sigma_e_p01_sigma_i_p5/")

sys_list = np.array(sys_list)
place_list = np.array(place_list)

# System 3245969 is excluded from the comparison. Compute the mask once so
# both arrays are filtered with exactly the same selection.
keep = sys_list != 3245969
place_list = place_list[keep]
sys_list = sys_list[keep]

# Build the identifiers only after filtering, so identifiers[k] always
# describes (sys_list[k], place_list[k]).
identifiers = np.array(
    [str(sysid) + "-" + str(place) for (sysid, place) in zip(sys_list, place_list)]
)

In [8]:
# Spot-check: print the (m_1, m_2, prob_min, prob_msc, prob_mean) tuple for
# the first ten pairs of the sigma_e_p05_sigma_i_2p5 run.
preview_pairs = zip(sys_list[:10], place_list[:10])
for sysid, place in preview_pairs:
    print(get_sys_info("spock-probs/sigma_e_p05_sigma_i_2p5/", sysid, place))

(3.26, 11.4, 0.012492597103118896, 0.03149145841598511, 0.24120126876085995)
(3.9, 6.8, 0.03175985813140869, 0.7263220027089119, 0.9547158332292922)
(8.03, 1.3, 0.1922590136528015, 0.9570628559216857, 0.9899311798705719)
(1.3, 2.0, 0.03158813714981079, 0.7513747483491898, 0.9565559275302105)
(9.8, 38.0, 0.013939261436462402, 0.034260138869285583, 0.380773373089172)
(0.79, 1.3, 0.16917163133621216, 0.9553765846416354, 0.987274792217277)
(3.8, 10.0, 0.6208954155445099, 0.9908652095124125, 0.9978586498464458)
(10.0, 7.7, 0.6844505071640015, 0.9908833150286228, 0.998028597827442)
(4.9, 8.2, 0.010852575302124023, 0.05713844001293182, 0.6433899292537011)
(7.7, 14.1, 0.480099618434906, 0.983538992330432, 0.9952313978912308)


In [9]:
# Per-pair summaries for the sigma_e_p01_sigma_i_p5 configuration.
# "_new" arrays come from the local spock-probs directory, "_old" arrays from
# the ../../expanded-phase-space run (the summary printouts further down label
# these as bonomo+(2023) and forecaster masses respectively).
n_pairs = len(sys_list)

m_1_sigma_e_p01_sigma_i_p5_new, m_2_sigma_e_p01_sigma_i_p5_new, \
prob_min_sigma_e_p01_sigma_i_p5_new, prob_msc_sigma_e_p01_sigma_i_p5_new, \
prob_mean_sigma_e_p01_sigma_i_p5_new = (np.zeros(n_pairs) for _ in range(5))

m_1_sigma_e_p01_sigma_i_p5_old, m_2_sigma_e_p01_sigma_i_p5_old, \
prob_min_sigma_e_p01_sigma_i_p5_old, prob_msc_sigma_e_p01_sigma_i_p5_old, \
prob_mean_sigma_e_p01_sigma_i_p5_old = (np.zeros(n_pairs) for _ in range(5))

# Output arrays listed in the order get_sys_info returns its values.
new_columns = (
    m_1_sigma_e_p01_sigma_i_p5_new, m_2_sigma_e_p01_sigma_i_p5_new,
    prob_min_sigma_e_p01_sigma_i_p5_new, prob_msc_sigma_e_p01_sigma_i_p5_new,
    prob_mean_sigma_e_p01_sigma_i_p5_new,
)
old_columns = (
    m_1_sigma_e_p01_sigma_i_p5_old, m_2_sigma_e_p01_sigma_i_p5_old,
    prob_min_sigma_e_p01_sigma_i_p5_old, prob_msc_sigma_e_p01_sigma_i_p5_old,
    prob_mean_sigma_e_p01_sigma_i_p5_old,
)

for i, (sysid, place) in enumerate(zip(sys_list, place_list)):
    new_row = get_sys_info("spock-probs/sigma_e_p01_sigma_i_p5/", sysid, place)
    for column, value in zip(new_columns, new_row):
        column[i] = value

    old_row = get_sys_info("../../expanded-phase-space/spock-probs/sigma_e_p01_sigma_i_p5/", sysid, place)
    for column, value in zip(old_columns, old_row):
        column[i] = value

In [10]:
# Per-pair summaries for the sigma_e_p05_sigma_i_2p5 configuration.
# "_new" arrays come from the local spock-probs directory, "_old" arrays from
# the ../../expanded-phase-space run (the summary printouts further down label
# these as bonomo+(2023) and forecaster masses respectively).
n_pairs = len(sys_list)

m_1_sigma_e_p05_sigma_i_2p5_new, m_2_sigma_e_p05_sigma_i_2p5_new, \
prob_min_sigma_e_p05_sigma_i_2p5_new, prob_msc_sigma_e_p05_sigma_i_2p5_new, \
prob_mean_sigma_e_p05_sigma_i_2p5_new = (np.zeros(n_pairs) for _ in range(5))

m_1_sigma_e_p05_sigma_i_2p5_old, m_2_sigma_e_p05_sigma_i_2p5_old, \
prob_min_sigma_e_p05_sigma_i_2p5_old, prob_msc_sigma_e_p05_sigma_i_2p5_old, \
prob_mean_sigma_e_p05_sigma_i_2p5_old = (np.zeros(n_pairs) for _ in range(5))

# Output arrays listed in the order get_sys_info returns its values.
new_columns = (
    m_1_sigma_e_p05_sigma_i_2p5_new, m_2_sigma_e_p05_sigma_i_2p5_new,
    prob_min_sigma_e_p05_sigma_i_2p5_new, prob_msc_sigma_e_p05_sigma_i_2p5_new,
    prob_mean_sigma_e_p05_sigma_i_2p5_new,
)
old_columns = (
    m_1_sigma_e_p05_sigma_i_2p5_old, m_2_sigma_e_p05_sigma_i_2p5_old,
    prob_min_sigma_e_p05_sigma_i_2p5_old, prob_msc_sigma_e_p05_sigma_i_2p5_old,
    prob_mean_sigma_e_p05_sigma_i_2p5_old,
)

for i, (sysid, place) in enumerate(zip(sys_list, place_list)):
    new_row = get_sys_info("spock-probs/sigma_e_p05_sigma_i_2p5/", sysid, place)
    for column, value in zip(new_columns, new_row):
        column[i] = value

    old_row = get_sys_info("../../expanded-phase-space/spock-probs/sigma_e_p05_sigma_i_2p5/", sysid, place)
    for column, value in zip(old_columns, old_row):
        column[i] = value

In [11]:
# Element-wise old / new ratios for the sigma_e_p01_sigma_i_p5 run
# (one value per adjacent pair).
m_1_ratio_sigma_e_p01_sigma_i_p5 = np.divide(
    m_1_sigma_e_p01_sigma_i_p5_old, m_1_sigma_e_p01_sigma_i_p5_new)
m_2_ratio_sigma_e_p01_sigma_i_p5 = np.divide(
    m_2_sigma_e_p01_sigma_i_p5_old, m_2_sigma_e_p01_sigma_i_p5_new)
prob_min_ratio_sigma_e_p01_sigma_i_p5 = np.divide(
    prob_min_sigma_e_p01_sigma_i_p5_old, prob_min_sigma_e_p01_sigma_i_p5_new)
prob_msc_ratio_sigma_e_p01_sigma_i_p5 = np.divide(
    prob_msc_sigma_e_p01_sigma_i_p5_old, prob_msc_sigma_e_p01_sigma_i_p5_new)
prob_mean_ratio_sigma_e_p01_sigma_i_p5 = np.divide(
    prob_mean_sigma_e_p01_sigma_i_p5_old, prob_mean_sigma_e_p01_sigma_i_p5_new)

In [12]:
# Element-wise old - new differences for the sigma_e_p01_sigma_i_p5 run
# (one value per adjacent pair).
m_1_diff_sigma_e_p01_sigma_i_p5 = np.subtract(
    m_1_sigma_e_p01_sigma_i_p5_old, m_1_sigma_e_p01_sigma_i_p5_new)
m_2_diff_sigma_e_p01_sigma_i_p5 = np.subtract(
    m_2_sigma_e_p01_sigma_i_p5_old, m_2_sigma_e_p01_sigma_i_p5_new)
prob_min_diff_sigma_e_p01_sigma_i_p5 = np.subtract(
    prob_min_sigma_e_p01_sigma_i_p5_old, prob_min_sigma_e_p01_sigma_i_p5_new)
prob_msc_diff_sigma_e_p01_sigma_i_p5 = np.subtract(
    prob_msc_sigma_e_p01_sigma_i_p5_old, prob_msc_sigma_e_p01_sigma_i_p5_new)
prob_mean_diff_sigma_e_p01_sigma_i_p5 = np.subtract(
    prob_mean_sigma_e_p01_sigma_i_p5_old, prob_mean_sigma_e_p01_sigma_i_p5_new)

In [13]:
# Element-wise old / new ratios for the sigma_e_p05_sigma_i_2p5 run
# (one value per adjacent pair).
m_1_ratio_sigma_e_p05_sigma_i_2p5 = np.divide(
    m_1_sigma_e_p05_sigma_i_2p5_old, m_1_sigma_e_p05_sigma_i_2p5_new)
m_2_ratio_sigma_e_p05_sigma_i_2p5 = np.divide(
    m_2_sigma_e_p05_sigma_i_2p5_old, m_2_sigma_e_p05_sigma_i_2p5_new)
prob_min_ratio_sigma_e_p05_sigma_i_2p5 = np.divide(
    prob_min_sigma_e_p05_sigma_i_2p5_old, prob_min_sigma_e_p05_sigma_i_2p5_new)
prob_msc_ratio_sigma_e_p05_sigma_i_2p5 = np.divide(
    prob_msc_sigma_e_p05_sigma_i_2p5_old, prob_msc_sigma_e_p05_sigma_i_2p5_new)
prob_mean_ratio_sigma_e_p05_sigma_i_2p5 = np.divide(
    prob_mean_sigma_e_p05_sigma_i_2p5_old, prob_mean_sigma_e_p05_sigma_i_2p5_new)

In [14]:
# Element-wise old - new differences for the sigma_e_p05_sigma_i_2p5 run
# (one value per adjacent pair).
m_1_diff_sigma_e_p05_sigma_i_2p5 = np.subtract(
    m_1_sigma_e_p05_sigma_i_2p5_old, m_1_sigma_e_p05_sigma_i_2p5_new)
m_2_diff_sigma_e_p05_sigma_i_2p5 = np.subtract(
    m_2_sigma_e_p05_sigma_i_2p5_old, m_2_sigma_e_p05_sigma_i_2p5_new)
prob_min_diff_sigma_e_p05_sigma_i_2p5 = np.subtract(
    prob_min_sigma_e_p05_sigma_i_2p5_old, prob_min_sigma_e_p05_sigma_i_2p5_new)
prob_msc_diff_sigma_e_p05_sigma_i_2p5 = np.subtract(
    prob_msc_sigma_e_p05_sigma_i_2p5_old, prob_msc_sigma_e_p05_sigma_i_2p5_new)
prob_mean_diff_sigma_e_p05_sigma_i_2p5 = np.subtract(
    prob_mean_sigma_e_p05_sigma_i_2p5_old, prob_mean_sigma_e_p05_sigma_i_2p5_new)

In [15]:
# Report mean / max / std of the old-over-new ratios for the
# sigma_e_p01_sigma_i_p5 run (prob_min is computed above but not reported).
for header_line in (
    "sigma_e = 0.01, sigma_i = 0.5dg",
    "ratios are [forecaster masses value] / [bonomo+(2023) masses value]",
    "note: forecaster masses values for m_1 and m_2 use their median values",
    "      for each adjacent pair's inner/outer planet mass distribution",
):
    print(header_line)

for label, values in (
    ("m_1 ratio mean, max, std:", m_1_ratio_sigma_e_p01_sigma_i_p5),
    ("m_2 ratio mean, max, std:", m_2_ratio_sigma_e_p01_sigma_i_p5),
    ("prob_mean ratio mean, max, std:", prob_mean_ratio_sigma_e_p01_sigma_i_p5),
    ("prob_msc ratio mean, max, std:", prob_msc_ratio_sigma_e_p01_sigma_i_p5),
):
    print(label, values.mean(), values.max(), values.std())

sigma_e = 0.01, sigma_i = 0.5dg
ratios are [forecaster masses value] / [bonomo+(2023) masses value]
note: forecaster masses values for m_1 and m_2 use their median values
      for each adjacent pair's inner/outer planet mass distribution
m_1 ratio mean, max, std: 0.6395995921978961 1.405277205961589 0.41617248216917807
m_2 ratio mean, max, std: 0.6730265617988813 2.419411519279041 0.5142009432564699
prob_mean ratio mean, max, std: 0.9726042617548702 1.1330025591428547 0.05780752535262512
prob_msc ratio mean, max, std: 0.8398822728417026 1.4357500521604805 0.2363963142049613


In [16]:
# Report mean / max / std of the old-minus-new differences for the
# sigma_e_p01_sigma_i_p5 run (prob_min is computed above but not reported).
for header_line in (
    "sigma_e = 0.01, sigma_i = 0.5dg",
    # Header typo fixed ("diffss" -> "diffs") to match the sigma_e = 0.05 report.
    "diffs are [forecaster masses value] - [bonomo+(2023) masses value]",
    "note: forecaster masses values for m_1 and m_2 use their median values",
    "      for each adjacent pair's inner/outer planet mass distribution",
):
    print(header_line)

for label, values in (
    ("m_1 diff mean, max, std:", m_1_diff_sigma_e_p01_sigma_i_p5),
    ("m_2 diff mean, max, std:", m_2_diff_sigma_e_p01_sigma_i_p5),
    ("prob_mean diff mean, max, std:", prob_mean_diff_sigma_e_p01_sigma_i_p5),
    ("prob_msc diff mean, max, std:", prob_msc_diff_sigma_e_p01_sigma_i_p5),
):
    print(label, values.mean(), values.max(), values.std())

sigma_e = 0.01, sigma_i = 0.5dg
diffss are [forecaster masses value] - [bonomo+(2023) masses value]
note: forecaster masses values for m_1 and m_2 use their median values
      for each adjacent pair's inner/outer planet mass distribution
m_1 diff mean, max, std: -1.3145061051765594 1.9845550326330352 2.31001859668399
m_2 diff mean, max, std: -2.8537084563811197 2.8388230385580817 3.5165001723005784
prob_mean diff mean, max, std: -0.023489148471183652 0.08436923759402704 0.04298059003612431
prob_msc diff mean, max, std: -0.07244015127006503 0.025058770179748537 0.0925840657100393


In [17]:
# Report mean / max / std of the old-over-new ratios for the
# sigma_e_p05_sigma_i_2p5 run (prob_min is computed above but not reported).
for header_line in (
    "sigma_e = 0.05, sigma_i = 2.5dg",
    "ratios are [forecaster masses value] / [bonomo+(2023) masses value]",
    "note: forecaster masses values for m_1 and m_2 use their median values",
    "      for each adjacent pair's inner/outer planet mass distribution",
):
    print(header_line)

for label, values in (
    ("m_1 ratio mean, max, std:", m_1_ratio_sigma_e_p05_sigma_i_2p5),
    ("m_2 ratio mean, max, std:", m_2_ratio_sigma_e_p05_sigma_i_2p5),
    ("prob_mean ratio mean, max, std:", prob_mean_ratio_sigma_e_p05_sigma_i_2p5),
    ("prob_msc ratio mean, max, std:", prob_msc_ratio_sigma_e_p05_sigma_i_2p5),
):
    print(label, values.mean(), values.max(), values.std())

sigma_e = 0.05, sigma_i = 2.5dg
ratios are [forecaster masses value] / [bonomo+(2023) masses value]
note: forecaster masses values for m_1 and m_2 use their median values
      for each adjacent pair's inner/outer planet mass distribution
m_1 ratio mean, max, std: 0.6395995921978961 1.405277205961589 0.41617248216917807
m_2 ratio mean, max, std: 0.6730265617988813 2.419411519279041 0.5142009432564699
prob_mean ratio mean, max, std: 0.9883621360445295 1.0191609471490206 0.03080593012521092
prob_msc ratio mean, max, std: 0.9566150007983959 1.1928884424101869 0.11803977300301433


In [19]:
# Report mean / max / std of the old-minus-new differences for the
# sigma_e_p05_sigma_i_2p5 run (prob_min is computed above but not reported).
for header_line in (
    "sigma_e = 0.05, sigma_i = 2.5dg",
    "diffs are [forecaster masses value] - [bonomo+(2023) masses value]",
    "note: forecaster masses values for m_1 and m_2 use their median values",
    "      for each adjacent pair's inner/outer planet mass distribution",
):
    print(header_line)

for label, values in (
    ("m_1 diff mean, max, std:", m_1_diff_sigma_e_p05_sigma_i_2p5),
    ("m_2 diff mean, max, std:", m_2_diff_sigma_e_p05_sigma_i_2p5),
    ("prob_mean diff mean, max, std:", prob_mean_diff_sigma_e_p05_sigma_i_2p5),
    ("prob_msc diff mean, max, std:", prob_msc_diff_sigma_e_p05_sigma_i_2p5),
):
    print(label, values.mean(), values.max(), values.std())

sigma_e = 0.05, sigma_i = 2.5dg
diffs are [forecaster masses value] - [bonomo+(2023) masses value]
note: forecaster masses values for m_1 and m_2 use their median values
      for each adjacent pair's inner/outer planet mass distribution
m_1 diff mean, max, std: -1.3145061051765594 1.9845550326330352 2.31001859668399
m_2 diff mean, max, std: -2.8537084563811197 2.8388230385580817 3.5165001723005784
prob_mean diff mean, max, std: -0.009234721578041926 0.018328517572488656 0.022556336167366266
prob_msc diff mean, max, std: -0.026007316418043856 0.1449315048754214 0.0729641351743765


In [20]:
# Sorted system ids; a repeated id means several (sysid, place) result files
# were found for that system.
np.sort(sys_list)

array([ 2989404,  4914423,  5094751,  6850504,  6850504,  6850504,
        6850504,  8478994,  8478994, 10187017, 10187017, 10187017,
       10187017, 10875245, 10875245, 10875245, 11295426, 11904151])

In [30]:
# Side-by-side display of the per-pair m_1 values from the "old" and "new"
# sigma_e_p05_sigma_i_2p5 runs.
m_1_sigma_e_p05_sigma_i_2p5_old, m_1_sigma_e_p05_sigma_i_2p5_new

(array([3.06593938e+00, 3.19579991e+00, 6.20634389e+00, 3.40184651e-01,
        1.10932822e+01, 1.04320430e-02, 3.47225500e+00, 4.36214100e+00,
        6.88455503e+00, 1.42320437e+00, 9.20895202e-02, 9.01529578e+00,
        6.08049505e-01, 2.15545291e-01, 6.60480287e+00, 4.74070403e-01,
        4.01483469e+00, 2.20006461e+00]),
 array([ 3.26,  3.9 ,  8.03,  1.3 ,  9.8 ,  0.79,  3.8 , 10.  ,  4.9 ,
         7.7 ,  1.1 , 11.1 ,  1.4 ,  1.7 ,  4.7 ,  0.76,  9.7 ,  3.  ]))

In [28]:
# System ids and place indices in the (unsorted) order used to index the
# per-pair arrays filled from them.
sys_list, place_list

(array([11904151,  2989404, 11295426,  8478994,  4914423,  8478994,
        10875245, 10875245,  5094751, 10875245, 10187017,  6850504,
         6850504, 10187017, 10187017,  6850504,  6850504, 10187017]),
 array([0, 0, 0, 1, 0, 0, 0, 1, 0, 2, 0, 2, 3, 1, 3, 1, 0, 2]))