In [1]:
import sys

## This is how to get the peerless target star DataFrame for example purposes
## (kept for reference; the paths below are specific to the original author's machine).
#sys.path.append('/u/tdm/repositories/peerless/prediction')
#sys.path.append('/u/tdm/repositories/peerless')
#from targets import targets


import pandas as pd
# Load the target-star table from a local HDF5 file in the working directory.
# NOTE(review): presumably a saved copy of the peerless `targets` DataFrame
# referenced above -- confirm provenance.
targets = pd.read_hdf('targets.h5')

# The action is here. Depends on vespa & isochrones.
from exosyspop.populations import KeplerBinaryPopulation



In [2]:
# Build the binary-star population on top of the target table.
# NOTE(review): `fB` is presumably the binary fraction -- confirm against exosyspop.
pop = KeplerBinaryPopulation(targets, fB=0.4)

In [3]:
# Accessing secondary properties will initialize a secondary simulation,
# calling pop._generate_binaries().  The first time this is called, the
# secondary property regressors get trained (this is what prints the
# "EB: ... regressor trained" lines in the output).
pop.radius_B

EB: dmag regressor trained, R2=0.999474955286
EB: qR regressor trained, R2=0.999500959958


array([ 0.54746723,         nan,  0.65917675, ...,  0.45777415,
               nan,         nan])

In [4]:
# Subsequent calls are much faster, since the regressors are only trained
# on the first call.  Here the private method is called directly to force a
# fresh draw of the binaries, then the new secondary radii are printed and
# the call is timed.
pop._generate_binaries()
print(pop.radius_B)
%timeit pop._generate_binaries()

[        nan         nan  0.73552646 ...,         nan  0.58805114
  0.25668352]
10 loops, best of 3: 98 ms per loop


In [5]:
# If physical accuracy is important, you can also choose to generate binary properties
# directly from the isochrone (use_ic=True), but it's a factor of a few slower
# than the regression-based default timed in the previous cell:
pop._generate_binaries(use_ic=True)
print(pop.radius_B)
%timeit pop._generate_binaries(use_ic=True)

[        nan         nan         nan ...,         nan         nan
  0.88414657]
1 loops, best of 3: 353 ms per loop


In [6]:
# Similarly, accessing an orbital property (here, `period`) will generate
# the orbital properties.
pop.period

array([  7.75758516e+03,   3.77519546e+01,   6.60473712e+04, ...,
         9.51993688e+01,   4.99714007e+04,   6.66989058e+04])

# Synthetic observations

In [7]:
# Now, we can observe and see what we see.  This takes into account
# duty cycle & data span, as well as geometry.
# The result is a DataFrame; print how many rows were observed and which
# columns are available, then show the first few rows.
obs = pop.observe()
print(len(obs))
print(obs.columns)
obs.head()

545
Index([u'host', u'period', u'ecc', u'w', u'inc', u'a', u'aR', u'b_pri',
       u'b_sec', u'k', u'tra', u'occ', u'd_pri', u'd_sec', u'T14_pri',
       u'T14_sec', u'T23_pri', u'T23_sec', u'dataspan', u'dutycycle',
       u'b_target', u'flux_ratio', u'n_pri', u'n_sec', u'phase_sec'],
      dtype='object')


Unnamed: 0,host,period,ecc,w,inc,a,aR,b_pri,b_sec,k,...,T14_sec,T23_pri,T23_sec,dataspan,dutycycle,b_target,flux_ratio,n_pri,n_sec,phase_sec
0,27,2.080306,0.09865,1.291243,1.543258,546471600000.0,6.597464,0.164314,0.198739,0.379209,...,0.151751,0.05471,0.065005,1459.789,0.6987,9.622826,0.01017,485,516,0.517405
1,148,30.551956,0.730516,0.127828,1.569594,3071701000000.0,67.692457,0.03473,0.041863,0.819156,...,0.196772,0.015929,0.019033,1459.789,0.6988,9.390008,0.241917,36,40,0.918296
2,257,9.502884,0.83383,3.939256,1.464786,1373113000000.0,19.168045,1.532848,0.387059,0.159523,...,0.059999,0.0,0.040941,1459.789,0.8756,14.234405,0.000835,0,135,0.082569
3,773,1.479016,0.312376,3.384965,1.535437,416346800000.0,5.889356,0.203176,0.174729,0.269444,...,0.089543,0.0578,0.050249,1459.789,0.6986,10.952576,0.003055,685,703,0.309471
4,814,541.794914,0.548791,1.936265,1.56602,23318850000000.0,355.165431,0.783719,2.431847,0.74769,...,0.0,0.0,0.0,1459.789,0.875,19.066479,0.206917,2,0,0.355861


In [8]:
# Observing is pretty fast, even with new=True, i.e. when generating a new
# population on each call:
%timeit pop.observe(new=True)

1 loops, best of 3: 233 ms per loop


In [9]:
# It is even faster with new_orbits=True, which only generates new orbits.
%timeit pop.observe(new_orbits=True)

10 loops, best of 3: 86.1 ms per loop


In [10]:
# So we can predict the expected number of observations pretty easily:
# repeat the observation N times with freshly drawn orbits and report the
# mean and standard deviation of the number of observed systems.
# NOTE(review): numpy is imported here rather than in the first cell; later
# cells rely on `np` from this cell.
import numpy as np
N = 100
n_obs = np.array([len(pop.observe(new_orbits=True)) for i in range(N)])
n_obs.mean(), n_obs.std()

(532.69000000000005, 22.27406339220574)

In [11]:
# Notice that the above does not yet have trapezoidal parameters.  There are two options to generate these.
# Either we can set the fit_trap parameter, as follows.  This adds the
# trap_dur_*, trap_depth_* and trap_slope_* columns (for both _pri and _sec)
# to the returned DataFrame, as shown in the column listing below.
obs = pop.observe(fit_trap=True)
print(len(obs))
obs.columns

515


Index([u'host', u'period', u'ecc', u'w', u'inc', u'a', u'aR', u'b_pri',
       u'b_sec', u'k', u'tra', u'occ', u'd_pri', u'd_sec', u'T14_pri',
       u'T14_sec', u'T23_pri', u'T23_sec', u'dataspan', u'dutycycle',
       u'b_target', u'flux_ratio', u'n_pri', u'n_sec', u'phase_sec',
       u'trap_dur_pri', u'trap_depth_pri', u'trap_slope_pri', u'trap_dur_sec',
       u'trap_depth_sec', u'trap_slope_sec'],
      dtype='object')

In [12]:
# Timing the trapezoid fit: all things considered, this is still pretty fast
# if we just need to do it a few times.
%timeit pop.observe(fit_trap=True)

1 loops, best of 3: 2.61 s per loop


In [13]:
# However, this is pretty slow if we want to do inference.  To help with this, we can
# tell it to train & use a regression.  Training only happens once; by default with 10,000
# synthetic observations.  This takes a minute or so.
# The regressed trapezoid columns carry a `_regr` suffix (see the column
# listing below), distinguishing them from the directly fitted ones.
obs = pop.observe(regr_trap=True)
print(len(obs))
obs.columns

EB: Depth trained: R2=0.999226099335
EB: Duration trained: R2=0.999011271479
EB: Slope trained: R2=0.991817328692
601


Index([u'host', u'period', u'ecc', u'w', u'inc', u'a', u'aR', u'b_pri',
       u'b_sec', u'k', u'tra', u'occ', u'd_pri', u'd_sec', u'T14_pri',
       u'T14_sec', u'T23_pri', u'T23_sec', u'dataspan', u'dutycycle',
       u'b_target', u'flux_ratio', u'n_pri', u'n_sec', u'phase_sec',
       u'trap_dur_pri_regr', u'trap_depth_pri_regr', u'trap_slope_pri_regr',
       u'trap_dur_sec_regr', u'trap_depth_sec_regr', u'trap_slope_sec_regr'],
      dtype='object')

In [14]:
# Subsequent calls are much faster, since the regression training only
# happens once.
%timeit pop.observe(regr_trap=True)

10 loops, best of 3: 33.7 ms per loop


In [15]:
# Even generating a new stellar population (new=True) & observing it with
# the regressed trapezoid parameters is pretty quick
%timeit pop.observe(regr_trap=True, new=True)

1 loops, best of 3: 238 ms per loop


In [16]:
# Or again, you can just generate new orbits with new_orbits=True
# (rather than new binaries & new orbits)
%timeit pop.observe(regr_trap=True, new_orbits=True)

10 loops, best of 3: 106 ms per loop


In [17]:
# Generating the training data used for the trapezoid shape regression above used
# this function, which can be otherwise useful to sample >N random observations
# from the existing population (the recorded run returned 10081 rows for N=10000).
# `trap_regr` defaults to `True` here.
# NOTE(review): observe() in the cells above takes the keyword `regr_trap`;
# confirm whether this function's keyword is really spelled `trap_regr`.
# This function also takes `new` or `new_orbits` keywords.
obs_pop = pop.get_N_observed(N=10000, new_orbits=True)
print(len(obs_pop))
obs_pop.columns

10081


Index([u'host', u'period', u'ecc', u'w', u'inc', u'a', u'aR', u'b_pri',
       u'b_sec', u'k', u'tra', u'occ', u'd_pri', u'd_sec', u'T14_pri',
       u'T14_sec', u'T23_pri', u'T23_sec', u'dataspan', u'dutycycle',
       u'b_target', u'flux_ratio', u'n_pri', u'n_sec', u'phase_sec',
       u'trap_dur_pri_regr', u'trap_depth_pri_regr', u'trap_slope_pri_regr',
       u'trap_dur_sec_regr', u'trap_depth_sec_regr', u'trap_slope_sec_regr'],
      dtype='object')

In [18]:
# We can now look, e.g. at the expected number of single/double eclipsing systems:
# rows with fewer than 3 primary eclipses, fewer than 3 secondary eclipses,
# and at least one of the two counts equal to zero.
# Every comparison is parenthesized explicitly so that the selection does not
# depend on the pandas query parser giving `|` a lower precedence than `==`
# (in plain Python `a==0 | b==0` is a chained comparison, not an OR).
query = '(n_pri < 3) & (n_sec < 3) & ((n_pri == 0) | (n_sec == 0))'
N = 100
# Re-observe N times with freshly drawn orbits and count the matching rows.
n_obs = np.array([len(pop.observe(new_orbits=True).query(query)) for i in range(N)])
n_obs.mean(), n_obs.std()

(6.0999999999999996, 2.435159132377184)

In [19]:
# Try this again, this time using the empirical eccentricity distribution
# (as opposed to the beta distribution with default params)---eccentricity matters!
# This reuses `query` and `N` from the previous cell.
# NOTE: the flag is set on `pop` itself, so it stays set for any cell run
# after this one.
pop.ecc_empirical = True
n_obs = np.array([len(pop.observe(new_orbits=True).query(query)) for i in range(N)])
n_obs.mean(), n_obs.std()

(10.210000000000001, 3.22581772578675)

In [20]:
# Save the population to an HDF5 file.
# NOTE(review): in the recorded run this call failed with HDF5ExtError
# ("object header message is too large") while setting the attribute
# 'dmag_pipeline' on the /stars group, after a PyTables warning about pickling
# mixed-type object columns.  The save therefore did not complete as
# recorded -- TODO: fix in save_hdf or guard this cell.
pop.save_hdf('eb_model.h5')

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->['tm_designation', 'prov_sec', 'datalink_dvr', 'st_delivname', 'st_vet_date_str', 'ra_str', 'dec_str', 'teff_prov', 'logg_prov', 'feh_prov']]

  return pytables.to_hdf(path_or_buf, key, self, **kwargs)


HDF5ExtError: HDF5 error back trace

  File "H5A.c", line 259, in H5Acreate2
    unable to create attribute
  File "H5Aint.c", line 275, in H5A_create
    unable to create attribute in object header
  File "H5Oattribute.c", line 347, in H5O_attr_create
    unable to create new attribute in header
  File "H5Omessage.c", line 224, in H5O_msg_append_real
    unable to create new message
  File "H5Omessage.c", line 1945, in H5O_msg_alloc
    unable to allocate space for message
  File "H5Oalloc.c", line 1142, in H5O_alloc
    object header message is too large

End of HDF5 error back trace

Can't set attribute 'dmag_pipeline' in node:
 /stars (Group) ''.