From ea51cbd3be701e7bebb8610ef06819f60993c722 Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Wed, 18 Sep 2019 11:56:17 -0400 Subject: [PATCH] writing party and reparameterization of SHO kernel --- exoplanet/gp/celerite_test.py | 30 +++++++++++++++++ exoplanet/gp/terms.py | 58 ++++++++++++++++++++++---------- paper/exoplanet.tex | 62 +++++++++++++++++++++++++++++++++++ 3 files changed, 133 insertions(+), 17 deletions(-) diff --git a/exoplanet/gp/celerite_test.py b/exoplanet/gp/celerite_test.py index 6559130e4..56ee6ee7f 100644 --- a/exoplanet/gp/celerite_test.py +++ b/exoplanet/gp/celerite_test.py @@ -209,3 +209,33 @@ def test_integrated(kernel, seed=1234): kernel = terms.IntegratedTerm(kernel, dt) _check_model(kernel, x, diag, y) + + +def test_sho_reparam(seed=6083): + S0 = 10.0 + w0 = 0.5 + Q = 3.2 + kernel1 = terms.SHOTerm(S0=S0, w0=w0, Q=Q) + kernel2 = terms.SHOTerm(Sw4=S0 * w0 ** 4, w0=w0, Q=Q) + func1 = theano.function([], kernel1.coefficients) + func2 = theano.function([], kernel2.coefficients) + for a, b in zip(func1(), func2()): + assert np.allclose(a, b) + + kernel2 = terms.SHOTerm(log_Sw4=np.log(S0) + 4 * np.log(w0), w0=w0, Q=Q) + func2 = theano.function([], kernel2.coefficients) + for a, b in zip(func1(), func2()): + assert np.allclose(a, b) + + Q = 0.1 + kernel1 = terms.SHOTerm(S0=S0, w0=w0, Q=Q) + kernel2 = terms.SHOTerm(Sw4=S0 * w0 ** 4, w0=w0, Q=Q) + func1 = theano.function([], kernel1.coefficients) + func2 = theano.function([], kernel2.coefficients) + for a, b in zip(func1(), func2()): + assert np.allclose(a, b) + + kernel2 = terms.SHOTerm(log_Sw4=np.log(S0) + 4 * np.log(w0), w0=w0, Q=Q) + func2 = theano.function([], kernel2.coefficients) + for a, b in zip(func1(), func2()): + assert np.allclose(a, b) diff --git a/exoplanet/gp/terms.py b/exoplanet/gp/terms.py index a41e2c273..82387c51c 100644 --- a/exoplanet/gp/terms.py +++ b/exoplanet/gp/terms.py @@ -26,6 +26,26 @@ from ..utils import eval_in_model +def normalize_parameters(names, **kwargs): + results = dict() + results["dtype"] = dtype = kwargs.pop("dtype", theano.config.floatX) + for name in names: + if name not in kwargs and "log_" + name not in kwargs: + raise ValueError( + ( + "Missing required parameter {0}. " "Provide {0} or log_{0}" + ).format(name) + ) + value = ( + kwargs.pop(name) + if name in kwargs + else tt.exp(kwargs.pop("log_" + name)) + ) + results[name] = tt.cast(value, dtype) + + return dict(results, **kwargs) + + class Term(object): """The abstract base "term" that is the superclass of all other terms @@ -37,22 +57,10 @@ class Term(object): parameter_names = tuple() def __init__(self, **kwargs): - self.dtype = kwargs.pop("dtype", theano.config.floatX) - for name in self.parameter_names: - if name not in kwargs and "log_" + name not in kwargs: - raise ValueError( - ( - "Missing required parameter {0}. " - "Provide {0} or log_{0}" - ).format(name) - ) - value = ( - kwargs[name] - if name in kwargs - else tt.exp(kwargs["log_" + name]) - ) - setattr(self, name, tt.cast(value, self.dtype)) - + results = normalize_parameters(self.parameter_names, **kwargs) + self.dtype = results.pop("dtype") + for k, v in results.items(): + setattr(self, k, v) self.coefficients = self.get_coefficients() @property @@ -524,6 +532,11 @@ class SHOTerm(Term): tensor S0 or log_S0: The parameter :math:`S_0`. tensor Q or log_Q: The parameter :math:`Q`. tensor w0 or log_w0: The parameter :math:`\omega_0`. + tensor Sw4 or log_Sw4: It can sometimes be more efficient to + parameterize the amplitude of a SHO kernel using + :math:`S_0\,{\omega_0}^4` instead of :math:`S_0` directly since + :math:`S_0` and :math:`\omega_0` are strongly correlated. If + provided, ``S0`` will be computed from ``Sw4`` and ``w0``. """ @@ -531,7 +544,18 @@ class SHOTerm(Term): def __init__(self, *args, **kwargs): self.eps = tt.as_tensor_variable(kwargs.pop("eps", 1e-5)) - super(SHOTerm, self).__init__(*args, **kwargs) + + results = normalize_parameters(("w0", "Q"), **kwargs) + if "Sw4" in results: + if "S0" in results or "log_S0" in results: + raise ValueError("Sw4 and S0 cannot both be specified") + results["S0"] = results["Sw4"] / results["w0"] ** 4 + elif "log_Sw4" in results: + if "S0" in results or "log_S0" in results: + raise ValueError("Sw4 and S0 cannot both be specified") + results["log_S0"] = results["log_Sw4"] - 4 * tt.log(results["w0"]) + + super(SHOTerm, self).__init__(*args, **results) @property def J(self): diff --git a/paper/exoplanet.tex b/paper/exoplanet.tex index 06dda2ee3..2b999a5b4 100644 --- a/paper/exoplanet.tex +++ b/paper/exoplanet.tex @@ -342,6 +342,68 @@ \section{Orbital conventions} \label{eqn:Z} \end{equation} +\section{Parameterizing an orbit} + +At is heart, \exoplanet\ models Keplerian orbits. +There is some support for relaxing this assumption (REFERENCE N-BODY W REBOUND), but it is important that we clearly present how to represent a Keplerian orbit before generalizing. +In \exoplanet\ these orbits are specified specified with respect to a single central body (generally the most massive body) and then a system of non-interacting bodies orbiting the central. +This is a good parameterization for exoplanetary systems and binary stars, but it is sometimes not sufficient for systems with multiple massive bodies where the interactions will be important to the dynamics. + +The central body is defined by any two of radius $R_\star$, mass $M_\star$, or density $\rho_\star$. +The third physical parameter can always be computed using the other two so \exoplanet\ will throw an error if three are given (even if they are numerically self-consistent). + +The orbits are then defined by: +\begin{itemize} + \item the period $P$ or semi-major axis $a$ of the orbit, + \item the time of conjunction $t_0$ or time of periastron passage $t_\mathrm{p}$, + \item the planet's radius $R_\mathrm{pl}$ and mass $M_\mathrm{pl}$, + \item the eccentricity $e$, argument of periastron $\omega$ and the longitude of ascending node $\Omega$, and + \item the inclination of the orbital plane $i$ or the impact parameter $b$. +\end{itemize} + +A \texttt{KeplerianOrbit} object in \exoplanet\ will always be fully specified. +For example, if the orbit is defined using the period $P$, the semi-major axis $a$ will be computed using Kepler's third law. + +\section{Parameterizing transiting planets} + +It is common practice to parameterize a transiting planet using period $P$, time of conjunction $t_0$, and impact parameter $b$. +If parameters are well constrained then specific choices about priors won't be important, but the eccentricity and argument of periastron are rarely well constrained by the transit alone, so it can be important to consider the details of the parameterization. + +From geometry, we can derive that expected prior distribution for $\cos i$ is uniform from $-1$ to $1$. +This doesn't directly translate to a uniform prior on $b$. +Instead, if we want to sample in $b$ we need to take into account the Jacobian in the change of variables +\begin{eqnarray} +p(\cos i) &=& \left| \frac{\dd b}{\dd \cos i} \right|\,p(b) \quad. +\end{eqnarray} +Since, +\begin{eqnarray} +b = \frac{a}{R_\star}\,\cos i\,\left[\frac{1 - e^2}{1 + e\,\sin\omega}\right]\quad, +\end{eqnarray} +the relevant Jacobian is +\begin{eqnarray} +\frac{\mathrm{d}b}{\mathrm{d}\cos i} = \frac{R_\star}{a}\,\left[\frac{1 + e\,\sin\omega}{1 - e^2}\right] \quad. +\end{eqnarray} + +Similarly, in order for a transit to occur, the impact parameter must satisfy +\begin{eqnarray} + \left|b\right| < 1 + R_\mathrm{pl}/R_\star \quad. +\end{eqnarray} +In other words, the conditional prior probability for the impact parameter (conditioned on the assumption that the planet transits and the other orbital parameters) is +\begin{eqnarray} +p(b\,|\,a,\,e,\,\omega,\,R_\mathrm{pl},\,R_\star) = +\frac{R_\star}{a}\,\left[\frac{1 + e\,\sin\omega}{1 - e^2}\right]\, +\left\{\begin{array}{ll} + 1 / [2\,(1 + R_\mathrm{pl}/R_\star)] & \mathrm{if}\,\left|b\right| < 1 + R_\mathrm{pl}/R_\star \\ + 0 & \mathrm{otherwise} +\end{array}\right. +\end{eqnarray} + +Similarly, the prior on the planet's radius should also depend on the detectability of the signal, but this will only be important for small planets with radius error bars that span the detection threshold which won't be common. + +As discussed by \dfmtodo{CITE exofast}, there is a strong degeneracy between $e$, $b$ and $\omega$ that is introduced by the fact that the duration is the main observable that constrains these three quantities. +While it is possible to change variables to fit in duration instead of $e$ and analytically marginalize over the branches of the solution for $e$, both the bookkeeping and reparameterization is difficult to get right. +In particular, NUTS requires that all parameters have infinite support, but the allowed range of the duration parameter depends on the branch and the other parameters. +Furthermore, there is a funnel situation since at some values of $\omega$, all values of $e$ are identical. \section{The custom operations provided by exoplanet}