# Bottleneck likelihood derivatives for global epistasis models
This Python Jupyter notebook tests the calculation of the bottleneck likelihood derivatives for the global epistasis models.
Note that this notebook is not really testing code; instead, it numerically verifies that the analytical derivatives are calculated correctly.

Import Python modules:

In [1]:
import math

import numpy

import scipy.optimize
import scipy.special

Choose some values for the "measured" parameters:

In [2]:
# Fixed "measured" inputs shared (as module-level globals) by the functions below.
# Presumably the `pre`/`post` suffixes denote pre- and post-selection
# frequencies and `F_wt_*` the wildtype frequencies -- TODO confirm.

# Per-variant array that enters the likelihood via `numpy.log(N_bottle * f_v_pre)`.
# NOTE(review): these entries sum to 1.1 whereas `f_v_post` sums to 1.0;
# confirm this is intended.
f_v_pre = numpy.array([0.2, 0.4, 0.4, 0.1])
# Scalar factor in the numerator of `n_v_bottle_func`.
F_wt_pre = 0.8
# Per-variant array in the numerator of `n_v_bottle_func`.
f_v_post = numpy.array([0.25, 0.35, 0.35, 0.05])
# Scalar factor in the denominator of `n_v_bottle_func`.
F_wt_post = 0.7

Set a few values for the bottleneck and observed phenotypes:

In [3]:
# Bottleneck sizes at which the log likelihood and its derivatives are evaluated.
N_bottles = [10, 100, 1000]
# Observed-phenotype vectors to evaluate, each with one entry per element of
# `f_v_pre` / `f_v_post`. The first vector is far from the "real" values; the
# second holds the "real" values.
p_vs = [numpy.array([-2, -1, 0.1, 2]),
        numpy.array([0.515, 0, 0, -0.807]),  # "real" values if no bottleneck
        ]

Function to compute $n_v^{\rm{bottle}}$:

In [4]:
def n_v_bottle_func(N_bottle, p_v):
    """Compute n_v^bottle for each variant.

    Reads the module-level globals `f_v_post`, `F_wt_pre` and `F_wt_post`.

    Parameters
    ----------
    N_bottle : number
        Bottleneck size.
    p_v : number or numpy.ndarray
        Observed phenotype(s); used as the exponent of 2 in the denominator.

    Returns
    -------
    number or numpy.ndarray
        `f_v_post * F_wt_pre * N_bottle / (F_wt_post * 2**p_v)`, elementwise.
    """
    numerator = f_v_post * F_wt_pre * N_bottle
    denominator = F_wt_post * 2**p_v
    return numerator / denominator

Function to compute log likelihood:

In [5]:
def L_func(N_bottle, p_v):
    """Compute the bottleneck log likelihood.

    Reads the module-level global `f_v_pre` and calls `n_v_bottle_func`.

    Parameters
    ----------
    N_bottle : number
        Bottleneck size.
    p_v : numpy.ndarray
        Observed phenotypes, passed through to `n_v_bottle_func`.

    Returns
    -------
    float
        Sum over variants of
        `n_v_bottle * log(N_bottle * f_v_pre) - loggamma(n_v_bottle + 1)`,
        minus `N_bottle`.
    """
    counts = n_v_bottle_func(N_bottle, p_v)
    log_expected = numpy.log(N_bottle * f_v_pre)
    per_variant = counts * log_expected - scipy.special.loggamma(counts + 1)
    return per_variant.sum() - N_bottle

Derivative of log likelihood with respect to overall bottleneck:

In [6]:
def dL_dNbottle_func(N_bottle, p_v):
    """Compute the derivative of the log likelihood with respect to `N_bottle`.

    Reads the module-level global `f_v_pre` and calls `n_v_bottle_func`.

    Parameters
    ----------
    N_bottle : number
        Bottleneck size.
    p_v : numpy.ndarray
        Observed phenotypes, passed through to `n_v_bottle_func`.

    Returns
    -------
    float
        Sum over variants of
        `n_v_bottle / N_bottle * (log(N_bottle * f_v_pre) + 1 - digamma(n_v_bottle + 1))`,
        minus 1.
    """
    counts = n_v_bottle_func(N_bottle, p_v)
    # n_v_bottle is linear in N_bottle, so d(n_v_bottle)/d(N_bottle) is
    # n_v_bottle / N_bottle; the "+ 1" comes from differentiating the
    # N_bottle inside the logarithm.
    bracket = (numpy.log(N_bottle * f_v_pre)
               + 1
               - scipy.special.digamma(counts + 1))
    per_variant = counts / N_bottle * bracket
    return per_variant.sum() - 1

Derivative of log likelihood with respect to observed phenotypes:

In [7]:
def dL_dp_v_func(N_bottle, p_v):
    """Compute the derivative of the log likelihood with respect to each `p_v`.

    Reads the module-level global `f_v_pre` and calls `n_v_bottle_func`.

    Parameters
    ----------
    N_bottle : number
        Bottleneck size.
    p_v : numpy.ndarray
        Observed phenotypes, passed through to `n_v_bottle_func`.

    Returns
    -------
    numpy.ndarray
        Elementwise
        `log(2) * n_v_bottle * (digamma(n_v_bottle + 1) - log(N_bottle * f_v_pre))`.
    """
    counts = n_v_bottle_func(N_bottle, p_v)
    # n_v_bottle is proportional to 2**(-p_v), so its derivative with respect
    # to p_v is -log(2) * n_v_bottle; that sign is folded into the difference.
    difference = (scipy.special.digamma(counts + 1)
                  - numpy.log(N_bottle * f_v_pre))
    return math.log(2) * counts * difference

Compute the log likelihoods and their derivatives.
We see that when $p\left(v\right)$ is close to the "real" value (second value in `p_vs`), then increasing $N_{\rm{bottle}}$ increases the log likelihood.
When $p\left(v\right)$ is very far from the "real" value (first value in `p_vs`), then increasing $N_{\rm{bottle}}$ decreases the log likelihood:

In [8]:
def format_arr(arr):
    """Return the numbers in `arr` as a bracketed, comma-separated string.

    Each value is formatted with the `.2g` format specifier.
    """
    pieces = [format(value, ".2g") for value in arr]
    joined = ", ".join(pieces)
    return "[" + joined + "]"

# For every combination of phenotype vector and bottleneck size, report the
# log likelihood and its analytical derivatives, then compare each analytical
# derivative with a finite-difference estimate from `scipy.optimize.check_grad`.
for p_v in p_vs:
    print(f"p_v = {format_arr(p_v)}")
    for N_bottle in N_bottles:
        print(f"  N_bottle = {N_bottle}")

        # Evaluate the log likelihood and both analytical derivatives.
        L = L_func(N_bottle, p_v)
        dL_dNbottle = dL_dNbottle_func(N_bottle, p_v)
        dL_dp_v = dL_dp_v_func(N_bottle, p_v)
        print(f"    L = {L:.3g}")
        print(f"    dL_dNbottle = {dL_dNbottle:.3g}")
        print(f"    dL_dp_v = {format_arr(dL_dp_v)}")

        # Gradient with respect to N_bottle: `check_grad` works on arrays, so
        # N_bottle is wrapped in a length-1 array and unwrapped with `x[0]`.
        err = scipy.optimize.check_grad(
            lambda x: L_func(x[0], p_v),
            lambda x: dL_dNbottle_func(x[0], p_v),
            numpy.array([N_bottle]),
        )
        if err > 5e-3:
            raise ValueError(f"error for dL_dNbottle = {err:.3g}")

        # Gradient with respect to p_v, holding N_bottle fixed.
        err = scipy.optimize.check_grad(
            lambda x: L_func(N_bottle, x),
            lambda x: dL_dp_v_func(N_bottle, x),
            p_v,
        )
        if err > 5e-3:
            raise ValueError(f"error for dL_dp_v = {err:.3g}")

        # The derivative function is deterministic, so the value computed
        # above is reported again here rather than re-evaluated.
        print(f"    value of dL_dp_v = {format_arr(dL_dp_v)}")

p_v = [-2, -1, 0.1, 2]
  N_bottle = 10
    L = -17.7
    dL_dNbottle = -1.33
    dL_dp_v = [14, 4.2, 0.15, -0.036]
    value of dL_dp_v = [14, 4.2, 0.15, -0.036]
  N_bottle = 100
    L = -127
    dL_dNbottle = -1.18
    dL_dp_v = [1.4e+02, 39, -1.4, -1.6]
    value of dL_dp_v = [1.4e+02, 39, -1.4, -1.6]
  N_bottle = 1000
    L = -1.18e+03
    dL_dNbottle = -1.16
    dL_dp_v = [1.4e+03, 3.8e+02, -18, -19]
    value of dL_dp_v = [1.4e+03, 3.8e+02, -18, -19]
p_v = [0.52, 0, 0, -0.81]
  N_bottle = 10
    L = -4.57
    dL_dNbottle = -0.0841
    dL_dp_v = [0.32, 0.33, 0.33, 0.29]
    value of dL_dp_v = [0.32, 0.33, 0.33, 0.29]
  N_bottle = 100
    L = -0.0302
    dL_dNbottle = 0.0802
    dL_dp_v = [0.34, 0.35, 0.35, 0.34]
    value of dL_dp_v = [0.34, 0.35, 0.35, 0.34]
  N_bottle = 1000
    L = 85.4
    dL_dNbottle = 0.098
    dL_dp_v = [0.31, 0.35, 0.35, 0.33]
    value of dL_dp_v = [0.31, 0.35, 0.35, 0.33]
