# Bottleneck likelihood derivatives for global epistasis models
This Python Jupyter notebook tests the calculation of the bottleneck likelihood derivatives for the global epistasis models.
Note that it isn't actually testing code; instead, it's just code that verifies that the analytical derivatives are calculated correctly.

Import Python modules:

In [1]:
import math

import numpy

import scipy.optimize
import scipy.special

Choose some values for the "measured" parameters:

In [2]:
# "Measured" per-variant values read as globals by the functions in this
# notebook (presumably variant frequencies before and after selection).
# NOTE(review): the f_v_pre entries sum to 1.1 rather than 1 -- confirm
# whether that is intended.
f_v_pre = numpy.array(
    [0.2, 0.4, 0.4, 0.1],
    dtype=float,
)
f_v_post = numpy.array(
    [0.25, 0.35, 0.35, 0.05],
    dtype=float,
)

Set a few values for the bottleneck and observed phenotypes:

In [3]:
# Bottleneck sizes at which the likelihood and its derivatives are evaluated.
N_bottles = [10, 100, 1000]

# Observed phenotypes to evaluate: the first vector is arbitrary, the second
# is the "real" (ML) value, i.e. log base 2 of f_v_post / f_v_pre.
real_p_v = numpy.log(f_v_post / f_v_pre) / numpy.log(2)
p_vs = [
    numpy.array([-2, -1, 0.1, 2]),
    real_p_v,
]

Function to compute $n_v^{\rm{bottle}}$:

In [4]:
def n_v_bottle_func(N_bottle, p_v):
    """Return the per-variant bottleneck counts n_v_bottle.

    Computes ``f_v_post * N_bottle * sum(f_v_pre * 2**p_v) / 2**p_v``
    element-wise, reading the module-level arrays ``f_v_pre`` and
    ``f_v_post``.

    Parameters
    ----------
    N_bottle : number
        Overall bottleneck size.
    p_v : numpy.ndarray
        Observed phenotype for each variant.

    Returns
    -------
    numpy.ndarray
        One value per variant.
    """
    two_to_p = 2 ** p_v
    weighted_total = (f_v_pre * two_to_p).sum()
    return (f_v_post * N_bottle * weighted_total) / two_to_p

Function to compute log likelihood:

In [5]:
def L_func(N_bottle, p_v):
    """Return the log likelihood for a bottleneck size and phenotypes.

    With ``n = n_v_bottle_func(N_bottle, p_v)``, the value is
    ``sum(n * log(N_bottle * f_v_pre) - loggamma(n + 1)) - N_bottle``,
    where ``f_v_pre`` is the module-level array.

    Parameters
    ----------
    N_bottle : number
        Overall bottleneck size.
    p_v : numpy.ndarray
        Observed phenotype for each variant.

    Returns
    -------
    Scalar log likelihood.
    """
    counts = n_v_bottle_func(N_bottle, p_v)
    log_expected = numpy.log(N_bottle * f_v_pre)
    per_variant = counts * log_expected - scipy.special.loggamma(counts + 1)
    return per_variant.sum() - N_bottle

Derivative of log likelihood with respect to overall bottleneck:

In [6]:
def dL_dNbottle_func(N_bottle, p_v):
    """Return the analytical derivative of the log likelihood w.r.t. N_bottle.

    With ``n = n_v_bottle_func(N_bottle, p_v)``, the value is
    ``sum(n / N_bottle * (log(N_bottle * f_v_pre) + 1 - digamma(n + 1))) - 1``,
    where ``f_v_pre`` is the module-level array.

    Parameters
    ----------
    N_bottle : number
        Overall bottleneck size.
    p_v : numpy.ndarray
        Observed phenotype for each variant.

    Returns
    -------
    Scalar derivative.
    """
    counts = n_v_bottle_func(N_bottle, p_v)
    bracket = (
        numpy.log(N_bottle * f_v_pre) + 1 - scipy.special.digamma(counts + 1)
    )
    per_variant = counts / N_bottle * bracket
    return per_variant.sum() - 1

Derivative of log likelihood with respect to observed phenotypes:

In [7]:
def dL_dp_v_func(N_bottle, p_v):
    """Return the analytical gradient of the log likelihood w.r.t. p_v.

    Reads the module-level arrays ``f_v_pre`` and ``f_v_post``.

    Parameters
    ----------
    N_bottle : number
        Overall bottleneck size.
    p_v : numpy.ndarray
        Observed phenotype for each variant.

    Returns
    -------
    numpy.ndarray
        One partial derivative per entry of ``p_v``.
    """
    ln2 = math.log(2)
    counts = n_v_bottle_func(N_bottle, p_v)
    # log(N_bottle * f_v_pre) - digamma(n + 1), used in both terms below.
    log_minus_psi = numpy.log(N_bottle * f_v_pre) - scipy.special.digamma(
        counts + 1
    )
    # Scalar sum over all variants; it multiplies every gradient component.
    summed = ((f_v_post / (2 ** p_v)) * log_minus_psi).sum()
    shared_part = ln2 * f_v_pre * 2 ** p_v * N_bottle * summed
    own_part = ln2 * counts * log_minus_psi
    return shared_part - own_part

Compute the log likelihoods and their derivatives.
We see that when $p\left(v\right)$ is close to the "real" value (second value in `p_vs`), then increasing $N_{\rm{bottle}}$ increases the log likelihood.
When $p\left(v\right)$ is very far from the "real" value (first value in `p_vs`), then increasing $N_{\rm{bottle}}$ decreases the log likelihood:

In [9]:
def format_arr(arr):
    """Format an iterable of numbers as ``[a, b, ...]`` with 2 significant digits."""
    pieces = [format(value, ".2g") for value in arr]
    return "[{}]".format(", ".join(pieces))


# For every combination of observed phenotypes and bottleneck size:
#   1. print the log likelihood and its derivative w.r.t. N_bottle,
#   2. check both analytical derivatives against numerical gradients,
#   3. print the analytical and forward-difference gradients w.r.t. p_v,
#   4. maximize the log likelihood over p_v and print the optimum.
for p_v in p_vs:
    print(f"\np_v = {format_arr(p_v)}")
    for N_bottle in N_bottles:
        print(f"  N_bottle = {N_bottle}")
        L = L_func(N_bottle, p_v)
        print(f"    L = {L:.3g}")
        dL_dNbottle = dL_dNbottle_func(N_bottle, p_v)
        print(f"    dL_dNbottle = {dL_dNbottle:.3g}")

        # check gradient of loglik with respect to Nbottle
        # (distinct names so these are not confused with the p_v functions
        # `f` / `df` defined further down)
        def f_Nbottle(x):
            return L_func(x[0], p_v)

        def df_Nbottle(x):
            return dL_dNbottle_func(x[0], p_v)

        err = scipy.optimize.check_grad(
            f_Nbottle, df_Nbottle, numpy.array([N_bottle])
        )
        if err > 5e-3:
            raise ValueError(f"error for dL_dNbottle = {err:.3g}")

        # check gradient of loglik with respect to p_v
        def f(x):
            return L_func(N_bottle, x)

        def df(x):
            return dL_dp_v_func(N_bottle, x)

        err = scipy.optimize.check_grad(f, df, p_v)
        if err > 5e-3:
            raise ValueError(f"error for dL_dp_v = {err:.3g}")
        print(f"    value of dL_dp_v = {format_arr(dL_dp_v_func(N_bottle, p_v))}")

        # Forward-difference estimate of the gradient, one component at a
        # time. The baseline `L` computed above and the step size do not
        # depend on `i`, so they are not recomputed inside the loop.
        eps = 1e-8
        numerical_dL_dp_v = []
        for i in range(len(p_v)):
            p_v_plus = p_v.copy()
            p_v_plus[i] += eps
            Lplus = L_func(N_bottle, p_v_plus)
            numerical_dL_dp_v.append((Lplus - L) / eps)
        print(f"    numerical value of dL_dp_v: {format_arr(numerical_dL_dp_v)}")

        # compute ML estimate of p_v by minimizing the negative log likelihood
        def neg_f(x):
            return -f(x)

        def neg_df(x):
            return -df(x)

        optres = scipy.optimize.minimize(
            fun=neg_f,
            x0=p_v,
            jac=neg_df,
            # one (lower, upper) pair per phenotype, however many there are
            bounds=[(-10, 10)] * len(p_v),
        )
        print(
            f"    ML value of p_v = {format_arr(optres.x)}, and maxL = {f(optres.x):.3g}"
        )


p_v = [-2, -1, 0.1, 2]
  N_bottle = 10
    L = -16.2
    dL_dNbottle = -1.19
    value of dL_dp_v = [12, 0.59, -6.6, -6.2]
    numerical value of dL_dp_v: [12, 0.59, -6.6, -6.2]
    ML value of p_v = [0.24, -0.32, -0.32, -0.53], and maxL = -4.51
  N_bottle = 100
    L = -113
    dL_dNbottle = -1.05
    value of dL_dp_v = [1.2e+02, 4.7, -65, -59]
    numerical value of dL_dp_v: [1.2e+02, 4.7, -65, -59]
    ML value of p_v = [0.12, -0.4, -0.4, -1.1], and maxL = -0.0218
  N_bottle = 1000
    L = -1.04e+03
    dL_dNbottle = -1.03
    value of dL_dp_v = [1.2e+03, 47, -6.4e+02, -5.9e+02]
    numerical value of dL_dp_v: [1.2e+03, 47, -6.4e+02, -5.9e+02]
    ML value of p_v = [0.4, -0.12, -0.12, -0.92], and maxL = 85.4

p_v = [0.32, -0.19, -0.19, -1]
  N_bottle = 10
    L = -4.57
    dL_dNbottle = -0.0841
    value of dL_dp_v = [-0.00061, -0.11, -0.11, 0.23]
    numerical value of dL_dp_v: [-0.00061, -0.11, -0.11, 0.23]
    ML value of p_v = [0.21, -0.35, -0.35, -0.56], and maxL = -4.51
  N_b