In [1]:
import math as m
import numpy as np
import pandas as pd
import scipy.optimize
import statsmodels.tools.numdiff as smnd

# Import pyplot for plotting
import matplotlib.pyplot as plt

# Some pretty Seaborn settings
import seaborn as sns
rc={'lines.linewidth': 2, 'axes.labelsize': 14, 'axes.titlesize': 14}
sns.set(rc=rc)

# Make Matplotlib plots appear inline

%matplotlib inline

a) 

Model 1 describes the exponential distribution. Microtubulin catastrophe can be categorized as a Poisson process and the waiting time for catastrophe or waiting time for an arrival of a Poisson process is exponentially distributed. $\tau$ = $\frac{1}{r}$ and should be in units of time (seconds) where r is the average rate of microtubulin catastrophe.

Model 2 describes the gamma distribution. It describes the amount of time we have to wait for a certain number of arrivals of a Poisson process, so in this case, describing the amount of time we have to wait for a certain number of microtubulin catastrophes. $a$ is the number of arrivals/ microtubulin catastrophes and $\tau$ = $\frac{1}{r}$ where r is the rate of microtubulin catastrophes. 

Model 3 describes the Weibull distribution. The Weibull distribution serves as a model for aging. In this case, it follows the logic of: the longer we wait for microtubulin to undergo catastrophe, the more likely it will happen. $\beta$ is the shape parameter. For $\beta$ $>$ 1, the longer we have waited, the more likely it is to come, and vice versa for $\beta$ $<$ 1. $\tau$ is the scale parameter, which dictates the rate of catastrophe.

In [3]:
path = "../data/gardner_mt_catastrophe_only_tubulin.csv"
df = pd.read_csv(path, comment='#')

In [5]:
df.head()

Unnamed: 0,12 uM,7 uM,9 uM,10 uM,14 uM
0,25.0,35.0,25.0,50.0,60.0
1,40.0,45.0,40.0,60.0,75.0
2,40.0,50.0,40.0,60.0,75.0
3,45.429,50.0,45.0,75.0,85.0
4,50.0,55.0,50.0,75.0,115.0


In [7]:
df_new1 = df['12 uM']
df_new1.head()

0    25.000
1    40.000
2    40.000
3    45.429
4    50.000
Name: 12 uM, dtype: float64

In [28]:
def log_likelihood_m1(p, d):
    """
    Log likelihood for model 1
    """
    tau = p[0]
    
    return np.sum(m.exp(-d/tau)/ tau)

def log_likelihood_m2(p, d):
    """
    Log likelihood for model 2
    """
    tau = p[0]
    a = p[1]
    
    return np.sum((d/tau) ** a * m.exp(-d/tau)/ (d * m.gamma(a)))

def log_likelihood_m3(p, d):
    """
    Log likelihood for model 3
    """
    tau = p[0]
    beta = p[2]
    
    return np.sum((beta/tau) * (d/tau) ** (beta - 1) * m.exp(-(d/tau)**beta))

    
def log_prior_m1(p):
    """
    Log prior for mitotic spindle length vs droplet size.
    """
    if (p < 0).any():
        return -np.inf

    return -np.log(p[0])

    
def log_prior_m2(p):
    """
    Log prior for mitotic spindle length vs droplet size.
    """
    if (p < 0).any():
        return -np.inf

    return -np.log(p[0]) - np.log(p[1])

def log_prior_m3(p):
    """
    Log prior for mitotic spindle length vs droplet size.
    """
    if (p < 0).any():
        return -np.inf

    return -np.log(p[0]) - np.log(p[1]) - np.log(p[2])

def log_posterior_m1(p, d):
    """
    Log posterior  
    """
    lp = log_prior_m1(p)
    
    if lp == -np.inf:
        return -np.inf
    
    return lp + log_likelihood_m1(p, d)

def log_posterior_m2(p, d):
    """
    Log posterior  
    """
    lp = log_prior_m2(p)
    
    if lp == -np.inf:
        return -np.inf
    
    return lp + log_likelihood_m2(p, d)

def log_posterior_m3(p, d):
    """
    Log posterior  
    """
    lp = log_prior_m3(p)
    
    if lp == -np.inf:
        return -np.inf
    
    return lp + log_likelihood_m3(p, d)

def neg_log_posterior_m1(p, d):
    return -log_posterior_m1(p, d)

def neg_log_posterior_m2(p, d):
    return -log_posterior_m2(p, d)

def neg_log_posterior_m3(p, d):
    return -log_posterior_m3(p, d)

In [30]:
# Extra arguments as a tuple
args = (df['12 uM'])

# Model 1
p0 = np.array([200])
args_A = args
res = scipy.optimize.minimize(neg_log_posterior_m1, p0, args=args_A, 
                              method='powell')
popt_m1 = res.x
cov_m1 = -np.linalg.inv(smnd.approx_hess(popt_m1, log_posterior_m1, args=args_A))

# Model 2
p0 = np.array([200, 4])
args_B = args 
res = scipy.optimize.minimize(neg_log_posterior_m2, p0, args=args_B, 
                              method='powell')
popt_m2 = res.x
cov_m2 = -np.linalg.inv(smnd.approx_hess(popt_m2, log_posterior_m2, args=args_B))

# Model 3
p0 = np.array([200, 4, 1.5])
args_C = args 
res = scipy.optimize.minimize(neg_log_posterior_m3, p0, args=args_C, 
                              method='powell')
popt_m3 = res.x
cov_m3 = -np.linalg.inv(smnd.approx_hess(popt_m3, log_posterior_m3, args=args_B))


TypeError: cannot convert the series to <class 'float'>