Fitting stuff

In [3]:
#@title Initialize environment

#@markdown We recommend setting up a working directory on your google drive. This is a 
#@markdown convenient way to pass files in and out of this analysis. It will 
#@markdown also allow you to save your work. If you put `biophysics` into the form
#@markdown field below, the analysis will save all of its calculations in the 
#@markdown `biophysics` directory in MyDrive (i.e. the top directory at
#@markdown https://drive.google.com). This script will create the directory if 
#@markdown it does not already exist. If the directory already exists, any files
#@markdown that are already in that directory will be available for the analysis. 
#@markdown You could, for example, put a file called `data.csv` in `biophysics` and then
#@markdown access it as "data.csv" in all cells below.
#@markdown <br/>
#@markdown Note: Google may prompt you for permission to access the drive. 
#@markdown To work in a temporary colab environment, leave this blank. Your results
#@markdown will disappear when the Colab session ends. 

# Probe for the colab-only package to decide which environment we are in.
try:
    import google.colab
except ImportError:
    # Package not importable: treat this as an ordinary Jupyter session
    RUNNING_IN_COLAB = False
except Exception as e:
    # Anything other than a missing package is unexpected; re-raise with context
    err = "Could not figure out if runnning in a colab notebook\n"
    raise Exception(err) from e
else:
    RUNNING_IN_COLAB = True

# ------------------------------------------------------------------------------
# Imports

if RUNNING_IN_COLAB:
    %pip install -q ipywidgets

%matplotlib inline
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import scipy
from scipy import optimize

from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from IPython.display import display

import os
import colorsys

# ------------------------------------------------------------------------------
# Environment

if RUNNING_IN_COLAB:

    # Start from the colab scratch area; this is used as-is when the form
    # field below is left blank.
    working_dir = "/content/"

    # Select a working directory on google drive
    google_drive_directory = "loving_it" #@param {type:"string"}
    google_drive_directory = google_drive_directory.strip()

    # Set up google drive
    if google_drive_directory != "":

        from google.colab import drive
        drive.mount('/content/gdrive/')

        working_dir = f"/content/gdrive/MyDrive/{google_drive_directory}"

        # Create the directory (and any parents) directly rather than through a
        # shell command, so names containing spaces or shell metacharacters
        # are taken literally. No error if it already exists.
        os.makedirs(working_dir, exist_ok=True)

    os.chdir(working_dir)
    print(f"Working directory: {os.getcwd()}/")

    # List what is already available in the working directory
    print("\nCurrent directory contents:")
    print(os.getcwd())
    for f in os.listdir("."):
        print(f"    {f}")
    print()
    

# ------------------------------------------------------------------------------
# Default graph label sizing

# Three font-size tiers used for every figure drawn below
SMALL_SIZE = 14
MEDIUM_SIZE = 16
BIGGER_SIZE = 18

# Base text size
plt.rc('font', size=SMALL_SIZE)

# Axes: title in the small tier, x/y labels in the medium tier
plt.rc('axes', titlesize=SMALL_SIZE, labelsize=MEDIUM_SIZE)

# Tick labels on both axes
plt.rc('xtick', labelsize=SMALL_SIZE)
plt.rc('ytick', labelsize=SMALL_SIZE)

# Legend text and overall figure title
plt.rc('legend', fontsize=SMALL_SIZE)
plt.rc('figure', titlesize=BIGGER_SIZE)

# ------------------------------------------------------------------------------
# Fitting functions

def binding_model_full(Kd,Sm,Smx,Xtot,Mtot):
    """
    Single-site binding model (MX <--> M + X) solved without assuming that
    Mtot is small relative to Kd.

    [MX] is taken as the smaller root of the quadratic
    [MX]**2 - (Xtot + Mtot + Kd)*[MX] + Mtot*Xtot = 0, and the signal is
    interpolated between Sm and Smx by the fraction of M that is bound.

    Parameters
    ----------
    Kd : float
        dissociation constant
    Sm : float
        signal of the M form
    Smx : float
        signal of the MX form
    Xtot : float
        total X concentration
    Mtot : float
        total M concentration

    Returns
    -------
    signal : float
        spectroscopic signal given the current [MX]/([M] + [MX])
    """

    # Sum of totals plus Kd: the (negated) linear coefficient of the quadratic
    total = Xtot + Mtot + Kd

    # Smaller root of the quadratic gives the complex concentration
    discriminant = total**2 - 4*(Mtot*Xtot)
    bound = (total - np.sqrt(discriminant))/2

    # Fraction of M in the MX form
    fraction_bound = bound/Mtot

    return Sm + fraction_bound*(Smx - Sm)


def binding_model(Kd,Sm,Smx,Xtot):
    """
    Reduced single-site binding model (MX <--> M + X) in which the fraction
    of M bound depends only on Xtot/Kd.

    Parameters
    ----------
    Kd : float
        dissociation constant
    Sm : float
        signal of the M form
    Smx : float
        signal of the MX form
    Xtot : float
        total X concentration

    Returns
    -------
    signal : float
        spectroscopic signal given the current [MX]/([M] + [MX])
    """

    # Ligand concentration expressed in units of Kd
    ratio = Xtot/Kd

    # Hyperbolic saturation: fraction of M in the MX form
    fraction_bound = ratio/(1 + ratio)

    return Sm + fraction_bound*(Smx - Sm)

def binding_model_residuals(param,x_expt,y_expt,Mtot=None):
    """
    Calculate the difference between the signal calculated from the model and
    the observed data.

    Parameters
    ----------
    param : list-like of floats
        model parameters (Kd, Sm, Smx)
    x_expt : list-like of floats
        Xtot values at which data were collected
    y_expt : list-like of floats
        observed signal at the Xtot values given in x_expt
    Mtot : float, optional
        total M concentration. If specified, use the full binding model
        (binding_model_full); if not, use the reduced model (binding_model)
        that assumes [M]tot << Kd.

    Returns
    -------
    residual : np.ndarray
        difference between calculated signal and y_expt for each value of
        x_expt
    """

    # Arrays so that list inputs support element-wise arithmetic
    x_expt = np.asarray(x_expt)
    y_expt = np.asarray(y_expt)

    Kd, Sm, Smx = param[0], param[1], param[2]

    if Mtot is None:
        calc = binding_model(Kd,Sm,Smx,x_expt)
    else:
        # The reduced model does not accept Mtot; the full model must be used
        calc = binding_model_full(Kd,Sm,Smx,x_expt,Mtot)

    residual = calc - y_expt

    return residual


def _run_regresssion(fcn,
                     residual_fcn,
                     param_guesses,
                     x_expt,
                     y_expt,
                     Mtot=None,
                     ax=None,
                     residual_ax=None,
                     color="black",
                     alpha=1.0,
                     max_nfev=None,
                     label=None):
    """
    Use nonlinear regression to fit a model to data.
    
    Parameters
    ----------
    fcn : function
        function to analyze
    residual_fcn : function
        residual function
    param_guesses : list-like of floats
        model parameters
    x_expt : list-like of floats
        Xtot values at which data were collected
    y_expt : list-like of floats
        observed signal at the Xtot values given in x_expt
    Mtot : float, optional
        total M concentration. (If specified, use full binding model; if not,
        used reduced model that assumes [M]tot << Kd
    ax : matplotlib.Axis, optional
        if specified, draw the points and the fit on this ax
    residual_ax : matplotlib.Axis, optional
        if specified, draw the residual points on this axis
    color : str, default="black"
        color to use to draw series
    alpha : float, default=1.0
        opacity to give to series
    max_nfev : int, optional
        maximum number of iterations to run the regression
    label : str, optional
        label to assign to the newly drawn series
    
    Returns
    -------
    fit.x : np.ndarray
        maximum likelihood fit parameter estimates
    x_calc : np.ndarray
        values of x at which calculation is done to get smooth line
    y_calc : np.ndarray
        function values calculated at x_calc
    y_calc_expt : np.ndarray
        function values calcualated at x_expt
    """
    
    x_min = np.min(x_expt)
    x_max = np.max(x_expt)
    y_min = np.min(y_expt)
    y_max = np.max(y_expt)
    del_y = y_max - y_min
    x_calc = np.linspace(x_min*.9,x_max*1.1,100)
    
    fit = optimize.least_squares(residual_fcn,
                                 x0=param_guesses,
                                 kwargs={"x_expt":x_expt,
                                         "y_expt":y_expt,
                                         "Mtot":Mtot},
                                 max_nfev=max_nfev)
    
    y_calc = fcn(*fit.x,x_calc)
    y_calc_expt = fcn(*fit.x,x_expt)
            
    if ax is not None:
        ax.plot(x_calc,y_calc,'-',lw=2,color=color,alpha=alpha,label=label)

    if residual_ax is not None:

        residual_ax.plot(x_expt,y_calc_expt - y_expt,'o',color=color,alpha=alpha)
            
    return fit.x, x_calc, y_calc, y_calc_expt
    
def fit_data(x_expt,
             y_expt,
             y_err,
             Kd_guess,
             Sm_guess,
             Smx_guess,
             Mtot=None,
             num_bootstraps=0):
    """
    Fit a binding model to the data given in x_expt, y_expt, and y_err. Generate
    plots and potentially bootstrap to assess parameter uncertainty. Model has
    the form: MX <--> M + X. We assume there is a signal Sm proportional to [M]
    and another signal Smx proportional to [MX].

    Draws a two-panel figure: data, error bars and fitted curve on top;
    residuals (calc - obs) below. The fitted Kd is written on the top panel,
    followed by the standard deviation of the bootstrap Kd values when
    num_bootstraps > 0.

    Parameters
    ----------
    x_expt : list-like of floats
        Xtot values at which data were collected
    y_expt : list-like of floats
        observed signal at the Xtot values given in x_expt
    y_err : list-like of floats
        standard deviation of measurements for each y_expt
    Kd_guess : float
        guess for the Kd
    Sm_guess : float
        guess for the signal of M
    Smx_guess : float
        guess for the signal of MX
    Mtot : float, optional
        total M concentration. (If specified, use full binding model; if not,
        use reduced model that assumes [M]tot << Kd)
    num_bootstraps : int, default=0
        do this many bootstrap pseudoreplicate fits

    Returns
    -------
    None
    """

    # Arrays so that list inputs support the arithmetic done below
    x_expt = np.asarray(x_expt)
    y_expt = np.asarray(y_expt)

    # Get data limits
    x_min = np.min(x_expt)
    x_max = np.max(x_expt)
    y_min = np.min(y_expt)
    y_max = np.max(y_expt)
    del_y = y_max - y_min

    # Create plot
    fig, ax = plt.subplots(2,1,figsize=(6,12))

    # Create data plot (raw strings keep the LaTeX backslashes literal)
    ax[0].plot(x_expt,y_expt,'o',color="black")
    ax[0].errorbar(x_expt,y_expt,y_err,fmt='o',capsize=5,ms=0,lw=1,color="black")
    ax[0].set_xlabel(r"$[X]_{tot}$ ($\mu M$)")
    ax[0].set_ylabel("signal")
    ax[0].spines['top'].set_visible(False)
    ax[0].spines['right'].set_visible(False)

    # Create residuals plot
    ax[1].plot((x_min,x_max),(0,0),'--',lw=2,color="gray")
    ax[1].set_xlabel(r"$[X]_{tot}$ ($\mu M$)")
    ax[1].set_ylabel("calc - obs")
    ax[1].spines['top'].set_visible(False)
    ax[1].spines['right'].set_visible(False)

    # Figure out which model to use
    if Mtot is None:
        fcn = binding_model
    else:
        fcn = binding_model_full

    param_guesses = [Kd_guess,Sm_guess,Smx_guess]

    # Do maximum likelihood fit
    fit_param, x_calc, y_calc, y_calc_expt = _run_regresssion(fcn,
                                                              binding_model_residuals,
                                                              param_guesses,
                                                              x_expt,
                                                              y_expt,
                                                              Mtot,
                                                              ax=ax[0],
                                                              residual_ax=ax[1],
                                                              color="black")

    # Get fit parameters
    Kd = fit_param[0]

    # Figure out y limits on residuals plot: at least a quarter of the data
    # span, or wider if the largest residual (plus 10%) demands it
    biggest_diff = 1.1*np.max(np.abs(y_calc_expt - y_expt))
    span = max(biggest_diff,del_y/4)
    ax[1].set_ylim(-span,span)

    # Text appended to the Kd annotation; stays empty without bootstrapping
    Kd_err_text = ""
    if num_bootstraps > 0:

        bootstrap_Kds = []
        for i in range(num_bootstraps):

            # Generate a pseudoreplicate by perturbing each point with
            # normally distributed noise of width y_err
            this_y_expt = y_expt + np.random.normal(0,y_err,len(y_expt))

            # Generate color (blue shade that gets lighter with each replicate)
            s = i/(num_bootstraps)*.9 + 0.1
            color = [s,s,1]

            # Plot pseudoreplicate
            ax[0].plot(x_expt,this_y_expt,'o',color=color,alpha=0.5)

            # Do pseudoreplicate fit with the same model (and Mtot) as the
            # main fit so the spread describes that model's Kd
            boot_param, _, _, _ = _run_regresssion(fcn,
                                                   binding_model_residuals,
                                                   param_guesses,
                                                   x_expt,
                                                   this_y_expt,
                                                   Mtot,
                                                   ax=ax[0],
                                                   residual_ax=ax[1],
                                                   color=color,
                                                   alpha=0.5)

            # Record pseudoreplicate Kd
            bootstrap_Kds.append(boot_param[0])

        # Record error bit
        Kd_std = np.std(bootstrap_Kds)
        Kd_err_text = rf" $\pm$ {Kd_std:.1f}"

    ax[0].text(x_max*0.85,y_min + del_y*0.1,rf"$K_D$: {Kd:.2f}{Kd_err_text} $\mu M$")
    

    



In [4]:
#@markdown Press the "Play" button on the left to run.

# Slider for the initial Kd guess, moving in increments of 5.
# NOTE(review): the range includes values <= 0; a guess of exactly 0 makes the
# reduced model divide by zero — confirm whether that is intended for the demo.
Kd_guess_slider = widgets.FloatSlider(min=-100,max=100,step=5,value=10,description="guess Kd (micromolar)")

def _fit_data_wrapper(Kd_guess):
    """
    Fit the built-in five-point example dataset using the given starting
    guess for Kd (no bootstrapping).

    Parameters
    ----------
    Kd_guess : float
        initial guess for the Kd, taken from the slider
    """

    return fit_data(x_expt=np.arange(5),
                    y_expt=np.array([0,1,1.5,1.6,1.7]),
                    y_err=0.1*np.ones(5),
                    Kd_guess=Kd_guess,
                    Sm_guess=0,
                    Smx_guess=2,
                    num_bootstraps=0)


# Re-run the fit every time the slider moves
w = widgets.interactive(_fit_data_wrapper,
                        Kd_guess=Kd_guess_slider)

display(w)
    

interactive(children=(FloatSlider(value=10.0, description='guess Kd (micromolar)', min=-100.0), Output()), _do…

## What is fitting?

In [5]:
#@markdown Press the "Play" button on the left to run.

def plot_fitting_vs_steps(Kd_guess,max_steps):
    """
    Show how the fitted curve changes as the regression is allowed to run
    longer. For each step count from 1 to max_steps, the fit is restarted from
    the same guesses with a larger evaluation cap, drawn in its own color, and
    its parameters are printed.

    Parameters
    ----------
    Kd_guess : float
        initial guess for the Kd
    max_steps : int
        largest step count to draw
    """

    Sm_guess = 0
    Smx_guess = 2
    x_expt = np.arange(5)
    y_expt = np.array([0,1,1.5,1.6,1.7])
    y_err = 0.1*np.ones(5)

    fig, ax = plt.subplots(1,figsize=(6,6))

    # Create data plot; high zorder keeps the data on top of the fit lines
    ax.plot(x_expt,y_expt,'o',color="black",zorder=100)
    ax.errorbar(x_expt,y_expt,y_err,fmt='o',capsize=5,ms=0,lw=1,color="black",zorder=100)
    ax.set_xlabel(r"$[X]_{tot}$ ($\mu M$)")
    ax.set_ylabel("signal")
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.set_ylim([0,2])

    # Do maximum likelhood fit
    for i in range(1,max_steps+1):

        # Distinct hue per step
        color = colorsys.hsv_to_rgb(i/10,1,0.8)

        # max_nfev is i+1 for step i — presumably the extra evaluation covers
        # the initial guess; TODO confirm.
        fit_param, x_calc, y_calc, y_calc_expt = _run_regresssion(binding_model,
                                                                  binding_model_residuals,
                                                                  [Kd_guess,Sm_guess,Smx_guess],
                                                                  x_expt,
                                                                  y_expt,
                                                                  ax=ax,
                                                                  color=color,
                                                                  max_nfev=i+1,
                                                                  label=f"step {i}")
        print(f"Step: {i}, Kd: {fit_param[0]:.2f}, Sm: {fit_param[1]:.2f}, Smx: {fit_param[2]:.2f}")


    ax.legend(loc="lower right")
    
# Integer sliders for the starting Kd guess and the number of steps to draw
Kd_guess_slider = widgets.IntSlider(min=1,max=10,step=1,value=5,description="guess Kd (micromolar)")
max_steps_slider = widgets.IntSlider(min=1,max=10,step=1,value=1,description="max_steps")

# Redraw the step-by-step plot whenever either slider moves
w = widgets.interactive(plot_fitting_vs_steps,
                        Kd_guess=Kd_guess_slider,
                        max_steps=max_steps_slider)

display(w)
    

interactive(children=(IntSlider(value=5, description='guess Kd (micromolar)', max=10, min=1), IntSlider(value=…

## How do we assess uncertainty?

In [6]:
#@markdown Press the "Play" button on the left to run.         

# Slider for the number of bootstrap pseudoreplicates (0 disables bootstrapping)
num_bootstraps_slider = widgets.IntSlider(min=0,max=10,step=1,value=0,description="num bootstraps")

# NOTE: this redefines the _fit_data_wrapper name used by an earlier cell; the
# version here varies num_bootstraps instead of the Kd guess.
def _fit_data_wrapper(num_bootstraps):
    """
    Fit the built-in five-point example dataset with fixed starting guesses,
    running the requested number of bootstrap pseudoreplicate fits.

    Parameters
    ----------
    num_bootstraps : int
        number of bootstrap pseudoreplicates, taken from the slider
    """

    return fit_data(x_expt=np.arange(5),
                    y_expt=np.array([0,1,1.5,1.6,1.7]),
                    y_err=0.1*np.ones(5),
                    Kd_guess=5,
                    Sm_guess=0,
                    Smx_guess=2,
                    num_bootstraps=num_bootstraps)


# Re-run the fit every time the slider moves
w = widgets.interactive(_fit_data_wrapper,
                        num_bootstraps=num_bootstraps_slider)

display(w)
    

interactive(children=(IntSlider(value=0, description='num bootstraps', max=10), Output()), _dom_classes=('widg…