In [None]:
### THIS CELL SETS UP THE GOOGLE COLAB ENVIRONMENT. 
### IF RUNNING THIS NOTEBOOK LOCALLY, IT MAY BE SAFELY DELETED.

#@title Install software

#@markdown Install the software by pressing the _Play_ button on the left.

try:
    import google.colab
    RUNNING_IN_COLAB = True
except ImportError:
    RUNNING_IN_COLAB = False
except Exception as e: 
    err = "Could not figure out if runnning in a colab notebook\n"
    raise Exception(err) from e

if RUNNING_IN_COLAB:
    !pip install dataprob


In [None]:
%matplotlib inline
import dataprob
import numpy as np

# ------------------------------------------------------------------------
# Define model and generate data

def linear_extrapolation(dG_unfold=5,m_unfold=-2,
                         b_native=1,m_native=0,
                         b_denat=0,m_denat=0,
                         osmolyte=None,T=298.15,R=0.001987):
    """
    Linear extrapolation unfolding model.

    The unfolding free energy is assumed to vary linearly with osmolyte
    concentration (dG = dG_unfold + m_unfold*osmolyte). The returned signal
    is the sum of a linear native baseline weighted by the fraction folded
    and a linear denatured baseline weighted by the fraction unfolded.

    Parameters
    ----------
    dG_unfold : float, default=5
        unfolding free energy in water
    m_unfold : float, default=-2
        effect of osmolyte on the unfolding free energy
    b_native : float, default=1
        intercept of the native baseline
    m_native : float, default=0
        slope of the native baseline
    b_denat : float, default=0
        intercept of the denatured baseline
    m_denat : float, default=0
        slope of the denatured baseline
    osmolyte : numpy.ndarray
        array of osmolyte concentrations (required; any array-like or scalar
        that can be converted to a float array is accepted)
    T : float, default=298.15
        temperature of experiment in K
    R : float, default=0.001987
        gas constant (default is kcal/mol)

    Returns
    -------
    signal : numpy.ndarray
        observed signal as a function of osmolyte

    Raises
    ------
    TypeError
        if osmolyte is not specified
    """

    # The None default only exists so osmolyte can follow the fittable
    # parameters in the signature; fail with a readable message instead of
    # an "unsupported operand" error from None arithmetic.
    if osmolyte is None:
        err = "osmolyte must be an array of concentrations, not None\n"
        raise TypeError(err)

    osmolyte = np.asarray(osmolyte,dtype=float)

    RT = R*T
    dG = dG_unfold + m_unfold*osmolyte

    # Fraction folded is 1/(1 + K) with K = exp(-dG/RT). Evaluate it as
    # exp(-log(1 + K)) using logaddexp so that very negative dG values (which
    # a fitter may try) saturate cleanly at 0 rather than overflowing in exp.
    fx = np.exp(-np.logaddexp(0.0,-dG/RT))

    native_signal = (m_native*osmolyte + b_native)*fx
    denatured_signal = (m_denat*osmolyte + b_denat)*(1 - fx)

    return native_signal + denatured_signal
        
# Parameters for staphylococcal nuclease d+phs protein, pH 7.0. These are the
# "true" values used to simulate the data; the fit should recover them.
gen_params = {"dG_unfold":11.9,
              "m_unfold":-4.2,
              "b_native":1.5,
              "m_native":-0.15,
              "b_denat":0.1,
              "m_denat":-0.03}

# Generate data
SEED = 0           # fixed seed so the simulated noise is reproducible
T = 298            # temperature passed to the model
R = 0.001987       # gas constant passed to the model
err = 0.020        # standard deviation of the simulated noise
num_points = 50
osmolyte = np.linspace(0,8,num_points)

# Noise-free signal from the model at the generating parameters
y_obs_clean = linear_extrapolation(osmolyte=osmolyte,
                                   R=R,T=T,
                                   **gen_params)

# Add gaussian noise from a seeded generator (not the global numpy random
# state) so a fresh kernel run reproduces the same data set.
rng = np.random.default_rng(SEED)
y_obs = y_obs_clean + rng.normal(0,err,num_points)
y_std = err

# Function to fit and the arguments that are held fixed during the fit
test_fcn = linear_extrapolation
non_fit_kwargs = {"osmolyte":osmolyte,
                  "R":R,
                  "T":T}

# ------------------------------------------------------------------------
# Run analysis

# Build a fitter around the model function. The entries in non_fit_kwargs
# (osmolyte, R, T) are handed to the function as-is rather than being fit.
# NOTE(review): method="ml" is presumably maximum likelihood -- confirm
# against the dataprob documentation.
f = dataprob.setup(
    some_function=test_fcn,
    method="ml",
    non_fit_kwargs=non_fit_kwargs,
)

# Fit the simulated observations using the known noise level
f.fit(y_obs=y_obs, y_std=y_std)

# Summary figure of the fit
fig = dataprob.plot_summary(
    f,
    x_label="[denaturant]",
    y_label="fluorescence",
)

In [None]:
# Corner plot for the fit.
# NOTE(review): filter_params presumably drops parameters whose names match
# these strings (the baseline terms) from the plot -- confirm against the
# dataprob documentation.
fig = dataprob.plot_corner(
    f,
    filter_params=["native", "denat"],
)


In [None]:
# Display the table of fit results. Rows are indexed by parameter name and
# the columns include "estimate" and "std" (both are read when comparing the
# input and fit parameters later in this notebook).
f.fit_df

In [None]:
# Display the fitter's fit_quality attribute.
# NOTE(review): presumably a summary of goodness-of-fit diagnostics --
# confirm against the dataprob documentation.
f.fit_quality

### Compare input and fit parameters

In [None]:
from matplotlib import pyplot as plt

# Parity plot: generating ("input") parameter values on x, fitted estimates
# (with their standard deviations as error bars) on y. Points on the dashed
# diagonal were recovered exactly.
fig, ax = plt.subplots(nrows=1, figsize=(6, 6))

param_names = gen_params.keys()

x = np.array(list(gen_params.values()))
y = np.array(f.fit_df.loc[param_names, "estimate"])
yerr = np.array(f.fit_df.loc[param_names, "std"])

# Shared axis range covering both the inputs and the estimates, padded by
# 10% of the span on each side; labels are nudged by 3% of the span.
all_values = list(x) + list(y)
lowest = np.min(all_values)
highest = np.max(all_values)
span = highest - lowest
extend_by = 0.1*span
offset = 0.03*span
min_value = lowest - extend_by
max_value = highest + extend_by

# Points (red) with error bars only (lw=0 suppresses the connecting line)
ax.scatter(x, y, s=20, edgecolor='none', facecolor='red', zorder=1)
ax.errorbar(x=x, y=y, yerr=yerr,
            lw=0, elinewidth=1, capsize=3,
            color='black', zorder=2)

# Label each point with its parameter name, up and to the left of the marker
for name, x_value, y_value in zip(param_names, x, y):
    ax.text(x=x_value - offset, y=y_value + offset, s=name)

# 1:1 reference line drawn behind the data
diagonal = (min_value, max_value)
ax.plot(diagonal, diagonal, '--', color='gray', zorder=0)

ax.set_xlim(min_value, max_value)
ax.set_ylim(min_value, max_value)
ax.set_xlabel("input parameter value")
ax.set_ylabel("estimated parameter value")
