In [1]:
import numpy as np
import scipy.stats as st
import pandas as pd
import os
import scipy
import warnings
import itertools
import scikit_posthocs as sp

import iqplot
import bebi103

import bokeh.io
import bokeh.plotting
import bokeh.layouts
bokeh.io.output_notebook()

### Specifying the data, models, main effects

In [2]:
# Input workbook and the sheet holding the pole/beam measurements.
filename = 'MotorBehaviordata_GFSPF_Livia_111323.xlsx'
data_path = os.path.join("..", "..", "Data", "GFxSPF_ASO", filename)
sheet_name = 'Pole and Beam'

# Group factors
effect_1 = 'Genotype' 
effect_2 = 'Treatment' # "Treatment" or "Microbiome"

# Type of test for the current analysis; used below as a substring to select
# the trial columns (e.g. 'Beam_T' selects 'Beam_T1', 'Beam_T2', 'Beam_T3').
mtest = 'Beam_T' # "Beam_T" or "Pole_T"

# List of models to fit into and assess
# Encoded models: 'weibull', 'weibull_mixed', 'lognormal', 'lognormal_mixed', 'normal', 'normal_mixed', 
# 'inverse_gamma', 'inverse_gamma_mixed', 'gamma', 'gamma_mixed'
# NOTE(review): `models` is reassigned to a shorter list in the MLE/AIC cell
# further down, so this list is not the one that is actually fitted.
models = ['weibull', 'weibull_mixed', 'lognormal', 'lognormal_mixed', 'normal', 'normal_mixed', 
          'inverse_gamma', 'inverse_gamma_mixed', 'gamma', 'gamma_mixed']

# Whether or not the experiment has several cohorts and a dedicated cohort column.
# When truthy, the value itself is used as a column name in the tidying cell,
# so set it to the cohort column's name (not True); leave False otherwise.
cohort_col = False 

### Plotting characteristics

Specifying palettes and plotting order.

In [3]:
# Palette: one colour per group, passed to the confidence-interval plots.
# The CI records are built in `order` below, so presumably colours map to
# groups in that order -- confirm against bebi103.viz.confints.
palette_CI = ["#bdbdbd", "#000000", "#6baed6", "#08519c"]

# Plotting order: group name -> rank; used as the sort key wherever groups
# are iterated for plotting.
order = {'WT_SPF': 1, 'ASO_SPF': 2, 'WT_GF': 3, 'ASO_GF': 4}

### Tidying up the data

In [38]:
# Read the selected sheet of the workbook into a data frame.
pole_beam_df = pd.read_excel(data_path, sheet_name=sheet_name)
# Normalise headers: drop the trailing space in 'ID ' / 'Group ' and
# capitalise 'treatment' so it matches `effect_2`.
pole_beam_df = pole_beam_df.rename(columns={'ID ':'ID', 'Group ':'Group', 'treatment':'Treatment'})

# If either factor column is missing, derive both by splitting the combined
# 'Group' label (e.g. 'ASO-GF') on '-'.
if (effect_1 not in pole_beam_df.columns) or (effect_2 not in pole_beam_df.columns):
    pole_beam_df[[effect_1, effect_2]] = pole_beam_df['Group'].str.split('-', expand = True)

pole_beam_df

Unnamed: 0,Date,cohort,Group,ID,Pole_T1,Pole_T2,Pole_T3,Average_Pole,Beam_T1,Beam_T2,Beam_T3,Average_Beam,Genotype,Treatment
0,3072020,cohort3,ASO-GF,3856,32.94,13.69,9.57,18.733333,8.74,9.99,16.82,11.850000,ASO,GF
1,3072020,cohort3,ASO-GF,3863,8.09,5.43,6.69,6.736667,60.00,60,18.02,46.006667,ASO,GF
2,3072020,cohort3,ASO-GF,3864,7.22,7.3,13.18,9.233333,60.00,15.91,60,45.303333,ASO,GF
3,3072020,cohort3,ASO-GF,3921,7.76,9.57,9.5,8.943333,60.00,12.2,6.65,26.283333,ASO,GF
4,3072020,cohort3,ASO-GF,3866,8.26,3,4.87,5.376667,60.00,6.53,60,42.176667,ASO,GF
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67,1112019,cohort1,WT-SPF,1753,13.24,19.75,7.4,13.463333,10.85,2.72,4.28,5.950000,WT,SPF
68,1112019,cohort1,WT-SPF,1754,21.4,20.1,14.79,18.763333,5.82,3.31,5.97,5.033333,WT,SPF
69,1112019,cohort1,WT-SPF,1759,60,60,60,60,10.78,15.82,24.93,17.176667,WT,SPF
70,1112019,cohort1,WT-SPF,1762,59.48,5.9,45.44,36.94,3.81,2.97,5.32,4.033333,WT,SPF


Creating the working data frame only with the values of interest, removing missing values and cases where mice jumped off of the pole/beam.

In [5]:
# Trials recorded as 'jumped' are turned into missing values so that they are
# dropped together with the other missing measurements below.
pole_beam_df = pole_beam_df.replace(to_replace='jumped', value=float("NaN"))

# Trial columns of the selected test (every column whose name contains `mtest`).
res = [col for col in pole_beam_df.columns if mtest in col]

# Identifier columns; the cohort column is prepended only when one is configured.
col_list = [effect_1, effect_2, 'ID']
if cohort_col:
    col_list = [cohort_col] + col_list

# Wide -> long: one row per (animal, trial) with the measured time.
pole_df = pole_beam_df[col_list + res].copy()
pole_df_long = pole_df.melt(id_vars=col_list, var_name='Trial', value_name='Time')
pole_df_long = pole_df_long.assign(
    Trial=pole_df_long['Trial'].str[-2:],    # keep only the trial tag, e.g. 'T1'
    ID=pole_df_long['ID'].astype('str'),
).dropna()

Creating a dictionary with the group names as keys and arrays of the experimentally measured times as values.

In [6]:
# Map '<effect_1 level>_<effect_2 level>' -> array of measured times.
group_vals = {}

effect1_lst = pole_df_long[effect_1].unique()
effect2_lst = pole_df_long[effect_2].unique()

# Walk every combination of the two factors; combinations without any
# measurement are left out of the dictionary.
for level_1, level_2 in itertools.product(effect1_lst, effect2_lst):
    in_group = (pole_df_long[effect_1] == level_1) & (pole_df_long[effect_2] == level_2)
    times = pole_df_long.loc[in_group, 'Time'].values
    if len(times) == 0:
        continue
    group_vals[level_1 + '_' + level_2] = times

### Defining all the necessary functions for the MLE calculations

In [7]:
def log_like_iid_weibull_mixed(params, n):
    """Log likelihood for i.i.d. Weibull measurements mixed with a Dirac delta at 60.

    A measurement equal to 60 either comes from the point mass (probability
    1 - omega) or is a Weibull draw that reached the cap of 60 (probability
    omega times the Weibull survival function at 60). Every other
    measurement contributes omega times the Weibull density.

    Parameters
    ----------
    params : array
        Parameters alpha (shape), sigma (scale), omega (mixture weight of the
        Weibull component).
    n : array
        Array of data points.

    Returns
    -------
    output : float
        Log-likelihood; -inf if alpha <= 0, sigma <= 0 or omega is not
        strictly between 0 and 1.
    """
    alpha, sigma, omega = params

    if alpha <= 0 or sigma <= 0 or omega <= 0 or omega >= 1:
        return -np.inf

    times = np.asarray(n, dtype=float)
    at_cap = times == 60
    log_omega = np.log(omega)

    # Density contribution of all measurements below the cap, in one call.
    total = np.sum(log_omega + st.weibull_min.logpdf(times[~at_cap], alpha, scale=sigma))

    n_capped = np.count_nonzero(at_cap)
    if n_capped:
        # The capped term is the same for every capped point, so it is
        # computed once. logsf avoids log(1 - cdf), which emits log(0) once
        # the cdf rounds to 1.
        log_p_cap = np.logaddexp(
            np.log1p(-omega),
            log_omega + st.weibull_min.logsf(60, alpha, scale=sigma),
        )
        total += n_capped * log_p_cap

    return total

In [8]:
def log_like_iid_weibull(params, n):
    """Log likelihood for i.i.d. Weibull measurements capped at 60.

    Measurements equal to 60 contribute the log survival function at 60;
    all other measurements contribute the log density.

    Parameters
    ----------
    params : array
        Parameters alpha (shape), sigma (scale).
    n : array
        Array of data points.

    Returns
    -------
    output : float
        Log-likelihood; -inf if alpha <= 0 or sigma <= 0.
    """
    alpha, sigma = params

    if alpha <= 0 or sigma <= 0:
        return -np.inf

    times = np.asarray(n, dtype=float)
    at_cap = times == 60

    total = np.sum(st.weibull_min.logpdf(times[~at_cap], alpha, scale=sigma))

    n_capped = np.count_nonzero(at_cap)
    if n_capped:
        # logsf instead of log(1 - cdf): the latter becomes log(0) = -inf as
        # soon as the cdf rounds to 1, although the true tail mass is positive.
        total += n_capped * st.weibull_min.logsf(60, alpha, scale=sigma)

    return total

In [9]:
def log_like_iid_lognormal_mixed(params, n):
    """Log likelihood for i.i.d. lognormal measurements mixed with a Dirac delta at 60.

    A measurement equal to 60 either comes from the point mass (probability
    1 - omega) or is a lognormal draw that reached the cap of 60
    (probability omega times the lognormal survival function at 60). Every
    other measurement contributes omega times the lognormal density.

    Parameters
    ----------
    params : array
        Parameters mu, sigma, omega. mu and sigma are the mean and standard
        deviation of log(y); omega is the mixture weight of the lognormal
        component.
    n : array
        Array of data points.

    Returns
    -------
    output : float
        Log-likelihood; -inf if sigma <= 0 or omega is not strictly between
        0 and 1.
    """
    mu, sigma, omega = params

    # mu is a location on the log scale and may be any real number (mu <= 0
    # simply means a median of at most 1), so only sigma and omega are
    # restricted.
    if sigma <= 0 or omega <= 0 or omega >= 1:
        return -np.inf

    times = np.asarray(n, dtype=float)
    at_cap = times == 60
    scale = np.exp(mu)
    log_omega = np.log(omega)

    total = np.sum(log_omega + st.lognorm.logpdf(times[~at_cap], sigma, scale=scale))

    n_capped = np.count_nonzero(at_cap)
    if n_capped:
        # Identical for every capped point, so computed once; logsf avoids
        # taking log(1 - cdf) when the cdf rounds to 1.
        log_p_cap = np.logaddexp(
            np.log1p(-omega),
            log_omega + st.lognorm.logsf(60, sigma, scale=scale),
        )
        total += n_capped * log_p_cap

    return total

In [10]:
def log_like_iid_lognormal(params, n):
    """Log likelihood for i.i.d. lognormal measurements capped at 60.

    Measurements equal to 60 contribute the log survival function at 60;
    all other measurements contribute the log density.

    Parameters
    ----------
    params : array
        Parameters mu, sigma: mean and standard deviation of log(y).
    n : array
        Array of data points.

    Returns
    -------
    output : float
        Log-likelihood; -inf if sigma <= 0.
    """
    mu, sigma = params

    # mu is a location on the log scale and may be any real number, so only
    # sigma is restricted.
    if sigma <= 0:
        return -np.inf

    times = np.asarray(n, dtype=float)
    at_cap = times == 60
    scale = np.exp(mu)

    total = np.sum(st.lognorm.logpdf(times[~at_cap], sigma, scale=scale))

    n_capped = np.count_nonzero(at_cap)
    if n_capped:
        # logsf instead of log(1 - cdf): the latter becomes log(0) = -inf as
        # soon as the cdf rounds to 1, although the true tail mass is positive.
        total += n_capped * st.lognorm.logsf(60, sigma, scale=scale)

    return total
    

In [11]:
def log_like_iid_normal_mixed(params, n):
    """Log likelihood for i.i.d. normal measurements mixed with a Dirac delta at 60.

    A measurement equal to 60 either comes from the point mass (probability
    1 - omega) or is a normal draw that reached the cap of 60 (probability
    omega times the normal survival function at 60). Every other
    measurement contributes omega times the normal density.

    Parameters
    ----------
    params : array
        Parameters mu (mean), sigma (standard deviation), omega (mixture
        weight of the normal component).
    n : array
        Array of data points.

    Returns
    -------
    output : float
        Log-likelihood; -inf if mu <= 0, sigma <= 0 or omega is not strictly
        between 0 and 1.
    """
    mu, sigma, omega = params

    if mu <= 0 or sigma <= 0 or omega <= 0 or omega >= 1:
        return -np.inf

    times = np.asarray(n, dtype=float)
    at_cap = times == 60
    log_omega = np.log(omega)

    total = np.sum(log_omega + st.norm.logpdf(times[~at_cap], mu, sigma))

    n_capped = np.count_nonzero(at_cap)
    if n_capped:
        # logsf avoids taking log(1 - cdf) when the cdf rounds to 1.
        log_p_cap = np.logaddexp(
            np.log1p(-omega),
            log_omega + st.norm.logsf(60, mu, sigma),
        )
        total += n_capped * log_p_cap

    return total

In [12]:
def log_like_iid_normal(params, n):
    """Log likelihood for i.i.d. normal measurements capped at 60.

    Measurements equal to 60 contribute the log survival function at 60;
    all other measurements contribute the log density.

    Parameters
    ----------
    params : array
        Parameters mu (mean), sigma (standard deviation).
    n : array
        Array of data points.

    Returns
    -------
    output : float
        Log-likelihood; -inf if mu <= 0 or sigma <= 0.
    """
    mu, sigma = params

    if mu <= 0 or sigma <= 0:
        return -np.inf

    times = np.asarray(n, dtype=float)
    at_cap = times == 60

    total = np.sum(st.norm.logpdf(times[~at_cap], mu, sigma))

    n_capped = np.count_nonzero(at_cap)
    if n_capped:
        # logsf instead of log(1 - cdf): the latter becomes log(0) = -inf as
        # soon as the cdf rounds to 1, although the true tail mass is positive.
        total += n_capped * st.norm.logsf(60, mu, sigma)

    return total
    

In [13]:
def log_like_iid_gamma_mixed(params, n):
    """Log likelihood for i.i.d. gamma measurements mixed with a Dirac delta at 60.

    A measurement equal to 60 either comes from the point mass (probability
    1 - omega) or is a gamma draw that reached the cap of 60 (probability
    omega times the gamma survival function at 60). Every other measurement
    contributes omega times the gamma density.

    Parameters
    ----------
    params : array
        Parameters alpha (shape), beta (rate; the scale is 1 / beta), omega
        (mixture weight of the gamma component).
    n : array
        Array of data points.

    Returns
    -------
    output : float
        Log-likelihood; -inf if alpha <= 0, beta <= 0 or omega is not
        strictly between 0 and 1.
    """
    alpha, beta, omega = params

    if alpha <= 0 or beta <= 0 or omega <= 0 or omega >= 1:
        return -np.inf

    times = np.asarray(n, dtype=float)
    at_cap = times == 60
    scale = 1 / beta
    log_omega = np.log(omega)

    total = np.sum(log_omega + st.gamma.logpdf(times[~at_cap], alpha, loc=0, scale=scale))

    n_capped = np.count_nonzero(at_cap)
    if n_capped:
        # logsf avoids taking log(1 - cdf) when the cdf rounds to 1.
        log_p_cap = np.logaddexp(
            np.log1p(-omega),
            log_omega + st.gamma.logsf(60, alpha, loc=0, scale=scale),
        )
        total += n_capped * log_p_cap

    return total

In [14]:
def log_like_iid_gamma(params, n):
    """Log likelihood for i.i.d. gamma measurements capped at 60.

    Measurements equal to 60 contribute the log survival function at 60;
    all other measurements contribute the log density.

    Parameters
    ----------
    params : array
        Parameters alpha (shape), beta (rate; the scale is 1 / beta).
    n : array
        Array of data points.

    Returns
    -------
    output : float
        Log-likelihood; -inf if alpha <= 0 or beta <= 0.
    """
    alpha, beta = params

    if alpha <= 0 or beta <= 0:
        return -np.inf

    times = np.asarray(n, dtype=float)
    at_cap = times == 60
    scale = 1 / beta

    total = np.sum(st.gamma.logpdf(times[~at_cap], alpha, loc=0, scale=scale))

    n_capped = np.count_nonzero(at_cap)
    if n_capped:
        # logsf instead of log(1 - cdf): the latter becomes log(0) = -inf as
        # soon as the cdf rounds to 1, although the true tail mass is positive.
        total += n_capped * st.gamma.logsf(60, alpha, loc=0, scale=scale)

    return total
    

In [15]:
def log_like_iid_inv_gamma_mixed(params, n):
    """Log likelihood for i.i.d. inverse gamma measurements mixed with a Dirac delta at 60.

    A measurement equal to 60 either comes from the point mass (probability
    1 - omega) or is an inverse gamma draw that reached the cap of 60
    (probability omega times the inverse gamma survival function at 60).
    Every other measurement contributes omega times the inverse gamma
    density.

    Parameters
    ----------
    params : array
        Parameters alpha (shape), beta (scale), omega (mixture weight of the
        inverse gamma component).
    n : array
        Array of data points.

    Returns
    -------
    output : float
        Log-likelihood; -inf if alpha <= 0, beta <= 0 or omega is not
        strictly between 0 and 1.
    """
    alpha, beta, omega = params

    if alpha <= 0 or beta <= 0 or omega <= 0 or omega >= 1:
        return -np.inf

    times = np.asarray(n, dtype=float)
    at_cap = times == 60
    log_omega = np.log(omega)

    total = np.sum(log_omega + st.invgamma.logpdf(times[~at_cap], alpha, loc=0, scale=beta))

    n_capped = np.count_nonzero(at_cap)
    if n_capped:
        # logsf avoids taking log(1 - cdf) when the cdf rounds to 1.
        log_p_cap = np.logaddexp(
            np.log1p(-omega),
            log_omega + st.invgamma.logsf(60, alpha, loc=0, scale=beta),
        )
        total += n_capped * log_p_cap

    return total

In [16]:
def log_like_iid_inv_gamma(params, n):
    """Log likelihood for i.i.d. inverse gamma measurements capped at 60.

    Measurements equal to 60 contribute the log survival function at 60;
    all other measurements contribute the log density.

    Parameters
    ----------
    params : array
        Parameters alpha (shape), beta (scale).
    n : array
        Array of data points.

    Returns
    -------
    output : float
        Log-likelihood; -inf if alpha <= 0 or beta <= 0.
    """
    alpha, beta = params

    if alpha <= 0 or beta <= 0:
        return -np.inf

    times = np.asarray(n, dtype=float)
    at_cap = times == 60

    total = np.sum(st.invgamma.logpdf(times[~at_cap], alpha, loc=0, scale=beta))

    n_capped = np.count_nonzero(at_cap)
    if n_capped:
        # logsf instead of log(1 - cdf): the latter becomes log(0) = -inf as
        # soon as the cdf rounds to 1, although the true tail mass is positive.
        total += n_capped * st.invgamma.logsf(60, alpha, loc=0, scale=beta)

    return total
    

In [17]:
def mle_iid_lognormal_mixed(n):
    """Maximum likelihood estimate for the mixed lognormal model.

    Minimizes the negative of `log_like_iid_lognormal_mixed` with Powell's
    method, starting from (mu, sigma, omega) = (2, 5, 0.5). Warnings raised
    during the optimization are silenced.

    Parameters
    ----------
    n : array
        Array of data points.

    Returns
    -------
    output : array
        MLEs of mu, sigma, omega.

    Raises
    ------
    RuntimeError
        If the optimizer does not report success.
    """

    def neg_log_like(params, data):
        return -log_like_iid_lognormal_mixed(params, data)

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        res = scipy.optimize.minimize(
            fun=neg_log_like,
            x0=np.array([2, 5, 0.5]),
            args=(n,),
            method='Powell'
        )

    if not res.success:
        raise RuntimeError('Convergence failed with message', res.message)

    return res.x



In [18]:
def gen_lognormal_mixed(params, size, rng):
    """Draws a sample out of the mixed lognormal distribution with Dirac delta function
    parametrized by mu, sigma and omega.

    Each point is 60 with probability 1 - omega and a lognormal draw with
    probability omega; lognormal draws above 60 are clipped to 60. All
    randomness comes from `rng`, so a seeded generator gives a reproducible
    sample.

    Parameters
    ----------
    params : array
        Parameters mu, sigma, omega.
    size : int
        Number of data points in a sample.
    rng: Generator
        The initialized generator object.

    Returns
    -------
    output : array
        Newly generated sample of length `size`; the lognormal draws come
        first, followed by the points drawn from the delta component.

    """
    mu, sigma, omega = params

    # Number of points that come from the point mass at 60.
    num_max = rng.binomial(size, (1 - omega))
    num_lognorm = size - num_max

    # Draw from the supplied generator rather than scipy's global random
    # state, otherwise a seed on `rng` would not fix these values.
    y_lognorm = rng.lognormal(mean=mu, sigma=sigma, size=num_lognorm)

    y = np.concatenate((y_lognorm, np.full(num_max, 60.0)))

    # Measurements cannot exceed the cap.
    return np.minimum(y, 60.0)

In [19]:
def log_like(params, n, model):
    """
    Log likelihood for i.i.d. measurements under the model named by `model`.

    Thin dispatcher: looks up the model-specific log-likelihood function and
    evaluates it at `params` for the data `n`.

    Parameters
    ----------
    params : array
        Parameters of the chosen model, in the order expected by its
        log-likelihood function.
    n : array
        Array of data points.
    model : string
        One of the following: 'weibull', 'weibull_mixed', 'lognormal', 'lognormal_mixed', 'normal', 'normal_mixed',
            'gamma', 'gamma_mixed', 'inverse_gamma', 'inverse_gamma_mixed'

    Returns
    -------
    output : float
        Log-likelihood.

    Raises
    ------
    ValueError
        If `model` is not one of the names listed above.
    """
    known_models = (
        'weibull', 'weibull_mixed',
        'lognormal', 'lognormal_mixed',
        'normal', 'normal_mixed',
        'gamma', 'gamma_mixed',
        'inverse_gamma', 'inverse_gamma_mixed',
    )
    if model not in known_models:
        raise ValueError('Pick an appropriate model!')

    dispatch = {
        'weibull': log_like_iid_weibull,
        'weibull_mixed': log_like_iid_weibull_mixed,
        'lognormal': log_like_iid_lognormal,
        'lognormal_mixed': log_like_iid_lognormal_mixed,
        'normal': log_like_iid_normal,
        'normal_mixed': log_like_iid_normal_mixed,
        'gamma': log_like_iid_gamma,
        'gamma_mixed': log_like_iid_gamma_mixed,
        'inverse_gamma': log_like_iid_inv_gamma,
        'inverse_gamma_mixed': log_like_iid_inv_gamma_mixed,
    }
    return dispatch[model](params, n)
                                     

In [20]:
def mle_iid(n, model):
    """Maximum likelihood estimates of the parameters of a chosen model
    for i.i.d. measurements.

    Minimizes the negative of `log_like` with Powell's method, starting from
    a model-specific initial guess. Warnings raised during the optimization
    are silenced.

    Parameters
    ----------
    n : array
        Array of data points.
    model : string
        One of the following: 'weibull', 'weibull_mixed', 'lognormal', 'lognormal_mixed', 'normal', 'normal_mixed',
            'gamma', 'gamma_mixed', 'inverse_gamma', 'inverse_gamma_mixed'

    Returns
    -------
    output : array
        MLE for given model parameters.

    Raises
    ------
    ValueError
        If `model` is not one of the names listed above.
    RuntimeError
        If the optimizer does not report success.
    """
    # Starting point of the optimizer for every supported model.
    start_points = {
        'weibull': [2, 15],
        'weibull_mixed': [2, 15, 0.1],
        'lognormal': [2, 15],
        'lognormal_mixed': [2, 15, 0.5],
        'normal': [10, 15],
        'normal_mixed': [10, 15, 0.5],
        'gamma': [0.1, 10],
        'gamma_mixed': [0.5, 10, 0.5],
        'inverse_gamma': [5, 15],
        'inverse_gamma_mixed': [5, 15, 0.5],
    }

    try:
        start = start_points[model]
    except (KeyError, TypeError):
        raise ValueError('Pick an appropriate model!') from None
    init_guess = np.array(start)

    def neg_log_like(params, data):
        return -log_like(params, data, model)

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        res = scipy.optimize.minimize(
            fun=neg_log_like,
            x0=init_guess,
            args=(n,),
            method='Powell'
        )

    if not res.success:
        raise RuntimeError('Convergence failed with message', res.message)

    return res.x

In [21]:
def draw_perm_sample(x, y):
    """Generate a permutation sample.

    The two data sets are pooled, the pool is shuffled in place with NumPy's
    global random state, and it is split again into pieces of the original
    lengths.

    Parameters
    ----------
    x, y : array
        The two data sets.

    Returns
    -------
    output : tuple of arrays
        Permuted samples with the lengths of `x` and `y`, respectively.
    """
    pooled = np.concatenate((x, y))
    np.random.shuffle(pooled)

    cut = len(x)
    first, second = pooled[:cut], pooled[cut:]
    return first, second


def draw_perm_reps(x, y, stat_fun, size=1):
    """Generate array of permutation replicates.

    Parameters
    ----------
    x, y : array
        The two data sets.
    stat_fun : callable
        Called as ``stat_fun(x_perm, y_perm)`` on every permutation sample.
    size : int
        Number of replicates to draw.

    Returns
    -------
    output : array
        The values returned by `stat_fun`, one per permutation sample.
    """
    reps = []
    for _ in range(size):
        x_perm, y_perm = draw_perm_sample(x, y)
        reps.append(stat_fun(x_perm, y_perm))

    return np.array(reps)

def draw_perm_reps_diff_mean(x, y, size=1):
    """Generate array of permutation replicates of the absolute difference of means.

    Parameters
    ----------
    x, y : array
        The two data sets.
    size : int
        Number of replicates to draw.

    Returns
    -------
    output : array
        ``|mean(x_perm) - mean(y_perm)|`` for every permutation sample.
    """
    def abs_diff_of_means(a, b):
        return np.abs(np.mean(a) - np.mean(b))

    return np.array(
        [abs_diff_of_means(*draw_perm_sample(x, y)) for _ in range(size)],
        dtype=float,
    )

Plotting functions:

In [22]:
def plot_conf_int_pub(data, parameter_name, palette):
    """Publication-size plot of the confidence intervals of one parameter.

    Builds the plot with `bebi103.viz.confints`, enlarges the title and tick
    label fonts and switches the output backend to SVG, so the figure can be
    exported as an .svg or .html file.

    Parameters
    ----------
    data : list of dicts
        Confidence-interval records passed on to `bebi103.viz.confints`.
    parameter_name : string
        Name of the parameter; prefixed to the plot title.
    palette : list
        Colors passed on to `bebi103.viz.confints`.

    Returns
    -------
    output : plot object returned by `bebi103.viz.confints`
    """
    plot_title = parameter_name+' 95%CI, lognormal mixed model'

    p = bebi103.viz.confints(
        data,
        title=plot_title,
        palette=palette,
        frame_width=4000,
        frame_height=800,
        hidpi=True,
        line_width=5,
        marker_kwargs={"size":15}
    )

    # Large fonts to go with the 4000 x 800 frame.
    p.title.text_font_size = '48pt'
    for axis in (p.xaxis, p.yaxis):
        axis.major_label_text_font_size = "40pt"

    p.output_backend = 'svg'

    return p

def plot_conf_int_notebook(data, parameter_name, palette):
    """Notebook-size plot of the confidence intervals of one parameter.

    Same title and palette as the publication version, but with the default
    size and styling of `bebi103.viz.confints`, for viewing in the notebook.

    Parameters
    ----------
    data : list of dicts
        Confidence-interval records passed on to `bebi103.viz.confints`.
    parameter_name : string
        Name of the parameter; prefixed to the plot title.
    palette : list
        Colors passed on to `bebi103.viz.confints`.

    Returns
    -------
    output : plot object returned by `bebi103.viz.confints`
    """
    plot_title = parameter_name+' 95%CI, lognormal mixed model'

    return bebi103.viz.confints(data, title=plot_title, palette=palette)

### Calculating MLEs and AICs for all the models

In [23]:
# model name -> {group name -> array of parameter MLEs}
mles_dict = {}
models = ['weibull', 'weibull_mixed', 'lognormal', 'lognormal_mixed', 'normal', 'normal_mixed']

# One row per model, one column per group (columns are added while filling).
AICs = pd.DataFrame(index=models)

for model in models:
    model_mles = {}
    mles_dict[model] = model_mles

    for group, times in group_vals.items():
        if len(times) == 0:
            continue

        params = mle_iid(times, model=model)
        model_mles[group] = params

        # AIC = -2 * (log-likelihood at the MLE - number of parameters)
        _llk = log_like(params, times, model=model)
        AICs.loc[model, group] = -2 * (_llk - len(params))

AICs

Unnamed: 0,ASO_GF,ASO_SPF,WT_GF,WT_SPF
weibull,360.050726,371.526209,386.916898,286.052855
weibull_mixed,336.542029,333.131344,373.919077,276.334201
lognormal,340.990256,351.717031,365.551242,265.06646
lognormal_mixed,324.457941,316.824066,358.412639,256.320146
normal,436.758215,458.935979,474.707947,373.115096
normal_mixed,371.972525,364.966926,425.297491,340.198657


Based on the AIC, we decided to proceed with the log-normal mixed model.

## Mixed Lognormal model

Fitting data into the mixed log-normal model with Dirac delta function.

\begin{align}
    &y' \sim \omega \text{ LogNormal}(\mu, \sigma) + (1 - \omega) \delta_{y=60} \\
    &y = \begin{cases} 
      y' & \text{if } y' <60 \\ 
      60  & \text{if } y' \ge 60 \end{cases}
\end{align}

In [24]:
# Per-group MLEs (mu, sigma, omega) of the selected mixed lognormal model.
ln_mles = mles_dict['lognormal_mixed']
ln_mles

{'ASO_GF': array([1.93362984, 0.71912277, 0.86907987]),
 'ASO_SPF': array([1.85788545, 0.62313222, 0.79016907]),
 'WT_GF': array([1.77377033, 0.83780191, 0.93365226]),
 'WT_SPF': array([1.50013968, 0.82369209, 0.93409723])}

In [25]:
# group name -> array of bootstrap replicates of the MLEs; filled either by the
# bootstrap cell or by the cell that loads previously saved replicates.
bs_mle_samples_mx_ln = {}

Drawing bootstrap replicates of parameter MLEs for every group in the experiment.  
**Choose an appropriate value for <code>n_jobs</code> according to the number of cores available on the machine. Windows users might need to set it to 1 to make it work. Alternatively, previously generated bootstrap replicates can be loaded from disk - refer to the code cell after the following one.**

In [112]:
# For every group: draw bootstrap replicates of the mixed lognormal MLEs,
# save them to ../Output and keep them in `bs_mle_samples_mx_ln`.
# NOTE(review): this cell is slow (10000 optimizations per group) and no seed
# is passed, so the replicates differ from run to run.
for group in group_vals.keys():
    print(group)

    # mle_iid is called with the extra argument 'lognormal_mixed';
    # gen_lognormal_mixed is passed as the function that generates new data sets.
    bs_mle_sample = bebi103.bootstrap.draw_bs_reps_mle(
        mle_iid,
        gen_lognormal_mixed,
        data=group_vals[group],
        mle_args=('lognormal_mixed',),
        size=10000,
        n_jobs=7
    )
    
    # Columns follow the parameter order returned by mle_iid for this model.
    _df = pd.DataFrame(bs_mle_sample, columns=['mu', 'sigma', 'omega'])
    # The file name is the group name directly followed by the suffix (no
    # separator); the loading cell below relies on exactly this name.
    _df.to_csv(os.path.join("..", "Output", group+"GFxSPF_beam_bs_mle_samples.csv"))

    bs_mle_samples_mx_ln[group] = bs_mle_sample

ASO_GF


KeyboardInterrupt: 

**If you are unable to run the code cell above or it takes too long, you can use the code below.**

In [26]:
# Load the bootstrap replicates saved earlier; the first CSV column is the
# row index written by to_csv.
for group in group_vals:
    bs_csv_path = os.path.join("..", "Output", group+"GFxSPF_beam_bs_mle_samples.csv")
    _df = pd.read_csv(bs_csv_path, index_col=0)
    bs_mle_samples_mx_ln[group] = _df.to_numpy()

## Plotting:

### Graphical model assessment

Generating bootstrap samples for further graphical model assessment.

In [27]:
# group name -> 2-D array of simulated data sets (one row per data set), each
# of the same size as the observed data and generated at the group's MLE.
bs_samples_mx_ln = {}
# NOTE(review): the generator is not seeded, so the simulated data sets (and
# the plots built from them) change from run to run.
rng = np.random.default_rng()

for group in sorted(group_vals, key=lambda x: order[x]):
    params = ln_mles[group]
    # 100000 simulated data sets per group.
    single_samples = np.array([gen_lognormal_mixed(params, size=len(group_vals[group]), rng=rng) for _ in range(100000)])
    bs_samples_mx_ln[group] = single_samples


Making Q-Q plots (the predictive ECDF plots are built in the same loop and shown in the next cell)

In [28]:
# One Q-Q plot and one predictive ECDF plot per group, in plotting order.
qqplots_mx_ln =[]
precdf_plots_mx_ln = []

for group in sorted(group_vals, key=lambda x: order[x]):

    # Observed data against the data sets simulated from the fitted model.
    p = bebi103.viz.qqplot(
        data=group_vals[group],
        samples=bs_samples_mx_ln[group],
        x_axis_label="time of descent",
        y_axis_label="time of descent",
        title=group + ' Q-Q plot'       
    )
    qqplots_mx_ln.append(p)

    # Built here, displayed in the next cell.
    p1 = bebi103.viz.predictive_ecdf(
        samples=bs_samples_mx_ln[group], 
        data=group_vals[group], 
        x_axis_label="time",
        title=group + ' predictive ECDF'
    )
    precdf_plots_mx_ln.append(p1)

# Show the Q-Q plots side by side.
qq_lt_mx_ln = bokeh.layouts.row(qqplots_mx_ln)

bokeh.io.show(qq_lt_mx_ln)

Making predictive ECDF plots

In [29]:
# Show the predictive ECDF plots built in the previous cell side by side.
pe_lt_mx_ln = bokeh.layouts.row(precdf_plots_mx_ln)

bokeh.io.show(pe_lt_mx_ln)

Predictive ECDFs look reasonable, we can proceed with the analysis.

### Generating 95% confidence intervals for the parameter MLEs

Creating specific dictionary structure needed for the parameter 95% confidence interval plotting.

In [30]:
# One list of records per parameter; every record has the keys
# 'label', 'conf_int' and 'estimate'.
mu_lst_mx_ln = []
sigma_lst_mx_ln = []
omega_lst_mx_ln = []

mus_mx_ln = {}
sigmas_mx_ln = {}
omegas_mx_ln = {}

# Column i of the bootstrap replicates and entry i of the MLE both belong to
# the i-th list below (mu, sigma, omega).
_param_lists = (mu_lst_mx_ln, sigma_lst_mx_ln, omega_lst_mx_ln)

for group in sorted(group_vals, key=lambda x: order[x]):
    _smpls = bs_mle_samples_mx_ln[group]
    mu_mle, sigma_mle, omega_mle = ln_mles[group]

    for _col, (_estimate, _records) in enumerate(zip((mu_mle, sigma_mle, omega_mle), _param_lists)):
        # 95% percentile interval of the bootstrap replicates.
        _conf_int = np.percentile(_smpls[:, _col], [2.5, 97.5])
        _records.append({'label': group, 'conf_int': _conf_int, 'estimate': _estimate})

Plotting and saving 95% confidence interval plot for the mu parameter.

In [31]:
# Publication-size plot of the mu confidence intervals, exported as SVG ...
p_pub = plot_conf_int_pub(mu_lst_mx_ln, "mu", palette_CI)
bokeh.io.export_svgs(p_pub, filename=os.path.join("..", "Output", "GFxSPF_beam_mu_conf_int.svg"))

# ... and saved as a standalone HTML file that loads BokehJS from the CDN.
bokeh.io.save(
    p_pub,
    filename=os.path.join("..", "Output", "GFxSPF_beam_mu_conf_int.html"),
    title='Bokeh plot',
    resources=bokeh.resources.CDN,                          
);

# Notebook-size version for inline viewing.
bokeh.io.show(plot_conf_int_notebook(mu_lst_mx_ln, "mu", palette_CI))

Plotting and saving 95% confidence interval plot for the sigma parameter.

In [32]:
# Publication-size plot of the sigma confidence intervals. It is built from
# the sigma records and written to its own files, so the mu plot saved by the
# previous cell is not overwritten.
p_pub = plot_conf_int_pub(sigma_lst_mx_ln, "sigma", palette_CI)
bokeh.io.export_svgs(p_pub, filename=os.path.join("..", "Output", "GFxSPF_beam_sigma_conf_int.svg"))

# Standalone HTML file that loads BokehJS from the CDN.
bokeh.io.save(
    p_pub,
    filename=os.path.join("..", "Output", "GFxSPF_beam_sigma_conf_int.html"),
    title='Bokeh plot',
    resources=bokeh.resources.CDN,
);

# Notebook-size version for inline viewing.
bokeh.io.show(plot_conf_int_notebook(sigma_lst_mx_ln, "sigma", palette_CI))

Plotting and saving 95% confidence interval plot for the omega parameter.

In [33]:
# Publication-size plot of the omega confidence intervals. It is built from
# the omega records and written to its own files, so the mu plot saved
# earlier is not overwritten.
p_pub = plot_conf_int_pub(omega_lst_mx_ln, "omega", palette_CI)
bokeh.io.export_svgs(p_pub, filename=os.path.join("..", "Output", "GFxSPF_beam_omega_conf_int.svg"))

# Standalone HTML file that loads BokehJS from the CDN.
bokeh.io.save(
    p_pub,
    filename=os.path.join("..", "Output", "GFxSPF_beam_omega_conf_int.html"),
    title='Bokeh plot',
    resources=bokeh.resources.CDN,
);

# Notebook-size version for inline viewing.
bokeh.io.show(plot_conf_int_notebook(omega_lst_mx_ln, "omega", palette_CI))

## Null hypothesis significance testing

Kruskal-Wallis analysis of the groups:

In [34]:
# Kruskal-Wallis test across the four groups. Despite its name, `p_val_ks`
# holds the whole result object (statistic and p-value), not just the p-value.
p_val_ks = st.kruskal(group_vals['WT_SPF'], group_vals['ASO_SPF'], group_vals['WT_GF'], group_vals['ASO_GF'])
p_val_ks

KruskalResult(statistic=16.38789593459405, pvalue=0.0009441282629611115)

Post-hoc Conover pairwise comparison:

In [35]:
# Pairwise Conover post-hoc comparisons with Benjamini-Hochberg FDR adjustment.
p_vals = sp.posthoc_conover([group_vals['WT_SPF'], group_vals['ASO_SPF'], group_vals['WT_GF'], group_vals['ASO_GF']], 
                            p_adjust='fdr_bh')
# The result is labelled 1..4 in the order the groups were passed; map those
# labels back to the group names (same order as in the call above).
p_vals = p_vals.rename(columns={1:'WT_SPF', 2:'ASO_SPF', 3:'WT_GF', 4:'ASO_GF'}, 
                       index={1:'WT_SPF', 2:'ASO_SPF', 3:'WT_GF', 4:'ASO_GF'})
# Attach the omnibus Kruskal-Wallis result (the same value in every row) so
# the saved table is self-contained.
p_vals['KruskalWallis p-val'] = p_val_ks.pvalue
p_vals['KruskalWallis statistic'] = p_val_ks.statistic
p_vals.to_csv(os.path.join("..", "Output", "GFxSPF_beam_KW_conover.csv"))
p_vals

Unnamed: 0,WT_SPF,ASO_SPF,WT_GF,ASO_GF,KruskalWallis p-val,KruskalWallis statistic
WT_SPF,1.0,0.00144,0.114636,0.003174,0.000944,16.387896
ASO_SPF,0.00144,1.0,0.056052,0.706174,0.000944,16.387896
WT_GF,0.114636,0.056052,1.0,0.111796,0.000944,16.387896
ASO_GF,0.003174,0.706174,0.111796,1.0,0.000944,16.387896


In [36]:
%load_ext watermark
%watermark -v -p numpy,pandas,numba,bokeh,bebi103,jupyterlab

Python implementation: CPython
Python version       : 3.11.5
IPython version      : 8.15.0

numpy     : 1.24.3
pandas    : 2.1.1
numba     : 0.58.0
bokeh     : 3.2.0
bebi103   : 0.1.17
jupyterlab: 4.0.6

