In [18]:
import numpy as np
import scipy.stats as st
import pandas as pd
import os
import scipy
import warnings
import itertools
import scikit_posthocs as sp

import iqplot
import bebi103

import bokeh.io
import bokeh.plotting
import bokeh.layouts
bokeh.io.output_notebook()

### Specifying the data, models, main effects

In [48]:
data_path = '../Data/'
filename = 'MotorBehaviordata_DJKOGFSPFMF_Livia_111023.xlsx'
sheet_name = 'Pole and Beam'

# Group factors
effect_1 = 'Genotype' 
effect_2 = 'Treatment' # "Treatment" or "Microbiome"

# Type pf test for current analysis
mtest = 'Beam_T' # "Beam_T" or "Pole_T"

# List of models to fit into and assess
# Encoded models: 'weibull', 'weibull_mixed', 'lognormal', 'lognormal_mixed', 'normal', 'normal_mixed', 
# 'inverse_gamma', 'inverse_gamma_mixed', 'gamma', 'gamma_mixed'
models = ['weibull', 'weibull_mixed', 'lognormal', 'lognormal_mixed', 'normal', 'normal_mixed', 
          'inverse_gamma', 'inverse_gamma_mixed', 'gamma', 'gamma_mixed']

# Whether or not the experiment has several cohorts and a dedicated cohort column
cohort_col = False 

### Plotting characteristics

Specifying palettes and plotting order depending on the data used (SPF/GF, NAC, Auronafin, DJKO experiments).

In [47]:
# Palettes:
palette_CI = ['#fdb863', '#e66101', '#b2abd2', '#5e3c99']
palette_CI_NAC = ['#fdb863', '#e66101', '#92C5DE', '#0571B0']
palette_CI_auronafin = ['#fdb863', '#e66101', '#BABABA', '#404040']
palette_CI_djko = ['#fdb863', '#e66101', "#008837"]

# Plotting orders:
order = {'WT_SPF': 1, 'ASO_SPF': 2, 'WT_GF': 3, 'ASO_GF': 4}
order_NAC = {'WT_V': 1, 'ASO_V': 2, 'WT_NAC': 3, 'ASO_NAC': 4}
order_auronafin = {'WT_V': 1, 'ASO_V': 2, 'WT_A': 3, 'ASO_A': 4}
order_djko = {'B6_SPF': 1, 'DJKO_SPF': 2, 'DJKO_GF': 3}

Choosing the palette and order needed for current analysis.

In [21]:
palette_CI = palette_CI_djko
order = order_djko

### Exploratory data analysis

Tidying up the data.

In [22]:
pole_beam_df = pd.read_excel(os.path.join(data_path, filename), sheet_name=sheet_name)
pole_beam_df = pole_beam_df.rename(columns={'ID ':'ID', 'Group ':'Group', 'treatment':'Treatment'})

if (effect_1 not in pole_beam_df.columns) or (effect_2 not in pole_beam_df.columns):
    pole_beam_df[[effect_1, effect_2]] = pole_beam_df['Group'].str.split('-', expand = True)

pole_beam_df

Unnamed: 0,Date,cohort,Group,Microbiome,Genotype,Sex,ID,Pole_T1,Pole_T2,Pole_T3,Average_Pole,Beam_T1,Beam_T2,Beam_T3,Average_Beam,Treatment
0,83123,1,DJKO-GF,GF,DJKO,F,1,60.0,10.6,23.81,31.47,2.93,2.69,3.89,3.17,GF
1,83123,1,DJKO-GF,GF,DJKO,F,2,2.75,1.88,2.58,2.403333,3.58,2.33,2.41,2.773333,GF
2,83123,1,DJKO-GF,GF,DJKO,M,3,37.14,19.0,23.47,26.536667,3.53,4.97,60.0,22.833333,GF
3,83123,1,DJKO-GF,GF,DJKO,M,4,4.51,4.03,3.36,3.966667,11.9,60.0,60.0,43.966667,GF
4,83123,1,DJKO-GF,GF,DJKO,M,5,2.95,2.9,3.04,2.963333,7.56,5.22,5.51,6.096667,GF
5,83123,1,DJKO-SPF,SPF,DJKO,F,282,5.5,6.2,5.07,5.59,13.65,13.22,15.3,14.056667,SPF
6,83123,1,DJKO-SPF,SPF,DJKO,F,283,8.24,3.7,6.09,6.01,60.0,26.03,5.46,30.496667,SPF
7,83123,1,DJKO-SPF,SPF,DJKO,F,284,3.88,5.32,4.07,4.423333,9.75,3.82,3.15,5.573333,SPF
8,83123,1,DJKO-SPF,SPF,DJKO,F,285,8.21,60.0,12.63,26.946667,7.65,7.76,5.04,6.816667,SPF
9,83123,1,DJKO-SPF,SPF,DJKO,M,278,60.0,11.39,3.77,25.053333,2.16,60.0,12.33,24.83,SPF


Creating the working data frame only with the values of interest, removing missing values and cases where mice jumped off of the pole/beam.

In [23]:
pole_beam_df = pole_beam_df.replace(to_replace = 'jumped', value = float("NaN"))

res = [i for i in list(pole_beam_df.columns) if (mtest) in i]

if cohort_col:
    col_list = [cohort_col, effect_1, effect_2, 'ID']
else:
    col_list = [effect_1, effect_2, 'ID']

pole_df = pole_beam_df[col_list + res].copy()
pole_df_long = pole_df.melt(id_vars = col_list, var_name='Trial', value_name='Time')
pole_df_long['Trial'] = pole_df_long['Trial'].str[-2:]
pole_df_long['ID'] = pole_df_long['ID'].astype('str')
pole_df_long = pole_df_long.dropna()

Creating dictionaries with groups as keys and experimentally measured values.

In [24]:
group_vals = {}

effect1_lst = pole_df_long[effect_1].unique()
effect2_lst = pole_df_long[effect_2].unique()

for i in effect1_lst:
    for j in effect2_lst:
        name = i + '_' + j
        n = pole_df_long.loc[(pole_df_long[effect_1] == i) & (pole_df_long[effect_2] == j), 'Time'].values
        if len(n) != 0:
            group_vals[name] = n

### Defining all the necessary functions for the MLE calculations

In [25]:
def log_like_iid_weibull_mixed(params, n):
    """Log likelihood for i.i.d. weibull measurements mixed with Dirac delta function.

    Parameters
    ----------
    params : array
        Parameters alpha, sigma, omega.
    n : array
        Array of data points.

    Returns
    -------
    output : float
        Log-likelihood.
    """
    alpha, sigma, omega = params

    if alpha <= 0 or sigma <= 0 or omega <=0 or omega >= 1:
        return -np.inf

    target = 0
    
    for i in n:
        if i == 60:
            target += scipy.special.logsumexp([np.log(1 - omega), np.log(omega) + np.log(1 - st.weibull_min.cdf(i, alpha, scale=sigma))])
        else:
            target += np.log(omega) + st.weibull_min.logpdf(i, alpha, scale=sigma)
                                              

    return target

In [26]:
def log_like_iid_weibull(params, n):
    """Log likelihood for i.i.d. weibull measurements.

    Parameters
    ----------
    params : array
        Parameters alpha, sigma, omega.
    n : array
        Array of data points.

    Returns
    -------
    output : float
        Log-likelihood.
    """
    alpha, sigma = params

    if alpha <= 0 or sigma <= 0:
        return -np.inf

    target = 0
    for i in n:
        if i == 60:
            target += np.log(1 - st.weibull_min.cdf(i, alpha, scale=sigma))
        else:
            target += st.weibull_min.logpdf(i, alpha, scale=sigma)

    return target

In [27]:
def log_like_iid_lognormal_mixed(params, n):
    """Log likelihood for i.i.d. lognormal measurements mixed with Dirac delta function.

    Parameters
    ----------
    params : array
        Parameters mu, sigma, omega.
    n : array
        Array of data points.

    Returns
    -------
    output : float
        Log-likelihood.
    """
    mu, sigma, omega = params

    if mu <= 0 or sigma <= 0 or omega <=0 or omega >= 1:
        return -np.inf

    target = 0
    
    for i in n:
        if i == 60:
            target += scipy.special.logsumexp([np.log(1 - omega), np.log(omega) + np.log(1 - st.lognorm.cdf(i, sigma, scale=np.exp(mu)))])
        else:
            target += np.log(omega) + st.lognorm.logpdf(i, sigma, scale=np.exp(mu))
                                              

    return target

In [28]:
def log_like_iid_lognormal(params, n):
    """Log likelihood for i.i.d. lognormal measurements.

    Parameters
    ----------
    params : array
        Parameters mu, sigma.
    n : array
        Array of data points.

    Returns
    -------
    output : float
        Log-likelihood.
    """
    mu, sigma = params

    if mu <= 0 or sigma <= 0:
        return -np.inf


    target = 0

    cdf_val = np.log(1 - st.lognorm.cdf(60, sigma, scale=np.exp(mu)))
    
    for i in n:
        if i == 60:
            target += cdf_val
        else:
            target += st.lognorm.logpdf(i, sigma, scale=np.exp(mu))

    return target
    

In [29]:
def log_like_iid_normal_mixed(params, n):
    """Log likelihood for i.i.d. normal measurements mixed with Dirac delta function.

    Parameters
    ----------
    params : array
        Parameters mu, sigma, omega.
    n : array
        Array of data points.

    Returns
    -------
    output : float
        Log-likelihood.
    """
    mu, sigma, omega = params

    if mu <= 0 or sigma <= 0 or omega <=0 or omega >= 1:
        return -np.inf

    target = 0

    cdf_val = np.log(1 - st.norm.cdf(60, mu, sigma))
    
    for i in n:
        if i == 60:
            target += scipy.special.logsumexp([np.log(1 - omega), np.log(omega) + cdf_val])
        else:
            target += np.log(omega) + st.norm.logpdf(i, mu, sigma)
                                              

    return target

In [30]:
def log_like_iid_normal(params, n):
    """Log likelihood for i.i.d. normal measurements.

    Parameters
    ----------
    params : array
        Parameters mu, sigma.
    n : array
        Array of data points.

    Returns
    -------
    output : float
        Log-likelihood.
    """
    mu, sigma = params

    if mu <= 0 or sigma <= 0:
        return -np.inf


    target = 0

    cdf_val = np.log(1 - st.norm.cdf(60, mu, sigma))
    
    for i in n:
        if i == 60:
            target += cdf_val
        else:
            target += st.norm.logpdf(i, mu, sigma)

    return target
    

In [31]:
def log_like_iid_gamma_mixed(params, n):
    """Log likelihood for i.i.d. gamma measurements mixed with Dirac delta function.

    Parameters
    ----------
    params : array
        Parameters mu, sigma, omega.
    n : array
        Array of data points.

    Returns
    -------
    output : float
        Log-likelihood.
    """
    alpha, beta, omega = params

    if alpha <= 0 or beta <= 0 or omega <=0 or omega >= 1:
        return -np.inf

    target = 0

    cdf_val = np.log(1 - st.gamma.cdf(60, alpha, loc=0, scale=1/beta))
    
    for i in n:
        if i == 60:
            target += scipy.special.logsumexp([np.log(1 - omega), np.log(omega) + cdf_val])
        else:
            target += np.log(omega) + st.gamma.logpdf(i, alpha, loc=0, scale=1/beta)
                                              

    return target

In [32]:
def log_like_iid_gamma(params, n):
    """Log likelihood for i.i.d. gamma measurements.

    Parameters
    ----------
    params : array
        Parameters mu, sigma.
    n : array
        Array of data points.

    Returns
    -------
    output : float
        Log-likelihood.
    """
    alpha, beta = params

    if alpha <= 0 or beta <= 0:
        return -np.inf


    target = 0

    cdf_val = np.log(1 - st.gamma.cdf(60, alpha, loc=0, scale=1/beta))
    
    for i in n:
        if i == 60:
            target += cdf_val
        else:
            target += st.gamma.logpdf(i, alpha, loc=0, scale=1/beta)

    return target
    

In [33]:
def log_like_iid_inv_gamma_mixed(params, n):
    """Log likelihood for i.i.d. inverse gamma measurements mixed with Dirac delta function.

    Parameters
    ----------
    params : array
        Parameters mu, sigma, omega.
    n : array
        Array of data points.

    Returns
    -------
    output : float
        Log-likelihood.
    """
    alpha, beta, omega = params

    if alpha <= 0 or beta <= 0 or omega <=0 or omega >= 1:
        return -np.inf

    target = 0

    cdf_val = np.log(1 - st.invgamma.cdf(60, alpha, loc=0, scale=beta))
    
    for i in n:
        if i == 60:
            target += scipy.special.logsumexp([np.log(1 - omega), np.log(omega) + cdf_val])
        else:
            target += np.log(omega) + st.invgamma.logpdf(i, alpha, loc=0, scale=beta)
                                              

    return target

In [34]:
def log_like_iid_inv_gamma(params, n):
    """Log likelihood for i.i.d. inverse gamma measurements.

    Parameters
    ----------
    params : array
        Parameters mu, sigma.
    n : array
        Array of data points.

    Returns
    -------
    output : float
        Log-likelihood.
    """
    alpha, beta = params

    if alpha <= 0 or beta <= 0:
        return -np.inf


    target = 0

    cdf_val = np.log(1 - st.invgamma.cdf(60, alpha, loc=0, scale=beta))
    
    for i in n:
        if i == 60:
            target += cdf_val
        else:
            target += st.invgamma.logpdf(i, alpha, loc=0, scale=beta)

    return target
    

In [35]:
def log_like(params, n, model):
    """
    Log likelihood for i.i.d. measurements for the given model.
    
    Parameters
    ----------
    params : array
        Parameters mu, sigma.
    n : array
        Array of data points.
    model : string
        One of the following: 'weibull', 'weibull_mixed', 'lognormal', 'lognormal_mixed', 'normal', 'normal_mixed', 
            'gamma', 'gamma_mixed', 'inverse gamma', 'inverse_gamma_mixed'

    Returns
    -------
    output : float
        Log-likelihood.
    
    """

    if model == 'weibull':
        return log_like_iid_weibull(params, n)
    elif model == 'weibull_mixed':
        return log_like_iid_weibull_mixed(params, n)
    elif model == 'lognormal':
        return log_like_iid_lognormal(params, n)
    elif model == 'lognormal_mixed':
        return log_like_iid_lognormal_mixed(params, n)
    elif model == 'normal':
        return log_like_iid_normal(params, n)
    elif model == 'normal_mixed':
        return log_like_iid_normal_mixed(params, n)
    elif model == 'gamma':
        return log_like_iid_gamma(params, n)
    elif model == 'gamma_mixed':
        return log_like_iid_gamma_mixed(params, n)
    elif model == 'inverse_gamma':
        return log_like_iid_inv_gamma(params, n)
    elif model == 'inverse_gamma_mixed':
        return log_like_iid_inv_gamma_mixed(params, n)
    else:
        raise ValueError('Pick an appropriate model!')
                                     

In [36]:
def mle_iid(n, model):
    """Performs maximum likelihood estimates for parameters for i.i.d.
    measurements of a chosen model;

    Parameters
    ----------
    n : array
        Array of data points.
    model : string
        One of the following: 'weibull', 'weibull_mixed', 'lognormal', 'lognormal_mixed', 'normal', 'normal_mixed', 
            'gamma', 'gamma_mixed', 'inverse gamma', 'inverse_gamma_mixed'

    Returns
    -------
    output : float
        MLE for given model parameters.
    """

    if model == 'weibull':
        init_guess = np.array([2, 15])
    elif model == 'weibull_mixed':
        init_guess = np.array([2, 15, 0.5])
    elif model == 'lognormal':
        init_guess = np.array([2, 15])
    elif model == 'lognormal_mixed':
        init_guess = np.array([2, 15, 0.5])
    elif model == 'normal':
        init_guess = np.array([10, 15])
    elif model == 'normal_mixed':
        init_guess = np.array([10, 15, 0.5])
    elif model == 'gamma':
        init_guess = np.array([0.1, 10])
    elif model == 'gamma_mixed':
        init_guess = np.array([0.5, 10, 0.5])
    elif model == 'inverse_gamma':
        init_guess = np.array([5, 15])
    elif model == 'inverse_gamma_mixed':
        init_guess = np.array([5, 15, 0.5])
    else:
        raise ValueError('Pick an appropriate model!')

    
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        res = scipy.optimize.minimize(
            fun=lambda params, n: -log_like(params, n, model),
            x0=init_guess,
            args=(n,),
            method='Powell'
        )

        if res.success:
            return res.x
        else:
            raise RuntimeError('Convergence failed with message', res.message)

In [37]:
def draw_perm_sample(x, y):
    """Generate a permutation sample."""
    concat_data = np.concatenate((x, y))
    np.random.shuffle(concat_data)

    return concat_data[:len(x)], concat_data[len(x):]


def draw_perm_reps(x, y, stat_fun, size=1):
    """Generate array of permuation replicates."""
    return np.array([stat_fun(*draw_perm_sample(x, y)) for _ in range(size)])

def draw_perm_reps_diff_mean(x, y, size=1):
    """Generate array of permuation replicates."""
    out = np.empty(size)
    for i in range(size):
        x_perm, y_perm = draw_perm_sample(x, y)
        out[i] = np.abs(np.mean(x_perm) - np.mean(y_perm))

    return out

### Calculating MLEs and AICs for all the models

In [38]:
mles_dict = {}
models = ['weibull', 'weibull_mixed', 'lognormal', 'lognormal_mixed', 'normal', 'normal_mixed']

AICs = pd.DataFrame(index = models)

for model in models:
    mles_dict[model] = {}
    for group in group_vals.keys():
        if len(group_vals[group]) != 0:
            params = mle_iid(group_vals[group], model=model)
        
            mles_dict[model][group] = params
    
            _llk = log_like(params, group_vals[group], model = model)
            AICs.loc[model, group] = -2 * (_llk - len(params))
    

  target += scipy.special.logsumexp([np.log(1 - omega), np.log(omega) + np.log(1 - st.weibull_min.cdf(i, alpha, scale=sigma))])
  target += scipy.special.logsumexp([np.log(1 - omega), np.log(omega) + np.log(1 - st.weibull_min.cdf(i, alpha, scale=sigma))])
  cdf_val = np.log(1 - st.norm.cdf(60, mu, sigma))
  cdf_val = np.log(1 - st.norm.cdf(60, mu, sigma))
  cdf_val = np.log(1 - st.norm.cdf(60, mu, sigma))


In [39]:
AICs

Unnamed: 0,DJKO_GF,DJKO_SPF,B6_SPF
weibull,94.27028,162.202647,147.786332
weibull_mixed,74.918749,147.481531,103.35809
lognormal,87.890924,152.652191,133.647734
lognormal_mixed,71.120121,143.559766,96.932281
normal,125.060406,199.181725,200.443972
normal_mixed,78.176719,156.901739,104.655501


## Mixed Lognormal model

Fitting data into the mixed lognormal model with Dirac delta function.

In [40]:
ln_mles = mles_dict['lognormal_mixed']
ln_mles

{'DJKO_GF': array([1.42891668, 0.46765719, 0.8       ]),
 'DJKO_SPF': array([1.84121032, 0.66213084, 0.87529186]),
 'B6_SPF': array([1.3821976 , 0.34415109, 0.875     ])}

Defining functions for the graphical model assessment.

In [41]:
def mle_iid_lognormal_mixed(n):
    
    """Performs maximum likelihood estimates for parameters for i.i.d.
    lognormal mixed measurements, parametrized by mu, sigma"""
    
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        res = scipy.optimize.minimize(
            fun=lambda params, n: -log_like_iid_lognormal_mixed(params, n),
            x0=np.array([2, 5, 0.5]),
            args=(n,),
            method='Powell'
        )

    if res.success:
        return res.x
    else:
        raise RuntimeError('Convergence failed with message', res.message)



In [42]:
def gen_lognormal_mixed(params, size, rng):
    """Draws a sample out of the mixed lognormal distribution with Dirac delta function
    parametrized by mu, sigma and omega.

    Parameters
    ----------
    params : array
        Parameters mu, sigma, omega.
    size : int
        Number of data poinst in a sample.
    rng: Generator
        The initialized generator object.

    Returns
    -------
    output : array
        Newly generated sample.
    
    """
    mu, sigma, omega = params

    num_max = rng.binomial(size, (1 - omega))
    y_max = np.ones(num_max) * 60
    num_weib = size - num_max

    y_lognorm = st.lognorm.rvs(sigma, scale=np.exp(mu), size=num_weib)

    if len(y_max) == 0:
        y = y_lognorm
    else:
        y = np.concatenate((y_lognorm, y_max))

    y[y > 60] = 60
    
    return y

Drawing bootstrap replicates of parameter MLEs for every group in the experiment.  
**Choose an appropriate amount for <code>n_jobs</code> according to the number of cores available on the machine. Windows users might need to set it to 1 to make it work.**

In [43]:
bs_mle_samples_mx_ln = {}

for group in group_vals.keys():
    print(group)

    bs_mle_sample = bebi103.bootstrap.draw_bs_reps_mle(
        mle_iid,
        gen_lognormal_mixed,
        data=group_vals[group],
        mle_args=('lognormal_mixed',),
        size=10000,
        n_jobs=7
    )

    bs_mle_samples_mx_ln[group] = bs_mle_sample

DJKO_GF


KeyboardInterrupt: 

## Plotting:

### Graphical model assessment

Generating bootstrap samples for further graphical model assessment.

In [17]:
bs_samples_mx_ln = {}
rng = np.random.default_rng()

for group in sorted(group_vals, key=lambda x: order[x]):
    params = ln_mles[group]
    single_samples = np.array([gen_lognormal_mixed(params, size=len(group_vals[group]), rng=rng) for _ in range(100000)])
    bs_samples_mx_ln[group] = single_samples


NameError: name 'ln_mles' is not defined

Making Q-Q plots

In [37]:
qqplots_mx_ln =[]
precdf_plots_mx_ln = []

for group in sorted(group_vals, key=lambda x: order[x]):

    p = bebi103.viz.qqplot(
        data=group_vals[group],
        samples=bs_samples_mx_ln[group],
        x_axis_label="time of descent",
        y_axis_label="time of descent",
        title=group + ' Q-Q plot'       
    )
    qqplots_mx_ln.append(p)

    p1 = bebi103.viz.predictive_ecdf(
        samples=bs_samples_mx_ln[group], 
        data=group_vals[group], 
        x_axis_label="time",
        title=group + ' predictive ECDF'
    )
    precdf_plots_mx_ln.append(p1)

qq_lt_mx_ln = bokeh.layouts.row(qqplots_mx_ln)

bokeh.io.show(qq_lt_mx_ln)

Making predictive ECDF plots

In [38]:
pe_lt_mx_ln = bokeh.layouts.row(precdf_plots_mx_ln)

bokeh.io.show(pe_lt_mx_ln)

### Generating 95% confidence intervals for the parameter MLEs

Creating specific dictionary structure needed for the parameter 95% confidence interval plotting.

In [35]:
mu_lst_mx_ln = []
sigma_lst_mx_ln = []
omega_lst_mx_ln = []

mus_mx_ln = {}
sigmas_mx_ln = {}
omegas_mx_ln = {}

for group in sorted(group_vals, key=lambda x: order[x]):
    _smpls = bs_mle_samples_mx_ln[group]
    _mu_CI = np.percentile(_smpls[:, 0], [2.5, 97.5])
    _sigma_CI = np.percentile(_smpls[:, 1], [2.5, 97.5])
    _omega_CI = np.percentile(_smpls[:, 2], [2.5, 97.5])

    mu_mle, sigma_mle, omega_mle = ln_mles[group]

    _mu_dct = { 'label':group, 'conf_int':_mu_CI, 'estimate':mu_mle}
    _sigma_dct = { 'label':group, 'conf_int':_sigma_CI, 'estimate':sigma_mle}
    _omega_dct = { 'label':group, 'conf_int':_omega_CI, 'estimate':omega_mle}

    mu_lst_mx_ln.append(_mu_dct)
    sigma_lst_mx_ln.append(_sigma_dct)
    omega_lst_mx_ln.append(_omega_dct)

Plotting and saving 95% confidence interval plot for the mu parameter.

In [40]:
p = bebi103.viz.confints(
    mu_lst_mx_ln,
    title='mu 95%CI, lognormal mixed model',
    palette=palette_CI,
    frame_width=4000,
    frame_height=800,
    hidpi=True,
    line_width=5,
    marker_kwargs={"size":15}
)
p.title.text_font_size = '48pt'
p.xaxis.major_label_text_font_size = "40pt"
p.yaxis.major_label_text_font_size = "40pt"

p.output_backend = 'svg'
bokeh.io.export_svgs(p, filename='../omega_CIs_png_svg/pole_mu_conf_int_ln_mixed_djko.svg')

bokeh.io.save(
    p,
    filename='../omega_CIs_html_pdf/pole_mu_conf_int_ln_mixed_djko.html',
    title='Bokeh plot',
    resources=bokeh.resources.CDN,                          
);

Plotting and saving 95% confidence interval plot for the sigma parameter.

In [42]:
p = bebi103.viz.confints(
    sigma_lst_mx_ln,
    title='sigma 95%CI, lognormal mixed model',
    palette=palette_CI,
    frame_width=4000,
    frame_height=800,
    hidpi=True,
    line_width=5,
    marker_kwargs={"size":15}
)
p.title.text_font_size = '48pt'
p.xaxis.major_label_text_font_size = "40pt"
p.yaxis.major_label_text_font_size = "40pt"

# bokeh.io.show(p)
# bokeh.io.export_svg(p, filename="beam_sigma_conf_int_ln_mixed_djko.svg")
# bokeh.io.export_png(p, filename="beam_sigma_conf_int_ln_mixed_djko.png")

p.output_backend = 'svg'
bokeh.io.export_svgs(p, filename='../omega_CIs_png_svg/pole_sigma_conf_int_ln_mixed_djko.svg')

bokeh.io.save(
    p,
    filename='../omega_CIs_html_pdf/pole_sigma_conf_int_ln_mixed_djko.html',
    title='Bokeh plot',
    resources=bokeh.resources.CDN,                          
);

Plotting and saving 95% confidence interval plot for the omega parameter.

In [44]:
p = bebi103.viz.confints(
    omega_lst_mx_ln,
    title='ω 95%CI, lognormal mixed model',
    palette=palette_CI,
    frame_width=4000,
    frame_height=800,
    hidpi=True,
    line_width=5,
    marker_kwargs={"size":15}
)
p.title.text_font_size = '48pt'
p.xaxis.major_label_text_font_size = "40pt"
p.yaxis.major_label_text_font_size = "40pt"

# bokeh.io.show(p)
# bokeh.io.export_svg(p, filename="beam_omega_conf_int_ln_mixed_nac.svg")
# bokeh.io.export_png(p, filename="beam_omega_conf_int_ln_mixed_nac.png")

p.output_backend = 'svg'
bokeh.io.export_svgs(p, filename='../omega_CIs_png_svg/pole_omega_conf_int_ln_mixed_djko.svg')

bokeh.io.save(
    p,
    filename='../omega_CIs_html_pdf/pole_omega_conf_int_ln_mixed_djko.html',
    title='Bokeh plot',
    resources=bokeh.resources.CDN,                          
);

## Null hypothesis significance testing

Kruskall-Wallis analysis of the groups:

In [174]:
p_val_ks = st.kruskal(group_vals['B6_SPF'], group_vals['DJKO_SPF'], group_vals['DJKO_GF'])
p_val_ks

KruskalResult(statistic=3.464997432255659, pvalue=0.17684197953969488)

Post-hoc Conover pairwise comparison:

In [175]:
p_vals = sp.posthoc_conover([group_vals['B6_SPF'], group_vals['DJKO_SPF'], group_vals['DJKO_GF']], p_adjust='fdr_bh')
p_vals = p_vals.rename(columns={1:'B6_SPF', 2:'DJKO_SPF', 3:'DJKO_GF'}, index={1:'B6_SPF', 2:'DJKO_SPF', 3:'DJKO_GF'})
p_vals['KruskalWallis p-val'] = p_val_ks.pvalue
p_vals['KruskalWallis statistic'] = p_val_ks.statistic
p_vals.to_csv("./beam_djko_KW_conover.csv")
p_vals

Unnamed: 0,B6_SPF,DJKO_SPF,DJKO_GF,KruskalWallis p-val,KruskalWallis statistic
B6_SPF,1.0,0.20847,0.633209,0.176842,3.464997
DJKO_SPF,0.20847,1.0,0.387362,0.176842,3.464997
DJKO_GF,0.633209,0.387362,1.0,0.176842,3.464997


In [2]:
%load_ext watermark
%watermark -v -p numpy,pandas,numba,bokeh,bebi103,jupyterlab

Python implementation: CPython
Python version       : 3.11.5
IPython version      : 8.15.0

numpy     : 1.24.3
pandas    : 2.1.1
numba     : 0.58.0
bokeh     : 3.2.0
bebi103   : 0.1.17
jupyterlab: 4.0.6

