In [1]:
import numpy as np
import scipy.stats as st
import pandas as pd
import scipy
import warnings
import itertools
import os

import iqplot
import bebi103

import bokeh.io
import bokeh.plotting
import bokeh.layouts
bokeh.io.output_notebook()

## Uploading the data

1. Uploading the whole excel file to read from all the sheets later.

In [2]:
path_1 = os.path.join('..', 'data', 'behavior_data', 'Fp1_Motor_GI_function.xlsx')
path_2 = os.path.join('..', 'data', 'behavior_data', 'Fp2_Motor_GI_function.xlsx')
data_1 = pd.ExcelFile(path_1)
data_2 = pd.ExcelFile(path_2)

2. Creating a list of all sheets (tests) that we want to analyse.

In [3]:
tests = [i for i in data_1.sheet_names if ('Weight' not in i)]
tests

['Beam_time',
 'Beam_steps',
 'Pole',
 'Wirehang',
 'Adhesive_removal',
 'Hindlimb',
 'Fecal_output',
 'Fecal_score',
 'Water_content',
 'Gut_transit',
 'Bead_exp']

3. Parsing the Excel file into separate datasets (1 test = 1 dataset) and storing them in a dictionary with keys = names of the tests/sheets

In [4]:
data_dict = {}

for test in tests:
    temp_df_1 = data_1.parse(test)   
    temp_df_1['Experiment'] = 'Exp1'
    temp_df_2 = data_2.parse(test) 
    temp_df_2['Experiment'] = 'Exp2'
    temp_df = pd.concat((temp_df_1, temp_df_2))

    temp_df = temp_df.rename(columns={"Trial1": "Measurement", 
                                      "Trial15": "Measurement", 
                                      "Percent_water_content": "Measurement",
                                      "Time_min": "Measurement",
                                      "Slips_Step_Trial1": "Measurement"
                                     })
        

    temp_df = temp_df.dropna()
    data_dict[test] = temp_df


## Defining functions

#### Functions for the MLE calculation

In [5]:
def log_like_iid_lognormal_mixed(params, n, thresh):
    """Log likelihood for i.i.d. lognormal measurements mixed with Dirac delta function.

    Parameters
    ----------
    params : array
        Parameters mu, sigma, omega.
    n : array
        Array of data points.

    Returns
    -------
    output : float
        Log-likelihood.
    """
    mu, sigma, omega = params

    if mu <= 0 or sigma <= 0 or omega <=0 or omega >= 1:
        return -np.inf

    target = 0
    for i in n:
        if i == thresh:
            target += scipy.special.logsumexp([np.log(1 - omega), np.log(omega) + np.log(1 - st.lognorm.cdf(i, sigma, scale=np.exp(mu)))])
        else:
            target += np.log(omega) + st.lognorm.logpdf(i, sigma, scale=np.exp(mu))
                                              
    return target

def log_like_iid_lognormal(params, n, thresh):
    """Log likelihood for i.i.d. lognormal measurements.

    Parameters
    ----------
    params : array
        Parameters mu, sigma.
    n : array
        Array of data points.

    Returns
    -------
    output : float
        Log-likelihood.
    """
    mu, sigma = params

    if mu <= 0 or sigma <= 0:
        return -np.inf

    target = 0
    cdf_val = np.log(1 - st.lognorm.cdf(thresh, sigma, scale=np.exp(mu)))
    
    for i in n:
        if i == thresh:
            target += cdf_val
        else:
            target += st.lognorm.logpdf(i, sigma, scale=np.exp(mu))

    return target
    

def log_like_iid_normal_mixed(params, n, thresh):
    """Log likelihood for i.i.d. normal measurements mixed with Dirac delta function.

    Parameters
    ----------
    params : array
        Parameters mu, sigma, omega.
    n : array
        Array of data points.

    Returns
    -------
    output : float
        Log-likelihood.
    """
    mu, sigma, omega = params

    if mu <= 0 or sigma <= 0 or omega <=0 or omega >= 1:
        return -np.inf

    target = 0
    cdf_val = np.log(1 - st.norm.cdf(thresh, mu, sigma))
    
    for i in n:
        if i == thresh:
            target += scipy.special.logsumexp([np.log(1 - omega), np.log(omega) + cdf_val])
        else:
            target += np.log(omega) + st.norm.logpdf(i, mu, sigma)
                                              
    return target

def log_like_iid_normal(params, n, thresh):
    """Log likelihood for i.i.d. normal measurements.

    Parameters
    ----------
    params : array
        Parameters mu, sigma.
    n : array
        Array of data points.

    Returns
    -------
    output : float
        Log-likelihood.
    """
    mu, sigma = params

    if mu <= 0 or sigma <= 0:
        return -np.inf

    target = 0
    cdf_val = np.log(1 - st.norm.cdf(thresh, mu, sigma))
    
    for i in n:
        if i == thresh:
            target += cdf_val
        else:
            target += st.norm.logpdf(i, mu, sigma)

    return target    
    

def log_like(params, n, model, thresh):
    """
    Model: 'lognormal', 'lognormal_mixed', 'normal', 'normal_mixed'
    """
    if model == 'lognormal':
        return log_like_iid_lognormal(params, n, thresh)
    elif model == 'lognormal_mixed':
        return log_like_iid_lognormal_mixed(params, n, thresh)
    elif model == 'normal':
        return log_like_iid_normal(params, n, thresh)
    elif model == 'normal_mixed':
        return log_like_iid_normal_mixed(params, n, thresh)
    else:
        raise ValueError('Pick an appropriate model!')
                                     

def mle_iid(n, model, thresh):
    """Perform maximum likelihood estimates for parameters for i.i.d.
    measurements of a chosen model;
    Models: 'lognormal', 'lognormal_mixed', 'normal', 'normal_mixed', 
    """
    if model == 'lognormal':
        init_guess = np.array([2, 15])
    elif model == 'lognormal_mixed':
        init_guess = np.array([2, 15, 0.5])
    elif model == 'normal':
        init_guess = np.array([10, 15])
    elif model == 'normal_mixed':
        init_guess = np.array([10, 15, 0.5])
    else:
        raise ValueError('Pick an appropriate model!')

    
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        res = scipy.optimize.minimize(
            fun=lambda params, n: -log_like(params, n, model, thresh),
            x0=init_guess,
            args=(n,),
            method='Powell'
        )

        if res.success:
            return res.x
        else:
            raise RuntimeError('Convergence failed with message', res.message)



def gen_lognormal_mixed(params, thresh, size, rng):

    mu, sigma, omega = params

    num_max = rng.binomial(size, (1 - omega))
    y_max = np.ones(num_max) * thresh
    num_weib = size - num_max

    y_lognorm = st.lognorm.rvs(sigma, scale=np.exp(mu), size=num_weib)

    if len(y_max) == 0:
        y = y_lognorm
    else:
        y = np.concatenate((y_lognorm, y_max))

    y[y > thresh] = thresh
    
    return y

def gen_lognormal(params, thresh, size, rng):

    mu, sigma = params
    y = st.lognorm.rvs(sigma, scale=np.exp(mu), size=size)
    y[y > thresh] = thresh
    
    return y

def gen_normal(params, thresh, size, rng):

    mu, sigma = params
    y = st.norm.rvs(mu, sigma, size=size)
    y[y > thresh] = thresh
    
    return y


def draw_perm_sample(x, y):
    """Generate a permutation sample."""
    concat_data = np.concatenate((x, y))
    np.random.shuffle(concat_data)

    return concat_data[:len(x)], concat_data[len(x):]


def draw_perm_reps(x, y, stat_fun, size=1):
    """Generate array of permuation replicates."""
    return np.array([stat_fun(*draw_perm_sample(x, y)) for _ in range(size)])


def draw_perm_reps_diff_mean(x, y, size=1):
    """Generate array of permuation replicates."""
    out = np.empty(size)
    for i in range(size):
        x_perm, y_perm = draw_perm_sample(x, y)
        out[i] = np.abs(np.mean(x_perm) - np.mean(y_perm))

    return out

#### Functions for pooling analysis

The following functions were written with the assistance of ChatGPT-5 (auto mode) with the following prompts:  
- "Could you help me write code to perform Q-test and calculate I^2 in Python please?"  
- "Write a code for me to calculate Cliff's delta and bootstrap confidence intervals"  
- In response to the next ChatGPT suggestion:
"Would you like me to adapt the Python code we wrote for Cliff’s delta so that it computes BCa bootstrap CIs instead of the simple percentile ones? That way you can directly compare how much the choice of bootstrap interval affects your results."  
We answered: "Yes, please"

In [6]:
def q_i2_from_es_se(effect, se):
    """
    Compute Cochran's Q, df, p-value, I² (%), and fixed-effect pooled estimate.

    Parameters
    ----------
    effect : array-like
        Effect size estimates (e.g., Cliff's deltas), one per cohort/study.
    se : array-like
        Standard errors for the corresponding effect sizes.

    Returns
    -------
    dict with keys: {'pooled_FE', 'Q', 'df', 'p', 'I2_pct'}
    """
    effect = np.asarray(effect, dtype=float)
    se = np.asarray(se, dtype=float)
    var = se**2
    w = 1.0 / var

    # Fixed-effect pooled estimate
    pooled_FE = np.sum(w * effect) / np.sum(w)

    # Cochran's Q
    Q = np.sum(w * (effect - pooled_FE)**2)
    df = len(effect) - 1
    p = 1 - st.chi2.cdf(Q, df)

    # I² (bounded at 0)
    I2 = 0.0 if Q <= 0 else max(0.0, (Q - df) / Q) * 100.0

    return {"pooled_FE": pooled_FE, "Q": Q, "df": df, "p": p, "I2_pct": I2}


def q_i2_from_es_ci(effect, ci_low, ci_high, z=1.96):
    """
    Same as above, but you provide 95% CIs instead of SEs.
    SE is approximated as (upper - lower) / (2*z), default z=1.96.
    """
    se = (np.asarray(ci_high, dtype=float) - np.asarray(ci_low, dtype=float)) / (2 * z)
    return q_i2_from_es_se(effect, se)

def summarize_by_group(df):
    rows = []
    for grp, sub in df.groupby("group"):
        out = q_i2_from_es_ci(sub["Cliff's delta"].values, sub["CI_lower"].values, sub["CI_higher"].values)
        rows.append({
            "group": grp,
            "pooled_FE": out["pooled_FE"],
            "Q": out["Q"],
            "df": out["df"],
            "p": out["p"],
            "I2 (%)": out["I2_pct"],
        })
    return pd.DataFrame(rows)

In [7]:
def cliffs_delta(x, y):
    """
    Compute Cliff's delta for two independent samples x and y.
    Definition:
        delta = P(X > Y) - P(X < Y)
              = ( #pairs(x_i > y_j) - #pairs(x_i < y_j) ) / (len(x)*len(y))
    Ties contribute 0 to the numerator.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    if x.ndim != 1 or y.ndim != 1:
        raise ValueError("x and y must be 1-D arrays")
    nx, ny = x.size, y.size
    if nx == 0 or ny == 0:
        raise ValueError("x and y must be non-empty")
    # Vectorized pairwise comparisons
    diffs = x[:, None] - y[None, :]
    gt = np.sum(diffs > 0.0)
    lt = np.sum(diffs < 0.0)
    return float((gt - lt) / (nx * ny))


def cliffs_delta_bootstrap_bca(x, y, n_boot=50000, ci=95, random_state=42):
    """
    BCa bootstrap confidence intervals for Cliff's delta.

    Parameters
    ----------
    x, y : array-like (1D)
        Samples for the two groups.
    n_boot : int
        Number of bootstrap resamples.
    ci : float
        Confidence level, e.g., 95 for 95% CI.
    random_state : int or None
        RNG seed for reproducibility.

    Returns
    -------
    delta_hat : float
        Point estimate on the original samples.
    (lo, hi) : tuple
        BCa CI bounds at the requested level.
    boot : np.ndarray
        Bootstrap replicates of Cliff's delta (shape: (n_boot,))
    """
    rng = np.random.default_rng(random_state)
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    nx, ny = len(x), len(y)
    if nx == 0 or ny == 0:
        raise ValueError("x and y must be non-empty")

    # Point estimate
    theta_hat = cliffs_delta(x, y)

    # Bootstrap distribution
    boot = np.empty(n_boot, dtype=float)
    for b in range(n_boot):
        xb = x[rng.integers(0, nx, size=nx)]
        yb = y[rng.integers(0, ny, size=ny)]
        boot[b] = cliffs_delta(xb, yb)

    # Bias-correction (z0)
    # proportion of bootstrap samples less than the observed statistic
    # Handle ties conservatively by counting strictly less than
    prop_less = (boot < theta_hat).mean()
    # Avoid exactly 0 or 1
    eps = 1.0 / (n_boot + 1.0)
    prop_less = min(max(prop_less, eps), 1 - eps)
    z0 = scipy.stats.norm.ppf(prop_less)

    # Jackknife for acceleration 'a'
    # Leave-one-out over BOTH groups
    jack = np.empty(nx + ny, dtype=float)
    # leave-one-out from x
    for i in range(nx):
        if nx - 1 == 0:  # can't jackknife a length-1 sample
            jack[i] = theta_hat
        else:
            jack[i] = cliffs_delta(np.delete(x, i), y)
    # leave-one-out from y
    for j in range(ny):
        if ny - 1 == 0:
            jack[nx + j] = theta_hat
        else:
            jack[nx + j] = cliffs_delta(x, np.delete(y, j))

    jack_mean = np.mean(jack)
    num = np.sum((jack_mean - jack) ** 3)
    den = 6.0 * (np.sum((jack_mean - jack) ** 2) ** 1.5)
    if den == 0 or not np.isfinite(den):
        a = 0.0
    else:
        a = num / den

    # Adjusted alpha levels
    alpha = (100.0 - ci) / 100.0
    alow = alpha / 2.0
    ahigh = 1.0 - alpha / 2.0

    def bca_percentile(p):
        # BCa transformed percentile for probability p in (0,1)
        zp = scipy.stats.norm.ppf(p)
        num = z0 + zp
        den = 1.0 - a * (z0 + zp)
        if den == 0:
            adj = 1.0 if num > 0 else 0.0
        else:
            adj = scipy.stats.norm.cdf(z0 + num / den)
        # clip to [eps, 1-eps] to avoid indexing issues
        return min(max(adj, eps), 1 - eps)

    ql = bca_percentile(alow)
    qh = bca_percentile(ahigh)

    lo, hi = np.quantile(boot, [ql, qh])

    # Ensure bounds are within Cliff's delta domain [-1, 1]
    lo = max(-1.0, float(lo))
    hi = min(1.0, float(hi))

    return float(theta_hat), (lo, hi), boot


### Plotting characteristics

Specifying palettes and plotting order.

In [8]:
# Palette:
palette_CI = ["#000000", "#a6cee3", "#1F78B4"]

# Plotting order:
order = {'WT_SPF': 1, 'ASO_SPF': 2, 'ASO_SPF-Fp': 3}

## Pole descent

In [9]:
work_df = data_dict['Pole']
group_vals = {}

effect_1 = 'Genotype'
effect_2 = 'Microbiome'

effect1_lst = work_df[effect_1].unique()
effect2_lst = work_df[effect_2].unique()

for i in effect1_lst:
    for j in effect2_lst:
        name = i + '_' + j
        n = work_df.loc[(work_df[effect_1] == i) & (work_df[effect_2] == j), 'Measurement'].values
        if len(n) != 0:
            group_vals[name] = n

### Pooling analysis

In [10]:
pairs = list(itertools.combinations(group_vals.keys(), 2))
cd_1 = {}
cd_ci_1 = {}
cd_2 = {}
cd_ci_2 = {}

cd = [cd_1, cd_2]
cd_ci = [cd_ci_1, cd_ci_2]

for num, exp in enumerate(["Exp1", "Exp2"]):
    
    sub_df = work_df.loc[work_df["Experiment"] == exp]
    group_values = {}
    
    effect_1 = 'Genotype'
    effect_2 = 'Microbiome'
    
    effect1_lst = sub_df[effect_1].unique()
    effect2_lst = sub_df[effect_2].unique()
    
    for i in effect1_lst:
        for j in effect2_lst:
            name = i + '_' + j
            n = sub_df.loc[(sub_df[effect_1] == i) & (sub_df[effect_2] == j), 'Measurement'].values
            if len(n) != 0:
                group_values[name] = n

    for i in pairs:
        g1, g2 = i
        n1 = group_values[g1]
        n2 = group_values[g2]
        res = cliffs_delta_bootstrap_bca(n1, n2)
        cd[num][i] = res[0]
        cd_ci[num][i] = res[1]

p_df_1 = pd.DataFrame({"group": pairs,
        "Cliff's delta": list(cd[0].values()),
        "Cdelta CI": list(cd_ci[0].values())
       })

p_df_1["Exp"] = "Exp1"

p_df_2 = pd.DataFrame({"group": pairs,
        "Cliff's delta": list(cd[1].values()),
        "Cdelta CI": list(cd_ci[1].values())
       })

p_df_2["Exp"] = "Exp2"

p_df = pd.concat((p_df_1, p_df_2))
p_df[['CI_lower', 'CI_higher']] = p_df['Cdelta CI'].apply(pd.Series)
p_df.to_csv("../output/Fp_pole_cliffs_delta.csv")
p_df

Unnamed: 0,group,Cliff's delta,Cdelta CI,Exp,CI_lower,CI_higher
0,"(WT_SPF, ASO_SPF)",-0.414141,"(-0.797979797979798, 0.13131313131313133)",Exp1,-0.79798,0.131313
1,"(WT_SPF, ASO_SPF-Fp)",-0.681818,"(-0.9090909090909091, -0.22727272727272727)",Exp1,-0.909091,-0.227273
2,"(ASO_SPF, ASO_SPF-Fp)",-0.388889,"(-0.7962962962962963, 0.16666666666666666)",Exp1,-0.796296,0.166667
0,"(WT_SPF, ASO_SPF)",-0.645833,"(-0.8854166666666666, -0.25)",Exp2,-0.885417,-0.25
1,"(WT_SPF, ASO_SPF-Fp)",-0.617647,"(-0.8725490196078431, -0.19607843137254902)",Exp2,-0.872549,-0.196078
2,"(ASO_SPF, ASO_SPF-Fp)",-0.0625,"(-0.45955882352941174, 0.34558823529411764)",Exp2,-0.459559,0.345588


In [11]:
summary = summarize_by_group(p_df)
summary.to_csv("../output/Fp_pole_cochranQ_I2.csv")
summary

Unnamed: 0,group,pooled_FE,Q,df,p,I2 (%)
0,"(ASO_SPF, ASO_SPF-Fp)",-0.196792,1.038982,1,0.308058,3.751952
1,"(WT_SPF, ASO_SPF)",-0.57202,0.65088,1,0.419798,0.0
2,"(WT_SPF, ASO_SPF-Fp)",-0.64948,0.068595,1,0.793394,0.0


2. Calculating the AIC for a set of models: ['lognormal', 'lognormal_mixed', 'normal', 'normal_mixed']

In [12]:
models = ['lognormal', 'lognormal_mixed', 'normal', 'normal_mixed']

mles_dict = {}

AICs = pd.DataFrame(index = models)


for model in models:
    mles_dict[model] = {}
    for group in group_vals.keys():
        if len(group_vals[group]) != 0:
            params = mle_iid(group_vals[group], model=model, thresh=60.0)
        
            mles_dict[model][group] = params
    
            _llk = log_like(params, group_vals[group], model = model, thresh=60.0)
            AICs.loc[model, group] = -2 * (_llk - len(params))
    

AICs

  cdf_val = np.log(1 - st.lognorm.cdf(thresh, sigma, scale=np.exp(mu)))
  cdf_val = np.log(1 - st.norm.cdf(thresh, mu, sigma))
  cdf_val = np.log(1 - st.norm.cdf(thresh, mu, sigma))
  cdf_val = np.log(1 - st.norm.cdf(thresh, mu, sigma))
  cdf_val = np.log(1 - st.norm.cdf(thresh, mu, sigma))
  cdf_val = np.log(1 - st.norm.cdf(thresh, mu, sigma))
  cdf_val = np.log(1 - st.norm.cdf(thresh, mu, sigma))


Unnamed: 0,WT_SPF,ASO_SPF,ASO_SPF-Fp
lognormal,91.081825,126.233885,166.802814
lognormal_mixed,93.081826,128.233886,168.802815
normal,94.835204,133.732791,178.118681
normal_mixed,96.835205,135.732793,180.118683


We chose to work with the lognormal distribution.

### Lognormal

In [13]:
ln_mles = mles_dict['lognormal']
ln_mles

{'WT_SPF': array([1.77361938, 0.27267499]),
 'ASO_SPF': array([2.0977328 , 0.34232536]),
 'ASO_SPF-Fp': array([2.227825  , 0.43178858])}

4. Calculating confidence intervals for the parameters of the chosen model.

Drawing bootstrap replicates of parameter MLEs for every group in the experiment.  
**Choose an appropriate amount for <code>n_jobs</code> according to the number of cores available on the machine. Windows users might need to set it to 1 to make it work. There is another option of using previously generated bootstrap samples - refer to the code below the following coding cell.**

In [14]:
bs_mle_samples_mx_ln = {}

for group in group_vals.keys():
    print(group)

    bs_mle_sample = bebi103.bootstrap.draw_bs_reps_mle(
        mle_iid,
        gen_lognormal,
        data=group_vals[group],
        mle_args=('lognormal', 60.0),
        gen_args=(60.0, ),
        size=10000,
        n_jobs=7
    )

    _df = pd.DataFrame(bs_mle_sample, columns=['mu', 'sigma'])
    _df.to_csv(os.path.join("..", "output", "Fp_pole_bs_mle_samples"+group+".csv"))

    bs_mle_samples_mx_ln[group] = bs_mle_sample

WT_SPF
ASO_SPF
ASO_SPF-Fp


**If you are unable to run the code cell above or it takes too long, you can use the code below.**

In [15]:
for group in group_vals.keys():
    _df = pd.read_csv(os.path.join("..", "output", "Fp_pole_bs_mle_samples"+group+".csv"), index_col=0)
    bs_mle_samples_mx_ln[group] = _df.to_numpy()

5. Creating specific data structures to be able to plot the confidence intervals.

#### For plotting:

In [16]:
mu_lst_mx_ln = []
sigma_lst_mx_ln = []

mus_mx_ln = {}
sigmas_mx_ln = {}

for group in group_vals:
    _smpls = bs_mle_samples_mx_ln[group]
    _mu_CI = np.percentile(_smpls[:, 0], [2.5, 97.5])
    _sigma_CI = np.percentile(_smpls[:, 1], [2.5, 97.5])

    mu_mle, sigma_mle = ln_mles[group]

    _mu_dct = { 'label':group, 'conf_int':_mu_CI, 'estimate':mu_mle}
    _sigma_dct = { 'label':group, 'conf_int':_sigma_CI, 'estimate':sigma_mle}

    mu_lst_mx_ln.append(_mu_dct)
    sigma_lst_mx_ln.append(_sigma_dct)

### Graphical model assessment

In [17]:
bs_samples_mx_ln = {}
rng = np.random.default_rng()

for group in group_vals:

    params = ln_mles[group]
    single_samples = np.array([gen_lognormal(params, thresh=60.0, size=len(group_vals[group]), rng=rng) for _ in range(100000)])
    bs_samples_mx_ln[group] = single_samples


In [18]:
qqplots_mx_ln =[]
precdf_plots_mx_ln = []

for group in group_vals:

    p = bebi103.viz.qqplot(
        data=group_vals[group],
        samples=bs_samples_mx_ln[group],
        x_axis_label="time of descent",
        y_axis_label="time of descent",
        title=group + ' Q-Q plot'       
    )
    qqplots_mx_ln.append(p)

    p1 = bebi103.viz.predictive_ecdf(
        samples=bs_samples_mx_ln[group], 
        data=group_vals[group], 
        x_axis_label="time",
        title=group + ' predictive ECDF'
    )
    precdf_plots_mx_ln.append(p1)

qq_lt_mx_ln = bokeh.layouts.row(qqplots_mx_ln)

bokeh.io.show(qq_lt_mx_ln)

In [19]:
pe_lt_mx_ln = bokeh.layouts.row(precdf_plots_mx_ln)

bokeh.io.show(pe_lt_mx_ln)

### Generating 95% confidence intervals for the parameter MLEs

In [20]:
p = bebi103.viz.confints(
    mu_lst_mx_ln,
    title='mu 95%CI, lognormal model',
    palette=palette_CI
)

p.output_backend = "svg"

bokeh.io.show(p)

bokeh.io.export_svgs(p, filename=os.path.join("..", "figures", "Fp_pole_mu_CI.svg"))


bokeh.io.save(
    p,
    filename=('../figures/Fp_pole_mu_CI.html'),
    title='Bokeh plot',
    resources=bokeh.resources.CDN)

'/Users/anastasiaoguienko/git/amoiseyenko2025/figures/Fp_pole_mu_CI.html'

In [21]:
p = bebi103.viz.confints(
    sigma_lst_mx_ln,
    title='sigma 95%CI, lognormal model',
    palette=palette_CI
)

p.output_backend = "svg"

bokeh.io.show(p)
bokeh.io.export_svgs(p, filename=os.path.join("..", "figures", "Fp_sigma_mu_CI.svg"))

bokeh.io.save(
    p,
    filename=('../figures/Fp_pole_sigma_CI.html'),
    title='Bokeh plot',
    resources=bokeh.resources.CDN)

'/Users/anastasiaoguienko/git/amoiseyenko2025/figures/Fp_pole_sigma_CI.html'

## Beam cross

In [22]:
work_df = data_dict['Beam_time']
group_vals = {}

effect_1 = 'Genotype'
effect_2 = 'Microbiome'

effect1_lst = work_df[effect_1].unique()
effect2_lst = work_df[effect_2].unique()

for i in effect1_lst:
    for j in effect2_lst:
        name = i + '_' + j
        n = work_df.loc[(work_df[effect_1] == i) & (work_df[effect_2] == j), 'Measurement'].values
        if len(n) != 0:
            group_vals[name] = n

### Pooling analysis

In [23]:
pairs = list(itertools.combinations(group_vals.keys(), 2))
cd_1 = {}
cd_ci_1 = {}
cd_2 = {}
cd_ci_2 = {}

cd = [cd_1, cd_2]
cd_ci = [cd_ci_1, cd_ci_2]

for num, exp in enumerate(["Exp1", "Exp2"]):
    
    sub_df = work_df.loc[work_df["Experiment"] == exp]
    group_values = {}
    
    effect_1 = 'Genotype'
    effect_2 = 'Microbiome'
    
    effect1_lst = sub_df[effect_1].unique()
    effect2_lst = sub_df[effect_2].unique()
    
    for i in effect1_lst:
        for j in effect2_lst:
            name = i + '_' + j
            n = sub_df.loc[(sub_df[effect_1] == i) & (sub_df[effect_2] == j), 'Measurement'].values
            if len(n) != 0:
                group_values[name] = n

    for i in pairs:
        g1, g2 = i
        n1 = group_values[g1]
        n2 = group_values[g2]
        res = cliffs_delta_bootstrap_bca(n1, n2)
        cd[num][i] = res[0]
        cd_ci[num][i] = res[1]

p_df_1 = pd.DataFrame({"group": pairs,
        "Cliff's delta": list(cd[0].values()),
        "Cdelta CI": list(cd_ci[0].values())
       })

p_df_1["Exp"] = "Exp1"

p_df_2 = pd.DataFrame({"group": pairs,
        "Cliff's delta": list(cd[1].values()),
        "Cdelta CI": list(cd_ci[1].values())
       })

p_df_2["Exp"] = "Exp2"

p_df = pd.concat((p_df_1, p_df_2))
p_df[['CI_lower', 'CI_higher']] = p_df['Cdelta CI'].apply(pd.Series)
p_df.to_csv("../output/Fp_beam_cliffs_delta.csv")
p_df

Unnamed: 0,group,Cliff's delta,Cdelta CI,Exp,CI_lower,CI_higher
0,"(WT_SPF, ASO_SPF)",-0.89899,"(-1.0, -0.5151515151515151)",Exp1,-1.0,-0.515152
1,"(WT_SPF, ASO_SPF-Fp)",-0.53719,"(-1.0, 0.024793388429752067)",Exp1,-1.0,0.024793
2,"(ASO_SPF, ASO_SPF-Fp)",0.393939,"(-0.25252525252525254, 0.7777777777777778)",Exp1,-0.252525,0.777778
0,"(WT_SPF, ASO_SPF)",-0.947917,"(-1.0, -0.7395833333333334)",Exp2,-1.0,-0.739583
1,"(WT_SPF, ASO_SPF-Fp)",-0.833333,"(-0.9607843137254902, -0.5)",Exp2,-0.960784,-0.5
2,"(ASO_SPF, ASO_SPF-Fp)",0.176471,"(-0.2536764705882353, 0.5477941176470589)",Exp2,-0.253676,0.547794


In [24]:
summary = summarize_by_group(p_df)
summary.to_csv("../output/Fp_beam_cochranQ_I2.csv")
summary

Unnamed: 0,group,pooled_FE,Q,df,p,I2 (%)
0,"(ASO_SPF, ASO_SPF-Fp)",0.258455,0.426508,1,0.513707,0.0
1,"(WT_SPF, ASO_SPF)",-0.936962,0.121443,1,0.727474,0.0
2,"(WT_SPF, ASO_SPF-Fp)",-0.78353,1.067422,1,0.301528,6.316368


### Model assessment

In [25]:
models = ['lognormal', 'lognormal_mixed', 'normal', 'normal_mixed']

mles_dict = {}

AICs = pd.DataFrame(index = models)


for model in models:
    mles_dict[model] = {}
    for group in group_vals.keys():
        if len(group_vals[group]) != 0:
            params = mle_iid(group_vals[group], model=model, thresh=60.0)
        
            mles_dict[model][group] = params
    
            _llk = log_like(params, group_vals[group], model = model, thresh=60.0)
            AICs.loc[model, group] = -2 * (_llk - len(params))
    

  cdf_val = np.log(1 - st.norm.cdf(thresh, mu, sigma))
  cdf_val = np.log(1 - st.norm.cdf(thresh, mu, sigma))
  cdf_val = np.log(1 - st.norm.cdf(thresh, mu, sigma))
  cdf_val = np.log(1 - st.norm.cdf(thresh, mu, sigma))


In [26]:
AICs

Unnamed: 0,WT_SPF,ASO_SPF,ASO_SPF-Fp
lognormal,85.76852,163.52536,163.640632
lognormal_mixed,87.768521,147.690531,153.197712
normal,108.731728,205.221608,212.320426
normal_mixed,110.731729,158.250067,159.35144


### Lognormal

In [27]:
ln_mles = mles_dict['lognormal_mixed']
ln_mles

{'WT_SPF': array([1.43873104, 0.33956376, 0.99999997]),
 'ASO_SPF': array([2.18827759, 0.44762047, 0.88000955]),
 'ASO_SPF-Fp': array([1.98742449, 0.43160046, 0.96428622])}

Drawing bootstrap replicates of parameter MLEs for every group in the experiment.  
**Choose an appropriate amount for <code>n_jobs</code> according to the number of cores available on the machine. Windows users might need to set it to 1 to make it work. There is another option of using previously generated bootstrap samples - refer to the code below the following coding cell.**

In [28]:
bs_mle_samples_mx_ln = {}

for group in group_vals.keys():
    print(group)

    bs_mle_sample = bebi103.bootstrap.draw_bs_reps_mle(
        mle_iid,
        gen_lognormal_mixed,
        data=group_vals[group],
        mle_args=('lognormal_mixed', 60.0),
        gen_args=(60.0, ),
        size=10000,
        n_jobs=7
    )

    _df = pd.DataFrame(bs_mle_sample, columns=['mu', 'sigma', 'omega'])
    _df.to_csv(os.path.join("..", "output", "Fp_beam_bs_mle_samples"+group+".csv"))

    bs_mle_samples_mx_ln[group] = bs_mle_sample

WT_SPF
ASO_SPF
ASO_SPF-Fp


**If you are unable to run the code cell above or it takes too long, you can use the code below.**

In [29]:
for group in group_vals.keys():
    _df = pd.read_csv(os.path.join("..", "output", "Fp_beam_bs_mle_samples"+group+".csv"), index_col=0)
    bs_mle_samples_mx_ln[group] = _df.to_numpy()

#### For plotting:

In [30]:
mu_lst_mx_ln = []
sigma_lst_mx_ln = []
omega_lst_mx_ln = []

mus_mx_ln = {}
sigmas_mx_ln = {}
omega_mx_ln = {}

for group in group_vals:
    _smpls = bs_mle_samples_mx_ln[group]
    _mu_CI = np.percentile(_smpls[:, 0], [2.5, 97.5])
    _sigma_CI = np.percentile(_smpls[:, 1], [2.5, 97.5])
    _omega_CI = np.percentile(_smpls[:, 2], [2.5, 97.5])

    mu_mle, sigma_mle, omega_mle = ln_mles[group]

    _mu_dct = { 'label':group, 'conf_int':_mu_CI, 'estimate':mu_mle}
    _sigma_dct = { 'label':group, 'conf_int':_sigma_CI, 'estimate':sigma_mle}
    _omega_dct = { 'label':group, 'conf_int':_omega_CI, 'estimate':omega_mle}

    mu_lst_mx_ln.append(_mu_dct)
    sigma_lst_mx_ln.append(_sigma_dct)
    omega_lst_mx_ln.append(_omega_dct)

### Graphical model assessment

In [31]:
bs_samples_mx_ln = {}
rng = np.random.default_rng()

for group in group_vals:

    params = ln_mles[group]
    single_samples = np.array([gen_lognormal_mixed(params, thresh=60.0, size=len(group_vals[group]), rng=rng) for _ in range(100000)])
    bs_samples_mx_ln[group] = single_samples


In [32]:
qqplots_mx_ln =[]
precdf_plots_mx_ln = []

for group in group_vals:

    p = bebi103.viz.qqplot(
        data=group_vals[group],
        samples=bs_samples_mx_ln[group],
        x_axis_label="time of descent",
        y_axis_label="time of descent",
        title=group + ' Q-Q plot'       
    )
    qqplots_mx_ln.append(p)

    p1 = bebi103.viz.predictive_ecdf(
        samples=bs_samples_mx_ln[group], 
        data=group_vals[group], 
        x_axis_label="time",
        title=group + ' predictive ECDF'
    )
    precdf_plots_mx_ln.append(p1)

qq_lt_mx_ln = bokeh.layouts.row(qqplots_mx_ln)

bokeh.io.show(qq_lt_mx_ln)

In [33]:
pe_lt_mx_ln = bokeh.layouts.row(precdf_plots_mx_ln)

bokeh.io.show(pe_lt_mx_ln)

### Generating 95% confidence intervals for the parameter MLEs

In [34]:
p = bebi103.viz.confints(
    mu_lst_mx_ln,
    title='mu 95%CI, mixed lognormal model',
    palette=palette_CI
)
p.output_backend = "svg"

bokeh.io.show(p)
bokeh.io.export_svgs(p, filename=os.path.join("..", "figures", "Fp_beam_mu_CI.svg"))

bokeh.io.save(
    p,
    filename=('../figures/Fp_beam_mu_CI.html'),
    title='Bokeh plot',
    resources=bokeh.resources.CDN)

'/Users/anastasiaoguienko/git/amoiseyenko2025/figures/Fp_beam_mu_CI.html'

In [35]:
p = bebi103.viz.confints(
    sigma_lst_mx_ln,
    title='sigma 95%CI, mixed lognormal model',
    palette=palette_CI
)
p.output_backend = "svg"
bokeh.io.show(p)
bokeh.io.export_svgs(p, filename=os.path.join("..", "figures", "Fp_beam_sigma_CI.svg"))
bokeh.io.save(
    p,
    filename=('../figures/Fp_beam_sigma_CI.html'),
    title='Bokeh plot',
    resources=bokeh.resources.CDN)

'/Users/anastasiaoguienko/git/amoiseyenko2025/figures/Fp_beam_sigma_CI.html'

In [36]:
p = bebi103.viz.confints(
    omega_lst_mx_ln,
    title='omega 95%CI, mixed lognormal model',
    palette=palette_CI
)
p.output_backend = "svg"
bokeh.io.show(p)
bokeh.io.export_svgs(p, filename=os.path.join("..", "figures", "Fp_beam_omega_CI.svg"))
bokeh.io.save(
    p,
    filename=('../figures/Fp_beam_omega_CI.html'),
    title='Bokeh plot',
    resources=bokeh.resources.CDN)

'/Users/anastasiaoguienko/git/amoiseyenko2025/figures/Fp_beam_omega_CI.html'

## Adhesive removal

In [37]:
work_df = data_dict['Adhesive_removal']
group_vals = {}

effect_1 = 'Genotype'
effect_2 = 'Microbiome'

effect1_lst = work_df[effect_1].unique()
effect2_lst = work_df[effect_2].unique()

for i in effect1_lst:
    for j in effect2_lst:
        name = i + '_' + j
        n = work_df.loc[(work_df[effect_1] == i) & (work_df[effect_2] == j), 'Measurement'].values
        if len(n) != 0:
            group_vals[name] = n

### Pooling analysis

In [38]:
pairs = list(itertools.combinations(group_vals.keys(), 2))
cd_1 = {}
cd_ci_1 = {}
cd_2 = {}
cd_ci_2 = {}

cd = [cd_1, cd_2]
cd_ci = [cd_ci_1, cd_ci_2]

for num, exp in enumerate(["Exp1", "Exp2"]):
    
    sub_df = work_df.loc[work_df["Experiment"] == exp]
    group_values = {}
    
    effect_1 = 'Genotype'
    effect_2 = 'Microbiome'
    
    effect1_lst = sub_df[effect_1].unique()
    effect2_lst = sub_df[effect_2].unique()
    
    for i in effect1_lst:
        for j in effect2_lst:
            name = i + '_' + j
            n = sub_df.loc[(sub_df[effect_1] == i) & (sub_df[effect_2] == j), 'Measurement'].values
            if len(n) != 0:
                group_values[name] = n

    for i in pairs:
        g1, g2 = i
        n1 = group_values[g1]
        n2 = group_values[g2]
        res = cliffs_delta_bootstrap_bca(n1, n2)
        cd[num][i] = res[0]
        cd_ci[num][i] = res[1]

p_df_1 = pd.DataFrame({"group": pairs,
        "Cliff's delta": list(cd[0].values()),
        "Cdelta CI": list(cd_ci[0].values())
       })

p_df_1["Exp"] = "Exp1"

p_df_2 = pd.DataFrame({"group": pairs,
        "Cliff's delta": list(cd[1].values()),
        "Cdelta CI": list(cd_ci[1].values())
       })

p_df_2["Exp"] = "Exp2"

p_df = pd.concat((p_df_1, p_df_2))
p_df[['CI_lower', 'CI_higher']] = p_df['Cdelta CI'].apply(pd.Series)
p_df.to_csv("../output/Fp_adhesive_cliffs_delta.csv")
p_df

Unnamed: 0,group,Cliff's delta,Cdelta CI,Exp,CI_lower,CI_higher
0,"(WT_SPF, ASO_SPF)",-0.636364,"(-0.9191919191919192, -0.1111111111111111)",Exp1,-0.919192,-0.111111
1,"(WT_SPF, ASO_SPF-Fp)",-0.333333,"(-0.7424242424242424, 0.19696969696969696)",Exp1,-0.742424,0.19697
2,"(ASO_SPF, ASO_SPF-Fp)",0.472222,"(-0.09259259259259259, 0.8148148148148148)",Exp1,-0.092593,0.814815
0,"(WT_SPF, ASO_SPF)",-0.864583,"(-0.9791666666666666, -0.5625)",Exp2,-0.979167,-0.5625
1,"(WT_SPF, ASO_SPF-Fp)",-0.303922,"(-0.6666666666666666, 0.14705882352941177)",Exp2,-0.666667,0.147059
2,"(ASO_SPF, ASO_SPF-Fp)",0.444853,"(-0.007352941176470588, 0.7647058823529411)",Exp2,-0.007353,0.764706


In [39]:
summary = summarize_by_group(p_df)
summary.to_csv("../output/Fp_adhesive_cochranQ_I2.csv")
summary

Unnamed: 0,group,pooled_FE,Q,df,p,I2 (%)
0,"(ASO_SPF, ASO_SPF-Fp)",0.456346,0.008109,1,0.928247,0.0
1,"(WT_SPF, ASO_SPF)",-0.816651,0.968233,1,0.325121,0.0
2,"(WT_SPF, ASO_SPF-Fp)",-0.31653,0.008606,1,0.926088,0.0


### Model assessment

In [40]:
models = ['lognormal', 'lognormal_mixed', 'normal', 'normal_mixed']

mles_dict = {}

AICs = pd.DataFrame(index = models)


for model in models:
    mles_dict[model] = {}
    for group in group_vals.keys():
        if len(group_vals[group]) != 0:
            params = mle_iid(group_vals[group], thresh=30.0, model=model)
        
            mles_dict[model][group] = params
    
            _llk = log_like(params, group_vals[group], thresh=30.0, model = model)
            AICs.loc[model, group] = -2 * (_llk - len(params))
    

  cdf_val = np.log(1 - st.norm.cdf(thresh, mu, sigma))
  cdf_val = np.log(1 - st.norm.cdf(thresh, mu, sigma))
  cdf_val = np.log(1 - st.norm.cdf(thresh, mu, sigma))
  cdf_val = np.log(1 - st.norm.cdf(thresh, mu, sigma))
  cdf_val = np.log(1 - st.norm.cdf(thresh, mu, sigma))


In [41]:
AICs

Unnamed: 0,WT_SPF,ASO_SPF,ASO_SPF-Fp
lognormal,51.266147,108.252298,86.924602
lognormal_mixed,53.266149,97.638656,88.924603
normal,70.832709,160.468802,102.824968
normal_mixed,72.83271,115.266676,104.824969


### Lognormal

In [42]:
ln_mles = mles_dict['lognormal']
ln_mles

{'WT_SPF': array([0.45434009, 0.42923424]),
 'ASO_SPF': array([1.18193631, 0.6686376 ]),
 'ASO_SPF-Fp': array([0.70412503, 0.49990907])}

Drawing bootstrap replicates of parameter MLEs for every group in the experiment.  
**Choose an appropriate amount for <code>n_jobs</code> according to the number of cores available on the machine. Windows users might need to set it to 1 to make it work. There is another option of using previously generated bootstrap samples - refer to the code below the following coding cell.**

In [43]:
bs_mle_samples_mx_ln = {}

for group in group_vals.keys():
    print(group)

    bs_mle_sample = bebi103.bootstrap.draw_bs_reps_mle(
        mle_iid,
        gen_lognormal,
        data=group_vals[group],
        mle_args=('lognormal', 30.0),
        gen_args=(30.0, ),
        size=10000,
        n_jobs=7
    )

    _df = pd.DataFrame(bs_mle_sample, columns=['mu', 'sigma'])
    _df.to_csv(os.path.join("..", "output", "Fp_adhesive_bs_mle_samples"+group+".csv"))

    bs_mle_samples_mx_ln[group] = bs_mle_sample

WT_SPF
ASO_SPF
ASO_SPF-Fp


**If you are unable to run the code cell above or it takes too long, you can use the code below.**

In [44]:
for group in group_vals.keys():
    _df = pd.read_csv(os.path.join("..", "output", "Fp_adhesive_bs_mle_samples"+group+".csv"), index_col=0)
    bs_mle_samples_mx_ln[group] = _df.to_numpy()

#### For plotting:

In [45]:
mu_lst_mx_ln = []
sigma_lst_mx_ln = []

mus_mx_ln = {}
sigmas_mx_ln = {}

for group in group_vals:
    _smpls = bs_mle_samples_mx_ln[group]
    _mu_CI = np.percentile(_smpls[:, 0], [2.5, 97.5])
    _sigma_CI = np.percentile(_smpls[:, 1], [2.5, 97.5])

    mu_mle, sigma_mle = ln_mles[group]

    _mu_dct = { 'label':group, 'conf_int':_mu_CI, 'estimate':mu_mle}
    _sigma_dct = { 'label':group, 'conf_int':_sigma_CI, 'estimate':sigma_mle}

    mu_lst_mx_ln.append(_mu_dct)
    sigma_lst_mx_ln.append(_sigma_dct)

### Graphical model assessment

In [46]:
bs_samples_mx_ln = {}
rng = np.random.default_rng()

for group in group_vals:

    params = ln_mles[group]
    single_samples = np.array([gen_lognormal(params, thresh=30.0, size=len(group_vals[group]), rng=rng) for _ in range(100000)])
    bs_samples_mx_ln[group] = single_samples


In [47]:
qqplots_mx_ln =[]
precdf_plots_mx_ln = []

for group in group_vals:

    p = bebi103.viz.qqplot(
        data=group_vals[group],
        samples=bs_samples_mx_ln[group],
        x_axis_label="time of descent",
        y_axis_label="time of descent",
        title=group + ' Q-Q plot'       
    )
    qqplots_mx_ln.append(p)

    p1 = bebi103.viz.predictive_ecdf(
        samples=bs_samples_mx_ln[group], 
        data=group_vals[group], 
        x_axis_label="time",
        title=group + ' predictive ECDF'
    )
    precdf_plots_mx_ln.append(p1)

qq_lt_mx_ln = bokeh.layouts.row(qqplots_mx_ln)

bokeh.io.show(qq_lt_mx_ln)

In [48]:
pe_lt_mx_ln = bokeh.layouts.row(precdf_plots_mx_ln)

bokeh.io.show(pe_lt_mx_ln)

### Generating 95% confidence intervals for the parameter MLEs

In [49]:
p = bebi103.viz.confints(
    mu_lst_mx_ln,
    title='mu 95%CI, lognormal model',
    palette=palette_CI
)
p.output_backend = "svg"
bokeh.io.show(p)
bokeh.io.export_svgs(p, filename=os.path.join("..", "figures", "Fp_adhesive_mu_CI.svg"))
bokeh.io.save(
    p,
    filename=('../figures/Fp_adhesive_mu_CI.html'),
    title='Bokeh plot',
    resources=bokeh.resources.CDN)

'/Users/anastasiaoguienko/git/amoiseyenko2025/figures/Fp_adhesive_mu_CI.html'

In [50]:
p = bebi103.viz.confints(
    sigma_lst_mx_ln,
    title='sigma 95%CI, lognormal model',
    palette=palette_CI
)
p.output_backend = "svg"
bokeh.io.show(p)
bokeh.io.export_svgs(p, filename=os.path.join("..", "figures", "Fp_adhesive_sigma_CI.svg"))
bokeh.io.save(
    p,
    filename=('../figures/Fp_adhesive_sigma_CI.html'),
    title='Bokeh plot',
    resources=bokeh.resources.CDN)

'/Users/anastasiaoguienko/git/amoiseyenko2025/figures/Fp_adhesive_sigma_CI.html'

## Wirehang

1 data point, which was equal to 0, was dropped from the analysis.

In [51]:
work_df = data_dict['Wirehang'].copy()
group_vals = {}

effect_1 = 'Genotype'
effect_2 = 'Microbiome'

effect1_lst = work_df[effect_1].unique()
effect2_lst = work_df[effect_2].unique()

work_df.loc[work_df['Measurement'] == 0, 'Measurement'] = np.nan
work_df = work_df.dropna()

for i in effect1_lst:
    for j in effect2_lst:
        name = i + '_' + j
        n = work_df.loc[(work_df[effect_1] == i) & (work_df[effect_2] == j), 'Measurement'].values
        if len(n) != 0:
            group_vals[name] = n


In [52]:
work_df

Unnamed: 0,Genotype,Microbiome,Cage,ID,Measurement,Experiment
0,WT,SPF,1,1787,59.54,Exp1
1,WT,SPF,1,1788,13.72,Exp1
2,WT,SPF,1,1789,60.00,Exp1
3,WT,SPF,2,1814,60.00,Exp1
4,WT,SPF,2,1816,38.56,Exp1
...,...,...,...,...,...,...
40,ASO,SPF-Fp,14,2320,15.01,Exp2
41,ASO,SPF-Fp,14,2321,8.39,Exp2
42,ASO,SPF-Fp,14,2322,9.89,Exp2
43,ASO,SPF-Fp,15,2317,7.63,Exp2


### Pooling analysis

In [53]:
pairs = list(itertools.combinations(group_vals.keys(), 2))
cd_1 = {}
cd_ci_1 = {}
cd_2 = {}
cd_ci_2 = {}

cd = [cd_1, cd_2]
cd_ci = [cd_ci_1, cd_ci_2]

for num, exp in enumerate(["Exp1", "Exp2"]):
    
    sub_df = work_df.loc[work_df["Experiment"] == exp]
    group_values = {}
    
    effect_1 = 'Genotype'
    effect_2 = 'Microbiome'
    
    effect1_lst = sub_df[effect_1].unique()
    effect2_lst = sub_df[effect_2].unique()
    
    for i in effect1_lst:
        for j in effect2_lst:
            name = i + '_' + j
            n = sub_df.loc[(sub_df[effect_1] == i) & (sub_df[effect_2] == j), 'Measurement'].values
            if len(n) != 0:
                group_values[name] = n

    for i in pairs:
        g1, g2 = i
        n1 = group_values[g1]
        n2 = group_values[g2]
        res = cliffs_delta_bootstrap_bca(n1, n2)
        cd[num][i] = res[0]
        cd_ci[num][i] = res[1]

p_df_1 = pd.DataFrame({"group": pairs,
        "Cliff's delta": list(cd[0].values()),
        "Cdelta CI": list(cd_ci[0].values())
       })

p_df_1["Exp"] = "Exp1"

p_df_2 = pd.DataFrame({"group": pairs,
        "Cliff's delta": list(cd[1].values()),
        "Cdelta CI": list(cd_ci[1].values())
       })

p_df_2["Exp"] = "Exp2"

p_df = pd.concat((p_df_1, p_df_2))
p_df[['CI_lower', 'CI_higher']] = p_df['Cdelta CI'].apply(pd.Series)
p_df.to_csv("../output/Fp_wirehang_cliffs_delta.csv")
p_df

Unnamed: 0,group,Cliff's delta,Cdelta CI,Exp,CI_lower,CI_higher
0,"(WT_SPF, ASO_SPF)",0.737374,"(0.15151515151515152, 0.9595959595959596)",Exp1,0.151515,0.959596
1,"(WT_SPF, ASO_SPF-Fp)",0.801653,"(0.30578512396694213, 0.9669421487603306)",Exp1,0.305785,0.966942
2,"(ASO_SPF, ASO_SPF-Fp)",-0.232323,"(-0.7575757575757576, 0.41414141414141414)",Exp1,-0.757576,0.414141
0,"(WT_SPF, ASO_SPF)",0.958333,"(0.6853261457257298, 1.0)",Exp2,0.685326,1.0
1,"(WT_SPF, ASO_SPF-Fp)",0.686275,"(0.29411764705882354, 0.9117647058823529)",Exp2,0.294118,0.911765
2,"(ASO_SPF, ASO_SPF-Fp)",-0.25,"(-0.6176470588235294, 0.16911764705882354)",Exp2,-0.617647,0.169118


In [54]:
summary = summarize_by_group(p_df)
summary.to_csv("../output/Fp_wirehang_cochranQ_I2.csv")
summary

Unnamed: 0,group,pooled_FE,Q,df,p,I2 (%)
0,"(ASO_SPF, ASO_SPF-Fp)",-0.244507,0.00241,1,0.960842,0.0
1,"(WT_SPF, ASO_SPF)",0.929239,0.997635,1,0.317883,0.0
2,"(WT_SPF, ASO_SPF-Fp)",0.740043,0.249885,1,0.617156,0.0


### Model assessment

In [55]:
for i in group_vals.values():
    print(len(i))

23
25
28


In [56]:
models = ['lognormal', 'lognormal_mixed', 'normal', 'normal_mixed']
mles_dict = {}

AICs = pd.DataFrame(index = models)


for model in models:
    mles_dict[model] = {}
    for group in group_vals.keys():
        if len(group_vals[group]) != 0:
            params = mle_iid(group_vals[group], thresh=60.0, model=model)
        
            mles_dict[model][group] = params
    
            _llk = log_like(params, group_vals[group], thresh=60.0, model = model)
            AICs.loc[model, group] = -2 * (_llk - len(params))
    

In [57]:
AICs

Unnamed: 0,WT_SPF,ASO_SPF,ASO_SPF-Fp
lognormal,105.792411,174.301747,194.660255
lognormal_mixed,105.671082,175.850213,188.337896
normal,111.374015,200.192633,227.886329
normal_mixed,108.615262,187.079259,192.381344


### Lognormal

In [58]:
ln_mles = mles_dict['lognormal_mixed']
ln_mles

{'WT_SPF': array([3.3639137 , 0.4919686 , 0.41993212]),
 'ASO_SPF': array([2.15533901, 0.88083337, 0.97348677]),
 'ASO_SPF-Fp': array([2.44198339, 0.59363653, 0.85945423])}

Drawing bootstrap replicates of parameter MLEs for every group in the experiment.  
**Choose an appropriate amount for <code>n_jobs</code> according to the number of cores available on the machine. Windows users might need to set it to 1 to make it work. There is another option of using previously generated bootstrap samples - refer to the code below the following coding cell.**

In [59]:
bs_mle_samples_mx_ln = {}

for group in group_vals.keys():
    print(group)

    bs_mle_sample = bebi103.bootstrap.draw_bs_reps_mle(
        mle_iid,
        gen_lognormal_mixed,
        data=group_vals[group],
        mle_args=('lognormal_mixed', 60.0),
        gen_args=(60.0, ),
        size=10000,
        n_jobs=7
    )

    _df = pd.DataFrame(bs_mle_sample, columns=['mu', 'sigma', 'omega'])
    _df.to_csv(os.path.join("..", "output", "Fp_wirehang_bs_mle_samples"+group+".csv"))

    bs_mle_samples_mx_ln[group] = bs_mle_sample

WT_SPF
ASO_SPF
ASO_SPF-Fp


**If you are unable to run the code cell above or it takes too long, you can use the code below.**

In [60]:
for group in group_vals.keys():
    _df = pd.read_csv(os.path.join("..", "output", "Fp_wirehang_bs_mle_samples"+group+".csv"), index_col=0)
    bs_mle_samples_mx_ln[group] = _df.to_numpy()

#### For plotting:

In [61]:
mu_lst_mx_ln = []
sigma_lst_mx_ln = []
omega_lst_mx_ln = []

mus_mx_ln = {}
sigmas_mx_ln = {}
omega_mx_ln = {}

for group in group_vals:
    _smpls = bs_mle_samples_mx_ln[group]
    _mu_CI = np.percentile(_smpls[:, 0], [2.5, 97.5])
    _sigma_CI = np.percentile(_smpls[:, 1], [2.5, 97.5])
    _omega_CI = np.percentile(_smpls[:, 2], [2.5, 97.5])

    mu_mle, sigma_mle, omega_mle = ln_mles[group]

    _mu_dct = { 'label':group, 'conf_int':_mu_CI, 'estimate':mu_mle}
    _sigma_dct = { 'label':group, 'conf_int':_sigma_CI, 'estimate':sigma_mle}
    _omega_dct = { 'label':group, 'conf_int':_omega_CI, 'estimate':omega_mle}

    mu_lst_mx_ln.append(_mu_dct)
    sigma_lst_mx_ln.append(_sigma_dct)
    omega_lst_mx_ln.append(_omega_dct)

### Graphical model assessment

In [62]:
bs_samples_mx_ln = {}
rng = np.random.default_rng()

for group in group_vals:

    params = ln_mles[group]
    single_samples = np.array([gen_lognormal_mixed(params, thresh=60.0, size=len(group_vals[group]), rng=rng) for _ in range(100000)])
    bs_samples_mx_ln[group] = single_samples


In [63]:
qqplots_mx_ln =[]
precdf_plots_mx_ln = []

for group in group_vals:

    p = bebi103.viz.qqplot(
        data=group_vals[group],
        samples=bs_samples_mx_ln[group],
        x_axis_label="time of descent",
        y_axis_label="time of descent",
        title=group + ' Q-Q plot'       
    )
    qqplots_mx_ln.append(p)

    p1 = bebi103.viz.predictive_ecdf(
        samples=bs_samples_mx_ln[group], 
        data=group_vals[group], 
        x_axis_label="time",
        title=group + ' predictive ECDF'
    )
    precdf_plots_mx_ln.append(p1)

qq_lt_mx_ln = bokeh.layouts.row(qqplots_mx_ln)

bokeh.io.show(qq_lt_mx_ln)

In [64]:
pe_lt_mx_ln = bokeh.layouts.row(precdf_plots_mx_ln)

bokeh.io.show(pe_lt_mx_ln)

### Generating 95% confidence intervals for the parameter MLEs

In [65]:
p = bebi103.viz.confints(
    mu_lst_mx_ln,
    title='mu 95%CI, lognormal mixed model',
    palette=palette_CI
)
p.output_backend = "svg"
bokeh.io.show(p)
bokeh.io.export_svgs(p, filename=os.path.join("..", "figures", "Fp_wirehang_mu_CI.svg"))
bokeh.io.save(
    p,
    filename=('../figures/Fp_wirehang_mu_CI.html'),
    title='Bokeh plot',
    resources=bokeh.resources.CDN)

'/Users/anastasiaoguienko/git/amoiseyenko2025/figures/Fp_wirehang_mu_CI.html'

In [66]:
p = bebi103.viz.confints(
    sigma_lst_mx_ln,
    title='sigma 95%CI, lognormal mixed model',
    palette=palette_CI
)
p.output_backend = "svg"
bokeh.io.show(p)
bokeh.io.export_svgs(p, filename=os.path.join("..", "figures", "Fp_wirehang_sigma_CI.svg"))
bokeh.io.save(
    p,
    filename=('../figures/Fp_wirehang_sigma_CI.html'),
    title='Bokeh plot',
    resources=bokeh.resources.CDN)

'/Users/anastasiaoguienko/git/amoiseyenko2025/figures/Fp_wirehang_sigma_CI.html'

In [67]:
p = bebi103.viz.confints(
    omega_lst_mx_ln,
    title='omega 95%CI, lognormal mixed model',
    palette=palette_CI
)
p.output_backend = "svg"
bokeh.io.show(p)
bokeh.io.export_svgs(p, filename=os.path.join("..", "figures", "Fp_wirehang_omega_CI.svg"))
bokeh.io.save(
    p,
    filename=('../figures/Fp_wirehang_omega_CI.html'),
    title='Bokeh plot',
    resources=bokeh.resources.CDN)

'/Users/anastasiaoguienko/git/amoiseyenko2025/figures/Fp_wirehang_omega_CI.html'

## Hindlimb

In [69]:
work_df = data_dict['Hindlimb'].copy()
group_vals = {}

effect_1 = 'Genotype'
effect_2 = 'Microbiome'

effect1_lst = work_df[effect_1].unique()
effect2_lst = work_df[effect_2].unique()

for i in effect1_lst:
    for j in effect2_lst:
        name = i + '_' + j
        n = work_df.loc[(work_df[effect_1] == i) & (work_df[effect_2] == j), 'Measurement'].values
        if len(n) != 0:
            group_vals[name] = n

### Pooling analysis

In [70]:
pairs = list(itertools.combinations(group_vals.keys(), 2))
cd_1 = {}
cd_ci_1 = {}
cd_2 = {}
cd_ci_2 = {}

cd = [cd_1, cd_2]
cd_ci = [cd_ci_1, cd_ci_2]

for num, exp in enumerate(["Exp1", "Exp2"]):
    
    sub_df = work_df.loc[work_df["Experiment"] == exp]
    group_values = {}
    
    effect_1 = 'Genotype'
    effect_2 = 'Microbiome'
    
    effect1_lst = sub_df[effect_1].unique()
    effect2_lst = sub_df[effect_2].unique()
    
    for i in effect1_lst:
        for j in effect2_lst:
            name = i + '_' + j
            n = sub_df.loc[(sub_df[effect_1] == i) & (sub_df[effect_2] == j), 'Measurement'].values
            if len(n) != 0:
                group_values[name] = n

    for i in pairs:
        g1, g2 = i
        n1 = group_values[g1]
        n2 = group_values[g2]
        res = cliffs_delta_bootstrap_bca(n1, n2)
        cd[num][i] = res[0]
        cd_ci[num][i] = res[1]

p_df_1 = pd.DataFrame({"group": pairs,
        "Cliff's delta": list(cd[0].values()),
        "Cdelta CI": list(cd_ci[0].values())
       })

p_df_1["Exp"] = "Exp1"

p_df_2 = pd.DataFrame({"group": pairs,
        "Cliff's delta": list(cd[1].values()),
        "Cdelta CI": list(cd_ci[1].values())
       })

p_df_2["Exp"] = "Exp2"

p_df = pd.concat((p_df_1, p_df_2))
p_df[['CI_lower', 'CI_higher']] = p_df['Cdelta CI'].apply(pd.Series)
p_df.to_csv("../output/Fp_hindlimb_cliffs_delta.csv")
p_df

Unnamed: 0,group,Cliff's delta,Cdelta CI,Exp,CI_lower,CI_higher
0,"(WT_SPF, ASO_SPF)",-0.89899,"(-0.98989898989899, -0.5757575757575758)",Exp1,-0.989899,-0.575758
1,"(WT_SPF, ASO_SPF-Fp)",-0.719697,"(-0.9242424242424242, -0.29545454545454547)",Exp1,-0.924242,-0.295455
2,"(ASO_SPF, ASO_SPF-Fp)",0.481481,"(-0.07407407407407407, 0.8148148148148148)",Exp1,-0.074074,0.814815
0,"(WT_SPF, ASO_SPF)",-0.984375,"(-1.0, -0.8958333333333334)",Exp2,-1.0,-0.895833
1,"(WT_SPF, ASO_SPF-Fp)",-0.294118,"(-0.6078431372549019, 0.0392156862745098)",Exp2,-0.607843,0.039216
2,"(ASO_SPF, ASO_SPF-Fp)",0.669118,"(0.25735294117647056, 0.8823529411764706)",Exp2,0.257353,0.882353


In [71]:
summary = summarize_by_group(p_df)
summary.to_csv("../output/Fp_hindlimb_cochranQ_I2.csv")
summary

Unnamed: 0,group,pooled_FE,Q,df,p,I2 (%)
0,"(ASO_SPF, ASO_SPF-Fp)",0.607042,0.458192,1,0.49847,0.0
1,"(WT_SPF, ASO_SPF)",-0.979295,0.614324,1,0.433164,0.0
2,"(WT_SPF, ASO_SPF-Fp)",-0.513001,3.418827,1,0.064457,70.750201


## Bead expulsion

In [72]:
work_df = data_dict['Bead_exp'].copy()
group_vals = {}

effect_1 = 'Genotype'
effect_2 = 'Microbiome'

effect1_lst = work_df[effect_1].unique()
effect2_lst = work_df[effect_2].unique()

for i in effect1_lst:
    for j in effect2_lst:
        name = i + '_' + j
        n = work_df.loc[(work_df[effect_1] == i) & (work_df[effect_2] == j), 'Measurement'].values
        if len(n) != 0:
            group_vals[name] = n.astype('int32')

### Pooling analysis

In [73]:
pairs = list(itertools.combinations(group_vals.keys(), 2))
cd_1 = {}
cd_ci_1 = {}
cd_2 = {}
cd_ci_2 = {}

cd = [cd_1, cd_2]
cd_ci = [cd_ci_1, cd_ci_2]

for num, exp in enumerate(["Exp1", "Exp2"]):
    
    sub_df = work_df.loc[work_df["Experiment"] == exp]
    group_values = {}
    
    effect_1 = 'Genotype'
    effect_2 = 'Microbiome'
    
    effect1_lst = sub_df[effect_1].unique()
    effect2_lst = sub_df[effect_2].unique()
    
    for i in effect1_lst:
        for j in effect2_lst:
            name = i + '_' + j
            n = sub_df.loc[(sub_df[effect_1] == i) & (sub_df[effect_2] == j), 'Measurement'].values
            if len(n) != 0:
                group_values[name] = n

    for i in pairs:
        g1, g2 = i
        n1 = group_values[g1]
        n2 = group_values[g2]
        res = cliffs_delta_bootstrap_bca(n1, n2)
        cd[num][i] = res[0]
        cd_ci[num][i] = res[1]

p_df_1 = pd.DataFrame({"group": pairs,
        "Cliff's delta": list(cd[0].values()),
        "Cdelta CI": list(cd_ci[0].values())
       })

p_df_1["Exp"] = "Exp1"

p_df_2 = pd.DataFrame({"group": pairs,
        "Cliff's delta": list(cd[1].values()),
        "Cdelta CI": list(cd_ci[1].values())
       })

p_df_2["Exp"] = "Exp2"

p_df = pd.concat((p_df_1, p_df_2))
p_df[['CI_lower', 'CI_higher']] = p_df['Cdelta CI'].apply(pd.Series)
p_df.to_csv("../output/Fp_beadexp_cliffs_delta.csv")
p_df

Unnamed: 0,group,Cliff's delta,Cdelta CI,Exp,CI_lower,CI_higher
0,"(WT_SPF, ASO_SPF)",-0.343434,"(-0.7777777777777778, 0.2222222222222222)",Exp1,-0.777778,0.222222
1,"(WT_SPF, ASO_SPF-Fp)",-0.189394,"(-0.6666666666666666, 0.36363636363636365)",Exp1,-0.666667,0.363636
2,"(ASO_SPF, ASO_SPF-Fp)",0.361111,"(-0.2222222222222222, 0.7592592592592593)",Exp1,-0.222222,0.759259
0,"(WT_SPF, ASO_SPF)",-0.886364,"(-1.0, -0.45454545454545453)",Exp2,-1.0,-0.454545
1,"(WT_SPF, ASO_SPF-Fp)",-0.219251,"(-0.6203208556149733, 0.24598930481283424)",Exp2,-0.620321,0.245989
2,"(ASO_SPF, ASO_SPF-Fp)",0.713235,"(0.34558823529411764, 0.8970588235294118)",Exp2,0.345588,0.897059


In [74]:
summary = summarize_by_group(p_df)
summary.to_csv("../output/Fp_beadexp_cochranQ_I2.csv")
summary

Unnamed: 0,group,pooled_FE,Q,df,p,I2 (%)
0,"(ASO_SPF, ASO_SPF-Fp)",0.628743,1.503285,1,0.220167,33.479015
1,"(WT_SPF, ASO_SPF)",-0.76187,3.490956,1,0.061705,71.354553
2,"(WT_SPF, ASO_SPF-Fp)",-0.206885,0.00756,1,0.930713,0.0


## Fecal output

In [77]:
work_df = data_dict['Fecal_output'].copy()
group_vals = {}

effect_1 = 'Genotype'
effect_2 = 'Microbiome'

effect1_lst = work_df[effect_1].unique()
effect2_lst = work_df[effect_2].unique()

for i in effect1_lst:
    for j in effect2_lst:
        name = i + '_' + j
        n = work_df.loc[(work_df[effect_1] == i) & (work_df[effect_2] == j), 'Measurement'].values
        if len(n) != 0:
            group_vals[name] = n.astype('int32')

### Pooling analysis

In [78]:
pairs = list(itertools.combinations(group_vals.keys(), 2))
cd_1 = {}
cd_ci_1 = {}
cd_2 = {}
cd_ci_2 = {}

cd = [cd_1, cd_2]
cd_ci = [cd_ci_1, cd_ci_2]

for num, exp in enumerate(["Exp1", "Exp2"]):
    
    sub_df = work_df.loc[work_df["Experiment"] == exp]
    group_values = {}
    
    effect_1 = 'Genotype'
    effect_2 = 'Microbiome'
    
    effect1_lst = sub_df[effect_1].unique()
    effect2_lst = sub_df[effect_2].unique()
    
    for i in effect1_lst:
        for j in effect2_lst:
            name = i + '_' + j
            n = sub_df.loc[(sub_df[effect_1] == i) & (sub_df[effect_2] == j), 'Measurement'].values
            if len(n) != 0:
                group_values[name] = n

    for i in pairs:
        g1, g2 = i
        n1 = group_values[g1]
        n2 = group_values[g2]
        res = cliffs_delta_bootstrap_bca(n1, n2)
        cd[num][i] = res[0]
        cd_ci[num][i] = res[1]

p_df_1 = pd.DataFrame({"group": pairs,
        "Cliff's delta": list(cd[0].values()),
        "Cdelta CI": list(cd_ci[0].values())
       })

p_df_1["Exp"] = "Exp1"

p_df_2 = pd.DataFrame({"group": pairs,
        "Cliff's delta": list(cd[1].values()),
        "Cdelta CI": list(cd_ci[1].values())
       })

p_df_2["Exp"] = "Exp2"

p_df = pd.concat((p_df_1, p_df_2))
p_df[['CI_lower', 'CI_higher']] = p_df['Cdelta CI'].apply(pd.Series)
p_df.to_csv("../output/Fp_feacloutput_cliffs_delta.csv")
p_df

Unnamed: 0,group,Cliff's delta,Cdelta CI,Exp,CI_lower,CI_higher
0,"(WT_SPF, ASO_SPF)",-0.070707,"(-0.5959595959595959, 0.45454545454545453)",Exp1,-0.59596,0.454545
1,"(WT_SPF, ASO_SPF-Fp)",0.166667,"(-0.3560606060606061, 0.6060606060606061)",Exp1,-0.356061,0.606061
2,"(ASO_SPF, ASO_SPF-Fp)",0.277778,"(-0.26851851851851855, 0.6851851851851852)",Exp1,-0.268519,0.685185
0,"(WT_SPF, ASO_SPF)",0.260417,"(-0.203125, 0.625)",Exp2,-0.203125,0.625
1,"(WT_SPF, ASO_SPF-Fp)",0.147059,"(-0.29901960784313725, 0.5294117647058824)",Exp2,-0.29902,0.529412
2,"(ASO_SPF, ASO_SPF-Fp)",-0.117647,"(-0.4963235294117647, 0.29044117647058826)",Exp2,-0.496324,0.290441


In [79]:
summary = summarize_by_group(p_df)
summary.to_csv("../output/Fp_fecaloutput_cochranQ_I2.csv")
summary

Unnamed: 0,group,pooled_FE,Q,df,p,I2 (%)
0,"(ASO_SPF, ASO_SPF-Fp)",0.042483,1.571884,1,0.209934,36.382079
1,"(WT_SPF, ASO_SPF)",0.133509,0.94158,1,0.331872,0.0
2,"(WT_SPF, ASO_SPF-Fp)",0.155407,0.003665,1,0.951726,0.0


## Fecal score

In [80]:
work_df = data_dict['Fecal_score'].copy()
group_vals = {}

effect_1 = 'Genotype'
effect_2 = 'Microbiome'

effect1_lst = work_df[effect_1].unique()
effect2_lst = work_df[effect_2].unique()

for i in effect1_lst:
    for j in effect2_lst:
        name = i + '_' + j
        n = work_df.loc[(work_df[effect_1] == i) & (work_df[effect_2] == j), 'Measurement'].values
        if len(n) != 0:
            group_vals[name] = n

### Pooling analysis

In [81]:
pairs = list(itertools.combinations(group_vals.keys(), 2))
cd_1 = {}
cd_ci_1 = {}
cd_2 = {}
cd_ci_2 = {}

cd = [cd_1, cd_2]
cd_ci = [cd_ci_1, cd_ci_2]

for num, exp in enumerate(["Exp1", "Exp2"]):
    
    sub_df = work_df.loc[work_df["Experiment"] == exp]
    group_values = {}
    
    effect_1 = 'Genotype'
    effect_2 = 'Microbiome'
    
    effect1_lst = sub_df[effect_1].unique()
    effect2_lst = sub_df[effect_2].unique()
    
    for i in effect1_lst:
        for j in effect2_lst:
            name = i + '_' + j
            n = sub_df.loc[(sub_df[effect_1] == i) & (sub_df[effect_2] == j), 'Measurement'].values
            if len(n) != 0:
                group_values[name] = n

    for i in pairs:
        g1, g2 = i
        n1 = group_values[g1]
        n2 = group_values[g2]
        res = cliffs_delta_bootstrap_bca(n1, n2)
        cd[num][i] = res[0]
        cd_ci[num][i] = res[1]

p_df_1 = pd.DataFrame({"group": pairs,
        "Cliff's delta": list(cd[0].values()),
        "Cdelta CI": list(cd_ci[0].values())
       })

p_df_1["Exp"] = "Exp1"

p_df_2 = pd.DataFrame({"group": pairs,
        "Cliff's delta": list(cd[1].values()),
        "Cdelta CI": list(cd_ci[1].values())
       })

p_df_2["Exp"] = "Exp2"

p_df = pd.concat((p_df_1, p_df_2))
p_df[['CI_lower', 'CI_higher']] = p_df['Cdelta CI'].apply(pd.Series)
p_df.to_csv("../output/Fp_fecalscore_cliffs_delta.csv")
p_df

Unnamed: 0,group,Cliff's delta,Cdelta CI,Exp,CI_lower,CI_higher
0,"(WT_SPF, ASO_SPF)",0.353535,"(-0.1919191919191919, 0.7373737373737373)",Exp1,-0.191919,0.737374
1,"(WT_SPF, ASO_SPF-Fp)",-0.121212,"(-0.5606060606060606, 0.3484848484848485)",Exp1,-0.560606,0.348485
2,"(ASO_SPF, ASO_SPF-Fp)",-0.481481,"(-0.8333333333333334, 0.027777777777777776)",Exp1,-0.833333,0.027778
0,"(WT_SPF, ASO_SPF)",0.966667,"(0.8055555555555556, 0.9944444444444445)",Exp2,0.805556,0.994444
1,"(WT_SPF, ASO_SPF-Fp)",0.259804,"(-0.18137254901960784, 0.6274509803921569)",Exp2,-0.181373,0.627451
2,"(ASO_SPF, ASO_SPF-Fp)",-0.964706,"(-1.0, -0.8627450980392157)",Exp2,-1.0,-0.862745


In [82]:
summary = summarize_by_group(p_df)
summary.to_csv("../output/Fp_fecalscore_cochranQ_I2.csv")
summary

Unnamed: 0,group,pooled_FE,Q,df,p,I2 (%)
0,"(ASO_SPF, ASO_SPF-Fp)",-0.952733,4.719061,1,0.02983,78.809343
1,"(WT_SPF, ASO_SPF)",0.94234,6.423796,1,0.01126,84.432881
2,"(WT_SPF, ASO_SPF-Fp)",0.091459,1.506637,1,0.219653,33.627016


## Water content

In [83]:
work_df = data_dict['Water_content'].copy()
group_vals = {}

effect_1 = 'Genotype'
effect_2 = 'Microbiome'

effect1_lst = work_df[effect_1].unique()
effect2_lst = work_df[effect_2].unique()


for i in effect1_lst:
    for j in effect2_lst:
        name = i + '_' + j
        n = work_df.loc[(work_df[effect_1] == i) & (work_df[effect_2] == j), 'Measurement'].values
        if len(n) != 0:
            group_vals[name] = n

### Pooling analysis

In [84]:
pairs = list(itertools.combinations(group_vals.keys(), 2))
cd_1 = {}
cd_ci_1 = {}
cd_2 = {}
cd_ci_2 = {}

cd = [cd_1, cd_2]
cd_ci = [cd_ci_1, cd_ci_2]

for num, exp in enumerate(["Exp1", "Exp2"]):
    
    sub_df = work_df.loc[work_df["Experiment"] == exp]
    group_values = {}
    
    effect_1 = 'Genotype'
    effect_2 = 'Microbiome'
    
    effect1_lst = sub_df[effect_1].unique()
    effect2_lst = sub_df[effect_2].unique()
    
    for i in effect1_lst:
        for j in effect2_lst:
            name = i + '_' + j
            n = sub_df.loc[(sub_df[effect_1] == i) & (sub_df[effect_2] == j), 'Measurement'].values
            if len(n) != 0:
                group_values[name] = n

    for i in pairs:
        g1, g2 = i
        n1 = group_values[g1]
        n2 = group_values[g2]
        res = cliffs_delta_bootstrap_bca(n1, n2)
        cd[num][i] = res[0]
        cd_ci[num][i] = res[1]

p_df_1 = pd.DataFrame({"group": pairs,
        "Cliff's delta": list(cd[0].values()),
        "Cdelta CI": list(cd_ci[0].values())
       })

p_df_1["Exp"] = "Exp1"

p_df_2 = pd.DataFrame({"group": pairs,
        "Cliff's delta": list(cd[1].values()),
        "Cdelta CI": list(cd_ci[1].values())
       })

p_df_2["Exp"] = "Exp2"

p_df = pd.concat((p_df_1, p_df_2))
p_df[['CI_lower', 'CI_higher']] = p_df['Cdelta CI'].apply(pd.Series)
p_df.to_csv("../output/Fp_watercont_cliffs_delta.csv")
p_df

Unnamed: 0,group,Cliff's delta,Cdelta CI,Exp,CI_lower,CI_higher
0,"(WT_SPF, ASO_SPF)",0.222222,"(-0.37777777777777777, 0.6888888888888889)",Exp1,-0.377778,0.688889
1,"(WT_SPF, ASO_SPF-Fp)",0.15,"(-0.4, 0.6)",Exp1,-0.4,0.6
2,"(ASO_SPF, ASO_SPF-Fp)",0.037037,"(-0.5370370370370371, 0.5555555555555556)",Exp1,-0.537037,0.555556
0,"(WT_SPF, ASO_SPF)",0.822222,"(0.4, 0.9666666666666667)",Exp2,0.4,0.966667
1,"(WT_SPF, ASO_SPF-Fp)",0.691176,"(0.27450980392156865, 0.9019607843137255)",Exp2,0.27451,0.901961
2,"(ASO_SPF, ASO_SPF-Fp)",-0.290196,"(-0.6392156862745098, 0.13725490196078433)",Exp2,-0.639216,0.137255


In [85]:
summary = summarize_by_group(p_df)
summary.to_csv("../output/Fp_watercont_cochranQ_I2.csv")
summary

Unnamed: 0,group,pooled_FE,Q,df,p,I2 (%)
0,"(ASO_SPF, ASO_SPF-Fp)",-0.180387,0.91584,1,0.33857,0.0
1,"(WT_SPF, ASO_SPF)",0.690158,3.791861,1,0.051502,73.627726
2,"(WT_SPF, ASO_SPF-Fp)",0.538303,3.229106,1,0.07234,69.031675


## Gut transit

In [86]:
work_df = data_dict['Gut_transit']
group_vals = {}

effect_1 = 'Genotype'
effect_2 = 'Microbiome'

effect1_lst = work_df[effect_1].unique()
effect2_lst = work_df[effect_2].unique()

for i in effect1_lst:
    for j in effect2_lst:
        name = i + '_' + j
        n = work_df.loc[(work_df[effect_1] == i) & (work_df[effect_2] == j), 'Measurement'].values
        if len(n) != 0:
            group_vals[name] = n.astype('int32')

### Pooling analysis

In [87]:
pairs = list(itertools.combinations(group_vals.keys(), 2))
cd_1 = {}
cd_ci_1 = {}
cd_2 = {}
cd_ci_2 = {}

cd = [cd_1, cd_2]
cd_ci = [cd_ci_1, cd_ci_2]

for num, exp in enumerate(["Exp1", "Exp2"]):
    
    sub_df = work_df.loc[work_df["Experiment"] == exp]
    group_values = {}
    
    effect_1 = 'Genotype'
    effect_2 = 'Microbiome'
    
    effect1_lst = sub_df[effect_1].unique()
    effect2_lst = sub_df[effect_2].unique()
    
    for i in effect1_lst:
        for j in effect2_lst:
            name = i + '_' + j
            n = sub_df.loc[(sub_df[effect_1] == i) & (sub_df[effect_2] == j), 'Measurement'].values
            if len(n) != 0:
                group_values[name] = n

    for i in pairs:
        g1, g2 = i
        n1 = group_values[g1]
        n2 = group_values[g2]
        res = cliffs_delta_bootstrap_bca(n1, n2)
        cd[num][i] = res[0]
        cd_ci[num][i] = res[1]

p_df_1 = pd.DataFrame({"group": pairs,
        "Cliff's delta": list(cd[0].values()),
        "Cdelta CI": list(cd_ci[0].values())
       })

p_df_1["Exp"] = "Exp1"

p_df_2 = pd.DataFrame({"group": pairs,
        "Cliff's delta": list(cd[1].values()),
        "Cdelta CI": list(cd_ci[1].values())
       })

p_df_2["Exp"] = "Exp2"

p_df = pd.concat((p_df_1, p_df_2))
p_df[['CI_lower', 'CI_higher']] = p_df['Cdelta CI'].apply(pd.Series)
p_df.to_csv("../output/Fp_guttransit_cliffs_delta.csv")
p_df

Unnamed: 0,group,Cliff's delta,Cdelta CI,Exp,CI_lower,CI_higher
0,"(WT_SPF, ASO_SPF)",-0.232323,"(-0.696969696969697, 0.31313131313131315)",Exp1,-0.69697,0.313131
1,"(WT_SPF, ASO_SPF-Fp)",-0.333333,"(-0.7348484848484849, 0.16666666666666666)",Exp1,-0.734848,0.166667
2,"(ASO_SPF, ASO_SPF-Fp)",-0.148148,"(-0.6296296296296297, 0.37037037037037035)",Exp1,-0.62963,0.37037
0,"(WT_SPF, ASO_SPF)",-0.536458,"(-0.84375, -0.052083333333333336)",Exp2,-0.84375,-0.052083
1,"(WT_SPF, ASO_SPF-Fp)",0.352941,"(-0.10784313725490197, 0.6862745098039216)",Exp2,-0.107843,0.686275
2,"(ASO_SPF, ASO_SPF-Fp)",0.610294,"(0.22058823529411764, 0.8382352941176471)",Exp2,0.220588,0.838235


In [88]:
summary = summarize_by_group(p_df)
summary.to_csv("../output/Fp_guttransit_cochranQ_I2.csv")
summary

Unnamed: 0,group,pooled_FE,Q,df,p,I2 (%)
0,"(ASO_SPF, ASO_SPF-Fp)",0.400856,6.398381,1,0.011422,84.371047
1,"(WT_SPF, ASO_SPF)",-0.420728,0.862981,1,0.352906,0.0
2,"(WT_SPF, ASO_SPF-Fp)",0.053097,5.014129,1,0.025141,80.056356


## Beam steps/slips

In [89]:
work_df = data_dict['Beam_steps']
group_vals = {}

effect_1 = 'Genotype'
effect_2 = 'Microbiome'

effect1_lst = work_df[effect_1].unique()
effect2_lst = work_df[effect_2].unique()

for i in effect1_lst:
    for j in effect2_lst:
        name = i + '_' + j
        n = work_df.loc[(work_df[effect_1] == i) & (work_df[effect_2] == j), 'Measurement'].values
        if len(n) != 0:
            group_vals[name] = n

### Pooling analysis

In [90]:
pairs = list(itertools.combinations(group_vals.keys(), 2))
cd_1 = {}
cd_ci_1 = {}
cd_2 = {}
cd_ci_2 = {}

cd = [cd_1, cd_2]
cd_ci = [cd_ci_1, cd_ci_2]

for num, exp in enumerate(["Exp1", "Exp2"]):
    
    sub_df = work_df.loc[work_df["Experiment"] == exp]
    group_values = {}
    
    effect_1 = 'Genotype'
    effect_2 = 'Microbiome'
    
    effect1_lst = sub_df[effect_1].unique()
    effect2_lst = sub_df[effect_2].unique()
    
    for i in effect1_lst:
        for j in effect2_lst:
            name = i + '_' + j
            n = sub_df.loc[(sub_df[effect_1] == i) & (sub_df[effect_2] == j), 'Measurement'].values
            if len(n) != 0:
                group_values[name] = n

    for i in pairs:
        g1, g2 = i
        n1 = group_values[g1]
        n2 = group_values[g2]
        res = cliffs_delta_bootstrap_bca(n1, n2)
        cd[num][i] = res[0]
        cd_ci[num][i] = res[1]

p_df_1 = pd.DataFrame({"group": pairs,
        "Cliff's delta": list(cd[0].values()),
        "Cdelta CI": list(cd_ci[0].values())
       })

p_df_1["Exp"] = "Exp1"

p_df_2 = pd.DataFrame({"group": pairs,
        "Cliff's delta": list(cd[1].values()),
        "Cdelta CI": list(cd_ci[1].values())
       })

p_df_2["Exp"] = "Exp2"

p_df = pd.concat((p_df_1, p_df_2))
p_df[['CI_lower', 'CI_higher']] = p_df['Cdelta CI'].apply(pd.Series)
p_df.to_csv("../output/Fp_beamslips_cliffs_delta.csv")
p_df

Unnamed: 0,group,Cliff's delta,Cdelta CI,Exp,CI_lower,CI_higher
0,"(WT_SPF, ASO_SPF)",-0.948052,"(-1.0, -0.6883116883116883)",Exp1,-1.0,-0.688312
1,"(WT_SPF, ASO_SPF-Fp)",-0.842975,"(-1.0, -0.35537190082644626)",Exp1,-1.0,-0.355372
2,"(ASO_SPF, ASO_SPF-Fp)",0.246753,"(-0.4025974025974026, 0.7272727272727273)",Exp1,-0.402597,0.727273
0,"(WT_SPF, ASO_SPF)",-1.0,"(-1.0, -1.0)",Exp2,-1.0,-1.0
1,"(WT_SPF, ASO_SPF-Fp)",-0.828125,"(-0.9791666666666666, -0.4270833333333333)",Exp2,-0.979167,-0.427083
2,"(ASO_SPF, ASO_SPF-Fp)",0.7125,"(0.275, 0.925)",Exp2,0.275,0.925


In [91]:
summary = summarize_by_group(p_df)
summary.to_csv("../output/Fp_beamslips_cochranQ_I2.csv")
summary

  w = 1.0 / var
  pooled_FE = np.sum(w * effect) / np.sum(w)


Unnamed: 0,group,pooled_FE,Q,df,p,I2 (%)
0,"(ASO_SPF, ASO_SPF-Fp)",0.596687,1.961784,1,0.161323,49.02599
1,"(WT_SPF, ASO_SPF)",,,1,,0.0
2,"(WT_SPF, ASO_SPF-Fp)",-0.834409,0.004704,1,0.945317,0.0
