In [1]:
%load_ext autoreload
%autoreload 2
%reload_ext autoreload
from scipy.stats import spearmanr
import re
import arviz as az
import pymc as pm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import seaborn as sns
import cellbayesassay as cba
import pickle
import re
import os.path
import attila_utils

## Import data

In [2]:
# Build the lookup table test item (TI) x dose label -> concentration from the
# second sheet of the outcome summary workbook.
fpath = '../../resources/cell-based-assays/CO28151_outcome-summary_27062023-clean.xlsx'
conc2concentration = pd.read_excel(fpath, 1).rename({'TI nr.': 'TI'}, axis=1)
# Split the columns into the dose-level columns c1..c6 and everything else.
cN_columns = [c for c in conc2concentration.columns if re.match('^c[1-6]$', c)]
other_columns = [c for c in conc2concentration.columns if not re.match('^c[1-6]$', c)]
# Scale the dose levels by 1e-6 (presumably µM -> M; confirm against the workbook).
# Plain arithmetic replaces the element-wise DataFrame.applymap, which is deprecated
# in recent pandas (see the warning this cell used to emit).
conc2concentration = pd.concat([conc2concentration[other_columns], conc2concentration[cN_columns] * 1e-6], axis=1)
# The control "concentration" is 1e-3 times c6, or 1e-3 times c3 where c6 is missing.
conc2concentration['VC'] = conc2concentration['c6'].fillna(conc2concentration['c3']) * 1e-3
conc2concentration['LPS'] = conc2concentration['VC']
conc2concentration = conc2concentration.set_index('TI')
conc2concentration

  conc2concentration = pd.concat([conc2concentration[other_columns], conc2concentration[cN_columns].applymap(lambda x: x * 1e-6)], axis=1)


Unnamed: 0_level_0,TI ID,name,c1,c2,c3,c4,c5,c6,VC,LPS
TI,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
TI1,2542,Amiloride,0.0001,1e-05,1e-06,,,,1e-09,1e-09
TI2,2543,Phenamil methanesulfonate salt,0.0001,1e-05,1e-06,,,,1e-09,1e-09
TI3,2538,Benzamil hydrochloride hydrate,0.0001,1e-05,1e-06,,,,1e-09,1e-09
TI4,2544,"5-(N,N-Dimethyl)amiloride hydrochloride",0.0001,1e-05,1e-06,,,,1e-09,1e-09
TI5,2540,5-(N-Ethyl-N-isopropyl)amiloride,0.0001,1e-05,1e-06,,,,1e-09,1e-09
TI6,2541,"5-(N,N-Hexamethylene)amiloride",1e-05,1e-06,2e-07,,,,2e-10,2e-10
TI7,2539,5-(N-Methyl-N-isobutyl)amiloride,0.0001,1e-05,1e-06,,,,1e-09,1e-09
TI8,2719,NS 383,0.0001,1e-05,1e-06,,,,1e-09,1e-09
TI9,2703,A-317567,0.0001,1e-05,1e-06,,,,1e-09,1e-09
TI10,2816,Cilostazol,5e-05,5e-06,5e-07,,,,5e-10,5e-10


In [3]:
# Workbook with the raw measurements; extract_TI_exp_data reads one sheet per experiment.
fpath = '../../resources/cell-based-assays/CO28151_data-summary-clean.xlsx'

# Treatments (test items) handled in this notebook, keyed by TI identifier.
treatmentd = {'TI11': 'DIM-C-pPhOCH3', 'TI12': 'C-DIM12'}

# Experiments, in the order used to number them in experimentd below.
experimentl = [
    'Aβ clearance (BV2 cells)',
    'Aβ release (H4 cells)',
    'LPS neuroinflammation (BV2 cells)',
    'Tau phosphorylation',
    'Trophic factor withdrawal (primary neurons)',
]

# Control label per experiment; position i pairs with experimentl[i].
control_TIl = ['VC', 'VC', 'LPS', 'VC', 'VC']

# experiment name -> position in experimentl (passed to pd.read_excel as the sheet).
experimentd = {name: sheet for name, sheet in zip(experimentl, np.arange(len(experimentl)))}

In [4]:
def extract_TI_exp_data(experiment, TI, control_TI='VC', batchvars=['Batch', 'Plate'], experimentd=experimentd, fpath=fpath):
    """Collect the rows of one treatment together with its matching control rows.

    Parameters
    ----------
    experiment : key of ``experimentd``; selects the sheet read from ``fpath``.
    TI : value of the ``TI`` column that identifies the treatment rows.
    control_TI : value of the ``TI`` column that identifies the control rows.
    batchvars : columns whose combination must be unique across the treatment
        rows (the list default is only read, never mutated).
    experimentd : mapping experiment -> sheet argument for ``pd.read_excel``.
    fpath : path of the workbook.

    Returns
    -------
    DataFrame with the control rows (their ``conc`` overwritten with
    ``control_TI``) stacked above the treatment rows, or ``None`` (after
    printing a message) when the treatment rows do not form exactly one
    ``batchvars`` group.
    """
    data = pd.read_excel(fpath, experimentd[experiment])
    TI_data = data.loc[data.TI == TI]
    # ensure that all treatment TI data are from the same batch:plate
    if len(TI_data.groupby(batchvars)) != 1:
        print('treatment with multiple batches')
        return(None)
    TI_first = TI_data.iloc[0]
    b = data.Plate == TI_first.loc['Plate']
    # also restrict to the treatment's batch, unless the treatment rows carry no batch information
    if not pd.isna(TI_first.loc['Batch']):
        # Compare against the treatment's own batch. The first row of the whole
        # sheet (data.iloc[0]) was used before, which selects the wrong batch
        # whenever the sheet does not start with this treatment's batch.
        b = b & (data.Batch == TI_first.loc['Batch'])
    TI_exp_data = data.loc[b]
    TI_exp_data_control = TI_exp_data.loc[TI_exp_data.TI == control_TI].copy()
    # if there's no control for the same batch:plate, use controls from all other batch:plate combinations
    if len(TI_exp_data_control) == 0:
        TI_exp_data_control = data.loc[data.TI == control_TI].copy()
    # controls get the control label as their dose label
    TI_exp_data_control['conc'] = control_TI
    TI_exp_data_TI = TI_exp_data.loc[TI_exp_data.TI == TI].copy()
    TI_exp_data = pd.concat([TI_exp_data_control, TI_exp_data_TI], axis=0)
    return(TI_exp_data)

# Module-level defaults: the first treatment identifier and the first experiment.
# add_concentrations reads this module-level TI when looking up concentrations.
TI = list(treatmentd)[0]
experiment = experimentl[0]

def add_concentrations(df, conc2concentration=conc2concentration, TI=None):
    """Add ``concentration`` and ``conc_log10`` columns to ``df`` (in place) and return it.

    Parameters
    ----------
    df : DataFrame with a ``conc`` column holding dose labels, i.e. column
        names of ``conc2concentration``.
    conc2concentration : table indexed by treatment identifier with one column
        per dose label.
    TI : treatment identifier selecting the row of ``conc2concentration``.
        When omitted, the module-level ``TI`` is used, which is what this
        function always did before the parameter existed.
    """
    if TI is None:
        # backward-compatible fallback on the global treatment identifier
        TI = globals()['TI']
    levels = conc2concentration.loc[TI]
    df['concentration'] = df.conc.apply(lambda x: levels.loc[x])
    df['conc_log10'] = np.log10(df['concentration'].astype(float))
    return(df)

def process_TI_list_exp(experiment, control_TI='VC', TI_list=['TI11', 'TI12'], conc2concentration=conc2concentration):
    """Return ``{TI: data with concentrations}`` for every treatment in ``TI_list``.

    Each treatment is now looked up in ``conc2concentration`` under its own
    identifier. Previously the comprehension variable never reached
    ``add_concentrations``, so every treatment silently used the row of the
    module-level ``TI``. The ``conc2concentration`` argument is also forwarded
    instead of being ignored.

    Raises
    ------
    ValueError
        If ``extract_TI_exp_data`` returns ``None`` for a treatment.
    """
    d = {}
    for TI in TI_list:
        TI_exp_data = extract_TI_exp_data(experiment, TI, control_TI)
        if TI_exp_data is None:
            raise ValueError('no usable data for treatment {} in experiment {!r}'.format(TI, experiment))
        d[TI] = add_concentrations(TI_exp_data, conc2concentration=conc2concentration, TI=TI)
    return(d)

# Data of the earlier (2023-09-26) analysis, stored as a pickle despite the .nc suffix.
# NOTE(review): pickle.load can execute arbitrary code; only load this file if it is trusted.
# NOTE(review): the recorded run of this cell failed with
# "No module named 'pandas.core.indexes.numeric'" — presumably the pickle was written
# with an older pandas than the one installed; TODO confirm and re-export the data.
with open('../../results/2023-09-26-cell-bayes-assays/data.nc', 'rb') as f:  # close the handle after loading
    datad1 = pickle.load(f)
controls1 = pd.read_csv('TI_26_11_12-controls.csv', index_col='experiment')

# experiment name used in this notebook -> key of the same experiment in datad1
key2key_data1 = {
    'Aβ clearance (BV2 cells)': 'Abeta clearance',
    'Aβ release (H4 cells)': 'Abeta release',
    'LPS neuroinflammation (BV2 cells)': 'LPS',
    'Tau phosphorylation': 'Tau phosphorylation',
    'Trophic factor withdrawal (primary neurons)': 'GF withdrawal',
}

# experiment -> {treatment -> data}, each experiment paired with its control label
datad = {experiment: process_TI_list_exp(experiment, control_TI=cTI) for experiment, cTI in zip(experimentl, control_TIl)}

# Add the TI26 data of the earlier analysis to every experiment
for pretty_name, name1 in key2key_data1.items():
    df = datad1[name1]
    df = df.loc[~ df.TI.isin(['TI21', 'TI22'])]
    group_var, group_num = controls1.loc[pretty_name, ['group_var', 'TI26']]
    # a string group_var names the column to filter on; otherwise no filtering is applied
    if isinstance(group_var, str):
        df = df.loc[df[group_var] == group_num]
    datad[pretty_name]['TI26'] = df

ModuleNotFoundError: No module named 'pandas.core.indexes.numeric'

In [None]:
# Display the TI11 table of the third experiment listed in experimentl.
datad[experimentl[2]]['TI11']

In [None]:
# Display the TI26 table of the first experiment listed in experimentl.
datad[experimentl[0]]['TI26']

## Plot data

### Aβ clearance (BV2 cells)

In [None]:
def data_plotter(assay, data, group_var=None, sharey=True):
    """Scatter one assay against concentration, one panel per compound.

    Parameters
    ----------
    assay : name of the column plotted on the y axis.
    data : mapping compound -> DataFrame with a ``concentration`` column and
        the ``assay`` column (plus ``group_var`` if given).
    group_var : optional column; each distinct value gets its own colour
        (C0, C1, ...). Without it every point is drawn in C0.
    sharey : forwarded to ``plt.subplots``.

    Returns
    -------
    ``(fig, ax)`` exactly as returned by ``plt.subplots``.

    Raises
    ------
    ValueError
        If ``data`` is empty.
    """
    my_treatments = list(data.keys())
    if not my_treatments:
        raise ValueError('data must contain at least one compound')
    fig, ax = plt.subplots(1, len(my_treatments), sharey=sharey)
    # plt.subplots returns a bare Axes rather than an array for a single panel;
    # wrap it so that one compound can be plotted too
    for axi, compound in zip(np.atleast_1d(ax), my_treatments):
        df = data[compound].copy()
        if group_var is not None:
            groups = df[group_var].unique()
            groupd = {g: 'C' + str(i) for i, g in enumerate(groups)}
            df['color'] = df[group_var].map(groupd)
        else:
            df['color'] = 'C0'
        axi.scatter(x='concentration', y=assay, c='color', data=df, marker='+')
        axi.set_xscale('log')
        axi.set_title(compound)
    fig.suptitle(assay)
    return((fig, ax))

# TI26 stores the plate in a lower-case 'plate' column; rename it so that
# all compounds can be coloured by 'Plate'.
experiment = 'Aβ clearance (BV2 cells)'
datad[experiment]['TI26'] = datad[experiment]['TI26'].rename(columns={'plate': 'Plate'})
fig, ax = data_plotter(assay='Aβ42 Ly', data=datad[experiment], group_var='Plate', sharey=False)

In [None]:
# 'Aβ42 SN' for the current experiment, points coloured by plate, shared y axis.
fig, ax = data_plotter(assay='Aβ42 SN', data=datad[experiment], group_var='Plate', sharey=True)

### Aβ release (H4 cells)

In [None]:
# Same column harmonisation as for Aβ clearance: TI26 uses 'plate', the plots use 'Plate'.
experiment = 'Aβ release (H4 cells)'
datad[experiment]['TI26'] = datad[experiment]['TI26'].rename(columns={'plate': 'Plate'})
fig, ax = data_plotter(assay='Aβ38', data=datad[experiment], group_var='Plate', sharey=True)

In [None]:
# 'Aβ40' for the current experiment; each panel keeps its own y axis.
fig, ax = data_plotter(assay='Aβ40', data=datad[experiment], group_var='Plate', sharey=False)

In [None]:
# 'Aβ42' for the current experiment; each panel keeps its own y axis.
fig, ax = data_plotter(assay='Aβ42', data=datad[experiment], group_var='Plate', sharey=False)

### LPS neuroinflammation (BV2 cells)

In [None]:
# For this experiment TI26 carries its grouping in 'Cohort'; expose it as 'Plate'
# so that the same colouring column works for every compound.
experiment = 'LPS neuroinflammation (BV2 cells)'
datad[experiment]['TI26'] = datad[experiment]['TI26'].rename(columns={'Cohort': 'Plate'})
fig, ax = data_plotter(assay='IL-10', data=datad[experiment], group_var='Plate', sharey=True)

In [None]:
# 'IL-1β' for the current experiment, shared y axis.
fig, ax = data_plotter(assay='IL-1β', data=datad[experiment], group_var='Plate', sharey=True)

In [None]:
# 'IL-6' for the current experiment, shared y axis.
fig, ax = data_plotter(assay='IL-6', data=datad[experiment], group_var='Plate', sharey=True)

In [None]:
# 'KC/GRO' for the current experiment, shared y axis.
fig, ax = data_plotter(assay='KC/GRO', data=datad[experiment], group_var='Plate', sharey=True)

In [None]:
# 'TNF-α' for the current experiment, shared y axis.
fig, ax = data_plotter(assay='TNF-α', data=datad[experiment], group_var='Plate', sharey=True)

### Tau phosphorylation

In [None]:
experiment = 'Tau phosphorylation'

# Plot the two pTau (T231) columns against each other, one series per compound.
# The series labels are now shown in a legend and the axes are labelled;
# before, label= was set but never displayed.
fig, ax = plt.subplots(figsize=(4.8, 4.8))
for tau_compound in ['TI11', 'TI12']:  # TI26 stays excluded, as in the original commented-out call
    ax.scatter(x='pTau (T231) [AU]', y='pTau (T231) [AU2]', data=datad[experiment][tau_compound], label=tau_compound)
ax.set_xlabel('pTau (T231) [AU]')
ax.set_ylabel('pTau (T231) [AU2]')
ax.legend()


In [None]:
experiment = 'Tau phosphorylation'

# Step 1: give TI26 and TI11/TI12 the same intermediate column names.
datad[experiment]['TI26'] = datad[experiment]['TI26'].rename(
    columns={'Cohort': 'Plate', 'Tau [pg/µg prot]': 'Tau [pg/µg]', 'pTau (T231)': 'pTau (T231) [AU2]'}
)
for TI in ['TI11', 'TI12']:
    datad[experiment][TI] = datad[experiment][TI].rename(columns={'Ratio pTau /Tau': 'ratio'})

# Step 2: shorten the shared names to the assay names used for plotting.
for TI in datad[experiment].keys():
    datad[experiment][TI] = datad[experiment][TI].rename(
        columns={'Tau [pg/µg]': 'Tau', 'pTau (T231) [AU2]': 'pTau (T231)', 'ratio': 'pT/T ratio'}
    )

fig, ax = data_plotter(assay='Tau', data=datad[experiment], group_var='Plate', sharey=False)

In [None]:
# 'pTau (T231)' for the current experiment, shared y axis.
fig, ax = data_plotter(assay='pTau (T231)', data=datad[experiment], group_var='Plate', sharey=True)

In [None]:
# NOTE(review): this call repeats the 'pTau (T231)' plot of the preceding cell
# with identical arguments; the cell looks redundant.
fig, ax = data_plotter(assay='pTau (T231)', data=datad[experiment], group_var='Plate', sharey=True)

In [None]:
# 'pT/T ratio' for the current experiment; each panel keeps its own y axis.
fig, ax = data_plotter(assay='pT/T ratio', data=datad[experiment], group_var='Plate', sharey=False)

### Trophic factor withdrawal (primary neurons)

In [None]:
# 'LDH' without grouping: every point is drawn in the same colour.
experiment = 'Trophic factor withdrawal (primary neurons)'
fig, ax = data_plotter(assay='LDH', data=datad[experiment], group_var=None, sharey=False)

In [None]:
# 'MTT' without grouping: every point is drawn in the same colour.
experiment = 'Trophic factor withdrawal (primary neurons)'
fig, ax = data_plotter(assay='MTT', data=datad[experiment], group_var=None, sharey=False)

In [None]:
def extract_regr_data(exper, assay, TI, datad=datad):
    """Return ``(y_obs, x_obs)`` arrays for one experiment, assay and treatment.

    ``y_obs`` is the assay column divided by its sample standard deviation and
    multiplied by 10 (a rescaling only, the values are not centred);
    ``x_obs`` is the ``conc_log10`` column.
    """
    table = datad[exper][TI]
    scaled = table[assay] / table[assay].std() * 10  # standardization
    return((scaled.values, table['conc_log10'].values))

# Example call: show the (y_obs, x_obs) arrays for TI26 in the Aβ42 Ly assay.
extract_regr_data('Aβ clearance (BV2 cells)', 'Aβ42 Ly', 'TI26')

## Create inference data

In [None]:
# Inference data of the earlier (2023-09-26) analysis, with the TI column
# labels replaced by drug names.
treatment2drugname1 = {'TI21': 'TUDCA', 'TI22': 'Cysteamine', 'TI26': 'Arundine'}
idatadf1 = cba.idatadf_from_netcdf(subdir='idatadf/', maindir='../../results/2023-09-26-cell-bayes-assays/')
idatadf1 = idatadf1.rename(columns=treatment2drugname1)

In [None]:
def extract_regr_data(exper, assay, compound, datad=datad):
    """Return the regression inputs ``(y_obs, x_obs)`` for one compound.

    NOTE(review): this redefines the ``extract_regr_data`` of an earlier cell;
    the two differ only in the name of the third parameter.

    ``y_obs`` is the assay readout rescaled to a sample standard deviation of
    10 (no centring); ``x_obs`` is the ``conc_log10`` column.
    """
    measurements = datad[exper][compound]
    readout = measurements[assay]
    y_obs = (readout / readout.std() * 10).to_numpy()  # standardization
    x_obs = measurements['conc_log10'].to_numpy()
    return((y_obs, x_obs))


def fit_single(exper='Aβ clearance (BV2 cells)', assay='Aβ42 SN', compound='TI11', datad=datad):
    """Fit one experiment/assay/compound combination with ``cba.sample_sigmoid_2``.

    The sampler is called twice, with ``return_model=True`` and then
    ``return_model=False``, to obtain the model and the inference data
    respectively. If either call raises ``pm.SamplingError`` both results are
    ``None``.

    Returns a one-row DataFrame indexed by ``(exper, assay)`` with the columns
    ``(compound, 'model')`` and ``(compound, 'idata')``.
    """
    y_obs, x_obs = extract_regr_data(exper, assay, compound, datad=datad)
    try:
        model = cba.sample_sigmoid_2(y_obs, x_obs, return_model=True)
        idata = cba.sample_sigmoid_2(y_obs, x_obs, return_model=False)
    except pm.SamplingError:
        model = idata = None
    row_labels = pd.MultiIndex.from_product([[compound], ['model', 'idata']])
    col_labels = pd.MultiIndex.from_product([[exper], [assay]])
    # built as 2 rows x 1 column, then transposed to 1 row x 2 columns
    return(pd.DataFrame([model, idata], index=row_labels, columns=col_labels).transpose())


def fit_assay(exper='Aβ clearance (BV2 cells)', assay='Aβ42 SN', datad=datad):
    """Fit every compound of ``datad[exper]`` for one assay.

    The per-compound results of ``fit_single`` are concatenated side by side
    (``axis=1``).
    """
    per_compound = []
    for c in datad[exper].keys():
        per_compound.append(fit_single(exper=exper, assay=assay, compound=c, datad=datad))
    return(pd.concat(per_compound, axis=1))


def fit_exper(exper='Aβ clearance (BV2 cells)', assays=['Aβ42 SN', 'Aβ42 Ly'], datad=datad):
    """Fit all listed assays of one experiment.

    The per-assay results of ``fit_assay`` are stacked vertically (``axis=0``).
    """
    per_assay = [fit_assay(exper=exper, assay=one_assay, datad=datad) for one_assay in assays]
    return(pd.concat(per_assay, axis=0))


ideal_H1_increase = cba.read_ideal_H1_increase()

# experiment -> assays listed for it in ideal_H1_increase that also exist as
# columns of that experiment's TI11 table
assayd = {
    name: [a for a in ideal_H1_increase.loc[name].index if a in datad[name]['TI11'].columns]
    for name in datad.keys()
}

In [None]:
maindir = '../../results/2024-01-21-cell-bayes-assays-dim/'

# Fitting is expensive: reuse the stored results when the index file exists,
# otherwise fit every experiment and store the inference data.
if os.path.exists(maindir + 'idatadf/fpaths.csv'):
    idatadf = cba.idatadf_from_netcdf(subdir='idatadf/', maindir=maindir)
    print('idatadf read from netcdf')
else:
    idatadf = pd.concat(
        [fit_exper(exper=exper, assays=assays) for exper, assays in assayd.items()],
        axis=0,
    ).xs('idata', axis=1, level=1)
    fpathdf = cba.idatadf_to_netcdf(idatadf, subdir='idatadf/', maindir=maindir)
    print('idatadf written to netcdf')


# Relabel the columns with the compounds' pretty names and put them in the
# order given by arundine-analogs.csv.
arundine_analogs = pd.read_csv('arundine-analogs.csv', index_col='TI')
idatadf = idatadf.rename(columns=arundine_analogs['pretty name'].to_dict())
idatadf = idatadf.reindex(columns=arundine_analogs['pretty name'].to_list())
idatadf

## Fit diagnostics

In [None]:
# ArviZ effective sample size (az.ess), passed to cba.get_diagnostics together with the fits.
cba.get_diagnostics(idatadf, az.ess)

In [None]:
# ArviZ R-hat convergence diagnostic (az.rhat), passed to cba.get_diagnostics together with the fits.
cba.get_diagnostics(idatadf, az.rhat)

In [None]:
# ArviZ Monte Carlo standard error (az.mcse), passed to cba.get_diagnostics together with the fits.
cba.get_diagnostics(idatadf, az.mcse)

## Posterior probabilities for hypotheses

In [None]:
# ((experiment, assay), compound) combinations of the earlier analysis that are
# handed to cba as poor fits.
poor_fits1 = [
    (('LPS neuroinflammation (BV2 cells)', 'IFN-γ'), 'Arundine'),
    (('Tau phosphorylation', 'pT/T ratio'), 'Arundine'),
    (('Neurite outgrowth (primary neurons)', 'neurogenesis'), 'Cysteamine'),
]

fpath = '../../results/2023-09-26-cell-bayes-assays/H102_posteriors.csv'
if os.path.exists(fpath):
    H102_posteriors1 = pd.read_csv(fpath, index_col=[0, 1], header=[0, 1])
else:
    # computed on the fly; writing the result back is deliberately disabled here
    H102_posteriors1 = cba.get_H102_posterior_from_idatadf(idatadf1, poor_fits1)
    #H102_posteriors1.to_csv(fpath)

H102_posteriors1 = cba.nice_assay_names(H102_posteriors1)
(
    H102_posteriors1.style
    .format(precision=2)
    .background_gradient(axis=None, vmin=0, vmax=1, cmap='hot')
)

In [None]:
# ((experiment, assay), compound) combinations of this analysis that are handed
# to cba as poor fits.
poor_fits = [
    (('Tau phosphorylation', 'pT/T ratio'), 'Arundine'),
    (('Tau phosphorylation', 'pT/T ratio'), 'C-DIM5'),
    (('LPS neuroinflammation (BV2 cells)', 'IL-1β'), 'C-DIM5'),
    (('LPS neuroinflammation (BV2 cells)', 'IL-1β'), 'C-DIM12'),
    (('Aβ release (H4 cells)', 'Aβ42'), 'C-DIM12'),
]

# Cached posteriors: read them if present, otherwise compute and store them.
fpath = '../../results/2024-01-21-cell-bayes-assays-dim/H102_posteriors.csv'
if os.path.exists(fpath):
    H102_posteriors = pd.read_csv(fpath, index_col=[0, 1], header=[0, 1])
else:
    H102_posteriors = cba.get_H102_posterior_from_idatadf(idatadf, poor_fits)
    H102_posteriors.to_csv(fpath)

# unlike for the earlier analysis, the assay names are left as they are:
#H102_posteriors = cba.nice_assay_names(H102_posteriors)
(
    H102_posteriors.style
    .format(precision=2)
    .background_gradient(axis=None, vmin=0, vmax=1, cmap='hot')
)

In [None]:
# Column-wise mean over all rows, shown as a single 'score' row.
(
    H102_posteriors
    .mean(axis=0)
    .to_frame('score')
    .transpose()
    .style
    .format(precision=2)
    .background_gradient(axis=None, vmin=0, vmax=1, cmap='hot')
)

In [None]:
# Average the earlier analysis' posteriors within level 0 of the row index before plotting.
# The explicit axis=0 was dropped: it is the default, and the `axis` keyword of
# DataFrame.groupby is deprecated in recent pandas.
fig, ax = cba.barchart_H102_posteriors(H102_posteriors1.groupby(level=0).mean(), e2l_textbox=False)

In [None]:
# Average the posteriors within level 0 of the row index, plot and save.
# The explicit axis=0 was dropped: it is the default, and the `axis` keyword of
# DataFrame.groupby is deprecated in recent pandas.
fig, ax = cba.barchart_H102_posteriors(H102_posteriors.groupby(level=0).mean(), e2l_textbox=False)
attila_utils.savefig(fig, 'H102_posteriors-exper-mean-barchart')

In [None]:
# Bar chart of the earlier analysis' posteriors, not averaged; e2l_textbox=True
# presumably adds an explanatory text box (confirm in cellbayesassay).
fig, ax = cba.barchart_H102_posteriors(H102_posteriors1, e2l_textbox=True)

In [None]:
# Bar chart of this analysis' posteriors with e2l_textbox=True, handed to
# attila_utils.savefig under the given name.
fig, ax = cba.barchart_H102_posteriors(H102_posteriors, e2l_textbox=True)
attila_utils.savefig(fig, 'H102_posteriors-barchart-e2l_textbox')

In [None]:
# Same bar chart with e2l_textbox=False, handed to attila_utils.savefig under a separate name.
fig, ax = cba.barchart_H102_posteriors(H102_posteriors, e2l_textbox=False)
attila_utils.savefig(fig, 'H102_posteriors-barchart')

In [None]:
# Posterior violin plot for the earlier analysis (idatadf1 with its poor_fits1 list).
fig, ax = cba.violin_posterior_pdf(idatadf1, poor_fits1, text_box=True, H_legend=True)

In [None]:
# Posterior violin plot for this analysis, handed to attila_utils.savefig under the given name.
fig, ax = cba.violin_posterior_pdf(idatadf, poor_fits, text_box=True, H_legend=True)
attila_utils.savefig(fig, 'violin-posterior-pdf-legend')

In [None]:
# Same violin plot with plot_avg=True (presumably adds an average; confirm in
# cellbayesassay), saved under a separate name.
fig, ax = cba.violin_posterior_pdf(idatadf, poor_fits, text_box=True, H_legend=True, plot_avg=True)
attila_utils.savefig(fig, 'violin-posterior-pdf-legend-avg')

In [None]:
# NOTE(review): interactive leftover — prints the kernel's connection details and
# contributes nothing to the analysis; consider removing before sharing.
%connect_info