In [1]:
%load_ext autoreload
%autoreload 2
%reload_ext autoreload
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import re
import os.path
import statsmodels.formula.api as smf
from statsmodels.genmod.bayes_mixed_glm import BinomialBayesMixedGLM
from statsmodels.discrete.discrete_model import Logit
import attila_utils
import gem_tools

In [2]:
ar_mssm = gem_tools.read_active_reactions(groupdict={'m-control': ('all_control', 'MSBB'), 'm-AD-B2': ('SubtypeB2_AD', 'MSBB')})
data_mssm = gem_tools.long_ar_subsys(['Bile acid recycling'], ar_mssm, gem_tools.read_gem_excel()['SUBSYSTEM'])
data_mssm

Unnamed: 0_level_0,Unnamed: 1_level_0,rxn_state,disease_state,rxn_ID,subject_ID
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
MAR01847,S151B648.BM_36_428,1,m-control,MAR01847,S151B648.BM_36_428
MAR01847,S151B648.BM_36_429,1,m-control,MAR01847,S151B648.BM_36_429
MAR01847,S111B355.BM_36_412,1,m-control,MAR01847,S111B355.BM_36_412
MAR01847,S111B355.BM_36_416,1,m-control,MAR01847,S111B355.BM_36_416
MAR01847,S111B355.BM_36_414,1,m-control,MAR01847,S111B355.BM_36_414
...,...,...,...,...,...
MAR01896,S151B648.BM_36_496,0,m-AD-B2,MAR01896,S151B648.BM_36_496
MAR01896,P19B648.BM_36_543,0,m-AD-B2,MAR01896,P19B648.BM_36_543
MAR01896,S151B648.BM_36_505,0,m-AD-B2,MAR01896,S151B648.BM_36_505
MAR01896,S111B355.BM_36_387,0,m-AD-B2,MAR01896,S111B355.BM_36_387


In [3]:
gem_1_11 = gem_tools.read_gem_excel()

In [4]:
fpath = '../../resources/tunahan/Dilara-APOE-models/APOE_BinaryModels.xlsx'
dilara_data = pd.read_excel(fpath)

In [5]:
def excise_group(group, dilara_data):
    left = dilara_data.loc[:, ['Rxn_ID']].rename({'Rxn_ID': 'rxn_ID'}, axis=1)
    right = dilara_data.loc[:, [bool(re.match('^' + group + '.*$', c)) for c in dilara_data.columns]]
    res = pd.concat([left, right], axis=1)
    res = res.set_index('rxn_ID').astype('bool')
    return(res)

ar = dict()

for celltype in ['Neuron', 'Astrocyte', 'Microglia']:
    for isoform in ['e3', 'e4']:
        group = celltype + '_' + isoform
        fpath = '../../resources/tunahan/Dilara-APOE-models/' + group + '.xlsx'
        ar[group] = excise_group(group, dilara_data)
        ar[group].to_excel(fpath, index=False)

ar['Astrocyte_e3']

Unnamed: 0_level_0,Astrocyte_e3_1,Astrocyte_e3_2,Astrocyte_e3_3
rxn_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
MAR00001,False,False,False
MAR00002,False,False,False
MAR00003,True,True,False
MAR00004,True,True,True
MAR00005,True,True,True
...,...,...,...
MAR13079,False,False,False
MAR13080,False,False,False
MAR13081,False,False,True
MAR13082,True,True,True


In [6]:
gem = dilara_data.loc[:, ['Rxn_ID', 'Subsystem']].rename({'Rxn_ID': 'ID', 'Subsystem': 'SUBSYSTEM'}, axis=1).set_index('ID')['SUBSYSTEM']
gem

ID
MAR00001          Acylglycerides metabolism
MAR00002          Acylglycerides metabolism
MAR00003          Acylglycerides metabolism
MAR00004                Transport reactions
MAR00005          Acylglycerides metabolism
                         ...               
MAR13079                Transport reactions
MAR13080                Transport reactions
MAR13081          Oxidative phosphorylation
MAR13082               Artificial reactions
MAR13083    Metabolism of other amino acids
Name: SUBSYSTEM, Length: 13078, dtype: object

In [7]:
def prepare_data(subsystems, ar, gem):
    data = gem_tools.long_ar_subsys(subsystems, ar, gem)
    data['cell_type'] = data.disease_state.apply(lambda x: re.sub('^([^_]+)_(e[34])$', '\\1', x))    
    data['APOE_isoform'] = data.disease_state.apply(lambda x: re.sub('^([^_]+)_(e[34])$', '\\2', x))
    return(data)

In [8]:
subsystems = ['Bile acid biosynthesis']
data_b = prepare_data(subsystems, ar, gem)
data_b

Unnamed: 0_level_0,Unnamed: 1_level_0,rxn_state,disease_state,rxn_ID,subject_ID,cell_type,APOE_isoform
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
MAR00059,Neuron_e3_1,0,Neuron_e3,MAR00059,Neuron_e3_1,Neuron,e3
MAR00059,Neuron_e3_2,0,Neuron_e3,MAR00059,Neuron_e3_2,Neuron,e3
MAR00059,Neuron_e3_3,0,Neuron_e3,MAR00059,Neuron_e3_3,Neuron,e3
MAR00069,Neuron_e3_1,0,Neuron_e3,MAR00069,Neuron_e3_1,Neuron,e3
MAR00069,Neuron_e3_2,0,Neuron_e3,MAR00069,Neuron_e3_2,Neuron,e3
...,...,...,...,...,...,...,...
MAR12299,Microglia_e4_2,0,Microglia_e4,MAR12299,Microglia_e4_2,Microglia,e4
MAR12299,Microglia_e4_3,0,Microglia_e4,MAR12299,Microglia_e4_3,Microglia,e4
MAR12305,Microglia_e4_1,0,Microglia_e4,MAR12305,Microglia_e4_1,Microglia,e4
MAR12305,Microglia_e4_2,0,Microglia_e4,MAR12305,Microglia_e4_2,Microglia,e4


In [9]:
subsystems = ['Bile acid recycling']
data_r = prepare_data(subsystems, ar, gem)
data_r

Unnamed: 0_level_0,Unnamed: 1_level_0,rxn_state,disease_state,rxn_ID,subject_ID,cell_type,APOE_isoform
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
MAR01847,Neuron_e3_1,1,Neuron_e3,MAR01847,Neuron_e3_1,Neuron,e3
MAR01847,Neuron_e3_2,1,Neuron_e3,MAR01847,Neuron_e3_2,Neuron,e3
MAR01847,Neuron_e3_3,1,Neuron_e3,MAR01847,Neuron_e3_3,Neuron,e3
MAR01848,Neuron_e3_1,0,Neuron_e3,MAR01848,Neuron_e3_1,Neuron,e3
MAR01848,Neuron_e3_2,1,Neuron_e3,MAR01848,Neuron_e3_2,Neuron,e3
...,...,...,...,...,...,...,...
MAR01894,Microglia_e4_2,0,Microglia_e4,MAR01894,Microglia_e4_2,Microglia,e4
MAR01894,Microglia_e4_3,0,Microglia_e4,MAR01894,Microglia_e4_3,Microglia,e4
MAR01896,Microglia_e4_1,1,Microglia_e4,MAR01896,Microglia_e4_1,Microglia,e4
MAR01896,Microglia_e4_2,1,Microglia_e4,MAR01896,Microglia_e4_2,Microglia,e4


In [10]:
formula = 'rxn_state ~ C(APOE_isoform, levels=["e3", "e4"]) : C(cell_type, levels=["Neuron", "Astrocyte", "Microglia"])'
fit_method = 'fit_vb'
res_mixed = BinomialBayesMixedGLM.from_formula(formula, {'Reactions': 'rxn_ID'}, data=data_b, vcp_p=0.2, fe_p=2)#.fit()
fit = getattr(md, fit_method)
res_mixed = fit()
res_mixed.summary()

NameError: name 'md' is not defined

In [None]:
formula = 'rxn_state ~ C(APOE_isoform, levels=["e3", "e4"]) : C(cell_type, levels=["Neuron", "Astrocyte", "Microglia"])'
res_fixed = Logit.from_formula(formula, data=data_b).fit()
res_fixed.summary()

In [None]:
%connect_info