# 03 BMRA

Run BMRA based on global responses derived from the pathway activity predictions.

In [1]:
import pandas as pd
import numpy as np
import os
import bmra

In [11]:
data_dir_1 = "01_outputs_2020"
data_dir_2 = "02_outputs_2020"
info_dir = "~/Phd_project/project_GBM/gbm_DATA/gbm_DATA_LINCS_GI1"
out_dir = "03_outputs_2020"


os.makedirs(out_dir, exist_ok = True)

## Load data

### Global responses

Assembled from global responses calculated from predicted pathway activities, and DPD global responses.

In [3]:
def assemble_R_global(pathway_rglobal_file, dpd_rglobal_file):
    """
    Assemble full R_global matrix
    """
    R_global_modules_df = pd.read_csv(pathway_rglobal_file, index_col = 0)

    perts = R_global_modules_df.columns.tolist()

    R_global_DPD_df = pd.read_csv(dpd_rglobal_file, index_col=0).T

    # filter by perts list
    R_global_DPD_df = R_global_DPD_df[R_global_DPD_df.index.isin(perts)]

    # sort by perts list
    R_global_DPD_df["sort_col"] = R_global_DPD_df.index.map({val: i for i, val in enumerate(perts)})
    R_global_DPD_df = R_global_DPD_df.sort_values("sort_col")
    R_global_DPD_df = R_global_DPD_df.drop("sort_col", axis = 1)

    # transpose
    R_global_DPD_df = R_global_DPD_df.T

    R_global_df = pd.concat([R_global_modules_df, R_global_DPD_df])

    return R_global_df

In [4]:
R_global_df = assemble_R_global(
    pathway_rglobal_file = os.path.join(data_dir_2, "R_global_annotated.csv"),
    dpd_rglobal_file = os.path.join(data_dir_1, "R_global_DPDonly_annotated.csv"),
)

modules = R_global_df.index.tolist()
exp_ids = R_global_df.columns.tolist()

R_global = R_global_df.values
n_modules = R_global.shape[0]

print(R_global.shape)
display(R_global_df)

(13, 119)


Unnamed: 0,ASG002_GI1_24H:A10,ASG002_GI1_24H:A11,ASG002_GI1_24H:A12,ASG002_GI1_24H:A19,ASG002_GI1_24H:A20,ASG002_GI1_24H:A21,ASG002_GI1_24H:B10,ASG002_GI1_24H:B11,ASG002_GI1_24H:B12,ASG002_GI1_24H:B13,...,MOAR010_GI1_24H:J05,MOAR010_GI1_24H:J06,MOAR010_GI1_24H:J22,MOAR010_GI1_24H:J23,MOAR010_GI1_24H:J24,MOAR010_GI1_24H:K08,MOAR010_GI1_24H:K09,MOAR011_GI1_24H:F07,MOAR011_GI1_24H:F08,MOAR011_GI1_24H:F09
CDK1,-0.532456,-0.090838,-9.1e-05,0.002085,-0.1659,-0.121836,-0.06889,-0.324622,-0.246149,-0.447347,...,0.010588,0.087241,-0.291894,-0.010809,-0.191546,-0.152258,-0.030956,-0.325711,-0.075607,0.065381
CDK2,-0.005915,0.058437,-0.005472,-0.468645,-0.258947,-0.051079,0.083799,-0.149857,0.069219,0.036831,...,-0.089429,0.014765,-0.007504,-0.235402,0.06659,0.036492,-0.111734,0.021896,0.011417,0.041343
CDK4_6,-1.02035,-0.156489,0.006785,-0.309805,-0.362326,-0.206513,-0.273633,-0.376785,-0.431937,-1.081409,...,0.123113,-0.029467,-0.008531,0.122665,-0.161915,-0.206933,-0.269666,-0.194807,-0.201761,0.131028
PI3K,-1.980469,-1.511715,-1.282948,-1.765735,-1.240682,-0.539564,-0.150972,-0.257132,-0.615737,-0.56492,...,0.144486,-0.325722,-0.181422,-0.15662,-0.412223,-0.142093,-0.242985,-0.738465,-0.335861,0.070843
ERK,-1.069782,-0.434094,-0.327457,-0.627189,-0.324659,-0.664645,-0.421138,-0.9768,-0.589117,-1.109856,...,-0.065764,0.098499,-0.047844,0.108805,0.041457,-0.089113,-0.085854,-0.305833,-0.370252,-0.01746
Hypoxia,-0.254387,0.023142,0.058141,0.041423,-0.035411,-0.038288,0.084564,-0.109133,-0.036479,-0.15798,...,-0.116061,-0.009096,0.055604,-0.090556,-0.06251,-0.037085,0.081561,-0.022819,-0.004053,-0.022677
PDGFR,-1.047116,0.085648,0.24996,-0.016258,-0.287235,-0.056367,0.180483,-0.500696,0.003939,-0.280822,...,0.174566,-0.001951,0.210924,0.135909,0.031303,0.232381,0.105209,-0.087007,-0.096901,0.006698
Aurora,-0.176946,0.165538,0.098505,0.043972,-0.156947,-0.192212,-0.36858,-0.472288,0.026675,-0.881248,...,0.098747,0.135528,0.186694,0.251682,0.001502,0.19399,-0.003241,-0.354541,-0.150168,0.353166
Estrogen,-0.059273,-0.011432,-0.115573,0.024551,0.167143,-0.594219,-0.040831,0.054088,-0.049902,0.11237,...,0.133559,0.177735,0.177713,0.137463,-0.49281,0.212787,-0.405344,-0.401177,0.000818,-0.041655
EGFR,-0.39707,-0.016259,0.172429,-0.192615,-0.109157,0.116712,0.128445,-0.615233,0.143768,-1.271298,...,0.054318,-0.351044,-0.453752,-0.124671,-0.112722,-0.202544,-0.215714,-0.202577,-0.156586,0.036562


In [5]:
print(R_global_df.values.min())
print(R_global_df.values.max())

-13.504171722962724
7.677933039891711


### Perturbation matrix

In [6]:
inhib_conc_df = pd.read_csv(os.path.join(data_dir_1, "inhib_conc_annotated.csv"), index_col = 0)
#LFC_PLCg = pd.read_csv(os.path.join(info_dir,"PLCg_Data_log_2020.csv"),index_col=0)
#inhib_conc = inhib_conc_df.drop(columns=LFC_PLCg.index).values
inhib_conc = inhib_conc_df.values

n_DPDs = R_global.shape[0] - inhib_conc.shape[0]

pert_modules = np.where(inhib_conc != 0, 1, 0)
pert_DPD = np.zeros((n_DPDs, len(exp_ids)))

pert = np.vstack([pert_modules, pert_DPD])

print(pert.shape)
print(pert)

(13, 119)
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 1. 1. 1.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


### Networks

In [13]:
# empty prior
#G_prior = np.zeros((n_modules, n_modules))
G_prior_df = pd.read_excel(os.path.join(info_dir,"ALL_DATA_2020_Jing_gbm.xlsx"), sheet_name = "prior_network", index_col = 0)

G_prior = G_prior_df.values
G_prior_df

Unnamed: 0,CDK1,CDK2,CDK4_6,p53,EGFR,Aurora,Estrogen,PDGFR,Hypoxia,ERK,PI3K
CDK1,0,0,0,0,0,0,0,0,0,0,0
CDK2,0,0,0,0,0,0,0,0,0,0,0
CDK4_6,0,0,0,0,0,0,0,0,0,0,0
p53,0,0,0,0,0,0,0,0,0,0,0
EGFR,0,0,0,0,0,0,0,0,0,0,0
Aurora,0,0,0,0,0,0,0,0,0,0,0
Estrogen,0,0,0,0,0,0,0,0,0,0,0
PDGFR,0,0,0,0,0,0,0,0,0,0,0
Hypoxia,0,0,0,0,0,0,0,0,0,0,0
ERK,0,0,0,0,0,0,0,0,0,0,0


In [8]:
#G_prior_r_df = pd.read_excel(os.path.join(info_dir, "ALL_DATA_2020.xlsx"), sheet_name = "prior_r", index_col = 0)

#G_prior_r = G_prior_r_df.values
#G_prior_r_df

In [18]:
G_not = np.eye(n_modules)
G_not[:, n_modules - n_DPDs :] = 1

G_not

array([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 1., 1.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 1., 1.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 1.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 1.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 1.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 1., 1.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 1., 1.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1.]])

## Run BMRA

In [15]:
n_gibbs = 200_000
n_window = 5_000

mode = "all"
n_iterations = 1

In [19]:
A_mean, A_sd, r_mean, r_sd = bmra.run_bmra(R_global,
            pert,
            G_prior,
            G_not,
            #r_prior = G_prior_r,
            n_gibbs = n_gibbs,
            n_window = n_window,
            mode = mode,
            n_iterations = n_iterations,
        )

A_mean_df = pd.DataFrame(A_mean, index = modules, columns = modules)
A_sd_df = pd.DataFrame(A_sd, index = modules, columns = modules)
r_mean_df = pd.DataFrame(r_mean, index = modules, columns = modules)
r_sd_df = pd.DataFrame(r_sd, index = modules, columns = modules)

display(A_mean_df)
display(A_sd_df)
display(r_mean_df)
display(r_sd_df)

A_mean_df.to_csv(os.path.join(out_dir, "A_mean.csv"))
A_sd_df.to_csv(os.path.join(out_dir, "A_sd.csv"))
r_mean_df.to_csv(os.path.join(out_dir, "r_mean.csv"))
r_sd_df.to_csv(os.path.join(out_dir, "r_sd.csv"))

ValueError: G and G_not must be of the same shape.

In [None]:
np.fill_diagonal(r_mean_df.values,-1)
rm_minus_inv = pd.DataFrame(np.linalg.pinv(r_mean_df),index=r_mean_df.index,columns=r_mean_df.columns)*(-1)
rm_minus_inv.to_csv(os.path.join(out_dir, 'r_minv.csv'))
display(rm_minus_inv)

Unnamed: 0,CDK1/2,CDK4/6,IKK,TLR4,mTOR,PAK,ERK,JAK/STAT,PLK1,ITK,DPD_resist,DPD_TB,DPD_load,DPD_damage,DPD_microa
CDK1/2,1.070377,0.098171,0.10063,-0.138134,-0.030489,0.172891,0.238128,-0.049432,0.138265,0.033675,1.317576e-16,1.70488e-16,3.338688e-16,-2.5105980000000002e-17,9.466358e-17
CDK4/6,0.172332,1.341999,0.5458,0.034785,-0.02771,0.301949,0.529164,0.022211,0.246798,-0.111058,3.236202e-16,-9.743404000000001e-17,-5.372116e-16,-3.146626e-16,-1.050246e-15
IKK,-0.115346,0.648635,1.476664,-0.103897,-0.114574,0.076622,0.546149,-0.106072,0.291224,0.34998,-8.1193e-18,-6.350974e-16,-2.300832e-16,3.313475e-16,-4.342529e-16
TLR4,-0.246354,-0.273611,-0.404709,1.357987,0.385261,-0.036704,-0.636462,0.519983,-0.339718,-0.442515,-3.241428e-16,1.513121e-16,-3.323238e-16,-4.91972e-16,-7.899423e-17
mTOR,-0.257144,-0.05492,-0.190872,0.525376,1.161262,0.679497,-0.320712,0.406306,-0.061652,-0.219527,-5.466398e-16,-4.744804e-16,-1.253299e-16,2.1976660000000002e-17,-1.827421e-16
PAK,0.293164,0.123785,0.023221,-0.018794,0.011831,1.112692,-0.0262,-0.010009,0.173921,0.042812,4.5323780000000006e-17,4.0826700000000005e-17,-1.994769e-16,1.788809e-16,-8.493427e-17
ERK,0.135166,0.649465,0.626907,-0.128036,0.032357,0.416245,1.79089,-0.008202,0.776404,-0.130813,-1.3081820000000002e-17,-4.309692e-16,2.1246960000000002e-17,3.092141e-16,-4.503158e-16
JAK/STAT,-0.32529,-0.128466,-0.271533,0.419534,0.135917,-0.009221,-0.19776,1.225439,-0.170616,-0.526538,-3.78275e-16,-3.239907e-16,1.039663e-16,-6.268207e-16,1.1935360000000001e-17
PLK1,0.360758,0.423964,0.350179,-0.202923,-0.097297,0.545544,1.159246,-0.107581,1.595682,0.206329,-3.81521e-16,-3.781127e-16,1.075563e-16,1.087014e-16,-3.654697e-16
ITK,0.04861,0.141652,0.501529,-0.276301,-0.138502,-0.127251,0.001878,-0.31372,0.198692,1.333751,1.785239e-16,-4.668509e-16,-1.782604e-16,-2.491266e-16,6.045449000000001e-17


In [None]:
con_mat = pd.DataFrame(columns=['From','To','Strength'])
for con_to in r_mean_df.index:
    for con_from in r_mean_df.columns:
        if ((r_mean_df.loc[con_to,con_from] != 0) & (con_from != con_to)):
            row_df = pd.DataFrame([[con_from,con_to,r_mean_df.loc[con_to,con_from]]],columns=['From','To','Strength'])
            if con_mat.empty:
                con_mat = row_df.copy()
            else:
                con_mat = pd.concat([con_mat,row_df],axis=0,ignore_index=True)
            #con_mat = con_mat.append({'From':con_from,'To':con_to,'Strength':r_mean_df.loc[con_to,con_from]},ignore_index=True)
con_mat.to_csv(os.path.join(out_dir, 'r_net.txt'),sep='\t',index=False)
display(con_mat)

Unnamed: 0,From,To,Strength
0,CDK4/6,CDK1/2,-0.006180
1,TLR4,CDK1/2,-0.089441
2,mTOR,CDK1/2,-0.000007
3,PAK,CDK1/2,0.113034
4,ERK,CDK1/2,0.099631
...,...,...,...
91,IKK,DPD_microa,-0.686179
92,ERK,DPD_microa,-0.545829
93,JAK/STAT,DPD_microa,-0.473638
94,PLK1,DPD_microa,-0.397271


In [None]:
# now the same but with filtered matrices
# threshold value
Athr = 0.5

rm_filt = r_mean_df[np.abs(A_mean_df) > Athr].fillna(0).copy()
np.fill_diagonal(rm_filt.values,-1)
rm_filt.to_csv(os.path.join(out_dir, 'r_mean_filt.csv'))
display(rm_filt)

rm_filt_minus_inv = pd.DataFrame(np.linalg.pinv(rm_filt),index=rm_filt.index,columns=rm_filt.columns)*(-1)
rm_filt_minus_inv.to_csv(os.path.join(out_dir, 'r_minv_filt.csv'))
display(rm_minus_inv)

con_mat = pd.DataFrame(columns=['From','To','Strength'])
for con_to in rm_filt.index:
    for con_from in rm_filt.columns:
        if ((rm_filt.loc[con_to,con_from] != 0) & (con_from != con_to)):
            row_df = pd.DataFrame([[con_from,con_to,rm_filt.loc[con_to,con_from]]],columns=['From','To','Strength'])
            if con_mat.empty:
                con_mat = row_df.copy()
            else:
                con_mat = pd.concat([con_mat,row_df],axis=0,ignore_index=True)
            #con_mat = con_mat.append({'From':con_from,'To':con_to,'Strength':r_mean_df.loc[con_to,con_from]},ignore_index=True)
con_mat.to_csv(os.path.join(out_dir, 'r_net_filt.txt'),sep='\t',index=False)
#display(con_mat)


Unnamed: 0,CDK1/2,CDK4/6,IKK,TLR4,mTOR,PAK,ERK,JAK/STAT,PLK1,ITK,DPD_resist,DPD_TB,DPD_load,DPD_damage,DPD_microa
CDK1/2,-1.0,0.0,0.0,-0.089441,0.0,0.113034,0.099631,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CDK4/6,0.143812,-1.0,0.338529,0.070591,-0.034364,0.159793,0.194604,0.0,0.0,-0.143824,0.0,0.0,0.0,0.0,0.0
IKK,-0.222665,0.359726,-1.0,0.0,-0.065159,0.0,0.216279,0.0,0.0,0.308449,0.0,0.0,0.0,0.0,0.0
TLR4,0.0,0.0,0.0,-1.0,0.284653,-0.12421,-0.275527,0.276155,0.0,-0.198946,0.0,0.0,0.0,0.0,0.0
mTOR,-0.293406,0.0,0.0,0.305921,-1.0,0.667978,0.0,0.19537,0.0,0.0,0.0,0.0,0.0,0.0,0.0
PAK,0.233641,0.111028,0.0,0.0,0.037056,-1.0,-0.173905,0.0,0.157598,0.0,0.0,0.0,0.0,0.0,0.0
ERK,0.0,0.210032,0.317996,-0.104185,0.103864,0.0,-1.0,0.0,0.40824,-0.24467,0.0,0.0,0.0,0.0,0.0
JAK/STAT,-0.238836,0.0,0.0,0.220427,0.0,0.0,0.0,-1.0,0.0,-0.315616,0.0,0.0,0.0,0.0,0.0
PLK1,0.119018,0.0,-0.148267,0.0,-0.089469,0.314287,0.665025,0.0,-1.0,0.231009,0.0,0.0,0.0,0.0,0.0
ITK,0.0,0.0,0.361711,-0.127635,0.0,-0.119685,-0.27696,-0.157788,0.163154,-1.0,0.0,0.0,0.0,0.0,0.0


Unnamed: 0,CDK1/2,CDK4/6,IKK,TLR4,mTOR,PAK,ERK,JAK/STAT,PLK1,ITK,DPD_resist,DPD_TB,DPD_load,DPD_damage,DPD_microa
CDK1/2,1.070377,0.098171,0.10063,-0.138134,-0.030489,0.172891,0.238128,-0.049432,0.138265,0.033675,1.317576e-16,1.70488e-16,3.338688e-16,-2.5105980000000002e-17,9.466358e-17
CDK4/6,0.172332,1.341999,0.5458,0.034785,-0.02771,0.301949,0.529164,0.022211,0.246798,-0.111058,3.236202e-16,-9.743404000000001e-17,-5.372116e-16,-3.146626e-16,-1.050246e-15
IKK,-0.115346,0.648635,1.476664,-0.103897,-0.114574,0.076622,0.546149,-0.106072,0.291224,0.34998,-8.1193e-18,-6.350974e-16,-2.300832e-16,3.313475e-16,-4.342529e-16
TLR4,-0.246354,-0.273611,-0.404709,1.357987,0.385261,-0.036704,-0.636462,0.519983,-0.339718,-0.442515,-3.241428e-16,1.513121e-16,-3.323238e-16,-4.91972e-16,-7.899423e-17
mTOR,-0.257144,-0.05492,-0.190872,0.525376,1.161262,0.679497,-0.320712,0.406306,-0.061652,-0.219527,-5.466398e-16,-4.744804e-16,-1.253299e-16,2.1976660000000002e-17,-1.827421e-16
PAK,0.293164,0.123785,0.023221,-0.018794,0.011831,1.112692,-0.0262,-0.010009,0.173921,0.042812,4.5323780000000006e-17,4.0826700000000005e-17,-1.994769e-16,1.788809e-16,-8.493427e-17
ERK,0.135166,0.649465,0.626907,-0.128036,0.032357,0.416245,1.79089,-0.008202,0.776404,-0.130813,-1.3081820000000002e-17,-4.309692e-16,2.1246960000000002e-17,3.092141e-16,-4.503158e-16
JAK/STAT,-0.32529,-0.128466,-0.271533,0.419534,0.135917,-0.009221,-0.19776,1.225439,-0.170616,-0.526538,-3.78275e-16,-3.239907e-16,1.039663e-16,-6.268207e-16,1.1935360000000001e-17
PLK1,0.360758,0.423964,0.350179,-0.202923,-0.097297,0.545544,1.159246,-0.107581,1.595682,0.206329,-3.81521e-16,-3.781127e-16,1.075563e-16,1.087014e-16,-3.654697e-16
ITK,0.04861,0.141652,0.501529,-0.276301,-0.138502,-0.127251,0.001878,-0.31372,0.198692,1.333751,1.785239e-16,-4.668509e-16,-1.782604e-16,-2.491266e-16,6.045449000000001e-17


In [None]:
# number of zero elements in the matrix
display(len(rm_filt.index)*len(rm_filt.columns) - rm_filt.astype(bool).sum().sum())

135

In [None]:
# number of non-zero elements in the matrix
display(rm_filt.astype(bool).sum().sum() - len(modules))

75