In [1]:
import pandas as pd
import os
import pickle
import bmra_prep
import bmra_prep.pathway_activity.prediction

In [14]:
# Directory holding the input files read by the cells below.
# The default is the original absolute location; set the BMRA_DATA_DIR
# environment variable to run the notebook from another machine or checkout
# without editing this cell.
data_dir = os.environ.get(
    "BMRA_DATA_DIR",
    '/home/jing/Phd_project/project_GBM/gbm_Scripts/gbm_Scripts_bmra_gi1/01_outputs_2020_cgga/',
)

# Directory (relative to the current working directory) that receives the
# CSV files written by this notebook.
out_dir = "02_outputs_2020_cgga"

# exist_ok=True keeps the cell re-runnable once the directory exists.
os.makedirs(out_dir, exist_ok=True)


## Load data

In [15]:
# Load the metadata dict and pull out the entries used in this notebook.
# NOTE(review): unpickling can execute arbitrary code, so metadata.pickle
# must come from a trusted source.
with open(os.path.join(data_dir, "metadata.pickle"), "rb") as f:
    all_metadata = pickle.load(f)

# Entries stored under the n_* keys (presumably counts -- confirm).
n_modules, n_genes, n_experiments = (
    all_metadata[key] for key in ("n_modules", "n_genes", "n_experiments")
)

# Label collections; `modules` and `genes` label the coefficient table later on.
modules, exp_ids, genes = (
    all_metadata[key] for key in ("modules", "exp_ids", "genes")
)

In [7]:
# Read the L1000 data table; the first CSV column becomes the index.
l1000_csv = os.path.join(data_dir, "L1000_Data_norm_data.csv")
L1000_df = pd.read_csv(l1000_csv, index_col=0)

# Bare value array of the table, used by the model cells below.
x = L1000_df.values
x.shape

(978, 119)

In [8]:
def _read_matrix(csv_name):
    """Read ``csv_name`` from ``data_dir`` (first column as index) and return its values array."""
    return pd.read_csv(os.path.join(data_dir, csv_name), index_col=0).values


# load doses and perturbation matrix
inhib_conc_matrix = _read_matrix("inhib_conc_annotated.csv")
ic50_matrix = _read_matrix("ic50_annotated.csv")
# Disabled: the gamma table is currently not loaded.
# gamma_matrix = _read_matrix("gamma_annotated.csv")
pert_matrix = _read_matrix("pert_annotated.csv")

In [9]:
# Alternative form with a gamma term (disabled; gamma_matrix is not loaded):
# y_true = (1 + gamma_matrix * inhib_conc_matrix / ic50_matrix) / (1 + inhib_conc_matrix / ic50_matrix)

# Elementwise 1 / (1 + inhib_conc / ic50).
conc_over_ic50 = inhib_conc_matrix / ic50_matrix
y_true = 1 / (1 + conc_over_ic50)

display(y_true.shape)
y_true

(11, 119)

array([[1.        , 1.        , 1.        , ..., 1.        , 1.        ,
        1.        ],
       [1.        , 1.        , 1.        , ..., 1.        , 1.        ,
        1.        ],
       [1.        , 1.        , 1.        , ..., 1.        , 1.        ,
        1.        ],
       ...,
       [1.        , 1.        , 1.        , ..., 1.        , 1.        ,
        1.        ],
       [1.        , 1.        , 1.        , ..., 1.        , 1.        ,
        1.        ],
       [1.        , 1.        , 1.        , ..., 0.86139986, 0.94914478,
        0.98245337]])

## Run models

In [10]:
# Compute `a_coeffs` from the data matrix, the target matrix and the
# perturbation matrix.
# NOTE(review): the meaning of the five trailing positional integers is not
# visible here -- check them against the predict_coeffs signature and
# consider passing them by keyword.
a_coeffs = bmra_prep.pathway_activity.prediction.predict_coeffs(
    x,
    y_true,
    pert_matrix,
    200_000,
    10,
    10,
    10,
    100,
)

In [11]:
# Label the coefficient array (rows: modules, columns: genes) and save it.
a_coeffs_df = pd.DataFrame(data=a_coeffs, index=modules, columns=genes)
a_coeffs_csv = os.path.join(out_dir, "a_coeffs.csv")
a_coeffs_df.to_csv(a_coeffs_csv)

# To reuse a saved result instead of recomputing, uncomment:
#a_coeffs_df = pd.read_csv(os.path.join(out_dir,'a_coeffs.csv'),index_col=0)
#a_coeffs = a_coeffs_df.values

# Number of non-zero coefficients in each row, followed by the table itself.
nonzero_per_module = a_coeffs_df.astype(bool).sum(axis='columns')
display(nonzero_per_module, a_coeffs_df)

CDK1        978
CDK2        978
CDK4_6      978
PI3K        978
ERK         978
Hypoxia     978
PDGFR       978
Aurora      978
Estrogen    978
EGFR        978
p53         978
dtype: int64

Unnamed: 0,AARS,ABCB6,ABCC5,ABCF1,ABCF3,ABHD4,ABHD6,ABL1,ACAA1,ACAT2,...,ZMIZ1,ZMYM2,ZNF131,ZNF274,ZNF318,ZNF395,ZNF451,ZNF586,ZNF589,ZW10
CDK1,-2e-06,-1.2e-05,8.838761e-06,3.651215e-06,-6.763623e-08,-1.696261e-06,8e-06,1e-06,-3e-06,9.384555e-06,...,7.734411e-06,1.106189e-05,3.034574e-06,9e-06,-1e-06,-3e-06,6e-06,-9.344332e-06,6e-06,-2e-06
CDK2,-6e-06,3.8e-05,1.197123e-05,-5.079572e-06,2.366058e-05,2.046566e-05,-1.3e-05,2.7e-05,2.7e-05,9.640497e-06,...,-4.344706e-06,-1.008456e-05,1.825214e-05,-1.8e-05,-2.5e-05,-7e-06,3e-05,-8.03964e-07,1.5e-05,9e-06
CDK4_6,6e-06,4e-06,3.954207e-07,8.256871e-07,-1.949568e-06,3.13584e-05,-7e-06,1.4e-05,-1.2e-05,7.146313e-06,...,-1.093743e-05,3.48737e-06,6.748e-07,-3e-06,-1.9e-05,-8e-06,2e-06,2.048556e-05,1.7e-05,-1.3e-05
PI3K,-1.4e-05,8e-06,5.413818e-06,1.419593e-06,2.932436e-06,-3.045286e-07,1.9e-05,1.8e-05,7e-06,7.546002e-06,...,1.372224e-05,2.643292e-06,-5.908413e-06,7e-06,-2.6e-05,-0.112062,2e-05,-2.856735e-07,-2e-05,6e-06
ERK,7e-06,9e-06,-2.464994e-05,1.048087e-05,1.991521e-05,2.266995e-05,6e-06,3e-06,-8e-06,1.120914e-05,...,-2.759216e-05,-1.683286e-06,1.523331e-06,-2.3e-05,-4.3e-05,-6e-06,7e-06,-7.678454e-06,8e-06,-1.6e-05
Hypoxia,-5e-06,2.2e-05,-3.230209e-07,-1.121374e-05,1.740565e-05,1.887838e-05,1.4e-05,-8e-06,-2.6e-05,2.868051e-06,...,1.531567e-05,-9.956561e-06,-4.892273e-06,-1.4e-05,7e-06,2.1e-05,-1.9e-05,9.799271e-05,-3.8e-05,7e-06
PDGFR,2.1e-05,6e-06,1.70761e-05,9.346185e-06,-5.30755e-06,9.988088e-06,-4e-06,5e-06,-9e-06,-4.058841e-06,...,-3.882738e-05,-8.747911e-06,-1.873166e-05,3e-05,1.8e-05,-1.6e-05,2.1e-05,1.113305e-05,1.3e-05,1e-05
Aurora,-5e-06,1e-06,-2.923541e-05,-2.256907e-05,8.95169e-06,7.239343e-06,2.7e-05,3e-06,-4e-06,-1.062311e-05,...,-9.648718e-06,2.535999e-05,-3.525336e-06,-3e-06,-1e-06,9e-06,1.8e-05,-1.158392e-05,-4e-06,-1.4e-05
Estrogen,1.8e-05,2e-05,4.005645e-06,-2.422001e-05,-3.201331e-06,-3.695839e-05,-7e-06,-1.9e-05,-1.2e-05,-2.258233e-05,...,-9.32852e-07,-1.559512e-05,-5.730833e-06,7e-06,1.3e-05,-3.5e-05,-7e-06,-2.524136e-05,-2.8e-05,1.6e-05
EGFR,-7e-06,-5e-06,-1.901391e-05,3.399313e-05,1.272359e-05,-1.630304e-06,1.3e-05,1.5e-05,6e-06,4.676573e-07,...,1.202233e-05,4.298379e-06,3.977359e-06,-1.2e-05,-1e-05,5e-06,-4e-06,3.682825e-05,-2.2e-05,3e-06


In [23]:
#pathway_activity = a_coeffs @ x
#pathway_activity.shape

In [12]:
# First turn the data matrix and coefficients into pathway activity, then
# derive the global response from it, labelled with the module names and the
# L1000 column labels.
pathway_activity = bmra_prep.pathway_activity.calc_pathway_activity(x, a_coeffs)
R_global = bmra_prep.pathway_activity.calc_global_response_from_pathway_activity(
    pathway_activity, modules, L1000_df.columns
)

# The result object exposes its table through `.dataframe`.
R_global_df = R_global.dataframe
display(R_global_df)

Unnamed: 0,ASG002_GI1_24H:A10,ASG002_GI1_24H:A11,ASG002_GI1_24H:A12,ASG002_GI1_24H:A19,ASG002_GI1_24H:A20,ASG002_GI1_24H:A21,ASG002_GI1_24H:B10,ASG002_GI1_24H:B11,ASG002_GI1_24H:B12,ASG002_GI1_24H:B13,...,MOAR010_GI1_24H:J05,MOAR010_GI1_24H:J06,MOAR010_GI1_24H:J22,MOAR010_GI1_24H:J23,MOAR010_GI1_24H:J24,MOAR010_GI1_24H:K08,MOAR010_GI1_24H:K09,MOAR011_GI1_24H:F07,MOAR011_GI1_24H:F08,MOAR011_GI1_24H:F09
CDK1,-0.533408,-0.091122,7.3e-05,0.001598,-0.164698,-0.12339,-0.069064,-0.325147,-0.247632,-0.445555,...,0.010857,0.087816,-0.291711,-0.011566,-0.192878,-0.152299,-0.031306,-0.327067,-0.076895,0.065275
CDK2,-0.008156,0.057244,-0.005956,-0.475751,-0.262179,-0.051838,0.085078,-0.154491,0.06983,0.035554,...,-0.090894,0.016102,-0.005975,-0.236577,0.066263,0.037357,-0.112649,0.018635,0.008821,0.040558
CDK4_6,-0.9412,-0.162896,-0.009183,-0.311703,-0.338388,-0.200765,-0.29375,-0.374341,-0.441494,-1.003411,...,0.106203,-0.036883,-0.010615,0.127417,-0.141927,-0.201117,-0.247033,-0.197951,-0.203202,0.120236
PI3K,-1.990542,-1.521057,-1.28777,-1.775872,-1.24425,-0.536761,-0.145059,-0.251675,-0.607908,-0.554744,...,0.157741,-0.320781,-0.190377,-0.158643,-0.403793,-0.150622,-0.250384,-0.732725,-0.331865,0.07143
ERK,-1.093333,-0.436228,-0.340494,-0.634265,-0.34291,-0.709084,-0.445084,-1.049589,-0.583134,-1.165706,...,-0.066403,0.098467,-0.047738,0.111746,0.037392,-0.074054,-0.079529,-0.294749,-0.363768,-0.007878
Hypoxia,-0.254376,0.023122,0.058504,0.041085,-0.035473,-0.038286,0.084476,-0.108951,-0.036378,-0.157845,...,-0.115812,-0.009774,0.055337,-0.090489,-0.061701,-0.036901,0.081746,-0.023143,-0.003982,-0.022765
PDGFR,-1.043836,0.086879,0.250074,-0.015853,-0.287542,-0.056167,0.181117,-0.498719,0.004448,-0.279797,...,0.174611,-0.002382,0.211532,0.135977,0.03159,0.232393,0.105611,-0.087704,-0.097829,0.00636
Aurora,-0.175508,0.164342,0.097616,0.043912,-0.156369,-0.190604,-0.366462,-0.468757,0.027359,-0.871402,...,0.098359,0.134676,0.186001,0.250447,0.004023,0.192998,-0.001498,-0.352422,-0.14819,0.352141
Estrogen,-0.058632,-0.011456,-0.115421,0.024613,0.167272,-0.594548,-0.041064,0.053876,-0.049905,0.112325,...,0.133499,0.17783,0.177686,0.138073,-0.491883,0.21281,-0.405375,-0.401116,0.000936,-0.041481
EGFR,-0.398011,-0.015845,0.173159,-0.191456,-0.109141,0.117046,0.129847,-0.610209,0.143537,-1.268097,...,0.052904,-0.347726,-0.445773,-0.12511,-0.109984,-0.199341,-0.212445,-0.203895,-0.158802,0.030774


In [13]:
# Write the global response table into the output directory, then show it.
r_global_csv = os.path.join(out_dir, "R_global_annotated.csv")
R_global_df.to_csv(r_global_csv)
display(R_global_df)

Unnamed: 0,ASG002_GI1_24H:A10,ASG002_GI1_24H:A11,ASG002_GI1_24H:A12,ASG002_GI1_24H:A19,ASG002_GI1_24H:A20,ASG002_GI1_24H:A21,ASG002_GI1_24H:B10,ASG002_GI1_24H:B11,ASG002_GI1_24H:B12,ASG002_GI1_24H:B13,...,MOAR010_GI1_24H:J05,MOAR010_GI1_24H:J06,MOAR010_GI1_24H:J22,MOAR010_GI1_24H:J23,MOAR010_GI1_24H:J24,MOAR010_GI1_24H:K08,MOAR010_GI1_24H:K09,MOAR011_GI1_24H:F07,MOAR011_GI1_24H:F08,MOAR011_GI1_24H:F09
CDK1,-0.533408,-0.091122,7.3e-05,0.001598,-0.164698,-0.12339,-0.069064,-0.325147,-0.247632,-0.445555,...,0.010857,0.087816,-0.291711,-0.011566,-0.192878,-0.152299,-0.031306,-0.327067,-0.076895,0.065275
CDK2,-0.008156,0.057244,-0.005956,-0.475751,-0.262179,-0.051838,0.085078,-0.154491,0.06983,0.035554,...,-0.090894,0.016102,-0.005975,-0.236577,0.066263,0.037357,-0.112649,0.018635,0.008821,0.040558
CDK4_6,-0.9412,-0.162896,-0.009183,-0.311703,-0.338388,-0.200765,-0.29375,-0.374341,-0.441494,-1.003411,...,0.106203,-0.036883,-0.010615,0.127417,-0.141927,-0.201117,-0.247033,-0.197951,-0.203202,0.120236
PI3K,-1.990542,-1.521057,-1.28777,-1.775872,-1.24425,-0.536761,-0.145059,-0.251675,-0.607908,-0.554744,...,0.157741,-0.320781,-0.190377,-0.158643,-0.403793,-0.150622,-0.250384,-0.732725,-0.331865,0.07143
ERK,-1.093333,-0.436228,-0.340494,-0.634265,-0.34291,-0.709084,-0.445084,-1.049589,-0.583134,-1.165706,...,-0.066403,0.098467,-0.047738,0.111746,0.037392,-0.074054,-0.079529,-0.294749,-0.363768,-0.007878
Hypoxia,-0.254376,0.023122,0.058504,0.041085,-0.035473,-0.038286,0.084476,-0.108951,-0.036378,-0.157845,...,-0.115812,-0.009774,0.055337,-0.090489,-0.061701,-0.036901,0.081746,-0.023143,-0.003982,-0.022765
PDGFR,-1.043836,0.086879,0.250074,-0.015853,-0.287542,-0.056167,0.181117,-0.498719,0.004448,-0.279797,...,0.174611,-0.002382,0.211532,0.135977,0.03159,0.232393,0.105611,-0.087704,-0.097829,0.00636
Aurora,-0.175508,0.164342,0.097616,0.043912,-0.156369,-0.190604,-0.366462,-0.468757,0.027359,-0.871402,...,0.098359,0.134676,0.186001,0.250447,0.004023,0.192998,-0.001498,-0.352422,-0.14819,0.352141
Estrogen,-0.058632,-0.011456,-0.115421,0.024613,0.167272,-0.594548,-0.041064,0.053876,-0.049905,0.112325,...,0.133499,0.17783,0.177686,0.138073,-0.491883,0.21281,-0.405375,-0.401116,0.000936,-0.041481
EGFR,-0.398011,-0.015845,0.173159,-0.191456,-0.109141,0.117046,0.129847,-0.610209,0.143537,-1.268097,...,0.052904,-0.347726,-0.445773,-0.12511,-0.109984,-0.199341,-0.212445,-0.203895,-0.158802,0.030774
