In [1]:
import pandas as pd
import os
import pickle
import bmra_prep
import bmra_prep.pathway_activity.prediction

In [2]:
# Cell line whose BMRA inputs and outputs this notebook processes.
cell_line = 'BC3C'

# Folder holding one `blca_publication_OUTPUT_bmra_<cell_line>` sub-folder per
# cell line. The default is the original author's location; set the
# BMRA_OUTPUT_ROOT environment variable to run the notebook on another machine
# without editing this cell.
bmra_output_root = os.environ.get(
    "BMRA_OUTPUT_ROOT",
    "/home/jing/Phd_project/project_UCD_blca/blca_publication_OUTPUT/blca_publication_OUTPUT_bmra",
)
cell_line_dir = os.path.join(bmra_output_root, f"blca_publication_OUTPUT_bmra_{cell_line}")

# The trailing "" component keeps the trailing path separator that the
# previously hard-coded strings carried, so the default values are unchanged.
data_dir = os.path.join(cell_line_dir, f"00_outputs_2020_{cell_line}", "")  # files read by the loading cells
out_dir = os.path.join(cell_line_dir, f"01_outputs_2020_{cell_line}", "")  # CSVs written by this notebook

# Create the output folder up front; exist_ok makes re-running the cell harmless.
os.makedirs(out_dir, exist_ok=True)

## Load data

In [3]:
# Load the metadata dict and extract the elements used below.
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# point data_dir at files produced by a trusted earlier pipeline step.
with open(os.path.join(data_dir, "metadata.pickle"), "rb") as f:
    all_metadata = pickle.load(f)

# Counts stored in the metadata (presumably the lengths of the label
# collections below — TODO confirm against the step that writes the pickle).
n_modules = all_metadata["n_modules"]
n_genes = all_metadata["n_genes"]
n_experiments = all_metadata["n_experiments"]

# Label collections; `modules` and `genes` are used later as the row and
# column labels of the coefficient table.
modules = all_metadata["modules"]
exp_ids = all_metadata["exp_ids"]
genes = all_metadata["genes"]

In [4]:
# Read the normalised L1000 table; the first CSV column supplies the row labels.
l1000_csv_path = os.path.join(data_dir, "L1000_Data_norm_data.csv")
L1000_df = pd.read_csv(l1000_csv_path, index_col=0)

# Underlying NumPy array, passed to the coefficient and activity calls later on.
x = L1000_df.values
x.shape

(978, 1971)

In [5]:
# Load the dose, IC50 and perturbation matrices as plain NumPy arrays.
def _read_annotated_values(filename):
    """Read `filename` from data_dir (first column as index) and return its values array."""
    return pd.read_csv(os.path.join(data_dir, filename), index_col=0).values


inhib_conc_matrix = _read_annotated_values("inhib_conc_annotated.csv")
ic50_matrix = _read_annotated_values("ic50_annotated.csv")

# Optional gamma matrix, currently disabled:
# gamma_matrix = pd.read_csv(
#     os.path.join(data_dir, "gamma_annotated.csv"),
#     index_col = 0,
# ).values

pert_matrix = _read_annotated_values("pert_annotated.csv")

In [6]:
# Alternative target that also uses a gamma term (requires gamma_matrix, which is not loaded at present):
# y_true = (1 + gamma_matrix * inhib_conc_matrix / ic50_matrix) / (1 + inhib_conc_matrix / ic50_matrix)

# Element-wise 1 / (1 + concentration / IC50).
conc_over_ic50 = inhib_conc_matrix / ic50_matrix
y_true = 1 / (1 + conc_over_ic50)

display(y_true.shape)
y_true

(10, 1971)

array([[1., 1., 1., ..., 1., 1., 1.],
       [1., 1., 1., ..., 1., 1., 1.],
       [1., 1., 1., ..., 1., 1., 1.],
       ...,
       [1., 1., 1., ..., 1., 1., 1.],
       [1., 1., 1., ..., 1., 1., 1.],
       [1., 1., 1., ..., 1., 1., 1.]])

## Run models

In [7]:
# Compute the coefficient array from the expression matrix `x`, the targets
# `y_true` and the perturbation matrix `pert_matrix`.
# NOTE(review): the five trailing positional numbers are passed without names
# and their meaning is not visible in this notebook — check the signature of
# predict_coeffs and consider passing them as keywords or named constants.
# NOTE(review): if predict_coeffs is stochastic, no seed is set here — TODO confirm.
a_coeffs = bmra_prep.pathway_activity.prediction.predict_coeffs(
    x, y_true, pert_matrix, 200_000, 10, 10, 10, 100)

In [8]:
# Label the coefficients (rows = modules, columns = genes) and save them to out_dir.
a_coeffs_df = pd.DataFrame(a_coeffs, index=modules, columns=genes)
a_coeffs_csv_path = os.path.join(out_dir, "a_coeffs.csv")
a_coeffs_df.to_csv(a_coeffs_csv_path)

# To reuse a previous run instead of recomputing, load the saved table instead:
#a_coeffs_df = pd.read_csv(os.path.join(out_dir,'a_coeffs.csv'),index_col=0)
#a_coeffs = a_coeffs_df.values

# Count of truthy (non-zero) coefficients in each module's row, then the table itself.
nonzero_per_module = a_coeffs_df.astype(bool).sum(axis=1)
display(nonzero_per_module)
display(a_coeffs_df)

Androgen    978
CDK1        978
CDK2        978
CDK4_6      978
EGFR        978
Estrogen    978
FGFR        978
PI3K        978
p53         978
TOP2A       978
dtype: int64

Unnamed: 0,AARS,ABCB6,ABCC5,ABCF1,ABCF3,ABHD4,ABHD6,ABL1,ACAA1,ACAT2,...,ZMIZ1,ZMYM2,ZNF131,ZNF274,ZNF318,ZNF395,ZNF451,ZNF586,ZNF589,ZW10
Androgen,1.2e-05,-2.557963e-06,2e-06,-4.6e-05,-6.01425e-06,1.087405e-05,-1.2e-05,2.466647e-05,5.100335e-06,2.474177e-06,...,1.566968e-05,1.586604e-05,-8e-06,-1e-05,-4.694344e-06,1.65758e-06,6.471447e-06,1.358154e-05,1.3e-05,-1.483128e-05
CDK1,1.5e-05,2.114721e-06,5e-06,-2.7e-05,3.275508e-06,-6.124339e-06,-1.4e-05,-8.136824e-07,-1.1168e-05,-1.913402e-05,...,-8.029867e-06,-5.503797e-06,-1.1e-05,-1.2e-05,-2.151828e-06,-5.865626e-06,-1.544304e-05,3.891235e-06,5e-06,-1.852795e-05
CDK2,1.1e-05,1.372444e-05,3e-06,1.5e-05,-2.983959e-05,-3.488261e-06,2.1e-05,6.664216e-06,2.028356e-06,2.931721e-07,...,5.853267e-07,-3.417784e-06,-1.2e-05,8e-06,1.684107e-05,-1.380204e-06,1.352182e-05,1.457642e-05,1.2e-05,-3.084844e-06
CDK4_6,-9e-06,6.886217e-07,-3e-06,-2e-06,-1.620281e-06,7.446108e-06,4e-06,-8.125526e-06,-3.662732e-06,-1.11582e-05,...,-6.402308e-06,1.237798e-06,-5e-06,7e-06,1.63276e-05,-8.668925e-05,-8.521452e-06,-3.096849e-06,-2.3e-05,9.660163e-06
EGFR,-4.9e-05,3.133117e-06,-3e-05,8e-06,-1.598096e-05,-0.06375666,-1.3e-05,4.717118e-07,2.899297e-06,-0.07599537,...,9.415271e-07,-6.176222e-06,-1.8e-05,-1.9e-05,-4.330224e-08,1.087815e-05,-9.894972e-06,-3.545438e-06,1.1e-05,-5.591422e-07
Estrogen,5.2e-05,-3.080637e-05,-3.6e-05,5.1e-05,-1.754092e-07,-7.955052e-05,7e-06,4.039748e-05,6.65532e-05,-0.1646904,...,2.173892e-05,3.251503e-05,5e-05,-5e-05,-2.093106e-05,-2.602695e-06,1.645329e-05,4.481909e-08,-5.7e-05,4.656855e-05
FGFR,-1.2e-05,-1.069123e-05,5e-06,-1e-06,-2.621144e-06,8.659629e-06,1e-05,-8.879177e-06,1.462276e-05,-6.633269e-05,...,9.906925e-07,1.634266e-05,-3.5e-05,-1.2e-05,5.160225e-05,4.613992e-05,9.493847e-07,-5.30967e-06,1.9e-05,2.264892e-05
PI3K,6e-06,-2.138814e-06,-2e-06,-3e-06,-8.089039e-06,-3.958224e-06,2.5e-05,-4.47303e-06,-7.552046e-08,-0.00019833,...,5.248753e-07,2.344246e-06,1.3e-05,6.3e-05,5.414381e-06,-6.233498e-07,-3.888613e-05,-5.31255e-06,1.4e-05,0.001081066
p53,-5e-06,-2.21094e-05,7e-06,3.7e-05,8.641213e-06,-1.546742e-05,0.1426,1.604618e-05,-5.142607e-06,1.977877e-05,...,7.359743e-06,1.71713e-06,2.2e-05,-1.8e-05,0.000235496,2.669022e-05,2.191839e-06,8.958992e-07,2.1e-05,0.0007409224
TOP2A,-1.9e-05,9.322239e-06,-6.2e-05,-1.6e-05,2.895723e-05,-1.337193e-07,1e-05,-1.314561e-05,-1.577638e-05,-1.612669e-05,...,-3.038144e-06,7.101912e-07,2e-05,-4e-05,1.696092e-05,9.135963e-06,3.381737e-05,-9.38492e-06,-1e-05,-6.833539e-05


In [9]:
#pathway_activity = a_coeffs @ x
#pathway_activity.shape

In [10]:
# Activity values computed from the expression matrix and the coefficients.
module_activity = bmra_prep.pathway_activity.calc_pathway_activity(x, a_coeffs)

# Turn the activities into the global response object, passing the module names
# and the L1000 column labels so the resulting table is annotated.
R_global = bmra_prep.pathway_activity.calc_global_response_from_pathway_activity(
    module_activity, modules, L1000_df.columns
)

# The object exposes its annotated table through `.dataframe`.
R_global_df = R_global.dataframe
display(R_global_df)

Unnamed: 0,ASG002_BC3C_24H:A03,ASG002_BC3C_24H:A04,ASG002_BC3C_24H:A05,ASG002_BC3C_24H:A06,ASG002_BC3C_24H:J13,ASG002_BC3C_24H:J14,ASG002_BC3C_24H:J15,ASG002_BC3C_24H:J16,ASG002_BC3C_24H:J17,ASG002_BC3C_24H:J18,...,MOAR012_BC3C_24H:P15,MOAR012_BC3C_24H:P16,MOAR012_BC3C_24H:P17,MOAR012_BC3C_24H:P18,MOAR012_BC3C_24H:P19,MOAR012_BC3C_24H:P20,MOAR012_BC3C_24H:P21,MOAR012_BC3C_24H:P22,MOAR012_BC3C_24H:P23,MOAR012_BC3C_24H:P24
Androgen,-0.001004,-0.036575,-0.001015,0.018552,0.011435,0.05847,0.045842,0.00541,0.043351,0.034086,...,-0.042211,-0.005753,-0.289057,-0.028941,-0.046035,0.067688,-0.049117,-0.019516,-0.120294,0.117488
CDK1,-0.107155,-0.027749,-0.170211,-0.165788,-0.071171,-0.023798,0.332107,0.097408,-0.066428,0.060357,...,0.313367,0.115013,0.311394,0.387438,-0.469813,-0.220334,0.126878,-0.04924,0.236259,0.073994
CDK2,-0.027338,-0.000981,-0.02039,-0.024261,-0.003196,0.024913,0.047951,0.093718,-0.015713,-0.009533,...,-0.131529,-0.058768,-0.200715,-0.058656,-0.134046,-0.099201,0.001589,-0.051774,-0.078393,0.005564
CDK4_6,-0.068674,-0.092957,0.064813,-0.105586,0.112189,0.064218,0.099836,0.289216,0.087235,-0.100294,...,0.187299,0.209502,0.357975,-0.264725,-0.827796,-0.325305,-0.097651,-0.298316,-0.305687,-0.262353
EGFR,0.355335,0.1653,0.322619,0.010396,0.308689,-0.877414,-0.616765,-0.568826,-0.590209,-0.520601,...,0.502227,0.460392,0.51498,0.381313,0.176064,0.341794,0.29388,0.19196,0.514549,0.436025
Estrogen,-0.654487,-0.0962,-0.210312,0.065807,0.53702,-0.147662,0.487496,-0.235995,0.067327,0.281395,...,0.07876,0.126119,0.628289,0.157741,0.330497,0.261924,0.1289,-0.160335,-0.224909,-0.274088
FGFR,0.016716,-0.007146,0.008861,-0.022626,-0.007839,-0.017687,-0.049999,-0.068068,-0.080346,-0.047682,...,0.359879,0.356197,0.180944,0.334883,-0.191376,-0.473883,0.071754,0.052075,-0.785139,-0.092459
PI3K,-0.324839,-0.853677,0.274759,-0.502169,0.26275,0.313225,0.487181,0.175551,-0.0673,0.090341,...,-0.145342,-0.365778,0.415244,-0.278208,-0.846053,-0.725097,-0.237873,0.065428,-0.588796,-0.001559
p53,-0.292842,-0.158038,0.030774,-0.12797,-0.112705,-0.113667,-0.163947,-0.021778,-0.169898,0.146145,...,0.165787,0.302081,0.665569,0.550028,0.490929,0.193557,0.289949,0.148833,0.403616,0.431188
TOP2A,0.063583,-0.129512,-0.290024,-0.099613,0.130848,0.162122,0.286798,0.286001,-0.163816,0.077335,...,-0.049736,-0.181214,0.090146,-0.156399,-0.10665,-0.398094,0.246172,-0.128153,-0.362487,0.354435


In [11]:
# Write the global response table into out_dir, then show it.
r_global_csv_path = os.path.join(out_dir, "R_global_annotated.csv")
R_global_df.to_csv(r_global_csv_path)
display(R_global_df)

Unnamed: 0,ASG002_BC3C_24H:A03,ASG002_BC3C_24H:A04,ASG002_BC3C_24H:A05,ASG002_BC3C_24H:A06,ASG002_BC3C_24H:J13,ASG002_BC3C_24H:J14,ASG002_BC3C_24H:J15,ASG002_BC3C_24H:J16,ASG002_BC3C_24H:J17,ASG002_BC3C_24H:J18,...,MOAR012_BC3C_24H:P15,MOAR012_BC3C_24H:P16,MOAR012_BC3C_24H:P17,MOAR012_BC3C_24H:P18,MOAR012_BC3C_24H:P19,MOAR012_BC3C_24H:P20,MOAR012_BC3C_24H:P21,MOAR012_BC3C_24H:P22,MOAR012_BC3C_24H:P23,MOAR012_BC3C_24H:P24
Androgen,-0.001004,-0.036575,-0.001015,0.018552,0.011435,0.05847,0.045842,0.00541,0.043351,0.034086,...,-0.042211,-0.005753,-0.289057,-0.028941,-0.046035,0.067688,-0.049117,-0.019516,-0.120294,0.117488
CDK1,-0.107155,-0.027749,-0.170211,-0.165788,-0.071171,-0.023798,0.332107,0.097408,-0.066428,0.060357,...,0.313367,0.115013,0.311394,0.387438,-0.469813,-0.220334,0.126878,-0.04924,0.236259,0.073994
CDK2,-0.027338,-0.000981,-0.02039,-0.024261,-0.003196,0.024913,0.047951,0.093718,-0.015713,-0.009533,...,-0.131529,-0.058768,-0.200715,-0.058656,-0.134046,-0.099201,0.001589,-0.051774,-0.078393,0.005564
CDK4_6,-0.068674,-0.092957,0.064813,-0.105586,0.112189,0.064218,0.099836,0.289216,0.087235,-0.100294,...,0.187299,0.209502,0.357975,-0.264725,-0.827796,-0.325305,-0.097651,-0.298316,-0.305687,-0.262353
EGFR,0.355335,0.1653,0.322619,0.010396,0.308689,-0.877414,-0.616765,-0.568826,-0.590209,-0.520601,...,0.502227,0.460392,0.51498,0.381313,0.176064,0.341794,0.29388,0.19196,0.514549,0.436025
Estrogen,-0.654487,-0.0962,-0.210312,0.065807,0.53702,-0.147662,0.487496,-0.235995,0.067327,0.281395,...,0.07876,0.126119,0.628289,0.157741,0.330497,0.261924,0.1289,-0.160335,-0.224909,-0.274088
FGFR,0.016716,-0.007146,0.008861,-0.022626,-0.007839,-0.017687,-0.049999,-0.068068,-0.080346,-0.047682,...,0.359879,0.356197,0.180944,0.334883,-0.191376,-0.473883,0.071754,0.052075,-0.785139,-0.092459
PI3K,-0.324839,-0.853677,0.274759,-0.502169,0.26275,0.313225,0.487181,0.175551,-0.0673,0.090341,...,-0.145342,-0.365778,0.415244,-0.278208,-0.846053,-0.725097,-0.237873,0.065428,-0.588796,-0.001559
p53,-0.292842,-0.158038,0.030774,-0.12797,-0.112705,-0.113667,-0.163947,-0.021778,-0.169898,0.146145,...,0.165787,0.302081,0.665569,0.550028,0.490929,0.193557,0.289949,0.148833,0.403616,0.431188
TOP2A,0.063583,-0.129512,-0.290024,-0.099613,0.130848,0.162122,0.286798,0.286001,-0.163816,0.077335,...,-0.049736,-0.181214,0.090146,-0.156399,-0.10665,-0.398094,0.246172,-0.128153,-0.362487,0.354435
