In [1]:
import pandas as pd
import os
import pickle
import bmra_prep
import bmra_prep.pathway_activity.prediction

In [2]:
# Cell line whose BMRA inputs and outputs this notebook works on.
cell_line = "BC3C"

# Inputs are read from the 00_outputs folder (data_dir); everything this
# notebook produces is written to the 01_outputs folder (out_dir).
# NOTE(review): both are absolute, machine-specific paths — adjust them
# before running on another machine.
data_dir = f"/home/jing/Phd_project/project_UCD_blca/blca_publication_OUTPUT/blca_publication_OUTPUT_bmra/blca_publication_OUTPUT_bmra_{cell_line}/00_outputs_2020_{cell_line}/"
out_dir = f"/home/jing/Phd_project/project_UCD_blca/blca_publication_OUTPUT/blca_publication_OUTPUT_bmra/blca_publication_OUTPUT_bmra_{cell_line}/01_outputs_2020_{cell_line}/"

# Make sure the output folder exists; an existing folder is not an error.
os.makedirs(out_dir, exist_ok=True)

# Load Data

In [3]:
# Load the metadata dict from data_dir and pull out the entries used below.
# NOTE: pickle.load can execute arbitrary code — only open files that come
# from a trusted pipeline.
with open(os.path.join(data_dir, "metadata.pickle"), "rb") as metadata_file:
    all_metadata = pickle.load(metadata_file)

# Size entries stored in the metadata.
n_modules, n_genes, n_experiments = (
    all_metadata[key] for key in ("n_modules", "n_genes", "n_experiments")
)

# Label entries; `modules` and `genes` later label the rows/columns of the
# coefficient table.
modules, exp_ids, genes = (
    all_metadata[key] for key in ("modules", "exp_ids", "genes")
)

In [4]:
# Read the L1000 table from data_dir (file name suggests it is already
# normalised); the first CSV column becomes the row index.
L1000_df = pd.read_csv(os.path.join(data_dir, "L1000_Data_norm_data.csv"), index_col=0)

# Bare value array passed to the model; its shape is echoed as the cell output.
x = L1000_df.values
x.shape

(978, 104)

In [5]:
# Load the inhibitor-concentration, IC50 and perturbation tables from
# data_dir as bare value arrays. The first CSV column is used as the row
# index, so it is not part of the resulting arrays.
inhib_conc_matrix, ic50_matrix, pert_matrix = (
    pd.read_csv(os.path.join(data_dir, csv_name), index_col=0).values
    for csv_name in (
        "inhib_conc_annotated.csv",
        "ic50_annotated.csv",
        "pert_annotated.csv",
    )
)

# The gamma table is currently not loaded; to restore it:
# gamma_matrix = pd.read_csv(
#     os.path.join(data_dir, "gamma_annotated.csv"),
#     index_col = 0,
# ).values

In [6]:
# Element-wise target values: y_true = 1 / (1 + concentration / IC50).
# Entries where the concentration is 0 evaluate to exactly 1.
# NOTE(review): presumably the expected remaining activity of each module
# per experiment — confirm. Nothing here guards against zero IC50 entries.
#
# Disabled variant that additionally uses gamma_matrix:
# y_true = (1 + gamma_matrix * inhib_conc_matrix / ic50_matrix) / (1 + inhib_conc_matrix / ic50_matrix)
y_true = 1 / (inhib_conc_matrix / ic50_matrix + 1)

# Show the shape first, then the array itself as the cell result.
display(y_true.shape)
y_true

(10, 104)

array([[1.        , 1.        , 1.        , ..., 1.        , 1.        ,
        0.47368421],
       [1.        , 1.        , 1.        , ..., 1.        , 1.        ,
        1.        ],
       [1.        , 1.        , 1.        , ..., 1.        , 1.        ,
        1.        ],
       ...,
       [1.        , 1.        , 1.        , ..., 0.94914478, 0.98245337,
        1.        ],
       [1.        , 1.        , 1.        , ..., 1.        , 1.        ,
        1.        ],
       [1.        , 1.        , 1.        , ..., 1.        , 1.        ,
        1.        ]])

## Run models

In [7]:
# Fit the coefficient matrix from the expression data (x), the target values
# (y_true) and the perturbation matrix.
# NOTE(review): the five trailing positional integers are passed without
# names; their meaning is not visible in this notebook — check the
# predict_coeffs signature before changing any of them.
a_coeffs = bmra_prep.pathway_activity.prediction.predict_coeffs(
    x,
    y_true,
    pert_matrix,
    200_000,
    10,
    10,
    10,
    100,
)

In [8]:
# Label the coefficient matrix (rows: modules, columns: genes) and save it
# to out_dir.
a_coeffs_df = pd.DataFrame(data=a_coeffs, index=modules, columns=genes)
a_coeffs_df.to_csv(os.path.join(out_dir, "a_coeffs.csv"))

# To reuse a previously saved result instead of the freshly computed one:
# a_coeffs_df = pd.read_csv(os.path.join(out_dir,'a_coeffs.csv'),index_col=0)
# a_coeffs = a_coeffs_df.values

# First the number of non-zero coefficients per module, then the full table.
display(a_coeffs_df.astype(bool).sum(axis="columns"), a_coeffs_df)

Androgen    978
CDK1_2      978
CDK4_6      978
EGFR        978
Estrogen    978
FGFR        978
PI3K        978
p53         978
TOP2A       978
Src         978
dtype: int64

Unnamed: 0,AARS,ABCB6,ABCC5,ABCF1,ABCF3,ABHD4,ABHD6,ABL1,ACAA1,ACAT2,...,ZMIZ1,ZMYM2,ZNF131,ZNF274,ZNF318,ZNF395,ZNF451,ZNF586,ZNF589,ZW10
Androgen,1.387773e-05,8e-06,-9.772551e-06,-4.1e-05,-7e-06,-5e-06,-2.5e-05,7e-06,-2.7e-05,-1.067617e-07,...,7.427341e-06,-2.107484e-05,-1.5e-05,-2.909537e-05,1.5e-05,1.850091e-05,1.9e-05,6.896009e-06,-1.296892e-05,-2e-06
CDK1_2,6.478758e-06,4e-06,1.363533e-05,6e-06,-9e-06,1.4e-05,5e-06,-8e-06,2e-05,-5.731451e-05,...,-5.865858e-05,1.567525e-05,1.2e-05,-5.805736e-06,2e-05,7.689203e-07,8e-06,7.319789e-06,-1.171074e-05,2.2e-05
CDK4_6,1.549713e-05,1.5e-05,2.55045e-05,-1.1e-05,1.1e-05,3e-05,3e-06,-4.9e-05,-1.1e-05,-6.664856e-05,...,-5.06829e-06,-1.553191e-05,2.6e-05,-1.517561e-07,1.6e-05,-3.4027e-05,-9e-06,-3.193759e-07,-8.816227e-06,1e-05
EGFR,-2.845675e-05,-1.3e-05,2.136532e-05,-1.1e-05,3e-06,-0.000308,-1.2e-05,-5e-06,-1e-06,-0.0269678,...,-3.840118e-05,-2.922484e-06,-5e-05,1.091527e-06,4e-06,0.0001353174,-2e-06,2.182858e-05,6.600471e-06,-3.2e-05
Estrogen,-1.195813e-05,1.2e-05,-4.24645e-06,-2e-06,2e-06,8e-06,-1.2e-05,-1.9e-05,-3.5e-05,-0.2730494,...,4.181633e-07,-1.811958e-05,1e-06,-1.363653e-05,9e-06,-3.933292e-05,7e-06,1.902126e-06,7.922585e-06,-6e-06
FGFR,-0.000982916,1.5e-05,1.212166e-06,-2.1e-05,-1.2e-05,-2.1e-05,1.2e-05,4e-06,1.9e-05,-1.171211e-05,...,1.899406e-05,6.564768e-06,3e-06,1.102331e-05,5e-06,1.412579e-05,-9e-06,-4.714533e-06,5.996814e-06,1.8e-05
PI3K,-9.657841e-06,1.6e-05,1.574826e-07,-8e-06,8e-06,3e-05,-4.6e-05,3e-06,3.1e-05,-3.830111e-05,...,4.875547e-05,6.723178e-07,9e-06,-3.388291e-06,9e-06,-7.821963e-05,-2e-06,-1.516778e-05,3.74695e-05,0.023434
p53,-2.461926e-05,-1.6e-05,1.068769e-05,5.8e-05,3e-05,2e-06,0.232056,2.9e-05,7e-06,1.549343e-05,...,2.454415e-05,1.231203e-05,8e-06,1.542703e-05,7e-06,-1.391738e-05,-3e-06,5.273925e-06,-1.695741e-05,2.1e-05
TOP2A,-1.40925e-05,6e-06,-8.286251e-05,4e-06,9e-06,2.1e-05,-1.5e-05,-1.2e-05,1e-05,5.724121e-07,...,4.162347e-05,-1.034427e-05,4e-06,-5.096519e-06,2.6e-05,1.847574e-05,6e-06,-3.208215e-05,5.091359e-07,-5.9e-05
Src,2.999627e-07,7e-06,1.016242e-05,-3e-06,-1.7e-05,-4e-06,2.6e-05,-9e-06,1.2e-05,-1.253193e-05,...,1.498902e-05,1.826601e-05,-4e-06,7.47438e-06,2.5e-05,-2.199009e-05,1.3e-05,-7.21388e-06,1.997246e-06,1.5e-05


In [9]:
# NOTE(review): disabled manual computation of the pathway activity as a
# plain matrix product of a_coeffs and x. The following cell obtains it via
# bmra_prep.pathway_activity.calc_pathway_activity(x, a_coeffs) instead —
# presumably equivalent; confirm before deleting this cell.
#pathway_activity = a_coeffs @ x
#pathway_activity.shape

In [10]:
# Compute the pathway activity from x and a_coeffs, then hand it, together
# with the module names and the L1000_df column labels, to the
# global-response helper.
R_global = bmra_prep.pathway_activity.calc_global_response_from_pathway_activity(
    bmra_prep.pathway_activity.calc_pathway_activity(x, a_coeffs), modules, L1000_df.columns
)

# The labelled table is exposed through the result's `dataframe` attribute.
R_global_df = R_global.dataframe
display(R_global_df)

Unnamed: 0,ASG002_BC3C_24H:A10,ASG002_BC3C_24H:A11,ASG002_BC3C_24H:A19,ASG002_BC3C_24H:A20,ASG002_BC3C_24H:A21,ASG002_BC3C_24H:B10,ASG002_BC3C_24H:B11,ASG002_BC3C_24H:B14,ASG002_BC3C_24H:B15,ASG002_BC3C_24H:C13,...,MOAR010_BC3C_24H:L20,MOAR011_BC3C_24H:C01,MOAR011_BC3C_24H:C02,MOAR011_BC3C_24H:C03,MOAR011_BC3C_24H:C10,MOAR011_BC3C_24H:C11,MOAR011_BC3C_24H:F07,MOAR011_BC3C_24H:F08,MOAR011_BC3C_24H:F09,MOAR011_BC3C_24H:J10
Androgen,-0.051863,-0.008976,0.001774,-0.039996,0.001823,0.020283,0.004023,0.016199,0.021165,0.036052,...,-0.044942,-0.015609,-0.001692,-0.00885,-0.052073,-0.005035,-0.054372,0.029677,0.025971,-0.586241
CDK1_2,-0.759049,-0.611922,0.077723,-0.082241,0.089054,-0.28365,0.022446,-0.286249,0.120503,0.058715,...,-0.011907,-0.011212,-0.110485,0.05984,-0.061385,-0.015201,-0.841614,0.108911,-0.056299,0.125358
CDK4_6,-0.025567,-0.234168,-0.041919,-0.515725,0.040858,0.00239,-0.007771,0.097431,0.086049,0.032357,...,-0.575407,0.048657,0.097651,-0.089471,-1.040097,-1.21109,0.318775,0.199396,-0.063239,-0.014519
EGFR,0.595782,0.496815,0.227585,0.366149,0.490477,0.113908,-0.413856,0.264051,0.285211,-0.046581,...,0.178875,0.043486,-0.05552,-0.035948,-0.086028,-0.423306,-0.43625,-0.555279,-0.31591,-0.226343
Estrogen,-0.12428,-0.206679,-0.210077,-0.409447,-0.945416,-0.310837,-0.08525,-0.236102,-0.162541,-0.04032,...,-0.082222,0.059878,0.040465,-0.01144,0.024035,0.134761,-1.506856,-0.227091,0.183448,-0.249039
FGFR,-0.110421,-0.176572,-0.08934,0.051532,-0.028245,-0.406904,-0.033083,-0.026544,-0.06163,-0.300778,...,0.159162,-0.032003,0.048352,-0.023058,0.027569,-0.236628,-0.548688,0.02345,0.221776,0.037643
PI3K,-1.89003,-1.697742,-1.423687,-1.228907,-0.701938,0.297987,-0.150765,-0.182905,-0.861318,-0.277769,...,-0.035034,-0.179601,-0.053394,-0.52568,-0.522395,0.003859,-0.843794,0.216692,-0.23825,0.004889
p53,-0.209905,-0.218104,-0.126854,-0.404891,0.049871,-1.630171,-1.476223,-0.119955,-0.088071,-1.328128,...,-0.304598,0.247399,0.025498,0.231702,0.200218,0.279257,-0.225392,-0.049859,0.030063,0.599728
TOP2A,-0.22739,0.074887,-0.235013,-0.172128,-0.126695,0.054097,0.071529,-2.000953,-0.20574,-0.287905,...,0.053221,0.09987,0.077575,-0.177121,-0.523505,0.050589,-0.751306,-0.008967,-0.032708,-0.439585
Src,-0.930403,-1.678796,0.522821,-1.206341,0.571286,-1.127422,0.495679,0.393984,0.397012,0.447155,...,0.073682,-0.1974,0.060819,0.062738,-0.104216,-0.01254,0.040212,0.245081,0.777876,-0.15573


In [11]:
# Save the global response table to out_dir, then show it once more.
R_global_df.to_csv(
    os.path.join(out_dir, "R_global_annotated.csv"),
)
display(R_global_df)

Unnamed: 0,ASG002_BC3C_24H:A10,ASG002_BC3C_24H:A11,ASG002_BC3C_24H:A19,ASG002_BC3C_24H:A20,ASG002_BC3C_24H:A21,ASG002_BC3C_24H:B10,ASG002_BC3C_24H:B11,ASG002_BC3C_24H:B14,ASG002_BC3C_24H:B15,ASG002_BC3C_24H:C13,...,MOAR010_BC3C_24H:L20,MOAR011_BC3C_24H:C01,MOAR011_BC3C_24H:C02,MOAR011_BC3C_24H:C03,MOAR011_BC3C_24H:C10,MOAR011_BC3C_24H:C11,MOAR011_BC3C_24H:F07,MOAR011_BC3C_24H:F08,MOAR011_BC3C_24H:F09,MOAR011_BC3C_24H:J10
Androgen,-0.051863,-0.008976,0.001774,-0.039996,0.001823,0.020283,0.004023,0.016199,0.021165,0.036052,...,-0.044942,-0.015609,-0.001692,-0.00885,-0.052073,-0.005035,-0.054372,0.029677,0.025971,-0.586241
CDK1_2,-0.759049,-0.611922,0.077723,-0.082241,0.089054,-0.28365,0.022446,-0.286249,0.120503,0.058715,...,-0.011907,-0.011212,-0.110485,0.05984,-0.061385,-0.015201,-0.841614,0.108911,-0.056299,0.125358
CDK4_6,-0.025567,-0.234168,-0.041919,-0.515725,0.040858,0.00239,-0.007771,0.097431,0.086049,0.032357,...,-0.575407,0.048657,0.097651,-0.089471,-1.040097,-1.21109,0.318775,0.199396,-0.063239,-0.014519
EGFR,0.595782,0.496815,0.227585,0.366149,0.490477,0.113908,-0.413856,0.264051,0.285211,-0.046581,...,0.178875,0.043486,-0.05552,-0.035948,-0.086028,-0.423306,-0.43625,-0.555279,-0.31591,-0.226343
Estrogen,-0.12428,-0.206679,-0.210077,-0.409447,-0.945416,-0.310837,-0.08525,-0.236102,-0.162541,-0.04032,...,-0.082222,0.059878,0.040465,-0.01144,0.024035,0.134761,-1.506856,-0.227091,0.183448,-0.249039
FGFR,-0.110421,-0.176572,-0.08934,0.051532,-0.028245,-0.406904,-0.033083,-0.026544,-0.06163,-0.300778,...,0.159162,-0.032003,0.048352,-0.023058,0.027569,-0.236628,-0.548688,0.02345,0.221776,0.037643
PI3K,-1.89003,-1.697742,-1.423687,-1.228907,-0.701938,0.297987,-0.150765,-0.182905,-0.861318,-0.277769,...,-0.035034,-0.179601,-0.053394,-0.52568,-0.522395,0.003859,-0.843794,0.216692,-0.23825,0.004889
p53,-0.209905,-0.218104,-0.126854,-0.404891,0.049871,-1.630171,-1.476223,-0.119955,-0.088071,-1.328128,...,-0.304598,0.247399,0.025498,0.231702,0.200218,0.279257,-0.225392,-0.049859,0.030063,0.599728
TOP2A,-0.22739,0.074887,-0.235013,-0.172128,-0.126695,0.054097,0.071529,-2.000953,-0.20574,-0.287905,...,0.053221,0.09987,0.077575,-0.177121,-0.523505,0.050589,-0.751306,-0.008967,-0.032708,-0.439585
Src,-0.930403,-1.678796,0.522821,-1.206341,0.571286,-1.127422,0.495679,0.393984,0.397012,0.447155,...,0.073682,-0.1974,0.060819,0.062738,-0.104216,-0.01254,0.040212,0.245081,0.777876,-0.15573
