In [1]:
import pandas as pd
import os
import pickle
import bmra_prep
import bmra_prep.pathway_activity.prediction

In [2]:
# Cell line whose BMRA inputs and outputs this notebook processes.
cell_line = 'BC3C'

# Root folder that holds one "blca_publication_OUTPUT_bmra_<cell_line>"
# directory per cell line. The default is the location used when the notebook
# was written; set the BMRA_OUTPUT_ROOT environment variable to run it on
# another machine without editing this cell.
bmra_output_root = os.environ.get(
    "BMRA_OUTPUT_ROOT",
    "/home/jing/Phd_project/project_UCD_blca/blca_publication_OUTPUT/blca_publication_OUTPUT_bmra",
)
cell_line_dir = f"{bmra_output_root}/blca_publication_OUTPUT_bmra_{cell_line}"

# data_dir is read from by the loading cells; out_dir receives the CSV results.
# Both keep their trailing slash so the values match the original strings.
data_dir = f"{cell_line_dir}/00_outputs_2020_{cell_line}/"
out_dir = f"{cell_line_dir}/01_outputs_2020_{cell_line}/"

# Create the output folder up front; exist_ok makes the cell safe to re-run.
os.makedirs(out_dir, exist_ok=True)

## Load Data

In [3]:
# Load the metadata dictionary and extract the entries used in this notebook.
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so
# data_dir should only point at files produced by a trusted pipeline run.
metadata_path = os.path.join(data_dir, "metadata.pickle")
with open(metadata_path, "rb") as metadata_file:
    all_metadata = pickle.load(metadata_file)

# Entries stored under the "n_*" keys.
n_modules, n_genes, n_experiments = (
    all_metadata[key] for key in ("n_modules", "n_genes", "n_experiments")
)

# Label collections; modules and genes are used further down as the row and
# column labels of the coefficient table.
modules, exp_ids, genes = (
    all_metadata[key] for key in ("modules", "exp_ids", "genes")
)

In [4]:
# Read the L1000 table from data_dir; the first CSV column becomes the index.
l1000_path = os.path.join(data_dir, "L1000_Data_norm_data.csv")
L1000_df = pd.read_csv(l1000_path, index_col=0)

# Bare NumPy array of the table values (labels stay available on L1000_df).
x = L1000_df.to_numpy()
x.shape

(978, 103)

In [5]:
def _read_annotated_matrix(file_name):
    """Read a CSV from data_dir (first column as index) and return its values as an array."""
    return pd.read_csv(os.path.join(data_dir, file_name), index_col=0).to_numpy()


# Load the doses, the IC50 values and the perturbation matrix. Row and column
# labels are dropped here; only the numeric arrays are kept.
inhib_conc_matrix = _read_annotated_matrix("inhib_conc_annotated.csv")
ic50_matrix = _read_annotated_matrix("ic50_annotated.csv")

# The gamma matrix is currently not loaded. To re-enable it:
# gamma_matrix = _read_annotated_matrix("gamma_annotated.csv")

pert_matrix = _read_annotated_matrix("pert_annotated.csv")

In [6]:
# Target values for the model, computed element-wise as
#     y_true = 1 / (1 + dose / IC50)
# Alternative formula with a gamma term, currently disabled:
# y_true = (1 + gamma_matrix * inhib_conc_matrix / ic50_matrix) / (1 + inhib_conc_matrix / ic50_matrix)
dose_over_ic50 = inhib_conc_matrix / ic50_matrix
y_true = 1 / (1 + dose_over_ic50)

display(y_true.shape)
y_true

(11, 103)

array([[1.        , 1.        , 1.        , ..., 1.        , 1.        ,
        0.47368421],
       [1.        , 1.        , 1.        , ..., 1.        , 1.        ,
        1.        ],
       [1.        , 1.        , 1.        , ..., 1.        , 1.        ,
        1.        ],
       ...,
       [1.        , 1.        , 1.        , ..., 0.94914478, 0.98245337,
        1.        ],
       [1.        , 1.        , 1.        , ..., 1.        , 1.        ,
        1.        ],
       [1.        , 1.        , 1.        , ..., 1.        , 1.        ,
        1.        ]])

## Run models

In [7]:
# Estimate the coefficients from the expression values (x), the target values
# (y_true) and the perturbation matrix.
# NOTE(review): the five trailing numbers are positional settings of
# predict_coeffs whose meaning is not visible in this notebook — confirm them
# against bmra_prep and consider passing them by keyword.
predict_coeffs = bmra_prep.pathway_activity.prediction.predict_coeffs
a_coeffs = predict_coeffs(
    x,
    y_true,
    pert_matrix,
    200_000,
    10,
    10,
    10,
    100,
)

In [8]:
# Label the coefficients (rows: modules, columns: genes) and write them to out_dir.
a_coeffs_df = pd.DataFrame(data=a_coeffs, index=modules, columns=genes)
a_coeffs_path = os.path.join(out_dir, "a_coeffs.csv")
a_coeffs_df.to_csv(a_coeffs_path)

# To reuse a saved result instead of recomputing it, load it back with:
# a_coeffs_df = pd.read_csv(os.path.join(out_dir,'a_coeffs.csv'),index_col=0)
# a_coeffs = a_coeffs_df.values

# Per module, count the coefficients that are truthy (i.e. not exactly zero),
# then show the full table.
nonzero_per_module = a_coeffs_df.astype(bool).sum(axis='columns')
display(nonzero_per_module)
display(a_coeffs_df)

Androgen    978
CDK1        978
CDK2        978
CDK4_6      978
EGFR        978
Estrogen    978
FGFR        978
PI3K        978
p53         978
TOP2A       978
Src         978
dtype: int64

Unnamed: 0,AARS,ABCB6,ABCC5,ABCF1,ABCF3,ABHD4,ABHD6,ABL1,ACAA1,ACAT2,...,ZMIZ1,ZMYM2,ZNF131,ZNF274,ZNF318,ZNF395,ZNF451,ZNF586,ZNF589,ZW10
Androgen,1.3e-05,1.4e-05,-3.5e-05,-8.241945e-06,-3e-06,-1.261487e-05,7e-06,-5e-06,3.4e-05,6.2e-05,...,1.2e-05,8e-06,-1e-05,1.1e-05,-7e-06,-6.895469e-06,1.510959e-05,-1.2e-05,-1.4e-05,-9e-06
CDK1,1.6e-05,2.8e-05,2.2e-05,-5.262034e-06,-1.1e-05,-2.395746e-06,-3e-06,2.3e-05,1.6e-05,7e-06,...,-3.4e-05,-1.9e-05,1.4e-05,-1.5e-05,-1.2e-05,-1.340558e-06,-1.163019e-05,1.1e-05,-1.2e-05,-1e-05
CDK2,1.1e-05,-5e-06,-2.1e-05,-1.766485e-05,-2.1e-05,-1.552558e-07,-1e-05,-1.7e-05,-1.4e-05,-7e-06,...,2.1e-05,1e-05,1.6e-05,3e-06,1.6e-05,-1.132431e-05,8.536727e-06,-1.9e-05,1.5e-05,9e-06
CDK4_6,-7e-06,2.2e-05,2.7e-05,-3.506053e-06,1e-05,2.49964e-05,-1.6e-05,8e-06,-2.1e-05,-5.1e-05,...,1.2e-05,-1.1e-05,-6e-06,-7e-06,-1e-06,1.12132e-05,-1.163027e-05,-2e-05,-1.1e-05,-1.1e-05
EGFR,-0.000143,1e-05,-2e-06,6.360245e-06,1.6e-05,-0.0001954897,1.5e-05,-5e-06,6e-06,-0.026463,...,-0.000105,1e-05,-3e-06,6e-06,-1.1e-05,0.0002862564,6.047525e-06,5e-06,-6e-06,1e-05
Estrogen,-2e-05,3e-06,-1.2e-05,3.478338e-05,1.7e-05,1.683049e-05,4e-06,9e-06,1.2e-05,-0.273331,...,1.3e-05,-9e-06,2.7e-05,-2.5e-05,-7e-06,-9.523585e-06,-9.280692e-07,2e-05,-9e-06,-2e-06
FGFR,-0.000863,1e-05,1.4e-05,5.267131e-06,6e-06,-1.704393e-05,-1.2e-05,-8e-06,-1e-06,-1.7e-05,...,2e-06,5e-06,-1.1e-05,1e-05,-2e-05,3.980242e-05,-1.841245e-05,2.2e-05,-6e-06,1e-05
PI3K,-1.4e-05,3.2e-05,-9e-06,-8.384287e-06,-1.1e-05,-6.120695e-06,-7.2e-05,1.9e-05,-2e-06,-2.7e-05,...,-7e-06,-2e-06,1.1e-05,-5e-06,1.3e-05,-0.0001315485,-4.549473e-05,-2.2e-05,3.8e-05,0.02393
p53,-8e-06,3e-06,-2.1e-05,1.367569e-06,2e-05,1.481629e-05,0.234177,2e-05,1.9e-05,-1.8e-05,...,1.2e-05,-1e-06,-7.1e-05,3.8e-05,0.000101,8.335073e-06,1.380856e-05,-3.2e-05,2e-05,2.4e-05
TOP2A,-9e-06,-1.7e-05,-1.1e-05,-3.830396e-07,1.7e-05,-1.655469e-05,-9e-06,-1.6e-05,2.1e-05,-1.6e-05,...,7e-06,3.7e-05,1.9e-05,6e-06,-1.2e-05,-2.410482e-07,1.481985e-05,1.8e-05,-1.5e-05,-5e-05


In [9]:
# Disabled manual computation of the pathway activity; the following cell
# obtains it through bmra_prep.pathway_activity.calc_pathway_activity(x, a_coeffs).
# NOTE(review): presumably equivalent to that helper — confirm before deleting.
#pathway_activity = a_coeffs @ x
#pathway_activity.shape

In [10]:
# Pathway activity derived from the expression values and the coefficients.
pathway_activity = bmra_prep.pathway_activity.calc_pathway_activity(x, a_coeffs)

# Build the global response, passing the module names and the L1000 column
# labels, then show its dataframe form.
R_global = bmra_prep.pathway_activity.calc_global_response_from_pathway_activity(
    pathway_activity,
    modules,
    L1000_df.columns,
)
R_global_df = R_global.dataframe
display(R_global_df)

Unnamed: 0,ASG002_BC3C_24H:A10,ASG002_BC3C_24H:A11,ASG002_BC3C_24H:A19,ASG002_BC3C_24H:A20,ASG002_BC3C_24H:A21,ASG002_BC3C_24H:B10,ASG002_BC3C_24H:B11,ASG002_BC3C_24H:B14,ASG002_BC3C_24H:B15,ASG002_BC3C_24H:C13,...,MOAR010_BC3C_24H:L20,MOAR011_BC3C_24H:C01,MOAR011_BC3C_24H:C02,MOAR011_BC3C_24H:C03,MOAR011_BC3C_24H:C10,MOAR011_BC3C_24H:C11,MOAR011_BC3C_24H:F07,MOAR011_BC3C_24H:F08,MOAR011_BC3C_24H:F09,MOAR011_BC3C_24H:J10
Androgen,-0.052588,-0.009338,0.002014,-0.039855,0.001459,0.020207,0.004271,0.016619,0.021259,0.036106,...,-0.044632,-0.015586,-0.00136,-0.008848,-0.051977,-0.004513,-0.054307,0.030098,0.026408,-0.587685
CDK1,-1.025892,-0.838356,0.158636,-0.317907,0.132943,-0.366352,0.164321,0.230745,0.121482,0.175691,...,-0.040664,-0.060485,-0.283612,-0.120744,-0.203191,-0.026597,-0.902761,0.06257,-0.036895,-0.072069
CDK2,-0.197974,-0.177461,-0.033531,-0.009724,-0.001769,-0.025154,0.010669,-0.03446,0.028193,-0.003986,...,-0.039215,-0.011634,-0.000654,-0.000893,-0.020966,0.024416,-0.057298,-0.029378,-0.031922,-0.003093
CDK4_6,-0.024748,-0.232817,-0.041875,-0.5141,0.040668,0.002151,-0.008321,0.097251,0.085904,0.032125,...,-0.573184,0.048539,0.097787,-0.088836,-1.036709,-1.207233,0.318168,0.19885,-0.063558,-0.014745
EGFR,0.594982,0.495966,0.227714,0.367,0.490806,0.114104,-0.414023,0.264308,0.284689,-0.045662,...,0.179421,0.042908,-0.056588,-0.036334,-0.08601,-0.422977,-0.437638,-0.555153,-0.317297,-0.228497
Estrogen,-0.124917,-0.207431,-0.21493,-0.410516,-0.954215,-0.308862,-0.087129,-0.241263,-0.166726,-0.041963,...,-0.084256,0.059731,0.040048,-0.012069,0.022347,0.135252,-1.516856,-0.226879,0.180834,-0.259135
FGFR,-0.109894,-0.176945,-0.089439,0.051564,-0.028174,-0.406617,-0.032708,-0.028303,-0.061885,-0.300268,...,0.159497,-0.031196,0.049439,-0.021507,0.028031,-0.240562,-0.550375,0.024974,0.22304,0.038975
PI3K,-1.916109,-1.693891,-1.429836,-1.245786,-0.713511,0.293498,-0.128705,-0.183233,-0.84298,-0.26614,...,-0.054253,-0.178665,-0.043479,-0.525484,-0.503515,-0.004631,-0.871133,0.21148,-0.25324,-0.001552
p53,-0.209087,-0.218651,-0.129337,-0.403028,0.051266,-1.630868,-1.476994,-0.120347,-0.087329,-1.326672,...,-0.307738,0.247633,0.026916,0.233343,0.202869,0.278503,-0.228552,-0.050539,0.030125,0.600242
TOP2A,-0.187341,0.098568,-0.228705,-0.159756,-0.125706,0.04261,0.076285,-1.999032,-0.195916,-0.29774,...,0.029965,0.11981,0.107893,-0.121429,-0.445142,0.074617,-0.712991,0.016319,-0.009356,-0.408625


In [11]:
# Write the global response table to out_dir, then show it.
r_global_path = os.path.join(out_dir, "R_global_annotated.csv")
R_global_df.to_csv(r_global_path)
display(R_global_df)

Unnamed: 0,ASG002_BC3C_24H:A10,ASG002_BC3C_24H:A11,ASG002_BC3C_24H:A19,ASG002_BC3C_24H:A20,ASG002_BC3C_24H:A21,ASG002_BC3C_24H:B10,ASG002_BC3C_24H:B11,ASG002_BC3C_24H:B14,ASG002_BC3C_24H:B15,ASG002_BC3C_24H:C13,...,MOAR010_BC3C_24H:L20,MOAR011_BC3C_24H:C01,MOAR011_BC3C_24H:C02,MOAR011_BC3C_24H:C03,MOAR011_BC3C_24H:C10,MOAR011_BC3C_24H:C11,MOAR011_BC3C_24H:F07,MOAR011_BC3C_24H:F08,MOAR011_BC3C_24H:F09,MOAR011_BC3C_24H:J10
Androgen,-0.052588,-0.009338,0.002014,-0.039855,0.001459,0.020207,0.004271,0.016619,0.021259,0.036106,...,-0.044632,-0.015586,-0.00136,-0.008848,-0.051977,-0.004513,-0.054307,0.030098,0.026408,-0.587685
CDK1,-1.025892,-0.838356,0.158636,-0.317907,0.132943,-0.366352,0.164321,0.230745,0.121482,0.175691,...,-0.040664,-0.060485,-0.283612,-0.120744,-0.203191,-0.026597,-0.902761,0.06257,-0.036895,-0.072069
CDK2,-0.197974,-0.177461,-0.033531,-0.009724,-0.001769,-0.025154,0.010669,-0.03446,0.028193,-0.003986,...,-0.039215,-0.011634,-0.000654,-0.000893,-0.020966,0.024416,-0.057298,-0.029378,-0.031922,-0.003093
CDK4_6,-0.024748,-0.232817,-0.041875,-0.5141,0.040668,0.002151,-0.008321,0.097251,0.085904,0.032125,...,-0.573184,0.048539,0.097787,-0.088836,-1.036709,-1.207233,0.318168,0.19885,-0.063558,-0.014745
EGFR,0.594982,0.495966,0.227714,0.367,0.490806,0.114104,-0.414023,0.264308,0.284689,-0.045662,...,0.179421,0.042908,-0.056588,-0.036334,-0.08601,-0.422977,-0.437638,-0.555153,-0.317297,-0.228497
Estrogen,-0.124917,-0.207431,-0.21493,-0.410516,-0.954215,-0.308862,-0.087129,-0.241263,-0.166726,-0.041963,...,-0.084256,0.059731,0.040048,-0.012069,0.022347,0.135252,-1.516856,-0.226879,0.180834,-0.259135
FGFR,-0.109894,-0.176945,-0.089439,0.051564,-0.028174,-0.406617,-0.032708,-0.028303,-0.061885,-0.300268,...,0.159497,-0.031196,0.049439,-0.021507,0.028031,-0.240562,-0.550375,0.024974,0.22304,0.038975
PI3K,-1.916109,-1.693891,-1.429836,-1.245786,-0.713511,0.293498,-0.128705,-0.183233,-0.84298,-0.26614,...,-0.054253,-0.178665,-0.043479,-0.525484,-0.503515,-0.004631,-0.871133,0.21148,-0.25324,-0.001552
p53,-0.209087,-0.218651,-0.129337,-0.403028,0.051266,-1.630868,-1.476994,-0.120347,-0.087329,-1.326672,...,-0.307738,0.247633,0.026916,0.233343,0.202869,0.278503,-0.228552,-0.050539,0.030125,0.600242
TOP2A,-0.187341,0.098568,-0.228705,-0.159756,-0.125706,0.04261,0.076285,-1.999032,-0.195916,-0.29774,...,0.029965,0.11981,0.107893,-0.121429,-0.445142,0.074617,-0.712991,0.016319,-0.009356,-0.408625
