In [1]:
import pandas as pd
import os
import pickle
import bmra_prep
import bmra_prep.pathway_activity.prediction

In [2]:
# Cell line whose BMRA inputs/outputs this notebook processes.
cell_line = 'BC3C'

# Root folder holding every pipeline stage for this cell line. Both stage
# directories are derived from this single value so they cannot drift apart
# when the location changes.
# NOTE(review): this is an absolute, machine-specific path — adjust it when
# running on another machine.
bmra_root = f"/home/jing/Phd_project/project_UCD_blca/blca_publication_OUTPUT/blca_publication_OUTPUT_bmra/blca_publication_OUTPUT_bmra_{cell_line}/"

# Stage 00 folder: read by the loading cells below.
data_dir = f"{bmra_root}00_outputs_2020_{cell_line}/"
# Stage 01 folder: receives the CSV files written by this notebook.
out_dir = f"{bmra_root}01_outputs_2020_{cell_line}/"

# Create the output folder up front; exist_ok makes re-running the cell safe.
os.makedirs(out_dir, exist_ok=True)

## Load data

In [3]:
# Load the metadata dict and pull out the entries used by this notebook.
# NOTE: pickle.load can execute arbitrary code embedded in the file, so only
# open a metadata.pickle that comes from a trusted source.
metadata_path = os.path.join(data_dir, "metadata.pickle")
with open(metadata_path, "rb") as metadata_file:
    all_metadata = pickle.load(metadata_file)

# Size entries stored in the metadata.
n_modules, n_genes, n_experiments = (
    all_metadata[key] for key in ("n_modules", "n_genes", "n_experiments")
)

# Label entries stored in the metadata (modules and genes are later used as
# the row/column labels of the coefficient table).
modules, exp_ids, genes = (
    all_metadata[key] for key in ("modules", "exp_ids", "genes")
)

In [4]:
# Read the normalised L1000 table; the first CSV column becomes the index.
l1000_csv_path = os.path.join(data_dir, "L1000_Data_norm_data.csv")
L1000_df = pd.read_csv(l1000_csv_path, index_col=0)

# Plain NumPy view of the table, used as the model input below.
x = L1000_df.to_numpy()
x.shape

(978, 104)

In [5]:
def _read_annotated_values(filename):
    """Read `filename` from data_dir, using its first column as the index,
    and return only the underlying array of values."""
    return pd.read_csv(os.path.join(data_dir, filename), index_col=0).values


# Load doses, IC50 values and the perturbation matrix as plain arrays.
inhib_conc_matrix = _read_annotated_values("inhib_conc_annotated.csv")
ic50_matrix = _read_annotated_values("ic50_annotated.csv")

# Gamma values are currently not loaded; re-enable together with the
# gamma-based y_true formula if needed.
# gamma_matrix = _read_annotated_values("gamma_annotated.csv")

pert_matrix = _read_annotated_values("pert_annotated.csv")

In [6]:
# Alternative formula that includes the gamma term (currently disabled):
# y_true = (1 + gamma_matrix * inhib_conc_matrix / ic50_matrix) / (1 + inhib_conc_matrix / ic50_matrix)

# Element-wise ratio of applied concentration to IC50.
dose_over_ic50 = inhib_conc_matrix / ic50_matrix

# y_true = 1 / (1 + concentration / IC50); equals 1 wherever the
# concentration entry is 0.
y_true = 1 / (1 + dose_over_ic50)

display(y_true.shape)
y_true

(11, 104)

array([[1.        , 1.        , 1.        , ..., 1.        , 1.        ,
        0.47368421],
       [1.        , 1.        , 1.        , ..., 1.        , 1.        ,
        1.        ],
       [1.        , 1.        , 1.        , ..., 1.        , 1.        ,
        1.        ],
       ...,
       [1.        , 1.        , 1.        , ..., 0.94914478, 0.98245337,
        1.        ],
       [1.        , 1.        , 1.        , ..., 1.        , 1.        ,
        1.        ],
       [1.        , 1.        , 1.        , ..., 1.        , 1.        ,
        1.        ]])

## Run models

In [7]:
# Numeric settings passed positionally to predict_coeffs after the three
# matrices. NOTE(review): their meaning is defined by the predict_coeffs
# signature in bmra_prep, which is not visible here — TODO replace with
# keyword arguments once confirmed.
predict_settings = (200_000, 10, 10, 10, 100)

# Fit the coefficient matrix from expression data (x), the target values
# (y_true) and the perturbation matrix.
a_coeffs = bmra_prep.pathway_activity.prediction.predict_coeffs(
    x, y_true, pert_matrix, *predict_settings)

In [8]:
# Label the coefficient matrix: one row per module, one column per gene.
a_coeffs_df = pd.DataFrame(data=a_coeffs, index=modules, columns=genes)

# Persist the coefficients to the output folder.
a_coeffs_csv_path = os.path.join(out_dir, "a_coeffs.csv")
a_coeffs_df.to_csv(a_coeffs_csv_path)

# To reuse a previous run instead of refitting, load the saved file:
#a_coeffs_df = pd.read_csv(os.path.join(out_dir,'a_coeffs.csv'),index_col=0)
#a_coeffs = a_coeffs_df.values

# Number of non-zero coefficients in each module's row.
nonzero_per_module = a_coeffs_df.astype(bool).sum(axis=1)
display(nonzero_per_module)
display(a_coeffs_df)

Androgen    978
CDK1        978
CDK2        978
CDK4_6      978
EGFR        978
Estrogen    978
FGFR        978
PI3K        978
p53         978
TOP2A       978
Src         978
dtype: int64

Unnamed: 0,AARS,ABCB6,ABCC5,ABCF1,ABCF3,ABHD4,ABHD6,ABL1,ACAA1,ACAT2,...,ZMIZ1,ZMYM2,ZNF131,ZNF274,ZNF318,ZNF395,ZNF451,ZNF586,ZNF589,ZW10
Androgen,1.274781e-05,-2.975394e-06,-6e-06,-6e-06,2e-05,-3.772989e-06,2.2e-05,-2.5e-05,1.4e-05,1.4e-05,...,-7e-06,1.536262e-05,-2.1e-05,-3.28178e-06,3.2e-05,2.920441e-06,1.551734e-05,1.083191e-05,1.4e-05,3.4e-05
CDK1,7.499444e-06,2.24413e-05,6.1e-05,-5e-06,-2.3e-05,-9.585208e-06,-2.6e-05,-1.1e-05,1.3e-05,-1.9e-05,...,-1.5e-05,-1.816468e-05,-1.5e-05,-4.716443e-06,-1.3e-05,-7.021131e-07,-7.519557e-06,8.255898e-07,-1e-05,2.2e-05
CDK2,-4.658534e-06,3.786107e-05,1.9e-05,-9e-06,-7e-06,1.096075e-05,1.8e-05,2.9e-05,7e-06,-1.5e-05,...,-9e-06,-1.179885e-05,1e-05,2.985783e-07,2e-06,2.902628e-05,1.030585e-05,1.445699e-05,4.1e-05,-2.3e-05
CDK4_6,-6.461975e-06,-4.92379e-06,-2.9e-05,-1.2e-05,1e-05,5.321998e-07,-3.1e-05,2.1e-05,-7e-06,-3.5e-05,...,-3.2e-05,-6.567753e-06,-4e-06,-2.633755e-05,-4e-06,-1.734306e-05,8.238956e-06,-7.809749e-06,-1.9e-05,-1.3e-05
EGFR,2.07356e-07,1.38016e-05,-1e-05,1.3e-05,-2.4e-05,-0.0003143776,3e-06,1.1e-05,-2e-06,-0.027019,...,-6e-06,-5.483669e-06,7e-06,-2.5183e-05,-2.3e-05,0.0001210497,-3.072687e-06,2.810309e-05,3.5e-05,-1.8e-05
Estrogen,-5.29185e-06,-2.002612e-07,3.9e-05,1e-05,1.2e-05,-1.551689e-05,-6e-06,2.1e-05,2.4e-05,-0.273481,...,1.2e-05,2.556565e-06,9e-06,-4.253202e-06,2e-06,6.855206e-06,-1.587263e-05,1.015289e-05,-1.5e-05,-2e-06
FGFR,-0.0003587008,7.828733e-06,1.5e-05,-1.6e-05,3e-06,-2.603027e-06,-7e-06,-9e-06,-1.6e-05,-6.4e-05,...,-7e-06,2.990709e-06,-1.1e-05,4.835675e-06,-1.3e-05,2.314573e-05,6.375012e-06,1.551692e-05,8e-06,7e-06
PI3K,6.907896e-06,1.822333e-05,1.6e-05,1e-05,8e-06,-4.020698e-06,2.4e-05,2e-06,1.3e-05,-2e-06,...,-4e-06,-1.246391e-05,1.1e-05,-1.51528e-06,6e-06,-0.000179412,1.187291e-05,-8.855097e-06,2.6e-05,0.033004
p53,-2.188982e-05,-3.898552e-05,6e-06,2e-06,3e-06,2.850681e-06,0.230167,-1.6e-05,1.7e-05,1e-05,...,-1.2e-05,1.061136e-06,-2.8e-05,6.109904e-06,-8e-06,3.105046e-06,-1.064263e-05,-6.455511e-07,1.7e-05,0.000284
TOP2A,-1.765903e-05,3.483841e-05,-2.3e-05,-2.1e-05,3.9e-05,3.445029e-05,2.4e-05,-2e-06,1.7e-05,-3.1e-05,...,1.4e-05,8.197156e-07,-3e-06,9.316815e-06,1.3e-05,7.499033e-06,5.735121e-07,-1.764111e-05,-1.1e-05,4e-06


In [9]:
#pathway_activity = a_coeffs @ x
#pathway_activity.shape

In [10]:
# Step 1: combine the expression data with the fitted coefficients.
pathway_activity = bmra_prep.pathway_activity.calc_pathway_activity(x, a_coeffs)

# Step 2: derive the global response from the pathway activity, passing the
# module names and the L1000 column labels along as annotations.
R_global = bmra_prep.pathway_activity.calc_global_response_from_pathway_activity(
    pathway_activity, modules, L1000_df.columns)

# Tabular form of the result, used for display and export.
R_global_df = R_global.dataframe
display(R_global_df)

Unnamed: 0,ASG002_BC3C_24H:A10,ASG002_BC3C_24H:A11,ASG002_BC3C_24H:A19,ASG002_BC3C_24H:A20,ASG002_BC3C_24H:A21,ASG002_BC3C_24H:B10,ASG002_BC3C_24H:B11,ASG002_BC3C_24H:B14,ASG002_BC3C_24H:B15,ASG002_BC3C_24H:C13,...,MOAR010_BC3C_24H:L20,MOAR011_BC3C_24H:C01,MOAR011_BC3C_24H:C02,MOAR011_BC3C_24H:C03,MOAR011_BC3C_24H:C10,MOAR011_BC3C_24H:C11,MOAR011_BC3C_24H:F07,MOAR011_BC3C_24H:F08,MOAR011_BC3C_24H:F09,MOAR011_BC3C_24H:J10
Androgen,-0.052923,-0.009974,0.001757,-0.04004,0.001745,0.019889,0.00414,0.015856,0.0211,0.035611,...,-0.044382,-0.01554,-0.001313,-0.00851,-0.05202,-0.004399,-0.054581,0.029686,0.026729,-0.58518
CDK1,-1.371527,-1.095251,0.156729,-0.416694,0.141833,-0.397212,0.174571,0.359171,0.120268,0.190067,...,-0.040808,-0.070745,-0.292843,-0.17281,-0.211205,-0.064074,-0.95535,0.019779,-0.051689,-0.084595
CDK2,-0.198739,-0.1792,-0.034046,-0.010016,-0.002004,-0.026154,0.010697,-0.035971,0.028113,-0.004405,...,-0.040448,-0.012437,-0.001918,-0.002106,-0.022549,0.023467,-0.058564,-0.029579,-0.032771,-0.004778
CDK4_6,-0.024786,-0.23307,-0.041343,-0.512842,0.04018,0.00207,-0.007849,0.097854,0.08594,0.03193,...,-0.571649,0.048223,0.097169,-0.089221,-1.034766,-1.206374,0.317459,0.198526,-0.06334,-0.014896
EGFR,0.594302,0.494854,0.227074,0.365559,0.489237,0.112625,-0.412975,0.263036,0.284577,-0.04703,...,0.177804,0.042588,-0.056614,-0.036438,-0.087508,-0.424268,-0.441998,-0.554539,-0.315115,-0.228102
Estrogen,-0.127051,-0.212308,-0.211572,-0.410824,-0.946347,-0.309627,-0.085559,-0.24094,-0.165757,-0.041252,...,-0.083507,0.059879,0.040587,-0.012869,0.02085,0.134703,-1.518803,-0.225347,0.185686,-0.272171
FGFR,-0.112271,-0.187148,-0.090419,0.052235,-0.029786,-0.409007,-0.028128,-0.046896,-0.067067,-0.291842,...,0.161043,-0.015421,0.066762,-0.002451,0.041444,-0.292799,-0.543559,0.042138,0.230391,0.057342
PI3K,-1.907763,-1.695924,-1.449386,-1.244767,-0.697896,0.285072,-0.134038,-0.243812,-0.825338,-0.278903,...,-0.047557,-0.156763,-0.023753,-0.479838,-0.458757,0.035189,-0.773614,0.236607,-0.235869,0.029959
p53,-0.218702,-0.221255,-0.132064,-0.413537,0.047194,-1.62894,-1.474679,-0.119654,-0.086778,-1.324326,...,-0.307034,0.250068,0.023041,0.228404,0.201298,0.285172,-0.226091,-0.050497,0.028624,0.600756
TOP2A,-0.158657,0.118153,-0.222998,-0.149732,-0.124037,0.034621,0.080546,-1.994295,-0.18992,-0.307524,...,0.011648,0.136403,0.133846,-0.079527,-0.387488,0.095069,-0.682865,0.036211,0.009586,-0.387089


In [11]:
# Write the annotated global response table to the output folder, then show it.
r_global_csv_path = os.path.join(out_dir, "R_global_annotated.csv")
R_global_df.to_csv(r_global_csv_path)
display(R_global_df)

Unnamed: 0,ASG002_BC3C_24H:A10,ASG002_BC3C_24H:A11,ASG002_BC3C_24H:A19,ASG002_BC3C_24H:A20,ASG002_BC3C_24H:A21,ASG002_BC3C_24H:B10,ASG002_BC3C_24H:B11,ASG002_BC3C_24H:B14,ASG002_BC3C_24H:B15,ASG002_BC3C_24H:C13,...,MOAR010_BC3C_24H:L20,MOAR011_BC3C_24H:C01,MOAR011_BC3C_24H:C02,MOAR011_BC3C_24H:C03,MOAR011_BC3C_24H:C10,MOAR011_BC3C_24H:C11,MOAR011_BC3C_24H:F07,MOAR011_BC3C_24H:F08,MOAR011_BC3C_24H:F09,MOAR011_BC3C_24H:J10
Androgen,-0.052923,-0.009974,0.001757,-0.04004,0.001745,0.019889,0.00414,0.015856,0.0211,0.035611,...,-0.044382,-0.01554,-0.001313,-0.00851,-0.05202,-0.004399,-0.054581,0.029686,0.026729,-0.58518
CDK1,-1.371527,-1.095251,0.156729,-0.416694,0.141833,-0.397212,0.174571,0.359171,0.120268,0.190067,...,-0.040808,-0.070745,-0.292843,-0.17281,-0.211205,-0.064074,-0.95535,0.019779,-0.051689,-0.084595
CDK2,-0.198739,-0.1792,-0.034046,-0.010016,-0.002004,-0.026154,0.010697,-0.035971,0.028113,-0.004405,...,-0.040448,-0.012437,-0.001918,-0.002106,-0.022549,0.023467,-0.058564,-0.029579,-0.032771,-0.004778
CDK4_6,-0.024786,-0.23307,-0.041343,-0.512842,0.04018,0.00207,-0.007849,0.097854,0.08594,0.03193,...,-0.571649,0.048223,0.097169,-0.089221,-1.034766,-1.206374,0.317459,0.198526,-0.06334,-0.014896
EGFR,0.594302,0.494854,0.227074,0.365559,0.489237,0.112625,-0.412975,0.263036,0.284577,-0.04703,...,0.177804,0.042588,-0.056614,-0.036438,-0.087508,-0.424268,-0.441998,-0.554539,-0.315115,-0.228102
Estrogen,-0.127051,-0.212308,-0.211572,-0.410824,-0.946347,-0.309627,-0.085559,-0.24094,-0.165757,-0.041252,...,-0.083507,0.059879,0.040587,-0.012869,0.02085,0.134703,-1.518803,-0.225347,0.185686,-0.272171
FGFR,-0.112271,-0.187148,-0.090419,0.052235,-0.029786,-0.409007,-0.028128,-0.046896,-0.067067,-0.291842,...,0.161043,-0.015421,0.066762,-0.002451,0.041444,-0.292799,-0.543559,0.042138,0.230391,0.057342
PI3K,-1.907763,-1.695924,-1.449386,-1.244767,-0.697896,0.285072,-0.134038,-0.243812,-0.825338,-0.278903,...,-0.047557,-0.156763,-0.023753,-0.479838,-0.458757,0.035189,-0.773614,0.236607,-0.235869,0.029959
p53,-0.218702,-0.221255,-0.132064,-0.413537,0.047194,-1.62894,-1.474679,-0.119654,-0.086778,-1.324326,...,-0.307034,0.250068,0.023041,0.228404,0.201298,0.285172,-0.226091,-0.050497,0.028624,0.600756
TOP2A,-0.158657,0.118153,-0.222998,-0.149732,-0.124037,0.034621,0.080546,-1.994295,-0.18992,-0.307524,...,0.011648,0.136403,0.133846,-0.079527,-0.387488,0.095069,-0.682865,0.036211,0.009586,-0.387089
