In [1]:
import pandas as pd
import os
import pickle
import bmra_prep
import bmra_prep.pathway_activity.prediction

In [2]:
# Cell line processed by this notebook; it selects the input/output folders.
cell_line = 'BC3C'

# Root folder holding one "blca_publication_OUTPUT_bmra_<cell_line>" directory
# per cell line. The default is the original hard-coded location; set the
# BLCA_BMRA_ROOT environment variable to run the notebook on another machine
# without editing this cell.
bmra_root = os.environ.get(
    "BLCA_BMRA_ROOT",
    "/home/jing/Phd_project/project_UCD_blca/blca_publication_OUTPUT/blca_publication_OUTPUT_bmra",
)
cell_line_root = f"{bmra_root}/blca_publication_OUTPUT_bmra_{cell_line}"

# Inputs are read from data_dir and results are written to out_dir. Both keep
# their trailing slash so the default values equal the previous literal paths.
data_dir = f"{cell_line_root}/00_outputs_2020_{cell_line}/"
out_dir = f"{cell_line_root}/01_outputs_2020_{cell_line}/"

# Create the output folder up front; an already existing folder is not an error.
os.makedirs(out_dir, exist_ok=True)

## Load data

In [3]:
# Load the metadata dictionary stored in data_dir and unpack the entries used
# in this notebook.
# NOTE: unpickling can execute arbitrary code; only open files you trust.
metadata_path = os.path.join(data_dir, "metadata.pickle")
with open(metadata_path, "rb") as metadata_file:
    all_metadata = pickle.load(metadata_file)

# Counts stored in the metadata.
n_modules, n_genes, n_experiments = (
    all_metadata["n_modules"],
    all_metadata["n_genes"],
    all_metadata["n_experiments"],
)

# Label collections stored in the metadata.
modules, exp_ids, genes = (
    all_metadata["modules"],
    all_metadata["exp_ids"],
    all_metadata["genes"],
)

In [4]:
# Read the L1000 data table from data_dir; the first CSV column is used as
# the row index.
l1000_csv = os.path.join(data_dir, "L1000_Data_norm_data.csv")
L1000_df = pd.read_csv(l1000_csv, index_col=0)

# Plain array of the table values (labels dropped), used as model input below.
x = L1000_df.values
x.shape

(978, 90)

In [5]:
def _load_annotated_matrix(file_name):
    """Read ``file_name`` from ``data_dir`` and return its values as an array.

    The first CSV column is used as the row index, so it is not part of the
    returned array.
    """
    return pd.read_csv(os.path.join(data_dir, file_name), index_col=0).values


# Inhibitor concentrations, IC50 values and the perturbation matrix.
inhib_conc_matrix = _load_annotated_matrix("inhib_conc_annotated.csv")
ic50_matrix = _load_annotated_matrix("ic50_annotated.csv")

# The gamma matrix is currently not loaded; to enable it:
# gamma_matrix = _load_annotated_matrix("gamma_annotated.csv")

pert_matrix = _load_annotated_matrix("pert_annotated.csv")

In [6]:
# Disabled variant that includes a gamma term (needs gamma_matrix to be loaded):
# y_true = (1 + gamma_matrix * inhib_conc_matrix / ic50_matrix) / (1 + inhib_conc_matrix / ic50_matrix)

# Element-wise target: y_true = 1 / (1 + concentration / IC50).
conc_over_ic50 = inhib_conc_matrix / ic50_matrix
y_true = 1 / (1 + conc_over_ic50)

# Show the shape first, then the array itself as the cell output.
display(y_true.shape)
y_true

(10, 90)

array([[1.00000000e+00, 1.00000000e+00, 1.00000000e+00, 1.00000000e+00,
        1.00000000e+00, 1.00000000e+00, 1.00000000e+00, 1.00000000e+00,
        1.00000000e+00, 1.00000000e+00, 1.00000000e+00, 1.00000000e+00,
        1.00000000e+00, 1.00000000e+00, 1.00000000e+00, 1.00000000e+00,
        1.00000000e+00, 1.00000000e+00, 1.00000000e+00, 1.00000000e+00,
        1.00000000e+00, 1.00000000e+00, 1.00000000e+00, 1.00000000e+00,
        1.00000000e+00, 1.00000000e+00, 1.00000000e+00, 1.00000000e+00,
        1.00000000e+00, 1.00000000e+00, 1.00000000e+00, 1.00000000e+00,
        1.00000000e+00, 1.00000000e+00, 1.00000000e+00, 1.00000000e+00,
        1.00000000e+00, 1.00000000e+00, 1.00000000e+00, 1.00000000e+00,
        1.00000000e+00, 1.00000000e+00, 1.00000000e+00, 1.00000000e+00,
        1.00000000e+00, 1.00000000e+00, 1.00000000e+00, 1.00000000e+00,
        1.00000000e+00, 1.00000000e+00, 1.00000000e+00, 1.00000000e+00,
        1.00000000e+00, 1.00000000e+00, 1.00000000e+00, 1.000000

## Run models

In [7]:
# Estimate the coefficient matrix from the expression data (x), the target
# values (y_true) and the perturbation matrix.
# NOTE(review): the five trailing positional numbers are passed without
# keywords, so their meaning is not visible here — confirm against the
# predict_coeffs signature and consider naming them in a config cell.
a_coeffs = bmra_prep.pathway_activity.prediction.predict_coeffs(
    x,
    y_true,
    pert_matrix,
    200_000,
    10,
    10,
    10,
    100,
)

In [8]:
# Label the coefficients (rows: modules, columns: genes) and save them to out_dir.
a_coeffs_df = pd.DataFrame(data=a_coeffs, index=modules, columns=genes)
a_coeffs_csv = os.path.join(out_dir, "a_coeffs.csv")
a_coeffs_df.to_csv(a_coeffs_csv)

# To reuse a previously saved result instead of the freshly computed one:
#a_coeffs_df = pd.read_csv(os.path.join(out_dir,'a_coeffs.csv'),index_col=0)
#a_coeffs = a_coeffs_df.values

# Per module, count the coefficients that are truthy (i.e. non-zero), then
# show the full labelled table.
nonzero_per_module = a_coeffs_df.astype(bool).sum(axis=1)
display(nonzero_per_module)
display(a_coeffs_df)

Androgen    978
CDK1        978
CDK2        978
CDK4_6      978
EGFR        978
Estrogen    978
FGFR        978
PI3K        978
p53         978
TOP2A       978
dtype: int64

Unnamed: 0,AARS,ABCB6,ABCC5,ABCF1,ABCF3,ABHD4,ABHD6,ABL1,ACAA1,ACAT2,...,ZMIZ1,ZMYM2,ZNF131,ZNF274,ZNF318,ZNF395,ZNF451,ZNF586,ZNF589,ZW10
Androgen,-1e-05,-2.5e-05,1.1e-05,-2.1e-05,1.7e-05,-2.299952e-05,-2.5e-05,-8e-06,-5.332591e-06,-8e-06,...,-1.3e-05,-8e-06,3e-05,3.5e-05,-1e-06,4.368541e-05,-1.74484e-05,-2.029109e-05,-9e-06,5.2e-05
CDK1,-1.3e-05,-8e-06,4e-06,-9e-06,1.7e-05,3.606557e-05,1.4e-05,6e-06,1.300355e-05,3e-06,...,8e-06,5e-06,-3e-06,1.8e-05,-1.7e-05,-9.386619e-06,-1.616763e-05,4.375119e-05,2.8e-05,-3e-06
CDK2,-1.2e-05,5e-06,-1e-05,3.1e-05,2.3e-05,-1.080369e-05,-1.8e-05,9e-06,1.453078e-07,-7e-06,...,7e-06,8e-06,-1.1e-05,-3.2e-05,1.7e-05,1.14066e-06,2.481109e-05,-1.957087e-05,-4e-06,-2.7e-05
CDK4_6,-8e-06,-1.6e-05,1e-05,-2e-06,2e-05,-1.228518e-05,6e-06,5e-06,3.139083e-07,-1.8e-05,...,-2.8e-05,3e-05,1.6e-05,-3e-05,1e-05,-8.348988e-05,1.300585e-05,5.549116e-06,-1.7e-05,-2.6e-05
EGFR,-0.000376,-1.7e-05,3e-06,-1.6e-05,3e-06,-0.1452615,-9e-06,-6e-06,6.274518e-06,-0.084007,...,1.5e-05,-9e-06,2.1e-05,-4e-06,9e-06,0.0003946493,5.020457e-06,3.519275e-07,-1.5e-05,-2e-06
Estrogen,-1.3e-05,-1.5e-05,-6e-06,3e-05,6e-06,6.261238e-05,-2.6e-05,1e-06,-2.501706e-05,-0.251591,...,1.7e-05,-3.3e-05,-3e-06,-1.5e-05,2e-06,3.875111e-06,-2.247202e-05,-1.008662e-05,-1.3e-05,6.2e-05
FGFR,-0.001204,1e-05,-5e-06,-4e-06,-2.1e-05,1.059868e-05,-4e-06,-1e-05,1.811044e-05,-8e-06,...,-1.2e-05,9e-06,-3e-06,4.3e-05,-2.6e-05,-6.949849e-07,2.612758e-06,8.018497e-06,-1.2e-05,1.6e-05
PI3K,-9e-06,-6.5e-05,-2e-05,4.9e-05,-3.9e-05,-2.435243e-05,1.4e-05,6e-06,-1.133036e-05,3.5e-05,...,2.6e-05,-5e-06,-1.9e-05,3.3e-05,-5.3e-05,-0.0005866719,-2.356678e-05,-2.215626e-05,-1.4e-05,-4e-05
p53,-4e-06,-1.6e-05,-4.4e-05,8e-06,-6e-06,9.731189e-07,0.191673,-1.5e-05,-9.603603e-06,1.6e-05,...,-5e-06,-5e-06,2.4e-05,1.8e-05,5e-06,1.034495e-05,1.687188e-06,2.011183e-05,-1.4e-05,8.4e-05
TOP2A,-4e-06,2.6e-05,-2.4e-05,2.3e-05,1.5e-05,3.546133e-05,7e-06,1.5e-05,-7.513372e-06,-1.3e-05,...,1.9e-05,1.5e-05,4e-06,-8e-06,2.2e-05,2.570573e-06,-7.976466e-07,3.956968e-06,8e-06,-4.1e-05


In [9]:
#pathway_activity = a_coeffs @ x
#pathway_activity.shape

In [10]:
# Step 1: pathway activity computed from the expression data and the coefficients.
module_activity = bmra_prep.pathway_activity.calc_pathway_activity(x, a_coeffs)

# Step 2: global response built from that activity, labelled with the module
# names and the column labels of the L1000 table.
R_global = bmra_prep.pathway_activity.calc_global_response_from_pathway_activity(
    module_activity, modules, L1000_df.columns
)

# The result object exposes its labelled table via ``.dataframe``.
R_global_df = R_global.dataframe
display(R_global_df)

Unnamed: 0,ASG002_BC3C_24H:A10,ASG002_BC3C_24H:A11,ASG002_BC3C_24H:A19,ASG002_BC3C_24H:A20,ASG002_BC3C_24H:A21,ASG002_BC3C_24H:B10,ASG002_BC3C_24H:B11,ASG002_BC3C_24H:B12,ASG002_BC3C_24H:B14,ASG002_BC3C_24H:B15,...,MOAR010_BC3C_24H:K09,MOAR010_BC3C_24H:L19,MOAR010_BC3C_24H:L21,MOAR011_BC3C_24H:C01,MOAR011_BC3C_24H:C02,MOAR011_BC3C_24H:C03,MOAR011_BC3C_24H:C10,MOAR011_BC3C_24H:C11,MOAR011_BC3C_24H:F07,MOAR011_BC3C_24H:F08
Androgen,-0.064393,-0.023087,-0.000702,-0.044655,-0.003758,0.016581,-0.00096,0.033767,0.01716,0.020628,...,-0.044253,0.003331,0.033882,0.002626,0.0248,0.003295,-0.035689,-0.00389,-0.041062,0.047049
CDK1,-0.474178,-0.46553,0.258901,-0.184484,0.224675,-0.376195,0.197394,0.203473,-0.084174,0.2106,...,-0.002954,-0.003745,0.001662,0.012325,-0.170188,0.100717,-0.029292,0.092654,-0.500959,0.131844
CDK2,-0.195326,-0.172657,-0.033362,-0.010694,-0.002057,-0.020852,0.009705,-0.035582,-0.029857,0.026029,...,-0.017494,0.020209,-1.01031,-0.008445,0.004099,0.004567,-0.015628,0.0282,-0.051533,-0.026693
CDK4_6,-0.495571,-0.482552,-0.306154,-0.179866,0.047184,-0.003137,-0.020293,-0.052012,-0.053463,-0.222897,...,-0.074587,0.014086,0.085944,-0.10557,-0.152127,0.004238,-0.285189,-0.156482,-0.613715,0.06548
EGFR,0.569539,0.450394,0.22049,0.232709,0.144885,0.056756,-0.345657,0.039314,0.276951,0.098512,...,-0.074457,-0.119789,-0.540804,0.100276,-0.078087,-0.009004,0.018657,-0.14572,-0.920323,-0.377519
Estrogen,-0.199919,-0.259932,-0.14258,-0.448592,-0.660614,-0.342791,0.009855,-0.127598,-0.167668,-0.11013,...,0.075279,0.108917,0.133747,0.025976,0.050806,-0.011542,0.030152,0.136822,-1.133919,-0.168358
FGFR,-0.161304,-0.082174,-0.119207,0.024376,-0.003728,-0.46111,-0.042683,-0.14249,0.033345,0.006284,...,0.163097,0.054862,-0.107181,-0.073578,0.011335,-0.113435,-0.117198,-0.089919,-0.60281,-0.036339
PI3K,-1.858456,-1.610594,-1.463343,-1.27082,-0.588271,0.268959,0.068152,-0.611527,-0.57468,-0.552304,...,0.12744,-0.13017,0.297385,-0.049975,0.073918,-0.196519,-0.02537,0.251962,-0.272569,0.379042
p53,0.024887,-0.102213,-0.084487,-0.359168,-0.053668,-1.654839,-1.496443,-1.769525,-0.157457,-0.036249,...,-1.55942,-0.213166,-0.011086,0.117921,0.008804,0.144338,0.066361,0.227497,-0.247053,-0.044037
TOP2A,-0.091287,0.103722,-0.252186,-0.145433,-0.117631,0.055866,0.076575,-0.038253,-1.995442,-0.201716,...,0.195364,-0.458269,0.351832,0.149486,0.161428,-0.089586,-0.653632,0.082104,-0.669834,0.081584


In [11]:
# Save the labelled global response table to out_dir and display it.
r_global_csv = os.path.join(out_dir, "R_global_annotated.csv")
R_global_df.to_csv(r_global_csv)
display(R_global_df)

Unnamed: 0,ASG002_BC3C_24H:A10,ASG002_BC3C_24H:A11,ASG002_BC3C_24H:A19,ASG002_BC3C_24H:A20,ASG002_BC3C_24H:A21,ASG002_BC3C_24H:B10,ASG002_BC3C_24H:B11,ASG002_BC3C_24H:B12,ASG002_BC3C_24H:B14,ASG002_BC3C_24H:B15,...,MOAR010_BC3C_24H:K09,MOAR010_BC3C_24H:L19,MOAR010_BC3C_24H:L21,MOAR011_BC3C_24H:C01,MOAR011_BC3C_24H:C02,MOAR011_BC3C_24H:C03,MOAR011_BC3C_24H:C10,MOAR011_BC3C_24H:C11,MOAR011_BC3C_24H:F07,MOAR011_BC3C_24H:F08
Androgen,-0.064393,-0.023087,-0.000702,-0.044655,-0.003758,0.016581,-0.00096,0.033767,0.01716,0.020628,...,-0.044253,0.003331,0.033882,0.002626,0.0248,0.003295,-0.035689,-0.00389,-0.041062,0.047049
CDK1,-0.474178,-0.46553,0.258901,-0.184484,0.224675,-0.376195,0.197394,0.203473,-0.084174,0.2106,...,-0.002954,-0.003745,0.001662,0.012325,-0.170188,0.100717,-0.029292,0.092654,-0.500959,0.131844
CDK2,-0.195326,-0.172657,-0.033362,-0.010694,-0.002057,-0.020852,0.009705,-0.035582,-0.029857,0.026029,...,-0.017494,0.020209,-1.01031,-0.008445,0.004099,0.004567,-0.015628,0.0282,-0.051533,-0.026693
CDK4_6,-0.495571,-0.482552,-0.306154,-0.179866,0.047184,-0.003137,-0.020293,-0.052012,-0.053463,-0.222897,...,-0.074587,0.014086,0.085944,-0.10557,-0.152127,0.004238,-0.285189,-0.156482,-0.613715,0.06548
EGFR,0.569539,0.450394,0.22049,0.232709,0.144885,0.056756,-0.345657,0.039314,0.276951,0.098512,...,-0.074457,-0.119789,-0.540804,0.100276,-0.078087,-0.009004,0.018657,-0.14572,-0.920323,-0.377519
Estrogen,-0.199919,-0.259932,-0.14258,-0.448592,-0.660614,-0.342791,0.009855,-0.127598,-0.167668,-0.11013,...,0.075279,0.108917,0.133747,0.025976,0.050806,-0.011542,0.030152,0.136822,-1.133919,-0.168358
FGFR,-0.161304,-0.082174,-0.119207,0.024376,-0.003728,-0.46111,-0.042683,-0.14249,0.033345,0.006284,...,0.163097,0.054862,-0.107181,-0.073578,0.011335,-0.113435,-0.117198,-0.089919,-0.60281,-0.036339
PI3K,-1.858456,-1.610594,-1.463343,-1.27082,-0.588271,0.268959,0.068152,-0.611527,-0.57468,-0.552304,...,0.12744,-0.13017,0.297385,-0.049975,0.073918,-0.196519,-0.02537,0.251962,-0.272569,0.379042
p53,0.024887,-0.102213,-0.084487,-0.359168,-0.053668,-1.654839,-1.496443,-1.769525,-0.157457,-0.036249,...,-1.55942,-0.213166,-0.011086,0.117921,0.008804,0.144338,0.066361,0.227497,-0.247053,-0.044037
TOP2A,-0.091287,0.103722,-0.252186,-0.145433,-0.117631,0.055866,0.076575,-0.038253,-1.995442,-0.201716,...,0.195364,-0.458269,0.351832,0.149486,0.161428,-0.089586,-0.653632,0.082104,-0.669834,0.081584
