In [4]:
import pandas as pd
import os
import pickle
import bmra_prep
import bmra_prep.pathway_activity.prediction

In [7]:
# Cell line whose inputs are read and whose outputs are written by this notebook.
cell_line = 'CAL29'

# Root folder that holds one `blca_publication_OUTPUT_bmra_<cell_line>` directory
# per cell line. The default is the original location; set the BLCA_BMRA_ROOT
# environment variable before starting the kernel to run the notebook against a
# different checkout/machine without editing this cell.
bmra_root = os.environ.get(
    "BLCA_BMRA_ROOT",
    "/home/jing/Phd_project/project_UCD_blca/blca_publication_OUTPUT/blca_publication_OUTPUT_bmra",
)
cell_line_dir = os.path.join(bmra_root, f"blca_publication_OUTPUT_bmra_{cell_line}")

# Folder the notebook reads from (metadata pickle and annotated CSV files).
# The trailing "" keeps the trailing path separator of the original string.
data_dir = os.path.join(cell_line_dir, f"00_outputs_2020_{cell_line}", "")
# Folder the notebook writes to; created below when it does not exist yet.
out_dir = os.path.join(cell_line_dir, f"01_outputs_2020_{cell_line}", "")


os.makedirs(out_dir, exist_ok = True)

# Load Data

In [9]:
# Load the pickled metadata dict from data_dir and extract the entries used below.
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so only
# point data_dir at metadata files produced by a trusted run.
metadata_path = os.path.join(data_dir, "metadata.pickle")
with open(metadata_path, "rb") as f:
    all_metadata = pickle.load(f)

# Entries stored under the "n_*" keys (presumably the matching counts — verify
# against the code that writes metadata.pickle).
n_modules, n_genes, n_experiments = (
    all_metadata[key] for key in ("n_modules", "n_genes", "n_experiments")
)

# Label entries; `modules` and `genes` are used further down as the row and
# column labels of the coefficient table.
modules, exp_ids, genes = (
    all_metadata[key] for key in ("modules", "exp_ids", "genes")
)

In [10]:
# Read the L1000 data table from data_dir; the first CSV column becomes the row index.
l1000_csv_path = os.path.join(data_dir, "L1000_Data_norm_data.csv")
L1000_df = pd.read_csv(l1000_csv_path, index_col = 0)

# Array of the table's values (labels dropped), used as model input in later cells.
x = L1000_df.values
# Last expression of the cell: shows the array shape as the cell output.
x.shape

(978, 122)

In [11]:
# Load the dose, IC50 and perturbation tables from data_dir as plain arrays.
def _read_annotated_values(file_name):
    """Read `file_name` from `data_dir` (first CSV column as index) and return its values array."""
    csv_path = os.path.join(data_dir, file_name)
    return pd.read_csv(csv_path, index_col = 0).values


inhib_conc_matrix = _read_annotated_values("inhib_conc_annotated.csv")

ic50_matrix = _read_annotated_values("ic50_annotated.csv")

# The gamma table is currently not loaded; kept here for reference:
# gamma_matrix = pd.read_csv(
#     os.path.join(data_dir, "gamma_annotated.csv"),
#     index_col = 0,
# ).values

pert_matrix = _read_annotated_values("pert_annotated.csv")

In [12]:
# Disabled variant that additionally uses the gamma table:
# y_true = (1 + gamma_matrix * inhib_conc_matrix / ic50_matrix) / (1 + inhib_conc_matrix / ic50_matrix)

# Element-wise ratio of the inhibitor concentration table to the IC50 table.
conc_over_ic50 = inhib_conc_matrix / ic50_matrix
# y_true = 1 / (1 + concentration / IC50); this is exactly 1 wherever the
# concentration entry is 0 (given a non-zero IC50).
y_true = 1 / (1 + conc_over_ic50)

# Show the shape first, then the array itself as the cell result.
display(y_true.shape)
y_true

(11, 122)

array([[1.        , 1.        , 1.        , ..., 0.47368421, 0.72992701,
        0.89020772],
       [1.        , 1.        , 1.        , ..., 1.        , 1.        ,
        1.        ],
       [1.        , 1.        , 1.        , ..., 1.        , 1.        ,
        1.        ],
       ...,
       [1.        , 1.        , 1.        , ..., 1.        , 1.        ,
        1.        ],
       [1.        , 1.        , 1.        , ..., 1.        , 1.        ,
        1.        ],
       [1.        , 1.        , 1.        , ..., 1.        , 1.        ,
        1.        ]])

## Run models

In [13]:
# Fit the coefficient matrix from the expression array, the target values and
# the perturbation matrix using bmra_prep's predict_coeffs.
# NOTE(review): the five trailing numeric arguments are passed positionally and
# their meaning is not visible in this notebook — check the predict_coeffs
# signature before changing any of them.
predict_coeffs = bmra_prep.pathway_activity.prediction.predict_coeffs
a_coeffs = predict_coeffs(
    x,
    y_true,
    pert_matrix,
    200_000,
    10,
    10,
    10,
    100,
)

In [14]:
# Wrap the coefficient matrix in a labelled table: rows are modules, columns are genes.
a_coeffs_df = pd.DataFrame(data = a_coeffs, index = modules, columns = genes)

# Write the table to out_dir.
a_coeffs_csv_path = os.path.join(out_dir, "a_coeffs.csv")
a_coeffs_df.to_csv(a_coeffs_csv_path)
# To work from a previously saved table instead of the fresh fit, uncomment:
#a_coeffs_df = pd.read_csv(os.path.join(out_dir,'a_coeffs.csv'),index_col=0)
#a_coeffs = a_coeffs_df.values

# Count, for each module row, how many coefficients are non-zero.
nonzero_per_module = a_coeffs_df.astype(bool).sum(axis='columns')
display(nonzero_per_module)
display(a_coeffs_df)

Androgen    978
CDK1_2      978
CDK4_6      978
EGFR        978
Estrogen    978
FGFR        978
PI3K        978
p53         978
TOP2A       978
Src         978
TGFb        978
dtype: int64

Unnamed: 0,AARS,ABCB6,ABCC5,ABCF1,ABCF3,ABHD4,ABHD6,ABL1,ACAA1,ACAT2,...,ZMIZ1,ZMYM2,ZNF131,ZNF274,ZNF318,ZNF395,ZNF451,ZNF586,ZNF589,ZW10
Androgen,4.683965e-07,2e-06,-1.1e-05,4.220445e-05,3.213859e-05,-1e-06,1.85484e-06,-1.859799e-05,-2.256859e-05,1.587655e-05,...,9.451065e-07,-2.4e-05,9e-06,1.9e-05,-1.2e-05,1.274298e-05,1.375718e-05,-8.475251e-06,5.400007e-06,1.277147e-05
CDK1_2,8.96556e-06,1.1e-05,-1.5e-05,-6.684499e-06,-2.095979e-06,-1.8e-05,7.476243e-06,1.197384e-06,-1.914433e-05,5.1118e-06,...,-6.329996e-06,-1.4e-05,-7e-06,-2e-06,-7e-06,3.905977e-06,2.096032e-05,-5.949615e-06,-5.375677e-06,5.457152e-07
CDK4_6,5.213933e-05,2e-06,3e-06,8.719033e-06,7.813263e-07,5e-06,-6.136749e-06,2.142885e-06,1.071377e-05,2.322729e-05,...,2.926887e-05,8e-06,2.3e-05,1.7e-05,3e-06,1.276554e-06,1.952255e-06,-8.041775e-06,9.0541e-07,3.687259e-06
EGFR,-1.45411e-06,4e-06,-3e-06,-7.962819e-07,-9.283254e-06,-1.3e-05,1.297925e-05,-1.204147e-05,2.008939e-05,3.34347e-06,...,-2.892103e-07,9e-06,6e-06,9e-06,-6e-06,-5.012045e-07,2.590843e-05,-1.358589e-07,-1.235469e-05,-2.005263e-05
Estrogen,2.791464e-05,2e-06,7e-06,2.151621e-05,-1.332336e-05,-1.5e-05,1.091547e-05,-4.951206e-07,1.746489e-05,1.018441e-05,...,5.146484e-06,1e-06,3.2e-05,-1e-05,-9e-06,-0.1125032,2.159188e-07,7.014403e-06,-1.469686e-05,1.986612e-05
FGFR,-2.880645e-05,1.3e-05,1.8e-05,-5.281339e-06,-2.017173e-05,9e-06,1.806543e-07,-2.145145e-05,1.475733e-05,-3.519316e-07,...,4.231237e-05,-6e-06,9e-06,-2.3e-05,1e-05,9.297943e-06,9.235972e-07,-7.113813e-06,2.224783e-07,-2.543607e-05
PI3K,-1.059506e-05,-2e-06,-3e-06,-8.585204e-06,3.226369e-06,-0.000375,3.887775e-06,4.477373e-06,-7.302328e-07,2.975544e-06,...,-5.26062e-06,-4e-06,0.000118,5e-06,1.1e-05,-9.675177e-06,7.9032e-06,-1.076102e-05,8.555835e-06,-1.47658e-05
p53,-1.988266e-06,1.9e-05,-3e-06,-8.033798e-05,-3.830543e-05,-4.8e-05,0.00899454,-1.092243e-05,2.975701e-06,-1.072455e-05,...,4.557887e-06,-3e-06,9e-06,-4.3e-05,-2.2e-05,-1.996863e-05,-4.239791e-06,-4.211856e-06,3.719046e-05,2.951055e-05
TOP2A,2.878796e-05,1.1e-05,5e-06,1.366488e-06,5.928339e-07,4e-06,-1.289963e-05,-7.967224e-06,1.247402e-05,-4.52029e-06,...,-2.464977e-06,8e-06,1.6e-05,-1.2e-05,-5e-06,-2.319925e-05,2.157754e-05,-1.960893e-05,1.518164e-05,-7.242118e-06
Src,-9.839727e-06,5e-06,5e-06,-9.09971e-06,4.106968e-06,4e-06,-1.792557e-05,1.1385e-05,-5.67771e-07,1.100954e-05,...,2.001881e-06,-8e-06,7e-06,8e-06,1.2e-05,1.411909e-06,5.60622e-07,2.339376e-05,-1.092598e-05,-9.861137e-06


In [15]:
#pathway_activity = a_coeffs @ x
#pathway_activity.shape

In [16]:
# First compute the pathway activity from the expression array and the fitted
# coefficients, then build the global response object from it, labelled with
# the module names and the L1000 table's column labels.
pathway_activity_result = bmra_prep.pathway_activity.calc_pathway_activity(x, a_coeffs)
R_global = bmra_prep.pathway_activity.calc_global_response_from_pathway_activity(
    pathway_activity_result,
    modules,
    L1000_df.columns,
)

# The response object exposes its table through the `dataframe` attribute.
R_global_df = R_global.dataframe
display(R_global_df)

Unnamed: 0,ASG002_CAL29_24H:A10,ASG002_CAL29_24H:A11,ASG002_CAL29_24H:A12,ASG002_CAL29_24H:A19,ASG002_CAL29_24H:A20,ASG002_CAL29_24H:A21,ASG002_CAL29_24H:B03,ASG002_CAL29_24H:B10,ASG002_CAL29_24H:B11,ASG002_CAL29_24H:B12,...,MOAR011_CAL29_24H:C03,MOAR011_CAL29_24H:C10,MOAR011_CAL29_24H:C11,MOAR011_CAL29_24H:C12,MOAR011_CAL29_24H:F07,MOAR011_CAL29_24H:F08,MOAR011_CAL29_24H:F09,MOAR011_CAL29_24H:J10,MOAR011_CAL29_24H:J11,MOAR011_CAL29_24H:J12
Androgen,0.150262,0.095665,0.012547,-0.013771,-0.006036,0.168258,-0.092567,-0.090164,0.048508,-0.023503,...,-0.019674,-0.114086,-0.066424,-0.029193,0.026365,0.05741,-0.144436,-0.534283,-0.141983,-0.02497
CDK1_2,-0.578339,-0.564554,-0.140775,-0.527591,-0.168757,-0.216183,0.215387,-0.124171,-0.81599,-0.157987,...,-0.021328,-0.042051,0.045416,0.178023,-0.258331,0.011182,-0.061042,0.000832,-0.101726,-0.123756
CDK4_6,-0.507148,-0.652875,-0.232881,-0.054294,-0.103414,-0.274895,-0.44398,-0.33139,-0.128014,-0.206305,...,0.030002,0.072742,-0.075981,0.019697,0.334045,0.450317,0.511619,0.56463,0.526212,0.613684
EGFR,-1.086834,-0.801803,-0.472094,-1.995998,-0.319601,-0.55015,-0.197896,-0.55317,-0.060185,-0.395944,...,0.174706,0.15698,-0.03268,0.077943,0.27351,0.20782,0.189513,0.148242,0.123133,0.319563
Estrogen,-0.620365,0.040248,0.192045,0.104938,-0.199023,-0.017536,-0.116675,-0.204021,-0.082394,-0.300859,...,0.065754,0.19131,-0.081545,-0.007441,0.193023,0.190272,0.13233,0.170874,0.10249,0.197764
FGFR,0.280708,0.22122,0.094227,0.240247,0.235318,0.367413,0.106993,-0.075811,-0.139856,-0.111591,...,0.033177,-0.062255,0.056503,0.120957,-0.56885,-0.106812,0.00585,-0.040851,-0.023175,0.019579
PI3K,-1.988,-1.716374,-1.427059,-1.771798,-1.320343,-0.574043,-0.42084,-0.273139,0.071973,-0.314176,...,0.340491,0.36818,0.223337,0.423294,0.537997,0.003684,-0.009361,0.115438,0.360885,0.202008
p53,-0.241161,0.159501,0.017205,-0.690175,-0.263419,-0.777197,-0.23497,-1.612368,-1.301574,-1.368841,...,0.295969,-0.104491,0.098411,0.246676,-0.110728,-0.020653,-0.090623,-0.53056,0.113633,0.037181
TOP2A,-1.411496,-0.118343,0.111405,0.051349,0.213458,-0.223504,-0.394655,-0.205249,-0.156261,-0.373366,...,-0.081455,0.198192,-0.010148,0.046543,0.1596,0.152943,0.067537,0.169443,0.119695,0.197375
Src,-1.998928,-0.101589,0.047695,-0.147121,-0.940152,-0.831037,-0.076644,-0.606926,0.10894,-0.041947,...,0.150828,0.170718,-0.284087,0.32065,0.230499,0.101866,-0.11965,0.186785,0.178053,-0.308739


In [17]:
# Save the global response table to out_dir, then show it once more.
r_global_csv_path = os.path.join(out_dir, "R_global_annotated.csv")
R_global_df.to_csv(r_global_csv_path)
display(R_global_df)

Unnamed: 0,ASG002_CAL29_24H:A10,ASG002_CAL29_24H:A11,ASG002_CAL29_24H:A12,ASG002_CAL29_24H:A19,ASG002_CAL29_24H:A20,ASG002_CAL29_24H:A21,ASG002_CAL29_24H:B03,ASG002_CAL29_24H:B10,ASG002_CAL29_24H:B11,ASG002_CAL29_24H:B12,...,MOAR011_CAL29_24H:C03,MOAR011_CAL29_24H:C10,MOAR011_CAL29_24H:C11,MOAR011_CAL29_24H:C12,MOAR011_CAL29_24H:F07,MOAR011_CAL29_24H:F08,MOAR011_CAL29_24H:F09,MOAR011_CAL29_24H:J10,MOAR011_CAL29_24H:J11,MOAR011_CAL29_24H:J12
Androgen,0.150262,0.095665,0.012547,-0.013771,-0.006036,0.168258,-0.092567,-0.090164,0.048508,-0.023503,...,-0.019674,-0.114086,-0.066424,-0.029193,0.026365,0.05741,-0.144436,-0.534283,-0.141983,-0.02497
CDK1_2,-0.578339,-0.564554,-0.140775,-0.527591,-0.168757,-0.216183,0.215387,-0.124171,-0.81599,-0.157987,...,-0.021328,-0.042051,0.045416,0.178023,-0.258331,0.011182,-0.061042,0.000832,-0.101726,-0.123756
CDK4_6,-0.507148,-0.652875,-0.232881,-0.054294,-0.103414,-0.274895,-0.44398,-0.33139,-0.128014,-0.206305,...,0.030002,0.072742,-0.075981,0.019697,0.334045,0.450317,0.511619,0.56463,0.526212,0.613684
EGFR,-1.086834,-0.801803,-0.472094,-1.995998,-0.319601,-0.55015,-0.197896,-0.55317,-0.060185,-0.395944,...,0.174706,0.15698,-0.03268,0.077943,0.27351,0.20782,0.189513,0.148242,0.123133,0.319563
Estrogen,-0.620365,0.040248,0.192045,0.104938,-0.199023,-0.017536,-0.116675,-0.204021,-0.082394,-0.300859,...,0.065754,0.19131,-0.081545,-0.007441,0.193023,0.190272,0.13233,0.170874,0.10249,0.197764
FGFR,0.280708,0.22122,0.094227,0.240247,0.235318,0.367413,0.106993,-0.075811,-0.139856,-0.111591,...,0.033177,-0.062255,0.056503,0.120957,-0.56885,-0.106812,0.00585,-0.040851,-0.023175,0.019579
PI3K,-1.988,-1.716374,-1.427059,-1.771798,-1.320343,-0.574043,-0.42084,-0.273139,0.071973,-0.314176,...,0.340491,0.36818,0.223337,0.423294,0.537997,0.003684,-0.009361,0.115438,0.360885,0.202008
p53,-0.241161,0.159501,0.017205,-0.690175,-0.263419,-0.777197,-0.23497,-1.612368,-1.301574,-1.368841,...,0.295969,-0.104491,0.098411,0.246676,-0.110728,-0.020653,-0.090623,-0.53056,0.113633,0.037181
TOP2A,-1.411496,-0.118343,0.111405,0.051349,0.213458,-0.223504,-0.394655,-0.205249,-0.156261,-0.373366,...,-0.081455,0.198192,-0.010148,0.046543,0.1596,0.152943,0.067537,0.169443,0.119695,0.197375
Src,-1.998928,-0.101589,0.047695,-0.147121,-0.940152,-0.831037,-0.076644,-0.606926,0.10894,-0.041947,...,0.150828,0.170718,-0.284087,0.32065,0.230499,0.101866,-0.11965,0.186785,0.178053,-0.308739
