In [1]:
import pandas as pd
import os
import pickle
import bmra_prep
import bmra_prep.pathway_activity.prediction

In [2]:
# Cell line whose files are processed by this notebook.
cell_line ='BC3C'

# Root folder that contains one `blca_publication_OUTPUT_bmra_<cell_line>`
# directory per cell line. The default is the original hard-coded location;
# set the BMRA_OUTPUT_ROOT environment variable to run the notebook elsewhere
# without editing this cell.
bmra_output_root = os.environ.get(
    "BMRA_OUTPUT_ROOT",
    "/home/jing/Phd_project/project_UCD_blca/blca_publication_OUTPUT/blca_publication_OUTPUT_bmra",
)
cell_line_dir = f"{bmra_output_root.rstrip('/')}/blca_publication_OUTPUT_bmra_{cell_line}"

# data_dir is where the cells below read their inputs from; out_dir is where
# they write their CSV results. Both keep the trailing slash of the original.
data_dir = f"{cell_line_dir}/00_outputs_2020_{cell_line}/"
out_dir = f"{cell_line_dir}/01_outputs_2020_{cell_line}/"

# Create the output folder up front so later to_csv calls cannot fail on a
# missing directory.
os.makedirs(out_dir, exist_ok=True)

# Load Data

In [3]:
# Load the metadata dictionary and extract the entries used in this notebook.
# NOTE(review): pickle.load can execute arbitrary code stored in the file, so
# only open a metadata.pickle that comes from a trusted source.
metadata_path = os.path.join(data_dir, "metadata.pickle")
with open(metadata_path, "rb") as metadata_file:
    all_metadata = pickle.load(metadata_file)

# Count-style entries (named n_*).
n_modules, n_genes, n_experiments = (
    all_metadata[key] for key in ("n_modules", "n_genes", "n_experiments")
)

# Label-style entries; `modules` and `genes` later label the coefficient table.
modules, exp_ids, genes = (
    all_metadata[key] for key in ("modules", "exp_ids", "genes")
)

In [4]:
# Read the L1000 table (first CSV column becomes the index) and keep both the
# labelled frame and its bare value array; the cell ends by showing the shape.
l1000_csv = os.path.join(data_dir, "L1000_Data_norm_data.csv")
L1000_df = pd.read_csv(l1000_csv, index_col=0)

x = L1000_df.values
x.shape

(978, 116)

In [5]:
def _read_annotated_values(csv_name):
    """Read ``csv_name`` from ``data_dir`` (first column as index) and return its values array."""
    csv_path = os.path.join(data_dir, csv_name)
    return pd.read_csv(csv_path, index_col=0).values


# Load the dose-related matrices and the perturbation matrix.
inhib_conc_matrix = _read_annotated_values("inhib_conc_annotated.csv")
ic50_matrix = _read_annotated_values("ic50_annotated.csv")

# Currently disabled input:
# gamma_matrix = _read_annotated_values("gamma_annotated.csv")

pert_matrix = _read_annotated_values("pert_annotated.csv")

In [6]:
# Alternative formula kept for reference; it needs gamma_matrix, whose loading
# is commented out above:
# y_true = (1 + gamma_matrix * inhib_conc_matrix / ic50_matrix) / (1 + inhib_conc_matrix / ic50_matrix)

# Element-wise 1 / (1 + concentration / IC50).
conc_over_ic50 = inhib_conc_matrix / ic50_matrix
y_true = 1 / (1 + conc_over_ic50)

# Show the shape first, then the array itself as the cell's output.
display(y_true.shape)
y_true

(10, 116)

array([[1.00000000e+00, 1.00000000e+00, 1.00000000e+00, ...,
        1.00000000e+00, 1.00000000e+00, 4.73684211e-01],
       [1.00000000e+00, 1.00000000e+00, 1.00000000e+00, ...,
        1.00000000e+00, 1.00000000e+00, 1.00000000e+00],
       [1.00000000e+00, 1.00000000e+00, 1.00000000e+00, ...,
        1.00000000e+00, 1.00000000e+00, 1.00000000e+00],
       ...,
       [2.61931374e-04, 2.35480218e-03, 1.56995915e-02, ...,
        1.00000000e+00, 1.00000000e+00, 1.00000000e+00],
       [1.00000000e+00, 1.00000000e+00, 1.00000000e+00, ...,
        9.49144777e-01, 9.82453367e-01, 1.00000000e+00],
       [1.00000000e+00, 1.00000000e+00, 1.00000000e+00, ...,
        1.00000000e+00, 1.00000000e+00, 1.00000000e+00]])

## Run models

In [7]:
# Fit the coefficient array from the expression values, the target activities
# and the perturbation matrix.
# NOTE(review): the five trailing numbers are passed positionally and their
# meaning is not visible here; confirm them against the predict_coeffs
# signature. If the fit is stochastic, a seed should be fixed (TODO confirm).
predict_coeffs = bmra_prep.pathway_activity.prediction.predict_coeffs
trailing_positional_args = (200_000, 10, 10, 10, 100)
a_coeffs = predict_coeffs(x, y_true, pert_matrix, *trailing_positional_args)

In [8]:
# Label the coefficient array (rows: modules, columns: genes) and save it.
a_coeffs_df = pd.DataFrame(data=a_coeffs, index=modules, columns=genes)
a_coeffs_csv = os.path.join(out_dir, "a_coeffs.csv")
a_coeffs_df.to_csv(a_coeffs_csv)

# Uncomment to reuse a previously saved table instead of the fresh fit:
#a_coeffs_df = pd.read_csv(os.path.join(out_dir,'a_coeffs.csv'),index_col=0)
#a_coeffs = a_coeffs_df.values

# Count of non-zero coefficients per module, followed by the full table.
nonzero_per_module = a_coeffs_df.astype(bool).sum(axis='columns')
display(nonzero_per_module)
display(a_coeffs_df)

Androgen    978
CDK1        978
CDK2        978
CDK4_6      978
EGFR        978
Estrogen    978
FGFR        978
PI3K        978
p53         978
TOP2A       978
dtype: int64

Unnamed: 0,AARS,ABCB6,ABCC5,ABCF1,ABCF3,ABHD4,ABHD6,ABL1,ACAA1,ACAT2,...,ZMIZ1,ZMYM2,ZNF131,ZNF274,ZNF318,ZNF395,ZNF451,ZNF586,ZNF589,ZW10
Androgen,-1.8e-05,-8e-06,-1.579719e-05,1.7e-05,-5.905555e-06,-2.1e-05,8.695907e-06,-3.240004e-06,1.4e-05,2.3e-05,...,-4e-06,-2.5e-05,-1.4e-05,3e-05,-8e-06,7e-06,2.5e-05,1.097575e-05,-5e-06,-1.9e-05
CDK1,3.4e-05,-1.2e-05,-1.256315e-06,3e-06,3.087167e-05,3.8e-05,1.556356e-05,-8.347715e-06,3.5e-05,-6e-06,...,3e-06,-2.1e-05,4e-06,-5e-06,-1.6e-05,1.3e-05,-6e-06,-1.131335e-05,7e-06,6e-06
CDK2,1e-05,1.1e-05,-1.538152e-05,-1.2e-05,1.817472e-05,-3e-06,1.379999e-05,7.477901e-06,1.1e-05,-4e-06,...,2.1e-05,2.3e-05,3.3e-05,3e-06,-7e-06,1.9e-05,-6e-06,-7.895938e-06,-2.6e-05,-1.5e-05
CDK4_6,-2e-06,-4e-06,8.056662e-07,2e-06,1.043432e-05,1.1e-05,-4.667553e-06,-2.525765e-06,-8e-06,-7e-06,...,7e-06,3e-06,-1e-06,6e-06,1.4e-05,-1.4e-05,5e-06,-1.047801e-05,1e-05,-1e-05
EGFR,-9e-06,3e-06,2.922872e-06,-1.2e-05,3.332518e-05,-0.040442,2.447384e-06,-5.623219e-07,1.1e-05,-0.101405,...,-2e-06,-3e-06,-6e-06,-1.8e-05,-9e-06,7e-06,-3e-05,-1.363317e-07,-1.1e-05,-2.5e-05
Estrogen,1.3e-05,3e-06,1.503109e-05,-6e-06,4.35587e-06,7e-06,-6.649357e-06,-2.479396e-06,1.9e-05,-0.217647,...,1e-06,-2.7e-05,5e-06,-1.4e-05,-5e-06,-2.7e-05,-1.4e-05,1.827287e-05,-8e-06,1.2e-05
FGFR,-5e-06,7.2e-05,-1.525614e-05,-7e-06,-2.230359e-07,-6e-06,-6.816297e-06,-2.367912e-05,-2.3e-05,-0.000826,...,2.5e-05,8e-06,2e-06,2e-06,1.6e-05,7e-06,1.4e-05,7.07905e-06,-4e-06,-1e-05
PI3K,1.4e-05,-2e-06,-1.573501e-05,-1e-06,1.340963e-05,-9e-06,6.952158e-07,1.660599e-05,8e-06,-0.000242,...,1.2e-05,1.1e-05,1.1e-05,4e-06,6e-06,-1.3e-05,-2e-06,-4.493404e-05,1.3e-05,4.5e-05
p53,2e-06,-8e-06,5.236184e-06,1.1e-05,-1.400796e-05,2.2e-05,0.244017,-5.577916e-06,2e-06,8e-06,...,3.6e-05,-7e-06,1.3e-05,1.2e-05,4.6e-05,6e-06,-1.4e-05,-2.035222e-05,1e-05,0.000346
TOP2A,2e-06,2.3e-05,-2.563053e-05,2e-06,8.899649e-06,2e-05,2.025705e-05,-9.34507e-06,1e-06,-1e-06,...,-9e-06,-4e-06,-5e-06,-2.6e-05,1.3e-05,1e-06,1e-05,1.269104e-05,3e-05,-1.5e-05


In [9]:
#pathway_activity = a_coeffs @ x
#pathway_activity.shape

In [10]:
# Compute pathway activities from the expression array and the coefficients,
# then build the global response object labelled by module and by the L1000
# frame's column labels.
pathway_activity = bmra_prep.pathway_activity.calc_pathway_activity(x, a_coeffs)
R_global = bmra_prep.pathway_activity.calc_global_response_from_pathway_activity(
    pathway_activity, modules, L1000_df.columns
)

# The labelled table is exposed through the object's `dataframe` attribute.
R_global_df = R_global.dataframe
display(R_global_df)

Unnamed: 0,ASG002_BC3C_24H:A10,ASG002_BC3C_24H:A11,ASG002_BC3C_24H:A19,ASG002_BC3C_24H:A20,ASG002_BC3C_24H:A21,ASG002_BC3C_24H:B10,ASG002_BC3C_24H:B11,ASG002_BC3C_24H:B12,ASG002_BC3C_24H:B14,ASG002_BC3C_24H:B15,...,MOAR010_BC3C_24H:L21,MOAR011_BC3C_24H:C01,MOAR011_BC3C_24H:C02,MOAR011_BC3C_24H:C03,MOAR011_BC3C_24H:C10,MOAR011_BC3C_24H:C11,MOAR011_BC3C_24H:F07,MOAR011_BC3C_24H:F08,MOAR011_BC3C_24H:F09,MOAR011_BC3C_24H:J10
Androgen,-0.066383,-0.02681,-0.002846,-0.121209,-0.002077,0.015296,0.008254,0.026381,0.01115,0.016022,...,0.013912,-0.011922,0.003319,0.006919,-0.068403,0.005122,-0.044736,0.03441,0.026786,-0.613759
CDK1,-0.68476,-0.654106,0.122886,-0.264504,0.259283,-0.374288,0.082547,0.187527,0.165777,0.135594,...,0.15037,0.099576,-0.132924,-0.014631,0.211416,0.083129,-0.589848,0.173936,-0.040329,0.340719
CDK2,-0.198444,-0.179348,-0.033931,-0.009549,-0.001702,-0.026617,0.010837,-0.036586,-0.036595,0.028302,...,-1.060688,-0.012627,-0.00241,-0.002656,-0.022983,0.023359,-0.058881,-0.029916,-0.033574,-0.005662
CDK4_6,-0.76499,-0.730992,-0.285279,-0.330903,-0.469062,-0.021706,0.093823,0.001591,-0.009615,-0.172371,...,0.178062,-0.009586,0.018364,0.074733,-0.597657,-0.362518,-0.518913,0.13649,0.065511,-0.101307
EGFR,0.549444,0.452324,0.193299,0.179124,0.370865,0.012434,-0.343227,0.025867,0.220917,0.181411,...,-0.629846,0.017271,-0.183831,-0.057593,-0.088515,-0.332594,-1.217961,-0.324715,-0.129304,-0.187491
Estrogen,-0.0647,-0.146567,-0.112804,-0.314575,-0.788776,-0.261706,-0.023994,-0.121496,-0.132983,-0.086547,...,0.117931,0.033153,0.038232,-0.002936,0.038884,0.120571,-1.058685,-0.196859,0.207388,0.047199
FGFR,-0.22885,-0.245,-0.072124,0.069715,-0.021881,-0.400293,-0.006085,0.04474,-0.130965,-0.094341,...,0.146924,-0.068348,0.025694,-0.039654,-0.107467,-0.481338,-0.978019,-0.115399,0.119322,0.004962
PI3K,-1.840868,-1.63806,-1.480251,-1.264902,-0.673181,0.463301,-0.206204,-0.743931,-0.65764,-0.965328,...,0.244673,-0.328258,-0.388938,-0.57384,-0.507185,-0.158368,-1.061264,-0.009695,-0.783351,-0.122861
p53,-0.203197,-0.336953,-0.149505,-0.353285,-0.079224,-1.623223,-1.457357,-1.767792,-0.183297,-0.069948,...,0.123831,0.143809,-0.02197,0.136895,0.030533,0.213351,-0.2184,-0.053723,0.037355,0.374609
TOP2A,-0.153562,0.116304,-0.223548,-0.146255,-0.1276,0.042875,0.078235,-0.025817,-1.999627,-0.184919,...,0.319783,0.124103,0.112396,-0.102392,-0.414168,0.077822,-0.695963,0.018656,-0.004948,-0.398356


In [11]:
# Save the global response table to the output folder and show it.
r_global_csv = os.path.join(out_dir, "R_global_annotated.csv")
R_global_df.to_csv(r_global_csv)
display(R_global_df)

Unnamed: 0,ASG002_BC3C_24H:A10,ASG002_BC3C_24H:A11,ASG002_BC3C_24H:A19,ASG002_BC3C_24H:A20,ASG002_BC3C_24H:A21,ASG002_BC3C_24H:B10,ASG002_BC3C_24H:B11,ASG002_BC3C_24H:B12,ASG002_BC3C_24H:B14,ASG002_BC3C_24H:B15,...,MOAR010_BC3C_24H:L21,MOAR011_BC3C_24H:C01,MOAR011_BC3C_24H:C02,MOAR011_BC3C_24H:C03,MOAR011_BC3C_24H:C10,MOAR011_BC3C_24H:C11,MOAR011_BC3C_24H:F07,MOAR011_BC3C_24H:F08,MOAR011_BC3C_24H:F09,MOAR011_BC3C_24H:J10
Androgen,-0.066383,-0.02681,-0.002846,-0.121209,-0.002077,0.015296,0.008254,0.026381,0.01115,0.016022,...,0.013912,-0.011922,0.003319,0.006919,-0.068403,0.005122,-0.044736,0.03441,0.026786,-0.613759
CDK1,-0.68476,-0.654106,0.122886,-0.264504,0.259283,-0.374288,0.082547,0.187527,0.165777,0.135594,...,0.15037,0.099576,-0.132924,-0.014631,0.211416,0.083129,-0.589848,0.173936,-0.040329,0.340719
CDK2,-0.198444,-0.179348,-0.033931,-0.009549,-0.001702,-0.026617,0.010837,-0.036586,-0.036595,0.028302,...,-1.060688,-0.012627,-0.00241,-0.002656,-0.022983,0.023359,-0.058881,-0.029916,-0.033574,-0.005662
CDK4_6,-0.76499,-0.730992,-0.285279,-0.330903,-0.469062,-0.021706,0.093823,0.001591,-0.009615,-0.172371,...,0.178062,-0.009586,0.018364,0.074733,-0.597657,-0.362518,-0.518913,0.13649,0.065511,-0.101307
EGFR,0.549444,0.452324,0.193299,0.179124,0.370865,0.012434,-0.343227,0.025867,0.220917,0.181411,...,-0.629846,0.017271,-0.183831,-0.057593,-0.088515,-0.332594,-1.217961,-0.324715,-0.129304,-0.187491
Estrogen,-0.0647,-0.146567,-0.112804,-0.314575,-0.788776,-0.261706,-0.023994,-0.121496,-0.132983,-0.086547,...,0.117931,0.033153,0.038232,-0.002936,0.038884,0.120571,-1.058685,-0.196859,0.207388,0.047199
FGFR,-0.22885,-0.245,-0.072124,0.069715,-0.021881,-0.400293,-0.006085,0.04474,-0.130965,-0.094341,...,0.146924,-0.068348,0.025694,-0.039654,-0.107467,-0.481338,-0.978019,-0.115399,0.119322,0.004962
PI3K,-1.840868,-1.63806,-1.480251,-1.264902,-0.673181,0.463301,-0.206204,-0.743931,-0.65764,-0.965328,...,0.244673,-0.328258,-0.388938,-0.57384,-0.507185,-0.158368,-1.061264,-0.009695,-0.783351,-0.122861
p53,-0.203197,-0.336953,-0.149505,-0.353285,-0.079224,-1.623223,-1.457357,-1.767792,-0.183297,-0.069948,...,0.123831,0.143809,-0.02197,0.136895,0.030533,0.213351,-0.2184,-0.053723,0.037355,0.374609
TOP2A,-0.153562,0.116304,-0.223548,-0.146255,-0.1276,0.042875,0.078235,-0.025817,-1.999627,-0.184919,...,0.319783,0.124103,0.112396,-0.102392,-0.414168,0.077822,-0.695963,0.018656,-0.004948,-0.398356
