In [1]:
import pandas as pd
import os
import pickle
import bmra_prep
import bmra_prep.pathway_activity.prediction

In [2]:
# Cell line whose BMRA inputs are read and whose outputs are written below.
cell_line = 'BC3C'

# Root folder that holds one "blca_publication_OUTPUT_bmra_<cell_line>" folder
# per cell line.  The default is the original author's absolute path; set the
# BLCA_BMRA_OUTPUT_ROOT environment variable to run the notebook on another
# machine without editing this cell.
bmra_output_root = os.environ.get(
    "BLCA_BMRA_OUTPUT_ROOT",
    "/home/jing/Phd_project/project_UCD_blca/blca_publication_OUTPUT/blca_publication_OUTPUT_bmra",
)
cell_line_dir = os.path.join(
    bmra_output_root, f"blca_publication_OUTPUT_bmra_{cell_line}"
)

# The trailing "" component keeps the trailing path separator that the
# original hard-coded strings carried, so the resulting paths are unchanged.
data_dir = os.path.join(cell_line_dir, f"00_outputs_2020_{cell_line}", "")
out_dir = os.path.join(cell_line_dir, f"01_outputs_2020_{cell_line}", "")

# Only the output folder is created here; data_dir is read from later and is
# expected to exist already.
os.makedirs(out_dir, exist_ok=True)

# Load Data

In [3]:
# Load the pickled metadata dictionary and bind the entries used by this notebook.
# NOTE: pickle.load can execute arbitrary code contained in the file, so only
# point data_dir at metadata written by a trusted run.
metadata_path = os.path.join(data_dir, "metadata.pickle")
with open(metadata_path, "rb") as metadata_file:
    all_metadata = pickle.load(metadata_file)

# Size entries stored under the "n_*" keys.
n_modules, n_genes, n_experiments = (
    all_metadata[key] for key in ("n_modules", "n_genes", "n_experiments")
)

# Label entries; `modules` and `genes` are used further down as the row and
# column labels of the coefficient table.
modules, exp_ids, genes = (
    all_metadata[key] for key in ("modules", "exp_ids", "genes")
)

In [4]:
# Read the normalised L1000 table; the first CSV column becomes the row index.
l1000_csv_path = os.path.join(data_dir, "L1000_Data_norm_data.csv")
L1000_df = pd.read_csv(l1000_csv_path, index_col=0)

# Plain array view of the table, used as the model input below.
x = L1000_df.values
x.shape

(978, 107)

In [5]:
# Load the dose, IC50 and perturbation matrices as plain arrays.
def _read_annotated_matrix(csv_name):
    """Read ``csv_name`` from ``data_dir`` and return the table values as an array.

    The first CSV column is consumed as the row index, so it is not part of
    the returned values.
    """
    csv_path = os.path.join(data_dir, csv_name)
    return pd.read_csv(csv_path, index_col=0).values


inhib_conc_matrix = _read_annotated_matrix("inhib_conc_annotated.csv")
ic50_matrix = _read_annotated_matrix("ic50_annotated.csv")

# The gamma matrix is currently not loaded; re-enable together with the
# gamma-based y_true formula if it is needed again.
# gamma_matrix = _read_annotated_matrix("gamma_annotated.csv")

pert_matrix = _read_annotated_matrix("pert_annotated.csv")

In [6]:
# Element-wise target values computed from the dose and IC50 matrices:
#     y_true = 1 / (1 + inhib_conc / ic50)
# Disabled variant that also uses the gamma matrix:
#     y_true = (1 + gamma_matrix * inhib_conc_matrix / ic50_matrix) / (1 + inhib_conc_matrix / ic50_matrix)
dose_over_ic50 = inhib_conc_matrix / ic50_matrix
y_true = 1 / (1 + dose_over_ic50)

# Show the shape first, then the array itself as the cell result.
display(y_true.shape)
y_true

(11, 107)

array([[1.        , 1.        , 1.        , ..., 1.        , 1.        ,
        0.47368421],
       [1.        , 1.        , 1.        , ..., 1.        , 1.        ,
        1.        ],
       [1.        , 1.        , 1.        , ..., 1.        , 1.        ,
        1.        ],
       ...,
       [1.        , 1.        , 1.        , ..., 1.        , 1.        ,
        1.        ],
       [1.        , 1.        , 1.        , ..., 1.        , 1.        ,
        1.        ],
       [1.        , 1.        , 1.        , ..., 1.        , 1.        ,
        1.        ]])

## Run models

In [7]:
# Fit the coefficient matrix from the expression data, the target values and
# the perturbation matrix.
# NOTE(review): the five trailing positional numbers are passed through
# unchanged; their meaning is not visible in this notebook — check the
# predict_coeffs signature before tuning them.
predict_coeffs = bmra_prep.pathway_activity.prediction.predict_coeffs
a_coeffs = predict_coeffs(x, y_true, pert_matrix, 200_000, 10, 10, 10, 100)

In [8]:
# Label the coefficient matrix (rows = modules, columns = genes) and save it.
a_coeffs_csv_path = os.path.join(out_dir, "a_coeffs.csv")
a_coeffs_df = pd.DataFrame(a_coeffs, index=modules, columns=genes)
a_coeffs_df.to_csv(a_coeffs_csv_path)

# To reuse a previous fit instead of recomputing it, load the saved table:
# a_coeffs_df = pd.read_csv(os.path.join(out_dir,'a_coeffs.csv'),index_col=0)
# a_coeffs = a_coeffs_df.values

# Number of non-zero coefficients in each row, followed by the table itself.
nonzero_per_module = a_coeffs_df.astype(bool).sum(axis=1)
display(nonzero_per_module)
display(a_coeffs_df)

Androgen    978
CDK1_2      978
CDK4_6      978
EGFR        978
Estrogen    978
FGFR        978
PI3K        978
p53         978
TOP2A       978
Src         978
TGFb        978
dtype: int64

Unnamed: 0,AARS,ABCB6,ABCC5,ABCF1,ABCF3,ABHD4,ABHD6,ABL1,ACAA1,ACAT2,...,ZMIZ1,ZMYM2,ZNF131,ZNF274,ZNF318,ZNF395,ZNF451,ZNF586,ZNF589,ZW10
Androgen,3.232697e-05,1.8e-05,-2.240997e-05,-2.37875e-05,4.377578e-07,6e-06,8e-06,-1.1e-05,1.892618e-06,-4.417192e-06,...,-4.515024e-07,3.832943e-05,-1.9e-05,-2.9e-05,-8e-06,8e-06,-9.733658e-07,1.2e-05,-1.2e-05,5e-06
CDK1_2,1.795753e-06,4.4e-05,1.984773e-06,6.179007e-06,3.935644e-06,-2.1e-05,-2.6e-05,-1.3e-05,8.952013e-06,2.509371e-07,...,-1.219263e-05,8.47441e-07,-1.2e-05,2.1e-05,2e-05,-9e-06,-1.204023e-05,4e-06,-1.8e-05,4e-06
CDK4_6,-6.031457e-06,1.3e-05,3.639534e-07,6.736128e-06,-2.619518e-05,2.9e-05,-1.8e-05,-2e-06,3.078866e-06,-3.798429e-05,...,2.716128e-05,3.826204e-06,-1.2e-05,-2.8e-05,-9e-06,-2e-05,1.762917e-05,-6e-06,-2.3e-05,1e-05
EGFR,-6.796049e-05,-2e-06,-1.997297e-05,-6.923632e-06,-1.594189e-05,-0.000275,2e-06,8e-06,-5.889182e-07,-0.02727496,...,-1.255961e-05,4.008025e-07,1.4e-05,-2e-06,1e-06,0.00022,-1.102956e-05,5e-06,-8e-06,-1e-05
Estrogen,-1.606878e-05,-8e-06,1.037475e-05,8.246457e-07,1.441364e-06,3e-06,-2.3e-05,-9e-06,-4.321924e-06,-0.273088,...,1.181582e-05,7.688948e-06,-1.2e-05,2.1e-05,9e-06,-7e-06,7.189575e-06,-6e-06,2e-06,2.1e-05
FGFR,-0.0002152438,4.5e-05,-2.387254e-05,-3.300241e-05,-2.423587e-05,8.9e-05,-1.1e-05,2.8e-05,-4.381635e-05,-9.345337e-05,...,3.695105e-05,2.588822e-05,2.7e-05,-1.8e-05,-5.9e-05,-5.5e-05,-3.30918e-05,3.1e-05,2.3e-05,-2.9e-05
PI3K,6.949902e-06,5e-06,4.543291e-06,1.220479e-05,-2.7297e-06,1e-05,4e-06,9e-06,-1.895826e-05,-5.21905e-06,...,1.207621e-05,-5.892926e-07,-3e-06,-6e-06,-1.3e-05,-0.000169,-5.824767e-06,-1.3e-05,2.7e-05,0.024509
p53,2.111265e-05,-5e-06,1.703083e-05,2.652311e-05,9.654689e-06,-4e-06,0.242462,-7e-06,1.44078e-05,-5.228594e-06,...,1.209835e-05,1.309358e-05,-6.2e-05,4.3e-05,-1.2e-05,-3e-06,-8.687191e-06,-1.2e-05,-4e-06,2.6e-05
TOP2A,-4.173982e-06,1.4e-05,1.119758e-05,-1.339614e-05,4.313476e-06,6.3e-05,6e-06,-7e-06,8.336216e-06,1.122271e-05,...,1.097348e-05,-5.695005e-06,2.4e-05,-1e-05,-3e-06,-1.4e-05,-1.505691e-05,1.2e-05,-1.5e-05,-4e-05
Src,-7.504637e-06,-3e-05,4.09481e-06,-4.008959e-05,1.016587e-05,-1.3e-05,-1.7e-05,-1.7e-05,-9.912394e-06,-4.977106e-05,...,1.232571e-05,-2.541109e-05,-1.1e-05,3e-06,-4.7e-05,6e-06,-1.635366e-05,-1.1e-05,-1.7e-05,-1.7e-05


In [9]:
#pathway_activity = a_coeffs @ x
#pathway_activity.shape

In [10]:
# Step 1: turn the expression data and fitted coefficients into pathway activity.
activity = bmra_prep.pathway_activity.calc_pathway_activity(x, a_coeffs)

# Step 2: derive the global response from that activity, passing the module
# names and the L1000 column labels along as annotations.
R_global = bmra_prep.pathway_activity.calc_global_response_from_pathway_activity(
    activity,
    modules,
    L1000_df.columns,
)

# The result object exposes its table through the `dataframe` attribute.
R_global_df = R_global.dataframe
display(R_global_df)

Unnamed: 0,ASG002_BC3C_24H:A10,ASG002_BC3C_24H:A11,ASG002_BC3C_24H:A19,ASG002_BC3C_24H:A20,ASG002_BC3C_24H:A21,ASG002_BC3C_24H:B10,ASG002_BC3C_24H:B11,ASG002_BC3C_24H:B14,ASG002_BC3C_24H:B15,ASG002_BC3C_24H:C13,...,MOAR010_BC3C_24H:L20,MOAR011_BC3C_24H:C01,MOAR011_BC3C_24H:C02,MOAR011_BC3C_24H:C03,MOAR011_BC3C_24H:C10,MOAR011_BC3C_24H:C11,MOAR011_BC3C_24H:F07,MOAR011_BC3C_24H:F08,MOAR011_BC3C_24H:F09,MOAR011_BC3C_24H:J10
Androgen,-0.052455,-0.009148,0.001937,-0.039724,0.001781,0.019769,0.00399,0.016247,0.021265,0.036161,...,-0.044486,-0.01572,-0.001453,-0.009111,-0.052046,-0.004456,-0.054898,0.029468,0.026118,-0.587685
CDK1_2,-0.740124,-0.606127,0.082724,-0.084641,0.089618,-0.286336,0.031502,-0.286893,0.120831,0.064283,...,-0.014573,-0.012519,-0.117187,0.056421,-0.072967,-0.008869,-0.843978,0.112627,-0.053638,0.111144
CDK4_6,-0.025253,-0.233696,-0.041848,-0.515841,0.040598,0.002317,-0.007709,0.098412,0.085997,0.032184,...,-0.574798,0.048817,0.097616,-0.089236,-1.039905,-1.211545,0.31835,0.198992,-0.063215,-0.014645
EGFR,0.592259,0.492952,0.226333,0.364221,0.487503,0.110757,-0.411256,0.261696,0.283919,-0.048144,...,0.177629,0.041771,-0.057438,-0.0371,-0.088842,-0.424002,-0.448519,-0.554409,-0.313734,-0.227734
Estrogen,-0.124231,-0.207637,-0.209994,-0.409535,-0.94559,-0.310938,-0.085655,-0.236941,-0.162903,-0.040455,...,-0.082394,0.059883,0.040612,-0.011245,0.023317,0.134588,-1.508493,-0.226952,0.183926,-0.253958
FGFR,-0.097671,-0.186454,-0.08808,0.050895,-0.033554,-0.391863,-0.022104,-0.069243,-0.074836,-0.279904,...,0.165802,0.000337,0.081908,0.017082,0.060426,-0.343713,-0.528144,0.062253,0.242403,0.077665
PI3K,-1.905733,-1.691061,-1.427599,-1.243824,-0.707364,0.296333,-0.146641,-0.192071,-0.856527,-0.27771,...,-0.033147,-0.176988,-0.048641,-0.519026,-0.515285,0.009209,-0.825105,0.220407,-0.234555,0.006711
p53,-0.227816,-0.249853,-0.134207,-0.408271,0.04873,-1.630662,-1.475214,-0.119662,-0.08114,-1.328435,...,-0.323656,0.247284,0.026732,0.246185,0.186164,0.300838,-0.227291,-0.049827,0.030888,0.596414
TOP2A,-0.226692,0.075102,-0.235,-0.172096,-0.126577,0.054543,0.07211,-1.99576,-0.205164,-0.287674,...,0.052978,0.099322,0.077347,-0.178076,-0.523397,0.049836,-0.752976,-0.009539,-0.033156,-0.439699
Src,-0.923716,-1.667888,0.524297,-1.207418,0.572451,-1.119442,0.495526,0.397873,0.397281,0.448248,...,0.076092,-0.204775,0.057268,0.057415,-0.105558,0.001541,0.035543,0.242847,0.775984,-0.162424


In [11]:
# Write the annotated global response table to the output folder and show it.
r_global_csv_path = os.path.join(out_dir, "R_global_annotated.csv")
R_global_df.to_csv(r_global_csv_path)
display(R_global_df)

Unnamed: 0,ASG002_BC3C_24H:A10,ASG002_BC3C_24H:A11,ASG002_BC3C_24H:A19,ASG002_BC3C_24H:A20,ASG002_BC3C_24H:A21,ASG002_BC3C_24H:B10,ASG002_BC3C_24H:B11,ASG002_BC3C_24H:B14,ASG002_BC3C_24H:B15,ASG002_BC3C_24H:C13,...,MOAR010_BC3C_24H:L20,MOAR011_BC3C_24H:C01,MOAR011_BC3C_24H:C02,MOAR011_BC3C_24H:C03,MOAR011_BC3C_24H:C10,MOAR011_BC3C_24H:C11,MOAR011_BC3C_24H:F07,MOAR011_BC3C_24H:F08,MOAR011_BC3C_24H:F09,MOAR011_BC3C_24H:J10
Androgen,-0.052455,-0.009148,0.001937,-0.039724,0.001781,0.019769,0.00399,0.016247,0.021265,0.036161,...,-0.044486,-0.01572,-0.001453,-0.009111,-0.052046,-0.004456,-0.054898,0.029468,0.026118,-0.587685
CDK1_2,-0.740124,-0.606127,0.082724,-0.084641,0.089618,-0.286336,0.031502,-0.286893,0.120831,0.064283,...,-0.014573,-0.012519,-0.117187,0.056421,-0.072967,-0.008869,-0.843978,0.112627,-0.053638,0.111144
CDK4_6,-0.025253,-0.233696,-0.041848,-0.515841,0.040598,0.002317,-0.007709,0.098412,0.085997,0.032184,...,-0.574798,0.048817,0.097616,-0.089236,-1.039905,-1.211545,0.31835,0.198992,-0.063215,-0.014645
EGFR,0.592259,0.492952,0.226333,0.364221,0.487503,0.110757,-0.411256,0.261696,0.283919,-0.048144,...,0.177629,0.041771,-0.057438,-0.0371,-0.088842,-0.424002,-0.448519,-0.554409,-0.313734,-0.227734
Estrogen,-0.124231,-0.207637,-0.209994,-0.409535,-0.94559,-0.310938,-0.085655,-0.236941,-0.162903,-0.040455,...,-0.082394,0.059883,0.040612,-0.011245,0.023317,0.134588,-1.508493,-0.226952,0.183926,-0.253958
FGFR,-0.097671,-0.186454,-0.08808,0.050895,-0.033554,-0.391863,-0.022104,-0.069243,-0.074836,-0.279904,...,0.165802,0.000337,0.081908,0.017082,0.060426,-0.343713,-0.528144,0.062253,0.242403,0.077665
PI3K,-1.905733,-1.691061,-1.427599,-1.243824,-0.707364,0.296333,-0.146641,-0.192071,-0.856527,-0.27771,...,-0.033147,-0.176988,-0.048641,-0.519026,-0.515285,0.009209,-0.825105,0.220407,-0.234555,0.006711
p53,-0.227816,-0.249853,-0.134207,-0.408271,0.04873,-1.630662,-1.475214,-0.119662,-0.08114,-1.328435,...,-0.323656,0.247284,0.026732,0.246185,0.186164,0.300838,-0.227291,-0.049827,0.030888,0.596414
TOP2A,-0.226692,0.075102,-0.235,-0.172096,-0.126577,0.054543,0.07211,-1.99576,-0.205164,-0.287674,...,0.052978,0.099322,0.077347,-0.178076,-0.523397,0.049836,-0.752976,-0.009539,-0.033156,-0.439699
Src,-0.923716,-1.667888,0.524297,-1.207418,0.572451,-1.119442,0.495526,0.397873,0.397281,0.448248,...,0.076092,-0.204775,0.057268,0.057415,-0.105558,0.001541,0.035543,0.242847,0.775984,-0.162424
