In [1]:
import pandas as pd
import os
import pickle
import bmra_prep
import bmra_prep.pathway_activity.prediction

In [2]:
# Cell line whose BMRA inputs and outputs this notebook processes.
cell_line = 'BC3C'

# Root folder that holds one `blca_publication_OUTPUT_bmra_<cell_line>` directory
# per cell line. The default is the original author's location; set the
# BMRA_OUTPUT_ROOT environment variable to run the notebook on another machine
# without editing this cell. An unset or empty variable falls back to the default.
bmra_output_root = os.environ.get("BMRA_OUTPUT_ROOT") or (
    "/home/jing/Phd_project/project_UCD_blca/blca_publication_OUTPUT/blca_publication_OUTPUT_bmra"
)
cell_line_dir = os.path.join(bmra_output_root, f"blca_publication_OUTPUT_bmra_{cell_line}")

# Later cells read their inputs from `data_dir` and write their results to `out_dir`.
# The empty last component keeps the trailing separator the paths originally had.
data_dir = os.path.join(cell_line_dir, f"00_outputs_2020_{cell_line}", "")
out_dir = os.path.join(cell_line_dir, f"01_outputs_2020_{cell_line}", "")

# Only the output folder is created; the input folder must already exist.
os.makedirs(out_dir, exist_ok=True)

## Load Data

In [3]:
# Load the metadata dict and extract the entries used in this notebook.
metadata_path = os.path.join(data_dir, "metadata.pickle")
with open(metadata_path, "rb") as metadata_file:
    all_metadata = pickle.load(metadata_file)

# Size entries stored in the metadata.
n_modules, n_genes, n_experiments = (
    all_metadata[key] for key in ("n_modules", "n_genes", "n_experiments")
)

# Label entries; `modules` and `genes` are used further down as the row and
# column labels of the coefficient table.
modules, exp_ids, genes = (
    all_metadata[key] for key in ("modules", "exp_ids", "genes")
)

In [4]:
# Read the normalised L1000 table; the first CSV column becomes the index.
l1000_csv_path = os.path.join(data_dir, "L1000_Data_norm_data.csv")
L1000_df = pd.read_csv(l1000_csv_path, index_col=0)

# Bare array of the table's values, used as model input below.
x = L1000_df.values
x.shape

(978, 101)

In [5]:
# Load the dose, IC50 and perturbation tables as plain arrays.
def _read_annotated_matrix(file_name):
    """Return the values of a CSV in `data_dir`, with its first column as the index."""
    table = pd.read_csv(os.path.join(data_dir, file_name), index_col=0)
    return table.values


inhib_conc_matrix = _read_annotated_matrix("inhib_conc_annotated.csv")
ic50_matrix = _read_annotated_matrix("ic50_annotated.csv")

# The gamma table is currently not loaded:
# gamma_matrix = _read_annotated_matrix("gamma_annotated.csv")

pert_matrix = _read_annotated_matrix("pert_annotated.csv")

In [6]:
# Target values handed to the model as `y_true`, computed element-wise as
# 1 / (1 + concentration / IC50).
# Disabled variant with an additional gamma term (requires gamma_matrix):
# y_true = (1 + gamma_matrix * inhib_conc_matrix / ic50_matrix) / (1 + inhib_conc_matrix / ic50_matrix)
dose_over_ic50 = inhib_conc_matrix / ic50_matrix
y_true = 1 / (1 + dose_over_ic50)

display(y_true.shape)
y_true

(10, 101)

array([[1.00000000e+00, 1.00000000e+00, 1.00000000e+00, ...,
        1.00000000e+00, 1.00000000e+00, 4.73684211e-01],
       [1.00000000e+00, 1.00000000e+00, 1.00000000e+00, ...,
        1.00000000e+00, 1.00000000e+00, 1.00000000e+00],
       [1.00000000e+00, 1.00000000e+00, 1.00000000e+00, ...,
        1.00000000e+00, 1.00000000e+00, 1.00000000e+00],
       ...,
       [2.61931374e-04, 2.35480218e-03, 1.56995915e-02, ...,
        1.00000000e+00, 1.00000000e+00, 1.00000000e+00],
       [1.00000000e+00, 1.00000000e+00, 1.00000000e+00, ...,
        9.49144777e-01, 9.82453367e-01, 1.00000000e+00],
       [1.00000000e+00, 1.00000000e+00, 1.00000000e+00, ...,
        1.00000000e+00, 1.00000000e+00, 1.00000000e+00]])

## Run models

In [7]:
# Fit the coefficient matrix from the expression data, the target values and
# the perturbation matrix.
# NOTE(review): the meaning of the five trailing positional numbers is not
# visible in this notebook — check the signature of predict_coeffs and
# consider passing them by keyword.
predict_coeffs = bmra_prep.pathway_activity.prediction.predict_coeffs
a_coeffs = predict_coeffs(x, y_true, pert_matrix, 200_000, 10, 10, 10, 100)

In [8]:
# Label the coefficients (rows = modules, columns = genes) and save them.
a_coeffs_df = pd.DataFrame(data=a_coeffs, index=modules, columns=genes)
a_coeffs_csv_path = os.path.join(out_dir, "a_coeffs.csv")
a_coeffs_df.to_csv(a_coeffs_csv_path)

# To reuse a previously saved fit instead of re-running the model:
#a_coeffs_df = pd.read_csv(os.path.join(out_dir,'a_coeffs.csv'),index_col=0)
#a_coeffs = a_coeffs_df.values

# Number of non-zero coefficients in each row, followed by the full table.
nonzero_per_module = a_coeffs_df.astype(bool).sum(axis=1)
display(nonzero_per_module)
display(a_coeffs_df)

Androgen    978
CDK1        978
CDK2        978
CDK4_6      978
EGFR        978
Estrogen    978
FGFR        978
PI3K        978
p53         978
TOP2A       978
dtype: int64

Unnamed: 0,AARS,ABCB6,ABCC5,ABCF1,ABCF3,ABHD4,ABHD6,ABL1,ACAA1,ACAT2,...,ZMIZ1,ZMYM2,ZNF131,ZNF274,ZNF318,ZNF395,ZNF451,ZNF586,ZNF589,ZW10
Androgen,-9.553169e-06,1e-06,1.85586e-05,-5.680024e-05,4e-06,-4.478833e-07,-1.1e-05,2e-06,-2.817703e-05,-1.809069e-06,...,-2e-06,1.147334e-05,-1e-06,-4.201098e-05,-2.1e-05,-6e-06,-6e-06,-3e-06,6.38639e-06,1.5e-05
CDK1,5.774219e-06,4e-05,3.497044e-06,1.686369e-05,1e-05,-5.210341e-06,2.5e-05,-1e-05,-1.709971e-06,5.974932e-07,...,1.4e-05,-7.928914e-07,1.3e-05,-9.668812e-07,1.5e-05,9e-06,1.5e-05,-2.3e-05,-1.676349e-05,-4.3e-05
CDK2,-9.922947e-08,2.2e-05,6.60801e-06,-4.164534e-06,1.6e-05,8.624532e-06,6e-06,-3e-06,-2.800989e-06,-1.504094e-05,...,5e-06,5.474087e-06,3e-05,6.841134e-06,3e-06,-4e-06,1.4e-05,1.9e-05,1.157576e-05,1.5e-05
CDK4_6,-9.083983e-06,3e-05,-3.466718e-07,6.429998e-06,-1.3e-05,-6.340201e-06,5e-06,9e-06,7.27525e-06,1.703684e-05,...,1.5e-05,6.689758e-07,-9e-06,1.530387e-05,-1.7e-05,-1.5e-05,-2e-06,-1.1e-05,1.593474e-05,-1e-06
EGFR,-4.795616e-05,-1e-05,1.664491e-05,2.359586e-05,-9e-06,-0.0003406296,6e-06,-4e-06,2.164123e-06,-0.02970342,...,-4e-06,-1.262161e-05,-1.9e-05,4.004949e-06,-5e-06,0.000204,8e-06,6e-06,2.560884e-05,-8e-06
Estrogen,-8.162228e-06,-4e-06,2.316646e-05,-3.560306e-06,1.3e-05,-3.53806e-06,2e-06,1e-05,8.930596e-06,-0.2739822,...,7e-06,2.130092e-06,-4e-06,2.019062e-05,-9e-06,-4.1e-05,1.8e-05,8e-06,1.662046e-07,1.9e-05
FGFR,-0.0006846579,4.7e-05,-1.794675e-05,-5.080037e-06,-1.4e-05,-1.344343e-05,-7e-06,-4.8e-05,1.287063e-06,4.899306e-06,...,-1.7e-05,-1.994361e-05,1.3e-05,1.949509e-06,1.3e-05,8e-06,-9e-06,-3e-06,-1.995416e-05,3e-05
PI3K,-8.707866e-07,-4e-06,1.520612e-05,-2.069859e-07,-4e-06,4.071974e-06,-5.2e-05,1.4e-05,9.41538e-08,-1.469054e-05,...,1.7e-05,2.210495e-05,2e-06,4.910975e-08,-1e-05,-0.000227,-2.7e-05,-1e-05,-7.676195e-07,0.032197
p53,9.794045e-06,2e-06,1.54615e-05,5.972054e-06,1.8e-05,2.71812e-05,0.229758,5e-06,1.03953e-06,1.047405e-05,...,6e-06,-1.30596e-05,1.2e-05,1.26875e-05,8e-06,-1.6e-05,3e-06,-7e-06,2.505835e-06,1.9e-05
TOP2A,1.925779e-05,2.9e-05,-1.827896e-05,4.912863e-07,1.6e-05,2.524989e-05,-1.3e-05,-9e-06,-4.003626e-06,7.007817e-06,...,1.7e-05,-4.265238e-06,4e-06,-6.594012e-06,7e-06,4.8e-05,3.5e-05,3e-06,-1.027979e-05,-8e-06


In [9]:
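# Disabled; kept for reference. Presumably the manual counterpart of the
# calc_pathway_activity call used in the next cell — TODO confirm.
#pathway_activity = a_coeffs @ x
#pathway_activity.shape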

In [10]:
# Compute pathway activities from the expression data and the fitted
# coefficients, then derive the global response from them. Rows are labelled
# with the module names and columns with the L1000 table's column labels.
pathway_activity = bmra_prep.pathway_activity.calc_pathway_activity(x, a_coeffs)
R_global = bmra_prep.pathway_activity.calc_global_response_from_pathway_activity(
    pathway_activity, modules, L1000_df.columns
)

# The result object exposes its table through the `dataframe` attribute.
R_global_df = R_global.dataframe
display(R_global_df)

Unnamed: 0,ASG002_BC3C_24H:A10,ASG002_BC3C_24H:A11,ASG002_BC3C_24H:A19,ASG002_BC3C_24H:A20,ASG002_BC3C_24H:A21,ASG002_BC3C_24H:B10,ASG002_BC3C_24H:B11,ASG002_BC3C_24H:B14,ASG002_BC3C_24H:B15,ASG002_BC3C_24H:C13,...,MOAR010_BC3C_24H:L20,MOAR011_BC3C_24H:C01,MOAR011_BC3C_24H:C02,MOAR011_BC3C_24H:C03,MOAR011_BC3C_24H:C10,MOAR011_BC3C_24H:C11,MOAR011_BC3C_24H:F07,MOAR011_BC3C_24H:F08,MOAR011_BC3C_24H:F09,MOAR011_BC3C_24H:J10
Androgen,-0.052205,-0.009637,0.001458,-0.040076,0.001462,0.020136,0.004311,0.016317,0.020567,0.035863,...,-0.04446,-0.015874,-0.001836,-0.009356,-0.05255,-0.004687,-0.055101,0.029626,0.026057,-0.588229
CDK1,-1.370532,-1.096315,0.158076,-0.415599,0.142851,-0.396843,0.175272,0.357992,0.121663,0.191247,...,-0.041579,-0.070601,-0.292598,-0.170956,-0.211362,-0.063809,-0.952346,0.020134,-0.051757,-0.085772
CDK2,-0.199628,-0.179097,-0.033936,-0.009521,-0.001534,-0.026391,0.010606,-0.036194,0.028296,-0.004736,...,-0.040186,-0.012025,-0.001244,-0.001947,-0.0222,0.023793,-0.058737,-0.029688,-0.032806,-0.004377
CDK4_6,-0.02556,-0.234152,-0.04199,-0.514687,0.040537,0.002469,-0.007975,0.097475,0.086086,0.032284,...,-0.57412,0.048721,0.097801,-0.089174,-1.036702,-1.209372,0.318559,0.199071,-0.0632,-0.014663
EGFR,0.593527,0.494842,0.226497,0.364155,0.488417,0.111375,-0.414051,0.263987,0.283782,-0.048051,...,0.177674,0.04375,-0.055218,-0.034943,-0.085847,-0.42273,-0.448268,-0.557163,-0.31266,-0.226505
Estrogen,-0.129022,-0.211317,-0.197229,-0.405123,-0.902831,-0.31539,-0.077474,-0.226972,-0.153746,-0.038831,...,-0.066488,0.056192,0.047125,-0.011484,0.020754,0.129819,-1.487126,-0.228859,0.211322,-0.210604
FGFR,-0.11384,-0.185143,-0.090836,0.052301,-0.028906,-0.409823,-0.029598,-0.040569,-0.06501,-0.293798,...,0.159993,-0.020353,0.061862,-0.008303,0.036716,-0.275223,-0.54273,0.036991,0.227267,0.050912
PI3K,-1.899083,-1.699003,-1.453657,-1.241045,-0.701467,0.287819,-0.136329,-0.239045,-0.832239,-0.280001,...,-0.047727,-0.159268,-0.026473,-0.483874,-0.467464,0.029214,-0.787233,0.232736,-0.236637,0.026072
p53,-0.214295,-0.218556,-0.12901,-0.409512,0.048253,-1.629077,-1.475287,-0.119606,-0.087778,-1.325797,...,-0.303078,0.248986,0.024075,0.227816,0.201096,0.281975,-0.226557,-0.050237,0.029332,0.600741
TOP2A,-0.157979,0.118584,-0.222759,-0.149415,-0.123947,0.034827,0.080351,-1.993301,-0.189976,-0.307039,...,0.011948,0.136376,0.133636,-0.080119,-0.387286,0.094415,-0.683675,0.035846,0.008868,-0.387796


In [11]:
# Save the global response table to the output folder and show it again.
r_global_csv_path = os.path.join(out_dir, "R_global_annotated.csv")
R_global_df.to_csv(r_global_csv_path)
display(R_global_df)

Unnamed: 0,ASG002_BC3C_24H:A10,ASG002_BC3C_24H:A11,ASG002_BC3C_24H:A19,ASG002_BC3C_24H:A20,ASG002_BC3C_24H:A21,ASG002_BC3C_24H:B10,ASG002_BC3C_24H:B11,ASG002_BC3C_24H:B14,ASG002_BC3C_24H:B15,ASG002_BC3C_24H:C13,...,MOAR010_BC3C_24H:L20,MOAR011_BC3C_24H:C01,MOAR011_BC3C_24H:C02,MOAR011_BC3C_24H:C03,MOAR011_BC3C_24H:C10,MOAR011_BC3C_24H:C11,MOAR011_BC3C_24H:F07,MOAR011_BC3C_24H:F08,MOAR011_BC3C_24H:F09,MOAR011_BC3C_24H:J10
Androgen,-0.052205,-0.009637,0.001458,-0.040076,0.001462,0.020136,0.004311,0.016317,0.020567,0.035863,...,-0.04446,-0.015874,-0.001836,-0.009356,-0.05255,-0.004687,-0.055101,0.029626,0.026057,-0.588229
CDK1,-1.370532,-1.096315,0.158076,-0.415599,0.142851,-0.396843,0.175272,0.357992,0.121663,0.191247,...,-0.041579,-0.070601,-0.292598,-0.170956,-0.211362,-0.063809,-0.952346,0.020134,-0.051757,-0.085772
CDK2,-0.199628,-0.179097,-0.033936,-0.009521,-0.001534,-0.026391,0.010606,-0.036194,0.028296,-0.004736,...,-0.040186,-0.012025,-0.001244,-0.001947,-0.0222,0.023793,-0.058737,-0.029688,-0.032806,-0.004377
CDK4_6,-0.02556,-0.234152,-0.04199,-0.514687,0.040537,0.002469,-0.007975,0.097475,0.086086,0.032284,...,-0.57412,0.048721,0.097801,-0.089174,-1.036702,-1.209372,0.318559,0.199071,-0.0632,-0.014663
EGFR,0.593527,0.494842,0.226497,0.364155,0.488417,0.111375,-0.414051,0.263987,0.283782,-0.048051,...,0.177674,0.04375,-0.055218,-0.034943,-0.085847,-0.42273,-0.448268,-0.557163,-0.31266,-0.226505
Estrogen,-0.129022,-0.211317,-0.197229,-0.405123,-0.902831,-0.31539,-0.077474,-0.226972,-0.153746,-0.038831,...,-0.066488,0.056192,0.047125,-0.011484,0.020754,0.129819,-1.487126,-0.228859,0.211322,-0.210604
FGFR,-0.11384,-0.185143,-0.090836,0.052301,-0.028906,-0.409823,-0.029598,-0.040569,-0.06501,-0.293798,...,0.159993,-0.020353,0.061862,-0.008303,0.036716,-0.275223,-0.54273,0.036991,0.227267,0.050912
PI3K,-1.899083,-1.699003,-1.453657,-1.241045,-0.701467,0.287819,-0.136329,-0.239045,-0.832239,-0.280001,...,-0.047727,-0.159268,-0.026473,-0.483874,-0.467464,0.029214,-0.787233,0.232736,-0.236637,0.026072
p53,-0.214295,-0.218556,-0.12901,-0.409512,0.048253,-1.629077,-1.475287,-0.119606,-0.087778,-1.325797,...,-0.303078,0.248986,0.024075,0.227816,0.201096,0.281975,-0.226557,-0.050237,0.029332,0.600741
TOP2A,-0.157979,0.118584,-0.222759,-0.149415,-0.123947,0.034827,0.080351,-1.993301,-0.189976,-0.307039,...,0.011948,0.136376,0.133636,-0.080119,-0.387286,0.094415,-0.683675,0.035846,0.008868,-0.387796
