In [14]:
import itertools

import numpy as np
import pandas as pd

from pathlib import Path
from tensorflow import keras

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from screendl import model as screendl

from cdrpy.feat.encoders import PandasEncoder
from cdrpy.datasets import Dataset
from cdrpy.data.preprocess import normalize_responses
from cdrpy.mapper import BatchedResponseGenerator
from cdrpy.metrics import tf_metrics

In [8]:
# Input locations, all expressed relative to this notebook's working directory.
root = Path("../../../../datastore")
data_dir = root.joinpath(
    "inputs/CellModelPassportsGDSCv1v2HCIv9AllDrugsHallmarkCombat"
)

# The two feature tables live under ScreenDL/; the labels file sits directly
# in the dataset directory.
exp_path, mol_path, labels_path = (
    data_dir.joinpath(rel_path)
    for rel_path in (
        "ScreenDL/FeatureGeneExpression.csv",
        "ScreenDL/FeatureMorganFingerprints.csv",
        "LabelsLogIC50.csv",
    )
)

In [22]:
# Labels table: read with pandas' default integer index.
obs = pd.read_csv(labels_path)

# Feature tables: the first CSV column becomes the row index.
exp_feat, mol_feat = (
    pd.read_csv(feature_path, index_col=0)
    for feature_path in (exp_path, mol_path)
)

In [21]:
# Wrap each feature table in an encoder.
exp_enc, mol_enc = PandasEncoder(exp_feat), PandasEncoder(mol_feat)

# Encoders are registered by name; the "exp" / "mol" keys are the ones used
# to look the encoders up again when the model's input widths are computed.
cell_encoders = {"exp": exp_enc}
drug_encoders = {"mol": mol_enc}

# Combine the labels table with both encoder mappings and print a summary.
D = Dataset(
    obs,
    cell_encoders=cell_encoders,
    drug_encoders=drug_encoders,
)
print(D)

Dataset(name=None, size=281_924, n_cells=887, n_drugs=455)


In [41]:
# Input widths come from the last axis of each encoder's shape.
exp_dim = D.cell_encoders["exp"].shape[-1]
mol_dim = D.drug_encoders["mol"].shape[-1]

# Separate list objects are passed for each *_hidden_dims argument, exactly
# as in a direct call with inline literals.
model = screendl.create_model(
    exp_dim=exp_dim,
    mol_dim=mol_dim,
    exp_hidden_dims=[128, 64],
    mol_hidden_dims=[128, 64],
    shared_hidden_dims=[64, 32],
    activation="relu",
)

model.summary()

Model: "ScreenDL"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 exp_input (InputLayer)         [(None, 4366)]       0           []                               
                                                                                                  
 mol_input (InputLayer)         [(None, 512)]        0           []                               
                                                                                                  
 exp_mlp_1 (MLPBlock)           (None, 128)          558976      ['exp_input[0][0]']              
                                                                                                  
 mol_mlp_1 (MLPBlock)           (None, 128)          65664       ['mol_input[0][0]']              
                                                                                           

In [42]:
# Step size handed to Adam as its first positional argument.
LEARNING_RATE = 1e-4

optimizer = keras.optimizers.Adam(LEARNING_RATE)

# MSE is the training loss; the Pearson metric is only reported, not optimised.
model.compile(
    optimizer=optimizer,
    loss="mean_squared_error",
    metrics=[tf_metrics.pearson],
)

In [43]:
# NOTE(review): the second positional argument is presumably the batch size
# (going by the class name) - confirm against the cdrpy API.
BATCH_SIZE = 32

gen = BatchedResponseGenerator(D, BATCH_SIZE)

# The sequence is built from the full dataset D; no subset is selected here.
seq = gen.flow_from_dataset(D)

In [None]:
# Number of passes over `seq`.
N_EPOCHS = 50

# No validation data is passed, so `history` only holds training-side values.
history = model.fit(
    seq,
    epochs=N_EPOCHS,
    verbose=2,
)