# Testing DRPredICT validation

**Authorship:**
Adam Klie, *03/29/2022*
***
**Description:**
Notebook for testing the validation of DRPredICT architectures

<div class="alert alert-block alert-warning">
<b>TODOs</b>:
<ul>
    <b><li></li></b>
    </ul>
</div>

In [3]:
import numpy as np
import pandas as pd
import torch

# Autoreload extension
if 'autoreload' not in get_ipython().extension_manager.loaded:
    %load_ext autoreload
%autoreload 2

import sys
sys.path.append("../drpredict")

In [87]:
torch.cuda.current_device()

0

# Random dataset
<div class="alert alert-info" role="alert">
  <b>Just test out the basic function of our eugene architectures</b>
</div>

## Instantiate DRPredICT architecture: VanillaAE

In [38]:
from VanillaAE import VanillaAE

In [39]:
encoder_args = {"activation": "sigmoid"}
decoder_args = {"dropout_rate": 0.2}

In [40]:
mutAE = VanillaAE(1000, 10, hidden_dims=[500], encoder_kwargs=encoder_args, decoder_kwargs=decoder_args)

In [41]:
x = torch.randn(10, 1000)
out = mutAE(x)
out.shape

torch.Size([10, 1000])

## Load data

In [8]:
from torch.utils.data import dataset
from torch.utils.data import DataLoader

### Training set

In [9]:
training_dataset = dataset.TensorDataset(torch.randn(1000, 1000))
training_dataloader = DataLoader(training_dataset, batch_size=32, num_workers=4)
training_dataset[0][0].shape

torch.Size([1000])

In [10]:
for i_batch, batch in enumerate(training_dataloader):
    x = batch[0]
    outs = mutAE(x)
    print(x.shape, outs.shape)
    if i_batch==3:
        break

torch.Size([32, 1000]) torch.Size([32, 1000])
torch.Size([32, 1000]) torch.Size([32, 1000])
torch.Size([32, 1000]) torch.Size([32, 1000])
torch.Size([32, 1000]) torch.Size([32, 1000])


### Validation set

In [11]:
validation_dataset = dataset.TensorDataset(torch.randn(100, 1000))
validation_dataloader = DataLoader(validation_dataset, batch_size=32, num_workers=4)
validation_dataset[0][0].shape

torch.Size([1000])

In [12]:
for i_batch, batch in enumerate(validation_dataloader):
    x = batch[0]
    outs = mutAE(x)
    print(x.shape, outs.shape)
    if i_batch==3:
        break

torch.Size([32, 1000]) torch.Size([32, 1000])
torch.Size([32, 1000]) torch.Size([32, 1000])
torch.Size([32, 1000]) torch.Size([32, 1000])
torch.Size([4, 1000]) torch.Size([4, 1000])


## Training with PyTorch Lightning

In [13]:
import pytorch_lightning as pl
from pytorch_lightning.loggers import TensorBoardLogger

In [14]:
logger = TensorBoardLogger("random_test", name="VanillaAE")
trainer = pl.Trainer(gpus=1, max_epochs=10, logger=logger)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


In [19]:
trainer.fit(model=mutAE, train_dataloader=validation_dataloader, val_dataloaders=validation_dataloader)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Set SLURM handle signals.

  | Name    | Type                 | Params
-------------------------------------------------
0 | encoder | FullyConnectedModule | 505 K 
1 | decoder | FullyConnectedModule | 506 K 
-------------------------------------------------
1.0 M     Trainable params
0         Non-trainable params
1.0 M     Total params
4.048     Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

  f"The number of training samples ({self.num_training_batches}) is smaller than the logging interval"


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

In [26]:
trainer.predict(model=mutAE, dataloaders=validation_dataloader)[0].shape

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 4it [00:00, ?it/s]

torch.Size([32, 10])

## Instantiate DRPredICT architecture: MultiEncoder

In [27]:
omic_types = ["mutation", "expression", "cn"]
in_dims = [1000, 1000, 1000]
out_dims = [6, 6, 6]
h_dims = [[500], [500], [500]]
encoder_args = [{"activation": "sigmoid"}, {"activation": "relu"}, {"activation": "sigmoid"}]
fcn_args = {"dropout_rate": 0.2, "output_dim": 1}

In [28]:
from MultiEncoder import MultiEncoder

In [29]:
drpredict = MultiEncoder(omics=omic_types, input_dims=in_dims, output_dims=out_dims, hidden_dims=h_dims, encoder_kwargs=encoder_args, fcn_kwargs=fcn_args)

## Load data

In [8]:
from torch.utils.data import dataset
from torch.utils.data import DataLoader

In [30]:
training_dataset = dataset.TensorDataset(torch.randn(1000, 1000), torch.randn(1000, 1000), torch.randn(1000, 1000), torch.randn(1000))
training_dataloader = DataLoader(training_dataset, batch_size=32, num_workers=4)
training_dataset[0][0].shape, training_dataset[0][1].shape, training_dataset[0][2].shape, training_dataset[0][3]

(torch.Size([1000]), torch.Size([1000]), torch.Size([1000]), tensor(0.0843))

In [33]:
validation_dataset = dataset.TensorDataset(torch.randn(100, 1000), torch.randn(100, 1000), torch.randn(100, 1000), torch.randn(100))
validation_dataloader = DataLoader(validation_dataset, batch_size=32, num_workers=4)
validation_dataset[0][0].shape, validation_dataset[0][1].shape, validation_dataset[0][2].shape, validation_dataset[0][3]

(torch.Size([1000]), torch.Size([1000]), torch.Size([1000]), tensor(-0.6169))

In [34]:
import pytorch_lightning as pl
from pytorch_lightning.loggers import TensorBoardLogger

In [35]:
logger = TensorBoardLogger("random_test", name="MultiEncoder")
trainer = pl.Trainer(gpus=1, max_epochs=10, logger=logger)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


In [37]:
trainer.fit(model=drpredict, train_dataloader=validation_dataloader, val_dataloaders=validation_dataloader)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Set SLURM handle signals.

  | Name     | Type                 | Params
--------------------------------------------------
0 | encoders | ModuleDict           | 1.5 M 
1 | fcn      | FullyConnectedModule | 19    
--------------------------------------------------
1.5 M     Trainable params
0         Non-trainable params
1.5 M     Total params
6.042     Total estimated model params size (MB)
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")


Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

In [26]:
trainer.predict(model=mutAE, dataloaders=validation_dataloader)[0].shape

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 4it [00:00, ?it/s]

torch.Size([32, 10])

# MultiomicDataModule

## Instantiate DRPredICT architecture: VanillaAE

In [109]:
from VanillaAE import VanillaAE

In [110]:
encoder_args = {"activation": "sigmoid"}
decoder_args = {"dropout_rate": 0.2}

In [111]:
mutAE = VanillaAE("mutation", 18679, 10, hidden_dims=[500], encoder_kwargs=encoder_args, decoder_kwargs=decoder_args)

In [112]:
x = torch.randn(10, 18679)
out = mutAE(x)
out.shape

torch.Size([10, 18679])

## Load data

In [113]:
from MultiomicDataModule import MultiomicDataModule

In [114]:
MULTIOMIC_DATA = "/cellar/users/aklie/projects/hackathons/data/multiomic_drug_response/training"

In [115]:
multiomic_datamodule = MultiomicDataModule(file_ext=MULTIOMIC_DATA, batch_size=512, num_workers=4, split=0.9, dataset_kwargs=dict(mutation=True, expression=False, cn=False))

In [116]:
multiomic_datamodule.setup()

In [117]:
for i_batch, batch in enumerate(multiomic_datamodule.test_dataloader()):
    x = batch['mutation']
    outs = mutAE(x)
    print(x.shape, outs.shape)
    if i_batch==3:
        break

torch.Size([512, 18679]) torch.Size([512, 18679])
torch.Size([512, 18679]) torch.Size([512, 18679])
torch.Size([512, 18679]) torch.Size([512, 18679])
torch.Size([512, 18679]) torch.Size([512, 18679])


In [120]:
import pytorch_lightning as pl
from pytorch_lightning.loggers import TensorBoardLogger

In [121]:
logger = TensorBoardLogger("mutation_test", name="VanillaAE")
trainer = pl.Trainer(gpus=1, max_epochs=10, logger=logger)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


In [122]:
trainer.fit(model=mutAE, datamodule=multiomic_datamodule)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Set SLURM handle signals.

  | Name    | Type                 | Params
-------------------------------------------------
0 | encoder | FullyConnectedModule | 9.3 M 
1 | decoder | FullyConnectedModule | 9.4 M 
-------------------------------------------------
18.7 M    Trainable params
0         Non-trainable params
18.7 M    Total params
74.835    Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

RuntimeError: DataLoader worker (pid(s) 2889652, 2889655) exited unexpectedly

## Instantiate DRPredICT architecture: MultiEncoder

In [143]:
omic_types = ["mutation", "expression"]
in_dims = [18679, 19177]
out_dims = [6, 6]
h_dims = [[500], [500]]
encoder_args = [{"activation": "sigmoid"}, {"activation": "relu"}]
fcn_args = {"dropout_rate": 0.2, "output_dim": 1}

In [156]:
from MultiEncoder import MultiEncoder

In [157]:
drpredict = MultiEncoder(omics=omic_types, input_dims=in_dims, output_dims=out_dims, hidden_dims=h_dims, encoder_kwargs=encoder_args, fcn_kwargs=fcn_args)

In [158]:
MULTIOMIC_DATA = "/cellar/users/aklie/projects/hackathons/data/multiomic_drug_response/training"

In [159]:
multiomic_datamodule = MultiomicDataModule(file_ext=MULTIOMIC_DATA, batch_size=512, num_workers=4, split=0.9, dataset_kwargs=dict(mutation=True, expression=True, cn=False))

In [160]:
multiomic_datamodule.setup()

In [163]:
for i_batch, batch in enumerate(multiomic_datamodule.test_dataloader()):
    print(batch["mutation"].shape, batch["auc"].shape)
    if i_batch==3:
        break

torch.Size([512, 18679]) torch.Size([512])
torch.Size([512, 18679]) torch.Size([512])
torch.Size([512, 18679]) torch.Size([512])
torch.Size([512, 18679]) torch.Size([512])


In [164]:
batch.keys()

dict_keys(['name', 'tissue', 'drug_name', 'drug_encoding', 'auc', 'mutation', 'expression'])

In [165]:
import pytorch_lightning as pl
from pytorch_lightning.loggers import TensorBoardLogger

In [166]:
logger = TensorBoardLogger("mutation_expression_test", name="MultiEncoder")
trainer = pl.Trainer(gpus=1, max_epochs=10, logger=logger)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


In [168]:
trainer.fit(model=drpredict, datamodule=multiomic_datamodule)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Set SLURM handle signals.

  | Name     | Type                 | Params
--------------------------------------------------
0 | encoders | ModuleDict           | 18.9 M
1 | fcn      | FullyConnectedModule | 13    
--------------------------------------------------
18.9 M    Trainable params
0         Non-trainable params
18.9 M    Total params
75.740    Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

RuntimeError: Found dtype Double but expected Float

# References