## This notebook runs inference with a GEARS model trained on the Norman et al. 2019 dataset

- Download trained GEARS model and Dataloader from Dataverse
- Model is trained on Norman et al. 2019 (Science) dataset
- The examples below show how to predict perturbation outcomes and genetic interaction (GI) scores

In [1]:
import sys
import tarfile
from zipfile import ZipFile

import torch

# Make the gears package in the parent directory importable.
sys.path.append('../')

from gears import PertData, GEARS
from gears.utils import dataverse_download

### Download saved model and dataloader

In [2]:
## Download dataloader from dataverse
dataverse_download('https://dataverse.harvard.edu/api/access/datafile/6979957', 'norman_umi_go.tar.gz')

## Extract and set up dataloader directory
## (tarfile is imported in the notebook's import cell)
with tarfile.open('norman_umi_go.tar.gz', 'r:gz') as tar:
    # The archive comes from the network, so extract it with the 'data'
    # filter when this Python supports extraction filters; it rejects
    # members that would be written outside the destination directory.
    if hasattr(tarfile, 'data_filter'):
        tar.extractall(filter='data')
    else:
        # Older interpreters have no extraction filters; fall back to the
        # original unfiltered extraction.
        tar.extractall()

Downloading...
100%|█████████████████████████████████████| 1.10G/1.10G [01:18<00:00, 13.9MiB/s]


In [3]:
## Download model from dataverse
dataverse_download('https://dataverse.harvard.edu/api/access/datafile/10457098', 'model.zip')

## Extract and set up model directory
# Bind the archive to `model_zip` rather than `zip`, so the builtin zip()
# is not shadowed for the remaining cells of the notebook.
with ZipFile('model.zip', 'r') as model_zip:
    # Unpack into the working directory; the checkpoint is later loaded
    # from './model_ckpt'.
    model_zip.extractall(path = './')

Downloading...
100%|█████████████████████████████████████| 8.50M/8.50M [00:01<00:00, 6.60MiB/s]


### Load model and dataloader

In [4]:
# Locations and names used by this cell and by the model-loading cell below.
data_path = './'
data_name = 'norman_umi_go'
model_name = 'gears_misc_umi_no_test'

# Load the extracted dataset, create the 'no_test' split with a fixed seed,
# and build the dataloaders.
pert_data = PertData(data_path)
pert_data.load(data_path = data_path + data_name)
pert_data.prepare_split(split = 'no_test', seed = 1)
pert_data.get_dataloader(batch_size = 32, test_batch_size = 128)

# The GEARS model itself is instantiated and its checkpoint loaded in the
# next cell; building it here as well would only repeat that work.

Downloading...
100%|█████████████████████████████████████| 9.46M/9.46M [00:01<00:00, 8.81MiB/s]
Downloading...
100%|███████████████████████████████████████| 559k/559k [00:00<00:00, 2.05MiB/s]
These perturbations are not in the GO graph and their perturbation can thus not be predicted
['RHOXF2BB+ctrl' 'LYL1+IER5L' 'ctrl+IER5L' 'KIAA1804+ctrl' 'IER5L+ctrl'
 'RHOXF2BB+ZBTB25' 'RHOXF2BB+SET']
Local copy of pyg dataset is detected. Loading...
Done!
Creating new splits....
Saving new splits at ./norman_umi_go/splits/norman_umi_go_no_test_1_0.75.pkl
Done!
Creating dataloaders....
Done!


In [5]:
# Pick the device at run time instead of pinning a specific GPU index:
# a fixed index such as 'cuda:5' fails on machines with fewer GPUs.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Build the GEARS wrapper around the prepared data and restore the
# pretrained weights extracted to './model_ckpt'.
gears_model = GEARS(pert_data, device = device, 
                        weight_bias_track = False, 
                        proj_name = 'gears', 
                        exp_name = model_name)
gears_model.load_pretrained('./model_ckpt')

### Make transcriptional outcome predictions

In [6]:
# Predict the expression outcome of perturbing CNN1 and CBL together.
# The argument is a list of perturbations, each given as a list of gene
# names; the result is displayed as the cell output.
gears_model.predict(
    [
        ['CNN1', 'CBL'],
    ]
)

{'CNN1_CBL': array([3.9031005e-03, 4.6599126e-01, 9.3744494e-02, ..., 3.5035353e+00,
        1.6265536e-08, 3.7898731e-01], dtype=float32)}

### Make GI outcome prediction

In [None]:
## To reproduce the results from the paper, use the same gene set as below.
## GI_predict also works when GI_genes_file is set to None.

# Fetch the gene-set file (a local copy is reused if one exists).
dataverse_download(
    'https://dataverse.harvard.edu/api/access/datafile/6979958',
    'genes_with_hi_mean.npy',
)

# Compute the GI metrics for the CNN1 + CBL combination.
gears_model.GI_predict(
    ['CNN1', 'CBL'],
    GI_genes_file='./genes_with_hi_mean.npy',
)

Found local copy...


{'ts': TheilSenRegressor(fit_intercept=False, max_iter=1000,
                   max_subpopulation=100000.0, random_state=1000),
 'c1': 0.7503451149637692,
 'c2': 1.0240665805483913,
 'mag': 1.2695393467498621,
 'dcor': 0.8333100160359697,
 'dcor_singles': 0.743836659719548,
 'dcor_first': 0.774246309241828,
 'dcor_second': 0.7954046018165959,
 'corr_fit': 0.9218557491921747,
 'dominance': 0.13506713411872476,
 'eq_contr': 0.9733993334631894}