In [5]:
%load_ext autoreload
%autoreload 2

In [6]:
import sys, os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.metrics import roc_auc_score
from new_experiments import run_simulation
from concurrent.futures import ProcessPoolExecutor
from IPython.display import display, clear_output

sys.path.append(os.getcwd())


Instructions for updating:
non-resource variables are not supported in the long term


In [3]:
def run_experiment(datasets=(3,), n_iterations=30, max_workers=3):
    """Run the imputation benchmark and checkpoint the results to CSV.

    For every dataset, iteration 0 is executed synchronously in this process
    with ``sergio=True``; the remaining iterations ``1 .. n_iterations - 1``
    are submitted to a process pool with ``sergio=False``. After every
    finished run the accumulated results are rewritten to
    ``zero_imputation_experiments/imputation_results.csv`` so that partial
    progress survives an interruption.

    Args:
        datasets: Iterable of dataset ids forwarded to ``run_simulation``.
        n_iterations: Total number of iterations per dataset, including the
            synchronous iteration 0.
        max_workers: Size of the process pool used for iterations >= 1.

    Returns:
        None. Results are only persisted to the CSV file.

    Raises:
        Any exception raised by ``run_simulation`` (directly, or re-raised by
        ``future.result()``) propagates and aborts the run.
    """
    results_csv = "zero_imputation_experiments/imputation_results.csv"
    records = []

    def _checkpoint(result):
        # Rebuild the frame from the record list instead of concatenating onto
        # an (initially empty) DataFrame row by row.
        records.append(result)
        pd.DataFrame(records).to_csv(results_csv, index=False)

    for dataset in datasets:
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(f"./zero_imputation_experiments/DS{dataset}/", exist_ok=True)

        # Flags shared by every run_simulation call for this dataset.
        sim_kwargs = dict(
            dataset=dataset,
            saucie=True,
            scScope=True,
            deepImpute=True,
            magic=True,
            genie=True,
            arboreto=False,
            pearson=False,
            roc=True,
            precision_recall_k=False,
            run_with_regs=False,
        )

        # Run for first iteration to prevent race condition
        # NOTE(review): presumably the sergio=True run creates files that the
        # pooled workers read afterwards -- confirm in new_experiments.
        first_result = run_simulation(sergio=True, iteration=0, **sim_kwargs)
        clear_output()
        _checkpoint(first_result)

        with ProcessPoolExecutor(max_workers=max_workers) as executor:
            # Only iteration 0 uses sergio=True; every pooled iteration passes
            # sergio=False.
            futures = [
                executor.submit(
                    run_simulation, sergio=False, iteration=i, **sim_kwargs
                )
                for i in range(1, n_iterations)
            ]
            clear_output()
            # Results are collected in submission order, so CSV rows stay
            # ordered by iteration.
            for future in tqdm(futures):
                result = future.result()
                clear_output(wait=True)
                _checkpoint(result)
    return

In [None]:
# Launch the benchmark; results are checkpointed to
# zero_imputation_experiments/imputation_results.csv after every finished run.
run_experiment()

### Experiments with Other Imputation Methods

In [8]:
import os
# For each dataset: make sure its output folder exists, and call
# run_simulation with only `sergio` enabled when DS6_noisy.npy is not there yet.
# NOTE(review): the file name is "DS6_noisy.npy" for every dataset id -- confirm
# that this is what run_simulation actually writes for DS1 and DS2.
for dataset in (1, 2, 3):
    if not os.path.exists(f"./zero_imputation_experiments/DS{dataset}/"):
        os.makedirs(f"./zero_imputation_experiments/DS{dataset}/")
    if os.path.exists(f"./zero_imputation_experiments/DS{dataset}/DS6_noisy.npy"):
        # Noisy matrix already on disk; nothing to do for this dataset.
        continue
    res = run_simulation(
        dataset=dataset,
        sergio=True,
        iteration=0,
        # Every other switch is turned off for this call.
        **dict.fromkeys(
            (
                "saucie",
                "scScope",
                "deepImpute",
                "magic",
                "genie",
                "arboreto",
                "pearson",
                "roc",
                "precision_recall_k",
                "run_with_regs",
            ),
            False,
        )
    )
    clear_output()

In [7]:
from experiment_utils import run_scvi
import numpy as np
import pandas as pd

def fetch_target_regs(dataset):
    """Return the SERGIO ``(target_file, regs_path)`` pair for a dataset id.

    ``dataset == 1`` maps to the DS1 files and ``dataset == 2`` to the DS2
    files; any other value falls through to the DS3 files.
    """
    if dataset == 1:
        return (
            './SERGIO/data_sets/De-noised_100G_9T_300cPerT_4_DS1/Interaction_cID_4.txt',
            './SERGIO/data_sets/De-noised_100G_9T_300cPerT_4_DS1/Regs_cID_4.txt',
        )
    if dataset == 2:
        return (
            './SERGIO/data_sets/De-noised_400G_9T_300cPerT_5_DS2/Interaction_cID_5.txt',
            './SERGIO/data_sets/De-noised_400G_9T_300cPerT_5_DS2/Regs_cID_5.txt',
        )
    # Fallback: every id other than 1 or 2 resolves to DS3.
    return (
        './SERGIO/data_sets/De-noised_1200G_9T_300cPerT_6_DS3/Interaction_cID_6.txt',
        './SERGIO/data_sets/De-noised_1200G_9T_300cPerT_6_DS3/Regs_cID_6.txt',
    )

def scvi_impute(datasets=(1, 2, 3), n_iterations=8, max_workers=4):
    """Run ``run_scvi`` repeatedly per dataset and checkpoint the results to CSV.

    For each dataset the noisy matrix ``DS6_noisy.npy`` is loaded from
    ``./zero_imputation_experiments/DS{dataset}/`` and ``n_iterations`` calls to
    ``run_scvi`` are dispatched to a process pool. Every collected result is
    appended as a row (``dataset``, ``method``, ``roc``, ``iteration``) and the
    accumulated table is rewritten to
    ``zero_imputation_experiments/scvi_imputation_results.csv``.

    Args:
        datasets: Iterable of dataset ids to process.
        n_iterations: Number of ``run_scvi`` calls submitted per dataset.
        max_workers: Size of the process pool.

    Returns:
        None. Results are only persisted to the CSV file.

    Raises:
        FileNotFoundError: If a dataset's ``DS6_noisy.npy`` is missing.
        Any exception raised inside ``run_scvi`` is re-raised by
        ``future.result()`` and aborts the run.
    """
    results_csv = "zero_imputation_experiments/scvi_imputation_results.csv"
    records = []
    for dataset in datasets:
        save_path = f"./zero_imputation_experiments/DS{dataset}/"
        # NOTE(review): the file is named DS6_noisy.npy for every dataset id --
        # confirm that this matches what the simulation step writes.
        y = np.load(os.path.join(save_path, "DS6_noisy.npy"))
        # Only the interaction (target) file is needed here; the regulators
        # path returned alongside it is not used.
        target_file, _ = fetch_target_regs(dataset)
        with ProcessPoolExecutor(max_workers=max_workers) as executor:
            futures = [
                executor.submit(
                    run_scvi,
                    data=y,
                    save_path=save_path,
                    it=i,
                    file_extension=f"_iter{i}",
                    target_file=target_file,
                )
                for i in range(n_iterations)
            ]
            clear_output()
            # Collected in submission order; each result is a (score, iteration)
            # pair as unpacked below.
            for future in tqdm(futures):
                vim, it = future.result()
                records.append({
                    "dataset": dataset,
                    "method": "scvi",
                    "roc": vim,
                    "iteration": it,
                })
                # Rebuild from the record list rather than concatenating onto an
                # empty DataFrame row by row; rewrite the CSV as a checkpoint.
                pd.DataFrame(records).to_csv(results_csv, index=False)

In [8]:
# Run the scVI benchmark; rows are written to
# zero_imputation_experiments/scvi_imputation_results.csv as results are collected.
# The captured log below shows a multi-hour wall time for this cell.
scvi_impute()

  0%|          | 0/8 [00:00<?, ?it/s]GPU available: True (mps), used: False
GPU available: True (mps), used: False
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
TPU available: False, using: 0 TPU cores
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
IPU available: False, using: 0 IPUs
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
HPU available: False, using: 0 HPUs
HPU available: False, using: 0 HPUs
/Users/joshuaweiner/Desktop/Folders/Projects/zero_imputation/venv/lib/python3.11/site-packages/lightning/pytorch/trainer/setup.py:187: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.
/Users/joshuaweiner/Desktop/Folders/Projects/zero_imputation/venv/lib/python3.11/site-packages/lightning/pytorch/trainer/setup.py:187: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.
/Users/joshuaweiner/Desktop/Folders/Projects/zero_imputation/venv/lib/pyt

Epoch 400/400: 100%|██████████| 400/400 [03:37<00:00,  1.84it/s, v_num=1, train_loss_step=275, train_loss_epoch=223]
Tree method: RF
K: sqrt
Number of trees: 100


running jobs on 12 threads


`Trainer.fit` stopped: `max_epochs=400` reached.


Epoch 400/400: 100%|██████████| 400/400 [03:39<00:00,  1.83it/s, v_num=1, train_loss_step=356, train_loss_epoch=234]


`Trainer.fit` stopped: `max_epochs=400` reached.


Tree method: RF
K: sqrt
Number of trees: 100


running jobs on 12 threads
Epoch 400/400: 100%|██████████| 400/400 [03:51<00:00,  1.73it/s, v_num=1, train_loss_step=146, train_loss_epoch=240]


`Trainer.fit` stopped: `max_epochs=400` reached.


Tree method: RF
K: sqrt
Number of trees: 100


running jobs on 12 threads
Epoch 400/400: 100%|██████████| 400/400 [03:54<00:00,  1.70it/s, v_num=1, train_loss_step=189, train_loss_epoch=225]


`Trainer.fit` stopped: `max_epochs=400` reached.


Tree method: RF
K: sqrt
Number of trees: 100


running jobs on 12 threads
Elapsed time: 6879.97 seconds


100%|██████████| 1133/1133 [00:00<00:00, 224486.11it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/Users/joshuaweiner/Desktop/Folders/Projects/zero_imputation/venv/lib/python3.11/site-packages/lightning/pytorch/trainer/setup.py:187: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.
/Users/joshuaweiner/Desktop/Folders/Projects/zero_imputation/venv/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
/Users/joshuaweiner/Desktop/Folders/Projects/zero_imputation/venv/lib/python3.11/site-packages/lightning/pytorch/loops/fit_loop.py:293: The number of training batches (9) is smaller than the logging interval Trainer(log

Epoch 15/400:   4%|▎         | 14/400 [00:20<08:33,  1.33s/it, v_num=1, train_loss_step=369, train_loss_epoch=262]Elapsed time: 6947.50 seconds


100%|██████████| 1133/1133 [00:00<00:00, 297865.52it/s]


Epoch 40/400:  10%|▉         | 39/400 [00:51<06:26,  1.07s/it, v_num=1, train_loss_step=373, train_loss_epoch=256]

 12%|█▎        | 1/8 [2:00:02<14:00:16, 7202.31s/it]

Epoch 41/400:  10%|█         | 40/400 [00:52<06:34,  1.10s/it, v_num=1, train_loss_step=182, train_loss_epoch=255]

GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/Users/joshuaweiner/Desktop/Folders/Projects/zero_imputation/venv/lib/python3.11/site-packages/lightning/pytorch/trainer/setup.py:187: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.
/Users/joshuaweiner/Desktop/Folders/Projects/zero_imputation/venv/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
/Users/joshuaweiner/Desktop/Folders/Projects/zero_imputation/venv/lib/python3.11/site-packages/lightning/pytorch/loops/fit_loop.py:293: The number of training batches (9) is smaller than the logging interval Trainer(log_every_n_steps=10). Set a lower value for log_every_n_st

Epoch 6/400:   1%|▏         | 5/400 [00:05<07:19,  1.11s/it, v_num=1, train_loss_step=448, train_loss_epoch=273]5]Elapsed time: 6971.29 seconds


100%|██████████| 1133/1133 [00:00<00:00, 320398.22it/s]


Epoch 24/400:   6%|▌         | 23/400 [00:25<06:11,  1.01it/s, v_num=1, train_loss_step=196, train_loss_epoch=255]Elapsed time: 6987.34 seconds


100%|██████████| 1133/1133 [00:00<00:00, 258437.37it/s]


Epoch 1/400:   0%|          | 0/400 [00:00<?, ?it/s] 1.04s/it, v_num=1, train_loss_step=276, train_loss_epoch=255]

GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/Users/joshuaweiner/Desktop/Folders/Projects/zero_imputation/venv/lib/python3.11/site-packages/lightning/pytorch/trainer/setup.py:187: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.
/Users/joshuaweiner/Desktop/Folders/Projects/zero_imputation/venv/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
/Users/joshuaweiner/Desktop/Folders/Projects/zero_imputation/venv/lib/python3.11/site-packages/lightning/pytorch/loops/fit_loop.py:293: The number of training batches (9) is smaller than the logging interval Trainer(log_every_n_steps=10). Set a lower value for log_every_n_st

Epoch 15/400:   4%|▎         | 14/400 [00:14<06:20,  1.01it/s, v_num=1, train_loss_step=177, train_loss_epoch=271]

 38%|███▊      | 3/8 [2:00:44<2:36:54, 1883.00s/it] 

Epoch 1/400:   0%|          | 0/400 [00:00<?, ?it/s] 1.07s/it, v_num=1, train_loss_step=318, train_loss_epoch=269]

GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/Users/joshuaweiner/Desktop/Folders/Projects/zero_imputation/venv/lib/python3.11/site-packages/lightning/pytorch/trainer/setup.py:187: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.
/Users/joshuaweiner/Desktop/Folders/Projects/zero_imputation/venv/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
/Users/joshuaweiner/Desktop/Folders/Projects/zero_imputation/venv/lib/python3.11/site-packages/lightning/pytorch/loops/fit_loop.py:293: The number of training batches (9) is smaller than the logging interval Trainer(log_every_n_steps=10). Set a lower value for log_every_n_st

Epoch 400/400: 100%|██████████| 400/400 [06:55<00:00,  1.04s/it, v_num=1, train_loss_step=169, train_loss_epoch=239]


`Trainer.fit` stopped: `max_epochs=400` reached.


Tree method: RF
K: sqrt
Number of trees: 100


running jobs on 12 threads
Epoch 400/400: 100%|██████████| 400/400 [06:52<00:00,  1.03s/it, v_num=1, train_loss_step=177, train_loss_epoch=235]


`Trainer.fit` stopped: `max_epochs=400` reached.


Tree method: RF
K: sqrt
Number of trees: 100


running jobs on 12 threads
Epoch 400/400: 100%|██████████| 400/400 [07:16<00:00,  1.09s/it, v_num=1, train_loss_step=289, train_loss_epoch=247]
Epoch 384/400:  96%|█████████▌| 383/400 [07:01<00:29,  1.75s/it, v_num=1, train_loss_step=479, train_loss_epoch=232]

`Trainer.fit` stopped: `max_epochs=400` reached.


Tree method: RF
K: sqrt
Number of trees: 100


running jobs on 12 threads
Epoch 400/400: 100%|██████████| 400/400 [07:38<00:00,  1.15s/it, v_num=1, train_loss_step=297, train_loss_epoch=232]


`Trainer.fit` stopped: `max_epochs=400` reached.


Tree method: RF
K: sqrt
Number of trees: 100


running jobs on 12 threads
Elapsed time: 9251.16 seconds


100%|██████████| 1133/1133 [00:00<00:00, 373655.17it/s]
 62%|██████▎   | 5/8 [4:40:52<2:49:14, 3384.80s/it]

Elapsed time: 9261.65 seconds


100%|██████████| 1133/1133 [00:00<00:00, 405334.91it/s]
 75%|███████▌  | 6/8 [4:41:42<1:22:58, 2489.00s/it]

Elapsed time: 9235.34 seconds


100%|██████████| 1133/1133 [00:00<00:00, 340241.03it/s]


Elapsed time: 9281.70 seconds


100%|██████████| 1133/1133 [00:00<00:00, 373802.13it/s]
100%|██████████| 8/8 [4:42:34<00:00, 2119.26s/it]  
