In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scanpy as sc
from scipy import sparse

# Limit scanpy logging to level 1 (errors and warnings, per scanpy's settings docs).
sc.settings.verbosity = 1

In [2]:
from GenKI_SGS.preprocesing import build_adata
from GenKI_SGS.dataLoader import DataLoader
from GenKI_SGS.train import VGAE_trainer
from GenKI_SGS import utils

# Enable IPython's autoreload extension in mode 2 so that edited modules
# (e.g. the GenKI_SGS package imported above) are re-imported automatically
# before each cell executes.
%load_ext autoreload
%autoreload 2

## Load Data - 1000 Genes, 500 Cells

In [4]:
# Read the benchmark dataset and stage the expression layers.
# Each layer stores its own copy of X: assigning `adata.X` directly only keeps
# a reference to the same array, so the preprocessing calls below (which may
# operate in place) could otherwise overwrite the "counts" / "norm" snapshots.
adata = sc.read_h5ad('Results/Bench/so_1000_500.h5ad')
adata.layers["counts"] = adata.X.copy()  # snapshot taken before normalisation
sc.pp.normalize_total(adata)
adata.layers["norm"] = adata.X.copy()  # snapshot taken after normalisation, before scaling
sc.pp.scale(adata)
adata.X = sparse.csr_matrix(adata.X)  # keep the scaled matrix in CSR form
adata



AnnData object with n_obs × n_vars = 500 × 387
    obs: 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'cluster', 'sample', 'percent.mt', 'RNA_snn_res.0.8', 'seurat_clusters', 'celltype', 'Batches'
    var: 'vst.mean', 'vst.variance', 'vst.variance.expected', 'vst.variance.standardized', 'vst.variable', 'mean', 'std'
    obsm: 'X_pca', 'X_umap'
    varm: 'PCs'
    layers: 'counts', 'norm'

In [5]:
%%timeit -r 5
# load data

data_wrapper =  DataLoader(
                adata, # adata object
                target_gene = ["Ccr2"], # KO gene name
                target_cell = None, # obsname for cell type, if none use all
                obs_label = "ident", # colname for genes
                GRN_file_dir = "GRNs", # folder name for GRNs
                rebuild_GRN = True, # whether build GRN by pcNet
                pcNet_name = "so_1000_500", # GRN file name
                verbose = True, # whether verbose
                n_cpus = 8, # multiprocessing
                )

data_wt = data_wrapper.load_data()
data_ko = data_wrapper.load_kodata()

# init trainer

hyperparams = {"epochs": 100, 
            "lr": 7e-4, 
            "beta": 1e-4, 
            "seed": 8096}
log_dir=None 

sensei = VGAE_trainer(data_wt, 
                    epochs=hyperparams["epochs"], 
                    lr=hyperparams["lr"], 
                    log_dir=log_dir, 
                    beta=hyperparams["beta"],
                    seed=hyperparams["seed"],
                    verbose=False,
                    )
print('Initiating Training')
sensei.train()

# get distance between wt and ko

z_mu_wt, z_std_wt = sensei.get_latent_vars(data_wt)
z_mu_ko, z_std_ko = sensei.get_latent_vars(data_ko)
dis = utils.get_distance(z_mu_ko, z_std_ko, z_mu_wt, z_std_wt, by="KL")
print(dis.shape)

use all the cells (500) in adata
build GRN


2024-03-28 10:53:55,269	INFO worker.py:1636 -- Started a local Ray instance.


ray init, using 8 CPUs
execution time of making pcNet: 20.20 s
GRN has been built and saved in "GRNs\so_1000_500.npz"
init completed

Initiating Training
(387,)
use all the cells (500) in adata
build GRN


2024-03-28 10:54:16,369	INFO worker.py:1636 -- Started a local Ray instance.


ray init, using 8 CPUs
execution time of making pcNet: 19.09 s
GRN has been built and saved in "GRNs\so_1000_500.npz"
init completed

Initiating Training
(387,)
use all the cells (500) in adata
build GRN


2024-03-28 10:54:37,458	INFO worker.py:1636 -- Started a local Ray instance.


ray init, using 8 CPUs
execution time of making pcNet: 19.21 s
GRN has been built and saved in "GRNs\so_1000_500.npz"
init completed

Initiating Training
(387,)
use all the cells (500) in adata
build GRN


2024-03-28 10:54:58,270	INFO worker.py:1636 -- Started a local Ray instance.


ray init, using 8 CPUs
execution time of making pcNet: 19.06 s
GRN has been built and saved in "GRNs\so_1000_500.npz"
init completed

Initiating Training
(387,)
use all the cells (500) in adata
build GRN


2024-03-28 10:55:19,122	INFO worker.py:1636 -- Started a local Ray instance.


ray init, using 8 CPUs
execution time of making pcNet: 18.99 s
GRN has been built and saved in "GRNs\so_1000_500.npz"
init completed

Initiating Training
(387,)
use all the cells (500) in adata
build GRN


2024-03-28 10:55:39,916	INFO worker.py:1636 -- Started a local Ray instance.


ray init, using 8 CPUs
execution time of making pcNet: 18.13 s
GRN has been built and saved in "GRNs\so_1000_500.npz"
init completed

Initiating Training
(387,)
20.7 s ± 398 ms per loop (mean ± std. dev. of 5 runs, 1 loop each)


In [7]:
# raw ranked gene list

# Run the pipeline once more (outside %%timeit) and save the raw ranked gene list.

# --- data loading ---
data_wrapper = DataLoader(
    adata,  # AnnData object prepared above
    target_gene=["Ccr2"],  # gene to knock out
    target_cell=None,  # obs name of the cell type; None -> all cells
    obs_label="ident",  # obs column label (original note: "colname for genes" -- TODO confirm)
    GRN_file_dir="GRNs",  # folder holding GRN files
    rebuild_GRN=True,  # build the GRN with pcNet again
    pcNet_name="so_1000_500",  # GRN file name
    verbose=True,  # print progress messages
    n_cpus=8,  # worker count for multiprocessing
)
data_wt = data_wrapper.load_data()
data_ko = data_wrapper.load_kodata()

# --- trainer setup ---
hyperparams = dict(epochs=100, lr=7e-4, beta=1e-4, seed=8096)
log_dir = None
# The dict keys match the trainer's keyword names, so they are unpacked directly.
sensei = VGAE_trainer(data_wt, log_dir=log_dir, verbose=False, **hyperparams)
print('Initiating Training')
sensei.train()

# --- distance between WT and KO latent variables ---
z_mu_wt, z_std_wt = sensei.get_latent_vars(data_wt)
z_mu_ko, z_std_ko = sensei.get_latent_vars(data_ko)
dis = utils.get_distance(z_mu_ko, z_std_ko, z_mu_wt, z_std_wt, by="KL")
print(dis.shape)

# --- ranked table: write to CSV and preview the top rows ---
res_raw = utils.get_generank(data_wt, dis, rank=True)
res_raw.to_csv('Results/Bench/GenKI_res_1000_500.csv')
res_raw.head(20)

use all the cells (500) in adata
build GRN


2024-03-28 12:03:08,368	INFO worker.py:1636 -- Started a local Ray instance.


ray init, using 8 CPUs
execution time of making pcNet: 19.35 s
GRN has been built and saved in "GRNs\so_1000_500.npz"
init completed

Initiating Training
(387,)


Unnamed: 0,dis,rank
Ccr2,0.0619,1
Gapdh,0.000202,2
Tmem37,0.000191,3
Dpep2,0.000186,4
Cebpa,0.000181,5
Cxcl10,0.000169,6
Clec4n,0.000169,7
Calr,0.000159,8
Chil3,0.000157,9
Ifit3b,0.000155,10


## Load Data - 1000 Genes, 1000 Cells

In [3]:
# Read the benchmark dataset and stage the expression layers.
# Each layer stores its own copy of X: assigning `adata.X` directly only keeps
# a reference to the same array, so the preprocessing calls below (which may
# operate in place) could otherwise overwrite the "counts" / "norm" snapshots.
adata = sc.read_h5ad('Results/Bench/so_1000_1000.h5ad')
adata.layers["counts"] = adata.X.copy()  # snapshot taken before normalisation
sc.pp.normalize_total(adata)
adata.layers["norm"] = adata.X.copy()  # snapshot taken after normalisation, before scaling
sc.pp.scale(adata)
adata.X = sparse.csr_matrix(adata.X)  # keep the scaled matrix in CSR form
adata



AnnData object with n_obs × n_vars = 1000 × 387
    obs: 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'cluster', 'sample', 'percent.mt', 'RNA_snn_res.0.8', 'seurat_clusters', 'celltype', 'Batches'
    var: 'vst.mean', 'vst.variance', 'vst.variance.expected', 'vst.variance.standardized', 'vst.variable', 'mean', 'std'
    obsm: 'X_pca', 'X_umap'
    varm: 'PCs'
    layers: 'counts', 'norm'

In [4]:
%%timeit -r 5
# load data

data_wrapper =  DataLoader(
                adata, # adata object
                target_gene = ["Ccr2"], # KO gene name
                target_cell = None, # obsname for cell type, if none use all
                obs_label = "ident", # colname for genes
                GRN_file_dir = "GRNs", # folder name for GRNs
                rebuild_GRN = True, # whether build GRN by pcNet
                pcNet_name = "so_1000_1000", # GRN file name
                verbose = True, # whether verbose
                n_cpus = 8, # multiprocessing
                )

data_wt = data_wrapper.load_data()
data_ko = data_wrapper.load_kodata()

# init trainer

hyperparams = {"epochs": 100, 
            "lr": 7e-4, 
            "beta": 1e-4, 
            "seed": 8096}
log_dir=None 

sensei = VGAE_trainer(data_wt, 
                    epochs=hyperparams["epochs"], 
                    lr=hyperparams["lr"], 
                    log_dir=log_dir, 
                    beta=hyperparams["beta"],
                    seed=hyperparams["seed"],
                    verbose=False,
                    )
print('Initiating Training')
sensei.train()

# get distance between wt and ko

z_mu_wt, z_std_wt = sensei.get_latent_vars(data_wt)
z_mu_ko, z_std_ko = sensei.get_latent_vars(data_ko)
dis = utils.get_distance(z_mu_ko, z_std_ko, z_mu_wt, z_std_wt, by="KL")
print(dis.shape)

use all the cells (1000) in adata
build GRN


2024-03-28 12:15:02,464	INFO worker.py:1636 -- Started a local Ray instance.


ray init, using 8 CPUs
execution time of making pcNet: 25.69 s
GRN has been built and saved in "GRNs\so_1000_1000.npz"
init completed

Initiating Training
(387,)
use all the cells (1000) in adata
build GRN


2024-03-28 12:15:30,117	INFO worker.py:1636 -- Started a local Ray instance.


ray init, using 8 CPUs
execution time of making pcNet: 25.24 s
GRN has been built and saved in "GRNs\so_1000_1000.npz"
init completed

Initiating Training
(387,)
use all the cells (1000) in adata
build GRN


2024-03-28 12:15:57,408	INFO worker.py:1636 -- Started a local Ray instance.


ray init, using 8 CPUs
execution time of making pcNet: 25.33 s
GRN has been built and saved in "GRNs\so_1000_1000.npz"
init completed

Initiating Training
(387,)
use all the cells (1000) in adata
build GRN


2024-03-28 12:16:24,737	INFO worker.py:1636 -- Started a local Ray instance.


ray init, using 8 CPUs
execution time of making pcNet: 24.47 s
GRN has been built and saved in "GRNs\so_1000_1000.npz"
init completed

Initiating Training
(387,)
use all the cells (1000) in adata
build GRN


2024-03-28 12:16:50,979	INFO worker.py:1636 -- Started a local Ray instance.


ray init, using 8 CPUs
execution time of making pcNet: 25.08 s
GRN has been built and saved in "GRNs\so_1000_1000.npz"
init completed

Initiating Training
(387,)
use all the cells (1000) in adata
build GRN


2024-03-28 12:17:18,057	INFO worker.py:1636 -- Started a local Ray instance.


ray init, using 8 CPUs
execution time of making pcNet: 27.46 s
GRN has been built and saved in "GRNs\so_1000_1000.npz"
init completed

Initiating Training
(387,)
27.5 s ± 1.06 s per loop (mean ± std. dev. of 5 runs, 1 loop each)


In [5]:
# Run the pipeline once more (outside %%timeit) and save the raw ranked gene list.

# --- data loading ---
data_wrapper = DataLoader(
    adata,  # AnnData object prepared above
    target_gene=["Ccr2"],  # gene to knock out
    target_cell=None,  # obs name of the cell type; None -> all cells
    obs_label="ident",  # obs column label (original note: "colname for genes" -- TODO confirm)
    GRN_file_dir="GRNs",  # folder holding GRN files
    rebuild_GRN=True,  # build the GRN with pcNet again
    pcNet_name="so_1000_1000",  # GRN file name
    verbose=True,  # print progress messages
    n_cpus=8,  # worker count for multiprocessing
)
data_wt = data_wrapper.load_data()
data_ko = data_wrapper.load_kodata()

# --- trainer setup ---
hyperparams = dict(epochs=100, lr=7e-4, beta=1e-4, seed=8096)
log_dir = None
# The dict keys match the trainer's keyword names, so they are unpacked directly.
sensei = VGAE_trainer(data_wt, log_dir=log_dir, verbose=False, **hyperparams)
print('Initiating Training')
sensei.train()

# --- distance between WT and KO latent variables ---
z_mu_wt, z_std_wt = sensei.get_latent_vars(data_wt)
z_mu_ko, z_std_ko = sensei.get_latent_vars(data_ko)
dis = utils.get_distance(z_mu_ko, z_std_ko, z_mu_wt, z_std_wt, by="KL")
print(dis.shape)

# --- ranked table: write to CSV and preview the top rows ---
res_raw = utils.get_generank(data_wt, dis, rank=True)
res_raw.to_csv('Results/Bench/GenKI_res_1000_1000.csv')
res_raw.head(20)

use all the cells (1000) in adata
build GRN


2024-03-28 12:21:20,275	INFO worker.py:1636 -- Started a local Ray instance.


ray init, using 8 CPUs
execution time of making pcNet: 27.14 s
GRN has been built and saved in "GRNs\so_1000_1000.npz"
init completed

Initiating Training
(387,)


Unnamed: 0,dis,rank
Ccr2,1.546506,1
Cd9,0.000199,2
Ccl7,0.000199,3
Ecm1,0.000198,4
Timp1,0.000198,5
Tppp3,0.000198,6
Ly6c2,0.000195,7
Thbs1,0.000189,8
Cxcl2,0.000189,9
Ankrd37,0.000189,10


## Load Data - 3000 Genes, 1000 Cells

In [3]:
# Read the benchmark dataset and stage the expression layers.
# Each layer stores its own copy of X: assigning `adata.X` directly only keeps
# a reference to the same array, so the preprocessing calls below (which may
# operate in place) could otherwise overwrite the "counts" / "norm" snapshots.
adata = sc.read_h5ad('Results/Bench/so_3000_1000.h5ad')
adata.layers["counts"] = adata.X.copy()  # snapshot taken before normalisation
sc.pp.normalize_total(adata)
adata.layers["norm"] = adata.X.copy()  # snapshot taken after normalisation, before scaling
sc.pp.scale(adata)
adata.X = sparse.csr_matrix(adata.X)  # keep the scaled matrix in CSR form
adata



AnnData object with n_obs × n_vars = 1000 × 730
    obs: 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'cluster', 'sample', 'percent.mt', 'RNA_snn_res.0.8', 'seurat_clusters', 'celltype', 'Batches'
    var: 'vst.mean', 'vst.variance', 'vst.variance.expected', 'vst.variance.standardized', 'vst.variable', 'mean', 'std'
    obsm: 'X_pca', 'X_umap'
    varm: 'PCs'
    layers: 'counts', 'norm'

In [4]:
%%timeit -r 5
# load data

data_wrapper =  DataLoader(
                adata, # adata object
                target_gene = ["Ccr2"], # KO gene name
                target_cell = None, # obsname for cell type, if none use all
                obs_label = "ident", # colname for genes
                GRN_file_dir = "GRNs", # folder name for GRNs
                rebuild_GRN = True, # whether build GRN by pcNet
                pcNet_name = "so_3000_1000", # GRN file name
                verbose = True, # whether verbose
                n_cpus = 8, # multiprocessing
                )

data_wt = data_wrapper.load_data()
data_ko = data_wrapper.load_kodata()

# init trainer

hyperparams = {"epochs": 100, 
            "lr": 7e-4, 
            "beta": 1e-4, 
            "seed": 8096}
log_dir=None 

sensei = VGAE_trainer(data_wt, 
                    epochs=hyperparams["epochs"], 
                    lr=hyperparams["lr"], 
                    log_dir=log_dir, 
                    beta=hyperparams["beta"],
                    seed=hyperparams["seed"],
                    verbose=False,
                    )
print('Initiating Training')
sensei.train()

# get distance between wt and ko

z_mu_wt, z_std_wt = sensei.get_latent_vars(data_wt)
z_mu_ko, z_std_ko = sensei.get_latent_vars(data_ko)
dis = utils.get_distance(z_mu_ko, z_std_ko, z_mu_wt, z_std_wt, by="KL")
print(dis.shape)

use all the cells (1000) in adata
build GRN


2024-03-28 12:22:53,076	INFO worker.py:1636 -- Started a local Ray instance.


ray init, using 8 CPUs
execution time of making pcNet: 122.78 s
GRN has been built and saved in "GRNs\so_3000_1000.npz"
init completed

Initiating Training
(730,)
use all the cells (1000) in adata
build GRN


2024-03-28 12:25:00,524	INFO worker.py:1636 -- Started a local Ray instance.


ray init, using 8 CPUs
execution time of making pcNet: 123.59 s
GRN has been built and saved in "GRNs\so_3000_1000.npz"
init completed

Initiating Training
(730,)
use all the cells (1000) in adata
build GRN


2024-03-28 12:27:08,722	INFO worker.py:1636 -- Started a local Ray instance.


ray init, using 8 CPUs
execution time of making pcNet: 121.49 s
GRN has been built and saved in "GRNs\so_3000_1000.npz"
init completed

Initiating Training
(730,)
use all the cells (1000) in adata
build GRN


2024-03-28 12:29:15,022	INFO worker.py:1636 -- Started a local Ray instance.


ray init, using 8 CPUs
execution time of making pcNet: 120.59 s
GRN has been built and saved in "GRNs\so_3000_1000.npz"
init completed

Initiating Training
(730,)
use all the cells (1000) in adata
build GRN


2024-03-28 12:31:20,617	INFO worker.py:1636 -- Started a local Ray instance.


ray init, using 8 CPUs
execution time of making pcNet: 126.18 s
GRN has been built and saved in "GRNs\so_3000_1000.npz"
init completed

Initiating Training
(730,)
use all the cells (1000) in adata
build GRN


2024-03-28 12:33:31,902	INFO worker.py:1636 -- Started a local Ray instance.


ray init, using 8 CPUs
execution time of making pcNet: 118.46 s
GRN has been built and saved in "GRNs\so_3000_1000.npz"
init completed

Initiating Training
(730,)
2min 6s ± 2.82 s per loop (mean ± std. dev. of 5 runs, 1 loop each)


In [5]:
# Run the pipeline once more (outside %%timeit) and save the raw ranked gene list.

# --- data loading ---
data_wrapper = DataLoader(
    adata,  # AnnData object prepared above
    target_gene=["Ccr2"],  # gene to knock out
    target_cell=None,  # obs name of the cell type; None -> all cells
    obs_label="ident",  # obs column label (original note: "colname for genes" -- TODO confirm)
    GRN_file_dir="GRNs",  # folder holding GRN files
    rebuild_GRN=True,  # build the GRN with pcNet again
    pcNet_name="so_3000_1000",  # GRN file name
    verbose=True,  # print progress messages
    n_cpus=8,  # worker count for multiprocessing
)
data_wt = data_wrapper.load_data()
data_ko = data_wrapper.load_kodata()

# --- trainer setup ---
hyperparams = dict(epochs=100, lr=7e-4, beta=1e-4, seed=8096)
log_dir = None
# The dict keys match the trainer's keyword names, so they are unpacked directly.
sensei = VGAE_trainer(data_wt, log_dir=log_dir, verbose=False, **hyperparams)
print('Initiating Training')
sensei.train()

# --- distance between WT and KO latent variables ---
z_mu_wt, z_std_wt = sensei.get_latent_vars(data_wt)
z_mu_ko, z_std_ko = sensei.get_latent_vars(data_ko)
dis = utils.get_distance(z_mu_ko, z_std_ko, z_mu_wt, z_std_wt, by="KL")
print(dis.shape)

# --- ranked table: write to CSV and preview the top rows ---
res_raw = utils.get_generank(data_wt, dis, rank=True)
res_raw.to_csv('Results/Bench/GenKI_res_3000_1000.csv')
res_raw.head(20)

use all the cells (1000) in adata
build GRN


2024-03-28 12:43:14,616	INFO worker.py:1636 -- Started a local Ray instance.


ray init, using 8 CPUs
execution time of making pcNet: 119.46 s
GRN has been built and saved in "GRNs\so_3000_1000.npz"
init completed

Initiating Training
(730,)


Unnamed: 0,dis,rank
Ccr2,10.09984,1
Tgfbi,0.171824,2
Sirpb1c,0.015198,3
Ms4a6c,0.013861,4
Sirpb1b,0.006542,5
Ms4a4c,0.006256,6
Cd63,0.005267,7
AA467197,0.004671,8
Gclm,0.004448,9
Fdps,0.004429,10


## Load Data - 5000 Genes, 1000 Cells

In [3]:
# Read the benchmark dataset and stage the expression layers.
# Each layer stores its own copy of X: assigning `adata.X` directly only keeps
# a reference to the same array, so the preprocessing calls below (which may
# operate in place) could otherwise overwrite the "counts" / "norm" snapshots.
adata = sc.read_h5ad('Results/Bench/so_5000_1000.h5ad')
adata.layers["counts"] = adata.X.copy()  # snapshot taken before normalisation
sc.pp.normalize_total(adata)
adata.layers["norm"] = adata.X.copy()  # snapshot taken after normalisation, before scaling
sc.pp.scale(adata)
adata.X = sparse.csr_matrix(adata.X)  # keep the scaled matrix in CSR form
adata



AnnData object with n_obs × n_vars = 1000 × 900
    obs: 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'cluster', 'sample', 'percent.mt', 'RNA_snn_res.0.8', 'seurat_clusters', 'celltype', 'Batches'
    var: 'vst.mean', 'vst.variance', 'vst.variance.expected', 'vst.variance.standardized', 'vst.variable', 'mean', 'std'
    obsm: 'X_pca', 'X_umap'
    varm: 'PCs'
    layers: 'counts', 'norm'

In [4]:
%%timeit -r 5
# load data

data_wrapper =  DataLoader(
                adata, # adata object
                target_gene = ["Ccr2"], # KO gene name
                target_cell = None, # obsname for cell type, if none use all
                obs_label = "ident", # colname for genes
                GRN_file_dir = "GRNs", # folder name for GRNs
                rebuild_GRN = True, # whether build GRN by pcNet
                pcNet_name = "so_5000_1000", # GRN file name
                verbose = True, # whether verbose
                n_cpus = 8, # multiprocessing
                )

data_wt = data_wrapper.load_data()
data_ko = data_wrapper.load_kodata()

# init trainer

hyperparams = {"epochs": 100, 
            "lr": 7e-4, 
            "beta": 1e-4, 
            "seed": 8096}
log_dir=None 

sensei = VGAE_trainer(data_wt, 
                    epochs=hyperparams["epochs"], 
                    lr=hyperparams["lr"], 
                    log_dir=log_dir, 
                    beta=hyperparams["beta"],
                    seed=hyperparams["seed"],
                    verbose=False,
                    )
print('Initiating Training')
sensei.train()

# get distance between wt and ko

z_mu_wt, z_std_wt = sensei.get_latent_vars(data_wt)
z_mu_ko, z_std_ko = sensei.get_latent_vars(data_ko)
dis = utils.get_distance(z_mu_ko, z_std_ko, z_mu_wt, z_std_wt, by="KL")
print(dis.shape)

use all the cells (1000) in adata
build GRN


2024-03-28 12:53:21,606	INFO worker.py:1636 -- Started a local Ray instance.


ray init, using 8 CPUs
execution time of making pcNet: 205.86 s
GRN has been built and saved in "GRNs\so_5000_1000.npz"
init completed

Initiating Training
(900,)
use all the cells (1000) in adata
build GRN


2024-03-28 12:56:53,849	INFO worker.py:1636 -- Started a local Ray instance.


ray init, using 8 CPUs
execution time of making pcNet: 205.52 s
GRN has been built and saved in "GRNs\so_5000_1000.npz"
init completed

Initiating Training
(900,)
use all the cells (1000) in adata
build GRN


2024-03-28 13:00:26,025	INFO worker.py:1636 -- Started a local Ray instance.


ray init, using 8 CPUs
execution time of making pcNet: 207.61 s
GRN has been built and saved in "GRNs\so_5000_1000.npz"
init completed

Initiating Training
(900,)
use all the cells (1000) in adata
build GRN


2024-03-28 13:04:00,421	INFO worker.py:1636 -- Started a local Ray instance.


ray init, using 8 CPUs
execution time of making pcNet: 206.66 s
GRN has been built and saved in "GRNs\so_5000_1000.npz"
init completed

Initiating Training
(900,)
use all the cells (1000) in adata
build GRN


2024-03-28 13:07:33,332	INFO worker.py:1636 -- Started a local Ray instance.


ray init, using 8 CPUs
execution time of making pcNet: 204.82 s
GRN has been built and saved in "GRNs\so_5000_1000.npz"
init completed

Initiating Training
(900,)
use all the cells (1000) in adata
build GRN


2024-03-28 13:11:04,725	INFO worker.py:1636 -- Started a local Ray instance.


ray init, using 8 CPUs
execution time of making pcNet: 204.47 s
GRN has been built and saved in "GRNs\so_5000_1000.npz"
init completed

Initiating Training
(900,)
3min 32s ± 1.16 s per loop (mean ± std. dev. of 5 runs, 1 loop each)


In [5]:
# Run the pipeline once more (outside %%timeit) and save the raw ranked gene list.

# --- data loading ---
data_wrapper = DataLoader(
    adata,  # AnnData object prepared above
    target_gene=["Ccr2"],  # gene to knock out
    target_cell=None,  # obs name of the cell type; None -> all cells
    obs_label="ident",  # obs column label (original note: "colname for genes" -- TODO confirm)
    GRN_file_dir="GRNs",  # folder holding GRN files
    rebuild_GRN=True,  # build the GRN with pcNet again
    pcNet_name="so_5000_1000",  # GRN file name
    verbose=True,  # print progress messages
    n_cpus=8,  # worker count for multiprocessing
)
data_wt = data_wrapper.load_data()
data_ko = data_wrapper.load_kodata()

# --- trainer setup ---
hyperparams = dict(epochs=100, lr=7e-4, beta=1e-4, seed=8096)
log_dir = None
# The dict keys match the trainer's keyword names, so they are unpacked directly.
sensei = VGAE_trainer(data_wt, log_dir=log_dir, verbose=False, **hyperparams)
print('Initiating Training')
sensei.train()

# --- distance between WT and KO latent variables ---
z_mu_wt, z_std_wt = sensei.get_latent_vars(data_wt)
z_mu_ko, z_std_ko = sensei.get_latent_vars(data_ko)
dis = utils.get_distance(z_mu_ko, z_std_ko, z_mu_wt, z_std_wt, by="KL")
print(dis.shape)

# --- ranked table: write to CSV and preview the top rows ---
res_raw = utils.get_generank(data_wt, dis, rank=True)
res_raw.to_csv('Results/Bench/GenKI_res_5000_1000.csv')
res_raw.head(20)

use all the cells (1000) in adata
build GRN


2024-03-28 13:14:35,839	INFO worker.py:1636 -- Started a local Ray instance.


ray init, using 8 CPUs
execution time of making pcNet: 204.42 s
GRN has been built and saved in "GRNs\so_5000_1000.npz"
init completed

Initiating Training
(900,)


Unnamed: 0,dis,rank
Ccr2,1.689432,1
Lst1,4.7e-05,2
Hsp90b1,2e-05,3
Ly86,1.8e-05,4
Ccl2,1.8e-05,5
Stab1,1.8e-05,6
Tgfbr1,1.7e-05,7
Tuba1b,1.7e-05,8
Nop56,1.6e-05,9
Ccnd2,1.6e-05,10


## Load Data - 5000 Genes, 3000 Cells

In [3]:
# Read the benchmark dataset and stage the expression layers.
# Each layer stores its own copy of X: assigning `adata.X` directly only keeps
# a reference to the same array, so the preprocessing calls below (which may
# operate in place) could otherwise overwrite the "counts" / "norm" snapshots.
adata = sc.read_h5ad('Results/Bench/so_5000_3000.h5ad')
adata.layers["counts"] = adata.X.copy()  # snapshot taken before normalisation
sc.pp.normalize_total(adata)
adata.layers["norm"] = adata.X.copy()  # snapshot taken after normalisation, before scaling
sc.pp.scale(adata)
adata.X = sparse.csr_matrix(adata.X)  # keep the scaled matrix in CSR form
adata



AnnData object with n_obs × n_vars = 3000 × 899
    obs: 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'cluster', 'sample', 'percent.mt', 'RNA_snn_res.0.8', 'seurat_clusters', 'celltype', 'Batches'
    var: 'vst.mean', 'vst.variance', 'vst.variance.expected', 'vst.variance.standardized', 'vst.variable', 'mean', 'std'
    obsm: 'X_pca', 'X_umap'
    varm: 'PCs'
    layers: 'counts', 'norm'

In [4]:
%%timeit -r 5
# load data

data_wrapper =  DataLoader(
                adata, # adata object
                target_gene = ["Ccr2"], # KO gene name
                target_cell = None, # obsname for cell type, if none use all
                obs_label = "ident", # colname for genes
                GRN_file_dir = "GRNs", # folder name for GRNs
                rebuild_GRN = True, # whether build GRN by pcNet
                pcNet_name = "so_5000_3000", # GRN file name
                verbose = True, # whether verbose
                n_cpus = 8, # multiprocessing
                )

data_wt = data_wrapper.load_data()
data_ko = data_wrapper.load_kodata()

# init trainer

hyperparams = {"epochs": 100, 
            "lr": 7e-4, 
            "beta": 1e-4, 
            "seed": 8096}
log_dir=None 

sensei = VGAE_trainer(data_wt, 
                    epochs=hyperparams["epochs"], 
                    lr=hyperparams["lr"], 
                    log_dir=log_dir, 
                    beta=hyperparams["beta"],
                    seed=hyperparams["seed"],
                    verbose=False,
                    )
print('Initiating Training')
sensei.train()

# get distance between wt and ko

z_mu_wt, z_std_wt = sensei.get_latent_vars(data_wt)
z_mu_ko, z_std_ko = sensei.get_latent_vars(data_ko)
dis = utils.get_distance(z_mu_ko, z_std_ko, z_mu_wt, z_std_wt, by="KL")
print(dis.shape)

use all the cells (3000) in adata
build GRN


2024-12-16 13:01:54,240	INFO worker.py:1821 -- Started a local Ray instance.


ray init, using 8 CPUs
execution time of making pcNet: 869.99 s
GRN has been built and saved in "GRNs\so_5000_3000.npz"
init completed

Initiating Training
(899,)
use all the cells (3000) in adata
build GRN


2024-12-16 13:16:31,790	INFO worker.py:1821 -- Started a local Ray instance.


ray init, using 8 CPUs
execution time of making pcNet: 879.66 s
GRN has been built and saved in "GRNs\so_5000_3000.npz"
init completed

Initiating Training
(899,)
use all the cells (3000) in adata
build GRN


2024-12-16 13:31:16,873	INFO worker.py:1821 -- Started a local Ray instance.


ray init, using 8 CPUs
execution time of making pcNet: 847.88 s
GRN has been built and saved in "GRNs\so_5000_3000.npz"
init completed

Initiating Training
(899,)
use all the cells (3000) in adata
build GRN


2024-12-16 13:45:30,972	INFO worker.py:1821 -- Started a local Ray instance.


ray init, using 8 CPUs
execution time of making pcNet: 842.49 s
GRN has been built and saved in "GRNs\so_5000_3000.npz"
init completed

Initiating Training
(899,)
use all the cells (3000) in adata
build GRN


2024-12-16 13:59:39,751	INFO worker.py:1821 -- Started a local Ray instance.


ray init, using 8 CPUs
execution time of making pcNet: 843.04 s
GRN has been built and saved in "GRNs\so_5000_3000.npz"
init completed

Initiating Training
(899,)
use all the cells (3000) in adata
build GRN


2024-12-16 14:13:49,029	INFO worker.py:1821 -- Started a local Ray instance.


ray init, using 8 CPUs
execution time of making pcNet: 846.27 s
GRN has been built and saved in "GRNs\so_5000_3000.npz"
init completed

Initiating Training
(899,)
14min 18s ± 14.1 s per loop (mean ± std. dev. of 5 runs, 1 loop each)


In [5]:
# raw ranked gene list

# Run the pipeline once more (outside %%timeit) and save the raw ranked gene
# list for the 5000-gene / 3000-cell dataset.

# --- data loading ---
data_wrapper = DataLoader(
    adata,  # AnnData object prepared above (3000 cells)
    target_gene=["Ccr2"],  # gene to knock out
    target_cell=None,  # obs name of the cell type; None -> all cells
    obs_label="ident",  # obs column label (original note: "colname for genes" -- TODO confirm)
    GRN_file_dir="GRNs",  # folder holding GRN files
    rebuild_GRN=True,  # build the GRN with pcNet again
    # Must name this dataset's own GRN file: with rebuild_GRN=True the previous
    # value "so_5000_1000" overwrote the 1000-cell GRN with a 3000-cell network.
    pcNet_name="so_5000_3000",  # GRN file name
    verbose=True,  # print progress messages
    n_cpus=8,  # worker count for multiprocessing
)
data_wt = data_wrapper.load_data()
data_ko = data_wrapper.load_kodata()

# --- trainer setup ---
hyperparams = {"epochs": 100,
               "lr": 7e-4,
               "beta": 1e-4,
               "seed": 8096}
log_dir = None

sensei = VGAE_trainer(
    data_wt,
    epochs=hyperparams["epochs"],
    lr=hyperparams["lr"],
    log_dir=log_dir,
    beta=hyperparams["beta"],
    seed=hyperparams["seed"],
    verbose=False,
)
print('Initiating Training')
sensei.train()

# --- distance between WT and KO latent variables ---
z_mu_wt, z_std_wt = sensei.get_latent_vars(data_wt)
z_mu_ko, z_std_ko = sensei.get_latent_vars(data_ko)
dis = utils.get_distance(z_mu_ko, z_std_ko, z_mu_wt, z_std_wt, by="KL")
print(dis.shape)

# --- ranked table: write to CSV and preview the top rows ---
res_raw = utils.get_generank(data_wt, dis, rank=True)
res_raw.to_csv('Results/Bench/GenKI_res_5000_3000.csv')
res_raw.head(20)

use all the cells (3000) in adata
build GRN


2024-12-16 14:43:55,621	INFO worker.py:1821 -- Started a local Ray instance.


ray init, using 8 CPUs
execution time of making pcNet: 864.91 s
GRN has been built and saved in "GRNs\so_5000_1000.npz"
init completed

Initiating Training
(899,)


Unnamed: 0,dis,rank
Ccr2,1.194158,1
Ms4a6c,0.00076,2
Sms,0.000725,3
Plpp1,0.000725,4
Basp1,0.000725,5
Ccl12,0.000723,6
Hist1h2bc,0.000723,7
Lag3,0.000723,8
Trbc2,0.000714,9
Fscn1,0.000708,10
