In [1]:
import numpy as np
import pandas as pd
import scanpy as sc
import tqdm
import sklearn
import pickle

from DataLoader import DataLoader
from Inference import Inference
from Predictor import Predictor

In [2]:
adata_path = '../../Data/sc_training.h5ad'

# Reproducibility
# Seed NumPy's global random generator once, before any stochastic cell runs,
# so that np.random.* draws (and library code that falls back to the global
# NumPy generator) repeat on Restart Kernel -> Run All.
random_seed = 0
np.random.seed(random_seed)

# Inferring
n_infer_instances = 1000 # Number of instances used for inferring the network. -1 = all
n_infer_estimators = 4 # Number of trees in random forest for inferring the network
max_depth_infer = 100 # max depth of random forest for inferring the network
# NOTE(review): importance_threshold is not referenced by any visible cell - confirm where it is consumed.
importance_threshold = 1

# Predicting
n_train_instances = 1000 # Number of instances used for training predictor models. -1 = all
n_train_estimators = 4 # Number of trees in random forest training predictor models
max_depth_train = 100 # max depth of random forest training predictor models

# Classification
# Passed to predictor.predict_knockout_effect(gene, n_components, n_neighbors).
n_components = 100
n_neighbors = 11

# Dataloader

In [3]:
# Build the project DataLoader and have it read the dataset found at
# `adata_path` (set in the configuration cell). Every later cell uses this
# `dataloader` object.
dataloader = DataLoader()
dataloader.load_data(adata_path)


Loading data from h5ad file.


  self.gene_expressions = pd.DataFrame.sparse.from_spmatrix(self.adata.X, columns=self.adata.var_names).astype(float).to_numpy()



Succesfully loaded the data.


In [47]:
# Sanity check: pick one entry of the 'condition' column at random and print
# the state proportions the DataLoader reports for it.
conditions = dataloader.adata.obs['condition'].to_numpy()
# NOTE(review): the draw is over the raw column, not its unique values, so a
# label that occurs more often is proportionally more likely to be picked.
# The result is stored in `gene`, but the column also holds non-gene labels
# (a later cell queries 'Unperturbed') - confirm both are intended.
# The draw uses NumPy's global generator, so it only repeats across runs if
# that generator was seeded earlier in the notebook.
gene = np.random.choice(conditions)
proportions = dataloader.get_state_proportions_of_condition(gene)
print(gene, proportions)

Tcf7 [0.1043771  0.29292929 0.31313131 0.27609428 0.01346801]


# Inference

In [6]:
# Infer the genetic regulatory network from the loaded data, using the
# random-forest settings from the configuration cell (instances, number of
# trees, max depth), then save it under the name 'GRNetwork2'.
# NOTE(review): the stored output of this cell ends in KeyboardInterrupt, so
# the saved run never reached save_network - rerun to completion before
# trusting anything downstream.
inference = Inference(dataloader)
inference.infer(n_infer_instances, n_infer_estimators, max_depth_infer)
inference.save_network('GRNetwork2')
# NOTE(review): `thresh` is not used by any later visible cell, and
# `importance_threshold` from the configuration cell is not passed here -
# confirm how the threshold is meant to be applied.
thresh = inference.find_lowest_threshold(epsilon=0.01)


Infering genetic regulatory network.


  0%|          | 0/15077 [00:01<?, ?it/s]


KeyboardInterrupt: 

# Predictor

In [None]:
# Train the models that predict gene expressions, using the data loader, the
# inference object and the training settings from the configuration cell,
# then save them under the name 'GRNetwork2_models'.
# NOTE(review): the stored output shows 15077 iterations finishing in about
# 4 s, directly after an inference cell that was interrupted - presumably the
# network was incomplete when this ran; confirm by rerunning both cells.
predictor = Predictor(dataloader, inference)
predictor.train(n_train_instances, n_train_estimators, max_depth_train)
predictor.save_models('GRNetwork2_models')


Training models for predicting gene expressions.


100%|██████████| 15077/15077 [00:04<00:00, 3066.52it/s]


In [8]:
# Baseline: state proportions reported for the 'Unperturbed' condition.
proportions = dataloader.get_state_proportions_of_condition('Unperturbed')
print(f'Unperturbed: {proportions}')

# Predict the state proportions after knocking out each gene of interest.
# Every result is kept in `knockout_predictions` (gene -> predicted
# proportions) so the genes can be compared with each other and with the
# baseline afterwards; `predictions` alone only holds the last gene's result
# once the loop has finished.
genes = ['Aqr', 'Bach2', 'Bhlhe40', 'Ets1', 'Fosb', 'Mafk', 'Stat3']
knockout_predictions = {}
for gene in genes:
    predictions = predictor.predict_knockout_effect(gene, n_components, n_neighbors)
    knockout_predictions[gene] = predictions
    print(f'{gene}, {predictions}')

Unperturbed: [0.06749699 0.20972278 0.31337887 0.39212535 0.01727601]

Predicting state proportions after knockout of Aqr.
Aqr, [0.06789875 0.2032945  0.31578947 0.39875452 0.01426276]

Predicting state proportions after knockout of Bach2.
Bach2, [0.06809964 0.2032945  0.31578947 0.39855364 0.01426276]

Predicting state proportions after knockout of Bhlhe40.
Bhlhe40, [0.06809964 0.2032945  0.31619124 0.39835275 0.01406187]

Predicting state proportions after knockout of Ets1.
Ets1, [0.06830052 0.20369626 0.31558859 0.39815187 0.01426276]

Predicting state proportions after knockout of Fosb.
Fosb, [0.06789875 0.2032945  0.31578947 0.39875452 0.01426276]

Predicting state proportions after knockout of Mafk.
Mafk, [0.06789875 0.2032945  0.31578947 0.39875452 0.01426276]

Predicting state proportions after knockout of Stat3.
Stat3, [0.06809964 0.20289273 0.31659301 0.39835275 0.01406187]
