# dynGENIE3 tutorial


## Description

Notebook tutorial based on the dynGENIE3 documentation.

# Setup

## Library import
We import all the required Python libraries

In [1]:
import numpy as np
import pickle

In [2]:
# IO
from pathlib import Path

# Data manipulation
import pandas as pd

# Visualizations
import matplotlib as mpl
# `plt` must alias the pyplot submodule (the plotting API), not the top-level package.
import matplotlib.pyplot as plt

import seaborn as sns


# handy / other
from collections import defaultdict
import re, os, time
from importlib import reload
from datetime import datetime
# Current local date as a YYYY-MM-DD string; the bare name on the last line displays it.
today = f"{datetime.today():%Y-%m-%d}"
today

'2024-08-13'

## Local library import
We import all the required local libraries.

In [3]:
from dynGENIE3 import dynGENIE3


# Running dynGENIE3


## Example data

In [4]:
# Load the example data: one pickle file holding a 4-tuple that is unpacked into
# the time series, their time points, the decay rates and the gene names.
# NOTE: pickle.load can execute arbitrary code; only open pickle files from a trusted source.
# The context manager closes the file even if unpickling raises.
with open('./TS_data.pkl', 'rb') as f:
    (TS_data, time_points, decay_rates, gene_names) = pickle.load(f)

## Run dynGENIE3 with its default parameters

In [5]:
# Default run: only the time-series data and their time points are supplied,
# so every other setting keeps its default value.
VIM, alphas, prediction_score, stability_score, treeEstimators = dynGENIE3(
    TS_data, time_points
)

Tree method: RF
K: sqrt
Number of trees: 1000
alpha min: 0.0020247678471533486
alpha max: 0.025135982338199504


running single threaded jobs
Gene 1/10...
Gene 2/10...
Gene 3/10...
Gene 4/10...
Gene 5/10...
Gene 6/10...
Gene 7/10...
Gene 8/10...
Gene 9/10...
Gene 10/10...
Elapsed time: 8.87 seconds


## Set the values of the decay rates

In [6]:
# Same inputs as the default run, but the decay rates loaded from the example
# data are passed explicitly through `alpha`.
VIM2, alphas2, prediction_score, stability_score, treeEstimators = dynGENIE3(
    TS_data,
    time_points,
    alpha=decay_rates,
)

Tree method: RF
K: sqrt
Number of trees: 1000
alpha min: 0.02
alpha max: 0.02


running single threaded jobs
Gene 1/10...
Gene 2/10...
Gene 3/10...
Gene 4/10...
Gene 5/10...
Gene 6/10...
Gene 7/10...
Gene 8/10...
Gene 9/10...
Gene 10/10...
Elapsed time: 7.91 seconds


## Run dynGENIE3 on time series data and steady-state data

In [7]:
# Read the steady-state data from a text file, skipping the first line of the file.
SS_data = np.loadtxt(
    'SS_data.txt',
    skiprows=1,
)

In [8]:
# Combine the time-series data with the steady-state data loaded above.
VIM3, alphas3, prediction_score, stability_score, treeEstimators = dynGENIE3(
    TS_data,
    time_points,
    SS_data=SS_data,
)

Tree method: RF
K: sqrt
Number of trees: 1000
alpha min: 0.0020247678471533486
alpha max: 0.025135982338199504


running single threaded jobs
Gene 1/10...
Gene 2/10...
Gene 3/10...
Gene 4/10...
Gene 5/10...
Gene 6/10...
Gene 7/10...
Gene 8/10...
Gene 9/10...
Gene 10/10...
Elapsed time: 10.97 seconds


## Restrict the candidate regulators to a subset of genes

In [9]:
# Only these genes are considered as candidate regulators.
regulators = [
    'CD19',
    'CDH17',
    'RAD51',
    'OSR2',
    'TBX3',
]
# The gene names are passed alongside the list of regulator names.
VIM4, alphas4, prediction_score, stability_score, treeEstimators = dynGENIE3(
    TS_data,
    time_points,
    gene_names=gene_names,
    regulators=regulators,
)

Tree method: RF
K: sqrt
Number of trees: 1000
alpha min: 0.0020247678471533486
alpha max: 0.025135982338199504


running single threaded jobs
Gene 1/10...
Gene 2/10...
Gene 3/10...
Gene 4/10...
Gene 5/10...
Gene 6/10...
Gene 7/10...
Gene 8/10...
Gene 9/10...
Gene 10/10...
Elapsed time: 7.79 seconds


## Change the tree-based method and its settings

In [10]:
# Settings for this run:
#   tree_method - 'ET' selects the Extra-Trees method
#   K           - number of randomly chosen candidate regulators at each tree node
#   ntrees      - number of trees per ensemble
tree_method = 'ET'
K = 7
ntrees = 50

# Run the method with these settings
VIM5, alphas5, prediction_score, stability_score, treeEstimators = dynGENIE3(
    TS_data,
    time_points,
    tree_method=tree_method,
    K=K,
    ntrees=ntrees,
)

Tree method: ET
K: 7
Number of trees: 50
alpha min: 0.0020247678471533486
alpha max: 0.025135982338199504


running single threaded jobs
Gene 1/10...
Gene 2/10...
Gene 3/10...
Gene 4/10...
Gene 5/10...
Gene 6/10...
Gene 7/10...
Gene 8/10...
Gene 9/10...
Gene 10/10...
Elapsed time: 0.29 seconds


## Compute the ranking quality scores

In [11]:
# Random Forests are required here: the prediction score can only be computed with them.
tree_method = 'RF'
VIM6, alphas6, prediction_score, stability_score, treeEstimators = dynGENIE3(
    TS_data,
    time_points,
    tree_method=tree_method,
    compute_quality_scores=True,
)

Tree method: RF
K: sqrt
Number of trees: 1000
alpha min: 0.0020247678471533486
alpha max: 0.025135982338199504


running single threaded jobs
Gene 1/10...
Gene 2/10...
Gene 3/10...
Gene 4/10...
Gene 5/10...
Gene 6/10...
Gene 7/10...
Gene 8/10...
Gene 9/10...
Gene 10/10...
Elapsed time: 11.53 seconds


## Save the tree models

In [12]:
# Ask dynGENIE3 to keep the fitted tree models
# (presumably returned in `treeEstimators`; confirm against the dynGENIE3 docs).
VIM7, alphas7, prediction_score, stability_score, treeEstimators = dynGENIE3(
    TS_data,
    time_points,
    save_models=True,
)

Tree method: RF
K: sqrt
Number of trees: 1000
alpha min: 0.0020247678471533486
alpha max: 0.025135982338199504


running single threaded jobs
Gene 1/10...
Gene 2/10...
Gene 3/10...
Gene 4/10...
Gene 5/10...
Gene 6/10...
Gene 7/10...
Gene 8/10...
Gene 9/10...
Gene 10/10...
Elapsed time: 7.75 seconds


## Predict gene expression profiles in double knockout experiments

In [13]:
# Learn the models used for the knockout prediction: candidate regulators are
# restricted to `regulators` and the tree models are saved.
VIM8, alphas8, prediction_score, stability_score, treeEstimators = dynGENIE3(
    TS_data,
    time_points,
    gene_names=gene_names,
    regulators=regulators,
    save_models=True,
)

Tree method: RF
K: sqrt
Number of trees: 1000
alpha min: 0.0020247678471533486
alpha max: 0.025135982338199504


running single threaded jobs
Gene 1/10...
Gene 2/10...
Gene 3/10...
Gene 4/10...
Gene 5/10...
Gene 6/10...
Gene 7/10...
Gene 8/10...
Gene 9/10...
Gene 10/10...
Elapsed time: 7.61 seconds


In [14]:
# Load the array stored in WT_data.npy; it is the first argument of the
# double-knockout prediction call further down.
WT_data = np.load('WT_data.npy')

In [15]:
from dynGENIE3 import dynGENIE3_predict_doubleKO

In [16]:
# Predict the time series for a double knockout of CDH17 and CD19, using the
# tree models and decay rates obtained from the run that produced `alphas8`.
TS_predict = dynGENIE3_predict_doubleKO(
    WT_data,
    treeEstimators,
    alphas8,
    gene_names,
    regulators,
    'CDH17',  # first knocked-out gene
    'CD19',   # second knocked-out gene
    10,       # presumably the number of time points to predict (TODO confirm against the function signature)
    50,       # presumably the time interval between predicted points (TODO confirm)
)

Predicting time series...
Elapsed time: 2.73 seconds


## Write the predictions

In [17]:
from dynGENIE3 import get_link_list

### Show the names of the genes

In [18]:
# Print every link from the default run as "<gene> <gene> <score>",
# labelled with the gene names.
all_links = get_link_list(VIM, gene_names=gene_names)
for regulator, target, weight in all_links:
    print(regulator, target, weight)

CREB5 GATA5 0.3864550365898561
TBX3 XRCC2 0.27769405837712835
GATA5 CREB5 0.2692005681065455
CREB5 CD93 0.24202933446478245
GATA5 CD93 0.21355663120517449
CD93 GATA5 0.21170341789536698
CREB5 RAD51 0.2094564213584338
CD93 CREB5 0.18824140549576798
CREB5 XRCC2 0.18781558263620302
XRCC2 TBX3 0.18762976397348094
OSR2 ZNF394 0.17930275380415153
ZNF394 OSR2 0.17376288630909179
CD93 CD19 0.15587608174961215
GATA5 XRCC2 0.15302519860117017
RAD51 CD19 0.15017391683908773
CD93 OSR2 0.1391942418070537
GATA5 OSR2 0.13343785400556535
CREB5 TBX3 0.1307764224471311
CREB5 CDH17 0.1294070441004252
CD93 ZNF394 0.1293640423919777
CDH17 ZNF394 0.12827019358541608
CDH17 TBX3 0.12721823653013478
TBX3 CREB5 0.12671238437543414
CD93 CDH17 0.126677912965819
GATA5 TBX3 0.1261311379758978
OSR2 CD19 0.12464873157274785
TBX3 CDH17 0.12404675957047775
CDH17 CD93 0.11948184414101067
CD19 RAD51 0.11780056507446077
CREB5 CD19 0.11713395990710948
TBX3 GATA5 0.11602328741561012
CD19 CDH17 0.11533652147222265
CREB5 OSR2

### Show only the links that are directed from the candidate regulators

In [19]:
# Same listing, restricted to links that start from one of the candidate regulators.
regulator_links = get_link_list(
    VIM,
    gene_names=gene_names,
    regulators=regulators,
)
for regulator, target, weight in regulator_links:
    print(regulator, target, weight)

TBX3 XRCC2 0.27769405837712835
OSR2 ZNF394 0.17930275380415153
RAD51 CD19 0.15017391683908773
CDH17 ZNF394 0.12827019358541608
CDH17 TBX3 0.12721823653013478
TBX3 CREB5 0.12671238437543414
OSR2 CD19 0.12464873157274785
TBX3 CDH17 0.12404675957047775
CDH17 CD93 0.11948184414101067
CD19 RAD51 0.11780056507446077
TBX3 GATA5 0.11602328741561012
CD19 CDH17 0.11533652147222265
CDH17 CD19 0.11446985794282381
OSR2 RAD51 0.1110607653559567
RAD51 CDH17 0.11102170502253313
CD19 ZNF394 0.10985222614023707
TBX3 ZNF394 0.10984531712467561
RAD51 TBX3 0.10680029762116425
CDH17 OSR2 0.1010172124066403
RAD51 ZNF394 0.09779112522401788
OSR2 CDH17 0.09572673679697463
CDH17 RAD51 0.09549532213564663
CD19 TBX3 0.08492213370275689
RAD51 CD93 0.08096465024955939
TBX3 OSR2 0.08066363489330138
CD19 OSR2 0.08064182217467114
OSR2 TBX3 0.0805261512708722
TBX3 CD19 0.07888165555633746
TBX3 RAD51 0.07865553992700589
TBX3 CD93 0.07828008900089484
RAD51 XRCC2 0.07513044971496444
CD19 CREB5 0.07374385730443285
CD19 XRC

### Show the first 5 links only

In [20]:
# Limit the regulator-restricted listing to its first 5 links via `maxcount`.
top_links = get_link_list(
    VIM,
    gene_names=gene_names,
    regulators=regulators,
    maxcount=5,
)
for regulator, target, weight in top_links:
    print(regulator, target, weight)

TBX3 XRCC2 0.27769405837712835
OSR2 ZNF394 0.17930275380415153
RAD51 CD19 0.15017391683908773
CDH17 ZNF394 0.12827019358541608
CDH17 TBX3 0.12721823653013478


### Write the predicted links in a file

In [21]:
from dynGENIE3 import write_link_list

# Write the links of the default run, labelled with gene names, to ranking.txt.
f = write_link_list(
    'ranking.txt',
    VIM,
    gene_names=gene_names,
)

Edges written to ranking.txt
