### Read files

Read SHAP scores for TFs

In [2]:
import pandas as pd
import numpy as np

# Tissue whose ranking results are loaded below; change this to analyse another tissue
tissue = "Liver"

# Load the ranking results (SHAP scores) written for that tissue
shap_path = f"results/ranking/shap-val-{tissue}.csv"
shap = pd.read_csv(shap_path)

Read the list of all TF Ensembl IDs

In [3]:
# Load the TF list and keep only its "Ensembl ID" column as a NumPy array
tf_table = pd.read_csv("data/tf_list/tf_list.csv")
tf_universe = tf_table["Ensembl ID"].values

Read the Ensembl ID to gene name mapping

In [4]:
# Load the mapping table; the first CSV column is used as the index
symbol_table = pd.read_csv("data/ensembl_to_symbol.csv", index_col=0)

# Keep only the first row seen for each "ensembl_id" so every ID appears once
is_first_occurrence = ~symbol_table["ensembl_id"].duplicated()
ensembl_to_symbol = symbol_table.loc[is_first_occurrence, :]

### Generate GRN for the Tissue
Parameters to be specified:
- `top_tfs`: the number of top-ranked TFs to keep for each target. These are used to find the consensus of top-ranked TFs among the different replicates.

In [19]:
# Number of top-ranked TFs taken per target from each replicate before
# looking for the TFs shared by all replicates
top_tfs = 50

In [20]:
from functools import reduce

# Replicate labels present anywhere in the SHAP table; each target's TF lists
# are intersected across all of these replicates.
reps = shap["rep"].unique()

# One small (target, tf) edge table per target gene; concatenated once at the end.
# A separate list name keeps `GRN` from being a list in one line and a DataFrame in the next.
grn_parts = []
for target, df in shap.groupby("gene_ensembl"):

    # The first `top_tfs` rows of each replicate are taken as that replicate's top TFs.
    # NOTE(review): this relies on the rows already being ordered by rank within
    # each replicate in the input CSV - confirm against the step that writes it.
    top_per_rep = [
        df.loc[df["rep"] == rep, "tf_ensembl"].iloc[:top_tfs]
        for rep in reps
    ]

    # Keep only the TFs that appear in the top list of every replicate
    tfs = reduce(np.intersect1d, top_per_rep)
    grn_parts.append(pd.DataFrame({'target': target, 'tf': tfs}))

if grn_parts:
    # ignore_index gives the edge list one unique 0..N-1 index instead of
    # repeating each per-target frame's own 0..k labels.
    GRN = pd.concat(grn_parts, ignore_index=True)
else:
    # pd.concat raises on an empty list; return an empty edge list with the
    # same columns when the SHAP table has no targets.
    GRN = pd.DataFrame({'target': [], 'tf': []})

### Save GRN

In [21]:
import os

# Create the output directory if it is missing. exist_ok avoids the separate
# existence check (and its race), and makedirs also creates a missing
# "results" parent, which os.mkdir would fail on.
os.makedirs("results/GRN", exist_ok=True)

# Write the edge list; the DataFrame index is written as the first CSV column.
# NOTE(review): the file name does not include `tissue`, so running the notebook
# for another tissue overwrites this file - confirm that is intended.
GRN.to_csv("results/GRN/GRN.csv")