In [None]:
import pandas as pd
import scanpy as sc

### download all BED files

In [None]:
# Build the (file accession, TF) table that the later cells read back from
# Ensembl_TF.tsv.
meta = pd.read_csv("../../data/TIP/metadata.tsv", sep="\t")
# The TF label is whatever precedes the first "-human" in 'Experiment target'.
meta['TF'] = meta['Experiment target'].str.split("-human", expand=True)[0]
genes = pd.read_csv("../../result/data/genes_gene_perturb", header=None)[0].values
meta = meta[meta['TF'].isin(genes)]  # keep TF in genes only
meta = meta[meta['Output type'] == "IDR thresholded peaks"]
# NOTE(review): the download cell requests "<accession>.bed.gz" for every row kept
# here; if the metadata can list other file formats under this output type, those
# rows may also need filtering -- confirm against metadata.tsv.
# Select the two output columns by label rather than by position: later cells
# read "File accession" by name and unpack rows as (accession, TF), so a
# positional pick would silently write the wrong column if the layout changed.
meta[['File accession', 'TF']].to_csv("../../result/TIP/Ensembl_TF.tsv", sep="\t", index=False)

In [None]:
# Write one wget command per selected file accession into the job list.
# Mode "w" truncates any existing "joblist" file.
download_cmd = "wget https://www.encodeproject.org/files/%s/@@download/%s.bed.gz\n"
with open("joblist", "w") as job_file:
    job_file.writelines(download_cmd % (accession, accession) for accession in meta['File accession'])

### run TIP

In [None]:
# Reload the accession/TF table and queue one TIP.R invocation per accession.
# Each command passes, in order: the gene annotation file, the accession's
# .bed.gz under data/ENCODE, and per-accession paths under result/TIP/weight
# and result/TIP/score (presumably TIP.R's outputs -- confirm against TIP.R).
# Mode "w" truncates any existing "joblist" file.
meta = pd.read_csv("../../result/TIP/Ensembl_TF.tsv", sep="\t")
tip_cmd = "module load R; Rscript TIP.R /home/jg2447/slayman/perturb/data/TIP/gene_annotation.txt /home/jg2447/slayman/perturb/data/ENCODE/%s.bed.gz /home/jg2447/slayman/perturb/result/TIP/weight/%s.txt /home/jg2447/slayman/perturb/result/TIP/score/%s.txt\n"
job_lines = [tip_cmd % (accession, accession, accession) for accession in meta["File accession"]]
with open("joblist", "w") as job_file:
    job_file.write("".join(job_lines))

### compute q-values from TIP scores

In [None]:
# Reload the accession/TF table and queue one TIP_qval.R invocation per
# accession, pairing each result/TIP/score file with a result/TIP/qval path.
# Mode "w" truncates any existing "joblist" file.
meta = pd.read_csv("../../result/TIP/Ensembl_TF.tsv", sep="\t")
qval_cmd = "module load R; Rscript TIP_qval.R /home/jg2447/slayman/perturb/result/TIP/score/%s.txt /home/jg2447/slayman/perturb/result/TIP/qval/%s.txt\n"
with open("joblist", "w") as job_file:
    for accession in meta["File accession"]:
        job_line = qval_cmd % (accession, accession)
        job_file.write(job_line)

### parse TIP results into a network

In [None]:
# Map each label in data.var's index to its 'gene_name', restricted to the
# names listed in genes_gene_perturb.
# NOTE(review): the index labels are presumably Ensembl gene IDs -- confirm.
genes = pd.read_csv("../../result/data/genes_gene_perturb", header=None)[0].values
data = sc.read_h5ad("../../data/perturb/K562_gwps_normalized_bulk_01.h5ad")
gene_name_col = data.var['gene_name']
id2name = gene_name_col[gene_name_col.isin(genes)].to_dict()

In [None]:
# Map every gene name to its 0-based row position in genes_gene_perturb.
# If a name occurs more than once, the last occurrence wins.
gene_column = pd.read_csv("../../result/data/genes_gene_perturb", header=None)[0]
name2id = dict(zip(gene_column, range(len(gene_column))))

In [None]:
# Collect, for every TF, the set of target gene names found across all of its
# q-value tables (a TF can appear on several rows of Ensembl_TF.tsv).
meta = pd.read_csv("../../result/TIP/Ensembl_TF.tsv", sep="\t")
result = {}
for accession, tf_name in meta.values:
    qval_table = pd.read_csv("../../result/TIP/qval/%s.txt" % accession, sep="\t")
    # Translate identifiers through id2name; entries missing from it become NaN,
    # which is how targets outside the gene list get discarded below.
    qval_table["name"] = qval_table["name"].map(id2name)
    # NOTE(review): dropna() without `subset` also removes rows with NaN in any
    # other column -- confirm that is intended.
    # NOTE(review): no q-value cutoff is applied in this cell; every remaining
    # row counts as a target -- confirm TIP_qval.R already filters.
    mapped_rows = qval_table.dropna()
    result.setdefault(tf_name, set()).update(mapped_rows["name"])

In [None]:
# Flatten the TF -> target-set mapping into two parallel lists, one entry per edge.
edge_pairs = [(tf_name, gene) for tf_name, gene_set in result.items() for gene in gene_set]
source = [tf_name for tf_name, _ in edge_pairs]
target = [gene for _, gene in edge_pairs]

In [None]:
# Assemble the edge list (column 0 = source, column 1 = target), replace gene
# names with their integer ids from name2id, and write the edges sorted by
# (source, target) as a headerless TSV.
net = pd.DataFrame({0: source, 1: target})
for column in (0, 1):
    net[column] = net[column].map(name2id)
sorted_edges = net.sort_values(by=[0, 1])
sorted_edges.to_csv("../../result/network/ChIP_TIP.tsv", sep="\t", header=False, index=False)