In [1]:
import os
import pandas as pd
import pybedtools

#### GLOBALS ####
# Base directory for this notebook: the process's current working directory.
# (os.path.abspath on a relative name is resolved against os.getcwd(), so this
# is the same directory that dirname(abspath("__file__")) yields, stated
# explicitly.)
CURRENT_DIR_PATH = os.getcwd()
# Have pybedtools place its intermediate files in that directory.
pybedtools.helpers.set_tempdir(CURRENT_DIR_PATH)

In [2]:
# Per-TF ChIP BED files (gzip-compressed); judging by the paths these are
# ENCODE HEK293 TF ChIP datasets, one per transcription factor.
atf2_chip_file = "/data5/deepro/starrseq/papers/reproducibility/0_in-house_dataset/data/encode/hek293/chip/tf/ATF2/ENCSR217HTK/ENCFF225VCG.bed.gz"
foxa1_chip_file = "/data5/deepro/starrseq/papers/reproducibility/0_in-house_dataset/data/encode/hek293/chip/tf/FOXA1/ENCSR094WHO/ENCFF514ZNN.bed.gz"
lef1_chip_file = "/data5/deepro/starrseq/papers/reproducibility/0_in-house_dataset/data/encode/hek293/chip/tf/LEF1/ENCSR240XWM/ENCFF939UTN.bed.gz"
scrt1_chip_file = "/data5/deepro/starrseq/papers/reproducibility/0_in-house_dataset/data/encode/hek293/chip/tf/SCRT1/ENCSR605MGM/ENCFF799CAL.bed.gz"

# Per-TF "responsive" fragment BED files, read from the
# 2_categorize_fragments_on_activity results directory.
atf2_responsive_file = "/data5/deepro/starrseq/papers/results/2_categorize_fragments_on_activity/data/ATF2/responsive.bed"
foxa1_responsive_file = "/data5/deepro/starrseq/papers/results/2_categorize_fragments_on_activity/data/FOXA1/responsive.bed"
lef1_responsive_file = "/data5/deepro/starrseq/papers/results/2_categorize_fragments_on_activity/data/LEF1/responsive.bed"
scrt1_responsive_file = "/data5/deepro/starrseq/papers/results/2_categorize_fragments_on_activity/data/SCRT1/responsive.bed"

In [3]:
# Master BED file used as the background set in the overlap counts below.
# NOTE(review): presumably the full fragment library that the responsive sets
# are drawn from — confirm.
master_file = "/data5/deepro/starrseq/papers/reproducibility/0_in-house_dataset/data/master/master.bed"

In [4]:
def get_intersecting_fragments(responsive_file, chip_file, master_file, save_file):
    """Count master and responsive intervals that overlap ChIP intervals.

    The master intervals are first restricted to those that intersect the
    ChIP intervals. The responsive intervals are then intersected against
    that restricted set, and the resulting BED file is moved to ``save_file``.

    Parameters
    ----------
    responsive_file : str
        Path to the BED file of responsive fragments.
    chip_file : str
        Path to the ChIP BED file.
    master_file : str
        Path to the master BED file.
    save_file : str
        Destination path for the responsive-vs-ChIP intersection.

    Returns
    -------
    tuple of int
        ``(master_chip_bed_len, master_bed_len, responsive_chip_bed_len,
        responsive_bed_len)``: the number of master intervals overlapping
        ChIP, the total number of master intervals, the number of responsive
        intervals overlapping the ChIP-overlapping master intervals, and the
        total number of responsive intervals.
    """
    try:
        responsive_bed = pybedtools.BedTool(responsive_file)
        chip_bed = pybedtools.BedTool(chip_file)
        master_bed = pybedtools.BedTool(master_file)
        # Regions in the master file that intersect with the ChIP sites;
        # u=True keeps each master interval once if it has any overlap.
        master_chip_bed = master_bed.intersect(chip_bed, u=True)
        master_chip_bed_len = len(master_chip_bed)
        master_bed_len = len(master_bed)
        # Responsive fragments which had even a 1 bp overlap with the
        # ChIP-overlapping master regions.
        intersect_bed = responsive_bed.intersect(master_chip_bed, u=True)
        responsive_chip_bed_len = len(intersect_bed)
        responsive_bed_len = len(responsive_bed)
        # Keep the intersection by moving it out of the temp area before cleanup.
        intersect_bed.moveto(save_file)
        return master_chip_bed_len, master_bed_len, responsive_chip_bed_len, responsive_bed_len
    finally:
        # Always remove pybedtools intermediates, even when a step above raised,
        # so failed runs do not leave temp files behind in the temp directory.
        # NOTE(review): remove_all=True targets every pybedtools temp file in
        # the temp dir, not only those created by this call — confirm nothing
        # else shares that directory.
        pybedtools.helpers.cleanup(remove_all=True)

In [5]:
# Parallel lists: position i of each list refers to the same TF.
chip_files = [atf2_chip_file, foxa1_chip_file, lef1_chip_file, scrt1_chip_file]
responsive_file = [atf2_responsive_file, foxa1_responsive_file, lef1_responsive_file, scrt1_responsive_file]

# NOTE(review): the same destination is passed on every iteration, so each
# TF's saved intersection presumably replaces the previous one — confirm that
# only the counts are needed.
save_file = "./example.bed"

# Percentages are accumulated in the same order as the lists above.
percentage_master = []
percentage_response = []

for chip_path, responsive_path in zip(chip_files, responsive_file):
    counts = get_intersecting_fragments(responsive_path, chip_path, master_file, save_file)
    master_hits, master_total, responsive_hits, responsive_total = counts
    # Share of master intervals that overlap ChIP.
    percentage_master.append(master_hits * 100 / master_total)
    # Share of responsive intervals that overlap ChIP-overlapping master intervals.
    percentage_response.append(responsive_hits * 100 / responsive_total)


In [7]:
# Summary table: one row per TF, in the same order the percentages were appended.
pd.DataFrame(
    data={
        "chip_in_master": percentage_master,
        "chip_in_response": percentage_response,
    },
    index=["ATF2", "FOXA1", "LEF1", "SCRT1"],
)

Unnamed: 0,chip_in_master,chip_in_response
ATF2,12.45503,31.233621
FOXA1,1.380521,3.251525
LEF1,1.306835,5.046821
SCRT1,11.094014,23.903918
