In [1]:
import warnings

import pandas as pd

In [12]:
# Tab-separated input tables; both are loaded with pandas in a later cell.
consensus_peaks_path = "../homer/Input/consensus_peaks.mRp.clN.boolean.txt"
differential_results_path = "../homer/Output/differential_results_unfiltered.tsv"

# Folder that receives every BED file written by the cells below.
deeptools_profile_input_folder = "input/macs2"

## Create the regions for upregulated/downregulated/static peaks

In [None]:
# Load the consensus peak table and the differential results (both tab-separated).
# NOTE(review): the next cell matches consensus_peaks["interval_id"] against
# differential_results.index. No index column is requested here, so that only
# works if pandas infers one (header row with one fewer field than the data
# rows) -- confirm against the actual file.
consensus_peaks = pd.read_table(consensus_peaks_path)
differential_results = pd.read_table(differential_results_path)

In [17]:
PADJ_THRESHOLD = 0.05  # adjusted p-value cutoff below which a result counts as significant
BED_COLUMNS = ["chr", "start", "end"]  # columns of consensus_peaks written to each BED file


def _write_bed(peaks, file_name):
    """Write the BED columns of `peaks` to a file in `deeptools_profile_input_folder`.

    Parameters
    ----------
    peaks : pd.DataFrame
        Rows of `consensus_peaks` to export; must contain `BED_COLUMNS`.
    file_name : str
        Name of the file created inside `deeptools_profile_input_folder`.

    Returns
    -------
    pd.DataFrame
        The three-column frame that was written (tab-separated, no header,
        no index column).
    """
    bed = peaks[BED_COLUMNS]
    bed.to_csv(
        f"{deeptools_profile_input_folder}/{file_name}",
        sep="\t",
        header=False,
        index=False,
    )
    return bed


def _matches_results(results_sig, label):
    """Return a boolean mask over `consensus_peaks` for peaks listed in `results_sig`.

    A peak is flagged when its `interval_id` appears in `results_sig.index`.

    Parameters
    ----------
    results_sig : pd.DataFrame
        Filtered rows of `differential_results`.
    label : str
        Short name of the subset (used only in the warning message).

    Warns
    -----
    UserWarning
        When `results_sig` has rows but none of its index labels occur in
        `interval_id`. That pattern means the index does not hold interval IDs
        (e.g. a default RangeIndex), which would otherwise silently produce an
        empty BED file.
    """
    matched = consensus_peaks["interval_id"].isin(results_sig.index)
    if len(results_sig) > 0 and not matched.any():
        warnings.warn(
            f"None of the {len(results_sig)} significant '{label}' results match "
            "consensus_peaks['interval_id']; check that differential_results is "
            "indexed by interval ID (e.g. pd.read_csv(..., index_col=0)).",
            stacklevel=2,
        )
    return matched


# All consensus peaks.
consensus_peaks_formatted = _write_bed(consensus_peaks, "consensus_peaks.bed")

# Rows with a missing padj compare as False, so they are never called significant.
is_significant = differential_results["padj"] < PADJ_THRESHOLD

# Significant peaks with a positive log2 fold change.
differential_up_results_sig = differential_results[
    is_significant & (differential_results["log2FoldChange"] > 0)
]
is_up = _matches_results(differential_up_results_sig, "up")
differential_up_peaks = consensus_peaks[is_up]
differential_up_peaks_formatted = _write_bed(
    differential_up_peaks, "differential_up_peaks.bed"
)

# Significant peaks with a negative log2 fold change.
differential_down_results_sig = differential_results[
    is_significant & (differential_results["log2FoldChange"] < 0)
]
is_down = _matches_results(differential_down_results_sig, "down")
differential_down_peaks = consensus_peaks[is_down]
differential_down_peaks_formatted = _write_bed(
    differential_down_peaks, "differential_down_peaks.bed"
)

# Static peaks: every consensus peak that was not flagged as up or down.
static_peaks = consensus_peaks[~is_up & ~is_down]
static_peaks_formatted = _write_bed(static_peaks, "static_peaks.bed")

## Create the regions for peaks separated by Homer annotation

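First, run `run_consensus_peak_annotations.sh` on LinStat. The next cell reads `input/macs2/consensus_peaks.annotatePeaks.txt`, so that file must exist before continuing.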

In [32]:
# Load the peak annotation table, then:
#   * rename its first column to "PeakID" (presumably because the tool writes a
#     long, run-specific header there -- confirm against the file), and
#   * add "BasicAnnotation": the text of "Annotation" before the first "(".
consensus_peak_annotations = (
    pd.read_csv("input/macs2/consensus_peaks.annotatePeaks.txt", sep="\t")
    .pipe(lambda table: table.rename(columns={table.columns[0]: "PeakID"}))
    .assign(
        BasicAnnotation=lambda table: table["Annotation"].str.split(
            "(", expand=True
        )[0]
    )
)

In [33]:
# Write one BED file per distinct "BasicAnnotation" value.
# Region types are visited in value_counts order; rows with a missing
# annotation are not counted and therefore not exported.
value_counts = consensus_peak_annotations["BasicAnnotation"].value_counts()
region_types = list(value_counts.index)

for region_type in region_types:
    # File-name-friendly label: lower-cased with all spaces removed.
    region_type_formatted = region_type.lower().replace(" ", "")
    in_region = consensus_peak_annotations["BasicAnnotation"] == region_type
    region_peaks = consensus_peak_annotations[in_region]
    region_bed = region_peaks.loc[:, ["Chr", "Start", "End"]]
    bed_path = f"{deeptools_profile_input_folder}/consensus_{region_type_formatted}.bed"
    region_bed.to_csv(bed_path, sep="\t", header=False, index=False)