## Set up pipeline with basic imports, data directories, and file basenames.

In [None]:
import os
from benbiohelpers.DataPipelineManagement.GenomeManager import getGenomeFastaFilePath
from bpde_chromatin_analysis.helper_scripts.BPDE_DataDir import getDataDir
from mutperiodpy.helper_scripts.UsefulFileSystemFunctions import getExternalDataDirectory as getMutperiodExternalDataDirectory

# Genome-level resources shared by the cells below.
hg19FastaFilePath = getGenomeFastaFilePath("hg19")
# NOTE(review): despite the "Hg19" in the name, no "hg19" path component is
# appended here; this is exactly what getExternalDataDirectory() returns.
# Confirm that is intended.
mutperiodHg19Directory = getMutperiodExternalDataDirectory()

# Parent directory holding one sub-directory per transcription factor set.
relativeTFBS_PatternsDataDirectory = os.path.join(getDataDir(), "relative_TFBS_patterns")

# One working directory per TFBS set, in a fixed order.
relativeTFBS_Directories = [
    os.path.join(relativeTFBS_PatternsDataDirectory, tfbsDirectoryName)
    for tfbsDirectoryName in ("SP1", "CTCF", "ETS_and_GABPA")
]

# Maps each working directory to the basename of its input bed file.
# The file names are paired with the directories by position.
TFBS_InputFiles = dict(zip(
    relativeTFBS_Directories,
    ("hg19_SP1_known.bed",
     "hg19_CTCF_known.bed",
     "hg19_ETS_and_GABPA_known.bed"),
))

### Parse the TFBS positions for mutperiod input

In [None]:
from typing import List
from mutperiodpy.input_parsing.ParseStandardBed import parseStandardBed
from mutperiodpy.helper_scripts.UsefulFileSystemFunctions import DataTypeStr
from benbiohelpers.FileSystemHandling.DirectoryHandling import getFilesInDirectory

# Collects the mutperiod input path(s) for every TFBS directory, either
# discovered on disk or freshly produced by parseStandardBed.
relativeTFBS_MutperiodInputPaths: List[str] = list()

for relativeTFBS_Directory in relativeTFBS_Directories:

    print(f"\nWorking in {os.path.basename(relativeTFBS_Directory)}...")

    # Search the directory (non-recursively) a single time and reuse the result
    # for both the existence check and the stored path. Searching twice is
    # redundant work and lets the two lookups disagree if the directory changes
    # in between.
    # NOTE(review): this assumes getFilesInDirectory returns None when nothing
    # matches and a single path otherwise, as the original check implied.
    # Confirm against benbiohelpers.
    existingParsedData = getFilesInDirectory(relativeTFBS_Directory, DataTypeStr.mutations + ".bed", searchRecursively = False)

    if existingParsedData is None:
        print("Parsed data not found. Generating...")
        # Parse the raw bed file registered for this directory in TFBS_InputFiles.
        rawInputFilePath = os.path.join(relativeTFBS_Directory, TFBS_InputFiles[relativeTFBS_Directory])
        relativeTFBS_MutperiodInputPaths += parseStandardBed([rawInputFilePath], hg19FastaFilePath)
    else:
        relativeTFBS_MutperiodInputPaths.append(existingParsedData)
        print(f"Found parsed data at {relativeTFBS_MutperiodInputPaths[-1]}. Continuing.")

### Run the mutperiod pipeline to determine TFBS positions relative to TSSs.

In [None]:
from mutperiodpy.RunAnalysisSuite import runAnalysisSuite
# Feature files the TFBS data is analysed against (currently a single TSS file).
relativeFeatureFilePaths: List[str] = [
    os.path.join(mutperiodHg19Directory, "hg19_genic_regions_unnamed_TSSs", "hg19_genic_regions_unnamed_TSSs.bed"),
]

# A feature's name is its file basename with the final extension removed.
relativeFeatureNames = [
    featureBasename.rsplit('.', 1)[0]
    for featureBasename in map(os.path.basename, relativeFeatureFilePaths)
]

# Run the analysis over every parsed TFBS input against the named features.
runAnalysisSuite(
    relativeTFBS_MutperiodInputPaths,
    relativeFeatureNames,
    normalizationMethod = "No Normalization",
    customBackgroundDir = None,
    useSingleNucRadius = False,
    includeLinker = False,
    useNucGroupRadius = True,
    useNucStrand = True,
)