### Set up the pipeline with basic imports, data directories, and timepoints.

In [None]:
import os
from benbiohelpers.DataPipelineManagement.GenomeManager import getGenomeFastaFilePath
from mutperiodpy.helper_scripts.UsefulFileSystemFunctions import getExternalDataDirectory as getMutperiodExternalDataDirectory

# Reference genome and mutperiod's external data directory, both looked up
# through the helper packages imported above.
hg19FastaFilePath = getGenomeFastaFilePath(
    "hg19",
)
mutperiodHg19Directory = getMutperiodExternalDataDirectory()

# Project data lives one level above the current working directory;
# abspath pins the relative path down so later cells use an absolute one.
projectDataDir = os.path.abspath(
    "../data/ETS_CPD_deamination"
)

# Timepoint labels; each one is substituted into a per-timepoint directory name
# by the parsing cell.
timepoints = [
    "1h",
    "8h",
    "24h",
    "48h",
]

### Parse the mutation data for mutperiod input

In [None]:
from mutperiodpy.input_parsing.ParseStandardBed import parseStandardBed
from mutperiodpy.helper_scripts.UsefulFileSystemFunctions import DataTypeStr
from benbiohelpers.FileSystemHandling.DirectoryHandling import getFilesInDirectory

# Collect one set of mutperiod-ready input file paths per timepoint, reusing
# previously parsed files when they are already present in the data directory.
mutperiodInputFilePaths = []
for timepoint in timepoints:
    dataDirBasename = f"NHF1_CPD_{timepoint}_all_reps_all_cytosine_deamination_TGG_filtered"
    dataDir = os.path.join(projectDataDir, dataDirBasename)

    # Look only in the top level of the timepoint directory for parsed output.
    preexistingMutperiodInput = getFilesInDirectory(
        dataDir, DataTypeStr.mutations + ".bed", searchRecursively = False
    )

    # Anything other than None is taken as an already-parsed file: record it and move on.
    if preexistingMutperiodInput is not None:
        mutperiodInputFilePaths.append(preexistingMutperiodInput)
        print(f"Found parsed data at {preexistingMutperiodInput}. Continuing.")
        continue

    # A None result is treated as "nothing parsed yet": parse the raw bed file now.
    print("Parsed data not found. Generating...")
    rawInputBasename = f"NHF1_CPD_{timepoint}_all_reps_C_to_T_CC_to_TT_mismatches_by_read_relation_formatted_TGG_filtered.bed"
    rawInputData = os.path.join(dataDir, rawInputBasename)
    mutperiodInputFilePaths.extend(parseStandardBed([rawInputData], hg19FastaFilePath))

### Run the mutperiod pipeline to count mutations relative to transcription factor binding sites.

In [None]:
from mutperiodpy.RunAnalysisSuite import runAnalysisSuite

# Bed files for the two ETS binding-site sets (DHS and non-DHS), each stored in
# a like-named subdirectory of the mutperiod hg19 directory.
TFBS_FilePaths = [
    os.path.join(mutperiodHg19Directory, "hg19_ETS_known_DHS", "hg19_ETS_known_DHS.bed"),
    os.path.join(mutperiodHg19Directory, "hg19_ETS_known_non_DHS", "hg19_ETS_known_non_DHS.bed"),
]

# The analysis suite is given names rather than paths: strip the directory and
# the final extension from each bed file path.
TFBS_Names = [
    os.path.basename(bedFilePath).rsplit('.', 1)[0]
    for bedFilePath in TFBS_FilePaths
]

# Run the analysis on every parsed mutation file against both binding-site sets,
# without normalization and without a custom background.
runAnalysisSuite(
    mutperiodInputFilePaths,
    TFBS_Names,
    normalizationMethod = "No Normalization",
    customBackgroundDir = None,
    useSingleNucRadius = True,
    includeLinker = False,
    useNucGroupRadius = True,
    useNucStrand = True,
)