### Set up pipeline with basic imports, data directories, and file basenames.

In [None]:
import os
from benbiohelpers.DataPipelineManagement.GenomeManager import getGenomeFastaFilePath
from benbiohelpers.FileSystemHandling.DirectoryHandling import getFilesInDirectory
from bpde_chromatin_analysis.helper_scripts.BPDE_DataDir import getDataDir
from mutperiodpy.helper_scripts.UsefulFileSystemFunctions import getExternalDataDirectory as getMutperiodExternalDataDirectory, DataTypeStr

# Reference genome FASTA path, plus the mutperiod external data directory.
hg19FastaFilePath = getGenomeFastaFilePath("hg19")
# NOTE(review): the name suggests an hg19-specific directory, but this is the unmodified
# return value of mutperiod's getExternalDataDirectory() - confirm that is intended.
mutperiodHg19Directory = getMutperiodExternalDataDirectory()

# Adar damage-seq data: each sample's files live in a sub-directory named after its basename.
adarDamageSeqCellularBasename = "A549_BPDE_cell_2h"
adarDamageSeqNakedBasename = "A549_BPDE_nDNA_2h"
adarDamageSeqDataDirectory = os.path.join(getDataDir(), "Adar_damage-seq")

# File ending used to pick out the mutperiod input files in each sample directory.
mutperiodInputFileEnding = DataTypeStr.mutations + ".bed"
adarDamageSeqCellularDirectory = os.path.join(adarDamageSeqDataDirectory, adarDamageSeqCellularBasename)
adarDamageSeqNakedDirectory = os.path.join(adarDamageSeqDataDirectory, adarDamageSeqNakedBasename)

# Cellular sample: search with the function's default settings.
adarDamageSeqCellularMutperiodInputFilePaths = getFilesInDirectory(
    adarDamageSeqCellularDirectory, mutperiodInputFileEnding
)
# Naked DNA sample: non-recursive search.
# NOTE(review): this value is later handed to os.path.dirname, so getFilesInDirectory is presumably
# expected to return a single path (not a list) when searchRecursively is False - confirm.
adarDamageSeqNakedMutperiodInputFilePath = getFilesInDirectory(
    adarDamageSeqNakedDirectory, mutperiodInputFileEnding, searchRecursively = False
)

### Run the mutperiod pipeline to count damaged bases relative to transcription start sites.

In [None]:
from mutperiodpy.RunAnalysisSuite import runAnalysisSuite, generateCustomBackground

# Each TSS bed file sits in a directory sharing its name: <dir>/<name>/<name>.bed
# The three sets are: all protein coding genes, then the A549 high- and low-expression subsets.
TSS_FilePaths = [
    os.path.join(mutperiodHg19Directory, tssSetName, tssSetName + ".bed")
    for tssSetName in (
        "hg19_protein_coding_genes_TSSs",
        "hg19_protein_coding_genes_TSSs_A549_high_expression",
        "hg19_protein_coding_genes_TSSs_A549_low_expression",
    )
]

# The names handed to mutperiod are the file basenames with the extension stripped.
TSS_Names = [os.path.splitext(os.path.basename(tssFilePath))[0] for tssFilePath in TSS_FilePaths]

# Options shared by the background generation and by both analysis runs.
sharedAnalysisOptions = dict(
    useSingleNucRadius = True,
    includeLinker = False,
    useNucGroupRadius = True,
    useNucStrand = True,
)

# The directory holding the naked DNA input is used as the custom background directory.
nakedDNABackgroundDirectory = os.path.dirname(adarDamageSeqNakedMutperiodInputFilePath)

generateCustomBackground(nakedDNABackgroundDirectory, TSS_Names, **sharedAnalysisOptions)

# Run the cellular data twice: once against the custom background,
# and once with the "Trinuc/Quadrunuc" normalization method (no custom background directory).
runAnalysisSuite(adarDamageSeqCellularMutperiodInputFilePaths, TSS_Names,
                 normalizationMethod = "Custom Background",
                 customBackgroundDir = nakedDNABackgroundDirectory,
                 **sharedAnalysisOptions)
runAnalysisSuite(adarDamageSeqCellularMutperiodInputFilePaths, TSS_Names,
                 normalizationMethod = "Trinuc/Quadrunuc",
                 customBackgroundDir = None,
                 **sharedAnalysisOptions)