## Set up pipeline with basic imports, data directories, and file basenames.

In [None]:
import os
from benbiohelpers.DataPipelineManagement.GenomeManager import getGenomeFastaFilePath
from bpde_chromatin_analysis.helper_scripts.BPDE_DataDir import getDataDir
from mutperiodpy.helper_scripts.UsefulFileSystemFunctions import getExternalDataDirectory as getMutperiodExternalDataDirectory

# Shared inputs for the cells below: the hg19 FASTA path and the directory
# returned by mutperiod's getExternalDataDirectory.
hg19FastaFilePath = getGenomeFastaFilePath("hg19")
mutperiodHg19Directory = getMutperiodExternalDataDirectory()

# Every nucleosome map gets its own subdirectory under this root.
relativeNucleosomePatternsDataDirectory = os.path.join(getDataDir(), "relative_nucleosome_patterns")

relativeNucMapDirectories = [
    os.path.join(relativeNucleosomePatternsDataDirectory, nucMapSubdirectory)
    for nucMapSubdirectory in ("hybrid", "LCL_all_mappable", "NHF1_all_mappable")
]

# Maps each directory above (in the same order) to the basename of its input bed file.
nucMapInputFiles = dict(zip(
    relativeNucMapDirectories,
    ("hg19_hybrid_nucleosome_map.bed",
     "hg19_LCL_MNase_nuc_map_all_mappable.bed",
     "hg19_NHF1_MNase_nuc_map_all_mappable.bed"),
))

### Parse the nucleosome dyad positions for mutperiod input

In [None]:
from typing import List
from mutperiodpy.input_parsing.ParseStandardBed import parseStandardBed
from mutperiodpy.helper_scripts.UsefulFileSystemFunctions import DataTypeStr
from benbiohelpers.FileSystemHandling.DirectoryHandling import getFilesInDirectory

# Collects one mutperiod-formatted input path per nucleosome map directory,
# parsing the raw bed file only when no parsed output is already present.
relativeNucMapMutperiodInputPaths: List[str] = list()

for relativeNucMapDirectory in relativeNucMapDirectories:

    print(f"\nWorking in {os.path.basename(relativeNucMapDirectory)}...")

    # Look the parsed file up once and reuse the result, so the existence check
    # and the stored path come from the same directory scan.
    # NOTE(review): this relies on the non-recursive lookup returning None when
    # nothing matches and a single path otherwise -- confirm against benbiohelpers.
    parsedFilePath = getFilesInDirectory(relativeNucMapDirectory, DataTypeStr.mutations + ".bed", searchRecursively = False)

    if parsedFilePath is None:
        print("Parsed data not found. Generating...")
        # parseStandardBed's return value is concatenated onto the list of input paths.
        relativeNucMapMutperiodInputPaths += parseStandardBed([os.path.join(relativeNucMapDirectory, nucMapInputFiles[relativeNucMapDirectory])], hg19FastaFilePath)
    else:
        relativeNucMapMutperiodInputPaths.append(parsedFilePath)
        print(f"Found parsed data at {relativeNucMapMutperiodInputPaths[-1]}. Continuing.")

### Run the mutperiod pipeline to determine nucleosome positions relative to genomic features (CTCF, ETS, GABPA, and SP1 sites, and genic-region TSSs).

In [None]:
from mutperiodpy.RunAnalysisSuite import runAnalysisSuite
# Each feature map lives at <mutperiodHg19Directory>/<name>/<name>.bed.
relativeFeatureFilePaths: List[str] = [
    os.path.join(mutperiodHg19Directory, featureName, featureName + ".bed")
    for featureName in (
        "hg19_CTCF_known_all",
        "hg19_ETS_known_all",
        "hg19_GABPA_known_all",
        "hg19_SP1_known_all",
        "hg19_genic_regions_unnamed_TSSs",
    )
]

# Feature names are the file basenames with the extension stripped.
relativeFeatureNames = [
    os.path.splitext(os.path.basename(featureFilePath))[0]
    for featureFilePath in relativeFeatureFilePaths
]

# Run every parsed nucleosome map against every feature name.
runAnalysisSuite(
    relativeNucMapMutperiodInputPaths,
    relativeFeatureNames,
    normalizationMethod = "No Normalization",
    customBackgroundDir = None,
    useSingleNucRadius = False,
    includeLinker = False,
    useNucGroupRadius = True,
    useNucStrand = True,
)

In [None]:
from mutperiodpy.helper_scripts.UsefulFileSystemFunctions import DataTypeStr
from mutperiodpy.RunNucleosomeMutationAnalysis import runNucleosomeMutationAnalysis
from benbiohelpers.FileSystemHandling.DirectoryHandling import getFilesInDirectory

# Remove features that are too sparse to count.
# Filtering in place (instead of list.remove) keeps this cell safe to re-run:
# remove() raises ValueError once the names have already been dropped.
sparseFeatureNames = ("hg19_ETS_known_all", "hg19_GABPA_known_all")
relativeFeatureNames[:] = [featureName for featureName in relativeFeatureNames
                           if featureName not in sparseFeatureNames]

# For each remaining feature, gather the raw nucleosome counts files from every
# nucleosome map directory and write one periodicity table per feature.
for relativeFeatureName in relativeFeatureNames:
    rawNucCountsFilePaths = list()
    for relativeNucMapDirectory in relativeNucMapDirectories:
        # The lookup result is concatenated, so it is expected to be a list of paths here.
        rawNucCountsFilePaths += getFilesInDirectory(os.path.join(relativeNucMapDirectory, relativeFeatureName), DataTypeStr.rawNucCounts + ".tsv")

    runNucleosomeMutationAnalysis(rawNucCountsFilePaths,
                                  outputFilePath = os.path.join(relativeNucleosomePatternsDataDirectory, f"{relativeFeatureName}_relative_nucleosome_periodicities.tsv"),
                                  overridePeakPeriodicityWithExpected = False, alignStrands = False)