### Set up pipeline with basic imports, data directories, and file basenames.

Make sure that the Jiang_damage_maps_preprocessing notebook has been run prior to this notebook.

In [None]:
import os
from benbiohelpers.DataPipelineManagement.GenomeManager import getGenomeFastaFilePath
from benbiohelpers.FileSystemHandling.DirectoryHandling import getFilesInDirectory
from bpde_chromatin_analysis.helper_scripts.BPDE_DataDir import getDataDir
from mutperiodpy.helper_scripts.UsefulFileSystemFunctions import getExternalDataDirectory as getMutperiodExternalDataDirectory, DataTypeStr

# Reference genome fasta path and mutperiod's external data directory.
hg19FastaFilePath = getGenomeFastaFilePath("hg19")
mutperiodHg19Directory = getMutperiodExternalDataDirectory()

# Each Jiang data set is looked up in a sub-directory named after its basename.
jiang2uM_CellularBasename = "BEAS-2B_2uM_BPDE_cell_24h"
jiang2uM_NakedBasename = "BEAS-2B_2uM_BPDE_nDNA_24h"
jiangDataDirectory = os.path.join(getDataDir(), "Jiang_BPDE_damage_maps")
jiang2uM_CellularDirectory = os.path.join(jiangDataDirectory, jiang2uM_CellularBasename)
jiang2uM_NakedDirectory = os.path.join(jiangDataDirectory, jiang2uM_NakedBasename)

# File ending used to pick out the mutperiod input files in both searches.
mutperiodInputFileEnding = DataTypeStr.mutations + ".bed"

# Cellular data: search with the helper's default recursion setting.
jiang2uM_CellularDamageMutperiodInputFilePaths = getFilesInDirectory(
    jiang2uM_CellularDirectory, mutperiodInputFileEnding
)
# Naked DNA data: non-recursive search. The result is later handed to os.path.dirname,
# so it is presumably a single path rather than a list -- TODO confirm against getFilesInDirectory.
jiang2uM_NakedDamageMutperiodInputFilePath = getFilesInDirectory(
    jiang2uM_NakedDirectory, mutperiodInputFileEnding, searchRecursively = False
)

### Run the mutperiod pipeline to determine translational and rotational periodicity of the data.

In [None]:
from mutperiodpy.RunAnalysisSuite import runAnalysisSuite, generateCustomBackground

# Nucleosome maps to analyze, written as <directory>/<map name>/<map name>.bed.
# Only the base names derived below are passed on to the mutperiod calls.
# Entries that are commented out are excluded from this run.
nucleosomeMapFilePaths = [
    os.path.join(mutperiodHg19Directory, "hg19_hybrid_nuc_map", "hg19_hybrid_nuc_map.bed"),
    os.path.join(mutperiodHg19Directory, "hg19_LCL_MNase_nuc_map", "hg19_LCL_MNase_nuc_map.bed"),
    # os.path.join(mutperiodHg19Directory, "hg19_LCL_MNase_nuc_map_euchromatin",
    #              "hg19_LCL_MNase_nuc_map_euchromatin.bed"),
    # os.path.join(mutperiodHg19Directory, "hg19_LCL_MNase_nuc_map_heterochromatin",
    #              "hg19_LCL_MNase_nuc_map_heterochromatin.bed"),
    # os.path.join(mutperiodHg19Directory, "hg19_LCL_MNase_nuc_map_stringent_euchromatin",
    #              "hg19_LCL_MNase_nuc_map_stringent_euchromatin.bed"),
    os.path.join(mutperiodHg19Directory, "hg19_NHF1_MNase_nuc_map", "hg19_NHF1_MNase_nuc_map.bed"),
]

# Map name = file name with its final extension removed.
nucleosomeMapNames = [
    fileName.rsplit('.', 1)[0]
    for fileName in map(os.path.basename, nucleosomeMapFilePaths)
]

# The directory holding the naked DNA input file doubles as the custom background directory.
customBackgroundDirectory = os.path.dirname(jiang2uM_NakedDamageMutperiodInputFilePath)

# Radius/linker options passed identically to both calls below.
sharedRadiusOptions = dict(useSingleNucRadius = True, includeLinker = False, useNucGroupRadius = True)

# First generate the background from the naked DNA data...
generateCustomBackground(customBackgroundDirectory, nucleosomeMapNames, **sharedRadiusOptions)

# ...then run the analysis suite on the cellular data, normalizing by that background.
runAnalysisSuite(
    jiang2uM_CellularDamageMutperiodInputFilePaths,
    nucleosomeMapNames,
    normalizationMethod = "Custom Background",
    customBackgroundDir = customBackgroundDirectory,
    **sharedRadiusOptions,
)

In [None]:
from mutperiodpy.RunNucleosomeMutationAnalysis import runNucleosomeMutationAnalysis

# Collect the normalized nucleosome counts files for the cellular data set,
# looking in one sub-directory per nucleosome map name.
cellularDataDirectory = os.path.join(jiangDataDirectory, jiang2uM_CellularBasename)
normalizedCountsFileEnding = DataTypeStr.normNucCounts + ".tsv"

normalizedNucleosomeCountsFilePaths = []
for nucleosomeMapName in nucleosomeMapNames:
    normalizedNucleosomeCountsFilePaths.extend(
        getFilesInDirectory(os.path.join(cellularDataDirectory, nucleosomeMapName), normalizedCountsFileEnding)
    )

# Run the nucleosome mutation analysis over every collected file, writing a single
# output file directly in the Jiang data directory.
periodicityOutputFilePath = os.path.join(jiangDataDirectory, "BEAS-2B_2uM_BPDE_naked_normalized_periodicity_data.tsv")
runNucleosomeMutationAnalysis(
    normalizedNucleosomeCountsFilePaths,
    outputFilePath = periodicityOutputFilePath,
    overridePeakPeriodicityWithExpected = False,
    alignStrands = True,
)