In [None]:
import os
from pandas import read_table
from benbiohelpers.FileSystemHandling.DirectoryHandling import getFilesInDirectory
from bpde_chromatin_analysis.helper_scripts.BPDE_DataDir import getDataDir
from mutperiodpy.helper_scripts.UsefulFileSystemFunctions import DataTypeStr, Metadata
from mutperiodpy.quick_scripts.GetNRL import getNRL
from mutperiodpy.GeneratePlotnineFigures import parseAndPlotPeriodicity, parseNucleosomeCountsDataForPlotting, plotPeriodicity

# Directory, under the project data directory returned by getDataDir(), that is
# searched for the nucleosome counts files loaded by this notebook.
jiangDataDirectory = os.path.join(getDataDir(), "Jiang_BPDE_damage_maps")

# Names of the data sets to load and plot; used as the top-level keys of the
# counts dictionaries and in the plot titles.
dataSets = ["BEAS-2B_2uM_BPDE_cell_24h"]

In [None]:
# Second-level keys of the counts dictionaries. A counts file whose path
# contains "nuc-group" is filed under NUC_GROUP; every other file is filed
# under SINGLE_NUC.
SINGLE_NUC = 1
NUC_GROUP = 2

# Both counts dictionaries are nested as
#   [data set name][SINGLE_NUC or NUC_GROUP][nucleosome map name] -> table returned by read_table
jiangRawCounts = dict()
jiangNormalizedCounts = dict()
# Nucleosome map name -> first element of the getNRL result for that map's base
# nucleosome positions file. Computed once per map name and passed to the
# plotting function as nucRepLen.
NRLs = dict()

# Only counts files whose nucleosome map name (Metadata.nucPosName) appears in this list are loaded.
nucleosomeMapNameWhitelist = ['hg19_hybrid_nucleosome_map', 'hg19_LCL_MNase_nuc_map_all_mappable', 'hg19_NHF1_MNase_nuc_map_all_mappable']
# nucleosomeMapNameWhitelist += ['hg19_LCL_MNase_nuc_map_all_mappable_euchromatin', 'hg19_LCL_MNase_nuc_map_all_mappable_heterochromatin',
#                               'hg19_LCL_MNase_nuc_map_all_mappable_stringent_euchromatin']

# NOTE(review): the file search below looks at jiangDataDirectory as a whole and
# never uses dataSet to select files, so every entry in dataSets receives the
# same tables. Harmless with a single data set; confirm before adding more.
for dataSet in dataSets:

    jiangRawCounts[dataSet] = {SINGLE_NUC:dict(), NUC_GROUP:dict()}
    jiangNormalizedCounts[dataSet] = {SINGLE_NUC:dict(), NUC_GROUP:dict()}

    # Raw and normalized counts files are loaded in exactly the same way; only
    # the file suffix searched for and the destination dictionary differ.
    # Raw counts are processed first, then normalized counts.
    countsSources = ((DataTypeStr.rawNucCounts+".tsv", jiangRawCounts[dataSet]),
                     (DataTypeStr.normNucCounts+".tsv", jiangNormalizedCounts[dataSet]))

    for countsFileSuffix, countsByRadius in countsSources:

        for nucleosomeCountsFilePath in getFilesInDirectory(jiangDataDirectory, countsFileSuffix):

            countsMetadata = Metadata(nucleosomeCountsFilePath)
            if countsMetadata.nucPosName not in nucleosomeMapNameWhitelist: continue

            if "nuc-group" in nucleosomeCountsFilePath: radius = NUC_GROUP
            else: radius = SINGLE_NUC

            countsByRadius[radius][countsMetadata.nucPosName] = read_table(nucleosomeCountsFilePath)

            # getNRL is only called the first time a given nucleosome map name is seen.
            if countsMetadata.nucPosName not in NRLs: NRLs[countsMetadata.nucPosName] = getNRL([countsMetadata.baseNucPosFilePath])[0]

In [None]:
# Plot the raw counts table for every data set / radius / nucleosome map combination.
# NOTE(review): the title does not include the radius, so tables stored under
# different radius keys for the same nucleosome map produce identically titled plots.
for dataSet in dataSets:
    for radius, rawCountsByMapName in jiangRawCounts[dataSet].items():
        for nucleosomeMapName, rawCounts in rawCountsByMapName.items():
            plotTitle = f"Jiang {dataSet} {nucleosomeMapName} Raw Counts"
            plot = parseAndPlotPeriodicity(
                rawCounts,
                dataCol = "Aligned_Strands_Counts",
                smoothTranslational = False,
                nucRepLen = NRLs[nucleosomeMapName],
                title = plotTitle,
                yAxisLabel = "Damage Reads",
            )
            # Only a cell's last expression is displayed automatically, so each
            # plot is printed explicitly (presumably this renders the figure).
            print(plot)

In [None]:
# Plot the normalized counts table for every data set / radius / nucleosome map combination.
# NOTE(review): the title does not include the radius, so tables stored under
# different radius keys for the same nucleosome map produce identically titled plots.
for dataSet in dataSets:
    for radius, normalizedCountsByMapName in jiangNormalizedCounts[dataSet].items():
        for nucleosomeMapName, normalizedCounts in normalizedCountsByMapName.items():
            plotTitle = f"Jiang {dataSet} {nucleosomeMapName} Naked Normalized"
            plot = parseAndPlotPeriodicity(
                normalizedCounts,
                dataCol = "Normalized_Aligned_Strands",
                smoothTranslational = True,
                nucRepLen = NRLs[nucleosomeMapName],
                title = plotTitle,
                yAxisLabel = "Cellular/Naked Damage Reads",
            )
            # Only a cell's last expression is displayed automatically, so each
            # plot is printed explicitly (presumably this renders the figure).
            print(plot)