In [None]:
import os
from pandas import read_table
from benbiohelpers.FileSystemHandling.DirectoryHandling import getFilesInDirectory
from bpde_chromatin_analysis.helper_scripts.BPDE_DataDir import getDataDir
from mutperiodpy.helper_scripts.UsefulFileSystemFunctions import DataTypeStr, Metadata, getExternalDataDirectory as getMutperiodExternalDataDirectory
from mutperiodpy.quick_scripts.GetNRL import getNRL
from mutperiodpy.GeneratePlotnineFigures import parseAndPlotPeriodicity, parseNucleosomeCountsDataForPlotting, plotPeriodicity

# The "hg19" subdirectory of the directory returned by mutperiod's getExternalDataDirectory.
mutperiodHg19Directory = os.path.join(
    getMutperiodExternalDataDirectory(),
    "hg19",
)

# The "CPD_repair_as_control" subdirectory of the directory returned by getDataDir;
# the counts files loaded by this notebook are searched for under this path.
CPD_DataDirectory = os.path.join(
    getDataDir(),
    "CPD_repair_as_control",
)

In [None]:
# Keys identifying the two count radii; they index the outer level of every counts dictionary below.
SINGLE_NUC = 1
NUC_GROUP = 2

# Each dictionary is keyed first by radius, then by nucleosome map name,
# and stores the table read from the matching counts file.
CPD_RawCounts = {SINGLE_NUC: {}, NUC_GROUP: {}}
CPD_SinglenucNormalizedCounts = {SINGLE_NUC: {}, NUC_GROUP: {}}
CPD_TrinucNormalizedCounts = {SINGLE_NUC: {}, NUC_GROUP: {}}
CPD_DamageNormalizedCounts = {SINGLE_NUC: {}, NUC_GROUP: {}}

# Value passed as nucRepLen when plotting, keyed by nucleosome map name.
# "hg19_CTCF_known" is preset to 190 and is therefore never sent to getNRL;
# every other map is filled in the first time it is encountered.
NRLs = {"hg19_CTCF_known": 190}

# --- Raw counts ---
for nucleosomeCountsFilePath in getFilesInDirectory(CPD_DataDirectory, DataTypeStr.rawNucCounts + ".tsv"):

    # "nuc-group" anywhere in the full path (not only the file name) selects the nuc-group radius.
    radius = NUC_GROUP if "nuc-group" in nucleosomeCountsFilePath else SINGLE_NUC

    countsMetadata = Metadata(nucleosomeCountsFilePath)
    nucPosName = countsMetadata.nucPosName

    # NOTE(review): a second file with the same radius and nucleosome map name replaces
    # the earlier table - confirm each (radius, map) pair occurs once in the directory.
    CPD_RawCounts[radius][nucPosName] = read_table(nucleosomeCountsFilePath)

    # Only compute the repeat length for maps that do not have one yet.
    if nucPosName not in NRLs:
        NRLs[nucPosName] = getNRL([countsMetadata.baseNucPosFilePath])[0]

# --- Normalized counts, sorted by the context named in the file name ---
for nucleosomeCountsFilePath in getFilesInDirectory(CPD_DataDirectory, DataTypeStr.normNucCounts + ".tsv"):

    radius = NUC_GROUP if "nuc-group" in nucleosomeCountsFilePath else SINGLE_NUC

    countsMetadata = Metadata(nucleosomeCountsFilePath)
    nucPosName = countsMetadata.nucPosName
    countsFileName = os.path.basename(nucleosomeCountsFilePath)

    # NOTE(review): the "singlenuc" test is independent of the trinuc/custom_context chain
    # below, so a file name containing "singlenuc" plus one of the other markers is stored
    # in two dictionaries - confirm whether a single if/elif chain was intended.
    if "singlenuc" in countsFileName:
        CPD_SinglenucNormalizedCounts[radius][nucPosName] = read_table(nucleosomeCountsFilePath)

    if "trinuc" in countsFileName:
        CPD_TrinucNormalizedCounts[radius][nucPosName] = read_table(nucleosomeCountsFilePath)
    elif "custom_context" in countsFileName:
        CPD_DamageNormalizedCounts[radius][nucPosName] = read_table(nucleosomeCountsFilePath)

    # Covers maps that only appear among the normalized files.
    if nucPosName not in NRLs:
        NRLs[nucPosName] = getNRL([countsMetadata.baseNucPosFilePath])[0]

In [None]:
# Plot the raw counts for every loaded (radius, nucleosome map) pair, without translational smoothing.
# NOTE(review): the title does not mention the radius, so the single-nuc and nuc-group
# plots of the same map carry identical titles.
for radius, rawCountsByMap in CPD_RawCounts.items():
    for nucleosomeMapName, rawCounts in rawCountsByMap.items():
        plot = parseAndPlotPeriodicity(
            rawCounts,
            dataCol = "Both_Strands_Counts",
            smoothTranslational = False,
            nucRepLen = NRLs[nucleosomeMapName],
            title = f"CPD {nucleosomeMapName} Raw",
            yAxisLabel = "Counts",
        )
        # Presumably printing the returned plot object is what renders it - verify
        # against the installed plotting library version.
        print(plot)

In [None]:
# Plot the singlenuc-normalized counts for every loaded (radius, nucleosome map) pair,
# with translational smoothing enabled.
for radius, singlenucCountsByMap in CPD_SinglenucNormalizedCounts.items():
    for nucleosomeMapName, singlenucCounts in singlenucCountsByMap.items():
        plot = parseAndPlotPeriodicity(
            singlenucCounts,
            dataCol = "Normalized_Aligned_Strands",
            smoothTranslational = True,
            nucRepLen = NRLs[nucleosomeMapName],
            title = f"CPD {nucleosomeMapName} Singlenuc Normalized",
            yAxisLabel = "Normalized Counts",
        )
        # Presumably printing the returned plot object is what renders it - verify
        # against the installed plotting library version.
        print(plot)

In [None]:
# Plot the trinuc-normalized counts for every loaded (radius, nucleosome map) pair,
# with translational smoothing enabled.
# NOTE(review): this cell reads "Normalized_Both_Strands" while the singlenuc and damage
# normalized cells read "Normalized_Aligned_Strands" - confirm the difference is intentional.
for radius, trinucCountsByMap in CPD_TrinucNormalizedCounts.items():
    for nucleosomeMapName, trinucCounts in trinucCountsByMap.items():
        plot = parseAndPlotPeriodicity(
            trinucCounts,
            dataCol = "Normalized_Both_Strands",
            smoothTranslational = True,
            nucRepLen = NRLs[nucleosomeMapName],
            title = f"CPD {nucleosomeMapName} Trinuc Normalized",
            yAxisLabel = "Normalized Counts",
        )
        # Presumably printing the returned plot object is what renders it - verify
        # against the installed plotting library version.
        print(plot)

In [None]:
# Plot the damage-normalized counts (loaded from the "custom_context" files) for every
# loaded (radius, nucleosome map) pair, with translational smoothing enabled.
for radius, damageCountsByMap in CPD_DamageNormalizedCounts.items():
    for nucleosomeMapName, damageCounts in damageCountsByMap.items():
        plot = parseAndPlotPeriodicity(
            damageCounts,
            dataCol = "Normalized_Aligned_Strands",
            smoothTranslational = True,
            nucRepLen = NRLs[nucleosomeMapName],
            title = f"CPD {nucleosomeMapName} Damage Normalized",
            yAxisLabel = "Normalized Counts",
        )
        # Presumably printing the returned plot object is what renders it - verify
        # against the installed plotting library version.
        print(plot)