In [None]:
import os
from pandas import read_table
from benbiohelpers.FileSystemHandling.DirectoryHandling import getFilesInDirectory
from bpde_chromatin_analysis.helper_scripts.BPDE_DataDir import getDataDir
from mutperiodpy.helper_scripts.UsefulFileSystemFunctions import DataTypeStr, Metadata
from mutperiodpy.GeneratePlotnineFigures import parseAndPlotPeriodicity

# Names of the data sets that the cells below load and plot.
dataSets = ["A549_BPDE_2h"]

# Directory searched for nucleosome counts files: the "Adar_damage-seq" folder
# inside whatever directory getDataDir() returns.
adarDamageSeqDataDirectory = os.path.join(getDataDir(), "Adar_damage-seq")

In [None]:
# Keys for the second level of the counts dictionaries. The loading loop stores files whose
# path contains "nuc-group" under NUC_GROUP and every other file under SINGLE_NUC.
SINGLE_NUC, NUC_GROUP = 1, 2

# Counts tables, nested as [data set name][SINGLE_NUC or NUC_GROUP][nucPosName].
adarDamageSeqRawCounts = {}
adarDamageSeqNakedNormalizedCounts = {}
adarDamageSeqTrinucNormalizedCounts = {}

# Only counts files whose metadata nucPosName appears in this list are loaded.
TSS_Names = [
    "hg19_protein_coding_genes_TSSs",
    "hg19_protein_coding_genes_TSSs_A549_high_expression",
    "hg19_protein_coding_genes_TSSs_A549_low_expression",
]

# Read every relevant counts file into the nested dictionaries, keyed as
# [data set][radius][nucPosName].
# NOTE(review): neither file search below depends on dataSet, so with more than one entry in
# dataSets every data set would receive the same tables - confirm this is intended.
for dataSet in dataSets:

    # Give this data set an empty table lookup for both radius keys in all three dictionaries.
    for countsByDataSet in (adarDamageSeqRawCounts,
                            adarDamageSeqNakedNormalizedCounts,
                            adarDamageSeqTrinucNormalizedCounts):
        countsByDataSet[dataSet] = {SINGLE_NUC: dict(), NUC_GROUP: dict()}

    # Raw counts files.
    for nucleosomeCountsFilePath in getFilesInDirectory(adarDamageSeqDataDirectory, DataTypeStr.rawNucCounts+".tsv"):

        countsMetadata = Metadata(nucleosomeCountsFilePath)

        # Files for positions outside TSS_Names are ignored.
        if countsMetadata.nucPosName in TSS_Names:

            # "nuc-group" anywhere in the full path selects the NUC_GROUP key.
            radius = NUC_GROUP if "nuc-group" in nucleosomeCountsFilePath else SINGLE_NUC

            adarDamageSeqRawCounts[dataSet][radius][countsMetadata.nucPosName] = read_table(nucleosomeCountsFilePath)

    # Normalized counts files, routed by file name to the naked- or trinuc-normalized dictionary.
    for nucleosomeCountsFilePath in getFilesInDirectory(adarDamageSeqDataDirectory, DataTypeStr.normNucCounts+".tsv"):

        countsMetadata = Metadata(nucleosomeCountsFilePath)

        if countsMetadata.nucPosName in TSS_Names:

            radius = NUC_GROUP if "nuc-group" in nucleosomeCountsFilePath else SINGLE_NUC

            # Only the base name (not the directories) decides the normalization type.
            # Files matching neither substring are not loaded.
            countsFileName = os.path.basename(nucleosomeCountsFilePath)
            if "custom_context" in countsFileName:
                adarDamageSeqNakedNormalizedCounts[dataSet][radius][countsMetadata.nucPosName] = read_table(nucleosomeCountsFilePath)
            elif "trinuc" in countsFileName:
                adarDamageSeqTrinucNormalizedCounts[dataSet][radius][countsMetadata.nucPosName] = read_table(nucleosomeCountsFilePath)

In [None]:
# Plot the "Both_Strands_Counts" column of every loaded raw counts table.
# NOTE(review): the title does not include the radius, so a single-nuc and a nuc-group
# plot for the same TSS set carry identical titles - confirm whether that is intended.
for dataSet in dataSets:
    for radius, rawCountsByTSS in adarDamageSeqRawCounts[dataSet].items():
        for TSS_Name, rawCounts in rawCountsByTSS.items():
            plot = parseAndPlotPeriodicity(
                rawCounts,
                dataCol = "Both_Strands_Counts",
                smoothTranslational = False,
                nucRepLen = 2000,
                title = f"Adar {dataSet} {TSS_Name} Raw Counts",
                yAxisLabel = "Damage Reads",
            )
            # Explicit print: values produced inside a loop are not auto-displayed by the notebook.
            print(plot)

In [None]:
# Plot the "Normalized_Both_Strands" column of every loaded naked-normalized counts table.
# Unlike the other plotting cells, this one passes smoothTranslational = True.
# NOTE(review): the title does not include the radius, so a single-nuc and a nuc-group
# plot for the same TSS set carry identical titles - confirm whether that is intended.
for dataSet in dataSets:
    for radius, nakedNormalizedCountsByTSS in adarDamageSeqNakedNormalizedCounts[dataSet].items():
        for TSS_Name, nakedNormalizedCounts in nakedNormalizedCountsByTSS.items():
            plot = parseAndPlotPeriodicity(
                nakedNormalizedCounts,
                dataCol = "Normalized_Both_Strands",
                smoothTranslational = True,
                nucRepLen = 2000,
                title = f"Adar {dataSet} {TSS_Name} Naked Normalized",
                yAxisLabel = "Cellular/Naked Damage Reads",
            )
            # Explicit print: values produced inside a loop are not auto-displayed by the notebook.
            print(plot)

In [None]:
# Plot the "Normalized_Both_Strands" column of every loaded trinuc-normalized counts table.
# NOTE(review): the title does not include the radius, so a single-nuc and a nuc-group
# plot for the same TSS set carry identical titles - confirm whether that is intended.
for dataSet in dataSets:
    for radius, trinucNormalizedCountsByTSS in adarDamageSeqTrinucNormalizedCounts[dataSet].items():
        for TSS_Name, trinucNormalizedCounts in trinucNormalizedCountsByTSS.items():
            plot = parseAndPlotPeriodicity(
                trinucNormalizedCounts,
                dataCol = "Normalized_Both_Strands",
                smoothTranslational = False,
                nucRepLen = 2000,
                title = f"Adar {dataSet} {TSS_Name} Trinuc Normalized",
                yAxisLabel = "Trinuc Normalized Damage",
            )
            # Explicit print: values produced inside a loop are not auto-displayed by the notebook.
            print(plot)