In [None]:
import os
from pandas import read_table
from benbiohelpers.FileSystemHandling.DirectoryHandling import getFilesInDirectory
from bpde_chromatin_analysis.helper_scripts.BPDE_DataDir import getDataDir
from mutperiodpy.helper_scripts.UsefulFileSystemFunctions import DataTypeStr, Metadata, getExternalDataDirectory as getMutperiodExternalDataDirectory
from mutperiodpy.GeneratePlotnineFigures import parseAndPlotPeriodicity, plotPlusAndMinus

# "hg19" subdirectory of the mutperiod external data directory.
mutperiodHg19Directory = os.path.join(
    getMutperiodExternalDataDirectory(),
    "hg19",
)

# "Alexandrov_LUAD" subdirectory of this project's data directory; the counts
# files read later in the notebook are searched for under this path.
alexandrovDataDirectory = os.path.join(
    getDataDir(),
    "Alexandrov_LUAD",
)

In [None]:
# Keys used to bucket counts tables by the radius their file was generated with.
SINGLE_NUC = 1
NUC_GROUP = 2
EXTENDED = 3

# Only counts files whose metadata nucPosName appears in this list are loaded.
TSS_Names = ["hg19_protein_coding_genes_TSSs"]


def _getRadius(nucleosomeCountsFilePath):
    """
    Classify a counts file by the markers present in its path.

    Returns NUC_GROUP if the path contains "nuc-group", EXTENDED if it contains
    "linker+", and SINGLE_NUC otherwise. "nuc-group" takes precedence when both
    markers are present.
    """
    if "nuc-group" in nucleosomeCountsFilePath: return NUC_GROUP
    elif "linker+" in nucleosomeCountsFilePath: return EXTENDED
    else: return SINGLE_NUC


def _loadTSSCounts(dataTypeStr):
    """
    Read every counts table of the given data type for the names in TSS_Names.

    dataTypeStr: one of the DataTypeStr values (e.g. DataTypeStr.rawNucCounts);
        ".tsv" is appended to it to form the pattern handed to getFilesInDirectory.

    Returns a dict keyed by SINGLE_NUC / NUC_GROUP / EXTENDED. Each value is a
    dict mapping nucPosName to the table returned by read_table for that file.
    """
    countsByRadius = {SINGLE_NUC:dict(), NUC_GROUP:dict(), EXTENDED:dict()}

    for nucleosomeCountsFilePath in getFilesInDirectory(alexandrovDataDirectory, dataTypeStr+".tsv"):

        countsMetadata = Metadata(nucleosomeCountsFilePath)
        if countsMetadata.nucPosName not in TSS_Names: continue

        radius = _getRadius(nucleosomeCountsFilePath)
        countsByRadius[radius][countsMetadata.nucPosName] = read_table(nucleosomeCountsFilePath)

    return countsByRadius


# Raw and normalized counts go through the same classification so that a
# "linker+" normalized file gets its own EXTENDED bucket instead of being filed
# under SINGLE_NUC, where it would collide with the single-nucleosome table
# for the same nucPosName.
alexandrovRawCounts = _loadTSSCounts(DataTypeStr.rawNucCounts)
alexandrovNormalizedCounts = _loadTSSCounts(DataTypeStr.normNucCounts)

In [None]:
# Draw two figures for every raw counts table that was loaded: the periodicity
# plot of the combined-strand counts, followed by the plus/minus plot.
for radius, rawCountsByName in alexandrovRawCounts.items():
    for TSS_Name, rawCounts in rawCountsByName.items():

        # Both figures for a given table share the same title.
        rawTitle = f"Alexandrov {TSS_Name} Raw"

        plot = parseAndPlotPeriodicity(
            rawCounts,
            dataCol = "Both_Strands_Counts",
            smoothTranslational = False,
            nucRepLen = 2000,
            title = rawTitle,
            yAxisLabel = "Mutations",
        )
        print(plot)

        plot = plotPlusAndMinus(
            rawCounts,
            rawTitle,
            yAxisLabel = "Mutations",
            smoothData = True,
            overlaySmoothedAndNormal = True,
        )
        print(plot)

In [None]:
# Draw two figures for every normalized counts table that was loaded: the
# periodicity plot of the combined-strand normalized values, followed by the
# plus/minus plot.
for radius, normalizedCountsByName in alexandrovNormalizedCounts.items():
    for TSS_Name, normalizedCounts in normalizedCountsByName.items():

        # Both figures for a given table share the same title.
        normalizedTitle = f"Alexandrov {TSS_Name} Trinuc Normalized"

        plot = parseAndPlotPeriodicity(
            normalizedCounts,
            dataCol = "Normalized_Both_Strands",
            smoothTranslational = False,
            nucRepLen = 2000,
            title = normalizedTitle,
            yAxisLabel = "Normalized Counts",
        )
        print(plot)

        plot = plotPlusAndMinus(
            normalizedCounts,
            normalizedTitle,
            yAxisLabel = "Normalized Counts",
            smoothData = True,
            overlaySmoothedAndNormal = True,
        )
        print(plot)