In [None]:
import os
from pandas import read_table
from benbiohelpers.FileSystemHandling.DirectoryHandling import getFilesInDirectory
from bpde_chromatin_analysis.helper_scripts.BPDE_DataDir import getDataDir
from mutperiodpy.helper_scripts.UsefulFileSystemFunctions import DataTypeStr, Metadata, getExternalDataDirectory as getMutperiodExternalDataDirectory
from mutperiodpy.GeneratePlotnineFigures import parseAndPlotPeriodicity, plotPlusAndMinus

# "hg19" subdirectory of mutperiod's external data directory.
mutperiodHg19Directory = os.path.join(
    getMutperiodExternalDataDirectory(),
    "hg19",
)

# "Adar_smm-seq" subdirectory of this project's data directory; the counts
# files loaded by this notebook are searched for under this path.
smmSeqDataDirectory = os.path.join(
    getDataDir(),
    "Adar_smm-seq",
)

In [None]:
# Category keys used to bucket counts files. The category is derived purely
# from marker substrings in the file path (see _getRadiusCategory).
SINGLE_NUC = 1   # path contains neither "nuc-group" nor "linker+"
NUC_GROUP = 2    # path contains "nuc-group"
EXTENDED = 3     # path contains "linker+" (and not "nuc-group")

# Only counts files whose Metadata.nucPosName appears here are loaded.
TSS_Names = ["hg19_protein_coding_genes_TSSs"]


def _getRadiusCategory(countsFilePath):
    """
    Return the category constant implied by marker substrings in a counts file path.

    "nuc-group" takes precedence over "linker+"; a path with neither marker
    is categorized as SINGLE_NUC.
    """
    if "nuc-group" in countsFilePath: return NUC_GROUP
    if "linker+" in countsFilePath: return EXTENDED
    return SINGLE_NUC


# Both containers are nested dicts: category -> nucPosName -> DataFrame.
# The normalized container has an EXTENDED bucket too, so that a normalized
# "linker+" file is no longer filed under SINGLE_NUC, where it could silently
# replace (or be replaced by) the genuine single-nucleosome table.
smmSeqRawCounts = {SINGLE_NUC:dict(), NUC_GROUP:dict(), EXTENDED:dict()}
smmSeqNormalizedCounts = {SINGLE_NUC:dict(), NUC_GROUP:dict(), EXTENDED:dict()}

# Load raw counts tables.
for nucleosomeCountsFilePath in getFilesInDirectory(smmSeqDataDirectory, DataTypeStr.rawNucCounts+".tsv"):

    countsMetadata = Metadata(nucleosomeCountsFilePath)
    if countsMetadata.nucPosName not in TSS_Names: continue

    radius = _getRadiusCategory(nucleosomeCountsFilePath)
    # NOTE(review): entries are keyed by nucPosName only, so two files sharing
    # a category and nucPosName would overwrite one another - confirm at most
    # one such file exists per category.
    smmSeqRawCounts[radius][countsMetadata.nucPosName] = read_table(nucleosomeCountsFilePath)

# Load normalized counts tables using the same filter and classification.
for nucleosomeCountsFilePath in getFilesInDirectory(smmSeqDataDirectory, DataTypeStr.normNucCounts+".tsv"):

    countsMetadata = Metadata(nucleosomeCountsFilePath)
    if countsMetadata.nucPosName not in TSS_Names: continue

    radius = _getRadiusCategory(nucleosomeCountsFilePath)
    smmSeqNormalizedCounts[radius][countsMetadata.nucPosName] = read_table(nucleosomeCountsFilePath)

In [None]:
# For every loaded raw counts table, draw two figures: one from
# parseAndPlotPeriodicity on the "Both_Strands_Counts" column and one from
# plotPlusAndMinus with smoothing overlaid.
# NOTE(review): the title does not include the radius category, so figures
# from different categories carry identical titles.
for radius, rawCountsByTSS in smmSeqRawCounts.items():
    for TSS_Name, rawCountsTable in rawCountsByTSS.items():
        rawTitle = f"Adar SMM-seq {TSS_Name} Raw"

        plot = parseAndPlotPeriodicity(
            rawCountsTable, dataCol = "Both_Strands_Counts",
            smoothTranslational = False, nucRepLen = 2000,
            title = rawTitle, yAxisLabel = "Mutations"
        )
        # print() is called explicitly because the figure is produced inside a
        # loop, where a bare expression would not be displayed.
        print(plot)

        plot = plotPlusAndMinus(
            rawCountsTable, rawTitle, yAxisLabel = "Mutations",
            smoothData = True, overlaySmoothedAndNormal = True
        )
        print(plot)

In [None]:
# For every loaded normalized counts table, draw two figures: one from
# parseAndPlotPeriodicity on the "Normalized_Both_Strands" column and one from
# plotPlusAndMinus with smoothing overlaid.
# NOTE(review): the title does not include the radius category, so figures
# from different categories carry identical titles.
for radius, normalizedCountsByTSS in smmSeqNormalizedCounts.items():
    for TSS_Name, normalizedCountsTable in normalizedCountsByTSS.items():
        normalizedTitle = f"Adar SMM-seq {TSS_Name} Trinuc Normalized"

        plot = parseAndPlotPeriodicity(
            normalizedCountsTable, dataCol = "Normalized_Both_Strands",
            smoothTranslational = False, nucRepLen = 2000,
            title = normalizedTitle, yAxisLabel = "Normalized Counts"
        )
        # print() is called explicitly because the figure is produced inside a
        # loop, where a bare expression would not be displayed.
        print(plot)

        plot = plotPlusAndMinus(
            normalizedCountsTable, normalizedTitle, yAxisLabel = "Normalized Counts",
            smoothData = True, overlaySmoothedAndNormal = True
        )
        print(plot)