In [None]:
import os
from pandas import read_table
from benbiohelpers.FileSystemHandling.DirectoryHandling import getFilesInDirectory
from bpde_chromatin_analysis.helper_scripts.BPDE_DataDir import getDataDir
from mutperiodpy.helper_scripts.UsefulFileSystemFunctions import DataTypeStr, Metadata
from mutperiodpy.GeneratePlotnineFigures import parseAndPlotPeriodicity, plotPlusAndMinus

# The "Li_tXR-seq" directory inside the directory returned by getDataDir().
# The later cells search this directory for the counts files they load.
liDataDirectory = os.path.join(getDataDir(), "Li_tXR-seq")

In [None]:
# Keys for the two groups of counts files: files whose path contains
# "nuc-group" are stored under NUC_GROUP, all others under SINGLE_NUC.
SINGLE_NUC, NUC_GROUP = 1, 2

# One container per kind of counts table. Each maps a radius key to its own
# (initially empty) dictionary, which the loading loops fill using the
# nucPosName of each file's metadata as the key.
liRawCounts = {radiusKey: {} for radiusKey in (SINGLE_NUC, NUC_GROUP)}
liSinglenucNormalizedCounts = {radiusKey: {} for radiusKey in (SINGLE_NUC, NUC_GROUP)}
liTrinucNormalizedCounts = {radiusKey: {} for radiusKey in (SINGLE_NUC, NUC_GROUP)}
liDamageNormalizedCounts = {radiusKey: {} for radiusKey in (SINGLE_NUC, NUC_GROUP)}

# Only counts files whose metadata nucPosName is one of these are loaded.
TSS_Names = [
    "hg19_protein_coding_genes_TSSs",
    "hg19_protein_coding_genes_TSSs_GM12878_high_expression",
    "hg19_protein_coding_genes_TSSs_GM12878_low_expression",
]

# Read every raw counts table of interest into liRawCounts, keyed first by
# radius and then by the nucPosName reported by the file's Metadata.
for nucleosomeCountsFilePath in getFilesInDirectory(liDataDirectory, DataTypeStr.rawNucCounts+".tsv"):

    countsMetadata = Metadata(nucleosomeCountsFilePath)

    # Files for position sets other than the ones in TSS_Names are skipped.
    if countsMetadata.nucPosName in TSS_Names:

        # The radius is decided from the whole path (not just the file name).
        radius = NUC_GROUP if "nuc-group" in nucleosomeCountsFilePath else SINGLE_NUC

        liRawCounts[radius][countsMetadata.nucPosName] = read_table(nucleosomeCountsFilePath)

# Read every normalized counts table of interest and file it under the
# container matching its normalization type, keyed first by radius and then
# by the nucPosName reported by the file's Metadata.
for nucleosomeCountsFilePath in getFilesInDirectory(liDataDirectory, DataTypeStr.normNucCounts+".tsv"):

    countsMetadata = Metadata(nucleosomeCountsFilePath)
    # Files for position sets other than the ones in TSS_Names are skipped.
    if countsMetadata.nucPosName not in TSS_Names: continue

    # The radius is decided from the whole path (not just the file name).
    if "nuc-group" in nucleosomeCountsFilePath: radius = NUC_GROUP
    else: radius = SINGLE_NUC

    # The normalization type is recognized from the file name only. The checks
    # form a single exclusive chain so that each file is stored in at most one
    # container and its table is read at most once.
    countsFileName = os.path.basename(nucleosomeCountsFilePath)
    if "singlenuc" in countsFileName: normalizedCounts = liSinglenucNormalizedCounts
    elif "trinuc" in countsFileName: normalizedCounts = liTrinucNormalizedCounts
    elif "custom_context" in countsFileName: normalizedCounts = liDamageNormalizedCounts
    else: continue # Not one of the normalization types used in this notebook.

    normalizedCounts[radius][countsMetadata.nucPosName] = read_table(nucleosomeCountsFilePath)

In [None]:
# Pass every loaded raw counts table to parseAndPlotPeriodicity and print the result.
# NOTE(review): the title does not include the radius, so the plots for the
# two radii get identical titles - confirm this is intended.
for radius, rawCountsByTSS in liRawCounts.items():
    for TSS_Name, rawCounts in rawCountsByTSS.items():
        plot = parseAndPlotPeriodicity(
            rawCounts, dataCol = "Both_Strands_Counts",
            smoothTranslational = False, nucRepLen = 2000,
            title = f"Li {TSS_Name} Raw", yAxisLabel = "Counts"
        )
        print(plot)

In [None]:
# Pass every loaded singlenuc-normalized counts table to parseAndPlotPeriodicity
# and print the result.
# NOTE(review): the title does not include the radius, so the plots for the
# two radii get identical titles - confirm this is intended.
for radius, singlenucCountsByTSS in liSinglenucNormalizedCounts.items():
    for TSS_Name, singlenucCounts in singlenucCountsByTSS.items():
        plot = parseAndPlotPeriodicity(
            singlenucCounts, dataCol = "Normalized_Both_Strands",
            smoothTranslational = True, nucRepLen = 2000,
            title = f"Li {TSS_Name} Singlenuc Normalized", yAxisLabel = "Normalized Counts"
        )
        print(plot)

In [None]:
# Pass every loaded trinuc-normalized counts table to parseAndPlotPeriodicity,
# using a fixed y-axis range of (0.5, 2.0), and print the result.
# NOTE(review): the title does not include the radius, so the plots for the
# two radii get identical titles - confirm this is intended.
for radius, trinucCountsByTSS in liTrinucNormalizedCounts.items():
    for TSS_Name, trinucCounts in trinucCountsByTSS.items():
        plot = parseAndPlotPeriodicity(
            trinucCounts, dataCol = "Normalized_Both_Strands",
            smoothTranslational = True, nucRepLen = 2000,
            title = f"Li {TSS_Name} Trinuc Normalized", yAxisLabel = "Normalized Counts",
            ylim=(0.5,2.0)
        )
        print(plot)

In [None]:
# Pass every loaded trinuc-normalized counts table to plotPlusAndMinus
# (with smoothData enabled) and print the result.
# NOTE(review): the title does not include the radius, so the plots for the
# two radii get identical titles - confirm this is intended.
for radius, trinucCountsByTSS in liTrinucNormalizedCounts.items():
    for TSS_Name, trinucCounts in trinucCountsByTSS.items():
        plot = plotPlusAndMinus(
            trinucCounts, f"Li {TSS_Name} Trinuc Normalized",
            yAxisLabel = "Normalized Counts", smoothData=True
        )
        print(plot)

In [None]:
# Pass every loaded table from liDamageNormalizedCounts (the "custom_context"
# files) to parseAndPlotPeriodicity and print the result.
# NOTE(review): the title does not include the radius, so the plots for the
# two radii get identical titles - confirm this is intended.
for radius, damageCountsByTSS in liDamageNormalizedCounts.items():
    for TSS_Name, damageCounts in damageCountsByTSS.items():
        plot = parseAndPlotPeriodicity(
            damageCounts, dataCol = "Normalized_Both_Strands",
            smoothTranslational = True, nucRepLen = 2000,
            title = f"Li {TSS_Name} Damage Normalized", yAxisLabel = "Normalized Counts"
        )
        print(plot)