In [None]:
import os
from plotnine import theme
from pandas import read_table
from scipy.stats import pearsonr
from benbiohelpers.FileSystemHandling.DirectoryHandling import checkDirs
from bpde_chromatin_analysis.helper_scripts.BPDE_DataDir import getDataDir
from bpde_chromatin_analysis.PlottingFunctions import *
from bpde_chromatin_analysis.CallDyadPeaks import callDyadPeaks
from mutperiodpy.helper_scripts.UsefulFileSystemFunctions import getExternalDataDirectory as getMutperiodExternalDataDirectory
from mutperiodpy.quick_scripts.GetNRL import getNRL
from mutperiodpy.GeneratePlotnineFigures import parseAndPlotPeriodicity

# "hg19" folder inside mutperiodpy's external data directory; the nucleosome
# map bed files referenced below are resolved relative to it.
mutperiodHg19Directory = os.path.join(
    getMutperiodExternalDataDirectory(),
    "hg19",
)

# One sub-directory of the project data directory (getDataDir()) per data set.
alexandrovDataDirectory = os.path.join(
    getDataDir(), "Alexandrov_LUAD"
)
controlledAccessDataDirectory = os.path.join(
    getDataDir(), "controlled_access_LC_combined"
)
liDataDirectory = os.path.join(
    getDataDir(), "Li_tXR-seq"
)
jiangDataDirectory = os.path.join(
    getDataDir(), "Jiang_BPDE_damage_maps"
)
adarDataDirectory = os.path.join(
    getDataDir(), "Adar_damage-seq"
)
relativeNucleosomeDataDirectory = os.path.join(
    getDataDir(), "relative_nucleosome_patterns"
)
relativeTFBS_DataDirectory = os.path.join(
    getDataDir(), "relative_TFBS_patterns"
)

# getNRL is given a one-element list of bed files; keep the first (only) result
# for each nucleosome map.
hybridNucMapNRL = getNRL(
    [os.path.join(mutperiodHg19Directory, "hg19_hybrid_nuc_map", "hg19_hybrid_nuc_map.bed")]
)[0]
LCL_NucMapNRL = getNRL(
    [os.path.join(mutperiodHg19Directory, "hg19_LCL_MNase_nuc_map", "hg19_LCL_MNase_nuc_map.bed")]
)[0]

# Resolution used by every plot.save(...) call in this notebook.
outputDPI = 1000

# Destination directory for the exported figure panels; checkDirs is called on
# it before any figure is written.
paperFiguresOutputDir = os.path.join(
    getDataDir(), "paper_figures"
)
checkDirs(paperFiguresOutputDir)

#### Preload Data

Nucleosomes (Translational)

In [None]:
# --- Hybrid nucleosome map: "nuc-group" normalized count tables ---
controlledAccessTranslationalData = read_table(
    os.path.join(
        controlledAccessDataDirectory,
        "hg19_hybrid_nuc_map",
        "controlled_access_LC_hybrid_nuc_map_trinuc_nuc-group_normalized_nucleosome_mutation_counts.tsv",
    )
)
adarDamageSeqTranslationalData = read_table(
    os.path.join(
        adarDataDirectory,
        "A549_BPDE_cell_2h",
        "hg19_hybrid_nuc_map",
        "A549_BPDE_cell_2h_hybrid_nuc_map_custom_context_nuc-group_normalized_nucleosome_mutation_counts.tsv",
    )
)
jiangTranslationalData = read_table(
    os.path.join(
        jiangDataDirectory,
        "BEAS-2B_2uM_BPDE_cell_24h",
        "hg19_hybrid_nuc_map",
        "BEAS-2B_2uM_BPDE_cell_24h_hybrid_nuc_map_custom_context_nuc-group_normalized_nucleosome_mutation_counts.tsv",
    )
)
liTranslationalData = read_table(
    os.path.join(
        liDataDirectory,
        "hg19_hybrid_nuc_map",
        "Li_tXR-seq_hybrid_nuc_map_trinuc_nuc-group_normalized_nucleosome_mutation_counts.tsv",
    )
)

# --- LCL MNase nucleosome map: same four data sets ---
controlledAccessTranslationalLCLData = read_table(
    os.path.join(
        controlledAccessDataDirectory,
        "hg19_LCL_MNase_nuc_map",
        "controlled_access_LC_LCL_MNase_map_trinuc_nuc-group_normalized_nucleosome_mutation_counts.tsv",
    )
)
adarDamageSeqTranslationalLCLData = read_table(
    os.path.join(
        adarDataDirectory,
        "A549_BPDE_cell_2h",
        "hg19_LCL_MNase_nuc_map",
        "A549_BPDE_cell_2h_LCL_MNase_map_custom_context_nuc-group_normalized_nucleosome_mutation_counts.tsv",
    )
)
jiangTranslationalLCLData = read_table(
    os.path.join(
        jiangDataDirectory,
        "BEAS-2B_2uM_BPDE_cell_24h",
        "hg19_LCL_MNase_nuc_map",
        "BEAS-2B_2uM_BPDE_cell_24h_LCL_MNase_map_custom_context_nuc-group_normalized_nucleosome_mutation_counts.tsv",
    )
)
liTranslationalLCLData = read_table(
    os.path.join(
        liDataDirectory,
        "hg19_LCL_MNase_nuc_map",
        "Li_tXR-seq_LCL_MNase_map_trinuc_nuc-group_normalized_nucleosome_mutation_counts.tsv",
    )
)

Nucleosomes (Rotational)

In [None]:
# --- Hybrid nucleosome map: normalized count tables (no "nuc-group" suffix) ---
controlledAccessRotationalHybridData = read_table(
    os.path.join(
        controlledAccessDataDirectory,
        "hg19_hybrid_nuc_map",
        "controlled_access_LC_hybrid_nuc_map_trinuc_normalized_nucleosome_mutation_counts.tsv",
    )
)
adarDamageSeqRotationalData = read_table(
    os.path.join(
        adarDataDirectory,
        "A549_BPDE_cell_2h",
        "hg19_hybrid_nuc_map",
        "A549_BPDE_cell_2h_hybrid_nuc_map_custom_context_normalized_nucleosome_mutation_counts.tsv",
    )
)
jiangRotationalData = read_table(
    os.path.join(
        jiangDataDirectory,
        "BEAS-2B_2uM_BPDE_cell_24h",
        "hg19_hybrid_nuc_map",
        "BEAS-2B_2uM_BPDE_cell_24h_hybrid_nuc_map_custom_context_normalized_nucleosome_mutation_counts.tsv",
    )
)
liRotationalData = read_table(
    os.path.join(
        liDataDirectory,
        "hg19_hybrid_nuc_map",
        "Li_tXR-seq_hybrid_nuc_map_trinuc_normalized_nucleosome_mutation_counts.tsv",
    )
)

# --- LCL MNase nucleosome map: same four data sets ---
controlledAccessRotationalLCLData = read_table(
    os.path.join(
        controlledAccessDataDirectory,
        "hg19_LCL_MNase_nuc_map",
        "controlled_access_LC_LCL_MNase_map_trinuc_normalized_nucleosome_mutation_counts.tsv",
    )
)
adarDamageSeqRotationalLCLData = read_table(
    os.path.join(
        adarDataDirectory,
        "A549_BPDE_cell_2h",
        "hg19_LCL_MNase_nuc_map",
        "A549_BPDE_cell_2h_LCL_MNase_map_custom_context_normalized_nucleosome_mutation_counts.tsv",
    )
)
jiangRotationalLCLData = read_table(
    os.path.join(
        jiangDataDirectory,
        "BEAS-2B_2uM_BPDE_cell_24h",
        "hg19_LCL_MNase_nuc_map",
        "BEAS-2B_2uM_BPDE_cell_24h_LCL_MNase_map_custom_context_normalized_nucleosome_mutation_counts.tsv",
    )
)
liRotationalLCLData = read_table(
    os.path.join(
        liDataDirectory,
        "hg19_LCL_MNase_nuc_map",
        "Li_tXR-seq_LCL_MNase_map_trinuc_normalized_nucleosome_mutation_counts.tsv",
    )
)

CTCF

In [None]:
# --- Normalized count tables from each data set's "hg19_CTCF_known" folder ---
adarDamageSeqCTCF = read_table(
    os.path.join(
        adarDataDirectory,
        "A549_BPDE_cell_2h",
        "hg19_CTCF_known",
        "A549_BPDE_cell_2h_CTCF_known_custom_context_nuc-group_normalized_nucleosome_mutation_counts.tsv",
    )
)
jiangCTCF = read_table(
    os.path.join(
        jiangDataDirectory,
        "BEAS-2B_2uM_BPDE_cell_24h",
        "hg19_CTCF_known",
        "BEAS-2B_2uM_BPDE_cell_24h_CTCF_known_custom_context_nuc-group_normalized_nucleosome_mutation_counts.tsv",
    )
)
# The Li data set is loaded twice: the "trinuc" table and the "custom_context" table.
liCTCF = read_table(
    os.path.join(
        liDataDirectory,
        "hg19_CTCF_known",
        "Li_tXR-seq_CTCF_known_trinuc_nuc-group_normalized_nucleosome_mutation_counts.tsv",
    )
)
liDamageNormalizedCTCF = read_table(
    os.path.join(
        liDataDirectory,
        "hg19_CTCF_known",
        "Li_tXR-seq_CTCF_known_custom_context_nuc-group_normalized_nucleosome_mutation_counts.tsv",
    )
)
alexandrovCTCF = read_table(
    os.path.join(
        alexandrovDataDirectory,
        "hg19_CTCF_known",
        "Alexandrov_LUAD_CTCF_known_trinuc_nuc-group_normalized_nucleosome_mutation_counts.tsv",
    )
)
controlledAccessCTCF = read_table(
    os.path.join(
        controlledAccessDataDirectory,
        "hg19_CTCF_known",
        "controlled_access_LC_CTCF_known_trinuc_nuc-group_normalized_nucleosome_mutation_counts.tsv",
    )
)

# --- Hybrid-map dyad counts in the CTCF folder, plus the peaks called from them ---
CTCF_RelativeHybridDyads = read_table(
    os.path.join(
        relativeNucleosomeDataDirectory,
        "hybrid",
        "hg19_CTCF_known",
        "hybrid_CTCF_known_nuc-group_raw_nucleosome_mutation_counts.tsv",
    )
)
CTCF_RelativeHybridDyadPeaks = callDyadPeaks(
    CTCF_RelativeHybridDyads,
    smoothingWindow = 11,
    blacklistedRegions = range(-100, 101),
    peakExclusionRadius = 160,
)
# LCL MNase equivalent, currently disabled:
# CTCF_RelativeLCL_Dyads = read_table(os.path.join(relativeNucleosomeDataDirectory, "LCL_MNase", "hg19_CTCF_known", "LCL_MNase_CTCF_known_nuc-group_raw_nucleosome_mutation_counts.tsv"))
# CTCF_RelativeLCL_DyadPeaks = callDyadPeaks(CTCF_RelativeLCL_Dyads, smoothingWindow = 11, blacklistedRegions=range(-100,101), peakExclusionRadius = 160)

SP1

In [None]:
# --- Normalized count tables from each data set's "hg19_SP1_known" folder ---
# The Li data set is loaded twice: the "trinuc" table and the "custom_context" table.
liSP1 = read_table(
    os.path.join(
        liDataDirectory,
        "hg19_SP1_known",
        "Li_tXR-seq_SP1_known_trinuc_nuc-group_normalized_nucleosome_mutation_counts.tsv",
    )
)
liDamageNormalizedSP1 = read_table(
    os.path.join(
        liDataDirectory,
        "hg19_SP1_known",
        "Li_tXR-seq_SP1_known_custom_context_nuc-group_normalized_nucleosome_mutation_counts.tsv",
    )
)
adarDamageSeqSP1 = read_table(
    os.path.join(
        adarDataDirectory,
        "A549_BPDE_cell_2h",
        "hg19_SP1_known",
        "A549_BPDE_cell_2h_SP1_known_custom_context_nuc-group_normalized_nucleosome_mutation_counts.tsv",
    )
)
jiangSP1 = read_table(
    os.path.join(
        jiangDataDirectory,
        "BEAS-2B_2uM_BPDE_cell_24h",
        "hg19_SP1_known",
        "BEAS-2B_2uM_BPDE_cell_24h_SP1_known_custom_context_nuc-group_normalized_nucleosome_mutation_counts.tsv",
    )
)
controlledAccessSP1 = read_table(
    os.path.join(
        controlledAccessDataDirectory,
        "hg19_SP1_known",
        "controlled_access_LC_SP1_known_trinuc_nuc-group_normalized_nucleosome_mutation_counts.tsv",
    )
)

# --- Hybrid-map dyad counts in the SP1 folder ---
SP1_RelativeHybridDyads = read_table(
    os.path.join(
        relativeNucleosomeDataDirectory,
        "hybrid",
        "hg19_SP1_known",
        "hybrid_SP1_known_nuc-group_raw_nucleosome_mutation_counts.tsv",
    )
)

TSS

In [None]:
# --- Controlled-access data: all TSSs, then the A549 high/low expression subsets ---
controlledAccessTSS = read_table(
    os.path.join(
        controlledAccessDataDirectory,
        "hg19_protein_coding_genes_TSSs",
        "controlled_access_LC_TSSs_trinuc_nuc-group_normalized_nucleosome_mutation_counts.tsv",
    )
)
controlledAccessTSSHighExpr = read_table(
    os.path.join(
        controlledAccessDataDirectory,
        "hg19_protein_coding_genes_TSSs_A549_high_expression",
        "controlled_access_LC_TSSs_A549_high_expr_trinuc_nuc-group_normalized_nucleosome_mutation_counts.tsv",
    )
)
controlledAccessTSSLowExpr = read_table(
    os.path.join(
        controlledAccessDataDirectory,
        "hg19_protein_coding_genes_TSSs_A549_low_expression",
        "controlled_access_LC_TSSs_A549_low_expr_trinuc_nuc-group_normalized_nucleosome_mutation_counts.tsv",
    )
)

# --- Li tXR-seq: all TSSs, then the GM12878 high/low expression subsets ---
liTSS = read_table(
    os.path.join(
        liDataDirectory,
        "hg19_protein_coding_genes_TSSs",
        "Li_tXR-seq_TSSs_trinuc_nuc-group_normalized_nucleosome_mutation_counts.tsv",
    )
)
liTSSHighExpr = read_table(
    os.path.join(
        liDataDirectory,
        "hg19_protein_coding_genes_TSSs_GM12878_high_expression",
        "Li_tXR-seq_TSSs_GM12878_high_expr_trinuc_nuc-group_normalized_nucleosome_mutation_counts.tsv",
    )
)
liTSSLowExpr = read_table(
    os.path.join(
        liDataDirectory,
        "hg19_protein_coding_genes_TSSs_GM12878_low_expression",
        "Li_tXR-seq_TSSs_GM12878_low_expr_trinuc_nuc-group_normalized_nucleosome_mutation_counts.tsv",
    )
)

# --- Adar damage-seq: all TSSs, then the A549 high/low expression subsets ---
adarDamageSeqTSS = read_table(
    os.path.join(
        adarDataDirectory,
        "A549_BPDE_cell_2h",
        "hg19_protein_coding_genes_TSSs",
        "A549_BPDE_cell_2h_TSSs_custom_context_nuc-group_normalized_nucleosome_mutation_counts.tsv",
    )
)
adarDamageSeqTSSHighExpr = read_table(
    os.path.join(
        adarDataDirectory,
        "A549_BPDE_cell_2h",
        "hg19_protein_coding_genes_TSSs_A549_high_expression",
        "A549_BPDE_cell_2h_TSSs_A549_high_expr_custom_context_nuc-group_normalized_nucleosome_mutation_counts.tsv",
    )
)
adarDamageSeqTSSLowExpr = read_table(
    os.path.join(
        adarDataDirectory,
        "A549_BPDE_cell_2h",
        "hg19_protein_coding_genes_TSSs_A549_low_expression",
        "A549_BPDE_cell_2h_TSSs_A549_low_expr_custom_context_nuc-group_normalized_nucleosome_mutation_counts.tsv",
    )
)

# --- Jiang damage maps: all TSSs only ---
jiangTSS = read_table(
    os.path.join(
        jiangDataDirectory,
        "BEAS-2B_2uM_BPDE_cell_24h",
        "hg19_protein_coding_genes_TSSs",
        "BEAS-2B_2uM_BPDE_cell_24h_TSSs_custom_context_nuc-group_normalized_nucleosome_mutation_counts.tsv",
    )
)

# Raw "nuc-group" count tables for SP1 sites and hybrid-map dyads, both taken
# from the protein-coding TSS folders.
TSS_RelativeSP1 = read_table(os.path.join(relativeTFBS_DataDirectory, "SP1", "hg19_protein_coding_genes_TSSs",
                                          "SP1_TSSs_nuc-group_raw_nucleosome_mutation_counts.tsv"))
TSS_RelativeHybridDyads = read_table(os.path.join(relativeNucleosomeDataDirectory, "hybrid", "hg19_protein_coding_genes_TSSs",
                                                  "hybrid_TSSs_nuc-group_raw_nucleosome_mutation_counts.tsv"))
# Call peaks on the TSS dyad table loaded on the previous line. This used to be
# fed CTCF_RelativeHybridDyads (copy-paste slip), which produced CTCF peaks under
# a TSS name and made this cell depend on the CTCF preload cell having run.
TSS_RelativeHybridDyadPeaks = callDyadPeaks(TSS_RelativeHybridDyads, smoothingWindow = 11,
                                            blacklistedRegions = range(-1000,1), peakExclusionRadius = 160)
# LCL MNase equivalent, currently disabled:
# TSS_RelativeLCL_Dyads = read_table(os.path.join(relativeNucleosomeDataDirectory, "LCL_MNase", "hg19_protein_coding_genes_TSSs",
#                                                 "LCL_MNase_TSSs_nuc-group_raw_nucleosome_mutation_counts.tsv"))

tXR-seq Lesion Positions

In [None]:
# Two tables stored directly in the Li tXR-seq data directory.
liEnrichedIndices = read_table(
    os.path.join(liDataDirectory, "Li_tXR-seq_enriched_indices.tsv")
)
liIndividualNucleotideFrequencies = read_table(
    os.path.join(liDataDirectory, "Li_tXR-seq_individual_nuc_frequencies.tsv")
)

#### Figure 1

1A

In [None]:
# Panel 1A: periodicity plot of the controlled-access translational table
# (hybrid nucleosome map), raw and smoothed series overlaid.
plot = parseAndPlotPeriodicity(
    controlledAccessTranslationalData,
    dataCol = "Normalized_Both_Strands",
    nucRepLen = hybridNucMapNRL,
    smoothTranslational = True,
    overlaySmoothedAndNormal = True,
    title = f"TCGA+ICGC LC Hybrid Nuc Map Trinuc Normalized",
    yAxisLabel = "Mutation Enrichment",
    ylim = (0.8, 1.3),
)
plot.show()
# Write the panel into the paper figures directory at the notebook-wide DPI.
plot.save(
    os.path.join(paperFiguresOutputDir, "1A.png"),
    dpi = outputDPI,
)

1B

In [None]:
# Panel 1B: periodicity plot of the Jiang translational table (hybrid
# nucleosome map), raw and smoothed series overlaid.
plot = parseAndPlotPeriodicity(
    jiangTranslationalData,
    dataCol = "Normalized_Both_Strands",
    nucRepLen = hybridNucMapNRL,
    smoothTranslational = True,
    overlaySmoothedAndNormal = True,
    title = f"Jiang Hybrid Nuc Map Naked Normalized",
    yAxisLabel = "Cellular/Naked Damage",
    ylim = (0.8, 1.3),
)
plot.show()
plot.save(
    os.path.join(paperFiguresOutputDir, "1B.png"),
    dpi = outputDPI,
)

# Pearson correlation of the plotted column against the same column of the
# controlled-access translational table.
print(
    pearsonr(
        jiangTranslationalData["Normalized_Both_Strands"],
        controlledAccessTranslationalData["Normalized_Both_Strands"],
    )
)

1C

In [None]:
# Panel 1C: periodicity plot of the Li translational table (hybrid nucleosome
# map), raw and smoothed series overlaid.
plot = parseAndPlotPeriodicity(
    liTranslationalData,
    dataCol = "Normalized_Both_Strands",
    nucRepLen = hybridNucMapNRL,
    smoothTranslational = True,
    overlaySmoothedAndNormal = True,
    title = f"Li Hybrid Nuc Map Trinuc Normalized",
    yAxisLabel = "Normalized Repair Activity",
    ylim = (0.6, 1.6),
)
plot.show()
plot.save(
    os.path.join(paperFiguresOutputDir, "1C.png"),
    dpi = outputDPI,
)

# Pearson correlation of the plotted column against the same column of the
# controlled-access translational table.
print(
    pearsonr(
        liTranslationalData["Normalized_Both_Strands"],
        controlledAccessTranslationalData["Normalized_Both_Strands"],
    )
)

#### Figure 2

2A

In [None]:
# Panel 2A: periodicity plot of the Jiang rotational table (hybrid nucleosome map).
plot = parseAndPlotPeriodicity(
    jiangRotationalData,
    dataCol = "Normalized_Both_Strands",
    nucRepLen = hybridNucMapNRL,
    title = f"Jiang Hybrid Nuc Map Naked Normalized",
    yAxisLabel = "Cellular/Naked Damage",
    ylim = (0.8, 1.2),
)
plot.show()
plot.save(
    os.path.join(paperFiguresOutputDir, "2A.png"),
    dpi = outputDPI,
)

# Pearson correlation against the controlled-access rotational table (same column).
print(
    pearsonr(
        jiangRotationalData["Normalized_Both_Strands"],
        controlledAccessRotationalHybridData["Normalized_Both_Strands"],
    )
)

2B

In [None]:
# Panel 2B: periodicity plot of the Li rotational table (hybrid nucleosome map).
# No ylim is passed, so the plotting function's own default applies.
plot = parseAndPlotPeriodicity(
    liRotationalData,
    dataCol = "Normalized_Both_Strands",
    nucRepLen = hybridNucMapNRL,
    title = f"Li Hybrid Nuc Map Trinuc Normalized",
    yAxisLabel = "Normalized Repair Activity",
)
plot.show()
plot.save(
    os.path.join(paperFiguresOutputDir, "2B.png"),
    dpi = outputDPI,
)

# Pearson correlation against the controlled-access rotational table (same column).
print(
    pearsonr(
        liRotationalData["Normalized_Both_Strands"],
        controlledAccessRotationalHybridData["Normalized_Both_Strands"],
    )
)

#### Figure 3

3A

In [None]:
# Panel 3A: Li rotational table again, but plotting the strand-aligned column.
plot = parseAndPlotPeriodicity(
    liRotationalData,
    dataCol = "Normalized_Aligned_Strands",
    nucRepLen = hybridNucMapNRL,
    title = f"Li Hybrid Nuc Map Trinuc Normalized",
    xAxisLabel = "Position Relative to Dyad (bp; strand-aligned)",
    yAxisLabel = "Normalized Repair Activity",
    ylim = (0.6, 1.6),
)
plot.show()
plot.save(
    os.path.join(paperFiguresOutputDir, "3A.png"),
    dpi = outputDPI,
)

# Pearson correlation of the strand-aligned columns (Li vs. controlled access).
print(
    pearsonr(
        liRotationalData["Normalized_Aligned_Strands"],
        controlledAccessRotationalHybridData["Normalized_Aligned_Strands"],
    )
)

3B

In [None]:
# Panel 3B: same call as panel 3A with colorRotationalTransitions switched on.
plot = parseAndPlotPeriodicity(
    liRotationalData,
    dataCol = "Normalized_Aligned_Strands",
    nucRepLen = hybridNucMapNRL,
    colorRotationalTransitions = True,
    title = f"Li Hybrid Nuc Map Trinuc Normalized",
    xAxisLabel = "Position Relative to Dyad (bp; strand-aligned)",
    yAxisLabel = "Normalized Repair Activity",
    ylim = (0.6, 1.6),
)
plot.show()
plot.save(
    os.path.join(paperFiguresOutputDir, "3B.png"),
    dpi = outputDPI,
)

#### Figure 4 is the nucleosome structural analysis from Dr. Wyrick

#### Figure 5

5A

In [None]:
# Panel 5A: Adar damage-seq CTCF table plotted together with the called
# hybrid-map dyad peaks; positions -50..50 are passed as blacklisted.
plot = plotFeatureWithCustomNucleosomes(
    adarDamageSeqCTCF,
    CTCF_RelativeHybridDyadPeaks,
    blacklistedRegions = range(-50, 51),
    title = "Adar Damage-seq CTCF Damage",
    xAxisLabel = "Position Relative to CTCF Midpoint (bp)",
    yAxisLabel = "Cellular/Naked Damage",
)
plot.show()
plot.save(
    os.path.join(paperFiguresOutputDir, "5A.png"),
    dpi = outputDPI,
)

# Pearson correlation against the controlled-access CTCF table.
print(
    pearsonr(
        adarDamageSeqCTCF["Normalized_Both_Strands"],
        controlledAccessCTCF["Normalized_Both_Strands"],
    )
)

5B

In [None]:
# Panel 5B: Li CTCF table plotted together with the called hybrid-map dyad
# peaks; positions -50..50 are passed as blacklisted.
plot = plotFeatureWithCustomNucleosomes(
    liCTCF,
    CTCF_RelativeHybridDyadPeaks,
    blacklistedRegions = range(-50, 51),
    title = "CTCF Repair",
    xAxisLabel = "Position Relative to CTCF Midpoint (bp)",
    yAxisLabel = "Normalized Repair Activity",
    ylim = (0, 4),
)
plot.show()
plot.save(
    os.path.join(paperFiguresOutputDir, "5B.png"),
    dpi = outputDPI,
)

# Pearson correlation against the controlled-access CTCF table.
print(
    pearsonr(
        liCTCF["Normalized_Both_Strands"],
        controlledAccessCTCF["Normalized_Both_Strands"],
    )
)

5C

In [None]:
# Panel 5C: controlled-access CTCF table plotted together with the called
# hybrid-map dyad peaks; positions -50..50 are passed as blacklisted.
plot = plotFeatureWithCustomNucleosomes(
    controlledAccessCTCF,
    CTCF_RelativeHybridDyadPeaks,
    blacklistedRegions = range(-50, 51),
    title = "Lung Cancer CTCF Mutations",
    xAxisLabel = "Position Relative to CTCF Midpoint (bp)",
    yAxisLabel = "Mutation Enrichment",
    ylim = (0, 2.1),
)
plot.show()
plot.save(
    os.path.join(paperFiguresOutputDir, "5C.png"),
    dpi = outputDPI,
)

#### Figure 6

6A

In [None]:
# Panel 6A: the three CTCF tables (Adar, Li, controlled access) in one plot,
# smoothed, without the unsmoothed overlay; no xlim is passed.
plot = plotDamageRepairAndMutagenesis(
    adarDamageSeqCTCF,
    liCTCF,
    controlledAccessCTCF,
    title = "CTCF Damage vs. Repair vs. Mutagenesis",
    xAxisLabel = "Position Relative to CTCF Midpoint (bp)",
    ylim = (0, 4),
    smoothData = True,
    overlaySmoothedAndNormal = False,
)
plot.show()
plot.save(
    os.path.join(paperFiguresOutputDir, "6A.png"),
    dpi = outputDPI,
)

# Pearson correlations against the controlled-access CTCF table, over all rows:
# first the Adar table, then the Li table.
print(
    pearsonr(
        adarDamageSeqCTCF["Normalized_Both_Strands"],
        controlledAccessCTCF["Normalized_Both_Strands"],
    )
)
print(
    pearsonr(
        liCTCF["Normalized_Both_Strands"],
        controlledAccessCTCF["Normalized_Both_Strands"],
    )
)

6B

In [None]:
# Panel 6B: the three CTCF tables in one plot, unsmoothed, x axis limited to
# -100..100.
plot = plotDamageRepairAndMutagenesis(
    adarDamageSeqCTCF,
    liCTCF,
    controlledAccessCTCF,
    title = "CTCF Damage vs. Repair vs. Mutagenesis",
    xAxisLabel = "Position Relative to CTCF Midpoint (bp)",
    xlim = (-100, 100),
    ylim = (0, 4),
    smoothData = False,
    overlaySmoothedAndNormal = False,
)
plot.show()
plot.save(
    os.path.join(paperFiguresOutputDir, "6B.png"),
    dpi = outputDPI,
)

# Pearson correlations restricted to rows whose Dyad_Position lies in
# [-100, 100] (Series.between is inclusive on both ends by default):
# first the Adar table, then the Li table, each against controlled access.
print(
    pearsonr(
        adarDamageSeqCTCF.loc[
            adarDamageSeqCTCF["Dyad_Position"].between(-100, 100),
            "Normalized_Both_Strands",
        ],
        controlledAccessCTCF.loc[
            controlledAccessCTCF["Dyad_Position"].between(-100, 100),
            "Normalized_Both_Strands",
        ],
    )
)
print(
    pearsonr(
        liCTCF.loc[
            liCTCF["Dyad_Position"].between(-100, 100),
            "Normalized_Both_Strands",
        ],
        controlledAccessCTCF.loc[
            controlledAccessCTCF["Dyad_Position"].between(-100, 100),
            "Normalized_Both_Strands",
        ],
    )
)

6C

In [None]:
# Panel 6C: the three CTCF tables in one plot, unsmoothed, x axis limited to
# -25..25.
plot = plotDamageRepairAndMutagenesis(
    adarDamageSeqCTCF,
    liCTCF,
    controlledAccessCTCF,
    title = "CTCF Damage vs. Repair vs. Mutagenesis",
    xAxisLabel = "Position Relative to CTCF Midpoint (bp)",
    xlim = (-25, 25),
    ylim = (0, 4),
    smoothData = False,
    overlaySmoothedAndNormal = False,
)
plot.show()
plot.save(
    os.path.join(paperFiguresOutputDir, "6C.png"),
    dpi = outputDPI,
)

# Pearson correlations restricted to rows whose Dyad_Position lies in
# [-25, 25] (Series.between is inclusive on both ends by default):
# first the Adar table, then the Li table, each against controlled access.
print(
    pearsonr(
        adarDamageSeqCTCF.loc[
            adarDamageSeqCTCF["Dyad_Position"].between(-25, 25),
            "Normalized_Both_Strands",
        ],
        controlledAccessCTCF.loc[
            controlledAccessCTCF["Dyad_Position"].between(-25, 25),
            "Normalized_Both_Strands",
        ],
    )
)
print(
    pearsonr(
        liCTCF.loc[
            liCTCF["Dyad_Position"].between(-25, 25),
            "Normalized_Both_Strands",
        ],
        controlledAccessCTCF.loc[
            controlledAccessCTCF["Dyad_Position"].between(-25, 25),
            "Normalized_Both_Strands",
        ],
    )
)

#### Figure 7

7A

In [None]:
# Panel 7A: Adar and Li SP1 tables in one plot.
plot = plotDamageAndRepair(
    adarDamageSeqSP1,
    liSP1,
    title = "SP1 Damage vs. Repair",
    xAxisLabel = "Position Relative to SP1 Midpoint (bp)",
    ylim = (0, 2.5),
)
plot.show()
plot.save(
    os.path.join(paperFiguresOutputDir, "7A.png"),
    dpi = outputDPI,
)

# Pearson correlations against the controlled-access SP1 table, over all rows:
# first the Adar table, then the Li table.
print(
    pearsonr(
        adarDamageSeqSP1["Normalized_Both_Strands"],
        controlledAccessSP1["Normalized_Both_Strands"],
    )
)
print(
    pearsonr(
        liSP1["Normalized_Both_Strands"],
        controlledAccessSP1["Normalized_Both_Strands"],
    )
)

7B

In [None]:
# Panel 7B: the three SP1 tables (Adar, Li, controlled access) in one plot,
# unsmoothed, x axis limited to -100..100.
plot = plotDamageRepairAndMutagenesis(
    adarDamageSeqSP1,
    liSP1,
    controlledAccessSP1,
    title = "SP1 Damage vs. Repair vs. Mutagenesis",
    xAxisLabel = "Position Relative to SP1 Midpoint (bp)",
    xlim = (-100, 100),
    ylim = (0, 2.5),
    smoothData = False,
    overlaySmoothedAndNormal = False,
)
plot.show()
plot.save(
    os.path.join(paperFiguresOutputDir, "7B.png"),
    dpi = outputDPI,
)

# Pearson correlations restricted to rows whose Dyad_Position lies in
# [-100, 100] (Series.between is inclusive on both ends by default):
# first the Adar table, then the Li table, each against controlled access.
print(
    pearsonr(
        adarDamageSeqSP1.loc[
            adarDamageSeqSP1["Dyad_Position"].between(-100, 100),
            "Normalized_Both_Strands",
        ],
        controlledAccessSP1.loc[
            controlledAccessSP1["Dyad_Position"].between(-100, 100),
            "Normalized_Both_Strands",
        ],
    )
)
print(
    pearsonr(
        liSP1.loc[
            liSP1["Dyad_Position"].between(-100, 100),
            "Normalized_Both_Strands",
        ],
        controlledAccessSP1.loc[
            controlledAccessSP1["Dyad_Position"].between(-100, 100),
            "Normalized_Both_Strands",
        ],
    )
)

7C (Deprecated)

In [None]:
# Panel 7C (the notebook heading marks it as deprecated): the three SP1 tables
# in one plot, unsmoothed, x axis limited to -25..25.
plot = plotDamageRepairAndMutagenesis(
    adarDamageSeqSP1,
    liSP1,
    controlledAccessSP1,
    title = "SP1 Damage vs. Repair vs. Mutagenesis",
    xAxisLabel = "Position Relative to SP1 Midpoint (bp)",
    xlim = (-25, 25),
    ylim = (0, 2.5),
    smoothData = False,
    overlaySmoothedAndNormal = False,
)
plot.show()
plot.save(
    os.path.join(paperFiguresOutputDir, "7C.png"),
    dpi = outputDPI,
)

# Pearson correlations restricted to rows whose Dyad_Position lies in
# [-25, 25] (Series.between is inclusive on both ends by default):
# first the Adar table, then the Li table, each against controlled access.
print(
    pearsonr(
        adarDamageSeqSP1.loc[
            adarDamageSeqSP1["Dyad_Position"].between(-25, 25),
            "Normalized_Both_Strands",
        ],
        controlledAccessSP1.loc[
            controlledAccessSP1["Dyad_Position"].between(-25, 25),
            "Normalized_Both_Strands",
        ],
    )
)
print(
    pearsonr(
        liSP1.loc[
            liSP1["Dyad_Position"].between(-25, 25),
            "Normalized_Both_Strands",
        ],
        controlledAccessSP1.loc[
            controlledAccessSP1["Dyad_Position"].between(-25, 25),
            "Normalized_Both_Strands",
        ],
    )
)

#### Figure 8

8A

In [None]:
# Panel 8A: Adar and Li all-TSS tables in one plot, smoothed with the
# unsmoothed series overlaid, x axis limited to -1000..1000.
plot = plotDamageAndRepair(
    adarDamageSeqTSS,
    liTSS,
    title = "All TSS",
    xAxisLabel = "Position Relative to TSS (bp)",
    xlim = (-1000, 1000),
    ylim = (0.6, 1.6),
    smoothData = True,
    overlaySmoothedAndNormal = True,
)
plot.show()
plot.save(
    os.path.join(paperFiguresOutputDir, "8A.png"),
    dpi = outputDPI,
)

8B

In [None]:
# Panel 8B: controlled-access all-TSS table on its own, smoothed with the
# unsmoothed series overlaid, drawn in the mutation colour pair.
plot = plotSingleFeature(
    controlledAccessTSS,
    title = "All TSS",
    xAxisLabel = "Position Relative to TSS (bp)",
    yAxisLabel = "Mutation Enrichment",
    xlim = (-1000, 1000),
    ylim = (0.4, 2.0),
    smoothData = True,
    overlaySmoothedAndNormal = True,
    color = PlottingColors.MUTATION.value,
    underlaidColor = PlottingColors.MUTATION_UNDERLAY.value,
)
# Override the figure size, then pin the y-axis breaks.
# NOTE(review): scale_y_continuous is not imported by name in this notebook;
# presumably it arrives through the PlottingFunctions star import - confirm.
plot = plot + theme(figure_size = (10.3, 6))
plot = plot + scale_y_continuous(breaks = (0.4, 0.8, 1.2, 1.6, 2.0))
plot.show()
plot.save(
    os.path.join(paperFiguresOutputDir, "8B.png"),
    dpi = outputDPI,
)


# Pearson correlations against the controlled-access TSS table, restricted to
# rows whose Dyad_Position lies in [-1000, 1000] (Series.between is inclusive
# on both ends by default).
print(
    "Damage vs. Mutations:",
    pearsonr(
        adarDamageSeqTSS.loc[
            adarDamageSeqTSS["Dyad_Position"].between(-1000, 1000),
            "Normalized_Both_Strands",
        ],
        controlledAccessTSS.loc[
            controlledAccessTSS["Dyad_Position"].between(-1000, 1000),
            "Normalized_Both_Strands",
        ],
    ),
)
print(
    "Repair vs. Mutations:",
    pearsonr(
        liTSS.loc[
            liTSS["Dyad_Position"].between(-1000, 1000),
            "Normalized_Both_Strands",
        ],
        controlledAccessTSS.loc[
            controlledAccessTSS["Dyad_Position"].between(-1000, 1000),
            "Normalized_Both_Strands",
        ],
    ),
)

8C

In [None]:
# Wrap the TSS-relative dyad counts as a Plottable labelled "Nucleosome Dyads".
TSS_RelativeHybridDyadsPlottable = Plottable(
    TSS_RelativeHybridDyads,
    "Both_Strands_Counts",
    "Nucleosome Dyads",
    "#000000",
    "#BFBFBF",
)
# Disabled alternative that builds the dyad Plottable from the called peaks:
# TSS_RelativeHybridDyadsPlottable = getCustomNucleosomePlottable(
#     TSS_RelativeHybridDyads, TSS_RelativeHybridDyadPeaks, "Both_Strands_Counts", blacklistedRegions = range(-1000,-101)
# )
# TSS_RelativeHybridDyadsPlottable.columnColorBreaksAndLabels["#000000"] = "Disordered Dyads"

# Wrap the TSS-relative SP1 counts as a Plottable labelled "SP1 Binding Sites".
TSS_RelativeSP1Plottable = Plottable(
    TSS_RelativeSP1,
    "Both_Strands_Counts",
    "SP1 Binding Sites",
    "#1E8449",
    "#9AADA1",
)

# Panel 8C: both Plottables in one plot (dyads listed first).
plot = generalPlotting(
    [
        TSS_RelativeHybridDyadsPlottable,
        TSS_RelativeSP1Plottable,
    ],
    title = "SP1 and Dyad Positions Relative to TSS",
    xAxisLabel = "Position Relative to TSS (bp)",
    yAxisLabel = "Chromatin Feature Counts",
    ylim = (0, 50),
)
# Override the figure size for this panel.
plot = plot + theme(figure_size = (13.44, 6))
plot.show()
plot.save(
    os.path.join(paperFiguresOutputDir, "8C.png"),
    dpi = outputDPI,
)

#### Figure S1

S1A

In [None]:
# Panel S1A: periodicity plot of the Adar damage-seq translational table
# (hybrid nucleosome map), raw and smoothed series overlaid.
plot = parseAndPlotPeriodicity(
    adarDamageSeqTranslationalData,
    dataCol = "Normalized_Both_Strands",
    nucRepLen = hybridNucMapNRL,
    smoothTranslational = True,
    overlaySmoothedAndNormal = True,
    title = f"Adar Damage-seq Hybrid Nuc Map Naked Normalized",
    yAxisLabel = "Cellular/Naked Damage",
    ylim = (0.8, 1.3),
)
plot.show()
plot.save(
    os.path.join(paperFiguresOutputDir, "S1A.png"),
    dpi = outputDPI,
)

# Pearson correlation against the controlled-access translational table.
print(
    pearsonr(
        adarDamageSeqTranslationalData["Normalized_Both_Strands"],
        controlledAccessTranslationalData["Normalized_Both_Strands"],
    )
)

S1B

In [None]:
# Panel S1B: periodicity plot of the Adar damage-seq rotational table (hybrid
# nucleosome map).
plot = parseAndPlotPeriodicity(
    adarDamageSeqRotationalData,
    dataCol = "Normalized_Both_Strands",
    nucRepLen = hybridNucMapNRL,
    title = f"Adar Damage-seq Hybrid Nuc Map Naked Normalized",
    yAxisLabel = "Cellular/Naked Damage",
    ylim = (0.8, 1.2),
)
plot.show()
plot.save(
    os.path.join(paperFiguresOutputDir, "S1B.png"),
    dpi = outputDPI,
)

# Pearson correlation against the controlled-access rotational table.
print(
    pearsonr(
        adarDamageSeqRotationalData["Normalized_Both_Strands"],
        controlledAccessRotationalHybridData["Normalized_Both_Strands"],
    )
)

#### Figure S2

S2A

In [None]:
# Panel S2A: periodicity plot of the controlled-access translational table for
# the LCL MNase nucleosome map, raw and smoothed series overlaid.
# The repeat length passed here is the one measured from the LCL MNase map
# (LCL_NucMapNRL); the cell previously passed hybridNucMapNRL, the value
# measured from the hybrid map, which does not match the data being plotted.
plot = parseAndPlotPeriodicity(controlledAccessTranslationalLCLData, dataCol = "Normalized_Both_Strands",
                               smoothTranslational = True, overlaySmoothedAndNormal = True, nucRepLen = LCL_NucMapNRL,
                               title = "Lung Cancer LCL Nuc Map Trinuc Normalized", yAxisLabel = "Mutation Enrichment",
                               ylim = (0.8,1.2))
plot.show()
plot.save(os.path.join(paperFiguresOutputDir, "S2A.png"), dpi = outputDPI)

S2B

In [None]:
# Panel S2B: periodicity plot of the Jiang translational table for the LCL
# MNase nucleosome map, raw and smoothed series overlaid.
# The repeat length passed here is the one measured from the LCL MNase map
# (LCL_NucMapNRL); the cell previously passed hybridNucMapNRL, the value
# measured from the hybrid map, which does not match the data being plotted.
plot = parseAndPlotPeriodicity(jiangTranslationalLCLData, dataCol = "Normalized_Both_Strands",
                               smoothTranslational = True, overlaySmoothedAndNormal = True, nucRepLen = LCL_NucMapNRL,
                               title = "Jiang LCL Nuc Map Naked Normalized", yAxisLabel = "Cellular/Naked Damage",
                               ylim = (0.8,1.2))
plot.show()
plot.save(os.path.join(paperFiguresOutputDir, "S2B.png"), dpi = outputDPI)

# Disabled alternative using the Adar damage-seq table (would write to the same S2B.png):
# plot = parseAndPlotPeriodicity(adarDamageSeqTranslationalLCLData, dataCol = "Normalized_Both_Strands",
#                                smoothTranslational = True, overlaySmoothedAndNormal = True, nucRepLen = LCL_NucMapNRL,
#                                title = "Adar Damage-seq LCL Nuc Map Naked Normalized", yAxisLabel = "Cellular/Naked Damage",
#                                ylim = (0.8,1.2))
# plot.show()
# plot.save(os.path.join(paperFiguresOutputDir, "S2B.png"), dpi = outputDPI)

# Pearson correlation against the controlled-access LCL translational table.
print(pearsonr(jiangTranslationalLCLData["Normalized_Both_Strands"], controlledAccessTranslationalLCLData["Normalized_Both_Strands"]))
# print(pearsonr(adarDamageSeqTranslationalLCLData["Normalized_Both_Strands"], controlledAccessTranslationalLCLData["Normalized_Both_Strands"]))

S2C

In [None]:
# Panel S2C: periodicity plot of the Li translational table for the LCL MNase
# nucleosome map, raw and smoothed series overlaid. No ylim is passed.
# The repeat length passed here is the one measured from the LCL MNase map
# (LCL_NucMapNRL); the cell previously passed hybridNucMapNRL, the value
# measured from the hybrid map, which does not match the data being plotted.
plot = parseAndPlotPeriodicity(liTranslationalLCLData, dataCol = "Normalized_Both_Strands",
                               smoothTranslational = True, overlaySmoothedAndNormal = True, nucRepLen = LCL_NucMapNRL,
                               title = "Li LCL Nuc Map Trinuc Normalized", yAxisLabel = "Normalized Repair Activity")
plot.show()
plot.save(os.path.join(paperFiguresOutputDir, "S2C.png"), dpi = outputDPI)

# Pearson correlation against the controlled-access LCL translational table.
print(pearsonr(liTranslationalLCLData["Normalized_Both_Strands"], controlledAccessTranslationalLCLData["Normalized_Both_Strands"]))

#### Figure S3

S3A

In [None]:
# Panel S3A: periodicity plot of the Jiang rotational table for the LCL MNase
# nucleosome map.
# nucRepLen now receives LCL_NucMapNRL (measured from the LCL MNase map) rather
# than hybridNucMapNRL, so the repeat length matches the map behind the data.
# NOTE(review): whether nucRepLen affects rotational plots is not visible from
# this notebook - confirm against parseAndPlotPeriodicity.
plot = parseAndPlotPeriodicity(jiangRotationalLCLData, dataCol = "Normalized_Both_Strands", nucRepLen = LCL_NucMapNRL,
                               title = "Jiang LCL Nuc Map Naked Normalized", yAxisLabel = "Cellular/Naked Damage",
                               ylim = (0.8,1.2))
plot.show()
plot.save(os.path.join(paperFiguresOutputDir, "S3A.png"), dpi = outputDPI)

# Disabled alternative using the Adar damage-seq table (would write to the same S3A.png):
# plot = parseAndPlotPeriodicity(adarDamageSeqRotationalLCLData, dataCol = "Normalized_Both_Strands", nucRepLen = LCL_NucMapNRL,
#                                title = "Adar Damage-seq LCL Nuc Map Naked Normalized", yAxisLabel = "Cellular/Naked Damage",
#                                ylim = (0.8,1.2))
# plot.show()
# plot.save(os.path.join(paperFiguresOutputDir, "S3A.png"), dpi = outputDPI)

# Pearson correlation against the controlled-access LCL rotational table.
print(pearsonr(jiangRotationalLCLData["Normalized_Both_Strands"], controlledAccessRotationalLCLData["Normalized_Both_Strands"]))
# print(pearsonr(adarDamageSeqRotationalLCLData["Normalized_Both_Strands"], controlledAccessRotationalLCLData["Normalized_Both_Strands"]))

S3B

In [None]:
# Panel S3B: periodicity plot of the Li rotational table for the LCL MNase
# nucleosome map.
# nucRepLen now receives LCL_NucMapNRL (measured from the LCL MNase map) rather
# than hybridNucMapNRL, so the repeat length matches the map behind the data.
# NOTE(review): whether nucRepLen affects rotational plots is not visible from
# this notebook - confirm against parseAndPlotPeriodicity.
plot = parseAndPlotPeriodicity(liRotationalLCLData, dataCol = "Normalized_Both_Strands", nucRepLen = LCL_NucMapNRL,
                               title = "Li LCL Nuc Map Trinuc Normalized", yAxisLabel = "Normalized Repair Activity",
                               ylim = (0.8,1.2))
plot.show()
plot.save(os.path.join(paperFiguresOutputDir, "S3B.png"), dpi = outputDPI)

# Pearson correlation against the controlled-access LCL rotational table.
print(pearsonr(liRotationalLCLData["Normalized_Both_Strands"], controlledAccessRotationalLCLData["Normalized_Both_Strands"]))

#### Figure S4

In [None]:
# Print the line count of each nucleosome map bed file (hybrid, then LCL MNase)
# by shelling out to `wc -l`; the {...} expression is evaluated in Python and
# its result is substituted into the shell command.
# NOTE(review): the substituted path is not quoted, so a data directory whose
# path contains spaces would presumably be split by the shell - confirm.
! wc -l {os.path.join(mutperiodHg19Directory, "hg19_hybrid_nuc_map", "hg19_hybrid_nuc_map.bed")}
! wc -l {os.path.join(mutperiodHg19Directory, "hg19_LCL_MNase_nuc_map", "hg19_LCL_MNase_nuc_map.bed")}

S4A

In [None]:
# Panel S4A: periodicity plot of the controlled-access rotational table
# (hybrid nucleosome map).
plot = parseAndPlotPeriodicity(
    controlledAccessRotationalHybridData,
    dataCol = "Normalized_Both_Strands",
    nucRepLen = hybridNucMapNRL,
    title = f"Lung Cancer Hybrid Nuc Map Trinuc Normalized",
    yAxisLabel = "Mutation Enrichment",
    ylim = (0.8, 1.2),
)
plot.show()
plot.save(
    os.path.join(paperFiguresOutputDir, "S4A.png"),
    dpi = outputDPI,
)

S4B

In [None]:
# Panel S4B: periodicity plot of the controlled-access rotational table for the
# LCL MNase nucleosome map.
# nucRepLen now receives LCL_NucMapNRL (measured from the LCL MNase map) rather
# than hybridNucMapNRL, so the repeat length matches the map behind the data.
# NOTE(review): whether nucRepLen affects rotational plots is not visible from
# this notebook - confirm against parseAndPlotPeriodicity.
plot = parseAndPlotPeriodicity(controlledAccessRotationalLCLData, dataCol = "Normalized_Both_Strands", nucRepLen = LCL_NucMapNRL,
                               title = "Lung Cancer LCL Nuc Map Trinuc Normalized", yAxisLabel = "Mutation Enrichment",
                               ylim = (0.8,1.2))
plot.show()
plot.save(os.path.join(paperFiguresOutputDir, "S4B.png"), dpi = outputDPI)

#### Figure S5

S5A

In [None]:
# Panel S5A: Li rotational table for the LCL MNase nucleosome map, plotting the
# strand-aligned column.
# nucRepLen now receives LCL_NucMapNRL (measured from the LCL MNase map) rather
# than hybridNucMapNRL, so the repeat length matches the map behind the data.
# NOTE(review): whether nucRepLen affects rotational plots is not visible from
# this notebook - confirm against parseAndPlotPeriodicity.
plot = parseAndPlotPeriodicity(liRotationalLCLData, dataCol = "Normalized_Aligned_Strands", nucRepLen = LCL_NucMapNRL,
                               title = "Li LCL Nuc Map Trinuc Normalized",
                               xAxisLabel = "Position Relative to Dyad (bp; strand-aligned)",
                               yAxisLabel = "Normalized Repair Activity", ylim = (0.8,1.3))
plot.show()
plot.save(os.path.join(paperFiguresOutputDir, "S5A.png"), dpi = outputDPI)

# Pearson correlation of the strand-aligned columns (Li vs. controlled access, LCL map).
print(pearsonr(liRotationalLCLData["Normalized_Aligned_Strands"], controlledAccessRotationalLCLData["Normalized_Aligned_Strands"]))

S5B

In [None]:
# Figure S5B: same inputs as S5A, with colorRotationalTransitions switched on.
# NOTE(review): nucRepLen is the hybrid-map NRL even though this is LCL-map data
# (LCL_NucMapNRL is also defined in the setup cell) - confirm this is intended.
plot = parseAndPlotPeriodicity(
    liRotationalLCLData,
    dataCol = "Normalized_Aligned_Strands",
    nucRepLen = hybridNucMapNRL,
    title = "Li LCL Nuc Map Trinuc Normalized",
    xAxisLabel = "Position Relative to Dyad (bp; strand-aligned)",
    yAxisLabel = "Normalized Repair Activity",
    ylim = (0.8, 1.3),
    colorRotationalTransitions = True,
)
plot.show()
plot.save(os.path.join(paperFiguresOutputDir, "S5B.png"), dpi = outputDPI)

#### Figure S6 is the Xenopus SASA plot from Dr. Wyrick

#### Figure S7

S7A

In [None]:
# Figure S7A: Jiang damage against Li repair around CTCF sites.
plot = plotDamageAndRepair(
    jiangCTCF,
    liCTCF,
    title = "Jiang CTCF Damage vs. Repair",
    xAxisLabel = "Position Relative to CTCF Midpoint (bp)",
    ylim = (0, 4),
)
plot.show()
plot.save(os.path.join(paperFiguresOutputDir, "S7A.png"), dpi = outputDPI)

# Pearson correlation of each plotted profile with the controlled-access CTCF profile.
print(pearsonr(
    jiangCTCF["Normalized_Both_Strands"],
    controlledAccessCTCF["Normalized_Both_Strands"],
))
print(pearsonr(
    liCTCF["Normalized_Both_Strands"],
    controlledAccessCTCF["Normalized_Both_Strands"],
))

S7B

In [None]:
# Figure S7B: Adar damage against Li repair around CTCF sites.
plot = plotDamageAndRepair(
    adarDamageSeqCTCF,
    liCTCF,
    title = "Adar CTCF Damage vs. Repair",
    xAxisLabel = "Position Relative to CTCF Midpoint (bp)",
    ylim = (0, 4),
)
plot.show()
plot.save(os.path.join(paperFiguresOutputDir, "S7B.png"), dpi = outputDPI)

# Pearson correlation of each plotted profile with the controlled-access CTCF profile.
print(pearsonr(
    adarDamageSeqCTCF["Normalized_Both_Strands"],
    controlledAccessCTCF["Normalized_Both_Strands"],
))
print(pearsonr(
    liCTCF["Normalized_Both_Strands"],
    controlledAccessCTCF["Normalized_Both_Strands"],
))

#### Figure S8

S8A

In [None]:
# Figure S8A: hybrid-map dyad counts relative to CTCF midpoints, with positions
# -50..50 passed as blacklistedRegions.
plot = plotFeatureWithCustomNucleosomes(
    CTCF_RelativeHybridDyads,
    CTCF_RelativeHybridDyadPeaks,
    "Both_Strands_Counts",
    blacklistedRegions = range(-50, 51),
    title = "Nucleosome Dyads Relative to CTCF",
    xAxisLabel = "Position Relative to CTCF Midpoint (bp)",
    yAxisLabel = "Nucleosome Dyad Counts",
)
plot.show()
plot.save(os.path.join(paperFiguresOutputDir, "S8A.png"), dpi = outputDPI)

#### Figure S9

S9A

In [None]:
# Figure S9A: damage, damage-normalized repair and mutagenesis around CTCF sites (smoothed).
plot = plotDamageRepairAndMutagenesis(
    adarDamageSeqCTCF,
    liDamageNormalizedCTCF,
    controlledAccessCTCF,
    title = "CTCF Damage vs. Damage-Normalized Repair vs. Mutagenesis",
    xAxisLabel = "Position Relative to CTCF Midpoint (bp)",
    ylim = (0, 4),
    smoothData = True,
    overlaySmoothedAndNormal = False,
)
plot.show()
plot.save(os.path.join(paperFiguresOutputDir, "S9A.png"), dpi = outputDPI)

# Correlations use the "Normalized_Both_Strands" columns as loaded (no smoothing applied here).
print(pearsonr(
    adarDamageSeqCTCF["Normalized_Both_Strands"],
    controlledAccessCTCF["Normalized_Both_Strands"],
))
print(pearsonr(
    liDamageNormalizedCTCF["Normalized_Both_Strands"],
    controlledAccessCTCF["Normalized_Both_Strands"],
))

S9B

In [None]:
# Figure S9B: same three CTCF profiles, x-axis limited to -100..100, unsmoothed.
plot = plotDamageRepairAndMutagenesis(
    adarDamageSeqCTCF,
    liDamageNormalizedCTCF,
    controlledAccessCTCF,
    title = "CTCF Damage vs. Damage-Normalized Repair vs. Mutagenesis",
    xAxisLabel = "Position Relative to CTCF Midpoint (bp)",
    xlim = (-100, 100),
    ylim = (0, 4),
    smoothData = False,
    overlaySmoothedAndNormal = False,
)
plot.show()
plot.save(os.path.join(paperFiguresOutputDir, "S9B.png"), dpi = outputDPI)

# Correlate only the rows inside the plotted window; each frame is filtered by its own positions.
print(pearsonr(
    adarDamageSeqCTCF.loc[adarDamageSeqCTCF["Dyad_Position"].between(-100,100), "Normalized_Both_Strands"],
    controlledAccessCTCF.loc[controlledAccessCTCF["Dyad_Position"].between(-100,100), "Normalized_Both_Strands"],
))
print(pearsonr(
    liDamageNormalizedCTCF.loc[liDamageNormalizedCTCF["Dyad_Position"].between(-100,100), "Normalized_Both_Strands"],
    controlledAccessCTCF.loc[controlledAccessCTCF["Dyad_Position"].between(-100,100), "Normalized_Both_Strands"],
))

S9C

In [None]:
# Figure S9C: same three CTCF profiles, x-axis limited to -25..25, unsmoothed.
plot = plotDamageRepairAndMutagenesis(
    adarDamageSeqCTCF,
    liDamageNormalizedCTCF,
    controlledAccessCTCF,
    title = "CTCF Damage vs. Damage-Normalized Repair vs. Mutagenesis",
    xAxisLabel = "Position Relative to CTCF Midpoint (bp)",
    xlim = (-25, 25),
    ylim = (0, 4),
    smoothData = False,
    overlaySmoothedAndNormal = False,
)
plot.show()
plot.save(os.path.join(paperFiguresOutputDir, "S9C.png"), dpi = outputDPI)

# Correlate only the rows inside the plotted window; each frame is filtered by its own positions.
print(pearsonr(
    adarDamageSeqCTCF.loc[adarDamageSeqCTCF["Dyad_Position"].between(-25,25), "Normalized_Both_Strands"],
    controlledAccessCTCF.loc[controlledAccessCTCF["Dyad_Position"].between(-25,25), "Normalized_Both_Strands"],
))
print(pearsonr(
    liDamageNormalizedCTCF.loc[liDamageNormalizedCTCF["Dyad_Position"].between(-25,25), "Normalized_Both_Strands"],
    controlledAccessCTCF.loc[controlledAccessCTCF["Dyad_Position"].between(-25,25), "Normalized_Both_Strands"],
))

S9_insert

In [None]:
# Figure S9 insert: same three CTCF profiles, zoomed to x in -10..0 and y in 4..6.
plot = plotDamageRepairAndMutagenesis(
    adarDamageSeqCTCF,
    liDamageNormalizedCTCF,
    controlledAccessCTCF,
    title = "CTCF Damage vs. Damage-Normalized Repair vs. Mutagenesis",
    xAxisLabel = "Position Relative to CTCF Midpoint (bp)",
    xlim = (-10, 0),
    ylim = (4, 6),
    smoothData = False,
    overlaySmoothedAndNormal = False,
)
plot.show()
plot.save(os.path.join(paperFiguresOutputDir, "S9_insert.png"), dpi = outputDPI)

#### Figure S10

S10A

In [None]:
# Figure S10A: hybrid-map dyad counts relative to SP1 midpoints.
plot = plotSingleFeature(
    SP1_RelativeHybridDyads,
    "Both_Strands_Counts",
    title = "Nucleosome Dyads Relative to SP1",
    xAxisLabel = "Position Relative to SP1 Midpoint (bp)",
    yAxisLabel = "Nucleosome Dyad Counts",
)
plot.show()
plot.save(os.path.join(paperFiguresOutputDir, "S10A.png"), dpi = outputDPI)

#### Figure S11

S11A

In [None]:
# Figure S11A: Adar damage against damage-normalized Li repair around SP1 sites.
plot = plotDamageAndRepair(
    adarDamageSeqSP1,
    liDamageNormalizedSP1,
    title = "SP1 Damage vs. Damage-Normalized Repair",
    xAxisLabel = "Position Relative to SP1 Midpoint (bp)",
    ylim = (0, 2.5),
)
plot.show()
plot.save(os.path.join(paperFiguresOutputDir, "S11A.png"), dpi = outputDPI)

# Pearson correlation of each plotted profile with the controlled-access SP1 profile.
print(pearsonr(
    adarDamageSeqSP1["Normalized_Both_Strands"],
    controlledAccessSP1["Normalized_Both_Strands"],
))
print(pearsonr(
    liDamageNormalizedSP1["Normalized_Both_Strands"],
    controlledAccessSP1["Normalized_Both_Strands"],
))

S11B

In [None]:
# Figure S11B: damage, damage-normalized repair and mutagenesis around SP1 sites,
# x-axis limited to -100..100, unsmoothed.
plot = plotDamageRepairAndMutagenesis(
    adarDamageSeqSP1,
    liDamageNormalizedSP1,
    controlledAccessSP1,
    title = "SP1 Damage vs. Damage-Normalized Repair vs. Mutagenesis",
    xAxisLabel = "Position Relative to SP1 Midpoint (bp)",
    xlim = (-100, 100),
    ylim = (0, 2.5),
    smoothData = False,
    overlaySmoothedAndNormal = False,
)
plot.show()
plot.save(os.path.join(paperFiguresOutputDir, "S11B.png"), dpi = outputDPI)

# print(pearsonr(jiangSP1[jiangSP1["Dyad_Position"].between(-100,100)]["Normalized_Both_Strands"],
#                controlledAccessSP1[controlledAccessSP1["Dyad_Position"].between(-100,100)]["Normalized_Both_Strands"]))
# Correlate only the rows inside the plotted window; each frame is filtered by its own positions.
print(pearsonr(
    adarDamageSeqSP1.loc[adarDamageSeqSP1["Dyad_Position"].between(-100,100), "Normalized_Both_Strands"],
    controlledAccessSP1.loc[controlledAccessSP1["Dyad_Position"].between(-100,100), "Normalized_Both_Strands"],
))
print(pearsonr(
    liDamageNormalizedSP1.loc[liDamageNormalizedSP1["Dyad_Position"].between(-100,100), "Normalized_Both_Strands"],
    controlledAccessSP1.loc[controlledAccessSP1["Dyad_Position"].between(-100,100), "Normalized_Both_Strands"],
))

S11C

In [None]:
# Figure S11C: damage, damage-normalized repair and mutagenesis around SP1 sites,
# x-axis limited to -25..25, unsmoothed.
plot = plotDamageRepairAndMutagenesis(adarDamageSeqSP1, liDamageNormalizedSP1, controlledAccessSP1,
                                      title = "SP1 Damage vs. Damage-Normalized Repair vs. Mutagenesis", xAxisLabel = "Position Relative to SP1 Midpoint (bp)",
                                      xlim = (-25, 25), ylim = (0,2.5), smoothData = False, overlaySmoothedAndNormal = False)
plot.show()
plot.save(os.path.join(paperFiguresOutputDir, "S11C.png"), dpi = outputDPI)

# Correlate only the rows inside the plotted window. Each frame must be filtered by a
# mask built from its own "Dyad_Position" column: the repair mask previously came from
# liSP1, a different frame, which made the result depend on the two frames sharing an index.
print(pearsonr(adarDamageSeqSP1[adarDamageSeqSP1["Dyad_Position"].between(-25,25)]["Normalized_Both_Strands"],
               controlledAccessSP1[controlledAccessSP1["Dyad_Position"].between(-25,25)]["Normalized_Both_Strands"]))
print(pearsonr(liDamageNormalizedSP1[liDamageNormalizedSP1["Dyad_Position"].between(-25,25)]["Normalized_Both_Strands"],
               controlledAccessSP1[controlledAccessSP1["Dyad_Position"].between(-25,25)]["Normalized_Both_Strands"]))

#### Figure S12

S12A

In [None]:
# Figure S12A: Adar damage against Li repair around low-expression TSSs.
plot = plotDamageAndRepair(
    adarDamageSeqTSSLowExpr,
    liTSSLowExpr,
    title = "Low Expression TSS",
    xAxisLabel = "Position Relative to TSS (bp)",
    xlim = (-1000, 1000),
    ylim = (0.5, 2.0),
    smoothData = True,
    overlaySmoothedAndNormal = True,
)
# Override the figure size for this panel.
plot = plot + theme(figure_size = (10, 6))
plot.show()
plot.save(os.path.join(paperFiguresOutputDir, "S12A.png"), dpi = outputDPI)

S12B

In [None]:
# Figure S12B: Adar damage against Li repair around high-expression TSSs.
plot = plotDamageAndRepair(
    adarDamageSeqTSSHighExpr,
    liTSSHighExpr,
    title = "High Expression TSS",
    xAxisLabel = "Position Relative to TSS (bp)",
    xlim = (-1000, 1000),
    ylim = (0.5, 2.0),
    smoothData = True,
    overlaySmoothedAndNormal = True,
)
# Override the figure size for this panel.
plot = plot + theme(figure_size = (10, 6))
plot.show()
plot.save(os.path.join(paperFiguresOutputDir, "S12B.png"), dpi = outputDPI)

S12C

In [None]:
# Figure S12C: controlled-access mutation profile around low-expression TSSs.
plot = plotSingleFeature(
    controlledAccessTSSLowExpr,
    title = "Low Expression TSS",
    xAxisLabel = "Position Relative to TSS (bp)",
    yAxisLabel = "Mutation Enrichment",
    xlim = (-1000, 1000),
    ylim = (0, 4.0),
    smoothData = True,
    overlaySmoothedAndNormal = True,
    color = PlottingColors.MUTATION.value,
    underlaidColor = PlottingColors.MUTATION_UNDERLAY.value,
)
# Override the figure size for this panel.
plot = plot + theme(figure_size = (8.4, 6))
plot.show()
plot.save(os.path.join(paperFiguresOutputDir, "S12C.png"), dpi = outputDPI)

# Correlate only the rows inside the plotted window; each frame is filtered by its own positions.
print("Damage vs. Mutations:", pearsonr(
    adarDamageSeqTSSLowExpr.loc[adarDamageSeqTSSLowExpr["Dyad_Position"].between(-1000,1000), "Normalized_Both_Strands"],
    controlledAccessTSSLowExpr.loc[controlledAccessTSSLowExpr["Dyad_Position"].between(-1000,1000), "Normalized_Both_Strands"],
))
print("Repair vs. Mutations:", pearsonr(
    liTSSLowExpr.loc[liTSSLowExpr["Dyad_Position"].between(-1000,1000), "Normalized_Both_Strands"],
    controlledAccessTSSLowExpr.loc[controlledAccessTSSLowExpr["Dyad_Position"].between(-1000,1000), "Normalized_Both_Strands"],
))

S12D

In [None]:
# Figure S12D: controlled-access mutation profile around high-expression TSSs.
plot = plotSingleFeature(
    controlledAccessTSSHighExpr,
    title = "High Expression TSS",
    xAxisLabel = "Position Relative to TSS (bp)",
    yAxisLabel = "Mutation Enrichment",
    xlim = (-1000, 1000),
    ylim = (0, 4.0),
    smoothData = True,
    overlaySmoothedAndNormal = True,
    color = PlottingColors.MUTATION.value,
    underlaidColor = PlottingColors.MUTATION_UNDERLAY.value,
)
# Override the figure size for this panel.
plot = plot + theme(figure_size = (8.4, 6))
plot.show()
plot.save(os.path.join(paperFiguresOutputDir, "S12D.png"), dpi = outputDPI)

# Correlate only the rows inside the plotted window; each frame is filtered by its own positions.
print("Damage vs. Mutations:", pearsonr(
    adarDamageSeqTSSHighExpr.loc[adarDamageSeqTSSHighExpr["Dyad_Position"].between(-1000,1000), "Normalized_Both_Strands"],
    controlledAccessTSSHighExpr.loc[controlledAccessTSSHighExpr["Dyad_Position"].between(-1000,1000), "Normalized_Both_Strands"],
))
print("Repair vs. Mutations:", pearsonr(
    liTSSHighExpr.loc[liTSSHighExpr["Dyad_Position"].between(-1000,1000), "Normalized_Both_Strands"],
    controlledAccessTSSHighExpr.loc[controlledAccessTSSHighExpr["Dyad_Position"].between(-1000,1000), "Normalized_Both_Strands"],
))

#### Figure S13 consists of the mutation signature plots from Marian

#### Figure S14

S14A

In [None]:
# Figure S14A: read length distribution plot built from liEnrichedIndices.
plot = plotReadLengthDistribution(
    liEnrichedIndices,
    xlim = (16, 32),
    xAxisBreaks = (16, 20, 24, 28, 32),
    ylim = (0, 15000000),
)
plot.show()
plot.save(os.path.join(paperFiguresOutputDir, "S14A.png"), dpi = outputDPI)

S14B

In [None]:
# Figure S14B: individual frequencies plot built from liIndividualNucleotideFrequencies.
plot = plotIndividualFrequencies(
    liIndividualNucleotideFrequencies,
    ylim = (0, 0.6),
)
plot.show()
plot.save(os.path.join(paperFiguresOutputDir, "S14B.png"), dpi = outputDPI)

#### Dissertation Figures

In [None]:
# Load the Alexandrov LUAD TSS mutation counts table used by the dissertation figures.
alexandrovTSS = read_table(
    os.path.join(
        alexandrovDataDirectory,
        "hg19_protein_coding_genes_TSSs",
        "Alexandrov_LUAD_TSSs_trinuc_nuc-group_normalized_nucleosome_mutation_counts.tsv",
    )
)

6.1A

In [None]:
# Dissertation figure 6.1A: Alexandrov LUAD mutation profile around TSSs.
plot = plotSingleFeature(
    alexandrovTSS,
    title = "TSS Mutations",
    xAxisLabel = "Position Relative to TSS (bp)",
    yAxisLabel = "Mutation Enrichment",
    ylim = (0, 2.5),
)
# Override the figure size for this panel.
plot = plot + theme(figure_size = (10.3, 6))
plot.show()
plot.save(os.path.join(paperFiguresOutputDir, "Dissertation_6.1A.png"), dpi = outputDPI)

6.1B is adapted from 5A

In [None]:
# Dissertation figure 6.1B: Jiang damage against Li repair around TSSs.
plot = plotDamageAndRepair(jiangTSS, liTSS, title = "TSS Damage vs. Repair", xAxisLabel = "Position Relative to TSS (bp)",
                           ylim = (0,2.5))
plot.show()
# Save under the panel's own label (6.1B, the companion of Dissertation_6.1A.png);
# the file was previously written as "Dissertation_6.2B.png".
plot.save(os.path.join(paperFiguresOutputDir, "Dissertation_6.1B.png"), dpi = outputDPI)