### Set up pipeline with basic imports, data directories, and file basenames.

In [None]:
import os
from benbiohelpers.DataPipelineManagement.GenomeManager import getGenomeFastaFilePath
from bpde_chromatin_analysis.helper_scripts.BPDE_DataDir import getDataDir

# Basenames of the cellular and naked-DNA data sets. Later cells use them both as
# sub-directory names and as file name prefixes.
adarDamageSeqCellularBasename = "A549_BPDE_cell_2h"
adarDamageSeqNakedBasename = "A549_BPDE_nDNA_2h"

# Path to the hg19 genome fasta file, as reported by the genome manager.
hg19FastaFilePath = getGenomeFastaFilePath("hg19")

# The "Adar_damage-seq" directory inside the project's data directory.
adarDamageSeqDataDirectory = os.path.join(
    getDataDir(),
    "Adar_damage-seq",
)

# Preprocessing

### Format the reads
Format the reads by combining paired-end reads, removing lone "2nd pair" reads, removing duplicates, and combining replicates.

In [None]:
from bpde_chromatin_analysis.FormatAlignedDamageReads import formatAlignedDamageReads

# One aligned-reads bed file is expected per replicate, located at
# <data directory>/<basename>/<basename><replicate suffix>.bed
replicateSuffixes = ("_rep1", "_rep2", "_rep3", "_rep4")

adarDamageSeqCellularReadsFilePaths = [
    os.path.join(
        adarDamageSeqDataDirectory,
        adarDamageSeqCellularBasename,
        adarDamageSeqCellularBasename + replicateSuffix + ".bed",
    )
    for replicateSuffix in replicateSuffixes
]
adarDamageSeqNakedReadsFilePaths = [
    os.path.join(
        adarDamageSeqDataDirectory,
        adarDamageSeqNakedBasename,
        adarDamageSeqNakedBasename + replicateSuffix + ".bed",
    )
    for replicateSuffix in replicateSuffixes
]

# Format the cellular reads first, then the naked reads; the returned values are
# handed to the damage-position step in the next cell.
formattedAdarDamageSeqCellularReadsFilePaths = formatAlignedDamageReads(
    adarDamageSeqCellularReadsFilePaths
)
formattedAdarDamageSeqNakedReadsFilePaths = formatAlignedDamageReads(
    adarDamageSeqNakedReadsFilePaths
)

### Convert the BPDE damage-seq reads to single-nucleotide lesion positions.

In [None]:
from bpde_chromatin_analysis.GetBDPE_DamagePositionsFromAlignedReads import getBPDE_DamagePositionsFromAlignedReads

# Derive damage positions from the formatted reads, using the hg19 fasta file as
# the reference. Cellular data is processed first, then naked DNA.
adarDamageSeqCellularDamagePosFilePaths = getBPDE_DamagePositionsFromAlignedReads(
    formattedAdarDamageSeqCellularReadsFilePaths,
    hg19FastaFilePath,
)
adarDamageSeqNakedDamagePosFilePaths = getBPDE_DamagePositionsFromAlignedReads(
    formattedAdarDamageSeqNakedReadsFilePaths,
    hg19FastaFilePath,
)

### Split damage positions into adenine and non-adenine positions.

In [None]:
from bpde_chromatin_analysis.FilterOnSpecificNucleotides import filterOnSpecificNucleotides

# Filter both sets of damage positions on the nucleotide 'A'. The filtered
# results replace the unfiltered file paths held in the same variables.
adarDamageSeqCellularDamagePosFilePaths = filterOnSpecificNucleotides(
    adarDamageSeqCellularDamagePosFilePaths,
    'A',
)
adarDamageSeqNakedDamagePosFilePaths = filterOnSpecificNucleotides(
    adarDamageSeqNakedDamagePosFilePaths,
    'A',
)

### Move results manually to "A549_BPDE_cell_2h_A" and "A549_BPDE_nDNA_2h_A" directories

In [None]:
# Point the basenames at the manually created "_A" directories.
# The suffix is only appended when it is not already present, so re-running this
# cell leaves the basenames unchanged instead of producing names ending in "_A_A".
nucleotideSuffix = "_A"
if not adarDamageSeqCellularBasename.endswith(nucleotideSuffix):
    adarDamageSeqCellularBasename += nucleotideSuffix
if not adarDamageSeqNakedBasename.endswith(nucleotideSuffix):
    adarDamageSeqNakedBasename += nucleotideSuffix

### Parse the damage position data for mutperiod input

In [None]:
from mutperiodpy.input_parsing.ParseStandardBed import parseStandardBed

# Parse the damage position files with the hg19 fasta file, then keep the first
# element of each result as the mutperiod input file path.
parsedAdarDamageSeqCellularFilePaths = parseStandardBed(
    adarDamageSeqCellularDamagePosFilePaths,
    hg19FastaFilePath,
)
adarDamageSeqCellularMutperiodInputFilePath = parsedAdarDamageSeqCellularFilePaths[0]

parsedAdarDamageSeqNakedFilePaths = parseStandardBed(
    adarDamageSeqNakedDamagePosFilePaths,
    hg19FastaFilePath,
)
adarDamageSeqNakedMutperiodInputFilePath = parsedAdarDamageSeqNakedFilePaths[0]

# Retrieve processed data

In [None]:
from benbiohelpers.FileSystemHandling.DirectoryHandling import getFilesInDirectory
from mutperiodpy.helper_scripts.UsefulFileSystemFunctions import getExternalDataDirectory as getMutperiodExternalDataDirectory, DataTypeStr

# mutperiod's external data directory; the TFBS file paths below are built from it.
mutperiodHg19Directory = getMutperiodExternalDataDirectory()

# File ending that identifies the mutperiod input files to retrieve.
mutperiodInputFileEnding = DataTypeStr.mutations + ".bed"

adarDamageSeqCellularDirectory = os.path.join(adarDamageSeqDataDirectory, adarDamageSeqCellularBasename)
adarDamageSeqNakedDirectory = os.path.join(adarDamageSeqDataDirectory, adarDamageSeqNakedBasename)

adarDamageSeqCellularMutperiodInputFilePaths = getFilesInDirectory(
    adarDamageSeqCellularDirectory,
    mutperiodInputFileEnding,
)
# NOTE(review): this result is stored under a singular name and later passed to
# os.path.dirname, so searchRecursively = False presumably yields a single
# path rather than a list - confirm against getFilesInDirectory.
adarDamageSeqNakedMutperiodInputFilePath = getFilesInDirectory(
    adarDamageSeqNakedDirectory,
    mutperiodInputFileEnding,
    searchRecursively = False,
)

# Nucleosome Analysis

# TFBS Analysis

In [None]:
from mutperiodpy.RunAnalysisSuite import runAnalysisSuite, generateCustomBackground

# Transcription factor binding site bed files to analyze.
TFBS_FilePaths = [
    os.path.join(mutperiodHg19Directory, "hg19_CTCF_known", "hg19_CTCF_known.bed"),
    # Currently disabled:
    # os.path.join(mutperiodHg19Directory, "hg19_ETS_and_GABPA_known", "hg19_ETS_and_GABPA_known.bed"),
    os.path.join(mutperiodHg19Directory, "hg19_SP1_known", "hg19_SP1_known.bed"),
]

# Each name is the bed file's basename with its final extension removed.
TFBS_Names = [os.path.basename(bedFilePath).rsplit('.', 1)[0] for bedFilePath in TFBS_FilePaths]

# The directory containing the naked DNA input is where the custom background is
# generated and also where the analysis suite is told to look for it.
nakedBackgroundDirectory = os.path.dirname(adarDamageSeqNakedMutperiodInputFilePath)

# Options passed identically to the background generation and the analysis run.
sharedAnalysisOptions = dict(
    useSingleNucRadius = True,
    includeLinker = False,
    useNucGroupRadius = True,
    useNucStrand = True,
)

generateCustomBackground(nakedBackgroundDirectory, TFBS_Names, **sharedAnalysisOptions)

runAnalysisSuite(
    adarDamageSeqCellularMutperiodInputFilePaths,
    TFBS_Names,
    normalizationMethod = "Custom Background",
    customBackgroundDir = nakedBackgroundDirectory,
    **sharedAnalysisOptions,
)