### Set up pipeline with basic imports, data directories, and file basenames.

In [None]:
import os
from benbiohelpers.DataPipelineManagement.GenomeManager import getGenomeFastaFilePath
from bpde_chromatin_analysis.helper_scripts.BPDE_DataDir import getDataDir

# Basenames of the two damage-seq samples handled by this notebook
# (one "cellular", one "naked" DNA, per the variable names).
adarDamageSeqCellularBasename = "A549_BPDE_cell_2h"
adarDamageSeqNakedBasename = "A549_BPDE_nDNA_2h"

# Path to the hg19 genome fasta file, as resolved by the genome manager helper.
hg19FastaFilePath = getGenomeFastaFilePath("hg19")

# Directory holding this data set, located under the project data directory.
adarDamageSeqDataDirectory = os.path.join(
    getDataDir(),
    "Adar_damage-seq",
)

### Format the reads
Format the reads by combining paired-end reads, removing lone "2nd pair" reads, removing duplicates, and combining replicates.

In [None]:
from bpde_chromatin_analysis.FormatAlignedDamageReads import formatAlignedDamageReads

# One aligned-reads bed file per replicate, laid out as
# <data directory>/<basename>/<basename>_repN.bed for each sample.
adarDamageSeqCellularReadsFilePaths = [
    os.path.join(
        adarDamageSeqDataDirectory,
        adarDamageSeqCellularBasename,
        f"{adarDamageSeqCellularBasename}{rep}.bed",
    )
    for rep in ("_rep1", "_rep2", "_rep3", "_rep4")
]
adarDamageSeqNakedReadsFilePaths = [
    os.path.join(
        adarDamageSeqDataDirectory,
        adarDamageSeqNakedBasename,
        f"{adarDamageSeqNakedBasename}{rep}.bed",
    )
    for rep in ("_rep1", "_rep2", "_rep3", "_rep4")
]

# Format each sample's replicate files; the returned paths feed the next step.
formattedAdarDamageSeqCellularReadsFilePaths = formatAlignedDamageReads(
    adarDamageSeqCellularReadsFilePaths
)
formattedAdarDamageSeqNakedReadsFilePaths = formatAlignedDamageReads(
    adarDamageSeqNakedReadsFilePaths
)

### Convert the BPDE damage-seq reads to single-nucleotide lesion positions.

In [None]:
from bpde_chromatin_analysis.GetBDPE_DamagePositionsFromAlignedReads import getBPDE_DamagePositionsFromAlignedReads

# Derive damage positions from the formatted reads of each sample; the hg19
# fasta path is passed alongside the reads for both calls.
adarDamageSeqCellularDamagePosFilePaths = getBPDE_DamagePositionsFromAlignedReads(
    formattedAdarDamageSeqCellularReadsFilePaths,
    hg19FastaFilePath,
)
adarDamageSeqNakedDamagePosFilePaths = getBPDE_DamagePositionsFromAlignedReads(
    formattedAdarDamageSeqNakedReadsFilePaths,
    hg19FastaFilePath,
)

### Split damage positions into guanine and non-guanine positions.

In [None]:
from bpde_chromatin_analysis.SplitGuaninePositions import splitGuaninePositions

# The damage-position variables are rebound to whatever splitGuaninePositions
# returns, so later cells see the split output rather than the unsplit paths.
adarDamageSeqCellularDamagePosFilePaths = splitGuaninePositions(
    adarDamageSeqCellularDamagePosFilePaths
)
adarDamageSeqNakedDamagePosFilePaths = splitGuaninePositions(
    adarDamageSeqNakedDamagePosFilePaths
)

### Parse the damage position data for mutperiod input

In [None]:
from mutperiodpy.input_parsing.ParseStandardBed import parseStandardBed

# Cellular sample: keep every path returned by parseStandardBed.
adarDamageSeqCellularMutperiodInputFilePaths = parseStandardBed(
    adarDamageSeqCellularDamagePosFilePaths,
    hg19FastaFilePath,
)

# Naked sample: only the first returned path is kept.
# NOTE(review): confirm that element 0 is the intended file given the
# guanine / non-guanine split performed on these paths earlier.
adarDamageSeqNakedMutperiodInputFilePath = parseStandardBed(
    adarDamageSeqNakedDamagePosFilePaths,
    hg19FastaFilePath,
)[0]