### Set up pipeline with basic imports, data directories, and file basenames.

In [None]:
import os
from benbiohelpers.DataPipelineManagement.GenomeManager import getGenomeFastaFilePath
from bpde_chromatin_analysis.helper_scripts.BPDE_DataDir import getDataDir

# Path to the hg19 genome fasta file; passed to later pipeline steps.
hg19FastaFilePath = getGenomeFastaFilePath("hg19")

# Root directory holding the Jiang BPDE damage map data.
jiangDataDirectory = os.path.join(getDataDir(), "Jiang_BPDE_damage_maps")

# Basenames identifying the 2uM BPDE data sets (cellular and naked DNA).
jiang2uM_CellularBasename = "BEAS-2B_2uM_BPDE_cell_24h"
jiang2uM_NakedBasename = "BEAS-2B_2uM_BPDE_nDNA_24h"

### Format the reads.
Format the reads by combining paired-end reads, removing lone "2nd pair" reads, removing duplicates, and combining replicates.

In [None]:
from bpde_chromatin_analysis.FormatAlignedDamageReads import formatAlignedDamageReads

# Each replicate's bed file sits in a directory named after its data set's basename:
#   <jiangDataDirectory>/<basename>/<basename>_repN.bed
replicateSuffixes = ("_rep1", "_rep2", "_rep3")

jiang2uM_CellularReadsFilePaths = [
    os.path.join(jiangDataDirectory, jiang2uM_CellularBasename, f"{jiang2uM_CellularBasename}{rep}.bed")
    for rep in replicateSuffixes
]
jiang2uM_NakedReadsFilePaths = [
    os.path.join(jiangDataDirectory, jiang2uM_NakedBasename, f"{jiang2uM_NakedBasename}{rep}.bed")
    for rep in replicateSuffixes
]

# Format the cellular reads first, then the naked DNA reads.
formattedJiang2uM_CellularReadsFilePaths = formatAlignedDamageReads(
    jiang2uM_CellularReadsFilePaths
)
formattedJiang2uM_NakedReadsFilePaths = formatAlignedDamageReads(
    jiang2uM_NakedReadsFilePaths
)

### Convert the BPDE damage-seq reads to single-nucleotide lesion positions.

In [None]:
from bpde_chromatin_analysis.GetBDPE_DamagePositionsFromAlignedReads import getBPDE_DamagePositionsFromAlignedReads

# Derive damage positions from the formatted reads of each data set, passing
# the hg19 fasta file path to the conversion step.
jiang2uM_CellularDamagePosFilePaths = getBPDE_DamagePositionsFromAlignedReads(
    formattedJiang2uM_CellularReadsFilePaths,
    hg19FastaFilePath,
)
jiang2uM_NakedDamagePosFilePaths = getBPDE_DamagePositionsFromAlignedReads(
    formattedJiang2uM_NakedReadsFilePaths,
    hg19FastaFilePath,
)

### Split damage positions into guanine and non-guanine positions.

In [None]:
from bpde_chromatin_analysis.SplitGuaninePositions import splitGuaninePositions

# Rebind each damage-position variable to whatever splitGuaninePositions returns.
# NOTE(review): which of the split outputs (guanine / non-guanine) the returned
# value refers to is not visible here - confirm against SplitGuaninePositions.
jiang2uM_CellularDamagePosFilePaths = splitGuaninePositions(
    jiang2uM_CellularDamagePosFilePaths
)
jiang2uM_NakedDamagePosFilePaths = splitGuaninePositions(
    jiang2uM_NakedDamagePosFilePaths
)

### Parse the damage position data for mutperiod input.

In [None]:
from mutperiodpy.input_parsing.ParseStandardBed import parseStandardBed

# Parse the cellular damage positions; the full result of parseStandardBed is kept.
jiang2uM_CellularDamageMutperiodInputFilePaths = parseStandardBed(
    jiang2uM_CellularDamagePosFilePaths,
    hg19FastaFilePath,
)

# For the naked DNA data only the first element of the result is kept.
# NOTE(review): confirm that the first element is the intended naked DNA file.
jiang2uM_NakedDamageMutperiodInputFilePath = parseStandardBed(
    jiang2uM_NakedDamagePosFilePaths,
    hg19FastaFilePath,
)[0]