In [1]:
import os
from mutperiodpy.helper_scripts.UsefulFileSystemFunctions import getDataDirectory
from benbiohelpers.CountThisInThat.Counter import ThisInThatCounter
from benbiohelpers.CountThisInThat.OutputDataStratifiers import AmbiguityHandling

# Folder holding the circle-seq deamination data, located under the project data directory.
deaminationDataDir = os.path.join(getDataDirectory(), "circle_seq_deamination_data")

# Bed file of ETS sites whose name is tagged "_100bp_expanded", and the path of its
# un-expanded counterpart: same name with that tag dropped, still ending in ".bed".
expandedETSFilePath = os.path.join(deaminationDataDir, "MELA-AU_ETS_known_proximal_DHS_100bp_expanded.bed")
singleBaseETSFilePath = expandedETSFilePath[:-len("_100bp_expanded.bed")] + ".bed"

In [2]:
!awk '{{OFS="\t"; $$2=$$2+100; $$3=$$3-100; print}}' $expandedETSFilePath > $singleBaseETSFilePath

In [3]:
class ETS_SiteCounter(ThisInThatCounter):
    """
    ThisInThatCounter specialization whose output is stratified first by relative
    position within the encompassing feature and then by strand comparison.
    """

    def setupOutputDataStratifiers(self):
        """
        Register the output stratifiers. Order matters: the relative-position stratifier
        is added first, the strand-comparison stratifier second.
        """
        handler = self.outputDataHandler

        # Strand-specific relative position, with the range widened by the counter's extra radius.
        handler.addRelativePositionStratifier(
            self.currentEncompassingFeature,
            extraRangeRadius=self.encompassingFeatureExtraRadius,
            strandSpecificPos=True,
        )

        # Strand comparison, tolerating ambiguous strand information.
        handler.addStrandComparisonStratifier(strandAmbiguityHandling=AmbiguityHandling.tolerate)

    def setupOutputDataWriter(self):
        """
        Create the output writer for self.outputFilePath. The first stratifier gets no
        custom names (None); the second has its True/False keys labeled
        "Motif_Strand" and "Opposite_Strand".
        """
        strandLabels = {True: "Motif_Strand", False: "Opposite_Strand"}
        self.outputDataHandler.createOutputDataWriter(
            self.outputFilePath,
            customStratifyingNames=(None, strandLabels),
        )

In [7]:
# Run the ETS site counter once per timepoint data set.
for timepoint in ("0h", "24h", "48h", "48h_800M"):

    # Input bed file lives in the timepoint's "DA_<timepoint>" folder; the counts file is
    # written next to it, with the extension replaced by "_ETS_counts.tsv".
    deaminatedPosFilePath = os.path.join(
        deaminationDataDir,
        f"DA_{timepoint}",
        f"{timepoint}_deamination_data_dipy_cytosines.bed",
    )
    deaminationETSCountsFilePath = os.path.splitext(deaminatedPosFilePath)[0] + "_ETS_counts.tsv"

    print(f"Working with {os.path.basename(deaminatedPosFilePath)}...")

    # Encompassed features: deaminated positions; encompassing features: single-base ETS
    # sites, each considered with an extra radius of 10 around it.
    ETS_SiteCounter(
        deaminatedPosFilePath,
        singleBaseETSFilePath,
        deaminationETSCountsFilePath,
        encompassingFeatureExtraRadius=10,
    ).count()

Working with 0h_deamination_data_dipy_cytosines.bed...
Checking input files for proper sorting...
Checking encompassed features file for proper sorting...
Checking encompassing features file for proper sorting...
Counting in chr1
Counting in chr10
Counting in chr11
Counting in chr12
Counting in chr13
Counting in chr14
Counting in chr15
Counting in chr16
Counting in chr17
Counting in chr18
Counting in chr19
Counting in chr2
Counting in chr20
Counting in chr21
Counting in chr22
Counting in chr3
Counting in chr4
Counting in chr5
Counting in chr6
Counting in chr7
Counting in chr8
Counting in chr9
Counting in chrX
Working with 24h_deamination_data_dipy_cytosines.bed...
Checking input files for proper sorting...
Checking encompassed features file for proper sorting...
Checking encompassing features file for proper sorting...
Counting in chr1
Counting in chr10
Counting in chr11
Counting in chr12
Counting in chr13
Counting in chr14
Counting in chr15
Counting in chr16
Counting in chr17
Counting