## Goal

**Modify `countReadsPerBin` so that it accepts a `barcodes` (list) argument and returns an `np.array` of shape `(bins, len(barcodes) * len(bamfiles))`.**

In [1]:
import os
import time
import sys
import numpy as np
from scipy import sparse, io

# Debug switch (0 = off); it is not read by any of the cells visible here.
debug = 0
# Silence all NumPy floating-point warnings. np.seterr returns the previous
# settings, kept in `old_settings` so they can be restored later with
# np.seterr(**old_settings).
old_settings = np.seterr(all='ignore')
## own functions
# Make modules in the current working directory importable. Guarded so that
# re-running this cell does not keep appending duplicate "." entries.
if "." not in sys.path:
    sys.path.append(".")
import scReadCounter as countR

In [2]:
# All input BAM files live in the same directory and differ only in their
# index number (i1 .. i4), so the directory is spelled out once and the list
# of paths is derived from it. Relocating the data now needs a single edit.
bam_dir = '/hpc/hub_oudenaarden/vbhardwaj/2019_scHiC/04_ESC_NPC/04_workflow_outputs/fESC_CTCF-K4me3_26-02-2020/dedup_bam'
bamfiles = ['{}/HVG-ChIC-fESC-CTCF-i{}-26-02-20.bam'.format(bam_dir, index)
            for index in range(1, 5)]

In [9]:
whitelist = '/hpc/hub_oudenaarden/vbhardwaj/annotations/cell_barcodes_inhouse/maya_384NLA.bc'
# Read the barcode whitelist into a list, one entry per line of the file.
# Surrounding whitespace is trimmed and blank lines are dropped, so a trailing
# empty line in the file cannot end up as an empty-string barcode.
with open(whitelist, 'r') as f:
    barcodes = [line.strip() for line in f if line.strip()]

In [13]:
# ---- binning ----------------------------------------------------------------
binSize = 100000
distanceBetweenBins = 0
# step handed to the counter: bin size plus the gap between bins
stepsize = binSize + distanceBetweenBins

# ---- execution --------------------------------------------------------------
numberOfProcessors = 20
genomeChunkSize = None

# ---- annotation -------------------------------------------------------------
# BED/GTF restriction is currently switched off; the path used previously was
# "/hpc/hub_oudenaarden/vbhardwaj/annotations/mm10_gencode23/custom_annotation/gencode.vM23.level1_only.gtf"
bed_file = None
blacklist = "/hpc/hub_oudenaarden/vbhardwaj/annotations/blacklists/mm10-blacklist.v2.bed"

# ---- output -----------------------------------------------------------------
# one label per entry of `bamfiles`, in the same order
labels = ['i1', 'i2', 'i3', 'i4']
outFilePrefix = "test_counts"

# every output file shares the prefix and differs only in its suffix
mtxFile, rowNamesFile, colNamesFile = [
    outFilePrefix + suffix
    for suffix in (".counts.mtx", ".rownames.txt", ".colnames.txt")
]

In [14]:
# Configure the per-bin, per-barcode read counter for all BAM files.
# NOTE(review): the keyword arguments look like those of deepTools'
# CountReadsPerBin plus `barcodes`/`tagName` — confirm against scReadCounter.
#
# `blackListFileName` now receives the `blacklist` path from the configuration
# cell. It was hard-coded to None before, which left `blacklist` defined but
# unused. Set `blacklist = None` in the configuration cell to turn blacklist
# filtering off again.
c = countR.CountReadsPerBin(
        bamfiles,
        binLength=binSize,
        stepSize=stepsize,
        barcodes=barcodes,
        tagName='BC',
        numberOfSamples=None,
        numberOfProcessors=numberOfProcessors,
        verbose=False,
        region=None,
        bedFile=bed_file,
        blackListFileName=blacklist,
        extendReads=None,
        minMappingQuality=10,
        ignoreDuplicates=True,
        center_read=False,
        samFlag_include=None,
        samFlag_exclude=None,
        minFragmentLength=1,
        maxFragmentLength=2000,
        zerosToNans=False,
        # the raw-data output path doubles as the row-names file of the matrix
        out_file_for_raw_data=rowNamesFile)

In [15]:
# Run the counter configured in the previous cell. Per the goal stated at the
# top of this notebook, the result is expected to be an array of shape
# (bins, barcodes * bam files) — TODO confirm against scReadCounter.
num_reads_per_bin = c.run(allArgs=None)

In [16]:
## add labels to barcodes and write
# One column name per (sample label, barcode) pair, label-major:
# <label1>_<bc1>, <label1>_<bc2>, ..., <label2>_<bc1>, ...
newlabels = ["{}_{}".format(a, b) for a in labels for b in barcodes]

# Convert first so the column count can be checked before anything is written.
sp = sparse.csr_matrix(num_reads_per_bin)

# Refuse to write a column-name file that does not line up with the matrix;
# a silent mismatch would make the outputs unusable downstream.
if sp.shape[1] != len(newlabels):
    raise ValueError(
        "count matrix has {} columns but {} column names were generated".format(
            sp.shape[1], len(newlabels)))

# `with` guarantees the handle is closed even if a write fails.
with open(colNamesFile, "w") as f:
    f.write("\n".join(newlabels))
    f.write("\n")

## write the matrix as .mtx
io.mmwrite(mtxFile, sp, field="integer")