Skip to content

Commit

Permalink
Adding test case and files for hicAverageContacts
Browse files Browse the repository at this point in the history
  • Loading branch information
joachimwolff committed Nov 22, 2018
1 parent c0da96e commit ed21a0c
Show file tree
Hide file tree
Showing 6 changed files with 52 additions and 24 deletions.
27 changes: 3 additions & 24 deletions hicexplorer/hicAverageRegions.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def parse_arguments(args=None):
parserRequired.add_argument('--regions', '-r',
help='BED file which stores a list of regions that are summed and averaged',
required=True)
parserMutuallyExclusiveGroup = parser.add_mutually_exclusive_group()
parserMutuallyExclusiveGroup = parser.add_mutually_exclusive_group(required=True)
parserMutuallyExclusiveGroup.add_argument('--range', '-ra',
help='Range of region up- and downstream of each region to include in genomic units.',
nargs=2,
Expand Down Expand Up @@ -81,25 +81,17 @@ def calculateViewpointRangeBins(pHiCMatrix, pViewpoint, pRange):
end = viewpoint_index + pRange[1]

return start, end
# log.debug('viewpoint_index {}'.format(viewpoint_index))
# log.debug('max_length {}'.format(max_length))
# def extendMatrix(pStart, pEnd, pSize, pData):

# matrix = np.array((pSize, pSize))
# matrix[abs(pStart):pEnd, abs(pStart):pEnd] = pData
# return matrix

def main():
def main(args=None):

args = parse_arguments().parse_args()
args = parse_arguments().parse_args(args)

hic_ma = hm.hiCMatrix(pMatrixFile=args.matrix)
indices_values = []
# max_bin = hic_ma.matrix.shape[1]
with open(args.regions, 'r') as file:
for line in file.readlines():
_line = line.strip().split('\t')
# log.debug('_line {}'.format(_line))
if len(line) == 0:
continue
if len(_line) == 2:
Expand All @@ -112,36 +104,23 @@ def main():
else:
start_bin, end_bin = calculateViewpointRangeBins(hic_ma, viewpoint, args.rangeInBins)
indices_values.append([start_bin, end_bin])
# elif args.rangeInBins:

if args.range:
dimensions_new_matrix = (args.range[0] // hic_ma.getBinSize()) + (args.range[1] // hic_ma.getBinSize())
elif args.rangeInBins:
dimensions_new_matrix = args.rangeInBins[0] + args.rangeInBins[1]
summed_matrix = csr_matrix((dimensions_new_matrix, dimensions_new_matrix), dtype=np.float32)
# log.debug('indices_values {}'.format(indices_values))
# log.debug('shaoe matrux {}'.format(summed_matrix.shape))
max_length = hic_ma.matrix.shape[1]
for start, end in indices_values:
# log.debug('shape {}'.format(hic_ma.matrix[start:end, start:end].shape))
# log.debug('size; {}'.format(np.absolute(start-end)))
_start = 0
_end = summed_matrix.shape[1]
if start < 0:
log.debug('start')
_start = np.absolute(start)
start = 0
# matrix = hic_ma.matrix[start:end, start:end]
if end >= max_length:
log.debug('end')

_end = end
end = max_length
# matrix = hic_ma.matrix[start:end, start:end]

log.debug('summed_matrix[_start:_end, _start:_end].shape {}'.format(summed_matrix[_start:_end, _start:_end].shape))
log.debug('hic_ma.matrix[start:end, start:end].shape {}'.format(hic_ma.matrix[start:end, start:end].shape))

summed_matrix[_start:_end, _start:_end] += hic_ma.matrix[start:end, start:end]

summed_matrix /= len(indices_values)
Expand Down
43 changes: 43 additions & 0 deletions hicexplorer/test/general/test_hicAverageRegions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import os
from tempfile import NamedTemporaryFile
from hicexplorer import hicAverageRegions
import numpy.testing as nt
import numpy as np
import pytest
from scipy.sparse import load_npz
import logging
log = logging.getLogger(__name__)

# Path of the "test_data/" directory that sits next to this file's parent
# directory. The trailing slash is kept because the tests build fixture paths
# by plain string concatenation (ROOT + '<file>').
ROOT = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "test_data/")

def test_average_regions():
    """Run hicAverageRegions with ``--range 100000 100000`` and check the output.

    The tool is invoked through ``hicAverageRegions.main`` on the small test
    matrix and the ``regions.bed`` fixture; the sparse matrix it writes is
    compared (rounded, ``decimal=0``) against the stored reference
    ``result_range_100000.npz``.
    """
    matrix = ROOT + 'small_test_matrix.cool'
    bed_file = ROOT + 'hicAverageRegions/regions.bed'

    # Only the file *name* is handed to the tool, so close our own handle right
    # away; delete=False keeps the file on disk until the explicit removal below.
    outfile = NamedTemporaryFile(suffix='.npz', prefix='average_region', delete=False)
    outfile.close()

    try:
        # Build the argument list directly instead of splitting a formatted
        # string, so paths containing whitespace stay intact.
        args = ['--matrix', matrix,
                '--regions', bed_file,
                '-o', outfile.name,
                '--range', '100000', '100000']
        log.debug('path: {}'.format(matrix))

        hicAverageRegions.main(args)

        test_file = load_npz(ROOT + 'hicAverageRegions/result_range_100000.npz')
        new_file = load_npz(outfile.name)

        nt.assert_almost_equal(test_file.data, new_file.data, decimal=0)
    finally:
        # Clean up even when main() raises or the comparison fails.
        os.remove(outfile.name)

def test_average_regions_range_in_bins():
    """Run hicAverageRegions with ``--rangeInBins 100 100`` and check the output.

    The tool is invoked through ``hicAverageRegions.main`` on the small test
    matrix and the ``regions.bed`` fixture; the sparse matrix it writes is
    compared (rounded, ``decimal=0``) against the stored reference
    ``result_rangeInBins_100.npz``.
    """
    matrix = ROOT + 'small_test_matrix.cool'
    bed_file = ROOT + 'hicAverageRegions/regions.bed'

    # Only the file *name* is handed to the tool, so close our own handle right
    # away; delete=False keeps the file on disk until the explicit removal below.
    outfile = NamedTemporaryFile(suffix='.npz', prefix='average_region', delete=False)
    outfile.close()

    try:
        # Build the argument list directly instead of splitting a formatted
        # string, so paths containing whitespace stay intact.
        args = ['--matrix', matrix,
                '--regions', bed_file,
                '-o', outfile.name,
                '--rangeInBins', '100', '100']

        hicAverageRegions.main(args)

        test_file = load_npz(ROOT + 'hicAverageRegions/result_rangeInBins_100.npz')
        new_file = load_npz(outfile.name)

        nt.assert_almost_equal(test_file.data, new_file.data, decimal=0)
    finally:
        # Clean up even when main() raises or the comparison fails.
        os.remove(outfile.name)
6 changes: 6 additions & 0 deletions hicexplorer/test/test_data/hicAverageRegions/regions.bed
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
chr2L 2500000
chr2L 3500000
chr2L 4500000
chr2L 5500000
chr2L 7000000
chr2L 19000000
Binary file not shown.
Binary file not shown.
Binary file not shown.

0 comments on commit ed21a0c

Please sign in to comment.