Skip to content

Commit

Permalink
Better multiple cooler file creation, added test case for it
Browse files Browse the repository at this point in the history
  • Loading branch information
joachimwolff committed Nov 9, 2018
1 parent 5f77a80 commit d68aba7
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 3 deletions.
6 changes: 3 additions & 3 deletions hicexplorer/hicBuildMatrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -1380,7 +1380,7 @@ def main(args=None):
# will say that the file already exists.
unlink(args.outFileName.name)

if args.outFileName.name.endswith('.mcool') and args.binSize is not None and len(args.binSize) > 2:
if args.outFileName.name.endswith('.cool') and args.binSize is not None and len(args.binSize) > 2:

matrixFileHandlerOutput = MatrixFileHandler(pFileType='cool')
matrixFileHandlerOutput.set_matrix_variables(hic_ma.matrix,
Expand All @@ -1394,13 +1394,13 @@ def main(args=None):
hic_matrix_to_merge = deepcopy(hic_ma)
_mergeFactor = int(resolution) // args.binSize[0]
merged_matrix = hicMergeMatrixBins.merge_bins(hic_matrix_to_merge, _mergeFactor)
matrixFileHandlerOutput = MatrixFileHandler(pFileType='cool')
matrixFileHandlerOutput = MatrixFileHandler(pFileType='cool', pAppend=True)
matrixFileHandlerOutput.set_matrix_variables(merged_matrix.matrix,
merged_matrix.cut_intervals,
merged_matrix.nan_bins,
merged_matrix.correction_factors,
merged_matrix.distance_counts)
matrixFileHandlerOutput.save(args.outFileName.name + '::/resolutions/' + str(resolution), pSymmetric=True, pApplyCorrection=False, pAppend=True)
matrixFileHandlerOutput.save(args.outFileName.name + '::/resolutions/' + str(resolution), pSymmetric=True, pApplyCorrection=False)

else:
hic_ma.save(args.outFileName.name)
Expand Down
60 changes: 60 additions & 0 deletions hicexplorer/test/long_run/test_hicBuildMatrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,66 @@ def test_build_matrix_cooler():
shutil.rmtree(qc_folder)


def test_build_matrix_cooler_multiple():
outfile = NamedTemporaryFile(suffix='.cool', delete=False)
outfile.close()
qc_folder = mkdtemp(prefix="testQC_")
args = "-s {} {} --outFileName {} -bs 5000 10000 20000 -b /tmp/test.bam --QCfolder {} --threads 4".format(sam_R1, sam_R2,
outfile.name,
qc_folder).split()
hicBuildMatrix.main(args)

test_5000 = hm.hiCMatrix(ROOT + "hicBuildMatrix/multi_small_test_matrix.cool::/resolutions/5000")
test_10000 = hm.hiCMatrix(ROOT + "hicBuildMatrix/multi_small_test_matrix.cool::/resolutions/10000")
test_20000 = hm.hiCMatrix(ROOT + "hicBuildMatrix/multi_small_test_matrix.cool::/resolutions/20000")

new_5000 = hm.hiCMatrix(outfile.name + '::/resolutions/5000')
new_10000 = hm.hiCMatrix(outfile.name + '::/resolutions/10000')
new_20000 = hm.hiCMatrix(outfile.name + '::/resolutions/20000')


nt.assert_equal(test_5000.matrix.data, new_5000.matrix.data)
nt.assert_equal(test_10000.matrix.data, new_10000.matrix.data)
nt.assert_equal(test_20000.matrix.data, new_20000.matrix.data)

# nt.assert_equal(test.cut_intervals, new.cut_intervals)
nt.assert_equal(len(new_5000.cut_intervals), len(test_5000.cut_intervals))
nt.assert_equal(len(new_10000.cut_intervals), len(test_10000.cut_intervals))
nt.assert_equal(len(new_20000.cut_intervals), len(test_20000.cut_intervals))

cut_interval_new_ = []
cut_interval_test_ = []
for x in new_5000.cut_intervals:
cut_interval_new_.append(x[:3])
for x in test_5000.cut_intervals:
cut_interval_test_.append(x[:3])

nt.assert_equal(cut_interval_new_, cut_interval_test_)

cut_interval_new_ = []
cut_interval_test_ = []
for x in new_10000.cut_intervals:
cut_interval_new_.append(x[:3])
for x in test_10000.cut_intervals:
cut_interval_test_.append(x[:3])

nt.assert_equal(cut_interval_new_, cut_interval_test_)

cut_interval_new_ = []
cut_interval_test_ = []
for x in new_20000.cut_intervals:
cut_interval_new_.append(x[:3])
for x in test_20000.cut_intervals:
cut_interval_test_.append(x[:3])

nt.assert_equal(cut_interval_new_, cut_interval_test_)
# print(set(os.listdir(ROOT + "QC/")))
assert are_files_equal(ROOT + "QC/QC.log", qc_folder + "/QC.log")
assert set(os.listdir(ROOT + "QC/")) == set(os.listdir(qc_folder))

os.unlink(outfile.name)
shutil.rmtree(qc_folder)

def test_build_matrix_rf():
outfile = NamedTemporaryFile(suffix='.h5', delete=False)
outfile.close()
Expand Down

0 comments on commit d68aba7

Please sign in to comment.