This repository has been archived by the owner on Dec 16, 2022. It is now read-only.
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Migrate specialized Variant Reader tests to their own files
- Loading branch information
1 parent
05676cf
commit cbcb8a3
Showing
5 changed files
with
385 additions
and
299 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
#include "variant.h" | ||
#include "indexed_variant_reader.h" | ||
#include "indexed_variant_iterator.h" | ||
#include "test_utils.h" | ||
|
||
#include <boost/test/unit_test.hpp> | ||
|
||
using namespace std; | ||
using namespace gamgee; | ||
|
||
// copied / migrated from variant_reader_test | ||
|
||
/* | ||
NOTE: test_variants.bcf and the var_idx directory were regenerated with the following commands: | ||
bcftools view testdata/test_variants.vcf -o testdata/var_idx/test_variants.bcf -O b | ||
bcftools view testdata/test_variants.vcf -o testdata/var_idx/test_variants_csi.vcf.gz -O z | ||
cp testdata/var_idx/test_variants_csi.vcf.gz testdata/var_idx/test_variants_tabix.vcf.gz | ||
bcftools index testdata/var_idx/test_variants.bcf | ||
bcftools index testdata/var_idx/test_variants_csi.vcf.gz | ||
bcftools index testdata/var_idx/test_variants_tabix.vcf.gz -t | ||
cp testdata/var_idx/test_variants.bcf testdata/test_variants.bcf | ||
*/ | ||
// Indexed input files under testdata/var_idx: two bgzipped VCFs and one BCF.
const vector<string> indexed_variant_vcf_inputs = {"testdata/var_idx/test_variants_csi.vcf.gz", "testdata/var_idx/test_variants_tabix.vcf.gz"};
const vector<string> indexed_variant_bcf_inputs = {"testdata/var_idx/test_variants.bcf"};

// Interval lists handed to the indexed reader: whole chromosomes or single
// base-pair ranges, either covering several locations ("full") or one ("partial").
const vector<string> indexed_variant_chrom_full = {"1", "20", "22"};
const vector<string> indexed_variant_bp_full = {"1:10000000-10000000", "20:10001000-10001000", "20:10002000-10002000", "20:10003000-10003000", "22:10004000-10004000"};
const vector<string> indexed_variant_chrom_partial = {"1"};
const vector<string> indexed_variant_bp_partial = {"20:10001000-10001000"};
|
||
// Partial-interval queries against the indexed BCF input: each of the two
// interval lists is expected to select exactly one (different) record.
BOOST_AUTO_TEST_CASE( indexed_variant_reader_partial_test ) {
  // Bind by const reference; iterating by value copied every file name.
  for (const auto& filename : indexed_variant_bcf_inputs) {

    // Query by whole chromosome ("1").
    auto truth_index = 0u;
    const auto reader1 = IndexedVariantReader<IndexedVariantIterator>{filename, indexed_variant_chrom_partial};
    for (const auto& record : reader1) {
      BOOST_CHECK_EQUAL(record.ref(), "T");
      BOOST_CHECK_EQUAL(record.chromosome(), 0u);
      BOOST_CHECK_EQUAL(record.alignment_start(), 10000000u);
      BOOST_CHECK_EQUAL(record.alignment_stop(), 10000000u);
      BOOST_CHECK_EQUAL(record.n_alleles(), 2u);
      BOOST_CHECK_EQUAL(record.n_samples(), 3u);
      BOOST_CHECK_EQUAL(record.id(), "db2342");
      ++truth_index;
    }
    // Exactly one record must have been visited.
    BOOST_CHECK_EQUAL(truth_index, 1u);

    // Query by a single base-pair range ("20:10001000-10001000").
    truth_index = 0u;
    const auto reader2 = IndexedVariantReader<IndexedVariantIterator>{filename, indexed_variant_bp_partial};
    for (const auto& record : reader2) {
      BOOST_CHECK_EQUAL(record.ref(), "GG");
      BOOST_CHECK_EQUAL(record.chromosome(), 1u);
      BOOST_CHECK_EQUAL(record.alignment_start(), 10001000u);
      BOOST_CHECK_EQUAL(record.alignment_stop(), 10001001u);
      BOOST_CHECK_EQUAL(record.n_alleles(), 2u);
      BOOST_CHECK_EQUAL(record.n_samples(), 3u);
      BOOST_CHECK_EQUAL(record.id(), "rs837472");
      ++truth_index;
    }
    BOOST_CHECK_EQUAL(truth_index, 1u);
  }
}
|
||
// A reader obtained through check_move_constructor must start at the same
// record as an untouched reader opened on the same file and intervals.
BOOST_AUTO_TEST_CASE( indexed_variant_reader_move_test ) {
  // Bind by const reference; iterating by value copied every file name.
  for (const auto& filename : indexed_variant_bcf_inputs) {
    auto reader0 = IndexedVariantReader<IndexedVariantIterator>{filename, indexed_variant_chrom_full};
    auto reader1 = IndexedVariantReader<IndexedVariantIterator>{filename, indexed_variant_chrom_full};
    // NOTE(review): check_move_constructor is presumably declared in test_utils.h — confirm.
    auto moved = check_move_constructor(reader1);

    // Compare the first record produced by each reader.
    auto record0 = *reader0.begin();
    auto moved_record = *moved.begin();

    BOOST_CHECK_EQUAL(record0.alignment_start(), moved_record.alignment_start());
  }
}
|
||
// An iterator obtained through check_move_constructor must point at the same
// record as an untouched iterator over an identically configured reader.
BOOST_AUTO_TEST_CASE( indexed_variant_iterator_move_test ) {
  // Bind by const reference; iterating by value copied every file name.
  for (const auto& filename : indexed_variant_bcf_inputs) {
    auto reader0 = IndexedVariantReader<IndexedVariantIterator>{filename, indexed_variant_chrom_full};
    auto iter0 = reader0.begin();
    auto reader1 = IndexedVariantReader<IndexedVariantIterator>{filename, indexed_variant_chrom_full};
    auto iter1 = reader1.begin();
    // NOTE(review): check_move_constructor is presumably declared in test_utils.h — confirm.
    auto moved = check_move_constructor(iter1);

    // Compare the record each iterator currently refers to.
    auto record0 = *iter0;
    auto moved_record = *moved;

    BOOST_CHECK_EQUAL(record0.alignment_start(), moved_record.alignment_start());
  }
}
|
||
|
||
// Construction failures: a missing file and a file without an index must each
// raise their dedicated exception type.
BOOST_AUTO_TEST_CASE( indexed_variant_reader_nonexistent_file ) {
  using Reader = IndexedVariantReader<IndexedVariantIterator>;

  // The VCF path itself does not exist.
  BOOST_CHECK_THROW(Reader("foo/bar/nonexistent.vcf", vector<string>{}), FileOpenException);

  // The VCF is present, but there is no index file next to it.
  BOOST_CHECK_THROW(Reader("testdata/unindexed/test_unindexed.vcf", vector<string>{}), IndexLoadException);
}
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,136 @@ | ||
#include "variant.h" | ||
#include "variant_header_builder.h" | ||
#include "multiple_variant_reader.h" | ||
#include "multiple_variant_iterator.h" | ||
#include "test_utils.h" | ||
|
||
#include <boost/test/unit_test.hpp> | ||
|
||
using namespace std; | ||
using namespace gamgee; | ||
|
||
// copied / migrated from variant_reader_test | ||
|
||
// Header validation across multiple inputs: mismatched headers are rejected by
// default and accepted when the second constructor argument is false.
BOOST_AUTO_TEST_CASE( multi_variant_reader_validation )
{
  const std::vector<std::string> filenames1{"testdata/test_variants.vcf", "testdata/extra_header.vcf"};
  const std::vector<std::string> filenames2{"testdata/test_variants.vcf", "testdata/missing_header.vcf"};

  // validate mismatched headers by default
  // (bind by const reference; iterating by value copied each vector again)
  for (const auto& filenames_v : {filenames1, filenames2}) {
    // `auto reader =` is deliberate: a bare `Type(filenames_v);` statement would
    // be parsed as the declaration of a new variable named filenames_v.
    BOOST_CHECK_THROW(
      auto reader = MultipleVariantReader<MultipleVariantIterator>(filenames_v),
      HeaderCompatibilityException
    );
  }

  // don't validate mismatched headers: construction must succeed
  for (const auto& filenames_v : {filenames1, filenames2}) {
    BOOST_CHECK_NO_THROW(MultipleVariantReader<MultipleVariantIterator>(filenames_v, false));
  }
}
|
||
// Expected values for multiple_variant_reader_difference_test; entry i describes
// the i-th group of records yielded by the reader.
const vector<uint32_t> multi_diff_truth_record_count = {4, 1, 1, 1, 2, 1, 1, 1};
const vector<uint32_t> multi_diff_truth_chromosome = {0, 1, 1, 1, 1, 2, 2, 2};
const vector<uint32_t> multi_diff_truth_alignment_starts = {10000000, 10001000, 10001999, 10002000, 10003000, 10004000, 10005000, 10006000};
const vector<string> multi_diff_truth_ref = {"T", "GG", "TAGTGQA", "TAGTGQA", "A", "GAT", "GAT", "GAT"};
const vector<uint32_t> multi_diff_truth_n_alleles = {2, 2, 2, 2, 2, 3, 3, 3};
const vector<string> multi_diff_truth_id = {"db2342", "rs837472", ".", ".", ".", ".", ".", "."};
|
||
// Reads two VCFs with differing content (header validation off) and compares
// every yielded group of records against the multi_diff_truth_* tables.
BOOST_AUTO_TEST_CASE( multiple_variant_reader_difference_test ) {
  auto truth_index = 0u;
  const auto reader = MultipleVariantReader<MultipleVariantIterator>{{"testdata/test_variants.vcf", "testdata/test_variants_multiple_alt.vcf"}, false};
  for (const auto& vec : reader) {
    // Abort instead of indexing past the truth tables if the reader yields
    // more groups than expected.
    BOOST_REQUIRE_LT(truth_index, multi_diff_truth_record_count.size());
    BOOST_CHECK_EQUAL(vec.size(), multi_diff_truth_record_count[truth_index]);
    for (const auto& record : vec) {
      BOOST_CHECK_EQUAL(record.chromosome(), multi_diff_truth_chromosome[truth_index]);
      BOOST_CHECK_EQUAL(record.alignment_start(), multi_diff_truth_alignment_starts[truth_index]);
      BOOST_CHECK_EQUAL(record.ref(), multi_diff_truth_ref[truth_index]);
      BOOST_CHECK_EQUAL(record.n_alleles(), multi_diff_truth_n_alleles[truth_index]);
      BOOST_CHECK_EQUAL(record.n_samples(), 3u);
      BOOST_CHECK_EQUAL(record.id(), multi_diff_truth_id[truth_index]);
    }
    ++truth_index;
  }
  // All eight groups must have been visited.
  BOOST_CHECK_EQUAL(truth_index, 8u);
}
|
||
void multiple_variant_reader_sample_test(const vector<string> samples, const bool include, const uint desired_samples) { | ||
auto filenames = vector<string>{"testdata/test_variants.vcf", "testdata/test_variants.bcf"}; | ||
|
||
auto reader = MultipleVariantReader<MultipleVariantIterator>{filenames, false, samples, include}; | ||
BOOST_CHECK_EQUAL(reader.combined_header().n_samples(), desired_samples); | ||
} | ||
|
||
// Sites-only: an empty sample list with include == true leaves no samples.
BOOST_AUTO_TEST_CASE( multiple_variant_reader_sites_only ) {
  const auto no_samples = vector<string>{};
  multiple_variant_reader_sample_test(no_samples, true, 0);
}
|
||
// All samples: an empty list with include == false excludes nothing, so all
// three samples are expected.
BOOST_AUTO_TEST_CASE( multiple_variant_reader_include_all_samples ) {
  const auto no_samples = vector<string>{};
  multiple_variant_reader_sample_test(no_samples, false, 3);
}
|
||
// Explicit inclusion: only the listed samples are expected to remain.
BOOST_AUTO_TEST_CASE( multiple_variant_reader_including ) {
  // a single included sample
  const auto one_sample = vector<string>{"NA12878"};
  multiple_variant_reader_sample_test(one_sample, true, 1);

  // two included samples
  const auto two_samples = vector<string>{"NA12878", "NA12892"};
  multiple_variant_reader_sample_test(two_samples, true, 2);
}
|
||
// Explicit exclusion: the listed samples are expected to be dropped from the three available.
BOOST_AUTO_TEST_CASE( multiple_variant_reader_excluding ) {
  // one excluded sample leaves two
  const auto one_sample = vector<string>{"NA12891"};
  multiple_variant_reader_sample_test(one_sample, false, 2);

  // two excluded samples leave one
  const auto two_samples = vector<string>{"NA12891", "NA12878"};
  multiple_variant_reader_sample_test(two_samples, false, 1);
}
|
||
// The reader's combined header must equal the merge of the two input headers,
// while every individual variant keeps the header of the file it came from.
BOOST_AUTO_TEST_CASE( multiple_variant_reader_headers_test ) {
  const auto file1 = "testdata/mvr_hdr/test1.vcf";
  const auto file2 = "testdata/mvr_hdr/test2.vcf";

  // Build the expected combined header independently of the reader under test.
  auto header1 = SingleVariantReader{file1}.header();
  auto header2 = SingleVariantReader{file2}.header();
  auto combined_header = VariantHeaderBuilder{header1}.merge(header2).build();

  const auto reader = MultipleVariantReader<MultipleVariantIterator>{{file1, file2}};
  BOOST_CHECK(reader.combined_header() != header1);
  BOOST_CHECK(reader.combined_header() != header2);
  BOOST_CHECK(reader.combined_header() == combined_header);
  // Bind by const reference; iterating by value copied every group and record.
  for (const auto& vec : reader) {
    for (const auto& variant : vec) {
      BOOST_CHECK(variant.header() != combined_header);
      // order is determined by priority queue - hard to predict
      BOOST_CHECK(variant.header() == header1 || variant.header() == header2);
    }
  }
}
|
||
// Expected values for gvcf_test_multiple; entry i describes the records in the
// i-th group yielded by the reader.
const vector<string> gvcf_truth_ref = {"T", "C", "GG"};
const vector<uint32_t> gvcf_truth_chromosome = {0, 0, 1};
const vector<uint32_t> gvcf_truth_alignment_starts = {10000000, 20000000, 10001000};
const vector<uint32_t> gvcf_truth_alignment_stops = {10000000, 20000123, 10001001};
const vector<uint32_t> gvcf_truth_n_alleles = {2, 2, 2};
const vector<string> gvcf_truth_id = {"db2342", ".", "rs837472"};
|
||
// Reads the gVCF test data through both its VCF and BCF files at once and
// compares every record of each yielded group against the gvcf_truth_* tables.
BOOST_AUTO_TEST_CASE( gvcf_test_multiple ) {
  auto truth_index = 0u;
  const auto reader = MultipleVariantReader<MultipleVariantIterator>{vector<string>{"testdata/test.g.vcf", "testdata/test.g.bcf"}};
  for (const auto& vec : reader) {
    // Abort instead of indexing past the truth tables if the reader yields
    // more groups than expected.
    BOOST_REQUIRE_LT(truth_index, gvcf_truth_ref.size());
    for (const auto& record : vec) {
      BOOST_CHECK_EQUAL(record.ref(), gvcf_truth_ref[truth_index]);
      BOOST_CHECK_EQUAL(record.chromosome(), gvcf_truth_chromosome[truth_index]);
      BOOST_CHECK_EQUAL(record.alignment_start(), gvcf_truth_alignment_starts[truth_index]);
      BOOST_CHECK_EQUAL(record.alignment_stop(), gvcf_truth_alignment_stops[truth_index]);
      BOOST_CHECK_EQUAL(record.n_alleles(), gvcf_truth_n_alleles[truth_index]);
      BOOST_CHECK_EQUAL(record.n_samples(), 3u);
      BOOST_CHECK_EQUAL(record.id(), gvcf_truth_id[truth_index]);
    }
    ++truth_index;
  }
  // Every truth entry must have been matched by a group; without this check a
  // reader that yields nothing would pass silently.
  BOOST_CHECK_EQUAL(truth_index, gvcf_truth_ref.size());
}
|
||
// Construction must fail with FileOpenException whenever any input is missing.
BOOST_AUTO_TEST_CASE( multiple_variant_reader_nonexistent_file ) {
  using Reader = MultipleVariantReader<MultipleVariantIterator>;

  // Only one input, and it does not exist.
  BOOST_CHECK_THROW(Reader(vector<string>{"foo/bar/nonexistent.vcf"}), FileOpenException);

  // Two inputs, the second of which does not exist.
  BOOST_CHECK_THROW(Reader(vector<string>{"testdata/test_variants.vcf", "foo/bar/nonexistent.vcf"}), FileOpenException);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
#include "variant.h" | ||
#include "synced_variant_reader.h" | ||
#include "synced_variant_iterator.h" | ||
#include "test_utils.h" | ||
|
||
#include <boost/test/unit_test.hpp> | ||
|
||
using namespace std; | ||
using namespace gamgee; | ||
|
||
// copied / migrated from variant_reader_test | ||
|
||
/* | ||
NOTE: test_variants.bcf and the var_idx directory were regenerated with the following commands: | ||
bcftools view testdata/test_variants.vcf -o testdata/var_idx/test_variants.bcf -O b | ||
bcftools view testdata/test_variants.vcf -o testdata/var_idx/test_variants_csi.vcf.gz -O z | ||
cp testdata/var_idx/test_variants_csi.vcf.gz testdata/var_idx/test_variants_tabix.vcf.gz | ||
bcftools index testdata/var_idx/test_variants.bcf | ||
bcftools index testdata/var_idx/test_variants_csi.vcf.gz | ||
bcftools index testdata/var_idx/test_variants_tabix.vcf.gz -t | ||
cp testdata/var_idx/test_variants.bcf testdata/test_variants.bcf | ||
*/ | ||
|
||
// Indexed input files under testdata/var_idx: two bgzipped VCFs and one BCF.
const vector<string> indexed_variant_vcf_inputs = {"testdata/var_idx/test_variants_csi.vcf.gz", "testdata/var_idx/test_variants_tabix.vcf.gz"};
const vector<string> indexed_variant_bcf_inputs = {"testdata/var_idx/test_variants.bcf"};

// Comma-joined interval strings handed to the synced reader: whole chromosomes
// or single base-pair ranges, covering several locations ("full") or one ("partial").
const char* const indexed_variant_chrom_full_joined = "1,20,22";
const char* const indexed_variant_bp_full_joined = "1:10000000-10000000,20:10001000-10001000,20:10002000-10002000,20:10003000-10003000,22:10004000-10004000";
const char* const indexed_variant_chrom_partial_joined = "1";
const char* const indexed_variant_bp_partial_joined = "20:10001000-10001000";
|
||
// Partial-interval queries through the synced reader: for both the VCF and the
// BCF input sets, each interval string is expected to select exactly one
// (different) location.
BOOST_AUTO_TEST_CASE( synced_variant_reader_partial_test ) {
  // Query by whole chromosome ("1").
  const auto intervals1 = indexed_variant_chrom_partial_joined;
  // Bind by const reference; iterating by value copied each input vector again.
  for (const auto& input_files : {indexed_variant_vcf_inputs, indexed_variant_bcf_inputs}) {
    auto truth_index = 0u;
    const auto reader1 = SyncedVariantReader<SyncedVariantIterator>{input_files, intervals1};
    for (const auto& vec : reader1) {
      for (const auto& record : vec) {
        BOOST_CHECK_EQUAL(record.ref(), "T");
        BOOST_CHECK_EQUAL(record.chromosome(), 0u);
        BOOST_CHECK_EQUAL(record.alignment_start(), 10000000u);
        BOOST_CHECK_EQUAL(record.alignment_stop(), 10000000u);
        BOOST_CHECK_EQUAL(record.n_alleles(), 2u);
        BOOST_CHECK_EQUAL(record.n_samples(), 3u);
        BOOST_CHECK_EQUAL(record.id(), "db2342");
      }
      ++truth_index;
    }
    // Exactly one group of records must have been visited.
    BOOST_CHECK_EQUAL(truth_index, 1u);
  }

  // Query by a single base-pair range ("20:10001000-10001000").
  const auto intervals2 = indexed_variant_bp_partial_joined;
  for (const auto& input_files : {indexed_variant_vcf_inputs, indexed_variant_bcf_inputs}) {
    auto truth_index = 0u;
    const auto reader2 = SyncedVariantReader<SyncedVariantIterator>{input_files, intervals2};
    for (const auto& vec : reader2) {
      for (const auto& record : vec) {
        BOOST_CHECK_EQUAL(record.ref(), "GG");
        BOOST_CHECK_EQUAL(record.chromosome(), 1u);
        BOOST_CHECK_EQUAL(record.alignment_start(), 10001000u);
        BOOST_CHECK_EQUAL(record.alignment_stop(), 10001001u);
        BOOST_CHECK_EQUAL(record.n_alleles(), 2u);
        BOOST_CHECK_EQUAL(record.n_samples(), 3u);
        BOOST_CHECK_EQUAL(record.id(), "rs837472");
      }
      ++truth_index;
    }
    BOOST_CHECK_EQUAL(truth_index, 1u);
  }
}
|
||
// A synced reader obtained through check_move_constructor must start at the
// same location as an untouched reader opened on the same inputs and intervals.
BOOST_AUTO_TEST_CASE( synced_variant_reader_move_test ) {
  // Bind by const reference; iterating by value copied each input vector again.
  for (const auto& input_files : {indexed_variant_vcf_inputs, indexed_variant_bcf_inputs}) {
    auto reader0 = SyncedVariantReader<SyncedVariantIterator>{input_files, indexed_variant_chrom_full_joined};
    auto reader1 = SyncedVariantReader<SyncedVariantIterator>{input_files, indexed_variant_chrom_full_joined};
    // NOTE(review): check_move_constructor is presumably declared in test_utils.h — confirm.
    auto moved = check_move_constructor(reader1);

    // Each dereference yields a group of records; compare the first of each.
    auto record0 = *reader0.begin();
    auto moved_record = *moved.begin();

    BOOST_CHECK_EQUAL(record0[0].alignment_start(), moved_record[0].alignment_start());
  }
}
|
||
// A synced iterator obtained through check_move_constructor must point at the
// same location as an untouched iterator over an identically configured reader.
BOOST_AUTO_TEST_CASE( synced_variant_iterator_move_test ) {
  // Bind by const reference; iterating by value copied each input vector again.
  for (const auto& input_files : {indexed_variant_vcf_inputs, indexed_variant_bcf_inputs}) {
    auto reader0 = SyncedVariantReader<SyncedVariantIterator>{input_files, indexed_variant_chrom_full_joined};
    auto iter0 = reader0.begin();
    auto reader1 = SyncedVariantReader<SyncedVariantIterator>{input_files, indexed_variant_chrom_full_joined};
    auto iter1 = reader1.begin();
    // NOTE(review): check_move_constructor is presumably declared in test_utils.h — confirm.
    auto moved = check_move_constructor(iter1);

    // Each dereference yields a group of records; compare the first of each.
    auto record0 = *iter0;
    auto moved_record = *moved;

    BOOST_CHECK_EQUAL(record0[0].alignment_start(), moved_record[0].alignment_start());
  }
}
|
||
// Construction must fail with FileOpenException whenever any input is missing.
BOOST_AUTO_TEST_CASE( synced_variant_reader_nonexistent_file ) {
  using Reader = SyncedVariantReader<SyncedVariantIterator>;

  // Only one input, and it does not exist.
  BOOST_CHECK_THROW(Reader(vector<string>{"foo/bar/nonexistent.vcf"}, ""), FileOpenException);

  // Two inputs, the second of which does not exist.
  BOOST_CHECK_THROW(Reader(vector<string>{"testdata/var_idx/test_variants_csi.vcf.gz", "foo/bar/nonexistent.vcf"}, ""), FileOpenException);
}
Oops, something went wrong.