Skip to content
This repository has been archived by the owner on Dec 16, 2022. It is now read-only.

Commit

Permalink
Migrate specialized Variant Reader tests to their own files
Browse files Browse the repository at this point in the history
  • Loading branch information
jmthibault79 committed Nov 13, 2014
1 parent 05676cf commit cbcb8a3
Show file tree
Hide file tree
Showing 5 changed files with 385 additions and 299 deletions.
4 changes: 4 additions & 0 deletions test/CMakeLists.txt
Expand Up @@ -4,9 +4,11 @@ set(SOURCE_FILES
fastq_test.cpp
genotypes_test.cpp
indexed_sam_reader_test.cpp
indexed_variant_reader_test.cpp
interval_test.cpp
main.cpp
missing_test.cpp
multiple_variant_reader_test.cpp
read_group_test.cpp
reference_block_splitting_variant_reader_test.cpp
reference_test.cpp
Expand All @@ -16,6 +18,8 @@ set(SOURCE_FILES
sam_test.cpp
select_if_test.cpp
short_value_optimized_storage_test.cpp
synced_variant_reader_test.cpp
test_utils.h
utils_test.cpp
variant_builder_test.cpp
variant_header_test.cpp
Expand Down
101 changes: 101 additions & 0 deletions test/indexed_variant_reader_test.cpp
@@ -0,0 +1,101 @@
#include "variant.h"
#include "indexed_variant_reader.h"
#include "indexed_variant_iterator.h"
#include "test_utils.h"

#include <boost/test/unit_test.hpp>

using namespace std;
using namespace gamgee;

// copied / migrated from variant_reader_test

/*
NOTE: Updated test_variants.bcf and var_idx directory via-
bcftools view testdata/test_variants.vcf -o testdata/var_idx/test_variants.bcf -O b
bcftools view testdata/test_variants.vcf -o testdata/var_idx/test_variants_csi.vcf.gz -O z
cp testdata/var_idx/test_variants_csi.vcf.gz testdata/var_idx/test_variants_tabix.vcf.gz
bcftools index testdata/var_idx/test_variants.bcf
bcftools index testdata/var_idx/test_variants_csi.vcf.gz
bcftools index testdata/var_idx/test_variants_tabix.vcf.gz -t
cp testdata/var_idx/test_variants.bcf testdata/test_variants.bcf
*/
// Indexed input files, grouped by container format.
const vector<string> indexed_variant_vcf_inputs{"testdata/var_idx/test_variants_csi.vcf.gz", "testdata/var_idx/test_variants_tabix.vcf.gz"};
const vector<string> indexed_variant_bcf_inputs{"testdata/var_idx/test_variants.bcf"};

// Interval lists: the "chrom" variants name whole chromosomes, the "bp" variants use chrom:start-stop strings.
const vector<string> indexed_variant_chrom_full{"1", "20", "22"};
const vector<string> indexed_variant_bp_full{"1:10000000-10000000", "20:10001000-10001000", "20:10002000-10002000", "20:10003000-10003000", "22:10004000-10004000"};
const vector<string> indexed_variant_chrom_partial{"1"};
const vector<string> indexed_variant_bp_partial{"20:10001000-10001000"};

// one (different) record each
BOOST_AUTO_TEST_CASE( indexed_variant_reader_partial_test ) {
  // bind each path by reference: the loop only reads it, so there is no reason to copy the string
  for (const auto& filename : indexed_variant_bcf_inputs) {

    // interval list naming a whole chromosome ("1"): exactly one record is expected
    auto chrom_record_count = 0u;
    const auto chrom_reader = IndexedVariantReader<IndexedVariantIterator>{filename, indexed_variant_chrom_partial};
    for (const auto& record : chrom_reader) {
      BOOST_CHECK_EQUAL(record.ref(), "T");
      BOOST_CHECK_EQUAL(record.chromosome(), 0u);
      BOOST_CHECK_EQUAL(record.alignment_start(), 10000000u);
      BOOST_CHECK_EQUAL(record.alignment_stop(), 10000000u);
      BOOST_CHECK_EQUAL(record.n_alleles(), 2u);
      BOOST_CHECK_EQUAL(record.n_samples(), 3u);
      BOOST_CHECK_EQUAL(record.id(), "db2342");
      ++chrom_record_count;
    }
    BOOST_CHECK_EQUAL(chrom_record_count, 1u);

    // interval list given as chrom:start-stop ("20:10001000-10001000"): again exactly one record, a different one
    auto bp_record_count = 0u;
    const auto bp_reader = IndexedVariantReader<IndexedVariantIterator>{filename, indexed_variant_bp_partial};
    for (const auto& record : bp_reader) {
      BOOST_CHECK_EQUAL(record.ref(), "GG");
      BOOST_CHECK_EQUAL(record.chromosome(), 1u);
      BOOST_CHECK_EQUAL(record.alignment_start(), 10001000u);
      BOOST_CHECK_EQUAL(record.alignment_stop(), 10001001u);
      BOOST_CHECK_EQUAL(record.n_alleles(), 2u);
      BOOST_CHECK_EQUAL(record.n_samples(), 3u);
      BOOST_CHECK_EQUAL(record.id(), "rs837472");
      ++bp_record_count;
    }
    BOOST_CHECK_EQUAL(bp_record_count, 1u);
  }
}

// A reader that went through check_move_constructor must still expose the same first record
// as an untouched reader built from the same input.
BOOST_AUTO_TEST_CASE( indexed_variant_reader_move_test ) {
  // iterate by reference so each path is not copied per iteration
  for (const auto& filename : indexed_variant_bcf_inputs) {
    auto reference_reader = IndexedVariantReader<IndexedVariantIterator>{filename, indexed_variant_chrom_full};
    auto reader_to_move = IndexedVariantReader<IndexedVariantIterator>{filename, indexed_variant_chrom_full};
    // check_move_constructor is declared in test_utils.h; presumably it returns the moved-to object -- confirm there
    auto moved = check_move_constructor(reader_to_move);

    const auto reference_record = *reference_reader.begin();
    const auto moved_record = *moved.begin();

    BOOST_CHECK_EQUAL(reference_record.alignment_start(), moved_record.alignment_start());
  }
}

// An iterator that went through check_move_constructor must still dereference to the same
// first record as an untouched iterator over the same input.
BOOST_AUTO_TEST_CASE( indexed_variant_iterator_move_test ) {
  // iterate by reference so each path is not copied per iteration
  for (const auto& filename : indexed_variant_bcf_inputs) {
    // each iterator gets its own reader, so the two never share underlying state
    auto reference_reader = IndexedVariantReader<IndexedVariantIterator>{filename, indexed_variant_chrom_full};
    auto reference_iter = reference_reader.begin();
    auto reader_for_move = IndexedVariantReader<IndexedVariantIterator>{filename, indexed_variant_chrom_full};
    auto iter_to_move = reader_for_move.begin();
    // check_move_constructor is declared in test_utils.h; presumably it returns the moved-to object -- confirm there
    auto moved = check_move_constructor(iter_to_move);

    const auto reference_record = *reference_iter;
    const auto moved_record = *moved;

    BOOST_CHECK_EQUAL(reference_record.alignment_start(), moved_record.alignment_start());
  }
}


// Construction must fail with a distinct exception type for a missing file and for a missing index.
BOOST_AUTO_TEST_CASE( indexed_variant_reader_nonexistent_file ) {
  using Reader = IndexedVariantReader<IndexedVariantIterator>;

  // the VCF path does not exist at all
  BOOST_CHECK_THROW(Reader("foo/bar/nonexistent.vcf", vector<string>{}), FileOpenException);

  // the VCF is present, but there is no index file next to it
  BOOST_CHECK_THROW(Reader("testdata/unindexed/test_unindexed.vcf", vector<string>{}), IndexLoadException);
}

136 changes: 136 additions & 0 deletions test/multiple_variant_reader_test.cpp
@@ -0,0 +1,136 @@
#include "variant.h"
#include "variant_header_builder.h"
#include "multiple_variant_reader.h"
#include "multiple_variant_iterator.h"
#include "test_utils.h"

#include <boost/test/unit_test.hpp>

using namespace std;
using namespace gamgee;

// copied / migrated from variant_reader_test

// Header validation is on by default and must reject inputs whose headers differ;
// passing false as the second constructor argument must let the same inputs through.
BOOST_AUTO_TEST_CASE( multi_variant_reader_validation )
{
  using Reader = MultipleVariantReader<MultipleVariantIterator>;

  const std::vector<std::string> filenames1{"testdata/test_variants.vcf", "testdata/extra_header.vcf"};
  const std::vector<std::string> filenames2{"testdata/test_variants.vcf", "testdata/missing_header.vcf"};

  // validate mismatched headers by default
  // (the "auto reader =" form is deliberate: a bare "Reader(filenames_v);" statement would parse as a declaration)
  for (const auto& filenames_v : {filenames1, filenames2}) {
    BOOST_CHECK_THROW(
      auto reader = Reader(filenames_v),
      HeaderCompatibilityException
    );
  }

  // don't validate mismatched headers: construction has to succeed, and a failure is
  // reported as a failed check so the remaining input set is still exercised
  for (const auto& filenames_v : {filenames1, filenames2}) {
    BOOST_CHECK_NO_THROW(auto reader = Reader(filenames_v, false));
  }
}

// Expected values for multiple_variant_reader_difference_test; entry i describes the i-th group produced by the reader.
const vector<uint32_t> multi_diff_truth_record_count{4, 1, 1, 1, 2, 1, 1, 1};
const vector<uint32_t> multi_diff_truth_chromosome{0, 1, 1, 1, 1, 2, 2, 2};
const vector<uint32_t> multi_diff_truth_alignment_starts{10000000, 10001000, 10001999, 10002000, 10003000, 10004000, 10005000, 10006000};
const vector<string> multi_diff_truth_ref{"T", "GG", "TAGTGQA", "TAGTGQA", "A", "GAT", "GAT", "GAT"};
const vector<uint32_t> multi_diff_truth_n_alleles{2, 2, 2, 2, 2, 3, 3, 3};
const vector<string> multi_diff_truth_id{"db2342", "rs837472", ".", ".", ".", ".", ".", "."};

// Reads two inputs whose records differ and checks every group the reader yields against the
// multi_diff_truth_* tables: group size first, then the fields of each record in the group.
BOOST_AUTO_TEST_CASE( multiple_variant_reader_difference_test ) {
  auto truth_index = 0u;
  // second argument false: presumably skips header validation, as the two inputs are not identical -- confirm against the reader's constructor
  const auto reader = MultipleVariantReader<MultipleVariantIterator>{{"testdata/test_variants.vcf", "testdata/test_variants_multiple_alt.vcf"}, false};
  for (const auto& vec : reader) {
    // abort the test case instead of reading past the end of the truth tables if the reader yields extra groups
    BOOST_REQUIRE_LT(truth_index, multi_diff_truth_record_count.size());
    BOOST_CHECK_EQUAL(vec.size(), multi_diff_truth_record_count[truth_index]);
    for (const auto& record : vec) {
      BOOST_CHECK_EQUAL(record.chromosome(), multi_diff_truth_chromosome[truth_index]);
      BOOST_CHECK_EQUAL(record.alignment_start(), multi_diff_truth_alignment_starts[truth_index]);
      BOOST_CHECK_EQUAL(record.ref(), multi_diff_truth_ref[truth_index]);
      BOOST_CHECK_EQUAL(record.n_alleles(), multi_diff_truth_n_alleles[truth_index]);
      BOOST_CHECK_EQUAL(record.n_samples(), 3u);
      BOOST_CHECK_EQUAL(record.id(), multi_diff_truth_id[truth_index]);
    }
    // the index advances once per group, not once per record
    ++truth_index;
  }
  BOOST_CHECK_EQUAL(truth_index, 8u);
}

/**
 * Shared body of the sample-selection test cases.
 *
 * Opens testdata/test_variants.vcf and testdata/test_variants.bcf through one MultipleVariantReader,
 * forwarding the sample list and the include flag, and checks how many samples the combined header reports.
 *
 * @param samples          sample names forwarded to the reader
 * @param include          forwarded to the reader; per the calling test cases, true keeps only the listed samples and false drops them
 * @param desired_samples  number of samples the combined header is expected to report
 */
void multiple_variant_reader_sample_test(const vector<string>& samples, const bool include, const uint32_t desired_samples) {
  const auto filenames = vector<string>{"testdata/test_variants.vcf", "testdata/test_variants.bcf"};

  // second argument false, as in the other tests of this file that mix inputs -- presumably disables header validation
  const auto reader = MultipleVariantReader<MultipleVariantIterator>{filenames, false, samples, include};
  BOOST_CHECK_EQUAL(reader.combined_header().n_samples(), desired_samples);
}

BOOST_AUTO_TEST_CASE( multiple_variant_reader_sites_only )
{
  // include == true with an empty list keeps no samples at all, i.e. a sites-only read
  const auto no_samples = vector<string>{};
  multiple_variant_reader_sample_test(no_samples, true, 0);
}

BOOST_AUTO_TEST_CASE( multiple_variant_reader_include_all_samples )
{
  // include == false with an empty list excludes nothing, so all 3 samples remain
  const auto nothing_excluded = vector<string>{};
  multiple_variant_reader_sample_test(nothing_excluded, false, 3);
}

BOOST_AUTO_TEST_CASE( multiple_variant_reader_including )
{
  // keep a single sample
  const auto one_sample = vector<string>{"NA12878"};
  multiple_variant_reader_sample_test(one_sample, true, 1);

  // keep two samples
  const auto two_samples = vector<string>{"NA12878", "NA12892"};
  multiple_variant_reader_sample_test(two_samples, true, 2);
}

BOOST_AUTO_TEST_CASE( multiple_variant_reader_excluding )
{
  // drop a single sample, leaving 2
  const auto one_sample = vector<string>{"NA12891"};
  multiple_variant_reader_sample_test(one_sample, false, 2);

  // drop two samples, leaving 1
  const auto two_samples = vector<string>{"NA12891", "NA12878"};
  multiple_variant_reader_sample_test(two_samples, false, 1);
}

// The reader's combined header must equal the merge of the two input headers, while every
// record it yields must still carry the header of the file it came from.
BOOST_AUTO_TEST_CASE( multiple_variant_reader_headers_test ) {
  const auto file1 = "testdata/mvr_hdr/test1.vcf";
  const auto file2 = "testdata/mvr_hdr/test2.vcf";

  // build the expected combined header independently of the reader under test
  const auto header1 = SingleVariantReader{file1}.header();
  const auto header2 = SingleVariantReader{file2}.header();
  const auto combined_header = VariantHeaderBuilder{header1}.merge(header2).build();

  const auto reader = MultipleVariantReader<MultipleVariantIterator>{{file1, file2}};
  BOOST_CHECK(reader.combined_header() != header1);
  BOOST_CHECK(reader.combined_header() != header2);
  BOOST_CHECK(reader.combined_header() == combined_header);
  // iterate by const reference: the loops only inspect the records, so copying every
  // record vector and every variant is unnecessary work
  for (const auto& vec : reader) {
    for (const auto& variant : vec) {
      BOOST_CHECK(variant.header() != combined_header);
      // order is determined by priority queue - hard to predict
      BOOST_CHECK(variant.header() == header1 || variant.header() == header2);
    }
  }
}

// Expected values for gvcf_test_multiple; entry i describes the i-th group produced by the reader.
const vector<string> gvcf_truth_ref{"T", "C", "GG"};
const vector<uint32_t> gvcf_truth_chromosome{0, 0, 1};
const vector<uint32_t> gvcf_truth_alignment_starts{10000000, 20000000, 10001000};
const vector<uint32_t> gvcf_truth_alignment_stops{10000000, 20000123, 10001001};
const vector<uint32_t> gvcf_truth_n_alleles{2, 2, 2};
const vector<string> gvcf_truth_id{"db2342", ".", "rs837472"};

// Reads the same gVCF content from a VCF and a BCF through one MultipleVariantReader and checks
// every record of every group against the gvcf_truth_* tables.
BOOST_AUTO_TEST_CASE( gvcf_test_multiple ) {
  auto truth_index = 0u;
  const auto reader = MultipleVariantReader<MultipleVariantIterator>{vector<string>{"testdata/test.g.vcf", "testdata/test.g.bcf"}};
  for (const auto& vec : reader) {
    // abort the test case instead of reading past the end of the truth tables if the reader yields extra groups
    BOOST_REQUIRE_LT(truth_index, gvcf_truth_ref.size());
    for (const auto& record : vec) {
      BOOST_CHECK_EQUAL(record.ref(), gvcf_truth_ref[truth_index]);
      BOOST_CHECK_EQUAL(record.chromosome(), gvcf_truth_chromosome[truth_index]);
      BOOST_CHECK_EQUAL(record.alignment_start(), gvcf_truth_alignment_starts[truth_index]);
      BOOST_CHECK_EQUAL(record.alignment_stop(), gvcf_truth_alignment_stops[truth_index]);
      BOOST_CHECK_EQUAL(record.n_alleles(), gvcf_truth_n_alleles[truth_index]);
      BOOST_CHECK_EQUAL(record.n_samples(), 3u);
      BOOST_CHECK_EQUAL(record.id(), gvcf_truth_id[truth_index]);
    }
    // the index advances once per group, not once per record
    ++truth_index;
  }
  // without this, a reader that yields nothing (or too few groups) would pass silently
  BOOST_CHECK_EQUAL(truth_index, gvcf_truth_ref.size());
}

// Construction must fail with FileOpenException as soon as any listed input cannot be opened.
BOOST_AUTO_TEST_CASE( multiple_variant_reader_nonexistent_file ) {
  using Reader = MultipleVariantReader<MultipleVariantIterator>;

  // the only input is missing
  BOOST_CHECK_THROW(Reader(vector<string>{"foo/bar/nonexistent.vcf"}), FileOpenException);

  // a valid input followed by a missing one
  BOOST_CHECK_THROW(Reader(vector<string>{"testdata/test_variants.vcf", "foo/bar/nonexistent.vcf"}), FileOpenException);
}
107 changes: 107 additions & 0 deletions test/synced_variant_reader_test.cpp
@@ -0,0 +1,107 @@
#include "variant.h"
#include "synced_variant_reader.h"
#include "synced_variant_iterator.h"
#include "test_utils.h"

#include <boost/test/unit_test.hpp>

using namespace std;
using namespace gamgee;

// copied / migrated from variant_reader_test

/*
NOTE: Updated test_variants.bcf and var_idx directory via-
bcftools view testdata/test_variants.vcf -o testdata/var_idx/test_variants.bcf -O b
bcftools view testdata/test_variants.vcf -o testdata/var_idx/test_variants_csi.vcf.gz -O z
cp testdata/var_idx/test_variants_csi.vcf.gz testdata/var_idx/test_variants_tabix.vcf.gz
bcftools index testdata/var_idx/test_variants.bcf
bcftools index testdata/var_idx/test_variants_csi.vcf.gz
bcftools index testdata/var_idx/test_variants_tabix.vcf.gz -t
cp testdata/var_idx/test_variants.bcf testdata/test_variants.bcf
*/

// Indexed input files, grouped by container format.
const vector<string> indexed_variant_vcf_inputs{"testdata/var_idx/test_variants_csi.vcf.gz", "testdata/var_idx/test_variants_tabix.vcf.gz"};
const vector<string> indexed_variant_bcf_inputs{"testdata/var_idx/test_variants.bcf"};

// Interval lists as single comma-joined strings, the form passed to SyncedVariantReader in the tests below.
const char* const indexed_variant_chrom_full_joined = "1,20,22";
const char* const indexed_variant_bp_full_joined = "1:10000000-10000000,20:10001000-10001000,20:10002000-10002000,20:10003000-10003000,22:10004000-10004000";
const char* const indexed_variant_chrom_partial_joined = "1";
const char* const indexed_variant_bp_partial_joined = "20:10001000-10001000";

// one (different) record each
BOOST_AUTO_TEST_CASE( synced_variant_reader_partial_test ) {
  // interval list naming a whole chromosome ("1"): exactly one group is expected per input set
  // (input sets are bound by reference so the filename vectors are not copied again per iteration)
  for (const auto& input_files : {indexed_variant_vcf_inputs, indexed_variant_bcf_inputs}) {
    auto group_count = 0u;
    const auto chrom_reader = SyncedVariantReader<SyncedVariantIterator>{input_files, indexed_variant_chrom_partial_joined};
    for (const auto& vec : chrom_reader) {
      for (const auto& record : vec) {
        BOOST_CHECK_EQUAL(record.ref(), "T");
        BOOST_CHECK_EQUAL(record.chromosome(), 0u);
        BOOST_CHECK_EQUAL(record.alignment_start(), 10000000u);
        BOOST_CHECK_EQUAL(record.alignment_stop(), 10000000u);
        BOOST_CHECK_EQUAL(record.n_alleles(), 2u);
        BOOST_CHECK_EQUAL(record.n_samples(), 3u);
        BOOST_CHECK_EQUAL(record.id(), "db2342");
      }
      // counted once per group, not once per record
      ++group_count;
    }
    BOOST_CHECK_EQUAL(group_count, 1u);
  }

  // interval list given as chrom:start-stop ("20:10001000-10001000"): again one group, holding a different record
  for (const auto& input_files : {indexed_variant_vcf_inputs, indexed_variant_bcf_inputs}) {
    auto group_count = 0u;
    const auto bp_reader = SyncedVariantReader<SyncedVariantIterator>{input_files, indexed_variant_bp_partial_joined};
    for (const auto& vec : bp_reader) {
      for (const auto& record : vec) {
        BOOST_CHECK_EQUAL(record.ref(), "GG");
        BOOST_CHECK_EQUAL(record.chromosome(), 1u);
        BOOST_CHECK_EQUAL(record.alignment_start(), 10001000u);
        BOOST_CHECK_EQUAL(record.alignment_stop(), 10001001u);
        BOOST_CHECK_EQUAL(record.n_alleles(), 2u);
        BOOST_CHECK_EQUAL(record.n_samples(), 3u);
        BOOST_CHECK_EQUAL(record.id(), "rs837472");
      }
      // counted once per group, not once per record
      ++group_count;
    }
    BOOST_CHECK_EQUAL(group_count, 1u);
  }
}

// A reader that went through check_move_constructor must still expose the same first group
// as an untouched reader built from the same inputs.
BOOST_AUTO_TEST_CASE( synced_variant_reader_move_test ) {
  // bind each input set by reference so the filename vectors are not copied again per iteration
  for (const auto& input_files : {indexed_variant_vcf_inputs, indexed_variant_bcf_inputs}) {
    auto reference_reader = SyncedVariantReader<SyncedVariantIterator>{input_files, indexed_variant_chrom_full_joined};
    auto reader_to_move = SyncedVariantReader<SyncedVariantIterator>{input_files, indexed_variant_chrom_full_joined};
    // check_move_constructor is declared in test_utils.h; presumably it returns the moved-to object -- confirm there
    auto moved = check_move_constructor(reader_to_move);

    const auto reference_group = *reference_reader.begin();
    const auto moved_group = *moved.begin();

    // comparing the first record of each group is enough to show both readers start at the same place
    BOOST_CHECK_EQUAL(reference_group[0].alignment_start(), moved_group[0].alignment_start());
  }
}

// An iterator that went through check_move_constructor must still dereference to the same
// first group as an untouched iterator over the same inputs.
BOOST_AUTO_TEST_CASE( synced_variant_iterator_move_test ) {
  // bind each input set by reference so the filename vectors are not copied again per iteration
  for (const auto& input_files : {indexed_variant_vcf_inputs, indexed_variant_bcf_inputs}) {
    // each iterator gets its own reader, so the two never share underlying state
    auto reference_reader = SyncedVariantReader<SyncedVariantIterator>{input_files, indexed_variant_chrom_full_joined};
    auto reference_iter = reference_reader.begin();
    auto reader_for_move = SyncedVariantReader<SyncedVariantIterator>{input_files, indexed_variant_chrom_full_joined};
    auto iter_to_move = reader_for_move.begin();
    // check_move_constructor is declared in test_utils.h; presumably it returns the moved-to object -- confirm there
    auto moved = check_move_constructor(iter_to_move);

    const auto reference_group = *reference_iter;
    const auto moved_group = *moved;

    BOOST_CHECK_EQUAL(reference_group[0].alignment_start(), moved_group[0].alignment_start());
  }
}

// Construction must fail with FileOpenException as soon as any listed input cannot be opened.
BOOST_AUTO_TEST_CASE( synced_variant_reader_nonexistent_file ) {
  using Reader = SyncedVariantReader<SyncedVariantIterator>;

  // the only input is missing
  BOOST_CHECK_THROW(Reader(vector<string>{"foo/bar/nonexistent.vcf"}, ""), FileOpenException);

  // a valid input followed by a missing one
  BOOST_CHECK_THROW(Reader(vector<string>{"testdata/var_idx/test_variants_csi.vcf.gz", "foo/bar/nonexistent.vcf"}, ""), FileOpenException);
}

0 comments on commit cbcb8a3

Please sign in to comment.