From 8d6fb3bc0fe958cdf1702342ea930f5e2f7f7b2d Mon Sep 17 00:00:00 2001 From: Choi Shing Wan Date: Mon, 13 Apr 2020 17:38:42 +0800 Subject: [PATCH] [Update] Implemented unit test for load_sample for different file types --- CMakeLists.txt | 2 +- inc/binarygen.hpp | 3 +- inc/binaryplink.hpp | 3 +- src/binarygen.cpp | 140 ++++++-------- src/binaryplink.cpp | 21 +- test/CMakeLists.txt | 4 +- test/csrc/binarygen_sample_load.cpp | 269 ++++++++++++++++++++++++++ test/csrc/binaryplink_sample_load.cpp | 65 +++++++ test/inc/mock_binarygen.hpp | 47 +++++ test/inc/mock_binaryplink.hpp | 17 ++ 10 files changed, 475 insertions(+), 96 deletions(-) create mode 100644 test/csrc/binarygen_sample_load.cpp create mode 100644 test/csrc/binaryplink_sample_load.cpp create mode 100644 test/inc/mock_binarygen.hpp create mode 100644 test/inc/mock_binaryplink.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index ad1bdcf6..78cfbea2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -59,7 +59,7 @@ endif() add_subdirectory(src) -option (BUILD_TESTING "Build the testing tree." OFF) +option (BUILD_TESTING "Build the unit test." OFF) # Only build tests if we are the top-level project # Allows this to be used by super projects with `add_subdirectory` if (BUILD_TESTING AND (PROJECT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR)) diff --git a/inc/binarygen.hpp b/inc/binarygen.hpp index d8f8f491..a31b5da7 100644 --- a/inc/binarygen.hpp +++ b/inc/binarygen.hpp @@ -44,7 +44,7 @@ class BinaryGen : public Genotype * or just a simple text file * \return */ - static bool check_is_sample_format(const std::string& input); + static bool check_is_sample_format(std::unique_ptr& input); protected: typedef std::vector> Data; @@ -59,6 +59,7 @@ class BinaryGen : public Genotype * \return Vector containing the sample information */ std::vector gen_sample_vector(); + void handle_pheno_header(std::unique_ptr& sample); void gen_snp_vector(const std::vector>& exclusion_regions, const std::string& out_prefix, Genotype* target = nullptr); diff --git a/inc/binaryplink.hpp b/inc/binaryplink.hpp index 4838462d..be485adb 100644 --- a/inc/binaryplink.hpp +++ b/inc/binaryplink.hpp @@ -57,7 +57,8 @@ class BinaryPlink : public Genotype bool force_cal = false); void check_bed(const std::string& bed_name, size_t num_marker, uintptr_t& bed_offset); - std::unordered_set get_founder_info(std::ifstream& famfile); + std::unordered_set + get_founder_info(std::unique_ptr& famfile); inline void read_genotype(uintptr_t* __restrict genotype, const std::streampos byte_pos, const size_t& file_idx) diff --git a/src/binarygen.cpp b/src/binarygen.cpp index 3393b51e..28c8fa52 100644 --- a/src/binarygen.cpp +++ b/src/binarygen.cpp @@ -67,6 +67,37 @@ size_t BinaryGen::get_sex_col(const std::string& header, return sex_col; } +void BinaryGen::handle_pheno_header(std::unique_ptr& sample) +{ + std::string line; + bool have_header = false; + size_t num_line = 0; + while (std::getline(*sample, line)) + { + misc::trim(line); + if (!line.empty()) ++num_line; + } + if (num_line == m_unfiltered_sample_ct + 1) { have_header = true; } + else if (num_line != m_unfiltered_sample_ct) + { + throw std::runtime_error( + "Error: Number of sample in phenotype file does not match " + "number of samples specified in bgen file. Please check " + "you " + "have the correct phenotype file input. Note: Phenotype " + "file " + "should have the same number of samples as the bgen file " + "and " + "they should appear in the same order"); + } + (*sample).clear(); + (*sample).seekg(0); + if (have_header) + { + std::getline(*sample, line); + m_reporter->report("Assume phenotype file has header line: " + line); + } +} std::vector BinaryGen::gen_sample_vector() { // this is the first time we do something w.r.t bgen file @@ -77,48 +108,39 @@ std::vector BinaryGen::gen_sample_vector() // we always know the sample size from context m_unfiltered_sample_ct = m_context_map[0].number_of_samples; init_sample_vectors(); - if (m_is_ref) + if (m_is_ref && m_sample_selection_list.empty()) + { + for (size_t i = 0; i < m_unfiltered_sample_ct; ++i) + { + ++m_sample_ct; + SET_BIT(i, m_calculate_prs.data()); + // we assume all bgen samples to be founder + SET_BIT(i, m_sample_for_ld.data()); + } + } + else if (!m_is_ref || !m_sample_selection_list.empty()) { - // don't bother with sample check if we are using bgen as reference and - // not exclude/extracting samples - if ((!m_keep_file.empty() || !m_remove_file.empty()) - && (m_sample_file.empty())) + // this is the target, where the m_sample_file must be correct, or + // this is the reference, which we asked for --keep or --remove and + // an external sample file was provided (that's why we don't get + // into the runtime_error) + if (m_is_ref && m_sample_file.empty()) { throw std::runtime_error("Error: Cannot perform sample " "filtering on the LD reference " "file without the sample file!"); } - else - { - for (size_t i = 0; i < m_unfiltered_sample_ct; ++i) - { - ++m_sample_ct; - SET_BIT(i, m_calculate_prs.data()); - // we assume all bgen samples to be founder - SET_BIT(i, m_sample_for_ld.data()); - } - } - } - else if (!m_is_ref || (!m_keep_file.empty() || !m_remove_file.empty())) - { - // this is the target, where the m_sample_file must be correct, or this - // is the reference, which we asked for --keep or --remove and an - // external sample file was provided (that's why we don't get into the - // runtime_error) - const bool is_sample_format = check_is_sample_format(m_sample_file); - std::ifstream sample_file(m_sample_file.c_str()); - // don't need to check again as the check_is_sample_format function - // already checked if the file is opened + auto sample = misc::load_stream(m_sample_file); + const bool is_sample_format = check_is_sample_format(sample); std::string line; size_t sex_col = ~size_t(0); - // now check if there's a sex information if (is_sample_format) { // only do this if the file is sample format - std::getline(sample_file, line); + std::getline(*sample, line); std::string format; - std::getline(sample_file, format); + std::getline(*sample, format); sex_col = get_sex_col(line, format); } // now start reading the file @@ -127,44 +149,11 @@ std::vector BinaryGen::gen_sample_vector() std::vector duplicated_sample_id; std::vector token; const size_t required_column = - ((sex_col != ~size_t(0)) ? (sex_col) : (1 + !m_ignore_fid)); + ((sex_col != ~size_t(0)) ? (sex_col + 1) : (1 + !m_ignore_fid)); const size_t iid_idx = (is_sample_format || !m_ignore_fid) ? 1 : 0; const size_t fid_idx = 0; - // more robust header check, only remove header if sample size = line + - // 1 - // first, get number of lines in the file - if (!is_sample_format) - { - bool have_header = false; - size_t num_line = 0; - while (std::getline(sample_file, line)) - { - misc::trim(line); - if (!line.empty()) ++num_line; - } - if (num_line == m_unfiltered_sample_ct + 1) { have_header = true; } - else if (num_line != m_unfiltered_sample_ct) - { - throw std::runtime_error( - "Error: Number of sample in phenotype file does not match " - "number of samples specified in bgen file. Please check " - "you " - "have the correct phenotype file input. Note: Phenotype " - "file " - "should have the same number of samples as the bgen file " - "and " - "they should appear in the same order"); - } - sample_file.clear(); - sample_file.seekg(0); - if (have_header) - { - std::getline(sample_file, line); - m_reporter->report("Assume phenotype file has header line: " - + line); - } - } - while (std::getline(sample_file, line)) + if (!is_sample_format) { handle_pheno_header(sample); } + while (std::getline(*sample, line)) { misc::trim(line); if (line.empty()) continue; @@ -179,8 +168,8 @@ std::vector BinaryGen::gen_sample_vector() + " columns! Number of column=" + misc::to_string(token.size())); } - gen_sample(fid_idx, iid_idx, sex_col, 0, 0, line_id, - std::unordered_set {}, "", token, + gen_sample(fid_idx, iid_idx, sex_col, ~size_t(0), ~size_t(0), + line_id, std::unordered_set {}, "", token, sample_name, sample_in_file, duplicated_sample_id); ++line_id; } @@ -194,29 +183,24 @@ std::vector BinaryGen::gen_sample_vector() "have an " "unique identifier"); } - sample_file.close(); + sample.reset(); } post_sample_read_init(); return sample_name; } -bool BinaryGen::check_is_sample_format(const std::string& input) +bool BinaryGen::check_is_sample_format(std::unique_ptr& input) { // read the sample file // might want to change it according to the new sample file, // which only mandate the first column - std::ifstream sample_file(input.c_str()); - if (!sample_file.is_open()) - { - std::string error_message = "Error: Cannot open sample file: " + input; - throw std::runtime_error(error_message); - } // get the first two line of input std::string first_line, second_line; - std::getline(sample_file, first_line); + std::getline(*input, first_line); // we must have at least 2 row for a sample file - if (!std::getline(sample_file, second_line)) { return false; } - sample_file.close(); + if (!std::getline(*input, second_line)) { return false; } + (*input).clear(); + (*input).seekg(0); // split the first two lines const std::vector first_row = misc::tokenize(first_line); const std::vector second_row = diff --git a/src/binaryplink.cpp b/src/binaryplink.cpp index 4c6429d8..c505a5ae 100644 --- a/src/binaryplink.cpp +++ b/src/binaryplink.cpp @@ -27,12 +27,12 @@ BinaryPlink::BinaryPlink(const GenoFile& geno, const Phenotype& pheno, } std::unordered_set -BinaryPlink::get_founder_info(std::ifstream& famfile) +BinaryPlink::get_founder_info(std::unique_ptr& famfile) { std::string line; std::vector token; std::unordered_set founder_info; - while (std::getline(famfile, line)) + while (std::getline(*famfile, line)) { misc::trim(line); if (line.empty()) continue; @@ -47,20 +47,14 @@ BinaryPlink::get_founder_info(std::ifstream& famfile) founder_info.insert(token[+FAM::FID] + m_delim + token[+FAM::IID]); ++m_unfiltered_sample_ct; } - famfile.clear(); - famfile.seekg(0); + (*famfile).clear(); + (*famfile).seekg(0); return founder_info; } std::vector BinaryPlink::gen_sample_vector() { assert(m_genotype_file_names.size() > 0); - std::ifstream famfile; - famfile.open(m_sample_file.c_str()); - if (!famfile.is_open()) - { - throw std::runtime_error("Error: Cannot open fam file: " - + m_sample_file); - } + auto famfile = misc::load_stream(m_sample_file); m_unfiltered_sample_ct = 0; // will also count number of samples here. Which initialize the important // m_unfiltered_sample_ct @@ -74,7 +68,7 @@ std::vector BinaryPlink::gen_sample_vector() uintptr_t sample_index = 0; // this is just for error message std::vector token; std::string line; - while (std::getline(famfile, line)) + while (std::getline(*famfile, line)) { misc::trim(line); if (line.empty()) continue; @@ -93,8 +87,7 @@ std::vector BinaryPlink::gen_sample_vector() + " duplicated samples detected!\n" + "Please ensure all samples have an unique identifier"); } - - famfile.close(); + famfile.reset(); post_sample_read_init(); return sample_name; } diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index d01fdb8d..61593c2f 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -17,7 +17,9 @@ add_executable(tests ${TEST_SOURCES} ${TEST_SRC_DIR}/misc_test.cpp ${TEST_SRC_DIR}/genotype_basic.cpp ${TEST_SRC_DIR}/genotype_read_base.cpp - ${TEST_SRC_DIR}/genotype_read_sample.cpp) + ${TEST_SRC_DIR}/genotype_read_sample.cpp + ${TEST_SRC_DIR}/binaryplink_sample_load.cpp + ${TEST_SRC_DIR}/binarygen_sample_load.cpp) target_link_libraries(tests PUBLIC Catch genotyping diff --git a/test/csrc/binarygen_sample_load.cpp b/test/csrc/binarygen_sample_load.cpp new file mode 100644 index 00000000..c9b8f8d1 --- /dev/null +++ b/test/csrc/binarygen_sample_load.cpp @@ -0,0 +1,269 @@ +#include "binarygen.hpp" +#include "catch.hpp" +#include "mock_binarygen.hpp" + +TEST_CASE("check phenotype header") +{ + // Check if the external sample file contain a header + // it is technically allow as user can provide one file for external sample + // and another for phenotype parsing + mock_binarygen bgen; + Reporter reporter("log", 60, true); + bgen.set_reporter(&reporter); + bgen.set_sample_size(1); + SECTION("has header") + { + std::unique_ptr input = + std::make_unique("FID IID Pheno\n" + "S1 S1 Case"); + bgen.test_handle_pheno_header(input); + std::string line; + std::getline((*input), line); + REQUIRE(line == "S1 S1 Case"); + } + SECTION("no header") + { + std::unique_ptr input = + std::make_unique("S1 S1 Case"); + bgen.test_handle_pheno_header(input); + std::string line; + std::getline((*input), line); + REQUIRE(line == "S1 S1 Case"); + } +} +TEST_CASE("sample file check") +{ + SECTION("valid formats") + { + auto str = + GENERATE("ID1 ID2 missing\n" + "0 0 0\n" + "1 1 1", + "ID1 ID2 missing Sex\n" + "0 0 0 D\n" + "1 1 1 F", + "ID_1 ID_2 missing sex category binary positive disgrete\n" + "0 0 0 D C B P D\n" + "1 1 1 F 10.0 1 2 Hi"); + std::unique_ptr input = + std::make_unique(str); + REQUIRE(mock_binarygen::check_is_sample_format(input)); + } + SECTION("invalid formats") + { + auto str = GENERATE( + /*simple malform*/ + "ID1 ID2 missing\n" + "0 0\n" + "1 1 1", + /*forgot missing*/ + "ID1 ID2\n" + "0 0\n" + "1 1", + /* unknown format */ + "ID1 ID2 missing sex pheno\n" + "0 0 0 D N\n" + "S1 S1 0 F -1" + /* sex as missing format */ + "ID1 ID2 sex missing pheno\n" + "0 0 D 0 N\n" + "S1 S1 0 F -1", + /*fam file */ + "ID1 ID2 0 0 1 Pheno\n" + "ID2 ID3 0 0 2 Pheno", + /*one sample fam*/ + "ID1 ID2 0 0 1 Pheno" + /*pheno file*/ + "ID1 ID2 Pheno\n" + "ID3 ID4 Pheno"); + std::unique_ptr input = + std::make_unique(str); + REQUIRE_FALSE(mock_binarygen::check_is_sample_format(input)); + } + SECTION("get sex col") + { + mock_binarygen bgen; + Reporter reporter("log", 60, true); + bgen.set_reporter(&reporter); + size_t ncol = GENERATE(range(5, 10)); + size_t sex_col = GENERATE(range(3, 4)); + std::string line, format; + std::vector header(ncol, "0"); + auto colname = GENERATE("Sex", "sex", "SEX", "nosex"); + header[sex_col] = colname; + line = header[0]; + for (size_t i = 1; i < ncol; ++i) { line.append("\t" + header[i]); } + auto code = GENERATE("D", "C"); + header[sex_col] = code; + format = "0\t0\t0"; + for (size_t i = 3; i < ncol; ++i) { format.append("\t" + header[i]); } + + if (std::string(code) != "D" || std::string(colname) == "nosex") + { REQUIRE(bgen.test_get_sex_col(line, format) == ~size_t(0)); } + else + { + REQUIRE(bgen.test_get_sex_col(line, format) == sex_col); + } + } +} + +TEST_CASE("full sample load") +{ + auto ignore_fid = GENERATE(true, false); + auto is_ref = GENERATE(true, false); + auto delim = GENERATE(" ", "\t"); + Reporter reporter("log", 60, true); + GenoFile geno; + geno.is_ref = is_ref; + geno.file_name = "pheno_load,bgen_pheno_load"; + Phenotype pheno; + pheno.ignore_fid = ignore_fid; + mock_binarygen bgen(geno, pheno, delim, &reporter); + bgen.set_reporter(&reporter); + bgen.gen_bgen_header("pheno_load.bgen", 10, 3, "check phenotype loading", + 4294967295u); + if (is_ref) bgen.reference(); + SECTION("with valid input") + { + using record = std::tuple; + auto mock_file = GENERATE( + table({record {"ID1 ID2 missing Sex\n" + "0 0 0 D\n" + "S1 S1 1 F\n" + "remove1 remove 1 F\n" + "S2 S2 1 M", + true}, + // fam format + record {"S1 S1 0 0 2 Pheno\n" + "remove1 remove 0 0 2 Pheno\n" + "S2 S2 0 0 2 Pheno", + false}, + /*pheno file*/ + record {"S1 S1 Pheno\n" + "remove1 remove 0 0 2 Pheno\n" + "S2 S2 Pheno\n", + false}, + /*pheno file with header*/ + record {"FID IID Pheno\n" + "S1 S1 Pheno\n" + "remove1 remove 0 0 2 Pheno\n" + "S2 S2 Pheno", + false}})); + std::ofstream test("bgen_pheno_load"); + test << std::get<0>(mock_file) << std::endl; + test.close(); + SECTION("without selection") + { + bgen.load_samples(false); + auto res = bgen.sample_id(); + if (!is_ref) + { + std::string expected_iid = + std::get<1>(mock_file) + ? "remove" + : (ignore_fid ? "remove1" : "remove"); + REQUIRE(res.size() == 3); + REQUIRE(res[0].FID == (ignore_fid ? "" : "S1")); + REQUIRE(res[0].IID == "S1"); + REQUIRE(res[1].FID == (ignore_fid ? "" : "remove1")); + REQUIRE(res[1].IID == expected_iid); + REQUIRE(res[2].FID == (ignore_fid ? "" : "S2")); + REQUIRE(res[2].IID == "S2"); + for (auto&& i : res) { REQUIRE(i.in_regression); } + } + else + { + REQUIRE(res.empty()); + } + auto in_prs = bgen.calculate_prs(); + auto sample_ld = bgen.sample_for_ld(); + for (size_t i = 0; i < 3; ++i) + { + REQUIRE(IS_SET(in_prs.data(), i)); + REQUIRE(IS_SET(sample_ld.data(), i)); + } + } + SECTION("with selection") + { + auto remove_sample = GENERATE(true, false); + + std::string expected_iid = + std::get<1>(mock_file) ? "remove" + : (ignore_fid ? "remove1" : "remove"); + auto dict_sample = !ignore_fid + ? "remove1" + std::string(delim) + "remove" + : expected_iid; + bgen.add_select_sample(dict_sample); + bgen.change_sample_selection(remove_sample); + bgen.load_samples(false); + auto res = bgen.sample_id(); + if (!is_ref) + { + if (remove_sample) + { + REQUIRE(res.size() == 2); + REQUIRE(res[0].FID == (ignore_fid ? "" : "S1")); + REQUIRE(res[0].IID == "S1"); + REQUIRE(res[1].FID == (ignore_fid ? "" : "S2")); + REQUIRE(res[1].IID == "S2"); + } + else + { + REQUIRE(res.size() == 1); + REQUIRE(res[0].FID == (ignore_fid ? "" : "remove1")); + REQUIRE(res[0].IID == expected_iid); + } + for (auto&& i : res) { REQUIRE(i.in_regression); } + } + else + { + REQUIRE(res.empty()); + } + auto in_prs = bgen.calculate_prs(); + auto sample_ld = bgen.sample_for_ld(); + if (remove_sample) + { + REQUIRE_FALSE(IS_SET(in_prs.data(), 1)); + REQUIRE_FALSE(IS_SET(sample_ld.data(), 1)); + for (auto i : {0, 2}) + { + REQUIRE(IS_SET(in_prs.data(), i)); + REQUIRE(IS_SET(sample_ld.data(), i)); + } + } + else + { + REQUIRE(IS_SET(in_prs.data(), 1)); + REQUIRE(IS_SET(sample_ld.data(), 1)); + for (auto i : {0, 2}) + { + REQUIRE_FALSE(IS_SET(in_prs.data(), i)); + REQUIRE_FALSE(IS_SET(sample_ld.data(), i)); + } + } + } + } + SECTION("with invalid input") + { + std::string mock_file = "ID1 ID2 missing Sex\n" + "0 0 0 D\n" + "S1 S1 1\n" + "S2 S2 1 Pheno\n"; + std::ofstream test("bgen_pheno_load"); + test << mock_file << std::endl; + test.close(); + if (!is_ref) { REQUIRE_THROWS(bgen.load_samples(false)); } + else + { + // we don't do sample check if we are not using keep / remove for LD + // samples + REQUIRE_NOTHROW(bgen.load_samples(false)); + } + } + // sample file + // sample malform + // not sample file + // reference + // reference with selection + // reference with selection but not sample file +} diff --git a/test/csrc/binaryplink_sample_load.cpp b/test/csrc/binaryplink_sample_load.cpp new file mode 100644 index 00000000..6c83ea26 --- /dev/null +++ b/test/csrc/binaryplink_sample_load.cpp @@ -0,0 +1,65 @@ +#include "binaryplink.hpp" +#include "catch.hpp" +#include "mock_binaryplink.hpp" +#include "reporter.hpp" + +TEST_CASE("binary plink load sample") +{ + GenoFile geno; + geno.file_name = "test"; + Phenotype pheno; + pheno.ignore_fid = GENERATE(true, false); + Reporter reporter("log", 60, true); + SECTION("Normal file") + { + // generate a small test file + std::ofstream mock_fam("test.fam"); + mock_fam << "CAS_1 CAS_1 0 0 2 2" << std::endl; + mock_fam << "CAS_1 CAS_2 0 0 1 2" << std::endl; + mock_fam << "CAS_1 CAS_3 CAS_1 CAS_2 1 2" << std::endl; + mock_binaryplink bplink(geno, pheno, " ", &reporter); + auto keep_nonfounder = GENERATE(true, false); + auto is_ref = GENERATE(true, false); + if (is_ref) { bplink.reference(); } + bplink.keep_nonfounder(keep_nonfounder); + // this is ok as we know we haven't check for the bed and bim file yet + bplink.load_samples(false); + auto res = bplink.sample_id(); + auto sample_ld = bplink.sample_for_ld(); + auto in_prs = bplink.calculate_prs(); + if (is_ref) { REQUIRE(res.empty()); } + else + { + REQUIRE(res.size() == 3); + for (size_t i = 0; i < 2; ++i) REQUIRE(res[i].in_regression); + if (pheno.ignore_fid || keep_nonfounder) + { REQUIRE(res[2].in_regression); } + else + { + REQUIRE_FALSE(res[2].in_regression); + } + } + REQUIRE(IS_SET(sample_ld.data(), 0)); + REQUIRE(IS_SET(sample_ld.data(), 1)); + if (pheno.ignore_fid) { REQUIRE(IS_SET(sample_ld.data(), 2)); } + else + { + REQUIRE_FALSE(IS_SET(sample_ld.data(), 2)); + } + for (size_t i = 0; i < 3; ++i) REQUIRE(IS_SET(in_prs.data(), i)); + } + SECTION("malformed file") + { + std::ofstream mock_fam("test.fam"); + mock_fam << "CAS_1 CAS_1 0 0 2 2 3" << std::endl; + mock_fam << "CAS_1 CAS_2" << std::endl; + mock_fam << "CAS_1 CAS_3 CAS_1 CAS_2 1 2 3 4 5 6 7" << std::endl; + mock_binaryplink bplink(geno, pheno, " ", &reporter); + auto keep_nonfounder = GENERATE(true, false); + auto is_ref = GENERATE(true, false); + if (is_ref) { bplink.reference(); } + bplink.keep_nonfounder(keep_nonfounder); + // this is ok as we know we haven't check for the bed and bim file yet + REQUIRE_THROWS(bplink.load_samples(false)); + } +} diff --git a/test/inc/mock_binarygen.hpp b/test/inc/mock_binarygen.hpp new file mode 100644 index 00000000..c0a74d5c --- /dev/null +++ b/test/inc/mock_binarygen.hpp @@ -0,0 +1,47 @@ +#ifndef MOCK_BINARYGEN_HPP +#define MOCK_BINARYGEN_HPP +#include "binarygen.hpp" +#include "reporter.hpp" + +class mock_binarygen : public ::BinaryGen{ +public: + mock_binarygen(){} + mock_binarygen(GenoFile& geno, Phenotype&pheno, const std::string &delim, Reporter* reporter): BinaryGen(geno, pheno, delim, reporter){} + size_t test_get_sex_col(const std::string& header, + const std::string& format_line) + {return get_sex_col(header, format_line); + } + void test_handle_pheno_header(std::unique_ptr& sample){ + handle_pheno_header(sample); + } + void set_reporter(Reporter *reporter){ + m_reporter = reporter; + } + void set_sample_size(uintptr_t sample_size){ + m_unfiltered_sample_ct = sample_size; + } + void add_select_sample(const std::string& in){ + m_sample_selection_list.insert(in); + } + void change_sample_selection(bool remove){ + m_remove_sample = remove; + } + + std::vector sample_for_ld() const { return m_sample_for_ld; } + std::vector calculate_prs() const { return m_calculate_prs; } + std::vector sample_id() const { return m_sample_id;} + void gen_bgen_header(const std::string &file_name, uint32_t number_of_snp_blocks, uint32_t number_of_samples, + std::string free_data, uint32_t flags) + { + std::ofstream dummy(file_name, std::ofstream::binary); + genfile::bgen::Context context; + context.number_of_variants = number_of_snp_blocks; + context.number_of_samples = number_of_samples; + context.free_data = free_data; + context.flags = flags; + genfile::bgen::write_header_block(dummy, context); + dummy.close(); + } +}; + +#endif // MOCK_BINARYGEN_HPP diff --git a/test/inc/mock_binaryplink.hpp b/test/inc/mock_binaryplink.hpp new file mode 100644 index 00000000..af7d726e --- /dev/null +++ b/test/inc/mock_binaryplink.hpp @@ -0,0 +1,17 @@ +#ifndef MOCK_BINARYPLINK_HPP +#define MOCK_BINARYPLINK_HPP +#include "binaryplink.hpp" +#include "genotype.hpp" + +class mock_binaryplink: public ::BinaryPlink +{ +public: + mock_binaryplink(GenoFile& geno, Phenotype&pheno, const std::string &delim, Reporter* reporter): BinaryPlink(geno, pheno, delim, reporter){} + + + std::vector sample_for_ld() const { return m_sample_for_ld; } + std::vector calculate_prs() const { return m_calculate_prs; } + std::vector sample_id() const { return m_sample_id;} +}; + +#endif // MOCK_BINARYPLINK_HPP