| @@ -0,0 +1,197 @@ | ||
| /* | ||
| * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. | ||
| * | ||
| * NVIDIA CORPORATION and its licensors retain all intellectual property | ||
| * and proprietary rights in and to this software, related documentation | ||
| * and any modifications thereto. Any use, reproduction, disclosure or | ||
| * distribution of this software and related documentation without an express | ||
| * license agreement from NVIDIA CORPORATION is strictly prohibited. | ||
| */ | ||
|
|
||
| #include "../src/hirschberg_myers_gpu.cu" | ||
| #include "../src/batched_device_matrices.cuh" | ||
| #include <claragenomics/utils/device_buffer.cuh> | ||
| #include <claragenomics/utils/signed_integer_utils.hpp> | ||
| #include <vector> | ||
| #include <gtest/gtest.h> | ||
|
|
||
| namespace claragenomics | ||
| { | ||
|
|
||
| namespace cudaaligner | ||
| { | ||
|
|
||
| using WordType = hirschbergmyers::WordType; | ||
|
|
||
| namespace test | ||
| { | ||
|
|
||
// Test kernel: runs Myers bit-vector preprocessing for a single query into
// matrix 0 of the batched pattern storage. Expected launch: one block of 32
// threads (the preprocessing routine distributes work over the launched threads).
__global__ void myers_preprocess_kernel(batched_device_matrices<WordType>::device_interface* batched_query_pattern, char const* query, int32_t query_size)
{
    CGA_CONSTEXPR int32_t bits_per_word = sizeof(WordType) * CHAR_BIT;
    const int32_t word_count            = ceiling_divide<int32_t>(query_size, bits_per_word);
    // 8 columns: one pattern column per base, forward and reverse (see tests).
    device_matrix_view<WordType> pattern_view = batched_query_pattern->get_matrix_view(0, word_count, 8);
    hirschbergmyers::myers_preprocess(pattern_view, query, query_size);
}
|
|
||
// Test kernel: each of the first 32 threads evaluates get_query_pattern()
// with its own thread index as the third argument and stores the word in
// result[thread index]. Threads beyond 32 do nothing.
__global__ void myers_get_query_pattern_test_kernel(int32_t n_words, WordType* result, batched_device_matrices<WordType>::device_interface* batched_query_pattern, int32_t idx, char x, bool reverse)
{
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    if (tid >= 32)
        return;
    device_matrix_view<WordType> patterns = batched_query_pattern->get_matrix_view(0, n_words, 8);
    result[tid] = hirschbergmyers::get_query_pattern(patterns, idx, tid, x, reverse);
}
|
|
||
/// \brief Runs the Myers preprocessing kernel for \p query_host and copies
///        the resulting pattern matrix (n_words rows x 8 columns) back to the host.
///
/// \param query_host Query sequence; passed by const reference to avoid a
///        needless copy on every call (callers are unaffected).
/// \return Host-side matrix of pattern words.
matrix<WordType> compute_myers_preprocess_matrix(const std::string& query_host)
{
    CGA_CONSTEXPR int32_t word_size = sizeof(WordType) * CHAR_BIT;
    cudaStream_t stream             = nullptr;
    // Explicit cast: query_host.size() is size_t; make the narrowing visible.
    const int32_t query_size = static_cast<int32_t>(query_host.size());
    const int32_t n_words    = ceiling_divide<int32_t>(query_size, word_size);

    device_buffer<char> query(query_host.size());
    cudaMemcpy(query.data(), query_host.data(), sizeof(char) * query.size(), cudaMemcpyHostToDevice);

    batched_device_matrices<WordType> query_pattern(1, 8 * n_words, stream);
    // Launch on the declared stream (nullptr == default stream, so behavior is
    // unchanged) so the launch and the get_matrix() below visibly agree.
    myers_preprocess_kernel<<<1, 32, 0, stream>>>(query_pattern.get_device_interface(), query.data(), query_size);
    return query_pattern.get_matrix(0, n_words, 8, stream);
}
|
|
||
/// \brief Preprocesses \p query_host on the GPU and evaluates
///        get_query_pattern(patterns, idx, i, x, reverse) for i in [0, 32).
///
/// \param query_host Query sequence; const reference avoids copying per call.
/// \param idx        Word index passed through to get_query_pattern.
/// \param x          Base character whose pattern is queried.
/// \param reverse    Whether to query the reversed-query pattern columns.
/// \return One result word per thread index i in [0, 32).
std::vector<WordType> myers_get_query_pattern_test(const std::string& query_host, int32_t idx, char x, bool reverse)
{
    CGA_CONSTEXPR int32_t word_size = sizeof(WordType) * CHAR_BIT;
    cudaStream_t stream             = nullptr;
    // Explicit cast: query_host.size() is size_t; make the narrowing visible.
    const int32_t query_size = static_cast<int32_t>(query_host.size());
    const int32_t n_words    = ceiling_divide<int32_t>(query_size, word_size);
    device_buffer<char> query(query_host.size());
    cudaMemcpy(query.data(), query_host.data(), sizeof(char) * query.size(), cudaMemcpyHostToDevice);
    batched_device_matrices<WordType> query_pattern(1, 8 * n_words, stream);
    // Launch on the declared stream (nullptr == default stream; behavior unchanged).
    myers_preprocess_kernel<<<1, 32, 0, stream>>>(query_pattern.get_device_interface(), query.data(), query_size);

    device_buffer<WordType> result(32);
    myers_get_query_pattern_test_kernel<<<1, 32, 0, stream>>>(n_words, result.data(), query_pattern.get_device_interface(), idx, x, reverse);

    // Synchronous copy-back: cudaMemcpy synchronizes with the default stream,
    // so both kernels have completed when the data arrives.
    std::vector<WordType> result_host(result.size());
    cudaMemcpy(result_host.data(), result.data(), sizeof(WordType) * result.size(), cudaMemcpyDeviceToHost);
    return result_host;
}
|
|
||
| } // namespace test | ||
|
|
||
| TEST(HirschbergMyers, myers_preprocess_test) | ||
| { | ||
| CGA_CONSTEXPR int32_t word_size = sizeof(WordType) * CHAR_BIT; | ||
| static_assert(word_size == 32, "This test assumes word_size = 32bit."); | ||
| using test::compute_myers_preprocess_matrix; | ||
| std::string query = | ||
| "AACCGGTTACGTACGT" | ||
| "AAACCCGGGTTTACGT" | ||
| "AAACCCGGGTTTACG"; | ||
| matrix<WordType> patterns = compute_myers_preprocess_matrix(query); | ||
| ASSERT_EQ(patterns.num_rows(), 2); | ||
| ASSERT_EQ(patterns.num_cols(), 8); | ||
| // A=0, C=1, G=2, T=3 | ||
| EXPECT_EQ(patterns(0, 0), 0b00010000000001110001000100000011u); | ||
| EXPECT_EQ(patterns(0, 1), 0b00100000001110000010001000001100u); | ||
| EXPECT_EQ(patterns(0, 2), 0b01000001110000000100010000110000u); | ||
| EXPECT_EQ(patterns(0, 3), 0b10001110000000001000100011000000u); | ||
| EXPECT_EQ(patterns(1, 0), 0b001000000000111u); | ||
| EXPECT_EQ(patterns(1, 1), 0b010000000111000u); | ||
| EXPECT_EQ(patterns(1, 2), 0b100000111000000u); | ||
| EXPECT_EQ(patterns(1, 3), 0b000111000000000u); | ||
| // reverse: A=4, C=5, G=6, T=7 | ||
| EXPECT_EQ(patterns(0, 4), 0b01110000000001000111000000000100u); | ||
| EXPECT_EQ(patterns(0, 6), 0b00000001110000010000000111000001u); | ||
| EXPECT_EQ(patterns(0, 5), 0b00001110000000100000111000000010u); | ||
| EXPECT_EQ(patterns(0, 7), 0b10000000001110001000000000111000u); | ||
| EXPECT_EQ(patterns(1, 4), 0b110000001000100u); | ||
| EXPECT_EQ(patterns(1, 5), 0b001100000100010u); | ||
| EXPECT_EQ(patterns(1, 6), 0b000011000010001u); | ||
| EXPECT_EQ(patterns(1, 7), 0b000000110001000u); | ||
|
|
||
| std::reverse(query.begin(), query.end()); | ||
| matrix<WordType> patterns_reversed = compute_myers_preprocess_matrix(query); | ||
| ASSERT_EQ(patterns.num_rows(), 2); | ||
| ASSERT_EQ(patterns.num_cols(), 8); | ||
| EXPECT_EQ(patterns_reversed(0, 0), patterns(0, 4)); | ||
| EXPECT_EQ(patterns_reversed(1, 0), patterns(1, 4)); | ||
| EXPECT_EQ(patterns_reversed(0, 1), patterns(0, 5)); | ||
| EXPECT_EQ(patterns_reversed(1, 1), patterns(1, 5)); | ||
| EXPECT_EQ(patterns_reversed(0, 2), patterns(0, 6)); | ||
| EXPECT_EQ(patterns_reversed(1, 2), patterns(1, 6)); | ||
| EXPECT_EQ(patterns_reversed(0, 3), patterns(0, 7)); | ||
| EXPECT_EQ(patterns_reversed(1, 3), patterns(1, 7)); | ||
| EXPECT_EQ(patterns_reversed(0, 4), patterns(0, 0)); | ||
| EXPECT_EQ(patterns_reversed(1, 4), patterns(1, 0)); | ||
| EXPECT_EQ(patterns_reversed(0, 5), patterns(0, 1)); | ||
| EXPECT_EQ(patterns_reversed(1, 5), patterns(1, 1)); | ||
| EXPECT_EQ(patterns_reversed(0, 6), patterns(0, 2)); | ||
| EXPECT_EQ(patterns_reversed(1, 6), patterns(1, 2)); | ||
| EXPECT_EQ(patterns_reversed(0, 7), patterns(0, 3)); | ||
| EXPECT_EQ(patterns_reversed(1, 7), patterns(1, 3)); | ||
| } | ||
|
|
||
| TEST(HirschbergMyers, myers_get_query_pattern) | ||
| { | ||
| CGA_CONSTEXPR int32_t word_size = sizeof(WordType) * CHAR_BIT; | ||
| static_assert(word_size == 32, "This test assumes word_size = 32bit."); | ||
| using test::compute_myers_preprocess_matrix; | ||
| using test::myers_get_query_pattern_test; | ||
| std::string query = | ||
| "AACCGGTTACGTACGT" | ||
| "AAACCCGGGTTTACGT" | ||
| "AAACCCGGGTTTACG"; | ||
| std::vector<WordType> patterns_0 = myers_get_query_pattern_test(query, 0, 'A', false); | ||
| std::vector<WordType> patterns_1 = myers_get_query_pattern_test(query, 1, 'A', false); | ||
| ASSERT_EQ(get_size(patterns_0), get_size(patterns_1)); | ||
| int32_t const n = get_size(patterns_0); | ||
| for (int32_t i = 0; i < n; ++i) | ||
| { | ||
| std::string shifted_query = std::string(query.begin() + i, query.end()); | ||
| matrix<WordType> shifted_p = compute_myers_preprocess_matrix(shifted_query); | ||
| EXPECT_EQ(patterns_0[i], shifted_p(0, 0)) << "for shift:" << i << std::endl; | ||
| if (get_size(shifted_query) > word_size) | ||
| { | ||
| ASSERT_EQ(shifted_p.num_rows(), 2); | ||
| EXPECT_EQ(patterns_1[i], shifted_p(1, 0)) << "for shift:" << i << std::endl; | ||
| } | ||
| else | ||
| { | ||
| EXPECT_EQ(patterns_1[i], WordType(0)) << "for shift:" << i << std::endl; | ||
| } | ||
| } | ||
| } | ||
|
|
||
| TEST(HirschbergMyers, myers_get_query_pattern_reverse) | ||
| { | ||
| CGA_CONSTEXPR int32_t word_size = sizeof(WordType) * CHAR_BIT; | ||
| static_assert(word_size == 32, "This test assumes word_size = 32bit."); | ||
| using test::compute_myers_preprocess_matrix; | ||
| using test::myers_get_query_pattern_test; | ||
| std::string query = | ||
| "AACCGGTTACGTACGT" | ||
| "AAACCCGGGTTTACGT" | ||
| "AAACCCGGGTTTACG"; | ||
| std::vector<WordType> reverse_patterns_0 = myers_get_query_pattern_test(query, 0, 'A', true); | ||
| std::vector<WordType> reverse_patterns_1 = myers_get_query_pattern_test(query, 1, 'A', true); | ||
| int32_t const n = get_size(reverse_patterns_0); | ||
| ASSERT_EQ(get_size(reverse_patterns_0), get_size(reverse_patterns_1)); | ||
| for (int32_t i = 0; i < n; ++i) | ||
| { | ||
| std::string shifted_end_query = std::string(query.begin(), query.end() - i); | ||
| matrix<WordType> shifted_p = compute_myers_preprocess_matrix(shifted_end_query); | ||
| EXPECT_EQ(reverse_patterns_0[i], shifted_p(0, 4)) << "for shift:" << i << std::endl; | ||
| if (get_size(shifted_end_query) > word_size) | ||
| { | ||
| ASSERT_EQ(shifted_p.num_rows(), 2); | ||
| EXPECT_EQ(reverse_patterns_1[i], shifted_p(1, 4)) << "for shift:" << i << std::endl; | ||
| } | ||
| else | ||
| { | ||
| EXPECT_EQ(reverse_patterns_1[i], WordType(0)) << "for shift:" << i << std::endl; | ||
| } | ||
| } | ||
| } | ||
|
|
||
| } // namespace cudaaligner | ||
| } // namespace claragenomics |
| @@ -0,0 +1,33 @@ | ||
| /* | ||
| * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. | ||
| * | ||
| * NVIDIA CORPORATION and its licensors retain all intellectual property | ||
| * and proprietary rights in and to this software, related documentation | ||
| * and any modifications thereto. Any use, reproduction, disclosure or | ||
| * distribution of this software and related documentation without an express | ||
| * license agreement from NVIDIA CORPORATION is strictly prohibited. | ||
| */ | ||
|
|
||
| #include <claragenomics/utils/mathutils.hpp> | ||
|
|
||
| #include "gtest/gtest.h" | ||
|
|
||
| namespace claragenomics | ||
| { | ||
|
|
||
| namespace cudaaligner | ||
| { | ||
|
|
||
| TEST(TestCudaAlignerMisc, CeilingDivide) | ||
| { | ||
| EXPECT_EQ(ceiling_divide(0, 5), 0); | ||
| EXPECT_EQ(ceiling_divide(5, 5), 1); | ||
| EXPECT_EQ(ceiling_divide(10, 5), 2); | ||
| EXPECT_EQ(ceiling_divide(20, 5), 4); | ||
|
|
||
| EXPECT_EQ(ceiling_divide(6, 5), 2); | ||
| EXPECT_EQ(ceiling_divide(4, 5), 1); | ||
| } | ||
|
|
||
| } // namespace cudaaligner | ||
| } // namespace claragenomics |
| @@ -0,0 +1,136 @@ | ||
| /* | ||
| * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. | ||
| * | ||
| * NVIDIA CORPORATION and its licensors retain all intellectual property | ||
| * and proprietary rights in and to this software, related documentation | ||
| * and any modifications thereto. Any use, reproduction, disclosure or | ||
| * distribution of this software and related documentation without an express | ||
| * license agreement from NVIDIA CORPORATION is strictly prohibited. | ||
| */ | ||
|
|
||
| #include "../src/myers_gpu.cuh" | ||
| #include "../src/needleman_wunsch_cpu.hpp" | ||
|
|
||
| #include <claragenomics/utils/genomeutils.hpp> | ||
|
|
||
| #include <random> | ||
| #include <gtest/gtest.h> | ||
|
|
||
| namespace claragenomics | ||
| { | ||
|
|
||
| namespace cudaaligner | ||
| { | ||
|
|
||
| struct TestCaseData | ||
| { | ||
| std::string target; | ||
| std::string query; | ||
| int32_t edit_distance; | ||
| }; | ||
|
|
||
| std::vector<TestCaseData> create_myers_test_cases() | ||
| { | ||
| std::vector<TestCaseData> tests; | ||
|
|
||
| TestCaseData t; | ||
|
|
||
| t.target = "AAAAAAAAAA"; | ||
| t.query = "CGTCGTCGTC"; | ||
| t.edit_distance = 10; | ||
| tests.push_back(t); | ||
|
|
||
| t.target = "AATAATAATA"; | ||
| t.query = "CGTCGTCGTC"; | ||
| t.edit_distance = 7; | ||
| tests.push_back(t); | ||
|
|
||
| t.target = "AATAATAATA"; | ||
| t.query = ""; | ||
| t.edit_distance = 10; | ||
| tests.push_back(t); | ||
|
|
||
| t.target = ""; | ||
| t.query = "CGTCGTCGTC"; | ||
| t.edit_distance = 10; | ||
| tests.push_back(t); | ||
|
|
||
| t.target = "AATAATAATA"; | ||
| t.query = "C"; | ||
| t.edit_distance = 10; | ||
| tests.push_back(t); | ||
|
|
||
| t.target = "CGTCGTCGTC"; | ||
| t.query = "CGTCGTCGTC"; | ||
| t.edit_distance = 0; | ||
| tests.push_back(t); | ||
|
|
||
| t.target = "CGTCGTCGTCCGTCGTCGTCCGTCGTCGTCGT"; | ||
| t.query = "AGTCGTCGTCCGTAATCGTCCGTCGTCGTCGA"; | ||
| t.edit_distance = 4; | ||
| tests.push_back(t); | ||
|
|
||
| t.target = "CGTCGTCGTCCGTCGTCGTCCGTCGTCGTCGTC"; | ||
| t.query = "AGTCGTCGTCCGTAATCGTCCGTCGTCGTCGTA"; | ||
| t.edit_distance = 4; | ||
| tests.push_back(t); | ||
|
|
||
| t.target = "GTCGTCGTCCGTCGTCGTCCGTCGTCGTCGTCGTCGTCGTCCGTCGTCGTCCGTCGTCGTCGTCGTCGTCGTCCGTCGTCGTCCGTCGTCGTCGTC"; | ||
| t.query = "GTCGTCGTCCGTCGTCGTCCGTCGTCGTCGAAAACGTCGTCCGTCGTCGTCCGTCGTCGAAAACGTCGTCGTCCGTAGTCGTCCGACGTCGTCGTC"; | ||
| t.edit_distance = 10; | ||
| tests.push_back(t); | ||
|
|
||
| t.target = "GTCGTCGTCCGTCGTCGTCCGTCGTCGTCGTCGTCGTCGTCCGTCGTCGTCCGTCGTCGTCGTCGTCGTCGTCCGTCGTCGTCCGTCGTCGTCGTC"; | ||
| t.query = "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"; | ||
| t.edit_distance = 96; | ||
| tests.push_back(t); | ||
|
|
||
| std::minstd_rand rng(1); | ||
| t.target = claragenomics::genomeutils::generate_random_genome(5000, rng); | ||
| t.query = claragenomics::genomeutils::generate_random_genome(4800, rng); | ||
| matrix<int> s = needleman_wunsch_build_score_matrix_naive(t.target, t.query); | ||
| t.edit_distance = s(s.num_rows() - 1, s.num_cols() - 1); | ||
| tests.push_back(t); | ||
| return tests; | ||
| } | ||
|
|
||
// Parameterized fixture: each parameter is one TestCaseData
// (target, query, expected edit distance).
class TestMyersEditDistance : public ::testing::TestWithParam<TestCaseData>
{
};
|
|
||
| TEST_P(TestMyersEditDistance, TestCases) | ||
| { | ||
| TestCaseData t = GetParam(); | ||
|
|
||
| int32_t d = myers_compute_edit_distance(t.target, t.query); | ||
| ASSERT_EQ(d, t.edit_distance); | ||
| } | ||
|
|
||
// Parameterized fixture: reuses the same TestCaseData parameters to compare
// full score matrices rather than just the final edit distance.
class TestMyersScoreMatrix : public ::testing::TestWithParam<TestCaseData>
{
};
|
|
||
| TEST_P(TestMyersScoreMatrix, TestCases) | ||
| { | ||
| TestCaseData t = GetParam(); | ||
|
|
||
| matrix<int32_t> m = myers_get_full_score_matrix(t.target, t.query); | ||
| matrix<int32_t> r = needleman_wunsch_build_score_matrix_naive(t.target, t.query); | ||
|
|
||
| ASSERT_EQ(m.num_rows(), r.num_rows()); | ||
| ASSERT_EQ(m.num_cols(), r.num_cols()); | ||
|
|
||
| for (int32_t j = 0; j < m.num_cols(); ++j) | ||
| { | ||
| for (int32_t i = 0; i < m.num_rows(); ++i) | ||
| { | ||
| EXPECT_EQ(m(i, j), r(i, j)) << "index: (" << i << "," << j << ")"; | ||
| } | ||
| } | ||
| } | ||
|
|
||
// Instantiate both suites over the same shared set of Myers test cases.
INSTANTIATE_TEST_SUITE_P(TestMyersAlgorithm, TestMyersEditDistance, ::testing::ValuesIn(create_myers_test_cases()));
INSTANTIATE_TEST_SUITE_P(TestMyersAlgorithm, TestMyersScoreMatrix, ::testing::ValuesIn(create_myers_test_cases()));
|
|
||
| } // namespace cudaaligner | ||
| } // namespace claragenomics |
| @@ -0,0 +1,132 @@ | ||
| /* | ||
| * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. | ||
| * | ||
| * NVIDIA CORPORATION and its licensors retain all intellectual property | ||
| * and proprietary rights in and to this software, related documentation | ||
| * and any modifications thereto. Any use, reproduction, disclosure or | ||
| * distribution of this software and related documentation without an express | ||
| * license agreement from NVIDIA CORPORATION is strictly prohibited. | ||
| */ | ||
|
|
||
| #pragma once | ||
|
|
||
| #include <claragenomics/cudapoa/cudapoa.hpp> | ||
|
|
||
| #include <memory> | ||
| #include <vector> | ||
| #include <stdint.h> | ||
| #include <string> | ||
| #include <iostream> | ||
| #include <cuda_runtime_api.h> | ||
|
|
||
| namespace claragenomics | ||
| { | ||
|
|
||
| namespace cudapoa | ||
| { | ||
|
|
||
/// A structure to represent a sequence entry.
struct Entry
{
    /// Pointer to string representing sequence.
    /// NOTE(review): appears to be non-owning (sample code passes c_str() of a
    /// caller-owned string) -- the caller presumably keeps it alive; confirm.
    const char* seq;
    /// Pointer to array of weight per base in sequence.
    /// May be nullptr for unweighted sequences (see sample usage).
    const int8_t* weights;
    /// Length of sequence.
    int32_t length;
};
|
|
||
/// A type defining the set and order of Entry's in which a POA is processed.
typedef std::vector<Entry> Group;
|
|
||
/// \class Batch
/// Batched GPU CUDA POA object
class Batch
{
public:
    /// \brief CudapoaBatch has a custom dtor, so declare ~Batch virtual and give it a default implementation
    virtual ~Batch() = default;

    /// \brief Add a new group to the batch to run POA algorithm on. Based on the constraints
    /// of the batch, not all entries in a group may be added. This will be reflected in
    /// the per_seq_status of the call. Those entries that were added will be shown with a success.
    ///
    /// \param per_seq_status Reference to an output vector of StatusType that holds
    /// the processing status of each entry in the group.
    /// NOTE: This API clears old entries in the vector.
    /// \param poa_group Vector of Entry's to process in POA. Based on the constraints
    /// of the batch, not all entries in a group may be added.
    /// This will be reflected in the per_seq_status of the call. Those entries that were
    /// added will show a success status. The POA algorithm will run with
    /// the sequences that were added.
    ///
    /// \return Status representing whether PoaGroup was successfully added to batch.
    virtual StatusType add_poa_group(std::vector<StatusType>& per_seq_status,
                                     const Group& poa_group) = 0;

    /// \brief Get total number of partial order alignments in batch.
    ///
    /// \return Total POAs in batch.
    virtual int32_t get_total_poas() const = 0;

    /// \brief Run partial order alignment algorithm over all POAs.
    virtual void generate_poa() = 0;

    /// \brief Get the consensus for each POA.
    ///
    /// \param consensus Reference to vector where consensus strings
    /// will be returned
    /// \param coverage Reference to vector where coverage of each
    /// base in each consensus string is returned
    /// \param output_status Reference to vector where the errors
    /// during kernel execution are captured
    virtual StatusType get_consensus(std::vector<std::string>& consensus,
                                     std::vector<std::vector<uint16_t>>& coverage,
                                     std::vector<claragenomics::cudapoa::StatusType>& output_status) = 0;

    /// \brief Get the multiple sequence alignments for each POA.
    ///
    /// \param msa Reference to vector where msa strings of each
    /// poa is returned
    /// \param output_status Reference to vector where the errors
    /// during kernel execution are captured
    virtual StatusType get_msa(std::vector<std::vector<std::string>>& msa,
                               std::vector<StatusType>& output_status) = 0;

    /// \brief Return batch ID.
    ///
    /// \return Batch ID
    virtual int32_t batch_id() const = 0;

    /// \brief Reset batch. Must do before re-using batch.
    virtual void reset() = 0;
};
|
|
||
/// \brief Creates a new CUDA Batch object.
///
/// \param max_sequences_per_poa Maximum number of sequences per POA
/// \param device_id GPU device on which to run CUDA POA algorithm
/// \param stream CUDA stream to use on GPU
/// \param max_mem Maximum GPU memory (in bytes) to use for this batch.
/// \param output_mask Which outputs to produce from POA (msa, consensus)
/// \param gap_score Score to be assigned to a gap
/// \param mismatch_score Score to be assigned to a mismatch
/// \param match_score Score to be assigned for a match
/// \param cuda_banded_alignment Whether to use banded alignment
///
/// \return Returns a unique pointer to a new Batch object
std::unique_ptr<Batch> create_batch(int32_t max_sequences_per_poa,
                                    int32_t device_id,
                                    cudaStream_t stream,
                                    size_t max_mem,
                                    int8_t output_mask,
                                    int16_t gap_score,
                                    int16_t mismatch_score,
                                    int16_t match_score,
                                    bool cuda_banded_alignment);
|
|
||
| /// \} | ||
|
|
||
| } // namespace cudapoa | ||
|
|
||
| } // namespace claragenomics |
| @@ -0,0 +1,25 @@ | ||
| # | ||
| # Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. | ||
| # | ||
| # NVIDIA CORPORATION and its licensors retain all intellectual property | ||
| # and proprietary rights in and to this software, related documentation | ||
| # and any modifications thereto. Any use, reproduction, disclosure or | ||
| # distribution of this software and related documentation without an express | ||
| # license agreement from NVIDIA CORPORATION is strictly prohibited. | ||
| # | ||
|
|
||
# Sample application demonstrating the CUDAPOA batch API.
project(sample_cudapoa)

# Pick up the pre-canned benchmark data location exported as a global property
# by the cudapoa build (used via file_location.hpp in the sample source).
get_property(cudapoa_data_include_dir GLOBAL PROPERTY cudapoa_data_include_dir)
include_directories(${cudapoa_data_include_dir})

add_executable(${PROJECT_NAME}
    sample_cudapoa.cpp
)

# Only dependency is the cudapoa library itself.
target_link_libraries(${PROJECT_NAME}
    cudapoa
)

# Install next to the other cudapoa samples.
install(TARGETS ${PROJECT_NAME}
    DESTINATION samples/cudapoa)
| @@ -0,0 +1,227 @@ | ||
| /* | ||
| * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. | ||
| * | ||
| * NVIDIA CORPORATION and its licensors retain all intellectual property | ||
| * and proprietary rights in and to this software, related documentation | ||
| * and any modifications thereto. Any use, reproduction, disclosure or | ||
| * distribution of this software and related documentation without an express | ||
| * license agreement from NVIDIA CORPORATION is strictly prohibited. | ||
| */ | ||
|
|
||
| #include "../benchmarks/common/utils.hpp" | ||
|
|
||
| #include <file_location.hpp> | ||
| #include <claragenomics/cudapoa/cudapoa.hpp> | ||
| #include <claragenomics/cudapoa/batch.hpp> | ||
| #include <claragenomics/utils/signed_integer_utils.hpp> | ||
| #include <claragenomics/utils/cudautils.hpp> | ||
|
|
||
| #include <cuda_runtime_api.h> | ||
| #include <vector> | ||
| #include <string> | ||
| #include <stdexcept> | ||
| #include <unistd.h> | ||
|
|
||
| using namespace claragenomics; | ||
| using namespace claragenomics::cudapoa; | ||
|
|
||
| std::unique_ptr<Batch> initialize_batch(bool msa) | ||
| { | ||
| // Get device information. | ||
| int32_t device_count = 0; | ||
| CGA_CU_CHECK_ERR(cudaGetDeviceCount(&device_count)); | ||
| assert(device_count > 0); | ||
|
|
||
| size_t total = 0, free = 0; | ||
| cudaSetDevice(0); // Using first GPU for sample. | ||
| cudaMemGetInfo(&free, &total); | ||
|
|
||
| // Initialize internal logging framework. | ||
| Init(); | ||
|
|
||
| // Initialize CUDAPOA batch object for batched processing of POAs on the GPU. | ||
| const int32_t max_sequences_per_poa_group = 100; | ||
| const int32_t device_id = 0; | ||
| cudaStream_t stream = 0; | ||
| size_t mem_per_batch = 0.9 * free; // Using 90% of GPU available memory for CUDAPOA batch. | ||
| const int32_t mismatch_score = -6, gap_score = -8, match_score = 8; | ||
| bool banded_alignment = false; | ||
|
|
||
| std::unique_ptr<Batch> batch = create_batch(max_sequences_per_poa_group, | ||
| device_id, | ||
| stream, | ||
| mem_per_batch, | ||
| msa ? OutputType::msa : OutputType::consensus, | ||
| gap_score, | ||
| mismatch_score, | ||
| match_score, | ||
| banded_alignment); | ||
|
|
||
| return std::move(batch); | ||
| } | ||
|
|
||
| void process_batch(Batch* batch, bool msa, bool print) | ||
| { | ||
| batch->generate_poa(); | ||
|
|
||
| StatusType status = StatusType::success; | ||
| if (msa) | ||
| { | ||
| // Grab MSA results for all POA groups in batch. | ||
| std::vector<std::vector<std::string>> msa; // MSA per group | ||
| std::vector<StatusType> output_status; // Status of MSA generation per group | ||
|
|
||
| status = batch->get_msa(msa, output_status); | ||
| if (status != StatusType::success) | ||
| { | ||
| std::cerr << "Could not generate MSA for batch : " << status << std::endl; | ||
| } | ||
|
|
||
| for (int32_t g = 0; g < get_size(msa); g++) | ||
| { | ||
| if (output_status[g] != StatusType::success) | ||
| { | ||
| std::cerr << "Error generating MSA for POA group " << g << ". Error type " << output_status[g] << std::endl; | ||
| } | ||
| else | ||
| { | ||
| if (print) | ||
| { | ||
| for (const auto& alignment : msa[g]) | ||
| { | ||
| std::cout << alignment << std::endl; | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
| else | ||
| { | ||
| // Grab consensus results for all POA groups in batch. | ||
| std::vector<std::string> consensus; // Consensus string for each POA group | ||
| std::vector<std::vector<uint16_t>> coverage; // Per base coverage for each consensus | ||
| std::vector<StatusType> output_status; // Status of consensus generation per group | ||
|
|
||
| status = batch->get_consensus(consensus, coverage, output_status); | ||
| if (status != StatusType::success) | ||
| { | ||
| std::cerr << "Could not generate consensus for batch : " << status << std::endl; | ||
| } | ||
|
|
||
| for (int32_t g = 0; g < get_size(consensus); g++) | ||
| { | ||
| if (output_status[g] != StatusType::success) | ||
| { | ||
| std::cerr << "Error generating consensus for POA group " << g << ". Error type " << output_status[g] << std::endl; | ||
| } | ||
| else | ||
| { | ||
| if (print) | ||
| { | ||
| std::cout << consensus[g] << std::endl; | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| int main(int argc, char** argv) | ||
| { | ||
| // Process options | ||
| int c = 0; | ||
| bool msa = false; | ||
| bool help = false; | ||
| bool print = false; | ||
|
|
||
| while ((c = getopt(argc, argv, "mhp")) != -1) | ||
| { | ||
| switch (c) | ||
| { | ||
| case 'm': | ||
| msa = true; | ||
| break; | ||
| case 'p': | ||
| print = true; | ||
| break; | ||
| case 'h': | ||
| help = true; | ||
| break; | ||
| } | ||
| } | ||
|
|
||
| if (help) | ||
| { | ||
| std::cout << "CUDAPOA API sample program. Runs consensus or MSA generation on pre-canned data." << std::endl; | ||
| std::cout << "Usage:" << std::endl; | ||
| std::cout << "./sample_cudapoa [-m] [-h]" << std::endl; | ||
| std::cout << "-m : Generate MSA (if not provided, generates consensus by default)" << std::endl; | ||
| std::cout << "-p : Print the MSA or consensus output to stdout" << std::endl; | ||
| std::cout << "-h : Print help message" << std::endl; | ||
| std::exit(0); | ||
| } | ||
|
|
||
| // Load input data. Each POA group is represented as a vector of strings. The sample | ||
| // data has many such POA groups to process, hence the data is loaded into a vector | ||
| // of vector of strings. | ||
| const std::string input_data = std::string(CUDAPOA_BENCHMARK_DATA_DIR) + "/sample-windows.txt"; | ||
| std::vector<std::vector<std::string>> windows; | ||
| parse_window_data_file(windows, input_data, 1000); // Generate windows. | ||
| assert(get_size(windows) > 0); | ||
|
|
||
| // Initialize batch. | ||
| std::unique_ptr<Batch> batch = initialize_batch(msa); | ||
|
|
||
| // Loop over all the POA groups, add them to the batch and process them. | ||
| int32_t window_count = 0; | ||
| for (int32_t i = 0; i < get_size(windows);) | ||
| { | ||
| const std::vector<std::string>& window = windows[i]; | ||
|
|
||
| Group poa_group; | ||
| // Create a new entry for each sequence and add to the group. | ||
| for (const auto& seq : window) | ||
| { | ||
| Entry poa_entry{}; | ||
| poa_entry.seq = seq.c_str(); | ||
| poa_entry.length = seq.length(); | ||
| poa_entry.weights = nullptr; | ||
| poa_group.push_back(poa_entry); | ||
| } | ||
|
|
||
| std::vector<StatusType> seq_status; | ||
| StatusType status = batch->add_poa_group(seq_status, poa_group); | ||
|
|
||
| if (status == StatusType::success) | ||
| { | ||
| // Check if all sequences in POA group wre added successfully. | ||
| for (const auto& s : seq_status) | ||
| { | ||
| if (s == StatusType::exceeded_maximum_sequence_size) | ||
| { | ||
| std::cerr << "Dropping sequence because sequence exceeded maximum size" << std::endl; | ||
| } | ||
| } | ||
| i++; | ||
| } | ||
| // NOTE: If number of windows smaller than batch capacity, then run POA generation | ||
| // once last window is added to batch. | ||
| if (status == StatusType::exceeded_maximum_poas || (i == get_size(windows) - 1)) | ||
| { | ||
| // No more POA groups can be added to batch. Now process batch. | ||
| process_batch(batch.get(), msa, print); | ||
|
|
||
| // After MSA is generated for batch, reset batch to make roomf or next set of POA groups. | ||
| batch->reset(); | ||
|
|
||
| std::cout << "Processed windows " << window_count << " - " << i << std::endl; | ||
| window_count = i; | ||
| } | ||
|
|
||
| if (status != StatusType::exceeded_maximum_poas && status != StatusType::success) | ||
| { | ||
| std::cerr << "Could not add POA group to batch. Error code " << status << std::endl; | ||
| } | ||
| } | ||
|
|
||
| return 0; | ||
| } |