Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
166 changes: 166 additions & 0 deletions include/bio/detail/in_file_iterator.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
// -----------------------------------------------------------------------------------------------------
// Copyright (c) 2006-2020, Knut Reinert & Freie Universität Berlin
// Copyright (c) 2016-2020, Knut Reinert & MPI für molekulare Genetik
// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
// -----------------------------------------------------------------------------------------------------

/*!\file
* \brief Provides the bio::detail::in_file_iterator class template.
* \author Hannes Hauswedell <hannes.hauswedell AT fu-berlin.de>
*/

#pragma once

#include <cassert>
#include <cstddef>
#include <iterator>
#include <ranges>
#include <type_traits>

#include <bio/platform.hpp>

namespace bio::detail
{

/*!\brief Input iterator that provides a range-like interface for readers.
 * \tparam file_type The data structure on which the iterator operates.
 * \implements std::input_iterator
 * \ingroup bio
 *
 * This iterator is a single-pass input iterator for input files. Its value, reference and pointer
 * types are derived from `file_type::record_type`; dereferencing returns the host's `record_buffer`
 * member, and incrementing calls the host's `read_next_record()` member function.
 *
 * Note that since this is a single-pass input iterator, post-increment returns void because
 * previous iterators are always invalid (all iterators point to the current position in single-pass
 * ranges).
 *
 * This iterator may be compared against std::default_sentinel_t; the comparison reads the host's
 * `at_end` member.
 *
 * The iterator only stores a raw pointer to the host and never deletes it. A default-constructed
 * iterator has no host (null pointer); increment, dereference and sentinel comparison assert that a host
 * is set.
 */
template <typename file_type>
class in_file_iterator
{
    static_assert(!std::is_const_v<file_type>,
                  "You cannot iterate over const files, because the iterator changes the file.");

public:
    /*!\name Member types
     * \brief The associated types are derived from `file_type::record_type`.
     * \{
     */

    //!\brief The value type.
    using value_type        = typename file_type::record_type;
    //!\brief The reference type.
    using reference         = typename file_type::record_type &;
    //!\brief The "const reference" type. NOTE: currently the same (non-const) type as #reference.
    using const_reference   = typename file_type::record_type &;
    //!\brief The size type.
    using size_type         = std::size_t;
    //!\brief The difference type; a signed integer type.
    using difference_type   = std::ptrdiff_t;
    //!\brief The pointer type.
    using pointer           = typename file_type::record_type *;
    //!\brief Tag this class as an input iterator.
    using iterator_category = std::input_iterator_tag;
    //!\}

    /*!\name Constructors, destructor and assignment.
     * \{
     */
    in_file_iterator()                                     = default; //!< Defaulted; the iterator has no host.
    in_file_iterator(in_file_iterator const &)             = default; //!< Defaulted.
    in_file_iterator(in_file_iterator &&)                  = default; //!< Defaulted.
    ~in_file_iterator()                                    = default; //!< Defaulted.
    in_file_iterator & operator=(in_file_iterator const &) = default; //!< Defaulted.
    in_file_iterator & operator=(in_file_iterator &&)      = default; //!< Defaulted.

    /*!\brief Construct with reference to host.
     * \param[in] _host The file object to iterate over; its address is stored.
     *
     * Not marked `explicit`; making it so would be an interface change.
     */
    in_file_iterator(file_type & _host) noexcept : host{&_host} {}
    //!\}

    /*!\name Iterator operations
     * \{
     */
    /*!\brief Advance the host to its next record.
     * \returns A reference to this iterator (not to the record).
     */
    in_file_iterator & operator++()
    {
        assert(host != nullptr);
        host->read_next_record();
        return *this;
    }

    //!\brief Post-increment is the same as pre-increment, but returns void.
    void operator++(int)
    {
        ++(*this); // pre-increment already asserts that a host is set
    }

    //!\brief Dereference returns the host's currently buffered record.
    reference operator*() noexcept
    {
        assert(host != nullptr);
        return host->record_buffer;
    }

    /*!\brief Dereference returns the host's currently buffered record.
     *
     * Constness of the iterator does not propagate to the record: a mutable reference is returned.
     */
    reference operator*() const noexcept
    {
        assert(host != nullptr);
        return host->record_buffer;
    }

    //!\brief Member access returns the address of the host's currently buffered record.
    value_type * operator->() noexcept
    {
        assert(host != nullptr);
        return &host->record_buffer;
    }

    /*!\brief Member access returns the address of the host's currently buffered record.
     *
     * Unlike the const overload of `operator*`, this overload yields a pointer-to-const.
     */
    value_type const * operator->() const noexcept
    {
        assert(host != nullptr);
        return &host->record_buffer;
    }

    //!\}

    /*!\name Comparison operators
     * \brief Only (in-)equality comparison of iterator with end() is supported.
     * \{
     */

    //!\brief Checks whether `*this` is equal to the sentinel, i.e. whether the host's `at_end` member is set.
    constexpr bool operator==(std::default_sentinel_t const &) const noexcept
    {
        assert(host != nullptr);
        return host->at_end;
    }

    //!\brief Checks whether `*this` is not equal to the sentinel.
    constexpr bool operator!=(std::default_sentinel_t const &) const noexcept
    {
        assert(host != nullptr);
        return !host->at_end;
    }

    //!\brief Checks whether `it` is equal to the sentinel (sentinel on the left-hand side).
    constexpr friend bool operator==(std::default_sentinel_t const &, in_file_iterator const & it) noexcept
    {
        return (it == std::default_sentinel);
    }

    //!\brief Checks whether `it` is not equal to the sentinel (sentinel on the left-hand side).
    constexpr friend bool operator!=(std::default_sentinel_t const &, in_file_iterator const & it) noexcept
    {
        return (it != std::default_sentinel);
    }
    //!\}

private:
    //!\brief Pointer to the file host; null for a default-constructed iterator.
    file_type * host{};
};

} // namespace bio::detail
76 changes: 76 additions & 0 deletions include/bio/detail/misc.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
// -----------------------------------------------------------------------------------------------------
// Copyright (c) 2006-2021, Knut Reinert & Freie Universität Berlin
// Copyright (c) 2016-2021, Knut Reinert & MPI für molekulare Genetik
// Copyright (c) 2020-2021, deCODE Genetics
// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
// shipped with this file and also available at: https://github.com/seqan/bio/blob/master/LICENSE.md
// -----------------------------------------------------------------------------------------------------

/*!\file
* \brief Provides miscellaneous utilities.
* \author Hannes Hauswedell <hannes.hauswedell AT decode.is>
*/

#pragma once

#include <algorithm>
#include <concepts>
#include <filesystem>
#include <ranges>
#include <string>

#include <seqan3/core/detail/template_inspection.hpp>
#include <seqan3/utility/type_list/detail/type_list_algorithm.hpp>
#include <seqan3/utility/type_list/type_list.hpp>

#include <bio/exception.hpp>

namespace bio::detail
{

/*!\addtogroup bio
* \{
*/

/*!\brief Sets the file format according to the file name extension.
* \param[out] format The format to set.
* \param[in] file_name The file name to extract the extension from.
*
* \throws seqan3::unhandled_extension_error If the extension in file_name does
* not occur in any valid extensions of the formats specified in the
* \p format_variant_type template argument list.
*/
void set_format(auto & format, std::filesystem::path const & file_name)
{
using format_variant_type = std::remove_cvref_t<decltype(format)>;
using valid_formats = seqan3::detail::transfer_template_args_onto_t<format_variant_type, seqan3::type_list>;

bool format_found = false;
std::string extension = file_name.extension().string();
if (extension.size() > 1)
{
extension = extension.substr(1); // drop leading "."
seqan3::detail::for_each<valid_formats>(
[&](auto fmt)
{
using fm_type = typename decltype(fmt)::type; // remove type_identity wrapper

for (auto const & ext : fm_type::file_extensions)
{
if (std::ranges::equal(ext, extension))
{
format.template emplace<fm_type>();
format_found = true;
return;
}
}
});
}

if (!format_found)
throw unhandled_extension_error("No valid format found for this extension.");
}

//!\}

} // namespace bio::detail
76 changes: 56 additions & 20 deletions include/bio/detail/range.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@

#pragma once

#include <ranges>
#include <span>

#include <seqan3/alphabet/concept.hpp>
#include <seqan3/utility/concept/container.hpp>

#include <bio/platform.hpp>

Expand All @@ -25,6 +27,57 @@ namespace bio::detail
* \{
*/

// ----------------------------------------------------------------------------
// concepts
// ----------------------------------------------------------------------------

/*!\interface bio::detail::back_insertable_with <>
* \extends std::ranges::output_range
* \tparam rng_t The container type.
* \tparam val_t The type to append to the container.
* \brief Describes range types that can grow in amortised constant time by appending an element of type val_t.
*
* \details
* The concept is satisfied if `rng_t` models `std::ranges::output_range<rng_t, val_t>` and the expression
* `v.push_back(std::declval<val_t>())` is well-formed for an lvalue `v` of type `rng_t`.
* Only the validity of that expression is checked; the amortised constant-time growth is a semantic
* expectation that the concept itself cannot verify.
*/
//!\cond
template <typename rng_t, typename val_t>
concept back_insertable_with = std::ranges::output_range<rng_t, val_t> && requires(rng_t & v)
{
v.push_back(std::declval<val_t>());
};
//!\endcond

/*!\interface bio::detail::back_insertable <>
* \extends std::ranges::output_range
* \extends std::ranges::input_range
* \tparam rng_t The container type.
* \brief Describes range types that can grow in amortised constant time by appending an element.
*
* \details
* The concept is satisfied if `rng_t` models `std::ranges::input_range` and is
* bio::detail::back_insertable_with its own `std::ranges::range_reference_t<rng_t>`, i.e. an element of
* the range's own reference type can be appended.
*/
//!\cond
template <typename rng_t>
concept back_insertable =
std::ranges::input_range<rng_t> && back_insertable_with<rng_t, std::ranges::range_reference_t<rng_t>>;
//!\endcond

//!\brief A seqan3::alphabet that is **not** a character or number (any std::integral).
//!\details cv- and reference-qualifiers are stripped from `t` before the std::integral check, so e.g.
//!         `char const &` is rejected in the same way as `char`.
template <typename t>
concept deliberate_alphabet = seqan3::alphabet<t> && !std::integral<std::remove_cvref_t<t>>;

//!\brief A range whose value type is `char`.
//!\details The comparison is an exact std::same_as against `char`; ranges over other character types
//!         (e.g. `signed char`, `unsigned char`, `wchar_t`) do not satisfy this concept.
template <typename t>
concept char_range = std::ranges::range<t> && std::same_as<char, std::remove_cvref_t<std::ranges::range_value_t<t>>>;

//!\brief A range whose value type is an integral type other than `char`.
//!\details Any type modelling std::integral except exactly `char` is accepted as value type; this includes
//!         `bool` and the other character types (e.g. `signed char`, `unsigned char`, `wchar_t`).
template <typename t>
concept int_range = std::ranges::range<t> && std::integral<std::remove_cvref_t<std::ranges::range_value_t<t>>> &&
!std::same_as<char, std::remove_cvref_t<std::ranges::range_value_t<t>>>;

//!\brief A type that is not std::span<std::byte const>.
//!\details The comparison is exact: no cv- or reference-qualifiers are stripped from `t`, so e.g.
//!         `std::span<std::byte const> &` and `std::span<std::byte>` both satisfy this concept.
template <typename t>
concept not_a_byte_span = !std::same_as<t, std::span<std::byte const>>;

// ----------------------------------------------------------------------------
// copy functions
// ----------------------------------------------------------------------------

/*!\brief Copy elements from the first range into the second range.
* \param[in] in The range to copy from.
* \param[out] out The range to copy to.
Expand All @@ -36,8 +89,8 @@ namespace bio::detail
* If the input range is sized and the target range offers a `.resize()` member, this function uses
* resize and assignment instead of back-insertion.
*/
void sized_range_copy(std::ranges::input_range auto && in,
seqan3::back_insertable_with<std::ranges::range_reference_t<decltype(in)>> auto && out)
void sized_range_copy(std::ranges::input_range auto && in,
back_insertable_with<std::ranges::range_reference_t<decltype(in)>> auto && out)
{
using in_t = decltype(in);
using out_t = decltype(out);
Expand All @@ -64,23 +117,6 @@ void string_copy(std::string_view const in, auto & out)
sized_range_copy(in, out);
}

//!\brief A seqan3::alphabet that is **not** a character or number (any std::integral).
//!\details cv- and reference-qualifiers are stripped from `t` before the std::integral check, so e.g.
//!         `char const &` is rejected in the same way as `char`.
template <typename t>
concept deliberate_alphabet = seqan3::alphabet<t> && !std::integral<std::remove_cvref_t<t>>;

//!\brief A range whose value type is `char`.
//!\details The comparison is an exact std::same_as against `char`; ranges over other character types
//!         (e.g. `signed char`, `unsigned char`, `wchar_t`) do not satisfy this concept.
template <typename t>
concept char_range = std::ranges::range<t> && std::same_as<char, std::remove_cvref_t<std::ranges::range_value_t<t>>>;

//!\brief A range whose value type is an integral type other than `char`.
//!\details Any type modelling std::integral except exactly `char` is accepted as value type; this includes
//!         `bool` and the other character types (e.g. `signed char`, `unsigned char`, `wchar_t`).
template <typename t>
concept int_range = std::ranges::range<t> && std::integral<std::remove_cvref_t<std::ranges::range_value_t<t>>> &&
!std::same_as<char, std::remove_cvref_t<std::ranges::range_value_t<t>>>;

//!\brief A type that is not std::span<std::byte const>.
//!\details The comparison is exact: no cv- or reference-qualifiers are stripped from `t`, so e.g.
//!         `std::span<std::byte const> &` and `std::span<std::byte>` both satisfy this concept.
template <typename t>
concept not_a_byte_span = !std::same_as<t, std::span<std::byte const>>;

//!\}

} // namespace bio::detail
Loading