diff --git a/include/fkYAML/detail/encodings/encode_detector.hpp b/include/fkYAML/detail/encodings/encode_detector.hpp index 2878acd4..9cea4f17 100644 --- a/include/fkYAML/detail/encodings/encode_detector.hpp +++ b/include/fkYAML/detail/encodings/encode_detector.hpp @@ -15,7 +15,7 @@ #include #include -#include +#include #include /// @brief namespace for fkYAML library. @@ -27,63 +27,68 @@ namespace detail /// @brief Detect an encoding type for UTF-8 expected inputs. /// @note This function doesn't support the case where the first character is null. -/// @param b0 The 1st byte of an input character sequence. -/// @param b1 The 2nd byte of an input character sequence. -/// @param b2 The 3rd byte of an input character sequence. -/// @param b3 The 4th byte of an input character sequence. +/// @param[in] bytes 4 bytes of an input character sequence. +/// @param[out] has_bom Whether or not the input contains a BOM. /// @return A detected encoding type. -inline encode_t detect_encoding_type(uint8_t b0, uint8_t b1, uint8_t b2, uint8_t b3) noexcept +inline utf_encode_t detect_encoding_type(const std::array& bytes, bool& has_bom) noexcept { + has_bom = false; + // Check if a BOM exists. - if (b0 == uint8_t(0xEFu) && b1 == uint8_t(0xBBu) && b2 == uint8_t(0xBFu)) + if (bytes[0] == uint8_t(0xEFu) && bytes[1] == uint8_t(0xBBu) && bytes[2] == uint8_t(0xBFu)) { - return encode_t::UTF_8_BOM; + has_bom = true; + return utf_encode_t::UTF_8; } - if (b0 == 0 && b1 == 0 && b2 == uint8_t(0xFEu) && b3 == uint8_t(0xFFu)) + if (bytes[0] == 0 && bytes[1] == 0 && bytes[2] == uint8_t(0xFEu) && bytes[3] == uint8_t(0xFFu)) { - return encode_t::UTF_32BE_BOM; + has_bom = true; + return utf_encode_t::UTF_32BE; } - if (b0 == uint8_t(0xFFu) && b1 == uint8_t(0xFEu) && b2 == 0 && b3 == 0) + if (bytes[0] == uint8_t(0xFFu) && bytes[1] == uint8_t(0xFEu) && bytes[2] == 0 && bytes[3] == 0) { - return encode_t::UTF_32LE_BOM; + has_bom = true; + return utf_encode_t::UTF_32LE; } - if (b0 == uint8_t(0xFEu) && b1 == uint8_t(0xFFu)) + if (bytes[0] == uint8_t(0xFEu) && bytes[1] == uint8_t(0xFFu)) { - return encode_t::UTF_16BE_BOM; + has_bom = true; + return utf_encode_t::UTF_16BE; } - if (b0 == uint8_t(0xFFu) && b1 == uint8_t(0xFEu)) + if (bytes[0] == uint8_t(0xFFu) && bytes[1] == uint8_t(0xFEu)) { - return encode_t::UTF_16LE_BOM; + has_bom = true; + return utf_encode_t::UTF_16LE; } // Test the first character assuming it's an ASCII character. - if (b0 == 0 && b1 == 0 && b2 == 0 && 0 < b3 && b3 < uint8_t(0x80u)) + if (bytes[0] == 0 && bytes[1] == 0 && bytes[2] == 0 && 0 < bytes[3] && bytes[3] < uint8_t(0x80u)) { - return encode_t::UTF_32BE_N; + return utf_encode_t::UTF_32BE; } - if (0 < b0 && b0 < uint8_t(0x80u) && b1 == 0 && b2 == 0 && b3 == 0) + if (0 < bytes[0] && bytes[0] < uint8_t(0x80u) && bytes[1] == 0 && bytes[2] == 0 && bytes[3] == 0) { - return encode_t::UTF_32LE_N; + return utf_encode_t::UTF_32LE; } - if (b0 == 0 && 0 < b1 && b1 < uint8_t(0x80u)) + if (bytes[0] == 0 && 0 < bytes[1] && bytes[1] < uint8_t(0x80u)) { - return encode_t::UTF_16BE_N; + return utf_encode_t::UTF_16BE; } - if (0 < b0 && b0 < uint8_t(0x80u) && b1 == 0) + if (0 < bytes[0] && bytes[0] < uint8_t(0x80u) && bytes[1] == 0) { - return encode_t::UTF_16LE_N; + return utf_encode_t::UTF_16LE; } - return encode_t::UTF_8_N; + return utf_encode_t::UTF_8; } /// @brief Detects the encoding type of the input, and consumes a BOM if it exists. @@ -93,9 +98,9 @@ inline encode_t detect_encoding_type(uint8_t b0, uint8_t b1, uint8_t b2, uint8_t /// @param end The end of input iterators. /// @return A detected encoding type. template ())))> -inline encode_t detect_encoding_and_skip_bom(ItrType& begin, const ItrType& end) +inline utf_encode_t detect_encoding_and_skip_bom(ItrType& begin, const ItrType& end) { - uint8_t bytes[4] = {0xFFu, 0xFFu, 0xFFu, 0xFFu}; + std::array bytes = {0xFFu, 0xFFu, 0xFFu, 0xFFu}; switch (ElemSize) { case sizeof(char): { // this case covers char8_t as well when compiled with C++20 features. @@ -104,30 +109,34 @@ inline encode_t detect_encoding_and_skip_bom(ItrType& begin, const ItrType& end) bytes[i] = uint8_t(begin[i]); } - encode_t encode_type = detect_encoding_type(bytes[0], bytes[1], bytes[2], bytes[3]); - switch (encode_type) + bool has_bom = false; + utf_encode_t encode_type = detect_encoding_type(bytes, has_bom); + + if (has_bom) { - case encode_t::UTF_8_BOM: - std::advance(begin, 3); - break; - case encode_t::UTF_16BE_BOM: - case encode_t::UTF_16LE_BOM: - std::advance(begin, 2); - break; - case encode_t::UTF_32BE_BOM: - case encode_t::UTF_32LE_BOM: - std::advance(begin, 4); - break; - default: - // Do nothing if a BOM doesn't exist. - break; + // skip reading the BOM. + switch (encode_type) + { + case utf_encode_t::UTF_8: + std::advance(begin, 3); + break; + case utf_encode_t::UTF_16BE: + case utf_encode_t::UTF_16LE: + std::advance(begin, 2); + break; + case utf_encode_t::UTF_32BE: + case utf_encode_t::UTF_32LE: + std::advance(begin, 4); + break; + } } + return encode_type; } case sizeof(char16_t): { if (begin == end) { - return encode_t::UTF_16BE_N; + return utf_encode_t::UTF_16BE; } for (int i = 0; i < 2 && begin + i != end; i++) { @@ -135,45 +144,47 @@ inline encode_t detect_encoding_and_skip_bom(ItrType& begin, const ItrType& end) bytes[i * 2 + 1] = uint8_t(begin[i] & 0xFFu); } - encode_t encode_type = detect_encoding_type(bytes[0], bytes[1], bytes[2], bytes[3]); - switch (encode_type) + bool has_bom = false; + utf_encode_t encode_type = detect_encoding_type(bytes, has_bom); + + if (encode_type != utf_encode_t::UTF_16BE && encode_type != utf_encode_t::UTF_16LE) { - case encode_t::UTF_16BE_BOM: - case encode_t::UTF_16LE_BOM: - std::advance(begin, 1); - break; - case encode_t::UTF_16BE_N: - case encode_t::UTF_16LE_N: - // Do nothing if a BOM doesn't exist. - break; - default: throw exception("char16_t characters must be encoded in the UTF-16 format."); } + + if (has_bom) + { + // skip reading the BOM. + std::advance(begin, 1); + } + return encode_type; } case sizeof(char32_t): { if (begin == end) { - return encode_t::UTF_32BE_N; + return utf_encode_t::UTF_32BE; } + bytes[0] = uint8_t((*begin & 0xFF000000u) >> 24); bytes[1] = uint8_t((*begin & 0x00FF0000u) >> 16); bytes[2] = uint8_t((*begin & 0x0000FF00u) >> 8); bytes[3] = uint8_t(*begin & 0x000000FFu); - encode_t encode_type = detect_encoding_type(bytes[0], bytes[1], bytes[2], bytes[3]); - switch (encode_type) + + bool has_bom = false; + utf_encode_t encode_type = detect_encoding_type(bytes, has_bom); + + if (encode_type != utf_encode_t::UTF_32BE && encode_type != utf_encode_t::UTF_32LE) { - case encode_t::UTF_32BE_BOM: - case encode_t::UTF_32LE_BOM: - std::advance(begin, 1); - break; - case encode_t::UTF_32BE_N: - case encode_t::UTF_32LE_N: - // Do nothing if a BOM doesn't exist. - break; - default: throw exception("char32_t characters must be encoded in the UTF-32 format."); } + + if (has_bom) + { + // skip reading the BOM. + std::advance(begin, 1); + } + return encode_type; } default: @@ -181,9 +192,9 @@ inline encode_t detect_encoding_and_skip_bom(ItrType& begin, const ItrType& end) } } -inline encode_t detect_encoding_and_skip_bom(std::FILE* file) noexcept +inline utf_encode_t detect_encoding_and_skip_bom(std::FILE* file) noexcept { - uint8_t bytes[4] = {0xFFu, 0xFFu, 0xFFu, 0xFFu}; + std::array bytes = {0xFFu, 0xFFu, 0xFFu, 0xFFu}; for (std::size_t i = 0; i < 4; i++) { char byte = 0; @@ -195,32 +206,36 @@ inline encode_t detect_encoding_and_skip_bom(std::FILE* file) noexcept bytes[i] = uint8_t(byte & 0xFF); } - encode_t encode_type = detect_encoding_type(bytes[0], bytes[1], bytes[2], bytes[3]); - switch (encode_type) + bool has_bom = false; + utf_encode_t encode_type = detect_encoding_type(bytes, has_bom); + + // move back to the beginning if a BOM doesn't exist. + long offset = 0; + if (has_bom) { - case encode_t::UTF_8_BOM: - fseek(file, 3, SEEK_SET); - break; - case encode_t::UTF_16BE_BOM: - case encode_t::UTF_16LE_BOM: - fseek(file, 2, SEEK_SET); - break; - case encode_t::UTF_32BE_BOM: - case encode_t::UTF_32LE_BOM: - fseek(file, 4, SEEK_SET); - break; - default: - // Move back to the beginning of the file contents if a BOM doesn't exist. - fseek(file, 0, SEEK_SET); - break; + switch (encode_type) + { + case utf_encode_t::UTF_8: + offset = 3; + break; + case utf_encode_t::UTF_16BE: + case utf_encode_t::UTF_16LE: + offset = 2; + break; + case utf_encode_t::UTF_32BE: + case utf_encode_t::UTF_32LE: + offset = 4; + break; + } } + fseek(file, offset, SEEK_SET); return encode_type; } -inline encode_t detect_encoding_and_skip_bom(std::istream& is) noexcept +inline utf_encode_t detect_encoding_and_skip_bom(std::istream& is) noexcept { - uint8_t bytes[4] = {0xFFu, 0xFFu, 0xFFu, 0xFFu}; + std::array bytes = {0xFFu, 0xFFu, 0xFFu, 0xFFu}; for (std::size_t i = 0; i < 4; i++) { char ch = 0; @@ -235,25 +250,29 @@ inline encode_t detect_encoding_and_skip_bom(std::istream& is) noexcept bytes[i] = uint8_t(ch & 0xFF); } - encode_t encode_type = detect_encoding_type(bytes[0], bytes[1], bytes[2], bytes[3]); - switch (encode_type) + bool has_bom = false; + utf_encode_t encode_type = detect_encoding_type(bytes, has_bom); + + // move back to the beginning if a BOM doesn't exist. + std::streamoff offset = 0; + if (has_bom) { - case encode_t::UTF_8_BOM: - is.seekg(3, std::ios_base::beg); - break; - case encode_t::UTF_16BE_BOM: - case encode_t::UTF_16LE_BOM: - is.seekg(2, std::ios_base::beg); - break; - case encode_t::UTF_32BE_BOM: - case encode_t::UTF_32LE_BOM: - is.seekg(4, std::ios_base::beg); - break; - default: - // Move back to the beginning of the file contents if a BOM doesn't exist. - is.seekg(0, std::ios_base::beg); - break; + switch (encode_type) + { + case utf_encode_t::UTF_8: + offset = 3; + break; + case utf_encode_t::UTF_16BE: + case utf_encode_t::UTF_16LE: + offset = 2; + break; + case utf_encode_t::UTF_32BE: + case utf_encode_t::UTF_32LE: + offset = 4; + break; + } } + is.seekg(offset, std::ios_base::beg); return encode_type; } diff --git a/include/fkYAML/detail/encodings/encode_t.hpp b/include/fkYAML/detail/encodings/utf_encode_t.hpp similarity index 58% rename from include/fkYAML/detail/encodings/encode_t.hpp rename to include/fkYAML/detail/encodings/utf_encode_t.hpp index 20814270..d019d481 100644 --- a/include/fkYAML/detail/encodings/encode_t.hpp +++ b/include/fkYAML/detail/encodings/utf_encode_t.hpp @@ -8,8 +8,8 @@ /// /// @file -#ifndef FK_YAML_DETAIL_ENCODINGS_ENCODE_T_HPP_ -#define FK_YAML_DETAIL_ENCODINGS_ENCODE_T_HPP_ +#ifndef FK_YAML_DETAIL_ENCODINGS_UTF_ENCODE_T_HPP_ +#define FK_YAML_DETAIL_ENCODINGS_UTF_ENCODE_T_HPP_ #include @@ -22,22 +22,17 @@ namespace detail /// @brief Definition of Unicode encoding types /// @note Since fkYAML doesn't treat UTF-16/UTF-32 encoded characters per byte, endians do not matter. -enum class encode_t +enum class utf_encode_t { - UTF_8_N, //!< UTF-8 without BOM - UTF_8_BOM, //!< UTF-8 with BOM - UTF_16BE_N, //!< UTF-16BE without BOM - UTF_16BE_BOM, //!< UTF-16BE with BOM - UTF_16LE_N, //!< UTF-16LE without BOM - UTF_16LE_BOM, //!< UTF-16LE with BOM - UTF_32BE_N, //!< UTF-32BE without BOM - UTF_32BE_BOM, //!< UTF-32BE with BOM - UTF_32LE_N, //!< UTF-32LE without BOM - UTF_32LE_BOM, //!< UTF-32LE with BOM + UTF_8, //!< UTF-8 + UTF_16BE, //!< UTF-16 Big Endian + UTF_16LE, //!< UTF-16 Little Endian + UTF_32BE, //!< UTF-32 Big Endian + UTF_32LE, //!< UTF-32 Little Endian }; } // namespace detail FK_YAML_NAMESPACE_END -#endif /* FK_YAML_DETAIL_ENCODINGS_ENCODE_T_HPP_ */ +#endif /* FK_YAML_DETAIL_ENCODINGS_UTF_ENCODE_T_HPP_ */ diff --git a/include/fkYAML/detail/input/deserializer.hpp b/include/fkYAML/detail/input/deserializer.hpp index c94fdc66..3c0bcb55 100644 --- a/include/fkYAML/detail/input/deserializer.hpp +++ b/include/fkYAML/detail/input/deserializer.hpp @@ -62,7 +62,7 @@ class basic_deserializer template ::value, int> = 0> BasicNodeType deserialize(InputAdapterType&& input_adapter) { - lexical_analyzer lexer(std::forward(input_adapter)); + lexical_analyzer lexer(std::forward(input_adapter)); BasicNodeType root = BasicNodeType::mapping(); m_current_node = &root; diff --git a/include/fkYAML/detail/input/input_adapter.hpp b/include/fkYAML/detail/input/input_adapter.hpp index 2ec49dfb..f5891344 100644 --- a/include/fkYAML/detail/input/input_adapter.hpp +++ b/include/fkYAML/detail/input/input_adapter.hpp @@ -18,8 +18,9 @@ #include #include +#include #include -#include +#include #include #include #include @@ -55,7 +56,7 @@ class iterator_input_adapter< /// @param begin The beginning of iteraters. /// @param end The end of iterators. /// @param encode_type The encoding type for this input adapter. - iterator_input_adapter(IterType begin, IterType end, encode_t encode_type) noexcept + iterator_input_adapter(IterType begin, IterType end, utf_encode_t encode_type) noexcept : m_current(begin), m_end(end), m_encode_type(encode_type) @@ -76,20 +77,15 @@ class iterator_input_adapter< typename std::char_traits::int_type ret = 0; switch (m_encode_type) { - case encode_t::UTF_8_N: - case encode_t::UTF_8_BOM: + case utf_encode_t::UTF_8: ret = get_character_for_utf8(); break; - case encode_t::UTF_16BE_N: - case encode_t::UTF_16BE_BOM: - case encode_t::UTF_16LE_N: - case encode_t::UTF_16LE_BOM: + case utf_encode_t::UTF_16BE: + case utf_encode_t::UTF_16LE: ret = get_character_for_utf16(); break; - case encode_t::UTF_32BE_N: - case encode_t::UTF_32BE_BOM: - case encode_t::UTF_32LE_N: - case encode_t::UTF_32LE_BOM: + case utf_encode_t::UTF_32BE: + case utf_encode_t::UTF_32LE: ret = get_character_for_utf32(); break; } @@ -101,6 +97,8 @@ class iterator_input_adapter< /// @return A UTF-8 encoded byte at the current position, or EOF. typename std::char_traits::int_type get_character_for_utf8() noexcept { + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_8); + if (m_current != m_end) { auto ret = std::char_traits::to_int_type(*m_current); @@ -114,6 +112,8 @@ class iterator_input_adapter< /// @return A UTF-8 encoded byte at the current position, or EOF. typename std::char_traits::int_type get_character_for_utf16() { + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_16BE || m_encode_type == utf_encode_t::UTF_16LE); + if (m_utf8_buf_index == m_utf8_buf_size) { if (m_current == m_end) @@ -126,25 +126,19 @@ class iterator_input_adapter< while (m_current != m_end && m_encoded_buf_size < 2) { - switch (m_encode_type) + if (m_encode_type == utf_encode_t::UTF_16BE) { - case encode_t::UTF_16BE_N: - case encode_t::UTF_16BE_BOM: m_encoded_buffer[m_encoded_buf_size] = char16_t(uint8_t(*m_current) << 8); ++m_current; m_encoded_buffer[m_encoded_buf_size] |= char16_t(*m_current); - break; - case encode_t::UTF_16LE_N: - case encode_t::UTF_16LE_BOM: { + } + else // m_encode_type == utf_encode_t::UTF_16LE + { m_encoded_buffer[m_encoded_buf_size] = char16_t(*m_current); ++m_current; m_encoded_buffer[m_encoded_buf_size] |= char16_t(uint8_t(*m_current) << 8); - break; - } - default: // LCOV_EXCL_LINE - // should not come here. - break; // LCOV_EXCL_LINE } + ++m_current; ++m_encoded_buf_size; } @@ -171,6 +165,8 @@ class iterator_input_adapter< /// @return A UTF-8 encoded byte at the current position, or EOF. typename std::char_traits::int_type get_character_for_utf32() { + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_32BE || m_encode_type == utf_encode_t::UTF_32LE); + if (m_utf8_buf_index == m_utf8_buf_size) { if (m_current == m_end) @@ -179,10 +175,8 @@ class iterator_input_adapter< } char32_t utf32 = 0; - switch (m_encode_type) + if (m_encode_type == utf_encode_t::UTF_32BE) { - case encode_t::UTF_32BE_N: - case encode_t::UTF_32BE_BOM: utf32 = char32_t(*m_current << 24); ++m_current; utf32 |= char32_t(*m_current << 16); @@ -190,9 +184,9 @@ class iterator_input_adapter< utf32 |= char32_t(*m_current << 8); ++m_current; utf32 |= char32_t(*m_current); - break; - case encode_t::UTF_32LE_N: - case encode_t::UTF_32LE_BOM: { + } + else // m_encode_type == utf_encode_t::UTF_32LE + { utf32 = char32_t(*m_current); ++m_current; utf32 |= char32_t(*m_current << 8); @@ -200,11 +194,6 @@ class iterator_input_adapter< utf32 |= char32_t(*m_current << 16); ++m_current; utf32 |= char32_t(*m_current << 24); - break; - } - default: // LCOV_EXCL_LINE - // should not come here. - break; // LCOV_EXCL_LINE } utf8_encoding::from_utf32(utf32, m_utf8_buffer, m_utf8_buf_size); @@ -223,7 +212,7 @@ class iterator_input_adapter< /// The iterator at the end of input. IterType m_end {}; /// The encoding type for this input adapter. - encode_t m_encode_type {encode_t::UTF_8_N}; + utf_encode_t m_encode_type {utf_encode_t::UTF_8}; /// The buffer for decoding characters read from the input. std::array m_encoded_buffer {{0, 0}}; /// The number of elements in `m_encoded_buffer`. @@ -256,11 +245,14 @@ class iterator_input_adapter< /// @param begin The beginning of iteraters. /// @param end The end of iterators. /// @param encode_type The encoding type for this input adapter. - iterator_input_adapter(IterType begin, IterType end, encode_t encode_type) noexcept + iterator_input_adapter(IterType begin, IterType end, utf_encode_t encode_type) noexcept : m_current(begin), m_end(end), m_encode_type(encode_type) { + // char8_t characters must be encoded in the UTF-8 format. + // See https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2018/p0482r6.html. + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_8); } // allow only move construct/assignment like other input adapters. @@ -273,30 +265,10 @@ class iterator_input_adapter< /// @brief Get a character at the current position and move forward. /// @return std::char_traits::int_type A character or EOF. typename std::char_traits::int_type get_character() - { - typename std::char_traits::int_type ret = 0; - switch (m_encode_type) - { - case encode_t::UTF_8_N: - case encode_t::UTF_8_BOM: - ret = get_character_for_utf8(); - break; - default: // LCOV_EXCL_LINE - // char8_t characters must be encoded in the UTF-8 format. - // See https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2018/p0482r6.html. - break; // LCOV_EXCL_LINE - } - return ret; - } - -private: - /// @brief The concrete implementation of get_character() for UTF-8 encoded inputs. - /// @return A UTF-8 encoded byte at the current position, or EOF. - typename std::char_traits::int_type get_character_for_utf8() noexcept { if (m_current != m_end) { - auto ret = std::char_traits::to_int_type(*m_current); + auto ret = std::char_traits::to_int_type(char(*m_current)); ++m_current; return ret; } @@ -309,7 +281,7 @@ class iterator_input_adapter< /// The iterator at the end of input. IterType m_end {}; /// The encoding type for this input adapter. - encode_t m_encode_type {encode_t::UTF_8_N}; + utf_encode_t m_encode_type {utf_encode_t::UTF_8}; }; #endif // defined(FK_YAML_HAS_CHAR8_T) @@ -332,11 +304,12 @@ class iterator_input_adapter< /// @param begin The beginning of iteraters. /// @param end The end of iterators. /// @param encode_type The encoding type for this input adapter. - iterator_input_adapter(IterType begin, IterType end, encode_t encode_type) noexcept + iterator_input_adapter(IterType begin, IterType end, utf_encode_t encode_type) noexcept : m_current(begin), m_end(end), m_encode_type(encode_type) { + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_16BE || m_encode_type == utf_encode_t::UTF_16LE); } // allow only move construct/assignment like other input adapters. @@ -362,23 +335,17 @@ class iterator_input_adapter< while (m_current != m_end && m_encoded_buf_size < 2) { - switch (m_encode_type) + if (m_encode_type == utf_encode_t::UTF_16BE) { - case encode_t::UTF_16BE_N: - case encode_t::UTF_16BE_BOM: m_encoded_buffer[m_encoded_buf_size] = *m_current; - break; - case encode_t::UTF_16LE_N: - case encode_t::UTF_16LE_BOM: { + } + else // utf_encode_t::UTF_16LE + { char16_t tmp = *m_current; m_encoded_buffer[m_encoded_buf_size] = char16_t((tmp & 0x00FFu) << 8); m_encoded_buffer[m_encoded_buf_size] |= char16_t((tmp & 0xFF00u) >> 8); - break; - } - default: // LCOV_EXCL_LINE - // should not come here. - break; // LCOV_EXCL_LINE } + ++m_current; ++m_encoded_buf_size; } @@ -407,7 +374,7 @@ class iterator_input_adapter< /// The iterator at the end of input. IterType m_end {}; /// The encoding type for this input adapter. - encode_t m_encode_type {encode_t::UTF_16BE_N}; + utf_encode_t m_encode_type {utf_encode_t::UTF_16BE}; /// The buffer for decoding characters read from the input. std::array m_encoded_buffer {{0, 0}}; /// The number of elements in `m_encoded_buffer`. @@ -438,11 +405,12 @@ class iterator_input_adapter< /// @param begin The beginning of iteraters. /// @param end The end of iterators. /// @param encode_type The encoding type for this input adapter. - iterator_input_adapter(IterType begin, IterType end, encode_t encode_type) noexcept + iterator_input_adapter(IterType begin, IterType end, utf_encode_t encode_type) noexcept : m_current(begin), m_end(end), m_encode_type(encode_type) { + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_32BE || m_encode_type == utf_encode_t::UTF_32LE); } // allow only move construct/assignment like other input adapters. @@ -464,24 +432,17 @@ class iterator_input_adapter< } char32_t utf32 = 0; - switch (m_encode_type) + if (m_encode_type == utf_encode_t::UTF_32BE) { - case encode_t::UTF_32BE_N: - case encode_t::UTF_32BE_BOM: utf32 = *m_current; - break; - case encode_t::UTF_32LE_N: - case encode_t::UTF_32LE_BOM: { + } + else // m_encode_type == utf_encode_t::UTF_32LE + { char32_t tmp = *m_current; utf32 |= char32_t((tmp & 0xFF000000u) >> 24); utf32 |= char32_t((tmp & 0x00FF0000u) >> 8); utf32 |= char32_t((tmp & 0x0000FF00u) << 8); utf32 |= char32_t((tmp & 0x000000FFu) << 24); - break; - } - default: // LCOV_EXCL_LINE - // should not come here. - break; // LCOV_EXCL_LINE } utf8_encoding::from_utf32(utf32, m_utf8_buffer, m_utf8_buf_size); @@ -500,7 +461,7 @@ class iterator_input_adapter< /// The iterator at the end of input. IterType m_end {}; /// The encoding type for this input adapter. - encode_t m_encode_type {encode_t::UTF_32BE_N}; + utf_encode_t m_encode_type {utf_encode_t::UTF_32BE}; /// The buffer for UTF-8 encoded characters. std::array m_utf8_buffer {{0, 0, 0, 0}}; /// The next index in `m_utf8_buffer` to read. @@ -525,7 +486,7 @@ class file_input_adapter /// It's user's responsibility to call those functions. /// @param file A file handle for this adapter. (A non-null pointer is assumed.) /// @param encode_type The encoding type for this input adapter. - explicit file_input_adapter(std::FILE* file, encode_t encode_type) noexcept + explicit file_input_adapter(std::FILE* file, utf_encode_t encode_type) noexcept : m_file(file), m_encode_type(encode_type) { @@ -545,20 +506,15 @@ class file_input_adapter typename std::char_traits::int_type ret = 0; switch (m_encode_type) { - case encode_t::UTF_8_N: - case encode_t::UTF_8_BOM: + case utf_encode_t::UTF_8: ret = get_character_for_utf8(); break; - case encode_t::UTF_16BE_N: - case encode_t::UTF_16BE_BOM: - case encode_t::UTF_16LE_N: - case encode_t::UTF_16LE_BOM: + case utf_encode_t::UTF_16BE: + case utf_encode_t::UTF_16LE: ret = get_character_for_utf16(); break; - case encode_t::UTF_32BE_N: - case encode_t::UTF_32BE_BOM: - case encode_t::UTF_32LE_N: - case encode_t::UTF_32LE_BOM: + case utf_encode_t::UTF_32BE: + case utf_encode_t::UTF_32LE: ret = get_character_for_utf32(); break; } @@ -570,6 +526,8 @@ class file_input_adapter /// @return A UTF-8 encoded byte at the current position, or EOF. typename std::char_traits::int_type get_character_for_utf8() noexcept { + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_8); + char ch = 0; size_t size = std::fread(&ch, sizeof(char), 1, m_file); if (size == 1) @@ -583,27 +541,22 @@ class file_input_adapter /// @return A UTF-8 encoded byte at the current position, or EOF. typename std::char_traits::int_type get_character_for_utf16() { + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_16BE || m_encode_type == utf_encode_t::UTF_16LE); + if (m_utf8_buf_index == m_utf8_buf_size) { char chars[2] = {0, 0}; while (m_encoded_buf_size < 2 && std::fread(&chars[0], sizeof(char), 2, m_file) == 2) { - switch (m_encode_type) + if (m_encode_type == utf_encode_t::UTF_16BE) { - case encode_t::UTF_16BE_N: - case encode_t::UTF_16BE_BOM: m_encoded_buffer[m_encoded_buf_size] = char16_t(uint8_t(chars[0]) << 8); m_encoded_buffer[m_encoded_buf_size] |= char16_t(uint8_t(chars[1])); - break; - case encode_t::UTF_16LE_N: - case encode_t::UTF_16LE_BOM: { + } + else // m_encode_type == utf_encode_t::UTF_16LE + { m_encoded_buffer[m_encoded_buf_size] = char16_t(uint8_t(chars[0])); m_encoded_buffer[m_encoded_buf_size] |= char16_t(uint8_t(chars[1]) << 8); - break; - } - default: // LCOV_EXCL_LINE - // should not come here. - break; // LCOV_EXCL_LINE } ++m_encoded_buf_size; @@ -636,6 +589,8 @@ class file_input_adapter /// @return A UTF-8 encoded byte at the current position, or EOF. typename std::char_traits::int_type get_character_for_utf32() { + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_32BE || m_encode_type == utf_encode_t::UTF_32LE); + if (m_utf8_buf_index == m_utf8_buf_size) { char chars[4] = {0, 0, 0, 0}; @@ -646,26 +601,19 @@ class file_input_adapter } char32_t utf32 = 0; - switch (m_encode_type) + if (m_encode_type == utf_encode_t::UTF_32BE) { - case encode_t::UTF_32BE_N: - case encode_t::UTF_32BE_BOM: utf32 = char32_t(uint8_t(chars[0]) << 24); utf32 |= char32_t(uint8_t(chars[1]) << 16); utf32 |= char32_t(uint8_t(chars[2]) << 8); utf32 |= char32_t(uint8_t(chars[3])); - break; - case encode_t::UTF_32LE_N: - case encode_t::UTF_32LE_BOM: { + } + else // m_encode_type == utf_encode_t::UTF_32LE + { utf32 = char32_t(uint8_t(chars[0])); utf32 |= char32_t(uint8_t(chars[1]) << 8); utf32 |= char32_t(uint8_t(chars[2]) << 16); utf32 |= char32_t(uint8_t(chars[3]) << 24); - break; - } - default: // LCOV_EXCL_LINE - // should not come here. - break; // LCOV_EXCL_LINE } utf8_encoding::from_utf32(utf32, m_utf8_buffer, m_utf8_buf_size); @@ -681,7 +629,7 @@ class file_input_adapter /// A pointer to the input file handle. std::FILE* m_file {nullptr}; /// The encoding type for this input adapter. - encode_t m_encode_type {encode_t::UTF_8_N}; + utf_encode_t m_encode_type {utf_encode_t::UTF_8}; /// The buffer for decoding characters read from the input. std::array m_encoded_buffer {{0, 0}}; /// The number of elements in `m_encoded_buffer`. @@ -706,7 +654,7 @@ class stream_input_adapter /// @brief Construct a new stream_input_adapter object. /// @param is A reference to the target input stream. - explicit stream_input_adapter(std::istream& is, encode_t encode_type) noexcept + explicit stream_input_adapter(std::istream& is, utf_encode_t encode_type) noexcept : m_istream(&is), m_encode_type(encode_type) { @@ -726,20 +674,15 @@ class stream_input_adapter typename std::char_traits::int_type ret = 0; switch (m_encode_type) { - case encode_t::UTF_8_N: - case encode_t::UTF_8_BOM: + case utf_encode_t::UTF_8: ret = get_character_for_utf8(); break; - case encode_t::UTF_16BE_N: - case encode_t::UTF_16BE_BOM: - case encode_t::UTF_16LE_N: - case encode_t::UTF_16LE_BOM: + case utf_encode_t::UTF_16BE: + case utf_encode_t::UTF_16LE: ret = get_character_for_utf16(); break; - case encode_t::UTF_32BE_N: - case encode_t::UTF_32BE_BOM: - case encode_t::UTF_32LE_N: - case encode_t::UTF_32LE_BOM: + case utf_encode_t::UTF_32BE: + case utf_encode_t::UTF_32LE: ret = get_character_for_utf32(); break; } @@ -751,6 +694,7 @@ class stream_input_adapter /// @return A UTF-8 encoded byte at the current position, or EOF. typename std::char_traits::int_type get_character_for_utf8() noexcept { + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_8); return m_istream->get(); } @@ -758,6 +702,8 @@ class stream_input_adapter /// @return A UTF-8 encoded byte at the current position, or EOF. typename std::char_traits::int_type get_character_for_utf16() { + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_16BE || m_encode_type == utf_encode_t::UTF_16LE); + if (m_utf8_buf_index == m_utf8_buf_size) { while (m_encoded_buf_size < 2) @@ -774,22 +720,15 @@ class stream_input_adapter break; } - switch (m_encode_type) + if (m_encode_type == utf_encode_t::UTF_16BE) { - case encode_t::UTF_16BE_N: - case encode_t::UTF_16BE_BOM: m_encoded_buffer[m_encoded_buf_size] = char16_t(uint8_t(chars[0]) << 8); m_encoded_buffer[m_encoded_buf_size] |= char16_t(uint8_t(chars[1])); - break; - case encode_t::UTF_16LE_N: - case encode_t::UTF_16LE_BOM: { + } + else // m_encode_type == utf_encode_t::UTF_16LE + { m_encoded_buffer[m_encoded_buf_size] = char16_t(uint8_t(chars[0])); m_encoded_buffer[m_encoded_buf_size] |= char16_t(uint8_t(chars[1]) << 8); - break; - } - default: // LCOV_EXCL_LINE - // should not come here. - break; // LCOV_EXCL_LINE } ++m_encoded_buf_size; @@ -817,6 +756,8 @@ class stream_input_adapter /// @return A UTF-8 encoded byte at the current position, or EOF. typename std::char_traits::int_type get_character_for_utf32() { + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_32BE || m_encode_type == utf_encode_t::UTF_32LE); + if (m_utf8_buf_index == m_utf8_buf_size) { char ch = 0; @@ -828,10 +769,8 @@ class stream_input_adapter } char32_t utf32 = 0; - switch (m_encode_type) + if (m_encode_type == utf_encode_t::UTF_32BE) { - case encode_t::UTF_32BE_N: - case encode_t::UTF_32BE_BOM: utf32 = char32_t(ch << 24); m_istream->read(&ch, 1); utf32 |= char32_t(ch << 16); @@ -839,9 +778,9 @@ class stream_input_adapter utf32 |= char32_t(ch << 8); m_istream->read(&ch, 1); utf32 |= char32_t(ch); - break; - case encode_t::UTF_32LE_N: - case encode_t::UTF_32LE_BOM: { + } + else // m_encode_type == utf_encode_t::UTF_32LE + { utf32 = char32_t(ch); m_istream->read(&ch, 1); utf32 |= char32_t(ch << 8); @@ -849,11 +788,6 @@ class stream_input_adapter utf32 |= char32_t(ch << 16); m_istream->read(&ch, 1); utf32 |= char32_t(ch << 24); - break; - } - default: // LCOV_EXCL_LINE - // should not come here. - break; // LCOV_EXCL_LINE } utf8_encoding::from_utf32(utf32, m_utf8_buffer, m_utf8_buf_size); @@ -869,7 +803,7 @@ class stream_input_adapter /// A pointer to the input stream object. std::istream* m_istream {nullptr}; /// The encoding type for this input adapter. - encode_t m_encode_type {encode_t::UTF_8_N}; + utf_encode_t m_encode_type {utf_encode_t::UTF_8}; /// The buffer for decoding characters read from the input. std::array m_encoded_buffer {{0, 0}}; /// The number of elements in `m_encoded_buffer`. @@ -894,7 +828,7 @@ class stream_input_adapter template ())))> inline iterator_input_adapter input_adapter(ItrType begin, ItrType end) { - encode_t encode_type = detect_encoding_and_skip_bom(begin, end); + utf_encode_t encode_type = detect_encoding_and_skip_bom(begin, end); return iterator_input_adapter(begin, end, encode_type); } @@ -964,7 +898,7 @@ inline file_input_adapter input_adapter(std::FILE* file) { throw fkyaml::exception("Invalid FILE object pointer."); } - encode_t encode_type = detect_encoding_and_skip_bom(file); + utf_encode_t encode_type = detect_encoding_and_skip_bom(file); return file_input_adapter(file, encode_type); } @@ -973,7 +907,7 @@ inline file_input_adapter input_adapter(std::FILE* file) /// @return stream_input_adapter inline stream_input_adapter input_adapter(std::istream& stream) noexcept { - encode_t encode_type = detect_encoding_and_skip_bom(stream); + utf_encode_t encode_type = detect_encoding_and_skip_bom(stream); return stream_input_adapter(stream, encode_type); } diff --git a/include/fkYAML/detail/input/input_handler.hpp b/include/fkYAML/detail/input/input_handler.hpp index b9caf0e3..91f0ccf7 100644 --- a/include/fkYAML/detail/input/input_handler.hpp +++ b/include/fkYAML/detail/input/input_handler.hpp @@ -27,21 +27,12 @@ namespace detail { /// @brief An input buffer handler. -/// @tparam InputAdapterType The type of the input adapter. -template ::value, int> = 0> class input_handler { -public: +private: /// The type of character traits of the input buffer. - using char_traits_type = std::char_traits; - /// The type of characters of the input buffer. - using char_type = typename char_traits_type::char_type; - /// The type of integers for the input buffer. - using int_type = typename char_traits_type::int_type; - /// The type of strings of the input buffer. - using string_type = std::basic_string; + using char_traits_type = std::char_traits; -private: /// @brief A set of information on the current position in an input buffer. struct position { @@ -55,82 +46,87 @@ class input_handler public: /// @brief Construct a new input_handler object. + /// @tparam InputAdapterType The type of the input adapter. /// @param input_adapter An input adapter object + template ::value, int> = 0> explicit input_handler(InputAdapterType&& input_adapter) - : m_input_adapter(std::move(input_adapter)) + : m_buffer_size(0) { - get_next(); - m_position.cur_pos = m_position.cur_pos_in_line = m_position.lines_read = 0; + m_buffer.clear(); + + int ch = s_end_of_input; + while ((ch = input_adapter.get_character()) != s_end_of_input) + { + m_buffer.push_back(char_traits_type::to_char_type(ch)); + m_buffer_size++; + } } /// @brief Get the character at the current position. - /// @return int_type A character or EOF. - int_type get_current() const noexcept + /// @return int A character or EOF. + int get_current() const noexcept { - return m_cache[m_position.cur_pos]; + if (m_position.cur_pos == m_buffer_size) + { + return s_end_of_input; + } + return char_traits_type::to_int_type(m_buffer[m_position.cur_pos]); } /// @brief Get the character at next position. - /// @return int_type A character or EOF. - int_type get_next() + /// @return int A character or EOF. + int get_next() { - int_type ret = end_of_input; - - // if already cached, return the cached value. - if (m_position.cur_pos + 1 < m_cache.size()) + // if all the input has already been consumed, return the EOF. + if (m_position.cur_pos == m_buffer_size - 1) { - ret = m_cache[++m_position.cur_pos]; - ++m_position.cur_pos_in_line; + m_position.cur_pos++; + m_position.cur_pos_in_line++; + return s_end_of_input; } - else + + if (m_position.cur_pos == m_buffer_size) { - ret = m_input_adapter.get_character(); - if (ret != end_of_input || m_cache[m_position.cur_pos] != end_of_input) - { - // cache the return value for possible later use. - m_cache.push_back(ret); - ++m_position.cur_pos; - ++m_position.cur_pos_in_line; - } + return s_end_of_input; } - if (m_cache[m_position.cur_pos - 1] == '\n') + if (m_buffer[m_position.cur_pos] == '\n') { m_position.cur_pos_in_line = 0; ++m_position.lines_read; } + else + { + m_position.cur_pos_in_line++; + } - return ret; + return char_traits_type::to_int_type(m_buffer[++m_position.cur_pos]); } /// @brief Get the characters in the given range. /// @param length The length of characters retrieved from the current position. /// @param str A string which will contain the resulting characters. - /// @return int_type 0 (for success) or EOF (for error). - int_type get_range(std::size_t length, string_type& str) + /// @return int 0 (for success) or EOF (for error). + int get_range(std::size_t length, std::string& str) { str.clear(); - if (get_current() == end_of_input) + if (length == 0) { - return end_of_input; + // regard this case as successful in getting zero characters. + return 0; } - str += char_traits_type::to_char_type(get_current()); + if (m_position.cur_pos + length - 1 >= m_buffer_size) + { + return s_end_of_input; + } + + str += m_buffer[m_position.cur_pos]; for (std::size_t i = 1; i < length; i++) { - if (get_next() == end_of_input) - { - // m_cur_pos -= i; - for (std::size_t j = i; j > 0; j--) - { - unget(); - } - str.clear(); - return end_of_input; - } - str += char_traits_type::to_char_type(get_current()); + str += char_traits_type::to_char_type(get_next()); } return 0; @@ -141,16 +137,15 @@ class input_handler { if (m_position.cur_pos > 0) { - // just move back the cursor. (no action for adapter) --m_position.cur_pos; --m_position.cur_pos_in_line; - if (m_cache[m_position.cur_pos] == '\n') + if (m_buffer[m_position.cur_pos] == '\n') { --m_position.lines_read; m_position.cur_pos_in_line = 0; if (m_position.cur_pos > 0) { - for (std::size_t i = m_position.cur_pos - 1; m_cache[i] != '\n'; i--) + for (std::size_t i = m_position.cur_pos - 1; m_buffer[i] != '\n'; i--) { if (i == 0) { @@ -168,7 +163,8 @@ class input_handler /// @param length The length of moving backward. void unget_range(std::size_t length) { - for (std::size_t i = 0; i < length; i++) + size_t unget_num = (m_position.cur_pos < length) ? m_position.cur_pos : length; + for (std::size_t i = 0; i < unget_num; i++) { unget(); } @@ -178,23 +174,15 @@ class input_handler /// @param expected An expected next character. /// @return true The next character is the expected one. /// @return false The next character is not the expected one. - bool test_next_char(char_type expected) + bool test_next_char(char expected) { - if (get_current() == end_of_input) + if (m_position.cur_pos >= m_buffer_size - 1) { + // there is no input character left. return false; } - int_type next = get_next(); - if (next == end_of_input) - { - unget(); - return false; - } - - bool ret = char_traits_type::eq(char_traits_type::to_char_type(next), expected); - unget(); - return ret; + return char_traits_type::eq(m_buffer[m_position.cur_pos + 1], expected); } /// @brief Get the current position in the current line. @@ -213,12 +201,12 @@ class input_handler private: /// The value of EOF for the target character type. - static constexpr int_type end_of_input = char_traits_type::eof(); + static constexpr int s_end_of_input = char_traits_type::eof(); - /// An input adapter object. - InputAdapterType m_input_adapter {}; - /// Cached characters retrieved from an input adapter object. - std::vector m_cache {}; + /// The input buffer retrieved from an input adapter object. + std::string m_buffer {}; + /// The size of the buffer. + std::size_t m_buffer_size {0}; /// The current position in an input buffer. position m_position {}; }; diff --git a/include/fkYAML/detail/input/lexical_analyzer.hpp b/include/fkYAML/detail/input/lexical_analyzer.hpp index 80c1e4f8..c0bd5dab 100644 --- a/include/fkYAML/detail/input/lexical_analyzer.hpp +++ b/include/fkYAML/detail/input/lexical_analyzer.hpp @@ -45,17 +45,11 @@ namespace detail /// @brief A class which lexically analizes YAML formatted inputs. /// @tparam BasicNodeType A type of the container for YAML values. -template < - typename BasicNodeType, typename InputAdapterType, - enable_if_t, is_input_adapter>::value, int> = 0> +template ::value, int> = 0> class lexical_analyzer { private: - using input_handler_type = input_handler; - using char_traits_type = typename input_handler_type::char_traits_type; - using char_type = typename char_traits_type::char_type; - using char_int_type = typename char_traits_type::int_type; - using input_string_type = typename input_handler_type::string_type; + using char_traits_type = typename std::char_traits; enum class block_style_indicator_t { @@ -77,7 +71,9 @@ class lexical_analyzer using string_type = typename BasicNodeType::string_type; /// @brief Construct a new lexical_analyzer object. + /// @tparam InputAdapterType The type of the input adapter. /// @param input_adapter An input adapter object. + template ::value, int> = 0> explicit lexical_analyzer(InputAdapterType&& input_adapter) : m_input_handler(std::move(input_adapter)) { @@ -89,7 +85,7 @@ class lexical_analyzer { skip_white_spaces_and_newline_codes(); - char_int_type current = m_input_handler.get_current(); + int current = m_input_handler.get_current(); m_last_token_begin_pos = m_input_handler.get_cur_pos_in_line(); m_last_token_begin_line = m_input_handler.get_lines_read(); @@ -141,37 +137,6 @@ class lexical_analyzer return m_last_token_type = lexical_token_t::KEY_SEPARATOR; } - // switch (m_input_handler.get_next()) - // { - // case ' ': { - // size_t prev_pos = m_input_handler.get_lines_read(); - // skip_white_spaces_and_comments(); - // size_t cur_pos = m_input_handler.get_lines_read(); - // if (prev_pos == cur_pos) - // { - // current = m_input_handler.get_current(); - // if (current != '\r' && current != '\n') - // { - // return m_last_token_type = lexical_token_t::KEY_SEPARATOR; - // } - // } - // return m_last_token_type = lexical_token_t::MAPPING_BLOCK_PREFIX; - // } - // case '\r': { - // char_int_type next = m_input_handler.get_next(); - // if (next == '\n') - // { - // m_input_handler.get_next(); - // } - // return m_last_token_type = lexical_token_t::MAPPING_BLOCK_PREFIX; - // } - // case '\n': - // m_input_handler.get_next(); - // return m_last_token_type = lexical_token_t::MAPPING_BLOCK_PREFIX; - // default: - // emit_error("Half-width spaces or newline codes are required after a key separater(:)."); - // } - case ',': // value separater m_input_handler.get_next(); return m_last_token_type = lexical_token_t::VALUE_SEPARATOR; @@ -179,7 +144,7 @@ class lexical_analyzer m_value_buffer.clear(); while (true) { - char_int_type next = m_input_handler.get_next(); + int next = m_input_handler.get_next(); if (next == s_end_of_input || next == '\r' || next == '\n') { emit_error("An anchor label must be followed by some value."); @@ -197,7 +162,7 @@ class lexical_analyzer m_value_buffer.clear(); while (true) { - char_int_type next = m_input_handler.get_next(); + int next = m_input_handler.get_next(); if (next == ' ' || next == '\r' || next == '\n' || next == s_end_of_input) { if (m_value_buffer.empty()) @@ -217,7 +182,7 @@ class lexical_analyzer case '%': // directive prefix return m_last_token_type = scan_directive(); case '-': { - char_int_type next = m_input_handler.get_next(); + int next = m_input_handler.get_next(); if (next == ' ') { // Move a cursor to the beginning of the next token. @@ -231,7 +196,7 @@ class lexical_analyzer return m_last_token_type = scan_number(); } - char_int_type ret = m_input_handler.get_range(3, m_value_buffer); + int ret = m_input_handler.get_range(3, m_value_buffer); if (ret != s_end_of_input) { if (m_value_buffer == "---") @@ -279,7 +244,7 @@ class lexical_analyzer case '+': return m_last_token_type = scan_number(); case '.': { - char_int_type ret = m_input_handler.get_range(3, m_value_buffer); + int ret = m_input_handler.get_range(3, m_value_buffer); if (ret != s_end_of_input) { if (m_value_buffer == "...") @@ -377,7 +342,7 @@ class lexical_analyzer const string_type& get_string() const noexcept { // TODO: Provide support for different string types between nodes & inputs. - static_assert(std::is_same::value, "Unsupported, different string types."); + static_assert(std::is_same::value, "Unsupported, different string types."); return m_value_buffer; } @@ -395,7 +360,7 @@ class lexical_analyzer /// @brief A utility function to convert a hexadecimal character to an integer. /// @param source A hexadecimal character ('0'~'9', 'A'~'F', 'a'~'f') /// @return char A integer converted from @a source. - char convert_hex_char_to_byte(char_int_type source) const + char convert_hex_char_to_byte(int source) const { if ('0' <= source && source <= '9') { @@ -524,7 +489,7 @@ class lexical_analyzer { m_value_buffer.clear(); - char_int_type current = m_input_handler.get_current(); + int current = m_input_handler.get_current(); FK_YAML_ASSERT(std::isdigit(current) || current == '-' || current == '+'); lexical_token_t ret = lexical_token_t::END_OF_BUFFER; @@ -576,7 +541,7 @@ class lexical_analyzer /// @return lexical_token_t The lexical token type for either integer or float numbers. lexical_token_t scan_negative_number() { - char_int_type next = m_input_handler.get_next(); + int next = m_input_handler.get_next(); // The value of `next` must be guranteed to be a digit in the get_next_token() function. FK_YAML_ASSERT(std::isdigit(next)); @@ -588,7 +553,7 @@ class lexical_analyzer /// @return lexical_token_t The lexical token type for one of number types(integer/float). lexical_token_t scan_number_after_zero_at_first() { - char_int_type next = m_input_handler.get_next(); + int next = m_input_handler.get_next(); switch (next) { case '.': @@ -611,7 +576,7 @@ class lexical_analyzer /// @return lexical_token_t The lexical token type for float numbers. lexical_token_t scan_decimal_number_after_decimal_point() { - char_int_type next = m_input_handler.get_next(); + int next = m_input_handler.get_next(); if (std::isdigit(next)) { @@ -627,7 +592,7 @@ class lexical_analyzer /// @return lexical_token_t The lexical token type for float numbers. lexical_token_t scan_decimal_number_after_exponent() { - char_int_type next = m_input_handler.get_next(); + int next = m_input_handler.get_next(); if (next == '+' || next == '-') { m_value_buffer.push_back(char_traits_type::to_char_type(next)); @@ -649,7 +614,7 @@ class lexical_analyzer /// @return lexical_token_t The lexical token type for one of number types(integer/float) lexical_token_t scan_decimal_number_after_sign() { - char_int_type next = m_input_handler.get_next(); + int next = m_input_handler.get_next(); if (std::isdigit(next)) { @@ -664,7 +629,7 @@ class lexical_analyzer /// @return lexical_token_t The lexical token type for one of number types(integer/float) lexical_token_t scan_decimal_number() { - char_int_type next = m_input_handler.get_next(); + int next = m_input_handler.get_next(); if (std::isdigit(next)) { @@ -697,7 +662,7 @@ class lexical_analyzer /// @return lexical_token_t The lexical token type for integers. lexical_token_t scan_octal_number() { - char_int_type next = m_input_handler.get_next(); + int next = m_input_handler.get_next(); if ('0' <= next && next <= '7') { m_value_buffer.push_back(char_traits_type::to_char_type(next)); @@ -710,7 +675,7 @@ class lexical_analyzer /// @return lexical_token_t The lexical token type for integers. lexical_token_t scan_hexadecimal_number() { - char_int_type next = m_input_handler.get_next(); + int next = m_input_handler.get_next(); if (std::isxdigit(next)) { m_value_buffer.push_back(char_traits_type::to_char_type(next)); @@ -796,7 +761,7 @@ class lexical_analyzer /// @return lexical_token_t The lexical token type for strings. lexical_token_t extract_string_token(bool needs_last_single_quote, bool needs_last_double_quote) { - char_int_type current = m_input_handler.get_current(); + int current = m_input_handler.get_current(); for (;; current = m_input_handler.get_next()) { @@ -842,7 +807,7 @@ class lexical_analyzer // " :" is permitted in a plain style string token, but not when followed by a space. if (current == ':') { - char_int_type next = m_input_handler.get_next(); + int next = m_input_handler.get_next(); m_input_handler.unget(); if (next == ' ') { @@ -907,7 +872,7 @@ class lexical_analyzer continue; } - char_int_type next = m_input_handler.get_next(); + int next = m_input_handler.get_next(); m_input_handler.unget(); // A colon as a key separator must be followed by a space or a newline code. @@ -1005,7 +970,7 @@ class lexical_analyzer m_value_buffer.push_back('\r'); break; case 'e': - m_value_buffer.push_back(char_type(0x1B)); + m_value_buffer.push_back(char(0x1B)); break; case ' ': m_value_buffer.push_back(' '); @@ -1079,7 +1044,7 @@ class lexical_analyzer // Handle 2-byte characters encoded in UTF-8. (U+0080..U+07FF) if (current <= 0xDF) { - std::array byte_array = {{current, m_input_handler.get_next()}}; + std::array byte_array = {{current, m_input_handler.get_next()}}; if (!utf8_encoding::validate(byte_array)) { throw fkyaml::invalid_encoding("ill-formed UTF-8 encoded character found", byte_array); @@ -1093,8 +1058,7 @@ class lexical_analyzer // Handle 3-byte characters encoded in UTF-8. (U+1000..U+D7FF,U+E000..U+FFFF) if (current <= 0xEF) { - std::array byte_array = { - {current, m_input_handler.get_next(), m_input_handler.get_next()}}; + std::array byte_array = {{current, m_input_handler.get_next(), m_input_handler.get_next()}}; if (!utf8_encoding::validate(byte_array)) { throw fkyaml::invalid_encoding("ill-formed UTF-8 encoded character found", byte_array); @@ -1108,7 +1072,7 @@ class lexical_analyzer } // Handle 4-byte characters encoded in UTF-8. (U+10000..U+FFFFF,U+100000..U+10FFFF) - std::array byte_array = { + std::array byte_array = { {current, m_input_handler.get_next(), m_input_handler.get_next(), m_input_handler.get_next()}}; if (!utf8_encoding::validate(byte_array)) { @@ -1128,7 +1092,7 @@ class lexical_analyzer m_value_buffer.clear(); // Handle leading all-space lines. - char_int_type current = m_input_handler.get_current(); + int current = m_input_handler.get_current(); for (;; current = m_input_handler.get_next()) { if (current == ' ') @@ -1354,7 +1318,7 @@ class lexical_analyzer /// @brief Handle unescaped control characters. /// @param c A target character. - void handle_unescaped_control_char(char_int_type c) + void handle_unescaped_control_char(int c) { FK_YAML_ASSERT(0x00 <= c && c <= 0x1F); @@ -1446,7 +1410,7 @@ class lexical_analyzer void get_block_style_metadata(chomping_indicator_t& chomp_type, std::size_t& indent) { - char_int_type ch = m_input_handler.get_next(); + int ch = m_input_handler.get_next(); chomp_type = chomping_indicator_t::CLIP; if (ch == '-') @@ -1558,12 +1522,12 @@ class lexical_analyzer private: /// The value of EOF for the target characters. - static constexpr char_int_type s_end_of_input = char_traits_type::eof(); + static constexpr int s_end_of_input = char_traits_type::eof(); /// An input buffer adapter to be analyzed. - input_handler_type m_input_handler; + input_handler m_input_handler; /// A temporal buffer to store a string to be parsed to an actual datum. - input_string_type m_value_buffer {}; + std::string m_value_buffer {}; /// A temporal buffer to store a UTF-8 encoded char sequence. std::array m_encode_buffer {}; /// The actual size of a UTF-8 encoded char sequence. diff --git a/include/fkYAML/detail/output/serializer.hpp b/include/fkYAML/detail/output/serializer.hpp index 51bdc7ac..120658f1 100644 --- a/include/fkYAML/detail/output/serializer.hpp +++ b/include/fkYAML/detail/output/serializer.hpp @@ -340,7 +340,7 @@ class basic_serializer } auto adapter = input_adapter(s); - lexical_analyzer lexer(std::move(adapter)); + lexical_analyzer lexer(std::move(adapter)); lexical_token_t token_type = lexer.get_next_token(); if (token_type != lexical_token_t::STRING_VALUE) diff --git a/single_include/fkYAML/node.hpp b/single_include/fkYAML/node.hpp index 10806ff6..55f3cca9 100644 --- a/single_include/fkYAML/node.hpp +++ b/single_include/fkYAML/node.hpp @@ -1731,21 +1731,12 @@ namespace detail { /// @brief An input buffer handler. -/// @tparam InputAdapterType The type of the input adapter. -template ::value, int> = 0> class input_handler { -public: +private: /// The type of character traits of the input buffer. - using char_traits_type = std::char_traits; - /// The type of characters of the input buffer. - using char_type = typename char_traits_type::char_type; - /// The type of integers for the input buffer. - using int_type = typename char_traits_type::int_type; - /// The type of strings of the input buffer. - using string_type = std::basic_string; + using char_traits_type = std::char_traits; -private: /// @brief A set of information on the current position in an input buffer. struct position { @@ -1759,82 +1750,87 @@ class input_handler public: /// @brief Construct a new input_handler object. + /// @tparam InputAdapterType The type of the input adapter. /// @param input_adapter An input adapter object + template ::value, int> = 0> explicit input_handler(InputAdapterType&& input_adapter) - : m_input_adapter(std::move(input_adapter)) + : m_buffer_size(0) { - get_next(); - m_position.cur_pos = m_position.cur_pos_in_line = m_position.lines_read = 0; + m_buffer.clear(); + + int ch = s_end_of_input; + while ((ch = input_adapter.get_character()) != s_end_of_input) + { + m_buffer.push_back(char_traits_type::to_char_type(ch)); + m_buffer_size++; + } } /// @brief Get the character at the current position. - /// @return int_type A character or EOF. - int_type get_current() const noexcept + /// @return int A character or EOF. + int get_current() const noexcept { - return m_cache[m_position.cur_pos]; + if (m_position.cur_pos == m_buffer_size) + { + return s_end_of_input; + } + return char_traits_type::to_int_type(m_buffer[m_position.cur_pos]); } /// @brief Get the character at next position. - /// @return int_type A character or EOF. - int_type get_next() + /// @return int A character or EOF. + int get_next() { - int_type ret = end_of_input; - - // if already cached, return the cached value. - if (m_position.cur_pos + 1 < m_cache.size()) + // if all the input has already been consumed, return the EOF. + if (m_position.cur_pos == m_buffer_size - 1) { - ret = m_cache[++m_position.cur_pos]; - ++m_position.cur_pos_in_line; + m_position.cur_pos++; + m_position.cur_pos_in_line++; + return s_end_of_input; } - else + + if (m_position.cur_pos == m_buffer_size) { - ret = m_input_adapter.get_character(); - if (ret != end_of_input || m_cache[m_position.cur_pos] != end_of_input) - { - // cache the return value for possible later use. - m_cache.push_back(ret); - ++m_position.cur_pos; - ++m_position.cur_pos_in_line; - } + return s_end_of_input; } - if (m_cache[m_position.cur_pos - 1] == '\n') + if (m_buffer[m_position.cur_pos] == '\n') { m_position.cur_pos_in_line = 0; ++m_position.lines_read; } + else + { + m_position.cur_pos_in_line++; + } - return ret; + return char_traits_type::to_int_type(m_buffer[++m_position.cur_pos]); } /// @brief Get the characters in the given range. /// @param length The length of characters retrieved from the current position. /// @param str A string which will contain the resulting characters. - /// @return int_type 0 (for success) or EOF (for error). - int_type get_range(std::size_t length, string_type& str) + /// @return int 0 (for success) or EOF (for error). + int get_range(std::size_t length, std::string& str) { str.clear(); - if (get_current() == end_of_input) + if (length == 0) + { + // regard this case as successful in getting zero characters. + return 0; + } + + if (m_position.cur_pos + length - 1 >= m_buffer_size) { - return end_of_input; + return s_end_of_input; } - str += char_traits_type::to_char_type(get_current()); + str += m_buffer[m_position.cur_pos]; for (std::size_t i = 1; i < length; i++) { - if (get_next() == end_of_input) - { - // m_cur_pos -= i; - for (std::size_t j = i; j > 0; j--) - { - unget(); - } - str.clear(); - return end_of_input; - } - str += char_traits_type::to_char_type(get_current()); + str += char_traits_type::to_char_type(get_next()); } return 0; @@ -1845,16 +1841,15 @@ class input_handler { if (m_position.cur_pos > 0) { - // just move back the cursor. (no action for adapter) --m_position.cur_pos; --m_position.cur_pos_in_line; - if (m_cache[m_position.cur_pos] == '\n') + if (m_buffer[m_position.cur_pos] == '\n') { --m_position.lines_read; m_position.cur_pos_in_line = 0; if (m_position.cur_pos > 0) { - for (std::size_t i = m_position.cur_pos - 1; m_cache[i] != '\n'; i--) + for (std::size_t i = m_position.cur_pos - 1; m_buffer[i] != '\n'; i--) { if (i == 0) { @@ -1872,7 +1867,8 @@ class input_handler /// @param length The length of moving backward. void unget_range(std::size_t length) { - for (std::size_t i = 0; i < length; i++) + size_t unget_num = (m_position.cur_pos < length) ? m_position.cur_pos : length; + for (std::size_t i = 0; i < unget_num; i++) { unget(); } @@ -1882,23 +1878,15 @@ class input_handler /// @param expected An expected next character. /// @return true The next character is the expected one. /// @return false The next character is not the expected one. - bool test_next_char(char_type expected) + bool test_next_char(char expected) { - if (get_current() == end_of_input) + if (m_position.cur_pos >= m_buffer_size - 1) { + // there is no input character left. return false; } - int_type next = get_next(); - if (next == end_of_input) - { - unget(); - return false; - } - - bool ret = char_traits_type::eq(char_traits_type::to_char_type(next), expected); - unget(); - return ret; + return char_traits_type::eq(m_buffer[m_position.cur_pos + 1], expected); } /// @brief Get the current position in the current line. @@ -1917,12 +1905,12 @@ class input_handler private: /// The value of EOF for the target character type. - static constexpr int_type end_of_input = char_traits_type::eof(); + static constexpr int s_end_of_input = char_traits_type::eof(); - /// An input adapter object. - InputAdapterType m_input_adapter {}; - /// Cached characters retrieved from an input adapter object. - std::vector m_cache {}; + /// The input buffer retrieved from an input adapter object. + std::string m_buffer {}; + /// The size of the buffer. + std::size_t m_buffer_size {0}; /// The current position in an input buffer. position m_position {}; }; @@ -2204,17 +2192,11 @@ namespace detail /// @brief A class which lexically analizes YAML formatted inputs. /// @tparam BasicNodeType A type of the container for YAML values. -template < - typename BasicNodeType, typename InputAdapterType, - enable_if_t, is_input_adapter>::value, int> = 0> +template ::value, int> = 0> class lexical_analyzer { private: - using input_handler_type = input_handler; - using char_traits_type = typename input_handler_type::char_traits_type; - using char_type = typename char_traits_type::char_type; - using char_int_type = typename char_traits_type::int_type; - using input_string_type = typename input_handler_type::string_type; + using char_traits_type = typename std::char_traits; enum class block_style_indicator_t { @@ -2236,7 +2218,9 @@ class lexical_analyzer using string_type = typename BasicNodeType::string_type; /// @brief Construct a new lexical_analyzer object. + /// @tparam InputAdapterType The type of the input adapter. /// @param input_adapter An input adapter object. + template ::value, int> = 0> explicit lexical_analyzer(InputAdapterType&& input_adapter) : m_input_handler(std::move(input_adapter)) { @@ -2248,7 +2232,7 @@ class lexical_analyzer { skip_white_spaces_and_newline_codes(); - char_int_type current = m_input_handler.get_current(); + int current = m_input_handler.get_current(); m_last_token_begin_pos = m_input_handler.get_cur_pos_in_line(); m_last_token_begin_line = m_input_handler.get_lines_read(); @@ -2300,37 +2284,6 @@ class lexical_analyzer return m_last_token_type = lexical_token_t::KEY_SEPARATOR; } - // switch (m_input_handler.get_next()) - // { - // case ' ': { - // size_t prev_pos = m_input_handler.get_lines_read(); - // skip_white_spaces_and_comments(); - // size_t cur_pos = m_input_handler.get_lines_read(); - // if (prev_pos == cur_pos) - // { - // current = m_input_handler.get_current(); - // if (current != '\r' && current != '\n') - // { - // return m_last_token_type = lexical_token_t::KEY_SEPARATOR; - // } - // } - // return m_last_token_type = lexical_token_t::MAPPING_BLOCK_PREFIX; - // } - // case '\r': { - // char_int_type next = m_input_handler.get_next(); - // if (next == '\n') - // { - // m_input_handler.get_next(); - // } - // return m_last_token_type = lexical_token_t::MAPPING_BLOCK_PREFIX; - // } - // case '\n': - // m_input_handler.get_next(); - // return m_last_token_type = lexical_token_t::MAPPING_BLOCK_PREFIX; - // default: - // emit_error("Half-width spaces or newline codes are required after a key separater(:)."); - // } - case ',': // value separater m_input_handler.get_next(); return m_last_token_type = lexical_token_t::VALUE_SEPARATOR; @@ -2338,7 +2291,7 @@ class lexical_analyzer m_value_buffer.clear(); while (true) { - char_int_type next = m_input_handler.get_next(); + int next = m_input_handler.get_next(); if (next == s_end_of_input || next == '\r' || next == '\n') { emit_error("An anchor label must be followed by some value."); @@ -2356,7 +2309,7 @@ class lexical_analyzer m_value_buffer.clear(); while (true) { - char_int_type next = m_input_handler.get_next(); + int next = m_input_handler.get_next(); if (next == ' ' || next == '\r' || next == '\n' || next == s_end_of_input) { if (m_value_buffer.empty()) @@ -2376,7 +2329,7 @@ class lexical_analyzer case '%': // directive prefix return m_last_token_type = scan_directive(); case '-': { - char_int_type next = m_input_handler.get_next(); + int next = m_input_handler.get_next(); if (next == ' ') { // Move a cursor to the beginning of the next token. @@ -2390,7 +2343,7 @@ class lexical_analyzer return m_last_token_type = scan_number(); } - char_int_type ret = m_input_handler.get_range(3, m_value_buffer); + int ret = m_input_handler.get_range(3, m_value_buffer); if (ret != s_end_of_input) { if (m_value_buffer == "---") @@ -2438,7 +2391,7 @@ class lexical_analyzer case '+': return m_last_token_type = scan_number(); case '.': { - char_int_type ret = m_input_handler.get_range(3, m_value_buffer); + int ret = m_input_handler.get_range(3, m_value_buffer); if (ret != s_end_of_input) { if (m_value_buffer == "...") @@ -2536,7 +2489,7 @@ class lexical_analyzer const string_type& get_string() const noexcept { // TODO: Provide support for different string types between nodes & inputs. - static_assert(std::is_same::value, "Unsupported, different string types."); + static_assert(std::is_same::value, "Unsupported, different string types."); return m_value_buffer; } @@ -2554,7 +2507,7 @@ class lexical_analyzer /// @brief A utility function to convert a hexadecimal character to an integer. /// @param source A hexadecimal character ('0'~'9', 'A'~'F', 'a'~'f') /// @return char A integer converted from @a source. - char convert_hex_char_to_byte(char_int_type source) const + char convert_hex_char_to_byte(int source) const { if ('0' <= source && source <= '9') { @@ -2683,7 +2636,7 @@ class lexical_analyzer { m_value_buffer.clear(); - char_int_type current = m_input_handler.get_current(); + int current = m_input_handler.get_current(); FK_YAML_ASSERT(std::isdigit(current) || current == '-' || current == '+'); lexical_token_t ret = lexical_token_t::END_OF_BUFFER; @@ -2735,7 +2688,7 @@ class lexical_analyzer /// @return lexical_token_t The lexical token type for either integer or float numbers. lexical_token_t scan_negative_number() { - char_int_type next = m_input_handler.get_next(); + int next = m_input_handler.get_next(); // The value of `next` must be guranteed to be a digit in the get_next_token() function. FK_YAML_ASSERT(std::isdigit(next)); @@ -2747,7 +2700,7 @@ class lexical_analyzer /// @return lexical_token_t The lexical token type for one of number types(integer/float). lexical_token_t scan_number_after_zero_at_first() { - char_int_type next = m_input_handler.get_next(); + int next = m_input_handler.get_next(); switch (next) { case '.': @@ -2770,7 +2723,7 @@ class lexical_analyzer /// @return lexical_token_t The lexical token type for float numbers. lexical_token_t scan_decimal_number_after_decimal_point() { - char_int_type next = m_input_handler.get_next(); + int next = m_input_handler.get_next(); if (std::isdigit(next)) { @@ -2786,7 +2739,7 @@ class lexical_analyzer /// @return lexical_token_t The lexical token type for float numbers. lexical_token_t scan_decimal_number_after_exponent() { - char_int_type next = m_input_handler.get_next(); + int next = m_input_handler.get_next(); if (next == '+' || next == '-') { m_value_buffer.push_back(char_traits_type::to_char_type(next)); @@ -2808,7 +2761,7 @@ class lexical_analyzer /// @return lexical_token_t The lexical token type for one of number types(integer/float) lexical_token_t scan_decimal_number_after_sign() { - char_int_type next = m_input_handler.get_next(); + int next = m_input_handler.get_next(); if (std::isdigit(next)) { @@ -2823,7 +2776,7 @@ class lexical_analyzer /// @return lexical_token_t The lexical token type for one of number types(integer/float) lexical_token_t scan_decimal_number() { - char_int_type next = m_input_handler.get_next(); + int next = m_input_handler.get_next(); if (std::isdigit(next)) { @@ -2856,7 +2809,7 @@ class lexical_analyzer /// @return lexical_token_t The lexical token type for integers. lexical_token_t scan_octal_number() { - char_int_type next = m_input_handler.get_next(); + int next = m_input_handler.get_next(); if ('0' <= next && next <= '7') { m_value_buffer.push_back(char_traits_type::to_char_type(next)); @@ -2869,7 +2822,7 @@ class lexical_analyzer /// @return lexical_token_t The lexical token type for integers. lexical_token_t scan_hexadecimal_number() { - char_int_type next = m_input_handler.get_next(); + int next = m_input_handler.get_next(); if (std::isxdigit(next)) { m_value_buffer.push_back(char_traits_type::to_char_type(next)); @@ -2955,7 +2908,7 @@ class lexical_analyzer /// @return lexical_token_t The lexical token type for strings. lexical_token_t extract_string_token(bool needs_last_single_quote, bool needs_last_double_quote) { - char_int_type current = m_input_handler.get_current(); + int current = m_input_handler.get_current(); for (;; current = m_input_handler.get_next()) { @@ -3001,7 +2954,7 @@ class lexical_analyzer // " :" is permitted in a plain style string token, but not when followed by a space. if (current == ':') { - char_int_type next = m_input_handler.get_next(); + int next = m_input_handler.get_next(); m_input_handler.unget(); if (next == ' ') { @@ -3066,7 +3019,7 @@ class lexical_analyzer continue; } - char_int_type next = m_input_handler.get_next(); + int next = m_input_handler.get_next(); m_input_handler.unget(); // A colon as a key separator must be followed by a space or a newline code. @@ -3164,7 +3117,7 @@ class lexical_analyzer m_value_buffer.push_back('\r'); break; case 'e': - m_value_buffer.push_back(char_type(0x1B)); + m_value_buffer.push_back(char(0x1B)); break; case ' ': m_value_buffer.push_back(' '); @@ -3238,7 +3191,7 @@ class lexical_analyzer // Handle 2-byte characters encoded in UTF-8. (U+0080..U+07FF) if (current <= 0xDF) { - std::array byte_array = {{current, m_input_handler.get_next()}}; + std::array byte_array = {{current, m_input_handler.get_next()}}; if (!utf8_encoding::validate(byte_array)) { throw fkyaml::invalid_encoding("ill-formed UTF-8 encoded character found", byte_array); @@ -3252,8 +3205,7 @@ class lexical_analyzer // Handle 3-byte characters encoded in UTF-8. (U+1000..U+D7FF,U+E000..U+FFFF) if (current <= 0xEF) { - std::array byte_array = { - {current, m_input_handler.get_next(), m_input_handler.get_next()}}; + std::array byte_array = {{current, m_input_handler.get_next(), m_input_handler.get_next()}}; if (!utf8_encoding::validate(byte_array)) { throw fkyaml::invalid_encoding("ill-formed UTF-8 encoded character found", byte_array); @@ -3267,7 +3219,7 @@ class lexical_analyzer } // Handle 4-byte characters encoded in UTF-8. (U+10000..U+FFFFF,U+100000..U+10FFFF) - std::array byte_array = { + std::array byte_array = { {current, m_input_handler.get_next(), m_input_handler.get_next(), m_input_handler.get_next()}}; if (!utf8_encoding::validate(byte_array)) { @@ -3287,7 +3239,7 @@ class lexical_analyzer m_value_buffer.clear(); // Handle leading all-space lines. - char_int_type current = m_input_handler.get_current(); + int current = m_input_handler.get_current(); for (;; current = m_input_handler.get_next()) { if (current == ' ') @@ -3513,7 +3465,7 @@ class lexical_analyzer /// @brief Handle unescaped control characters. /// @param c A target character. - void handle_unescaped_control_char(char_int_type c) + void handle_unescaped_control_char(int c) { FK_YAML_ASSERT(0x00 <= c && c <= 0x1F); @@ -3605,7 +3557,7 @@ class lexical_analyzer void get_block_style_metadata(chomping_indicator_t& chomp_type, std::size_t& indent) { - char_int_type ch = m_input_handler.get_next(); + int ch = m_input_handler.get_next(); chomp_type = chomping_indicator_t::CLIP; if (ch == '-') @@ -3717,12 +3669,12 @@ class lexical_analyzer private: /// The value of EOF for the target characters. - static constexpr char_int_type s_end_of_input = char_traits_type::eof(); + static constexpr int s_end_of_input = char_traits_type::eof(); /// An input buffer adapter to be analyzed. - input_handler_type m_input_handler; + input_handler m_input_handler; /// A temporal buffer to store a string to be parsed to an actual datum. - input_string_type m_value_buffer {}; + std::string m_value_buffer {}; /// A temporal buffer to store a UTF-8 encoded char sequence. std::array m_encode_buffer {}; /// The actual size of a UTF-8 encoded char sequence. @@ -3837,7 +3789,7 @@ class basic_deserializer template ::value, int> = 0> BasicNodeType deserialize(InputAdapterType&& input_adapter) { - lexical_analyzer lexer(std::forward(input_adapter)); + lexical_analyzer lexer(std::forward(input_adapter)); BasicNodeType root = BasicNodeType::mapping(); m_current_node = &root; @@ -4351,6 +4303,8 @@ FK_YAML_NAMESPACE_END // #include +// #include + // #include /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library @@ -4370,7 +4324,7 @@ FK_YAML_NAMESPACE_END // #include -// #include +// #include /// _______ __ __ __ _____ __ __ __ /// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library /// | __| _ < \_ _/| ___ | _ | |___ version 0.3.2 @@ -4381,8 +4335,8 @@ FK_YAML_NAMESPACE_END /// /// @file -#ifndef FK_YAML_DETAIL_ENCODINGS_ENCODE_T_HPP_ -#define FK_YAML_DETAIL_ENCODINGS_ENCODE_T_HPP_ +#ifndef FK_YAML_DETAIL_ENCODINGS_UTF_ENCODE_T_HPP_ +#define FK_YAML_DETAIL_ENCODINGS_UTF_ENCODE_T_HPP_ // #include @@ -4396,25 +4350,20 @@ namespace detail /// @brief Definition of Unicode encoding types /// @note Since fkYAML doesn't treat UTF-16/UTF-32 encoded characters per byte, endians do not matter. -enum class encode_t -{ - UTF_8_N, //!< UTF-8 without BOM - UTF_8_BOM, //!< UTF-8 with BOM - UTF_16BE_N, //!< UTF-16BE without BOM - UTF_16BE_BOM, //!< UTF-16BE with BOM - UTF_16LE_N, //!< UTF-16LE without BOM - UTF_16LE_BOM, //!< UTF-16LE with BOM - UTF_32BE_N, //!< UTF-32BE without BOM - UTF_32BE_BOM, //!< UTF-32BE with BOM - UTF_32LE_N, //!< UTF-32LE without BOM - UTF_32LE_BOM, //!< UTF-32LE with BOM +enum class utf_encode_t +{ + UTF_8, //!< UTF-8 + UTF_16BE, //!< UTF-16 Big Endian + UTF_16LE, //!< UTF-16 Little Endian + UTF_32BE, //!< UTF-32 Big Endian + UTF_32LE, //!< UTF-32 Little Endian }; } // namespace detail FK_YAML_NAMESPACE_END -#endif /* FK_YAML_DETAIL_ENCODINGS_ENCODE_T_HPP_ */ +#endif /* FK_YAML_DETAIL_ENCODINGS_UTF_ENCODE_T_HPP_ */ // #include @@ -4428,63 +4377,68 @@ namespace detail /// @brief Detect an encoding type for UTF-8 expected inputs. /// @note This function doesn't support the case where the first character is null. -/// @param b0 The 1st byte of an input character sequence. -/// @param b1 The 2nd byte of an input character sequence. -/// @param b2 The 3rd byte of an input character sequence. -/// @param b3 The 4th byte of an input character sequence. +/// @param[in] bytes 4 bytes of an input character sequence. +/// @param[out] has_bom Whether or not the input contains a BOM. /// @return A detected encoding type. -inline encode_t detect_encoding_type(uint8_t b0, uint8_t b1, uint8_t b2, uint8_t b3) noexcept +inline utf_encode_t detect_encoding_type(const std::array& bytes, bool& has_bom) noexcept { + has_bom = false; + // Check if a BOM exists. - if (b0 == uint8_t(0xEFu) && b1 == uint8_t(0xBBu) && b2 == uint8_t(0xBFu)) + if (bytes[0] == uint8_t(0xEFu) && bytes[1] == uint8_t(0xBBu) && bytes[2] == uint8_t(0xBFu)) { - return encode_t::UTF_8_BOM; + has_bom = true; + return utf_encode_t::UTF_8; } - if (b0 == 0 && b1 == 0 && b2 == uint8_t(0xFEu) && b3 == uint8_t(0xFFu)) + if (bytes[0] == 0 && bytes[1] == 0 && bytes[2] == uint8_t(0xFEu) && bytes[3] == uint8_t(0xFFu)) { - return encode_t::UTF_32BE_BOM; + has_bom = true; + return utf_encode_t::UTF_32BE; } - if (b0 == uint8_t(0xFFu) && b1 == uint8_t(0xFEu) && b2 == 0 && b3 == 0) + if (bytes[0] == uint8_t(0xFFu) && bytes[1] == uint8_t(0xFEu) && bytes[2] == 0 && bytes[3] == 0) { - return encode_t::UTF_32LE_BOM; + has_bom = true; + return utf_encode_t::UTF_32LE; } - if (b0 == uint8_t(0xFEu) && b1 == uint8_t(0xFFu)) + if (bytes[0] == uint8_t(0xFEu) && bytes[1] == uint8_t(0xFFu)) { - return encode_t::UTF_16BE_BOM; + has_bom = true; + return utf_encode_t::UTF_16BE; } - if (b0 == uint8_t(0xFFu) && b1 == uint8_t(0xFEu)) + if (bytes[0] == uint8_t(0xFFu) && bytes[1] == uint8_t(0xFEu)) { - return encode_t::UTF_16LE_BOM; + has_bom = true; + return utf_encode_t::UTF_16LE; } // Test the first character assuming it's an ASCII character. - if (b0 == 0 && b1 == 0 && b2 == 0 && 0 < b3 && b3 < uint8_t(0x80u)) + if (bytes[0] == 0 && bytes[1] == 0 && bytes[2] == 0 && 0 < bytes[3] && bytes[3] < uint8_t(0x80u)) { - return encode_t::UTF_32BE_N; + return utf_encode_t::UTF_32BE; } - if (0 < b0 && b0 < uint8_t(0x80u) && b1 == 0 && b2 == 0 && b3 == 0) + if (0 < bytes[0] && bytes[0] < uint8_t(0x80u) && bytes[1] == 0 && bytes[2] == 0 && bytes[3] == 0) { - return encode_t::UTF_32LE_N; + return utf_encode_t::UTF_32LE; } - if (b0 == 0 && 0 < b1 && b1 < uint8_t(0x80u)) + if (bytes[0] == 0 && 0 < bytes[1] && bytes[1] < uint8_t(0x80u)) { - return encode_t::UTF_16BE_N; + return utf_encode_t::UTF_16BE; } - if (0 < b0 && b0 < uint8_t(0x80u) && b1 == 0) + if (0 < bytes[0] && bytes[0] < uint8_t(0x80u) && bytes[1] == 0) { - return encode_t::UTF_16LE_N; + return utf_encode_t::UTF_16LE; } - return encode_t::UTF_8_N; + return utf_encode_t::UTF_8; } /// @brief Detects the encoding type of the input, and consumes a BOM if it exists. @@ -4494,9 +4448,9 @@ inline encode_t detect_encoding_type(uint8_t b0, uint8_t b1, uint8_t b2, uint8_t /// @param end The end of input iterators. /// @return A detected encoding type. template ())))> -inline encode_t detect_encoding_and_skip_bom(ItrType& begin, const ItrType& end) +inline utf_encode_t detect_encoding_and_skip_bom(ItrType& begin, const ItrType& end) { - uint8_t bytes[4] = {0xFFu, 0xFFu, 0xFFu, 0xFFu}; + std::array bytes = {0xFFu, 0xFFu, 0xFFu, 0xFFu}; switch (ElemSize) { case sizeof(char): { // this case covers char8_t as well when compiled with C++20 features. @@ -4505,30 +4459,34 @@ inline encode_t detect_encoding_and_skip_bom(ItrType& begin, const ItrType& end) bytes[i] = uint8_t(begin[i]); } - encode_t encode_type = detect_encoding_type(bytes[0], bytes[1], bytes[2], bytes[3]); - switch (encode_type) + bool has_bom = false; + utf_encode_t encode_type = detect_encoding_type(bytes, has_bom); + + if (has_bom) { - case encode_t::UTF_8_BOM: - std::advance(begin, 3); - break; - case encode_t::UTF_16BE_BOM: - case encode_t::UTF_16LE_BOM: - std::advance(begin, 2); - break; - case encode_t::UTF_32BE_BOM: - case encode_t::UTF_32LE_BOM: - std::advance(begin, 4); - break; - default: - // Do nothing if a BOM doesn't exist. - break; + // skip reading the BOM. + switch (encode_type) + { + case utf_encode_t::UTF_8: + std::advance(begin, 3); + break; + case utf_encode_t::UTF_16BE: + case utf_encode_t::UTF_16LE: + std::advance(begin, 2); + break; + case utf_encode_t::UTF_32BE: + case utf_encode_t::UTF_32LE: + std::advance(begin, 4); + break; + } } + return encode_type; } case sizeof(char16_t): { if (begin == end) { - return encode_t::UTF_16BE_N; + return utf_encode_t::UTF_16BE; } for (int i = 0; i < 2 && begin + i != end; i++) { @@ -4536,45 +4494,47 @@ inline encode_t detect_encoding_and_skip_bom(ItrType& begin, const ItrType& end) bytes[i * 2 + 1] = uint8_t(begin[i] & 0xFFu); } - encode_t encode_type = detect_encoding_type(bytes[0], bytes[1], bytes[2], bytes[3]); - switch (encode_type) + bool has_bom = false; + utf_encode_t encode_type = detect_encoding_type(bytes, has_bom); + + if (encode_type != utf_encode_t::UTF_16BE && encode_type != utf_encode_t::UTF_16LE) { - case encode_t::UTF_16BE_BOM: - case encode_t::UTF_16LE_BOM: - std::advance(begin, 1); - break; - case encode_t::UTF_16BE_N: - case encode_t::UTF_16LE_N: - // Do nothing if a BOM doesn't exist. - break; - default: throw exception("char16_t characters must be encoded in the UTF-16 format."); } + + if (has_bom) + { + // skip reading the BOM. + std::advance(begin, 1); + } + return encode_type; } case sizeof(char32_t): { if (begin == end) { - return encode_t::UTF_32BE_N; + return utf_encode_t::UTF_32BE; } + bytes[0] = uint8_t((*begin & 0xFF000000u) >> 24); bytes[1] = uint8_t((*begin & 0x00FF0000u) >> 16); bytes[2] = uint8_t((*begin & 0x0000FF00u) >> 8); bytes[3] = uint8_t(*begin & 0x000000FFu); - encode_t encode_type = detect_encoding_type(bytes[0], bytes[1], bytes[2], bytes[3]); - switch (encode_type) + + bool has_bom = false; + utf_encode_t encode_type = detect_encoding_type(bytes, has_bom); + + if (encode_type != utf_encode_t::UTF_32BE && encode_type != utf_encode_t::UTF_32LE) { - case encode_t::UTF_32BE_BOM: - case encode_t::UTF_32LE_BOM: - std::advance(begin, 1); - break; - case encode_t::UTF_32BE_N: - case encode_t::UTF_32LE_N: - // Do nothing if a BOM doesn't exist. - break; - default: throw exception("char32_t characters must be encoded in the UTF-32 format."); } + + if (has_bom) + { + // skip reading the BOM. + std::advance(begin, 1); + } + return encode_type; } default: @@ -4582,9 +4542,9 @@ inline encode_t detect_encoding_and_skip_bom(ItrType& begin, const ItrType& end) } } -inline encode_t detect_encoding_and_skip_bom(std::FILE* file) noexcept +inline utf_encode_t detect_encoding_and_skip_bom(std::FILE* file) noexcept { - uint8_t bytes[4] = {0xFFu, 0xFFu, 0xFFu, 0xFFu}; + std::array bytes = {0xFFu, 0xFFu, 0xFFu, 0xFFu}; for (std::size_t i = 0; i < 4; i++) { char byte = 0; @@ -4596,32 +4556,36 @@ inline encode_t detect_encoding_and_skip_bom(std::FILE* file) noexcept bytes[i] = uint8_t(byte & 0xFF); } - encode_t encode_type = detect_encoding_type(bytes[0], bytes[1], bytes[2], bytes[3]); - switch (encode_type) - { - case encode_t::UTF_8_BOM: - fseek(file, 3, SEEK_SET); - break; - case encode_t::UTF_16BE_BOM: - case encode_t::UTF_16LE_BOM: - fseek(file, 2, SEEK_SET); - break; - case encode_t::UTF_32BE_BOM: - case encode_t::UTF_32LE_BOM: - fseek(file, 4, SEEK_SET); - break; - default: - // Move back to the beginning of the file contents if a BOM doesn't exist. - fseek(file, 0, SEEK_SET); - break; + bool has_bom = false; + utf_encode_t encode_type = detect_encoding_type(bytes, has_bom); + + // move back to the beginning if a BOM doesn't exist. + long offset = 0; + if (has_bom) + { + switch (encode_type) + { + case utf_encode_t::UTF_8: + offset = 3; + break; + case utf_encode_t::UTF_16BE: + case utf_encode_t::UTF_16LE: + offset = 2; + break; + case utf_encode_t::UTF_32BE: + case utf_encode_t::UTF_32LE: + offset = 4; + break; + } } + fseek(file, offset, SEEK_SET); return encode_type; } -inline encode_t detect_encoding_and_skip_bom(std::istream& is) noexcept +inline utf_encode_t detect_encoding_and_skip_bom(std::istream& is) noexcept { - uint8_t bytes[4] = {0xFFu, 0xFFu, 0xFFu, 0xFFu}; + std::array bytes = {0xFFu, 0xFFu, 0xFFu, 0xFFu}; for (std::size_t i = 0; i < 4; i++) { char ch = 0; @@ -4636,25 +4600,29 @@ inline encode_t detect_encoding_and_skip_bom(std::istream& is) noexcept bytes[i] = uint8_t(ch & 0xFF); } - encode_t encode_type = detect_encoding_type(bytes[0], bytes[1], bytes[2], bytes[3]); - switch (encode_type) - { - case encode_t::UTF_8_BOM: - is.seekg(3, std::ios_base::beg); - break; - case encode_t::UTF_16BE_BOM: - case encode_t::UTF_16LE_BOM: - is.seekg(2, std::ios_base::beg); - break; - case encode_t::UTF_32BE_BOM: - case encode_t::UTF_32LE_BOM: - is.seekg(4, std::ios_base::beg); - break; - default: - // Move back to the beginning of the file contents if a BOM doesn't exist. - is.seekg(0, std::ios_base::beg); - break; + bool has_bom = false; + utf_encode_t encode_type = detect_encoding_type(bytes, has_bom); + + // move back to the beginning if a BOM doesn't exist. + std::streamoff offset = 0; + if (has_bom) + { + switch (encode_type) + { + case utf_encode_t::UTF_8: + offset = 3; + break; + case utf_encode_t::UTF_16BE: + case utf_encode_t::UTF_16LE: + offset = 2; + break; + case utf_encode_t::UTF_32BE: + case utf_encode_t::UTF_32LE: + offset = 4; + break; + } } + is.seekg(offset, std::ios_base::beg); return encode_type; } @@ -4665,7 +4633,7 @@ FK_YAML_NAMESPACE_END #endif /* FK_YAML_DETAIL_ENCODINGS_ENCODE_DETECTOR_HPP_ */ -// #include +// #include // #include @@ -4705,7 +4673,7 @@ class iterator_input_adapter< /// @param begin The beginning of iteraters. /// @param end The end of iterators. /// @param encode_type The encoding type for this input adapter. - iterator_input_adapter(IterType begin, IterType end, encode_t encode_type) noexcept + iterator_input_adapter(IterType begin, IterType end, utf_encode_t encode_type) noexcept : m_current(begin), m_end(end), m_encode_type(encode_type) @@ -4726,20 +4694,15 @@ class iterator_input_adapter< typename std::char_traits::int_type ret = 0; switch (m_encode_type) { - case encode_t::UTF_8_N: - case encode_t::UTF_8_BOM: + case utf_encode_t::UTF_8: ret = get_character_for_utf8(); break; - case encode_t::UTF_16BE_N: - case encode_t::UTF_16BE_BOM: - case encode_t::UTF_16LE_N: - case encode_t::UTF_16LE_BOM: + case utf_encode_t::UTF_16BE: + case utf_encode_t::UTF_16LE: ret = get_character_for_utf16(); break; - case encode_t::UTF_32BE_N: - case encode_t::UTF_32BE_BOM: - case encode_t::UTF_32LE_N: - case encode_t::UTF_32LE_BOM: + case utf_encode_t::UTF_32BE: + case utf_encode_t::UTF_32LE: ret = get_character_for_utf32(); break; } @@ -4751,6 +4714,8 @@ class iterator_input_adapter< /// @return A UTF-8 encoded byte at the current position, or EOF. typename std::char_traits::int_type get_character_for_utf8() noexcept { + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_8); + if (m_current != m_end) { auto ret = std::char_traits::to_int_type(*m_current); @@ -4764,6 +4729,8 @@ class iterator_input_adapter< /// @return A UTF-8 encoded byte at the current position, or EOF. typename std::char_traits::int_type get_character_for_utf16() { + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_16BE || m_encode_type == utf_encode_t::UTF_16LE); + if (m_utf8_buf_index == m_utf8_buf_size) { if (m_current == m_end) @@ -4776,25 +4743,19 @@ class iterator_input_adapter< while (m_current != m_end && m_encoded_buf_size < 2) { - switch (m_encode_type) + if (m_encode_type == utf_encode_t::UTF_16BE) { - case encode_t::UTF_16BE_N: - case encode_t::UTF_16BE_BOM: m_encoded_buffer[m_encoded_buf_size] = char16_t(uint8_t(*m_current) << 8); ++m_current; m_encoded_buffer[m_encoded_buf_size] |= char16_t(*m_current); - break; - case encode_t::UTF_16LE_N: - case encode_t::UTF_16LE_BOM: { + } + else // m_encode_type == utf_encode_t::UTF_16LE + { m_encoded_buffer[m_encoded_buf_size] = char16_t(*m_current); ++m_current; m_encoded_buffer[m_encoded_buf_size] |= char16_t(uint8_t(*m_current) << 8); - break; - } - default: // LCOV_EXCL_LINE - // should not come here. - break; // LCOV_EXCL_LINE } + ++m_current; ++m_encoded_buf_size; } @@ -4821,6 +4782,8 @@ class iterator_input_adapter< /// @return A UTF-8 encoded byte at the current position, or EOF. typename std::char_traits::int_type get_character_for_utf32() { + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_32BE || m_encode_type == utf_encode_t::UTF_32LE); + if (m_utf8_buf_index == m_utf8_buf_size) { if (m_current == m_end) @@ -4829,10 +4792,8 @@ class iterator_input_adapter< } char32_t utf32 = 0; - switch (m_encode_type) + if (m_encode_type == utf_encode_t::UTF_32BE) { - case encode_t::UTF_32BE_N: - case encode_t::UTF_32BE_BOM: utf32 = char32_t(*m_current << 24); ++m_current; utf32 |= char32_t(*m_current << 16); @@ -4840,9 +4801,9 @@ class iterator_input_adapter< utf32 |= char32_t(*m_current << 8); ++m_current; utf32 |= char32_t(*m_current); - break; - case encode_t::UTF_32LE_N: - case encode_t::UTF_32LE_BOM: { + } + else // m_encode_type == utf_encode_t::UTF_32LE + { utf32 = char32_t(*m_current); ++m_current; utf32 |= char32_t(*m_current << 8); @@ -4850,11 +4811,6 @@ class iterator_input_adapter< utf32 |= char32_t(*m_current << 16); ++m_current; utf32 |= char32_t(*m_current << 24); - break; - } - default: // LCOV_EXCL_LINE - // should not come here. - break; // LCOV_EXCL_LINE } utf8_encoding::from_utf32(utf32, m_utf8_buffer, m_utf8_buf_size); @@ -4873,7 +4829,7 @@ class iterator_input_adapter< /// The iterator at the end of input. IterType m_end {}; /// The encoding type for this input adapter. - encode_t m_encode_type {encode_t::UTF_8_N}; + utf_encode_t m_encode_type {utf_encode_t::UTF_8}; /// The buffer for decoding characters read from the input. std::array m_encoded_buffer {{0, 0}}; /// The number of elements in `m_encoded_buffer`. @@ -4906,11 +4862,14 @@ class iterator_input_adapter< /// @param begin The beginning of iteraters. /// @param end The end of iterators. /// @param encode_type The encoding type for this input adapter. - iterator_input_adapter(IterType begin, IterType end, encode_t encode_type) noexcept + iterator_input_adapter(IterType begin, IterType end, utf_encode_t encode_type) noexcept : m_current(begin), m_end(end), m_encode_type(encode_type) { + // char8_t characters must be encoded in the UTF-8 format. + // See https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2018/p0482r6.html. + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_8); } // allow only move construct/assignment like other input adapters. @@ -4923,30 +4882,10 @@ class iterator_input_adapter< /// @brief Get a character at the current position and move forward. /// @return std::char_traits::int_type A character or EOF. typename std::char_traits::int_type get_character() - { - typename std::char_traits::int_type ret = 0; - switch (m_encode_type) - { - case encode_t::UTF_8_N: - case encode_t::UTF_8_BOM: - ret = get_character_for_utf8(); - break; - default: // LCOV_EXCL_LINE - // char8_t characters must be encoded in the UTF-8 format. - // See https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2018/p0482r6.html. - break; // LCOV_EXCL_LINE - } - return ret; - } - -private: - /// @brief The concrete implementation of get_character() for UTF-8 encoded inputs. - /// @return A UTF-8 encoded byte at the current position, or EOF. - typename std::char_traits::int_type get_character_for_utf8() noexcept { if (m_current != m_end) { - auto ret = std::char_traits::to_int_type(*m_current); + auto ret = std::char_traits::to_int_type(char(*m_current)); ++m_current; return ret; } @@ -4959,7 +4898,7 @@ class iterator_input_adapter< /// The iterator at the end of input. IterType m_end {}; /// The encoding type for this input adapter. - encode_t m_encode_type {encode_t::UTF_8_N}; + utf_encode_t m_encode_type {utf_encode_t::UTF_8}; }; #endif // defined(FK_YAML_HAS_CHAR8_T) @@ -4982,11 +4921,12 @@ class iterator_input_adapter< /// @param begin The beginning of iteraters. /// @param end The end of iterators. /// @param encode_type The encoding type for this input adapter. - iterator_input_adapter(IterType begin, IterType end, encode_t encode_type) noexcept + iterator_input_adapter(IterType begin, IterType end, utf_encode_t encode_type) noexcept : m_current(begin), m_end(end), m_encode_type(encode_type) { + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_16BE || m_encode_type == utf_encode_t::UTF_16LE); } // allow only move construct/assignment like other input adapters. @@ -5012,23 +4952,17 @@ class iterator_input_adapter< while (m_current != m_end && m_encoded_buf_size < 2) { - switch (m_encode_type) + if (m_encode_type == utf_encode_t::UTF_16BE) { - case encode_t::UTF_16BE_N: - case encode_t::UTF_16BE_BOM: m_encoded_buffer[m_encoded_buf_size] = *m_current; - break; - case encode_t::UTF_16LE_N: - case encode_t::UTF_16LE_BOM: { + } + else // utf_encode_t::UTF_16LE + { char16_t tmp = *m_current; m_encoded_buffer[m_encoded_buf_size] = char16_t((tmp & 0x00FFu) << 8); m_encoded_buffer[m_encoded_buf_size] |= char16_t((tmp & 0xFF00u) >> 8); - break; - } - default: // LCOV_EXCL_LINE - // should not come here. - break; // LCOV_EXCL_LINE } + ++m_current; ++m_encoded_buf_size; } @@ -5057,7 +4991,7 @@ class iterator_input_adapter< /// The iterator at the end of input. IterType m_end {}; /// The encoding type for this input adapter. - encode_t m_encode_type {encode_t::UTF_16BE_N}; + utf_encode_t m_encode_type {utf_encode_t::UTF_16BE}; /// The buffer for decoding characters read from the input. std::array m_encoded_buffer {{0, 0}}; /// The number of elements in `m_encoded_buffer`. @@ -5088,11 +5022,12 @@ class iterator_input_adapter< /// @param begin The beginning of iteraters. /// @param end The end of iterators. /// @param encode_type The encoding type for this input adapter. - iterator_input_adapter(IterType begin, IterType end, encode_t encode_type) noexcept + iterator_input_adapter(IterType begin, IterType end, utf_encode_t encode_type) noexcept : m_current(begin), m_end(end), m_encode_type(encode_type) { + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_32BE || m_encode_type == utf_encode_t::UTF_32LE); } // allow only move construct/assignment like other input adapters. @@ -5114,24 +5049,17 @@ class iterator_input_adapter< } char32_t utf32 = 0; - switch (m_encode_type) + if (m_encode_type == utf_encode_t::UTF_32BE) { - case encode_t::UTF_32BE_N: - case encode_t::UTF_32BE_BOM: utf32 = *m_current; - break; - case encode_t::UTF_32LE_N: - case encode_t::UTF_32LE_BOM: { + } + else // m_encode_type == utf_encode_t::UTF_32LE + { char32_t tmp = *m_current; utf32 |= char32_t((tmp & 0xFF000000u) >> 24); utf32 |= char32_t((tmp & 0x00FF0000u) >> 8); utf32 |= char32_t((tmp & 0x0000FF00u) << 8); utf32 |= char32_t((tmp & 0x000000FFu) << 24); - break; - } - default: // LCOV_EXCL_LINE - // should not come here. - break; // LCOV_EXCL_LINE } utf8_encoding::from_utf32(utf32, m_utf8_buffer, m_utf8_buf_size); @@ -5150,7 +5078,7 @@ class iterator_input_adapter< /// The iterator at the end of input. IterType m_end {}; /// The encoding type for this input adapter. - encode_t m_encode_type {encode_t::UTF_32BE_N}; + utf_encode_t m_encode_type {utf_encode_t::UTF_32BE}; /// The buffer for UTF-8 encoded characters. std::array m_utf8_buffer {{0, 0, 0, 0}}; /// The next index in `m_utf8_buffer` to read. @@ -5175,7 +5103,7 @@ class file_input_adapter /// It's user's responsibility to call those functions. /// @param file A file handle for this adapter. (A non-null pointer is assumed.) /// @param encode_type The encoding type for this input adapter. - explicit file_input_adapter(std::FILE* file, encode_t encode_type) noexcept + explicit file_input_adapter(std::FILE* file, utf_encode_t encode_type) noexcept : m_file(file), m_encode_type(encode_type) { @@ -5195,20 +5123,15 @@ class file_input_adapter typename std::char_traits::int_type ret = 0; switch (m_encode_type) { - case encode_t::UTF_8_N: - case encode_t::UTF_8_BOM: + case utf_encode_t::UTF_8: ret = get_character_for_utf8(); break; - case encode_t::UTF_16BE_N: - case encode_t::UTF_16BE_BOM: - case encode_t::UTF_16LE_N: - case encode_t::UTF_16LE_BOM: + case utf_encode_t::UTF_16BE: + case utf_encode_t::UTF_16LE: ret = get_character_for_utf16(); break; - case encode_t::UTF_32BE_N: - case encode_t::UTF_32BE_BOM: - case encode_t::UTF_32LE_N: - case encode_t::UTF_32LE_BOM: + case utf_encode_t::UTF_32BE: + case utf_encode_t::UTF_32LE: ret = get_character_for_utf32(); break; } @@ -5220,6 +5143,8 @@ class file_input_adapter /// @return A UTF-8 encoded byte at the current position, or EOF. typename std::char_traits::int_type get_character_for_utf8() noexcept { + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_8); + char ch = 0; size_t size = std::fread(&ch, sizeof(char), 1, m_file); if (size == 1) @@ -5233,27 +5158,22 @@ class file_input_adapter /// @return A UTF-8 encoded byte at the current position, or EOF. typename std::char_traits::int_type get_character_for_utf16() { + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_16BE || m_encode_type == utf_encode_t::UTF_16LE); + if (m_utf8_buf_index == m_utf8_buf_size) { char chars[2] = {0, 0}; while (m_encoded_buf_size < 2 && std::fread(&chars[0], sizeof(char), 2, m_file) == 2) { - switch (m_encode_type) + if (m_encode_type == utf_encode_t::UTF_16BE) { - case encode_t::UTF_16BE_N: - case encode_t::UTF_16BE_BOM: m_encoded_buffer[m_encoded_buf_size] = char16_t(uint8_t(chars[0]) << 8); m_encoded_buffer[m_encoded_buf_size] |= char16_t(uint8_t(chars[1])); - break; - case encode_t::UTF_16LE_N: - case encode_t::UTF_16LE_BOM: { + } + else // m_encode_type == utf_encode_t::UTF_16LE + { m_encoded_buffer[m_encoded_buf_size] = char16_t(uint8_t(chars[0])); m_encoded_buffer[m_encoded_buf_size] |= char16_t(uint8_t(chars[1]) << 8); - break; - } - default: // LCOV_EXCL_LINE - // should not come here. - break; // LCOV_EXCL_LINE } ++m_encoded_buf_size; @@ -5286,6 +5206,8 @@ class file_input_adapter /// @return A UTF-8 encoded byte at the current position, or EOF. typename std::char_traits::int_type get_character_for_utf32() { + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_32BE || m_encode_type == utf_encode_t::UTF_32LE); + if (m_utf8_buf_index == m_utf8_buf_size) { char chars[4] = {0, 0, 0, 0}; @@ -5296,26 +5218,19 @@ class file_input_adapter } char32_t utf32 = 0; - switch (m_encode_type) + if (m_encode_type == utf_encode_t::UTF_32BE) { - case encode_t::UTF_32BE_N: - case encode_t::UTF_32BE_BOM: utf32 = char32_t(uint8_t(chars[0]) << 24); utf32 |= char32_t(uint8_t(chars[1]) << 16); utf32 |= char32_t(uint8_t(chars[2]) << 8); utf32 |= char32_t(uint8_t(chars[3])); - break; - case encode_t::UTF_32LE_N: - case encode_t::UTF_32LE_BOM: { + } + else // m_encode_type == utf_encode_t::UTF_32LE + { utf32 = char32_t(uint8_t(chars[0])); utf32 |= char32_t(uint8_t(chars[1]) << 8); utf32 |= char32_t(uint8_t(chars[2]) << 16); utf32 |= char32_t(uint8_t(chars[3]) << 24); - break; - } - default: // LCOV_EXCL_LINE - // should not come here. - break; // LCOV_EXCL_LINE } utf8_encoding::from_utf32(utf32, m_utf8_buffer, m_utf8_buf_size); @@ -5331,7 +5246,7 @@ class file_input_adapter /// A pointer to the input file handle. std::FILE* m_file {nullptr}; /// The encoding type for this input adapter. - encode_t m_encode_type {encode_t::UTF_8_N}; + utf_encode_t m_encode_type {utf_encode_t::UTF_8}; /// The buffer for decoding characters read from the input. std::array m_encoded_buffer {{0, 0}}; /// The number of elements in `m_encoded_buffer`. @@ -5356,7 +5271,7 @@ class stream_input_adapter /// @brief Construct a new stream_input_adapter object. /// @param is A reference to the target input stream. - explicit stream_input_adapter(std::istream& is, encode_t encode_type) noexcept + explicit stream_input_adapter(std::istream& is, utf_encode_t encode_type) noexcept : m_istream(&is), m_encode_type(encode_type) { @@ -5376,20 +5291,15 @@ class stream_input_adapter typename std::char_traits::int_type ret = 0; switch (m_encode_type) { - case encode_t::UTF_8_N: - case encode_t::UTF_8_BOM: + case utf_encode_t::UTF_8: ret = get_character_for_utf8(); break; - case encode_t::UTF_16BE_N: - case encode_t::UTF_16BE_BOM: - case encode_t::UTF_16LE_N: - case encode_t::UTF_16LE_BOM: + case utf_encode_t::UTF_16BE: + case utf_encode_t::UTF_16LE: ret = get_character_for_utf16(); break; - case encode_t::UTF_32BE_N: - case encode_t::UTF_32BE_BOM: - case encode_t::UTF_32LE_N: - case encode_t::UTF_32LE_BOM: + case utf_encode_t::UTF_32BE: + case utf_encode_t::UTF_32LE: ret = get_character_for_utf32(); break; } @@ -5401,6 +5311,7 @@ class stream_input_adapter /// @return A UTF-8 encoded byte at the current position, or EOF. typename std::char_traits::int_type get_character_for_utf8() noexcept { + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_8); return m_istream->get(); } @@ -5408,6 +5319,8 @@ class stream_input_adapter /// @return A UTF-8 encoded byte at the current position, or EOF. typename std::char_traits::int_type get_character_for_utf16() { + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_16BE || m_encode_type == utf_encode_t::UTF_16LE); + if (m_utf8_buf_index == m_utf8_buf_size) { while (m_encoded_buf_size < 2) @@ -5424,22 +5337,15 @@ class stream_input_adapter break; } - switch (m_encode_type) + if (m_encode_type == utf_encode_t::UTF_16BE) { - case encode_t::UTF_16BE_N: - case encode_t::UTF_16BE_BOM: m_encoded_buffer[m_encoded_buf_size] = char16_t(uint8_t(chars[0]) << 8); m_encoded_buffer[m_encoded_buf_size] |= char16_t(uint8_t(chars[1])); - break; - case encode_t::UTF_16LE_N: - case encode_t::UTF_16LE_BOM: { + } + else // m_encode_type == utf_encode_t::UTF_16LE + { m_encoded_buffer[m_encoded_buf_size] = char16_t(uint8_t(chars[0])); m_encoded_buffer[m_encoded_buf_size] |= char16_t(uint8_t(chars[1]) << 8); - break; - } - default: // LCOV_EXCL_LINE - // should not come here. - break; // LCOV_EXCL_LINE } ++m_encoded_buf_size; @@ -5467,6 +5373,8 @@ class stream_input_adapter /// @return A UTF-8 encoded byte at the current position, or EOF. typename std::char_traits::int_type get_character_for_utf32() { + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_32BE || m_encode_type == utf_encode_t::UTF_32LE); + if (m_utf8_buf_index == m_utf8_buf_size) { char ch = 0; @@ -5478,10 +5386,8 @@ class stream_input_adapter } char32_t utf32 = 0; - switch (m_encode_type) + if (m_encode_type == utf_encode_t::UTF_32BE) { - case encode_t::UTF_32BE_N: - case encode_t::UTF_32BE_BOM: utf32 = char32_t(ch << 24); m_istream->read(&ch, 1); utf32 |= char32_t(ch << 16); @@ -5489,9 +5395,9 @@ class stream_input_adapter utf32 |= char32_t(ch << 8); m_istream->read(&ch, 1); utf32 |= char32_t(ch); - break; - case encode_t::UTF_32LE_N: - case encode_t::UTF_32LE_BOM: { + } + else // m_encode_type == utf_encode_t::UTF_32LE + { utf32 = char32_t(ch); m_istream->read(&ch, 1); utf32 |= char32_t(ch << 8); @@ -5499,11 +5405,6 @@ class stream_input_adapter utf32 |= char32_t(ch << 16); m_istream->read(&ch, 1); utf32 |= char32_t(ch << 24); - break; - } - default: // LCOV_EXCL_LINE - // should not come here. - break; // LCOV_EXCL_LINE } utf8_encoding::from_utf32(utf32, m_utf8_buffer, m_utf8_buf_size); @@ -5519,7 +5420,7 @@ class stream_input_adapter /// A pointer to the input stream object. std::istream* m_istream {nullptr}; /// The encoding type for this input adapter. - encode_t m_encode_type {encode_t::UTF_8_N}; + utf_encode_t m_encode_type {utf_encode_t::UTF_8}; /// The buffer for decoding characters read from the input. std::array m_encoded_buffer {{0, 0}}; /// The number of elements in `m_encoded_buffer`. @@ -5544,7 +5445,7 @@ class stream_input_adapter template ())))> inline iterator_input_adapter input_adapter(ItrType begin, ItrType end) { - encode_t encode_type = detect_encoding_and_skip_bom(begin, end); + utf_encode_t encode_type = detect_encoding_and_skip_bom(begin, end); return iterator_input_adapter(begin, end, encode_type); } @@ -5614,7 +5515,7 @@ inline file_input_adapter input_adapter(std::FILE* file) { throw fkyaml::exception("Invalid FILE object pointer."); } - encode_t encode_type = detect_encoding_and_skip_bom(file); + utf_encode_t encode_type = detect_encoding_and_skip_bom(file); return file_input_adapter(file, encode_type); } @@ -5623,7 +5524,7 @@ inline file_input_adapter input_adapter(std::FILE* file) /// @return stream_input_adapter inline stream_input_adapter input_adapter(std::istream& stream) noexcept { - encode_t encode_type = detect_encoding_and_skip_bom(stream); + utf_encode_t encode_type = detect_encoding_and_skip_bom(stream); return stream_input_adapter(stream, encode_type); } @@ -6703,7 +6604,7 @@ class basic_serializer } auto adapter = input_adapter(s); - lexical_analyzer lexer(std::move(adapter)); + lexical_analyzer lexer(std::move(adapter)); lexical_token_t token_type = lexer.get_next_token(); if (token_type != lexical_token_t::STRING_VALUE) diff --git a/test/unit_test/test_encode_detector.cpp b/test/unit_test/test_encode_detector.cpp index 545c0423..e1bd8c2f 100644 --- a/test/unit_test/test_encode_detector.cpp +++ b/test/unit_test/test_encode_detector.cpp @@ -28,54 +28,67 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingTypeTest", "[EncodeDetectorTest]") { - using pair_t = std::pair, fkyaml::detail::encode_t>; - auto params = GENERATE( - pair_t {{0xEFu, 0xBBu, 0xBFu, 0x80u}, fkyaml::detail::encode_t::UTF_8_BOM}, - pair_t {{0xEFu, 0, 0xBFu, 0x80u}, fkyaml::detail::encode_t::UTF_8_N}, - pair_t {{0xEFu, 0xBBu, 0, 0x80u}, fkyaml::detail::encode_t::UTF_8_N}, - pair_t {{0, 0xBBu, 0xBFu, 0x80u}, fkyaml::detail::encode_t::UTF_8_N}, - pair_t {{0, 0, 0xFEu, 0xFFu}, fkyaml::detail::encode_t::UTF_32BE_BOM}, - pair_t {{0x80u, 0, 0xFEu, 0xFFu}, fkyaml::detail::encode_t::UTF_8_N}, - pair_t {{0, 0x80u, 0xFEu, 0xFFu}, fkyaml::detail::encode_t::UTF_8_N}, - pair_t {{0, 0, 0x80u, 0xFFu}, fkyaml::detail::encode_t::UTF_8_N}, - pair_t {{0, 0, 0xFEu, 0x80u}, fkyaml::detail::encode_t::UTF_8_N}, - pair_t {{0xFFu, 0xFEu, 0, 0}, fkyaml::detail::encode_t::UTF_32LE_BOM}, - pair_t {{0x80u, 0xFEu, 0, 0}, fkyaml::detail::encode_t::UTF_8_N}, - pair_t {{0xFFu, 0x80u, 0, 0}, fkyaml::detail::encode_t::UTF_8_N}, - pair_t {{0xFFu, 0xFEu, 0x80u, 0}, fkyaml::detail::encode_t::UTF_16LE_BOM}, - pair_t {{0xFFu, 0xFEu, 0, 0x80u}, fkyaml::detail::encode_t::UTF_16LE_BOM}, - pair_t {{0xFEu, 0xFFu, 0x80u, 0x80u}, fkyaml::detail::encode_t::UTF_16BE_BOM}, - pair_t {{0x80u, 0xFFu, 0x80u, 0x80u}, fkyaml::detail::encode_t::UTF_8_N}, - pair_t {{0xFEu, 0x80u, 0x80u, 0x80u}, fkyaml::detail::encode_t::UTF_8_N}, - pair_t {{0xFFu, 0xFEu, 0x80u, 0x80u}, fkyaml::detail::encode_t::UTF_16LE_BOM}, - pair_t {{0x80u, 0xFEu, 0x80u, 0x80u}, fkyaml::detail::encode_t::UTF_8_N}, - pair_t {{0xFFu, 0x80u, 0x80u, 0x80u}, fkyaml::detail::encode_t::UTF_8_N}, - pair_t {{0, 0, 0, 1}, fkyaml::detail::encode_t::UTF_32BE_N}, - pair_t {{0, 0, 0, 0x40u}, fkyaml::detail::encode_t::UTF_32BE_N}, - pair_t {{0, 0, 0, 0x7Fu}, fkyaml::detail::encode_t::UTF_32BE_N}, - pair_t {{0x80u, 0, 0, 0x7Fu}, fkyaml::detail::encode_t::UTF_8_N}, - pair_t {{0, 0x80u, 0, 0x7Fu}, fkyaml::detail::encode_t::UTF_8_N}, - pair_t {{0, 0, 0x80u, 0x7Fu}, fkyaml::detail::encode_t::UTF_8_N}, - pair_t {{0, 0, 0, 0}, fkyaml::detail::encode_t::UTF_8_N}, - pair_t {{0, 0, 0, 0x80u}, fkyaml::detail::encode_t::UTF_8_N}, - pair_t {{1, 0, 0, 0}, fkyaml::detail::encode_t::UTF_32LE_N}, - pair_t {{0x40u, 0, 0, 0}, fkyaml::detail::encode_t::UTF_32LE_N}, - pair_t {{0x7Fu, 0, 0, 0}, fkyaml::detail::encode_t::UTF_32LE_N}, - pair_t {{0, 0, 0, 0}, fkyaml::detail::encode_t::UTF_8_N}, - pair_t {{0x80u, 0, 0, 0}, fkyaml::detail::encode_t::UTF_8_N}, - pair_t {{0x7Fu, 0x80u, 0, 0}, fkyaml::detail::encode_t::UTF_8_N}, - pair_t {{0x7Fu, 0, 0x80u, 0}, fkyaml::detail::encode_t::UTF_16LE_N}, - pair_t {{0x7Fu, 0, 0, 0x80u}, fkyaml::detail::encode_t::UTF_16LE_N}, - pair_t {{0, 1, 1, 1}, fkyaml::detail::encode_t::UTF_16BE_N}, - pair_t {{0, 0x40u, 1, 1}, fkyaml::detail::encode_t::UTF_16BE_N}, - pair_t {{0, 0x7Fu, 1, 1}, fkyaml::detail::encode_t::UTF_16BE_N}, - pair_t {{1, 0, 1, 1}, fkyaml::detail::encode_t::UTF_16LE_N}, - pair_t {{0x40u, 0, 1, 1}, fkyaml::detail::encode_t::UTF_16LE_N}, - pair_t {{0x7Fu, 0, 1, 1}, fkyaml::detail::encode_t::UTF_16LE_N}); - - REQUIRE( - fkyaml::detail::detect_encoding_type(params.first[0], params.first[1], params.first[2], params.first[3]) == - params.second); + struct test_data_t + { + test_data_t(std::array input_, fkyaml::detail::utf_encode_t encode_type_, bool has_bom_) + : input(input_), + encode_type(encode_type_), + has_bom(has_bom_) + { + } + + std::array input {}; + fkyaml::detail::utf_encode_t encode_type {fkyaml::detail::utf_encode_t::UTF_8}; + bool has_bom {false}; + }; + + auto d = GENERATE( + test_data_t {{0xEFu, 0xBBu, 0xBFu, 0x80u}, fkyaml::detail::utf_encode_t::UTF_8, true}, + test_data_t {{0xEFu, 0, 0xBFu, 0x80u}, fkyaml::detail::utf_encode_t::UTF_8, false}, + test_data_t {{0xEFu, 0xBBu, 0, 0x80u}, fkyaml::detail::utf_encode_t::UTF_8, false}, + test_data_t {{0, 0xBBu, 0xBFu, 0x80u}, fkyaml::detail::utf_encode_t::UTF_8, false}, + test_data_t {{0, 0, 0xFEu, 0xFFu}, fkyaml::detail::utf_encode_t::UTF_32BE, true}, + test_data_t {{0x80u, 0, 0xFEu, 0xFFu}, fkyaml::detail::utf_encode_t::UTF_8, false}, + test_data_t {{0, 0x80u, 0xFEu, 0xFFu}, fkyaml::detail::utf_encode_t::UTF_8, false}, + test_data_t {{0, 0, 0x80u, 0xFFu}, fkyaml::detail::utf_encode_t::UTF_8, false}, + test_data_t {{0, 0, 0xFEu, 0x80u}, fkyaml::detail::utf_encode_t::UTF_8, false}, + test_data_t {{0xFFu, 0xFEu, 0, 0}, fkyaml::detail::utf_encode_t::UTF_32LE, true}, + test_data_t {{0x80u, 0xFEu, 0, 0}, fkyaml::detail::utf_encode_t::UTF_8, false}, + test_data_t {{0xFFu, 0x80u, 0, 0}, fkyaml::detail::utf_encode_t::UTF_8, false}, + test_data_t {{0xFFu, 0xFEu, 0x80u, 0}, fkyaml::detail::utf_encode_t::UTF_16LE, true}, + test_data_t {{0xFFu, 0xFEu, 0, 0x80u}, fkyaml::detail::utf_encode_t::UTF_16LE, true}, + test_data_t {{0xFEu, 0xFFu, 0x80u, 0x80u}, fkyaml::detail::utf_encode_t::UTF_16BE, true}, + test_data_t {{0x80u, 0xFFu, 0x80u, 0x80u}, fkyaml::detail::utf_encode_t::UTF_8, false}, + test_data_t {{0xFEu, 0x80u, 0x80u, 0x80u}, fkyaml::detail::utf_encode_t::UTF_8, false}, + test_data_t {{0xFFu, 0xFEu, 0x80u, 0x80u}, fkyaml::detail::utf_encode_t::UTF_16LE, true}, + test_data_t {{0x80u, 0xFEu, 0x80u, 0x80u}, fkyaml::detail::utf_encode_t::UTF_8, false}, + test_data_t {{0xFFu, 0x80u, 0x80u, 0x80u}, fkyaml::detail::utf_encode_t::UTF_8, false}, + test_data_t {{0, 0, 0, 1}, fkyaml::detail::utf_encode_t::UTF_32BE, false}, + test_data_t {{0, 0, 0, 0x40u}, fkyaml::detail::utf_encode_t::UTF_32BE, false}, + test_data_t {{0, 0, 0, 0x7Fu}, fkyaml::detail::utf_encode_t::UTF_32BE, false}, + test_data_t {{0x80u, 0, 0, 0x7Fu}, fkyaml::detail::utf_encode_t::UTF_8, false}, + test_data_t {{0, 0x80u, 0, 0x7Fu}, fkyaml::detail::utf_encode_t::UTF_8, false}, + test_data_t {{0, 0, 0x80u, 0x7Fu}, fkyaml::detail::utf_encode_t::UTF_8, false}, + test_data_t {{0, 0, 0, 0}, fkyaml::detail::utf_encode_t::UTF_8, false}, + test_data_t {{0, 0, 0, 0x80u}, fkyaml::detail::utf_encode_t::UTF_8, false}, + test_data_t {{1, 0, 0, 0}, fkyaml::detail::utf_encode_t::UTF_32LE, false}, + test_data_t {{0x40u, 0, 0, 0}, fkyaml::detail::utf_encode_t::UTF_32LE, false}, + test_data_t {{0x7Fu, 0, 0, 0}, fkyaml::detail::utf_encode_t::UTF_32LE, false}, + test_data_t {{0, 0, 0, 0}, fkyaml::detail::utf_encode_t::UTF_8, false}, + test_data_t {{0x80u, 0, 0, 0}, fkyaml::detail::utf_encode_t::UTF_8, false}, + test_data_t {{0x7Fu, 0x80u, 0, 0}, fkyaml::detail::utf_encode_t::UTF_8, false}, + test_data_t {{0x7Fu, 0, 0x80u, 0}, fkyaml::detail::utf_encode_t::UTF_16LE, false}, + test_data_t {{0x7Fu, 0, 0, 0x80u}, fkyaml::detail::utf_encode_t::UTF_16LE, false}, + test_data_t {{0, 1, 1, 1}, fkyaml::detail::utf_encode_t::UTF_16BE, false}, + test_data_t {{0, 0x40u, 1, 1}, fkyaml::detail::utf_encode_t::UTF_16BE, false}, + test_data_t {{0, 0x7Fu, 1, 1}, fkyaml::detail::utf_encode_t::UTF_16BE, false}, + test_data_t {{1, 0, 1, 1}, fkyaml::detail::utf_encode_t::UTF_16LE, false}, + test_data_t {{0x40u, 0, 1, 1}, fkyaml::detail::utf_encode_t::UTF_16LE, false}, + test_data_t {{0x7Fu, 0, 1, 1}, fkyaml::detail::utf_encode_t::UTF_16LE, false}); + + bool has_bom = false; + REQUIRE(fkyaml::detail::detect_encoding_type(d.input, has_bom) == d.encode_type); + REQUIRE(has_bom == d.has_bom); } TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTest]") @@ -89,8 +102,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes std::string input {char(0x60u), char(0x61u), char(0x62u), char(0x63u)}; auto begin = std::begin(input); auto end = std::end(input); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_8_N); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_8); REQUIRE(begin == std::begin(input)); } @@ -99,8 +112,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes std::string input {char(0xEFu), char(0xBBu), char(0xBFu), char(0x60u)}; auto begin = std::begin(input); auto end = std::end(input); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_8_BOM); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_8); REQUIRE(begin == std::begin(input) + 3); } @@ -109,8 +122,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes std::string input {0, char(0x60u), 0, char(0x61u)}; auto begin = std::begin(input); auto end = std::end(input); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_16BE_N); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_16BE); REQUIRE(begin == std::begin(input)); } @@ -119,8 +132,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes std::string input {char(0xFEu), char(0xFFu), 0, char(0x60u)}; auto begin = std::begin(input); auto end = std::end(input); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_16BE_BOM); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_16BE); REQUIRE(begin == std::begin(input) + 2); } @@ -129,8 +142,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes std::string input {char(0x60u), 0, char(0x61u), 0}; auto begin = std::begin(input); auto end = std::end(input); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_16LE_N); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_16LE); REQUIRE(begin == std::begin(input)); } @@ -139,8 +152,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes std::string input {char(0xFFu), char(0xFEu), char(0x60u), 0}; auto begin = std::begin(input); auto end = std::end(input); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_16LE_BOM); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_16LE); REQUIRE(begin == std::begin(input) + 2); } @@ -149,8 +162,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes std::string input {0, 0, 0, char(0x60u), 0, 0, 0, char(0x61u)}; auto begin = std::begin(input); auto end = std::end(input); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_32BE_N); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_32BE); REQUIRE(begin == std::begin(input)); } @@ -159,8 +172,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes std::string input {0, 0, char(0xFEu), char(0xFFu), 0, 0, 0, char(0x60u)}; auto begin = std::begin(input); auto end = std::end(input); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_32BE_BOM); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_32BE); REQUIRE(begin == std::begin(input) + 4); } @@ -169,8 +182,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes std::string input {char(0x60u), 0, 0, 0, char(0x61u), 0, 0, 0}; auto begin = std::begin(input); auto end = std::end(input); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_32LE_N); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_32LE); REQUIRE(begin == std::begin(input)); } @@ -179,8 +192,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes std::string input {char(0xFFu), char(0xFEu), 0, 0, char(0x60u), 0, 0, 0}; auto begin = std::begin(input); auto end = std::end(input); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_32LE_BOM); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_32LE); REQUIRE(begin == std::begin(input) + 4); } @@ -193,8 +206,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes std::u16string input {char16_t(0x0060u), char16_t(0x0061u)}; auto begin = std::begin(input); auto end = std::end(input); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_16BE_N); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_16BE); REQUIRE(begin == std::begin(input)); } @@ -203,8 +216,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes std::u16string input {char16_t(0xFEFFu), char16_t(0x0060u)}; auto begin = std::begin(input); auto end = std::end(input); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_16BE_BOM); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_16BE); REQUIRE(begin == std::begin(input) + 1); } @@ -213,8 +226,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes std::u16string input {char16_t(0x6000u), char16_t(0x6100u)}; auto begin = std::begin(input); auto end = std::end(input); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_16LE_N); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_16LE); REQUIRE(begin == std::begin(input)); } @@ -223,8 +236,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes std::u16string input {char16_t(0xFFFEu), char16_t(0x6000u)}; auto begin = std::begin(input); auto end = std::end(input); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_16LE_BOM); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_16LE); REQUIRE(begin == std::begin(input) + 1); } @@ -233,8 +246,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes std::u16string input = u""; auto begin = std::begin(input); auto end = std::end(input); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_16BE_N); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_16BE); REQUIRE(begin == std::begin(input)); } @@ -255,8 +268,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes std::u32string input {char32_t(0x00000060u), char32_t(0x00000061u)}; auto begin = std::begin(input); auto end = std::end(input); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_32BE_N); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_32BE); REQUIRE(begin == std::begin(input)); } @@ -265,8 +278,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes std::u32string input {char32_t(0x0000FEFFu), char32_t(0x00000060u)}; auto begin = std::begin(input); auto end = std::end(input); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_32BE_BOM); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_32BE); REQUIRE(begin == std::begin(input) + 1); } @@ -275,8 +288,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes std::u32string input {char32_t(0x60000000u), char32_t(0x61000000u)}; auto begin = std::begin(input); auto end = std::end(input); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_32LE_N); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_32LE); REQUIRE(begin == std::begin(input)); } @@ -285,8 +298,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes std::u32string input {char32_t(0xFFFE0000u), char32_t(0x60000000u)}; auto begin = std::begin(input); auto end = std::end(input); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_32LE_BOM); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_32LE); REQUIRE(begin == std::begin(input) + 1); } @@ -295,8 +308,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes std::u32string input = U""; auto begin = std::begin(input); auto end = std::end(input); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_32BE_N); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(begin, end); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_32BE); REQUIRE(begin == std::begin(input)); } @@ -319,8 +332,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes ENABLE_C4996 REQUIRE(p_file != nullptr); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(p_file); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_8_N); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(p_file); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_8); REQUIRE(std::ftell(p_file) == 0); std::fclose(p_file); @@ -333,8 +346,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes ENABLE_C4996 REQUIRE(p_file != nullptr); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(p_file); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_8_BOM); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(p_file); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_8); REQUIRE(std::ftell(p_file) == 3); std::fclose(p_file); @@ -347,8 +360,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes ENABLE_C4996 REQUIRE(p_file != nullptr); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(p_file); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_16BE_N); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(p_file); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_16BE); REQUIRE(std::ftell(p_file) == 0); std::fclose(p_file); @@ -361,8 +374,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes ENABLE_C4996 REQUIRE(p_file != nullptr); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(p_file); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_16BE_BOM); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(p_file); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_16BE); REQUIRE(std::ftell(p_file) == 2); std::fclose(p_file); @@ -375,8 +388,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes ENABLE_C4996 REQUIRE(p_file != nullptr); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(p_file); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_16LE_N); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(p_file); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_16LE); REQUIRE(std::ftell(p_file) == 0); std::fclose(p_file); @@ -389,8 +402,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes ENABLE_C4996 REQUIRE(p_file != nullptr); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(p_file); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_16LE_BOM); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(p_file); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_16LE); REQUIRE(std::ftell(p_file) == 2); std::fclose(p_file); @@ -403,8 +416,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes ENABLE_C4996 REQUIRE(p_file != nullptr); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(p_file); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_32BE_N); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(p_file); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_32BE); REQUIRE(std::ftell(p_file) == 0); std::fclose(p_file); @@ -417,8 +430,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes ENABLE_C4996 REQUIRE(p_file != nullptr); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(p_file); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_32BE_BOM); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(p_file); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_32BE); REQUIRE(std::ftell(p_file) == 4); std::fclose(p_file); @@ -431,8 +444,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes ENABLE_C4996 REQUIRE(p_file != nullptr); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(p_file); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_32LE_N); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(p_file); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_32LE); REQUIRE(std::ftell(p_file) == 0); std::fclose(p_file); @@ -445,8 +458,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes ENABLE_C4996 REQUIRE(p_file != nullptr); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(p_file); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_32LE_BOM); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(p_file); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_32LE); REQUIRE(std::ftell(p_file) == 4); std::fclose(p_file); @@ -459,8 +472,8 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes ENABLE_C4996 REQUIRE(p_file != nullptr); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(p_file); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_8_N); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(p_file); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_8); REQUIRE(std::ftell(p_file) == 0); std::fclose(p_file); @@ -473,88 +486,88 @@ TEST_CASE("EncodeDetectorTest_DetectEncodingAndSkipBomTest", "[EncodeDetectorTes SECTION("std::istream with UTF-8 encoding") { std::ifstream ifs(FK_YAML_TEST_DATA_DIR "/input_adapter_test_data_utf8n.txt"); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(ifs); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_8_N); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(ifs); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_8); REQUIRE(ifs.tellg() == 0); } SECTION("std::istream with UTF-8(BOM) encoding") { std::ifstream ifs(FK_YAML_TEST_DATA_DIR "/input_adapter_test_data_utf8bom.txt"); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(ifs); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_8_BOM); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(ifs); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_8); REQUIRE(ifs.tellg() == 3); } SECTION("std::istream with UTF-16BE encoding") { std::ifstream ifs(FK_YAML_TEST_DATA_DIR "/input_adapter_test_data_utf16ben.txt"); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(ifs); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_16BE_N); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(ifs); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_16BE); REQUIRE(ifs.tellg() == 0); } SECTION("std::istream with UTF-16BE(BOM) encoding") { std::ifstream ifs(FK_YAML_TEST_DATA_DIR "/input_adapter_test_data_utf16bebom.txt"); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(ifs); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_16BE_BOM); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(ifs); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_16BE); REQUIRE(ifs.tellg() == 2); } SECTION("std::istream with UTF-16LE encoding") { std::ifstream ifs(FK_YAML_TEST_DATA_DIR "/input_adapter_test_data_utf16len.txt"); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(ifs); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_16LE_N); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(ifs); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_16LE); REQUIRE(ifs.tellg() == 0); } SECTION("std::istream with UTF-16LE(BOM) encoding") { std::ifstream ifs(FK_YAML_TEST_DATA_DIR "/input_adapter_test_data_utf16lebom.txt"); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(ifs); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_16LE_BOM); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(ifs); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_16LE); REQUIRE(ifs.tellg() == 2); } SECTION("std::istream with UTF-32BE encoding") { std::ifstream ifs(FK_YAML_TEST_DATA_DIR "/input_adapter_test_data_utf32ben.txt"); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(ifs); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_32BE_N); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(ifs); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_32BE); REQUIRE(ifs.tellg() == 0); } SECTION("std::istream with UTF-32BE(BOM) encoding") { std::ifstream ifs(FK_YAML_TEST_DATA_DIR "/input_adapter_test_data_utf32bebom.txt"); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(ifs); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_32BE_BOM); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(ifs); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_32BE); REQUIRE(ifs.tellg() == 4); } SECTION("std::istream with UTF-32LE encoding") { std::ifstream ifs(FK_YAML_TEST_DATA_DIR "/input_adapter_test_data_utf32len.txt"); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(ifs); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_32LE_N); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(ifs); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_32LE); REQUIRE(ifs.tellg() == 0); } SECTION("std::istream with UTF-32LE(BOM) encoding") { std::ifstream ifs(FK_YAML_TEST_DATA_DIR "/input_adapter_test_data_utf32lebom.txt"); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(ifs); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_32LE_BOM); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(ifs); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_32LE); REQUIRE(ifs.tellg() == 4); } SECTION("std::istream with an empty input file") { std::ifstream ifs(FK_YAML_TEST_DATA_DIR "/single_char_byte_input.txt"); - fkyaml::detail::encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(ifs); - REQUIRE(ret == fkyaml::detail::encode_t::UTF_8_N); + fkyaml::detail::utf_encode_t ret = fkyaml::detail::detect_encoding_and_skip_bom(ifs); + REQUIRE(ret == fkyaml::detail::utf_encode_t::UTF_8); REQUIRE(ifs.tellg() == 0); } } diff --git a/test/unit_test/test_input_handler.cpp b/test/unit_test/test_input_handler.cpp index ea6ecdd4..d1934541 100644 --- a/test/unit_test/test_input_handler.cpp +++ b/test/unit_test/test_input_handler.cpp @@ -17,12 +17,10 @@ #include #endif -using pchar_input_handler = fkyaml::detail::input_handler>; - TEST_CASE("InputHandlerTest_InitialStateTest", "[InputHandlerTest]") { char input[] = "test"; - pchar_input_handler handler(fkyaml::detail::input_adapter(input)); + fkyaml::detail::input_handler handler(fkyaml::detail::input_adapter(input)); REQUIRE(handler.get_current() == 't'); REQUIRE(handler.get_cur_pos_in_line() == 0); @@ -32,7 +30,7 @@ TEST_CASE("InputHandlerTest_InitialStateTest", "[InputHandlerTest]") TEST_CASE("InputHandlerTest_GetCurrentTest", "[InputHandlerTest]") { char input[] = "test"; - pchar_input_handler handler(fkyaml::detail::input_adapter(input)); + fkyaml::detail::input_handler handler(fkyaml::detail::input_adapter(input)); REQUIRE(handler.get_current() == 't'); REQUIRE(handler.get_cur_pos_in_line() == 0); @@ -53,13 +51,13 @@ TEST_CASE("InputHandlerTest_GetCurrentTest", "[InputHandlerTest]") REQUIRE(handler.get_cur_pos_in_line() == 3); REQUIRE(handler.get_lines_read() == 0); - REQUIRE(handler.get_next() == pchar_input_handler::char_traits_type::eof()); - REQUIRE(handler.get_current() == pchar_input_handler::char_traits_type::eof()); + REQUIRE(handler.get_next() == std::char_traits::eof()); + REQUIRE(handler.get_current() == std::char_traits::eof()); REQUIRE(handler.get_cur_pos_in_line() == 4); REQUIRE(handler.get_lines_read() == 0); - REQUIRE(handler.get_next() == pchar_input_handler::char_traits_type::eof()); - REQUIRE(handler.get_current() == pchar_input_handler::char_traits_type::eof()); + REQUIRE(handler.get_next() == std::char_traits::eof()); + REQUIRE(handler.get_current() == std::char_traits::eof()); REQUIRE(handler.get_cur_pos_in_line() == 4); REQUIRE(handler.get_lines_read() == 0); } @@ -67,7 +65,7 @@ TEST_CASE("InputHandlerTest_GetCurrentTest", "[InputHandlerTest]") TEST_CASE("InputHandlerTest_GetNextTest", "[InputHandlerTest]") { char input[] = "test"; - pchar_input_handler handler(fkyaml::detail::input_adapter(input)); + fkyaml::detail::input_handler handler(fkyaml::detail::input_adapter(input)); REQUIRE(handler.get_next() == 'e'); REQUIRE(handler.get_cur_pos_in_line() == 1); @@ -81,11 +79,11 @@ TEST_CASE("InputHandlerTest_GetNextTest", "[InputHandlerTest]") REQUIRE(handler.get_cur_pos_in_line() == 3); REQUIRE(handler.get_lines_read() == 0); - REQUIRE(handler.get_next() == pchar_input_handler::char_traits_type::eof()); + REQUIRE(handler.get_next() == std::char_traits::eof()); REQUIRE(handler.get_cur_pos_in_line() == 4); REQUIRE(handler.get_lines_read() == 0); - REQUIRE(handler.get_next() == pchar_input_handler::char_traits_type::eof()); + REQUIRE(handler.get_next() == std::char_traits::eof()); REQUIRE(handler.get_cur_pos_in_line() == 4); REQUIRE(handler.get_lines_read() == 0); } @@ -93,8 +91,8 @@ TEST_CASE("InputHandlerTest_GetNextTest", "[InputHandlerTest]") TEST_CASE("InputHandlerTest_GetRangeTest", "[InputHandlerTest]") { char input[] = "test"; - pchar_input_handler::string_type str; - pchar_input_handler handler(fkyaml::detail::input_adapter(input)); + std::string str; + fkyaml::detail::input_handler handler(fkyaml::detail::input_adapter(input)); REQUIRE(handler.get_range(4, str) == 0); REQUIRE(str == "test"); @@ -102,17 +100,17 @@ TEST_CASE("InputHandlerTest_GetRangeTest", "[InputHandlerTest]") REQUIRE(handler.get_cur_pos_in_line() == 3); REQUIRE(handler.get_lines_read() == 0); - REQUIRE(handler.get_range(2, str) == pchar_input_handler::char_traits_type::eof()); + REQUIRE(handler.get_range(2, str) == std::char_traits::eof()); REQUIRE(handler.get_current() == 't'); REQUIRE(handler.get_cur_pos_in_line() == 3); REQUIRE(handler.get_lines_read() == 0); - REQUIRE(handler.get_next() == pchar_input_handler::char_traits_type::eof()); + REQUIRE(handler.get_next() == std::char_traits::eof()); REQUIRE(handler.get_cur_pos_in_line() == 4); REQUIRE(handler.get_lines_read() == 0); - REQUIRE(handler.get_range(0, str) == pchar_input_handler::char_traits_type::eof()); - REQUIRE(handler.get_current() == pchar_input_handler::char_traits_type::eof()); + REQUIRE(handler.get_range(0, str) == 0); + REQUIRE(handler.get_current() == std::char_traits::eof()); REQUIRE(handler.get_cur_pos_in_line() == 4); REQUIRE(handler.get_lines_read() == 0); } @@ -120,7 +118,7 @@ TEST_CASE("InputHandlerTest_GetRangeTest", "[InputHandlerTest]") TEST_CASE("InputHandlerTest_UngetTest", "[InputHandlerTest]") { char input[] = "test"; - pchar_input_handler handler(fkyaml::detail::input_adapter(input)); + fkyaml::detail::input_handler handler(fkyaml::detail::input_adapter(input)); REQUIRE(handler.get_current() == 't'); REQUIRE(handler.get_cur_pos_in_line() == 0); @@ -140,7 +138,7 @@ TEST_CASE("InputHandlerTest_UngetTest", "[InputHandlerTest]") REQUIRE(handler.get_next() == 'e'); REQUIRE(handler.get_next() == 's'); REQUIRE(handler.get_next() == 't'); - REQUIRE(handler.get_next() == pchar_input_handler::char_traits_type::eof()); + REQUIRE(handler.get_next() == std::char_traits::eof()); handler.unget(); REQUIRE(handler.get_current() == 't'); REQUIRE(handler.get_cur_pos_in_line() == 3); @@ -150,7 +148,7 @@ TEST_CASE("InputHandlerTest_UngetTest", "[InputHandlerTest]") TEST_CASE("InputHandlerTest_UngetRangeTest", "[InputHandlerTest]") { char input[] = "test"; - pchar_input_handler handler(fkyaml::detail::input_adapter(input)); + fkyaml::detail::input_handler handler(fkyaml::detail::input_adapter(input)); REQUIRE(handler.get_current() == 't'); handler.unget_range(4); @@ -172,7 +170,7 @@ TEST_CASE("InputHandlerTest_UngetRangeTest", "[InputHandlerTest]") REQUIRE(handler.get_next() == 's'); REQUIRE(handler.get_next() == 't'); - REQUIRE(handler.get_next() == pchar_input_handler::char_traits_type::eof()); + REQUIRE(handler.get_next() == std::char_traits::eof()); handler.unget_range(2); REQUIRE(handler.get_current() == 's'); REQUIRE(handler.get_cur_pos_in_line() == 2); @@ -182,7 +180,7 @@ TEST_CASE("InputHandlerTest_UngetRangeTest", "[InputHandlerTest]") TEST_CASE("InputHandlerTest_TestNextCharTest", "[InputHandlerTest]") { char input[] = "test"; - pchar_input_handler handler(fkyaml::detail::input_adapter(input)); + fkyaml::detail::input_handler handler(fkyaml::detail::input_adapter(input)); REQUIRE(handler.test_next_char('e') == true); REQUIRE(handler.get_cur_pos_in_line() == 0); @@ -198,15 +196,14 @@ TEST_CASE("InputHandlerTest_TestNextCharTest", "[InputHandlerTest]") REQUIRE(handler.get_next() == 't'); REQUIRE(handler.test_next_char('t') == false); - REQUIRE(handler.get_next() == pchar_input_handler::char_traits_type::eof()); + REQUIRE(handler.get_next() == std::char_traits::eof()); REQUIRE(handler.test_next_char('t') == false); REQUIRE(handler.get_cur_pos_in_line() == 4); REQUIRE(handler.get_lines_read() == 0); - pchar_input_handler::char_type char_eof = - pchar_input_handler::char_traits_type::to_char_type(pchar_input_handler::char_traits_type::eof()); + char char_eof = std::char_traits::to_char_type(std::char_traits::eof()); REQUIRE(handler.test_next_char(char_eof) == false); - REQUIRE(handler.get_current() == pchar_input_handler::char_traits_type::eof()); + REQUIRE(handler.get_current() == std::char_traits::eof()); } TEST_CASE("InputHandlerTest_TestMultipleLinesTest", "[InputHandlerTest]") @@ -214,8 +211,8 @@ TEST_CASE("InputHandlerTest_TestMultipleLinesTest", "[InputHandlerTest]") SECTION("first character is not a newline code.") { char input[] = "test\nfoo"; - pchar_input_handler::string_type str; - pchar_input_handler handler(fkyaml::detail::input_adapter(input)); + std::string str; + fkyaml::detail::input_handler handler(fkyaml::detail::input_adapter(input)); REQUIRE(handler.get_range(4, str) == 0); REQUIRE(handler.get_cur_pos_in_line() == 3); @@ -237,8 +234,8 @@ TEST_CASE("InputHandlerTest_TestMultipleLinesTest", "[InputHandlerTest]") SECTION("first character is a newline code.") { char input[] = "\ntest\nfoo"; - pchar_input_handler::string_type str; - pchar_input_handler handler(fkyaml::detail::input_adapter(input)); + std::string str; + fkyaml::detail::input_handler handler(fkyaml::detail::input_adapter(input)); REQUIRE(handler.get_next() == 't'); REQUIRE(handler.get_cur_pos_in_line() == 0); diff --git a/test/unit_test/test_lexical_analyzer_class.cpp b/test/unit_test/test_lexical_analyzer_class.cpp index fef65756..64e1cd60 100644 --- a/test/unit_test/test_lexical_analyzer_class.cpp +++ b/test/unit_test/test_lexical_analyzer_class.cpp @@ -14,10 +14,7 @@ #endif #include -using pchar_lexer_t = - fkyaml::detail::lexical_analyzer>; -using str_lexer_t = - fkyaml::detail::lexical_analyzer>; +using lexer_t = fkyaml::detail::lexical_analyzer; TEST_CASE("LexicalAnalyzerClassTest_ScanYamlVersionDirectiveTest", "[LexicalAnalyzerClassTest]") { @@ -31,7 +28,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanYamlVersionDirectiveTest", "[LexicalAnal value_pair_t(std::string("%YAML 1.2\n"), std::string("1.2")), value_pair_t(std::string("%YAML 1.2 "), std::string("1.2"))); - str_lexer_t lexer(fkyaml::detail::input_adapter(value_pair.first)); + lexer_t lexer(fkyaml::detail::input_adapter(value_pair.first)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::YAML_VER_DIRECTIVE); @@ -49,7 +46,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanYamlVersionDirectiveTest", "[LexicalAnal std::string("%YAMR 1.2 \r\n"), std::string("%YANL 1.2 \n")); - str_lexer_t lexer(fkyaml::detail::input_adapter(buffer)); + lexer_t lexer(fkyaml::detail::input_adapter(buffer)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::INVALID_DIRECTIVE); @@ -69,7 +66,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanYamlVersionDirectiveTest", "[LexicalAnal std::string("%YAML1.2 "), std::string("%YAML AbC")); - str_lexer_t lexer(fkyaml::detail::input_adapter(buffer)); + lexer_t lexer(fkyaml::detail::input_adapter(buffer)); REQUIRE_THROWS_AS(lexer.get_next_token(), fkyaml::parse_error); } } @@ -80,7 +77,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanTagDirectiveTest", "[LexicalAnalyzerClas SECTION("Test nothrow expected tokens.") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter("%TAG ")); + lexer_t lexer(fkyaml::detail::input_adapter("%TAG ")); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::TAG_DIRECTIVE); @@ -92,7 +89,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanTagDirectiveTest", "[LexicalAnalyzerClas { auto buffer = GENERATE(std::string("%TUB"), std::string("%TAC")); - str_lexer_t lexer(fkyaml::detail::input_adapter(buffer)); + lexer_t lexer(fkyaml::detail::input_adapter(buffer)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::INVALID_DIRECTIVE); REQUIRE_NOTHROW(token = lexer.get_next_token()); @@ -101,7 +98,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanTagDirectiveTest", "[LexicalAnalyzerClas SECTION("Test nothrow expected tokens.") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter("%TAGE")); + lexer_t lexer(fkyaml::detail::input_adapter("%TAGE")); REQUIRE_THROWS_AS(lexer.get_next_token(), fkyaml::parse_error); } } @@ -112,7 +109,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanReservedDirectiveTest", "[LexicalAnalyze GENERATE(std::string("%TEST"), std::string("%1984\n"), std::string("%TEST4LIB\r"), std::string("%%ERROR\r\n")); fkyaml::detail::lexical_token_t token; - str_lexer_t lexer(fkyaml::detail::input_adapter(buffer)); + lexer_t lexer(fkyaml::detail::input_adapter(buffer)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::INVALID_DIRECTIVE); @@ -122,13 +119,13 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanReservedDirectiveTest", "[LexicalAnalyze TEST_CASE("LexicalAnalyzerClassTest_ScanEmptyDirectiveTest", "[LexicalAnalyzerClassTest]") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter("%")); + lexer_t lexer(fkyaml::detail::input_adapter("%")); REQUIRE_THROWS_AS(lexer.get_next_token(), fkyaml::parse_error); } TEST_CASE("LexicalAnalyzerClassTest_ScanEndOfDirectivesTest", "[LexicalAnalyzerClassTest]") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter("%YAML 1.2\n---\nfoo: bar")); + lexer_t lexer(fkyaml::detail::input_adapter("%YAML 1.2\n---\nfoo: bar")); fkyaml::detail::lexical_token_t token; REQUIRE_NOTHROW(token = lexer.get_next_token()); @@ -150,7 +147,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanEndOfDirectivesTest", "[LexicalAnalyzerC TEST_CASE("LexicalAnalyzerClassTest_ScanEndOfDocumentsTest", "[LexicalAnalyzerClassTest]") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter("%YAML 1.2\n---\n...")); + lexer_t lexer(fkyaml::detail::input_adapter("%YAML 1.2\n---\n...")); fkyaml::detail::lexical_token_t token; REQUIRE_NOTHROW(token = lexer.get_next_token()); @@ -170,77 +167,77 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanColonTest", "[LexicalAnalyzerClassTest]" SECTION("Test colon with half-width space.") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter(": ")); + lexer_t lexer(fkyaml::detail::input_adapter(": ")); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::KEY_SEPARATOR); } SECTION("Test colon with CR newline code.") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter(":\r")); + lexer_t lexer(fkyaml::detail::input_adapter(":\r")); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::KEY_SEPARATOR); } SECTION("Test colon with CRLF newline code.") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter(":\r\n")); + lexer_t lexer(fkyaml::detail::input_adapter(":\r\n")); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::KEY_SEPARATOR); } SECTION("Test colon with LF newline code.") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter(":\n")); + lexer_t lexer(fkyaml::detail::input_adapter(":\n")); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::KEY_SEPARATOR); } SECTION("Test colon with a comment and a CRLF newline code.") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter(": # comment\r\n")); + lexer_t lexer(fkyaml::detail::input_adapter(": # comment\r\n")); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::KEY_SEPARATOR); } SECTION("Test colon with a comment and a LF newline code.") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter(": # comment\n")); + lexer_t lexer(fkyaml::detail::input_adapter(": # comment\n")); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::KEY_SEPARATOR); } SECTION("Test colon with a comment and no newline code") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter(": # comment")); + lexer_t lexer(fkyaml::detail::input_adapter(": # comment")); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::KEY_SEPARATOR); } SECTION("Test colon with many spaces and a CRLF newline code.") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter(": \r\n")); + lexer_t lexer(fkyaml::detail::input_adapter(": \r\n")); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::KEY_SEPARATOR); } SECTION("Test colon with many spaces and a LF newline code.") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter(": \n")); + lexer_t lexer(fkyaml::detail::input_adapter(": \n")); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::KEY_SEPARATOR); } SECTION("Test colon with many spaces and no newline code.") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter(": ")); + lexer_t lexer(fkyaml::detail::input_adapter(": ")); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::KEY_SEPARATOR); } SECTION("Test colon with an always-safe character.") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter(":test")); + lexer_t lexer(fkyaml::detail::input_adapter(":test")); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); REQUIRE(lexer.get_string() == ":test"); @@ -250,7 +247,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanColonTest", "[LexicalAnalyzerClassTest]" { auto input = GENERATE(std::string(":,"), std::string(":{"), std::string(":}"), std::string(":["), std::string(":]")); - str_lexer_t lexer(fkyaml::detail::input_adapter(input)); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); REQUIRE(lexer.get_string() == input); @@ -260,7 +257,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanColonTest", "[LexicalAnalyzerClassTest]" { auto input = GENERATE( std::string("{:,"), std::string("{:{"), std::string("{:}"), std::string("{:["), std::string("{:]")); - str_lexer_t lexer(fkyaml::detail::input_adapter(input)); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::MAPPING_FLOW_BEGIN); REQUIRE_NOTHROW(token = lexer.get_next_token()); @@ -275,7 +272,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanNullTokenTest", "[LexicalAnalyzerClassTe SECTION("Test nothrow expected tokens.") { auto buffer = GENERATE(std::string("null"), std::string("Null"), std::string("NULL"), std::string("~")); - str_lexer_t lexer(fkyaml::detail::input_adapter(buffer)); + lexer_t lexer(fkyaml::detail::input_adapter(buffer)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::NULL_VALUE); @@ -285,7 +282,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanNullTokenTest", "[LexicalAnalyzerClassTe SECTION("Test nothrow unexpected tokens.") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter("test")); + lexer_t lexer(fkyaml::detail::input_adapter("test")); REQUIRE_NOTHROW(lexer.get_next_token()); REQUIRE_THROWS_AS(lexer.get_null(), fkyaml::parse_error); } @@ -298,7 +295,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanBooleanTrueTokenTest", "[LexicalAnalyzer SECTION("Test nothrow expected tokens.") { auto buffer = GENERATE(std::string("true"), std::string("True"), std::string("TRUE")); - str_lexer_t lexer(fkyaml::detail::input_adapter(buffer)); + lexer_t lexer(fkyaml::detail::input_adapter(buffer)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::BOOLEAN_VALUE); @@ -308,7 +305,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanBooleanTrueTokenTest", "[LexicalAnalyzer SECTION("Test nothrow unexpected tokens.") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter("test")); + lexer_t lexer(fkyaml::detail::input_adapter("test")); REQUIRE_NOTHROW(lexer.get_next_token()); REQUIRE_THROWS_AS(lexer.get_boolean(), fkyaml::parse_error); } @@ -321,7 +318,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanBooleanFalseTokenTest", "[LexicalAnalyze SECTION("Test nothrow expected tokens.") { auto buffer = GENERATE(std::string("false"), std::string("False"), std::string("FALSE")); - str_lexer_t lexer(fkyaml::detail::input_adapter(buffer)); + lexer_t lexer(fkyaml::detail::input_adapter(buffer)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::BOOLEAN_VALUE); @@ -331,7 +328,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanBooleanFalseTokenTest", "[LexicalAnalyze SECTION("Test nothrow unexpected tokens.") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter("test")); + lexer_t lexer(fkyaml::detail::input_adapter("test")); REQUIRE_NOTHROW(lexer.get_next_token()); REQUIRE_THROWS_AS(lexer.get_boolean(), fkyaml::parse_error); } @@ -352,7 +349,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanIntegerTokenTest", "[LexicalAnalyzerClas value_pair_t(std::string("643"), 643), value_pair_t(std::string("+123"), 123)); - str_lexer_t lexer(fkyaml::detail::input_adapter(value_pair.first)); + lexer_t lexer(fkyaml::detail::input_adapter(value_pair.first)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::INTEGER_VALUE); @@ -362,7 +359,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanIntegerTokenTest", "[LexicalAnalyzerClas SECTION("Test nothrow unexpected tokens.") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter("test")); + lexer_t lexer(fkyaml::detail::input_adapter("test")); REQUIRE_NOTHROW(lexer.get_next_token()); REQUIRE_THROWS_AS(lexer.get_integer(), fkyaml::parse_error); } @@ -377,7 +374,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanOctalNumberTokenTest", "[LexicalAnalyzer value_pair_t(std::string("0o77772"), 077772), value_pair_t(std::string("0o672}"), 0672)); - str_lexer_t lexer(fkyaml::detail::input_adapter(value_pair.first)); + lexer_t lexer(fkyaml::detail::input_adapter(value_pair.first)); fkyaml::detail::lexical_token_t token; REQUIRE_NOTHROW(token = lexer.get_next_token()); @@ -394,7 +391,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanHexadecimalNumberTokenTest", "[LexicalAn value_pair_t(std::string("0xa7F3"), 0xa7F3), value_pair_t(std::string("0xFf29Bc"), 0xFf29Bc)); - str_lexer_t lexer(fkyaml::detail::input_adapter(value_pair.first)); + lexer_t lexer(fkyaml::detail::input_adapter(value_pair.first)); fkyaml::detail::lexical_token_t token; REQUIRE_NOTHROW(token = lexer.get_next_token()); @@ -418,7 +415,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanFloatNumberTokenTest", "[LexicalAnalyzer value_pair_t(std::string("3.95E3"), 3.95e3), value_pair_t(std::string("1.863e+3"), 1.863e+3)); - str_lexer_t lexer(fkyaml::detail::input_adapter(value_pair.first)); + lexer_t lexer(fkyaml::detail::input_adapter(value_pair.first)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::FLOAT_NUMBER_VALUE); @@ -429,13 +426,13 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanFloatNumberTokenTest", "[LexicalAnalyzer SECTION("Test nothrow unexpected float tokens.") { auto input = GENERATE(std::string("0."), std::string("1.23e"), std::string("1.2e-z")); - str_lexer_t lexer(fkyaml::detail::input_adapter(input)); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_THROWS_AS(lexer.get_next_token(), fkyaml::parse_error); } SECTION("Test non-float tokens.") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter("test")); + lexer_t lexer(fkyaml::detail::input_adapter("test")); REQUIRE_NOTHROW(lexer.get_next_token()); REQUIRE_THROWS_AS(lexer.get_float_number(), fkyaml::parse_error); } @@ -450,7 +447,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanInfinityTokenTest", "[LexicalAnalyzerCla std::string("-.inf"), std::string("-.Inf"), std::string("-.INF")); - str_lexer_t lexer(fkyaml::detail::input_adapter(buffer)); + lexer_t lexer(fkyaml::detail::input_adapter(buffer)); SECTION("Test nothrow expected buffers.") { @@ -468,7 +465,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanInfinityTokenTest", "[LexicalAnalyzerCla TEST_CASE("LexicalAnalyzerClassTest_ScanNaNTokenTest", "[LexicalAnalyzerClassTest]") { auto buffer = GENERATE(std::string(".nan"), std::string(".NaN"), std::string(".NAN")); - str_lexer_t lexer(fkyaml::detail::input_adapter(buffer)); + lexer_t lexer(fkyaml::detail::input_adapter(buffer)); SECTION("Test nothrow expected buffers.") { @@ -564,7 +561,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanStringTokenTest", "[LexicalAnalyzerClass value_pair_t(std::string("\"foo\\Pbar\""), fkyaml::node::string_type("foo\u2029bar")), value_pair_t(std::string("\"\\x30\\x2B\\x6d\""), fkyaml::node::string_type("0+m"))); - str_lexer_t lexer(fkyaml::detail::input_adapter(value_pair.first)); + lexer_t lexer(fkyaml::detail::input_adapter(value_pair.first)); fkyaml::detail::lexical_token_t token; REQUIRE_NOTHROW(token = lexer.get_next_token()); @@ -622,7 +619,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanMultiByteCharStringTokenTest", "[Lexical char_traits_t::to_char_type(0xBF), char_traits_t::to_char_type(0xBF)}); - str_lexer_t lexer(fkyaml::detail::input_adapter(mb_char)); + lexer_t lexer(fkyaml::detail::input_adapter(mb_char)); fkyaml::detail::lexical_token_t token; REQUIRE_NOTHROW(token = lexer.get_next_token()); @@ -721,7 +718,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanEscapedUnicodeStringTokenTest", "[Lexica char_traits_t::to_char_type(0xBF), char_traits_t::to_char_type(0xBF)})); - str_lexer_t lexer(fkyaml::detail::input_adapter(value_pair.first)); + lexer_t lexer(fkyaml::detail::input_adapter(value_pair.first)); fkyaml::detail::lexical_token_t token; REQUIRE_NOTHROW(token = lexer.get_next_token()); @@ -748,14 +745,14 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanInvalidStringTokenTest", "[LexicalAnalyz std::string("\'\\t\'"), std::string("\"\\Q\"")); - str_lexer_t lexer(fkyaml::detail::input_adapter(buffer)); + lexer_t lexer(fkyaml::detail::input_adapter(buffer)); REQUIRE_THROWS_AS(lexer.get_next_token(), fkyaml::parse_error); } SECTION("invalid_encoding expected") { std::string buffer = "\"\\U00110000\""; - str_lexer_t lexer(fkyaml::detail::input_adapter(buffer)); + lexer_t lexer(fkyaml::detail::input_adapter(buffer)); REQUIRE_THROWS_AS(lexer.get_next_token(), fkyaml::invalid_encoding); } } @@ -888,7 +885,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanInvalidMultiByteCharStringTokenTest", "[ char_traits_t::to_char_type(0x80), char_traits_t::to_char_type(0x80)}); - str_lexer_t lexer(fkyaml::detail::input_adapter(mb_char)); + lexer_t lexer(fkyaml::detail::input_adapter(mb_char)); REQUIRE_THROWS_AS(lexer.get_next_token(), fkyaml::invalid_encoding); } @@ -926,7 +923,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanUnescapedControlCharacter", "[LexicalAna std::string buffer("test"); buffer.push_back(unescaped_char); - str_lexer_t lexer(fkyaml::detail::input_adapter(buffer)); + lexer_t lexer(fkyaml::detail::input_adapter(buffer)); REQUIRE_THROWS_AS(lexer.get_next_token(), fkyaml::parse_error); } @@ -938,7 +935,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanLiteralStringScalar", "[LexicalAnalyzerC { const char input[] = "|-\r\n" " \r\n"; - pchar_lexer_t lexer(fkyaml::detail::input_adapter(input)); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -949,7 +946,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanLiteralStringScalar", "[LexicalAnalyzerC { const char input[] = "|\r\n" " \r\n"; - pchar_lexer_t lexer(fkyaml::detail::input_adapter(input)); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -960,7 +957,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanLiteralStringScalar", "[LexicalAnalyzerC { const char input[] = "|+\r\n" " \r\n"; - pchar_lexer_t lexer(fkyaml::detail::input_adapter(input)); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -972,7 +969,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanLiteralStringScalar", "[LexicalAnalyzerC const char input[] = "|0\n" "foo"; - pchar_lexer_t lexer(fkyaml::detail::input_adapter(input)); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_THROWS_AS(lexer.get_next_token(), fkyaml::parse_error); } @@ -981,7 +978,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanLiteralStringScalar", "[LexicalAnalyzerC const char input[] = "|2\n" " foo"; - pchar_lexer_t lexer(fkyaml::detail::input_adapter(input)); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_THROWS_AS(lexer.get_next_token(), fkyaml::parse_error); } @@ -990,7 +987,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanLiteralStringScalar", "[LexicalAnalyzerC const char input[] = "|2\n" " foo\n" " bar\n"; - pchar_lexer_t lexer(fkyaml::detail::input_adapter(input)); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1002,7 +999,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanLiteralStringScalar", "[LexicalAnalyzerC const char input[] = "|\r\n" " foo\r\n" " bar\r\n"; - pchar_lexer_t lexer(fkyaml::detail::input_adapter(input)); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1018,7 +1015,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanLiteralStringScalar", "[LexicalAnalyzerC "\n" " baz\n" "\n"; - pchar_lexer_t lexer(fkyaml::detail::input_adapter(input)); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1033,7 +1030,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanLiteralStringScalar", "[LexicalAnalyzerC "\n" " baz\n" "\n"; - pchar_lexer_t lexer(fkyaml::detail::input_adapter(input)); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1049,7 +1046,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanLiteralStringScalar", "[LexicalAnalyzerC "\n" " baz\n" "\n"; - pchar_lexer_t lexer(fkyaml::detail::input_adapter(input)); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1064,7 +1061,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanLiteralStringScalar", "[LexicalAnalyzerC "\n" " baz\n" "\n"; - pchar_lexer_t lexer(fkyaml::detail::input_adapter(input)); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1078,7 +1075,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanLiteralStringScalar", "[LexicalAnalyzerC " bar\n" "\n" " baz"; - pchar_lexer_t lexer(fkyaml::detail::input_adapter(input)); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1094,7 +1091,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanLiteralStringScalar", "[LexicalAnalyzerC "\n" " baz\n" "\n"; - pchar_lexer_t lexer(fkyaml::detail::input_adapter(input)); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1109,7 +1106,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanLiteralStringScalar", "[LexicalAnalyzerC "\n" " baz\n" "\n"; - pchar_lexer_t lexer(fkyaml::detail::input_adapter(input)); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1125,7 +1122,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanFoldedStringScalar", "[LexicalAnalyzerCl { const char input[] = ">-\r\n" " \r\n"; - pchar_lexer_t lexer(fkyaml::detail::input_adapter(input)); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1136,7 +1133,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanFoldedStringScalar", "[LexicalAnalyzerCl { const char input[] = ">\r\n" " \r\n"; - pchar_lexer_t lexer(fkyaml::detail::input_adapter(input)); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1147,7 +1144,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanFoldedStringScalar", "[LexicalAnalyzerCl { const char input[] = ">+\r\n" " \r\n"; - pchar_lexer_t lexer(fkyaml::detail::input_adapter(input)); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1159,7 +1156,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanFoldedStringScalar", "[LexicalAnalyzerCl const char input[] = "|0\n" "foo"; - pchar_lexer_t lexer(fkyaml::detail::input_adapter(input)); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_THROWS_AS(lexer.get_next_token(), fkyaml::parse_error); } @@ -1168,7 +1165,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanFoldedStringScalar", "[LexicalAnalyzerCl const char input[] = ">2\n" " foo"; - pchar_lexer_t lexer(fkyaml::detail::input_adapter(input)); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_THROWS_AS(lexer.get_next_token(), fkyaml::parse_error); } @@ -1177,7 +1174,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanFoldedStringScalar", "[LexicalAnalyzerCl const char input[] = ">2\n" " foo\n" " bar\n"; - pchar_lexer_t lexer(fkyaml::detail::input_adapter(input)); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1189,7 +1186,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanFoldedStringScalar", "[LexicalAnalyzerCl const char input[] = ">2\n" " foo\n" " bar\n"; - pchar_lexer_t lexer(fkyaml::detail::input_adapter(input)); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1204,7 +1201,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanFoldedStringScalar", "[LexicalAnalyzerCl "\r\n" " bar\r\n" " \r\n"; - pchar_lexer_t lexer(fkyaml::detail::input_adapter(input)); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1218,7 +1215,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanFoldedStringScalar", "[LexicalAnalyzerCl " bar\n" " \n" "\n"; - pchar_lexer_t lexer(fkyaml::detail::input_adapter(input)); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1232,7 +1229,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanFoldedStringScalar", "[LexicalAnalyzerCl " bar\n" " \n" "\n"; - pchar_lexer_t lexer(fkyaml::detail::input_adapter(input)); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1246,7 +1243,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanFoldedStringScalar", "[LexicalAnalyzerCl " bar\n" " \n" "\n"; - pchar_lexer_t lexer(fkyaml::detail::input_adapter(input)); + lexer_t lexer(fkyaml::detail::input_adapter(input)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1260,7 +1257,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanAnchorTokenTest", "[LexicalAnalyzerClass SECTION("Test nothorw expected tokens with an anchor.") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter("test: &anchor foo")); + lexer_t lexer(fkyaml::detail::input_adapter("test: &anchor foo")); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1288,7 +1285,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanAnchorTokenTest", "[LexicalAnalyzerClass { auto buffer = GENERATE(std::string("test: &anchor"), std::string("test: &anchor\r\n"), std::string("test: &anchor\n")); - str_lexer_t lexer(fkyaml::detail::input_adapter(buffer)); + lexer_t lexer(fkyaml::detail::input_adapter(buffer)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1308,7 +1305,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanAliasTokenTest", "[LexicalAnalyzerClassT SECTION("Test nothrow expected tokens with an alias.") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter("test: *anchor")); + lexer_t lexer(fkyaml::detail::input_adapter("test: *anchor")); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1331,7 +1328,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanAliasTokenTest", "[LexicalAnalyzerClassT { auto buffer = GENERATE( std::string("test: *"), std::string("test: *\r\n"), std::string("test: *\n"), std::string("test: * ")); - str_lexer_t lexer(fkyaml::detail::input_adapter(buffer)); + lexer_t lexer(fkyaml::detail::input_adapter(buffer)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1349,7 +1346,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanCommentTokenTest", "[LexicalAnalyzerClas { auto buffer = GENERATE( std::string("# comment\r"), std::string("# comment\r\n"), std::string("# comment\n"), std::string("# comment")); - str_lexer_t lexer(fkyaml::detail::input_adapter(buffer)); + lexer_t lexer(fkyaml::detail::input_adapter(buffer)); fkyaml::detail::lexical_token_t token; REQUIRE_NOTHROW(token = lexer.get_next_token()); @@ -1362,13 +1359,13 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanCommentTokenTest", "[LexicalAnalyzerClas TEST_CASE("LexicalAnalyzerClassTest_ScanReservedIndicatorTokenTest", "[LexicalAnalyzerClassTest]") { auto buffer = GENERATE(std::string("@invalid"), std::string("`invalid")); - str_lexer_t lexer(fkyaml::detail::input_adapter(buffer)); + lexer_t lexer(fkyaml::detail::input_adapter(buffer)); REQUIRE_THROWS_AS(lexer.get_next_token(), fkyaml::parse_error); } TEST_CASE("LexicalAnalyzerClassTest_ScanKeyBooleanValuePairTokenTest", "[LexicalAnalyzerClassTest]") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter("test: true")); + lexer_t lexer(fkyaml::detail::input_adapter("test: true")); fkyaml::detail::lexical_token_t token; REQUIRE_NOTHROW(token = lexer.get_next_token()); @@ -1390,7 +1387,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanKeyBooleanValuePairTokenTest", "[Lexical TEST_CASE("LexicalAnalyzerClassTest_ScanKeyIntegerValuePairTokenTest", "[LexicalAnalyzerClassTest]") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter("test: -5784")); + lexer_t lexer(fkyaml::detail::input_adapter("test: -5784")); fkyaml::detail::lexical_token_t token; REQUIRE_NOTHROW(token = lexer.get_next_token()); @@ -1412,7 +1409,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanKeyIntegerValuePairTokenTest", "[Lexical TEST_CASE("LexicalAnalyzerClassTest_ScanKeyFloatNumberValuePairTokenTest", "[LexicalAnalyzerClassTest]") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter("test: -5.58e-3")); + lexer_t lexer(fkyaml::detail::input_adapter("test: -5.58e-3")); fkyaml::detail::lexical_token_t token; REQUIRE_NOTHROW(token = lexer.get_next_token()); @@ -1434,7 +1431,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanKeyFloatNumberValuePairTokenTest", "[Lex TEST_CASE("LexicalAnalyzerClassTest_ScanKeyStringValuePairTokenTest", "[LexicalAnalyzerClassTest]") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter("test: \"some value\"")); + lexer_t lexer(fkyaml::detail::input_adapter("test: \"some value\"")); fkyaml::detail::lexical_token_t token; REQUIRE_NOTHROW(token = lexer.get_next_token()); @@ -1460,7 +1457,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanFlowSequenceTokenTest", "[LexicalAnalyze SECTION("Input source No.1.") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter("test: [ foo, bar ]")); + lexer_t lexer(fkyaml::detail::input_adapter("test: [ foo, bar ]")); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1495,8 +1492,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanFlowSequenceTokenTest", "[LexicalAnalyze SECTION("Input source No.2.") { - pchar_lexer_t lexer( - fkyaml::detail::input_adapter("test: [ { foo: one, bar: false }, { foo: two, bar: true } ]")); + lexer_t lexer(fkyaml::detail::input_adapter("test: [ { foo: one, bar: false }, { foo: two, bar: true } ]")); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1596,7 +1592,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanFlowMappingTokenTest", "[LexicalAnalyzer SECTION("Input source No.1.") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter("test: { bool: true, foo: bar, pi: 3.14 }")); + lexer_t lexer(fkyaml::detail::input_adapter("test: { bool: true, foo: bar, pi: 3.14 }")); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1663,7 +1659,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanFlowMappingTokenTest", "[LexicalAnalyzer SECTION("Input source No.2.") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter("test: {foo: bar}")); + lexer_t lexer(fkyaml::detail::input_adapter("test: {foo: bar}")); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1705,7 +1701,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanBlockSequenceTokenTest", "[LexicalAnalyz { auto buffer = GENERATE(std::string("test:\n - foo\n - bar"), std::string("test:\r\n - foo\r\n - bar")); - str_lexer_t lexer(fkyaml::detail::input_adapter(buffer)); + lexer_t lexer(fkyaml::detail::input_adapter(buffer)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1741,7 +1737,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanBlockSequenceTokenTest", "[LexicalAnalyz std::string("test:\r\n - foo: one\r\n bar: false\r\n - foo: two\r\n bar: true"), std::string("test:\n - foo: one\n bar: false\n - foo: two\n bar: true")); - str_lexer_t lexer(fkyaml::detail::input_adapter(buffer)); + lexer_t lexer(fkyaml::detail::input_adapter(buffer)); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1820,7 +1816,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanBlockMappingTokenTest", "[LexicalAnalyze SECTION("Input source No.1.") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter("test:\n bool: true\n foo: \'bar\'\n pi: 3.14")); + lexer_t lexer(fkyaml::detail::input_adapter("test:\n bool: true\n foo: \'bar\'\n pi: 3.14")); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1875,7 +1871,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanBlockMappingTokenTest", "[LexicalAnalyze SECTION("input soure No.2.") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter("test: |\n a literal scalar.\nfoo: \'bar\'\npi: 3.14")); + lexer_t lexer(fkyaml::detail::input_adapter("test: |\n a literal scalar.\nfoo: \'bar\'\npi: 3.14")); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE); @@ -1922,7 +1918,7 @@ TEST_CASE("LexicalAnalyzerClassTest_ScanBlockMappingTokenTest", "[LexicalAnalyze SECTION("input soure No.3.") { - pchar_lexer_t lexer(fkyaml::detail::input_adapter("test: >\n a literal scalar.\nfoo: \'bar\'\npi: 3.14")); + lexer_t lexer(fkyaml::detail::input_adapter("test: >\n a literal scalar.\nfoo: \'bar\'\npi: 3.14")); REQUIRE_NOTHROW(token = lexer.get_next_token()); REQUIRE(token == fkyaml::detail::lexical_token_t::STRING_VALUE);