From 4058a8fa125acbf3b862a5e8de4029977e9ffb5a Mon Sep 17 00:00:00 2001 From: Christian Parpart Date: Thu, 28 Mar 2024 16:40:53 +0100 Subject: [PATCH] wip Signed-off-by: Christian Parpart --- src/vtbackend/Line.cpp | 134 +++++++++++++++++++------------ src/vtbackend/Screen_test.cpp | 12 ++- src/vtbackend/cell/CellConcept.h | 1 + src/vtbackend/cell/CompactCell.h | 1 + src/vtbackend/cell/SimpleCell.h | 2 + src/vtparser/CMakeLists.txt | 4 +- src/vtparser/Parser-impl.h | 43 ++++++++-- src/vtparser/Parser.h | 4 + 8 files changed, 142 insertions(+), 59 deletions(-) diff --git a/src/vtbackend/Line.cpp b/src/vtbackend/Line.cpp index f662766743..bbe10fb329 100644 --- a/src/vtbackend/Line.cpp +++ b/src/vtbackend/Line.cpp @@ -1,8 +1,10 @@ // SPDX-License-Identifier: Apache-2.0 #include #include +#include #include +#include #include #include #include @@ -157,76 +159,106 @@ std::string Line::toUtf8Trimmed(bool stripLeadingSpaces, bool stripTrailin } template -InflatedLineBuffer inflate(TrivialLineBuffer const& input) +struct TrivialLineInflater { - static constexpr char32_t ReplacementCharacter { 0xFFFD }; + TrivialLineBuffer const& input; + InflatedLineBuffer columns; - auto columns = InflatedLineBuffer {}; - columns.reserve(unbox(input.displayWidth)); + explicit TrivialLineInflater(TrivialLineBuffer const& input): input { input } + { + columns.reserve(unbox(input.displayWidth)); + } - auto lastChar = char32_t { 0 }; - auto utf8DecoderState = unicode::utf8_decoder_state {}; - auto gapPending = 0; + InflatedLineBuffer inflate() && + { + vtParserLog()("Inflating TrivialLineBuffer: '{}'", input.text.data() ? 
crispy::escape(input.text.data()) : ""); + auto lineSegmenter = unicode::grapheme_line_segmenter { *this, input.text.view() }; + auto result = lineSegmenter.process(std::numeric_limits::max()); + assert(result.stop_condition == unicode::StopCondition::EndOfInput); + vtParserLog()("Inflated {}/{} columns", columns.size(), input.displayWidth); + + // Fill remaining columns + for (unsigned i = columns.size(); i < unbox(input.displayWidth); ++i) + { + columns.emplace_back(input.fillAttributes); + } + assert(columns.size() == unbox(input.displayWidth)); + + return std::move(columns); + } - for (char const ch: input.text.view()) + void on_invalid(std::string_view /*invalid*/) noexcept { - unicode::ConvertResult const r = unicode::from_utf8(utf8DecoderState, static_cast(ch)); - if (holds_alternative(r)) - continue; + fmt::print("inflate invalid\n"); + static constexpr char32_t ReplacementCharacter { 0xFFFD }; - auto const nextChar = - holds_alternative(r) ? get(r).value : ReplacementCharacter; + columns.emplace_back(); + columns.back().setHyperlink(input.hyperlink); + columns.back().write(input.textAttributes, ReplacementCharacter, 1); + } - if (unicode::grapheme_segmenter::breakable(lastChar, nextChar)) + void on_ascii(std::string_view text) noexcept + { + fmt::print("inflate ASCII: '{}'\n", text); + for (auto const ch: text) { - while (gapPending > 0) - { - columns.emplace_back(input.textAttributes.with(CellFlag::WideCharContinuation), - input.hyperlink); - --gapPending; - } - auto const charWidth = unicode::width(nextChar); - columns.emplace_back(Cell {}); + columns.emplace_back(); columns.back().setHyperlink(input.hyperlink); - columns.back().write(input.textAttributes, nextChar, static_cast(charWidth)); - gapPending = charWidth - 1; + columns.back().write(input.textAttributes, ch, 1); } - else + } + + void on_grapheme_cluster(std::string_view text, unsigned width) noexcept + { + fmt::print("inflate GC: '{}', width: {}\n", text, width); + 
columns.emplace_back(input.textAttributes, input.hyperlink); + Cell& cell = columns.back(); + cell.setHyperlink(input.hyperlink); + + auto utf8DecoderState = unicode::utf8_decoder_state {}; + for (auto const ch: text) { - Cell& prevCell = columns.back(); - auto const extendedWidth = prevCell.appendCharacter(nextChar); - if (extendedWidth > 0) + unicode::ConvertResult const r = unicode::from_utf8(utf8DecoderState, static_cast(ch)); + if (auto const* cp = std::get_if(&r)) { - auto const cellsAvailable = *input.displayWidth - static_cast(columns.size()) + 1; - auto const n = min(extendedWidth, cellsAvailable); - for (int i = 1; i < n; ++i) - { - columns.emplace_back(Cell { input.textAttributes }); - columns.back().setHyperlink(input.hyperlink); - } + std::cout << fmt::format(" - codepoint: U+{:X}\n", (unsigned) cp->value); + if (cell.codepointCount() == 0) + cell.setCharacter(cp->value); + else + (void) cell.appendCharacter(cp->value); } } - lastChar = nextChar; - } - while (gapPending > 0) - { - columns.emplace_back(Cell { input.textAttributes, input.hyperlink }); - --gapPending; - } + fmt::print(" -> result (UTF-8): \"{}\"\n", cell.toUtf8()); - assert(columns.size() == unbox(input.usedColumns)); - assert(unbox(input.displayWidth) > 0); - - while (columns.size() < unbox(input.displayWidth)) - columns.emplace_back(Cell { input.fillAttributes }); + // Fill remaining columns for wide characters + for (unsigned i = 1; i < width; ++i) + { + std::cout << fmt::format(" - continuation\n"); + columns.emplace_back(input.textAttributes.with(CellFlag::WideCharContinuation), input.hyperlink); + cell.setWidth(width); + } + } +}; - return columns; +template +InflatedLineBuffer inflate(TrivialLineBuffer const& input) +{ + return TrivialLineInflater(input).inflate(); } + } // end namespace vtbackend +// {{{ Explicit instantiation of Line for supported cell types. 
#include -template class vtbackend::Line; - #include -template class vtbackend::Line; + +namespace vtbackend +{ + +template class Line; +template class Line; +template InflatedLineBuffer inflate(TrivialLineBuffer const& input); + +} // namespace vtbackend +// }}} diff --git a/src/vtbackend/Screen_test.cpp b/src/vtbackend/Screen_test.cpp index ea1a8704d9..82c3a7b323 100644 --- a/src/vtbackend/Screen_test.cpp +++ b/src/vtbackend/Screen_test.cpp @@ -578,9 +578,17 @@ TEST_CASE("AppendChar.emoji_1", "[screen]") auto mock = MockTerm { PageSize { LineCount(1), ColumnCount(3) } }; auto& screen = mock.terminal.primaryScreen(); - mock.writeToScreen(U"\U0001F600"); + mock.writeToScreen("\xf0\x9f\x98\x80"); // U+1F600 - auto const& c1 = screen.at(LineOffset(0), ColumnOffset(0)); + Line const& line = screen.grid().lineAt(LineOffset(0)); + CHECK(line.isTrivialBuffer()); + TrivialLineBuffer const& trivialBuffer = line.trivialBuffer(); + CHECK(trivialBuffer.usedColumns == ColumnCount(2)); + CHECK(trivialBuffer.text.view() == "\xf0\x9f\x98\x80"); + + Line::InflatedBuffer const& inflated = line.inflatedBuffer(); + CompactCell const& c1 = inflated.at(0); + // auto const& c1 = screen.at(LineOffset(0), ColumnOffset(0)); CHECK(c1.codepoints() == U"\U0001F600"); CHECK(c1.width() == 2); REQUIRE(screen.logicalCursorPosition() == CellLocation { LineOffset(0), ColumnOffset(2) }); diff --git a/src/vtbackend/cell/CellConcept.h b/src/vtbackend/cell/CellConcept.h index 05d5bdd064..cb2c2cc125 100644 --- a/src/vtbackend/cell/CellConcept.h +++ b/src/vtbackend/cell/CellConcept.h @@ -60,6 +60,7 @@ concept CellConcept = requires(T t, T const& u) t.setCharacter(char32_t{}); { t.appendCharacter(char32_t{}) } -> std::same_as; + // TODO(pr) rename appendCharacter function to extendGraphemeCluster(codepoint) { u.toUtf8() } -> std::convertible_to; diff --git a/src/vtbackend/cell/CompactCell.h b/src/vtbackend/cell/CompactCell.h index 820a3a125f..d1cdf6a3f7 100644 --- a/src/vtbackend/cell/CompactCell.h +++ 
b/src/vtbackend/cell/CompactCell.h @@ -321,6 +321,7 @@ inline void CompactCell::setCharacter(char32_t codepoint) noexcept inline int CompactCell::appendCharacter(char32_t codepoint) noexcept { assert(codepoint != 0); + assert(_codepoint != 0); CellExtra& ext = extra(); if (ext.codepoints.size() < MaxCodepoints - 1) diff --git a/src/vtbackend/cell/SimpleCell.h b/src/vtbackend/cell/SimpleCell.h index a8b7cd8885..dda5ee0410 100644 --- a/src/vtbackend/cell/SimpleCell.h +++ b/src/vtbackend/cell/SimpleCell.h @@ -166,6 +166,8 @@ inline void SimpleCell::setCharacter(char32_t codepoint) inline int SimpleCell::appendCharacter(char32_t codepoint) { + assert(codepoint != 0); + assert(!_codepoints.empty() && "Use setCharacter() for first character."); _codepoints.push_back(codepoint); auto const diff = CellUtil::computeWidthChange(*this, codepoint); diff --git a/src/vtparser/CMakeLists.txt b/src/vtparser/CMakeLists.txt index 734aea8da5..8a250e07f4 100644 --- a/src/vtparser/CMakeLists.txt +++ b/src/vtparser/CMakeLists.txt @@ -13,6 +13,7 @@ target_link_libraries(vtparser PUBLIC fmt::fmt-header-only range-v3::range-v3 unicode::unicode + crispy::core ) target_include_directories(vtparser PUBLIC $ @@ -24,7 +25,8 @@ if(VTPARSER_TESTING) enable_testing() add_executable(vtparser_test Parser_test.cpp + test_main.cpp ) - target_link_libraries(vtparser_test vtparser Catch2::Catch2WithMain) + target_link_libraries(vtparser_test vtparser Catch2::Catch2) add_test(vtparser_test ./vtparser_test) endif() diff --git a/src/vtparser/Parser-impl.h b/src/vtparser/Parser-impl.h index 2c37dc1d42..91a0551fef 100644 --- a/src/vtparser/Parser-impl.h +++ b/src/vtparser/Parser-impl.h @@ -365,6 +365,16 @@ template auto Parser::parseBulkText(char const* begin, char const* end) noexcept -> std::tuple { + // auto constexpr StopConditionStr = [](unicode::StopCondition value) -> std::string_view { + // switch (value) + // { + // case unicode::StopCondition::UnexpectedInput: return "UnexpectedInput"; + // case 
unicode::StopCondition::EndOfInput: return "EndOfInput"; + // case unicode::StopCondition::EndOfWidth: return "EndOfWidth"; + // } + // return "Unknown"; + // }; + auto const* input = begin; if (_state != State::Ground) return { ProcessKind::FallbackToFSM, 0 }; @@ -375,11 +385,28 @@ auto Parser::parseBulkText(char const* begin, auto const chunk = std::string_view(input, static_cast(std::distance(input, end))); - _graphemeLineSegmenter.reset(chunk); - unicode::grapheme_segmentation_result result = _graphemeLineSegmenter.process(maxCharCount); - auto const cellCount = result.width; + if (_graphemeLineSegmenter.next() == begin) + _graphemeLineSegmenter.expand_buffer_by(chunk.size()); + else + _graphemeLineSegmenter.reset(chunk); + // if (_graphemeLineSegmenter.end() == begin) + // _graphemeLineSegmenter.expand_buffer_by(chunk.size()); + // else + // _graphemeLineSegmenter.reset(chunk); + // TODO(pr) What if the last call to parseBulkText was only a partial read, and we have + // more text to read? Then we should not just call reset() but expand_buffer_by(). + // _graphemeLineSegmenter.reset(chunk); + + unicode::grapheme_segmentation_result const result = _graphemeLineSegmenter.process(maxCharCount); + unicode::grapheme_segmentation_result const flushResult = + _graphemeLineSegmenter.flush(maxCharCount - result.width); + // TODO(pr) this flush should only happen if non-text was received, e.g. a control sequence, or + // if the last codepoint was fully processed. Otherwise, we should not flush, but + // continue processing the next codepoint (in the NEXT call). 
+ + auto const cellCount = result.width + flushResult.width; auto const* subStart = result.text.data(); - auto const* subEnd = subStart + result.text.size(); + auto const* subEnd = subStart + result.text.size() + flushResult.text.size(); if (result.text.empty()) return { ProcessKind::FallbackToFSM, 0 }; @@ -400,7 +427,10 @@ auto Parser::parseBulkText(char const* begin, if (!_graphemeLineSegmenter.is_utf8_byte_pending()) { if (!text.empty()) + { + vtTraceParserLog()("Printing fast-scanned text \"{}\" with {} cells.", text, cellCount); _eventListener.print(text, cellCount); + } // This optimization is for the `cat`-people. // It further optimizes the throughput performance by bypassing @@ -411,7 +441,7 @@ auto Parser::parseBulkText(char const* begin, _eventListener.execute(*input++); } - auto const count = static_cast(std::distance(input, _graphemeLineSegmenter.next())); + auto const count = static_cast(std::distance(input, _graphemeLineSegmenter.next())); return { ProcessKind::ContinueBulk, count }; } @@ -437,6 +467,9 @@ void Parser::handle(ActionClass actionClass, (void) actionClass; auto const ch = static_cast(codepoint); + if (vtTraceParserLog) + vtTraceParserLog()("Parser.handle: {} {} {:X}", actionClass, action, (unsigned) ch); + switch (action) { case Action::GroundStart: _graphemeLineSegmenter.reset_last_codepoint_hint(); break; diff --git a/src/vtparser/Parser.h b/src/vtparser/Parser.h index 0d81a7dc50..8ca2e8d6a0 100644 --- a/src/vtparser/Parser.h +++ b/src/vtparser/Parser.h @@ -1,6 +1,8 @@ // SPDX-License-Identifier: Apache-2.0 #pragma once +#include + #include #include @@ -19,6 +21,8 @@ namespace vtparser { +auto const inline vtTraceParserLog = logstore::category("vt.trace.parser", "Logs terminal parser trace."); + // NOLINTBEGIN(readability-identifier-naming) enum class State : uint8_t {