Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
Signed-off-by: Christian Parpart <christian@parpart.family>
  • Loading branch information
christianparpart committed Mar 28, 2024
1 parent 4f245c4 commit 4058a8f
Show file tree
Hide file tree
Showing 8 changed files with 142 additions and 59 deletions.
134 changes: 83 additions & 51 deletions src/vtbackend/Line.cpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
// SPDX-License-Identifier: Apache-2.0
#include <vtbackend/GraphicsAttributes.h>
#include <vtbackend/Line.h>
#include <vtbackend/logging.h>
#include <vtbackend/primitives.h>

#include <libunicode/grapheme_line_segmenter.h>
#include <libunicode/grapheme_segmenter.h>
#include <libunicode/utf8.h>
#include <libunicode/width.h>
Expand Down Expand Up @@ -157,76 +159,106 @@ std::string Line<Cell>::toUtf8Trimmed(bool stripLeadingSpaces, bool stripTrailin
}

template <typename Cell>
InflatedLineBuffer<Cell> inflate(TrivialLineBuffer const& input)
struct TrivialLineInflater
{
static constexpr char32_t ReplacementCharacter { 0xFFFD };
TrivialLineBuffer const& input;
InflatedLineBuffer<Cell> columns;

auto columns = InflatedLineBuffer<Cell> {};
columns.reserve(unbox<size_t>(input.displayWidth));
/// Binds this inflater to @p input and pre-allocates the output storage.
///
/// The `input` member is a reference, so the TrivialLineBuffer passed here must
/// outlive this inflater. `columns` gets capacity reserved for
/// `input.displayWidth` cells up front so the later emplace_back calls do not
/// need to reallocate while the line is being filled.
explicit TrivialLineInflater(TrivialLineBuffer const& input): input { input }
{
columns.reserve(unbox<size_t>(input.displayWidth));
}

auto lastChar = char32_t { 0 };
auto utf8DecoderState = unicode::utf8_decoder_state {};
auto gapPending = 0;
/// Expands the bound TrivialLineBuffer into one cell per display column.
///
/// Runs a unicode::grapheme_line_segmenter over `input.text`, handing it `*this`
/// (presumably so it reports text through the on_invalid / on_ascii /
/// on_grapheme_cluster members, which append to `columns` -- confirm against
/// libunicode). Afterwards the line is padded with cells built from
/// `input.fillAttributes` until it is exactly `input.displayWidth` columns wide.
///
/// Rvalue-qualified: the inflater is single-use and gives up `columns` on return.
///
/// @return the inflated cells, `input.displayWidth` entries long.
InflatedLineBuffer<Cell> inflate() &&
{
    vtParserLog()("Inflating TrivialLineBuffer: '{}'", input.text.data() ? crispy::escape(input.text.data()) : "");
    auto lineSegmenter = unicode::grapheme_line_segmenter { *this, input.text.view() };

    // The result is only inspected by the assert; [[maybe_unused]] keeps
    // NDEBUG builds free of an unused-variable warning.
    [[maybe_unused]] auto const result = lineSegmenter.process(std::numeric_limits<unsigned>::max());
    assert(result.stop_condition == unicode::StopCondition::EndOfInput);
    vtParserLog()("Inflated {}/{} columns", columns.size(), input.displayWidth);

    // Fill remaining columns. Compare in size_t throughout instead of
    // narrowing columns.size() into an unsigned loop counter.
    auto const targetWidth = unbox<size_t>(input.displayWidth);
    while (columns.size() < targetWidth)
        columns.emplace_back(input.fillAttributes);
    assert(columns.size() == targetWidth);

    // `columns` is a data member, so it is not implicitly moved on return;
    // the explicit std::move avoids copying the whole buffer.
    return std::move(columns);
}

for (char const ch: input.text.view())
void on_invalid(std::string_view /*invalid*/) noexcept
{
unicode::ConvertResult const r = unicode::from_utf8(utf8DecoderState, static_cast<uint8_t>(ch));
if (holds_alternative<unicode::Incomplete>(r))
continue;
fmt::print("inflate invalid\n");
static constexpr char32_t ReplacementCharacter { 0xFFFD };

auto const nextChar =
holds_alternative<unicode::Success>(r) ? get<unicode::Success>(r).value : ReplacementCharacter;
columns.emplace_back();
columns.back().setHyperlink(input.hyperlink);
columns.back().write(input.textAttributes, ReplacementCharacter, 1);
}

if (unicode::grapheme_segmenter::breakable(lastChar, nextChar))
void on_ascii(std::string_view text) noexcept
{
fmt::print("inflate ASCII: '{}'\n", text);
for (auto const ch: text)
{
while (gapPending > 0)
{
columns.emplace_back(input.textAttributes.with(CellFlag::WideCharContinuation),
input.hyperlink);
--gapPending;
}
auto const charWidth = unicode::width(nextChar);
columns.emplace_back(Cell {});
columns.emplace_back();
columns.back().setHyperlink(input.hyperlink);
columns.back().write(input.textAttributes, nextChar, static_cast<uint8_t>(charWidth));
gapPending = charWidth - 1;
columns.back().write(input.textAttributes, ch, 1);
}
else
}

void on_grapheme_cluster(std::string_view text, unsigned width) noexcept
{
fmt::print("inflate GC: '{}', width: {}\n", text, width);
columns.emplace_back(input.textAttributes, input.hyperlink);
Cell& cell = columns.back();
cell.setHyperlink(input.hyperlink);

auto utf8DecoderState = unicode::utf8_decoder_state {};
for (auto const ch: text)
{
Cell& prevCell = columns.back();
auto const extendedWidth = prevCell.appendCharacter(nextChar);
if (extendedWidth > 0)
unicode::ConvertResult const r = unicode::from_utf8(utf8DecoderState, static_cast<uint8_t>(ch));
if (auto const* cp = std::get_if<unicode::Success>(&r))
{
auto const cellsAvailable = *input.displayWidth - static_cast<int>(columns.size()) + 1;
auto const n = min(extendedWidth, cellsAvailable);
for (int i = 1; i < n; ++i)
{
columns.emplace_back(Cell { input.textAttributes });
columns.back().setHyperlink(input.hyperlink);
}
std::cout << fmt::format(" - codepoint: U+{:X}\n", (unsigned) cp->value);
if (cell.codepointCount() == 0)
cell.setCharacter(cp->value);
else
(void) cell.appendCharacter(cp->value);
}
}
lastChar = nextChar;
}

while (gapPending > 0)
{
columns.emplace_back(Cell { input.textAttributes, input.hyperlink });
--gapPending;
}
fmt::print(" -> result (UTF-8): \"{}\"\n", cell.toUtf8());

assert(columns.size() == unbox<size_t>(input.usedColumns));
assert(unbox(input.displayWidth) > 0);

while (columns.size() < unbox<size_t>(input.displayWidth))
columns.emplace_back(Cell { input.fillAttributes });
// Fill remaining columns for wide characters
for (unsigned i = 1; i < width; ++i)
{
std::cout << fmt::format(" - continuation\n");
columns.emplace_back(input.textAttributes.with(CellFlag::WideCharContinuation), input.hyperlink);
cell.setWidth(width);
}
}
};

return columns;
/// Converts a TrivialLineBuffer into an InflatedLineBuffer of @p Cell.
///
/// Thin entry point: constructs a temporary TrivialLineInflater<Cell> bound to
/// @p input and returns the result of its rvalue-qualified inflate().
///
/// @param input the trivial line buffer to expand.
/// @return the inflated line buffer produced by TrivialLineInflater<Cell>::inflate().
template <typename Cell>
InflatedLineBuffer<Cell> inflate(TrivialLineBuffer const& input)
{
return TrivialLineInflater<Cell>(input).inflate();
}

} // end namespace vtbackend

// {{{ Explicit instantiation of Line<Cell> for supported cell types.
#include <vtbackend/cell/CompactCell.h>
template class vtbackend::Line<vtbackend::CompactCell>;

#include <vtbackend/cell/SimpleCell.h>
template class vtbackend::Line<vtbackend::SimpleCell>;

namespace vtbackend
{

template class Line<CompactCell>;
template class Line<SimpleCell>;
template InflatedLineBuffer<SimpleCell> inflate(TrivialLineBuffer const& input);

} // namespace vtbackend
// }}}
12 changes: 10 additions & 2 deletions src/vtbackend/Screen_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -578,9 +578,17 @@ TEST_CASE("AppendChar.emoji_1", "[screen]")
auto mock = MockTerm { PageSize { LineCount(1), ColumnCount(3) } };
auto& screen = mock.terminal.primaryScreen();

mock.writeToScreen(U"\U0001F600");
mock.writeToScreen("\xf0\x9f\x98\x80"); // U+1F600

auto const& c1 = screen.at(LineOffset(0), ColumnOffset(0));
Line<CompactCell> const& line = screen.grid().lineAt(LineOffset(0));
CHECK(line.isTrivialBuffer());
TrivialLineBuffer const& trivialBuffer = line.trivialBuffer();
CHECK(trivialBuffer.usedColumns == ColumnCount(2));
CHECK(trivialBuffer.text.view() == "\xf0\x9f\x98\x80");

Line<CompactCell>::InflatedBuffer const& inflated = line.inflatedBuffer();
CompactCell const& c1 = inflated.at(0);
// auto const& c1 = screen.at(LineOffset(0), ColumnOffset(0));
CHECK(c1.codepoints() == U"\U0001F600");
CHECK(c1.width() == 2);
REQUIRE(screen.logicalCursorPosition() == CellLocation { LineOffset(0), ColumnOffset(2) });
Expand Down
1 change: 1 addition & 0 deletions src/vtbackend/cell/CellConcept.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ concept CellConcept = requires(T t, T const& u)

t.setCharacter(char32_t{});
{ t.appendCharacter(char32_t{}) } -> std::same_as<int>;
// TODO(pr) rename appendCharacter function to extendGraphemeCluster(codepoint)

{ u.toUtf8() } -> std::convertible_to<std::string>;

Expand Down
1 change: 1 addition & 0 deletions src/vtbackend/cell/CompactCell.h
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,7 @@ inline void CompactCell::setCharacter(char32_t codepoint) noexcept
inline int CompactCell::appendCharacter(char32_t codepoint) noexcept
{
assert(codepoint != 0);
assert(_codepoint != 0);

CellExtra& ext = extra();
if (ext.codepoints.size() < MaxCodepoints - 1)
Expand Down
2 changes: 2 additions & 0 deletions src/vtbackend/cell/SimpleCell.h
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,8 @@ inline void SimpleCell::setCharacter(char32_t codepoint)

inline int SimpleCell::appendCharacter(char32_t codepoint)
{
assert(codepoint != 0);
assert(!_codepoints.empty() && "Use setCharacter() for first character.");
_codepoints.push_back(codepoint);

auto const diff = CellUtil::computeWidthChange(*this, codepoint);
Expand Down
4 changes: 3 additions & 1 deletion src/vtparser/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ target_link_libraries(vtparser PUBLIC
fmt::fmt-header-only
range-v3::range-v3
unicode::unicode
crispy::core
)
target_include_directories(vtparser PUBLIC
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/src>
Expand All @@ -24,7 +25,8 @@ if(VTPARSER_TESTING)
enable_testing()
add_executable(vtparser_test
Parser_test.cpp
test_main.cpp
)
target_link_libraries(vtparser_test vtparser Catch2::Catch2WithMain)
target_link_libraries(vtparser_test vtparser Catch2::Catch2)
add_test(vtparser_test ./vtparser_test)
endif()
43 changes: 38 additions & 5 deletions src/vtparser/Parser-impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,16 @@ template <typename EventListener, bool TraceStateChanges>
auto Parser<EventListener, TraceStateChanges>::parseBulkText(char const* begin, char const* end) noexcept
-> std::tuple<ProcessKind, size_t>
{
// auto constexpr StopConditionStr = [](unicode::StopCondition value) -> std::string_view {
// switch (value)
// {
// case unicode::StopCondition::UnexpectedInput: return "UnexpectedInput";
// case unicode::StopCondition::EndOfInput: return "EndOfInput";
// case unicode::StopCondition::EndOfWidth: return "EndOfWidth";
// }
// return "Unknown";
// };

auto const* input = begin;
if (_state != State::Ground)
return { ProcessKind::FallbackToFSM, 0 };
Expand All @@ -375,11 +385,28 @@ auto Parser<EventListener, TraceStateChanges>::parseBulkText(char const* begin,

auto const chunk = std::string_view(input, static_cast<size_t>(std::distance(input, end)));

_graphemeLineSegmenter.reset(chunk);
unicode::grapheme_segmentation_result result = _graphemeLineSegmenter.process(maxCharCount);
auto const cellCount = result.width;
if (_graphemeLineSegmenter.next() == begin)
_graphemeLineSegmenter.expand_buffer_by(chunk.size());
else
_graphemeLineSegmenter.reset(chunk);
// if (_graphemeLineSegmenter.end() == begin)
// _graphemeLineSegmenter.expand_buffer_by(chunk.size());
// else
// _graphemeLineSegmenter.reset(chunk);
// TODO(pr) What if the last call to parseBulkText was only a partial read, and we have
// more text to read? Then we should not just call reset() but expand_buffer_by().
// _graphemeLineSegmenter.reset(chunk);

unicode::grapheme_segmentation_result const result = _graphemeLineSegmenter.process(maxCharCount);
unicode::grapheme_segmentation_result const flushResult =
_graphemeLineSegmenter.flush(maxCharCount - result.width);
// TODO(pr) this flush should only happen if non-text was received, e.g. a control sequence, or
// if the last codepoint was fully processed. Otherwise, we should not flush, but
// continue processing the next codepoint (in the NEXT call).

auto const cellCount = result.width + flushResult.width;
auto const* subStart = result.text.data();
auto const* subEnd = subStart + result.text.size();
auto const* subEnd = subStart + result.text.size() + flushResult.text.size();

if (result.text.empty())
return { ProcessKind::FallbackToFSM, 0 };
Expand All @@ -400,7 +427,10 @@ auto Parser<EventListener, TraceStateChanges>::parseBulkText(char const* begin,
if (!_graphemeLineSegmenter.is_utf8_byte_pending())
{
if (!text.empty())
{
vtTraceParserLog()("Printing fast-scanned text \"{}\" with {} cells.", text, cellCount);
_eventListener.print(text, cellCount);
}

// This optimization is for the `cat`-people.
// It further optimizes the throughput performance by bypassing
Expand All @@ -411,7 +441,7 @@ auto Parser<EventListener, TraceStateChanges>::parseBulkText(char const* begin,
_eventListener.execute(*input++);
}

auto const count = static_cast<size_t>(std::distance(input, _graphemeLineSegmenter.next()));
auto const count = static_cast<size_t>(std::distance(input, _graphemeLineSegmenter.next()));
return { ProcessKind::ContinueBulk, count };
}

Expand All @@ -437,6 +467,9 @@ void Parser<EventListener, TraceStateChanges>::handle(ActionClass actionClass,
(void) actionClass;
auto const ch = static_cast<char>(codepoint);

if (vtTraceParserLog)
vtTraceParserLog()("Parser.handle: {} {} {:X}", actionClass, action, (unsigned) ch);

switch (action)
{
case Action::GroundStart: _graphemeLineSegmenter.reset_last_codepoint_hint(); break;
Expand Down
4 changes: 4 additions & 0 deletions src/vtparser/Parser.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
// SPDX-License-Identifier: Apache-2.0
#pragma once

#include <crispy/logstore.h>

#include <libunicode/convert.h>
#include <libunicode/grapheme_line_segmenter.h>

Expand All @@ -19,6 +21,8 @@
namespace vtparser
{

/// Log category "vt.trace.parser" for tracing the terminal parser.
/// Declared `inline` so this header-defined variable has a single definition
/// across all translation units that include it.
auto const inline vtTraceParserLog = logstore::category("vt.trace.parser", "Logs terminal parser trace.");

// NOLINTBEGIN(readability-identifier-naming)
enum class State : uint8_t
{
Expand Down

0 comments on commit 4058a8f

Please sign in to comment.