Skip to content

Commit

Permalink
Adapt to latest changes in libunicode and its new grapheme_line_segmenter API
Browse files Browse the repository at this point in the history

Signed-off-by: Christian Parpart <christian@parpart.family>
  • Loading branch information
christianparpart committed Mar 28, 2024
1 parent b341b44 commit 4f245c4
Show file tree
Hide file tree
Showing 9 changed files with 39 additions and 22 deletions.
3 changes: 2 additions & 1 deletion cmake/presets/common.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@
"hidden": true,
"cacheVariables": {
"CMAKE_BUILD_TYPE": "Debug",
"CMAKE_INSTALL_PREFIX": "${sourceDir}/out/install/${presetName}"
"CMAKE_INSTALL_PREFIX": "${sourceDir}/out/install/${presetName}",
"LIBUNICODE_TABLEGEN_FASTBUILD": "ON"
}
}
]
Expand Down
6 changes: 3 additions & 3 deletions scripts/install-deps.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@ class ThirdParty {
$ThirdParties =
@(
[ThirdParty]@{
Folder = "libunicode-23d7b30166a914b10526bb8fe7a469a9610c07dc";
Archive = "libunicode-23d7b30166a914b10526bb8fe7a469a9610c07dc.zip";
URI = "https://github.com/contour-terminal/libunicode/archive/23d7b30166a914b10526bb8fe7a469a9610c07dc.zip";
Folder = "libunicode-dabfea48f7fd2a8bf6ae19e37581de5c127c607f";
Archive = "libunicode-dabfea48f7fd2a8bf6ae19e37581de5c127c607f.zip";
URI = "https://github.com/contour-terminal/libunicode/archive/dabfea48f7fd2a8bf6ae19e37581de5c127c607f.zip";
Macro = "libunicode"
};
[ThirdParty]@{
Expand Down
2 changes: 1 addition & 1 deletion scripts/install-deps.sh
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ fetch_and_unpack_boxed()
fetch_and_unpack_libunicode()
{
if test x$LIBUNICODE_SRC_DIR = x; then
local libunicode_git_sha="23d7b30166a914b10526bb8fe7a469a9610c07dc"
local libunicode_git_sha="dabfea48f7fd2a8bf6ae19e37581de5c127c607f"
fetch_and_unpack \
libunicode-$libunicode_git_sha \
libunicode-$libunicode_git_sha.tar.gz \
Expand Down
4 changes: 2 additions & 2 deletions src/vtbackend/CellUtil.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,12 +94,12 @@ CRISPY_REQUIRES(CellConcept<Cell>)
if (!AllowWidthChange)
return 0;

auto const newWidth = [codepoint]() {
auto const newWidth = [codepoint]() -> int {
switch (codepoint)
{
case 0xFE0E: return 1;
case 0xFE0F: return 2;
default: return unicode::width(codepoint);
default: return static_cast<int>(unicode::width(codepoint));
}
}();

Expand Down
3 changes: 2 additions & 1 deletion src/vtbackend/cell/CompactCell.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <libunicode/convert.h>
#include <libunicode/width.h>

#include <algorithm>
#include <memory>
#include <string>

Expand Down Expand Up @@ -312,7 +313,7 @@ inline void CompactCell::setCharacter(char32_t codepoint) noexcept
_extra->imageFragment = {};
}
if (codepoint)
setWidth(static_cast<uint8_t>(std::max(unicode::width(codepoint), 1)));
setWidth(static_cast<uint8_t>(std::max(unicode::width(codepoint), 1u)));
else
setWidth(1);
}
Expand Down
2 changes: 1 addition & 1 deletion src/vtbackend/cell/SimpleCell.h
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ inline void SimpleCell::setCharacter(char32_t codepoint)
if (codepoint)
{
_codepoints.push_back(codepoint);
setWidth(static_cast<uint8_t>(std::max(unicode::width(codepoint), 1)));
setWidth(static_cast<uint8_t>(std::max(unicode::width(codepoint), 1u)));
}
else
setWidth(1);
Expand Down
22 changes: 13 additions & 9 deletions src/vtparser/Parser-impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -373,11 +373,15 @@ auto Parser<EventListener, TraceStateChanges>::parseBulkText(char const* begin,
if (!maxCharCount)
return { ProcessKind::FallbackToFSM, 0 };

_scanState.next = nullptr;
auto const chunk = std::string_view(input, static_cast<size_t>(std::distance(input, end)));
auto const [cellCount, subStart, subEnd] = unicode::scan_text(_scanState, chunk, maxCharCount);

if (_scanState.next == input)
_graphemeLineSegmenter.reset(chunk);
unicode::grapheme_segmentation_result result = _graphemeLineSegmenter.process(maxCharCount);
auto const cellCount = result.width;
auto const* subStart = result.text.data();
auto const* subEnd = subStart + result.text.size();

if (result.text.empty())
return { ProcessKind::FallbackToFSM, 0 };

// We do not test on cellCount>0 because the scan could contain only a ZWJ (zero width
Expand All @@ -390,10 +394,10 @@ auto Parser<EventListener, TraceStateChanges>::parseBulkText(char const* begin,

assert(cellCount <= maxCharCount);
assert(subEnd <= chunk.data() + chunk.size());
assert(_scanState.next <= chunk.data() + chunk.size());
assert(_graphemeLineSegmenter.next() <= chunk.data() + chunk.size());

auto const text = std::string_view { subStart, byteCount };
if (_scanState.utf8.expectedLength == 0)
if (!_graphemeLineSegmenter.is_utf8_byte_pending())
{
if (!text.empty())
_eventListener.print(text, cellCount);
Expand All @@ -407,22 +411,22 @@ auto Parser<EventListener, TraceStateChanges>::parseBulkText(char const* begin,
_eventListener.execute(*input++);
}

auto const count = static_cast<size_t>(std::distance(input, _scanState.next));
auto const count = static_cast<size_t>(std::distance(input, _graphemeLineSegmenter.next()));
return { ProcessKind::ContinueBulk, count };
}

template <typename EventListener, bool TraceStateChanges>
void Parser<EventListener, TraceStateChanges>::printUtf8Byte(char ch)
{
unicode::ConvertResult const r = unicode::from_utf8(_scanState.utf8, (uint8_t) ch);
unicode::ConvertResult const r = _graphemeLineSegmenter.process_single_byte(static_cast<uint8_t>(ch));
if (std::holds_alternative<unicode::Incomplete>(r))
return;

auto constexpr ReplacementCharacter = char32_t { 0xFFFD };
auto const codepoint = std::holds_alternative<unicode::Success>(r) ? std::get<unicode::Success>(r).value
: ReplacementCharacter;
_eventListener.print(codepoint);
_scanState.lastCodepointHint = codepoint;
_graphemeLineSegmenter.reset_last_codepoint_hint(codepoint);
}

template <typename EventListener, bool TraceStateChanges>
Expand All @@ -435,7 +439,7 @@ void Parser<EventListener, TraceStateChanges>::handle(ActionClass actionClass,

switch (action)
{
case Action::GroundStart: _scanState.lastCodepointHint = 0; break;
case Action::GroundStart: _graphemeLineSegmenter.reset_last_codepoint_hint(); break;
case Action::Clear: _eventListener.clear(); break;
case Action::CollectLeader: _eventListener.collectLeader(ch); break;
case Action::Collect: _eventListener.collect(ch); break;
Expand Down
9 changes: 6 additions & 3 deletions src/vtparser/Parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#pragma once

#include <libunicode/convert.h>
#include <libunicode/scan.h>
#include <libunicode/grapheme_line_segmenter.h>

#include <fmt/core.h>

Expand Down Expand Up @@ -531,7 +531,10 @@ class Parser

[[nodiscard]] State state() const noexcept { return _state; }

[[nodiscard]] char32_t precedingGraphicCharacter() const noexcept { return _scanState.lastCodepointHint; }
[[nodiscard]] char32_t precedingGraphicCharacter() const noexcept
{
return _graphemeLineSegmenter.last_codepoint_hint();
}

void printUtf8Byte(char ch);

Expand All @@ -553,7 +556,7 @@ class Parser
//
State _state = State::Ground;
EventListener& _eventListener;
unicode::scan_state _scanState {};
unicode::grapheme_line_segmenter<void> _graphemeLineSegmenter;
};

/// @returns parsed tuple with OSC code and offset to first data parameter byte.
Expand Down
10 changes: 9 additions & 1 deletion src/vtparser/Parser_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
#include <vtparser/Parser.h>
#include <vtparser/ParserEvents.h>

#include <crispy/escape.h>

#include <libunicode/convert.h>

#include <catch2/catch_test_macros.hpp>
Expand All @@ -17,9 +19,15 @@ class MockParserEvents final: public vtparser::NullParserEvents
size_t maxCharCount = 80;

void error(string_view const& msg) override { INFO(fmt::format("Parser error received. {}", msg)); }
void print(char32_t ch) override { text += unicode::convert_to<char>(ch); }

void print(char32_t ch) override
{
UNSCOPED_INFO(fmt::format("print: U+{:X}", (unsigned) ch));
text += unicode::convert_to<char>(ch);
}
size_t print(std::string_view s, size_t cellCount) override
{
UNSCOPED_INFO(fmt::format("print: {}", crispy::escape(s)));
text += s;
return maxCharCount -= cellCount;
}
Expand Down

0 comments on commit 4f245c4

Please sign in to comment.