From a85ffa4992c4f14825968517ecdcf276d24d1185 Mon Sep 17 00:00:00 2001 From: Mark Raasveldt Date: Sun, 4 Feb 2024 11:43:55 +0100 Subject: [PATCH 1/3] Minor rework - pass tokens into addContinuationMarkers --- tools/shell/linenoise.cpp | 97 +++++++++++++++++++++++++++------------ 1 file changed, 68 insertions(+), 29 deletions(-) diff --git a/tools/shell/linenoise.cpp b/tools/shell/linenoise.cpp index 6725bf7c61d..2db251b5228 100644 --- a/tools/shell/linenoise.cpp +++ b/tools/shell/linenoise.cpp @@ -773,38 +773,65 @@ void linenoiseSetPrompt(const char *continuation, const char *continuationSelect continuationSelectedPrompt = continuationSelected; } -#ifndef DISABLE_HIGHLIGHT -#include -#include "duckdb/parser/parser.hpp" +enum class tokenType : uint8_t { + TOKEN_IDENTIFIER, + TOKEN_NUMERIC_CONSTANT, + TOKEN_STRING_CONSTANT, + TOKEN_OPERATOR, + TOKEN_KEYWORD, + TOKEN_COMMENT +}; struct highlightToken { - duckdb::SimplifiedTokenType type; + tokenType type; size_t start = 0; bool search_match = false; }; -std::string highlightText(char *buf, size_t len, size_t start_pos, size_t end_pos, searchMatch *match = nullptr) { +#ifndef DISABLE_HIGHLIGHT +#include +#include "duckdb/parser/parser.hpp" + +tokenType convertToken(duckdb::SimplifiedTokenType token_type) { + switch(token_type) { + case duckdb::SimplifiedTokenType::SIMPLIFIED_TOKEN_IDENTIFIER: + return tokenType::TOKEN_IDENTIFIER; + case duckdb::SimplifiedTokenType::SIMPLIFIED_TOKEN_NUMERIC_CONSTANT: + return tokenType::TOKEN_NUMERIC_CONSTANT; + case duckdb::SimplifiedTokenType::SIMPLIFIED_TOKEN_STRING_CONSTANT: + return tokenType::TOKEN_STRING_CONSTANT; + case duckdb::SimplifiedTokenType::SIMPLIFIED_TOKEN_OPERATOR: + return tokenType::TOKEN_OPERATOR; + case duckdb::SimplifiedTokenType::SIMPLIFIED_TOKEN_KEYWORD: + return tokenType::TOKEN_KEYWORD; + case duckdb::SimplifiedTokenType::SIMPLIFIED_TOKEN_COMMENT: + return tokenType::TOKEN_COMMENT; + default: + throw duckdb::InternalException("Unrecognized token type"); + } +} + +std::vector tokenize(char *buf, size_t len, searchMatch *match = nullptr) { std::string sql(buf, len); auto parseTokens = duckdb::Parser::Tokenize(sql); - std::stringstream ss; std::vector tokens; for (auto &token : parseTokens) { highlightToken new_token; - new_token.type = token.type; + new_token.type = convertToken(token.type); new_token.start = token.start; tokens.push_back(new_token); } if (!tokens.empty() && tokens[0].start > 0) { highlightToken new_token; - new_token.type = duckdb::SimplifiedTokenType::SIMPLIFIED_TOKEN_IDENTIFIER; + new_token.type = tokenType::TOKEN_IDENTIFIER; new_token.start = 0; tokens.insert(tokens.begin(), new_token); } if (tokens.empty() && sql.size() > 0) { highlightToken new_token; - new_token.type = duckdb::SimplifiedTokenType::SIMPLIFIED_TOKEN_IDENTIFIER; + new_token.type = tokenType::TOKEN_IDENTIFIER; new_token.start = 0; tokens.push_back(new_token); } @@ -816,7 +843,7 @@ std::string highlightText(char *buf, size_t len, size_t start_pos, size_t end_po if (tokens[i].start <= match->match_start && tokens[i + 1].start >= match->match_start) { // this token begins after the search position, insert the token here size_t token_position = i + 1; - duckdb::SimplifiedTokenType end_type = tokens[i].type; + auto end_type = tokens[i].type; if (tokens[i].start == match->match_start) { // exact start: only set the search match tokens[i].search_match = true; @@ -850,6 +877,11 @@ std::string highlightText(char *buf, size_t len, size_t start_pos, size_t end_po } } } + return tokens; +} + +std::string highlightText(char *buf, size_t len, size_t start_pos, size_t end_pos, const std::vector &tokens) { + std::stringstream ss; for (size_t i = 0; i < tokens.size(); i++) { size_t next = i + 1 < tokens.size() ? tokens[i + 1].start : len; if (next < start_pos) { @@ -868,11 +900,11 @@ std::string highlightText(char *buf, size_t len, size_t start_pos, size_t end_po ss << underline; } switch (token.type) { - case duckdb::SimplifiedTokenType::SIMPLIFIED_TOKEN_KEYWORD: + case tokenType::TOKEN_KEYWORD: ss << keyword << text << reset; break; - case duckdb::SimplifiedTokenType::SIMPLIFIED_TOKEN_NUMERIC_CONSTANT: - case duckdb::SimplifiedTokenType::SIMPLIFIED_TOKEN_STRING_CONSTANT: + case tokenType::TOKEN_NUMERIC_CONSTANT: + case tokenType::TOKEN_STRING_CONSTANT: ss << constant << text << reset; break; default: @@ -923,7 +955,8 @@ static void renderText(size_t &render_pos, char *&buf, size_t &len, size_t pos, } #ifndef DISABLE_HIGHLIGHT if (highlight) { - highlight_buffer = highlightText(buf, len, start_pos, cpos, match); + auto tokens = tokenize(buf, len, match); + highlight_buffer = highlightText(buf, len, start_pos, cpos, tokens); buf = (char *)highlight_buffer.c_str(); len = highlight_buffer.size(); } else @@ -1147,13 +1180,14 @@ size_t colAndRowToPosition(struct linenoiseState *l, int target_row, int target_ } static std::string addContinuationMarkers(struct linenoiseState *l, const char *buf, size_t len, int plen, - int cursor_row, searchMatch *match) { + int cursor_row, std::vector &tokens) { std::string result; int rows = 1; int cols = plen; size_t cpos = 0; size_t prev_pos = 0; - size_t match_start_pos = match ? match->match_start : 0; + size_t extra_bytes = 0; // extra bytes introduced + size_t token_position = 0; // token position while (cpos < len) { bool is_newline = isNewline(buf[cpos]); nextPosition(l, buf, len, cpos, rows, cols, plen); @@ -1169,17 +1203,23 @@ static std::string addContinuationMarkers(struct linenoiseState *l, const char * result += " "; } result += prompt; - if (match && match_start_pos >= cpos) { - // move search match over by any additional prompts added - size_t continuationBytes = plen - continuationRender + continuationLen; - match->match_start += continuationBytes; - match->match_end += continuationBytes; + for(; token_position < tokens.size(); token_position++) { + if (tokens[token_position].start >= cpos) { + // not there yet + break; + } + tokens[token_position].start += extra_bytes; } + size_t continuationBytes = plen - continuationRender + continuationLen; + extra_bytes += continuationBytes; } } for (; prev_pos < cpos; prev_pos++) { result += buf[prev_pos]; } + for(; token_position < tokens.size(); token_position++) { + tokens[token_position].start += extra_bytes; + } return result; } @@ -1246,23 +1286,22 @@ static void refreshMultiLine(struct linenoiseState *l) { l->maxrows = rows; } - searchMatch match; - searchMatch *matchPtr = nullptr; - if (l->search_index < l->search_matches.size()) { - match = l->search_matches[l->search_index]; - matchPtr = &match; - } + std::vector tokens; +#ifndef DISABLE_HIGHLIGHT + auto match = l->search_index < l->search_matches.size() ? &l->search_matches[l->search_index] : nullptr; + tokens = tokenize(buf, len, match); +#endif if (rows > 1) { // add continuation markers highlight_buffer = - addContinuationMarkers(l, buf, len, plen, l->y_scroll > 0 ? new_cursor_row + 1 : new_cursor_row, matchPtr); + addContinuationMarkers(l, buf, len, plen, l->y_scroll > 0 ? new_cursor_row + 1 : new_cursor_row, tokens); buf = (char *)highlight_buffer.c_str(); len = highlight_buffer.size(); } #ifndef DISABLE_HIGHLIGHT if (duckdb::Utf8Proc::IsValid(l->buf, l->len)) { if (enableHighlighting) { - highlight_buffer = highlightText(buf, len, 0, len, matchPtr); + highlight_buffer = highlightText(buf, len, 0, len, tokens); buf = (char *)highlight_buffer.c_str(); len = highlight_buffer.size(); } From ac057321ab218527328370ca02c7dcbc0663fb3a Mon Sep 17 00:00:00 2001 From: Mark Raasveldt Date: Sun, 4 Feb 2024 11:50:15 +0100 Subject: [PATCH 2/3] Add highlighting for continuation tokens --- tools/shell/linenoise.cpp | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/tools/shell/linenoise.cpp b/tools/shell/linenoise.cpp index 2db251b5228..8b6814c279c 100644 --- a/tools/shell/linenoise.cpp +++ b/tools/shell/linenoise.cpp @@ -779,7 +779,9 @@ enum class tokenType : uint8_t { TOKEN_STRING_CONSTANT, TOKEN_OPERATOR, TOKEN_KEYWORD, - TOKEN_COMMENT + TOKEN_COMMENT, + TOKEN_CONTINUATION, + TOKEN_CONTINUATION_SELECTED }; struct highlightToken { @@ -793,7 +795,7 @@ struct highlightToken { #include "duckdb/parser/parser.hpp" tokenType convertToken(duckdb::SimplifiedTokenType token_type) { - switch(token_type) { + switch (token_type) { case duckdb::SimplifiedTokenType::SIMPLIFIED_TOKEN_IDENTIFIER: return tokenType::TOKEN_IDENTIFIER; case duckdb::SimplifiedTokenType::SIMPLIFIED_TOKEN_NUMERIC_CONSTANT: @@ -880,7 +882,8 @@ std::vector tokenize(char *buf, size_t len, searchMatch *match = return tokens; } -std::string highlightText(char *buf, size_t len, size_t start_pos, size_t end_pos, const std::vector &tokens) { +std::string highlightText(char *buf, size_t len, size_t start_pos, size_t end_pos, + const std::vector &tokens) { std::stringstream ss; for (size_t i = 0; i < tokens.size(); i++) { size_t next = i + 1 < tokens.size() ? tokens[i + 1].start : len; @@ -901,10 +904,12 @@ std::string highlightText(char *buf, size_t len, size_t start_pos, size_t end_po } switch (token.type) { case tokenType::TOKEN_KEYWORD: + case tokenType::TOKEN_CONTINUATION_SELECTED: ss << keyword << text << reset; break; case tokenType::TOKEN_NUMERIC_CONSTANT: case tokenType::TOKEN_STRING_CONSTANT: + case tokenType::TOKEN_CONTINUATION: ss << constant << text << reset; break; default: @@ -1195,7 +1200,8 @@ static std::string addContinuationMarkers(struct linenoiseState *l, const char * result += buf[prev_pos]; } if (is_newline) { - const char *prompt = rows == cursor_row ? continuationSelectedPrompt : continuationPrompt; + bool is_cursor_row = rows == cursor_row; + const char *prompt = is_cursor_row ? continuationSelectedPrompt : continuationPrompt; size_t continuationLen = strlen(prompt); size_t continuationRender = linenoiseComputeRenderWidth(prompt, continuationLen); // pad with spaces prior to prompt @@ -1203,13 +1209,20 @@ static std::string addContinuationMarkers(struct linenoiseState *l, const char * result += " "; } result += prompt; - for(; token_position < tokens.size(); token_position++) { + for (; token_position < tokens.size(); token_position++) { if (tokens[token_position].start >= cpos) { // not there yet break; } tokens[token_position].start += extra_bytes; } + highlightToken token; + token.start = cpos + extra_bytes; + token.type = is_cursor_row ? tokenType::TOKEN_CONTINUATION_SELECTED : tokenType::TOKEN_CONTINUATION; + token.search_match = false; + tokens.insert(tokens.begin() + token_position, token); + token_position++; + size_t continuationBytes = plen - continuationRender + continuationLen; extra_bytes += continuationBytes; } @@ -1217,7 +1230,7 @@ static std::string addContinuationMarkers(struct linenoiseState *l, const char * for (; prev_pos < cpos; prev_pos++) { result += buf[prev_pos]; } - for(; token_position < tokens.size(); token_position++) { + for (; token_position < tokens.size(); token_position++) { tokens[token_position].start += extra_bytes; } return result; From 59dc5959b511f1f6f4f6b481f0a48ed360797411 Mon Sep 17 00:00:00 2001 From: Mark Raasveldt Date: Sun, 4 Feb 2024 11:59:29 +0100 Subject: [PATCH 3/3] Clean up highlighting for continuation tokens --- tools/shell/linenoise.cpp | 40 ++++++++++++++++++++++++++------------- 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/tools/shell/linenoise.cpp b/tools/shell/linenoise.cpp index 8b6814c279c..4fbe741c989 100644 --- a/tools/shell/linenoise.cpp +++ b/tools/shell/linenoise.cpp @@ -1193,6 +1193,8 @@ static std::string addContinuationMarkers(struct linenoiseState *l, const char * size_t prev_pos = 0; size_t extra_bytes = 0; // extra bytes introduced size_t token_position = 0; // token position + std::vector new_tokens; + new_tokens.reserve(tokens.size()); while (cpos < len) { bool is_newline = isNewline(buf[cpos]); nextPosition(l, buf, len, cpos, rows, cols, plen); @@ -1209,21 +1211,31 @@ static std::string addContinuationMarkers(struct linenoiseState *l, const char * result += " "; } result += prompt; - for (; token_position < tokens.size(); token_position++) { - if (tokens[token_position].start >= cpos) { - // not there yet - break; + size_t continuationBytes = plen - continuationRender + continuationLen; + if (token_position < tokens.size()) { + for (; token_position < tokens.size(); token_position++) { + if (tokens[token_position].start >= cpos) { + // not there yet + break; + } + tokens[token_position].start += extra_bytes; + new_tokens.push_back(tokens[token_position]); } - tokens[token_position].start += extra_bytes; + tokenType prev_type = tokenType::TOKEN_IDENTIFIER; + if (token_position > 0 && token_position < tokens.size() + 1) { + prev_type = tokens[token_position - 1].type; + } + highlightToken token; + token.start = cpos + extra_bytes; + token.type = is_cursor_row ? tokenType::TOKEN_CONTINUATION_SELECTED : tokenType::TOKEN_CONTINUATION; + token.search_match = false; + new_tokens.push_back(token); + + token.start = cpos + extra_bytes + continuationBytes; + token.type = prev_type; + token.search_match = false; + new_tokens.push_back(token); } - highlightToken token; - token.start = cpos + extra_bytes; - token.type = is_cursor_row ? tokenType::TOKEN_CONTINUATION_SELECTED : tokenType::TOKEN_CONTINUATION; - token.search_match = false; - tokens.insert(tokens.begin() + token_position, token); - token_position++; - - size_t continuationBytes = plen - continuationRender + continuationLen; extra_bytes += continuationBytes; } } @@ -1232,7 +1244,9 @@ static std::string addContinuationMarkers(struct linenoiseState *l, const char * } for (; token_position < tokens.size(); token_position++) { tokens[token_position].start += extra_bytes; + new_tokens.push_back(tokens[token_position]); } + tokens = std::move(new_tokens); return result; }