From b0ebb6722c77dda1ab1e3ce13521fe7db20cbc79 Mon Sep 17 00:00:00 2001 From: alaviss Date: Sun, 16 Apr 2023 15:08:27 -0500 Subject: [PATCH] perf(util): simplify and optimize parsers (#435) * refactor(util): turn parse_keys into state machine The parser now requires only 3 states with a much simpler conditional structure of only 2 layers: one for the state and another for the character read. Several performance improvements are made as part of this: - Avoid calling t(), which involves string allocation, both in C and in Lua due to gsub(). - Reduced string allocations: avoid string building which is extremely slow in Lua. - Make heavy use of composite conditionals: LuaJIT likes them a lot and testing shows a marked improvement in speed with their usage. - Reduced number of table lookups in loops, which includes options lookup (very slow) and object function lookup. All of this nets a startup time improvement of ~10.6%, from 11.3ms[+-0.156] down to 10.1ms[+-0.13] as recorded by lazy.nvim, sampled 10 times. * refactor(util): simplify parse_internal Rewrites parse_internal as a state machine. This reduces the number of variables required to manage the parser, making it simpler to understand. The code is optimized with LuaJIT in mind, in particular: - Make use of composite conditionals on hot path to leverage IR optimizations. - Avoid string building which has quadratic complexity in Lua. Startup time goes from 10.1ms[+-0.13] down to 9.81ms[+-0.165] (~2.8% improvement) with this commit as recorded by lazy.nvim over 10 samples. 
--- lua/which-key/util.lua | 145 ++++++++++++++++++++--------------------- 1 file changed, 71 insertions(+), 74 deletions(-) diff --git a/lua/which-key/util.lua b/lua/which-key/util.lua index 5f02c16..30a1d52 100644 --- a/lua/which-key/util.lua +++ b/lua/which-key/util.lua @@ -1,5 +1,7 @@ ---@class Util local M = {} +local strbyte = string.byte +local strsub = string.sub function M.count(tab) local ret = 0 @@ -48,92 +50,87 @@ local utf8len_tab = { } -- stylua: ignore end +local Tokens = { + ["<"] = strbyte("<"), + [">"] = strbyte(">"), + ["-"] = strbyte("-"), +} ---@return KeyCodes function M.parse_keys(keystr) - local keys = {} - local cur = "" - local todo = 1 - local special = nil - for i = 1, #keystr, 1 do - local c = keystr:sub(i, i) - if special then - if todo == 0 then - if c == ">" then - table.insert(keys, special .. ">") - cur = "" - todo = 1 - special = nil - elseif c == "-" then - -- When getting a special key notation: - -- todo = 0 means it can be ended by a ">" now. - -- todo = 1 means ">" should be treated as the modified character. - todo = 1 - end - else - todo = 0 - end - if special then - special = special .. c - end - elseif c == "<" then - special = "<" - todo = 0 + local notation = {} + ---@alias ParseState + --- | "Character" + --- | "Special" + --- | "SpecialNoClose" + local start = 1 + local i = start + ---@type ParseState + local state = "Character" + while i <= #keystr do + local c = strbyte(keystr, i, i) + + if state == "Character" then + start = i + state = c == Tokens["<"] and "Special" or state + elseif state == "Special" then + state = (c == Tokens["-"] and "SpecialNoClose") or (c == Tokens[">"] and "Character") or state else - if todo == 1 then - todo = utf8len_tab[c:byte() + 1] - end - cur = cur .. 
c - todo = todo - 1 - if todo == 0 then - table.insert(keys, cur) - cur = "" - todo = 1 - end - end - end - local ret = { keys = M.t(keystr), internal = {}, notation = {} } - for i, key in pairs(keys) do - if key == " " then - key = "<space>" + state = "Special" end - if i == 1 and vim.g.mapleader and M.t(key) == M.t(vim.g.mapleader) then - key = "<leader>" + + i = i + utf8len_tab[c + 1] + if state == "Character" then + local k = strsub(keystr, start, i - 1) + notation[#notation + 1] = k == " " and "<space>" or k end - table.insert(ret.internal, M.t(key)) - table.insert(ret.notation, key) end - return ret + + local keys = M.t(keystr) + local internal = M.parse_internal(keys) + local mapleader = vim.g.mapleader + mapleader = mapleader and M.t(mapleader) + notation[1] = internal[1] == mapleader and "<leader>" or notation[1] + + return { + keys = keys, + internal = internal, + notation = notation, + } end -- @return string[] function M.parse_internal(keystr) local keys = {} - local cur = "" - local todo = 1 - local utf8 = false - for i = 1, #keystr, 1 do - local c = keystr:sub(i, i) - if not utf8 then - if todo == 1 and c == "\128" then - -- K_SPECIAL: get 3 bytes - todo = 3 - elseif cur == "\128" and c == "\252" then - -- K_SPECIAL KS_MODIFIER: repeat after getting 3 bytes - todo = todo + 1 - elseif todo == 1 then - -- When the second byte of a K_SPECIAL sequence is not KS_MODIFIER, - -- the third byte is guaranteed to be between 0x02 and 0x7f. - todo = utf8len_tab[c:byte() + 1] - utf8 = todo > 1 + ---@alias ParseInternalState + --- | "Character" + --- | "Special" + ---@type ParseInternalState + local state = "Character" + local start = 1 + local i = 1 + while i <= #keystr do + local c = strbyte(keystr, i, i) + + if state == "Character" then + state = c == 128 and "Special" or state + i = i + utf8len_tab[c + 1] + + if state == "Character" then + keys[#keys + 1] = strsub(keystr, start, i - 1) + start = i end - end - cur = cur .. 
c - todo = todo - 1 - if todo == 0 then - table.insert(keys, cur) - cur = "" - todo = 1 - utf8 = false + else + -- This state is entered on the second byte of K_SPECIAL sequence. + if c == 252 then + -- K_SPECIAL KS_MODIFIER: skip this byte and the next + i = i + 2 + else + -- K_SPECIAL _: skip this byte + i = i + 1 + end + -- The last byte of this sequence should be between 0x02 and 0x7f, + -- switch to Character state to collect. + state = "Character" end end return keys