Skip to content

Commit

Permalink
perf(util): simplify and optimize parsers (#435)
Browse files Browse the repository at this point in the history
* refactor(util): turn parse_keys into state machine

The parser now requires only 3 states with a much simpler conditional
structure of only 2 layers: one for the state and another for the
character read.

Several performance improvements are made as part of this:

- Avoid calling t(), which involves string allocation, both in C and in
  Lua due to gsub().

- Reduced string allocations: avoid string building which is extremely
  slow in Lua.

- Make heavy use of composite conditionals: LuaJIT likes them a lot and
  testing shows a marked improvement in speed with their usage.

- Reduced number of table lookups in loops, which includes options
  lookup (very slow) and object function lookup.

All of this nets out to a startup time improvement of ~10.6%, from
11.3ms[+-0.156] down to 10.1ms[+-0.13] as recorded by lazy.nvim, sampled
10 times.

* refactor(util): simplify parse_internal

Rewrites parse_internal as a state machine. This reduces the number of
variables required to manage the parser, making it simpler to
understand.

The code is optimized with LuaJIT in mind, in particular:

- Make use of composite conditionals on hot path to leverage IR
  optimizations.

- Avoid string building which has quadratic complexity in Lua.

Startup time goes from 10.1ms[+-0.13] down to 9.81ms[+-0.165] (~2.8%
improvement) with this commit, as recorded by lazy.nvim over 10
samples.
  • Loading branch information
alaviss committed Apr 16, 2023
1 parent 8b1bb02 commit b0ebb67
Showing 1 changed file with 71 additions and 74 deletions.
145 changes: 71 additions & 74 deletions lua/which-key/util.lua
@@ -1,5 +1,7 @@
---@class Util
local M = {}
local strbyte = string.byte
local strsub = string.sub

function M.count(tab)
local ret = 0
Expand Down Expand Up @@ -48,92 +50,87 @@ local utf8len_tab = {
}
-- stylua: ignore end

-- Byte values (computed once with strbyte) of the "<", ">" and "-"
-- characters. M.parse_keys compares these against the bytes it reads from
-- the key string instead of comparing one-character substrings.
local Tokens = {
["<"] = strbyte("<"),
[">"] = strbyte(">"),
["-"] = strbyte("-"),
}
--- Break a key string into its individual keys.
--- The returned table has three fields: `keys` (the result of M.t on the
--- input), `internal` and `notation`.
--- NOTE(review): as shown here, this body is rendered from a commit diff and
--- mixes lines of the previous implementation with lines of the new one
--- without +/- markers; it is not runnable as-is. Confirm against the
--- repository before relying on any single line.
---@return KeyCodes
function M.parse_keys(keystr)
local keys = {}
local cur = ""
local todo = 1
local special = nil
for i = 1, #keystr, 1 do
local c = keystr:sub(i, i)
if special then
if todo == 0 then
if c == ">" then
table.insert(keys, special .. ">")
cur = ""
todo = 1
special = nil
elseif c == "-" then
-- When getting a special key notation:
-- todo = 0 means it can be ended by a ">" now.
-- todo = 1 means ">" should be treated as the modified character.
todo = 1
end
else
todo = 0
end
if special then
special = special .. c
end
elseif c == "<" then
special = "<"
todo = 0
local notation = {}
---@alias ParseState
--- | "Character"
--- | "Special"
--- | "SpecialNoClose"
local start = 1
local i = start
---@type ParseState
local state = "Character"
-- Scan the string by byte value; `i` is advanced further down by the length
-- that utf8len_tab reports for the byte at `i`.
while i <= #keystr do
local c = strbyte(keystr, i, i)

if state == "Character" then
-- Remember where this key begins. A "<" switches to Special; any other
-- byte leaves the state as Character.
start = i
state = c == Tokens["<"] and "Special" or state
elseif state == "Special" then
-- "-" moves to SpecialNoClose, ">" returns to Character, and any other
-- byte stays in Special.
state = (c == Tokens["-"] and "SpecialNoClose") or (c == Tokens[">"] and "Character") or state
else
if todo == 1 then
todo = utf8len_tab[c:byte() + 1]
end
cur = cur .. c
todo = todo - 1
if todo == 0 then
table.insert(keys, cur)
cur = ""
todo = 1
end
end
end
local ret = { keys = M.t(keystr), internal = {}, notation = {} }
for i, key in pairs(keys) do
if key == " " then
key = "<space>"
-- NOTE(review): this assignment appears to be the body of the state
-- machine's final `else` branch (the SpecialNoClose state), sending the
-- parser back to Special — confirm against the repository.
state = "Special"
end
if i == 1 and vim.g.mapleader and M.t(key) == M.t(vim.g.mapleader) then
key = "<leader>"

i = i + utf8len_tab[c + 1]
-- Being in Character state after the advance means the key spanning
-- start .. i - 1 is complete; a lone space is stored as "<space>".
if state == "Character" then
local k = strsub(keystr, start, i - 1)
notation[#notation + 1] = k == " " and "<space>" or k
end
table.insert(ret.internal, M.t(key))
table.insert(ret.notation, key)
end
return ret

local keys = M.t(keystr)
local internal = M.parse_internal(keys)
local mapleader = vim.g.mapleader
mapleader = mapleader and M.t(mapleader)
-- The first notation entry becomes "<leader>" when the first internal key
-- equals the M.t-translated mapleader.
-- NOTE(review): if `internal` is empty and vim.g.mapleader is unset, both
-- sides of the comparison are nil, so notation[1] would be set to "<leader>"
-- for an empty key string — verify whether callers can pass one.
notation[1] = internal[1] == mapleader and "<leader>" or notation[1]

return {
keys = keys,
internal = internal,
notation = notation,
}
end

---@return string[]
function M.parse_internal(keystr)
local keys = {}
local cur = ""
local todo = 1
local utf8 = false
for i = 1, #keystr, 1 do
local c = keystr:sub(i, i)
if not utf8 then
if todo == 1 and c == "\128" then
-- K_SPECIAL: get 3 bytes
todo = 3
elseif cur == "\128" and c == "\252" then
-- K_SPECIAL KS_MODIFIER: repeat after getting 3 bytes
todo = todo + 1
elseif todo == 1 then
-- When the second byte of a K_SPECIAL sequence is not KS_MODIFIER,
-- the third byte is guaranteed to be between 0x02 and 0x7f.
todo = utf8len_tab[c:byte() + 1]
utf8 = todo > 1
---@alias ParseInternalState
--- | "Character"
--- | "Special"
---@type ParseInternalState
local state = "Character"
local start = 1
local i = 1
while i <= #keystr do
local c = strbyte(keystr, i, i)

if state == "Character" then
state = c == 128 and "Special" or state
i = i + utf8len_tab[c + 1]

if state == "Character" then
keys[#keys + 1] = strsub(keystr, start, i - 1)
start = i
end
end
cur = cur .. c
todo = todo - 1
if todo == 0 then
table.insert(keys, cur)
cur = ""
todo = 1
utf8 = false
else
-- This state is entered on the second byte of K_SPECIAL sequence.
if c == 252 then
-- K_SPECIAL KS_MODIFIER: skip this byte and the next
i = i + 2
else
-- K_SPECIAL _: skip this byte
i = i + 1
end
-- The last byte of this sequence should be between 0x02 and 0x7f,
-- switch to Character state to collect.
state = "Character"
end
end
return keys
Expand Down

0 comments on commit b0ebb67

Please sign in to comment.