Skip to content

Commit

Permalink
Indexing operations
Browse files Browse the repository at this point in the history
* Moved everything out of redundant ``compiler`` parent directory

* Created test case for the README example

* Make VarAssign give a list of nodes (either Ident or Index) instead of a list of names

* Reimplement deobfuscating hex-escaped strings
  • Loading branch information
Vurv78 committed Jun 12, 2022
1 parent 711f558 commit 231a6b8
Show file tree
Hide file tree
Showing 16 changed files with 129 additions and 60 deletions.
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
> Lua formatter, deobfuscator, parser, etc.
### Turns

```lua
print("abc")local a,b=55,21;do print("xyz")end;repeat break;repeat print("Hi")(function(a,b,c)a,b=b,c;local d=0x50;local e="\x70\x72\x69\x6e\x74\x28\x27\x65\x76\x69\x6c\x20\x6c\x75\x61\x20\x63\x6f\x64\x65\x27\x29"end)()until true;local f,g,h;for i=1,2,3 do _G["\x6c\x6f\x61\x64\x73\x74\x72\x69\x6e\x67"]=55 end until true
```
Expand Down
File renamed without changes.
37 changes: 37 additions & 0 deletions src/codegen/mode-lua/deobsfuscate.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
---@type NodeKinds
local NODE_KINDS = require("parser/lua").Kinds

local fmt = string.format

-- Extends format mode
local Mode = assert( require("codegen/mode-lua/format") )

--- Emits a comment node back as Lua source.
--- `data` is read as { multiline, inner, depth }: single-line comments become `--inner`,
--- multiline comments become a long-bracket comment with `depth` equals signs.
---@param self Transpiler
---@param data table
Mode[NODE_KINDS.Comment] = function(self, data)
	local is_multiline, text, level = data[1], data[2], data[3]

	-- Single-line comments need no bracket bookkeeping, so handle them first.
	if not is_multiline then
		return "--" .. text
	end

	-- The same run of '=' must appear in both the opening and closing bracket.
	local pad = string.rep("=", level)
	return fmt("--[%s[%s]%s]", pad, text, pad)
end

--- Emits a literal node, undoing hex-escape obfuscation in strings.
--- `data` is read as { kind, raw, val }:
---   * "string" -> every `\xHH` sequence in `val` is replaced by the byte it encodes, then the result is quoted with %q
---   * "number" -> `tostring(val)`
---   * anything else -> `raw` is returned untouched
-- NOTE(review): a `\xHH` that follows an escaped backslash (source text `\\x41`) is also rewritten;
-- whether that can occur depends on how the lexer stores `val` -- confirm against the lexer.
---@param self Transpiler
---@param data table
Mode[NODE_KINDS.Literal] = function(self, data)
	local kind = data[1]

	if kind == "number" then
		return tostring(data[3])
	end

	if kind ~= "string" then
		return data[2]
	end

	-- gsub also returns a match count; assigning to a single local drops it.
	local decoded = data[3]:gsub("\\x(%x%x)", function(hex)
		return string.char(tonumber(hex, 16))
	end)

	return fmt("%q", decoded)
end

return Mode
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
---@type NodeKinds
local NODE_KINDS = require("compiler/parser/lua").Kinds
local NODE_KINDS = require("parser/lua").Kinds

local fmt = string.format

Expand Down Expand Up @@ -138,14 +138,20 @@ local Transpilers = {
--- Emits an assignment statement: `<targets> = <values>`.
--- `data[1]` is the list of assignment targets and `data[2]` the list of value nodes.
--- Every target and value is transpiled first and the results are joined with ", ".
---@param self Transpiler
---@param data table
[NODE_KINDS.VarAssign] = function(self, data)
	local params, vals = data[1], data[2]

	-- Targets are nodes, so they must be transpiled before table.concat:
	-- table.concat only accepts strings and numbers.
	local idents = {}
	for k, node in ipairs(params) do
		idents[k] = self:transpile(node)
	end

	local valstrs = {}
	for i = 1, #vals do
		valstrs[i] = self:transpile(vals[i])
	end

	return fmt("%s = %s", table.concat(idents, ", "), table.concat(valstrs, ", "))
end,

---@param self Transpiler
Expand Down
22 changes: 0 additions & 22 deletions src/compiler/codegen/mode-lua/deobsfuscate.lua

This file was deleted.

2 changes: 1 addition & 1 deletion src/compiler/lexer/lua.lua → src/lexer/lua.lua
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
--#region Prelude
local Structure = require("compiler/structure/lua")
local Structure = require("structure/lua")
local Operators, Keywords, Grammar, LUT = Structure.Operators, Structure.Keywords, Structure.Grammar, Structure.LUT

---@class Lexer
Expand Down
8 changes: 4 additions & 4 deletions src/main.lua
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
package.path = package.path .. ";src/?.lua"

local Lexer = require("compiler/lexer/lua").new()
local Parser = require("compiler/parser/lua").new()
local Lexer = require("lexer/lua").new()
local Parser = require("parser/lua").new()

local Transpiler = require("compiler/codegen/lua")
local Formatter = Transpiler.new( require("compiler/codegen/mode-lua/format") )
local Transpiler = require("codegen/lua")
local Formatter = Transpiler.new( require("codegen/mode-lua/format") )

--- Indented print.
-- Removes indentation from the given string so you can indent the string source without it affecting the output.
Expand Down
49 changes: 33 additions & 16 deletions src/compiler/parser/lua.lua → src/parser/lua.lua
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
local TOKEN_KINDS = require("compiler/lexer/lua").Kinds
local TOKEN_KINDS = require("lexer/lua").Kinds

---@class NodeKinds
local KINDS = {
Expand Down Expand Up @@ -359,7 +359,7 @@ function Parser:acceptParameters()
local args = {}
if self:popToken(TOKEN_KINDS.Grammar, ")") then return args end

local arg, ty
local arg
while self:hasTokens() do
arg = self:acceptIdent()
if not arg then break end
Expand Down Expand Up @@ -401,6 +401,22 @@ function Parser:acceptArguments( noparenthesis )
end
end

--- Attempts to consume one indexing suffix at the current position: `.name` or `[expr]`.
--- Returns nothing when the next token starts neither form.
--- Raises (via assert) when a suffix was started but is malformed.
---@return string|nil kind # "." for an identifier index, "[]" for a bracketed expression
---@return Node|string|nil index # What acceptIdent / acceptExpression produced for the matched form
function Parser:acceptIndex()
	local GRAMMAR = TOKEN_KINDS.Grammar

	if self:popToken(GRAMMAR, ".") then
		-- Dot form: an identifier must follow
		local name = assert( self:acceptIdent(), "Expected identifier after '.'" )
		return ".", name
	end

	if self:popToken(GRAMMAR, "[") then
		-- Bracket form: an expression followed by the closing bracket
		local inner = assert( self:acceptExpression(), "Expected expression after '['" )
		assert( self:popToken(GRAMMAR, "]"), "Expected ']' after expression" )
		return "[]", inner
	end
end

Statements = {
--- Shouldn't be a part of 'Statements' but w/e
---@param self Parser
Expand Down Expand Up @@ -566,19 +582,27 @@ Statements = {
---@param token Token
[KINDS.VarAssign] = function(self, token)
if isToken(token, TOKEN_KINDS.Identifier) then
local names = {}
local idents = {}
self:prevToken()

while true do
local name = assert( self:acceptIdent(), "Expected identifier in assignment" )
names[#names + 1] = name
local ident = assert( self:popToken(TOKEN_KINDS.Identifier), "Expected identifier in assignment" )
local kind, idx = self:acceptIndex()

local ident_node = Node.new(KINDS.Identifier, {ident.raw})

if kind then
idents[#idents + 1] = Node.new(KINDS.Index, {kind, ident_node, idx})
else
idents[#idents + 1] = ident_node
end

if not self:popToken(TOKEN_KINDS.Grammar, ",") then break end
end

if self:popToken(TOKEN_KINDS.Operator, "=") then
local exprs = self:acceptArguments(true)
return { names, exprs }
return { idents, exprs }
else
-- Only names were given. Probably an expr?
return
Expand Down Expand Up @@ -636,16 +660,9 @@ Expressions = {
---@param token Token
[3] = function(self, token)
	-- Parse the operand with the next expression level, then look for one indexing suffix on it.
	local expr = Expressions[4](self, token)

	-- acceptIndex takes no arguments and returns (kind, index), not a finished node.
	-- The Index node has to be built here around `expr`; returning the first result
	-- directly would hand the caller the string "." or "[]" instead of a node.
	local kind, index = self:acceptIndex()
	if kind then
		return Node.new(KINDS.Index, {kind, expr, index})
	end

	return expr
end,
Expand Down
File renamed without changes.
8 changes: 4 additions & 4 deletions test/cases/format.lua
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
local Lexer = require("compiler/lexer/lua")
local Parser = require("compiler/parser/lua")
local Transpiler = require("compiler/codegen/lua")
local Lexer = require("lexer/lua")
local Parser = require("parser/lua")
local Transpiler = require("codegen/lua")

local lexer = Lexer.new()
local tokens = lexer:parse([[
Expand All @@ -17,7 +17,7 @@ local parser = Parser.new()
local nodes = parser:parse(tokens)
assert(nodes, "Failed to generate AST")

local transpiler = Transpiler.new( require("compiler/codegen/mode-lua/format") )
local transpiler = Transpiler.new( require("codegen/mode-lua/format") )
local code = transpiler:process(nodes)

Assert.equal(code, "local var = 55\nxyz = xyz\n::test::\ngoto test")
2 changes: 1 addition & 1 deletion test/cases/lexer.lua
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
local Lexer = require("compiler/lexer/lua")
local Lexer = require("lexer/lua")

local lexer = Lexer.new()
local tokens = lexer:parse([[
Expand Down
2 changes: 1 addition & 1 deletion test/cases/lexer_fails.lua
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
local Lexer = require("compiler/lexer/lua")
local Lexer = require("lexer/lua")

local lexer = Lexer.new()

Expand Down
17 changes: 13 additions & 4 deletions test/cases/parser.lua
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
local Lexer = require("compiler/lexer/lua")
local Parser = require("compiler/parser/lua")
local Lexer = require("lexer/lua")
local Parser = require("parser/lua")

local lexer = Lexer.new()
local tokens = lexer:parse([[
Expand All @@ -25,8 +25,17 @@ assert(nodes[1].data[1][1] == "X" and nodes[1].data[2][1].kind == Parser.Kinds.L

assert(nodes[2].kind == Parser.Kinds.VarAssign)
assert(#nodes[2].data[1] == 1, "Expected only 1 variable in assignment")
assert(nodes[2].data[1][1] == "Y" and nodes[1].data[2][1].kind == Parser.Kinds.Literal, "Expected Y to be a literal")
assert(nodes[2].data[2][1].data[1] == "string" and nodes[2].data[2][1].data[3] == "World!", "Expected value of literal to be 'World!'" )

-- Assignment contains a list of identifiers OR index ops
Assert.equal(nodes[2].data[1][1].kind, Parser.Kinds.Identifier)
Assert.equal(nodes[2].data[2][1].kind, Parser.Kinds.Literal)

-- nodes[2] -> 2nd node (Y = "World!")
-- .data[2] -> list of values
-- [1] -> 1st value ("World!") (As a literal node)
-- .data[1] -> Literal type
Assert.equal(nodes[2].data[2][1].data[1], "string")
Assert.equal(nodes[2].data[2][1].data[3], "World!")

assert(nodes[3] and nodes[3].kind == Parser.Kinds.Comment, "Expected comment")

Expand Down
4 changes: 2 additions & 2 deletions test/cases/parser_fails.lua
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
local Lexer = require("compiler/lexer/lua")
local Parser = require("compiler/parser/lua")
local Lexer = require("lexer/lua")
local Parser = require("parser/lua")

local lexer = Lexer.new()

Expand Down
23 changes: 23 additions & 0 deletions test/cases/readme.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
--[[
	This is a test to make sure the README.md example still actually parses and compiles properly.

	The first ```lua block in README.md is used as the input and the second one as the expected output
	of the deobsfuscate mode.
]]

local handle, open_err = io.open("README.md", "r")
-- Keep the OS-level reason in the message so a wrong working directory is easy to spot.
assert( handle, "Could not open README.md: " .. tostring(open_err) )

-- Normalise CRLF so the newline trimming below and the final comparison also work on Windows checkouts.
local markdown = handle:read("*a"):gsub("\r\n", "\n")
handle:close()

local sources = {}
for src in markdown:gmatch("```lua([^`]+)```") do
	-- Drop the newline right after the opening fence and the one right before the closing fence.
	sources[#sources + 1] = src:sub(2, -2)
end

-- Fail with a clear message instead of passing nil to the lexer or to the final comparison.
assert( #sources >= 2, "Expected at least two ```lua blocks in README.md (input and expected output)" )

local lexer = require("lexer/lua").new()
local parser = require("parser/lua").new()
local transpiler = require("codegen/lua").new(require("codegen/mode-lua/deobsfuscate"))

local tokens = assert( lexer:parse(sources[1]) )
local nodes = assert( parser:parse(tokens) )
local code = assert( transpiler:process(nodes) )

Assert.equal(sources[2], code)
2 changes: 1 addition & 1 deletion test/lib.lua
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ loadstring = loadstring or load
Assert = {}

--- Renders a value for an assertion message.
--- Strings are quoted with %q so whitespace and escapes stay visible; every other type goes
--- through tostring, because %q raises "bad argument" for nil, tables and other non-string values.
---@param value any
---@return string
local function repr(value)
	if type(value) == "string" then
		return string.format("%q", value)
	end
	return tostring(value)
end

--- Raises an error describing both operands when `a ~= b`; does nothing otherwise.
---@param a any
---@param b any
function Assert.equal(a, b)
	if a ~= b then
		-- Level 2 attributes the error to the test that called Assert.equal rather than to this library.
		error(string.format("Assertion failed:\n%s\n\t--- ~= ---\n%s", repr(a), repr(b)), 2)
	end
end

Expand Down

0 comments on commit 231a6b8

Please sign in to comment.