Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
lunadry/lunadry.lua
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
executable file
476 lines (408 sloc)
20.3 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env lua | |
| -- Lunadry -- A code formatter. | |
| -- Copyright (C) 2014 Patrick Joseph Donnelly | |
| -- | |
| -- This program is free software: you can redistribute it and/or modify | |
| -- it under the terms of the GNU General Public License as published by | |
| -- the Free Software Foundation, either version 3 of the License, or | |
| -- (at your option) any later version. | |
| -- | |
| -- This program is distributed in the hope that it will be useful, | |
| -- but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| -- GNU General Public License for more details. | |
| -- | |
| -- You should have received a copy of the GNU General Public License | |
| -- along with this program. If not, see <http://www.gnu.org/licenses/>. | |
-- Banner written to stdout by the --version option.
| local VERSION = "Lunadry v0.1.0 Copyright (C) 2012-2015 Patrick Donnelly" | |
-- Set by -d/--debug; when true, luafilter() wraps every grammar rule with
-- ENTER/MATCH/LEAVE tracing on stderr.
| local DEBUG = false | |
-- Set by -i/--in-place; when true, main() renames the formatted output over
-- the input file.
| local IN_PLACE = false | |
-- Local aliases for globals and standard-library functions.
| local assert = assert; | |
| local pairs = pairs; | |
| local require = require; | |
| local tostring = tostring; | |
| local io = require "io"; | |
| local read = io.read; | |
| local write = io.write; | |
| local stderr = io.stderr; | |
| local os = require "os" | |
| local table = require "table"; | |
| local concat = table.concat; | |
-- Third-party modules: LPeg drives the formatter grammar.
| local lpeg = require "lpeg"; | |
-- `P` is the posix module here (fork/exec/wait/getopt are called on it below).
-- Inside luafilter() the name is shadowed by a local `P = lpeg.P`.
| local P = require "posix"; | |
--- Build the LPeg pattern that reformats Lua source code.
--
-- The grammar parses a complete Lua chunk and, through captures, emits the
-- same program with normalised spacing, indentation and line breaks. The
-- indentation state lives in named group captures ("newline", "indent",
-- "indent_space", "space") that are read back with Cb.
--
-- When the file-level DEBUG flag is true at call time, every rule is wrapped
-- so that entering, matching and leaving it is traced on stderr.
--
-- @param ... unused
-- @return an LPeg pattern; `pattern:match(source)` yields the formatted source
--   as a single string, or nil when the whole input cannot be parsed.
local function luafilter (...)
  -- NOTE: this `P` is lpeg.P and shadows the file-level `P` within this function.
  local P = lpeg.P;
  local S = lpeg.S;
  local V = lpeg.V;
  local C = lpeg.C;
  local Cb = lpeg.Cb;
  local Cc = lpeg.Cc;
  local Cg = lpeg.Cg;
  local Cmt = lpeg.Cmt;
  local Ct = lpeg.Ct;

  -- Raise LPeg's backtrack stack limit for this grammar.
  lpeg.setmaxstack(8000);

  -- Emit the current "newline" string, swallowing one source line end
  -- (with any spaces before it) if there is one.
  local NEWLINE = Cb "newline" * ((V "space" - P "\n")^0 * P "\n")^-1;
  local SPACE = Cb "space";
  local INDENT_SPACE = Cb "indent_space";

  -- Match `p` with the indentation increased by one level. Unless `nonewline`
  -- is true, a NEWLINE is emitted before `p`.
  local function INDENT_INCREASE (p, nonewline)
    -- The outermost group capture is to scope the increased indentation so that:
    --     INDENT_INCREASE(foo) * bar
    -- does not affect the indentation of bar.
    if nonewline then
      return Cg(Cg(Cb "indent" * INDENT_SPACE, "indent") * p);
    else
      return Cg(Cg(Cb "indent" * INDENT_SPACE, "indent") * NEWLINE * p);
    end
  end

  local INDENT = Cb "indent";
  local shebang = P "#" * (P(1) - P "\n")^0 * P "\n";

  -- Capture keyword `k`, requiring that it is not the prefix of a longer identifier.
  local function K (k) -- keyword
    return C(k) * -(V "alnum" + P "_");
  end

  -- The formatter uses captures to indent code. We necessarily use thousands and
  -- thousands of them. At various strategic points, we concatenate these captures
  -- so we don't overflow the Lua stack.
  local function FLATTEN (pattern)
    return Ct(pattern) / concat;
  end

  local lua = lpeg.locale {
    V "_init" * FLATTEN(V "_script");

    -- Initial formatting state.
    _init = Cg(Cc "\n", "newline") * Cg(Cc "", "indent") * Cg(Cc " ", "indent_space") * Cg(Cc " ", "space");

    -- Optional shebang line, then a chunk that must consume the entire input.
    _script = C(shebang)^-1 * V "chunk" * -P(1);

    -- keywords
    keywords = K "and" + K "break" + K "do" + K "else" + K "elseif" + --
               K "end" + K "false" + K "for" + K "function" + K "if" + --
               K "in" + K "local" + K "nil" + K "not" + K "or" + K "repeat" + --
               K "return" + K "then" + K "true" + K "until" + K "while";

    -- longstrings
    longstring = P { -- from Roberto Ierusalimschy's lpeg examples
      (V "open" * (P(1) - V "closeeq")^0 * V "close") / "%0";
      open = "[" * Cg((P "=")^0, "init") * P "[" * (P "\n")^-1;
      close = "]" * C((P "=")^0) * "]";
      closeeq = Cmt(V "close" * Cb "init", function (s, i, a, b) return a == b end);
    };

    -- comments & whitespace

    -- read a comment but do not capture any whitespace at the end
    chomp_comment = C((P(1) - (V "space" - P "\n")^0 * (P "\n" + -P(1)))^0) * (V "space" - P "\n")^0 * (P "\n" + -P(1)) * Cc "\n";
    one_line_comment = -V "multi_line_comment" * C "--" * V "chomp_comment";
    multi_line_comment = C "--" * V "longstring";
    comment = V "multi_line_comment" + V "one_line_comment" * INDENT;
    whitespace = (V "space" + (SPACE * V "comment" * SPACE))^0;
    space_after_stat = ((V "space" - P "\n")^0 * (P ";")^-1 * (V "space" - P "\n")^0 * SPACE * V "one_line_comment") + --
                       (V "whitespace" * P ";")^-1 * NEWLINE;

    -- match "filler" comments
    line_of_space = (V "space" - P "\n")^0 * P "\n";
    -- Two or more blank lines collapse to one blank line; a single line end is kept.
    collapse_whitespace = V "line_of_space"^2 * Cc "\n\n" + V "line_of_space"^1 * Cc "\n";
    filler_comment = (V "space" - P "\n")^0 * INDENT * V "one_line_comment"; -- * C "\n"^-1;
    --filler_comment = (V "space" - P "\n")^0 * INDENT * (V "one_line_comment" - V "multi_line_comment"); -- * C "\n"^-1; -- FIXME highlighted after INDENT
    filler = (V "collapse_whitespace" + V "filler_comment")^0 * V "whitespace" + V "whitespace";

    -- Types and Comments

    Name = C((V "alpha" + P "_") * (V "alnum" + P "_")^0) - V "keywords";
    -- Exponents take an optional sign, which may be '+' as well as '-'
    -- (e.g. 1e+10, 0x1p+4), as in Lua's own numeric literals.
    BinaryExponent = S "pP" * (S "+-")^-1 * V "digit"^1;
    DecimalExponent = S "eE" * (S "+-")^-1 * V "digit"^1;
    Number = C((P "-")^-1 * V "whitespace" * P "0" * S "xX" * V "xdigit"^1 * (P "." * V "xdigit"^0)^-1 * V "BinaryExponent"^-1 * -(V "alnum" + P "_")) + --
             C((P "-")^-1 * V "whitespace" * V "digit"^1 * (P "." * V "digit"^0)^-1 * V "DecimalExponent"^-1 * -(V "alnum" + P "_")) + --
             C((P "-")^-1 * V "whitespace" * P "." * V "digit"^1 * V "DecimalExponent"^-1 * -(V "alnum" + P "_"));
    String = C(P "\"" * (P "\\" * P(1) + (1 - P "\""))^0 * P "\"") + --
             C(P "'" * (P "\\" * P(1) + (1 - P "'"))^0 * P "'") + --
             V "longstring";

    -- Lua Complete Syntax

    chunk = FLATTEN((V "filler" * INDENT * FLATTEN(V "stat") * V "space_after_stat")^0 * (V "filler" * INDENT * V "retstat" * V "space_after_stat")^-1);

    block = V "chunk";

    stat = P ";" + --
           V "label" + --
           K "break" + --
           K "goto" * SPACE * V "whitespace" * V "Name" + --
           K "do" * INDENT_INCREASE(V "filler" * V "block" * V "filler") * INDENT * K "end" + --
           K "while" * SPACE * V "whitespace" * V "_oneline_exp" * V "whitespace" * SPACE * K "do" * INDENT_INCREASE(V "filler" * V "block" * V "filler") * INDENT * K "end" + --
           K "repeat" * INDENT_INCREASE(V "filler" * V "block" * V "filler") * INDENT * K "until" * SPACE * V "whitespace" * V "_oneline_exp" + --
           K "if" * SPACE * V "whitespace" * V "_oneline_exp" * V "whitespace" * SPACE * K "then" * INDENT_INCREASE(V "filler" * V "block" * V "filler") * (INDENT * K "elseif" * SPACE * V "whitespace" * V "_oneline_exp" * V "whitespace" * SPACE * K "then" * INDENT_INCREASE(V "filler" * V "block" * V "filler"))^0 * (INDENT * K "else" * INDENT_INCREASE(V "filler" * V "block" * V "filler"))^-1 * INDENT * K "end" + --
           K "for" * SPACE * V "whitespace" * V "Name" * V "whitespace" * SPACE * C "=" * SPACE * V "whitespace" * V "_oneline_exp" * V "whitespace" * C "," * SPACE * V "whitespace" * V "_oneline_exp" * (V "whitespace" * C "," * SPACE * V "whitespace" * V "_oneline_exp")^-1 * V "whitespace" * SPACE * K "do" * INDENT_INCREASE(V "filler" * V "block" * V "filler") * INDENT * K "end" + --
           K "for" * SPACE * V "whitespace" * V "namelist" * V "whitespace" * SPACE * K "in" * SPACE * V "whitespace" * V "explist" * V "whitespace" * SPACE * K "do" * INDENT_INCREASE(V "filler" * V "block" * V "filler") * INDENT * K "end" + --
           K "function" * SPACE * V "whitespace" * V "funcname" * SPACE * V "whitespace" * V "funcbody" + --
           K "local" * SPACE * V "whitespace" * K "function" * SPACE * V "whitespace" * V "Name" * V "whitespace" * SPACE * V "funcbody" + --
           K "local" * SPACE * V "whitespace" * V "namelist" * (SPACE * V "whitespace" * C "=" * SPACE * V "whitespace" * V "explist")^-1 * V "_check_ambiguous" + --
           V "_function_declaration" + --
           V "varlist" * V "whitespace" * SPACE * C "=" * SPACE * V "whitespace" * V "explist" * V "_check_ambiguous" + --
           V "functioncall" * V "_check_ambiguous";

    -- If the script uses a semicolon to avoid an ambiguous syntax situation, we keep it.
    -- Example:
    -- a = f()
    -- ("foo"):method()
    --
    -- Can be parsed as:
    -- a = f()("foo"):method();
    -- or
    -- a = f();
    -- ("foo"):method();
    _check_ambiguous = #(V "whitespace" * P ";" * V "whitespace" * P "(") * Cc ";" + P(true);

    -- Rewrite `name = function ... end` as `function name ... end`, provided the
    -- function expression is not followed by a binary operator or a comma.
    _function_declaration = Cmt(V "Name" * V "space"^0 * P "=" * V "space"^0 * FLATTEN(V "function") * -(V "whitespace" * (V "binop" + P ",")), function (s, p, name, f)
      local new = f:gsub("^function", "function "..name)
      return true, new
    end);

    label = C "::" * V "whitespace" * V "Name" * V "whitespace" * C "::";

    retstat = K "return" * (SPACE * V "whitespace" * V "explist")^-1;

    funcname = V "Name" * (V "whitespace" * C "." * V "whitespace" * V "Name")^0 * (V "whitespace" * C ":" * V "whitespace" * V "Name")^-1;

    namelist = V "Name" * (V "whitespace" * C "," * SPACE * V "whitespace" * V "Name")^0;

    varlist = V "var" * (V "whitespace" * C "," * SPACE * V "whitespace" * V "var")^0;

    -- Let's come up with a syntax that does not use left recursion (only listing changes to Lua 5.1 extended BNF syntax)
    -- value ::= nil | false | true | Number | String | '...' | function | tableconstructor | functioncall | var | '(' exp ')'
    -- exp ::= unop exp | value [binop exp]
    -- prefix ::= '(' exp ')' | Name
    -- index ::= '[' exp ']' | '.' Name
    -- call ::= args | ':' Name args
    -- suffix ::= call | index
    -- var ::= prefix {suffix} index | Name
    -- functioncall ::= prefix {suffix} call

    -- A parenthesised simple value; the parentheses are matched with P (not C)
    -- and therefore dropped from the output.
    _deparenthesis_value = P "(" * V "whitespace" * (V "_deparenthesis_value" + V "_value_simple") * V "whitespace" * P ")";

    _value_simple = K "nil" + --
                    K "false" + --
                    K "true" + --
                    V "Number" + --
                    V "String" + --
                    V "function" + --
                    V "tableconstructor" + --
                    V "var";

    -- Something that represents a value (or many values)
    value = K "nil" + --
            K "false" + --
            K "true" + --
            V "Number" + --
            V "String" + --
            C "..." + --
            V "function" + --
            V "tableconstructor" + --
            V "functioncall" + --
            V "var" + --
            V "_deparenthesis_value" + -- remove redundant parenthesis
            C "(" * V "whitespace" * V "exp" * V "whitespace" * C ")";

    -- An expression operates on values to produce a new value or is a value
    exp = V "unop" * V "whitespace" * V "exp" + --
          V "value" * (V "whitespace" * V "binop" * V "whitespace" * V "exp")^-1;

    -- This is an expression which is always truncated to 1 result, and so we can remove
    -- redundant parenthesis.
    _single_exp = P "(" * V "whitespace" * V "_single_exp" * V "whitespace" * P ")" * -(V "whitespace" * (V "suffix" + V "binop")) +
                  V "exp";

    -- An expression formatted with newline set to a space and indentation
    -- cleared, scoped by the outer group capture.
    _oneline_exp = Cg(Cg(Cc " ", "newline") * Cg(Cc "", "indent") * Cg(Cc "", "indent_space") * V "_single_exp");

    -- Index and Call

    index = C "[" * V "whitespace" * V "_single_exp" * V "whitespace" * C "]" +
            C "." * V "whitespace" * V "Name";

    call = V "args" +
           C ":" * V "whitespace" * V "Name" * V "whitespace" * V "args";

    -- A Prefix is the leftmost side of a var(iable) or functioncall
    prefix = C "(" * V "whitespace" * V "exp" * V "whitespace" * C ")" +
             V "Name";

    -- A Suffix is a Call or Index
    suffix = V "call" +
             V "index";

    var = V "prefix" * (V "whitespace" * V "suffix" * #(V "whitespace" * V "suffix"))^0 * V "whitespace" * V "index" +
          V "Name";

    functioncall = V "prefix" * (V "whitespace" * V "suffix" * #(V "whitespace" * V "suffix"))^0 * V "whitespace" * V "call";

    explist = V "exp" * (V "whitespace" * C "," * SPACE * V "whitespace" * V "exp")^0;

    -- Change func({}) to func {}
    -- Change func("...") to func "..."
    args = P "(" * SPACE * V "whitespace" * V "tableconstructor" * V "whitespace" * P ")" +
           P "(" * SPACE * V "whitespace" * V "String" * V "whitespace" * P ")" +
           C "(" * INDENT_INCREASE(V "whitespace" * (V "explist" * V "whitespace")^-1, true) * C ")" +
           SPACE * V "tableconstructor" +
           SPACE * V "String";

    ["function"] = FLATTEN(K "function" * SPACE * V "whitespace" * V "funcbody");

    funcbody = C "(" * V "whitespace" * (V "parlist" * V "whitespace")^-1 * C ")" * INDENT_INCREASE(V "block" * V "whitespace") * INDENT * K "end";

    parlist = V "namelist" * (V "whitespace" * C "," * SPACE * V "whitespace" * C "...")^-1 +
              C "...";

    tableconstructor = C "{" * V "space"^0 * C "}" + -- check for empty tableconstructor
                       FLATTEN(C "{" * INDENT_INCREASE(V "filler" * (V "fieldlist" * V "filler")^-1) * NEWLINE * INDENT * C "}");

    field_space_after = (V "space" - P "\n")^0 * SPACE * V "one_line_comment";

    fieldlist = INDENT * FLATTEN(V "field") * (V "whitespace" * V "fieldsep" * (V "field_space_after" + NEWLINE) * V "filler" * INDENT * FLATTEN(V "field"))^0 * (V "whitespace" * V "fieldsep" + Cc ",")^-1 * (V "field_space_after" + NEWLINE);

    field = C "[" * V "whitespace" * V "_oneline_exp" * V "whitespace" * C "]" * SPACE * V "whitespace" * C "=" * SPACE * V "whitespace" * V "_single_exp" +
            V "Name" * SPACE * V "whitespace" * C "=" * SPACE * V "whitespace" * V "_single_exp" +
            V "exp";

    fieldsep = C "," +
               P ";" * Cc ","; -- use only commas

    -- match longest token sequences first
    binop = SPACE * K "and" * SPACE + --
            SPACE * K "or" * SPACE + --
            SPACE * C ".." * SPACE + --
            SPACE * C "<=" * SPACE + --
            SPACE * C ">=" * SPACE + --
            SPACE * C "==" * SPACE + --
            SPACE * C "~=" * SPACE + --
            SPACE * C "//" * SPACE + --
            SPACE * C "<<" * SPACE + --
            SPACE * C ">>" * SPACE + --
            SPACE * C "&" * SPACE + --
            SPACE * C "~" * SPACE + --
            SPACE * C "|" * SPACE + --
            SPACE * C "+" * SPACE + --
            SPACE * (C "-" - P "--") * SPACE + --
            SPACE * C "*" * SPACE + --
            SPACE * C "/" * SPACE + --
            C "^" + -- no space for power
            SPACE * C "%" * SPACE + --
            SPACE * C "<" * SPACE + --
            SPACE * C ">" * SPACE;

    unop = (C "-" - P "--") +
           C "#" +
           C "~" +
           K "not" * SPACE;
  }

  if DEBUG then
    -- Current nesting depth of rule invocations, used to indent the trace.
    local level = 0

    -- Return up to n+1 bytes of `s` starting at position `p`, with characters
    -- that are neither printable nor a space rendered as escapes.
    local function escape (s, p, n)
      -- Parenthesised so that only the string (not gsub's count) is returned.
      return (s:sub(p, p+n):gsub("[^%g ]", setmetatable({["\n"] = "\\n", ["\t"] = "\\t"}, {__index = function (t, c) return ("\\x%02X"):format(c:byte()) end})))
    end

    -- Only existing keys are reassigned during traversal; no keys are added.
    for k, p in pairs(lua) do
      local enter = Cmt(P(true), function (s, p)
        stderr:write(("%sENTER %s '%s'\n"):format((" "):rep(level*2), k, escape(s, p, 16)))
        level = level+1
        return true
      end)
      local match = Cmt(P(true), function (s, p)
        level = level-1
        if k == "space" or k == "comment" then
          return true
        else
          stderr:write(("%sMATCH %s '%s'\n"):format((" "):rep(level*2), k, escape(s, p, 16)))
          return true
        end
      end)
      local leave = Cmt(P(true), function (s, p)
        level = level-1
        stderr:write(("%sLEAVE %s\n"):format((" "):rep(level*2), k))
        return false
      end)
      lua[k] = enter * p * match + leave * (P "k" - P "k") -- use a trick '(P "k" - P "k")' to avoid lpeg left recursion false error
    end
  end

  return P(lua)
end
--- Warn when two Lua source files do not compile to the same bytecode.
--
-- Forks a child that runs a bash script diffing the filtered `luac -l -p`
-- listings of both files (the diff itself goes to stderr). If the child exits
-- with a non-zero status, a warning naming both files is written to stderr.
--
-- @param f1 path of the original source file
-- @param f2 path of the formatted output file
local function checkbytecode (f1, f2)
  -- batrick@batbytes:~$ luac -l -p -
  -- foo = function () end
  --
  -- main <stdin:0,0> (3 instructions, 12 bytes at 0x172c230)
  -- 0+ params, 2 slots, 0 upvalues, 0 locals, 1 constant, 1 function
  -- 1 [1] CLOSURE 0 0 ; 0x172c410
  -- 2 [1] SETGLOBAL 0 -1 ; foo
  -- 3 [1] RETURN 0 1
  --
  -- function <stdin:1,1> (1 instruction, 4 bytes at 0x172c410)
  -- 0 params, 2 slots, 0 upvalues, 0 locals, 0 constants, 0 functions
  -- 1 [1] RETURN 0 1
  --
  -- We filter out everything but the opcodes and the lines specifying the
  -- function statistics (# of parameters, upvalues, etc.). We also remove CLOSURE
  -- opcodes because they include a runtime pointer address which changes across
  -- luac invocations.
  local pid, forkerr = P.fork()
  if not pid then
    -- fork failed: the comparison cannot be run, so say so instead of staying silent.
    io.stderr:write(("WARNING: could not fork to compare bytecode of '%s' and '%s': %s\n"):format(f1, f2, tostring(forkerr)))
    return
  end

  if pid == 0 then
    -- Child process: replace ourselves with bash running the comparison.
    local template = [=[
{
  filter() {
    grep --invert-match -E "^function|main" | grep --invert-match "^[[:space:]]*$" | grep --invert-match CLOSURE | cut -f 2,4-
  }
  diff -au <(luac -l -p "$1" | filter) <(luac -l -p "$2" | filter)
} >&2
]=]
    P.exec("/bin/bash", "-c", template, "bash", f1, f2);
    -- Only reached when exec itself failed.
    os.exit(1)
  end

  -- Parent process: wait for the child. A failed wait is retried only when it
  -- was interrupted (EINTR); any other failure ends the loop so that we cannot
  -- spin forever on an error such as ECHILD.
  local wpid, status, n
  repeat
    wpid, status, n = P.wait(pid, 0)
  until wpid == pid or (wpid == nil and n ~= P.EINTR)

  if wpid == nil then
    -- On failure `status` holds the error message returned by wait.
    io.stderr:write(("WARNING: could not wait for bytecode comparison of '%s' and '%s': %s\n"):format(f1, f2, tostring(status)))
    return
  end

  if status == "exited" and n ~= 0 then
    io.stderr:write(("WARNING: input file '%s' and output file '%s' have different bytecode\n"):format(f1, f2))
  end
end
--- Command-line entry point.
--
-- Parses the options in the global `arg` table (-d/--debug, -h/--help,
-- -i/--in-place, --version), then formats every remaining path. A path of "-"
-- (also the default when no path is given) reads stdin and writes stdout. Any
-- other path is written to "<path>.lunadry", compared with the input via
-- checkbytecode(), and renamed over the input when in-place mode is on.
-- Problems with one file are reported on stderr and do not stop the others.
--
-- @param ... unused; arguments are taken from the global `arg`
local function main (...)
  local LONGOPT_VERSION = "\x01"
  local short = "dhi"
  local long = {
    {"debug", "none", "d"},
    {"help", "none", "h"},
    {"in-place", "none", "i"},
    {"version", "none", LONGOPT_VERSION},
  }

  -- `last` tracks the index getopt reported after the previous option, which
  -- is the index of the option being examined on the next iteration.
  local last = 1
  for r, _, optind in P.getopt(arg, short, long) do
    if r == "?" then
      io.stderr:write(("unrecognized option: %s\n"):format(arg[last]))
      os.exit(1)
    elseif r == "d" then
      DEBUG = true
    elseif r == "h" then
      io.stdout:write(("usage: %s [-dhi] [--in-place] file1 [file2 [...]]\n"):format(arg[0]))
      os.exit(0)
    elseif r == "i" then
      IN_PLACE = true
    elseif r == LONGOPT_VERSION then
      io.stdout:write(VERSION, "\n")
      os.exit(0)
    end
    last = optind
  end

  -- No file operands: default to stdin.
  if last > #arg then
    table.insert(arg, "-")
    last = #arg
  end

  -- Built after option parsing so that the grammar sees the final DEBUG value.
  local lua = luafilter()

  -- Format a single path; report and return early on the first failure.
  local function process (path)
    local from_stdin = path == "-"

    local f, open_err
    if from_stdin then
      f = io.stdin
    else
      f, open_err = io.open(path)
    end
    if not f then
      io.stderr:write(("could not open '%s': %s\n"):format(path, tostring(open_err)))
      return
    end

    local input, read_err = f:read "*a"
    -- The standard streams are left open so that "-" may be given more than once.
    if not from_stdin then
      f:close()
    end
    if not input then
      io.stderr:write(("could not read '%s': %s\n"):format(path, tostring(read_err)))
      return
    end

    -- A failed match yields nil and no message, so describe the failure here.
    local filtered = lua:match(input)
    if not filtered then
      io.stderr:write(("could not filter '%s': input does not match the Lua grammar\n"):format(path))
      return
    end

    local out, outpath, out_err
    if from_stdin then
      outpath = "[stdout]"
      out = io.stdout
    else
      outpath = path..".lunadry"
      out, out_err = io.open(outpath, "w")
    end
    if not out then
      io.stderr:write(("could not open '%s': %s\n"):format(outpath, tostring(out_err)))
      return
    end

    local ok, write_err = out:write(filtered)
    if from_stdin then
      if ok then
        ok, write_err = out:flush()
      end
    else
      -- Buffered data may only fail to reach the disk at close time.
      local closed, close_err = out:close()
      if ok and not closed then
        ok, write_err = nil, close_err
      end
    end
    if not ok then
      -- Stop here: a truncated output must never be renamed over the input.
      io.stderr:write(("could not write '%s': %s\n"):format(outpath, tostring(write_err)))
      return
    end

    if from_stdin then
      return
    end

    checkbytecode(path, outpath)
    if IN_PLACE then
      local renamed, rename_err = os.rename(outpath, path)
      if not renamed then
        io.stderr:write(("could not rename '%s' to '%s': %s\n"):format(outpath, path, tostring(rename_err)))
      end
    end
  end

  for i = last, #arg do
    process(arg[i])
  end
end

return main(...)