Skip to content
Permalink
master
Go to file
 
 
Cannot retrieve contributors at this time
executable file 633 lines (574 sloc) 20.1 KB
#!/usr/bin/env escript
%% -*- erlang -*-
-mode(compile).
%% We print out a warning for files that take longer than
%% ?LONG_COMPILE_THRESHOLD milliseconds to compile.
-define(LONG_COMPILE_THRESHOLD, 10000).
main(Args0) ->
DefOpts = #{format=>asm,no_compile=>false,legacy=>false,copts=>[]},
{Args,Opts} = opts(Args0, DefOpts),
case Args of
[OutDir] ->
do_compile(OutDir, Opts);
_ ->
usage()
end.
usage() ->
S = ["usage: otp-diffable-asm [OPTION] DIRECTORY\n\n"
"Options:\n"
" --asm Output to .S files (default)\n"
" --legacy-asm Output to legacy .S files\n"
" --dis Output to .dis files\n"
" --no-compile Disassemble from BEAM files (use with --dis)\n"
" --deterministic Compile with +deterministic (useful when\n"
" comparing output from different build trees using\n"
" --asm)\n"
" --co <option> Add <option> to the list of options given to the\n"
" compiler. See compile:file/2 for valid options.\n"
"\n"
"DESCRIPTION\n"
"\n"
"Compile some applications from OTP (about 1000 modules) to either\n"
".S files or .dis files. The files are massaged to make them diff-friendly.\n"
"\n"
"The --legacy-asm options forces the output file to be in Latin1 encoding\n"
"and adds a latin1 encoding comment to the first line of the file.\n"
"\n"
"INCLUDING THE ELIXIR STANDARD LIBRARY\n"
"\n"
"If the Elixir repository is installed alongside the Erlang/OTP repository,\n"
"the Elixir standard library will be included in the compilation. For this\n"
"to work, the Elixir repository must be installed in: \n",
"\n"
" ",elixir_root(),"\n"
"\n"
"Here is how to install Elixir:\n"
"\n"
" cd ",filename:dirname(elixir_root()),"\n"
" git clone git@github.com:elixir-lang/elixir.git\n"
" cd elixir\n"
" PATH=",code:root_dir(),"/bin:$PATH make clean test\n"
"\n"
"EXAMPLES\n"
"\n"
"This example shows how the effectiveness of a compiler \n"
"optimization can be verified (alternatively, that pure code\n"
"refactoring has no effect on the generated code):\n"
"\n"
"$ scripts/diffable old\n"
"# Hack the compiler.\n"
"$ scripts/diffable new\n"
"$ diff -u old new\n"
"\n"
"This example shows how the effectiveness of loader hacks\n"
"can be verified:\n"
"\n"
"$ scripts/diffable --dis --no-compile old\n"
"# Hack ops.tab and/or one of the *instr.tab files.\n"
"$ scripts/diffable --dis --no-compile new\n"
"$ diff -u old new\n"],
io:put_chars(S),
halt(1).
opts(["--asm"|Args], Opts) ->
opts(Args, Opts#{format:=asm});
opts(["--dis"|Args], Opts) ->
opts(Args, Opts#{format:=dis});
opts(["--legacy-asm"|Args], Opts) ->
opts(Args, Opts#{format:=asm,legacy:=true});
opts(["--no-compile"|Args], Opts) ->
opts(Args, Opts#{format:=dis,no_compile:=true});
opts(["--deterministic"|Args], #{copts:=Copts}=Opts) ->
opts(Args, Opts#{copts:=Copts++[deterministic]});
opts(["--co",Opt|Args], #{copts:=Copts}=Opts) ->
opts(Args, Opts#{copts:=Copts++[list_to_atom(Opt)]});
opts(["--"++Opt|_], _Opts) ->
io:format("Uknown option: --~ts\n\n", [Opt]),
usage();
opts(Args, Opts) ->
{Args,Opts}.
do_compile(OutDir, Opts0) ->
Opts1 = Opts0#{outdir=>OutDir},
_ = filelib:ensure_dir(filename:join(OutDir, "dummy")),
Apps = ["preloaded",
"asn1",
"stdlib",
"kernel",
"reltool",
"runtime_tools",
"xmerl",
"common_test",
"compiler",
"diameter",
"mnesia",
"inets",
"syntax_tools",
"parsetools",
"dialyzer",
"ssl",
"wx"],
{Specs,Opts} = get_specs(Apps, Opts1),
CF = choose_format(Opts),
p_run(fun(Spec) ->
compile_spec(CF, Spec)
end, Specs).
choose_format(#{format:=Format}=Opts) ->
case Format of
asm ->
compile_to_asm_fun(Opts);
dis ->
compile_to_dis_fun(Opts)
end.
compile_spec(CF, Spec) ->
try
case timer:tc(CF, [Spec]) of
{Time0, ok} ->
Time = erlang:convert_time_unit(Time0, microsecond, millisecond),
{Spec, Time};
_ ->
error
end
catch
Class:Error:Stk ->
if
is_list(Spec) ->
io:format("~s: ~p ~p\n~p\n\n",
[Spec,Class,Error,Stk]);
true ->
io:format("~p: ~p ~p\n~p\n\n",
[Spec,Class,Error,Stk])
end,
error
end.
elixir_root() ->
filename:join(filename:dirname(code:root_dir()), "elixir").
%%%
%%% Get names of files (either .erl files or BEAM files) together with their
%%% compile options.
%%%
get_specs(Apps, #{format:=dis,no_compile:=true}=Opts) ->
Files = get_elixir_beams() ++ get_beams(Apps),
{Files,Opts};
get_specs(Apps, #{}=Opts) ->
Inc = make_includes(),
CompilerOpts = [{d,epmd_dist_high,42},
{d,epmd_dist_low,37},
{d,'VSN',1},
{d,'COMPILER_VSN',1},
{d,erlang_daemon_port,1337}|Inc],
Files = get_src(Apps),
Specs1 = add_opts(Files, CompilerOpts),
Specs = [{Beam,elixir} || Beam <- get_elixir_beams()] ++ Specs1,
{Specs,Opts}.
get_elixir_beams() ->
ElixirEbin = filename:join(elixir_root(), "lib/elixir/ebin"),
case filelib:is_dir(ElixirEbin) of
true ->
true = code:add_patha(ElixirEbin),
filelib:wildcard(filename:join(ElixirEbin, "*.beam"));
false ->
[]
end.
add_opts([F|Fs], Opts0) ->
Opts = case vsn_is_harmful(F) of
true ->
Opts0 -- [{d,'VSN',1}];
false ->
Opts0
end,
[{F,Opts}|add_opts(Fs, Opts0)];
add_opts([], _Opts) ->
[].
vsn_is_harmful(F) ->
case filename:basename(F) of
"group_history.erl" ->
true;
_ ->
App = filename:basename(filename:dirname(filename:dirname(F))),
App =:= "ssl"
end.
get_src(["preloaded"|Apps]) ->
WC = filename:join(code:root_dir(), "erts/preloaded/src/*.erl"),
filelib:wildcard(WC) ++ get_src(Apps);
get_src(["inets"|Apps]) ->
LibDir = code:lib_dir(inets),
WC = filename:join(LibDir, "src/*/*.erl"),
filelib:wildcard(WC) ++ get_src(Apps);
get_src(["syntax_tools"|Apps]) ->
LibDir = code:lib_dir(syntax_tools),
WC = filename:join(LibDir, "src/*.erl"),
Files0 = filelib:wildcard(WC),
Files = [F || F <- Files0,
filename:basename(F) =/= "merl_tests.erl"],
Files ++ get_src(Apps);
get_src(["wx"|Apps]) ->
LibDir = code:lib_dir(wx),
WC1 = filename:join(LibDir, "src/gen/*.erl"),
WC2 = filename:join(LibDir, "src/*.erl"),
filelib:wildcard(WC1) ++ filelib:wildcard(WC2) ++ get_src(Apps);
get_src([App|Apps]) ->
WC = filename:join(code:lib_dir(App), "src/*.erl"),
filelib:wildcard(WC) ++ get_src(Apps);
get_src([]) -> [].
make_includes() ->
Is = [{common_test,"include"},
{inets,"include"},
{inets,"src/http_client"},
{inets,"src/http_lib"},
{inets,"src/http_server"},
{inets,"src/inets_app"},
{kernel,"include"},
{kernel,"src"},
{public_key,"include"},
{runtime_tools,"include"},
{ssh,"include"},
{snmp,"include"},
{stdlib,"include"},
{syntax_tools,"include"},
{wx,"src"},
{wx,"include"},
{xmerl,"include"}],
[{i,filename:join(code:lib_dir(App), Path)} || {App,Path} <- Is].
get_beams(["preloaded"|Apps]) ->
WC = filename:join(code:root_dir(), "erts/preloaded/ebin/*.beam"),
filelib:wildcard(WC) ++ get_beams(Apps);
get_beams([App|Apps]) ->
WC = filename:join(code:lib_dir(App), "ebin/*.beam"),
filelib:wildcard(WC) ++ get_beams(Apps);
get_beams([]) -> [].
%%%
%%% Generate renumbered .S files.
%%%
compile_to_asm_fun(#{outdir:=OutDir}=Opts) ->
fun(Spec) ->
Legacy = map_get(legacy, Opts),
COpts = map_get(copts, Opts),
compile_to_asm(Spec, OutDir, Legacy, COpts)
end.
compile_to_asm({Beam,elixir}, OutDir, _Legacy, COpts) ->
Abst = get_abstract_from_beam(Beam),
Source = filename:rootname(Beam, ".beam"),
Opts = [diffable,{outdir,OutDir},report_errors,{source,Source}]++COpts,
case compile:forms(Abst, Opts) of
{ok,_Mod,_Asm} ->
ok;
error ->
error
end;
compile_to_asm({File,Opts0}, OutDir, Legacy, COpts) ->
Opts = [diffable,{outdir,OutDir},report_errors|Opts0]++COpts,
case compile:file(File, Opts) of
{ok,_Mod} ->
case Legacy of
true ->
legacy_asm(OutDir, File);
false ->
ok
end;
error ->
error
end.
legacy_asm(OutDir, Source) ->
ModName = filename:rootname(filename:basename(Source)),
File = filename:join(OutDir, ModName),
AsmFile = File ++ ".S",
{ok,Asm0} = file:read_file(AsmFile),
Asm1 = unicode:characters_to_binary(Asm0, utf8, latin1),
Asm = [<<"%% -*- encoding:latin-1 -*-\n">>|Asm1],
ok = file:write_file(AsmFile, Asm).
get_abstract_from_beam(Beam) ->
{ok,{_Mod,[AbstChunk]}} = beam_lib:chunks(Beam, [abstract_code]),
{abstract_code,{raw_abstract_v1,Abst}} = AbstChunk,
Abst.
%%%
%%% Compile and disassemble the loaded code.
%%%
compile_to_dis_fun(#{outdir:=OutDir,no_compile:=false}) ->
fun(Spec) ->
compile_to_dis(Spec, OutDir)
end;
compile_to_dis_fun(#{outdir:=OutDir,no_compile:=true}) ->
fun(Spec) ->
dis_only(Spec, OutDir)
end.
compile_to_dis({File,elixir}, OutDir) ->
{ok,Beam} = file:read_file(File),
Mod0 = filename:rootname(filename:basename(File)),
Mod = list_to_atom(Mod0),
Dis0 = disasm(Mod, Beam),
Dis1 = renumber_disasm(Dis0, Mod, Mod),
Dis = format_disasm(Dis1),
OutFile = filename:join(OutDir, atom_to_list(Mod)++".dis"),
ok = file:write_file(OutFile, Dis);
compile_to_dis({File,Opts}, OutDir) ->
case compile:file(File, [to_asm,binary,report_errors|Opts]) of
error ->
error;
{ok,Mod,Asm0} ->
NewMod = list_to_atom("--"++atom_to_list(Mod)++"--"),
Asm = rename_mod_in_asm(Asm0, Mod, NewMod),
AsmOpts = [from_asm,report,no_postopt,binary],
{ok,NewMod,Beam} = compile:forms(Asm, AsmOpts),
Dis0 = disasm(NewMod, Beam),
Dis1 = renumber_disasm(Dis0, Mod, NewMod),
Dis = format_disasm(Dis1),
OutFile = filename:join(OutDir, atom_to_list(Mod)++".dis"),
ok = file:write_file(OutFile, Dis)
end.
dis_only(File, OutDir) ->
Mod0 = filename:rootname(filename:basename(File)),
Mod = list_to_atom(Mod0),
Dis0 = disasm(Mod),
Dis1 = renumber_disasm(Dis0, Mod, Mod),
Dis = format_disasm(Dis1),
OutFile = filename:join(OutDir, atom_to_list(Mod)++".dis"),
ok = file:write_file(OutFile, Dis).
%%% Loading system modules can cause any number of problems.
%%% Therefore, we rename all modules to a dummy name before
%%% loading and disassembling them.
rename_mod_in_asm({OldMod,Exp,_Attr,Fs0,NumLabels}, OldMod, NewMod) ->
Fs = [fix_func_info(F, {atom,OldMod}, {atom,NewMod}) || F <- Fs0],
{NewMod,Exp,[],Fs,NumLabels}.
fix_func_info({function,Name,Arity,Entry,Is0}, OldMod, NewMod) ->
Is1 = [begin
case I of
{func_info,_,F,A} ->
{func_info,NewMod,F,A};
_ ->
I
end
end || I <- Is0],
Is = case {Name,Arity} of
{module_info,0} -> fix_module_info(Is1, OldMod, NewMod);
{module_info,1} -> fix_module_info(Is1, OldMod, NewMod);
{_,_} -> Is1
end,
{function,Name,Arity,Entry,Is}.
fix_module_info([{move,OldMod,Dst}|Is], OldMod, NewMod) ->
[{move,NewMod,Dst}|fix_module_info(Is, OldMod, NewMod)];
fix_module_info([I|Is], OldMod, NewMod) ->
[I|fix_module_info(Is, OldMod, NewMod)];
fix_module_info([], _, _) ->
[].
%%% Disassemble the module.
disasm(Mod, Beam) ->
{module,Mod} = code:load_binary(Mod, "", Beam),
disasm(Mod).
disasm(Mod) ->
disasm_1(Mod:module_info(functions), Mod).
disasm_1([{Name,Arity}|Fs], Mod) ->
MFA = {Mod,Name,Arity},
Dis = disasm_func({MFA,<<>>,MFA}, MFA),
[{Name,Arity,Dis}|disasm_1(Fs, Mod)];
disasm_1([], _) ->
[].
disasm_func({Next,_,MFA}, MFA) ->
case erts_debug:disassemble(Next) of
{_,Line,MFA}=Cont ->
[Line|disasm_func(Cont, MFA)];
{_,_,_} ->
[];
false ->
[]
end.
%%% Renumber the disassembled module to use labels instead of
%%% absolute addresses. Also do other translations so that the
%%% output will be the same each time (for the same BEAM file
%%% runtime system).
renumber_disasm(Fs0, OldMod, NewMod) ->
Fs1 = split_dis_lines(Fs0),
renumber_disasm_fs(Fs1, OldMod, NewMod).
renumber_disasm_fs([{Name,Arity,Is0}|Fs], OldMod, NewMod) ->
Labels = find_labels(Is0, Name, Arity),
Is1 = rename_mod(Is0, OldMod, NewMod),
Is = renumber_disasm_func(Is1, Labels),
[{Name,Arity,Is}|renumber_disasm_fs(Fs, OldMod, NewMod)];
renumber_disasm_fs([], _OldMod, _NewMod) ->
[].
renumber_disasm_func([[A,OpCode|Ops0]|Is], Labels) ->
Spaces = " ",
Left = case maps:find(A, Labels) of
{ok,Lbl} ->
case byte_size(Lbl) of
LblSize when LblSize < length(Spaces) ->
[$\n,Lbl,":",lists:nth(LblSize, Spaces)];
_ ->
[Lbl,":\n"|Spaces]
end;
error ->
Spaces
end,
Ops1 = [replace_label(Op, Labels) || Op <- Ops0],
Ops = handle_special_instrs(OpCode, Ops1),
[[Left,OpCode|Ops]|renumber_disasm_func(Is, Labels)];
renumber_disasm_func([], _) ->
[].
handle_special_instrs(<<"i_get_hash_cId">>, [Key,_Hash,Dst]) ->
[Key,hash_value(),Dst];
handle_special_instrs(<<"i_get_map_element_",_/binary>>,
[Fail,Src,Key,_Hash,Dst]) ->
[Fail,Src,Key,hash_value(),Dst];
handle_special_instrs(<<"i_get_map_elements_",_/binary>>,
[Fail,Src,N,Space|List0]) ->
List1 = rejoin_atoms(List0),
List = fix_hash_value(List1),
[Fail,Src,N,Space|List];
handle_special_instrs(<<"i_select_val_bins_",_/binary>>,
[Src,Fail,Num|List0]) ->
%% Atoms are sorted in atom-number order, which is
%% different every time the runtime system is restarted.
%% Resort the values in ASCII order.
List1 = rejoin_atoms(List0),
{Values0,Labels0} = lists:split(length(List1) div 2, List1),
Zipped0 = lists:zip(Values0, Labels0),
Zipped = lists:sort(Zipped0),
{Values,Labels} = lists:unzip(Zipped),
[Src,Fail,Num|Values++Labels];
handle_special_instrs(<<"i_select_val_lins_",_/binary>>,
[Src,Fail,Num|List0]) ->
List1 = rejoin_atoms(List0),
{Values0,Labels0} = lists:split(length(List1) div 2, List1),
Values1 = lists:droplast(Values0),
Labels1 = lists:droplast(Labels0),
Vlast = lists:last(Values0),
Llast = lists:last(Labels0),
Zipped0 = lists:zip(Values1, Labels1),
Zipped = lists:sort(Zipped0),
{Values,Labels} = lists:unzip(Zipped),
[Src,Fail,Num|Values++[Vlast]++Labels++[Llast]];
handle_special_instrs(_, Ops) ->
Ops.
fix_hash_value([Val,Dst,_Hash|T]) ->
[Val,Dst,hash_value()|fix_hash_value(T)];
fix_hash_value([]) ->
[].
hash_value() ->
<<"--hash-value--">>.
replace_label(<<"f(",T/binary>>, Labels) ->
replace_label_1("f(", T, Labels);
replace_label(<<"j(",T/binary>>, Labels) ->
replace_label_1("j(", T, Labels);
replace_label(Op, _Labels) ->
Op.
replace_label_1(Prefix, Lbl0, Labels) ->
Sz = byte_size(Lbl0)-1,
Lbl = case Lbl0 of
<<"0)">> ->
Lbl0;
<<Lbl1:Sz/bytes,")">> ->
[maps:get(Lbl1, Labels),")"];
_ ->
Lbl0
end,
iolist_to_binary([Prefix,Lbl]).
split_dis_lines(Fs) ->
{ok,RE} = re:compile(<<"\\s*\\n$">>),
Colon = binary:compile_pattern(<<": ">>),
Space = binary:compile_pattern(<<" ">>),
[split_dis_func(F, RE, Colon, Space) || F <- Fs].
split_dis_func({Name,Arity,Lines0}, RE, Colon, Space) ->
Lines1 = [re:replace(L, RE, <<>>, [{return,binary}]) || L <- Lines0],
Lines2 = [begin
[A,I] = binary:split(L, Colon),
Ops = binary:split(I, Space, [global]),
[A|Ops]
end|| L <- Lines1],
{Name,Arity,Lines2}.
rejoin_atoms([<<"`'",Tail/binary>> = Bin0,Next|Ops]) ->
Sz = byte_size(Tail) - 2,
case Tail of
<<_:Sz/bytes,"'`">> ->
[Bin0|rejoin_atoms([Next|Ops])];
<<>> ->
Bin = <<Bin0/binary,$\s,Next/binary>>,
rejoin_atoms([Bin|Ops]);
_ ->
Bin = <<Bin0/binary,$\s,Next/binary>>,
rejoin_atoms([Bin|Ops])
end;
rejoin_atoms([Op|Ops]) ->
[Op|rejoin_atoms(Ops)];
rejoin_atoms([]) ->
[].
find_labels(Is, Name, Arity) ->
[_,[Entry|_]|_] = Is,
EntryLabel = iolist_to_binary(io_lib:format("~p/~p", [Name,Arity])),
{ok,RE} = re:compile(<<"^[fj]\\(([0-9A-F]{8,16})\\)$">>),
Ls0 = [find_labels_1(Ops, RE) || [_Addr,_OpCode|Ops] <- Is],
Ls1 = lists:flatten(Ls0),
Ls2 = lists:usort(Ls1),
Ls3 = number(Ls2, 1),
Ls = [{Entry,EntryLabel}|Ls3],
maps:from_list(Ls).
find_labels_1([Op|Ops], RE) ->
case re:run(Op, RE, [{capture,all_but_first,binary}]) of
nomatch ->
find_labels_1(Ops, RE);
{match,[M]} ->
[M|find_labels_1(Ops, RE)]
end;
find_labels_1([], _) ->
[].
number([H|T], N) ->
S = iolist_to_binary(["L",integer_to_list(N)]),
[{H,S}|number(T, N+1)];
number([], _) ->
[].
format_disasm([{_,_,Is}|Fs]) ->
L = [lists:join(" ", I) || I <- Is],
[lists:join("\n", L),"\n\n"|format_disasm(Fs)];
format_disasm([]) ->
[].
rename_mod(Is, OldMod0, NewMod) ->
OldMod = atom_to_binary(OldMod0, utf8),
Pattern = <<"'",(atom_to_binary(NewMod, utf8))/binary,"'">>,
[rename_mod_1(I, Pattern, OldMod) || I <- Is].
rename_mod_1([A,OpCode|Ops], Pat, Replacement) ->
[A,OpCode|[rename_mod_2(Op, Pat, Replacement) || Op <- Ops]].
rename_mod_2(Subject, Pat, Replacement) ->
Sz = byte_size(Pat),
case Subject of
<<Pat:Sz/bytes,Tail/binary>> ->
<<Replacement/binary,Tail/binary>>;
_ ->
Subject
end.
%%%
%%% Run tasks in parallel.
%%%
p_run(Test, List) ->
N = erlang:system_info(schedulers) * 2,
p_run_loop(Test, List, N, [], 0).
p_run_loop(_, [], _, [], Errors) ->
io:put_chars("\r \n"),
case Errors of
0 ->
ok;
N ->
io:format("~p errors\n", [N]),
halt(1)
end;
p_run_loop(Test, [H|T], N, Refs, Errors) when length(Refs) < N ->
{_,Ref} = erlang:spawn_monitor(fun() -> exit(Test(H)) end),
p_run_loop(Test, T, N, [Ref|Refs], Errors);
p_run_loop(Test, List, N, Refs0, Errors) ->
receive
{'DOWN',Ref,process,_,error} ->
Refs = Refs0 -- [Ref],
p_run_loop(Test, List, N, Refs, Errors + 1);
{'DOWN',Ref,process,_,{Spec,Time}} ->
if
Time >= ?LONG_COMPILE_THRESHOLD ->
Name = format_spec(Spec),
io:format("~s took ~pms to compile~n", [Name, Time]);
Time < ?LONG_COMPILE_THRESHOLD ->
io:format("\r~p ", [length(List) + length(Refs0)])
end,
Refs = Refs0 -- [Ref],
p_run_loop(Test, List, N, Refs, Errors)
end.
format_spec(File) when is_list(File) ->
File;
format_spec({File, _Options}) when is_list(File) ->
File;
format_spec(Spec) ->
io_lib:format("~p", [Spec]).
You can’t perform that action at this time.