Skip to content
Browse files

Removed lots of code from the old native code backend that the new on…

…e does not need

This includes options for dumping intermediate data as well es the data
structures used to represent that data.  In particular Mach and
Linearize are replaced by a variante used for generating LLVM IR.  Any
code used for register allocation as well as a lot of amd64-specific
code are removed completely.
  • Loading branch information...
1 parent 2cbc262 commit ecf488c6a8351091aed20cbfdf39728d21b92435 @colinbenner committed Mar 1, 2012
Showing with 1,351 additions and 8,138 deletions.
  1. +2 −78 src/asmcomp/amd64/arch.ml
  2. +14 −735 src/asmcomp/amd64/emit.mlp
  3. +0 −179 src/asmcomp/amd64/proc.ml
  4. +0 −211 src/asmcomp/amd64/proc_nt.ml
  5. +0 −127 src/asmcomp/amd64/reload.ml
  6. +0 −20 src/asmcomp/amd64/scheduling.ml
  7. +0 −235 src/asmcomp/amd64/selection.ml
  8. +36 −65 src/asmcomp/asmgen.ml
  9. +4 −0 src/asmcomp/asmgen.mli
  10. +4 −9 src/asmcomp/asmlink.ml
  11. +35 −0 src/asmcomp/aux.ml
  12. +1 −1 src/asmcomp/cmmgen.ml
  13. +0 −278 src/asmcomp/coloring.ml
  14. +0 −17 src/asmcomp/coloring.mli
  15. +0 −90 src/asmcomp/comballoc.ml
  16. +0 −17 src/asmcomp/comballoc.mli
  17. +0 −802 src/asmcomp/emit.ml
  18. +6 −5 src/asmcomp/emit.mli
  19. +34 −162 src/asmcomp/{llvmemit.ml → emit_common.ml}
  20. +5 −6 src/asmcomp/{llvmemit.mli → emit_common.mli}
  21. +11 −11 src/asmcomp/emitaux.ml
  22. +0 −173 src/asmcomp/interf.ml
  23. +0 −18 src/asmcomp/interf.mli
  24. +0 −187 src/asmcomp/interval.ml
  25. +0 −35 src/asmcomp/interval.mli
  26. +386 −232 src/asmcomp/linearize.ml
  27. +30 −37 src/asmcomp/linearize.mli
  28. +0 −192 src/asmcomp/linscan.ml
  29. +0 −16 src/asmcomp/linscan.mli
  30. +0 −120 src/asmcomp/liveness.ml
  31. +0 −20 src/asmcomp/liveness.mli
  32. +0 −90 src/asmcomp/llvm_aux.ml
  33. +0 −13 src/asmcomp/llvm_aux.mli
  34. +0 −397 src/asmcomp/llvm_linearize.ml
  35. +0 −45 src/asmcomp/llvm_linearize.mli
  36. +0 −163 src/asmcomp/llvm_mach.ml
  37. +0 −102 src/asmcomp/llvm_mach.mli
  38. +0 −456 src/asmcomp/llvm_selectgen.ml
  39. +0 −38 src/asmcomp/llvmcompile.ml
  40. +0 −20 src/asmcomp/llvmcompile.mli
  41. +85 −105 src/asmcomp/mach.ml
  42. +38 −78 src/asmcomp/mach.mli
  43. +0 −77 src/asmcomp/printlinear.ml
  44. +0 −21 src/asmcomp/printlinear.mli
  45. +36 −0 src/asmcomp/printlinearize.ml
  46. +69 −228 src/asmcomp/printmach.ml
  47. +0 −32 src/asmcomp/printmach.mli
  48. +0 −34 src/asmcomp/proc.mli
  49. +67 −150 src/asmcomp/reg.ml
  50. +48 −57 src/asmcomp/reg.mli
  51. +0 −17 src/asmcomp/reload.mli
  52. +0 −140 src/asmcomp/reloadgen.ml
  53. +0 −26 src/asmcomp/reloadgen.mli
  54. +0 −356 src/asmcomp/schedgen.ml
  55. +0 −46 src/asmcomp/schedgen.mli
  56. +0 −17 src/asmcomp/scheduling.mli
  57. +440 −834 src/asmcomp/selectgen.ml
  58. +0 −80 src/asmcomp/selectgen.mli
  59. +0 −18 src/asmcomp/selection.mli
  60. +0 −402 src/asmcomp/spill.ml
  61. +0 −18 src/asmcomp/spill.mli
Sorry, we could not display the entire diff because it was too big.
View
80 src/asmcomp/amd64/arch.ml
@@ -22,27 +22,6 @@ let command_line_options =
"-fno-PIC", Arg.Clear pic_code,
" Generate position-dependent machine code" ]
-(* Specific operations for the AMD64 processor *)
-
-open Format
-
-type addressing_mode =
- Ibased of string * int (* symbol + displ *)
- | Iindexed of int (* reg + displ *)
- | Iindexed2 of int (* reg + reg + displ *)
- | Iscaled of int * int (* reg * scale + displ *)
- | Iindexed2scaled of int * int (* reg + reg * scale + displ *)
-
-type specific_operation =
- Ilea of addressing_mode (* "lea" gives scaled adds *)
- | Istore_int of nativeint * addressing_mode (* Store an integer constant *)
- | Istore_symbol of string * addressing_mode (* Store a symbol *)
- | Ioffset_loc of int * addressing_mode (* Add a constant to a location *)
- | Ifloatarithmem of float_operation * addressing_mode
- (* Float arith operation with memory *)
-and float_operation =
- Ifloatadd | Ifloatsub | Ifloatmul | Ifloatdiv
-
(* Sizes, endianness *)
let big_endian = false
@@ -51,61 +30,6 @@ let size_addr = 8
let size_int = 8
let size_float = 8
-(* Operations on addressing modes *)
-
-let identity_addressing = Iindexed 0
-
-let offset_addressing addr delta =
- match addr with
- Ibased(s, n) -> Ibased(s, n + delta)
- | Iindexed n -> Iindexed(n + delta)
- | Iindexed2 n -> Iindexed2(n + delta)
- | Iscaled(scale, n) -> Iscaled(scale, n + delta)
- | Iindexed2scaled(scale, n) -> Iindexed2scaled(scale, n + delta)
-
-let num_args_addressing = function
- Ibased(s, n) -> 0
- | Iindexed n -> 1
- | Iindexed2 n -> 2
- | Iscaled(scale, n) -> 1
- | Iindexed2scaled(scale, n) -> 2
-
-(* Printing operations and addressing modes *)
-
-let print_addressing printreg addr ppf arg =
- match addr with
- | Ibased(s, 0) ->
- fprintf ppf "\"%s\"" s
- | Ibased(s, n) ->
- fprintf ppf "\"%s\" + %i" s n
- | Iindexed n ->
- let idx = if n <> 0 then Printf.sprintf " + %i" n else "" in
- fprintf ppf "%a%s" printreg arg.(0) idx
- | Iindexed2 n ->
- let idx = if n <> 0 then Printf.sprintf " + %i" n else "" in
- fprintf ppf "%a + %a%s" printreg arg.(0) printreg arg.(1) idx
- | Iscaled(scale, n) ->
- let idx = if n <> 0 then Printf.sprintf " + %i" n else "" in
- fprintf ppf "%a * %i%s" printreg arg.(0) scale idx
- | Iindexed2scaled(scale, n) ->
- let idx = if n <> 0 then Printf.sprintf " + %i" n else "" in
- fprintf ppf "%a + %a * %i%s" printreg arg.(0) printreg arg.(1) scale idx
+(* Instruction selection *)
-let print_specific_operation printreg op ppf arg =
- match op with
- | Ilea addr -> print_addressing printreg addr ppf arg
- | Istore_int(n, addr) ->
- fprintf ppf "[%a] := %nd" (print_addressing printreg addr) arg n
- | Istore_symbol(lbl, addr) ->
- fprintf ppf "[%a] := \"%s\"" (print_addressing printreg addr) arg lbl
- | Ioffset_loc(n, addr) ->
- fprintf ppf "[%a] +:= %i" (print_addressing printreg addr) arg n
- | Ifloatarithmem(op, addr) ->
- let op_name = function
- | Ifloatadd -> "+f"
- | Ifloatsub -> "-f"
- | Ifloatmul -> "*f"
- | Ifloatdiv -> "/f" in
- fprintf ppf "%a %s float64[%a]" printreg arg.(0) (op_name op)
- (print_addressing printreg addr)
- (Array.sub arg 1 (Array.length arg - 1))
+let word_addressed = false
View
749 src/asmcomp/amd64/emit.mlp
@@ -14,72 +14,21 @@
(* Emission of x86-64 (AMD 64) assembly code *)
-open Misc
open Cmm
open Arch
-open Proc
-open Reg
-open Mach
-open Linearize
open Emitaux
let macosx =
match Config.system with
| "macosx" -> true
| _ -> false
-
-(* Tradeoff between code size and code speed *)
-
-let fastcode_flag = ref true
-
-let stack_offset = ref 0
-
-(* Layout of the stack frame *)
-
-let frame_required () =
- !contains_calls || num_stack_slots.(0) > 0 || num_stack_slots.(1) > 0
-
-let frame_size () = (* includes return address *)
- if frame_required() then begin
- let sz =
- (!stack_offset + 8 * (num_stack_slots.(0) + num_stack_slots.(1)) + 8)
- in Misc.align sz 16
- end else
- !stack_offset + 8
-
-let slot_offset loc cl =
- match loc with
- Incoming n -> frame_size() + n
- | Local n ->
- if cl = 0
- then !stack_offset + n * 8
- else !stack_offset + (num_stack_slots.(0) + n) * 8
- | Outgoing n -> n
-
(* Symbols *)
let emit_symbol s =
if macosx then emit_string "_";
Emitaux.emit_symbol '$' s
-let emit_call s =
- if !Clflags.dlcode && not macosx
- then `call {emit_symbol s}@PLT`
- else `call {emit_symbol s}`
-
-let emit_jump s =
- if !Clflags.dlcode && not macosx
- then `jmp {emit_symbol s}@PLT`
- else `jmp {emit_symbol s}`
-
-let load_symbol_addr s =
- if !Clflags.dlcode
- then `movq {emit_symbol s}@GOTPCREL(%rip)`
- else if !pic_code
- then `leaq {emit_symbol s}(%rip)`
- else `movq ${emit_symbol s}`
-
(* Output a label *)
let emit_label lbl =
@@ -89,714 +38,44 @@ let emit_label lbl =
let emit_align n =
let n = if macosx then Misc.log2 n else n in
- ` .align {emit_int n}\n`
-
-let emit_Llabel fallthrough lbl =
- if not fallthrough && !fastcode_flag then emit_align 4;
- emit_label lbl
-
-(* Output a pseudo-register *)
-
-let emit_reg = function
- { loc = Reg r } ->
- emit_string (register_name r)
- | { loc = Stack s } as r ->
- let ofs = slot_offset s (register_class r) in
- `{emit_int ofs}(%rsp)`
- | { loc = Unknown } ->
- assert false
-
-(* Output a reference to the lower 8, 16 or 32 bits of a register *)
-
-let reg_low_8_name =
- [| "%al"; "%bl"; "%dil"; "%sil"; "%dl"; "%cl"; "%r8b"; "%r9b";
- "%r10b"; "%r11b"; "%bpl"; "%r12b"; "%r13b" |]
-let reg_low_16_name =
- [| "%ax"; "%bx"; "%di"; "%si"; "%dx"; "%cx"; "%r8w"; "%r9w";
- "%r10w"; "%r11w"; "%bp"; "%r12w"; "%r13w" |]
-let reg_low_32_name =
- [| "%eax"; "%ebx"; "%edi"; "%esi"; "%edx"; "%ecx"; "%r8d"; "%r9d";
- "%r10d"; "%r11d"; "%ebp"; "%r12d"; "%r13d" |]
-
-let emit_subreg tbl r =
- match r.loc with
- Reg r when r < 13 ->
- emit_string tbl.(r)
- | Stack s ->
- let ofs = slot_offset s (register_class r) in
- `{emit_int ofs}(%rsp)`
- | _ ->
- assert false
-
-let emit_reg8 r = emit_subreg reg_low_8_name r
-let emit_reg16 r = emit_subreg reg_low_16_name r
-let emit_reg32 r = emit_subreg reg_low_32_name r
-
-(* Output an addressing mode *)
-
-let emit_addressing addr r n =
- match addr with
- | Ibased _ when !Clflags.dlcode -> assert false
- | Ibased(s, d) ->
- `{emit_symbol s}`;
- if d <> 0 then ` + {emit_int d}`;
- `(%rip)`
- | Iindexed d ->
- if d <> 0 then emit_int d;
- `({emit_reg r.(n)})`
- | Iindexed2 d ->
- if d <> 0 then emit_int d;
- `({emit_reg r.(n)}, {emit_reg r.(n+1)})`
- | Iscaled(2, d) ->
- if d <> 0 then emit_int d;
- `({emit_reg r.(n)}, {emit_reg r.(n)})`
- | Iscaled(scale, d) ->
- if d <> 0 then emit_int d;
- `(, {emit_reg r.(n)}, {emit_int scale})`
- | Iindexed2scaled(scale, d) ->
- if d <> 0 then emit_int d;
- `({emit_reg r.(n)}, {emit_reg r.(n+1)}, {emit_int scale})`
-
-(* Record live pointers at call points -- see Emitaux *)
-
-let record_frame_label live dbg =
- let lbl = new_label() in
- let live_offset = ref [] in
- Reg.Set.iter
- (function
- {typ = Addr; loc = Reg r} ->
- live_offset := ((r lsl 1) + 1) :: !live_offset
- | {typ = Addr; loc = Stack s} as reg ->
- live_offset := slot_offset s (register_class reg) :: !live_offset
- | _ -> ())
- live;
- frame_descriptors :=
- { fd_lbl = lbl;
- fd_frame_size = frame_size();
- fd_live_offset = !live_offset;
- fd_debuginfo = dbg } :: !frame_descriptors;
- lbl
-
-let record_frame live dbg =
- let lbl = record_frame_label live dbg in `{emit_label lbl}:\n`
-
-(* Record calls to the GC -- we've moved them out of the way *)
-
-type gc_call =
- { gc_lbl: label; (* Entry label *)
- gc_return_lbl: label; (* Where to branch after GC *)
- gc_frame: label } (* Label of frame descriptor *)
-
-let call_gc_sites = ref ([] : gc_call list)
-
-let emit_call_gc gc =
- `{emit_label gc.gc_lbl}: {emit_call "caml_call_gc"}\n`;
- `{emit_label gc.gc_frame}: jmp {emit_label gc.gc_return_lbl}\n`
-
-(* Record calls to caml_ml_array_bound_error.
- In -g mode, we maintain one call to caml_ml_array_bound_error
- per bound check site. Without -g, we can share a single call. *)
-
-type bound_error_call =
- { bd_lbl: label; (* Entry label *)
- bd_frame: label } (* Label of frame descriptor *)
-
-let bound_error_sites = ref ([] : bound_error_call list)
-let bound_error_call = ref 0
-
-let bound_error_label dbg =
- if !Clflags.debug then begin
- let lbl_bound_error = new_label() in
- let lbl_frame = record_frame_label Reg.Set.empty dbg in
- bound_error_sites :=
- { bd_lbl = lbl_bound_error; bd_frame = lbl_frame } :: !bound_error_sites;
- lbl_bound_error
- end else begin
- if !bound_error_call = 0 then bound_error_call := new_label();
- !bound_error_call
- end
-
-let emit_call_bound_error bd =
- `{emit_label bd.bd_lbl}: {emit_call "caml_ml_array_bound_error"}\n`;
- `{emit_label bd.bd_frame}:\n`
-
-let emit_call_bound_errors () =
- List.iter emit_call_bound_error !bound_error_sites;
- if !bound_error_call > 0 then
- `{emit_label !bound_error_call}: {emit_call "caml_ml_array_bound_error"}\n`
-
-(* Names for instructions *)
-
-let instr_for_intop = function
- Iadd -> "addq"
- | Isub -> "subq"
- | Imul -> "imulq"
- | Iand -> "andq"
- | Ior -> "orq"
- | Ixor -> "xorq"
- | Ilsl -> "salq"
- | Ilsr -> "shrq"
- | Iasr -> "sarq"
- | _ -> assert false
-
-let instr_for_floatop = function
- Iaddf -> "addsd"
- | Isubf -> "subsd"
- | Imulf -> "mulsd"
- | Idivf -> "divsd"
- | _ -> assert false
-
-let instr_for_floatarithmem = function
- Ifloatadd -> "addsd"
- | Ifloatsub -> "subsd"
- | Ifloatmul -> "mulsd"
- | Ifloatdiv -> "divsd"
-
-let name_for_cond_branch = function
- Isigned Ceq -> "e" | Isigned Cne -> "ne"
- | Isigned Cle -> "le" | Isigned Cgt -> "g"
- | Isigned Clt -> "l" | Isigned Cge -> "ge"
- | Iunsigned Ceq -> "e" | Iunsigned Cne -> "ne"
- | Iunsigned Cle -> "be" | Iunsigned Cgt -> "a"
- | Iunsigned Clt -> "b" | Iunsigned Cge -> "ae"
-
-(* Output an = 0 or <> 0 test. *)
-
-let output_test_zero arg =
- match arg.loc with
- Reg r -> ` testq {emit_reg arg}, {emit_reg arg}\n`
- | _ -> ` cmpq $0, {emit_reg arg}\n`
-
-(* Output a floating-point compare and branch *)
-
-let emit_float_test cmp neg arg lbl =
- (* Effect of comisd on flags and conditional branches:
- ZF PF CF cond. branches taken
- unordered 1 1 1 je, jb, jbe, jp
- > 0 0 0 jne, jae, ja
- < 0 0 1 jne, jbe, jb
- = 1 0 0 je, jae, jbe.
- If FP traps are on (they are off by default),
- comisd traps on QNaN and SNaN but ucomisd traps on SNaN only.
- *)
- match (cmp, neg) with
- | (Ceq, false) | (Cne, true) ->
- let next = new_label() in
- ` ucomisd {emit_reg arg.(1)}, {emit_reg arg.(0)}\n`;
- ` jp {emit_label next}\n`; (* skip if unordered *)
- ` je {emit_label lbl}\n`; (* branch taken if x=y *)
- `{emit_label next}:\n`
- | (Cne, false) | (Ceq, true) ->
- ` ucomisd {emit_reg arg.(1)}, {emit_reg arg.(0)}\n`;
- ` jp {emit_label lbl}\n`; (* branch taken if unordered *)
- ` jne {emit_label lbl}\n` (* branch taken if x<y or x>y *)
- | (Clt, _) ->
- ` comisd {emit_reg arg.(0)}, {emit_reg arg.(1)}\n`; (* swap compare *)
- if not neg then
- ` ja {emit_label lbl}\n` (* branch taken if y>x i.e. x<y *)
- else
- ` jbe {emit_label lbl}\n` (* taken if unordered or y<=x i.e. !(x<y) *)
- | (Cle, _) ->
- ` comisd {emit_reg arg.(0)}, {emit_reg arg.(1)}\n`; (* swap compare *)
- if not neg then
- ` jae {emit_label lbl}\n` (* branch taken if y>=x i.e. x<=y *)
- else
- ` jb {emit_label lbl}\n` (* taken if unordered or y<x i.e. !(x<=y) *)
- | (Cgt, _) ->
- ` comisd {emit_reg arg.(1)}, {emit_reg arg.(0)}\n`;
- if not neg then
- ` ja {emit_label lbl}\n` (* branch taken if x>y *)
- else
- ` jbe {emit_label lbl}\n` (* taken if unordered or x<=y i.e. !(x>y) *)
- | (Cge, _) ->
- ` comisd {emit_reg arg.(1)}, {emit_reg arg.(0)}\n`; (* swap compare *)
- if not neg then
- ` jae {emit_label lbl}\n` (* branch taken if x>=y *)
- else
- ` jb {emit_label lbl}\n` (* taken if unordered or x<y i.e. !(x>=y) *)
-
-(* Deallocate the stack frame before a return or tail call *)
-
-let output_epilogue () =
- if frame_required() then begin
- let n = frame_size() - 8 in
- ` addq ${emit_int n}, %rsp\n`
- end
-
-(* Output the assembly code for an instruction *)
-
-(* Name of current function *)
-let function_name = ref ""
-(* Entry point for tail recursive calls *)
-let tailrec_entry_point = ref 0
-
-let float_constants = ref ([] : (int * string) list)
-
-let emit_instr fallthrough i =
- match i.desc with
- Lend -> ()
- | Lop(Imove | Ispill | Ireload) ->
- let src = i.arg.(0) and dst = i.res.(0) in
- if src.loc <> dst.loc then begin
- match src.typ, src.loc, dst.loc with
- Float, Reg _, Reg _ ->
- ` movapd {emit_reg src}, {emit_reg dst}\n`
- | Float, _, _ ->
- ` movsd {emit_reg src}, {emit_reg dst}\n`
- | _ ->
- ` movq {emit_reg src}, {emit_reg dst}\n`
- end
- | Lop(Iconst_int n) ->
- if n = 0n then begin
- match i.res.(0).loc with
- Reg n -> ` xorq {emit_reg i.res.(0)}, {emit_reg i.res.(0)}\n`
- | _ -> ` movq $0, {emit_reg i.res.(0)}\n`
- end else if n <= 0x7FFFFFFFn && n >= -0x80000000n then
- ` movq ${emit_nativeint n}, {emit_reg i.res.(0)}\n`
- else
- ` movabsq ${emit_nativeint n}, {emit_reg i.res.(0)}\n`
- | Lop(Iconst_float s) ->
- begin match Int64.bits_of_float (float_of_string s) with
- | 0x0000_0000_0000_0000L -> (* +0.0 *)
- ` xorpd {emit_reg i.res.(0)}, {emit_reg i.res.(0)}\n`
- | _ ->
- let lbl = new_label() in
- float_constants := (lbl, s) :: !float_constants;
- ` movsd {emit_label lbl}(%rip), {emit_reg i.res.(0)}\n`
- end
- | Lop(Iconst_symbol s) ->
- ` {load_symbol_addr s}, {emit_reg i.res.(0)}\n`
- | Lop(Icall_ind) ->
- ` call *{emit_reg i.arg.(0)}\n`;
- record_frame i.live i.dbg
- | Lop(Icall_imm(s)) ->
- ` {emit_call s}\n`;
- record_frame i.live i.dbg
- | Lop(Itailcall_ind) ->
- output_epilogue();
- ` jmp *{emit_reg i.arg.(0)}\n`
- | Lop(Itailcall_imm s) ->
- if s = !function_name then
- ` jmp {emit_label !tailrec_entry_point}\n`
- else begin
- output_epilogue();
- ` {emit_jump s}\n`
- end
- | Lop(Iextcall(s, alloc)) ->
- if alloc then begin
- ` {load_symbol_addr s}, %rax\n`;
- ` {emit_call "caml_c_call"}\n`;
- record_frame i.live i.dbg
- end else begin
- ` {emit_call s}\n`
- end
- | Lop(Istackoffset n) ->
- if n < 0
- then ` addq ${emit_int(-n)}, %rsp\n`
- else ` subq ${emit_int(n)}, %rsp\n`;
- stack_offset := !stack_offset + n
- | Lop(Iload(chunk, addr)) ->
- let dest = i.res.(0) in
- begin match chunk with
- | Word ->
- ` movq {emit_addressing addr i.arg 0}, {emit_reg dest}\n`
- | Byte_unsigned ->
- ` movzbq {emit_addressing addr i.arg 0}, {emit_reg dest}\n`
- | Byte_signed ->
- ` movsbq {emit_addressing addr i.arg 0}, {emit_reg dest}\n`
- | Sixteen_unsigned ->
- ` movzwq {emit_addressing addr i.arg 0}, {emit_reg dest}\n`
- | Sixteen_signed ->
- ` movswq {emit_addressing addr i.arg 0}, {emit_reg dest}\n`
- | Thirtytwo_unsigned ->
- ` movl {emit_addressing addr i.arg 0}, {emit_reg32 dest}\n`
- | Thirtytwo_signed ->
- ` movslq {emit_addressing addr i.arg 0}, {emit_reg dest}\n`
- | Single ->
- ` cvtss2sd {emit_addressing addr i.arg 0}, {emit_reg dest}\n`
- | Double | Double_u ->
- ` movsd {emit_addressing addr i.arg 0}, {emit_reg dest}\n`
- end
- | Lop(Istore(chunk, addr)) ->
- begin match chunk with
- | Word ->
- ` movq {emit_reg i.arg.(0)}, {emit_addressing addr i.arg 1}\n`
- | Byte_unsigned | Byte_signed ->
- ` movb {emit_reg8 i.arg.(0)}, {emit_addressing addr i.arg 1}\n`
- | Sixteen_unsigned | Sixteen_signed ->
- ` movw {emit_reg16 i.arg.(0)}, {emit_addressing addr i.arg 1}\n`
- | Thirtytwo_signed | Thirtytwo_unsigned ->
- ` movl {emit_reg32 i.arg.(0)}, {emit_addressing addr i.arg 1}\n`
- | Single ->
- ` cvtsd2ss {emit_reg i.arg.(0)}, %xmm15\n`;
- ` movss %xmm15, {emit_addressing addr i.arg 1}\n`
- | Double | Double_u ->
- ` movsd {emit_reg i.arg.(0)}, {emit_addressing addr i.arg 1}\n`
- end
- | Lop(Ialloc n) ->
- if !fastcode_flag then begin
- let lbl_redo = new_label() in
- `{emit_label lbl_redo}: subq ${emit_int n}, %r15\n`;
- if !Clflags.dlcode then begin
- ` {load_symbol_addr "caml_young_limit"}, %rax\n`;
- ` cmpq (%rax), %r15\n`;
- end else
- ` cmpq {emit_symbol "caml_young_limit"}(%rip), %r15\n`;
- let lbl_call_gc = new_label() in
- let lbl_frame = record_frame_label i.live Debuginfo.none in
- ` jb {emit_label lbl_call_gc}\n`;
- ` leaq 8(%r15), {emit_reg i.res.(0)}\n`;
- call_gc_sites :=
- { gc_lbl = lbl_call_gc;
- gc_return_lbl = lbl_redo;
- gc_frame = lbl_frame } :: !call_gc_sites
- end else begin
- begin match n with
- 16 -> ` {emit_call "caml_alloc1"}\n`
- | 24 -> ` {emit_call "caml_alloc2"}\n`
- | 32 -> ` {emit_call "caml_alloc3"}\n`
- | _ -> ` movq ${emit_int n}, %rax\n`;
- ` {emit_call "caml_allocN"}\n`
- end;
- `{record_frame i.live Debuginfo.none} leaq 8(%r15), {emit_reg i.res.(0)}\n`
- end
- | Lop(Iintop(Icomp cmp)) ->
- ` cmpq {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n`;
- let b = name_for_cond_branch cmp in
- ` set{emit_string b} %al\n`;
- ` movzbq %al, {emit_reg i.res.(0)}\n`
- | Lop(Iintop_imm(Icomp cmp, n)) ->
- ` cmpq ${emit_int n}, {emit_reg i.arg.(0)}\n`;
- let b = name_for_cond_branch cmp in
- ` set{emit_string b} %al\n`;
- ` movzbq %al, {emit_reg i.res.(0)}\n`
- | Lop(Iintop Icheckbound) ->
- let lbl = bound_error_label i.dbg in
- ` cmpq {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n`;
- ` jbe {emit_label lbl}\n`
- | Lop(Iintop_imm(Icheckbound, n)) ->
- let lbl = bound_error_label i.dbg in
- ` cmpq ${emit_int n}, {emit_reg i.arg.(0)}\n`;
- ` jbe {emit_label lbl}\n`
- | Lop(Iintop(Idiv | Imod)) ->
- ` cqto\n`;
- ` idivq {emit_reg i.arg.(1)}\n`
- | Lop(Iintop(Ilsl | Ilsr | Iasr as op)) ->
- (* We have i.arg.(0) = i.res.(0) and i.arg.(1) = %rcx *)
- ` {emit_string(instr_for_intop op)} %cl, {emit_reg i.res.(0)}\n`
- | Lop(Iintop op) ->
- (* We have i.arg.(0) = i.res.(0) *)
- ` {emit_string(instr_for_intop op)} {emit_reg i.arg.(1)}, {emit_reg i.res.(0)}\n`
- | Lop(Iintop_imm(Iadd, n)) when i.arg.(0).loc <> i.res.(0).loc ->
- ` leaq {emit_int n}({emit_reg i.arg.(0)}), {emit_reg i.res.(0)}\n`
- | Lop(Iintop_imm(Iadd, 1) | Iintop_imm(Isub, -1)) ->
- ` incq {emit_reg i.res.(0)}\n`
- | Lop(Iintop_imm(Iadd, -1) | Iintop_imm(Isub, 1)) ->
- ` decq {emit_reg i.res.(0)}\n`
- | Lop(Iintop_imm(Idiv, n)) ->
- (* Note: i.arg.(0) = i.res.(0) = rdx (cf. selection.ml) *)
- let l = Misc.log2 n in
- ` movq {emit_reg i.arg.(0)}, %rax\n`;
- ` addq ${emit_int(n-1)}, {emit_reg i.arg.(0)}\n`;
- ` testq %rax, %rax\n`;
- ` cmovns %rax, {emit_reg i.arg.(0)}\n`;
- ` sarq ${emit_int l}, {emit_reg i.res.(0)}\n`
- | Lop(Iintop_imm(Imod, n)) ->
- (* Note: i.arg.(0) = i.res.(0) = rdx (cf. selection.ml) *)
- ` movq {emit_reg i.arg.(0)}, %rax\n`;
- ` testq %rax, %rax\n`;
- ` leaq {emit_int(n-1)}(%rax), %rax\n`;
- ` cmovns {emit_reg i.arg.(0)}, %rax\n`;
- ` andq ${emit_int (-n)}, %rax\n`;
- ` subq %rax, {emit_reg i.res.(0)}\n`
- | Lop(Iintop_imm(op, n)) ->
- (* We have i.arg.(0) = i.res.(0) *)
- ` {emit_string(instr_for_intop op)} ${emit_int n}, {emit_reg i.res.(0)}\n`
- | Lop(Inegf) ->
- ` xorpd {emit_symbol "caml_negf_mask"}(%rip), {emit_reg i.res.(0)}\n`
- | Lop(Iabsf) ->
- ` andpd {emit_symbol "caml_absf_mask"}(%rip), {emit_reg i.res.(0)}\n`
- | Lop(Iaddf | Isubf | Imulf | Idivf as floatop) ->
- ` {emit_string(instr_for_floatop floatop)} {emit_reg i.arg.(1)}, {emit_reg i.res.(0)}\n`
- | Lop(Ifloatofint) ->
- ` cvtsi2sdq {emit_reg i.arg.(0)}, {emit_reg i.res.(0)}\n`
- | Lop(Iintoffloat) ->
- ` cvttsd2siq {emit_reg i.arg.(0)}, {emit_reg i.res.(0)}\n`
- | Lop(Ispecific(Ilea addr)) ->
- ` leaq {emit_addressing addr i.arg 0}, {emit_reg i.res.(0)}\n`
- | Lop(Ispecific(Istore_int(n, addr))) ->
- ` movq ${emit_nativeint n}, {emit_addressing addr i.arg 0}\n`
- | Lop(Ispecific(Istore_symbol(s, addr))) ->
- assert (not !pic_code && not !Clflags.dlcode);
- ` movq ${emit_symbol s}, {emit_addressing addr i.arg 0}\n`
- | Lop(Ispecific(Ioffset_loc(n, addr))) ->
- ` addq ${emit_int n}, {emit_addressing addr i.arg 0}\n`
- | Lop(Ispecific(Ifloatarithmem(op, addr))) ->
- ` {emit_string(instr_for_floatarithmem op)} {emit_addressing addr i.arg 1}, {emit_reg i.res.(0)}\n`
- | Lreloadretaddr ->
- ()
- | Lreturn ->
- output_epilogue();
- ` ret\n`
- | Llabel lbl ->
- `{emit_Llabel fallthrough lbl}:\n`
- | Lbranch lbl ->
- ` jmp {emit_label lbl}\n`
- | Lcondbranch(tst, lbl) ->
- begin match tst with
- Itruetest ->
- output_test_zero i.arg.(0);
- ` jne {emit_label lbl}\n`
- | Ifalsetest ->
- output_test_zero i.arg.(0);
- ` je {emit_label lbl}\n`
- | Iinttest cmp ->
- ` cmpq {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n`;
- let b = name_for_cond_branch cmp in
- ` j{emit_string b} {emit_label lbl}\n`
- | Iinttest_imm((Isigned Ceq | Isigned Cne |
- Iunsigned Ceq | Iunsigned Cne) as cmp, 0) ->
- output_test_zero i.arg.(0);
- let b = name_for_cond_branch cmp in
- ` j{emit_string b} {emit_label lbl}\n`
- | Iinttest_imm(cmp, n) ->
- ` cmpq ${emit_int n}, {emit_reg i.arg.(0)}\n`;
- let b = name_for_cond_branch cmp in
- ` j{emit_string b} {emit_label lbl}\n`
- | Ifloattest(cmp, neg) ->
- emit_float_test cmp neg i.arg lbl
- | Ioddtest ->
- ` testb $1, {emit_reg8 i.arg.(0)}\n`;
- ` jne {emit_label lbl}\n`
- | Ieventest ->
- ` testb $1, {emit_reg8 i.arg.(0)}\n`;
- ` je {emit_label lbl}\n`
- end
- | Lcondbranch3(lbl0, lbl1, lbl2) ->
- ` cmpq $1, {emit_reg i.arg.(0)}\n`;
- begin match lbl0 with
- None -> ()
- | Some lbl -> ` jb {emit_label lbl}\n`
- end;
- begin match lbl1 with
- None -> ()
- | Some lbl -> ` je {emit_label lbl}\n`
- end;
- begin match lbl2 with
- None -> ()
- | Some lbl -> ` jg {emit_label lbl}\n`
- end
- | Lswitch jumptbl ->
- let lbl = new_label() in
- (* rax and rdx are clobbered by the Lswitch,
- meaning that no variable that is live across the Lswitch
- is assigned to rax or rdx. However, the argument to Lswitch
- can still be assigned to one of these two registers, so
- we must be careful not to clobber it before use. *)
- let (tmp1, tmp2) =
- if i.arg.(0).loc = Reg 0 (* rax *)
- then (phys_reg 4 (*rdx*), phys_reg 0 (*rax*))
- else (phys_reg 0 (*rax*), phys_reg 4 (*rdx*)) in
- ` leaq {emit_label lbl}(%rip), {emit_reg tmp1}\n`;
- ` movslq ({emit_reg tmp1}, {emit_reg i.arg.(0)}, 4), {emit_reg tmp2}\n`;
- ` addq {emit_reg tmp2}, {emit_reg tmp1}\n`;
- ` jmp *{emit_reg tmp1}\n`;
- if macosx
- then ` .const\n`
- else ` .section .rodata\n`;
- emit_align 4;
- `{emit_label lbl}:`;
- for i = 0 to Array.length jumptbl - 1 do
- ` .long {emit_label jumptbl.(i)} - {emit_label lbl}\n`
- done;
- ` .text\n`
- | Lsetuptrap lbl ->
- ` call {emit_label lbl}\n`
- | Lpushtrap ->
- ` pushq %r14\n`;
- ` movq %rsp, %r14\n`;
- stack_offset := !stack_offset + 16
- | Lpoptrap ->
- ` popq %r14\n`;
- ` addq $8, %rsp\n`;
- stack_offset := !stack_offset - 16
- | Lraise ->
- if !Clflags.debug then begin
- ` {emit_call "caml_raise_exn"}\n`;
- record_frame Reg.Set.empty i.dbg
- end else begin
- ` movq %r14, %rsp\n`;
- ` popq %r14\n`;
- ` ret\n`
- end
-
-let rec emit_all fallthrough i =
- match i.desc with
- | Lend -> ()
- | _ ->
- emit_instr fallthrough i;
- emit_all (Linearize.has_fallthrough i.desc) i.next
-
-(* Emission of the floating-point constants *)
-
-let emit_float_constant (lbl, cst) =
- `{emit_label lbl}:`;
- emit_float64_directive ".quad" cst
-
-(* Emission of the profiling prelude *)
-
-let emit_profile () =
- match Config.system with
- | "linux" | "gnu" ->
- (* mcount preserves rax, rcx, rdx, rsi, rdi, r8, r9 explicitly
- and rbx, rbp, r12-r15 like all C functions.
- We need to preserve r10 and r11 ourselves, since Caml can
- use them for argument passing. *)
- ` pushq %r10\n`;
- ` movq %rsp, %rbp\n`;
- ` pushq %r11\n`;
- ` {emit_call "mcount"}\n`;
- ` popq %r11\n`;
- ` popq %r10\n`
- | _ ->
- () (*unsupported yet*)
-
-(* Emission of a function declaration *)
-
-let fundecl fundecl =
- function_name := fundecl.fun_name;
- fastcode_flag := fundecl.fun_fast;
- tailrec_entry_point := new_label();
- stack_offset := 0;
- float_constants := [];
- call_gc_sites := [];
- bound_error_sites := [];
- bound_error_call := 0;
- ` .text\n`;
- emit_align 16;
- if macosx
- && not !Clflags.output_c_object
- && is_generic_function fundecl.fun_name
- then (* PR#4690 *)
- ` .private_extern {emit_symbol fundecl.fun_name}\n`
- else
- ` .globl {emit_symbol fundecl.fun_name}\n`;
- `{emit_symbol fundecl.fun_name}:\n`;
- if !Clflags.gprofile then emit_profile();
- if frame_required() then begin
- let n = frame_size() - 8 in
- ` subq ${emit_int n}, %rsp\n`
- end;
- `{emit_label !tailrec_entry_point}:\n`;
- emit_all true fundecl.fun_body;
- List.iter emit_call_gc !call_gc_sites;
- emit_call_bound_errors ();
- begin match Config.system with
- "linux" | "gnu" ->
- ` .type {emit_symbol fundecl.fun_name},@function\n`;
- ` .size {emit_symbol fundecl.fun_name},.-{emit_symbol fundecl.fun_name}\n`
- | _ -> ()
- end;
- if !float_constants <> [] then begin
- if macosx
- then ` .literal8\n`
- else ` .section .rodata.cst8,\"a\",@progbits\n`;
- List.iter emit_float_constant !float_constants
- end
+ `module asm \" .align {emit_int n}\"\n`
(* Emission of data *)
let emit_item = function
Cglobal_symbol s ->
- ` .globl {emit_symbol s}\n`;
+ `module asm \" .globl {emit_symbol s}\"\n`;
| Cdefine_symbol s ->
- `{emit_symbol s}:\n`
+ `module asm \"{emit_symbol s}:\"\n`
| Cdefine_label lbl ->
- `{emit_label (100000 + lbl)}:\n`
+ `module asm \"{emit_label (100000 + lbl)}:\"\n`
| Cint8 n ->
- ` .byte {emit_int n}\n`
+ `module asm \" .byte {emit_int n}\"\n`
| Cint16 n ->
- ` .word {emit_int n}\n`
+ `module asm \" .word {emit_int n}\"\n`
| Cint32 n ->
- ` .long {emit_nativeint n}\n`
+ `module asm \" .long {emit_nativeint n}\"\n`
| Cint n ->
- ` .quad {emit_nativeint n}\n`
+ `module asm \" .quad {emit_nativeint n}\"\n`
| Csingle f ->
emit_float32_directive ".long" f
| Cdouble f ->
emit_float64_directive ".quad" f
| Csymbol_address s ->
- ` .quad {emit_symbol s}\n`
+ `module asm \" .quad {emit_symbol s}\"\n`
| Clabel_address lbl ->
- ` .quad {emit_label (100000 + lbl)}\n`
+ `module asm \" .quad {emit_label (100000 + lbl)}\"\n`
| Cstring s ->
emit_string_directive " .ascii " s
| Cskip n ->
- if n > 0 then ` .space {emit_int n}\n`
+ if n > 0 then `module asm \" .space {emit_int n}\"\n`
| Calign n ->
emit_align n
let data l =
- ` .data\n`;
+ `module asm \" .data\"\n`;
List.iter emit_item l
-(* Beginning / end of an assembly file *)
-
-let begin_assembly() =
- if !Clflags.dlcode then begin
- (* from amd64.S; could emit these constants on demand *)
- if macosx then
- ` .literal16\n`
- else
- ` .section .rodata.cst8,\"a\",@progbits\n`;
- emit_align 16;
- `{emit_symbol "caml_negf_mask"}: .quad 0x8000000000000000, 0\n`;
- emit_align 16;
- `{emit_symbol "caml_absf_mask"}: .quad 0x7FFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF\n`
- end;
- let lbl_begin = Compilenv.make_symbol (Some "data_begin") in
- ` .data\n`;
- ` .globl {emit_symbol lbl_begin}\n`;
- `{emit_symbol lbl_begin}:\n`;
- let lbl_begin = Compilenv.make_symbol (Some "code_begin") in
- ` .text\n`;
- ` .globl {emit_symbol lbl_begin}\n`;
- `{emit_symbol lbl_begin}:\n`;
- if macosx then ` nop\n` (* PR#4690 *)
+let begin_assembly() = Emit_common.begin_assembly()
-let end_assembly() =
- let lbl_end = Compilenv.make_symbol (Some "code_end") in
- ` .text\n`;
- if macosx then ` nop\n`; (* suppress "ld warning: atom sorting error" *)
- ` .globl {emit_symbol lbl_end}\n`;
- `{emit_symbol lbl_end}:\n`;
- ` .data\n`;
- let lbl_end = Compilenv.make_symbol (Some "data_end") in
- ` .globl {emit_symbol lbl_end}\n`;
- `{emit_symbol lbl_end}:\n`;
- ` .long 0\n`;
- let lbl = Compilenv.make_symbol (Some "frametable") in
- ` .globl {emit_symbol lbl}\n`;
- `{emit_symbol lbl}:\n`;
- emit_frames
- { efa_label = (fun l -> ` .quad {emit_label l}\n`);
- efa_16 = (fun n -> ` .word {emit_int n}\n`);
- efa_32 = (fun n -> ` .long {emit_int32 n}\n`);
- efa_word = (fun n -> ` .quad {emit_int n}\n`);
- efa_align = emit_align;
- efa_label_rel =
- if macosx then begin
- let setcnt = ref 0 in
- fun lbl ofs ->
- incr setcnt;
- ` .set L$set${emit_int !setcnt}, ({emit_label lbl} - .) + {emit_int32 ofs}\n`;
- ` .long L$set${emit_int !setcnt}\n`
- end else begin
- fun lbl ofs ->
- ` .long ({emit_label lbl} - .) + {emit_int32 ofs}\n`
- end;
- efa_def_label = (fun l -> `{emit_label l}:\n`);
- efa_string = (fun s -> emit_string_directive " .asciz " s) };
- if Config.system = "linux" then
- (* Mark stack as non-executable, PR#4564 *)
- ` .section .note.GNU-stack,\"\",%progbits\n`
+let end_assembly() = Emit_common.end_assembly()
View
179 src/asmcomp/amd64/proc.ml
@@ -14,185 +14,6 @@
(* Description of the AMD64 processor *)
-open Misc
-open Arch
-open Cmm
-open Reg
-open Mach
-
-(* Registers available for register allocation *)
-
-(* Register map:
- rax 0 rax - r11: Caml function arguments
- rbx 1 rdi - r9: C function arguments
- rdi 2 rax: Caml and C function results
- rsi 3 rbx, rbp, r12-r15 are preserved by C
- rdx 4
- rcx 5
- r8 6
- r9 7
- r10 8
- r11 9
- rbp 10
- r12 11
- r13 12
- r14 trap pointer
- r15 allocation pointer
-
- xmm0 - xmm15 100 - 115 xmm0 - xmm9: Caml function arguments
- xmm0 - xmm7: C function arguments
- xmm0: Caml and C function results *)
-
-let int_reg_name =
- [| "%rax"; "%rbx"; "%rdi"; "%rsi"; "%rdx"; "%rcx"; "%r8"; "%r9";
- "%r10"; "%r11"; "%rbp"; "%r12"; "%r13" |]
-
-let float_reg_name =
- [| "%xmm0"; "%xmm1"; "%xmm2"; "%xmm3"; "%xmm4"; "%xmm5"; "%xmm6"; "%xmm7";
- "%xmm8"; "%xmm9"; "%xmm10"; "%xmm11";
- "%xmm12"; "%xmm13"; "%xmm14"; "%xmm15" |]
-
-let num_register_classes = 2
-
-let register_class r =
- match r.typ with
- Int -> 0
- | Addr -> 0
- | Float -> 1
-
-let num_available_registers = [| 13; 16 |]
-
-let first_available_register = [| 0; 100 |]
-
-let register_name r =
- if r < 100 then int_reg_name.(r) else float_reg_name.(r - 100)
-
-(* Pack registers starting at %rax so as to reduce the number of REX
- prefixes and thus improve code density *)
-let rotate_registers = false
-
-(* Representation of hard registers by pseudo-registers *)
-
-let hard_int_reg =
- let v = Array.create 13 Reg.dummy in
- for i = 0 to 12 do v.(i) <- Reg.at_location Int (Reg i) done;
- v
-
-let hard_float_reg =
- let v = Array.create 16 Reg.dummy in
- for i = 0 to 15 do v.(i) <- Reg.at_location Float (Reg (100 + i)) done;
- v
-
-let all_phys_regs =
- Array.append hard_int_reg hard_float_reg
-
-let phys_reg n =
- if n < 100 then hard_int_reg.(n) else hard_float_reg.(n - 100)
-
-let rax = phys_reg 0
-let rcx = phys_reg 5
-let rdx = phys_reg 4
-let rxmm15 = phys_reg 115
-
-let stack_slot slot ty =
- Reg.at_location ty (Stack slot)
-
-(* Instruction selection *)
-
-let word_addressed = false
-
-(* Calling conventions *)
-
-let calling_conventions first_int last_int first_float last_float make_stack
- arg =
- let loc = Array.create (Array.length arg) Reg.dummy in
- let int = ref first_int in
- let float = ref first_float in
- let ofs = ref 0 in
- for i = 0 to Array.length arg - 1 do
- match arg.(i).typ with
- Int | Addr as ty ->
- if !int <= last_int then begin
- loc.(i) <- phys_reg !int;
- incr int
- end else begin
- loc.(i) <- stack_slot (make_stack !ofs) ty;
- ofs := !ofs + size_int
- end
- | Float ->
- if !float <= last_float then begin
- loc.(i) <- phys_reg !float;
- incr float
- end else begin
- loc.(i) <- stack_slot (make_stack !ofs) Float;
- ofs := !ofs + size_float
- end
- done;
- (loc, Misc.align !ofs 16) (* keep stack 16-aligned *)
-
-let incoming ofs = Incoming ofs
-let outgoing ofs = Outgoing ofs
-let not_supported ofs = fatal_error "Proc.loc_results: cannot call"
-
-let loc_arguments arg =
- calling_conventions 0 9 100 109 outgoing arg
-let loc_parameters arg =
- let (loc, ofs) = calling_conventions 0 9 100 109 incoming arg in loc
-let loc_results res =
- let (loc, ofs) = calling_conventions 0 0 100 100 not_supported res in loc
-
-(* C calling convention:
- first integer args in rdi, rsi, rdx, rcx, r8, r9
- first float args in xmm0 ... xmm7
- remaining args on stack.
- Return value in rax or xmm0. *)
-
-let loc_external_arguments arg =
- calling_conventions 2 7 100 107 outgoing arg
-let loc_external_results res =
- let (loc, ofs) = calling_conventions 0 0 100 100 not_supported res in loc
-
-let loc_exn_bucket = rax
-
-(* Registers destroyed by operations *)
-
-let destroyed_at_c_call = (* rbp, rbx, r12-r15 preserved *)
- Array.of_list(List.map phys_reg
- [0;2;3;4;5;6;7;8;9;
- 100;101;102;103;104;105;106;107;
- 108;109;110;111;112;113;114;115])
-
-let destroyed_at_oper = function
- Iop(Icall_ind | Icall_imm _ | Iextcall(_, true)) -> all_phys_regs
- | Iop(Iextcall(_, false)) -> destroyed_at_c_call
- | Iop(Iintop(Idiv | Imod)) -> [| rax; rdx |]
- | Iop(Istore(Single, _)) -> [| rxmm15 |]
- | Iop(Ialloc _ | Iintop(Icomp _) | Iintop_imm((Idiv|Imod|Icomp _), _))
- -> [| rax |]
- | Iswitch(_, _) -> [| rax; rdx |]
- | _ -> [||]
-
-let destroyed_at_raise = all_phys_regs
-
-(* Maximal register pressure *)
-
-let safe_register_pressure = function
- Iextcall(_,_) -> 0
- | _ -> 11
-
-let max_register_pressure = function
- Iextcall(_, _) -> [| 4; 0 |]
- | Iintop(Idiv | Imod) -> [| 11; 16 |]
- | Ialloc _ | Iintop(Icomp _) | Iintop_imm((Idiv|Imod|Icomp _), _)
- -> [| 12; 16 |]
- | Istore(Single, _) -> [| 13; 15 |]
- | _ -> [| 13; 16 |]
-
-(* Layout of the stack frame *)
-
-let num_stack_slots = [| 0; 0 |]
-let contains_calls = ref false
-
(* Calling the assembler *)
let assemble_file infile outfile =
View
211 src/asmcomp/amd64/proc_nt.ml
@@ -14,217 +14,6 @@
(* Description of the AMD64 processor with Win64 conventions *)
-open Misc
-open Arch
-open Cmm
-open Reg
-open Mach
-
-(* Registers available for register allocation *)
-
-(* Register map:
- rax 0 rax - r11: Caml function arguments
- rbx 1 rcx - r9: C function arguments
- rdi 2 rax: Caml and C function results
- rsi 3 rbx, rbp, rsi, rdi r12-r15 are preserved by C
- rdx 4
- rcx 5
- r8 6
- r9 7
- r10 8
- r11 9
- rbp 10
- r12 11
- r13 12
- r14 trap pointer
- r15 allocation pointer
-
- xmm0 - xmm15 100 - 115 xmm0 - xmm9: Caml function arguments
- xmm0 - xmm3: C function arguments
- xmm0: Caml and C function results
- xmm6-xmm15 are preserved by C *)
-
-let int_reg_name =
- [| "rax"; "rbx"; "rdi"; "rsi"; "rdx"; "rcx"; "r8"; "r9";
- "r10"; "r11"; "rbp"; "r12"; "r13" |]
-
-let float_reg_name =
- [| "xmm0"; "xmm1"; "xmm2"; "xmm3"; "xmm4"; "xmm5"; "xmm6"; "xmm7";
- "xmm8"; "xmm9"; "xmm10"; "xmm11"; "xmm12"; "xmm13"; "xmm14"; "xmm15" |]
-
-let num_register_classes = 2
-
-let register_class r =
- match r.typ with
- Int -> 0
- | Addr -> 0
- | Float -> 1
-
-let num_available_registers = [| 13; 16 |]
-
-let first_available_register = [| 0; 100 |]
-
-let register_name r =
- if r < 100 then int_reg_name.(r) else float_reg_name.(r - 100)
-
-(* Pack registers starting at %rax so as to reduce the number of REX
- prefixes and thus improve code density *)
-let rotate_registers = false
-
-(* Representation of hard registers by pseudo-registers *)
-
-let hard_int_reg =
- let v = Array.create 13 Reg.dummy in
- for i = 0 to 12 do v.(i) <- Reg.at_location Int (Reg i) done;
- v
-
-let hard_float_reg =
- let v = Array.create 16 Reg.dummy in
- for i = 0 to 15 do v.(i) <- Reg.at_location Float (Reg (100 + i)) done;
- v
-
-let all_phys_regs =
- Array.append hard_int_reg hard_float_reg
-
-let phys_reg n =
- if n < 100 then hard_int_reg.(n) else hard_float_reg.(n - 100)
-
-let rax = phys_reg 0
-let rcx = phys_reg 5
-let rdx = phys_reg 4
-let r11 = phys_reg 9
-let rxmm15 = phys_reg 115
-
-let stack_slot slot ty =
- Reg.at_location ty (Stack slot)
-
-(* Instruction selection *)
-
-let word_addressed = false
-
-(* Calling conventions *)
-
-let calling_conventions first_int last_int first_float last_float make_stack
- arg =
- let loc = Array.create (Array.length arg) Reg.dummy in
- let int = ref first_int in
- let float = ref first_float in
- let ofs = ref 0 in
- for i = 0 to Array.length arg - 1 do
- match arg.(i).typ with
- Int | Addr as ty ->
- if !int <= last_int then begin
- loc.(i) <- phys_reg !int;
- incr int
- end else begin
- loc.(i) <- stack_slot (make_stack !ofs) ty;
- ofs := !ofs + size_int
- end
- | Float ->
- if !float <= last_float then begin
- loc.(i) <- phys_reg !float;
- incr float
- end else begin
- loc.(i) <- stack_slot (make_stack !ofs) Float;
- ofs := !ofs + size_float
- end
- done;
- (loc, Misc.align !ofs 16) (* keep stack 16-aligned *)
-
-let incoming ofs = Incoming ofs
-let outgoing ofs = Outgoing ofs
-let not_supported ofs = fatal_error "Proc.loc_results: cannot call"
-
-let loc_arguments arg =
- calling_conventions 0 9 100 109 outgoing arg
-let loc_parameters arg =
- let (loc, ofs) = calling_conventions 0 9 100 109 incoming arg in loc
-let loc_results res =
- let (loc, ofs) = calling_conventions 0 0 100 100 not_supported res in loc
-
-(* C calling conventions (Win64):
- first integer args in rcx, rdx, r8, r9 (4 - 7)
- first float args in xmm0 ... xmm3 (100 - 103)
- each integer arg consumes a float reg, and conversely
- remaining args on stack
- always 32 bytes reserved at bottom of stack.
- Return value in rax or xmm0
-*)
-
-let loc_external_results res =
- let (loc, ofs) = calling_conventions 0 0 100 100 not_supported res in loc
-
-let int_external_arguments =
- [| 5 (*rcx*); 4 (*rdx*); 6 (*r8*); 7 (*r9*) |]
-let float_external_arguments =
- [| 100 (*xmm0*); 101 (*xmm1*); 102 (*xmm2*); 103 (*xmm3*) |]
-
-let loc_external_arguments arg =
- let loc = Array.create (Array.length arg) Reg.dummy in
- let reg = ref 0
- and ofs = ref 32 in
- for i = 0 to Array.length arg - 1 do
- match arg.(i).typ with
- Int | Addr as ty ->
- if !reg < 4 then begin
- loc.(i) <- phys_reg int_external_arguments.(!reg);
- incr reg
- end else begin
- loc.(i) <- stack_slot (Outgoing !ofs) ty;
- ofs := !ofs + size_int
- end
- | Float ->
- if !reg < 4 then begin
- loc.(i) <- phys_reg float_external_arguments.(!reg);
- incr reg
- end else begin
- loc.(i) <- stack_slot (Outgoing !ofs) Float;
- ofs := !ofs + size_float
- end
- done;
- (loc, Misc.align !ofs 16) (* keep stack 16-aligned *)
-
-let loc_exn_bucket = rax
-
-(* Registers destroyed by operations *)
-
-let destroyed_at_c_call =
- (* Win64: rbx, rbp, rsi, rdi, r12-r15, xmm6-xmm15 preserved *)
- Array.of_list(List.map phys_reg
- [0;4;5;6;7;8;9;
- 100;101;102;103;104;105])
-
-let destroyed_at_oper = function
- Iop(Icall_ind | Icall_imm _ | Iextcall(_, true)) -> all_phys_regs
- | Iop(Iextcall(_, false)) -> destroyed_at_c_call
- | Iop(Iintop(Idiv | Imod)) -> [| rax; rdx |]
- | Iop(Istore(Single, _)) -> [| rxmm15 |]
- | Iop(Ialloc _ | Iintop(Icomp _) | Iintop_imm((Idiv|Imod|Icomp _), _))
- -> [| rax |]
- | Iswitch(_, _) when !pic_code -> [| r11 |]
- | _ -> [||]
-
-let destroyed_at_raise = all_phys_regs
-
-(* Maximal register pressure *)
-
-let safe_register_pressure = function
- Iextcall(_,_) -> 8
- | _ -> 11
-
-let max_register_pressure = function
- Iextcall(_, _) -> [| 8; 10 |]
- | Iintop(Idiv | Imod) -> [| 11; 16 |]
- | Ialloc _ | Iintop(Icomp _) | Iintop_imm((Idiv|Imod|Icomp _), _)
- -> [| 12; 16 |]
- | Istore(Single, _) -> [| 13; 15 |]
- | _ -> [| 13; 16 |]
-
-(* Layout of the stack frame *)
-
-let num_stack_slots = [| 0; 0 |]
-let contains_calls = ref false
-
(* Calling the assembler *)
let assemble_file infile outfile =
View
127 src/asmcomp/amd64/reload.ml
@@ -1,127 +0,0 @@
-(***********************************************************************)
-(* *)
-(* Objective Caml *)
-(* *)
-(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
-(* *)
-(* Copyright 2000 Institut National de Recherche en Informatique et *)
-(* en Automatique. All rights reserved. This file is distributed *)
-(* under the terms of the Q Public License version 1.0. *)
-(* *)
-(***********************************************************************)
-
-(* $Id: reload.ml 10460 2010-05-24 15:26:23Z xleroy $ *)
-
-open Cmm
-open Arch
-open Reg
-open Mach
-
-(* Reloading for the AMD64 *)
-
-(* Summary of instruction set constraints:
- "S" means either stack or register, "R" means register only.
- Operation Res Arg1 Arg2
- Imove R S
- or S R
- Iconst_int S if 32-bit signed, R otherwise
- Iconst_float R
- Iconst_symbol (not PIC) S
- Iconst_symbol (PIC) R
- Icall_ind R
- Itailcall_ind R
- Iload R R R
- Istore R R
- Iintop(Icomp) R R S
- or S S R
- Iintop(Imul|Idiv|mod) R R S
- Iintop(shift) S S R
- Iintop(others) R R S
- or S S R
- Iintop_imm(Iadd, n)/lea R R
- Iintop_imm(others) S S
- Inegf...Idivf R R S
- Ifloatofint R S
- Iintoffloat R S
- Ispecific(Ilea) R R R
- Ispecific(Ifloatarithmem) R R R
-
- Conditional branches:
- Iinttest S R
- or R S
- Ifloattest R S (or S R if swapped test)
- other tests S
-*)
-
-let stackp r =
- match r.loc with
- Stack _ -> true
- | _ -> false
-
-class reload = object (self)
-
-inherit Reloadgen.reload_generic as super
-
-method! reload_operation op arg res =
- match op with
- | Iintop(Iadd|Isub|Iand|Ior|Ixor|Icomp _|Icheckbound) ->
- (* One of the two arguments can reside in the stack, but not both *)
- if stackp arg.(0) && stackp arg.(1)
- then ([|arg.(0); self#makereg arg.(1)|], res)
- else (arg, res)
- | Iintop_imm(Iadd, _) when arg.(0).loc <> res.(0).loc ->
- (* This add will be turned into a lea; args and results must be
- in registers *)
- super#reload_operation op arg res
- | Iintop(Idiv | Imod | Ilsl | Ilsr | Iasr)
- | Iintop_imm(_, _) ->
- (* The argument(s) and results can be either in register or on stack *)
- (* Note: Idiv, Imod: arg(0) and res(0) already forced in regs
- Ilsl, Ilsr, Iasr: arg(1) already forced in regs *)
- (arg, res)
- | Iintop(Imul) | Iaddf | Isubf | Imulf | Idivf ->
- (* First argument (= result) must be in register, second arg
- can reside in the stack *)
- if stackp arg.(0)
- then (let r = self#makereg arg.(0) in ([|r; arg.(1)|], [|r|]))
- else (arg, res)
- | Ifloatofint | Iintoffloat ->
- (* Result must be in register, but argument can be on stack *)
- (arg, (if stackp res.(0) then [| self#makereg res.(0) |] else res))
- | Iconst_int n ->
- if n <= 0x7FFFFFFFn && n >= -0x80000000n
- then (arg, res)
- else super#reload_operation op arg res
- | Iconst_symbol _ ->
- if !pic_code || !Clflags.dlcode
- then super#reload_operation op arg res
- else (arg, res)
- | _ -> (* Other operations: all args and results in registers *)
- super#reload_operation op arg res
-
-method! reload_test tst arg =
- match tst with
- Iinttest cmp ->
- (* One of the two arguments can reside on stack *)
- if stackp arg.(0) && stackp arg.(1)
- then [| self#makereg arg.(0); arg.(1) |]
- else arg
- | Ifloattest((Clt|Cle), _) ->
- (* Cf. emit.mlp: we swap arguments in this case *)
- (* First argument can be on stack, second must be in register *)
- if stackp arg.(1)
- then [| arg.(0); self#makereg arg.(1) |]
- else arg
- | Ifloattest((Ceq|Cne|Cgt|Cge), _) ->
- (* Second argument can be on stack, first must be in register *)
- if stackp arg.(0)
- then [| self#makereg arg.(0); arg.(1) |]
- else arg
- | _ ->
- (* The argument(s) can be either in register or on stack *)
- arg
-
-end
-
-let fundecl f =
- (new reload)#fundecl f
View
20 src/asmcomp/amd64/scheduling.ml
@@ -1,20 +0,0 @@
-(***********************************************************************)
-(* *)
-(* Objective Caml *)
-(* *)
-(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
-(* *)
-(* Copyright 2000 Institut National de Recherche en Informatique et *)
-(* en Automatique. All rights reserved. This file is distributed *)
-(* under the terms of the Q Public License version 1.0. *)
-(* *)
-(***********************************************************************)
-
-(* $Id: scheduling.ml 5634 2003-06-30 08:28:48Z xleroy $ *)
-
-open Schedgen (* to create a dependency *)
-
-(* Scheduling is turned off because the processor schedules dynamically
- much better than what we could do. *)
-
-let fundecl f = f
View
235 src/asmcomp/amd64/selection.ml
@@ -1,235 +0,0 @@
-(***********************************************************************)
-(* *)
-(* Objective Caml *)
-(* *)
-(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
-(* *)
-(* Copyright 2000 Institut National de Recherche en Informatique et *)
-(* en Automatique. All rights reserved. This file is distributed *)
-(* under the terms of the Q Public License version 1.0. *)
-(* *)
-(***********************************************************************)
-
-(* $Id: selection.ml 10250 2010-04-08 03:58:41Z garrigue $ *)
-
-(* Instruction selection for the AMD64 *)
-
-open Misc
-open Arch
-open Proc
-open Cmm
-open Reg
-open Mach
-
-(* Auxiliary for recognizing addressing modes *)
-
-type addressing_expr =
- Asymbol of string
- | Alinear of expression
- | Aadd of expression * expression
- | Ascale of expression * int
- | Ascaledadd of expression * expression * int
-
-let rec select_addr exp =
- match exp with
- Cconst_symbol s when not !Clflags.dlcode ->
- (Asymbol s, 0)
- | Cop((Caddi | Cadda), [arg; Cconst_int m]) ->
- let (a, n) = select_addr arg in (a, n + m)
- | Cop((Csubi | Csuba), [arg; Cconst_int m]) ->
- let (a, n) = select_addr arg in (a, n - m)
- | Cop((Caddi | Cadda), [Cconst_int m; arg]) ->
- let (a, n) = select_addr arg in (a, n + m)
- | Cop(Clsl, [arg; Cconst_int(1|2|3 as shift)]) ->
- begin match select_addr arg with
- (Alinear e, n) -> (Ascale(e, 1 lsl shift), n lsl shift)
- | _ -> (Alinear exp, 0)
- end
- | Cop(Cmuli, [arg; Cconst_int(2|4|8 as mult)]) ->
- begin match select_addr arg with
- (Alinear e, n) -> (Ascale(e, mult), n * mult)
- | _ -> (Alinear exp, 0)
- end
- | Cop(Cmuli, [Cconst_int(2|4|8 as mult); arg]) ->
- begin match select_addr arg with
- (Alinear e, n) -> (Ascale(e, mult), n * mult)
- | _ -> (Alinear exp, 0)
- end
- | Cop((Caddi | Cadda), [arg1; arg2]) ->
- begin match (select_addr arg1, select_addr arg2) with
- ((Alinear e1, n1), (Alinear e2, n2)) ->
- (Aadd(e1, e2), n1 + n2)
- | ((Alinear e1, n1), (Ascale(e2, scale), n2)) ->
- (Ascaledadd(e1, e2, scale), n1 + n2)
- | ((Ascale(e1, scale), n1), (Alinear e2, n2)) ->
- (Ascaledadd(e2, e1, scale), n1 + n2)
- | (_, (Ascale(e2, scale), n2)) ->
- (Ascaledadd(arg1, e2, scale), n2)
- | ((Ascale(e1, scale), n1), _) ->
- (Ascaledadd(arg2, e1, scale), n1)
- | _ ->
- (Aadd(arg1, arg2), 0)
- end
- | arg ->
- (Alinear arg, 0)
-
-(* Special constraints on operand and result registers *)
-
-exception Use_default
-
-let rax = phys_reg 0
-let rcx = phys_reg 5
-let rdx = phys_reg 4
-
-let pseudoregs_for_operation op arg res =
- match op with
- (* Two-address binary operations: arg.(0) and res.(0) must be the same *)
- Iintop(Iadd|Isub|Imul|Iand|Ior|Ixor) | Iaddf|Isubf|Imulf|Idivf ->
- ([|res.(0); arg.(1)|], res)
- (* One-address unary operations: arg.(0) and res.(0) must be the same *)
- | Iintop_imm((Iadd|Isub|Imul|Iand|Ior|Ixor|Ilsl|Ilsr|Iasr), _)
- | Iabsf | Inegf ->
- (res, res)
- | Ispecific(Ifloatarithmem(_,_)) ->
- let arg' = Array.copy arg in
- arg'.(0) <- res.(0);
- (arg', res)
- (* For shifts with variable shift count, second arg must be in rcx *)
- | Iintop(Ilsl|Ilsr|Iasr) ->
- ([|res.(0); rcx|], res)
- (* For div and mod, first arg must be in rax, rdx is clobbered,
- and result is in rax or rdx respectively.
- Keep it simple, just force second argument in rcx. *)
- | Iintop(Idiv) ->
- ([| rax; rcx |], [| rax |])
- | Iintop(Imod) ->
- ([| rax; rcx |], [| rdx |])
- (* For div and mod with immediate operand, arg must not be in rax.
- Keep it simple, force it in rdx. *)
- | Iintop_imm((Idiv|Imod), _) ->
- ([| rdx |], [| rdx |])
- (* Other instructions are regular *)
- | _ -> raise Use_default
-
-(* The selector class *)
-
-class selector = object (self)
-
-inherit Selectgen.selector_generic as super
-
-method is_immediate n = n <= 0x7FFFFFFF && n >= -0x80000000
-
-method is_immediate_natint n = n <= 0x7FFFFFFFn && n >= -0x80000000n
-
-method select_addressing exp =
- let (a, d) = select_addr exp in
- (* PR#4625: displacement must be a signed 32-bit immediate *)
- if d < -0x8000_0000 || d > 0x7FFF_FFFF
- then (Iindexed 0, exp)
- else match a with
- | Asymbol s ->
- (Ibased(s, d), Ctuple [])
- | Alinear e ->
- (Iindexed d, e)
- | Aadd(e1, e2) ->
- (Iindexed2 d, Ctuple[e1; e2])
- | Ascale(e, scale) ->
- (Iscaled(scale, d), e)
- | Ascaledadd(e1, e2, scale) ->
- (Iindexed2scaled(scale, d), Ctuple[e1; e2])
-
-method! select_store addr exp =
- match exp with
- Cconst_int n when self#is_immediate n ->
- (Ispecific(Istore_int(Nativeint.of_int n, addr)), Ctuple [])
- | Cconst_natint n when self#is_immediate_natint n ->
- (Ispecific(Istore_int(n, addr)), Ctuple [])
- | Cconst_pointer n when self#is_immediate n ->
- (Ispecific(Istore_int(Nativeint.of_int n, addr)), Ctuple [])
- | Cconst_natpointer n when self#is_immediate_natint n ->
- (Ispecific(Istore_int(n, addr)), Ctuple [])
- | Cconst_symbol s when not (!pic_code || !Clflags.dlcode) ->
- (Ispecific(Istore_symbol(s, addr)), Ctuple [])
- | _ ->
- super#select_store addr exp
-
-method! select_operation op args =
- match op with
- (* Recognize the LEA instruction *)
- Caddi | Cadda | Csubi | Csuba ->
- begin match self#select_addressing (Cop(op, args)) with
- (Iindexed d, _) -> super#select_operation op args
- | (Iindexed2 0, _) -> super#select_operation op args
- | (addr, arg) -> (Ispecific(Ilea addr), [arg])
- end
- (* Recognize (x / cst) and (x % cst) only if cst is a power of 2. *)
- | Cdivi ->
- begin match args with
- [arg1; Cconst_int n] when self#is_immediate n
- && n = 1 lsl (Misc.log2 n) ->
- (Iintop_imm(Idiv, n), [arg1])
- | _ -> (Iintop Idiv, args)
- end
- | Cmodi ->
- begin match args with
- [arg1; Cconst_int n] when self#is_immediate n
- && n = 1 lsl (Misc.log2 n) ->
- (Iintop_imm(Imod, n), [arg1])
- | _ -> (Iintop Imod, args)
- end
- (* Recognize float arithmetic with memory. *)
- | Caddf ->
- self#select_floatarith true Iaddf Ifloatadd args
- | Csubf ->
- self#select_floatarith false Isubf Ifloatsub args
- | Cmulf ->
- self#select_floatarith true Imulf Ifloatmul args
- | Cdivf ->
- self#select_floatarith false Idivf Ifloatdiv args
- (* Recognize store instructions *)
- | Cstore Word ->
- begin match args with
- [loc; Cop(Caddi, [Cop(Cload _, [loc']); Cconst_int n])]
- when loc = loc' && self#is_immediate n ->
- let (addr, arg) = self#select_addressing loc in
- (Ispecific(Ioffset_loc(n, addr)), [arg])
- | _ ->
- super#select_operation op args
- end
- | _ -> super#select_operation op args
-
-(* Recognize float arithmetic with mem *)
-
-method select_floatarith commutative regular_op mem_op args =
- match args with
- [arg1; Cop(Cload (Double|Double_u), [loc2])] ->
- let (addr, arg2) = self#select_addressing loc2 in
- (Ispecific(Ifloatarithmem(mem_op, addr)),
- [arg1; arg2])
- | [Cop(Cload (Double|Double_u), [loc1]); arg2] when commutative ->
- let (addr, arg1) = self#select_addressing loc1 in
- (Ispecific(Ifloatarithmem(mem_op, addr)),
- [arg2; arg1])
- | [arg1; arg2] ->
- (regular_op, [arg1; arg2])
- | _ ->
- assert false
-
-(* Deal with register constraints *)
-
-method! insert_op_debug op dbg rs rd =
- try
- let (rsrc, rdst) = pseudoregs_for_operation op rs rd in
- self#insert_moves rs rsrc;
- self#insert_debug (Iop op) dbg rsrc rdst;
- self#insert_moves rdst rd;
- rd
- with Use_default ->
- super#insert_op_debug op dbg rs rd
-
-method! insert_op op rs rd =
- self#insert_op_debug op Debuginfo.none rs rd
-
-end
-
-let fundecl f = (new selector)#emit_fundecl f
View
101 src/asmcomp/asmgen.ml
@@ -19,71 +19,50 @@ open Config
open Clflags
open Misc
open Cmm
+open Reg
type error = Assembler_error of string
exception Error of error
-let liveness ppf phrase =
- Liveness.fundecl ppf phrase; phrase
-
-let dump_if ppf flag message phrase =
- if !flag then Printmach.phase message ppf phrase
-
-let pass_dump_if ppf flag message phrase =
- dump_if ppf flag message phrase; phrase
-
-let pass_dump_linear_if ppf flag message phrase =
- if !flag then fprintf ppf "*** %s@.%a@." message Printlinear.fundecl phrase;
- phrase
-
-let rec regalloc ppf round fd =
- if round > 50 then
- fatal_error(fd.Mach.fun_name ^
- ": function too complex, cannot complete register allocation");
- dump_if ppf dump_live "Liveness analysis" fd;
- Interf.build_graph fd;
- if !dump_interf then Printmach.interferences ppf ();
- if !dump_prefer then Printmach.preferences ppf ();
- Coloring.allocate_registers();
- dump_if ppf dump_regalloc "After register allocation" fd;
- let (newfd, redo_regalloc) = Reload.fundecl fd in
- dump_if ppf dump_reload "After insertion of reloading code" newfd;
- if redo_regalloc then begin
- Reg.reinit(); Liveness.fundecl ppf newfd; regalloc ppf (round + 1) newfd
- end else newfd
-
let (++) x f = f x
-let compile_fundecl (ppf : formatter) fd_cmm =
- Reg.reset();
+let read_function phrase =
+ match phrase with
+ | Cfunction fd_cmm ->
+ let name = fd_cmm.fun_name in
+ let args = List.map (fun _ -> addr_type) fd_cmm.fun_args in
+ Emit_common.local_functions := (Aux.translate_symbol name, args) :: !Emit_common.local_functions
+ | Cdata _ -> ()
+
+let dump print x = if !debug then print x; x
+
+let compile_fundecl fd_cmm =
fd_cmm
- ++ Selection.fundecl
- ++ pass_dump_if ppf dump_selection "After instruction selection"
- ++ Comballoc.fundecl
- ++ pass_dump_if ppf dump_combine "After allocation combining"
- ++ liveness ppf
- ++ pass_dump_if ppf dump_live "Liveness analysis"
- ++ Spill.fundecl
- ++ liveness ppf
- ++ pass_dump_if ppf dump_spill "After spilling"
- ++ Split.fundecl
- ++ pass_dump_if ppf dump_split "After live range splitting"
- ++ liveness ppf
- ++ regalloc ppf 1
+ ++ Selectgen.fundecl
+(* ++ dump (fun fn -> print_endline (Aux.to_string fn.body)) *)
++ Linearize.fundecl
- ++ pass_dump_linear_if ppf dump_linear "Linearized code"
- ++ Scheduling.fundecl
- ++ pass_dump_linear_if ppf dump_scheduling "After instruction scheduling"
- ++ Emit.fundecl
+ ++ Emit_common.fundecl
+
+let begin_assembly = Emit.begin_assembly
+
+let end_assembly = Emit.end_assembly
+
+let run_assembler asm infile outfile =
+ Ccomp.command (asm ^ " -o " ^ Filename.quote outfile ^ " " ^ Filename.quote infile)
+
+let assemble_file temp1 temp2 infile outfile =
+ let res = run_assembler Config.opt infile temp1 in
+ if res <> 0 then res else
+ let res = run_assembler Config.llc temp1 temp2 in
+ if res <> 0 then res else
+ Proc.assemble_file temp2 outfile
let compile_phrase ppf p =
if !dump_cmm then fprintf ppf "%a@." Printcmm.phrase p;
- let compile_fundecl = if !use_llvm then Llvmcompile.compile_fundecl else compile_fundecl ppf
- and data = if !use_llvm then Llvmcompile.data else Emit.data in
match p with
| Cfunction fd -> compile_fundecl fd
- | Cdata dl -> data dl
+ | Cdata dl -> Emit.data dl
(* For the native toplevel: generates generic functions unless
@@ -96,25 +75,18 @@ let compile_genfuns ppf f =
| _ -> ())
(Cmmgen.generic_functions true [Compilenv.current_unit_infos ()])
-let begin_assembly () =
- if !use_llvm then Llvmcompile.begin_assembly() else Emit.begin_assembly()
-
-let end_assembly () =
- if !use_llvm then Llvmcompile.end_assembly() else Emit.end_assembly()
-
let compile_implementation ?toplevel prefixname ppf (size, lam) =
- let suffix = if !use_llvm then ext_llvm else ext_asm in
let asmfile =
if !keep_asm_file
- then prefixname ^ suffix
- else Filename.temp_file "camlasm" suffix in
+ then prefixname ^ ext_llvm
+ else Filename.temp_file "camlasm" ext_llvm in
let oc = open_out asmfile in
begin try
Emitaux.output_channel := oc;
- begin_assembly();
+ Emit.begin_assembly();
Closure.intro size lam
++ Cmmgen.compunit size
- ++ List.map (fun x -> if !use_llvm then Llvmcompile.read_function x; x)
+ ++ List.map (fun x -> read_function x; x)
++ List.iter (compile_phrase ppf) ++ (fun () -> ());
(match toplevel with None -> () | Some f -> compile_genfuns ppf f);
@@ -130,7 +102,7 @@ let compile_implementation ?toplevel prefixname ppf (size, lam) =
(List.map Primitive.native_name !Translmod.primitive_declarations))
);
- end_assembly();
+ Emit.end_assembly();
close_out oc
with x ->
close_out oc;
@@ -145,8 +117,7 @@ let compile_implementation ?toplevel prefixname ppf (size, lam) =
if !Clflags.keep_asm_file then prefixname ^ ext_asm
else Filename.temp_dir_name ^ "/" ^ Filename.basename prefixname ^ ext_asm
in
- let assemble = if !use_llvm then Llvmcompile.assemble_file temp1 temp2 else Proc.assemble_file in
- if assemble asmfile (prefixname ^ ext_obj) <> 0
+ if assemble_file temp1 temp2 asmfile (prefixname ^ ext_obj) <> 0
then raise(Error(Assembler_error asmfile));
if !keep_asm_file then ()
else begin
View
4 src/asmcomp/asmgen.mli
@@ -20,9 +20,13 @@ val compile_implementation :
val compile_phrase :
Format.formatter -> Cmm.phrase -> unit
+val read_function: Cmm.phrase -> unit
+
val begin_assembly : unit -> unit
val end_assembly : unit -> unit
+val assemble_file: string -> string -> string -> string -> int
+
type error = Assembler_error of string
exception Error of error
val report_error: Format.formatter -> error -> unit
View
13 src/asmcomp/asmlink.ml
@@ -209,7 +209,7 @@ let make_startup_file ppf filename units_list =
compile_phrase (Cmmgen.entry_point name_list);
let units = List.map (fun (info,_,_) -> info) units_list in
let tmp = Cmmgen.generic_functions false units in
- List.iter Llvmcompile.read_function tmp;
+ List.iter Asmgen.read_function tmp;
List.iter compile_phrase tmp;
Array.iter
(fun name -> compile_phrase (Cmmgen.predef_exception name))
@@ -321,10 +321,9 @@ let link ppf objfiles output_name =
units_tolink;
Clflags.ccobjs := !Clflags.ccobjs @ !lib_ccobjs;
Clflags.ccopts := !lib_ccopts @ !Clflags.ccopts; (* put user's opts first *)
- let suffix = if !Clflags.use_llvm then ext_llvm else ext_asm in
let startup =
- if !Clflags.keep_startup_file then output_name ^ ".startup" ^ suffix
- else Filename.temp_file "camlstartup" suffix in
+ if !Clflags.keep_startup_file then output_name ^ ".startup" ^ ext_llvm
+ else Filename.temp_file "camlstartup" ext_llvm in
make_startup_file ppf startup units_tolink;
let startup_obj = Filename.temp_file "camlstartup" ext_obj in
let temp1 =
@@ -335,12 +334,8 @@ let link ppf objfiles output_name =
if !Clflags.keep_startup_file then output_name ^ ".startup" ^ ext_asm
else Filename.temp_file "camlstartup" ext_asm
in
- if !Clflags.use_llvm then begin
- if Llvmcompile.assemble_file temp1 temp2 startup startup_obj <> 0 then
+ if Asmgen.assemble_file temp1 temp2 startup startup_obj <> 0 then
raise(Error(Assembler_error startup));
- end else
- if Proc.assemble_file startup startup_obj <> 0
- then raise(Error(Assembler_error startup));
try
call_linker (List.map object_file_name objfiles) startup_obj output_name;
if not !Clflags.keep_startup_file then remove_file startup;
View
35 src/asmcomp/aux.ml
@@ -0,0 +1,35 @@
+(*
+val debug : bool ref
+(* Print a debugging message to stdout *)
+val print_debug : string -> unit
+
+val (++) : 'a -> ('a -> 'b) -> 'b
+
+val reg_name : reg -> string
+
+val string_of_reg : reg -> string
+
+(* Print the internal representation of an LLVM instruction in a notation
+ * inspired by S-expressions *)
+val to_string : Llvm_mach.instruction -> string
+ *)
+
+exception Llvm_error of string
+
+(* Raise an Llvm_error with the string given as an argument. *)
+let error s = raise (Llvm_error s)
+
+let debug = ref false
+
+let print_debug str = if !debug then print_endline str
+
+let translate_symbol s =
+ let result = ref "" in
+ for i = 0 to String.length s - 1 do
+ let c = s.[i] in
+ match c with
+ |'A'..'Z' | 'a'..'z' | '0'..'9' | '_' ->
+ result := !result ^ Printf.sprintf "%c" c
+ | _ -> result := !result ^ Printf.sprintf "$%02x" (Char.code c)
+ done;
+ !result
View
2 src/asmcomp/cmmgen.ml
@@ -252,7 +252,7 @@ let tag_offset =
if big_endian then -1 else -size_int
let get_tag ptr =
- if Proc.word_addressed then (* If byte loads are slow *)
+ if Arch.word_addressed then (* If byte loads are slow *)
Cop(Cand, [header ptr; Cconst_int 255])
else (* If byte loads are efficient *)
Cop(Cload Byte_unsigned,
View
278 src/asmcomp/coloring.ml
@@ -1,278 +0,0 @@
-(***********************************************************************)
-(* *)
-(* Objective Caml *)
-(* *)
-(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
-(* *)
-(* Copyright 1996 Institut National de Recherche en Informatique et *)
-(* en Automatique. All rights reserved. This file is distributed *)
-(* under the terms of the Q Public License version 1.0. *)
-(* *)
-(***********************************************************************)
-
-(* $Id: coloring.ml 9547 2010-01-22 12:48:24Z doligez $ *)
-
-(* Register allocation by coloring of the interference graph *)
-
-open Reg
-
-(* Preallocation of spilled registers in the stack. *)
-
-let allocate_spilled reg =
- if reg.spill then begin
- let cl = Proc.register_class reg in
- let nslots = Proc.num_stack_slots.(cl) in
- let conflict = Array.create nslots false in
- List.iter
- (fun r ->
- match r.loc with
- Stack(Local n) ->
- if Proc.register_class r = cl then conflict.(n) <- true
- | _ -> ())
- reg.interf;
- let slot = ref 0 in
- while !slot < nslots && conflict.(!slot) do incr slot done;
- reg.loc <- Stack(Local !slot);
- if !slot >= nslots then Proc.num_stack_slots.(cl) <- !slot + 1
- end
-
-(* Compute the degree (= number of neighbours of the same type)
- of each register, and split them in two sets:
- unconstrained (degree < number of available registers)
- and constrained (degree >= number of available registers).
- Spilled registers are ignored in the process. *)
-
-let unconstrained = ref Reg.Set.empty
-let constrained = ref Reg.Set.empty
-
-let find_degree reg =
- if reg.spill then () else begin
- let cl = Proc.register_class reg in
- let avail_regs = Proc.num_available_registers.(cl) in
- if avail_regs = 0 then
- (* Don't bother computing the degree if there are no regs
- in this class *)
- unconstrained := Reg.Set.add reg !unconstrained
- else begin
- let deg = ref 0 in
- List.iter
- (fun r -> if not r.spill && Proc.register_class r = cl then incr deg)
- reg.interf;
- reg.degree <- !deg;
- if !deg >= avail_regs
- then constrained := Reg.Set.add reg !constrained
- else unconstrained := Reg.Set.add reg !unconstrained
- end
- end
-
-(* Remove a register from the interference graph *)
-
-let remove_reg reg =
- reg.degree <- 0; (* 0 means r is no longer part of the graph *)
- let cl = Proc.register_class reg in
- List.iter
- (fun r ->
- if Proc.register_class r = cl && r.degree > 0 then begin
- let olddeg = r.degree in
- r.degree <- olddeg - 1;
- if olddeg = Proc.num_available_registers.(cl) then begin
- (* r was constrained and becomes unconstrained *)
- constrained := Reg.Set.remove r !constrained;
- unconstrained := Reg.Set.add r !unconstrained
- end
- end)
- reg.interf
-
-(* Remove all registers one by one, unconstrained if possible, otherwise
- constrained with lowest spill cost. Return the list of registers removed
- in reverse order.
- The spill cost measure is [r.spill_cost / r.degree].
- [r.spill_cost] estimates the number of accesses to this register. *)
-
-let rec remove_all_regs stack =
- if not (Reg.Set.is_empty !unconstrained) then begin
- (* Pick any unconstrained register *)
- let r = Reg.Set.choose !unconstrained in
- unconstrained := Reg.Set.remove r !unconstrained;
- remove_all_regs (r :: stack)
- end else
- if not (Reg.Set.is_empty !constrained) then begin
- (* Find a constrained reg with minimal cost *)
- let r = ref Reg.dummy in
- let min_degree = ref 0 and min_spill_cost = ref 1 in
- (* initially !min_spill_cost / !min_degree is +infty *)
- Reg.Set.iter
- (fun r2 ->
- (* if r2.spill_cost / r2.degree < !min_spill_cost / !min_degree *)
- if r2.spill_cost * !min_degree < !min_spill_cost * r2.degree
- then begin
- r := r2; min_degree := r2.degree; min_spill_cost := r2.spill_cost
- end)
- !constrained;
- constrained := Reg.Set.remove !r !constrained;
- remove_all_regs (!r :: stack)
- end else
- stack (* All regs have been removed *)
-
-(* Iterate over all registers preferred by the given register (transitively) *)
-
-let iter_preferred f reg =
- let rec walk r w =
- if not r.visited then begin
- f r w;
- begin match r.prefer with
- [] -> ()
- | p -> r.visited <- true;
- List.iter (fun (r1, w1) -> walk r1 (min w w1)) p;
- r.visited <- false
- end
- end in
- reg.visited <- true;
- List.iter (fun (r, w) -> walk r w) reg.prefer;
- reg.visited <- false
-
-(* Where to start the search for a suitable register.
- Used to introduce some "randomness" in the choice between registers
- with equal scores. This offers more opportunities for scheduling. *)
-
-let start_register = Array.create Proc.num_register_classes 0
-
-(* Assign a location to a register, the best we can *)
-
-let assign_location reg =
- let cl = Proc.register_class reg in
- let first_reg = Proc.first_available_register.(cl) in
- let num_regs = Proc.num_available_registers.(cl) in
- let last_reg = first_reg + num_regs in
- let score = Array.create num_regs 0 in
- let best_score = ref (-1000000) and best_reg = ref (-1) in
- let start = start_register.(cl) in
- if num_regs > 0 then begin
- (* Favor the registers that have been assigned to pseudoregs for which
- we have a preference. If these pseudoregs have not been assigned
- already, avoid the registers with which they conflict. *)
- iter_preferred
- (fun r w ->
- match r.loc with
- Reg n -> if n >= first_reg && n < last_reg then
- score.(n - first_reg) <- score.(n - first_reg) + w
- | Unknown ->
- List.iter
- (fun neighbour ->
- match neighbour.loc with
- Reg n -> if n >= first_reg && n < last_reg then
- score.(n - first_reg) <- score.(n - first_reg) - w
- | _ -> ())
- r.interf
- | _ -> ())
- reg;
- List.iter
- (fun neighbour ->
- (* Prohibit the registers that have been assigned
- to our neighbours *)
- begin match neighbour.loc with
- Reg n -> if n >= first_reg && n < last_reg then
- score.(n - first_reg) <- (-1000000)
- | _ -> ()
- end;
- (* Avoid the registers that have been assigned to pseudoregs
- for which our neighbours have a preference *)
- iter_preferred
- (fun r w ->
- match r.loc with
- Reg n -> if n >= first_reg && n < last_reg then
- score.(n - first_reg) <- score.(n - first_reg) - (w - 1)
- (* w-1 to break the symmetry when two conflicting regs
- have the same preference for a third reg. *)
- | _ -> ())
- neighbour)
- reg.interf;
- (* Pick the register with the best score *)
- for n = start to num_regs - 1 do
- if score.(n) > !best_score then begin
- best_score := score.(n);
- best_reg := n
- end
- done;
- for n = 0 to start - 1 do
- if score.(n) > !best_score then begin
- best_score := score.(n);
- best_reg := n
- end
- done
- end;
- (* Found a register? *)
- if !best_reg >= 0 then begin
- reg.loc <- Reg(first_reg + !best_reg);
- if Proc.rotate_registers then
- start_register.(cl) <- (if start+1 >= num_regs then 0 else start+1)
- end else begin
- (* Sorry, we must put the pseudoreg in a stack location *)
- let nslots = Proc.num_stack_slots.(cl) in
- let score = Array.create nslots 0 in
- (* Compute the scores as for registers *)
- List.iter
- (fun (r, w) ->
- match r.loc with
- Stack(Local n) -> if Proc.register_class r = cl then
- score.(n) <- score.(n) + w
- | Unknown ->
- List.iter
- (fun neighbour ->
- match neighbour.loc with
- Stack(Local n) ->
- if Proc.register_class neighbour = cl
- then score.(n) <- score.(n) - w
- | _ -> ())
- r.interf
- | _ -> ())
- reg.prefer;
- List.iter
- (fun neighbour ->
- begin match neighbour.loc with
- Stack(Local n) ->
- if Proc.register_class neighbour = cl then
- score.(n) <- (-1000000)
- | _ -> ()
- end;
- List.iter
- (fun (r, w) ->
- match r.loc with
- Stack(Local n) -> if Proc.register_class r = cl then
- score.(n) <- score.(n) - w
- | _ -> ())
- neighbour.prefer)
- reg.interf;
- (* Pick the location with the best score *)
- let best_score = ref (-1000000) and best_slot = ref (-1) in
- for n = 0 to nslots - 1 do
- if score.(n) > !best_score then begin
- best_score := score.(n);
- best_slot := n
- end
- done;
- (* Found one? *)
- if !best_slot >= 0 then
- reg.loc <- Stack(Local !best_slot)
- else begin
- (* Allocate a new stack slot *)
- reg.loc <- Stack(Local nslots);
- Proc.num_stack_slots.(cl) <- nslots + 1
- end
- end;
- (* Cancel the preferences of this register so that they don't influence
- transitively the allocation of registers that prefer this reg. *)
- reg.prefer <- []
-
-let allocate_registers() =
- (* First pass: preallocate spill registers
- Second pass: compute the degrees
- Third pass: determine coloring order by successive removals of regs
- Fourth pass: assign registers in that order *)
- for i = 0 to Proc.num_register_classes - 1 do
- Proc.num_stack_slots.(i) <- 0;
- start_register.(i) <- 0
- done;
- List.iter allocate_spilled (Reg.all_registers());
- List.iter find_degree (Reg.all_registers());
- List.iter assign_location (remove_all_regs [])
View
17 src/asmcomp/coloring.mli
@@ -1,17 +0,0 @@
-(***********************************************************************)
-(* *)
-(* Objective Caml *)
-(* *)
-(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
-(* *)
-(* Copyright 1996 Institut National de Recherche en Informatique et *)
-(* en Automatique. All rights reserved. This file is distributed *)
-(* under the terms of the Q Public License version 1.0. *)
-(* *)
-(***********************************************************************)
-
-(* $Id: coloring.mli 2553 1999-11-17 18:59:06Z xleroy $ *)
-
-(* Register allocation by coloring of the interference graph *)
-
-val allocate_registers: unit -> unit
View
90 src/asmcomp/comballoc.ml
@@ -1,90 +0,0 @@
-(***********************************************************************)
-(* *)
-(*