diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc index f85927af54..9161cea9e0 100644 --- a/src/vm_x64.dasc +++ b/src/vm_x64.dasc @@ -12,28 +12,15 @@ | |//----------------------------------------------------------------------- | -|.if WIN -|.define X64WIN, 1 // Windows/x64 calling conventions. -|.endif -| |// Fixed register assignments for the interpreter. |// This is very fragile and has many dependencies. Caveat emptor. |.define BASE, rdx // Not C callee-save, refetched anyway. -|.if X64WIN -|.define KBASE, rdi // Must be C callee-save. -|.define PC, rsi // Must be C callee-save. -|.define DISPATCH, rbx // Must be C callee-save. -|.define KBASEd, edi -|.define PCd, esi -|.define DISPATCHd, ebx -|.else |.define KBASE, r15 // Must be C callee-save. |.define PC, rbx // Must be C callee-save. |.define DISPATCH, r14 // Must be C callee-save. |.define KBASEd, r15d |.define PCd, ebx |.define DISPATCHd, r14d -|.endif | |.define RA, rcx |.define RAd, ecx @@ -56,16 +43,6 @@ |.define ITYPE, r11 |.define ITYPEd, r11d | -|.if X64WIN -|.define CARG1, rcx // x64/WIN64 C call arguments. -|.define CARG2, rdx -|.define CARG3, r8 -|.define CARG4, r9 -|.define CARG1d, ecx -|.define CARG2d, edx -|.define CARG3d, r8d -|.define CARG4d, r9d -|.else |.define CARG1, rdi // x64/POSIX C call arguments. |.define CARG2, rsi |.define CARG3, rdx @@ -78,7 +55,6 @@ |.define CARG4d, ecx |.define CARG5d, r8d |.define CARG6d, r9d -|.endif | |// Type definitions. Some of these are only used for documentation. |.type L, lua_State @@ -98,47 +74,6 @@ | |// Stack layout while in interpreter. Must match with lj_frame.h. |//----------------------------------------------------------------------- -|.if X64WIN // x64/Windows stack layout -| -|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--). -|.macro saveregs_ -| push rdi; push rsi; push rbx -| sub rsp, CFRAME_SPACE -|.endmacro -|.macro saveregs -| push rbp; saveregs_ -|.endmacro -|.macro restoreregs -| add rsp, CFRAME_SPACE -| pop rbx; pop rsi; pop rdi; pop rbp -|.endmacro -| -|.define SAVE_CFRAME, aword [rsp+aword*13] -|.define SAVE_PC, aword [rsp+aword*12] -|.define SAVE_L, aword [rsp+aword*11] -|.define SAVE_ERRF, dword [rsp+dword*21] -|.define SAVE_NRES, dword [rsp+dword*20] -|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by interpreter -|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter. -|.define SAVE_R4, aword [rsp+aword*8] -|.define SAVE_R3, aword [rsp+aword*7] -|.define SAVE_R2, aword [rsp+aword*6] -|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves. -|.define ARG5, aword [rsp+aword*4] -|.define CSAVE_4, aword [rsp+aword*3] -|.define CSAVE_3, aword [rsp+aword*2] -|.define CSAVE_2, aword [rsp+aword*1] -|.define CSAVE_1, aword [rsp] //<-- rsp while in interpreter. -|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by callee -| -|.define ARG5d, dword [rsp+dword*8] -|.define TMP1, ARG5 // TMP1 overlaps ARG5 -|.define TMP1d, ARG5d -|.define TMP1hi, dword [rsp+dword*9] -|.define MULTRES, TMP1d // MULTRES overlaps TMP1d. -| -|//----------------------------------------------------------------------- -|.else // x64/POSIX stack layout | |.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--). |.macro saveregs_ @@ -175,8 +110,6 @@ |.define TMP1hi, dword [rsp+dword*1] |.define MULTRES, TMP1d // MULTRES overlaps TMP1d. | -|.endif -| |//----------------------------------------------------------------------- | |// Instruction headers. @@ -199,22 +132,9 @@ |.endmacro | |// Instruction footer. -|.if 1 | // Replicated dispatch. Less unpredictable branches, but higher I-Cache use. | .define ins_next, ins_NEXT | .define ins_next_, ins_NEXT -|.else -| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. -| // Affects only certain kinds of benchmarks (and only with -j off). -| // Around 10%-30% slower on Core2, a lot more slower on P4. -| .macro ins_next -| jmp ->ins_next -| .endmacro -| .macro ins_next_ -| ->ins_next: -| ins_NEXT -| .endmacro -|.endif | |// Call decode and dispatch. |.macro ins_callt @@ -484,12 +404,10 @@ static void build_subroutines(BuildCtx *ctx) | jmp ->vm_leave_unw | |->vm_unwind_rethrow: - |.if not X64WIN | mov CARG1, SAVE_L | mov CARG2d, eax | restoreregs | jmp extern lj_err_throw // (lua_State *L, int errcode) - |.endif | |->vm_unwind_ff: // Unwind C stack, return from ff pcall. | // (void *cframe) @@ -675,10 +593,8 @@ static void build_subroutines(BuildCtx *ctx) | mov RC, RA // ... in [RC] | mov PC, [RB-24] // Restore PC from [cont|PC]. | mov RA, qword [RB-32] // May be negative on WIN64 with debug. - |.if FFI | cmp RA, 1 | jbe >1 - |.endif | mov LFUNC:KBASE, [BASE-16] | cleartp LFUNC:KBASE | mov KBASE, LFUNC:KBASE->pc @@ -686,7 +602,6 @@ static void build_subroutines(BuildCtx *ctx) | // BASE = base, RC = result, RB = meta base | jmp RA // Jump to continuation. | - |.if FFI |1: | je ->cont_ffi_callback // cont = 1: return from FFI callback. | // cont = 0: Tail call from C function. @@ -694,7 +609,6 @@ static void build_subroutines(BuildCtx *ctx) | shr RBd, 3 | lea RDd, [RBd-3] | jmp ->vm_call_tail - |.endif | |->cont_cat: // BASE = base, RC = result, RB = mbase | movzx RAd, PC_RB @@ -704,21 +618,12 @@ static void build_subroutines(BuildCtx *ctx) | je ->cont_ra | neg RA | shr RAd, 3 - |.if X64WIN - | mov CARG3d, RAd - | mov L:CARG1, SAVE_L - | mov L:CARG1->base, BASE - | mov RC, [RC] - | mov [RB], RC - | mov CARG2, RB - |.else | mov L:CARG1, SAVE_L | mov L:CARG1->base, BASE | mov CARG3d, RAd | mov RA, [RC] | mov [RB], RA | mov CARG2, RB - |.endif | jmp ->BC_CAT_Z | |//-- Table indexing metamethods ----------------------------------------- @@ -850,18 +755,11 @@ static void build_subroutines(BuildCtx *ctx) | jmp ->vm_call_dispatch_f | |->vmeta_tsetr: - |.if X64WIN - | mov L:CARG1, SAVE_L - | mov CARG3d, RCd - | mov L:CARG1->base, BASE - | xchg CARG2, TAB:RB // Caveat: CARG2 == BASE. - |.else | mov L:CARG1, SAVE_L | mov CARG2, TAB:RB | mov L:CARG1->base, BASE | mov RB, BASE // Save BASE. | mov CARG3d, RCd // Caveat: CARG3 == BASE. - |.endif | mov SAVE_PC, PC | call extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) | // TValue * returned in eax (RC). @@ -876,13 +774,8 @@ static void build_subroutines(BuildCtx *ctx) | movzx RAd, PC_RA | mov L:RB, SAVE_L | mov L:RB->base, BASE // Caveat: CARG2/CARG3 == BASE. - |.if X64WIN - | lea CARG3, [BASE+RD*8] - | lea CARG2, [BASE+RA*8] - |.else | lea CARG2, [BASE+RA*8] | lea CARG3, [BASE+RD*8] - |.endif | mov CARG1, L:RB // Caveat: CARG1/CARG4 == RA. | movzx CARG4d, PC_OP | mov SAVE_PC, PC @@ -918,28 +811,18 @@ static void build_subroutines(BuildCtx *ctx) |->vmeta_equal: | cleartp TAB:RD | sub PC, 4 - |.if X64WIN - | mov CARG3, RD - | mov CARG4d, RBd - | mov L:RB, SAVE_L - | mov L:RB->base, BASE // Caveat: CARG2 == BASE. - | mov CARG2, RA - | mov CARG1, L:RB // Caveat: CARG1 == RA. - |.else | mov CARG2, RA | mov CARG4d, RBd // Caveat: CARG4 == RA. | mov L:RB, SAVE_L | mov L:RB->base, BASE // Caveat: CARG3 == BASE. | mov CARG3, RD | mov CARG1, L:RB - |.endif | mov SAVE_PC, PC | call extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) | // 0/1 or TValue * (metamethod) returned in eax (RC). | jmp <3 | |->vmeta_equal_cd: - |.if FFI | sub PC, 4 | mov L:RB, SAVE_L | mov L:RB->base, BASE @@ -949,7 +832,6 @@ static void build_subroutines(BuildCtx *ctx) | call extern lj_meta_equal_cd // (lua_State *L, BCIns ins) | // 0/1 or TValue * (metamethod) returned in eax (RC). | jmp <3 - |.endif | |->vmeta_istype: | mov L:RB, SAVE_L @@ -988,16 +870,6 @@ static void build_subroutines(BuildCtx *ctx) | lea RB, [BASE+RB*8] |2: | lea RA, [BASE+RA*8] - |.if X64WIN - | mov CARG3, RB - | mov CARG4, RC - | movzx RCd, PC_OP - | mov ARG5d, RCd - | mov L:RB, SAVE_L - | mov L:RB->base, BASE // Caveat: CARG2 == BASE. - | mov CARG2, RA - | mov CARG1, L:RB // Caveat: CARG1 == RA. - |.else | movzx CARG5d, PC_OP | mov CARG2, RA | mov CARG4, RC // Caveat: CARG4 == RA. @@ -1005,7 +877,6 @@ static void build_subroutines(BuildCtx *ctx) | mov L:CARG1->base, BASE // Caveat: CARG3 == BASE. | mov CARG3, RB | mov L:RB, L:CARG1 - |.endif | mov SAVE_PC, PC | call extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) | // NULL (finished) or TValue * (metamethod) returned in eax (RC). @@ -1052,18 +923,10 @@ static void build_subroutines(BuildCtx *ctx) | // BASE = old base, RA = new base, RC = nargs+1, PC = return | mov TMP1d, NARGS:RDd // Save RA, RC for us. | mov RB, RA - |.if X64WIN - | mov L:TMPR, SAVE_L - | mov L:TMPR->base, BASE // Caveat: CARG2 is BASE. - | lea CARG2, [RA-16] - | lea CARG3, [RA+NARGS:RD*8-8] - | mov CARG1, L:TMPR // Caveat: CARG1 is RA. - |.else | mov L:CARG1, SAVE_L | mov L:CARG1->base, BASE // Caveat: CARG3 is BASE. | lea CARG2, [RA-16] | lea CARG3, [RA+NARGS:RD*8-8] - |.endif | mov SAVE_PC, PC | call extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) | mov RA, RB @@ -1242,20 +1105,11 @@ static void build_subroutines(BuildCtx *ctx) | jmp ->fff_res1 | |.ffunc_2 rawget - |.if X64WIN - | mov TAB:RA, [BASE] - | checktab TAB:RA, ->fff_fallback - | mov RB, BASE // Save BASE. - | lea CARG3, [BASE+8] - | mov CARG2, TAB:RA // Caveat: CARG2 == BASE. - | mov CARG1, SAVE_L - |.else | mov TAB:CARG2, [BASE] | checktab TAB:CARG2, ->fff_fallback | mov RB, BASE // Save BASE. | lea CARG3, [BASE+8] // Caveat: CARG3 == BASE. | mov CARG1, SAVE_L - |.endif | call extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) | // cTValue * returned in eax (RD). | mov BASE, RB // Restore BASE. @@ -1293,9 +1147,7 @@ static void build_subroutines(BuildCtx *ctx) | mov L:RB, SAVE_L | mov L:RB->base, BASE // Add frame since C call can throw. | mov SAVE_PC, PC // Redundant (but a defined value). - |.if not X64WIN | mov CARG2, BASE // Otherwise: CARG2 == BASE - |.endif | mov L:CARG1, L:RB | call extern lj_strfmt_num // (lua_State *L, lua_Number *np) | // GCstr returned in eax (RD). @@ -1308,25 +1160,14 @@ static void build_subroutines(BuildCtx *ctx) |.ffunc_1 next | je >2 // Missing 2nd arg? |1: - |.if X64WIN - | mov RA, [BASE] - | checktab RA, ->fff_fallback - |.else | mov CARG2, [BASE] | checktab CARG2, ->fff_fallback - |.endif | mov L:RB, SAVE_L | mov L:RB->base, BASE // Add frame since C call can throw. | mov L:RB->top, BASE // Dummy frame length is ok. | mov PC, [BASE-8] - |.if X64WIN - | lea CARG3, [BASE+8] - | mov CARG2, RA // Caveat: CARG2 == BASE. - | mov CARG1, L:RB - |.else | lea CARG3, [BASE+8] // Caveat: CARG3 == BASE. | mov CARG1, L:RB - |.endif | mov SAVE_PC, PC // Needed for ITERN fallback. | call extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) | // Flag returned in eax (RD). @@ -1386,16 +1227,9 @@ static void build_subroutines(BuildCtx *ctx) | jmp ->fff_res2 |2: // Check for empty hash part first. Otherwise call C function. | cmp dword TAB:RB->hmask, 0; je ->fff_res0 - |.if X64WIN - | mov TMPR, BASE - | mov CARG2d, RAd - | mov CARG1, TAB:RB - | mov RB, TMPR - |.else | mov CARG1, TAB:RB | mov RB, BASE // Save BASE. | mov CARG2d, RAd // Caveat: CARG2 == BASE - |.endif | call extern lj_tab_getinth // (GCtab *t, int32_t key) | // cTValue * or NULL returned in eax (RD). | mov BASE, RB @@ -1718,11 +1552,7 @@ static void build_subroutines(BuildCtx *ctx) | |.ffunc_n math_frexp | mov RB, BASE - |.if X64WIN - | lea CARG2, TMP1 // Caveat: CARG2 == BASE - |.else | lea CARG1, TMP1 - |.endif | call extern frexp | mov BASE, RB | mov RBd, TMP1d @@ -1735,11 +1565,7 @@ static void build_subroutines(BuildCtx *ctx) | |.ffunc_n math_modf | mov RB, BASE - |.if X64WIN - | lea CARG2, [BASE-16] // Caveat: CARG2 == BASE - |.else | lea CARG1, [BASE-16] - |.endif | call extern modf | mov BASE, RB | mov PC, [BASE-8] @@ -1854,19 +1680,11 @@ static void build_subroutines(BuildCtx *ctx) |.macro ffstring_op, name | .ffunc_1 string_ .. name | ffgccheck - |.if X64WIN - | mov STR:TMPR, [BASE] - | checkstr STR:TMPR, ->fff_fallback - |.else | mov STR:CARG2, [BASE] | checkstr STR:CARG2, ->fff_fallback - |.endif | mov L:RB, SAVE_L | lea SBUF:CARG1, [DISPATCH+DISPATCH_GL(tmpbuf)] | mov L:RB->base, BASE - |.if X64WIN - | mov STR:CARG2, STR:TMPR // Caveat: CARG2 == BASE - |.endif | mov RC, SBUF:CARG1->b | mov SBUF:CARG1->L, L:RB | mov SBUF:CARG1->p, RC @@ -2040,7 +1858,6 @@ static void build_subroutines(BuildCtx *ctx) |//----------------------------------------------------------------------- | |->vm_record: // Dispatch target for recording phase. - |.if JIT | // Decrement the hookcount for consistency, but always do the call. | test RDL, HOOK_ACTIVE | jnz >1 @@ -2048,7 +1865,6 @@ static void build_subroutines(BuildCtx *ctx) | jz >1 | dec dword [DISPATCH+DISPATCH_GL(hookcount)] | jmp >1 - |.endif | |->vm_rethook: // Dispatch target for return hooks. | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)] @@ -2090,7 +1906,6 @@ static void build_subroutines(BuildCtx *ctx) | jmp <4 | |->vm_hotloop: // Hot loop counter underflow. - |.if JIT | mov LFUNC:RB, [BASE-16] // Same as curr_topL(L). | cleartp LFUNC:RB | mov RB, LFUNC:RB->pc @@ -2105,20 +1920,15 @@ static void build_subroutines(BuildCtx *ctx) | mov SAVE_PC, PC | call extern lj_trace_hot // (jit_State *J, const BCIns *pc) | jmp <3 - |.endif | |->vm_callhook: // Dispatch target for call hooks. | mov SAVE_PC, PC - |.if JIT | jmp >1 - |.endif | |->vm_hotcall: // Hot call counter underflow. - |.if JIT | mov SAVE_PC, PC | or PC, 1 // Marker for hot call. |1: - |.endif | lea RD, [BASE+NARGS:RD*8-8] | mov L:RB, SAVE_L | mov L:RB->base, BASE @@ -2128,9 +1938,7 @@ static void build_subroutines(BuildCtx *ctx) | call extern lj_dispatch_call // (lua_State *L, const BCIns *pc) | // ASMFunction returned in eax/rax (RD). | mov SAVE_PC, 0 // Invalidate for subsequent line hook. - |.if JIT | and PC, -2 - |.endif | mov BASE, L:RB->base | mov RA, RD | mov RD, L:RB->top @@ -2142,7 +1950,6 @@ static void build_subroutines(BuildCtx *ctx) | jmp RB | |->cont_stitch: // Trace stitching. - |.if JIT | // BASE = base, RC = result, RB = mbase | mov TRACE:ITYPE, [RB-40] // Save previous trace. | cleartp TRACE:ITYPE @@ -2191,7 +1998,6 @@ static void build_subroutines(BuildCtx *ctx) | mov aword [RA], LJ_TNIL | add RA, 8 | jmp <3 - |.endif | |//----------------------------------------------------------------------- |//-- Trace exit handler ------------------------------------------------- @@ -2200,7 +2006,6 @@ static void build_subroutines(BuildCtx *ctx) |// Called from an exit stub with the exit number on the stack. |// The 16 bit exit number is stored with two (sign-extended) push imm8. |->vm_exit_handler: - |.if JIT | push r13; push r12 | push r11; push r10; push r9; push r8 | push rdi; push rsi; push rbp; lea rbp, [rsp+88]; push rbp @@ -2213,11 +2018,7 @@ static void build_subroutines(BuildCtx *ctx) | set_vmstate EXIT | mov [DISPATCH+DISPATCH_J(exitno)], RCd | mov [DISPATCH+DISPATCH_J(parent)], RAd - |.if X64WIN - | sub rsp, 16*8+4*8 // Room for SSE regs + save area. - |.else | sub rsp, 16*8 // Room for SSE regs. - |.endif | add rbp, -128 | movsd qword [rbp-8], xmm15; movsd qword [rbp-16], xmm14 | movsd qword [rbp-24], xmm13; movsd qword [rbp-32], xmm12 @@ -2232,11 +2033,7 @@ static void build_subroutines(BuildCtx *ctx) | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)] | mov aword [DISPATCH+DISPATCH_J(L)], L:RB | mov L:RB->base, BASE - |.if X64WIN - | lea CARG2, [rsp+4*8] - |.else | mov CARG2, rsp - |.endif | lea CARG1, [DISPATCH+GG_DISP2J] | mov qword [DISPATCH+DISPATCH_GL(jit_base)], 0 | call extern lj_trace_exit // (jit_State *J, ExitState *ex) @@ -2247,36 +2044,14 @@ static void build_subroutines(BuildCtx *ctx) | mov BASE, L:RB->base | mov PC, [RA+CFRAME_OFS_PC] // Get SAVE_PC. | jmp >1 - |.endif |->vm_exit_interp: | // RD = MULTRES or negated error code, BASE, PC and DISPATCH set. - |.if JIT | // Restore additional callee-save registers only used in compiled code. - |.if X64WIN - | lea RA, [rsp+10*16+4*8] - |1: - | movdqa xmm15, [RA-10*16] - | movdqa xmm14, [RA-9*16] - | movdqa xmm13, [RA-8*16] - | movdqa xmm12, [RA-7*16] - | movdqa xmm11, [RA-6*16] - | movdqa xmm10, [RA-5*16] - | movdqa xmm9, [RA-4*16] - | movdqa xmm8, [RA-3*16] - | movdqa xmm7, [RA-2*16] - | mov rsp, RA // Reposition stack to C frame. - | movdqa xmm6, [RA-1*16] - | mov r15, CSAVE_1 - | mov r14, CSAVE_2 - | mov r13, CSAVE_3 - | mov r12, CSAVE_4 - |.else | lea RA, [rsp+16] |1: | mov r13, [RA-8] | mov r12, [RA] | mov rsp, RA // Reposition stack to C frame. - |.endif | test RDd, RDd; js >9 // Check for error from exit. | mov L:RB, SAVE_L | mov MULTRES, RDd @@ -2320,7 +2095,6 @@ static void build_subroutines(BuildCtx *ctx) | mov CARG1, L:RB | mov CARG2, RD | call extern lj_err_throw // (lua_State *L, int errcode) - |.endif | |//----------------------------------------------------------------------- |//-- Math helper functions ---------------------------------------------- @@ -2442,7 +2216,6 @@ static void build_subroutines(BuildCtx *ctx) |// int lj_vm_cpuid(uint32_t f, uint32_t res[4]) |->vm_cpuid: | mov eax, CARG1d - | .if X64WIN; push rsi; mov rsi, CARG2; .endif | push rbx | xor ecx, ecx | cpuid @@ -2451,7 +2224,6 @@ static void build_subroutines(BuildCtx *ctx) | mov [rsi+8], ecx | mov [rsi+12], edx | pop rbx - | .if X64WIN; pop rsi; .endif | ret | |//----------------------------------------------------------------------- @@ -2470,7 +2242,6 @@ static void build_subroutines(BuildCtx *ctx) | |// Handler for callback functions. Callback slot number in ah/al. |->vm_ffi_callback: - |.if FFI |.type CTSTATE, CTState, PC | saveregs_ // ebp/rbp already saved. ebp now holds global_State *. | lea DISPATCH, [ebp+GG_G2DISP] @@ -2485,9 +2256,6 @@ static void build_subroutines(BuildCtx *ctx) | movsd qword CTSTATE->cb.fpr[1], xmm1 | movsd qword CTSTATE->cb.fpr[2], xmm2 | movsd qword CTSTATE->cb.fpr[3], xmm3 - |.if X64WIN - | lea rax, [rsp+CFRAME_SIZE+4*8] - |.else | lea rax, [rsp+CFRAME_SIZE] | mov CTSTATE->cb.gpr[4], CARG5 | mov CTSTATE->cb.gpr[5], CARG6 @@ -2495,7 +2263,6 @@ static void build_subroutines(BuildCtx *ctx) | movsd qword CTSTATE->cb.fpr[5], xmm5 | movsd qword CTSTATE->cb.fpr[6], xmm6 | movsd qword CTSTATE->cb.fpr[7], xmm7 - |.endif | mov CTSTATE->cb.stack, rax | mov CARG2, rsp | mov SAVE_PC, CTSTATE // Any value outside of bytecode is ok. @@ -2511,10 +2278,8 @@ static void build_subroutines(BuildCtx *ctx) | shr RD, 3 | add RD, 1 | ins_callt - |.endif | |->cont_ffi_callback: // Return from FFI callback. - |.if FFI | mov L:RA, SAVE_L | mov CTSTATE, [DISPATCH+DISPATCH_GL(ctype_state)] | mov aword CTSTATE->L, L:RA @@ -2526,11 +2291,9 @@ static void build_subroutines(BuildCtx *ctx) | mov rax, CTSTATE->cb.gpr[0] | movsd xmm0, qword CTSTATE->cb.fpr[0] | jmp ->vm_leave_unw - |.endif | |->vm_ffi_call: // Call C function via FFI. | // Caveat: needs special frame unwinding, see below. - |.if FFI | .type CCSTATE, CCallState, rbx | push rbp; mov rbp, rsp; push rbx; mov CCSTATE, CARG1 | @@ -2554,35 +2317,28 @@ static void build_subroutines(BuildCtx *ctx) | mov CARG2, CCSTATE->gpr[1] | mov CARG3, CCSTATE->gpr[2] | mov CARG4, CCSTATE->gpr[3] - |.if not X64WIN | mov CARG5, CCSTATE->gpr[4] | mov CARG6, CCSTATE->gpr[5] - |.endif | test eax, eax; jz >5 | movaps xmm0, CCSTATE->fpr[0] | movaps xmm1, CCSTATE->fpr[1] | movaps xmm2, CCSTATE->fpr[2] | movaps xmm3, CCSTATE->fpr[3] - |.if not X64WIN | cmp eax, 4; jbe >5 | movaps xmm4, CCSTATE->fpr[4] | movaps xmm5, CCSTATE->fpr[5] | movaps xmm6, CCSTATE->fpr[6] | movaps xmm7, CCSTATE->fpr[7] - |.endif |5: | | call aword CCSTATE->func | | mov CCSTATE->gpr[0], rax | movaps CCSTATE->fpr[0], xmm0 - |.if not X64WIN | mov CCSTATE->gpr[1], rdx | movaps CCSTATE->fpr[1], xmm1 - |.endif | | mov rbx, [rbp-8]; leave; ret - |.endif |// Note: vm_ffi_call must be the last function in this object file! | |//----------------------------------------------------------------------- @@ -2678,13 +2434,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | movzx RDd, PC_RD | branchPC RD |2: // NE: Fallthrough to next instruction. - |.if not FFI - |3: - |.endif } else { - |.if not FFI - |3: - |.endif |2: // NE: Branch to the target. | movzx RDd, PC_RD | branchPC RD @@ -2694,10 +2444,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | if (op == BC_ISEQV || op == BC_ISNEV) { |5: // Either or both types are not numbers. - |.if FFI | cmp RBd, LJ_TCDATA; je ->vmeta_equal_cd | cmp ITYPEd, LJ_TCDATA; je ->vmeta_equal_cd - |.endif | cmp RA, RD | je <1 // Same GCobjs or pvalues? | cmp RBd, ITYPEd @@ -2720,12 +2468,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) } | jmp ->vmeta_equal // Handle __eq metamethod. } else { - |.if FFI |3: | cmp ITYPEd, LJ_TCDATA | jne <2 | jmp ->vmeta_equal_cd - |.endif } break; case BC_ISEQS: case BC_ISNES: @@ -3024,13 +2770,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ins_next break; case BC_KCDATA: - |.if FFI | ins_AND // RA = dst, RD = cdata const (~) | mov RD, [KBASE+RD*8] | settp RD, LJ_TCDATA | mov [BASE+RA*8], RD | ins_next - |.endif break; case BC_KSHORT: | ins_AD // RA = dst, RD = signed int16 literal @@ -3106,12 +2850,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v) | jz <1 | // Crossed a write barrier. Move the barrier forward. - |.if not X64WIN | mov CARG2, RB | mov RB, BASE // Save BASE. - |.else - | xchg CARG2, RB // Save BASE (CARG2 == BASE). - |.endif | lea GL:CARG1, [DISPATCH+GG_DISP2G] | call extern lj_gc_barrieruv // (global_State *g, TValue *tv) | mov BASE, RB // Restore BASE. @@ -3698,9 +3438,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_ITERN: | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) - |.if JIT | // NYI: add hotloop, record BC_ITERN. - |.endif | mov TAB:RB, [BASE+RA*8-16] | cleartp TAB:RB | mov RCd, [BASE+RA*8-8] // Get index from control var. @@ -3922,9 +3660,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |.define FOR_EXT, [RA+24] case BC_FORL: - |.if JIT | hotloop RBd - |.endif | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op. break; @@ -3982,9 +3718,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) break; case BC_ITERL: - |.if JIT | hotloop RBd - |.endif | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op. break; @@ -4009,9 +3743,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ins_A // RA = base, RD = target (loop extent) | // Note: RA/RD is only used by trace recorder to determine scope/extent | // This opcode does NOT jump, it's only purpose is to detect a hot loop. - |.if JIT | hotloop RBd - |.endif | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op. break; @@ -4021,7 +3753,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) break; case BC_JLOOP: - |.if JIT | ins_AD // RA = base (ignored), RD = traceno | mov RA, [DISPATCH+DISPATCH_J(trace)] | mov TRACE:RD, [RA+RD*8] @@ -4030,30 +3761,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE | mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB | // Save additional callee-save registers only used in compiled code. - |.if X64WIN - | mov CSAVE_4, r12 - | mov CSAVE_3, r13 - | mov CSAVE_2, r14 - | mov CSAVE_1, r15 - | mov RA, rsp - | sub rsp, 10*16+4*8 - | movdqa [RA-1*16], xmm6 - | movdqa [RA-2*16], xmm7 - | movdqa [RA-3*16], xmm8 - | movdqa [RA-4*16], xmm9 - | movdqa [RA-5*16], xmm10 - | movdqa [RA-6*16], xmm11 - | movdqa [RA-7*16], xmm12 - | movdqa [RA-8*16], xmm13 - | movdqa [RA-9*16], xmm14 - | movdqa [RA-10*16], xmm15 - |.else | sub rsp, 16 | mov [rsp+16], r12 | mov [rsp+8], r13 - |.endif | jmp RD - |.endif break; case BC_JMP: @@ -4072,9 +3783,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) */ case BC_FUNCF: - |.if JIT | hotcall RBd - |.endif case BC_FUNCV: /* NYI: compiled vararg functions. */ | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op. break;