From 5aa2ac2246e23eac95f66447cfc6c532a4173717 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Sat, 21 Jan 2017 23:49:29 -0500 Subject: [PATCH 01/11] Teach the assembler about PowerPC extended mnemonics. Also make a few changes to basic mnemonics. Fix typo in name of the basic "creqv". Add the basic "addc" and relatives, because it would be odd to have the extended "subc" without "addc". Fix the basic "rldicl", "rldicr", "rldic", "rldimi" to correctly encode the 6-bit MB field. Fix "slw" and relatives to correctly swap their RA and RS operands. Add many, but not all, of the extended mnemonics from IBM's Power ISA Version 2.06 Book I Appendix E. (I used 2.06, published 2009, just because I already had the PDF of it.) This commit includes mnemonics for branching, subtraction, traps, bit rotation, and a few others, like "mflr" and "nop". The assembler now understands branches like `beq cr7, label` and bit shifts like `slwi r7, r7, 2`. These encode the same machine instructions as the basic "bc" and "rlwinm". Some operands to basic names become optional. The assembler no longer requires the level in "sc" or the branch hint in "bcctr" and "bclr"; they default to zero. Some extended names take an optional branch hint or condition register. Some extended names are still missing. I don't provide names with static branch prediction, like "beq+" or "bge-", because the assembler parses '+' and '-' as operators, not as part of an instruction name. I also don't provide some names that 2.06 has for moving to or from the condition register or some special purpose registers, names like "mtcr" or "mfuamr". This commit also deletes some unused tokens and one unused yacc rule. 
--- mach/powerpc/as/mach0.c | 6 + mach/powerpc/as/mach2.c | 57 ++++--- mach/powerpc/as/mach3.c | 333 +++++++++++++++++++++++++++++++++++++--- mach/powerpc/as/mach4.c | 200 +++++++++++++++++++++--- 4 files changed, 531 insertions(+), 65 deletions(-) diff --git a/mach/powerpc/as/mach0.c b/mach/powerpc/as/mach0.c index 325c08910..3246828fc 100644 --- a/mach/powerpc/as/mach0.c +++ b/mach/powerpc/as/mach0.c @@ -31,3 +31,9 @@ typedef uint32_t quad; #define VALWIDTH 8 #define FIXUPFLAGS (RELBR | RELWR) + +/* 6-bit mb (mask begin) or me (mask end) field */ +#define MB6(v) (((v) & 0x1F)<<6 | ((v) & 0x20)>>0) + +/* 6-bit sh (shift) field */ +#define SH6(v) (((v) & 0x1F)<<11 | ((v) & 0x20)>>4) diff --git a/mach/powerpc/as/mach2.c b/mach/powerpc/as/mach2.c index 3ecaf10cf..555b92c38 100644 --- a/mach/powerpc/as/mach2.c +++ b/mach/powerpc/as/mach2.c @@ -8,20 +8,35 @@ %token FPR %token CR %token C +%token OP_HI OP_HA OP_LO %token OP +%token OP_BDA +%token OP_BDL %token OP_BF %token OP_BF_BFA %token OP_BF_FRA_FRB %token OP_BF_L_RA_RB %token OP_BF_L_RA_SI %token OP_BF_L_RA_UI +%token OP_BF_RA_RB +%token OP_BF_RA_SI +%token OP_BF_RA_UI %token OP_BF_U_C +%token OP_BH +%token OP_BI_BDA +%token OP_BI_BDL +%token OP_BI_BH +%token OP_BICR_BDA +%token OP_BICR_BDL +%token OP_BICR_BH %token OP_BO_BI_BDA %token OP_BO_BI_BDL %token OP_BO_BI_BH %token OP_BT_C +%token OP_BT_BA_BA %token OP_BT_BA_BB +%token OP_BT_BT_BT %token OP_FLM_FRB_C %token OP_FRS_RA_D %token OP_FRS_RA_RB @@ -32,16 +47,18 @@ %token OP_FRT_FRB_C %token OP_FRT_RA_D %token OP_FRT_RA_RB -%token OP_L %token OP_LEV %token OP_LIA %token OP_LIL -%token OP_L_RB -%token OP_RA_RB -%token OP_RB -%token OP_RS +%token OP_LI32 +%token OP_RA_RS_RB_C +%token OP_RA_RS_RB_MB5_ME5_C +%token OP_RA_RS_RB_MB6_C +%token OP_RA_RS_SH5_C +%token OP_RA_RS_SH5_MB5_ME5_C +%token OP_RA_RS_SH6_C +%token OP_RA_RS_SH6_MB6_C %token OP_RS_FXM -%token OP_RS_L %token OP_RS_RA %token OP_RS_RA_C %token OP_RS_RA_D @@ -50,14 +67,6 @@ %token OP_RS_RA_RB %token 
OP_RS_RA_RB_C %token OP_RS_RA_RA_C -%token OP_RS_RA_RB_MB5_ME5_C -%token OP_RS_RA_RB_MB6_C -%token OP_RS_RA_RB_ME6_C -%token OP_RS_RA_SH_MB5_ME5_C -%token OP_RS_RA_SH_MB6_SH_C -%token OP_RS_RA_SH_ME6_SH_C -%token OP_RS_RA_SH5_C -%token OP_RS_RA_SH6_C %token OP_RS_RA_UI %token OP_RS_RA_UI_CC %token OP_RS_RB @@ -73,22 +82,26 @@ %token OP_RT_RA_RB_C %token OP_RT_RA_SI %token OP_RT_RA_SI_addic +%token OP_RT_RA_SI_subi +%token OP_RT_RA_SI_subic %token OP_RT_RB +%token OP_RT_RB_RA_C +%token OP_RT_SI %token OP_RT_SPR %token OP_RT_SR %token OP_RT_TBR %token OP_TH_RA_RB %token OP_TO_RA_RB %token OP_TO_RA_SI - -%token OP_LA -%token OP_LI32 - -%token OP_POWERPC_FIXUP -%token OP_HI OP_HA OP_LO +%token OP_TOX_RA_RB +%token OP_TOX_RA_SI +%token OP_clrlsldi OP_clrldi OP_clrrdi OP_extldi OP_extrdi +%token OP_insrdi OP_rotrdi OP_sldi OP_srdi +%token OP_clrlslwi OP_clrlwi OP_clrrwi OP_extlwi OP_extrwi +%token OP_inslwi OP_insrwi OP_rotlwi OP_rotrwi OP_slwi OP_srwi /* Other token types */ %type c -%type e16 u8 u7 u6 u5 u4 u2 u1 -%type nb ds bda bdl lia lil spr_num +%type e16 negate16 u8 u7 u6 u5 u4 u2 u1 +%type opt_bh cr_opt nb ds bda bdl lia lil spr_num diff --git a/mach/powerpc/as/mach3.c b/mach/powerpc/as/mach3.c index 724ba7312..16c1e6ae0 100644 --- a/mach/powerpc/as/mach3.c +++ b/mach/powerpc/as/mach3.c @@ -99,9 +99,6 @@ /* Special instructions */ 0, OP_LI32, 0, "li32", -0, OP_LA, 0, "la", -0, OP_LA, 0, "li", -0, OP_RS_RA_RA_C, 31<<26 | 444<<1, "mr", 0, OP_HI, 0, "hi16", 0, OP_HA, 0, "ha16", 0, OP_LO, 0, "lo16", @@ -126,11 +123,173 @@ 0, OP_BT_BA_BB, 19<<26 | 193<<1, "crxor", 0, OP_BT_BA_BB, 19<<26 | 225<<1, "crnand", 0, OP_BT_BA_BB, 19<<26 | 33<<1, "crnor", -0, OP_BT_BA_BB, 19<<26 | 289<<1, "crneqv", +0, OP_BT_BA_BB, 19<<26 | 289<<1, "creqv", 0, OP_BT_BA_BB, 19<<26 | 129<<1, "crandc", 0, OP_BT_BA_BB, 19<<26 | 417<<1, "crorc", 0, OP_BF_BFA, 19<<26 | 0<<1, "mcrf", +/* extended mnemonics for bc, bcctr, bclr */ +0, OP_BH, 19<<26 | 20<<21 | 528<<1 | 0<<0, "bctr", +0, OP_BH, 19<<26 | 
20<<21 | 528<<1 | 1<<0, "bctrl", +0, OP_BDL, 16<<26 | 16<<21 | 0<<1 | 0<<0, "bdnz", +0, OP_BDA, 16<<26 | 16<<21 | 1<<1 | 0<<0, "bdnza", +0, OP_BH, 19<<26 | 16<<21 | 16<<1 | 0<<0, "bdnzlr", +0, OP_BDL, 16<<26 | 16<<21 | 0<<1 | 1<<0, "bdnzl", +0, OP_BDA, 16<<26 | 16<<21 | 1<<1 | 1<<0, "bdnzla", +0, OP_BH, 19<<26 | 16<<21 | 16<<1 | 1<<0, "bdnzlrl", +0, OP_BI_BDL, 16<<26 | 0<<21 | 0<<1 | 0<<0, "bdnzf", +0, OP_BI_BDA, 16<<26 | 0<<21 | 1<<1 | 0<<0, "bdnzfa", +0, OP_BI_BH, 19<<26 | 0<<21 | 16<<1 | 0<<0, "bdnzflr", +0, OP_BI_BDL, 16<<26 | 0<<21 | 0<<1 | 1<<0, "bdnzfl", +0, OP_BI_BDA, 16<<26 | 0<<21 | 1<<1 | 1<<0, "bdnzfla", +0, OP_BI_BH, 19<<26 | 0<<21 | 16<<1 | 1<<0, "bdnzflrl", +0, OP_BI_BDL, 16<<26 | 8<<21 | 0<<1 | 0<<0, "bdnzt", +0, OP_BI_BDA, 16<<26 | 8<<21 | 1<<1 | 0<<0, "bdnzta", +0, OP_BI_BH, 19<<26 | 8<<21 | 16<<1 | 0<<0, "bdnztlr", +0, OP_BI_BDL, 16<<26 | 8<<21 | 0<<1 | 1<<0, "bdnztl", +0, OP_BI_BDA, 16<<26 | 8<<21 | 1<<1 | 1<<0, "bdnztla", +0, OP_BI_BH, 19<<26 | 8<<21 | 16<<1 | 1<<0, "bdnztlrl", +0, OP_BDL, 16<<26 | 18<<21 | 0<<1 | 0<<0, "bdz", +0, OP_BDA, 16<<26 | 18<<21 | 1<<1 | 0<<0, "bdza", +0, OP_BH, 19<<26 | 18<<21 | 16<<1 | 0<<0, "bdzlr", +0, OP_BDL, 16<<26 | 18<<21 | 0<<1 | 1<<0, "bdzl", +0, OP_BDA, 16<<26 | 18<<21 | 1<<1 | 1<<0, "bdzla", +0, OP_BH, 19<<26 | 18<<21 | 16<<1 | 1<<0, "bdzlrl", +0, OP_BI_BDL, 16<<26 | 2<<21 | 0<<1 | 0<<0, "bdzf", +0, OP_BI_BDA, 16<<26 | 2<<21 | 1<<1 | 0<<0, "bdzfa", +0, OP_BI_BH, 19<<26 | 2<<21 | 16<<1 | 0<<0, "bdzflr", +0, OP_BI_BDL, 16<<26 | 2<<21 | 0<<1 | 1<<0, "bdzfl", +0, OP_BI_BDA, 16<<26 | 2<<21 | 1<<1 | 1<<0, "bdzfla", +0, OP_BI_BH, 19<<26 | 2<<21 | 16<<1 | 1<<0, "bdzflrl", +0, OP_BI_BDL, 16<<26 | 10<<21 | 0<<1 | 0<<0, "bdzt", +0, OP_BI_BDA, 16<<26 | 10<<21 | 1<<1 | 0<<0, "bdzta", +0, OP_BI_BH, 19<<26 | 10<<21 | 16<<1 | 0<<0, "bdztlr", +0, OP_BI_BDL, 16<<26 | 10<<21 | 0<<1 | 1<<0, "bdztl", +0, OP_BI_BDA, 16<<26 | 10<<21 | 1<<1 | 1<<0, "bdztla", +0, OP_BI_BH, 19<<26 | 10<<21 | 16<<1 | 1<<0, "bdztlrl", +0, OP_BI_BDL, 
16<<26 | 4<<21 | 0<<1 | 0<<0, "bf", +0, OP_BI_BDA, 16<<26 | 4<<21 | 1<<1 | 0<<0, "bfa", +0, OP_BI_BH, 19<<26 | 4<<21 | 528<<1 | 0<<0, "bfctr", +0, OP_BI_BH, 19<<26 | 4<<21 | 528<<1 | 1<<0, "bfctrl", +0, OP_BI_BDL, 16<<26 | 4<<21 | 0<<1 | 1<<0, "bfl", +0, OP_BI_BDA, 16<<26 | 4<<21 | 1<<1 | 1<<0, "bfla", /* absolute: 1<<1 set, as in bfa */ +0, OP_BI_BH, 19<<26 | 4<<21 | 16<<1 | 0<<0, "bflr", +0, OP_BI_BH, 19<<26 | 4<<21 | 16<<1 | 1<<0, "bflrl", +0, OP_BH, 19<<26 | 20<<21 | 16<<1 | 0<<0, "blr", +0, OP_BH, 19<<26 | 20<<21 | 16<<1 | 1<<0, "blrl", +0, OP_BI_BDL, 16<<26 | 12<<21 | 0<<1 | 0<<0, "bt", +0, OP_BI_BDA, 16<<26 | 12<<21 | 1<<1 | 0<<0, "bta", +0, OP_BI_BH, 19<<26 | 12<<21 | 528<<1 | 0<<0, "btctr", +0, OP_BI_BH, 19<<26 | 12<<21 | 528<<1 | 1<<0, "btctrl", +0, OP_BI_BDL, 16<<26 | 12<<21 | 0<<1 | 1<<0, "btl", +0, OP_BI_BDA, 16<<26 | 12<<21 | 1<<1 | 1<<0, "btla", /* absolute: 1<<1 set, as in bta */ +0, OP_BI_BH, 19<<26 | 12<<21 | 16<<1 | 0<<0, "btlr", +0, OP_BI_BH, 19<<26 | 12<<21 | 16<<1 | 1<<0, "btlrl", + +/* extended m with condition in BI */ +0, OP_BICR_BDL, 16<<26 | 12<<21 | 2<<16 | 0<<1 | 0<<0, "beq", +0, OP_BICR_BDA, 16<<26 | 12<<21 | 2<<16 | 1<<1 | 0<<0, "beqa", +0, OP_BICR_BH, 19<<26 | 12<<21 | 2<<16 | 528<<1 | 0<<0, "beqctr", +0, OP_BICR_BH, 19<<26 | 12<<21 | 2<<16 | 528<<1 | 1<<0, "beqctrl", +0, OP_BICR_BDL, 16<<26 | 12<<21 | 2<<16 | 0<<1 | 1<<0, "beql", +0, OP_BICR_BDA, 16<<26 | 12<<21 | 2<<16 | 1<<1 | 1<<0, "beqla", +0, OP_BICR_BH, 19<<26 | 12<<21 | 2<<16 | 16<<1 | 0<<0, "beqlr", +0, OP_BICR_BH, 19<<26 | 12<<21 | 2<<16 | 16<<1 | 1<<0, "beqlrl", +0, OP_BICR_BDL, 16<<26 | 4<<21 | 0<<16 | 0<<1 | 0<<0, "bge", +0, OP_BICR_BDA, 16<<26 | 4<<21 | 0<<16 | 1<<1 | 0<<0, "bgea", +0, OP_BICR_BH, 19<<26 | 4<<21 | 0<<16 | 528<<1 | 0<<0, "bgectr", +0, OP_BICR_BH, 19<<26 | 4<<21 | 0<<16 | 528<<1 | 1<<0, "bgectrl", +0, OP_BICR_BDL, 16<<26 | 4<<21 | 0<<16 | 0<<1 | 1<<0, "bgel", +0, OP_BICR_BDA, 16<<26 | 4<<21 | 0<<16 | 1<<1 | 1<<0, "bgela", +0, OP_BICR_BH, 19<<26 | 4<<21 | 0<<16 | 16<<1 | 0<<0, "bgelr", +0, OP_BICR_BH, 19<<26 | 4<<21 | 0<<16 | 
16<<1 | 1<<0, "bgelrl", +0, OP_BICR_BDL, 16<<26 | 12<<21 | 1<<16 | 0<<1 | 0<<0, "bgt", +0, OP_BICR_BDA, 16<<26 | 12<<21 | 1<<16 | 1<<1 | 0<<0, "bgta", +0, OP_BICR_BH, 19<<26 | 12<<21 | 1<<16 | 528<<1 | 0<<0, "bgtctr", +0, OP_BICR_BH, 19<<26 | 12<<21 | 1<<16 | 528<<1 | 1<<0, "bgtctrl", +0, OP_BICR_BDL, 16<<26 | 12<<21 | 1<<16 | 0<<1 | 1<<0, "bgtl", +0, OP_BICR_BDA, 16<<26 | 12<<21 | 1<<16 | 1<<1 | 1<<0, "bgtla", +0, OP_BICR_BH, 19<<26 | 12<<21 | 1<<16 | 16<<1 | 0<<0, "bgtlr", +0, OP_BICR_BH, 19<<26 | 12<<21 | 1<<16 | 16<<1 | 1<<0, "bgtlrl", +0, OP_BICR_BDL, 16<<26 | 4<<21 | 1<<16 | 0<<1 | 0<<0, "ble", +0, OP_BICR_BDA, 16<<26 | 4<<21 | 1<<16 | 1<<1 | 0<<0, "blea", +0, OP_BICR_BH, 19<<26 | 4<<21 | 1<<16 | 528<<1 | 0<<0, "blectr", +0, OP_BICR_BH, 19<<26 | 4<<21 | 1<<16 | 528<<1 | 1<<0, "blectrl", +0, OP_BICR_BDL, 16<<26 | 4<<21 | 1<<16 | 0<<1 | 1<<0, "blel", +0, OP_BICR_BDA, 16<<26 | 4<<21 | 1<<16 | 1<<1 | 1<<0, "blela", +0, OP_BICR_BH, 19<<26 | 4<<21 | 1<<16 | 16<<1 | 0<<0, "blelr", +0, OP_BICR_BH, 19<<26 | 4<<21 | 1<<16 | 16<<1 | 1<<0, "blelrl", +0, OP_BICR_BDL, 16<<26 | 12<<21 | 0<<16 | 0<<1 | 0<<0, "blt", +0, OP_BICR_BDA, 16<<26 | 12<<21 | 0<<16 | 1<<1 | 0<<0, "blta", +0, OP_BICR_BH, 19<<26 | 12<<21 | 0<<16 | 528<<1 | 0<<0, "bltctr", +0, OP_BICR_BH, 19<<26 | 12<<21 | 0<<16 | 528<<1 | 1<<0, "bltctrl", +0, OP_BICR_BDL, 16<<26 | 12<<21 | 0<<16 | 0<<1 | 1<<0, "bltl", +0, OP_BICR_BDA, 16<<26 | 12<<21 | 0<<16 | 1<<1 | 1<<0, "bltla", +0, OP_BICR_BH, 19<<26 | 12<<21 | 0<<16 | 16<<1 | 0<<0, "bltlr", +0, OP_BICR_BH, 19<<26 | 12<<21 | 0<<16 | 16<<1 | 1<<0, "bltlrl", +0, OP_BICR_BDL, 16<<26 | 4<<21 | 2<<16 | 0<<1 | 0<<0, "bne", +0, OP_BICR_BDA, 16<<26 | 4<<21 | 2<<16 | 1<<1 | 0<<0, "bnea", +0, OP_BICR_BH, 19<<26 | 4<<21 | 2<<16 | 528<<1 | 0<<0, "bnectr", +0, OP_BICR_BH, 19<<26 | 4<<21 | 2<<16 | 528<<1 | 1<<0, "bnectrl", +0, OP_BICR_BDL, 16<<26 | 4<<21 | 2<<16 | 0<<1 | 1<<0, "bnel", +0, OP_BICR_BDA, 16<<26 | 4<<21 | 2<<16 | 1<<1 | 1<<0, "bnela", +0, OP_BICR_BH, 19<<26 | 4<<21 | 
2<<16 | 16<<1 | 0<<0, "bnelr", +0, OP_BICR_BH, 19<<26 | 4<<21 | 2<<16 | 16<<1 | 1<<0, "bnelrl", +0, OP_BICR_BDL, 16<<26 | 4<<21 | 1<<16 | 0<<1 | 0<<0, "bng", +0, OP_BICR_BDA, 16<<26 | 4<<21 | 1<<16 | 1<<1 | 0<<0, "bnga", +0, OP_BICR_BH, 19<<26 | 4<<21 | 1<<16 | 528<<1 | 0<<0, "bngctr", +0, OP_BICR_BH, 19<<26 | 4<<21 | 1<<16 | 528<<1 | 1<<0, "bngctrl", +0, OP_BICR_BDL, 16<<26 | 4<<21 | 1<<16 | 0<<1 | 1<<0, "bngl", +0, OP_BICR_BDA, 16<<26 | 4<<21 | 1<<16 | 1<<1 | 1<<0, "bngla", +0, OP_BICR_BH, 19<<26 | 4<<21 | 1<<16 | 16<<1 | 0<<0, "bnglr", +0, OP_BICR_BH, 19<<26 | 4<<21 | 1<<16 | 16<<1 | 1<<0, "bnglrl", +0, OP_BICR_BDL, 16<<26 | 4<<21 | 0<<16 | 0<<1 | 0<<0, "bnl", +0, OP_BICR_BDA, 16<<26 | 4<<21 | 0<<16 | 1<<1 | 0<<0, "bnla", +0, OP_BICR_BH, 19<<26 | 4<<21 | 0<<16 | 528<<1 | 0<<0, "bnlctr", +0, OP_BICR_BH, 19<<26 | 4<<21 | 0<<16 | 528<<1 | 1<<0, "bnlctrl", +0, OP_BICR_BDL, 16<<26 | 4<<21 | 0<<16 | 0<<1 | 1<<0, "bnll", +0, OP_BICR_BDA, 16<<26 | 4<<21 | 0<<16 | 1<<1 | 1<<0, "bnlla", +0, OP_BICR_BH, 19<<26 | 4<<21 | 0<<16 | 16<<1 | 0<<0, "bnllr", +0, OP_BICR_BH, 19<<26 | 4<<21 | 0<<16 | 16<<1 | 1<<0, "bnllrl", +0, OP_BICR_BDL, 16<<26 | 4<<21 | 3<<16 | 0<<1 | 0<<0, "bns", +0, OP_BICR_BDA, 16<<26 | 4<<21 | 3<<16 | 1<<1 | 0<<0, "bnsa", +0, OP_BICR_BH, 19<<26 | 4<<21 | 3<<16 | 528<<1 | 0<<0, "bnsctr", +0, OP_BICR_BH, 19<<26 | 4<<21 | 3<<16 | 528<<1 | 1<<0, "bnsctrl", +0, OP_BICR_BDL, 16<<26 | 4<<21 | 3<<16 | 0<<1 | 1<<0, "bnsl", +0, OP_BICR_BDA, 16<<26 | 4<<21 | 3<<16 | 1<<1 | 1<<0, "bnsla", +0, OP_BICR_BH, 19<<26 | 4<<21 | 3<<16 | 16<<1 | 0<<0, "bnslr", +0, OP_BICR_BH, 19<<26 | 4<<21 | 3<<16 | 16<<1 | 1<<0, "bnslrl", +0, OP_BICR_BDL, 16<<26 | 4<<21 | 3<<16 | 0<<1 | 0<<0, "bnu", +0, OP_BICR_BDA, 16<<26 | 4<<21 | 3<<16 | 1<<1 | 0<<0, "bnua", +0, OP_BICR_BH, 19<<26 | 4<<21 | 3<<16 | 528<<1 | 0<<0, "bnuctr", +0, OP_BICR_BH, 19<<26 | 4<<21 | 3<<16 | 528<<1 | 1<<0, "bnuctrl", +0, OP_BICR_BDL, 16<<26 | 4<<21 | 3<<16 | 0<<1 | 1<<0, "bnul", +0, OP_BICR_BDA, 16<<26 | 4<<21 | 3<<16 
| 1<<1 | 1<<0, "bnula", +0, OP_BICR_BH, 19<<26 | 4<<21 | 3<<16 | 16<<1 | 0<<0, "bnulr", +0, OP_BICR_BH, 19<<26 | 4<<21 | 3<<16 | 16<<1 | 1<<0, "bnulrl", +0, OP_BICR_BDL, 16<<26 | 12<<21 | 3<<16 | 0<<1 | 0<<0, "bso", +0, OP_BICR_BDA, 16<<26 | 12<<21 | 3<<16 | 1<<1 | 0<<0, "bsoa", +0, OP_BICR_BH, 19<<26 | 12<<21 | 3<<16 | 528<<1 | 0<<0, "bsoctr", +0, OP_BICR_BH, 19<<26 | 12<<21 | 3<<16 | 528<<1 | 1<<0, "bsoctrl", +0, OP_BICR_BDL, 16<<26 | 12<<21 | 3<<16 | 0<<1 | 1<<0, "bsol", +0, OP_BICR_BDA, 16<<26 | 12<<21 | 3<<16 | 1<<1 | 1<<0, "bsola", +0, OP_BICR_BH, 19<<26 | 12<<21 | 3<<16 | 16<<1 | 0<<0, "bsolr", +0, OP_BICR_BH, 19<<26 | 12<<21 | 3<<16 | 16<<1 | 1<<0, "bsolrl", +0, OP_BICR_BDL, 16<<26 | 12<<21 | 3<<16 | 0<<1 | 0<<0, "bun", +0, OP_BICR_BDA, 16<<26 | 12<<21 | 3<<16 | 1<<1 | 0<<0, "buna", +0, OP_BICR_BH, 19<<26 | 12<<21 | 3<<16 | 528<<1 | 0<<0, "bunctr", +0, OP_BICR_BH, 19<<26 | 12<<21 | 3<<16 | 528<<1 | 1<<0, "bunctrl", +0, OP_BICR_BDL, 16<<26 | 12<<21 | 3<<16 | 0<<1 | 1<<0, "bunl", +0, OP_BICR_BDA, 16<<26 | 12<<21 | 3<<16 | 1<<1 | 1<<0, "bunla", +0, OP_BICR_BH, 19<<26 | 12<<21 | 3<<16 | 16<<1 | 0<<0, "bunlr", +0, OP_BICR_BH, 19<<26 | 12<<21 | 3<<16 | 16<<1 | 1<<0, "bunlrl", + +/* extended m for cr logic */ +0, OP_BT_BT_BT, 19<<26 | 289<<1, "crset", +0, OP_BT_BT_BT, 19<<26 | 193<<1, "crclr", +0, OP_BT_BA_BA, 19<<26 | 449<<1, "crmove", +0, OP_BT_BA_BA, 19<<26 | 33<<1, "crnot", + /* Fixed point instructions (page 29) */ 0, OP_RT_RA_D, 34<<26, "lbz", @@ -199,6 +358,10 @@ 0, OP_RT_RA_RB_C, 31<<26 | 1<<10 | 40<<1, "subfo", 0, OP_RT_RA_SI_addic, 12<<26, "addic", /* special case C */ 0, OP_RT_RA_SI, 8<<26, "subfic", +0, OP_RT_RA_RB_C, 31<<26 | 0<<10 | 10<<1, "addc", +0, OP_RT_RA_RB_C, 31<<26 | 1<<10 | 10<<1, "addco", +0, OP_RT_RA_RB_C, 31<<26 | 0<<10 | 8<<1, "subfc", +0, OP_RT_RA_RB_C, 31<<26 | 1<<10 | 8<<1, "subfco", 0, OP_RT_RA_RB_C, 31<<26 | 0<<10 | 138<<1, "adde", 0, OP_RT_RA_RB_C, 31<<26 | 1<<10 | 138<<1, "addeo", 0, OP_RT_RA_RB_C, 31<<26 | 0<<10 | 136<<1, 
"subfe", @@ -214,6 +377,20 @@ 0, OP_RT_RA_C, 31<<26 | 0<<10 | 104<<1, "neg", 0, OP_RT_RA_C, 31<<26 | 1<<10 | 104<<1, "nego", +/* extended m for addition */ +0, OP_RT_RA_D, 14<<26, "la", +0, OP_RT_SI, 14<<26 | 0<<16, "li", +0, OP_RT_SI, 15<<26 | 0<<16, "lis", + +/* extended m for subtraction */ +0, OP_RT_RB_RA_C, 31<<26 | 0<<10 | 40<<1, "sub", +0, OP_RT_RB_RA_C, 31<<26 | 1<<10 | 40<<1, "subo", +0, OP_RT_RB_RA_C, 31<<26 | 0<<10 | 8<<1, "subc", +0, OP_RT_RB_RA_C, 31<<26 | 1<<10 | 8<<1, "subco", /* 1<<10 set, same encoding as subfco */ +0, OP_RT_RA_SI_subi, 14<<26, "subi", +0, OP_RT_RA_SI_subi, 15<<26, "subis", +0, OP_RT_RA_SI_subic, 12<<26, "subic", + /* page 54 */ 0, OP_RT_RA_SI, 7<<26, "mulli", 0, OP_RT_RA_RB_C, 31<<26 | 0<<10 | 233<<1, "mulld", @@ -241,12 +418,85 @@ 0, OP_BF_L_RA_UI, 10<<26, "cmpli", 0, OP_BF_L_RA_RB, 31<<26 | 32<<1, "cmpl", +/* extended m for comparison */ +0, OP_BF_RA_SI, 11<<26 | 1<<21, "cmpdi", +0, OP_BF_RA_RB, 31<<26 | 1<<21 | 0<<1, "cmpd", +0, OP_BF_RA_UI, 10<<26 | 1<<21, "cmpldi", +0, OP_BF_RA_RB, 31<<26 | 1<<21 | 32<<1, "cmpld", +0, OP_BF_RA_SI, 11<<26 | 0<<21, "cmpwi", +0, OP_BF_RA_RB, 31<<26 | 0<<21 | 0<<1, "cmpw", +0, OP_BF_RA_UI, 10<<26 | 0<<21, "cmplwi", +0, OP_BF_RA_RB, 31<<26 | 0<<21 | 32<<1, "cmplw", + /* page 60 */ 0, OP_TO_RA_SI, 2<<26, "tdi", 0, OP_TO_RA_SI, 3<<26, "twi", 0, OP_TO_RA_RB, 31<<26 | 68<<1, "td", 0, OP_TO_RA_RB, 31<<26 | 4<<1, "tw", +/* extended m for traps */ +0, OP_TOX_RA_RB, 31<<26 | 4<<21 | 68<<1, "tdeq", +0, OP_TOX_RA_SI, 2<<26 | 4<<21, "tdeqi", +0, OP_TOX_RA_RB, 31<<26 | 12<<21 | 68<<1, "tdge", +0, OP_TOX_RA_SI, 2<<26 | 12<<21, "tdgei", +0, OP_TOX_RA_RB, 31<<26 | 8<<21 | 68<<1, "tdgt", +0, OP_TOX_RA_SI, 2<<26 | 8<<21, "tdgti", +0, OP_TOX_RA_RB, 31<<26 | 20<<21 | 68<<1, "tdle", +0, OP_TOX_RA_SI, 2<<26 | 20<<21, "tdlei", +0, OP_TOX_RA_RB, 31<<26 | 5<<21 | 68<<1, "tdlge", +0, OP_TOX_RA_SI, 2<<26 | 5<<21, "tdlgei", +0, OP_TOX_RA_RB, 31<<26 | 1<<21 | 68<<1, "tdlgt", +0, OP_TOX_RA_SI, 2<<26 | 1<<21, "tdlgti", +0, OP_TOX_RA_RB, 31<<26 | 6<<21 | 68<<1, 
"tdlle", +0, OP_TOX_RA_SI, 2<<26 | 6<<21, "tdllei", +0, OP_TOX_RA_RB, 31<<26 | 2<<21 | 68<<1, "tdllt", +0, OP_TOX_RA_SI, 2<<26 | 2<<21, "tdllti", +0, OP_TOX_RA_RB, 31<<26 | 6<<21 | 68<<1, "tdlng", +0, OP_TOX_RA_SI, 2<<26 | 6<<21, "tdlngi", +0, OP_TOX_RA_RB, 31<<26 | 5<<21 | 68<<1, "tdlnl", +0, OP_TOX_RA_SI, 2<<26 | 5<<21, "tdlnli", +0, OP_TOX_RA_RB, 31<<26 | 16<<21 | 68<<1, "tdlt", +0, OP_TOX_RA_SI, 2<<26 | 16<<21, "tdlti", +0, OP_TOX_RA_RB, 31<<26 | 24<<21 | 68<<1, "tdne", +0, OP_TOX_RA_SI, 2<<26 | 24<<21, "tdnei", +0, OP_TOX_RA_RB, 31<<26 | 20<<21 | 68<<1, "tdng", +0, OP_TOX_RA_SI, 2<<26 | 20<<21, "tdngi", +0, OP_TOX_RA_RB, 31<<26 | 12<<21 | 68<<1, "tdnl", +0, OP_TOX_RA_SI, 2<<26 | 12<<21, "tdnli", +0, OP_TOX_RA_RB, 31<<26 | 31<<21 | 68<<1, "tdu", +0, OP_TOX_RA_SI, 2<<26 | 31<<21, "tdui", +0, OP, 31<<26 | 31<<21 | 4<<1, "trap", +0, OP_TOX_RA_RB, 31<<26 | 4<<21 | 4<<1, "tweq", +0, OP_TOX_RA_SI, 3<<26 | 4<<21, "tweqi", +0, OP_TOX_RA_RB, 31<<26 | 12<<21 | 4<<1, "twge", +0, OP_TOX_RA_SI, 3<<26 | 12<<21, "twgei", +0, OP_TOX_RA_RB, 31<<26 | 8<<21 | 4<<1, "twgt", +0, OP_TOX_RA_SI, 3<<26 | 8<<21, "twgti", +0, OP_TOX_RA_RB, 31<<26 | 20<<21 | 4<<1, "twle", +0, OP_TOX_RA_SI, 3<<26 | 20<<21, "twlei", +0, OP_TOX_RA_RB, 31<<26 | 5<<21 | 4<<1, "twlge", +0, OP_TOX_RA_SI, 3<<26 | 5<<21, "twlgei", +0, OP_TOX_RA_RB, 31<<26 | 1<<21 | 4<<1, "twlgt", +0, OP_TOX_RA_SI, 3<<26 | 1<<21, "twlgti", +0, OP_TOX_RA_RB, 31<<26 | 6<<21 | 4<<1, "twlle", +0, OP_TOX_RA_SI, 3<<26 | 6<<21, "twllei", +0, OP_TOX_RA_RB, 31<<26 | 2<<21 | 4<<1, "twllt", +0, OP_TOX_RA_SI, 3<<26 | 2<<21, "twllti", +0, OP_TOX_RA_RB, 31<<26 | 6<<21 | 4<<1, "twlng", +0, OP_TOX_RA_SI, 3<<26 | 6<<21, "twlngi", +0, OP_TOX_RA_RB, 31<<26 | 5<<21 | 4<<1, "twlnl", +0, OP_TOX_RA_SI, 3<<26 | 5<<21, "twlnli", +0, OP_TOX_RA_RB, 31<<26 | 16<<21 | 4<<1, "twlt", +0, OP_TOX_RA_SI, 3<<26 | 16<<21, "twlti", +0, OP_TOX_RA_RB, 31<<26 | 24<<21 | 4<<1, "twne", +0, OP_TOX_RA_SI, 3<<26 | 24<<21, "twnei", +0, OP_TOX_RA_RB, 31<<26 | 20<<21 | 4<<1, 
"twng", +0, OP_TOX_RA_SI, 3<<26 | 20<<21, "twngi", +0, OP_TOX_RA_RB, 31<<26 | 12<<21 | 4<<1, "twnl", +0, OP_TOX_RA_SI, 3<<26 | 12<<21, "twnli", +0, OP_TOX_RA_RB, 31<<26 | 31<<21 | 4<<1, "twu", +0, OP_TOX_RA_SI, 3<<26 | 31<<21, "twui", + /* page 62 */ 0, OP_RS_RA_UI_CC, 28<<26, "andi", /* C compulsory */ 0, OP_RS_RA_UI_CC, 29<<26, "andis", /* C compulsory */ @@ -268,26 +518,59 @@ 0, OP_RS_RA_C, 31<<26 | 58<<1, "cntlzd", 0, OP_RS_RA_C, 31<<26 | 26<<1, "cntlzw", +/* extended m using logic */ +0, OP_RS_RA_RA_C, 31<<26 | 444<<1, "mr", +0, OP, 24<<26, "nop", +0, OP_RS_RA_RA_C, 31<<26 | 124<<1, "not", +0, OP, 26<<26, "xnop", + /* page 69 */ -0, OP_RS_RA_SH_MB6_SH_C, 30<<26 | 0<<2, "rldicl", -0, OP_RS_RA_SH_ME6_SH_C, 30<<26 | 1<<2, "rldicr", -0, OP_RS_RA_SH_MB6_SH_C, 30<<26 | 2<<2, "rldic", -0, OP_RS_RA_SH_MB5_ME5_C, 21<<26, "rlwinm", -0, OP_RS_RA_RB_MB6_C, 30<<26 | 8<<1, "rldcl", -0, OP_RS_RA_RB_ME6_C, 30<<26 | 9<<1, "rldcr", -0, OP_RS_RA_RB_MB5_ME5_C, 23<<26, "rlwnm", -0, OP_RS_RA_SH_MB6_SH_C, 30<<26 | 3<<2, "rldimi", -0, OP_RS_RA_SH_MB5_ME5_C, 20<<26, "rlwimi", +0, OP_RA_RS_SH6_MB6_C, 30<<26 | 0<<2, "rldicl", +0, OP_RA_RS_SH6_MB6_C, 30<<26 | 1<<2, "rldicr", +0, OP_RA_RS_SH6_MB6_C, 30<<26 | 2<<2, "rldic", +0, OP_RA_RS_SH5_MB5_ME5_C, 21<<26, "rlwinm", +0, OP_RA_RS_RB_MB6_C, 30<<26 | 8<<1, "rldcl", +0, OP_RA_RS_RB_MB6_C, 30<<26 | 9<<1, "rldcr", +0, OP_RA_RS_RB_MB5_ME5_C, 23<<26, "rlwnm", +0, OP_RA_RS_SH6_MB6_C, 30<<26 | 3<<2, "rldimi", +0, OP_RA_RS_SH5_MB5_ME5_C, 20<<26, "rlwimi", + +/* extended m for doubleword rotation */ +0, OP_clrlsldi, 30<<26 | 2<<2, "clrlsldi", +0, OP_clrldi, 30<<26 | 0<<2, "clrldi", +0, OP_clrrdi, 30<<26 | 1<<2, "clrrdi", +0, OP_extldi, 30<<26 | 1<<2, "extldi", /* rldicr form: the grammar action fills a mask-end field */ +0, OP_extrdi, 30<<26 | 0<<2, "extrdi", /* rldicl form: the grammar action fills a mask-begin field */ +0, OP_insrdi, 30<<26 | 3<<2, "insrdi", +0, OP_RA_RS_RB_C, 30<<26 | MB6(0) | 8<<1, "rotld", +0, OP_RA_RS_SH6_C, 30<<26 | MB6(0) | 0<<2, "rotldi", +0, OP_rotrdi, 30<<26 | 0<<2, "rotrdi", +0, OP_sldi, 30<<26 | 1<<2, "sldi", +0, OP_srdi, 30<<26 | 0<<2, 
"srdi", + +/* extended m for word rotation */ +0, OP_clrlslwi, 21<<26, "clrlslwi", +0, OP_clrlwi, 21<<26, "clrlwi", +0, OP_clrrwi, 21<<26, "clrrwi", +0, OP_extlwi, 21<<26, "extlwi", +0, OP_extrwi, 21<<26, "extrwi", +0, OP_inslwi, 20<<26, "inslwi", +0, OP_insrwi, 20<<26, "insrwi", +0, OP_RA_RS_RB_C, 23<<26 | 0<<6 | 31<<1, "rotlw", +0, OP_RA_RS_SH5_C, 21<<26 | 0<<6 | 31<<1, "rotlwi", +0, OP_rotrwi, 21<<26, "rotrwi", +0, OP_slwi, 21<<26, "slwi", +0, OP_srwi, 21<<26, "srwi", /* page 74 */ -0, OP_RS_RA_RB_C, 31<<26 | 27<<1, "sld", -0, OP_RS_RA_RB_C, 31<<26 | 24<<1, "slw", -0, OP_RS_RA_RB_C, 31<<26 | 539<<1, "srd", -0, OP_RS_RA_RB_C, 31<<26 | 536<<1, "srw", -0, OP_RS_RA_SH6_C, 31<<26 | 413<<2, "sradi", -0, OP_RS_RA_SH5_C, 31<<26 | 824<<1, "srawi", -0, OP_RS_RA_RB_C, 31<<26 | 794<<1, "srad", -0, OP_RS_RA_RB_C, 31<<26 | 792<<1, "sraw", +0, OP_RA_RS_RB_C, 31<<26 | 27<<1, "sld", +0, OP_RA_RS_RB_C, 31<<26 | 24<<1, "slw", +0, OP_RA_RS_RB_C, 31<<26 | 539<<1, "srd", +0, OP_RA_RS_RB_C, 31<<26 | 536<<1, "srw", +0, OP_RA_RS_SH6_C, 31<<26 | 413<<2, "sradi", +0, OP_RA_RS_SH5_C, 31<<26 | 824<<1, "srawi", +0, OP_RA_RS_RB_C, 31<<26 | 794<<1, "srad", +0, OP_RA_RS_RB_C, 31<<26 | 792<<1, "sraw", /* page 78 */ 0, OP_RS_SPR, 31<<26 | 467<<1, "mtspr", @@ -295,6 +578,14 @@ 0, OP_RS_FXM, 31<<26 | 0<<21 | 144<<1, "mtcrf", 0, OP_RT, 31<<26 | 0<<21 | 19<<1, "mfcr", +/* extended m for special purpose registers */ +0, OP_RT, 31<<26 | 9<<16 | 0<<11 | 339<<1, "mfctr", +0, OP_RT, 31<<26 | 8<<16 | 0<<11 | 339<<1, "mflr", +0, OP_RT, 31<<26 | 1<<16 | 0<<11 | 339<<1, "mfxer", +0, OP_RT, 31<<26 | 9<<16 | 0<<11 | 467<<1, "mtctr", +0, OP_RT, 31<<26 | 8<<16 | 0<<11 | 467<<1, "mtlr", +0, OP_RT, 31<<26 | 1<<16 | 0<<11 | 467<<1, "mtxer", + /* Floating point instructions (page 83) */ 0, OP_FRT_RA_D, 48<<26, "lfs", diff --git a/mach/powerpc/as/mach4.c b/mach/powerpc/as/mach4.c index 1ad180de4..99f7f4537 100644 --- a/mach/powerpc/as/mach4.c +++ b/mach/powerpc/as/mach4.c @@ -4,16 +4,33 @@ */ operation - : OP_BF_BFA 
CR ',' CR { emit4($1 | ($2<<23) | ($4<<18)); } + : OP { emit4($1); } + | OP_BDA bda { emit4($1 | $2); } + | OP_BDL bdl { emit4($1 | $2); } + | OP_BF_BFA CR ',' CR { emit4($1 | ($2<<23) | ($4<<18)); } | OP_BF_FRA_FRB CR ',' FPR ',' FPR { emit4($1 | ($2<<23) | ($4<<16) | ($6<<11)); } | OP_BF_L_RA_RB CR ',' u1 ',' GPR ',' GPR { emit4($1 | ($2<<23) | ($4<<21) | ($6<<16) | ($8<<11)); } | OP_BF_L_RA_SI CR ',' u1 ',' GPR ',' e16 { emit4($1 | ($2<<23) | ($4<<21) | ($6<<16) | $8); } | OP_BF_L_RA_UI CR ',' u1 ',' GPR ',' e16 { emit4($1 | ($2<<23) | ($4<<21) | ($6<<16) | $8); } + | OP_BF_RA_RB cr_opt GPR ',' GPR { emit4($1 | ($2<<23) | ($3<<16) | ($5<<11)); } + | OP_BF_RA_SI cr_opt GPR ',' e16 { emit4($1 | ($2<<23) | ($3<<16) | $5); } + | OP_BF_RA_UI cr_opt GPR ',' e16 { emit4($1 | ($2<<23) | ($3<<16) | $5); } | OP_BF_U_C c CR ',' u4 { emit4($1 | $2 | ($3<<23) | ($5<<12)); } + | OP_BH { emit4($1); } + | OP_BH u2 { emit4($1 | ($2<<11)); } + | OP_BI_BDA u5 ',' bda { emit4($1 | ($2<<16) | $4); } + | OP_BI_BDL u5 ',' bdl { emit4($1 | ($2<<16) | $4); } + | OP_BI_BH u5 opt_bh { emit4($1 | ($2<<16) | $3); } + | OP_BICR_BDA cr_opt bda { emit4($1 | ($2<<18) | $3); } + | OP_BICR_BDL cr_opt bdl { emit4($1 | ($2<<18) | $3); } + | OP_BICR_BH { emit4($1); } + | OP_BICR_BH CR opt_bh { emit4($1 | ($2<<18) | $3); } | OP_BO_BI_BDA u5 ',' u5 ',' bda { emit4($1 | ($2<<21) | ($4<<16) | $6); } | OP_BO_BI_BDL u5 ',' u5 ',' bdl { emit4($1 | ($2<<21) | ($4<<16) | $6); } - | OP_BO_BI_BH u5 ',' u5 ',' u2 { emit4($1 | ($2<<21) | ($4<<16) | ($6<<11)); } + | OP_BO_BI_BH u5 ',' u5 opt_bh { emit4($1 | ($2<<21) | ($4<<16) | $5); } + | OP_BT_BA_BA u5 ',' u5 { emit4($1 | ($2<<21) | ($4<<16) | ($4<<11)); } | OP_BT_BA_BB u5 ',' u5 ',' u5 { emit4($1 | ($2<<21) | ($4<<16) | ($6<<11)); } + | OP_BT_BT_BT u5 { emit4($1 | ($2<<21) | ($2<<16) | ($2<<11)); } | OP_BT_C c u5 { emit4($1 | $2 | ($3<<21)); } | OP_FLM_FRB_C c u8 ',' FPR { emit4($1 | $2 | ($3<<17) | ($5<<11)); } | OP_FRS_RA_D FPR ',' e16 '(' GPR ')' { emit4($1 
| ($2<<21) | ($6<<16) | $4); } @@ -25,15 +42,35 @@ operation | OP_FRT_RA_D FPR ',' e16 '(' GPR ')' { emit4($1 | ($2<<21) | ($6<<16) | $4); } | OP_FRT_RA_RB FPR ',' GPR ',' GPR { emit4($1 | ($2<<21) | ($4<<16) | ($6<<11)); } | OP_FRT_C c FPR { emit4($1 | $2 | ($3<<21)); } + | OP_RA_RS_RB_C c GPR ',' GPR ',' GPR + { emit4($1 | $2 | ($5<<21) | ($3<<16) | ($7<<11)); } + | OP_RA_RS_RB_MB5_ME5_C c GPR ',' GPR ',' GPR ',' u5 ',' u5 + { emit4($1 | $2 | ($5<<21) | ($3<<16) | ($7<<11) | + ($9<<6) | ($11<<1)); } + | OP_RA_RS_RB_MB6_C c GPR ',' GPR ',' GPR ',' u6 + { emit4($1 | $2 | ($5<<21) | ($3<<16) | ($7<<11) | MB6($9)); } + | OP_RA_RS_SH5_C c GPR ',' GPR ',' u5 + { emit4($1 | $2 | ($5<<21) | ($3<<16) | ($7<<11)); } + | OP_RA_RS_SH5_MB5_ME5_C c GPR ',' GPR ',' u5 ',' u5 ',' u5 + { emit4($1 | $2 | ($5<<21) | ($3<<16) | + ($7<<11) | ($9<<6) | ($11<<1)); } + | OP_RA_RS_SH6_C c GPR ',' GPR ',' u6 + { emit4($1 | $2 | ($5<<21) | ($3<<16) | SH6($7)); } + | OP_RA_RS_SH6_MB6_C c GPR ',' GPR ',' u6 ',' u6 + { emit4($1 | $2 | ($5<<21) | ($3<<16) | SH6($7) | MB6($9)); } | OP_RT GPR { emit4($1 | ($2<<21)); } | OP_RT_RA_C c GPR ',' GPR { emit4($1 | $2 | ($3<<21) | ($5<<16)); } | OP_RT_RA_D GPR ',' e16 '(' GPR ')' { emit4($1 | ($2<<21) | ($6<<16) | $4); } | OP_RT_RA_DS GPR ',' ds '(' GPR ')' { emit4($1 | ($2<<21) | ($6<<16) | $4); } | OP_RT_RA_NB GPR ',' GPR ',' nb { emit4($1 | ($2<<21) | ($4<<16) | ($6<<11)); } | OP_RT_RA_RB GPR ',' GPR ',' GPR { emit4($1 | ($2<<21) | ($4<<16) | ($6<<11)); } - | OP_RT_RA_RB_C c GPR ',' GPR ',' GPR { emit4($1 | $2 | ($3<<21) | ($5<<16) | ($7<<11)); } + | OP_RT_RA_RB_C c GPR ',' GPR ',' GPR { emit4($1 | $2 | ($3<<21) | ($5<<16) | ($7<<11)); } | OP_RT_RA_SI GPR ',' GPR ',' e16 { emit4($1 | ($2<<21) | ($4<<16) | $6); } | OP_RT_RA_SI_addic c GPR ',' GPR ',' e16 { emit4($1 | ($2<<26) | ($3<<21) | ($5<<16) | $7); } + | OP_RT_RA_SI_subi GPR ',' GPR ',' negate16 { emit4($1 | ($2<<21) | ($4<<16) | $6); } + | OP_RT_RA_SI_subic c GPR ',' GPR ',' negate16 { emit4($1 
| ($2<<26) | ($3<<21) | ($5<<16) | $7); } + | OP_RT_RB_RA_C c GPR ',' GPR ',' GPR { emit4($1 | $2 | ($3<<21) | ($7<<16) | ($5<<11)); } + | OP_RT_SI GPR ',' e16 { emit4($1 | ($2<<21) | $4); } | OP_RT_SPR GPR ',' spr_num { emit4($1 | ($2<<21) | ($4<<11)); } | OP_RS_FXM u7 ',' GPR { emit4($1 | ($4<<21) | ($2<<12)); } | OP_RS_RA_C c GPR ',' GPR { emit4($1 | $2 | ($5<<21) | ($3<<16)); } @@ -45,21 +82,127 @@ operation | OP_RS_RA_RB GPR ',' GPR ',' GPR { emit4($1 | ($2<<21) | ($4<<16) | ($6<<11)); } | OP_RS_RA_RB_C c GPR ',' GPR ',' GPR { emit4($1 | $2 | ($5<<21) | ($3<<16) | ($7<<11)); } | OP_RS_RA_RA_C c GPR ',' GPR { emit4($1 | $2 | ($5<<21) | ($3<<16) | ($5<<11)); } - | OP_RS_RA_RB_MB5_ME5_C c GPR ',' GPR ',' GPR ',' u5 ',' u5 { emit4($1 | $2 | ($5<<21) | ($3<<16) | ($7<<11) | ($9<<6) | ($11<<1)); } - | OP_RS_RA_RB_MB6_C c GPR ',' GPR ',' GPR ',' u6 { emit4($1 | $2 | ($5<<21) | ($3<<16) | ($7<<11) | (($9&0x1F)<<6) | (($9&0x20)>>0)); } - | OP_RS_RA_RB_ME6_C c GPR ',' GPR ',' GPR ',' u6 { emit4($1 | $2 | ($5<<21) | ($3<<16) | ($7<<11) | (($9&0x1F)<<6) | (($9&0x20)>>0)); } - | OP_RS_RA_SH_MB5_ME5_C c GPR ',' GPR ',' u5 ',' u5 ',' u5 { emit4($1 | $2 | ($5<<21) | ($3<<16) | ($7<<11) | ($9<<6) | ($11<<1)); } - | OP_RS_RA_SH_MB6_SH_C c GPR ',' GPR ',' u6 ',' u6 { emit4($1 | $2 | ($5<<21) | ($3<<16) | (($7&0x1F)<<11) | ($9<<6) | (($7&0x20)>>4)); } - | OP_RS_RA_SH_ME6_SH_C c GPR ',' GPR ',' u6 ',' u6 { emit4($1 | $2 | ($5<<21) | ($3<<16) | (($7&0x1F)<<11) | ($9<<6) | (($7&0x20)>>4)); } - | OP_RS_RA_SH5_C c GPR ',' GPR ',' u5 { emit4($1 | $2 | ($5<<21) | ($3<<16) | ($7<<11)); } - | OP_RS_RA_SH6_C c GPR ',' GPR ',' u6 { emit4($1 | $2 | ($5<<21) | ($3<<16) | (($7&0x1F)<<11) | (($7&0x20)>>4)); } | OP_RS_SPR spr_num ',' GPR { emit4($1 | ($4<<21) | ($2<<11)); } | OP_TO_RA_RB u5 ',' GPR ',' GPR { emit4($1 | ($2<<21) | ($4<<16) | ($6<<11)); } | OP_TO_RA_SI u5 ',' GPR ',' e16 { emit4($1 | ($2<<21) | ($4<<16) | $6); } + | OP_TOX_RA_RB GPR ',' GPR { emit4($1 | ($2<<16) | ($4<<11)); } + | 
OP_TOX_RA_SI GPR ',' e16 { emit4($1 | ($2<<16) | $4); } + | OP_LEV { emit4($1); } | OP_LEV u7 { emit4($1 | ($2<<5)); } | OP_LIA lia { emit4($1 | $2); } | OP_LIL lil { emit4($1 | $2); } | OP_LI32 li32 /* emitted in subrule */ + | OP_clrlsldi c GPR ',' GPR ',' u6 ',' u6 + { + quad mb = ($7 - $9) & 0x3f; + fit($9 <= $7); + emit4($1 | $2 | ($5<<21) | ($3<<16) | SH6($9) | MB6(mb)); + } + | OP_clrldi c GPR ',' GPR ',' u6 + { + emit4($1 | $2 | ($5<<21) | ($3<<16) | SH6(0) | MB6($7)); + } + | OP_clrrdi c GPR ',' GPR ',' u6 + { + quad me = 63 - $7; + emit4($1 | $2 | ($5<<21) | ($3<<16) | SH6(0) | MB6(me)); + } + | OP_extldi c GPR ',' GPR ',' u6 ',' u6 + { + quad me = ($7 - 1) & 0x3f; + fit($7 > 0); + emit4($1 | $2 | ($5<<21) | ($3<<16) | SH6($9) | MB6(me)); + } + | OP_extrdi c GPR ',' GPR ',' u6 ',' u6 + { + quad sh = ($9 + $7) & 0x3f; + quad mb = (64 - $7) & 0x3f; + fit($7 > 0); + emit4($1 | $2 | ($5<<21) | ($3<<16) | SH6(sh) | MB6(mb)); + } + | OP_rotrdi c GPR ',' GPR ',' u6 + { + quad sh = (64 - $7) & 0x3f; + emit4($1 | $2 | ($5<<21) | ($3<<16) | SH6(sh) | MB6(0)); + } + | OP_sldi c GPR ',' GPR ',' u6 + { + quad me = 63 - $7; + emit4($1 | $2 | ($5<<21) | ($3<<16) | SH6($7) | MB6(me)); + } + | OP_srdi c GPR ',' GPR ',' u6 + { + quad sh = (64 - $7) & 0x3f; + emit4($1 | $2 | ($5<<21) | ($3<<16) | SH6(sh) | MB6($7)); + } + | OP_clrlslwi c GPR ',' GPR ',' u5 ',' u5 + { + quad mb = ($7 - $9) & 0x1f; + quad me = 31 - $9; + fit($9 <= $7); + emit4($1 | $2 | ($5<<21) | ($3<<16) | + ($9<<11) | (mb<<6) | (me<<1)); + } + | OP_clrlwi c GPR ',' GPR ',' u5 + { + emit4($1 | $2 | ($5<<21) | ($3<<16) | + (0<<11) | ($7<<6) | (31<<1)); + } + | OP_clrrwi c GPR ',' GPR ',' u5 + { + quad me = 31 - $7; + emit4($1 | $2 | ($5<<21) | ($3<<16) | + (0<<11) | (0<<6) | (me<<1)); + } + | OP_extlwi c GPR ',' GPR ',' u5 ',' u5 + { + quad me = ($7 - 1) & 0x1f; + fit($7 > 0); + emit4($1 | $2 | ($5<<21) | ($3<<16) | + ($9<<11) | (0<<6) | (me<<1)); + } + | OP_extrwi c GPR ',' GPR ',' u5 ',' u5 + { + quad sh = 
($9 + $7) & 0x1f; + quad mb = (32 - $7) & 0x1f; + fit($7 > 0); + emit4($1 | $2 | ($5<<21) | ($3<<16) | + (sh<<11) | (mb<<6) | (31<<1)); + } + | OP_inslwi c GPR ',' GPR ',' u5 ',' u5 + { + quad sh = (32 - $9) & 0x1f; + quad me = ($9 + $7 - 1) & 0x1f; + fit($7 > 0); + emit4($1 | $2 | ($5<<21) | ($3<<16) | + (sh<<11) | ($9<<6) | (me<<1)); + } + | OP_insrwi c GPR ',' GPR ',' u5 ',' u5 + { + quad sh = (32 - $9 - $7) & 0x1f; + quad me = ($9 + $7 - 1) & 0x1f; + fit($7 > 0); + emit4($1 | $2 | ($5<<21) | ($3<<16) | + (sh<<11) | ($9<<6) | (me<<1)); + } + | OP_rotrwi c GPR ',' GPR ',' u5 + { + quad sh = (32 - $7) & 0x1f; + emit4($1 | $2 | ($5<<21) | ($3<<16) | + (sh<<11) | (0<<6) | (31<<1)); + } + | OP_slwi c GPR ',' GPR ',' u5 + { + quad me = 31 - $7; + emit4($1 | $2 | ($5<<21) | ($3<<16) | + ($7<<11) | (0<<6) | (me<<1)); + } + | OP_srwi c GPR ',' GPR ',' u5 + { + quad sh = (32 - $7) & 0x1f; + emit4($1 | $2 | ($5<<21) | ($3<<16) | + (sh<<11) | ($7<<6) | (31<<1)); + } ; c @@ -80,6 +223,17 @@ e16 | OP_LO ASC_LPAR expr ASC_RPAR { $$ = emit_lo(&$3); } ; +negate16 + : absexp + { + /* To encode subi, we negate the immediate value, then + * it must fit as signed 16-bit. */ + $$ = -$1; + fit(fitx($$, 16)); + $$ = (uint16_t) $$; + } + ; + u8 : absexp { @@ -143,6 +297,19 @@ u2 } ; +/* Optional comma, branch hint. */ +opt_bh + : /* nothing */ { $$ = 0; } + | ',' u2 { $$ = ($2<<11); } + +/* + * Optional condition register, comma. This checks if the token is a + * CR register name. This wouldn't work if we allowed CR as a number. 
+ */ +cr_opt + : /* nothing */ { $$ = 0; } + | CR ',' { $$ = $1; } + ds : e16 { @@ -251,14 +418,3 @@ spr_num $$ = ($1 >> 5) | (($1 & 0x1f) << 5); } ; - -powerpcfixup - : expr - { - quad type = $1.typ & S_TYP; - quad val = $1.val; - if (type == S_ABS) - serror(".powerpcfixup is useless on absolute values"); - newrelo($1.typ, RELOPPC | FIXUPFLAGS); - } - ; From a585ddf578479853c73b4948fbf143306ff4b142 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Sun, 22 Jan 2017 00:52:32 -0500 Subject: [PATCH 02/11] Fix parameters of signal handlers for linuxppc. Linux passes the arguments in registers, but our compiler expects arguments on the stack. Signal handlers got garbage instead of the signal number. Some handlers, like the one in lang/m2/libm2/sigtrp.c, need the correct signal number. I write a "bridge" in PowerPC assembly that moves the arguments to the stack. I put the bridge in sigaction(), so I provide a signal() that calls sigaction(). I remove the *.c glob or wildcard from build.lua, so linuxppc only compiles its own signal.c, not the other signal.c for linux386 and linux68k. My bridge uses sigprocmask(), so I also add sigprocmask(). Because linux386 and linux68k use globs, they also get sigprocmask(). I sync the header files so all three Linux platforms declare execve(), sigprocmask(), and unlink(), but not remove(), because we have remove() in <stdio.h>. I am using sigaction.s to test some features that we recently added to our PowerPC assembler. These are the "hi16[...]" and "lo16[...]" syntax, and also the extended names like "beq", "cmpwi", "li", "subi". 
--- plat/linux/libsys/sigprocmask.c | 7 ++ plat/linux386/include/unistd.h | 9 +- plat/linux68k/include/unistd.h | 10 ++ plat/linuxppc/include/unistd.h | 27 ++++++ plat/linuxppc/libsys/build.lua | 38 ++++++-- plat/linuxppc/libsys/sigaction.s | 156 +++++++++++++++++++++++++++++++ plat/linuxppc/libsys/signal.c | 19 ++++ 7 files changed, 256 insertions(+), 10 deletions(-) create mode 100644 plat/linux/libsys/sigprocmask.c create mode 100644 plat/linuxppc/libsys/sigaction.s create mode 100644 plat/linuxppc/libsys/signal.c diff --git a/plat/linux/libsys/sigprocmask.c b/plat/linux/libsys/sigprocmask.c new file mode 100644 index 000000000..ad1b339c2 --- /dev/null +++ b/plat/linux/libsys/sigprocmask.c @@ -0,0 +1,7 @@ +#include +#include "libsys.h" + +int sigprocmask(int flags, const sigset_t *new, sigset_t *old) +{ + return _syscall(__NR_sigprocmask, flags, (quad) new, (quad) old); +} diff --git a/plat/linux386/include/unistd.h b/plat/linux386/include/unistd.h index 5c6f31ef4..8c8637c09 100644 --- a/plat/linux386/include/unistd.h +++ b/plat/linux386/include/unistd.h @@ -56,7 +56,6 @@ extern int write(int fd, void* buffer, size_t count); extern off_t lseek(int fildes, off_t offset, int whence); extern int fcntl(int fd, int op, ...); extern int unlink(const char* path); -extern int remove(const char* path); /* Special variables */ @@ -117,8 +116,16 @@ typedef int sig_atomic_t; #define _NSIG 32 /* Biggest signal number + 1 (not including real-time signals). 
*/ + +/* sigprocmask */ +#define SIG_BLOCK 0 +#define SIG_UNBLOCK 1 +#define SIG_SETMASK 2 +typedef unsigned long sigset_t; + typedef void (*sighandler_t)(int); extern sighandler_t signal(int signum, sighandler_t handler); +extern int sigprocmask(int, const sigset_t *, sigset_t *); extern int raise(int signum); diff --git a/plat/linux68k/include/unistd.h b/plat/linux68k/include/unistd.h index 307192f77..927a20459 100644 --- a/plat/linux68k/include/unistd.h +++ b/plat/linux68k/include/unistd.h @@ -55,6 +55,7 @@ extern int read(int fd, void* buffer, size_t count); extern int write(int fd, void* buffer, size_t count); extern off_t lseek(int fildes, off_t offset, int whence); extern int fcntl(int fd, int op, ...); +extern int unlink(const char* path); /* Special variables */ @@ -67,6 +68,7 @@ extern pid_t getpid(void); extern int brk(void* ptr); extern void* sbrk(int increment); extern int isatty(int d); +extern int execve(const char *path, char *const argv[], char *const envp[]); /* Signal handling */ @@ -114,8 +116,16 @@ typedef int sig_atomic_t; #define _NSIG 32 /* Biggest signal number + 1 (not including real-time signals). 
*/ + +/* sigprocmask */ +#define SIG_BLOCK 0 +#define SIG_UNBLOCK 1 +#define SIG_SETMASK 2 +typedef unsigned long sigset_t; + typedef void (*sighandler_t)(int); extern sighandler_t signal(int signum, sighandler_t handler); +extern int sigprocmask(int, const sigset_t *, sigset_t *); extern int raise(int signum); diff --git a/plat/linuxppc/include/unistd.h b/plat/linuxppc/include/unistd.h index a31bd9f0d..f57705365 100644 --- a/plat/linuxppc/include/unistd.h +++ b/plat/linuxppc/include/unistd.h @@ -55,6 +55,7 @@ extern int read(int fd, void* buffer, size_t count); extern int write(int fd, void* buffer, size_t count); extern off_t lseek(int fildes, off_t offset, int whence); extern int fcntl(int fd, int op, ...); +extern int unlink(const char* path); /* Special variables */ @@ -115,8 +116,34 @@ typedef int sig_atomic_t; #define _NSIG 32 /* Biggest signal number + 1 (not including real-time signals). */ + +/* sigprocmask */ +#define SIG_BLOCK 0 +#define SIG_UNBLOCK 1 +#define SIG_SETMASK 2 +typedef unsigned long sigset_t; + +/* sa_flags */ +#define SA_NODEFER 0x40000000UL +#define SA_RESETHAND 0x80000000UL + +struct __siginfo; +struct sigaction { + union { + void (*__sa_handler)(int); + void (*__sa_sigaction)(int, struct __siginfo *, void *); + } __sigaction_u; + sigset_t sa_mask; + unsigned long sa_flags; + void (*sa_restorer)(void); +}; +#define sa_handler __sigaction_u.__sa_handler +#define sa_sigaction __sigaction_u.__sa_sigaction + typedef void (*sighandler_t)(int); +extern int sigaction(int, const struct sigaction *, struct sigaction *); extern sighandler_t signal(int signum, sighandler_t handler); +extern int sigprocmask(int, const sigset_t *, sigset_t *); extern int raise(int signum); diff --git a/plat/linuxppc/libsys/build.lua b/plat/linuxppc/libsys/build.lua index e74f3f416..f7b16b378 100644 --- a/plat/linuxppc/libsys/build.lua +++ b/plat/linuxppc/libsys/build.lua @@ -1,16 +1,36 @@ acklibrary { - name = "lib", - srcs = { - "./*.s", - "plat/linux/libsys/*.c", 
- "plat/linux/libsys/*.s", - }, + name = "lib", + srcs = { + "./_syscall.s", + "./sigaction.s", + "./signal.c", + "./trap.s", + "plat/linux/libsys/_exit.c", + "plat/linux/libsys/_hol0.s", + "plat/linux/libsys/close.c", + "plat/linux/libsys/creat.c", + "plat/linux/libsys/errno.s", + "plat/linux/libsys/execve.c", + "plat/linux/libsys/getpid.c", + "plat/linux/libsys/gettimeofday.c", + "plat/linux/libsys/ioctl.c", + "plat/linux/libsys/isatty.c", + "plat/linux/libsys/kill.c", + "plat/linux/libsys/lseek.c", + "plat/linux/libsys/open.c", + "plat/linux/libsys/read.c", + "plat/linux/libsys/sbrk.c", + -- omit signal.c + "plat/linux/libsys/sigprocmask.c", + "plat/linux/libsys/unlink.c", + "plat/linux/libsys/write.c", + }, deps = { "lang/cem/libcc.ansi/headers+headers", "plat/linuxppc/include+headers", }, - vars = { - plat = "linuxppc" - } + vars = { + plat = "linuxppc" + } } diff --git a/plat/linuxppc/libsys/sigaction.s b/plat/linuxppc/libsys/sigaction.s new file mode 100644 index 000000000..0509c8e72 --- /dev/null +++ b/plat/linuxppc/libsys/sigaction.s @@ -0,0 +1,156 @@ +#define __NR_sigaction 67 +#define SIG_BLOCK 0 +#define SIG_SETMASK 2 +#define MAXSIG 32 + +/* offsets into our stack frame */ +#define mynew 16 /* new sigaction */ +#define mynset 32 /* new signal set */ +#define myoset 36 /* old signal set */ +#define mysave 40 +#define mysize 56 + +.sect .text; .sect .rodata; .sect .data; .sect .bss + +/* + * Linux calls signal handlers with arguments in registers, but the + * ACK expects arguments on the stack. This sigaction() uses a + * "bridge" to move the arguments. + */ +.sect .text +.define _sigaction +_sigaction: + mflr r0 + subi r1, r1, mysize + stw r31, mysave+8(r1) + stw r30, mysave+4(r1) + stw r29, mysave(r1) + stw r0, mysave+12(r1) + li r3, 0 + stw r3, mynset(r1) ! mynset = 0 + lwz r29, mysize(r1) ! r29 = signal number + lwz r30, mysize+4(r1) ! r30 = new action + lwz r31, mysize+8(r1) ! 
r31 = old action + /* + * If the new action is non-NULL, the signal number is in + * range 1 to MAXSIG, and the new handler is not SIG_DFL 0 + * or SIG_IGN 1, then we interpose our bridge. + */ + cmpwi cr0, r30, 0 + subi r7, r29, 1 ! r7 = index in handlers + cmplwi cr7, r7, MAXSIG ! unsigned comparison + beq cr0, kernel + bge cr7, kernel + lwz r3, 0(r30) ! r3 = new handler + clrrwi. r3, r3, 1 + beq cr0, kernel + /* + * Block the signal while we build the bridge. Prevents a + * race if a signal arrives after we change the bridge but + * before we change the action in the kernel. + */ + li r4, 1 + slw r4, r4, r7 + stw r4, mynset(r1) ! mynmask = 1 << (signal - 1) + li r3, SIG_BLOCK + la r4, mynset(r1) + la r5, myoset(r1) + stw r3, 0(r1) + stw r4, 4(r1) + stw r5, 8(r1) + bl _sigprocmask + /* + * Point our bridge to the new signal handler. Then copy the + * new sigaction but point it to our bridge. + */ + lis r6, hi16[handlers] + ori r6, r6, lo16[handlers] + subi r7, r29, 1 + slwi r7, r7, 2 + lwz r3, 0(r30) ! r3 = new handler + stwx r3, r6, r7 ! put it in array of handlers + lis r3, hi16[bridge] + ori r3, r3, lo16[bridge] + lwz r4, 4(r30) + lwz r5, 8(r30) + lwz r6, 12(r30) + stw r3, mynew(r1) ! sa_handler or sa_sigaction + stw r4, mynew+4(r1) ! sa_mask + stw r5, mynew+8(r1) ! sa_flags + stw r6, mynew+12(r1) ! sa_restorer + la r30, mynew(r1) +kernel: + li r3, __NR_sigaction + stw r3, 0(r1) + stw r29, 4(r1) + stw r30, 8(r1) + stw r31, 12(r1) + bl __syscall + /* + * If we blocked the signal, then restore the old signal mask. + */ + lwz r3, mynset(r1) + cmpwi cr0, r3, 0 + beq cr0, fixold + li r3, SIG_SETMASK + la r4, myoset(r1) + li r5, 0 + stw r3, 0(r1) + stw r4, 4(r1) + stw r5, 8(r1) + bl _sigprocmask + /* + * If the old sigaction is non-NULL and points to our bridge, + * then point it to the signal handler. 
+ */ +fixold: + cmpwi cr0, r31, 0 + beq cr0, leave + lis r3, hi16[bridge] + ori r3, r3, lo16[bridge] + lwz r4, 0(r31) + cmpw cr0, r3, r4 + bne cr0, leave + lis r6, hi16[handlers] + ori r6, r6, lo16[handlers] + subi r7, r29, 1 + slwi r7, r7, 2 + lwzx r3, r6, r7 ! get it from array of handlers + stw r3, 0(r31) ! put it in old sigaction +leave: + lwz r0, mysave+12(r1) + lwz r29, mysave(r1) + lwz r30, mysave+4(r1) + lwz r31, mysave+8(r1) + addi r1, r1, mysize + mtlr r0 + blr ! return from sigaction + +/* + * Linux calls bridge(signum) or bridge(signum, info, context) with + * arguments in registers r3, r4, r5. + */ +bridge: + mflr r0 + subi r1, r1, 16 + stw r0, 12(r1) + stw r3, 0(r1) ! signal number + stw r4, 4(r1) ! info + stw r5, 8(r1) ! context + + lis r6, hi16[handlers] + ori r6, r6, lo16[handlers] + subi r7, r3, 1 + slwi r7, r7, 2 + lwzx r6, r6, r7 + mtctr r6 + bctrl ! call our signal handler + + lwz r0, 12(r1) + addi r1, r1, 16 + mtlr r0 + blr ! return from bridge + +.sect .bss +handlers: + .space 4 * MAXSIG ! array of signal handlers diff --git a/plat/linuxppc/libsys/signal.c b/plat/linuxppc/libsys/signal.c new file mode 100644 index 000000000..0ed1918e1 --- /dev/null +++ b/plat/linuxppc/libsys/signal.c @@ -0,0 +1,19 @@ +#include + +/* + * Uses our bridge in sigaction.s when calling the signal handler. + * Mimics Linux __NR_signal by using SA_NODEFER | SA_RESETHAND. + */ +sighandler_t signal(int signum, sighandler_t handler) { + struct sigaction new, old; + int i; + + new.sa_handler = handler; + new.sa_mask = 0; /* empty set */ + new.sa_flags = SA_NODEFER | SA_RESETHAND; + + i = sigaction(signum, &new, &old); + if (i < 0) + return SIG_ERR; + return old.sa_handler; +} From a41b6f0458604a32d34e94f68644055285a11ab5 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Mon, 23 Jan 2017 16:19:38 -0500 Subject: [PATCH 03/11] Allow more PowerPC instructions in relocations. I need this for relocations in lis/lfd pairs. 
I add lfd along with addi, lfs, lha, stfs, stfd to the list. --- util/led/relocate.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/util/led/relocate.c b/util/led/relocate.c index 7e2f9db10..036b7dbb8 100644 --- a/util/led/relocate.c +++ b/util/led/relocate.c @@ -107,14 +107,20 @@ static uint32_t get_vc4_valu(char* addr) static bool is_powerpc_memory_op(uint32_t opcode) { - /* Tests for any PowerPC memory indirection instruction where the payload - * is a *signed* 16-bit value. */ + /* Tests for any PowerPC memory indirection instruction (or + * addi) where the payload is a *signed* 16-bit value. */ switch ((opcode & 0xfc000000) >> 26) { + case 14: /* addi */ case 34: /* lbz */ + case 48: /* lfs */ + case 50: /* lfd */ + case 42: /* lha */ case 40: /* lhz */ case 32: /* lwz */ case 38: /* stb */ + case 52: /* stfs */ + case 54: /* stfd */ case 44: /* sth */ case 36: /* stw */ return true; From 032bcffef610e94406c192e39d67b5231c2d7ab1 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Mon, 23 Jan 2017 17:16:39 -0500 Subject: [PATCH 04/11] In PowerPC libem, use the new features of our assembler. The new features are the hi16/lo16 and ha16/lo16 syntax for relocations, and the extended mnemonics like "blr". Use ha16/lo16 to load some double floats with 2 instructions (lis/lfd) instead of 3 (lis/ori/lfd). Use the extended names for branches, comparisons, and bit rotations, so I can more easily read the code. The new names often encode the same machine instructions as the old names, except in a few places where I changed the instructions. Stop using andi. when we don't need to set cr0. In inn.s, I change andi. to extrwi to extract the same bits. In los.s and sts.s, I change "andi. r3, r3, ~3" to "clrrwi r3, r3, 2". This avoids setting cr0 and also stops clearing the high 16 bits of r3. In csa.s, los.s, sts.s, I change some comparisons and right shifts from signed to unsigned (cmplw, cmplwi, srwi), because the sizes are unsigned. 
In inn.s, the right shift can be signed (sraw) or unsigned (srw), but I use srw because we don't need the carry bit. In fef8.s, I save an instruction by using rlwinm instead of addis/andc to clear a field. The code no longer kills r7. In both fef8.s and fif8.s, I remove the list of killed registers. Also remove some whitespace from ends of lines. --- mach/powerpc/libem/aar4.s | 26 +++++++----------- mach/powerpc/libem/and.s | 8 +++--- mach/powerpc/libem/cfi8.s | 9 +------ mach/powerpc/libem/cfu8.s | 31 +++++++++------------- mach/powerpc/libem/cif8.s | 21 ++++++--------- mach/powerpc/libem/cms.s | 14 +++++----- mach/powerpc/libem/com.s | 8 +++--- mach/powerpc/libem/csa.s | 29 ++++++-------------- mach/powerpc/libem/csb.s | 25 +++++++----------- mach/powerpc/libem/cuf8.s | 23 ++++++---------- mach/powerpc/libem/fd_00000000.s | 7 +---- mach/powerpc/libem/fd_80000000.s | 7 +---- mach/powerpc/libem/fd_FFFFFFFF.s | 9 ++----- mach/powerpc/libem/fef8.s | 28 +++++++++----------- mach/powerpc/libem/fif8.s | 28 +++++++++----------- mach/powerpc/libem/inn.s | 12 ++++----- mach/powerpc/libem/ior.s | 8 +++--- mach/powerpc/libem/lar4.s | 19 ++++++-------- mach/powerpc/libem/los.s | 45 ++++++++++++++------------------ mach/powerpc/libem/rck.s | 12 ++++----- mach/powerpc/libem/ret.s | 17 ++++-------- mach/powerpc/libem/sar4.s | 19 ++++++-------- mach/powerpc/libem/set.s | 16 +++++------- mach/powerpc/libem/sts.s | 45 ++++++++++++++------------------ mach/powerpc/libem/xor.s | 8 +++--- mach/powerpc/libem/zer.s | 10 +++---- 26 files changed, 184 insertions(+), 300 deletions(-) diff --git a/mach/powerpc/libem/aar4.s b/mach/powerpc/libem/aar4.s index 2c65af643..5e4155091 100644 --- a/mach/powerpc/libem/aar4.s +++ b/mach/powerpc/libem/aar4.s @@ -1,12 +1,5 @@ -# -! $Source$ -! $State$ -! $Revision$ - -#include "powerpc.h" - .sect .text - + ! Index into a bounds-checked array. ! ! 
On entry: @@ -20,19 +13,20 @@ .define .aar4 .aar4: - li32 r0, .trap_earray + lis r0, hi16[.trap_earray] + ori r0, r0, lo16[.trap_earray] mtspr ctr, r0 ! load CTR with trap address lwz r0, 0(r3) subf. r4, r0, r4 ! adjust range - bcctr IFTRUE, LT, 0 ! check lower bound - + bltctr ! check lower bound + lwz r0, 4(r3) - cmpl cr0, 0, r4, r3 - bcctr IFFALSE, LT, 0 ! check upper bound - + cmplw r4, r3 + bgectr ! check upper bound + lwz r0, 8(r3) mullw r4, r4, r0 ! scale index add r3, r4, r5 ! calculate element address - - bclr ALWAYS, 0, 0 + + blr diff --git a/mach/powerpc/libem/and.s b/mach/powerpc/libem/and.s index 727d79ec0..cb4e1e54a 100644 --- a/mach/powerpc/libem/and.s +++ b/mach/powerpc/libem/and.s @@ -1,5 +1,3 @@ -#include "powerpc.h" - .sect .text ! Set intersection. @@ -12,7 +10,7 @@ mr r4, sp ! r4 = ptr to set a add r5, sp, r3 ! r5 = ptr to set b - rlwinm r6, r3, 30, 2, 31 + srwi r6, r3, 2 mtspr ctr, r6 ! ctr = r3 / 4 1: lwz r7, 0(r4) @@ -21,6 +19,6 @@ stw r8, 0(r5) addi r4, r4, 4 addi r5, r5, 4 - bc DNZ, 0, 1b ! loop ctr times + bdnz 1b ! loop ctr times add sp, sp, r3 - bclr ALWAYS, 0, 0 + blr diff --git a/mach/powerpc/libem/cfi8.s b/mach/powerpc/libem/cfi8.s index 7142c694b..9a87e99ae 100644 --- a/mach/powerpc/libem/cfi8.s +++ b/mach/powerpc/libem/cfi8.s @@ -1,10 +1,3 @@ -# -! $Source$ -! $State$ -! $Revision$ - -#include "powerpc.h" - .sect .text ! Converts a 64-bit double into a 32-bit integer. @@ -17,4 +10,4 @@ fctiwz f0, f0 stfd f0, 0(sp) addi sp, sp, 4 - bclr ALWAYS, 0, 0 ! ...and return + blr diff --git a/mach/powerpc/libem/cfu8.s b/mach/powerpc/libem/cfu8.s index 758df8572..915f84dd2 100644 --- a/mach/powerpc/libem/cfu8.s +++ b/mach/powerpc/libem/cfu8.s @@ -1,10 +1,3 @@ -# -! $Source$ -! $State$ -! $Revision$ - -#include "powerpc.h" - .sect .text ! Converts a 64-bit double into a 32-bit unsigned integer. @@ -13,17 +6,17 @@ .define .cfu8 .cfu8: - li32 r3, .fd_00000000 - lfd f0, 0(r3) ! 
f0 = 0.0 - + lis r3, ha16[.fd_00000000] + lfd f0, lo16[.fd_00000000](r3) ! f0 = 0.0 + lfd f1, 0(sp) ! value to be converted - li32 r3, .fd_FFFFFFFF - lfd f3, 0(r3) ! f3 = 0xFFFFFFFF + lis r3, ha16[.fd_FFFFFFFF] + lfd f3, lo16[.fd_FFFFFFFF](r3) ! f3 = 0xFFFFFFFF + + lis r3, ha16[.fd_80000000] + lfd f4, lo16[.fd_80000000](r3) ! f4 = 0x80000000 - li32 r3, .fd_80000000 - lfd f4, 0(r3) ! f4 = 0x80000000 - fsel f2, f1, f1, f0 fsub f5, f3, f1 fsel f2, f5, f2, f3 @@ -34,11 +27,11 @@ stfd f2, 0(sp) addi sp, sp, 4 - - bclr IFTRUE, LT, 0 - + + bltlr + lwz r3, 0(sp) xoris r3, r3, 0x8000 stw r3, 0(sp) - bclr ALWAYS, 0, 0 + blr diff --git a/mach/powerpc/libem/cif8.s b/mach/powerpc/libem/cif8.s index d2c82e54e..13abbcade 100644 --- a/mach/powerpc/libem/cif8.s +++ b/mach/powerpc/libem/cif8.s @@ -1,9 +1,4 @@ -# -! $Source$ -! $State$ -! $Revision$ - -#include "powerpc.h" +.sect .text; .sect .rom; .sect .data; .sect .bss .sect .text @@ -14,22 +9,22 @@ .define .cif8 .cif8: addi sp, sp, -4 ! make space for the double - + lwz r3, 4(sp) xoris r3, r3, 0x8000 stw r3, 4(sp) ! flip sign of integer value - + addis r3, r0, 0x4330 stw r3, 0(sp) ! set high word to construct a double - + lfd f0, 0(sp) ! load value - li32 r3, pivot - lfd f1, 0(r3) ! load pivot value + lis r3, ha16[pivot] + lfd f1, lo16[pivot](r3) ! load pivot value fsub f0, f0, f1 ! adjust - + stfd f0, 0(sp) ! save value again... - bclr ALWAYS, 0, 0 ! ...and return + blr ! ...and return .sect .rom pivot: diff --git a/mach/powerpc/libem/cms.s b/mach/powerpc/libem/cms.s index 53cb65691..30aaccd20 100644 --- a/mach/powerpc/libem/cms.s +++ b/mach/powerpc/libem/cms.s @@ -1,5 +1,3 @@ -#include "powerpc.h" - .sect .text ! Compare sets a, b. @@ -12,21 +10,21 @@ mr r4, sp ! r4 = ptr to set a add r5, sp, r3 ! r5 = ptr to set b mr r6, r3 ! r6 = size - rlwinm r3, r3, 30, 2, 31 + srwi r3, r3, 2 mtspr ctr, r3 ! ctr = size / 4 1: lwz r7, 0(r4) lwz r8, 0(r5) - cmp cr0, 0, r7, r8 ! compare words in sets + cmpw cr0, r7, r8 ! 
compare words in sets addi r4, r4, 4 addi r5, r5, 4 - bc IFFALSE, EQ, 2f ! branch if not equal - bc DNZ, 0, 1b ! loop ctr times + bne cr0, 2f ! branch if not equal + bdnz 1b ! loop ctr times addi r3, r0, 0 ! equal: return 0 b 3f 2: addi r3, r0, 1 ! not equal: return 1 3: - rlwinm r6, r6, 1, 0, 30 ! r6 = size * 2 + slwi r6, r6, 1 ! r6 = size * 2 add sp, sp, r6 ! remove sets from stack - bclr ALWAYS, 0, 0 + blr diff --git a/mach/powerpc/libem/com.s b/mach/powerpc/libem/com.s index 084eeeb62..3168cfe17 100644 --- a/mach/powerpc/libem/com.s +++ b/mach/powerpc/libem/com.s @@ -1,5 +1,3 @@ -#include "powerpc.h" - .sect .text ! Set complement. @@ -11,12 +9,12 @@ addi sp, sp, 4 mr r4, sp ! r4 = pointer to set a - rlwinm r5, r3, 30, 2, 31 + srwi r5, r3, 2 mtspr ctr, r5 ! ctr = r3 / 4 1: lwz r6, 0(r4) nor r6, r6, r6 ! complement of word stw r6, 0(r4) addi r4, r4, 4 - bc DNZ, 0, 1b ! loop ctr times - bclr ALWAYS, 0, 0 + bdnz 1b ! loop ctr times + blr diff --git a/mach/powerpc/libem/csa.s b/mach/powerpc/libem/csa.s index 88e6e176a..3898241c4 100644 --- a/mach/powerpc/libem/csa.s +++ b/mach/powerpc/libem/csa.s @@ -1,10 +1,3 @@ -# -! $Source$ -! $State$ -! $Revision$ - -#include "powerpc.h" - .sect .text ! this is not a subroutine, but just a @@ -25,23 +18,17 @@ lwz r5, 4(r3) ! fetch lower bound subf. r4, r5, r4 ! adjust value - bcctr IFTRUE, LT, 0 ! jump to default if out of range - + bltctr ! jump to default if out of range + lwz r5, 8(r3) ! fetch range - cmp cr0, 0, r4, r5 - bcctr IFTRUE, GT, 0 ! jump to default if out of range - + cmplw r4, r5 + bgtctr ! jump to default if out of range + addi r3, r3, 12 ! skip header - rlwinm r4, r4, 2, 0, 31-2 ! scale value (<<2) - b 1f -1: + slwi r4, r4, 2 ! scale value (<<2) lwzx r5, r3, r4 ! load target - b 1f -1: mtspr ctr, r5 - + or. r5, r5, r5 ! test it - b 1f -1: - bcctr IFFALSE, EQ, 0 ! jump to target if non-zero + bnectr ! jump to target if non-zero b .trap_ecase ! 
otherwise trap diff --git a/mach/powerpc/libem/csb.s b/mach/powerpc/libem/csb.s index a8df85d7f..571bfc210 100644 --- a/mach/powerpc/libem/csb.s +++ b/mach/powerpc/libem/csb.s @@ -1,10 +1,3 @@ -# -! $Source$ -! $State$ -! $Revision$ - -#include "powerpc.h" - .sect .text ! this is not a subroutine, but just a @@ -22,21 +15,21 @@ lwz r5, 0(r3) ! load default mtspr ctr, r5 - + lwz r6, 4(r3) ! fetch count - + 1: or. r6, r6, r6 ! test count - bcctr IFTRUE, EQ, 0 ! exit if zero + beqctr ! exit if zero addi r6, r6, -1 ! otherwise decrement - + lwzu r7, 8(r3) ! fetch target index, increment pointer - cmp cr0, 0, r4, r7 ! compare with value - bc IFFALSE, EQ, 1b ! if not equal, go again - + cmpw r4, r7 ! compare with value + bne 1b ! if not equal, go again + lwz r7, 4(r3) ! fetch target address mtspr ctr, r7 - + or. r7, r7, r7 ! test it - bcctr IFFALSE, EQ, 0 ! jump to target if non-zero + bnectr ! jump to target if non-zero b .trap_ecase ! otherwise trap diff --git a/mach/powerpc/libem/cuf8.s b/mach/powerpc/libem/cuf8.s index 5d5a12988..ce9932aa1 100644 --- a/mach/powerpc/libem/cuf8.s +++ b/mach/powerpc/libem/cuf8.s @@ -1,10 +1,3 @@ -# -! $Source$ -! $State$ -! $Revision$ - -#include "powerpc.h" - .sect .text ! Converts a 32-bit unsigned integer into a 64-bit double. @@ -14,18 +7,18 @@ .define .cuf8 .cuf8: addi sp, sp, -4 ! make space for the double - - addis r3, r0, 0x4330 + + lis r3, 0x4330 stw r3, 0(sp) ! set high word to construct a double - + lfd f0, 0(sp) ! load value - - li32 r3, pivot - lfd f1, 0(r3) ! load pivot value + + lis r3, ha16[pivot] + lfd f1, lo16[pivot](r3) ! load pivot value fsub f0, f0, f1 ! adjust - + stfd f0, 0(sp) ! save value again... - bclr ALWAYS, 0, 0 ! ...and return + blr ! ...and return .sect .rom pivot: diff --git a/mach/powerpc/libem/fd_00000000.s b/mach/powerpc/libem/fd_00000000.s index cefa91b8e..8ffe44a24 100644 --- a/mach/powerpc/libem/fd_00000000.s +++ b/mach/powerpc/libem/fd_00000000.s @@ -1,10 +1,5 @@ -# -! $Source$ -! $State$ -! 
$Revision$ +.sect .text; .sect .rom; .sect .data; .sect .bss -#include "powerpc.h" - .sect .rom ! Contains a handy double-precision zero. (Also works as a single-precision diff --git a/mach/powerpc/libem/fd_80000000.s b/mach/powerpc/libem/fd_80000000.s index 50eacd586..5c153bba8 100644 --- a/mach/powerpc/libem/fd_80000000.s +++ b/mach/powerpc/libem/fd_80000000.s @@ -1,10 +1,5 @@ -# -! $Source$ -! $State$ -! $Revision$ +.sect .text; .sect .rom; .sect .data; .sect .bss -#include "powerpc.h" - .sect .rom ! Contains a handy double-precision 0x80000000. diff --git a/mach/powerpc/libem/fd_FFFFFFFF.s b/mach/powerpc/libem/fd_FFFFFFFF.s index 9218f2726..88cf04bd9 100644 --- a/mach/powerpc/libem/fd_FFFFFFFF.s +++ b/mach/powerpc/libem/fd_FFFFFFFF.s @@ -1,15 +1,10 @@ -# -! $Source$ -! $State$ -! $Revision$ +.sect .text; .sect .rom; .sect .data; .sect .bss -#include "powerpc.h" - .sect .rom ! Contains a handy double-precision 0xFFFFFFFF. .define .fd_FFFFFFFF -.fd_FFFFFFFF: +.fd_FFFFFFFF: !float 4.294967295e+9 sz 8 .data1 0101,0357,0377,0377,0377,0340,00,00 diff --git a/mach/powerpc/libem/fef8.s b/mach/powerpc/libem/fef8.s index fc72b04f2..f71ab8e38 100644 --- a/mach/powerpc/libem/fef8.s +++ b/mach/powerpc/libem/fef8.s @@ -1,4 +1,4 @@ -#include "powerpc.h" +.sect .text; .sect .rom; .sect .data; .sect .bss .sect .text @@ -10,46 +10,44 @@ ! r3 = fraction, high word (bits 0..31) ! r4 = fraction, low word (bits 32..63) ! r5 = exponent -! Kills: cr0 f0 f1 r6 r7 .define .fef8 .fef8: ! IEEE double-precision format: ! sign exponent fraction ! 0 1..11 12..63 - rlwinm r6, r3, 12, 21, 31 ! r6 = IEEE exponent - addis r7, r0, 0x7ff0 ! r7 = exponent mask + extrwi r6, r3, 11, 1 ! r6 = IEEE exponent addi r5, r6, -1022 ! r5 = true exponent - cmpi cr0, 0, r6, 2047 - bclr IFTRUE, EQ, 0 ! return if infinity or NaN - cmpi cr0, 0, r6, 0 - bc IFFALSE, EQ, 1f ! jump if normalized number + cmpwi r6, 2047 + beqlr ! return if infinity or NaN + cmpwi r6, 0 + bne 1f ! jump if normalized number ! 
Got denormalized number or zero, probably zero. - rlwinm r6, r3, 0, 12, 31 + extrwi r6, r3, 22, 12 addi r5, r0, 0 ! r5 = true exponent = 0 or. r6, r6, r4 ! r6 = high|low fraction - bclr IFTRUE, EQ, 0 ! return if zero + beqlr ! return if zero ! Got denormalized number, not zero. stwu r4, -4(sp) stwu r3, -4(sp) - li32 r6, _2_64 lfd f0, 0(sp) - lfd f1, 0(r6) + lis r6, ha16[_2_64] + lfd f1, lo16[_2_64](r6) fmul f0, f0, f1 ! multiply it by 2**64 stfd f0, 0(sp) lwz r3, 0(sp) lwz r4, 4(sp) - rlwinm r6, r3, 12, 21, 31 ! r6 = IEEE exponent + extrwi r6, r3, 11, 1 ! r6 = IEEE exponent addi sp, sp, 8 addi r5, r6, -1022 - 64 ! r5 = true exponent 1: ! Put fraction in [0.5, 1) or (-1, -0.5] by setting its ! exponent to true 0, IEEE 1022. - andc r3, r3, r7 ! clear old exponent + rlwinm r3, r3, 0, 12, 0 ! clear old exponent oris r3, r3, 1022 << 4 ! set new exponent - bclr ALWAYS, 0, 0 + blr .sect .rom _2_64: diff --git a/mach/powerpc/libem/fif8.s b/mach/powerpc/libem/fif8.s index a26c77830..eda9b04f2 100644 --- a/mach/powerpc/libem/fif8.s +++ b/mach/powerpc/libem/fif8.s @@ -1,5 +1,3 @@ -#include "powerpc.h" - .sect .text ! Multiplies two double-precision floats, then splits the product into @@ -9,7 +7,6 @@ ! Yields: ! f1 = fraction ! f2 = integer -! Kills: cr0 f1 f2 r3 r4 r5 r6 .define .fif8 .fif8: @@ -25,17 +22,16 @@ ! 0 to 51, then the IEEE fraction has that many integer bits. ! (IEEE has an implicit 1 before its fraction. If the IEEE ! fraction has 0 integer bits, we still have an integer.) - rlwinm r5, r3, 12, 21, 31 ! r5 = IEEE exponent + extrwi r5, r3, 11, 1 ! r5 = IEEE exponent addic. r5, r5, -1023 ! r5 = nr of integer bits - bc IFTRUE, LT, no_int - cmpi cr0, 0, r5, 21 - bc IFTRUE, LT, small_int - cmpi cr0, 0, r5, 52 - bc IFTRUE, LT, big_int + blt no_int + cmpwi r5, 21 + blt small_int + cmpwi r5, 52 + blt big_int - ! f1 is an integer without fraction. Jump to calculate - ! fraction f1 = f2 - f1. It will be zero (or perhaps NaN). - fmr f2, f1 + ! 
f1 is an integer without fraction (or infinity or NaN). + fmr f2, f1 ! integer = f1 b subtract no_int: @@ -46,17 +42,17 @@ no_int: small_int: ! f1 has r5 = 0 to 20 integer bits in the IEEE fraction. ! High word has 20 - r5 fraction bits. - addi r6, r0, 20 + li r6, 20 subf r6, r5, r6 srw r3, r3, r6 - addi r4, r0, 0 ! clear low word + li r4, 0 ! clear low word slw r3, r3, r6 ! clear fraction in high word b move_int big_int: ! f1 has r5 = 21 to 51 to integer bits. ! Low word has 52 - r5 fraction bits. - addi r6, r0, 52 + li r6, 52 subf r6, r5, r6 srw r4, r4, r6 slw r4, r4, r6 ! clear fraction in low word @@ -68,4 +64,4 @@ subtract: fsub f1, f1, f2 ! fraction = value - integer done: addi sp, sp, 8 ! restore stack pointer - bclr ALWAYS, 0, 0 + blr diff --git a/mach/powerpc/libem/inn.s b/mach/powerpc/libem/inn.s index 9770ac094..8925e776e 100644 --- a/mach/powerpc/libem/inn.s +++ b/mach/powerpc/libem/inn.s @@ -1,4 +1,4 @@ -#include "powerpc.h" +# .sect .text @@ -14,13 +14,13 @@ addi r5, sp, 8 /* r5 = base address of bit set */ rlwinm r6, r4, 29, 3, 29 /* r6 = byte index of word in set */ - andi. r7, r4, 31 /* r7 = bit within word */ + extrwi r7, r4, 5, 27 /* r7 = bit number within word */ - lwzx r8, r5, r6 /* r8 = individual byte from set */ - sraw r8, r8, r7 - rlwinm r8, r8, 0, 31, 31 + lwzx r8, r5, r6 /* r8 = individual word from set */ + srw r8, r8, r7 + extrwi r8, r8, 1, 31 addi sp, sp, 8 /* retract over the two words */ add sp, sp, r3 /* retract over bitfield */ stwu r8, -4(sp) /* push result */ - bclr ALWAYS, 0, 0 /* return */ + blr /* return */ diff --git a/mach/powerpc/libem/ior.s b/mach/powerpc/libem/ior.s index 363799e1d..e6cd1844e 100644 --- a/mach/powerpc/libem/ior.s +++ b/mach/powerpc/libem/ior.s @@ -1,5 +1,3 @@ -#include "powerpc.h" - .sect .text ! Set union. @@ -12,7 +10,7 @@ mr r4, sp ! r4 = ptr to set a add r5, sp, r3 ! r5 = ptr to set b - rlwinm r6, r3, 30, 2, 31 + srwi r6, r3, 2 mtspr ctr, r6 ! 
ctr = r3 / 4 1: lwz r7, 0(r4) @@ -21,6 +19,6 @@ stw r8, 0(r5) addi r4, r4, 4 addi r5, r5, 4 - bc DNZ, 0, 1b ! loop ctr times + bdnz 1b ! loop ctr times add sp, sp, r3 - bclr ALWAYS, 0, 0 + blr diff --git a/mach/powerpc/libem/lar4.s b/mach/powerpc/libem/lar4.s index 6375979c4..2f5c3683c 100644 --- a/mach/powerpc/libem/lar4.s +++ b/mach/powerpc/libem/lar4.s @@ -1,6 +1,3 @@ -# -#include "powerpc.h" - .sect .text ! Load from bounds-checked array. @@ -18,19 +15,19 @@ ! r3 = ptr to element ! r0 = size of element - cmpi cr0, 0, r0, 1 - bc IFFALSE, EQ, 1f + cmpwi r0, 1 + bne 1f ! Load 1 byte. lbz r4, 0(r3) stwu r4, -4(sp) - bclr ALWAYS, 0, 0 + blr 1: - cmpi cr0, 0, r0, 2 - bc IFFALSE, EQ, 2f + cmpwi r0, 2 + bne 2f ! Load 2 bytes. lhz r4, 0(r3) stwu r4, -4(sp) - bclr ALWAYS, 0, 0 + blr 2: ! Load r0 bytes, where r0 must be a positive multiple of 4. subf sp, r0, sp ! move stack pointer down @@ -39,5 +36,5 @@ addic. r5, r5, -4 ! r5 -= 4 lwzx r4, r5, r3 stwx r4, r5, sp - bc IFTRUE, GT, 3b ! loop if r5 > 0 - bclr ALWAYS, 0, 0 + bgt 3b ! loop if r5 > 0 + blr diff --git a/mach/powerpc/libem/los.s b/mach/powerpc/libem/los.s index f867fe770..2d412bce8 100644 --- a/mach/powerpc/libem/los.s +++ b/mach/powerpc/libem/los.s @@ -1,10 +1,3 @@ -# -! $Source$ -! $State$ -! $Revision$ - -#include "powerpc.h" - .sect .text ! Loads a variable-sized structure onto the stack. @@ -15,32 +8,32 @@ .define .los .los: ! These sizes are handled specially. - - cmpi cr0, 0, r3, 1 - bc IFFALSE, GT, size1 - - cmpi cr0, 0, r3, 2 - bc IFFALSE, GT, size2 - - cmpi cr0, 0, r3, 4 - bc IFFALSE, GT, size4 - + + cmplwi r3, 1 + ble size1 + + cmplwi r3, 2 + ble size2 + + cmplwi r3, 4 + ble size4 + ! Variable-sized structure. - + addi r3, r3, 3 - andi. r3, r3, ~3 ! align size - + clrrwi r3, r3, 2 ! align size + add r4, r4, r3 ! adjust address to top of block - srawi r3, r3, 2 ! convert size to the number of words + srwi r3, r3, 2 ! 
convert size to the number of words mtspr ctr, r3 - + 1: lwzu r5, -4(r4) stwu r5, -4(sp) - bc DNZ, 0, 1b ! decrement CTR, jump if non-zero - bclr ALWAYS, 0, 0 - + bdnz 1b ! decrement CTR, jump if non-zero + blr + size1: lbz r3, 0(r4) b 1f @@ -51,4 +44,4 @@ size4: lwz r3, 0(r4) 1: stwu r3, -4(sp) - bclr ALWAYS, 0, 0 + blr diff --git a/mach/powerpc/libem/rck.s b/mach/powerpc/libem/rck.s index 0d5717f16..9008be610 100644 --- a/mach/powerpc/libem/rck.s +++ b/mach/powerpc/libem/rck.s @@ -1,5 +1,3 @@ -#include "powerpc.h" - .sect .text ! Bounds check. Traps if the value is out of range. @@ -12,11 +10,11 @@ addi sp, sp, 4 ! leave value on stack lwz r5, 0 (r3) - cmp cr0, 0, r4, r5 - bc IFTRUE, LT, .trap_erange + cmpw r4, r5 + blt .trap_erange lwz r5, 4 (r3) - cmp cr0, 0, r4, r5 - bc IFTRUE, GT, .trap_erange + cmpw r4, r5 + bgt .trap_erange - bclr ALWAYS, 0, 0 + blr diff --git a/mach/powerpc/libem/ret.s b/mach/powerpc/libem/ret.s index cca79ae86..c498af240 100644 --- a/mach/powerpc/libem/ret.s +++ b/mach/powerpc/libem/ret.s @@ -1,19 +1,12 @@ -# -! $Source$ -! $State$ -! $Revision$ - -#include "powerpc.h" - .sect .text - + ! Standard boilerplate for returning from functions. .define .ret .ret: lwz r0, 4(fp) mtspr lr, r0 - lwz r0, 0(fp) ! our stack frame becomes invalid as soon as... - addi sp, fp, 8 ! ...we change sp - or fp, r0, r0 - bclr ALWAYS, 0, 0 + lwz r0, 0(fp) ! our stack frame becomes invalid as soon as... + addi sp, fp, 8 ! ...we change sp + mr fp, r0 + blr diff --git a/mach/powerpc/libem/sar4.s b/mach/powerpc/libem/sar4.s index 0c1368af1..7c9778958 100644 --- a/mach/powerpc/libem/sar4.s +++ b/mach/powerpc/libem/sar4.s @@ -1,6 +1,3 @@ -# -#include "powerpc.h" - .sect .text ! Store to bounds-checked array. @@ -18,21 +15,21 @@ ! r3 = ptr to element ! r0 = size of element - cmpi cr0, 0, r0, 1 - bc IFFALSE, EQ, 1f + cmpwi r0, 1 + bne 1f ! Store 1 byte. 
lwz r4, 0(sp) addi sp, sp, 4 stb r4, 0(r3) - bclr ALWAYS, 0, 0 + blr 1: - cmpi cr0, 0, r0, 2 - bc IFFALSE, EQ, 2f + cmpwi r0, 2 + bne 2f ! Store 2 bytes. lwz r4, 0(sp) addi sp, sp, 4 sth r4, 0(r3) - bclr ALWAYS, 0, 0 + blr 2: ! Store r0 bytes, where r0 must be a positive multiple of 4. or r5, r0, r0 ! index r5 = length r0 @@ -40,6 +37,6 @@ addic. r5, r5, -4 ! r5 -= 4 lwzx r4, r5, sp stwx r4, r5, r3 - bc IFTRUE, GT, 3b ! loop if r5 > 0 + bgt 3b ! loop if r5 > 0 add sp, r0, sp ! move stack pointer up - bclr ALWAYS, 0, 0 + blr diff --git a/mach/powerpc/libem/set.s b/mach/powerpc/libem/set.s index b42881cd7..3c4a9e579 100644 --- a/mach/powerpc/libem/set.s +++ b/mach/powerpc/libem/set.s @@ -1,5 +1,3 @@ -#include "powerpc.h" - .sect .text ! Create singleton set. @@ -11,22 +9,22 @@ lwz r4, 4 (sp) addi sp, sp, 8 - rlwinm r7, r3, 30, 2, 31 + srwi r7, r3, 2 neg r5, r3 add sp, sp, r5 ! allocate set mr r6, sp ! r6 = ptr to set mtspr ctr, r7 ! ctr = r3 / 4 1: - rlwinm. r7, r4, 0, 0, 26 ! r7 = r4 & ~31 - bc IFTRUE, EQ, 2f ! branch if r4 in 0..31 - addi r5, r0, 0 ! no bit, word is zero + clrrwi. r7, r4, 5 ! r7 = r4 & ~31 + beq 2f ! branch if r4 in 0..31 + li r5, 0 ! no bit, word is zero b 3f 2: - addi r5, r0, 1 + li r5, 1 slw r5, r5, r4 ! yes bit, set bit in word 3: stw r5, 0(r6) ! store word in set addi r4, r4, -32 addi r6, r6, 4 - bc DNZ, 0, 1b ! loop ctr times - bclr ALWAYS, 0, 0 + bdnz 1b ! loop ctr times + blr diff --git a/mach/powerpc/libem/sts.s b/mach/powerpc/libem/sts.s index 2f8022ad9..411b0fb66 100644 --- a/mach/powerpc/libem/sts.s +++ b/mach/powerpc/libem/sts.s @@ -1,10 +1,3 @@ -# -! $Source$ -! $State$ -! $Revision$ - -#include "powerpc.h" - .sect .text ! Stores a variable-sized structure from the stack. @@ -15,35 +8,35 @@ .define .sts .sts: ! These sizes are handled specially. 
- + lwz r5, 0(sp) - cmpi cr0, 0, r3, 1 - bc IFFALSE, GT, size1 - - cmpi cr0, 0, r3, 2 - bc IFFALSE, GT, size2 - - cmpi cr0, 0, r3, 4 - bc IFFALSE, GT, size4 - + cmplwi r3, 1 + ble size1 + + cmplwi r3, 2 + ble size2 + + cmplwi r3, 4 + ble size4 + ! Variable-sized structure. - + addi r3, r3, 3 - andi. r3, r3, ~3 ! align size - - srawi r3, r3, 2 ! convert size to the number of words + clrrwi r3, r3, 2 ! align size + + srwi r3, r3, 2 ! convert size to the number of words mtspr ctr, r3 - + 1: lwz r5, 0(sp) addi sp, sp, 4 stw r5, 0(r4) addi r4, r4, 4 - bc DNZ, 0, 1b ! decrement CTR, jump if non-zero - bclr ALWAYS, 0, 0 - + bdnz 1b ! decrement CTR, jump if non-zero + blr + size1: stb r5, 0(r4) b 1f @@ -54,4 +47,4 @@ size4: stw r5, 0(r4) 1: addi sp, sp, 4 - bclr ALWAYS, 0, 0 + blr diff --git a/mach/powerpc/libem/xor.s b/mach/powerpc/libem/xor.s index 9d4bc76b9..acb02a032 100644 --- a/mach/powerpc/libem/xor.s +++ b/mach/powerpc/libem/xor.s @@ -1,5 +1,3 @@ -#include "powerpc.h" - .sect .text ! Set symmetric difference. @@ -10,7 +8,7 @@ .xor: mr r4, sp ! r4 = ptr to set a add r5, sp, r3 ! r5 = ptr to set b - rlwinm r6, r3, 30, 2, 31 + srwi r6, r3, 2 mtspr ctr, r6 ! ctr = r3 / 4 1: lwz r7, 0(r4) @@ -19,6 +17,6 @@ stw r8, 0(r5) addi r4, r4, 4 addi r5, r5, 4 - bc DNZ, 0, 1b ! loop ctr times + bdnz 1b ! loop ctr times add sp, sp, r3 - bclr ALWAYS, 0, 0 + blr diff --git a/mach/powerpc/libem/zer.s b/mach/powerpc/libem/zer.s index 697a5715f..a47a150cc 100644 --- a/mach/powerpc/libem/zer.s +++ b/mach/powerpc/libem/zer.s @@ -1,5 +1,3 @@ -#include "powerpc.h" - .sect .text ! Create empty set. @@ -10,8 +8,8 @@ lwz r3, 0(sp) addi sp, sp, 4 - rlwinm r7, r3, 30, 2, 31 - addi r4, r0, 0 ! r4 = zero + srwi r7, r3, 2 + li r4, 0 ! r4 = zero neg r5, r3 add sp, sp, r5 ! allocate set mr r6, sp ! r6 = ptr to set @@ -19,5 +17,5 @@ 1: stw r4, 0(r6) ! store zero in set addi r6, r6, 4 - bc DNZ, 0, 1b ! loop ctr times - bclr ALWAYS, 0, 0 + bdnz 1b ! 
loop ctr times + blr From bb67dbeb1151c81795b0051d068dc794ec15a340 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Mon, 23 Jan 2017 17:31:29 -0500 Subject: [PATCH 05/11] Use "kills ALL" instead of a list of killed registers. This is for fef 8 and fif 8. I changed .fef8 so it no longer kills r7, but I don't want to update the list. We already use "kills ALL" for most other calls to libem. --- mach/powerpc/ncg/table | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index 1fc1f5b19..4deceefe8 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -2238,14 +2238,14 @@ PATTERNS pat fef $1==INT64 /* Split exponent, fraction */ with GPR3 GPR4 - kills FPR0, FPR1, GPR6, GPR7 + kills ALL gen bl {LABEL, ".fef8"} yields R4 R3 R5 pat fif $1==INT64 /* Multiply then split integer, fraction */ with FPR1 FPR2 - kills FPR1, FPR2, GPR3, GPR4, GPR5, GPR6 + kills ALL gen bl {LABEL, ".fif8"} yields F1 F2 From 188b23bade620afc6be386973e45ce77a5999d0f Mon Sep 17 00:00:00 2001 From: George Koehler Date: Tue, 24 Jan 2017 11:26:35 -0500 Subject: [PATCH 06/11] Add constraints for pat lab, as done in the m68020 table. Always use 'kills ALL' when reaching a label, because our registers and tokens have the wrong values if the program jumps to this label from somewhere else. When falling through a label, if the top element is in r3, then require that the rest of the stack is in the real STACK, not in registers or tokens. I'm doing this to be certain that the missing constraints are not causing bugs. I did not find any such bug, perhaps because the labels are usually near other instructions (like conditional branches and function calls) that stack or kill tokens. 
--- mach/powerpc/ncg/table | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index 4deceefe8..d42eb27eb 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -1852,12 +1852,14 @@ PATTERNS /* Other branching and labelling */ pat lab topeltsize($1)==4 && !fallthrough($1) + kills ALL gen labeldef $1 yields R3 pat lab topeltsize($1)==4 && fallthrough($1) - with GPR3 + with GPR3 STACK + kills ALL gen labeldef $1 yields %1 From a348853eced4b8836e7dbb2be696829d638bfab7 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Wed, 25 Jan 2017 11:24:23 -0500 Subject: [PATCH 07/11] Add missing size declarations for 8-byte registers. This fixes the coercion from IND_ALL_D to FREG. The coercion had never happened, because IND_ALL_D had 8 bytes but FREG had 4 bytes. Instead, ncg always stacked the IND_ALL_D and unstacked a FREG. The stacking rule uses f0, so the code did load f0 with the indirect value, push f0 to stack, load f1 from stack, move stack pointer. Now that FREG has 8 bytes, ncg does the coercion, and the code just loads f1 with the indirect value. 
--- mach/powerpc/ncg/table | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index d42eb27eb..99c2e404c 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -42,9 +42,9 @@ PROPERTIES GPR /* any GPR */ REG /* any allocatable GPR */ - REG_PAIR /* speed hack for sti 8 */ - FPR /* any FPR */ - FREG /* any allocatable FPR */ + REG_PAIR(8) /* speed hack for sti 8 */ + FPR(8) /* any FPR */ + FREG(8) /* any allocatable FPR */ FSREG /* any allocatable single-precision FPR */ SPR /* any SPR */ CR /* any CR */ @@ -56,10 +56,10 @@ PROPERTIES CR0 CR1 - FPR0 FPR1 FPR2 FPR3 FPR4 FPR5 FPR6 FPR7 - FPR8 FPR9 FPR10 FPR11 FPR12 FPR13 FPR14 FPR15 - FPR16 FPR17 FPR18 FPR19 FPR20 FPR21 FPR22 FPR23 - FPR24 FPR25 FPR26 FPR27 FPR28 FPR29 FPR30 FPR31 + FPR0(8) FPR1(8) FPR2(8) FPR3(8) FPR4(8) FPR5(8) FPR6(8) FPR7(8) + FPR8(8) FPR9(8) FPR10(8) FPR11(8) FPR12(8) FPR13(8) FPR14(8) FPR15(8) + FPR16(8) FPR17(8) FPR18(8) FPR19(8) FPR20(8) FPR21(8) FPR22(8) FPR23(8) + FPR24(8) FPR25(8) FPR26(8) FPR27(8) FPR28(8) FPR29(8) FPR30(8) FPR31(8) REGISTERS From f64b7d8ea08a91aeb571cf38c919e2c0dd9609f0 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Wed, 25 Jan 2017 19:08:55 -0500 Subject: [PATCH 08/11] Rewrite how PowerPC ncg does conditional branches and tests. The rewritten code rules bring 3 new features: 1. The new rules compare a small constant with a register by reversing the comparison and using `cmpwi` or `cmplwi`. The old rules put the constant in a register. 2. The new rules emit shorter code to yield the test results, without referencing the tables in mach/powerpc/ncg/tge.s. 3. The new rules use the extended `beq` and relatives, not the basic `bc`, in the assembly output. I delete the old tristate tokens and the old moves, because they confused me. Some of the old moves weren't really moves. For example, `move R3, C0` and then `move C0, R0` did not move r3 to r0. I rename C0 to CR0. 
--- mach/powerpc/ncg/table | 679 +++++++++++++++++++++++++++++------------ 1 file changed, 490 insertions(+), 189 deletions(-) diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index 99c2e404c..034c8f32d 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -27,15 +27,6 @@ PC_OFFSET = 4 /* Offset of saved PC relative to our FP */ #define los(n) (lo(n) | (((0-(lo(n)>>15)) & ~0xFFFF))) #define his(n) ((hi(n) + (lo(n)>>15)) & 0xFFFF) -#define IFFALSE {CONST, 4} -#define IFTRUE {CONST, 12} -#define ALWAYS {CONST, 20} -#define DCTRZ {CONST, 34} - -#define LT {CONST, 0} -#define GT {CONST, 1} -#define EQ {CONST, 2} - PROPERTIES @@ -54,8 +45,6 @@ PROPERTIES GPR16 GPR17 GPR18 GPR19 GPR20 GPR21 GPR22 GPR23 GPR24 GPR25 GPR26 GPR27 GPR28 GPR29 GPR30 GPR31 - CR0 CR1 - FPR0(8) FPR1(8) FPR2(8) FPR3(8) FPR4(8) FPR5(8) FPR6(8) FPR7(8) FPR8(8) FPR9(8) FPR10(8) FPR11(8) FPR12(8) FPR13(8) FPR14(8) FPR15(8) FPR16(8) FPR17(8) FPR18(8) FPR19(8) FPR20(8) FPR21(8) FPR22(8) FPR23(8) @@ -158,7 +147,7 @@ REGISTERS LR("lr") : SPR. CTR("ctr") : SPR. - C0("cr0") : CR, CR0. + CR0("cr0") : CR. #define RSCRATCH R11 #define FSCRATCH F0 @@ -200,13 +189,6 @@ TOKENS SUM_RC = { GPR reg; INT off; } 4. SUM_RR = { GPR reg1; GPR reg2; } 4. - TRISTATE_RC_S = { GPR reg; INT val; } 4. - TRISTATE_RC_U = { GPR reg; INT val; } 4. - TRISTATE_RR_S = { GPR reg1; GPR reg2; } 4. - TRISTATE_RR_U = { GPR reg1; GPR reg2; } 4. - - TRISTATE_FF = { FPR reg1; FPR reg2; } 4. - SEX_B = { GPR reg; } 4. SEX_H = { GPR reg; } 4. @@ -231,6 +213,20 @@ TOKENS XOR_RIS = { GPR reg; INT valhi; } 4. XOR_RC = { GPR reg; INT val; } 4. + COND_RC = { GPR reg; INT val; } 4. + COND_RR = { GPR reg1; GPR reg2; } 4. + CONDL_RC = { GPR reg; INT val; } 4. + CONDL_RR = { GPR reg1; GPR reg2; } 4. + COND_FS = { FSREG reg1; FSREG reg2; } 4. + COND_FD = { FREG reg1; FREG reg2; } 4. + + XEQ = { GPR reg; } 4. + XNE = { GPR reg; } 4. + XGT = { GPR reg; } 4. + XGE = { GPR reg; } 4. + XLT = { GPR reg; } 4. + XLE = { GPR reg; } 4. 
+ SETS @@ -246,9 +242,6 @@ SETS SUM_ALL = SUM_RC + SUM_RR. - TRISTATE_ALL = TRISTATE_RC_S + TRISTATE_RC_U + TRISTATE_RR_S + - TRISTATE_RR_U + TRISTATE_FF. - SEX_ALL = SEX_B + SEX_H. LOGICAL_ALL = NOT_R + AND_RR + OR_RR + OR_RC + XOR_RR + @@ -265,8 +258,7 @@ SETS /* anything killed by sti (store indirect) */ MEMORY = IND_ALL_BHW + IND_ALL_D. - OP_ALL_W = SUM_ALL + TRISTATE_ALL + SEX_ALL + LOGICAL_ALL + - IND_ALL_W. + OP_ALL_W = SUM_ALL + SEX_ALL + LOGICAL_ALL + IND_ALL_W. INSTRUCTIONS @@ -293,14 +285,27 @@ INSTRUCTIONS andisX "andis." GPR:wo:cc, GPR:ro, CONST:ro. b LABEL:ro. bc CONST:ro, CONST:ro, LABEL:ro. + beq LABEL:ro. + bne LABEL:ro. + bgt LABEL:ro. + bge LABEL:ro. + blt LABEL:ro. + ble LABEL:ro. + bxx LABEL:ro. /* dummy */ bcctr CONST:ro, CONST:ro, CONST:ro. + bctr. bcctrl CONST:ro, CONST:ro, CONST:ro. + bctrl. bclr CONST:ro, CONST:ro, CONST:ro. bl LABEL:ro. cmp CR:ro, CONST:ro, GPR:ro, GPR:ro kills :cc. + cmpw GPR:ro, GPR:ro kills :cc. cmpi CR:ro, CONST:ro, GPR:ro, CONST:ro kills :cc. + cmpwi GPR:ro, CONST:ro kills :cc. cmpl CR:ro, CONST:ro, GPR:ro, GPR:ro kills :cc. + cmplw GPR:ro, GPR:ro kills :cc. cmpli CR:ro, CONST:ro, GPR:ro, CONST:ro kills :cc. + cmplwi GPR:ro, CONST:ro kills :cc. divw GPR:wo, GPR:ro, GPR:ro cost(4, 23). divwu GPR:wo, GPR:ro, GPR:ro cost(4, 23). eqv GPR:wo, GPR:ro, GPR:ro. @@ -308,7 +313,8 @@ INSTRUCTIONS extsh GPR:wo, GPR:ro. fadd FREG:wo, FREG:ro, FREG:ro cost(4, 5). fadds FSREG:wo, FSREG:ro, FSREG:ro cost(4, 5). - fcmpo CR:wo, FPR:ro, FPR:ro cost(4, 5). + fcmpo CR:wo, FREG:ro, FREG:ro cost(4, 5). + fcmpo CR:wo, FSREG:ro, FSREG:ro cost(4, 5). fdiv FREG:wo, FREG:ro, FREG:ro cost(4, 35). fdivs FSREG:wo, FSREG:ro, FSREG:ro cost(4, 21). fmr FPR:wo, FPR:ro cost(4, 5). @@ -349,6 +355,8 @@ INSTRUCTIONS oris GPR:wo, GPR:ro, CONST:ro. orX "or." GPR:wo:cc, GPR:ro, GPR:ro. rlwinm GPR:wo, GPR:ro, CONST:ro, CONST:ro, CONST:ro. + extlwi GPR:wo, GPR:ro, CONST:ro, CONST:ro. + extrwi GPR:wo, GPR:ro, CONST:ro, CONST:ro. slw GPR:wo, GPR:ro, GPR:ro. 
subf GPR:wo, GPR:ro, GPR:ro. sraw GPR:wo, GPR:ro, GPR:ro cost(4, 2). @@ -566,67 +574,6 @@ MOVES COMMENT("move FPR->IND_RR_W") stfdx %1, %2.reg1, %2.reg2 -/* Extract condition code field (actually produces (CC&3)<<2) */ - - from CR0 to GPR - gen - COMMENT("move CR0->GPR") - mfcr %2 - rlwinm %2, %2, {CONST, 4}, {CONST, 32-4}, {CONST, 31-2} - -/* Comparisons */ - - from TRISTATE_RR_S to CR0 - gen - cmp %2, {CONST, 0}, %1.reg1, %1.reg2 - - from TRISTATE_RR_U to CR0 - gen - cmpl %2, {CONST, 0}, %1.reg1, %1.reg2 - - from TRISTATE_RC_S to CR0 - gen - COMMENT("move TRISTATE_RC_S->CR0 large") - move {CONST, %1.val}, RSCRATCH - cmp %2, {CONST, 0}, %1.reg, RSCRATCH - - from TRISTATE_RC_U smallu(%val) to CR0 - gen - COMMENT("move TRISTATE_RC_U->CR0 small") - cmpli %2, {CONST, 0}, %1.reg, {CONST, %1.val} - - from TRISTATE_RC_U to CR0 - gen - COMMENT("move TRISTATE_RC_U->CR0") - move {CONST, %1.val}, RSCRATCH - cmpl %2, {CONST, 0}, %1.reg, RSCRATCH - - from TRISTATE_FF to CR0 - gen - COMMENT("move TRISTATE_FF->CR0") - fcmpo %2, %1.reg1, %1.reg2 - - from GPR to CR0 - gen - COMMENT("move GPR->CR0") - orX RSCRATCH, %1, %1 /* alas, can't call test */ - - from TRISTATE_RR_S + TRISTATE_RC_S + TRISTATE_FF to GPR - gen - COMMENT("move TRISTATE_R*_S->GPR") - move %1, C0 - move C0, RSCRATCH - move {LABEL, ".tristate_s_table"}, %2 - lwzx %2, %2, RSCRATCH - - from TRISTATE_RR_U + TRISTATE_RC_U to GPR - gen - COMMENT("move TRISTATE_R*_U->GPR") - move %1, C0 - move C0, RSCRATCH - move {LABEL, ".tristate_u_table"}, %2 - lwzx %2, %2, RSCRATCH - /* Logicals */ from NOT_R to GPR @@ -669,6 +616,71 @@ MOVES COMMENT("move XOR_RC->GPR") xori %2, %1.reg, {CONST, %1.val} +/* Conditions */ + + /* Compare values, then copy cr0 to GPR. 
*/ + + from COND_RC to GPR + gen + cmpwi %1.reg, {CONST, %1.val} + mfcr %2 + + from COND_RR to GPR + gen + cmpw %1.reg1, %1.reg2 + mfcr %2 + + from CONDL_RC to GPR + gen + cmplwi %1.reg, {CONST, %1.val} + mfcr %2 + + from CONDL_RR to GPR + gen + cmplw %1.reg1, %1.reg2 + mfcr %2 + + from COND_FS to GPR + gen + fcmpo CR0, %1.reg1, %1.reg2 + mfcr %2 + + from COND_FD to GPR + gen + fcmpo CR0, %1.reg1, %1.reg2 + mfcr %2 + + /* Given a copy of cr0 in %1.reg, extract a condition bit + * (lt, gt, eq) and perhaps flip it. + */ + + from XEQ to GPR + gen + extrwi %2, %1.reg, {CONST, 1}, {CONST, 2} + + from XNE to GPR + gen + extrwi %2, %1.reg, {CONST, 1}, {CONST, 2} + xori %2, %2, {CONST, 1} + + from XGT to GPR + gen + extrwi %2, %1.reg, {CONST, 1}, {CONST, 1} + + from XGE to GPR + gen + extrwi %2, %1.reg, {CONST, 1}, {CONST, 0} + xori %2, %2, {CONST, 1} + + from XLT to GPR + gen + extrwi %2, %1.reg, {CONST, 1}, {CONST, 0} + + from XLE to GPR + gen + extrwi %2, %1.reg, {CONST, 1}, {CONST, 1} + xori %2, %2, {CONST, 1} + /* Miscellaneous */ from OP_ALL_W + LABEL + CONST_ALL to GPRE @@ -720,9 +732,9 @@ STACKINGRULES extsh RSCRATCH, %1.reg stwu RSCRATCH, {GPRINDIRECT, SP, 0-4} - from SUM_ALL + TRISTATE_ALL + LOGICAL_ALL to STACK + from SUM_ALL + LOGICAL_ALL to STACK gen - COMMENT("stack SUM_ALL + TRISTATE_ALL + LOGICAL_ALL") + COMMENT("stack SUM_ALL + LOGICAL_ALL") move %1, RSCRATCH stwu RSCRATCH, {GPRINDIRECT, SP, 0-4} @@ -804,7 +816,7 @@ COERCIONS extsh %a, %1.reg yields %a - from SUM_ALL + TRISTATE_ALL + LOGICAL_ALL + from SUM_ALL + LOGICAL_ALL uses REG gen move %1, %a @@ -1683,153 +1695,337 @@ PATTERNS cal ".inn" + /* Boolean resolutions */ pat teq /* top = (top == 0) */ - with TRISTATE_ALL + GPR + with REG uses reusing %1, REG gen - move %1, C0 - move C0, RSCRATCH - move {LABEL, ".teq_table"}, %a - lwzx %a, %a, RSCRATCH + test %1 + mfcr %a + move {XEQ, %a}, %a yields %a pat tne /* top = (top != 0) */ - with TRISTATE_ALL + GPR + with REG uses reusing %1, REG gen - move %1, C0 
- move C0, RSCRATCH - move {LABEL, ".tne_table"}, %a - lwzx %a, %a, RSCRATCH + test %1 + mfcr %a + move {XNE, %a}, %a yields %a pat tlt /* top = (top < 0) */ - with TRISTATE_ALL + GPR + with REG uses reusing %1, REG gen - move %1, C0 - move C0, RSCRATCH - move {LABEL, ".tlt_table"}, %a - lwzx %a, %a, RSCRATCH + test %1 + mfcr %a + move {XLT, %a}, %a yields %a pat tle /* top = (top <= 0) */ - with TRISTATE_ALL + GPR + with REG uses reusing %1, REG gen - move %1, C0 - move C0, RSCRATCH - move {LABEL, ".tle_table"}, %a - lwzx %a, %a, RSCRATCH + test %1 + mfcr %a + move {XLE, %a}, %a yields %a pat tgt /* top = (top > 0) */ - with TRISTATE_ALL + GPR + with REG uses reusing %1, REG gen - move %1, C0 - move C0, RSCRATCH - move {LABEL, ".tgt_table"}, %a - lwzx %a, %a, RSCRATCH + test %1 + mfcr %a + move {XGT, %a}, %a yields %a pat tge /* top = (top >= 0) */ - with TRISTATE_ALL + GPR + with REG uses reusing %1, REG gen - move %1, C0 - move C0, RSCRATCH - move {LABEL, ".tge_table"}, %a - lwzx %a, %a, RSCRATCH + test %1 + mfcr %a + move {XGE, %a}, %a yields %a + pat cmi teq $1==4 /* Signed second == top */ + with REG CONST2 + uses reusing %1, REG={COND_RC, %1, %2.val} + gen move {XEQ, %a}, %a + yields %a + with CONST2 REG + uses reusing %1, REG={COND_RC, %2, %1.val} + gen move {XEQ, %a}, %a + yields %a + with REG REG + uses reusing %1, REG={COND_RR, %2, %1} + gen move {XEQ, %a}, %a + yields %a + pat cmi tne $1==4 /* Signed second != top */ + with REG CONST2 + uses reusing %1, REG={COND_RC, %1, %2.val} + gen move {XNE, %a}, %a + yields %a + with CONST2 REG + uses reusing %1, REG={COND_RC, %2, %1.val} + gen move {XNE, %a}, %a + yields %a + with REG REG + uses reusing %1, REG={COND_RR, %2, %1} + gen move {XNE, %a}, %a + yields %a + pat cmi tgt $1==4 /* Signed second > top */ + with REG CONST2 + uses reusing %1, REG={COND_RC, %1, %2.val} + gen move {XLT, %a}, %a + yields %a + with CONST2 REG + uses reusing %1, REG={COND_RC, %2, %1.val} + gen move {XGT, %a}, %a + yields %a + with 
REG REG + uses reusing %1, REG={COND_RR, %2, %1} + gen move {XGT, %a}, %a + yields %a -/* Simple branches */ + pat cmi tge $1==4 /* Signed second >= top */ + with REG CONST2 + uses reusing %1, REG={COND_RC, %1, %2.val} + gen move {XLE, %a}, %a + yields %a + with CONST2 REG + uses reusing %1, REG={COND_RC, %2, %1.val} + gen move {XGE, %a}, %a + yields %a + with REG REG + uses reusing %1, REG={COND_RR, %2, %1} + gen move {XGE, %a}, %a + yields %a - pat zeq /* Branch if signed top == 0 */ - with TRISTATE_ALL+GPR STACK - gen - move %1, C0 - bc IFTRUE, EQ, {LABEL, $1} + pat cmi tlt $1==4 /* Signed second < top */ + with REG CONST2 + uses reusing %1, REG={COND_RC, %1, %2.val} + gen move {XGT, %a}, %a + yields %a + with CONST2 REG + uses reusing %1, REG={COND_RC, %2, %1.val} + gen move {XLT, %a}, %a + yields %a + with REG REG + uses reusing %1, REG={COND_RR, %2, %1} + gen move {XLT, %a}, %a + yields %a - pat beq - leaving - cmi INT32 - zeq $1 + pat cmi tle $1==4 /* Signed second <= top */ + with REG CONST2 + uses reusing %1, REG={COND_RC, %1, %2.val} + gen move {XGE, %a}, %a + yields %a + with CONST2 REG + uses reusing %1, REG={COND_RC, %2, %1.val} + gen move {XLE, %a}, %a + yields %a + with REG REG + uses reusing %1, REG={COND_RR, %2, %1} + gen move {XLE, %a}, %a + yields %a - pat zne /* Branch if signed top != 0 */ - with TRISTATE_ALL+GPR STACK - gen - move %1, C0 - bc IFFALSE, EQ, {LABEL, $1} + pat cmu teq $1==4 /* Unsigned second == top */ + with REG UCONST2 + uses reusing %1, REG={CONDL_RC, %1, %2.val} + gen move {XEQ, %a}, %a + yields %a + with UCONST2 REG + uses reusing %1, REG={CONDL_RC, %2, %1.val} + gen move {XEQ, %a}, %a + yields %a + with REG REG + uses reusing %1, REG={CONDL_RR, %2, %1} + gen move {XEQ, %a}, %a + yields %a - pat bne - leaving - cmi INT32 - zne $1 + pat cmu tne $1==4 /* Unsigned second != top */ + with REG UCONST2 + uses reusing %1, REG={CONDL_RC, %1, %2.val} + gen move {XNE, %a}, %a + yields %a + with UCONST2 REG + uses reusing %1, 
REG={CONDL_RC, %2, %1.val} + gen move {XNE, %a}, %a + yields %a + with REG REG + uses reusing %1, REG={CONDL_RR, %2, %1} + gen move {XNE, %a}, %a + yields %a - pat zgt /* Branch if signed top > 0 */ - with TRISTATE_ALL+GPR STACK - gen - move %1, C0 - bc IFTRUE, GT, {LABEL, $1} + pat cmu tgt $1==4 /* Unsigned second > top */ + with REG UCONST2 + uses reusing %1, REG={CONDL_RC, %1, %2.val} + gen move {XLT, %a}, %a + yields %a + with UCONST2 REG + uses reusing %1, REG={CONDL_RC, %2, %1.val} + gen move {XGT, %a}, %a + yields %a + with REG REG + uses reusing %1, REG={CONDL_RR, %2, %1} + gen move {XGT, %a}, %a + yields %a - pat bgt - leaving - cmi INT32 - zgt $1 + pat cmu tge $1==4 /* Unsigned second >= top */ + with REG UCONST2 + uses reusing %1, REG={CONDL_RC, %1, %2.val} + gen move {XLE, %a}, %a + yields %a + with UCONST2 REG + uses reusing %1, REG={CONDL_RC, %2, %1.val} + gen move {XGE, %a}, %a + yields %a + with REG REG + uses reusing %1, REG={CONDL_RR, %2, %1} + gen move {XGE, %a}, %a + yields %a + + pat cmu tlt $1==4 /* Unsigned second < top */ + with REG UCONST2 + uses reusing %1, REG={CONDL_RC, %1, %2.val} + gen move {XGT, %a}, %a + yields %a + with UCONST2 REG + uses reusing %1, REG={CONDL_RC, %2, %1.val} + gen move {XLT, %a}, %a + yields %a + with REG REG + uses reusing %1, REG={CONDL_RR, %2, %1} + gen move {XLT, %a}, %a + yields %a - pat zge /* Branch if signed top >= 0 */ - with TRISTATE_ALL+GPR STACK + pat cmu tle $1==4 /* Unsigned second <= top */ + with REG UCONST2 + uses reusing %1, REG={CONDL_RC, %1, %2.val} + gen move {XGE, %a}, %a + yields %a + with UCONST2 REG + uses reusing %1, REG={CONDL_RC, %2, %1.val} + gen move {XLE, %a}, %a + yields %a + with REG REG + uses reusing %1, REG={CONDL_RR, %2, %1} + gen move {XLE, %a}, %a + yields %a + + + +/* Simple branches */ + + proc zxx example zeq + with REG STACK gen - move %1, C0 - bc IFFALSE, LT, {LABEL, $1} + test %1 + bxx* {LABEL, $1} - pat bge - leaving - cmi INT32 - zge $1 + /* Pop signed int, branch 
if... */ + pat zeq call zxx("beq") /* top == 0 */ + pat zne call zxx("bne") /* top != 0 */ + pat zgt call zxx("bgt") /* top > 0 */ + pat zge call zxx("bge") /* top >= 0 */ + pat zlt call zxx("blt") /* top < 0 */ + pat zle call zxx("ble") /* top <= 0 */ + + /* The peephole optimizer rewrites + * cmi 4 zeq + * as beq, and does same for bne, bgt, and so on. + */ - pat zlt /* Branch if signed top < 0 */ - with TRISTATE_ALL+GPR STACK + proc bxx example beq + with REG CONST2 STACK gen - move %1, C0 - bc IFTRUE, LT, {LABEL, $1} + cmpwi %1, {CONST, %2.val} + bxx[2] {LABEL, $1} + with CONST2 REG STACK + gen + cmpwi %2, {CONST, %1.val} + bxx[1] {LABEL, $1} + with REG REG STACK + gen + cmpw %2, %1 + bxx[1] {LABEL, $1} - pat blt - leaving - cmi INT32 - zlt $1 + /* Pop two signed ints, branch if... */ + pat beq call bxx("beq", "beq") /* second == top */ + pat bne call bxx("bne", "bne") /* second != top */ + pat bgt call bxx("bgt", "blt") /* second > top */ + pat bge call bxx("bge", "ble") /* second >= top */ + pat blt call bxx("blt", "bgt") /* second < top */ + pat ble call bxx("ble", "bge") /* second <= top */ - pat zle /* Branch if signed top >= 0 */ - with TRISTATE_ALL+GPR STACK + proc cmu4zxx example cmu zeq + with REG CONST2 STACK gen - move %1, C0 - bc IFFALSE, GT, {LABEL, $1} + cmplwi %1, {CONST, %2.val} + bxx[2] {LABEL, $2} + with CONST2 REG STACK + gen + cmplwi %2, {CONST, %1.val} + bxx[1] {LABEL, $2} + with REG REG STACK + gen + cmplw %2, %1 + bxx[1] {LABEL, $2} - pat ble - leaving - cmi INT32 - zle $1 + /* Pop two unsigned ints, branch if... */ + pat cmu zeq $1==4 call cmu4zxx("beq", "beq") + pat cmu zne $1==4 call cmu4zxx("bne", "bne") + pat cmu zgt $1==4 call cmu4zxx("bgt", "blt") + pat cmu zge $1==4 call cmu4zxx("bge", "ble") + pat cmu zlt $1==4 call cmu4zxx("blt", "bgt") + pat cmu zle $1==4 call cmu4zxx("ble", "bge") -/* Compare and jump */ + +/* Comparisons */ + + /* Each comparison extracts the lt and gt bits from cr0. 
+ * extlwi %a, %a, 2, 0 + * puts lt in the sign bit, so lt yields a negative result, + * gt yields positive. + * rlwinm %a, %a, 1, 31, 0 + * puts gt in the sign bit, to reverse the comparison. + */ pat cmi $1==INT32 /* Signed tristate compare */ - with CONST_ALL GPR - yields {TRISTATE_RC_S, %2, %1.val} - with GPR GPR - yields {TRISTATE_RR_S, %2, %1} + with REG CONST2 + uses reusing %1, REG={COND_RC, %1, %2.val} + gen rlwinm %a, %a, {CONST, 1}, {CONST, 31}, {CONST, 0} + yields %a + with CONST2 REG + uses reusing %2, REG={COND_RC, %2, %1.val} + gen extlwi %a, %a, {CONST, 2}, {CONST, 0} + yields %a + with REG REG + uses reusing %1, REG={COND_RR, %2, %1} + gen extlwi %a, %a, {CONST, 2}, {CONST, 0} + yields %a pat cmu $1==INT32 /* Unsigned tristate compare */ - with CONST_ALL GPR - yields {TRISTATE_RC_U, %2, %1.val} - with GPR GPR - yields {TRISTATE_RR_U, %2, %1} + with REG UCONST2 + uses reusing %1, REG={CONDL_RC, %1, %2.val} + gen rlwinm %a, %a, {CONST, 1}, {CONST, 31}, {CONST, 0} + yields %a + with UCONST2 REG + uses reusing %2, REG={CONDL_RC, %2, %1.val} + gen extlwi %a, %a, {CONST, 2}, {CONST, 0} + yields %a + with REG REG + uses reusing %1, REG={CONDL_RR, %2, %1} + gen extlwi %a, %a, {CONST, 2}, {CONST, 0} + yields %a pat cmp /* Compare pointers */ leaving @@ -1895,7 +2091,7 @@ PATTERNS kills ALL gen mtspr CTR, %1 - bcctrl ALWAYS, {CONST, 0}, {CONST, 0} + bctrl. pat lfr $1==INT32 /* Load function result, word */ yields R3 @@ -2022,7 +2218,7 @@ PATTERNS move {IND_RC_W, %a, 4}, SP move {IND_RC_W, %a, 0}, %a mtspr CTR, %a - bcctr ALWAYS, {CONST, 0}, {CONST, 0} + bctr. 
pat lor $1==0 /* Load FP */ uses REG @@ -2075,12 +2271,11 @@ PATTERNS pat lae rck $2==4 /* Range check */ with REG - uses CR0 gen - cmpli %a, {CONST, 0}, %1, {CONST, rom($1, 1)} - bc IFTRUE, LT, {LABEL, ".trap_erange"} - cmpli %a, {CONST, 0}, %1, {CONST, rom($1, 2)} - bc IFTRUE, GT, {LABEL, ".trap_erange"} + cmpwi %1, {CONST, rom($1, 1)} + blt {LABEL, ".trap_erange"} + cmpwi %1, {CONST, rom($1, 2)} + bgt {LABEL, ".trap_erange"} yields %1 @@ -2134,7 +2329,60 @@ PATTERNS pat cmf $1==INT32 /* Compare single */ with FSREG FSREG - yields {TRISTATE_FF, %2.1, %1.1} + uses REG={COND_FS, %2, %1} + gen extlwi %a, %a, {CONST, 2}, {CONST, 0} + yields %a + + pat cmf teq $1==4 /* Single second == top */ + with FSREG FSREG + uses REG={COND_FS, %2, %1} + gen move {XEQ, %a}, %a + yields %a + + pat cmf tne $1==4 /* Single second != top */ + with FSREG FSREG + uses REG={COND_FS, %2, %1} + gen move {XNE, %a}, %a + yields %a + + pat cmf tgt $1==4 /* Single second > top */ + with FSREG FSREG + uses REG={COND_FS, %2, %1} + gen move {XGT, %a}, %a + yields %a + + pat cmf tge $1==4 /* Single second >= top */ + with FSREG FSREG + uses REG={COND_FS, %2, %1} + gen move {XGE, %a}, %a + yields %a + + pat cmf tlt $1==4 /* Single second < top */ + with FSREG FSREG + uses REG={COND_FS, %2, %1} + gen move {XLT, %a}, %a + yields %a + + pat cmf tle $1==4 /* Single second <= top */ + with FSREG FSREG + uses REG={COND_FS, %2, %1} + gen move {XLE, %a}, %a + yields %a + + proc cmf4zxx example cmf zeq + with FSREG FSREG STACK + uses REG + gen + fcmpo CR0, %2, %1 + bxx* {LABEL, $2} + + /* Pop 2 singles, branch if... 
*/ + pat cmf zeq $1==4 call cmf4zxx("beq") + pat cmf zne $1==4 call cmf4zxx("bne") + pat cmf zgt $1==4 call cmf4zxx("bgt") + pat cmf zge $1==4 call cmf4zxx("bge") + pat cmf zlt $1==4 call cmf4zxx("blt") + pat cmf zle $1==4 call cmf4zxx("ble") pat loc loc cff $1==INT32 && $2==INT64 /* Convert single to double */ with FSREG @@ -2208,7 +2456,60 @@ PATTERNS pat cmf $1==INT64 /* Compare double */ with FREG FREG - yields {TRISTATE_FF, %2, %1} + uses REG={COND_FD, %2, %1} + gen extlwi %a, %a, {CONST, 2}, {CONST, 0} + yields %a + + pat cmf teq $1==8 /* Double second == top */ + with FREG FREG + uses REG={COND_FD, %2, %1} + gen move {XEQ, %a}, %a + yields %a + + pat cmf tne $1==8 /* Double second != top */ + with FREG FREG + uses REG={COND_FD, %2, %1} + gen move {XNE, %a}, %a + yields %a + + pat cmf tgt $1==8 /* Double second > top */ + with FREG FREG + uses REG={COND_FD, %2, %1} + gen move {XGT, %a}, %a + yields %a + + pat cmf tge $1==8 /* Double second >= top */ + with FREG FREG + uses REG={COND_FD, %2, %1} + gen move {XGE, %a}, %a + yields %a + + pat cmf tlt $1==8 /* Double second < top */ + with FREG FREG + uses REG={COND_FD, %2, %1} + gen move {XLT, %a}, %a + yields %a + + pat cmf tle $1==8 /* Double second <= top */ + with FREG FREG + uses REG={COND_FD, %2, %1} + gen move {XLE, %a}, %a + yields %a + + proc cmf8zxx example cmf zeq + with FREG FREG STACK + uses REG + gen + fcmpo CR0, %2, %1 + bxx* {LABEL, $2} + + /* Pop 2 doubles, branch if... 
*/ + pat cmf zeq $1==8 call cmf8zxx("beq") + pat cmf zne $1==8 call cmf8zxx("bne") + pat cmf zgt $1==8 call cmf8zxx("bgt") + pat cmf zge $1==8 call cmf8zxx("bge") + pat cmf zlt $1==8 call cmf8zxx("blt") + pat cmf zle $1==8 call cmf8zxx("ble") pat loc loc cff $1==INT64 && $2==INT32 /* Convert double to single */ with FREG From 8c8f291a073e7be90678a2e6584a7c031d6eda23 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Thu, 26 Jan 2017 12:39:16 -0500 Subject: [PATCH 09/11] In PowerPC libem, remove tge.s and powerpc.h Nothing uses the tables in tge.s, after I changed the ncg table. There are no *.e files in libem, so don't try to build them. --- mach/powerpc/libem/build.lua | 4 +--- mach/powerpc/libem/powerpc.h | 22 ----------------- mach/powerpc/libem/tge.s | 46 ------------------------------------ 3 files changed, 1 insertion(+), 71 deletions(-) delete mode 100644 mach/powerpc/libem/powerpc.h delete mode 100644 mach/powerpc/libem/tge.s diff --git a/mach/powerpc/libem/build.lua b/mach/powerpc/libem/build.lua index 786be4e11..466a28fb3 100644 --- a/mach/powerpc/libem/build.lua +++ b/mach/powerpc/libem/build.lua @@ -1,14 +1,12 @@ for _, plat in ipairs(vars.plats) do acklibrary { name = "headers_"..plat, - hdrs = { "./*.h" } } acklibrary { name = "lib_"..plat, srcs = { - "./*.s", -- zer.s - "./*.e", + "./*.s", }, vars = { plat = plat }, deps = { diff --git a/mach/powerpc/libem/powerpc.h b/mach/powerpc/libem/powerpc.h deleted file mode 100644 index 3540a6856..000000000 --- a/mach/powerpc/libem/powerpc.h +++ /dev/null @@ -1,22 +0,0 @@ -# -! $Source$ -! $State$ -! $Revision$ - -! Declare segments (the order is important). 
- -.sect .text -.sect .rom -.sect .data -.sect .bss - -#define IFFALSE 4 -#define IFTRUE 12 -#define ALWAYS 20 -#define DNZ 16 - -#define LT 0 -#define GT 1 -#define EQ 2 -#define OV 3 - diff --git a/mach/powerpc/libem/tge.s b/mach/powerpc/libem/tge.s deleted file mode 100644 index 4740d8436..000000000 --- a/mach/powerpc/libem/tge.s +++ /dev/null @@ -1,46 +0,0 @@ -# -! $Source$ -! $State$ -! $Revision$ - -#include "powerpc.h" - -.sect .rom - -! Lookup table for tge. - -.define .teq_table -.teq_table: - .data4 1 ! . . - .data4 0 ! . G - .data4 0 ! L . - -.define .tne_table -.tne_table: - .data4 0 ! . . - .data4 1 ! . G - .data4 1 ! L . - -.define .tgt_table -.tgt_table: - .data4 0 ! . . - .data4 1 ! . G - .data4 0 ! L . - -.define .tge_table -.tge_table: - .data4 1 ! . . - .data4 1 ! . G - .data4 0 ! L . - -.define .tlt_table -.tlt_table: - .data4 0 ! . . - .data4 0 ! . G - .data4 1 ! L . - -.define .tle_table -.tle_table: - .data4 1 ! . . - .data4 0 ! . G - .data4 1 ! L . From 1dfd5524e4c5691916a27e1fc5d84dd4b20e8c09 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Thu, 26 Jan 2017 12:44:32 -0500 Subject: [PATCH 10/11] In PowerPC top, don't delete addi r0, r0, 0 Also don't delete addis r0, r0, 0. These instructions are special cases that set r0 to zero. If we delete them, then r0 keeps its old value. I caught this bug because osxppc protects the .text segment against writing. (linuxppc doesn't protect it.) A program tried to set r0 to the NULL pointer, but top deleted the instruction, so r0 kept an old return address pointing into .text. Later the program checked that r0 wasn't NULL, tried to write to address r0, and crashed. 
--- mach/powerpc/top/table | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mach/powerpc/top/table b/mach/powerpc/top/table index acbe543a7..fdec03b2e 100644 --- a/mach/powerpc/top/table +++ b/mach/powerpc/top/table @@ -6,15 +6,15 @@ LABEL_STARTER '.'; %%; -P, Q, R { TRUE }; +RNZ { strcmp(VAL, "r0") }; /* not r0 */ X, Y, Z { TRUE }; %%; /* Whitespace is significant here! */ -addi X, X, 0 -> ; -addis X, X, 0 -> ; +addi RNZ, RNZ, 0 -> ; +addis RNZ, RNZ, 0 -> ; mr X, X -> ; fmr X, X -> ; From c41688929c7450399451c585f1dad734eed2b158 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Thu, 26 Jan 2017 13:10:08 -0500 Subject: [PATCH 11/11] In PowerPC ncg, switch the scratch register from r11 to r0. r0 is a special case and can't be used when adding a register to a constant. The few remaining users of the scratch register don't do that. I removed other usages of the scratch register in 7c64dab, 5b5f774, 19f0eb8, f64b7d8. --- mach/powerpc/ncg/table | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index 034c8f32d..7cc4bbbca 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -10,7 +10,7 @@ INT64 = 8 FP_OFFSET = 0 /* Offset of saved FP relative to our FP */ PC_OFFSET = 4 /* Offset of saved PC relative to our FP */ -#define COMMENT(n) /* comment {LABEL, n} */ +#define COMMENT(n) comment {LABEL, n} #define nicesize(x) ((x)==INT8 || (x)==INT16 || (x)==INT32 || (x)==INT64) @@ -74,7 +74,7 @@ REGISTERS R14("r14") : GPR, REG, GPR14 regvar. R13("r13") : GPR, REG, GPR13 regvar. R12("r12") : GPR, REG, GPR12. - R11("r11") : GPR, GPR11. + R11("r11") : GPR, REG, GPR11. R10("r10") : GPR, REG, GPR10. R9("r9") : GPR, REG, GPR9. R8("r8") : GPR, REG, GPR8. @@ -149,7 +149,7 @@ REGISTERS CTR("ctr") : SPR. CR0("cr0") : CR. -#define RSCRATCH R11 +#define RSCRATCH R0 #define FSCRATCH F0