Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PowerPC ncg: register allocation, floats, lxl #59

Merged
merged 32 commits into from
Oct 26, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
c578c49
Edit PowerPC assembly for .and, .cms, .ior, .xor, .zer
kernigh Feb 11, 2017
2e41c39
Implement blm and bls using an inline loop.
kernigh Feb 12, 2017
1de1e8f
Experiment with conversions between integers and floats.
kernigh Feb 12, 2017
54949f7
Change .fef8 and .fif8 to pass values on the stack.
kernigh Feb 12, 2017
ba9b021
Use .los4 in lar 4 and .sts4 in sar 4.
kernigh Feb 13, 2017
89dd80e
Add missing instances of "kills ALL" or "with STACK".
kernigh Feb 13, 2017
dc05cb2
Add pat cms !defined($1)
kernigh Feb 13, 2017
c5bb3be
Speed up register allocation by removing some register classes.
kernigh Feb 13, 2017
a8f62f4
Remove REG_PAIR.
kernigh Feb 13, 2017
cf728c2
Implement lxl for PowerPC ncg.
kernigh Feb 14, 2017
cbe5d86
Add floating-point register variables to PowerPC ncg.
kernigh Feb 16, 2017
a20b87c
In ego, put both words and double-words in reg_float.
kernigh Feb 17, 2017
aa47f52
Switch error() and fatal() in mach/proto/ncg to stdarg.
kernigh Feb 17, 2017
e6df553
For PowerPC, never put a reg_float value in a reg_any.
kernigh Feb 17, 2017
736c454
Remove .ret from libem and inline the code.
kernigh Feb 17, 2017
23c365c
Fix comparison of 4-byte floats.
kernigh Feb 18, 2017
2c266c6
Reorder registers. Fix problem with ret 8.
kernigh Feb 18, 2017
7e93481
Add reglap to ncg. Add 4-byte reg_float to PowerPC ncg.
kernigh Oct 14, 2017
d6e9eac
Merge branch 'default' into kernigh-linuxppc
kernigh Oct 14, 2017
b342b83
Add function prototypes to mach/proto/ncg/regvar.c
kernigh Oct 15, 2017
aa876ff
Fix reglap for procedures that use both sizes of reg_float.
kernigh Oct 15, 2017
f0619ea
PowerPC ncg never uses the rules to stack LOCAL or DLOCAL.
kernigh Oct 15, 2017
5432bd0
Do a move when coercing FREG to FREG or FSREG to FSREG.
kernigh Oct 16, 2017
307a8b9
Add regvar_w() and regvar_d() for use with reglap.
kernigh Oct 17, 2017
893e170
Use my new regvar_w() and regvar_d() in PowerPC ncg.
kernigh Oct 17, 2017
47bd0ef
Stop inlining code to convert integers to floats.
kernigh Oct 17, 2017
ac2b071
Add more rules for single-precision reg_float.
kernigh Oct 17, 2017
459a9b5
Use lwzu, stwu to tighten more loops.
kernigh Oct 18, 2017
41f3bf7
Edit descr for linuxppc. Use powerpc.descr of ego.
kernigh Oct 18, 2017
73ad5a2
Rename RELOLIS to RELOPPC_LIS.
kernigh Oct 18, 2017
2a92f9b
Add a few more error checks and adjustments to reglap.
kernigh Oct 19, 2017
0102cc8
lwzu writes to the register in the token.
kernigh Oct 19, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions h/out.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,13 +61,13 @@ struct outname {
/*
* relocation type bits
*/
#define RELSZ 0x0fff /* relocation length */
#define RELO1 1 /* 1 byte */
#define RELO2 2 /* 2 bytes */
#define RELO4 3 /* 4 bytes */
#define RELOPPC 4 /* PowerPC 26-bit address */
#define RELOLIS 5 /* PowerPC lis */
#define RELOVC4 6 /* VideoCore IV address in 32-bit instruction */
#define RELSZ 0x0fff /* relocation length */
#define RELO1 1 /* 1 byte */
#define RELO2 2 /* 2 bytes */
#define RELO4 3 /* 4 bytes */
#define RELOPPC 4 /* PowerPC 26-bit address */
#define RELOPPC_LIS 5 /* PowerPC lis */
#define RELOVC4 6 /* VideoCore IV address in 32-bit instruction */

#define RELPC 0x2000 /* pc relative */
#define RELBR 0x4000 /* High order byte lowest address. */
Expand Down
6 changes: 3 additions & 3 deletions mach/powerpc/as/mach5.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ void emit_hl(word_t in)
case OP_HA: /* ha16[expr] */
if (PASS_RELO && (hl_expr.typ & S_TYP) != S_ABS) {
/*
* RELOLIS only works with lis _, _ (same as
* addis _, r0, _). Check if instruction
* RELOPPC_LIS only works with lis _, _ (same
* as addis _, r0, _). Check if instruction
* isn't addis or register RA isn't r0.
*/
if ((in & 0xfc1f0000) != (0x3c000000))
Expand All @@ -55,7 +55,7 @@ void emit_hl(word_t in)
* Low 26 bits: signed offset
*/
fit(fitx(hl_expr.val, 26));
newrelo(hl_expr.typ, RELOLIS | FIXUPFLAGS);
newrelo(hl_expr.typ, RELOPPC_LIS | FIXUPFLAGS);
reg = (in >> 21) & 0x1f;
in = (hl_token == OP_HA) << 31;
in |= reg << 26;
Expand Down
34 changes: 17 additions & 17 deletions mach/powerpc/libem/aar4.s
Original file line number Diff line number Diff line change
@@ -1,14 +1,9 @@
.sect .text

! Index into a bounds-checked array.
! Get address of element of bounds-checked array.
!
! On entry:
! r3 = ptr to descriptor
! r4 = index
! r5 = address of array
! Yields:
! r3 = address of element
! r0 = size of element (used by .lar4, .sar4)
! Stack: ( array-adr index descriptor-adr -- element-adr )
! Sets r3 = size of element for .los4, .sts4
! Preserves r10 for .lar4, .sar4

.define .aar4
Expand All @@ -17,16 +12,21 @@
ori r0, r0, lo16[.trap_earray]
mtspr ctr, r0 ! load CTR with trap address

lwz r0, 0(r3)
subf. r4, r0, r4 ! adjust range
bltctr ! check lower bound
lwz r4, 0(sp) ! r4 = address of descriptor
lwz r5, 4(sp) ! r5 = index
lwz r6, 8(sp) ! r6 = address of array

lwz r0, 4(r3)
cmplw r4, r3
bgectr ! check upper bound
lwz r0, 0(r4)
subf. r5, r0, r5 ! subtract lower bound from index
bltctr ! check lower bound

lwz r0, 8(r3)
mullw r4, r4, r0 ! scale index
add r3, r4, r5 ! calculate element address
lwz r0, 4(r4)
cmplw r5, r0
bgtctr ! check upper bound

lwz r3, 8(r4) ! r3 = size of element
mullw r5, r5, r3 ! scale index by size
add r6, r6, r5
stw r6, 8(sp) ! push address of element
addi sp, sp, 8
blr
26 changes: 11 additions & 15 deletions mach/powerpc/libem/and.s
Original file line number Diff line number Diff line change
@@ -1,24 +1,20 @@
.sect .text

! Set intersection.
! Stack: ( b a size -- a*b )
! Stack: ( a b size -- a*b )

.define .and
.and:
lwz r3, 0 (sp) ! r3 = size
addi sp, sp, 4
lwz r3, 0(sp) ! r3 = size
srwi r7, r3, 2
mtspr ctr, r7 ! ctr = size / 4
add r4, sp, r3 ! r4 = pointer before set a

mr r4, sp ! r4 = ptr to set a
add r5, sp, r3 ! r5 = ptr to set b
srwi r6, r3, 2
mtspr ctr, r6 ! ctr = r3 / 4
1:
lwz r7, 0(r4)
lwz r8, 0(r5)
and r8, r7, r8 ! intersection of words
stw r8, 0(r5)
addi r4, r4, 4
addi r5, r5, 4
! Loop with r4 in set a and sp in set b.
1: lwzu r5, 4(r4)
lwzu r6, 4(sp)
and r7, r5, r6 ! intersection of words
stw r7, 0(r4)
bdnz 1b ! loop ctr times
add sp, sp, r3
addi sp, sp, 4 ! drop last word of set b
blr
2 changes: 1 addition & 1 deletion mach/powerpc/libem/build.lua
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ for _, plat in ipairs(vars.plats) do
acklibrary {
name = "lib_"..plat,
srcs = {
"./*.s",
"./*.s", -- rm ret.s
},
vars = { plat = plat },
deps = {
Expand Down
29 changes: 14 additions & 15 deletions mach/powerpc/libem/cif8.s
Original file line number Diff line number Diff line change
Expand Up @@ -8,25 +8,24 @@

.define .cif8
.cif8:
addi sp, sp, -4 ! make space for the double
! Conversion uses the pivot value
! 1 << 52 = 0x4330 0000 0000 0000
!
! From signed integer i, we compute
! ((1 << 52) + (1 << 31) + i) - ((1 << 52) + (1 << 31))

lis r3, 0x4330
stwu r3, -4(sp) ! make space for the double

lwz r3, 4(sp)
xoris r3, r3, 0x8000
stw r3, 4(sp) ! flip sign of integer value

addis r3, r0, 0x4330
stw r3, 0(sp) ! set high word to construct a double
stw r3, 4(sp) ! flip sign bit to get (1 << 31) + i

lfd f0, 0(sp) ! load value

lis r3, ha16[pivot]
lfd f1, lo16[pivot](r3) ! load pivot value
fsub f0, f0, f1 ! adjust
lfd f0, 0(sp) ! f0 = (1 << 52) + (1 << 31) + i
lis r3, 0x8000
stw r3, 4(sp)
lfd f1, 0(sp) ! f1 = (1 << 52) + (1 << 31)
fsub f0, f0, f1 ! finish conversion

stfd f0, 0(sp) ! save value again...
blr ! ...and return

.sect .rom
pivot:
.data4 0x43300000
.data4 0x80000000
39 changes: 18 additions & 21 deletions mach/powerpc/libem/cms.s
Original file line number Diff line number Diff line change
@@ -1,30 +1,27 @@
.sect .text

! Compare sets a, b.
! Stack: ( b a -- )
! With r3 = size of each set
! Yields r3 = 0 if equal, nonzero if not equal
! Stack: ( a b size -- result )
! Result is 0 if equal, nonzero if not equal.

.define .cms
.cms:
mr r4, sp ! r4 = ptr to set a
add r5, sp, r3 ! r5 = ptr to set b
mr r6, r3 ! r6 = size
srwi r3, r3, 2
mtspr ctr, r3 ! ctr = size / 4
1:
lwz r7, 0(r4)
lwz r8, 0(r5)
cmpw cr0, r7, r8 ! compare words in sets
addi r4, r4, 4
addi r5, r5, 4
bne cr0, 2f ! branch if not equal
lwz r3, 0(sp) ! r3 = size of each set
srwi r7, r3, 2
mtspr ctr, r7 ! ctr = size / 4
add r4, sp, r3 ! r4 = pointer before set a
add r7, r4, r3 ! r7 = pointer to store result

! Loop with r4 in set a and sp in set b.
1: lwzu r5, 4(r4)
lwzu r6, 4(sp)
cmpw r5, r6 ! compare words
bne 2f ! branch if not equal
bdnz 1b ! loop ctr times
addi r3, r0, 0 ! equal: return 0

li r3, 0 ! equal: return 0
b 3f
2:
addi r3, r0, 1 ! not equal: return 1
3:
slwi r6, r6, 1 ! r6 = size * 2
add sp, sp, r6 ! remove sets from stack
2: li r3, 1 ! not equal: return 1
3: mr sp, r7
stw r3, 0(sp) ! push result
blr
19 changes: 9 additions & 10 deletions mach/powerpc/libem/com.s
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,15 @@

.define .com
.com:
lwz r3, 0 (sp) ! size
addi sp, sp, 4
lwz r3, 0(sp) ! r3 = size
srwi r7, r3, 2
mtspr ctr, r7 ! ctr = size / 4
mr r4, sp ! r4 = pointer before set a

mr r4, sp ! r4 = pointer to set a
srwi r5, r3, 2
mtspr ctr, r5 ! ctr = r3 / 4
1:
lwz r6, 0(r4)
nor r6, r6, r6 ! complement of word
stw r6, 0(r4)
addi r4, r4, 4
! Loop with r4 in set a.
1: lwzu r5, 4(r4)
nor r7, r5, r5 ! complement of word
stw r7, 0(r4)
bdnz 1b ! loop ctr times
addi sp, sp, 4 ! drop size from stack
blr
23 changes: 11 additions & 12 deletions mach/powerpc/libem/cuf8.s
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,20 @@

.define .cuf8
.cuf8:
addi sp, sp, -4 ! make space for the double
! Conversion uses the pivot value
! 1 << 52 = 0x4330 0000 0000 0000
!
! From unsigned integer u, we compute
! ((1 << 52) + u) - (1 << 52)

lis r3, 0x4330
stw r3, 0(sp) ! set high word to construct a double
stwu r3, -4(sp) ! make space for the double

lfd f0, 0(sp) ! load value

lis r3, ha16[pivot]
lfd f1, lo16[pivot](r3) ! load pivot value
fsub f0, f0, f1 ! adjust
lfd f0, 0(sp) ! f0 = (1 << 52) + u
li r3, 0x0000
stw r3, 4(sp)
lfd f1, 0(sp) ! f1 = (1 << 52)
fsub f0, f0, f1 ! finish conversion

stfd f0, 0(sp) ! save value again...
blr ! ...and return

.sect .rom
pivot:
.data4 0x43300000
.data4 0x00000000
59 changes: 33 additions & 26 deletions mach/powerpc/libem/fef8.s
Original file line number Diff line number Diff line change
Expand Up @@ -3,35 +3,48 @@
.sect .text

! Split a double-precision float into fraction and exponent, like
! frexp(3) in C. On entry:
! r3 = float, high word (bits 0..31)
! r4 = float, low word (bits 32..63)
! Yields:
! r3 = fraction, high word (bits 0..31)
! r4 = fraction, low word (bits 32..63)
! r5 = exponent
! frexp(3) in C.
!
! Stack: ( double -- fraction exponent )

.define .fef8
.fef8:
lwz r3, 0(sp) ! r3 = high word (bits 0..31)
lwz r4, 4(sp) ! r4 = low word (bits 32..63)

! IEEE double-precision format:
! sign exponent fraction
! 0 1..11 12..63
extrwi r6, r3, 11, 1 ! r6 = IEEE exponent
addi r5, r6, -1022 ! r5 = true exponent
!
! To get fraction in [0.5, 1) or (-1, -0.5], we subtract 1022
! from the IEEE exponent.

extrwi. r6, r3, 11, 1 ! r6 = IEEE exponent
addi r5, r6, -1022 ! r5 = our exponent
beq 2f ! jump if zero or denormalized
cmpwi r6, 2047
beqlr ! return if infinity or NaN
cmpwi r6, 0
bne 1f ! jump if normalized number
beq 1f ! jump if infinity or NaN
! fall through if normalized

! Put fraction in [0.5, 1) or (-1, -0.5] by setting its
! IEEE exponent to 1022.
rlwinm r3, r3, 0, 12, 0 ! clear old exponent
oris r3, r3, 1022 << 4 ! set new exponent
! fall through

! Got denormalized number or zero, probably zero.
1: stw r3, 0(sp)
stw r4, 4(sp) ! push fraction
stwu r5, -4(sp) ! push exponent
blr

2: ! Got denormalized number or zero, probably zero.
extrwi r6, r3, 22, 12
addi r5, r0, 0 ! r5 = true exponent = 0
or. r6, r6, r4 ! r6 = high|low fraction
beqlr ! return if zero
bne 3f ! jump if not zero
li r5, 0 ! exponent = 0
b 1b

! Got denormalized number, not zero.
stwu r4, -4(sp)
stwu r3, -4(sp)
3: ! Got denormalized number, not zero.
lfd f0, 0(sp)
lis r6, ha16[_2_64]
lfd f1, lo16[_2_64](r6)
Expand All @@ -40,14 +53,8 @@
lwz r3, 0(sp)
lwz r4, 4(sp)
extrwi r6, r3, 11, 1 ! r6 = IEEE exponent
addi sp, sp, 8
addi r5, r6, -1022 - 64 ! r5 = true exponent
1:
! Put fraction in [0.5, 1) or (-1, -0.5] by setting its
! exponent to true 0, IEEE 1022.
rlwinm r3, r3, 0, 12, 0 ! clear old exponent
oris r3, r3, 1022 << 4 ! set new exponent
blr
addi r5, r6, -1022 - 64 ! r5 = our exponent
b 1b

.sect .rom
_2_64:
Expand Down
Loading