Skip to content

Commit

Permalink
Improve floating point conversions on powerpc
Browse files Browse the repository at this point in the history
From-SVN: r163598
  • Loading branch information
Michael Meissner authored and Michael Meissner committed Aug 27, 2010
1 parent a3c85b7 commit 7042fe5
Show file tree
Hide file tree
Showing 16 changed files with 1,183 additions and 209 deletions.
120 changes: 120 additions & 0 deletions gcc/ChangeLog
Original file line number Diff line number Diff line change
@@ -1,3 +1,123 @@
2010-08-23 Michael Meissner <meissner@linux.vnet.ibm.com>

* config/rs6000/rs6000-protos.h (rs6000_address_for_fpconvert):
New declaration.
(rs6000_allocate_stack_temp): Ditto.
(rs6000_expand_convert_si_to_sfdf): Ditto.

* config/rs6000/rs6000.c (rs6000_override_options): Adjust long
line. Update the options set if power6 or power7 server/embedded
type options are used. If we give a warning for no vsx under
-mcpu=power7 -mno-altivec, mark -mvsx as an explicit option.
(rs6000_allocate_stack_temp): New function to allocate a stack
temporary and adjust the address so it meets either REG+OFFSET or
REG+REG addressing requirements.
(rs6000_address_for_fpconvert): Adjust REG+OFFSET addresses so
that they can be used with the LFIWAX/LFIWZX instructions.
(rs6000_expand_convert_si_to_sfdf): New helper function for
converting signed/unsigned SImode to either SFmode/DFmode.

* config/rs6000/rs6000.h (TARGET_FCFID): New macros to determine
whether certain instructions can be generated.
(TARGET_FCTIDZ): Ditto.
(TARGET_STFIWX): Ditto.
(TARGET_LFIWAX): Ditto.
(TARGET_LFIWZX): Ditto.
(TARGET_FCFIDS): Ditto.
(TARGET_FCFIDU): Ditto.
(TARGET_FCFIDUS): Ditto.
(TARGET_FCTIDUZ): Ditto.
(TARGET_FCTIWUZ): Ditto.

* config/rs6000/rs6000.md (UNSPEC_FCTIW): New unspec constants.
(UNSPEC_FCTID): Ditto.
(UNSPEC_LFIWAX): Ditto.
(UNSPEC_LFIWZX): Ditto.
(UNSPEC_FCTIWUZ): Ditto.
(rreg): Use correct constraints.
(SI_CONVERT_FP): New mode attribute for floating point conversion
tests.
(E500_CONVERT): Ditto.
(lfiwax): New insns for converting from integer to floating point
utilizing newer instructions. Attempt to optimize conversions
that come from memory so that we don't load the value into a GPR,
spill it to the stack and reload it into a FPR.
(floatsi<mode>2_lfiwax): Ditto.
(floatsi<mode>2_lfiwax_mem): Ditto.
(floatsi<mode>2_lfiwax_mem2): Ditto.
(lfiwzx): Ditto.
(floatunssi<mode>2_lfiwzx): Ditto.
(floatunssi<mode>2_lfiwzx_mem): Ditto.
(floatunssi<mode>2_lfiwzx_mem2): Ditto.
(floatdidf2_mem): Ditto.
(floatunsdidf2_fcfidu): Ditto.
(floatunsdidf2_mem): Ditto.
(floatunsdisf2): Ditto.
(floatunsdisf2_fcfidus): Ditto.
(floatunsdisf2_mem): Ditto.
(floatsidf2): Add support for LFIWAX/LFIWZX/FCFIDS/FCFIDU/FCFIDUS.
Use FCFID on 32-bit hosts that support it.
(floatsidf2_internal): Ditto.
(floatunssisf2): Ditto.
(floatunssidf2): Ditto.
(floatunssidf2_internal): Ditto.
(floatsisf2): Ditto.
(floatdidf2): Ditto.
(floatdidf2_fpr): Ditto.
(floatunsdidf2): Ditto.
(floatdisf2): Ditto.
(floatdisf2_fcfids): Ditto.
(floatdisf2_internal1): Ditto.
(fixuns_truncsfsi2): Delete, merge into common pattern for both
SF/DF. Add power7 support.
(fix_truncsfsi2): Ditto.
(fixuns_truncdfsi2): Ditto.
(fixuns_truncdfdi2): Ditto.
(fix_truncdfsi2): Ditto.
(fix_truncdfsi2_internal): Ditto.
(fix_truncdfsi2_internal_gfxopt): Ditto.
(fix_truncdfsi2_mfpgpr): Ditto.
(fctiwz): Ditto.
(btruncdf2): Ditto.
(btruncdf2_fpr): Ditto.
(btruncsf2): Ditto.
(ceildf2): Ditto.
(ceildf2_fpr): Ditto.
(ceilsf2): Ditto.
(floordf2): Ditto.
(floordf2_fpr): Ditto.
(floorsf2): Ditto.
(rounddf2): Ditto.
(rounddf2_fpr): Ditto.
(roundsf2): Ditto.
(fix_trunc<mode>si2): Combine SF/DF conversion into one insn.
(fix_trunc<mode>di2): Ditto.
(fixuns_trunc<mode>si2): Ditto.
(fixuns_trunc<mode>di2): Ditto.
(fctiwz_<mode>): Ditto.
(btrunc<mode>2): Ditto.
(btrunc<mode>2_fpr): Ditto.
(ceil<mode>2): Ditto.
(ceil<mode>2_fpr): Ditto.
(floor<mode>2): Ditto.
(floor<mode>2_fpr): Ditto.
(round<mode>2): Ditto.
(round<mode>2_fpr): Ditto.
(fix_trunc<mode>si2_stfiwx): New insn for machines with STFIWX.
(fixuns_trunc<mode>si2_stfiwx): Ditto.
(fix_truncdfsi2_internal): Ditto.
(fix_trunc<mode>si2_mem): Combiner pattern to eliminate storing
converted value on stack, loaded into GPR, and then stored into
the final destination.
(fix_trunc<mode>di2_fctidz): New pattern for targets supporting
FCTIDZ.
(lrint<mode>di2): New insn, provide the lrint builtin functions.
(ftruncdf2): Delete, unused.
(fix_trunctfsi2_internal): Use gen_fctiwz_df, not gen_fctiwz.

* config/rs6000/vsx.md (toplevel): Update copyright year.
(VSr2): Use "ws" constraint for DFmode, not "!r#r".
(VSr3): Ditto.

2010-08-27 Basile Starynkevitch <basile@starynkevitch.net>
Jeremie Salvucci <jeremie.salvucci@free.fr>
Expand Down
3 changes: 3 additions & 0 deletions gcc/config/rs6000/rs6000-protos.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,9 @@ extern void rs6000_emit_parity (rtx, rtx);

extern rtx rs6000_machopic_legitimize_pic_address (rtx, enum machine_mode,
rtx);
extern rtx rs6000_address_for_fpconvert (rtx);
extern rtx rs6000_allocate_stack_temp (enum machine_mode, bool, bool);
extern void rs6000_expand_convert_si_to_sfdf (rtx, rtx, bool);
#endif /* RTX_CODE */

#ifdef TREE_CODE
Expand Down
149 changes: 139 additions & 10 deletions gcc/config/rs6000/rs6000.c
Original file line number Diff line number Diff line change
Expand Up @@ -2510,10 +2510,10 @@ rs6000_override_options (const char *default_cpu)
POWERPC_BASE_MASK | MASK_POWERPC64 | MASK_PPC_GPOPT | MASK_PPC_GFXOPT
| MASK_MFCRF | MASK_POPCNTB | MASK_FPRND | MASK_CMPB | MASK_DFP
| MASK_MFPGPR | MASK_RECIP_PRECISION},
{"power7", PROCESSOR_POWER7,
{"power7", PROCESSOR_POWER7, /* Don't add MASK_ISEL by default */
POWERPC_7400_MASK | MASK_POWERPC64 | MASK_PPC_GPOPT | MASK_MFCRF
| MASK_POPCNTB | MASK_FPRND | MASK_CMPB | MASK_DFP | MASK_POPCNTD
| MASK_VSX| MASK_RECIP_PRECISION}, /* Don't add MASK_ISEL by default */
| MASK_VSX | MASK_RECIP_PRECISION},
{"powerpc", PROCESSOR_POWERPC, POWERPC_BASE_MASK},
{"powerpc64", PROCESSOR_POWERPC64,
POWERPC_BASE_MASK | MASK_PPC_GFXOPT | MASK_POWERPC64},
Expand Down Expand Up @@ -2550,15 +2550,19 @@ rs6000_override_options (const char *default_cpu)
ISA_2_1_MASKS = MASK_MFCRF,
ISA_2_2_MASKS = (ISA_2_1_MASKS | MASK_POPCNTB | MASK_FPRND),

/* For ISA 2.05, do not add MFPGPR, since it isn't in ISA 2.06, and
don't add ALTIVEC, since in general it isn't a win on power6. */
ISA_2_5_MASKS = (ISA_2_2_MASKS | MASK_CMPB | MASK_RECIP_PRECISION
| MASK_DFP),
/* For ISA 2.05, do not add MFPGPR, since it isn't in ISA 2.06, and don't
add ALTIVEC, since in general it isn't a win on power6. In ISA 2.04,
fsel, fre, fsqrt, etc. were no longer documented as optional. Group
masks by server and embedded. */
ISA_2_5_MASKS_EMBEDDED = (ISA_2_2_MASKS | MASK_CMPB | MASK_RECIP_PRECISION
| MASK_PPC_GFXOPT | MASK_PPC_GPOPT),
ISA_2_5_MASKS_SERVER = (ISA_2_5_MASKS_EMBEDDED | MASK_DFP),

/* For ISA 2.06, don't add ISEL, since in general it isn't a win, but
altivec is a win so enable it. */
ISA_2_6_MASKS = (ISA_2_5_MASKS | MASK_ALTIVEC | MASK_POPCNTD
| MASK_VSX | MASK_RECIP_PRECISION)
ISA_2_6_MASKS_EMBEDDED = (ISA_2_5_MASKS_EMBEDDED | MASK_POPCNTD),
ISA_2_6_MASKS_SERVER = (ISA_2_5_MASKS_SERVER | MASK_POPCNTD | MASK_ALTIVEC
| MASK_VSX)
};

/* Numerous experiment shows that IRA based loop pressure
Expand Down Expand Up @@ -2699,15 +2703,22 @@ rs6000_override_options (const char *default_cpu)
{
warning (0, msg);
target_flags &= ~ MASK_VSX;
target_flags_explicit |= MASK_VSX;
}
}

/* For the newer switches (vsx, dfp, etc.) set some of the older options,
unless the user explicitly used the -mno-<option> to disable the code. */
if (TARGET_VSX)
target_flags |= (ISA_2_6_MASKS & ~target_flags_explicit);
target_flags |= (ISA_2_6_MASKS_SERVER & ~target_flags_explicit);
else if (TARGET_POPCNTD)
target_flags |= (ISA_2_6_MASKS_EMBEDDED & ~target_flags_explicit);
else if (TARGET_DFP)
target_flags |= (ISA_2_5_MASKS & ~target_flags_explicit);
target_flags |= (ISA_2_5_MASKS_SERVER & ~target_flags_explicit);
else if (TARGET_CMPB)
target_flags |= (ISA_2_5_MASKS_EMBEDDED & ~target_flags_explicit);
else if (TARGET_POPCNTB || TARGET_FPRND)
target_flags |= (ISA_2_2_MASKS & ~target_flags_explicit);
else if (TARGET_ALTIVEC)
target_flags |= (MASK_PPC_GFXOPT & ~target_flags_explicit);

Expand Down Expand Up @@ -26959,4 +26970,122 @@ rs6000_final_prescan_insn (rtx insn, rtx *operand ATTRIBUTE_UNUSED,
}
}


/* Allocate a stack temporary of mode MODE and return its MEM, fixing up the
   address so that it meets the form the caller asks for.  OFFSETTABLE_P
   requests an offsettable (REG+OFFSET) address; REG_REG_P requests a
   REG+REG address.  An address that is already a legitimate indirect
   address is returned untouched; otherwise, when it fails the requested
   form, the address is copied into a register with copy_addr_to_reg.  */

rtx
rs6000_allocate_stack_temp (enum machine_mode mode,
                            bool offsettable_p,
                            bool reg_reg_p)
{
  rtx slot = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
  rtx slot_addr = XEXP (slot, 0);
  /* Use strict address checking once reload has started.  */
  int strict = (reload_in_progress || reload_completed);
  bool needs_reg;

  /* A plain indirect address needs no fixing up.  */
  if (legitimate_indirect_address_p (slot_addr, strict))
    return slot;

  /* The offsettable request is tested first; the indexed test is only
     reached when the offsettable test did not already ask for a copy.  */
  needs_reg
    = ((offsettable_p
        && !rs6000_legitimate_offset_address_p (mode, slot_addr, strict))
       || (reg_reg_p
           && !legitimate_indexed_address_p (slot_addr, strict)));

  if (needs_reg)
    slot = replace_equiv_address (slot, copy_addr_to_reg (slot_addr));

  return slot;
}

/* X must be a MEM.  Return X unchanged when its address is already a
   legitimate indirect (reg) or indexed (reg+reg) address; otherwise return
   an equivalent MEM whose address has been copied into a register.  This
   is for memory reference instructions like STFIWX that only take reg+reg
   addressing.  */

rtx
rs6000_address_for_fpconvert (rtx x)
{
  /* Use strict address checking once reload has started.  */
  int strict = (reload_in_progress || reload_completed);
  rtx mem_addr;

  gcc_assert (MEM_P (x));
  mem_addr = XEXP (x, 0);

  if (legitimate_indirect_address_p (mem_addr, strict)
      || legitimate_indexed_address_p (mem_addr, strict))
    return x;

  return replace_equiv_address (x, copy_addr_to_reg (mem_addr));
}

/* Expand a 32-bit integer to floating point conversion.  SRC must be
   SImode and DEST must be SFmode or DFmode; UNSIGNED_P selects the unsigned
   conversion.  The insns are emitted directly and nothing is returned.

   Three strategies are used, depending on SRC and the target:
     - SRC in memory: use the *_mem pattern on a suitably addressed MEM;
     - no TARGET_MFPGPR: use the pattern taking a stack temporary and a
       DImode scratch register;
     - otherwise: widen SRC to DImode and use the DImode conversion.  */

void
rs6000_expand_convert_si_to_sfdf (rtx dest, rtx src, bool unsigned_p)
{
  enum machine_mode dest_mode = GET_MODE (dest);
  rtx (*gen_via_stack) (rtx, rtx, rtx, rtx);
  rtx (*gen_from_mem) (rtx, rtx);
  rtx (*gen_from_di) (rtx, rtx);
  rtx di_reg, slot;

  gcc_assert (GET_MODE (src) == SImode);

  /* Pick the generator functions for this destination mode and signedness,
     checking that the target provides the instructions they rely on.  */
  switch (dest_mode)
    {
    case SFmode:
      if (!unsigned_p)
        {
          gcc_assert (TARGET_FCFIDS && TARGET_LFIWAX);
          gen_via_stack = gen_floatsisf2_lfiwax;
          gen_from_mem = gen_floatsisf2_lfiwax_mem;
          gen_from_di = gen_floatdisf2;
        }
      else
        {
          gcc_assert (TARGET_FCFIDUS && TARGET_LFIWZX);
          gen_via_stack = gen_floatunssisf2_lfiwzx;
          gen_from_mem = gen_floatunssisf2_lfiwzx_mem;
          gen_from_di = gen_floatunsdisf2;
        }
      break;

    case DFmode:
      if (!unsigned_p)
        {
          gcc_assert (TARGET_FCFID && TARGET_LFIWAX);
          gen_via_stack = gen_floatsidf2_lfiwax;
          gen_from_mem = gen_floatsidf2_lfiwax_mem;
          gen_from_di = gen_floatdidf2;
        }
      else
        {
          gcc_assert (TARGET_FCFIDU && TARGET_LFIWZX);
          gen_via_stack = gen_floatunssidf2_lfiwzx;
          gen_from_mem = gen_floatunssidf2_lfiwzx_mem;
          gen_from_di = gen_floatunsdidf2;
        }
      break;

    default:
      gcc_unreachable ();
    }

  /* Source already in memory: fix up its address and convert from there.  */
  if (MEM_P (src))
    {
      src = rs6000_address_for_fpconvert (src);
      emit_insn (gen_from_mem (dest, src));
      return;
    }

  /* With TARGET_MFPGPR, widen the value to DImode in a register and use the
     DImode conversion pattern.  */
  if (TARGET_MFPGPR)
    {
      if (!REG_P (src))
        src = force_reg (SImode, src);
      di_reg = convert_to_mode (DImode, src, unsigned_p);
      emit_insn (gen_from_di (dest, di_reg));
      return;
    }

  /* Otherwise hand the pattern a DImode scratch register and an SImode
     stack temporary with a reg+reg capable address.  */
  di_reg = gen_reg_rtx (DImode);
  slot = rs6000_allocate_stack_temp (SImode, false, true);
  emit_insn (gen_via_stack (dest, src, slot, di_reg));
}

#include "gt-rs6000.h"
22 changes: 21 additions & 1 deletion gcc/config/rs6000/rs6000.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
/* Definitions of target machine for GNU compiler, for IBM RS/6000.
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009,
2010
Free Software Foundation, Inc.
Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
Expand Down Expand Up @@ -554,6 +555,25 @@ extern int rs6000_vector_align[];
#define TARGET_E500_DOUBLE 0
#define CHECK_E500_OPTIONS do { } while (0)

/* ISA 2.01 allowed FCFID to be done in 32-bit, previously it was 64-bit only.
Enable 32-bit fcfid's on any of the switches for newer ISA machines or
XILINX. */
#define TARGET_FCFID (TARGET_POWERPC64 \
|| TARGET_POPCNTB /* ISA 2.02 */ \
|| TARGET_CMPB /* ISA 2.05 */ \
|| TARGET_POPCNTD /* ISA 2.06 */ \
|| TARGET_XILINX_FPU)

/* Availability tests for the other conversion-related instructions.  Each is
   an alias for an existing test: FCTIDZ follows TARGET_FCFID, STFIWX follows
   TARGET_PPC_GFXOPT, LFIWAX follows TARGET_CMPB (the ISA 2.05 switch above),
   and the remaining ones follow TARGET_POPCNTD (the ISA 2.06 switch
   above).  */
#define TARGET_FCTIDZ TARGET_FCFID
#define TARGET_STFIWX TARGET_PPC_GFXOPT
#define TARGET_LFIWAX TARGET_CMPB
#define TARGET_LFIWZX TARGET_POPCNTD
#define TARGET_FCFIDS TARGET_POPCNTD
#define TARGET_FCFIDU TARGET_POPCNTD
#define TARGET_FCFIDUS TARGET_POPCNTD
#define TARGET_FCTIDUZ TARGET_POPCNTD
#define TARGET_FCTIWUZ TARGET_POPCNTD

/* E500 processors only support plain "sync", not lwsync. */
#define TARGET_NO_LWSYNC TARGET_E500

Expand Down
Loading

0 comments on commit 7042fe5

Please sign in to comment.