Skip to content

Commit

Permalink
Add popCnt# primop
Browse files Browse the repository at this point in the history
  • Loading branch information
tibbe authored and simonmar committed Aug 16, 2011
1 parent 49dbe60 commit 2d0438f
Show file tree
Hide file tree
Showing 12 changed files with 109 additions and 4 deletions.
2 changes: 2 additions & 0 deletions compiler/cmm/CmmMachOp.hs
Expand Up @@ -448,6 +448,8 @@ data CallishMachOp
| MO_Memcpy
| MO_Memset
| MO_Memmove

| MO_PopCnt Width
deriving (Eq, Show)

pprCallishMachOp :: CallishMachOp -> SDoc
Expand Down
17 changes: 17 additions & 0 deletions compiler/codeGen/CgPrimOp.hs
Expand Up @@ -374,6 +374,12 @@ emitPrimOp [] CopyByteArrayOp [src,src_off,dst,dst_off,n] live =
emitPrimOp [] CopyMutableByteArrayOp [src,src_off,dst,dst_off,n] live =
doCopyMutableByteArrayOp src src_off dst dst_off n live

-- Population count
emitPrimOp [res] PopCnt8Op [w] live = emitPopCntCall res w W8 live
emitPrimOp [res] PopCnt16Op [w] live = emitPopCntCall res w W16 live
emitPrimOp [res] PopCnt32Op [w] live = emitPopCntCall res w W32 live
emitPrimOp [res] PopCnt64Op [w] live = emitPopCntCall res w W64 live
emitPrimOp [res] PopCntOp [w] live = emitPopCntCall res w wordWidth live

-- The rest just translate straightforwardly
emitPrimOp [res] op [arg] _
Expand Down Expand Up @@ -908,3 +914,14 @@ emitAllocateCall res cap n live = do
where
allocate = CmmLit (CmmLabel (mkForeignLabel (fsLit "allocate") Nothing
ForeignLabelInExternalPackage IsFunction))

emitPopCntCall :: LocalReg -> CmmExpr -> Width -> StgLiveVars -> Code
emitPopCntCall res x width live = do
vols <- getVolatileRegs live
emitForeignCall' PlayRisky
[CmmHinted res NoHint]
(CmmPrim (MO_PopCnt width))
[(CmmHinted x NoHint)]
(Just vols)
NoC_SRT -- No SRT b/c we do PlayRisky
CmmMayReturn
14 changes: 14 additions & 0 deletions compiler/codeGen/StgCmmPrim.hs
Expand Up @@ -443,6 +443,13 @@ emitPrimOp [] CopyByteArrayOp [src,src_off,dst,dst_off,n] =
emitPrimOp [] CopyMutableByteArrayOp [src,src_off,dst,dst_off,n] =
doCopyMutableByteArrayOp src src_off dst dst_off n

-- Population count
emitPrimOp [res] PopCnt8Op [w] = emitPopCntCall res w W8
emitPrimOp [res] PopCnt16Op [w] = emitPopCntCall res w W16
emitPrimOp [res] PopCnt32Op [w] = emitPopCntCall res w W32
emitPrimOp [res] PopCnt64Op [w] = emitPopCntCall res w W64
emitPrimOp [res] PopCntOp [w] = emitPopCntCall res w wordWidth

-- The rest just translate straightforwardly
emitPrimOp [res] op [arg]
| nopOp op
Expand Down Expand Up @@ -940,3 +947,10 @@ emitAllocateCall res cap n = do
where
allocate = CmmLit (CmmLabel (mkForeignLabel (fsLit "allocate") Nothing
ForeignLabelInExternalPackage IsFunction))

emitPopCntCall :: LocalReg -> CmmExpr -> Width -> FCode ()
emitPopCntCall res x width = do
emitPrimCall
[ res ]
(MO_PopCnt width)
[ x ]
1 change: 1 addition & 0 deletions compiler/ghc.cabal.in
Expand Up @@ -497,6 +497,7 @@ Library
RegClass
PIC
Platform
CPrim
X86.Regs
X86.RegInfo
X86.Instr
Expand Down
2 changes: 2 additions & 0 deletions compiler/main/DynFlags.hs
Expand Up @@ -276,6 +276,7 @@ data DynFlag
| Opt_SharedImplib
| Opt_BuildingCabalPackage
| Opt_SSE2
| Opt_SSE4_2
| Opt_GhciSandbox
| Opt_HelpfulErrors

Expand Down Expand Up @@ -1518,6 +1519,7 @@ dynamic_flags = [
, flagA "monly-3-regs" (NoArg (addWarn "The -monly-3-regs flag does nothing; it will be removed in a future GHC release"))
, flagA "monly-4-regs" (NoArg (addWarn "The -monly-4-regs flag does nothing; it will be removed in a future GHC release"))
, flagA "msse2" (NoArg (setDynFlag Opt_SSE2))
, flagA "msse4.2" (NoArg (setDynFlag Opt_SSE4_2))

------ Warning opts -------------------------------------------------
, flagA "W" (NoArg (mapM_ setWarningFlag minusWOpts))
Expand Down
14 changes: 14 additions & 0 deletions compiler/nativeGen/CPrim.hs
@@ -0,0 +1,14 @@
-- | Generating C symbol names emitted by the compiler.
module CPrim (popCntLabel) where

import CmmType
import Outputable

popCntLabel :: Width -> String
popCntLabel w = "hs_popcnt" ++ pprWidth w
where
pprWidth W8 = "8"
pprWidth W16 = "16"
pprWidth W32 = "32"
pprWidth W64 = "64"
pprWidth w = pprPanic "popCntLabel: Unsupported word width " (ppr w)
3 changes: 3 additions & 0 deletions compiler/nativeGen/PPC/CodeGen.hs
Expand Up @@ -28,6 +28,7 @@ where
import PPC.Instr
import PPC.Cond
import PPC.Regs
import CPrim
import NCGMonad
import Instruction
import PIC
Expand Down Expand Up @@ -1142,6 +1143,8 @@ genCCall' gcp target dest_regs argsAndHints
MO_Memset -> (fsLit "memset", False)
MO_Memmove -> (fsLit "memmove", False)

MO_PopCnt w -> (fsLit $ popCntLabel w, False)

other -> pprPanic "genCCall(ppc): unknown callish op"
(pprCallishMachOp other)

Expand Down
3 changes: 3 additions & 0 deletions compiler/nativeGen/SPARC/CodeGen/CCall.hs
Expand Up @@ -13,6 +13,7 @@ import SPARC.Instr
import SPARC.Imm
import SPARC.Regs
import SPARC.Base
import CPrim
import NCGMonad
import PIC
import Instruction
Expand Down Expand Up @@ -332,5 +333,7 @@ outOfLineMachOp_table mop
MO_Memset -> fsLit "memset"
MO_Memmove -> fsLit "memmove"

MO_PopCnt w -> fsLit $ popCntLabel w

_ -> pprPanic "outOfLineMachOp(sparc): Unknown callish mach op "
(pprCallishMachOp mop)
30 changes: 29 additions & 1 deletion compiler/nativeGen/X86/CodeGen.hs
Expand Up @@ -28,6 +28,7 @@ import X86.Instr
import X86.Cond
import X86.Regs
import X86.RegInfo
import CPrim
import Instruction
import PIC
import NCGMonad
Expand Down Expand Up @@ -70,9 +71,14 @@ sse2Enabled = do
-- calling convention specifies the use of xmm regs,
-- and possibly other places.
return True
ArchX86 -> return (dopt Opt_SSE2 dflags)
ArchX86 -> return (dopt Opt_SSE2 dflags || dopt Opt_SSE4_2 dflags)
_ -> panic "sse2Enabled: Not an X86* arch"

sse4_2Enabled :: NatM Bool
sse4_2Enabled = do
dflags <- getDynFlagsNat
return (dopt Opt_SSE4_2 dflags)

if_sse2 :: NatM a -> NatM a -> NatM a
if_sse2 sse2 x87 = do
b <- sse2Enabled
Expand Down Expand Up @@ -1574,6 +1580,26 @@ genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
-- write barrier compiles to no code on x86/x86-64;
-- we keep it this long in order to prevent earlier optimisations.

genCCall (CmmPrim (MO_PopCnt width)) dest_regs@[CmmHinted dst _]
args@[CmmHinted src _] = do
sse4_2 <- sse4_2Enabled
if sse4_2
then do code_src <- getAnyReg src
src_r <- getNewRegNat size
return $ code_src src_r `appOL`
(if width == W8 then
-- The POPCNT instruction doesn't take a r/m8
unitOL (MOVZxL II8 (OpReg src_r) (OpReg src_r)) `appOL`
unitOL (POPCNT II16 (OpReg src_r)
(getRegisterReg False (CmmLocal dst)))
else
unitOL (POPCNT size (OpReg src_r)
(getRegisterReg False (CmmLocal dst))))
else genCCall (CmmCallee (fn width) CCallConv) dest_regs args
where size = intSize width
fn w = CmmLit (CmmLabel (mkForeignLabel (fsLit (popCntLabel w)) Nothing
ForeignLabelInExternalPackage IsFunction))

genCCall target dest_regs args =
do dflags <- getDynFlagsNat
if target32Bit (targetPlatform dflags)
Expand Down Expand Up @@ -1990,6 +2016,8 @@ outOfLineCmmOp mop res args
MO_Memset -> fsLit "memset"
MO_Memmove -> fsLit "memmove"

MO_PopCnt _ -> fsLit "popcnt"

other -> panic $ "outOfLineCmmOp: unmatched op! (" ++ show other ++ ")"


Expand Down
6 changes: 6 additions & 0 deletions compiler/nativeGen/X86/Instr.hs
Expand Up @@ -310,6 +310,8 @@ data Instr
-- call 1f
-- 1: popl %reg

-- SSE4.2
| POPCNT Size Operand Reg -- src, dst

data Operand
= OpReg Reg -- register
Expand Down Expand Up @@ -403,6 +405,8 @@ x86_regUsageOfInstr instr
COMMENT _ -> noUsage
DELTA _ -> noUsage

POPCNT _ src dst -> mkRU (use_R src) [dst]

_other -> panic "regUsage: unrecognised instr"

where
Expand Down Expand Up @@ -539,6 +543,8 @@ x86_patchRegsOfInstr instr env
JXX_GBL _ _ -> instr
CLTD _ -> instr

POPCNT sz src dst -> POPCNT sz (patchOp src) (env dst)

_other -> panic "patchRegs: unrecognised instr"

where
Expand Down
2 changes: 2 additions & 0 deletions compiler/nativeGen/X86/Ppr.hs
Expand Up @@ -574,6 +574,8 @@ pprInstr platform (XOR FF32 src dst) = pprOpOp platform (sLit "xorps") FF32 src
pprInstr platform (XOR FF64 src dst) = pprOpOp platform (sLit "xorpd") FF64 src dst
pprInstr platform (XOR size src dst) = pprSizeOpOp platform (sLit "xor") size src dst

pprInstr platform (POPCNT size src dst) = pprOpOp platform (sLit "popcnt") size src (OpReg dst)

pprInstr platform (NOT size op) = pprSizeOp platform (sLit "not") size op
pprInstr platform (NEGI size op) = pprSizeOp platform (sLit "neg") size op

Expand Down
19 changes: 16 additions & 3 deletions compiler/prelude/primops.txt.pp
Expand Up @@ -302,6 +302,22 @@
primop WordLtOp "ltWord#" Compare Word# -> Word# -> Bool
primop WordLeOp "leWord#" Compare Word# -> Word# -> Bool

primop PopCnt8Op "popCnt8#" Monadic Word# -> Word#
{Count the number of set bits in the lower 8 bits of a word.}
primop PopCnt16Op "popCnt16#" Monadic Word# -> Word#
{Count the number of set bits in the lower 16 bits of a word.}
primop PopCnt32Op "popCnt32#" Monadic Word# -> Word#
{Count the number of set bits in the lower 32 bits of a word.}
#if WORD_SIZE_IN_BITS < 64
primop PopCnt64Op "popCnt64#" Monadic Word64# -> Word#
{Count the number of set bits in a 64-bit word.}
#else
primop PopCnt64Op "popCnt64#" Monadic Word# -> Word#
{Count the number of set bits in a 64-bit word.}
#endif
primop PopCntOp "popCnt#" Monadic Word# -> Word#
{Count the number of set bits in a word.}

------------------------------------------------------------------------
section "Narrowings"
{Explicit narrowing of native-sized ints or words.}
Expand Down Expand Up @@ -1926,6 +1942,3 @@
------------------------------------------------------------------------

thats_all_folks



0 comments on commit 2d0438f

Please sign in to comment.