Skip to content

Commit

Permalink
Improve accuracy of get/setAllocationCounter
Browse files Browse the repository at this point in the history
Summary:
get/setAllocationCounter didn't take into account allocations in the
current block. This was known at the time, but it turns out to be
important to have more accuracy when using these in a fine-grained
way.

Test Plan:
New unit test to test incrementally larger allocaitons.  Before I got
results like this:

```
+0
+0
+0
+0
+0
+4096
+0
+0
+0
+0
+0
+4064
+0
+0
+4088
+4056
+0
+0
+0
+4088
+4096
+4056
+4096
```

Notice how the results aren't always monotonically increasing.  After
this patch:

```
+344
+416
+488
+560
+632
+704
+776
+848
+920
+992
+1064
+1136
+1208
+1280
+1352
+1424
+1496
+1568
+1640
+1712
+1784
+1856
+1928
+2000
+2072
+2144
```

Reviewers: niteria, bgamari, hvr, erikd

Subscribers: rwbarton, thomie, carter

Differential Revision: https://phabricator.haskell.org/D4288
  • Loading branch information
simonmar committed Jan 8, 2018
1 parent 303106d commit a1a689d
Show file tree
Hide file tree
Showing 11 changed files with 74 additions and 34 deletions.
4 changes: 2 additions & 2 deletions compiler/codeGen/StgCmmForeign.hs
Expand Up @@ -408,8 +408,8 @@ Opening the nursery corresponds to the following code:
@
tso = CurrentTSO;
cn = CurrentNursery;
bdfree = CurrentNuresry->free;
bdstart = CurrentNuresry->start;
bdfree = CurrentNursery->free;
bdstart = CurrentNursery->start;
// We *add* the currently occupied portion of the nursery block to
// the allocation limit, because we will subtract it again in
Expand Down
14 changes: 14 additions & 0 deletions compiler/prelude/primops.txt.pp
Expand Up @@ -2921,6 +2921,20 @@
has_side_effects = True
out_of_line = True

primop GetThreadAllocationCounter "getThreadAllocationCounter#" GenPrimOp
State# RealWorld -> (# State# RealWorld, INT64 #)
{ Retrieves the allocation counter for the current thread. }
with
has_side_effects = True
out_of_line = True

primop SetThreadAllocationCounter "setThreadAllocationCounter#" GenPrimOp
INT64 -> State# RealWorld -> State# RealWorld
{ Sets the allocation counter for the current thread to the given value. }
with
has_side_effects = True
out_of_line = True

------------------------------------------------------------------------
section "Safe coercions"
------------------------------------------------------------------------
Expand Down
2 changes: 0 additions & 2 deletions includes/rts/Threads.h
Expand Up @@ -43,8 +43,6 @@ StgRegTable * resumeThread (void *);
//
int cmp_thread (StgPtr tso1, StgPtr tso2);
int rts_getThreadId (StgPtr tso);
HsInt64 rts_getThreadAllocationCounter (StgPtr tso);
void rts_setThreadAllocationCounter (StgPtr tso, HsInt64 i);
void rts_enableThreadAllocationLimit (StgPtr tso);
void rts_disableThreadAllocationLimit (StgPtr tso);

Expand Down
3 changes: 3 additions & 0 deletions includes/stg/MiscClosures.h
Expand Up @@ -468,6 +468,9 @@ RTS_FUN_DECL(stg_traceCcszh);
RTS_FUN_DECL(stg_clearCCSzh);
RTS_FUN_DECL(stg_traceEventzh);
RTS_FUN_DECL(stg_traceMarkerzh);
RTS_FUN_DECL(stg_getThreadAllocationCounterzh);
RTS_FUN_DECL(stg_setThreadAllocationCounterzh);


/* Other misc stuff */
// See wiki:Commentary/Compiler/Backends/PprC#Prototypes
Expand Down
21 changes: 5 additions & 16 deletions libraries/base/GHC/Conc/Sync.hs
Expand Up @@ -105,6 +105,7 @@ import Data.Maybe
import GHC.Base
import {-# SOURCE #-} GHC.IO.Handle ( hFlush )
import {-# SOURCE #-} GHC.IO.Handle.FD ( stdout )
import GHC.Int
import GHC.IO
import GHC.IO.Encoding.UTF8
import GHC.IO.Exception
Expand Down Expand Up @@ -194,18 +195,16 @@ instance Ord ThreadId where
--
-- @since 4.8.0.0
setAllocationCounter :: Int64 -> IO ()
setAllocationCounter i = do
ThreadId t <- myThreadId
rts_setThreadAllocationCounter t i
setAllocationCounter (I64# i) = IO $ \s ->
case setThreadAllocationCounter# i s of s' -> (# s', () #)

-- | Return the current value of the allocation counter for the
-- current thread.
--
-- @since 4.8.0.0
getAllocationCounter :: IO Int64
getAllocationCounter = do
ThreadId t <- myThreadId
rts_getThreadAllocationCounter t
getAllocationCounter = IO $ \s ->
case getThreadAllocationCounter# s of (# s', ctr #) -> (# s', I64# ctr #)

-- | Enables the allocation counter to be treated as a limit for the
-- current thread. When the allocation limit is enabled, if the
Expand Down Expand Up @@ -242,16 +241,6 @@ disableAllocationLimit = do
ThreadId t <- myThreadId
rts_disableThreadAllocationLimit t

-- We cannot do these operations safely on another thread, because on
-- a 32-bit machine we cannot do atomic operations on a 64-bit value.
-- Therefore, we only expose APIs that allow getting and setting the
-- limit of the current thread.
foreign import ccall unsafe "rts_setThreadAllocationCounter"
rts_setThreadAllocationCounter :: ThreadId# -> Int64 -> IO ()

foreign import ccall unsafe "rts_getThreadAllocationCounter"
rts_getThreadAllocationCounter :: ThreadId# -> IO Int64

foreign import ccall unsafe "rts_enableThreadAllocationLimit"
rts_enableThreadAllocationLimit :: ThreadId# -> IO ()

Expand Down
20 changes: 20 additions & 0 deletions rts/PrimOps.cmm
Expand Up @@ -2495,3 +2495,23 @@ stg_traceMarkerzh ( W_ msg )
return ();
}


stg_getThreadAllocationCounterzh ()
{
// Account for the allocation in the current block
W_ offset;
offset = Hp - bdescr_start(CurrentNursery);
return (StgTSO_alloc_limit(CurrentTSO) - offset);
}

stg_setThreadAllocationCounterzh ( I64 counter )
{
// Allocation in the current block will be subtracted by
// getThreadAllocationCounter#, so we have to offset any existing
// allocation here. See also openNursery/closeNursery in
// compiler/codeGen/StgCmmForeign.hs.
W_ offset;
offset = Hp - bdescr_start(CurrentNursery);
StgTSO_alloc_limit(CurrentTSO) = counter + offset;
return ();
}
4 changes: 2 additions & 2 deletions rts/RtsSymbols.c
Expand Up @@ -744,8 +744,6 @@
SymI_HasProto(rts_isProfiled) \
SymI_HasProto(rts_isDynamic) \
SymI_HasProto(rts_setInCallCapability) \
SymI_HasProto(rts_getThreadAllocationCounter) \
SymI_HasProto(rts_setThreadAllocationCounter) \
SymI_HasProto(rts_enableThreadAllocationLimit) \
SymI_HasProto(rts_disableThreadAllocationLimit) \
SymI_HasProto(rts_setMainThread) \
Expand Down Expand Up @@ -896,6 +894,8 @@
SymI_HasProto(stg_traceCcszh) \
SymI_HasProto(stg_traceEventzh) \
SymI_HasProto(stg_traceMarkerzh) \
SymI_HasProto(stg_getThreadAllocationCounterzh) \
SymI_HasProto(stg_setThreadAllocationCounterzh) \
SymI_HasProto(getMonotonicNSec) \
SymI_HasProto(lockFile) \
SymI_HasProto(unlockFile) \
Expand Down
13 changes: 1 addition & 12 deletions rts/Threads.c
Expand Up @@ -165,19 +165,8 @@ rts_getThreadId(StgPtr tso)
}

/* ---------------------------------------------------------------------------
* Getting & setting the thread allocation limit
* Enabling and disabling the thread allocation limit
* ------------------------------------------------------------------------ */
HsInt64 rts_getThreadAllocationCounter(StgPtr tso)
{
// NB. doesn't take into account allocation in the current nursery
// block, so it might be off by up to 4k.
return PK_Int64((W_*)&(((StgTSO *)tso)->alloc_limit));
}

void rts_setThreadAllocationCounter(StgPtr tso, HsInt64 i)
{
ASSIGN_Int64((W_*)&(((StgTSO *)tso)->alloc_limit), i);
}

void rts_enableThreadAllocationLimit(StgPtr tso)
{
Expand Down
7 changes: 7 additions & 0 deletions testsuite/tests/rts/all.T
Expand Up @@ -382,3 +382,10 @@ test('T12903', [when(opsys('mingw32'), skip)], compile_and_run, [''])
test('T13832', exit_code(1), compile_and_run, ['-threaded'])
test('T13894', normal, compile_and_run, [''])
test('T14497', normal, compile_and_run, ['-O'])

test('alloccounter1', normal, compile_and_run,
[
# avoid allocating stack chunks, which counts as
# allocation and messes up the results:
'-with-rtsopts=-k1m'
])
19 changes: 19 additions & 0 deletions testsuite/tests/rts/alloccounter1.hs
@@ -0,0 +1,19 @@
module Main where

import Control.Exception
import Control.Monad
import Data.List
import System.Mem

main = do
let
testAlloc n = do
let start = 999999
setAllocationCounter start
evaluate (last [1..n])
c <- getAllocationCounter
-- print (start - c)
return (start - c)
results <- forM [1..1000] testAlloc
print (sort results == results)
-- results better be in ascending order
1 change: 1 addition & 0 deletions testsuite/tests/rts/alloccounter1.stdout
@@ -0,0 +1 @@
True

0 comments on commit a1a689d

Please sign in to comment.