{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE BangPatterns, FlexibleContexts, GeneralizedNewtypeDeriving,
KindSignatures, LambdaCase, NumericUnderscores, RecordWildCards,
ScopedTypeVariables, StandaloneDeriving, TupleSections #-}
module Main where
import GHC.Debug.Client hiding (DebugM)
import GHC.Debug.Client.Monad hiding (DebugM)
import GHC.Debug.Client.Monad.Simple (DebugM(..))
import GHC.Debug.Retainers
import GHC.Debug.Fragmentation
-- import GHC.Debug.Profile
import GHC.Debug.Dominators (retainerSize)
import GHC.Debug.Snapshot
-- import GHC.Debug.Count
-- import GHC.Debug.Types.Graph (heapGraphSize, traverseHeapGraph, ppClosure)
import GHC.Debug.Types.Ptr
--import GHC.Debug.Types.Closures
import GHC.Debug.Trace
-- import GHC.Debug.ObjectEquiv
import Control.Monad.RWS
-- import Control.Monad.Identity
-- import Control.Monad.Writer
-- import qualified Data.ByteString.Char8 as B
-- import qualified Data.ByteString.Builder as B
-- import qualified Data.Text as T
-- import qualified Data.Text.IO as T
import Control.Monad.State
-- import Data.Text (Text)
-- import GHC.Exts.Heap.ClosureTypes
import qualified Data.Foldable as F
-- import Control.Monad
-- import Debug.Trace
import Control.Exception
import Control.Concurrent
-- import Control.Concurrent.Async
-- import qualified Control.Concurrent.Chan.Unagi.Bounded as Bounded
import qualified Data.IntMap.Strict as IM
-- import Data.Bitraversable
-- import Data.Monoid
-- import Control.Applicative
-- import Data.Traversable
import Data.Kind
import Data.Tuple
import Data.Word
-- import System.Process
import System.Environment
import System.IO
import Data.Tree
import Data.Maybe
import Data.Either
import Control.Arrow (first, (***), (&&&))
import qualified Data.Map.Strict as Map
import qualified Data.Map.Lazy as LazyMap
-- import Data.Ord
import Data.List
import Data.Function
import Data.List.NonEmpty(NonEmpty(..))
-- import Data.Function
import GHC.Generics
import GHC.Clock
import GHC.Int
import qualified Data.Graph.Inductive.Graph as FGL
import qualified Data.Graph.Inductive.PatriciaTree as FGL
import qualified Data.Graph.Inductive.Query.Dominators as FGL
import qualified Data.Set as Set
-- Collect snapshot, stepping through so we have some control over memory usage:
main :: IO ()
main = do
hSetBuffering stdout NoBuffering
getArgs >>= \case
("--analyze-snapshot":limDirty:mbFile) -> do
let file = case mbFile of
[f] -> f
[] -> defaultSnapshotLocation
_ -> error "bad args"
-- zero indicates no limit:
let limI = read limDirty
lim | limI == 0 = Nothing
| otherwise = Just limI
snapshotRun file $
-- pRetainingThunks
-- pDominators lim
-- pFragmentation
-- pClusteredHeapGML (ClusterBySourceInfo False) "/tmp/per-infoTable-byLoc-NEW"
pAnalyzePointerCompression
-- pAnalyzeNestedClosureFreeVars
-- pInfoTableTree
-- pDistinctInfoTableAnalysis
-- pCommonPtrArgs
-- pPointersToPointers
("--take-snapshot":mbSocket) -> do
let sockPath = case mbSocket of
[] -> "/tmp/ghc-debug"
[p] -> p
_ -> error "usage: --take-snapshot [<socket-path>]"
-- jank: just loop until this works:
let maxAttempts = (50 :: Int)
loop attempt = do
try (go sockPath) >>= \case
Left (e :: SomeException)
| attempt == maxAttempts -> do
print e
throw e
| otherwise -> putStr "." >> threadDelay 200_000 >> loop (attempt+1)
Right _ -> do
putStrLn $ "Snapshot created at: "<>defaultSnapshotLocation
loop 1
_ -> error "bad args"
where
go sockPath = withDebuggeeConnect sockPath $ \e -> do
makeSnapshot e defaultSnapshotLocation
outputRequestLog e
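-- Example invocations (the executable name here is hypothetical):
--
-- > ghc-debug-analyze --take-snapshot /tmp/ghc-debug
-- > ghc-debug-analyze --analyze-snapshot 0 /tmp/ghc-debug-cache
--
-- Per the parsing in 'main' above, the numeric argument to --analyze-snapshot
-- is the recursion limit used by analyses like pDominators (0 means no limit).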
-- See: https://well-typed.com/blog/2021/01/fragmentation-deeper-look/
-- TODO optionally take a GC to compact non-pinned? Maybe no point to that
pFragmentation :: Debuggee -> IO ()
pFragmentation e = do
pause e
(bs, pinnedCensus, mblockCensus, blockCensus) <- run e $ do
bs <- precacheBlocks
roots <- gcRoots
pinnedCensus <- censusPinnedBlocks bs roots
mblockCensus <- censusByMBlock roots
blockCensus <- censusByBlock roots
-- TODO can we do this outside of `run`, i.e. can `roots` leak?
let badPtrs = findBadPtrs pinnedCensus
forM_ badPtrs $ \((_,ptrs),_l)-> do
liftIO $ putStrLn "=============== fragment object ========================================================"
-- Look for data with just a single retainer (we have to ask for a limit of
-- 2 in order to distinguish "exactly one"), since that's what we're more
-- likely to be able to do something about:
rs <- findRetainersOf (Just 2) roots ptrs
case rs of
[] -> liftIO $ putStrLn "no retainers... why?"
[_,_] -> liftIO $ putStrLn "two retainers, skipping"
[r] -> do
cs <- dereferenceClosures r
cs' <- mapM (quintraverse pure pure dereferenceConDesc pure pure) cs
locs <- mapM getSourceLoc cs'
-- displayRetainerStack is arbitrary and weird...
-- TODO could be cool to look for the last thunk in the list (highest up in retainer tree)
-- TODO would be cool to call out the top-most line from our own codebase too
liftIO $ putStrLn "FIXME for 0.4!"
-- liftIO $ displayRetainerStack
-- [ ("", zip cs' locs) ]
_ -> error $ "findRetainersOf broken, apparently: " <> show rs
return (bs, pinnedCensus, mblockCensus, blockCensus)
resume e
-- Output:
putStrLn "--------------------------------"
summariseBlocks bs
putStrLn "---------- mega-block histogram: --------------------------------"
printMBlockCensus mblockCensus
putStrLn "---------- block histogram: --------------------------------"
printBlockCensus blockCensus
putStrLn "---------- pinned block histogram: --------------------------------"
-- TODO is printBlockCensus correct for pinned? i.e. same size?
printBlockCensus $ fmap (\(PinnedCensusStats (censusStats, _))-> censusStats) pinnedCensus
putStrLn "--------------------------------"
-- print thunks and their retained size (sort of...)
pRetainingThunks :: Debuggee -> IO ()
pRetainingThunks e = do
pause e
runTrace e $ do
_bs <- precacheBlocks
liftIO $ hPutStrLn stderr "!!!!! Done precacheBlocks !!!!!"
roots <- gcRoots
liftIO $ hPutStrLn stderr "!!!!! Done gcRoots !!!!!"
(totSize,_) <- flip execStateT (0, 0::Int) $
traceFromM emptyTraceFunctions{closTrace = closTraceFunc} roots
liftIO $ hPutStrLn stderr $ "!!!!! TOTAL SIZE: "<>show totSize
where
-- how deeply nested a thunk (i.e. one with how many thunk ancestors) do we
-- want to report on?
thunkDepthLim = 10
closTraceFunc _ptr (DCS size clos) continue = do
(!sizeAcc, !thunkDepth) <- get
mbSourceInfo <- lift $ getSourceInfo $ tableId $ info clos
case mbSourceInfo of
Just SourceInformation{..}
| "THUNK" `isPrefixOf` show infoClosureType
&& thunkDepth < thunkDepthLim -> do
-- reset size counter for children, at one more thunk deep:
(sizeChildren, _) <- lift $ execStateT continue (0, thunkDepth+1)
-- For now: print weight and info to STDOUT; we can sort this later
liftIO $ putStrLn $
show (getSize sizeChildren)<>" "<>show thunkDepth<>" "<> show mbSourceInfo
-- We might also be the child of a THUNK, so need to accumulate
put (sizeAcc+size+sizeChildren, thunkDepth)
_ -> do
-- Not a thunk, or else thunkDepthLim exceeded:
put (sizeAcc+size, thunkDepth)
continue
----------------------------------------
-- TODO this is still non-working for GML, in that we need GML node ids to be int32 ... :(
{-
-- Write out the heap graph to a file, in GML format
-- ( https://web.archive.org/web/20190303094704/http://www.fim.uni-passau.de:80/fileadmin/files/lehrstuhl/brandenburg/projekte/gml/gml-technical-report.pdf )
pWriteToGML :: FilePath -> Debuggee -> IO ()
pWriteToGML path e = do
pause e
runTrace e $ do
_bs <- precacheBlocks
liftIO $ hPutStrLn stderr "!!!!! Done precacheBlocks !!!!!"
roots <- gcRoots
liftIO $ hPutStrLn stderr "!!!!! Done gcRoots !!!!!"
-- We start a separate thread for serializing to GML format and writing to file:
(ioChanW, ioChanR) <- unsafeLiftIO $ Bounded.newChan 256
outHandle <- unsafeLiftIO $ openFile path WriteMode
-- Hacky: choose graph output format based on filename
let writer = case dropWhile (/='.') path of
".gml" -> gmlFileWriter
".net" -> pajekFileWriter
_ -> error "Only .gml and .net (pajek) supported"
do fileWriterThread <- unsafeLiftIO $ async $ writer outHandle ioChanR
runIdentityT $ traceFromM emptyTraceFunctions{closTrace = closTraceFunc ioChanW} roots
-- Wait for the writer thread to process the last bit of the graph data:
unsafeLiftIO $ do
Bounded.writeChan ioChanW GMLDone
wait fileWriterThread
unsafeLiftIO $ hClose outHandle
where
closTraceFunc ioChanW ptr (DCS size clos) continue = do
lift $ do
mbSourceInfo <- getSourceInfo $ tableId $ info clos
unsafeLiftIO $ do
let closureType = constrName clos
Bounded.writeChan ioChanW $
GMLNode{..}
-- Map over this closure's pointer arguments, recording an edge in
-- our closure graph
let sendEdge = Bounded.writeChan ioChanW . GMLEdge ptr
void $ quintraverse pure pure pure pure sendEdge clos
continue
-}
{-
-- FIXME this doesn't actually work (node ids seemingly needed to be
-- contiguous, for one thing. Probably need to be incrementing (in which case
-- the file format seems to make no sense)
--
-- This format is obnoxious and we can't easily stream it out in constant memory
pajekFileWriter :: Handle -> Bounded.OutChan GMLPayload -> IO ()
pajekFileWriter outHandle ioChanR = do
(minSeen, maxSeen, lenNodes, nodes, edges) <- go maxBound minBound 0 [] []
print ("!!!!!!!!!!!!!!!!!!!!!!!!!", minSeen, maxSeen, maxSeen - minSeen)
let write = hPutStrLn outHandle
write $ "*Vertices "<> show lenNodes
forM_ nodes $ \n-> write (show n<> " "<>show (show n)) -- e.g. 234 "234"
write "*Edges"
forM_ edges $ \(n0, n1) -> write (show n0<>" "<>show n1)
where
go !minSeen !maxSeen !lenNodes !nodes !edges = do
Bounded.readChan ioChanR >>= \case
GMLDone -> return (minSeen, maxSeen, lenNodes, nodes, edges)
GMLEdge (ClosurePtr !fromW) (ClosurePtr !toW) ->
go minSeen maxSeen lenNodes nodes ((fromW,toW):edges)
GMLNode _ (ClosurePtr !n) _ _ ->
go (minSeen `min` n) (maxSeen `max` n) (lenNodes+1) (n:nodes) edges
-}
{-
-- This handles writing the graph to 'outFile' in GML format, while trying to
-- buffer writes efficiently
gmlFileWriter :: Handle -> Bounded.OutChan GMLPayload -> IO ()
gmlFileWriter outHandle ioChanR = do
writeOpenGML
pop >>= goWriteBatch [] batchSize
writeCloseGML
where
batchSize = 100 -- TODO tune me?
pop = Bounded.readChan ioChanR
write = B.hPutBuilder outHandle
bShow :: (Show a) => a -> B.Builder
bShow = bStr . show
bStr = B.byteString . B.pack
goWriteBatch payloadStack n GMLDone =
writeNodesEdges payloadStack -- terminal case
-- write this batch out and continue:
goWriteBatch payloadStack 0 p = do
writeNodesEdges (p:payloadStack)
pop >>= goWriteBatch [] batchSize
-- keep accumulating:
goWriteBatch payloadStack n p = do
pop >>= goWriteBatch (p:payloadStack) (n-1)
-- NOTE: GML is defined as a 7-bit ascii serialization. We'll just use
-- ByteString.Char8 for now.
writeOpenGML =
write $ "graph [\n"
<> "comment \"this is a graph in GML format\"\n"
<> "directed 1\n"
writeCloseGML =
write $ "]\n"
writeNodesEdges = write . mconcat . map ser where
ser = \case
GMLDone -> error "impossible"
GMLNode{..} ->
"node [\n"
<> "id " <> bShowPtr ptr <> "\n"
<> "tp " <> bShow closureType <> "\n"
<> "sz " <> bShow (getSize size) <> "\n"
<> "]\n"
GMLEdge{..} ->
"edge [\n"
<> "source "<> bShowPtr ptrFrom <> "\n"
<> "target "<> bShowPtr ptrTo <> "\n"
<> "]\n"
where bShowPtr (ClosurePtr w) = bShow w
-- | Communication with our GML file writer thread
data GMLPayload
= GMLNode{
mbSourceInfo :: !(Maybe SourceInformation)
, ptr :: !ClosurePtr
-- ^ id referenced by GMLEdge
, size :: !Size
, closureType :: !String
}
| GMLEdge{
ptrFrom :: !ClosurePtr
, ptrTo :: !ClosurePtr
}
| GMLDone
-- ^ We've finished traversing the heap, chan can be closed
-}
-- --------------------------------------------------
-- Utility crap
constrName :: (HasConstructor (Rep a), Generic a)=> a -> String
constrName = genericConstrName . from
class HasConstructor (f :: Type -> Type) where
genericConstrName :: f x -> String
instance HasConstructor f => HasConstructor (D1 c f) where
genericConstrName (M1 x) = genericConstrName x
instance (HasConstructor x, HasConstructor y) => HasConstructor (x :+: y) where
genericConstrName (L1 l) = genericConstrName l
genericConstrName (R1 r) = genericConstrName r
instance Constructor c => HasConstructor (C1 c f) where
genericConstrName x = conName x
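-- Illustrative GHCi check (not from the original source):
--
-- >>> constrName (Just 'x')
-- "Just"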
-- --------------------------------------------------
-- See also pClusteredHeapGML which annotates dominator size, clustered by infotable/source loc
pDominators
:: Maybe Int
-- ^ How deep should we recurse?
-> Debuggee
-> IO ()
pDominators lim e = do
pause e
runTrace e $ do
_bs <- precacheBlocks
roots <- gcRoots
liftIO $ hPutStrLn stderr "!!!!! Done gcRoots !!!!!"
ns0 <- liftIO getMonotonicTime
hg :: HeapGraph Size <- case roots of
[] -> error "Empty roots"
(x:xs) -> do
multiBuildHeapGraph lim (x :| xs)
ns1 <- liftIO getMonotonicTime
liftIO $ hPutStrLn stderr $ "!!!!! Done multiBuildHeapGraph !!!!! in "<>(show $ (ns1-ns0))
-- Validate that sizes in dominator tree seem right:
let !sizeTot = IM.foldl' (\s e_-> s + hgeData e_) 0 $ graph hg
liftIO $ hPutStrLn stderr $ "!!!!! Total size: "<> (show sizeTot)
{-
-- Further try to validate that heap sizes seem right...
liftIO $ putStrLn "!!!!!! ----------------- !!!!!!!"
liftIO $ summariseBlocks _bs
liftIO $ putStrLn "!!!!!! ----------------- !!!!!!!"
mblockMap <- censusByMBlock (map hgeClosurePtr $ IM.elems $ graph hg)
liftIO . print $ length mblockMap
liftIO . print $ ("totsize", sum $ fmap cssize mblockMap)
liftIO $ putStrLn "!!!!!! ----------------- !!!!!!!"
error "DONE!"
-}
forrest <- forM (retainerSize hg) $ \tree -> do
-- get some pieces we're interested in:
let fiddle hge =
let (Size s, RetainerSize rs) = hgeData hge
i = info $ hgeClosure hge
t = tipe $ decodedTable i
-- (size of this and all children, size of just us, closure type, InfoTablePtr)
in ((rs, s, t), tableId i)
pure (fiddle <$> tree)
-- recursively sort tree so largest retained sizes at top:
let sortTree (Node x xs) = Node x $ sortBy (flip compare `on` rootLabel) $ map sortTree xs
-- For validating whether we've got close to the heap size we expect represented
let totalRetained = sum $ map (\(Node ((rs,_,_),_) _)-> rs) forrest
totalRetainedMB :: Float
totalRetainedMB = fromIntegral totalRetained / 1_000_000
liftIO $ hPutStrLn stderr $ "!!! TOTAL SIZE RETAINED REPORTED: "<> show totalRetainedMB <> " MB"
-- Sort just top-level
let forrestSorted = sortBy (flip compare `on` rootLabel) forrest
{- TODO what was the goal here?
-- descend until we're at 90% of peak
let limFactor = 0.2
let rLimLower = case forrestSorted of
(Node ((rBiggest,_,_),_) _ : _) -> round (fromIntegral rBiggest * limFactor)
_ -> error "Blah"
liftIO $ hPutStrLn stderr $ show ("rLimLower", rLimLower)
let goDescend n@(Node ((rSize, x, y), ptr) ns)
| rSize > rLimLower = F.for_ ns goDescend
| otherwise = do
nAnnotated <- forM n $ traverse getSourceInfo
liftIO $ putStrLn $ drawTree $ fmap show nAnnotated
F.for_ forrestSorted $ goDescend . sortTree
-}
-- {-
let tree0 =
Node ((0,0,TSO), nullInfoTablePtr) $ --nonsense
forrestSorted
-- let tree1 = topThunkClosures tree0
let tree1 = pruneDownToPct 0.001 tree0
-- Annotate all with source info
tree2 <- forM tree1 $ traverse $ \tid ->
if tid == nullInfoTablePtr -- dumb workaround for root of tree...
then return Nothing
else getSourceInfo tid
liftIO $ putStrLn $ drawTree $
fmap show $ sortTree tree2
-- -}
-- {-
-- Prune all grandchildren of thunks, for clarity/brevity:
topThunkClosures :: Tree ((x, y, ClosureType), InfoTablePtr) -> Tree ((x, y, ClosureType), InfoTablePtr)
topThunkClosures (Node n@((_, _, tp), _) forrest)
| tp `elem` [ THUNK , THUNK_1_0 , THUNK_0_1 , THUNK_2_0 , THUNK_1_1 , THUNK_0_2 , THUNK_STATIC , THUNK_SELECTOR]
= Node n $ map prune forrest -- remove grandchildren
| otherwise = Node n $ map topThunkClosures forrest
where prune (Node x _) = Node x []
-- ...or alternatively, prune children with retained size under some magnitude:
-- assumes reverse sorted tree by retained
pruneDownToPct :: Float -> Tree ((Int, y, ClosureType), InfoTablePtr) -> Tree ((Int, y, ClosureType), InfoTablePtr)
pruneDownToPct p _root@(Node x forrest) = Node x $ mapMaybe go forrest
where limLower = case forrest of
(Node ((rBiggest,_,_),_) _ : _) -> round (fromIntegral rBiggest * p)
_ -> error "Blah"
go (Node n@((r,_,_),_) ns)
| r < limLower = Nothing
| otherwise = Just $ Node n $ mapMaybe go ns
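-- e.g. @pruneDownToPct 0.001@ drops every subtree whose retained size is
-- below 0.1% of the retained size of the largest top-level child.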
-- -}
defaultSnapshotLocation :: String
defaultSnapshotLocation = "/tmp/ghc-debug-cache"
-- Take snapshots in a loop forever, at intervals, overwriting.
pSteppingSnapshot :: Debuggee -> IO ()
pSteppingSnapshot e = forM_ [(0::Int)..] $ \i -> do
makeSnapshot e defaultSnapshotLocation
putStrLn ("CACHED: " ++ show i)
threadDelay 5_000_000
-- TODO add to ghc-debug?
nullInfoTablePtr :: InfoTablePtr
nullInfoTablePtr = InfoTablePtr 0
-- TODO add to ghc-debug
emptyTraceFunctions :: (MonadTrans m, Monad (m DebugM)) => TraceFunctions m
emptyTraceFunctions =
TraceFunctions {
papTrace = const (lift $ return ())
, srtTrace = const (lift $ return ())
, stackTrace = const (lift $ return ())
, closTrace = \_ _ -> id -- just recurse
, visitedVal = const (lift $ return ())
, conDescTrace = const (lift $ return ())
}
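-- Callers override just the hook they need, as done throughout this module:
--
-- > traceFromM emptyTraceFunctions{closTrace = myClosTraceFunc} roots
--
-- (myClosTraceFunc being whatever closure callback the analysis supplies)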
-- TODO add to ghc-debug
deriving instance MonadIO DebugM
getSourceLoc :: DebugClosureWithSize srt pap string s b -> DebugM (Maybe SourceInformation)
getSourceLoc c = getSourceInfo (tableId (info (noSize c)))
-- ================================================================================
-- TODO ...then a mode that consumes size of child without source info (add a bool flag)
-- TODO add a simple repl for doing queries, displaying data
-- TODO print stats, e.g. objects by module
data ClusteringStrategy
= ClusterByInfoTable -- ^ node per info-table, with accumulated size in bytes
| ClusterBySourceInfo Bool
-- ^ above but go further, folding nodes with identical (but not missing)
-- metadata. 'True' here indicates whether to go even further and cluster on
-- source location spans, ignoring type information (type will be labeled
-- "VARIOUS")
deriving (Show, Read, Eq)
-- | Write out the heap graph, with heap objects clustered by info table, to a
-- file, in GML format:
--
-- https://web.archive.org/web/20190303094704/http://www.fim.uni-passau.de:80/fileadmin/files/lehrstuhl/brandenburg/projekte/gml/gml-technical-report.pdf
--
-- Directed edge in the normal graph means "retains", with weights counting
-- number of such relationships; in the dominator tree graph an edge means
-- "only reachable by".
--
-- Both graphs have nodes tagged with size and transitive dominated size (i.e.
-- size of self and all dominated child nodes)
pClusteredHeapGML :: ClusteringStrategy -> FilePath -> Debuggee -> IO ()
pClusteredHeapGML clusteringStrategy pathNoExtension e = do
pause e
runTrace e $ do
_bs <- precacheBlocks
liftIO $ hPutStrLn stderr "!!!!! Done precacheBlocks !!!!!"
roots <- gcRoots
liftIO $ hPutStrLn stderr "!!!!! Done gcRoots !!!!!"
-- GML only supports Int32 Ids, so we need to remap iptr below
-- NOTE: addDominatedSize assumes 1 is the root node
(nodes, edges, _) <- flip execStateT (mempty, mempty, 1::Int32) $
-- 'traceFromM' visits every closure once, accounting for cycles
traceFromM emptyTraceFunctions{closTrace = closTraceFunc} roots
let (edgesToWrite, nodesClustered) = buildClusteredGraph nodes edges clusteringStrategy
-- add transitive dominated size to nodes:
let (nodesToWriteMap, dominatedEdges) = addDominatedSize nodesClustered edgesToWrite
nodesToWrite = Map.elems nodesToWriteMap
unsafeLiftIO $ do
hPutStrLn stderr "!!!!! Start writing to file !!!!!"
let path = pathNoExtension<>".gml"
outHandle <- openFile path WriteMode
writeToFile nodesToWrite edgesToWrite outHandle
hClose outHandle
hPutStrLn stderr $ "!!!!! Done writing regular graph at "<>path<> " !!!!!"
-- Write out a separate dominator tree graph (we'd really prefer if
-- graphia could just do this transform):
let domTreePath = pathNoExtension<>".dominator_tree.gml"
outHandleDomTree <- openFile domTreePath WriteMode
writeToFile nodesToWrite dominatedEdges outHandleDomTree
hClose outHandleDomTree
hPutStrLn stderr $ "!!!!! Done writing dominator tree graph at "<>domTreePath<> " !!!!!"
where
writeToFile
:: [((Maybe SourceInformation, String, Bool, Int32), Size, Size, [a])]
-> [(Int32, Int32, Int)]
-> Handle
-> IO ()
writeToFile nodesToWrite edgesToWrite outHandle = do
writeOpenGML
F.for_ nodesToWrite $ \((mbSI, closureTypeStr, isThunk, iptr32), size, sizeDominated, iptrsFolded) ->
writeNode (length iptrsFolded) iptr32 isThunk size sizeDominated $
case mbSI of
Nothing ->
(closureTypeStr, Nothing)
Just SourceInformation{..} ->
(closureTypeStr<>" "<>infoType, Just (infoLabel, infoPosition))
F.for_ edgesToWrite writeEdge
writeCloseGML
where
write = hPutStr outHandle
---- GML File format:
writeOpenGML =
write $ "graph [\n"
<> "comment \"this is a graph in GML format\"\n"
<> "directed 1\n"
writeCloseGML =
write $ "]\n"
writeEdge (iptrFrom32, iptrTo32, cnt) = do
write $ "edge [\n"
<> "source "<> show iptrFrom32 <> "\n"
<> "target "<> show iptrTo32 <> "\n"
<> "count "<> show cnt <> "\n"
<> "]\n"
writeNode
:: Int
-- ^ number of folded per-info-table clusters here; these would
-- expand into n+1 nodes under ClusterByInfoTable
-> Int32
-> Bool -> Size -> Size -> (String , Maybe (String,String)) -> IO ()
writeNode iptrsFoldedCnt iptr32 isThunk size sizeDominated (typeStr,mbMeta) = do
-- The spec is vague, but graphia chokes on \" so strip:
let renderQuoted = show . filter (/= '"')
write $ "node [\n"
<> "id " <> show iptr32 <> "\n"
<> (guard isThunk >>
"isThunk 1\n")
<> "sizeBytes " <> show (getSize size) <> "\n"
<> "sizeTransitiveDominated " <> show (getSize sizeDominated) <> "\n"
<> "infotablesFoldedCnt " <> show iptrsFoldedCnt <> "\n"
-- string attributes; need to be quoted:
<> "type " <> renderQuoted typeStr <> "\n"
<> (case mbMeta of
Nothing -> ""
Just (infoLabel, infoPosition) ->
"name "<> renderQuoted infoLabel<> "\n"
<> "pos " <> renderQuoted infoPosition<> "\n"
)
<> "]\n"
closTraceFunc _ptr (DCS size clos) continue = do
-- TODO is info pointer included in `size`? It seems only STATIC closures have just 8 bytes
(nodes, edges, iptr32) <- get
let tid@(InfoTablePtr _iptr) = tableId $ info clos
(!nodes', !iptr32') <-
if Map.member tid nodes
-- Just accumulate the size from this new node:
-- TODO add counts
then pure (Map.adjust (fmap (+size)) tid nodes , iptr32)
-- Use iptr32 and increment for the next new node
else lift $ do
-- 'tipe' also ends up in SourceInformation, but not all info tables have that
let closureTypeStr = show $ tipe $ decodedTable $ info clos
let isThunk = "THUNK" `isPrefixOf` closureTypeStr
getSourceInfo tid >>= \case
-- TODO in what cases is source info not present?
Nothing ->
pure (Map.insert tid ((Nothing, closureTypeStr, False, iptr32), size) nodes, iptr32+1)
Just si@SourceInformation{} -> do
-- When we record edges, we'll record some special metadata when the source node is a thunk
pure (Map.insert tid ((Just si, closureTypeStr, isThunk, iptr32), size) nodes, iptr32+1)
-- Collect all outgoing edges from this closure...
!edges' <- lift $ flip execStateT edges $
-- Here we go one hop further to build an edge (possibly to an already-visited
-- node, which we wouldn't otherwise be able to reach via traceFromM)
-- TODO this is probably slow, since we need to resolve the InfoTablePtr again to make an edge
void $ flip (quintraverse pure pure pure pure) clos $ \toPtr-> do
(DCS _ toClos) <- lift $ dereferenceClosure toPtr
let tidTarget = tableId $ info toClos
-- Increase edge count tid -> tidTo by one, else add new edge
modify $ Map.insertWith (+) (tid, tidTarget) 1
put (nodes', edges', iptr32')
continue
buildClusteredGraph
:: Map.Map InfoTablePtr ((Maybe SourceInformation, String, Bool, Int32), Size)
-> Map.Map (InfoTablePtr, InfoTablePtr) Int
-> ClusteringStrategy
-> ([(Int32, Int32, Int)],
Map.Map Int32 ((Maybe SourceInformation, String, Bool, Int32), Size, [InfoTablePtr])
)
buildClusteredGraph nodes edges = \case
-- --------
ClusterByInfoTable ->
-- just 'nodes' and 'edges', with no meaningful modifications:
let nodesToWrite = Map.fromList $
map (\t@(x, size) -> (getIptr32 t, (x, size, []))) $ -- []: no folded infoTable nodes
Map.elems nodes
edgesToWrite = map (\((ptrFrom, ptrTo), cnt) -> (toPtr32 ptrFrom, toPtr32 ptrTo, cnt)) $ Map.toList edges
where toPtr32 ptr = getIptr32 $ fromJust $ Map.lookup ptr nodes
in (edgesToWrite, nodesToWrite)
-- --------
ClusterBySourceInfo justBySourceLoc ->
-- we'll write nodesNoInfo out unmodified, and fold identical nodesByInfo_dupes:
let (nodesNoInfo, nodesByInfo_dupes) = partitionEithers $ map (uncurry hasSourceInfo) $ Map.toList nodes
where
hasSourceInfo iptr (xMeta@(mbSI, x, y, z), size) = case mbSI of
Just si@SourceInformation{..}
-- We'll fold nodes with a key like e.g.:
-- ("main.balancedTree","example/Main.hs:25:67-69","Tree")
-- ...so long as we have defined code location
| all (not . null) [infoLabel, infoPosition]
-> Right $ if justBySourceLoc
then (infoPosition
, ((Just si{infoLabel="VARIOUS", infoType="VARIOUS"},x,y,z), size, [iptr]))
else (infoLabel <> infoPosition <> infoType
, (xMeta, size, [iptr]))
_ -> Left (xMeta, size, []) -- []: no folded infoTable nodes
nodesByInfo :: Map.Map String -- either (infoLabel <> infoPosition <> infoType) or just infoPosition, if justBySourceLoc
((Maybe SourceInformation, String, Bool, Int32), Size, [InfoTablePtr])
nodesByInfo = Map.fromListWith mergeNodes nodesByInfo_dupes
-- merge sizes in bytes, store source infotable ptrs so we can
-- remap edges and store as graph metadata the number of folded nodes:
where mergeNodes ( ( mbSI0, closureTypeStr0, isThunk0, iptr32_0), size0, iptrs0 )
( (_mbSI1, _closureTypeStr1, _isThunk1, iptr32_1), size1, iptrs1 ) =
-- NOTE: keep the smallest iptr32, since that corresponds to first seen in traversal:
( (mbSI0, closureTypeStr0, isThunk0, min iptr32_0 iptr32_1)
-- merge sizes:
, size0+size1 , iptrs1<>iptrs0 )
-- map edge src/dst ids to the new folded node ids, combine counts of any now-folded edges
edgesRemapped :: Map.Map (Int32, Int32) Int
edgesRemapped = Map.fromListWith (+) $ map (first (remap *** remap)) $ Map.toList edges where
remap iptr = fromMaybe iptr32Orig $ Map.lookup iptr iptrRemapping where
-- this to/from node couldn't be folded (since no source info,
-- probably), so use the original node's int32 key
!iptr32Orig = maybe (error "Impossible! edgesRemapped") getIptr32 $ Map.lookup iptr nodes
iptrRemapping :: Map.Map InfoTablePtr Int32
iptrRemapping = Map.fromList $ concatMap iptrsToIptrs32 $ Map.elems nodesByInfo where
iptrsToIptrs32 ((_, _, _, iptr32), _, iptrs) = map (,iptr32) iptrs
-- output:
nodesClustered = Map.fromList $ map (getIptr32_ &&& id) $
Map.elems nodesByInfo <> nodesNoInfo
edgesToWrite = map (\((ptrFrom, ptrTo), cnt) -> (ptrFrom, ptrTo, cnt)) $ Map.toList edgesRemapped
in (edgesToWrite, nodesClustered)
where getIptr32 ((_, _, _, iptr32), _) = iptr32
getIptr32_ ((_, _, _, iptr32), _,_) = iptr32
-- Generate a dominator tree from the graph, annotating each node with the
-- transitive size of the graph it dominates; i.e. the total size in bytes of
-- all its descendents.
addDominatedSize ::
Map.Map Int32 ((a, b, c, Int32), Size, ds) ->
[(Int32, Int32, int)] ->
( Map.Map Int32 ((a, b, c, Int32), Size, Size, ds)
, [(Int32, Int32, Int)])
-- ^ Also return immediately-dominates relationship edges (with 0 weight)
addDominatedSize nodes0 edges =
-- NOTE: iDom returns reverse order from dominator tree edges:
let dominatesEdges = map swap $ FGL.iDom g 1
-- lazily accumulate transitive dominator sub-tree sizes
-- (leaves will be missing from the map, but their sizes are accounted for)
transSizes =
let domNodesChilds :: LazyMap.Map Int [Int]
domNodesChilds = LazyMap.fromListWith (<>) $ map (fmap pure) dominatesEdges
in LazyMap.mapWithKey accum domNodesChilds
where accum parent children = sizeOf parent + (sum $ map transSizeOf children)
transSizeOf nodeId =
fromMaybe (sizeOf nodeId) $ -- ...if it's a leaf of dominator tree
LazyMap.lookup nodeId transSizes
annotate xsz@(x, sz, ds) = (x, sz, transSizeOf $ getNodeId xsz, ds)
-- back to our Int32 keys; just put 0 for edge weight, arbitrarily:
dominatesEdges32 = map (\(src, dest) -> (fromIntegral src, fromIntegral dest, 0)) dominatesEdges
in (fmap annotate nodes0, dominatesEdges32)
where
getKey i = getNodeId $ fromJust $ Map.lookup i nodes0
getNodeId ((_, _, _, iptr32), _, _) = fromIntegral iptr32
g :: FGL.Gr Size ()
g = FGL.mkGraph fglNodes fglEdges
fglNodes = map (getNodeId &&& \(_, sz, _)-> sz) $ Map.elems nodes0
-- drops edge weights entirely
fglEdges = map ( \(fromI,toI,_)-> (getKey fromI, getKey toI, ()) ) edges
sizeOf = fromJust . FGL.lab g
test_addDominatedSize :: Bool
test_addDominatedSize =
-- From https://en.wikipedia.org/wiki/Dominator_(graph_theory)
let edges = map (\(f,t)-> (f,t,())) [(1,2), (2, 3), (2, 4), (2, 6), (3, 5), (4, 5), (5, 2)]
-- make the size equal to node id
nodes = Map.fromList $ map (\n-> (n, (((),(),(),fromIntegral n), fromIntegral n, ()) )) [1..6]
expected =
Map.fromList [
(1,(((),(),(),1),Size {getSize = 1},Size {getSize = 21},())),
-- own size (2) + 3+4+5+6 = 20:
(2,(((),(),(),2),Size {getSize = 2},Size {getSize = 20},())),
-- leaves just accum their own size:
(3,(((),(),(),3),Size {getSize = 3},Size {getSize = 3},())),
(4,(((),(),(),4),Size {getSize = 4},Size {getSize = 4},())),
(5,(((),(),(),5),Size {getSize = 5},Size {getSize = 5},())),
(6,(((),(),(),6),Size {getSize = 6},Size {getSize = 6},()))]
in expected == fst (addDominatedSize nodes edges)
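-- Quick sanity check in GHCi: test_addDominatedSize should evaluate to True.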
-- ================================================================================
-- | A value of N means: this pointer needs at least N bits (plus a sign bit)
-- if represented as an /offset/ from the closure header (or from the first
-- child pointer, as in histSiblingOffs)
type BitWidthBucket = Int
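-- For intuition, one plausible way to compute such a bucket (a sketch only;
-- the offsetFromPtrBucket actually used below is defined later in this file
-- and may differ, e.g. in rounding or word- vs byte-granularity):
--
-- > import Data.Bits (countLeadingZeros, finiteBitSize)
-- > offsetBits :: ClosurePtr -> ClosurePtr -> BitWidthBucket
-- > offsetBits (UntaggedClosurePtr from) (UntaggedClosurePtr to) =
-- >   finiteBitSize offs - countLeadingZeros (abs offs)
-- >   where offs = fromIntegral to - fromIntegral from :: Int64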
data AnalyzePointerCompressionStats = AnalyzePointerCompressionStats {
infoTableMin :: Word64
, infoTableMax :: Word64
, infoPointers :: Map.Map Word64 Int
, lastPointingNext :: Int
-- ^ number of last-in-object pointers pointing to next adjacent heap object
, histFirst :: Map.Map BitWidthBucket Int
, histLast :: Map.Map BitWidthBucket Int
-- ^ stats on the first and last heap pointers of a closure
, histOffs :: Map.Map (BitWidthBucket, Int) Int
-- ^ histogram of heap pointers on an individual basis, bucketed by offset
-- distance and number of sibling pointers:
, histSiblingOffs :: Map.Map (BitWidthBucket, Int) Int
-- ^ (like above, but offset from the first sibling pointer value)
, histMaxOffs :: Map.Map (BitWidthBucket, Int) Int
-- ^ ...whereas if all child pointers of a closure had to be equal sized
-- signed ints, what size would we need for all the pointers in this
-- closure? (these will all be positive bit width buckets):
, blockGraph :: IM.IntMap (Map.Map Int Int)
-- ^ a graph from block to block, where we make an edge if any heap
-- pointer in the source block points to the destination block. Mark the
-- edge with a count. Limit edges to maxBlockGraphEdgesToRecord (i.e.
-- a `Map Int Int` of size maxBlockGraphEdgesToRecord means,
-- "maxBlockGraphEdgesToRecord or more"). Includes self edges
, histClosureSizes :: Map.Map Int Int
-- ^ size in bytes -> total size in bytes for all closures of this size
--
-- The largest bucket means "N and larger"
} deriving (Show, Eq)
emptyAnalyzePointerCompressionStats :: AnalyzePointerCompressionStats
emptyAnalyzePointerCompressionStats =
AnalyzePointerCompressionStats maxBound minBound mempty 0 mempty mempty mempty mempty mempty mempty mempty
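-- NOTE: infoTableMin starts at maxBound and infoTableMax at minBound, so the
-- first info pointer observed during the trace initializes both extremes.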
-- What are the prospective benefits of pointer compression on this heap?
pAnalyzePointerCompression :: Debuggee -> IO ()
pAnalyzePointerCompression e = do
pause e
runTrace e $ do
_bs <- precacheBlocks
liftIO $ hPutStrLn stderr "!!!!! Done precacheBlocks !!!!!"
roots <- gcRoots
liftIO $ hPutStrLn stderr "!!!!! Done gcRoots !!!!!"
-- Use state to collect:
-- - range of info tables
-- - histograms of offsets of first and last pointer field in closure (if any)
-- - histogram of offsets for every field
-- - histogram of `maximum offsets`, where 'offsets' are all the pointer fields in a closure
-- ...the latter two also keyed by number of child pointers, up to 6+
AnalyzePointerCompressionStats{..}
<- flip execStateT emptyAnalyzePointerCompressionStats $
traceFromM emptyTraceFunctions{closTrace = closTraceFunc} roots
-- --------- Output and analysis:
liftIO $ do
putStrLn "========= Raw output ======================================"
-- print infoPointers -- < lots of output
putStrLn "* offset bits buckets for first and last pointers:"
print histFirst
print histLast
putStrLn "* count of final heap pointers pointing to adjacent heap object to the right:"
print lastPointingNext
putStrLn "* count of heap pointers by (offset bits bucket, pointers in closure):"
print histOffs
putStrLn "* (NEW!) count of sibling heap pointers by (offset-from-first-sibling-pointer-val bits bucket, pointers in closure):"
print histSiblingOffs
putStrLn "* count of closures by (offset bits reqd. for all fields, pointers in closure):"
print histMaxOffs
putStrLn "* (NEW!) histogram of heap residency, by closure size (divide to get counts)"
print histClosureSizes
putStrLn "========= Analysis ======================================"
let infoTableRange :: Double
infoTableRange = fromIntegral $ infoTableMax - infoTableMin
putStrLn $ "infotables + code range, in bits: "<> (show $ logBase 2 infoTableRange)
putStrLn "----------------------------------"
let (pos,neg) = (f *** f) $ partition ((>0) . fst . fst) $ Map.toList histOffs
where f = sum . map snd
putStrLn $ "Percentage of pointers representable as positive offset: "
<>show (pct pos (pos+neg))
putStrLn "----------------------------------"
let pointersByObjectSizeMembership =
Map.fromListWith (+) $ map (first snd) $ Map.toList histOffs
totalPointersCnt = sum pointersByObjectSizeMembership
putStrLn $ "Out of "<>show totalPointersCnt<>" total heap pointers..."
F.for_ (Map.toList pointersByObjectSizeMembership) $ \(ptrFields, cnt) -> do
let end | ptrFields == 6 = " 6 or more sibling pointers"
| otherwise = " "<>show ptrFields<>" sibling pointers"
putStrLn $ " ..."<>show (pct cnt totalPointersCnt)<>"% of heap pointers are in objects with"<>end
putStrLn "----------------------------------"
let closuresByObjectSize =
Map.fromListWith (+) $ map (first snd) $ Map.toList histMaxOffs
totalClosCount = sum closuresByObjectSize
putStrLn $ "Out of "<>show totalClosCount<>" total closures..."
F.for_ (Map.toList closuresByObjectSize) $ \(ptrFields, cnt) -> do
let end | ptrFields == 6 = " 6 or more pointer fields"
| otherwise = " "<>show ptrFields<>" pointer fields"
putStrLn $ " ..."<>show (pct cnt totalClosCount)<>"% have"<>end
putStrLn "----------------------------------"
-- we'll assume 6 or more to be exactly six
-- (here and elsewhere assume x86_64)
putStrLn "Conservative estimate of size of heap pointers + info pointers:"
let totalPtrBytes =
-- 1 word for the info pointer, plus 1 word for each ptr field
foldl' (\n (ptrFields,cnt)-> n + ((8*cnt)*(1 + ptrFields))) 0 $
Map.toList closuresByObjectSize
putStrLn $ " "<>show (totalPtrBytes `div` (1000*1000))<>" MB"
putStrLn "----------------------------------"
putStrLn "If all heap pointers in an object must have same width compressed (i.e. as offset),"
let closuresByOffsetBits =
Map.fromListWith (+) $ map (first fst) $ Map.toList histMaxOffs
F.for_ (Map.toList closuresByOffsetBits) $ \(bitWidth, cnt) -> do
putStrLn $ " ... "<>show (pct cnt totalClosCount)<> "% of closures could use offsets of width "
<>show (min 64 (bitWidth+2)) -- make these normal word sizes
putStrLn "----------------------------------"
let numBlocks = IM.size blockGraph
blockEdges = Map.fromListWith (+) $
map (\(_blk, outEdges) -> (Map.size outEdges, 1::Int)) $ IM.toList blockGraph
putStrLn $ "Out of "<>show numBlocks<>" blocks..."
F.for_ (Map.toList blockEdges) $ \(numOutEdges, countOfSuchBlocks) -> do
if numOutEdges == maxBlockGraphEdgesToRecord
then
putStrLn $ " ... "<>show (pct countOfSuchBlocks numBlocks)<>"% have edges to "
<>show numOutEdges<>" or more other blocks"
else
putStrLn $ " ... "<>show (pct countOfSuchBlocks numBlocks)<>"% have edges to exactly "
<>show numOutEdges<>" distinct other blocks"
where
pct :: Integral n=> n -> n -> Int
pct num den = round ((fromIntegral num / fromIntegral den)*100::Float)
-- so we don't blow up memory
maxBlockGraphEdgesToRecord = 20
closTraceFunc cp@(UntaggedClosurePtr ptr) (DCS (Size size) clos) continue = do
AnalyzePointerCompressionStats{..} <- get
let (InfoTablePtr iptr) = tableId $ info clos
!infoTableMax' = max infoTableMax iptr
!infoTableMin' = min infoTableMin iptr
-- just collect all info pointers, to see what's really going on:
-- (expect most counts to be 1 due to distinct-info-tables):
!infoPointers' = Map.insertWith (+) iptr 1 infoPointers
toPtrs <- fmap reverse $ lift $ flip execStateT [] $
-- Here we go one hop further to get this closure's pointers
void $ flip (quintraverse pure pure pure pure) clos $ \(UntaggedClosurePtr toPtr)-> do
ptrStack <- get
put (toPtr:ptrStack)
-- the last bucket indicates "six or more pointers"
let !numFieldsBucket = min 6 $ length toPtrs
-- stats on the first and last heap pointers of a closure
let goHistFirstLast mp = \case
[] -> mp
(toPtr:_) ->
let bucket = offsetFromPtrBucket toPtr
in Map.insertWith (+) bucket 1 mp
let !histFirst' = goHistFirstLast histFirst toPtrs
let !histLast' = goHistFirstLast histLast $ reverse toPtrs
-- histogram of heap pointers on an individual basis, bucketed by offset
-- distance and number of sibling pointers:
let !histOffs' = foldl' (flip go) histOffs toPtrs where
go toPtr = Map.insertWith (+) bucket 1 where
bucket = (offsetFromPtrBucket toPtr, numFieldsBucket)