-
Notifications
You must be signed in to change notification settings - Fork 65
/
Copy pathtracing_gc.clj
69 lines (64 loc) · 2.84 KB
/
tracing_gc.clj
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
(ns hitchhiker.tracing-gc
(:require [hitchhiker.tree.core :as hh]))
;; Note: this implementation is single-threaded, and could be made parallel without too much effort
;; We might need to trace millions or billions of keys. That might not fit in memory, so this could be backed
;; by leveldb or hsql so that we can spill to disk when necessary. We don't need a functional datastructure here.
(defprotocol IGCScratch
(add-to-work-queue! [this addr] "Adds the given address to the work queue to be processed")
(pop-from-work-queue! [this] "Pops the next element off of the work queue, or returns nil if we're done")
(observe-addr! [this addr] "Marks the given addr as being currently active")
(observed? [this addr] "Returns true if the given addr was observed"))
;
;;; The workq is a ref containing a collection of addresses we still need to scan.
;;; The observed-set is a ref containing the set of addresses we know are active
;;; For simplicity, adding an addr to the workq automatically observes it as well
;;; ^^ this allows us to only add new addrs to the workq, without a separate set of "in workq"
(defrecord InMemScratch [workq observed-set]
IGCScratch
(add-to-work-queue! [_ addr]
(dosync
(when-not (contains? @observed-set addr)
(alter workq conj addr)
(alter observed-set conj addr))))
(pop-from-work-queue! [_]
(dosync
(when (seq @workq)
(let [head (peek @workq)]
(alter workq pop)
head))))
(observe-addr! [_ addr]
(dosync
(alter observed-set conj addr)))
(observed? [_ addr]
(contains? @observed-set addr)))
(defn in-mem-scratch
"Creates an instance of in memory GC scratch"
[]
(->InMemScratch (ref []) (ref #{})))
(defn trace-gc!
"Does a tracing GC and frees up all unused keys.
This is a simple mark-sweep algorithm.
gc-scratch should be an instance of IGCScratch
gc-roots should be a list of the roots, which should implement IResolve. These are generated by calls to anchor-root.
all-keys should be a lazy sequence that will contain every key in storage. This algorithm will not hold the whole sequence in memory
delete-fn will be called on every key that should be deleted during the sweep phase"
[gc-scratch gc-roots all-keys delete-fn]
;; First, we'll initialize the work queue
(doseq [root gc-roots]
(add-to-work-queue! gc-scratch root))
;; Now, we'll do the mark phase
(loop []
(when-let [addr (pop-from-work-queue! gc-scratch)]
(observe-addr! gc-scratch addr)
(when (hh/index? addr)
(let [node (hh/resolve addr)]
(doseq [c (:children node)]
(add-to-work-queue! gc-scratch c))))
(recur)))
;; Next, we do the sweep
(loop [ks all-keys]
(when (seq ks)
(let [head (first ks)]
(when-not (observed? gc-scratch head)
(delete-fn head)))
(recur (next ks)))))