-
Notifications
You must be signed in to change notification settings - Fork 26
/
node.clj
163 lines (146 loc) · 6.85 KB
/
node.clj
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
(ns clj-uuid.node
(:require [clj-uuid.util :refer [java6? compile-if]]
[clj-uuid.bitmop :refer [sb8 assemble-bytes ldb dpb mask]]
[clj-uuid.constants :refer :all]
[clj-uuid.random :as random])
(:import [java.net InetAddress
NetworkInterface]
[java.security MessageDigest]
[java.util Properties]))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Clock Sequence [RFC4122:4.1.5 "CLOCK SEQUENCE"] ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; For time-based UUID's the "clock-sequence" value is a somewhat counter-
;; intuitively named value that is used to reduce the potential that duplicate
;; UUID's might be generated under unusual situations, such as if the system
;; hardware clock is set backward in time or if, despite all efforts otherwise,
;; a duplicate +node-id+ (see below) happens to be generated. This value is
;; initialized to a random 16-bit number once per lifetime of the system.
;; Picked at random the first time this namespace is loaded; `defonce` keeps
;; the already-chosen value if the namespace is reloaded.  `rand-int` yields
;; 0..0xfffe, so after `inc` the value lies in 1..0xffff.
(defonce +clock-sequence+
  (-> 0xffff rand-int inc))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; NodeID Representation [RFC4122:4.1.6 "NODE"] ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; The representation of NodeID used for construction of time-based (v1) UUIDs
;; is a list with the following encoding semantics:
;;
;; SIZE TYPE REPRESENTATION
;; -----------+------+---------+---------------------------------------------
;; node | 6 | ub48 | (<BYTE> <BYTE> <BYTE> <BYTE> <BYTE> <BYTE>)
;;
;; prepending two other (computed) bytes to the node-id before
;; bitwise assembly.
;;
;; (cons clock-high (cons clock-low @+node-id+))
;;
;;
;; ( <BYTE> . <BYTE> . <BYTE> <BYTE> <BYTE> <BYTE> <BYTE> <BYTE>)
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; NodeID Calculation [RFC4122:4.5 "NODE IDS THAT DO NOT IDENTIFY THE HOST"] ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; This turns out to be surprisingly problematic. I've tried various
;; approaches. The most straightforward is the use of IEEE 802 MAC Address:
;;
;; (.getHardwareAddress
;; (java.net.NetworkInterface/getByInetAddress
;; (java.net.InetAddress/getLocalHost))))))
;;
;; Unfortunately, I got reports of NPE on some platforms (openjdk?). Also, it
;; discloses the hardware address of the host system -- this is how the
;; creator of the Melissa virus was actually tracked down and caught.
;;
;; Choosing node-id randomly does not provide consistent generation of UUID's
;; across runtimes.
;;
;; This topic is specifically addressed by the RFC:
;;
;;
;; "A better solution is to obtain a 47-bit cryptographic quality random
;; number and use it as the low 47-bits of the Node-ID, with the least
;; significant bit of the first octet of the Node-ID set to one. This
;; bit is the unicast/multicast bit, which will never be set in IEEE 802
;; addresses obtained from network cards. Hence, there can never be a
;; conflict between UUID's generated by machines with and without network
;; cards."
;;
;; . . .
;;
;; "In addition, items such as the computer's name and the name of the
;; operating system, while not strictly speaking random, will help
;; differentiate the results from those obtained by other systems...
;; ... A generic approach... IS TO ACCUMULATE AS MANY SOURCES AS POSSIBLE
;; INTO A BUFFER, USE A MESSAGE DIGEST SUCH AS MD5 OR SHA1, TAKE AN
;; ARBITRARY 6 BYTES FROM THE HASH VALUE, AND SET THE MULTICAST BIT
;; AS DESCRIBED ABOVE."
;;
;; -- [RFC4122:4.5 "Node IDs that do not Identify the Host"]
;;
;;
;; We do exactly that. Taking into account that the term "first octet"
;; in the above excerpt refers to network transmission order, and we
;; 'bit-or' the corresponding bytes:
;;
;; hi-byte | byte5 | byte4 | byte3 | byte2 | lo-byte
;; ---------+-------+-------+-------+-------+---------
;; 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x01
;;
;; Thanks to Datastax and to @jjcomer for submitting the original patch
;; from which this current implementation is largely derived.
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Keys of the JVM system properties whose values are folded into the
;; node-id digest (see `make-node-id`), in the order they are consumed.
(def ^{:private true} datasources
  ["java.vendor"
   "java.vendor.url"
   "java.version"
   "os.arch"
   "os.name"
   "os.version"])
(defn- all-local-addresses
  "Gathers, as strings, the identities under which this host is reachable:
  the local-host InetAddress, its canonical host name, every InetAddress
  bound to every network interface, and every address the canonical host
  name resolves to.

  The accumulation starts from a set and is extended with `concat` and then
  `conj`; that exact sequence of operations determines the ordering of the
  result, which `make-node-id` feeds into an order-sensitive digest, so it
  is deliberately kept as-is."
  []
  (let [^InetAddress localhost (InetAddress/getLocalHost)
        canonical-name         (.getCanonicalHostName localhost)
        seed                   #{(str localhost) canonical-name}
        ;; string forms of all addresses bound to a single interface
        address-strings        (fn [^NetworkInterface nic]
                                 (map str (enumeration-seq (.getInetAddresses nic))))
        gathered               (reduce (fn [so-far nic]
                                         (concat so-far (address-strings nic)))
                                       seed
                                       (enumeration-seq
                                        (NetworkInterface/getNetworkInterfaces)))
        resolved               (map str (InetAddress/getAllByName canonical-name))]
    (reduce conj gathered resolved)))
(defn- make-node-id
  "Derives a six-element node identifier without exposing a hardware address.

  The strings from `all-local-addresses`, together with the values of the
  system properties named in `datasources`, are fed into an MD5 digest.
  The first six digest bytes are returned as a lazy sequence of integers,
  with the low bit of the last element forced to 1.

  System properties that are undefined on the running JVM are skipped;
  previously a missing property caused a NullPointerException when its
  (nil) value was converted to bytes."
  []
  (let [addresses             (all-local-addresses)
        ^MessageDigest digest (MessageDigest/getInstance "MD5")
        ^Properties    props  (System/getProperties)
        ;; The digest is order-sensitive, so the original accumulation
        ;; (conj onto whatever collection `all-local-addresses` returned)
        ;; is retained unchanged to keep node ids stable.
        to-digest             (reduce (fn [acc key]
                                        (conj acc (.getProperty props key)))
                                      addresses datasources)]
    (doseq [^String d to-digest
            ;; `getProperty` returns nil for an undefined key -- skip those.
            :when d]
      (compile-if (java6?)
        (.update digest (.getBytes d))
        (.update digest
                 (.getBytes d java.nio.charset.StandardCharsets/UTF_8))))
    ;; OR-ing with 0x01 sets the low bit of the sixth byte; the other five
    ;; bytes pass through untouched.
    (map bit-or
         [0x00 0x00 0x00 0x00 0x00 0x01]
         (take 6 (seq (.digest digest))))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Public NodeID API
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Public alias for the private digest-based generator.
(def node-id make-node-id)

;; Delayed (computed on first deref, then cached) numeric form of the node
;; id: the six node-id elements with two leading zero elements prepended,
;; handed to `assemble-bytes`.
(def +node-id+
  (delay
    (->> (node-id)
         (list* 0 0)
         assemble-bytes)))
(defn- +v1-lsb+'
  "Builds the value served by `+v1-lsb+` from `+clock-sequence+` and the
  cached `+node-id+`.

  NOTE(review): `ldb`, `dpb` and `mask` come from clj-uuid.bitmop; they are
  presumably Common-Lisp-style load-byte / deposit-byte helpers over a
  (width, offset) bit field -- confirm against that namespace."
  []
  (let [seq-upper-6 (ldb (mask 6 8) +clock-sequence+)
        ;; 0x2 goes into the 2-wide field at offset 6 (presumably the UUID
        ;; variant bits -- confirm)
        hi-octet    (dpb (mask 2 6) seq-upper-6 0x2)
        lo-octet    (ldb (mask 8 0) +clock-sequence+)
        with-low    (dpb (mask 8 48) @+node-id+ lo-octet)]
    (dpb (mask 8 56) with-low hi-octet)))

;; Zero-argument function, memoized so the value is computed only once.
(def +v1-lsb+ (memoize +v1-lsb+'))
;; v6 lsb uses a cryptographically secure random node identifier that is
;; initialized at runtime.
(defn- +v6-lsb+'
  "Builds the value served by `+v6-lsb+`: same clock-sequence fields as the
  v1 variant, but deposited into a fresh `(random/long)` rather than the
  digest-derived node id.

  NOTE(review): `ldb`, `dpb` and `mask` come from clj-uuid.bitmop; they are
  presumably Common-Lisp-style load-byte / deposit-byte helpers over a
  (width, offset) bit field -- confirm against that namespace."
  []
  (let [seq-upper-6 (ldb (mask 6 8) +clock-sequence+)
        ;; 0x2 goes into the 2-wide field at offset 6 (presumably the UUID
        ;; variant bits -- confirm)
        hi-octet    (dpb (mask 2 6) seq-upper-6 0x2)
        lo-octet    (ldb (mask 8 0) +clock-sequence+)
        with-low    (dpb (mask 8 48) (random/long) lo-octet)]
    (dpb (mask 8 56) with-low hi-octet)))

;; Zero-argument function, memoized so the random node value is drawn once
;; and then reused for every later call.
(def +v6-lsb+ (memoize +v6-lsb+'))