Skip to content
Newer
Older
100644 266 lines (233 sloc) 7.17 KB
987da6d @tudor Move Rabin fingerprinting code to folly.
tudor authored
1 /*
275ca94 Copyright 2014->2015
Nicholas Ormrod authored
2 * Copyright 2015 Facebook, Inc.
987da6d @tudor Move Rabin fingerprinting code to folly.
tudor authored
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 /**
18 * Compute 64-, 96-, and 128-bit Rabin fingerprints, as described in
19 * Michael O. Rabin (1981)
20 * Fingerprinting by Random Polynomials
21 * Center for Research in Computing Technology, Harvard University
22 * Tech Report TR-CSE-03-01
23 *
24 * The implementation follows the optimization described in
25 * Andrei Z. Broder (1993)
26 * Some applications of Rabin's fingerprinting method
27 *
28 * extended for fingerprints larger than 64 bits, and modified to use
29 * 64-bit instead of 32-bit integers for computation.
30 *
31 * The precomputed tables are in FingerprintTable.cpp, which is automatically
32 * generated by ComputeFingerprintTable.cpp.
33 *
34 * Benchmarked on 10/13/2009 on a 2.5GHz quad-core Xeon L5420,
35 * - Fingerprint<64>::update64() takes about 12ns
36 * - Fingerprint<96>::update64() takes about 30ns
37 * - Fingerprint<128>::update128() takes about 30ns
38 * (unsurprisingly, Fingerprint<96> and Fingerprint<128> take the
39 * same amount of time, as they both use 128-bit operations; the least
40 * significant 32 bits of Fingerprint<96> will always be 0)
41 *
42 * @author Tudor Bosman (tudorb@facebook.com)
43 */
44
45 #ifndef FOLLY_FINGERPRINT_H_
46 #define FOLLY_FINGERPRINT_H_
47
48 #include <cstdint>
49
ce64f0f @tudor Codemod: use #include angle brackets in folly and thrift
tudor authored
50 #include <folly/Range.h>
987da6d @tudor Move Rabin fingerprinting code to folly.
tudor authored
51
52 namespace folly {
53
54 namespace detail {
55 template <int BITS>
56 struct FingerprintTable {
57 static const uint64_t poly[1 + (BITS-1)/64];
58 static const uint64_t table[8][256][1 + (BITS-1)/64];
59 };
60 } // namespace detail
61
62 /**
63 * Compute the Rabin fingerprint.
64 *
65 * TODO(tudorb): Extend this to allow removing values from the computed
66 * fingerprint (so we can fingerprint a sliding window, as in the Rabin-Karp
67 * string matching algorithm)
68 *
69 * update* methods return *this, so you can chain them together:
70 * Fingerprint<96>().update8(x).update(str).update64(val).write(output);
71 */
72 template <int BITS>
73 class Fingerprint {
74 public:
75 Fingerprint() {
76 // Use a non-zero starting value. We'll use (1 << (BITS-1))
77 fp_[0] = 1UL << 63;
78 for (int i = 1; i < size(); i++)
79 fp_[i] = 0;
80 }
81
82 Fingerprint& update8(uint8_t v) {
83 uint8_t out = shlor8(v);
84 xortab(detail::FingerprintTable<BITS>::table[0][out]);
85 return *this;
86 }
87
88 // update32 and update64 are convenience functions to update the fingerprint
89 // with 4 and 8 bytes at a time. They are faster than calling update8
90 // in a loop. They process the bytes in big-endian order.
91 Fingerprint& update32(uint32_t v) {
92 uint32_t out = shlor32(v);
93 for (int i = 0; i < 4; i++) {
94 xortab(detail::FingerprintTable<BITS>::table[i][out&0xff]);
95 out >>= 8;
96 }
97 return *this;
98 }
99
100 Fingerprint& update64(uint64_t v) {
101 uint64_t out = shlor64(v);
102 for (int i = 0; i < 8; i++) {
103 xortab(detail::FingerprintTable<BITS>::table[i][out&0xff]);
104 out >>= 8;
105 }
106 return *this;
107 }
108
109 Fingerprint& update(StringPiece str) {
110 // TODO(tudorb): We could be smart and do update64 or update32 if aligned
111 for (auto c : str) {
112 update8(uint8_t(c));
113 }
114 return *this;
115 }
116
117 /**
118 * Return the number of uint64s needed to hold the fingerprint value.
119 */
120 static int size() {
121 return 1 + (BITS-1)/64;
122 }
123
124 /**
125 * Write the computed fingeprint to an array of size() uint64_t's.
126 * For Fingerprint<64>, size()==1; we write 64 bits in out[0]
127 * For Fingerprint<96>, size()==2; we write 64 bits in out[0] and
128 * the most significant 32 bits of out[1]
129 * For Fingerprint<128>, size()==2; we write 64 bits in out[0] and
130 * 64 bits in out[1].
131 */
132 void write(uint64_t* out) const {
133 for (int i = 0; i < size(); i++) {
134 out[i] = fp_[i];
135 }
136 }
137
138 private:
139 // XOR the fingerprint with a value from one of the tables.
140 void xortab(const uint64_t* tab) {
141 for (int i = 0; i < size(); i++) {
142 fp_[i] ^= tab[i];
143 }
144 }
145
146 // Helper functions: shift the fingerprint value left by 8/32/64 bits,
147 // return the "out" value (the bits that were shifted out), and add "v"
148 // in the bits on the right.
149 uint8_t shlor8(uint8_t v);
150 uint32_t shlor32(uint32_t v);
151 uint64_t shlor64(uint64_t v);
152
153 uint64_t fp_[1 + (BITS-1)/64];
154 };
155
156 // Convenience functions
157
158 /**
159 * Return the 64-bit Rabin fingerprint of a string.
160 */
161 inline uint64_t fingerprint64(StringPiece str) {
162 uint64_t fp;
163 Fingerprint<64>().update(str).write(&fp);
164 return fp;
165 }
166
167 /**
168 * Compute the 96-bit Rabin fingerprint of a string.
169 * Return the 64 most significant bits in *msb, and the 32 least significant
170 * bits in *lsb.
171 */
172 inline void fingerprint96(StringPiece str,
173 uint64_t* msb, uint32_t* lsb) {
174 uint64_t fp[2];
175 Fingerprint<96>().update(str).write(fp);
176 *msb = fp[0];
177 *lsb = (uint32_t)(fp[1] >> 32);
178 }
179
180 /**
181 * Compute the 128-bit Rabin fingerprint of a string.
182 * Return the 64 most significant bits in *msb, and the 64 least significant
183 * bits in *lsb.
184 */
185 inline void fingerprint128(StringPiece str,
186 uint64_t* msb, uint64_t* lsb) {
187 uint64_t fp[2];
188 Fingerprint<128>().update(str).write(fp);
189 *msb = fp[0];
190 *lsb = fp[1];
191 }
192
193
194 template <>
195 inline uint8_t Fingerprint<64>::shlor8(uint8_t v) {
196 uint8_t out = (uint8_t)(fp_[0] >> 56);
197 fp_[0] = (fp_[0] << 8) | ((uint64_t)v);
198 return out;
199 }
200
201 template <>
202 inline uint32_t Fingerprint<64>::shlor32(uint32_t v) {
203 uint32_t out = (uint32_t)(fp_[0] >> 32);
204 fp_[0] = (fp_[0] << 32) | ((uint64_t)v);
205 return out;
206 }
207
208 template <>
209 inline uint64_t Fingerprint<64>::shlor64(uint64_t v) {
210 uint64_t out = fp_[0];
211 fp_[0] = v;
212 return out;
213 }
214
215 template <>
216 inline uint8_t Fingerprint<96>::shlor8(uint8_t v) {
217 uint8_t out = (uint8_t)(fp_[0] >> 56);
218 fp_[0] = (fp_[0] << 8) | (fp_[1] >> 56);
219 fp_[1] = (fp_[1] << 8) | ((uint64_t)v << 32);
220 return out;
221 }
222
223 template <>
224 inline uint32_t Fingerprint<96>::shlor32(uint32_t v) {
225 uint32_t out = (uint32_t)(fp_[0] >> 32);
226 fp_[0] = (fp_[0] << 32) | (fp_[1] >> 32);
227 fp_[1] = ((uint64_t)v << 32);
228 return out;
229 }
230
231 template <>
232 inline uint64_t Fingerprint<96>::shlor64(uint64_t v) {
233 uint64_t out = fp_[0];
234 fp_[0] = fp_[1] | (v >> 32);
235 fp_[1] = v << 32;
236 return out;
237 }
238
239 template <>
240 inline uint8_t Fingerprint<128>::shlor8(uint8_t v) {
241 uint8_t out = (uint8_t)(fp_[0] >> 56);
242 fp_[0] = (fp_[0] << 8) | (fp_[1] >> 56);
243 fp_[1] = (fp_[1] << 8) | ((uint64_t)v);
244 return out;
245 }
246
247 template <>
248 inline uint32_t Fingerprint<128>::shlor32(uint32_t v) {
249 uint32_t out = (uint32_t)(fp_[0] >> 32);
250 fp_[0] = (fp_[0] << 32) | (fp_[1] >> 32);
251 fp_[1] = (fp_[1] << 32) | ((uint64_t)v);
252 return out;
253 }
254
255 template <>
256 inline uint64_t Fingerprint<128>::shlor64(uint64_t v) {
257 uint64_t out = fp_[0];
258 fp_[0] = fp_[1];
259 fp_[1] = v;
260 return out;
261 }
262
263 } // namespace folly
264
265 #endif /* FOLLY_FINGERPRINT_H_ */
Something went wrong with that request. Please try again.