commit 4234046f0eaccc6670d8c8a5aeaed396ec063f0f (parent: a48a016)
Author: Sokolov Yura
Date:   August 02, 2012

    native version
ext/murmurhash3/extconf.rb (new file, 8 additions)
@@ -0,0 +1,8 @@
+if RUBY_ENGINE == 'ruby'
+  require 'mkmf'
+  create_makefile("native_murmur")
+else
+  File.open(File.dirname(__FILE__) + "/Makefile", 'w') do |f|
+    f.write("install:\n\t#nothing to build")
+  end
+end
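Note on the extconf.rb above: on MRI (RUBY_ENGINE == 'ruby') mkmf generates a real Makefile for the extension, while on any other engine it writes a stub Makefile whose install target does nothing, so gem installation still succeeds without compiling anything. Callers are then expected to fall back to the pure-Ruby implementation when the native one is absent. A minimal sketch of that pattern (the constant name Murmur32 is illustrative, and MurmurHash3::PureRuby32 is assumed as the 32-bit counterpart of the PureRuby128 module exercised in the tests below):

    # Prefer the native extension; fall back to pure Ruby when the stub
    # Makefile above meant nothing was built (e.g. on JRuby).
    begin
      require 'murmurhash3/native_murmur'
      Murmur32 = MurmurHash3::Native32
    rescue LoadError
      require 'murmurhash3/pure_ruby'
      Murmur32 = MurmurHash3::PureRuby32  # assumed module name
    end

    Murmur32.str_hash("data")  # same call either way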
ext/murmurhash3/murmur3.c (new file, 441 additions)
@@ -0,0 +1,441 @@
+#include <ruby.h>
+/*-----------------------------------------------------------------------------
+ * MurmurHash3 was written by Austin Appleby, and is placed in the public
+ * domain. The author hereby disclaims copyright to this source code.
+
+ * Note - The x86 and x64 versions do _not_ produce the same results, as the
+ * algorithms are optimized for their respective platforms. You can still
+ * compile and run any of them on any platform, but your performance with the
+ * non-native version will be less than optimal.
+ */
+
+typedef unsigned char uint8_t;
+typedef unsigned int uint32_t;
+#if SIZEOF_LONG == 8
+typedef unsigned long uint64_t;
+#else
+typedef unsigned long long uint64_t;
+#endif
+
+/*-----------------------------------------------------------------------------
+ * Platform-specific functions and macros
+ */
+
+#ifdef __GNUC__
+#define FORCE_INLINE __attribute__((always_inline))
+#elif defined(_MSC_VER)
+#define FORCE_INLINE  __forceinline
+#else
+#define FORCE_INLINE
+#endif
+
+#if defined(_MSC_VER)
+
+#define ROTL32(x,y)  _rotl(x,y)
+#define ROTL64(x,y)  _rotl64(x,y)
+
+#define BIG_CONSTANT(x) (x)
+
+#else
+
+static inline FORCE_INLINE uint32_t
+rotl32 ( uint32_t x, int8_t r )
+{
+  return (x << r) | (x >> (32 - r));
+}
+
+static inline FORCE_INLINE uint64_t
+rotl64 ( uint64_t x, int8_t r )
+{
+  return (x << r) | (x >> (64 - r));
+}
+
+#define ROTL32(x,y) rotl32(x,y)
+#define ROTL64(x,y) rotl64(x,y)
+
+#define BIG_CONSTANT(x) (x##LLU)
+#endif
+
+/* end platform specific */
+
+/* Block read - if your platform needs to do endian-swapping or can only
+ * handle aligned reads, do the conversion here */
+#ifdef WORDS_BIGENDIAN
+#if GCC_VERSION_SINCE(4,3,0)
+# define swap32(x) __builtin_bswap32(x)
+# define swap64(x) __builtin_bswap64(x)
+#endif
+
+#ifndef swap32
+# define swap32(x) ((((x)&0xFF)<<24)     \
+                   |(((x)>>24)&0xFF)     \
+                   |(((x)&0x0000FF00)<<8)\
+                   |(((x)&0x00FF0000)>>8))
+#endif
+
+#ifndef swap64
+# ifdef HAVE_INT64_T
+static inline FORCE_INLINE uint64_t
+swap64(uint64_t x) {
+    x = (x>>32) | (x << 32);
+    x = ((x & BIG_CONSTANT(0xFFFF0000FFFF0000)) >> 16) |
+        ((x & BIG_CONSTANT(0x0000FFFF0000FFFF)) << 16);
+    return ((x & BIG_CONSTANT(0xFF00FF00FF00FF00)) >> 8) |
+           ((x & BIG_CONSTANT(0x00FF00FF00FF00FF)) << 8);
+}
+# endif
+
+#endif
+static inline FORCE_INLINE uint32_t
+getblock32(const uint32_t * p, int i)
+{
+    return swap32(p[i]);
+}
+
+static inline FORCE_INLINE uint64_t
+getblock64(const uint64_t * p, int i)
+{
+    return swap64(p[i]);
+}
+#else
+#define getblock32(p, i) (p[i])
+#define getblock64(p, i) (p[i])
+#endif
+
+/* Finalization mix - force all bits of a hash block to avalanche */
+
+static inline FORCE_INLINE uint32_t
+fmix32 ( uint32_t h )
+{
+  h ^= h >> 16;
+  h *= 0x85ebca6b;
+  h ^= h >> 13;
+  h *= 0xc2b2ae35;
+  h ^= h >> 16;
+
+  return h;
+}
+
+static inline FORCE_INLINE uint64_t
+fmix64 ( uint64_t k )
+{
+  k ^= k >> 33;
+  k *= BIG_CONSTANT(0xff51afd7ed558ccd);
+  k ^= k >> 33;
+  k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
+  k ^= k >> 33;
+
+  return k;
+}
+
+static inline FORCE_INLINE uint32_t
+mmix32(uint32_t k1)
+{
+    k1 *= 0xcc9e2d51;
+    k1 = ROTL32(k1, 15);
+    return k1 * 0x1b873593;
+}
+
+static uint32_t
+MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed)
+{
+  const uint8_t * data = (const uint8_t*)key;
+  const int nblocks = len / 4;
+  int i;
+
+  uint32_t h1 = seed;
+  uint32_t k1 = 0;
+
+  /* body */
+
+  const uint32_t * blocks = (const uint32_t *)(data + nblocks*4);
+
+  for(i = -nblocks; i; i++)
+  {
+    h1 ^= mmix32(getblock32(blocks, i));
+    h1 = ROTL32(h1,13);
+    h1 = h1*5+0xe6546b64;
+  }
+
+  /* tail */
+
+  data += nblocks*4;
+
+  switch(len & 3)
+  {
+  case 3: k1 ^= data[2] << 16;
+  case 2: k1 ^= data[1] << 8;
+  case 1: k1 ^= data[0];
+          h1 ^= mmix32(k1);
+  };
+
+  /* finalization */
+
+  h1 ^= len;
+
+  h1 = fmix32(h1);
+
+  return h1;
+}
+
+#define C1_128 BIG_CONSTANT(0x87c37b91114253d5)
+#define C2_128 BIG_CONSTANT(0x4cf5ad432745937f)
+
+static inline FORCE_INLINE uint64_t
+mmix128_1(uint64_t k1)
+{
+    k1 *= C1_128;
+    k1 = ROTL64(k1, 31);
+    return k1 * C2_128;
+}
+
+static inline FORCE_INLINE uint64_t
+mmix128_2(uint64_t k2)
+{
+    k2 *= C2_128;
+    k2 = ROTL64(k2, 33);
+    return k2 * C1_128;
+}
+
+static void MurmurHash3_x64_128 ( const void * key, const int len,
+                           const uint32_t seed, void * out )
+{
+  const uint8_t * data = (const uint8_t*)key;
+  const int nblocks = len / 16;
+  int i;
+
+  uint64_t h1 = seed;
+  uint64_t h2 = seed;
+  uint64_t k1 = 0, k2 = 0;
+
+  /* body */
+
+  const uint64_t * blocks = (const uint64_t *)(data);
+
+  for(i = 0; i < nblocks; i++)
+  {
+    k1 = getblock64(blocks, i*2+0);
+    k2 = getblock64(blocks, i*2+1);
+
+    h1 ^= mmix128_1(k1);
+    h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
+
+    h2 ^= mmix128_2(k2);
+    h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
+  }
+
+  /* tail */
+
+  data += nblocks*16;
+  k1 = k2 = 0;
+
+  switch(len & 15)
+  {
+  case 15: k2 ^= (uint64_t)(data[14]) << 48;
+  case 14: k2 ^= (uint64_t)(data[13]) << 40;
+  case 13: k2 ^= (uint64_t)(data[12]) << 32;
+  case 12: k2 ^= (uint64_t)(data[11]) << 24;
+  case 11: k2 ^= (uint64_t)(data[10]) << 16;
+  case 10: k2 ^= (uint64_t)(data[ 9]) << 8;
+  case  9: k2 ^= (uint64_t)(data[ 8]) << 0;
+           h2 ^= mmix128_2(k2);
+
+  case  8: k1 ^= (uint64_t)(data[ 7]) << 56;
+  case  7: k1 ^= (uint64_t)(data[ 6]) << 48;
+  case  6: k1 ^= (uint64_t)(data[ 5]) << 40;
+  case  5: k1 ^= (uint64_t)(data[ 4]) << 32;
+  case  4: k1 ^= (uint64_t)(data[ 3]) << 24;
+  case  3: k1 ^= (uint64_t)(data[ 2]) << 16;
+  case  2: k1 ^= (uint64_t)(data[ 1]) << 8;
+  case  1: k1 ^= (uint64_t)(data[ 0]) << 0;
+           h1 ^= mmix128_1(k1);
+  };
+
+  /* finalization */
+
+  h1 ^= len; h2 ^= len;
+
+  h1 += h2;
+  h2 += h1;
+
+  h1 = fmix64(h1);
+  h2 = fmix64(h2);
+
+  h1 += h2;
+  h2 += h1;
+
+  ((uint64_t*)out)[0] = h1;
+  ((uint64_t*)out)[1] = h2;
+}
+
+/* end of MurmurHash3 algorithm */
+
+static VALUE
+rb_fmix32(VALUE self, VALUE integer)
+{
+    uint32_t _int = NUM2UINT(integer);
+    return UINT2NUM(fmix32(_int));
+}
+
+static VALUE
+rb_fmix64(VALUE self, VALUE integer)
+{
+#if SIZEOF_LONG == 8
+    uint64_t _int = NUM2ULONG(integer);
+    return ULONG2NUM(fmix64(_int));
+#else
+    uint64_t _int = NUM2ULL(integer);
+    return ULL2NUM(fmix64(_int));
+#endif
+}
+
+static VALUE
+rb_murmur3_32_str_hash(int argc, VALUE* argv, VALUE self)
+{
+    VALUE rstr, rseed;
+    uint32_t result;
+
+    rb_scan_args(argc, argv, "11", &rstr, &rseed);
+
+    result = MurmurHash3_x86_32(RSTRING_PTR(rstr), RSTRING_LEN(rstr), argc == 1 ? 0 : NUM2UINT(rseed));
+
+    return UINT2NUM(result);
+}
+
+static VALUE
+rb_murmur3_32_int32_hash(int argc, VALUE* argv, VALUE self)
+{
+    VALUE rint, rseed;
+    uint32_t _int;
+    uint32_t result;
+
+    rb_scan_args(argc, argv, "11", &rint, &rseed);
+    _int = NUM2UINT(rint);
+
+    result = MurmurHash3_x86_32(&_int, 4, argc == 1 ? 0 : NUM2UINT(rseed));
+
+    return UINT2NUM(result);
+}
+
+static VALUE
+rb_murmur3_32_int64_hash(int argc, VALUE* argv, VALUE self)
+{
+    VALUE rint, rseed;
+    uint64_t _int;
+    uint32_t result;
+
+    rb_scan_args(argc, argv, "11", &rint, &rseed);
+#if SIZEOF_LONG == 8
+    _int = NUM2ULONG(rint);
+#else
+    _int = NUM2ULL(rint);
+#endif
+
+    result = MurmurHash3_x86_32(&_int, 8, argc == 1 ? 0 : NUM2UINT(rseed));
+
+    return UINT2NUM(result);
+}
+
+#define PREPARE_128_BIT()         \
+    VALUE rstr, rseed, ar_result; \
+    uint32_t result[4];           \
+    rb_scan_args(argc, argv, "11", &rstr, &rseed)
+
+#define SWAP_128_BIT() do {    \
+        uint32_t tmp;          \
+        tmp = result[0];       \
+        result[0] = result[1]; \
+        result[1] = tmp;       \
+        tmp = result[2];       \
+        result[2] = result[3]; \
+        result[3] = tmp;       \
+} while (0)
+
+#define RETURN_128_BIT()       \
+    ar_result = rb_ary_new2(4);                  \
+    rb_ary_push(ar_result, UINT2NUM(result[0])); \
+    rb_ary_push(ar_result, UINT2NUM(result[1])); \
+    rb_ary_push(ar_result, UINT2NUM(result[2])); \
+    rb_ary_push(ar_result, UINT2NUM(result[3])); \
+    return ar_result
+
+static VALUE
+rb_murmur3_128_str_hash(int argc, VALUE* argv, VALUE self)
+{
+    PREPARE_128_BIT();
+
+    MurmurHash3_x64_128(RSTRING_PTR(rstr), RSTRING_LEN(rstr), argc == 1 ? 0 : NUM2UINT(rseed), result);
+#if WORDS_BIGENDIAN
+    SWAP_128_BIT();
+#endif
+    RETURN_128_BIT();
+}
+
+static VALUE
+rb_murmur3_128_int32_hash(int argc, VALUE* argv, VALUE self)
+{
+    PREPARE_128_BIT();
+
+    {
+        uint32_t _int = NUM2UINT(rstr);
+        MurmurHash3_x64_128(&_int, 4, argc == 1 ? 0 : NUM2UINT(rseed), result);
+    }
+#if WORDS_BIGENDIAN
+    SWAP_128_BIT();
+#endif
+    RETURN_128_BIT();
+}
+
+static VALUE
+rb_murmur3_128_int64_hash(int argc, VALUE* argv, VALUE self)
+{
+    PREPARE_128_BIT();
+
+    {
+#if SIZEOF_LONG == 8
+        uint64_t _int = NUM2ULONG(rstr);
+#else
+        uint64_t _int = NUM2ULL(rstr);
+#endif
+        MurmurHash3_x64_128(&_int, 8, argc == 1 ? 0 : NUM2UINT(rseed), result);
+    }
+#if WORDS_BIGENDIAN
+    SWAP_128_BIT();
+#endif
+    RETURN_128_BIT();
+}
+
+void
+Init_native_murmur() {
+    VALUE singleton;
+    VALUE mod_murmur = rb_define_module("MurmurHash3");
+    VALUE mod_murmur32 = rb_define_module_under(mod_murmur, "Native32");
+    VALUE mod_murmur128 = rb_define_module_under(mod_murmur, "Native128");
+
+    rb_define_method(mod_murmur32, "murmur3_32_fmix", rb_fmix32, 1);
+    rb_define_method(mod_murmur32, "murmur3_32_str_hash", rb_murmur3_32_str_hash, -1);
+    rb_define_method(mod_murmur32, "murmur3_32_int32_hash", rb_murmur3_32_int32_hash, -1);
+    rb_define_method(mod_murmur32, "murmur3_32_int64_hash", rb_murmur3_32_int64_hash, -1);
+
+    rb_extend_object(mod_murmur32, mod_murmur32);
+    singleton = rb_singleton_class(mod_murmur32);
+    rb_define_alias(singleton, "fmix", "murmur3_32_fmix");
+    rb_define_alias(singleton, "str_hash", "murmur3_32_str_hash");
+    rb_define_alias(singleton, "int32_hash", "murmur3_32_int32_hash");
+    rb_define_alias(singleton, "int64_hash", "murmur3_32_int64_hash");
+
+    rb_define_method(mod_murmur128, "murmur3_128_fmix", rb_fmix64, 1);
+    rb_define_method(mod_murmur128, "murmur3_128_str_hash", rb_murmur3_128_str_hash, -1);
+    rb_define_method(mod_murmur128, "murmur3_128_int32_hash", rb_murmur3_128_int32_hash, -1);
+    rb_define_method(mod_murmur128, "murmur3_128_int64_hash", rb_murmur3_128_int64_hash, -1);
+
+    rb_extend_object(mod_murmur128, mod_murmur128);
+    singleton = rb_singleton_class(mod_murmur128);
+    rb_define_alias(singleton, "fmix", "murmur3_128_fmix");
+    rb_define_alias(singleton, "str_hash", "murmur3_128_str_hash");
+    rb_define_alias(singleton, "int32_hash", "murmur3_128_int32_hash");
+    rb_define_alias(singleton, "int64_hash", "murmur3_128_int64_hash");
+}
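For reference, a hedged usage sketch of the bindings registered in Init_native_murmur above. The rb_scan_args spec "11" means each method takes one required argument plus an optional seed (0 when omitted); rb_extend_object(mod, mod) re-exposes the instance methods at module level, and the aliases shorten the murmur3_-prefixed names. RETURN_128_BIT returns the 128-bit digest as an array of four 32-bit integers:

    require 'murmurhash3/native_murmur'

    MurmurHash3::Native32.str_hash("hello")       # => 32-bit unsigned Integer
    MurmurHash3::Native32.str_hash("hello", 42)   # optional seed argument
    MurmurHash3::Native32.int64_hash(0x1234567812345678)
    MurmurHash3::Native32.fmix(123456789)         # finalization mix, exposed directly
    MurmurHash3::Native128.str_hash("hello")      # => [w0, w1, w2, w3], four 32-bit words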
lib/murmurhash3/pure_ruby.rb (8 additions, 8 deletions)
@@ -50,7 +50,7 @@ def murmur3_32_int32_hash(i, seed=0)
     end
 
     def murmur3_32_int64_hash(i, seed=0)
-      str_hash([i].pack("Q>"), seed)
+      str_hash([i].pack("Q<"), seed)
     end
 
     class << self
@@ -78,18 +78,18 @@ def murmur3_128_fmix(h)
       h ^ (h >> 33)
     end
 
-    C1 = 0x87c37b91_114253d5
-    C2 = 0x4cf5ad43_2745937f
+    C1_128 = 0x87c37b91_114253d5
+    C2_128 = 0x4cf5ad43_2745937f
     def murmur3_128__mmix1(k1)
-      k1 = (k1 * C1) & MASK64
+      k1 = (k1 * C1_128) & MASK64
       k1 = murmur3_128_rotl(k1, 31)
-      (k1 * C2) & MASK64
+      (k1 * C2_128) & MASK64
     end
 
     def murmur3_128__mmix2(k2)
-      k2 = (k2 * C2) & MASK64
+      k2 = (k2 * C2_128) & MASK64
       k2 = murmur3_128_rotl(k2, 33)
-      (k2 * C1) & MASK64
+      (k2 * C1_128) & MASK64
     end
 
     def murmur3_128_str_hash(str, seed=0)
@@ -141,7 +141,7 @@ def murmur3_128_int32_hash(i, seed=0)
     end
 
     def murmur3_128_int64_hash(i, seed=0)
-      str_hash([i].pack("Q>"), seed)
+      str_hash([i].pack("Q<"), seed)
     end
 
     class << self
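The substantive change here is pack("Q>") becoming pack("Q<"): the 64-bit integer is now serialized little-endian rather than big-endian before hashing, matching the byte order the native x86 code sees on little-endian machines. (The constants are also renamed to C1_128/C2_128, apparently to keep them distinct from the 32-bit constants.) For illustration, not part of the diff:

    [0x12345678].pack("Q>")  # big-endian:    "\x00\x00\x00\x00\x12\x34\x56\x78"
    [0x12345678].pack("Q<")  # little-endian: "\x78\x56\x34\x12\x00\x00\x00\x00"

The test expectations below change to exactly these little-endian byte strings.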
test/test_murmur.rb (21 additions, 4 deletions)
@@ -15,8 +15,8 @@
   end
 
   it 'should make correct hash for 64bit integer' do
-    murmur.int64_hash(0x12345678).must_equal murmur.str_hash("\x00\x00\x00\x00\x12\x34\x56\x78")
-    murmur.int64_hash(0x1234567812345678).must_equal murmur.str_hash("\x12\x34\x56\x78\x12\x34\x56\x78")
+    murmur.int64_hash(0x12345678).must_equal murmur.str_hash("\x78\x56\x34\x12\x00\x00\x00\x00")
+    murmur.int64_hash(0x1234567812345678).must_equal murmur.str_hash("\x78\x56\x34\x12\x78\x56\x34\x12")
   end
 
   it 'should make correct fmix for 64bit integer' do
@@ -38,8 +38,8 @@
   end
 
   it 'should make correct hash for 64bit integer' do
-    murmur.int64_hash(0x12345678).must_equal murmur.str_hash("\x00\x00\x00\x00\x12\x34\x56\x78")
-    murmur.int64_hash(0x1234567812345678).must_equal murmur.str_hash("\x12\x34\x56\x78\x12\x34\x56\x78")
+    murmur.int64_hash(0x12345678).must_equal murmur.str_hash("\x78\x56\x34\x12\x00\x00\x00\x00")
+    murmur.int64_hash(0x1234567812345678).must_equal murmur.str_hash("\x78\x56\x34\x12\x78\x56\x34\x12")
   end
 
   it 'should make correct fmix for 32bit integer' do
@@ -57,3 +57,20 @@
   let(:murmur) { MurmurHash3::PureRuby128 }
   class_exec &shared_examples_128
 end
+
+begin
+  require 'murmurhash3/native_murmur'
+
+  describe "Native 32" do
+    let(:murmur) { MurmurHash3::Native32 }
+    class_exec &shared_examples_32
+  end
+
+  describe "Native 128" do
+    let(:murmur) { MurmurHash3::Native128 }
+    class_exec &shared_examples_128
+  end
+
+rescue LoadError => e
+  puts "Could not load native extension: #{e}"
+end
