From 3e9b6919ccb8ccf53b5bcb9fe183a1b7fad0e9ef Mon Sep 17 00:00:00 2001 From: tedyu Date: Mon, 4 May 2015 16:59:02 -0700 Subject: [PATCH 01/10] Use UNSAFE.getLong() to speed up BitSetMethods#anySet() --- .../org/apache/spark/unsafe/bitset/BitSetMethods.java | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java b/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java index f30626d8f4317..b7f86658c53d8 100644 --- a/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java +++ b/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java @@ -29,6 +29,7 @@ public final class BitSetMethods { private static final long WORD_SIZE = 8; + private static final int SIZE_OF_LONG = Long.SIZE; private BitSetMethods() { // Make the default constructor private, since this only holds static methods. @@ -71,7 +72,13 @@ public static boolean isSet(Object baseObject, long baseOffset, int index) { * Returns {@code true} if any bit is set. */ public static boolean anySet(Object baseObject, long baseOffset, long bitSetWidthInBytes) { - for (int i = 0; i <= bitSetWidthInBytes; i++) { + long widthInLong = bitSetWidthInBytes / SIZE_OF_LONG; + for (long i = 0; i <= widthInLong; i++) { + if (PlatformDependent.UNSAFE.getLong(baseObject, baseOffset + i) != 0) { + return true; + } + } + for (long i = SIZE_OF_LONG * widthInLong; i <= bitSetWidthInBytes; i++) { if (PlatformDependent.UNSAFE.getByte(baseObject, baseOffset + i) != 0) { return true; } From 4ca0ef6d1128f1615f0f60993be38e7b4b7396d5 Mon Sep 17 00:00:00 2001 From: tedyu Date: Mon, 4 May 2015 17:04:57 -0700 Subject: [PATCH 02/10] Use UNSAFE.getLong() to speed up BitSetMethods#anySet() --- .../main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java b/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java index b7f86658c53d8..cb2e71ea32161 100644 --- a/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java +++ b/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java @@ -78,7 +78,7 @@ public static boolean anySet(Object baseObject, long baseOffset, long bitSetWidt return true; } } - for (long i = SIZE_OF_LONG * widthInLong; i <= bitSetWidthInBytes; i++) { + for (long i = SIZE_OF_LONG * widthInLong; i < bitSetWidthInBytes; i++) { if (PlatformDependent.UNSAFE.getByte(baseObject, baseOffset + i) != 0) { return true; } From 63ee05094dfdfa68d5decbed86de46f38a3dde6d Mon Sep 17 00:00:00 2001 From: tedyu Date: Mon, 4 May 2015 18:05:15 -0700 Subject: [PATCH 03/10] Use UNSAFE.getLong() to speed up BitSetMethods#anySet() --- .../java/org/apache/spark/unsafe/bitset/BitSetMethods.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java b/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java index cb2e71ea32161..9a02947901a29 100644 --- a/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java +++ b/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java @@ -73,12 +73,12 @@ public static boolean isSet(Object baseObject, long baseOffset, int index) { */ public static boolean anySet(Object baseObject, long baseOffset, long bitSetWidthInBytes) { long widthInLong = bitSetWidthInBytes / SIZE_OF_LONG; - for (long i = 0; i <= widthInLong; i++) { + for (int i = 0; i <= widthInLong; i++) { if (PlatformDependent.UNSAFE.getLong(baseObject, baseOffset + i) != 0) { return true; } } - for (long i = SIZE_OF_LONG * widthInLong; i < bitSetWidthInBytes; i++) { + for (int i = (int)(SIZE_OF_LONG * widthInLong); i < bitSetWidthInBytes; i++) { if (PlatformDependent.UNSAFE.getByte(baseObject, baseOffset + i) != 0) { return true; } From 093b7a408905b0d760e1f4e6413cb7e7ec54f9d8 Mon Sep 17 00:00:00 2001 From: tedyu Date: Mon, 4 May 2015 18:10:21 -0700 Subject: [PATCH 04/10] Use UNSAFE.getLong() to speed up BitSetMethods#anySet() --- .../main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java b/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java index 9a02947901a29..19b4edf73fe34 100644 --- a/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java +++ b/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java @@ -72,7 +72,7 @@ public static boolean isSet(Object baseObject, long baseOffset, int index) { * Returns {@code true} if any bit is set. */ public static boolean anySet(Object baseObject, long baseOffset, long bitSetWidthInBytes) { - long widthInLong = bitSetWidthInBytes / SIZE_OF_LONG; + int widthInLong = (int)(bitSetWidthInBytes / SIZE_OF_LONG); for (int i = 0; i <= widthInLong; i++) { if (PlatformDependent.UNSAFE.getLong(baseObject, baseOffset + i) != 0) { return true; From 855374b3025226e81e914e9d956c0195c2dd3d4e Mon Sep 17 00:00:00 2001 From: tedyu Date: Tue, 5 May 2015 06:43:35 -0700 Subject: [PATCH 05/10] Remove second loop since bitSetWidthInBytes is WORD aligned --- .../java/org/apache/spark/unsafe/bitset/BitSetMethods.java | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java b/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java index 19b4edf73fe34..55c7596965eef 100644 --- a/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java +++ b/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java @@ -72,17 +72,13 @@ public static boolean isSet(Object baseObject, long baseOffset, int index) { * Returns {@code true} if any bit is set. */ public static boolean anySet(Object baseObject, long baseOffset, long bitSetWidthInBytes) { + assert bitSetWidthInBytes % SIZE_OF_LONG == 0; int widthInLong = (int)(bitSetWidthInBytes / SIZE_OF_LONG); for (int i = 0; i <= widthInLong; i++) { if (PlatformDependent.UNSAFE.getLong(baseObject, baseOffset + i) != 0) { return true; } } - for (int i = (int)(SIZE_OF_LONG * widthInLong); i < bitSetWidthInBytes; i++) { - if (PlatformDependent.UNSAFE.getByte(baseObject, baseOffset + i) != 0) { - return true; - } - } return false; } From 75a467b8a01141d2124d5442a7ca846729e00fb5 Mon Sep 17 00:00:00 2001 From: tedyu Date: Tue, 5 May 2015 18:54:00 -0700 Subject: [PATCH 06/10] Correct offset for UNSAFE.getLong() --- .../java/org/apache/spark/unsafe/bitset/BitSetMethods.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java b/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java index 55c7596965eef..8dcfc4e2c2e3f 100644 --- a/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java +++ b/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java @@ -74,8 +74,9 @@ public static boolean isSet(Object baseObject, long baseOffset, int index) { public static boolean anySet(Object baseObject, long baseOffset, long bitSetWidthInBytes) { assert bitSetWidthInBytes % SIZE_OF_LONG == 0; int widthInLong = (int)(bitSetWidthInBytes / SIZE_OF_LONG); - for (int i = 0; i <= widthInLong; i++) { - if (PlatformDependent.UNSAFE.getLong(baseObject, baseOffset + i) != 0) { + long addr = baseOffset; + for (int i = 0; i <= widthInLong; i++, addr += 8) { + if (PlatformDependent.UNSAFE.getLong(baseObject, addr) != 0) { return true; } } From 817e3f9b4fe9684eb5b7f2b3cfe1d19266fcbd2e Mon Sep 17 00:00:00 2001 From: tedyu Date: Tue, 5 May 2015 21:10:57 -0700 Subject: [PATCH 07/10] Replace constant 8 with SIZE_OF_LONG --- .../main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java b/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java index 8dcfc4e2c2e3f..70cfb2e818f3f 100644 --- a/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java +++ b/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java @@ -75,7 +75,7 @@ public static boolean anySet(Object baseObject, long baseOffset, long bitSetWidt assert bitSetWidthInBytes % SIZE_OF_LONG == 0; int widthInLong = (int)(bitSetWidthInBytes / SIZE_OF_LONG); long addr = baseOffset; - for (int i = 0; i <= widthInLong; i++, addr += 8) { + for (int i = 0; i <= widthInLong; i++, addr += SIZE_OF_LONG) { if (PlatformDependent.UNSAFE.getLong(baseObject, addr) != 0) { return true; } From b51dcaf0f6661dff29d5d9f868a9dffa9f1c5342 Mon Sep 17 00:00:00 2001 From: tedyu Date: Wed, 6 May 2015 11:36:44 -0700 Subject: [PATCH 08/10] Add unit test in BitSetSuite for BitSet#anySet() --- .../main/java/org/apache/spark/unsafe/bitset/BitSet.java | 8 ++++++++ .../org/apache/spark/unsafe/bitset/BitSetMethods.java | 9 ++++----- .../java/org/apache/spark/unsafe/bitset/BitSetSuite.java | 1 + 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSet.java b/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSet.java index f72e07fce92fd..65915ab267930 100644 --- a/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSet.java +++ b/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSet.java @@ -102,4 +102,12 @@ public boolean isSet(int index) { public int nextSetBit(int fromIndex) { return BitSetMethods.nextSetBit(baseObject, baseOffset, fromIndex, numWords); } + + /* + * @return whether any bit in the BitSet is set + */ + public boolean anySet() { + return BitSetMethods.anySet(baseObject, baseOffset, numWords*BitSetMethods.WORD_SIZE); + } + } diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java b/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java index 70cfb2e818f3f..c6b536b233b60 100644 --- a/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java +++ b/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java @@ -28,8 +28,7 @@ */ public final class BitSetMethods { - private static final long WORD_SIZE = 8; - private static final int SIZE_OF_LONG = Long.SIZE; + static final long WORD_SIZE = 8; private BitSetMethods() { // Make the default constructor private, since this only holds static methods. @@ -72,10 +71,10 @@ public static boolean isSet(Object baseObject, long baseOffset, int index) { * Returns {@code true} if any bit is set. */ public static boolean anySet(Object baseObject, long baseOffset, long bitSetWidthInBytes) { - assert bitSetWidthInBytes % SIZE_OF_LONG == 0; - int widthInLong = (int)(bitSetWidthInBytes / SIZE_OF_LONG); + assert bitSetWidthInBytes % WORD_SIZE == 0; + int widthInLong = (int)(bitSetWidthInBytes / WORD_SIZE); long addr = baseOffset; - for (int i = 0; i <= widthInLong; i++, addr += SIZE_OF_LONG) { + for (int i = 0; i <= widthInLong; i++, addr += WORD_SIZE) { if (PlatformDependent.UNSAFE.getLong(baseObject, addr) != 0) { return true; } diff --git a/unsafe/src/test/java/org/apache/spark/unsafe/bitset/BitSetSuite.java b/unsafe/src/test/java/org/apache/spark/unsafe/bitset/BitSetSuite.java index e3a824e29b768..85f3bb02ec96b 100644 --- a/unsafe/src/test/java/org/apache/spark/unsafe/bitset/BitSetSuite.java +++ b/unsafe/src/test/java/org/apache/spark/unsafe/bitset/BitSetSuite.java @@ -39,6 +39,7 @@ public void basicOps() { for (int i = 0; i < bs.capacity(); i++) { Assert.assertFalse(bs.isSet(i)); } + Assert.assertFalse(bs.anySet()); // Set every bit and check it. for (int i = 0; i < bs.capacity(); i++) { From 1719c5b0d820994b2e7dc430a80ae38ed54fb0da Mon Sep 17 00:00:00 2001 From: tedyu Date: Wed, 6 May 2015 13:43:47 -0700 Subject: [PATCH 09/10] Correct upper bound in for loop --- .../main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java b/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java index c6b536b233b60..2d44f08b9e24e 100644 --- a/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java +++ b/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java @@ -74,7 +74,7 @@ public static boolean anySet(Object baseObject, long baseOffset, long bitSetWidt assert bitSetWidthInBytes % WORD_SIZE == 0; int widthInLong = (int)(bitSetWidthInBytes / WORD_SIZE); long addr = baseOffset; - for (int i = 0; i <= widthInLong; i++, addr += WORD_SIZE) { + for (int i = 0; i < widthInLong; i++, addr += WORD_SIZE) { if (PlatformDependent.UNSAFE.getLong(baseObject, addr) != 0) { return true; } From 473bf9de1b888b615d9ebf9b9251f49780998a8c Mon Sep 17 00:00:00 2001 From: tedyu Date: Wed, 6 May 2015 15:17:07 -0700 Subject: [PATCH 10/10] Address Josh's review comments --- .../main/java/org/apache/spark/unsafe/bitset/BitSet.java | 2 +- .../java/org/apache/spark/unsafe/bitset/BitSetMethods.java | 6 ++---- .../java/org/apache/spark/unsafe/bitset/BitSetSuite.java | 6 ++++++ 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSet.java b/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSet.java index 65915ab267930..5468c47b4202f 100644 --- a/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSet.java +++ b/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSet.java @@ -107,7 +107,7 @@ public int nextSetBit(int fromIndex) { * @return whether any bit in the BitSet is set */ public boolean anySet() { - return BitSetMethods.anySet(baseObject, baseOffset, numWords*BitSetMethods.WORD_SIZE); + return BitSetMethods.anySet(baseObject, baseOffset, numWords); } } diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java b/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java index 2d44f08b9e24e..573268c4f2f68 100644 --- a/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java +++ b/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java @@ -70,11 +70,9 @@ public static boolean isSet(Object baseObject, long baseOffset, int index) { /** * Returns {@code true} if any bit is set. */ - public static boolean anySet(Object baseObject, long baseOffset, long bitSetWidthInBytes) { - assert bitSetWidthInBytes % WORD_SIZE == 0; - int widthInLong = (int)(bitSetWidthInBytes / WORD_SIZE); + public static boolean anySet(Object baseObject, long baseOffset, long bitSetWidthInWords) { long addr = baseOffset; - for (int i = 0; i < widthInLong; i++, addr += WORD_SIZE) { + for (int i = 0; i < bitSetWidthInWords; i++, addr += WORD_SIZE) { if (PlatformDependent.UNSAFE.getLong(baseObject, addr) != 0) { return true; } diff --git a/unsafe/src/test/java/org/apache/spark/unsafe/bitset/BitSetSuite.java b/unsafe/src/test/java/org/apache/spark/unsafe/bitset/BitSetSuite.java index 85f3bb02ec96b..18393db9f382f 100644 --- a/unsafe/src/test/java/org/apache/spark/unsafe/bitset/BitSetSuite.java +++ b/unsafe/src/test/java/org/apache/spark/unsafe/bitset/BitSetSuite.java @@ -39,6 +39,7 @@ public void basicOps() { for (int i = 0; i < bs.capacity(); i++) { Assert.assertFalse(bs.isSet(i)); } + // another form of asserting that the bit set is empty Assert.assertFalse(bs.anySet()); // Set every bit and check it. @@ -53,6 +54,11 @@ public void basicOps() { bs.unset(i); Assert.assertFalse(bs.isSet(i)); } + + // Make sure anySet() can detect any set bit + bs = createBitSet(256); + bs.set(64); + Assert.assertTrue(bs.anySet()); } @Test