gunnarmorling · gunnarmorling · Jan 10, 2024 · Jan 9, 2024 · Jan 9, 2024 · Jan 9, 2024
diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue.java b/src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue.java
@@ -30,28 +30,28 @@
 
 /**
  * Simple solution that memory maps the input file, then splits it into one segment per available core and uses
- * sun.misc.Unsafe to directly access the mapped memory.
- *
- * Runs in 0.92s on my Intel i9-13900K
+ * sun.misc.Unsafe to directly access the mapped memory. Uses a long at a time when checking for collision.
+ * <p>
+ * Runs in 0.80s on my Intel i9-13900K
  * Perf stats:
- *     65,004,666,383      cpu_core/cycles/
- *     71,141,249,972      cpu_atom/cycles/
+ * 53,344,113,388      cpu_core/cycles/
+ * 66,193,198,848      cpu_atom/cycles/
  */
 public class CalculateAverage_thomaswue {
     private static final String FILE = "./measurements.txt";
 
     // Holding the current result for a single city.
     private static class Result {
-        short min;
-        short max;
+        int min;
+        int max;
         long sum;
         int count;
         final long nameAddress;
 
         private Result(long nameAddress, int value) {
             this.nameAddress = nameAddress;
-            this.min = (short) value;
-            this.max = (short) value;
+            this.min = value;
+            this.max = value;
             this.sum = value;
             this.count = 1;
         }
@@ -66,8 +66,8 @@ private static double round(double value) {
 
         // Accumulate another result into this one.
         private void add(Result other) {
-            min = (short) Math.min(min, other.min);
-            max = (short) Math.max(max, other.max);
+            min = Math.min(min, other.min);
+            max = Math.max(max, other.max);
             sum += other.sum;
             count += other.count;
         }
@@ -81,8 +81,7 @@ public static void main(String[] args) throws IOException {
         // Parallel processing of segments.
         List<HashMap<String, Result>> allResults = IntStream.range(0, chunks.length - 1).mapToObj(chunkIndex -> {
             HashMap<String, Result> cities = HashMap.newHashMap(1 << 10);
-            Result[] results = new Result[1 << 18];
-            parseLoop(chunks[chunkIndex], chunks[chunkIndex + 1], results, cities);
+            parseLoop(chunks[chunkIndex], chunks[chunkIndex + 1], cities);
             return cities;
         }).parallel().toList();
 
@@ -114,44 +113,44 @@ private static Unsafe initUnsafe() {
         }
     }
 
-    private static void parseLoop(long chunkStart, long chunkEnd, Result[] results, HashMap<String, Result> cities) {
+    private static void parseLoop(long chunkStart, long chunkEnd, HashMap<String, Result> cities) {
+        Result[] results = new Result[1 << 18];
         long scanPtr = chunkStart;
         byte b;
         while (scanPtr < chunkEnd) {
             long nameAddress = scanPtr;
-            int hash = 0;
-
-            // Skip first letter.
-            scanPtr++;
-
-            // Scan for ';' delimiter, always 4 bytes at a time.
-            while (true) {
-                int nextVal = UNSAFE.getInt(scanPtr);
-                if ((nextVal & 0x3B) == 0x3B) {
-                    scanPtr++;
-                    break;
-                }
-                else if ((nextVal & 0x3B00) == 0x3B00) {
-                    scanPtr += 2;
-                    hash = hash ^ (nextVal & 0xFF);
-                    break;
-                }
-                else if ((nextVal & 0x3B0000) == 0x3B0000) {
-                    scanPtr += 3;
-                    hash = hash ^ (nextVal & 0xFFFF);
-                    break;
-                }
-                else if (((nextVal & 0x3B000000) == 0x3B000000)) {
-                    scanPtr += 4;
-                    hash = hash ^ (nextVal & 0xFFFFFF);
-                    break;
+            long hash = 0;
+
+            // Search for ';', one long at a time.
+            long word = UNSAFE.getLong(scanPtr);
+            int pos = findDelimiter(word);
+            if (pos != 8) {
+                scanPtr += pos;
+                word = word & (-1L >>> ((8 - pos - 1) << 3));
+                hash ^= word;
+            }
+            else {
+                scanPtr += 8;
+                hash ^= word;
+                while (true) {
+                    word = UNSAFE.getLong(scanPtr);
+                    pos = findDelimiter(word);
+                    if (pos != 8) {
+                        scanPtr += pos;
+                        word = word & (-1L >>> ((8 - pos - 1) << 3));
+                        hash ^= word;
+                        break;
+                    }
+                    else {
+                        scanPtr += 8;
+                        hash ^= word;
+                    }
                 }
-                scanPtr += 4;
-                hash = hash ^ nextVal;
             }
 
             // Save length of name for later.
-            int nameLength = (int) (scanPtr - nameAddress - 1);
+            int nameLength = (int) (scanPtr - nameAddress);
+            scanPtr++;
 
             // Parse number.
             int number;
@@ -175,42 +174,25 @@ else if (((nextVal & 0x3B000000) == 0x3B000000)) {
             }
 
             // Final calculation for index into hash table.
-            int tableIndex = (((hash ^ (hash >>> 18)) & (results.length - 1)));
-            while (true) {
+            int hashAsInt = (int) (hash ^ (hash >>> 32));
+            int finalHash = (hashAsInt ^ (hashAsInt >>> 18));
+            int tableIndex = (finalHash & (results.length - 1));
+            outer: while (true) {
                 Result existingResult = results[tableIndex];
                 if (existingResult == null) {
                     newEntry(results, cities, nameAddress, number, tableIndex, nameLength);
                     break;
                 }
                 else {
                     // Check for collision.
-                    boolean result = true;
                     int i = 0;
-                    if ((long) nameLength >= 8) {
-                        if (UNSAFE.getLong(existingResult.nameAddress) != UNSAFE.getLong(nameAddress)) {
-                            result = false;
-                        }
-                        else {
-                            i += 8;
+                    for (; i < nameLength - 8; i += 8) {
+                        if (UNSAFE.getLong(existingResult.nameAddress + i) != UNSAFE.getLong(nameAddress + i)) {
+                            tableIndex = (tableIndex + 1) & (results.length - 1);
+                            continue outer;
                         }
                     }
-                    else if ((long) nameLength >= 4) {
-                        if (UNSAFE.getInt(existingResult.nameAddress) != UNSAFE.getInt(nameAddress)) {
-                            result = false;
-                        }
-                        else {
-                            i += 4;
-                        }
-                    }
-                    if (result) {
-                        for (; i < (long) nameLength; ++i) {
-                            if (UNSAFE.getByte(existingResult.nameAddress + i) != UNSAFE.getByte(nameAddress + i)) {
-                                result = false;
-                                break;
-                            }
-                        }
-                    }
-                    if (result) {
+                    if (((UNSAFE.getLong(existingResult.nameAddress + i) ^ UNSAFE.getLong(nameAddress + i)) << (64 - (nameLength - i) << 3)) == 0) {
                         existingResult.min = (short) Math.min(existingResult.min, number);
                         existingResult.max = (short) Math.max(existingResult.max, number);
                         existingResult.sum += number;
@@ -229,12 +211,20 @@ else if ((long) nameLength >= 4) {
         }
     }
 
+    private static int findDelimiter(long word) {
+        long input = word ^ 0x3B3B3B3B3B3B3B3BL;
+        long tmp = (input & 0x7F7F7F7F7F7F7F7FL) + 0x7F7F7F7F7F7F7F7FL;
+        tmp = ~(tmp | input | 0x7F7F7F7F7F7F7F7FL);
+        return Long.numberOfTrailingZeros(tmp) >>> 3;
+    }
+
     private static void newEntry(Result[] results, HashMap<String, Result> cities, long nameAddress, int number, int hash, int nameLength) {
         Result r = new Result(nameAddress, number);
         results[hash] = r;
         byte[] bytes = new byte[nameLength];
         UNSAFE.copyMemory(null, nameAddress, bytes, Unsafe.ARRAY_BYTE_BASE_OFFSET, nameLength);
-        cities.put(new String(bytes, StandardCharsets.UTF_8), r);
+        String nameAsString = new String(bytes, StandardCharsets.UTF_8);
+        cities.put(nameAsString, r);
     }
 
     private static long[] getSegments(int numberOfChunks) throws IOException {