From 30f65149352c2633696f4d9dd3fb17ed7fe46a62 Mon Sep 17 00:00:00 2001 From: Wadeck Follonier Date: Sat, 24 Oct 2020 18:05:24 +0200 Subject: [PATCH] [JENKINS-60563] Replace MD5 with SHA-256 in ConsistentHash - Also update outdated link - Reformat some of the code - Adjust the tests - Ignore the performance test - Correct typos --- .../main/java/hudson/util/ConsistentHash.java | 132 +++++++++--------- .../java/hudson/util/ConsistentHashTest.java | 64 +++++---- 2 files changed, 102 insertions(+), 94 deletions(-) diff --git a/core/src/main/java/hudson/util/ConsistentHash.java b/core/src/main/java/hudson/util/ConsistentHash.java index 16e5f3c5ef74..46a119737d87 100644 --- a/core/src/main/java/hudson/util/ConsistentHash.java +++ b/core/src/main/java/hudson/util/ConsistentHash.java @@ -34,7 +34,6 @@ import java.util.Iterator; import java.util.NoSuchElementException; -import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; import hudson.util.Iterators.DuplicateFilterIterator; /** @@ -42,13 +41,13 @@ * *

* This implementation is concurrency safe; additions and removals are serialized, but look up - * can be performed concurrently even when modifications is in progress. + * can be performed concurrently even when modifications are in progress. * *

* Since typical hash functions we use in {@link Object#hashCode()} isn't random enough to * evenly populate the 2^32 ring space, we only ask the user to give us * an injective function to a string, - * and then we use MD5 to create random enough distribution. + * and then we use SHA-256 to create random enough distribution. * *

* This consistent hash implementation is consistent both to the addition/removal of Ts, as well @@ -56,7 +55,7 @@ * *

* See http://en.wikipedia.org/wiki/Consistent_hashing for references, and - * http://weblogs.java.net/blog/tomwhite/archive/2007/11/consistent_hash.html is probably a reasonable depiction. + * http://tom-e-white.com/2007/11/consistent-hashing.html is probably a reasonable depiction. * If we trust his experiments, creating 100 replicas will reduce the stddev to 10% of the mean for 10 nodes. * * @author Kohsuke Kawaguchi @@ -66,14 +65,14 @@ public class ConsistentHash { /** * All the items in the hash, to their replication factors. */ - private final Map items = new HashMap<>(); + private final Map items = new HashMap<>(); private int numPoints; private final int defaultReplication; private final Hash hash; /** - * Used for remembering the computed MD5 hash, since it's bit expensive to do it all over again. + * Used for remembering the computed SHA-256 hash, since it's bit expensive to do it all over again. */ private static final class Point implements Comparable { final int hash; @@ -99,37 +98,41 @@ public int compareTo(Point that) { */ private final class Table { private final int[] hash; - private final Object[] owner; // really T[] + // really T[] + private final Object[] owner; private Table() { - int r=0; - for (Point[] v : items.values()) - r+=v.length; + int r = 0; + for (Point[] v : items.values()) { + r += v.length; + } numPoints = r; // merge all points from all nodes and sort them into a single array Point[] allPoints = new Point[numPoints]; - int p=0; + int p = 0; for (Point[] v : items.values()) { - System.arraycopy(v,0,allPoints,p,v.length); - p+=v.length; + System.arraycopy(v, 0, allPoints, p, v.length); + p += v.length; } Arrays.sort(allPoints); hash = new int[allPoints.length]; owner = new Object[allPoints.length]; - for (int i=0; i * This hash function need not produce a very uniform distribution, as the - * output is rehashed with MD5. But it does need to make sure it doesn't + * output is rehashed with SHA-256. But it does need to make sure it doesn't * produce the same value for two different 'T's (and that's why this returns * String, not the usual int.) */ @@ -194,11 +203,7 @@ public interface Hash { String hash(T t); } - static final Hash DEFAULT_HASH = new Hash() { - public String hash(Object o) { - return o.toString(); - } - }; + static final Hash DEFAULT_HASH = (Hash) Object::toString; public ConsistentHash() { this((Hash) DEFAULT_HASH); @@ -226,15 +231,16 @@ public int countAllPoints() { * Adds a new node with the default number of replica. */ public synchronized void add(T node) { - add(node,defaultReplication); + add(node, defaultReplication); } /** * Calls {@link #add(Object)} with all the arguments. */ public synchronized void addAll(T... nodes) { - for (T node : nodes) - addInternal(node,defaultReplication); + for (T node : nodes) { + addInternal(node, defaultReplication); + } refreshTable(); } @@ -242,17 +248,19 @@ public synchronized void addAll(T... nodes) { * Calls {@link #add(Object)} with all the arguments. */ public synchronized void addAll(Collection nodes) { - for (T node : nodes) - addInternal(node,defaultReplication); + for (T node : nodes) { + addInternal(node, defaultReplication); + } refreshTable(); } /** * Calls {@link #add(Object,int)} with all the arguments. */ - public synchronized void addAll(Map nodes) { - for (Map.Entry node : nodes.entrySet()) - addInternal(node.getKey(),node.getValue()); + public synchronized void addAll(Map nodes) { + for (Map.Entry node : nodes.entrySet()) { + addInternal(node.getKey(), node.getValue()); + } refreshTable(); } @@ -272,14 +280,15 @@ public synchronized void add(T node, int replica) { } private synchronized void addInternal(T node, int replica) { - if (replica==0) { + if (replica == 0) { items.remove(node); } else { Point[] points = new Point[replica]; String seed = hash.hash(node); - for (int i=0; i 4 bytes - for (int i=0; i<4; i++) - digest[i] ^= digest[i+4]+digest[i+8]+digest[i+12]; - return (b2i(digest[0])<< 24)|(b2i(digest[1])<<16)|(b2i(digest[2])<< 8)|b2i(digest[3]); + for (int i = 0; i < 4; i++) { + digest[i] ^= digest[i + 4] + digest[i + 8] + digest[i + 12]; + } + return (b2i(digest[0]) << 24) | (b2i(digest[1]) << 16) | (b2i(digest[2]) << 8) | b2i(digest[3]); } catch (GeneralSecurityException e) { - throw new RuntimeException("Could not generate MD5 hash", e); + throw new RuntimeException("Could not generate SHA-256 hash", e); } } - // TODO JENKINS-60563 remove MD5 from all usages in Jenkins - @SuppressFBWarnings(value = "WEAK_MESSAGE_DIGEST_MD5", justification = "Not used for security.") - private MessageDigest getMd5() throws NoSuchAlgorithmException { - return MessageDigest.getInstance("MD5"); + private MessageDigest createMessageDigest() throws NoSuchAlgorithmException { + return MessageDigest.getInstance("SHA-256"); } /** * unsigned byte->int. */ private int b2i(byte b) { - return ((int)b)&0xFF; + return ((int)b) & 0xFF; } /** @@ -334,10 +342,10 @@ public T lookup(int queryPoint) { } /** - * Takes a string, hash it with MD5, then calls {@link #lookup(int)}. + * Takes a string, hash it with SHA-256, then calls {@link #lookup(int)}. */ public T lookup(String queryPoint) { - return lookup(md5(queryPoint)); + return lookup(digest(queryPoint)); } /** @@ -352,17 +360,13 @@ public T lookup(String queryPoint) { * Nodes with more replicas are more likely to show up early in the list */ public Iterable list(final int queryPoint) { - return new Iterable() { - public Iterator iterator() { - return table.list(queryPoint); - } - }; + return () -> table.list(queryPoint); } /** - * Takes a string, hash it with MD5, then calls {@link #list(int)}. + * Takes a string, hash it with SHA-256, then calls {@link #list(int)}. */ public Iterable list(String queryPoint) { - return list(md5(queryPoint)); + return list(digest(queryPoint)); } } diff --git a/core/src/test/java/hudson/util/ConsistentHashTest.java b/core/src/test/java/hudson/util/ConsistentHashTest.java index 8a782ccb62ef..0b7d77f6de3e 100644 --- a/core/src/test/java/hudson/util/ConsistentHashTest.java +++ b/core/src/test/java/hudson/util/ConsistentHashTest.java @@ -31,6 +31,7 @@ import static org.junit.Assert.fail; import hudson.util.CopyOnWriteMap.Hash; +import org.junit.Ignore; import org.junit.Test; import java.util.Random; @@ -55,24 +56,23 @@ public void basic() { hash.add("data2"); hash.add("data3"); - System.out.println(hash.lookup(0)); - - // there's one in 2^32 chance that this test fails, but these two query points are - // only off by one. + // In a pure random scenario, there's one in 2^32 chance that this test fails + // but as we control the input and the hash function is deterministic, there is no risk of failure String x = hash.lookup(Integer.MIN_VALUE); String y = hash.lookup(Integer.MAX_VALUE); - assertEquals(x,y); + // both values are data3 + assertEquals(x, y); // list them up Iterator itr = hash.list(Integer.MIN_VALUE).iterator(); Set all = new HashSet<>(); String z = itr.next(); all.add(z); - assertEquals(z,x); + assertEquals(z, x); all.add(itr.next()); all.add(itr.next()); assertFalse(itr.hasNext()); - assertEquals(3,all.size()); + assertEquals(3, all.size()); } /** @@ -81,20 +81,23 @@ public void basic() { @Test public void unevenDistribution() { ConsistentHash hash = new ConsistentHash<>(); - hash.add("even",10); - hash.add("odd",100); + hash.add("Even",10); + hash.add("Odd",100); Random r = new Random(0); - int even=0,odd=0; - for(int i=0; i<1000; i++) { + int even = 0; + int odd = 0; + for(int i = 0; i < 1000; i++) { String v = hash.lookup(r.nextInt()); - if(v.equals("even")) even++; - else odd++; + if(v.equals("Even")) { + even++; + } else { + odd++; + } } - // again, there's a small chance tha this test fails. - System.out.printf("%d/%d\n",even,odd); - assertTrue(even*8 hash = new ConsistentHash<>(); - for( int i=0; i<10; i++ ) + for (int i = 0; i < 10; i++) { hash.add(i); + } // what was the mapping before the mutation? - Map before = new HashMap<>(); + Map before = new HashMap<>(); Random r = new Random(0); - for(int i=0; i<1000; i++) { + for (int i = 0; i < 1000; i++) { int q = r.nextInt(); - before.put(q,hash.lookup(q)); + before.put(q, hash.lookup(q)); } // remove a node @@ -120,7 +124,7 @@ public void removal() { // verify that the mapping remains consistent for (Entry e : before.entrySet()) { int m = hash.lookup(e.getKey()); - assertTrue(e.getValue()==0 || e.getValue()==m); + assertTrue(e.getValue() == 0 || e.getValue() == m); } } @@ -164,10 +168,8 @@ public void setCustomDefaultReplication() { @Test public void usesCustomHash() { final RuntimeException exception = new RuntimeException(); - ConsistentHash.Hash hashFunction = new ConsistentHash.Hash() { - public String hash(String str) { - throw exception; - } + ConsistentHash.Hash hashFunction = str -> { + throw exception; }; try { @@ -183,18 +185,20 @@ public String hash(String str) { * This test doesn't fail but it's written to measure the performance of the consistent hash function with large data set. */ @Test + @Ignore("Helper test for performance, no assertion") public void speed() { Map data = new Hash<>(); - for (int i = 0; i < 1000; i++) - data.put("node" + i,100); - data.put("tail",100); + for (int i = 0; i < 1000; i++) { + data.put("node" + i, 100); + } + data.put("tail", 100); long start = System.currentTimeMillis(); - for (int j=0; j<10; j++) { + for (int j = 0; j < 10; j++) { ConsistentHash b = new ConsistentHash<>(); b.addAll(data); } - System.out.println(System.currentTimeMillis()-start); + System.out.println(System.currentTimeMillis() - start); } }