From 7d0d39e05af38ae20f8318765ad2fd14faa8a158 Mon Sep 17 00:00:00 2001 From: jaebradley Date: Mon, 31 Jul 2017 09:34:41 -0400 Subject: [PATCH 1/3] hashmap implementation --- codereview/hashMap.md | 208 +++++++++++++++++++ src/main/java/problems/impl/HashMap.java | 173 +++++++++++++++ src/test/java/problems/impl/HashMapTest.java | 136 ++++++++++++ 3 files changed, 517 insertions(+) create mode 100644 codereview/hashMap.md create mode 100644 src/main/java/problems/impl/HashMap.java create mode 100644 src/test/java/problems/impl/HashMapTest.java diff --git a/codereview/hashMap.md b/codereview/hashMap.md new file mode 100644 index 0000000..34a7d34 --- /dev/null +++ b/codereview/hashMap.md @@ -0,0 +1,208 @@ +## Purpose + +I've never implemented a `HashMap` and thought it would be a good data structure exercise. I tried looking at the Java +source code as little as possible. + +## Discussion + +The `HashMap` was made up of an array of `Entry`s. Each `Entry` is points to another `Entry` (or `null` if it's the +last `Entry` in the linked list). + +Since you'd want `Entry`s distributed as equally as possible across the different index values of the internal `Entry` +array, each key's hashcode is hashed again to combat a bad hashcode (this code is from the Java 8 source code) and +eventually an index value is calculated - this index value represents the index of the internal array that the +key-value pair will exist under. + +Thus, the logic for getting a value for a particular key would be to translate the key into an index value, and then +to get the first `Entry` for that index value from the internal array. And then to get the `Entry` in the linked list +with the same key (or return `null` if the linked list has been exhausted). + +Another thing that was interesting to implement was array resizing. Every time `put` is called, it checks to see if array +resizing needs to occur. I resize the array if the number of objects that have been added to the array is greater than +75% of the allocated array capacity. I don't know if this is the best way to implement a resize check. When actually +resizing the array, I needed to iterate through every `Entry` and reindex the `Entry`. + +Things I don't like: +* My implementation seems really messy. + 1. I don't like the `addEntry` method. I don't like it has a `boolean` return. However, I did this so that when `put`ting + I wouldn't update the `size` when I updated a key-value pair vs. added one. + 2. I don't like how the `setEntry` method keeps the `next` value in memory before overwriting it (this was done so + that no `Entry` in the linked list did not get reindexed). +* Is the way I'm resizing logical? I read [this article](http://coding-geek.com/how-does-a-hashmap-work-in-java/) and + that's where I got my ideas for how to resize the internal array. + +## Implementation + + + + public class HashMap { + private int size = 0; + private int capacity = 16; + private Entry[] entries = new Entry[capacity]; + private double loadFactor = 0.75; + + private static class Entry { + private final K key; + private V value; + private Entry next = null; + + public Entry(K key, V value) { + this.key = key; + this.value = value; + } + } + + public HashMap() { + } + + public boolean isEmpty() { + return this.size == 0; + } + + public int getSize() { + return this.size; + } + + public boolean containsKey(K key) { + Entry matchingEntry = getMatchingEntry(key); + + return matchingEntry != null && matchingEntry.key == key; + } + + public boolean containsValue(V value) { + for (Entry entry : this.entries) { + while (entry != null && !matches(value, entry.value)) { + entry = entry.next; + } + + if (entry != null) { + return true; + } + } + + return false; + } + + public V get(K key) { + Entry matchingEntry = getMatchingEntry(key); + + return matchingEntry == null ? null : matchingEntry.value; + } + + public void put(K key, V value) { + if (this.shouldResize()) { + this.resize(); + } + + if (addEntry(new Entry<>(key, value), this.entries)) { + this.size++; + } + + } + + public void remove(K key) { + int index = indexOf(key); + Entry currentEntry = this.entries[index]; + + while (currentEntry != null && currentEntry.next != null && !matches(key, currentEntry.next.key)) { + currentEntry = currentEntry.next; + } + + if (currentEntry != null) { + // this case can only occur if there is only one non-null entry at the index + if (matches(key, currentEntry.key)) { + this.entries[index] = null; + // this case can only occur because the next entry's key matched + } else if (currentEntry.next != null) { + currentEntry.next = currentEntry.next.next; + } + + this.size--; + } + } + + private boolean shouldResize() { + return this.size > Math.ceil((double) this.capacity * this.loadFactor); + } + + private void resize() { + this.capacity = this.size * 2; + + Entry[] newEntries = new Entry[this.capacity]; + for (Entry entry : this.entries) { + if (entry != null) { + this.setEntry(entry, newEntries); + } + } + + this.entries = newEntries; + } + + private void setEntry(Entry entry, Entry[] entries){ + Entry nextEntry = entry.next; + entry.next = null; + + this.addEntry(entry, entries); + + if (nextEntry != null) { + this.setEntry(nextEntry, entries); + } + } + + private boolean addEntry(Entry entry, Entry[] entries) { + int index = indexOf(entry.key); + Entry existingEntry = entries[index]; + + if (existingEntry == null) { + entries[index] = entry; + return true; + } else { + while (!this.matches(entry.key, existingEntry.key) && existingEntry.next != null) { + existingEntry = existingEntry.next; + } + + if (this.matches(entry.key, existingEntry.key)) { + existingEntry.value = entry.value; + return false; + } + + existingEntry.next = entry; + return true; + + } + } + + private Entry getMatchingEntry(K key) { + Entry existingEntry = this.entries[indexOf(key)]; + + while (existingEntry != null && !matches(key, existingEntry.key)) { + existingEntry = existingEntry.next; + } + + return existingEntry; + } + + private int indexOf(K object) { + return object == null ? 0 : hash(object) & (this.capacity - 1); + } + + private boolean matches(Object o1, Object o2) { + return (o1 == null && o2 == null) || + (o1 != null && o2 != null && o1.equals(o2)); + } + + /** + * Applies a supplemental hash function to a given hashCode, which + * defends against poor quality hash functions. This is critical + * because HashMap uses power-of-two length hash tables, that + * otherwise encounter collisions for hashCodes that do not differ + * in lower bits. Note: Null keys always map to hash 0, thus index 0. + */ + private static int hash(Object key) { + // This function ensures that hashCodes that differ only by + // constant multiples at each bit position have a bounded + // number of collisions (approximately 8 at default load factor). + int h; + return (key == null) ? 0 : (h = key.hashCode()) ^ (h >>> 16); + } + } diff --git a/src/main/java/problems/impl/HashMap.java b/src/main/java/problems/impl/HashMap.java new file mode 100644 index 0000000..05e8215 --- /dev/null +++ b/src/main/java/problems/impl/HashMap.java @@ -0,0 +1,173 @@ +package problems.impl; + +public class HashMap { + private int size = 0; + private int capacity = 16; + private Entry[] entries = new Entry[capacity]; + private double loadFactor = 0.75; + + private static class Entry { + private final K key; + private V value; + private Entry next = null; + + public Entry(K key, V value) { + this.key = key; + this.value = value; + } + } + + public HashMap() { + } + + public boolean isEmpty() { + return this.size == 0; + } + + public int getSize() { + return this.size; + } + + public boolean containsKey(K key) { + Entry matchingEntry = getMatchingEntry(key); + + return matchingEntry != null && matchingEntry.key == key; + } + + public boolean containsValue(V value) { + for (Entry entry : this.entries) { + while (entry != null && !matches(value, entry.value)) { + entry = entry.next; + } + + if (entry != null) { + return true; + } + } + + return false; + } + + public V get(K key) { + Entry matchingEntry = getMatchingEntry(key); + + return matchingEntry == null ? null : matchingEntry.value; + } + + public void put(K key, V value) { + if (this.shouldResize()) { + this.resize(); + } + + if (addEntry(new Entry<>(key, value), this.entries)) { + this.size++; + } + + } + + public void remove(K key) { + int index = indexOf(key); + Entry currentEntry = this.entries[index]; + + while (currentEntry != null && currentEntry.next != null && !matches(key, currentEntry.next.key)) { + currentEntry = currentEntry.next; + } + + if (currentEntry != null) { + // this case can only occur if there is only one non-null entry at the index + if (matches(key, currentEntry.key)) { + this.entries[index] = null; + // this case can only occur because the next entry's key matched + } else if (currentEntry.next != null) { + currentEntry.next = currentEntry.next.next; + } + + this.size--; + } + } + + private boolean shouldResize() { + return this.size > Math.ceil((double) this.capacity * this.loadFactor); + } + + private void resize() { + this.capacity = this.size * 2; + + Entry[] newEntries = new Entry[this.capacity]; + for (Entry entry : this.entries) { + if (entry != null) { + this.setEntry(entry, newEntries); + } + } + + this.entries = newEntries; + } + + private void setEntry(Entry entry, Entry[] entries){ + Entry nextEntry = entry.next; + entry.next = null; + + this.addEntry(entry, entries); + + if (nextEntry != null) { + this.setEntry(nextEntry, entries); + } + } + + private boolean addEntry(Entry entry, Entry[] entries) { + int index = indexOf(entry.key); + Entry existingEntry = entries[index]; + + if (existingEntry == null) { + entries[index] = entry; + return true; + } else { + while (!this.matches(entry.key, existingEntry.key) && existingEntry.next != null) { + existingEntry = existingEntry.next; + } + + if (this.matches(entry.key, existingEntry.key)) { + existingEntry.value = entry.value; + return false; + } + + existingEntry.next = entry; + return true; + + } + } + + private Entry getMatchingEntry(K key) { + Entry existingEntry = this.entries[indexOf(key)]; + + while (existingEntry != null && !matches(key, existingEntry.key)) { + existingEntry = existingEntry.next; + } + + return existingEntry; + } + + private int indexOf(K object) { + return object == null ? 0 : hash(object) & (this.capacity - 1); + } + + private boolean matches(Object o1, Object o2) { + return (o1 == null && o2 == null) || + (o1 != null && o2 != null && o1.equals(o2)); + } + + /** + * Applies a supplemental hash function to a given hashCode, which + * defends against poor quality hash functions. This is critical + * because HashMap uses power-of-two length hash tables, that + * otherwise encounter collisions for hashCodes that do not differ + * in lower bits. Note: Null keys always map to hash 0, thus index 0. + */ + private static int hash(Object key) { + // This function ensures that hashCodes that differ only by + // constant multiples at each bit position have a bounded + // number of collisions (approximately 8 at default load factor). + int h; + return (key == null) ? 0 : (h = key.hashCode()) ^ (h >>> 16); + } +} diff --git a/src/test/java/problems/impl/HashMapTest.java b/src/test/java/problems/impl/HashMapTest.java new file mode 100644 index 0000000..0327a65 --- /dev/null +++ b/src/test/java/problems/impl/HashMapTest.java @@ -0,0 +1,136 @@ +package problems.impl; + +import org.junit.Test; + +import static org.junit.Assert.*; + +public class HashMapTest { + private String firstKey = "firstKey"; + private String firstValue = "firstValue"; + private String secondKey = "secondKey"; + private String secondValue = "secondValue"; + + + @Test + public void itShouldInstantiate() { + HashMap map = new HashMap<>(); + assertNotNull(map); + assertEquals(0, map.getSize()); + assertTrue(map.isEmpty()); + } + + @Test + public void itShouldPutAValue() { + HashMap map = new HashMap<>(); + map.put(firstKey, firstValue); + assertEquals(1, map.getSize()); + assertEquals(firstValue, map.get(firstKey)); + } + + @Test + public void itShouldPutIdenticalKeys() { + HashMap map = new HashMap<>(); + map.put(firstKey, firstValue); + map.put(firstKey, secondValue); + assertEquals(1, map.getSize()); + assertEquals(secondValue, map.get(firstKey)); + } + + @Test + public void itShouldPutMultipleValues() { + HashMap map = new HashMap<>(); + map.put(firstKey, firstValue); + map.put(secondKey, secondValue); + assertEquals(2, map.getSize()); + assertEquals(firstValue, map.get(firstKey)); + assertEquals(secondValue, map.get(secondKey)); + } + + @Test + public void itShouldTestResizing() { + HashMap map = new HashMap<>(); + for (int i = 0; i < 100; i++) { + map.put(i, i); + assertEquals(i + 1, map.getSize()); + + for (int j = 0; j <= i; j++) { + assertEquals(Integer.valueOf(j), map.get(j)); + } + } + } + + @Test + public void itShouldTestNullKey() { + HashMap map = new HashMap<>(); + map.put(null, firstValue); + assertEquals(1, map.getSize()); + assertEquals(firstValue, map.get(null)); + } + + @Test + public void itShouldTestContainsNonNullKey() { + HashMap map = new HashMap<>(); + map.put(firstKey, firstValue); + assertTrue(map.containsKey(firstKey)); + } + + @Test + public void itShouldTestContainsNullKey() { + HashMap map = new HashMap<>(); + map.put(null, firstValue); + assertTrue(map.containsKey(null)); + } + + @Test + public void itShouldTestRemovingNonNullKey() { + HashMap map = new HashMap<>(); + map.put(firstKey, firstValue); + map.remove(firstKey); + assertEquals(0, map.getSize()); + } + + @Test + public void itShouldTestRemovingNullKey() { + HashMap map = new HashMap<>(); + map.put(null, firstValue); + map.remove(null); + assertEquals(0, map.getSize()); + } + + @Test + public void itShouldTestRemoving() { + HashMap map = new HashMap<>(); + for (int i = 0; i < 100; i++) { + map.put(i, i); + } + + for (int i = 99; i >= 0; i--) { + map.remove(i); + assertEquals(i, map.getSize()); + assertFalse(map.containsKey(i)); + } + } + + @Test + public void itShouldTestContainsValue() { + HashMap map = new HashMap<>(); + for (int i = 0; i < 100; i++) { + map.put(i, i); + } + + for (int i = 0; i < 100; i++) { + assertTrue(map.containsValue(i)); + } + + for (int i = 100; i < 200; i++) { + assertFalse(map.containsValue(i)); + } + } + + @Test + public void itShouldTestContainsNullValue() { + HashMap map = new HashMap<>(); + map.put(101, null); + assertTrue(map.containsValue(null)); + } +} \ No newline at end of file From ecb43e9a59c404f2241250413cf53dbd792ca21d Mon Sep 17 00:00:00 2001 From: jaebradley Date: Mon, 31 Jul 2017 09:43:38 -0400 Subject: [PATCH 2/3] update --- .../impl/{HashMap.java => SimpleHashMap.java} | 9 ++++-- ...ashMapTest.java => SimpleHashMapTest.java} | 28 +++++++++---------- 2 files changed, 20 insertions(+), 17 deletions(-) rename src/main/java/problems/impl/{HashMap.java => SimpleHashMap.java} (97%) rename src/test/java/problems/impl/{HashMapTest.java => SimpleHashMapTest.java} (76%) diff --git a/src/main/java/problems/impl/HashMap.java b/src/main/java/problems/impl/SimpleHashMap.java similarity index 97% rename from src/main/java/problems/impl/HashMap.java rename to src/main/java/problems/impl/SimpleHashMap.java index 05e8215..46cb898 100644 --- a/src/main/java/problems/impl/HashMap.java +++ b/src/main/java/problems/impl/SimpleHashMap.java @@ -1,11 +1,13 @@ package problems.impl; -public class HashMap { +public class SimpleHashMap { private int size = 0; private int capacity = 16; - private Entry[] entries = new Entry[capacity]; private double loadFactor = 0.75; + @SuppressWarnings("unchecked") + private Entry[] entries = new Entry[capacity]; + private static class Entry { private final K key; private V value; @@ -17,7 +19,7 @@ public Entry(K key, V value) { } } - public HashMap() { + public SimpleHashMap() { } public boolean isEmpty() { @@ -93,6 +95,7 @@ private boolean shouldResize() { private void resize() { this.capacity = this.size * 2; + @SuppressWarnings("unchecked") Entry[] newEntries = new Entry[this.capacity]; for (Entry entry : this.entries) { if (entry != null) { diff --git a/src/test/java/problems/impl/HashMapTest.java b/src/test/java/problems/impl/SimpleHashMapTest.java similarity index 76% rename from src/test/java/problems/impl/HashMapTest.java rename to src/test/java/problems/impl/SimpleHashMapTest.java index 0327a65..722cb89 100644 --- a/src/test/java/problems/impl/HashMapTest.java +++ b/src/test/java/problems/impl/SimpleHashMapTest.java @@ -4,7 +4,7 @@ import static org.junit.Assert.*; -public class HashMapTest { +public class SimpleHashMapTest { private String firstKey = "firstKey"; private String firstValue = "firstValue"; private String secondKey = "secondKey"; @@ -13,7 +13,7 @@ public class HashMapTest { @Test public void itShouldInstantiate() { - HashMap map = new HashMap<>(); + SimpleHashMap map = new SimpleHashMap<>(); assertNotNull(map); assertEquals(0, map.getSize()); assertTrue(map.isEmpty()); @@ -21,7 +21,7 @@ public void itShouldInstantiate() { @Test public void itShouldPutAValue() { - HashMap map = new HashMap<>(); + SimpleHashMap map = new SimpleHashMap<>(); map.put(firstKey, firstValue); assertEquals(1, map.getSize()); assertEquals(firstValue, map.get(firstKey)); @@ -29,7 +29,7 @@ public void itShouldPutAValue() { @Test public void itShouldPutIdenticalKeys() { - HashMap map = new HashMap<>(); + SimpleHashMap map = new SimpleHashMap<>(); map.put(firstKey, firstValue); map.put(firstKey, secondValue); assertEquals(1, map.getSize()); @@ -38,7 +38,7 @@ public void itShouldPutIdenticalKeys() { @Test public void itShouldPutMultipleValues() { - HashMap map = new HashMap<>(); + SimpleHashMap map = new SimpleHashMap<>(); map.put(firstKey, firstValue); map.put(secondKey, secondValue); assertEquals(2, map.getSize()); @@ -48,7 +48,7 @@ public void itShouldPutMultipleValues() { @Test public void itShouldTestResizing() { - HashMap map = new HashMap<>(); + SimpleHashMap map = new SimpleHashMap<>(); for (int i = 0; i < 100; i++) { map.put(i, i); assertEquals(i + 1, map.getSize()); @@ -61,7 +61,7 @@ public void itShouldTestResizing() { @Test public void itShouldTestNullKey() { - HashMap map = new HashMap<>(); + SimpleHashMap map = new SimpleHashMap<>(); map.put(null, firstValue); assertEquals(1, map.getSize()); assertEquals(firstValue, map.get(null)); @@ -69,21 +69,21 @@ public void itShouldTestNullKey() { @Test public void itShouldTestContainsNonNullKey() { - HashMap map = new HashMap<>(); + SimpleHashMap map = new SimpleHashMap<>(); map.put(firstKey, firstValue); assertTrue(map.containsKey(firstKey)); } @Test public void itShouldTestContainsNullKey() { - HashMap map = new HashMap<>(); + SimpleHashMap map = new SimpleHashMap<>(); map.put(null, firstValue); assertTrue(map.containsKey(null)); } @Test public void itShouldTestRemovingNonNullKey() { - HashMap map = new HashMap<>(); + SimpleHashMap map = new SimpleHashMap<>(); map.put(firstKey, firstValue); map.remove(firstKey); assertEquals(0, map.getSize()); @@ -91,7 +91,7 @@ public void itShouldTestRemovingNonNullKey() { @Test public void itShouldTestRemovingNullKey() { - HashMap map = new HashMap<>(); + SimpleHashMap map = new SimpleHashMap<>(); map.put(null, firstValue); map.remove(null); assertEquals(0, map.getSize()); @@ -99,7 +99,7 @@ public void itShouldTestRemovingNullKey() { @Test public void itShouldTestRemoving() { - HashMap map = new HashMap<>(); + SimpleHashMap map = new SimpleHashMap<>(); for (int i = 0; i < 100; i++) { map.put(i, i); } @@ -113,7 +113,7 @@ public void itShouldTestRemoving() { @Test public void itShouldTestContainsValue() { - HashMap map = new HashMap<>(); + SimpleHashMap map = new SimpleHashMap<>(); for (int i = 0; i < 100; i++) { map.put(i, i); } @@ -129,7 +129,7 @@ public void itShouldTestContainsValue() { @Test public void itShouldTestContainsNullValue() { - HashMap map = new HashMap<>(); + SimpleHashMap map = new SimpleHashMap<>(); map.put(101, null); assertTrue(map.containsValue(null)); } From cb2f49785a89c07c9d5a51e5a1c6df2af5a130b5 Mon Sep 17 00:00:00 2001 From: jaebradley Date: Tue, 1 Aug 2017 01:17:18 -0400 Subject: [PATCH 3/3] update hashmap implementation --- .../java/problems/impl/SimpleHashMap.java | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/main/java/problems/impl/SimpleHashMap.java b/src/main/java/problems/impl/SimpleHashMap.java index 46cb898..4d66a78 100644 --- a/src/main/java/problems/impl/SimpleHashMap.java +++ b/src/main/java/problems/impl/SimpleHashMap.java @@ -1,5 +1,13 @@ package problems.impl; +import java.util.Objects; + +/** + * https://codereview.stackexchange.com/questions/171650/hashmap-implementation/171662#171662 + * @param Key Object + * @param Value Object + */ + public class SimpleHashMap { private int size = 0; private int capacity = 16; @@ -11,17 +19,14 @@ public class SimpleHashMap { private static class Entry { private final K key; private V value; - private Entry next = null; + private Entry next; - public Entry(K key, V value) { + Entry(K key, V value) { this.key = key; this.value = value; } } - public SimpleHashMap() { - } - public boolean isEmpty() { return this.size == 0; } @@ -31,9 +36,7 @@ public int getSize() { } public boolean containsKey(K key) { - Entry matchingEntry = getMatchingEntry(key); - - return matchingEntry != null && matchingEntry.key == key; + return getMatchingEntry(key) != null; } public boolean containsValue(V value) { @@ -155,8 +158,7 @@ private int indexOf(K object) { } private boolean matches(Object o1, Object o2) { - return (o1 == null && o2 == null) || - (o1 != null && o2 != null && o1.equals(o2)); + return Objects.equals(o1, o2); } /**