Skip to content

Commit

Permalink
5179: Bucket.findEntryOffset() can reduce calls to getKeySize() (#5949)
Browse files Browse the repository at this point in the history
Fixes: #5179
Fixes: #5188
Reviewed-by: Ivan Malygin <ivan@swirldslabs.com>, Oleg Mazurov <oleg.mazurov@swirldslabs.com>
Signed-off-by: Artem Ananev <artem.ananev@swirldslabs.com>
  • Loading branch information
artemananiev committed Apr 4, 2023
1 parent f56f7a7 commit 6a0fd24
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 86 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
import static com.swirlds.merkledb.files.hashmap.HalfDiskHashMap.SPECIAL_DELETE_ME_VALUE;
import static com.swirlds.merkledb.files.hashmap.HalfDiskHashMap.VALUE_SIZE;

import com.swirlds.common.io.streams.SerializableDataOutputStream;
import com.swirlds.common.utility.Units;
import com.swirlds.merkledb.serialize.KeySerializer;
import com.swirlds.virtualmap.VirtualKey;
Expand All @@ -43,14 +42,13 @@
* <li><b>int</b> - Bucket index in map hash index</li>
* <li><b>int</b> - Bucket size, total number of bytes taken by bucket including header</li>
* <li><b>int</b> - Number of entries in this bucket</li>
* <li><b>Entry[]</b> - array of entries</li>
* </ul>
*
* Each Entry contains:
* Then comes an array of entries. Each Entry contains:
*
* <ul>
* <li><b>KEY_HASHCODE_SIZE(int/long)</b> - key hash code</li>
* <li><b>value</b> - the value of the key/value pair. It is here because it is fixed size</li>
* <li><b>hash code</b> - key hash code (int)</li>
* <li><b>value</b> - the long value. It is here because it is fixed size (long)</li>
* <li><b>key data</b> - can be fixed size of entryKeySize or variable size</li>
* </ul>
*/
Expand All @@ -72,11 +70,20 @@ public final class Bucket<K extends VirtualKey<? super K>> {
private static final int BUCKET_HEADER_SIZE = BUCKET_ENTRY_COUNT_OFFSET + BUCKET_ENTRY_COUNT_SIZE;

private static final int ENTRY_VALUE_OFFSET = KEY_HASHCODE_SIZE;
private static final int ENTRY_KEY_OFFSET = KEY_HASHCODE_SIZE + VALUE_SIZE;

/** Keep track of the largest bucket we have ever created for logging */
private static final AtomicInteger LARGEST_SIZE_OF_BUCKET_CREATED = new AtomicInteger(0);

private int keySerializationVersion;

/**
* Byte buffer that holds this bucket data, including bucket index, size in bytes, number of
* entries, and entry data. Buffer is expanded as needed, when new entries are added. Buffer
* limit is kept equal to the bucket size in bytes.
*/
private ByteBuffer bucketBuffer;

private KeySerializer<K> keySerializer;
private ByteBuffer reusableBuffer;

Expand Down Expand Up @@ -107,8 +114,7 @@ public Bucket<K> clear() {
// reset size
setSize(BUCKET_HEADER_SIZE);
// reset buffer
bucketBuffer.position(0);
bucketBuffer.limit(bucketBuffer.capacity());
bucketBuffer.clear();
return this;
}

Expand Down Expand Up @@ -200,7 +206,7 @@ public long findValue(final int keyHashCode, final K key, final long notFoundVal
final FindResult found = findEntryOffset(keyHashCode, key);
if (found.found) {
// yay! we found it
return getValue(found.entryOffset);
return found.entryValue;
} else {
return notFoundValue;
}
Expand Down Expand Up @@ -233,34 +239,26 @@ public void putValue(final K key, final long value) {
// move all entries after this one up
final int offsetOfNextEntry = result.entryOffset + entrySize;
final int sizeOfEntriesToMove = currentSize - offsetOfNextEntry;
// FUTURE WORK For Java 17 do this
// https://github.com/swirlds/swirlds-platform/issues/4090
// bucketBuffer.put(result.entryOffset,bucketBuffer,offsetOfNextEntry,sizeOfEntriesToMove);
final byte[] bucketBytes = bucketBuffer.array();
System.arraycopy(
bucketBytes, offsetOfNextEntry, bucketBytes, result.entryOffset, sizeOfEntriesToMove);
bucketBuffer.put(result.entryOffset, bucketBuffer, offsetOfNextEntry, sizeOfEntriesToMove);
}
// decrement count
decrementBucketEntryCount();
// update size by removing entry size from size
setSize(currentSize - entrySize);
// we are done deleting
return;
} else {
// was not found, and we are deleting so nothing to do
return;
}
return;
}
// handle UPDATE
if (result.found) {
// yay! we found it, so update value
setValue(result.entryOffset, value);
bucketBuffer.putLong(result.entryOffset + ENTRY_VALUE_OFFSET, value);
return;
}
/* We have to serialize a variable-size key to a temp byte buffer to check
if there is going to be enough room to store it in this bucket. */
if (keySerializer.isVariableSize()) {
reusableBuffer.rewind();
reusableBuffer.clear();
while (true) {
try {
keySerializer.serialize(key, reusableBuffer);
Expand All @@ -281,12 +279,6 @@ public void putValue(final K key, final long value) {
// write the key
reusableBuffer.flip();
bucketBuffer.put(reusableBuffer);
// shrink reusable buffer, if possible
if (keySizeBytes < reusableBuffer.capacity() / 2) {
reusableBuffer = ByteBuffer.allocate(reusableBuffer.capacity() / 2);
}
// increment count and update size
incrementBucketEntryCount();
} else {
final int newSize =
result.entryOffset + KEY_HASHCODE_SIZE + VALUE_SIZE + keySerializer.getSerializedSize();
Expand All @@ -297,9 +289,9 @@ public void putValue(final K key, final long value) {
bucketBuffer.putInt(keyHashCode);
bucketBuffer.putLong(value);
keySerializer.serialize(key, bucketBuffer);
// increment count
incrementBucketEntryCount();
}
// increment count
incrementBucketEntryCount();
} catch (IOException e) {
logger.error(EXCEPTION.getMarker(), "Failed putting key={} value={} in a bucket", key, value, e);
throw new UncheckedIOException(e);
Expand All @@ -313,35 +305,26 @@ public void putValue(final K key, final long value) {
*/
public void putAllData(ByteBuffer dataBuffer) {
ensureCapacity(dataBuffer.limit());
/* FUTURE WORK - https://github.com/swirlds/swirlds-platform/issues/3939 Maybe we should wrap not copy buffer */
bucketBuffer.rewind().put(dataBuffer);
}

/**
* Write the complete data bytes for this bucket to a output stream.
* Write the complete data bytes for this bucket to a byte buffer
*
* @param outputStream The stream to write to
* @param buffer The byte buffer to write to
* @return the number of bytes written
* @throws IOException If there was a problem writing
*/
public int writeToOutputStream(final SerializableDataOutputStream outputStream) throws IOException {
final int bucketSize = getSize();
outputStream.write(bucketBuffer.array(), 0, bucketSize);
return bucketSize;
}

public int writeToByteBuffer(final ByteBuffer buffer) {
final int bucketSize = getSize();
buffer.put(ByteBuffer.wrap(bucketBuffer.array(), 0, bucketSize));
return bucketSize;
buffer.put(bucketBuffer.rewind());
return getSize();
}

// =================================================================================================================
// Private API

/**
* Expand the capacity of this bucket to make sure it is at least big enough to contain
* neededSize
* neededSize and sets bucket buffer limit to the requested size.
*/
private void ensureCapacity(int neededSize) {
int capacity = bucketBuffer.capacity();
Expand All @@ -354,6 +337,7 @@ private void ensureCapacity(int neededSize) {
newBucketBuffer.put(bucketBuffer);
bucketBuffer = newBucketBuffer;
}
bucketBuffer.limit(neededSize);
}

/**
Expand All @@ -370,22 +354,26 @@ private FindResult findEntryOffset(final int keyHashCode, final K key) throws IO
final int entryCount = getBucketEntryCount();
int entryOffset = BUCKET_HEADER_SIZE;
for (int i = 0; i < entryCount; i++) {
final int readHashCode = bucketBuffer.getInt(entryOffset);
bucketBuffer.position(entryOffset);
final int readHashCode = bucketBuffer.getInt();
if (readHashCode == keyHashCode) {
final long readValue = bucketBuffer.getLong();
// now check the full key
bucketBuffer.position(entryOffset + KEY_HASHCODE_SIZE + VALUE_SIZE);
if (keySerializer.equals(bucketBuffer, keySerializationVersion, key)) {
// yay! we found it
return new FindResult(entryOffset, i, true);
return new FindResult(entryOffset, i, true, readValue);
}
}
// now read the key size so we can jump
/* FUTURE WORK - https://github.com/swirlds/swirlds-platform/issues/3932 */
int keySize = getKeySize(entryOffset);
// move to next entry
entryOffset += KEY_HASHCODE_SIZE + VALUE_SIZE + keySize;
// Move entry offset to the next entry. No need to do this for the last entry
if (i < entryCount - 1) {
// now read the key size so we can jump
int keySize = getKeySize(entryOffset);
// move to next entry
entryOffset += KEY_HASHCODE_SIZE + VALUE_SIZE + keySize;
}
}
return new FindResult(entryOffset, -1, false);
// Entry is not found. Return the current size of the buffer as the offset
return new FindResult(getSize(), -1, false, 0);
}

/**
Expand All @@ -398,7 +386,7 @@ private int getKeySize(final int entryOffset) {
if (!keySerializer.isVariableSize()) {
return keySerializer.getSerializedSize();
}
bucketBuffer.position(entryOffset + KEY_HASHCODE_SIZE + VALUE_SIZE);
bucketBuffer.position(entryOffset + ENTRY_KEY_OFFSET);
return keySerializer.deserializeKeySize(bucketBuffer);
}

Expand All @@ -410,30 +398,10 @@ private int getKeySize(final int entryOffset) {
* @throws IOException If there was a problem reading or deserializing the key
*/
private K getKey(int entryOffset) throws IOException {
bucketBuffer.position(entryOffset + Integer.BYTES);
bucketBuffer.position(entryOffset + ENTRY_KEY_OFFSET);
return keySerializer.deserialize(bucketBuffer, keySerializationVersion);
}

/**
* Read a value for a given entry
*
* @param entryOffset the offset for the entry
* @return the value stored in given entry
*/
private long getValue(int entryOffset) {
return bucketBuffer.getLong(entryOffset + ENTRY_VALUE_OFFSET);
}

/**
* Read a value for a given entry
*
* @param entryOffset the offset for the entry
* @param value the value to set for entry
*/
private void setValue(int entryOffset, long value) {
bucketBuffer.putLong(entryOffset + ENTRY_VALUE_OFFSET, value);
}

/** toString for debugging */
@SuppressWarnings("StringConcatenationInsideStringBufferAppend")
@Override
Expand Down Expand Up @@ -480,18 +448,14 @@ public String toString() {
// =================================================================================================================
// FindResult class

/** Simple struct style data class for allowing return of two things */
private static class FindResult {
public final int entryOffset;
public final int entryIndex;
public final boolean found;

public FindResult(final int entryOffset, final int entryIndex, final boolean found) {
this.entryOffset = entryOffset;
this.entryIndex = entryIndex;
this.found = found;
}
}
/**
* Simple record for entry lookup results. If an entry is found, "found" is set to true,
* "entryOffset" is the entry offset in bytes in the bucket buffer, "entryIndex" is the
* entry index in the array of entries, and "entryValue" is the entry value. If no entity
* is found, "found" is false, "entryOffset" is the total size of the bucket buffer,
* "entryIndex" and "entryValue" are undefined.
*/
private record FindResult(int entryOffset, int entryIndex, boolean found, long entryValue) {}

/** Get bucket buffer for tests */
ByteBuffer getBucketBuffer() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ void toStringAsExpectedForBucket() {
bucket.setBucketIndex(0);
final String nonEmptyBucketRepr = "Bucket{bucketIndex=0, entryCount=1, size=32\n"
+ " ENTRY[0] value= 5124 keyHashCode=2056 keyVer=3054"
+ " key=LongVirtualKey{value=5124, hashCode=5124} keySize=8\n"
+ " key=LongVirtualKey{value=2056, hashCode=2056} keySize=8\n"
+ "} RAW DATA = 00 00 00 00 00 00 00 20 00 00 00 01 00 00 08 08 00 00 00 00 00"
+ " 00 14 04 00 00 00 00 00 00 08 08 ";
assertEquals(nonEmptyBucketRepr, bucket.toString(), "Non-empty bucket represent as expected");
Expand Down

0 comments on commit 6a0fd24

Please sign in to comment.