Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
/*
* Copyright 2023 The Fury authors
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.fury.resolver;

import com.google.common.base.Preconditions;
import io.fury.annotation.Internal;
import io.fury.util.MurmurHash3;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;

@Internal
public final class EnumStringBytes {
static final short DEFAULT_DYNAMIC_WRITE_STRING_ID = -1;

final byte[] bytes;
final long hashCode;
short dynamicWriteStringId = DEFAULT_DYNAMIC_WRITE_STRING_ID;

/**
* Create a binary EnumString.
*
* @param bytes String encoded bytes.
* @param hashCode String hash code. This should be unique and has no hash collision, and be
* deterministic, so we can use cache to reduce hash loop up for read.
*/
public EnumStringBytes(byte[] bytes, long hashCode) {
assert hashCode != 0;
this.bytes = bytes;
this.hashCode = hashCode;
}

public EnumStringBytes(String string) {
byte[] classNameBytes = string.getBytes(StandardCharsets.UTF_8);
Preconditions.checkArgument(classNameBytes.length <= Short.MAX_VALUE);
// Set seed to ensure hash is deterministic.
long hashCode =
MurmurHash3.murmurhash3_x64_128(classNameBytes, 0, classNameBytes.length, 47)[0];
if (hashCode == 0) {
// Ensure hashcode is not 0, so we can do some optimization to avoid boxing.
hashCode += 1;
}
this.bytes = classNameBytes;
this.hashCode = hashCode;
}

@Override
public boolean equals(Object o) {
// EnumStringBytes is used internally, skip unnecessary parameter check.
// if (this == o) {
// return true;
// }
// if (o == null || getClass() != o.getClass()) {
// return false;
// }
EnumStringBytes that = (EnumStringBytes) o;
// Skip compare data for equality for performance.
// Enum string such as classname are very common, compare hashcode only will have better
// performance.
// Java hashcode is 32-bit, so comparing hashCode equality is necessary here.
return hashCode == that.hashCode;
}

public byte[] getBytes() {
return bytes;
}

@Override
public int hashCode() {
// equals will compare 8 byte hash code.
return (int) hashCode;
}

@Override
public String toString() {
// TODO support other str encoding.
String str = new String(bytes);
;
return "string: " + str + " " + "size: " + bytes.length + " " + Arrays.toString(bytes);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
/*
* Copyright 2023 The Fury authors
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.fury.resolver;

import io.fury.collection.LongMap;
import io.fury.collection.ObjectMap;
import io.fury.memory.MemoryBuffer;
import java.nio.charset.StandardCharsets;

/**
* A resolver for limited string value writing. Currently, we only support classname dynamic
* writing. In the future, we may profile string field value dynamically and writing by this
* resolver to reduce string cost. TODO add common inner package names and classnames here. TODO
* share common immutable datastructure globally across multiple fury.
*/
public final class EnumStringResolver {
public static final byte USE_STRING_VALUE = 0;
public static final byte USE_STRING_ID = 1;
private static final int initialCapacity = 8;
// use a lower load factor to minimize hash collision
private static final float furyMapLoadFactor = 0.25f;

// Every deserialization for unregistered string will query it, performance is important.
private final ObjectMap<EnumStringBytes, String> enumStringBytes2StringMap =
new ObjectMap<>(initialCapacity, furyMapLoadFactor);
private final LongMap<EnumStringBytes> hash2EnumStringBytesMap =
new LongMap<>(initialCapacity, furyMapLoadFactor);
// Every enum bytes should be singleton at every fury, since we keep state in it.
private final ObjectMap<String, EnumStringBytes> enumString2BytesMap =
new ObjectMap<>(initialCapacity, furyMapLoadFactor);
private EnumStringBytes[] dynamicWrittenString = new EnumStringBytes[32];
private EnumStringBytes[] dynamicReadStringIds = new EnumStringBytes[32];
private short dynamicWriteStringId;
private short dynamicReadStringId;

public EnumStringResolver() {
dynamicWriteStringId = 0;
dynamicReadStringId = 0;
}

EnumStringBytes getOrCreateEnumStringBytes(String str) {
EnumStringBytes enumStringBytes = enumString2BytesMap.get(str);
if (enumStringBytes == null) {
enumStringBytes = new EnumStringBytes(str);
enumString2BytesMap.put(str, enumStringBytes);
}
return enumStringBytes;
}

public void writeEnumString(MemoryBuffer buffer, String str) {
writeEnumStringBytes(buffer, getOrCreateEnumStringBytes(str));
}

public String readEnumString(MemoryBuffer buffer) {
EnumStringBytes byteString = readEnumStringBytes(buffer);
String str = enumStringBytes2StringMap.get(byteString);
if (str == null) { // TODO use io.fury.resolver.ObjectMap
str = new String(byteString.bytes, StandardCharsets.UTF_8);
enumStringBytes2StringMap.put(byteString, str);
}
return str;
}

public void writeEnumStringBytes(MemoryBuffer buffer, EnumStringBytes byteString) {
short id = byteString.dynamicWriteStringId;
int writerIndex = buffer.writerIndex();
if (id == EnumStringBytes.DEFAULT_DYNAMIC_WRITE_STRING_ID) {
id = dynamicWriteStringId++;
byteString.dynamicWriteStringId = id;
EnumStringBytes[] dynamicWrittenEnumString = this.dynamicWrittenString;
if (dynamicWrittenEnumString.length <= id) {
EnumStringBytes[] tmp = new EnumStringBytes[id * 2];
System.arraycopy(dynamicWrittenEnumString, 0, tmp, 0, dynamicWrittenEnumString.length);
dynamicWrittenEnumString = tmp;
this.dynamicWrittenString = tmp;
}
dynamicWrittenEnumString[id] = byteString;
int bytesLen = byteString.bytes.length;
buffer.increaseWriterIndex(11 + bytesLen);
buffer.unsafePut(writerIndex, USE_STRING_VALUE);
// Since duplicate enum string writing are avoided by dynamic id,
// use 8-byte hash won't increase too much space.
buffer.unsafePutLong(writerIndex + 1, byteString.hashCode);
buffer.unsafePutShort(writerIndex + 9, (short) bytesLen);
buffer.put(writerIndex + 11, byteString.bytes, 0, bytesLen);
} else {
buffer.increaseWriterIndex(3);
buffer.unsafePut(writerIndex, USE_STRING_ID);
buffer.unsafePutShort(writerIndex + 1, id);
}
}

EnumStringBytes readEnumStringBytes(MemoryBuffer buffer) {
if (buffer.readByte() == USE_STRING_VALUE) {
long hashCode = buffer.readLong();
EnumStringBytes byteString = trySkipEnumStringBytes(buffer, hashCode);
updateDynamicString(byteString);
return byteString;
} else {
return dynamicReadStringIds[buffer.readShort()];
}
}

EnumStringBytes readEnumStringBytes(MemoryBuffer buffer, EnumStringBytes cache) {
if (buffer.readByte() == USE_STRING_VALUE) {
long hashCode = buffer.readLong();
if (cache.hashCode == hashCode) {
// skip byteString data
buffer.increaseReaderIndex(2 + cache.bytes.length);
updateDynamicString(cache);
return cache;
} else {
EnumStringBytes byteString = trySkipEnumStringBytes(buffer, hashCode);
updateDynamicString(byteString);
return byteString;
}
} else {
return dynamicReadStringIds[buffer.readShort()];
}
}

/** Read enum string by try to reuse previous read {@link EnumStringBytes} object. */
private EnumStringBytes trySkipEnumStringBytes(MemoryBuffer buffer, long hashCode) {
EnumStringBytes byteString = hash2EnumStringBytesMap.get(hashCode);
if (byteString == null) {
int strBytesLength = buffer.readShort();
byte[] strBytes = buffer.readBytes(strBytesLength);
byteString = new EnumStringBytes(strBytes, hashCode);
hash2EnumStringBytesMap.put(hashCode, byteString);
} else {
// skip byteString data
buffer.increaseReaderIndex(2 + byteString.bytes.length);
}
return byteString;
}

private void updateDynamicString(EnumStringBytes byteString) {
short currentDynamicReadId = dynamicReadStringId++;
EnumStringBytes[] dynamicReadStringIds = this.dynamicReadStringIds;
if (dynamicReadStringIds.length <= currentDynamicReadId) {
EnumStringBytes[] tmp = new EnumStringBytes[currentDynamicReadId * 2];
System.arraycopy(dynamicReadStringIds, 0, tmp, 0, dynamicReadStringIds.length);
dynamicReadStringIds = tmp;
this.dynamicReadStringIds = tmp;
}
dynamicReadStringIds[currentDynamicReadId] = byteString;
}

public void reset() {
resetRead();
resetWrite();
}

public void resetRead() {
int dynamicReadId = this.dynamicReadStringId;
if (dynamicReadId != 0) {
for (int i = 0; i < dynamicReadId; i++) {
dynamicReadStringIds[i] = null;
}
this.dynamicReadStringId = 0;
}
}

public void resetWrite() {
int dynamicWriteStringId = this.dynamicWriteStringId;
if (dynamicWriteStringId != 0) {
for (int i = 0; i < dynamicWriteStringId; i++) {
dynamicWrittenString[i].dynamicWriteStringId =
EnumStringBytes.DEFAULT_DYNAMIC_WRITE_STRING_ID;
dynamicWrittenString[i] = null;
}
this.dynamicWriteStringId = 0;
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/*
* Copyright 2023 The Fury authors
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.fury.resolver;

import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;

import io.fury.memory.MemoryBuffer;
import io.fury.memory.MemoryUtils;
import io.fury.util.StringUtils;
import org.testng.annotations.Test;

public class EnumStringResolverTest {

@Test
public void testWriteEnumString() {
MemoryBuffer buffer = MemoryUtils.buffer(32);
String str = StringUtils.random(128, 0);
EnumStringResolver stringResolver = new EnumStringResolver();
for (int i = 0; i < 128; i++) {
stringResolver.writeEnumString(buffer, str);
}
for (int i = 0; i < 128; i++) {
String enumString = stringResolver.readEnumString(buffer);
assertEquals(enumString.hashCode(), str.hashCode());
assertEquals(enumString.getBytes(), str.getBytes());
}
assertTrue(buffer.writerIndex() < str.getBytes().length + 128 * 4);
}
}