-
Notifications
You must be signed in to change notification settings - Fork 175
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Docs] split user guide doc into 3 parts to sync with website (#665)
split user guide doc into 3 parts for sync with website
- Loading branch information
1 parent
ed43ac2
commit f8094d3
Showing
6 changed files
with
761 additions
and
757 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# User Guide | ||
- For Cross Language Object Graph Guide, see [xlang_object_graph_guide](https://github.com/alipay/fury/blob/main/docs/xlang_object_graph_guide.md) doc. | ||
- For Java Object Graph Guide, see [java_object_graph_guide](https://github.com/alipay/fury/blob/main/docs/java_object_graph_guide.md) doc. | ||
- For Row Format Guide, see [row_format_guide](https://github.com/alipay/fury/blob/main/docs/row_format_guide.md) doc. | ||
|
||
# Development | ||
- For cpp debug, see [cpp_debug](https://github.com/alipay/fury/blob/main/docs/cpp_debug.md) doc. | ||
- For development, see [development](https://github.com/alipay/fury/blob/main/docs/development.md) doc. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,170 @@ | ||
## Java object graph serialization | ||
When only Java object serialization is needed, this mode has better performance than cross-language object graph serialization. | ||
|
||
### Quick Start | ||
```java | ||
import io.fury.Fury; | ||
import java.util.List; | ||
import java.util.Arrays; | ||
|
||
public class Example { | ||
public static void main(String[] args) { | ||
SomeClass object = new SomeClass(); | ||
// Note that Fury instances should be reused between | ||
// multiple serializations of different objects. | ||
{ | ||
Fury fury = Fury.builder().withLanguage(Language.JAVA) | ||
.withRefTracking(true) | ||
// Allow deserializing objects of unknown types, | ||
// which is more flexible but less secure. | ||
// .withSecureMode(false) | ||
.build(); | ||
// Registering types can reduce class name serialization overhead, but it is not mandatory. | ||
// If secure mode is enabled, all custom types must be registered. | ||
fury.register(SomeClass.class); | ||
byte[] bytes = fury.serialize(object); | ||
System.out.println(fury.deserialize(bytes)); | ||
} | ||
{ | ||
ThreadSafeFury fury = Fury.builder().withLanguage(Language.JAVA) | ||
// Allow deserializing objects of unknown types, | ||
// which is more flexible but less secure. | ||
// .withSecureMode(false) | ||
.withRefTracking(true) | ||
.buildThreadSafeFury(); | ||
byte[] bytes = fury.serialize(object); | ||
System.out.println(fury.deserialize(bytes)); | ||
} | ||
{ | ||
ThreadSafeFury fury = new ThreadSafeFury(() -> { | ||
Fury fury = Fury.builder().withLanguage(Language.JAVA) | ||
.withRefTracking(true).build(); | ||
fury.register(SomeClass.class); | ||
return fury; | ||
}); | ||
byte[] bytes = fury.serialize(object); | ||
System.out.println(fury.deserialize(bytes)); | ||
} | ||
} | ||
} | ||
``` | ||
|
||
### Advanced Fury Creation | ||
Single thread fury: | ||
```java | ||
Fury fury = Fury.builder() | ||
.withLanguage(Language.JAVA) | ||
// enable reference tracking for shared/circular references. | ||
// Disabling it gives better performance if there are no duplicate references. | ||
.withRefTracking(true) | ||
// compress int/long for smaller size | ||
// .withNumberCompressed(true) | ||
.withCompatibleMode(CompatibleMode.SCHEMA_CONSISTENT) | ||
// enable type forward/backward compatibility | ||
// disable it for small size and better performance. | ||
// .withCompatibleMode(CompatibleMode.COMPATIBLE) | ||
// enable async multi-threaded compilation. | ||
.withAsyncCompilationEnabled(true) | ||
.build(); | ||
byte[] bytes = fury.serialize(object); | ||
System.out.println(fury.deserialize(bytes)); | ||
``` | ||
Thread-safe fury: | ||
```java | ||
ThreadSafeFury fury = Fury.builder() | ||
.withLanguage(Language.JAVA) | ||
// enable reference tracking for shared/circular references. | ||
// Disabling it gives better performance if there are no duplicate references. | ||
.withRefTracking(true) | ||
// compress int/long for smaller size | ||
// .withNumberCompressed(true) | ||
.withCompatibleMode(CompatibleMode.SCHEMA_CONSISTENT) | ||
// enable type forward/backward compatibility | ||
// disable it for small size and better performance. | ||
// .withCompatibleMode(CompatibleMode.COMPATIBLE) | ||
// enable async multi-threaded compilation. | ||
.withAsyncCompilationEnabled(true) | ||
.buildThreadSafeFury(); | ||
byte[] bytes = fury.serialize(object); | ||
System.out.println(fury.deserialize(bytes)); | ||
``` | ||
|
||
### Security & Class Registration | ||
`FuryBuilder#requireClassRegistration`/`FuryBuilder#withSecureMode` can be used to disable class registration. Doing so allows deserializing objects of unknown types, which is more flexible but **less secure**. | ||
|
||
**Do not disable class registration unless you can ensure your environment is indeed secure**. When this option is disabled, malicious code in `init/equals/hashCode` can be executed while deserializing unknown/untrusted types. | ||
|
||
Class registration can not only reduce security risks, but also avoid classname serialization cost. | ||
|
||
You can register class with API `Fury#register`. | ||
|
||
Note that class registration order is important: the serialization and deserialization peers must register classes in the same order. | ||
|
||
### Serializer Registration | ||
You can also register a custom serializer for a class by `Fury#registerSerializer` API. | ||
|
||
Or implement `java.io.Externalizable` for a class. | ||
|
||
### Zero-Copy Serialization | ||
```java | ||
import io.fury.*; | ||
import io.fury.serializers.BufferObject; | ||
import io.fury.memory.MemoryBuffer; | ||
import java.util.*; | ||
import java.util.stream.Collectors; | ||
|
||
public class ZeroCopyExample { | ||
// mvn exec:java -Dexec.mainClass="io.ray.fury.examples.ZeroCopyExample" | ||
public static void main(String[] args) { | ||
// Note that the Fury instance should be reused instead of being created every time. | ||
Fury fury = Fury.builder() | ||
.withLanguage(Language.JAVA) | ||
.build(); | ||
List<Object> list = Arrays.asList("str", new byte[1000], new int[100], new double[100]); | ||
Collection<BufferObject> bufferObjects = new ArrayList<>(); | ||
byte[] bytes = fury.serialize(list, e -> !bufferObjects.add(e)); | ||
List<MemoryBuffer> buffers = bufferObjects.stream() | ||
.map(BufferObject::toBuffer).collect(Collectors.toList()); | ||
System.out.println(fury.deserialize(bytes, buffers)); | ||
} | ||
} | ||
``` | ||
|
||
### Meta Sharing | ||
Fury supports sharing type metadata (class name, field name, final field type information, etc.) between multiple serializations in a context (e.g. a TCP connection). This information is sent to the peer during the first serialization in the context. Based on this metadata, the peer can rebuild the same deserializer, which avoids transmitting metadata for subsequent serializations, reduces network traffic, and supports type forward/backward compatibility automatically. | ||
|
||
```java | ||
// Fury.builder() | ||
// .withLanguage(Language.JAVA) | ||
// .withRefTracking(true) | ||
// // share meta across serialization. | ||
// .withMetaContextShareEnabled(true) | ||
// Not thread-safe fury. | ||
MetaContext context = xxx; | ||
fury.getSerializationContext().setMetaContext(context); | ||
byte[] bytes = fury.serialize(o); | ||
// Not thread-safe fury. | ||
MetaContext context = xxx; | ||
fury.getSerializationContext().setMetaContext(context); | ||
fury.deserialize(bytes) | ||
|
||
// Thread-safe fury | ||
fury.setClassLoader(beanA.getClass().getClassLoader()); | ||
byte[] serialized = fury.execute( | ||
f -> { | ||
f.getSerializationContext().setMetaContext(context); | ||
return f.serialize(beanA); | ||
}); | ||
// thread-safe fury | ||
fury.setClassLoader(beanA.getClass().getClassLoader()); | ||
Object newObj = fury.execute( | ||
f -> { | ||
f.getSerializationContext().setMetaContext(context); | ||
return f.deserialize(serialized); | ||
}); | ||
``` | ||
|
||
### Deserialize non-existent classes | ||
Fury supports deserializing non-existent classes; this feature can be enabled by `FuryBuilder#withDeserializeUnExistClassEnabled(true)`. When this feature and metadata sharing are both enabled, Fury will store the deserialized data of such a type in a lazy subclass of Map. By using the lazy map implemented by Fury, the rebalance cost of filling the map during deserialization can be avoided, which further improves performance. If this data is sent to another process and the class exists in that process, the data will be deserialized into an object of this type without losing any information. | ||
|
||
If metadata sharing is not enabled, the new class data will be skipped and an `UnExistedSkipClass` stub object will be returned. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,121 @@ | ||
## Row format protocol | ||
### Java | ||
```java | ||
public class Bar { | ||
String f1; | ||
List<Long> f2; | ||
} | ||
|
||
public class Foo { | ||
int f1; | ||
List<Integer> f2; | ||
Map<String, Integer> f3; | ||
List<Bar> f4; | ||
} | ||
|
||
Encoder<Foo> encoder = Encoders.bean(Foo.class); | ||
Foo foo = new Foo(); | ||
foo.f1 = 10; | ||
foo.f2 = IntStream.range(0, 1000000).boxed().collect(Collectors.toList()); | ||
foo.f3 = IntStream.range(0, 1000000).boxed().collect(Collectors.toMap(i -> "k"+i, i->i)); | ||
List<Bar> bars = new ArrayList<>(1000000); | ||
for (int i = 0; i < 1000000; i++) { | ||
Bar bar = new Bar(); | ||
bar.f1 = "s"+i; | ||
bar.f2 = LongStream.range(0, 10).boxed().collect(Collectors.toList()); | ||
bars.add(bar); | ||
} | ||
foo.f4 = bars; | ||
// Can be zero-copy read by python | ||
BinaryRow binaryRow = encoder.toRow(foo); | ||
// can be data from python | ||
Foo newFoo = encoder.fromRow(binaryRow); | ||
// zero-copy read List<Integer> f2 | ||
BinaryArray binaryArray2 = binaryRow.getArray(1); | ||
// zero-copy read List<Bar> f4 | ||
BinaryArray binaryArray4 = binaryRow.getArray(4); | ||
// zero-copy read the 11th element of `List<Bar> f4` | ||
BinaryRow barStruct = binaryArray4.getStruct(10); | ||
|
||
// zero-copy read the 6th element of f2 of the 11th element of `List<Bar> f4` | ||
barStruct.getArray(1).getLong(5); | ||
Encoder<Bar> barEncoder = Encoders.bean(Bar.class); | ||
// deserialize part of data. | ||
Bar newBar = barEncoder.fromRow(barStruct); | ||
Bar newBar2 = barEncoder.fromRow(binaryArray4.getStruct(20)); | ||
``` | ||
### Python | ||
```python | ||
@dataclass | ||
class Bar: | ||
f1: str | ||
f2: List[pa.int64] | ||
@dataclass | ||
class Foo: | ||
f1: pa.int32 | ||
f2: List[pa.int32] | ||
f3: Dict[str, pa.int32] | ||
f4: List[Bar] | ||
|
||
encoder = pyfury.encoder(Foo) | ||
foo = Foo(f1=10, f2=list(range(1000_000)), | ||
f3={f"k{i}": i for i in range(1000_000)}, | ||
f4=[Bar(f1=f"s{i}", f2=list(range(10))) for i in range(1000_000)]) | ||
binary: bytes = encoder.to_row(foo).to_bytes() | ||
print(f"start: {datetime.datetime.now()}") | ||
foo_row = pyfury.RowData(encoder.schema, binary) | ||
print(foo_row.f2[100000], foo_row.f4[100000].f1, foo_row.f4[200000].f2[5]) | ||
print(f"end: {datetime.datetime.now()}") | ||
|
||
binary = pickle.dumps(foo) | ||
print(f"pickle start: {datetime.datetime.now()}") | ||
new_foo = pickle.loads(binary) | ||
print(new_foo.f2[100000], new_foo.f4[100000].f1, new_foo.f4[200000].f2[5]) | ||
print(f"pickle end: {datetime.datetime.now()}") | ||
``` | ||
### Apache Arrow Support | ||
Fury Format also supports automatic conversion from/to Arrow Table/RecordBatch. | ||
|
||
Java: | ||
```java | ||
Schema schema = TypeInference.inferSchema(BeanA.class); | ||
ArrowWriter arrowWriter = ArrowUtils.createArrowWriter(schema); | ||
Encoder<BeanA> encoder = Encoders.rowEncoder(BeanA.class); | ||
for (int i = 0; i < 10; i++) { | ||
BeanA beanA = BeanA.createBeanA(2); | ||
arrowWriter.write(encoder.toRow(beanA)); | ||
} | ||
return arrowWriter.finishAsRecordBatch(); | ||
``` | ||
Python: | ||
```python | ||
import pyfury | ||
encoder = pyfury.encoder(Foo) | ||
encoder.to_arrow_record_batch([foo] * 10000) | ||
encoder.to_arrow_table([foo] * 10000) | ||
``` | ||
C++ | ||
```c++ | ||
std::shared_ptr<ArrowWriter> arrow_writer; | ||
EXPECT_TRUE( | ||
ArrowWriter::Make(schema, ::arrow::default_memory_pool(), &arrow_writer) | ||
.ok()); | ||
for (auto &row : rows) { | ||
EXPECT_TRUE(arrow_writer->Write(row).ok()); | ||
} | ||
std::shared_ptr<::arrow::RecordBatch> record_batch; | ||
EXPECT_TRUE(arrow_writer->Finish(&record_batch).ok()); | ||
EXPECT_TRUE(record_batch->Validate().ok()); | ||
EXPECT_EQ(record_batch->num_columns(), schema->num_fields()); | ||
EXPECT_EQ(record_batch->num_rows(), row_nums); | ||
``` | ||
```java | ||
Schema schema = TypeInference.inferSchema(BeanA.class); | ||
ArrowWriter arrowWriter = ArrowUtils.createArrowWriter(schema); | ||
Encoder<BeanA> encoder = Encoders.rowEncoder(BeanA.class); | ||
for (int i = 0; i < 10; i++) { | ||
BeanA beanA = BeanA.createBeanA(2); | ||
arrowWriter.write(encoder.toRow(beanA)); | ||
} | ||
return arrowWriter.finishAsRecordBatch(); | ||
``` |
Oops, something went wrong.