From 2ee9725a4a45321c6f1b8cb1f9f874e31e520038 Mon Sep 17 00:00:00 2001 From: David Wolinsky Date: Mon, 19 Aug 2019 12:09:08 -0700 Subject: [PATCH 1/4] [common] Add Canonical Serialization Spec Same as title. This defines the spec and gives concrete examples (also implemented in Rust) for RawTransaction and its contents. --- common/canonical_serialization/README.md | 517 ++++++++++++++++++ .../canonical_serialization_examples.rs | 349 ++++++++++++ types/src/unit_tests/mod.rs | 1 + 3 files changed, 867 insertions(+) create mode 100644 common/canonical_serialization/README.md create mode 100644 types/src/unit_tests/canonical_serialization_examples.rs diff --git a/common/canonical_serialization/README.md b/common/canonical_serialization/README.md new file mode 100644 index 000000000000..6170262c2c53 --- /dev/null +++ b/common/canonical_serialization/README.md @@ -0,0 +1,517 @@ +# Libra Canonical Serialization (LCS) + +## Overview + +This document defines Libra Canonical Serialization (LCS). LCS defines a +deterministic means for translating a message or data structure into bytes +irrespective of platform, architecture, or programming language. + +## Background + +In Libra, participants pass around messages or data structures that often times need to be signed +by a prover and verified by one or more verifiers. Serialization in this context refers to the +process of converting a message into a byte array. Many serialization approaches support loose +standards such that two implementations can produce two different byte streams that would +represent the same, identical message. While for many applications, non-deterministic +serialization causes no issues, it does so for applications using serialization for cryptographic +purposes. For example, given a signature and a message, a verifier may be unable to produce the +same serialized byte array constructed by the prover when the prover signed the message resulting +in a non-verifiable message. 
In other words, to ensure message verifiability when using +non-deterministic serialization, participants must either retain the original serialized bytes or +risk losing the ability to verify messages. This creates a burden requiring participants to +maintain both a copy of the serialized bytes and the deserialized message often leading to +confusion about safety and correctness. While there exist a handful of existing deterministic +serialization formats, there is no obvious choice. To address this, we propose Libra Canonical +Serialization that defines a deterministic means for translating a message into bytes. + +## Specification + +LCS supports the following primitive types: + +* Booleans +* Signed 8-bit, 16-bit, 32-bit, and 64-bit integers +* Unsigned 8-bit, 16-bit, 32-bit, and 64-bit integers +* Length prefixed byte array +* UTF-8 Encoded Strings +* Structures + +## General structure + +The serialized format of a message does not self-specify its encoding; in other words, LCS employs +stream encoding enabling storage of arbitrary and variable length messages. LCS defines the +structure of primitives like integers, booleans, and byte arrays. It uses a composition of these +primitives to support advanced data structures such as enumerations, maps, and objects. +Because of this typeless but expressive nature, each message requires both a serializer and +reciprocal deserializer. + +Unless specified, all numbers are stored in little endian, two's complement format. 
+ +### Booleans and Integers + +|Type |Original data |Hex representation |Serialized format | +|--- |--- |--- |--- | +|Boolean |True / False |0x01 / 0x00 |[01] / [00] | +|8-bit signed integer |-1 |0xFF |[FF] | +|8-bit unsigned integer |1 |0x01 |[01] | +|16-bit signed integer |-4660 |0xEDCC |[CCED] | +|16-bit unsigned integer |4660 |0x1234 |[3412] | +|32-bit signed integer |-305419896 |0xEDCBA988 |[88A9CBED] | +|32-bit unsigned integer |305419896 |0x12345678 |[78563412] | +|64-bit signed integer |-1311768467750121216 |0xEDCBA98754321100 |[0011325487A9CBED] | +|64-bit unsigned integer |1311768467750121216 |0x12345678ABCDEF00 |[00EFCDAB78563412] | + +### Byte Arrays + +Byte arrays are length prefixed with a 32-bit unsigned integer as defined in +the primitive section of this document. Byte arrays must be 2^31 bytes or less. + +Example: + +Given a byte array of length 231800522: [0x00 0x01 ... 0xFE 0xFF] + +LCS representation: [CAFE D00D 00 01 ... FE FF] + +Where 231800522 serialized into [CAFE D00D] + +### Strings + +Strings by default are stored in UTF-8 format with a 32-bit unsigned int prefix for the byte +array representation of the UTF-8 string. + +Given the string: ሰማይ አይታረስ ንጉሥ አይከሰስ። + +Length prefixed format: [36000000E188B0E1889BE18BAD20E18AA0E18BADE189B3E188A8E188B520E18A95E18C89E188A520E18AA0E18BADE18AA8E188B0E188B5E18DA2] + +Note: the string consists of 20 characters but 54 UTF-8 encoded bytes + +### Structures + +Structures are fixed sized types consisting of fields with potentially different types: + +``` +struct MyStruct { + boolean: bool, + bytes: Vec<u8>, + label: String, +} +``` + +Each field within a struct is serialized in the order specified by the canonical structure +definition. + +LCS would serialize an instance of MyStruct into: +[boolean bytes label] + +Furthermore, structs can exist within other structs. LCS recurses into each struct and serializes +them in order. 
Consider + +``` +struct Wrapper { + inner: MyStruct, + name: String, +} +``` + +LCS would serialize an instance of Wrapper into: +[inner name] +With one layer of abstraction removed: +[boolean bytes label name] + +There are no labels; the struct ordering defines the organization within the serialization +stream. + +## Advanced Datatypes + +Leveraging the primitive specification, more advanced types can be serialized via LCS. + +Advanced types include: + +* Tuples +* Variable length arrays +* Maps (Key / Value Stores) +* Enumerations +* Optional data + +### Tuples + +Tuples are typed compositions of objects: +(Type0, Type1) + +Tuples can be considered unnamed structs and leverage the same organization as an anonymous +structure within LCS. Like structures, each object should be serialized using its well-defined +consistent serialization method and then placed sequentially into the bitstream in the order +defined within the tuple. + +In byte representation: +[tuple.0, tuple.1] + +Note: Tuples do not need length as they are fixed in length like structures are fixed in fields. + +### Variable Length Arrays + +Variable length arrays consist of a common object. In LCS, they are represented first with a +length prefix on the number of elements in the array and then the object serialization for each +object in the order it is stored within the array. + +Assuming an array of objects, [obj0, obj1, obj2, ...]: + +LCS would serialize an instance of this array into: +[length of array | obj0 obj1 obj2 ...] + +### Maps (Key / Value Stores) + +Maps can be considered a variable length array of length two tuples where Key points to Value is +represented as (Key, Value). Hence they should be serialized first with the number of +entries followed by each entry in lexicographical order as defined by the byte representation of +the LCS serialized key. 
+ +Consider the following map: + +``` +{ + "A" => "B", + "C" => "D", + "E" => "F" +} +``` + +LCS would serialize this into: +[ 3 A B C D E F] + +(Note the above are already in lexicographic order) + +### Enumerations + +An enumeration is typically represented as: + +``` +enum Option { + option0(u32), + option1(u64) +} +``` + +wherein the enum object can only represent one of these options. + +In LCS, each option is mapped to a specific unsigned 32-bit integer followed by optionally +serialized data if the type has an associated value. + +option0 would be encoded as 0 + +and option1 would be encoded as 1 + +Examples: + +* option0(5) -> [0000 0000 0500 0000] +* option1(6) -> [0100 0000 0600 0000 0000 0000] + +### Optional Data + +Optional or nullable data either exists in its full representation or does not. For example, + +``` +optional_data: Option<u8>; // Rust +uint8 *optional_data; // C +``` + +LCS represents this as such: +If the data, say optional\_data is equal to 8, is present: +[True data] -> [01 08] +If the data is not present: +[False] -> [00] + +## Backwards compatibility + +Advanced objects are only loosely defined but are more dependent upon the specification in which +they are used. LCS does not provide direct provisions for versioning or backwards / forwards +compatibility. A change in an object's structure could prevent historical clients from +understanding new clients and vice-versa. + +## RawTransaction Serialization + +Note: See `types/src/unit_tests/canonical_serialization_examples.rs` for verification of these +types in the Rust implementation of LCS. 
+ +RawTransaction: + +``` +struct RawTransaction { + sender: AccountAddress, + sequence_number: u64, + payload: TransactionPayload, + max_gas_amount: u64, + gas_unit_price: u64, + expiration_time: Duration, +} +``` + +- AccountAddress is represented as a variable length byte array wherein the byte array is the address itself +- u64 is a 64-bit unsigned integer +- TransactionPayload is an enum for either Program or WriteSet +- Duration is the time in seconds as a 64-bit unsigned integer + +Program: + +``` +struct Program { + code: Vec<u8>, // Variable length byte array + args: Vec<TransactionArgument>, // Variable length array of TransactionArguments + modules: Vec<Vec<u8>>, // Variable length array of variable length byte arrays +} +``` + +TransactionArgument: + +``` +enum TransactionArgument { + U64(u64), // unsigned 64-bit integer + Address(AccountAddress), // Address represented as a variable length byte array + String(String), // Variable length byte array of a string in UTF8 format + ByteArray(ByteArray), // Variable length byte array +} +``` + +WriteSet: + +``` + +struct WriteSet { + // Variable length array of the tuple containing AccessPath and WriteOp + write_set: Vec<(AccessPath, WriteOp)>, +} +``` + + +AccessPath: + +``` +struct AccessPath { + address: AccountAddress, // Address represented as a variable length byte array + path: Vec<u8>, // Variable length byte array +} +``` + +WriteOp: + +``` +enum WriteOp { + Deletion, // Encoded as the 32-bit tag 0 with no further data + Value(Vec<u8>), // Encoded as the 32-bit tag 1 followed by a variable length byte array +} +``` + +### Examples + +**AccountAddress** + +String representation: +``` +ca820bf9305eb97d0d784f71b3955457fbf6911f5300ceaa5d7e8621529eae19 +``` + +LCS representation: +[20000000CA820BF9305EB97D0D784F71B3955457FBF6911F5300CEAA5D7E8621529EAE19] + +where 20000000 is the size of the address: 32 represented as a little endian 32-bit unsigned integer + +**TransactionArgument u64** + +String representation: + +``` +{U64: 9213671392124193148} +``` + +LCS representation: +[000000007CC9BDA45089DD7F] + 
+**TransactionArgument AccountAddress** + +String representation: + +``` +{ADDRESS: 2c25991785343b23ae073a50e5fd809a2cd867526b3c1db2b0bf5d1924c693ed} +``` + +LCS representation: +[01000000200000002C25991785343B23AE073A50E5FD809A2CD867526B3C1DB2B0BF5D1924C693ED] + +**TransactionArgument String** + +String representation: +``` +{STRING: Hello, World!} +``` + +LCS representation: +[020000000D00000048656C6C6F2C20576F726C6421] + +**TransactionArgument ByteArray** + +String representation: +``` +{ByteArray: 0xb"cafed00d"} +``` + +LCS representation: +[0300000004000000CAFED00D] + +**Program** + +String representation: + +``` +{ + code: "move", + args: [{STRING: CAFE D00D}, {STRING: cafe d00d}], + modules: [[CA][FED0][0D]], +} +``` + +LCS representation: +[040000006D6F766502000000020000000900000043414645204430304402000000090000006361666520643030640300000001000000CA02000000FED0010000000D] + +**AccessPath** + +String representation: +``` +{ + address: 9a1ad09742d1ffc62e659e9a7797808b206f956f131d07509449c01ad8220ad4, + path: 01217da6c6b3e19f1825cfb2676daecce3bf3de03cf26647c78df00b371b25cc97 +} +``` + +LCS representation: +[200000009A1AD09742D1FFC62E659E9A7797808B206F956F131D07509449C01AD8220AD42100000001217DA6C6B3E19F1825CFB2676DAECCE3BF3DE03CF26647C78DF00B371B25CC97] + +**WriteOp Deletion** + +LCS representation: +[00000000] + +**WriteOp Value** + +String representation: +cafed00d +[0100000004000000CAFED00D] + +**WriteSet** + +String representation: +``` +[ + ( + AccessPath { + address: a71d76faa2d2d5c3224ec3d41deb293973564a791e55c6782ba76c2bf0495f9a, + path: 01217da6c6b3e19f1825cfb2676daecce3bf3de03cf26647c78df00b371b25cc97 + }, + Deletion + ), + ( + AccessPath { + address: c4c63f80c74b11263e421ebf8486a4e398d0dbc09fa7d4f62ccdb309f3aea81f, + path: 01217da6c6b3e19f18 + }, + cafed00d + ) +] +``` + +LCS representation: 
+[0200000020000000A71D76FAA2D2D5C3224EC3D41DEB293973564A791E55C6782BA76C2BF0495F9A2100000001217DA6C6B3E19F1825CFB2676DAECCE3BF3DE03CF26647C78DF00B371B25CC970000000020000000C4C63F80C74B11263E421EBF8486A4E398D0DBC09FA7D4F62CCDB309F3AEA81F0900000001217DA6C6B3E19F180100000004000000CAFED00D] + +**TransactionPayload with a Program** + +String representation: +``` +{ + code: "move", + args: [{STRING: CAFE D00D}, {STRING: cafe d00d}], + modules: [[CA][FED0][0D]], +} +``` + +LCS representation: +[00000000040000006D6F766502000000020000000900000043414645204430304402000000090000006361666520643030640300000001000000CA02000000FED0010000000D] + +**TransactionPayload with a WriteSet** + +String representation: +``` +[ + ( + AccessPath { + address: a71d76faa2d2d5c3224ec3d41deb293973564a791e55c6782ba76c2bf0495f9a, + path: 01217da6c6b3e19f1825cfb2676daecce3bf3de03cf26647c78df00b371b25cc97 + }, + Deletion + ), + ( + AccessPath { + address: c4c63f80c74b11263e421ebf8486a4e398d0dbc09fa7d4f62ccdb309f3aea81f, + path: 01217da6c6b3e19f18 + }, + cafed00d + ) +] +``` + +LCS representation: +[010000000200000020000000A71D76FAA2D2D5C3224EC3D41DEB293973564A791E55C6782BA76C2BF0495F9A2100000001217DA6C6B3E19F1825CFB2676DAECCE3BF3DE03CF26647C78DF00B371B25CC970000000020000000C4C63F80C74B11263E421EBF8486A4E398D0DBC09FA7D4F62CCDB309F3AEA81F0900000001217DA6C6B3E19F180100000004000000CAFED00D] + +**RawTransaction with a Program** + +String representation: + +``` +{ + sender: 3a24a61e05d129cace9e0efc8bc9e33831fec9a9be66f50fd352a2638a49b9ee, + sequence_number: 32, + payload: Program { + code: "move", + args: [{STRING: CAFE D00D}, {STRING: cafe d00d}], + modules: [[CA][FED0][0D]], + } , + max_gas_amount: 10000, + gas_unit_price: 20000, + expiration_time: 86400 seconds +} +``` + +LCS representation: 
+[200000003A24A61E05D129CACE9E0EFC8BC9E33831FEC9A9BE66F50FD352A2638A49B9EE200000000000000000000000040000006D6F766502000000020000000900000043414645204430304402000000090000006361666520643030640300000001000000CA02000000FED0010000000D1027000000000000204E0000000000008051010000000000] + +**RawTransaction with a WriteSet** + +String representation: + +``` +{ + sender: c3398a599a6f3b9f30b635af29f2ba046d3a752c26e9d0647b9647d1f4c04ad4, + sequence_number: 32, + payload: WriteSet { + write_set: [ + ( + AccessPath { + address: a71d76faa2d2d5c3224ec3d41deb293973564a791e55c6782ba76c2bf0495f9a, + path: 01217da6c6b3e19f1825cfb2676daecce3bf3de03cf26647c78df00b371b25cc97 + }, + Deletion + ), + ( + AccessPath { + address: c4c63f80c74b11263e421ebf8486a4e398d0dbc09fa7d4f62ccdb309f3aea81f, + path: 01217da6c6b3e19f18 + }, + cafed00d + ) + ] + }, + max_gas_amount: 0, + gas_unit_price: 0, + expiration_time: 18446744073709551615 seconds +} +``` + +LCS representation: +[20000000C3398A599A6F3B9F30B635AF29F2BA046D3A752C26E9D0647B9647D1F4C04AD42000000000000000010000000200000020000000A71D76FAA2D2D5C3224EC3D41DEB293973564A791E55C6782BA76C2BF0495F9A2100000001217DA6C6B3E19F1825CFB2676DAECCE3BF3DE03CF26647C78DF00B371B25CC970000000020000000C4C63F80C74B11263E421EBF8486A4E398D0DBC09FA7D4F62CCDB309F3AEA81F0900000001217DA6C6B3E19F180100000004000000CAFED00D00000000000000000000000000000000FFFFFFFFFFFFFFFF] diff --git a/types/src/unit_tests/canonical_serialization_examples.rs b/types/src/unit_tests/canonical_serialization_examples.rs new file mode 100644 index 000000000000..e064749cd18b --- /dev/null +++ b/types/src/unit_tests/canonical_serialization_examples.rs @@ -0,0 +1,349 @@ +// Copyright (c) The Libra Core Contributors +// SPDX-License-Identifier: Apache-2.0 + +//! This test verifies the behavior of the Canonical Serializer against the test vectors +//! provided in the Canonical Serializer document. +//! See: common/canonical_serialization/README.md +//! +//! Test data was generated as follows: +//! 
- AccountAddress: AccountAddress::random() +//! - u64: rand::thread_rng().gen::() +//! +//! To retrieve the binary output, the types were then passed to the following function: +//! +//! fn serialize(input: T) +//! where T: CanonicalSerialize + std::fmt::Debug +//! { +//! let mut serializer = SimpleSerializer::>::new(); +//! serializer.encode_struct(&input).unwrap(); +//! let output = serializer.get_output(); +//! println!("{:?} = {:?}", input, hex::encode_upper(output)); +//! } +//! +//! Both input represented as hex arrays, specifically AccountAddress and path, and the +//! serialized output were passed to the following formatting script to make Rust arrays: +//! +//! #!/usr/bin/env python3 +//! +//! import fileinput +//! +//! indata = fileinput.input().readline().strip() +//! outdata = "" +//! +//! for idx in range(0, len(indata), 2): +//! outdata += "0x{}{}, ".format(indata[idx], indata[idx+1]) +//! +//! print(outdata[:-2]) + +use crate::{ + access_path::AccessPath, + account_address::AccountAddress, + byte_array::ByteArray, + transaction::{Program, RawTransaction, TransactionArgument, TransactionPayload}, + write_set::{WriteOp, WriteSet, WriteSetMut}, +}; +use canonical_serialization::SimpleSerializer; +use std::time::Duration; + +#[test] +fn test_access_path_canonical_serialization_example() { + let account_address = AccountAddress::new([ + 0x9a, 0x1a, 0xd0, 0x97, 0x42, 0xd1, 0xff, 0xc6, 0x2e, 0x65, 0x9e, 0x9a, 0x77, 0x97, 0x80, + 0x8b, 0x20, 0x6f, 0x95, 0x6f, 0x13, 0x1d, 0x07, 0x50, 0x94, 0x49, 0xc0, 0x1a, 0xd8, 0x22, + 0x0a, 0xd4, + ]); + let input = AccessPath::new( + account_address, + vec![ + 0x01, 0x21, 0x7d, 0xa6, 0xc6, 0xb3, 0xe1, 0x9f, 0x18, 0x25, 0xcf, 0xb2, 0x67, 0x6d, + 0xae, 0xcc, 0xe3, 0xbf, 0x3d, 0xe0, 0x3c, 0xf2, 0x66, 0x47, 0xc7, 0x8d, 0xf0, 0x0b, + 0x37, 0x1b, 0x25, 0xcc, 0x97, + ], + ); + + let expected_output = vec![ + 0x20, 0x00, 0x00, 0x00, 0x9A, 0x1A, 0xD0, 0x97, 0x42, 0xD1, 0xFF, 0xC6, 0x2E, 0x65, 0x9E, + 0x9A, 0x77, 0x97, 0x80, 0x8B, 
0x20, 0x6F, 0x95, 0x6F, 0x13, 0x1D, 0x07, 0x50, 0x94, 0x49, + 0xC0, 0x1A, 0xD8, 0x22, 0x0A, 0xD4, 0x21, 0x00, 0x00, 0x00, 0x01, 0x21, 0x7D, 0xA6, 0xC6, + 0xB3, 0xE1, 0x9F, 0x18, 0x25, 0xCF, 0xB2, 0x67, 0x6D, 0xAE, 0xCC, 0xE3, 0xBF, 0x3D, 0xE0, + 0x3C, 0xF2, 0x66, 0x47, 0xC7, 0x8D, 0xF0, 0x0B, 0x37, 0x1B, 0x25, 0xCC, 0x97, + ]; + + let actual_output = SimpleSerializer::>::serialize(&input).unwrap(); + assert_eq!(expected_output, actual_output); +} + +#[test] +fn test_account_address_canonical_serialization_example() { + let input = AccountAddress::new([ + 0xca, 0x82, 0x0b, 0xf9, 0x30, 0x5e, 0xb9, 0x7d, 0x0d, 0x78, 0x4f, 0x71, 0xb3, 0x95, 0x54, + 0x57, 0xfb, 0xf6, 0x91, 0x1f, 0x53, 0x00, 0xce, 0xaa, 0x5d, 0x7e, 0x86, 0x21, 0x52, 0x9e, + 0xae, 0x19, + ]); + + let expected_output: Vec = vec![ + 0x20, 0x00, 0x00, 0x00, 0xCA, 0x82, 0x0B, 0xF9, 0x30, 0x5E, 0xB9, 0x7D, 0x0D, 0x78, 0x4F, + 0x71, 0xB3, 0x95, 0x54, 0x57, 0xFB, 0xF6, 0x91, 0x1F, 0x53, 0x00, 0xCE, 0xAA, 0x5D, 0x7E, + 0x86, 0x21, 0x52, 0x9E, 0xAE, 0x19, + ]; + + let actual_output = SimpleSerializer::>::serialize(&input).unwrap(); + assert_eq!(expected_output, actual_output); +} + +#[test] +fn test_program_canonical_serialization_example() { + let input = get_common_program(); + + let expected_output: Vec = vec![ + 0x04, 0x00, 0x00, 0x00, 0x6D, 0x6F, 0x76, 0x65, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, + 0x00, 0x09, 0x00, 0x00, 0x00, 0x43, 0x41, 0x46, 0x45, 0x20, 0x44, 0x30, 0x30, 0x44, 0x02, + 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x63, 0x61, 0x66, 0x65, 0x20, 0x64, 0x30, 0x30, + 0x64, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xCA, 0x02, 0x00, 0x00, 0x00, 0xFE, + 0xD0, 0x01, 0x00, 0x00, 0x00, 0x0D, + ]; + + let actual_output = SimpleSerializer::>::serialize(&input).unwrap(); + assert_eq!(expected_output, actual_output); +} + +#[test] +fn test_raw_transaction_with_a_program_canonical_serialization_example() { + let input = RawTransaction::new( + AccountAddress::new([ + 0x3a, 0x24, 0xa6, 0x1e, 0x05, 
0xd1, 0x29, 0xca, 0xce, 0x9e, 0x0e, 0xfc, 0x8b, 0xc9, + 0xe3, 0x38, 0x31, 0xfe, 0xc9, 0xa9, 0xbe, 0x66, 0xf5, 0x0f, 0xd3, 0x52, 0xa2, 0x63, + 0x8a, 0x49, 0xb9, 0xee, + ]), + 32, + get_common_program(), + 10000, + 20000, + Duration::from_secs(86400), + ); + + let expected_output = vec![ + 0x20, 0x00, 0x00, 0x00, 0x3A, 0x24, 0xA6, 0x1E, 0x05, 0xD1, 0x29, 0xCA, 0xCE, 0x9E, 0x0E, + 0xFC, 0x8B, 0xC9, 0xE3, 0x38, 0x31, 0xFE, 0xC9, 0xA9, 0xBE, 0x66, 0xF5, 0x0F, 0xD3, 0x52, + 0xA2, 0x63, 0x8A, 0x49, 0xB9, 0xEE, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6D, 0x6F, 0x76, 0x65, 0x02, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x43, 0x41, 0x46, 0x45, 0x20, 0x44, 0x30, + 0x30, 0x44, 0x02, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x63, 0x61, 0x66, 0x65, 0x20, + 0x64, 0x30, 0x30, 0x64, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xCA, 0x02, 0x00, + 0x00, 0x00, 0xFE, 0xD0, 0x01, 0x00, 0x00, 0x00, 0x0D, 0x10, 0x27, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x20, 0x4E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x51, 0x01, 0x00, 0x00, + 0x00, 0x00, 0x00, + ]; + + let actual_output = SimpleSerializer::>::serialize(&input).unwrap(); + assert_eq!(expected_output, actual_output); +} + +#[test] +fn test_raw_transaction_with_a_write_set_canonical_serialization_example() { + let input = RawTransaction::new_write_set( + AccountAddress::new([ + 0xc3, 0x39, 0x8a, 0x59, 0x9a, 0x6f, 0x3b, 0x9f, 0x30, 0xb6, 0x35, 0xaf, 0x29, 0xf2, + 0xba, 0x04, 0x6d, 0x3a, 0x75, 0x2c, 0x26, 0xe9, 0xd0, 0x64, 0x7b, 0x96, 0x47, 0xd1, + 0xf4, 0xc0, 0x4a, 0xd4, + ]), + 32, + get_common_write_set(), + ); + + let expected_output = vec![ + 0x20, 0x00, 0x00, 0x00, 0xC3, 0x39, 0x8A, 0x59, 0x9A, 0x6F, 0x3B, 0x9F, 0x30, 0xB6, 0x35, + 0xAF, 0x29, 0xF2, 0xBA, 0x04, 0x6D, 0x3A, 0x75, 0x2C, 0x26, 0xE9, 0xD0, 0x64, 0x7B, 0x96, + 0x47, 0xD1, 0xF4, 0xC0, 0x4A, 0xD4, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + 0x00, 0x00, 0x00, 0x02, 
0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0xA7, 0x1D, 0x76, 0xFA, + 0xA2, 0xD2, 0xD5, 0xC3, 0x22, 0x4E, 0xC3, 0xD4, 0x1D, 0xEB, 0x29, 0x39, 0x73, 0x56, 0x4A, + 0x79, 0x1E, 0x55, 0xC6, 0x78, 0x2B, 0xA7, 0x6C, 0x2B, 0xF0, 0x49, 0x5F, 0x9A, 0x21, 0x00, + 0x00, 0x00, 0x01, 0x21, 0x7D, 0xA6, 0xC6, 0xB3, 0xE1, 0x9F, 0x18, 0x25, 0xCF, 0xB2, 0x67, + 0x6D, 0xAE, 0xCC, 0xE3, 0xBF, 0x3D, 0xE0, 0x3C, 0xF2, 0x66, 0x47, 0xC7, 0x8D, 0xF0, 0x0B, + 0x37, 0x1B, 0x25, 0xCC, 0x97, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0xC4, 0xC6, + 0x3F, 0x80, 0xC7, 0x4B, 0x11, 0x26, 0x3E, 0x42, 0x1E, 0xBF, 0x84, 0x86, 0xA4, 0xE3, 0x98, + 0xD0, 0xDB, 0xC0, 0x9F, 0xA7, 0xD4, 0xF6, 0x2C, 0xCD, 0xB3, 0x09, 0xF3, 0xAE, 0xA8, 0x1F, + 0x09, 0x00, 0x00, 0x00, 0x01, 0x21, 0x7D, 0xA6, 0xC6, 0xB3, 0xE1, 0x9F, 0x18, 0x01, 0x00, + 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xCA, 0xFE, 0xD0, 0x0D, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, + ]; + + let actual_output = SimpleSerializer::>::serialize(&input).unwrap(); + assert_eq!(expected_output, actual_output); +} + +#[test] +fn test_transaction_argument_address_canonical_serialization_example() { + let input = TransactionArgument::Address(AccountAddress::new([ + 0x2c, 0x25, 0x99, 0x17, 0x85, 0x34, 0x3b, 0x23, 0xae, 0x07, 0x3a, 0x50, 0xe5, 0xfd, 0x80, + 0x9a, 0x2c, 0xd8, 0x67, 0x52, 0x6b, 0x3c, 0x1d, 0xb2, 0xb0, 0xbf, 0x5d, 0x19, 0x24, 0xc6, + 0x93, 0xed, + ])); + + let expected_output: Vec = vec![ + 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x2C, 0x25, 0x99, 0x17, 0x85, 0x34, 0x3B, + 0x23, 0xAE, 0x07, 0x3A, 0x50, 0xE5, 0xFD, 0x80, 0x9A, 0x2C, 0xD8, 0x67, 0x52, 0x6B, 0x3C, + 0x1D, 0xB2, 0xB0, 0xBF, 0x5D, 0x19, 0x24, 0xC6, 0x93, 0xED, + ]; + + let actual_output = SimpleSerializer::>::serialize(&input).unwrap(); + assert_eq!(expected_output, actual_output); +} + +#[test] +fn test_transaction_argument_byte_array_canonical_serialization_example() { + let 
input = TransactionArgument::ByteArray(ByteArray::new(vec![0xCA, 0xFE, 0xD0, 0x0D])); + + let expected_output: Vec = vec![ + 0x03, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xCA, 0xFE, 0xD0, 0x0D, + ]; + + let actual_output = SimpleSerializer::>::serialize(&input).unwrap(); + assert_eq!(expected_output, actual_output); +} + +#[test] +fn test_transaction_argument_string_canonical_serialization_example() { + let input = TransactionArgument::String("Hello, World!".to_string()); + let expected_output: Vec = vec![ + 0x02, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x2C, 0x20, + 0x57, 0x6F, 0x72, 0x6C, 0x64, 0x21, + ]; + + let actual_output = SimpleSerializer::>::serialize(&input).unwrap(); + assert_eq!(expected_output, actual_output); +} + +#[test] +fn test_transaction_argument_u64_canonical_serialization_example() { + let input = TransactionArgument::U64(9_213_671_392_124_193_148); + let expected_output: Vec = vec![ + 0x00, 0x00, 0x00, 0x00, 0x7C, 0xC9, 0xBD, 0xA4, 0x50, 0x89, 0xDD, 0x7F, + ]; + + let actual_output = SimpleSerializer::>::serialize(&input).unwrap(); + assert_eq!(expected_output, actual_output); +} + +#[test] +fn test_transaction_payload_with_a_program_canonical_serialization_example() { + let input = TransactionPayload::Program(get_common_program()); + + let expected_output = vec![ + 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6D, 0x6F, 0x76, 0x65, 0x02, 0x00, 0x00, + 0x00, 0x02, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x43, 0x41, 0x46, 0x45, 0x20, 0x44, + 0x30, 0x30, 0x44, 0x02, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x63, 0x61, 0x66, 0x65, + 0x20, 0x64, 0x30, 0x30, 0x64, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xCA, 0x02, + 0x00, 0x00, 0x00, 0xFE, 0xD0, 0x01, 0x00, 0x00, 0x00, 0x0D, + ]; + + let actual_output = SimpleSerializer::>::serialize(&input).unwrap(); + assert_eq!(expected_output, actual_output); +} + +#[test] +fn test_transaction_payload_with_a_write_set_canonical_serialization_example() { + let 
input = TransactionPayload::WriteSet(get_common_write_set()); + + let expected_output = vec![ + 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0xA7, 0x1D, 0x76, + 0xFA, 0xA2, 0xD2, 0xD5, 0xC3, 0x22, 0x4E, 0xC3, 0xD4, 0x1D, 0xEB, 0x29, 0x39, 0x73, 0x56, + 0x4A, 0x79, 0x1E, 0x55, 0xC6, 0x78, 0x2B, 0xA7, 0x6C, 0x2B, 0xF0, 0x49, 0x5F, 0x9A, 0x21, + 0x00, 0x00, 0x00, 0x01, 0x21, 0x7D, 0xA6, 0xC6, 0xB3, 0xE1, 0x9F, 0x18, 0x25, 0xCF, 0xB2, + 0x67, 0x6D, 0xAE, 0xCC, 0xE3, 0xBF, 0x3D, 0xE0, 0x3C, 0xF2, 0x66, 0x47, 0xC7, 0x8D, 0xF0, + 0x0B, 0x37, 0x1B, 0x25, 0xCC, 0x97, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0xC4, + 0xC6, 0x3F, 0x80, 0xC7, 0x4B, 0x11, 0x26, 0x3E, 0x42, 0x1E, 0xBF, 0x84, 0x86, 0xA4, 0xE3, + 0x98, 0xD0, 0xDB, 0xC0, 0x9F, 0xA7, 0xD4, 0xF6, 0x2C, 0xCD, 0xB3, 0x09, 0xF3, 0xAE, 0xA8, + 0x1F, 0x09, 0x00, 0x00, 0x00, 0x01, 0x21, 0x7D, 0xA6, 0xC6, 0xB3, 0xE1, 0x9F, 0x18, 0x01, + 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xCA, 0xFE, 0xD0, 0x0D, + ]; + + let actual_output = SimpleSerializer::>::serialize(&input).unwrap(); + assert_eq!(expected_output, actual_output); +} + +#[test] +fn test_write_op_delete_canonical_serialization_example() { + let input = WriteOp::Deletion; + let expected_output = vec![0x00, 0x00, 0x00, 0x00]; + + let actual_output = SimpleSerializer::>::serialize(&input).unwrap(); + assert_eq!(expected_output, actual_output); +} + +#[test] +fn test_write_op_value_canonical_serialization_example() { + let input = WriteOp::Value(vec![0xca, 0xfe, 0xd0, 0x0d]); + let expected_output = vec![ + 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xCA, 0xFE, 0xD0, 0x0D, + ]; + + let actual_output = SimpleSerializer::>::serialize(&input).unwrap(); + assert_eq!(expected_output, actual_output); +} + +#[test] +fn test_write_set_canonical_serialization_example() { + let input = get_common_write_set(); + + let expected_output = vec![ + 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0xA7, 0x1D, 0x76, 0xFA, 0xA2, 0xD2, 0xD5, + 0xC3, 
0x22, 0x4E, 0xC3, 0xD4, 0x1D, 0xEB, 0x29, 0x39, 0x73, 0x56, 0x4A, 0x79, 0x1E, 0x55, + 0xC6, 0x78, 0x2B, 0xA7, 0x6C, 0x2B, 0xF0, 0x49, 0x5F, 0x9A, 0x21, 0x00, 0x00, 0x00, 0x01, + 0x21, 0x7D, 0xA6, 0xC6, 0xB3, 0xE1, 0x9F, 0x18, 0x25, 0xCF, 0xB2, 0x67, 0x6D, 0xAE, 0xCC, + 0xE3, 0xBF, 0x3D, 0xE0, 0x3C, 0xF2, 0x66, 0x47, 0xC7, 0x8D, 0xF0, 0x0B, 0x37, 0x1B, 0x25, + 0xCC, 0x97, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0xC4, 0xC6, 0x3F, 0x80, 0xC7, + 0x4B, 0x11, 0x26, 0x3E, 0x42, 0x1E, 0xBF, 0x84, 0x86, 0xA4, 0xE3, 0x98, 0xD0, 0xDB, 0xC0, + 0x9F, 0xA7, 0xD4, 0xF6, 0x2C, 0xCD, 0xB3, 0x09, 0xF3, 0xAE, 0xA8, 0x1F, 0x09, 0x00, 0x00, + 0x00, 0x01, 0x21, 0x7D, 0xA6, 0xC6, 0xB3, 0xE1, 0x9F, 0x18, 0x01, 0x00, 0x00, 0x00, 0x04, + 0x00, 0x00, 0x00, 0xCA, 0xFE, 0xD0, 0x0D, + ]; + + let actual_output = SimpleSerializer::>::serialize(&input).unwrap(); + assert_eq!(expected_output, actual_output); +} + +fn get_common_program() -> Program { + Program::new( + b"move".to_vec(), + vec![vec![0xCA], vec![0xFE, 0xD0], vec![0x0D]], + vec![ + TransactionArgument::String("CAFE D00D".to_string()), + TransactionArgument::String("cafe d00d".to_string()), + ], + ) +} + +fn get_common_write_set() -> WriteSet { + WriteSetMut::new(vec![ + ( + AccessPath::new( + AccountAddress::new([ + 0xa7, 0x1d, 0x76, 0xfa, 0xa2, 0xd2, 0xd5, 0xc3, 0x22, 0x4e, 0xc3, 0xd4, 0x1d, + 0xeb, 0x29, 0x39, 0x73, 0x56, 0x4a, 0x79, 0x1e, 0x55, 0xc6, 0x78, 0x2b, 0xa7, + 0x6c, 0x2b, 0xf0, 0x49, 0x5f, 0x9a, + ]), + vec![ + 0x01, 0x21, 0x7d, 0xa6, 0xc6, 0xb3, 0xe1, 0x9f, 0x18, 0x25, 0xcf, 0xb2, 0x67, + 0x6d, 0xae, 0xcc, 0xe3, 0xbf, 0x3d, 0xe0, 0x3c, 0xf2, 0x66, 0x47, 0xc7, 0x8d, + 0xf0, 0x0b, 0x37, 0x1b, 0x25, 0xcc, 0x97, + ], + ), + WriteOp::Deletion, + ), + ( + AccessPath::new( + AccountAddress::new([ + 0xc4, 0xc6, 0x3f, 0x80, 0xc7, 0x4b, 0x11, 0x26, 0x3e, 0x42, 0x1e, 0xbf, 0x84, + 0x86, 0xa4, 0xe3, 0x98, 0xd0, 0xdb, 0xc0, 0x9f, 0xa7, 0xd4, 0xf6, 0x2c, 0xcd, + 0xb3, 0x09, 0xf3, 0xae, 0xa8, 0x1f, + ]), + vec![0x01, 0x21, 0x7d, 
0xa6, 0xc6, 0xb3, 0xe1, 0x9f, 0x18], + ), + WriteOp::Value(vec![0xca, 0xfe, 0xd0, 0x0d]), + ), + ]) + .freeze() + .unwrap() +} diff --git a/types/src/unit_tests/mod.rs b/types/src/unit_tests/mod.rs index 69d4fa665af0..45cd6bc12a9f 100644 --- a/types/src/unit_tests/mod.rs +++ b/types/src/unit_tests/mod.rs @@ -3,6 +3,7 @@ mod access_path_test; mod address_test; +mod canonical_serialization_examples; mod contract_event_proto_conversion_test; mod get_with_proof_proto_conversion_test; mod language_storage_test; From ba4c23c436a31d784b8c83dd7e79515672256ea2 Mon Sep 17 00:00:00 2001 From: David Wolinsky Date: Wed, 21 Aug 2019 14:52:59 -0700 Subject: [PATCH 2/4] [commmon] Restructure canonical serialization Given the importance of canonical serialization within the code base, we are restructuring it to make it to simplify access and understanding. This diff moves each major component into its own separate module (file) and re-exports so as to not break any dependencies. In addition, major types are now fully exposed and implemented, namely signed integers. 
--- .../src/canonical_deserialize.rs | 238 +++++++ .../src/canonical_serialization_test.rs | 12 +- .../src/canonical_serialize.rs | 217 ++++++ common/canonical_serialization/src/lib.rs | 667 +----------------- .../src/simple_deserializer.rs | 180 +++++ .../src/simple_serializer.rs | 185 +++++ 6 files changed, 836 insertions(+), 663 deletions(-) create mode 100644 common/canonical_serialization/src/canonical_deserialize.rs create mode 100644 common/canonical_serialization/src/canonical_serialize.rs create mode 100644 common/canonical_serialization/src/simple_deserializer.rs create mode 100644 common/canonical_serialization/src/simple_serializer.rs diff --git a/common/canonical_serialization/src/canonical_deserialize.rs b/common/canonical_serialization/src/canonical_deserialize.rs new file mode 100644 index 000000000000..48f8f1c63c38 --- /dev/null +++ b/common/canonical_serialization/src/canonical_deserialize.rs @@ -0,0 +1,238 @@ +// Copyright (c) The Libra Core Contributors +// SPDX-License-Identifier: Apache-2.0 + +use failure::prelude::*; +use std::collections::BTreeMap; + +///! In order to guarantee consistency of object representation to byte representation for +///! signature generation and verification, Libra leverages Libra Canonical Serialization (LCS) +///! documented in `README.md`. + +/// Interface that all types must implement to support LCS deserialization. 
+pub trait CanonicalDeserialize { + fn deserialize(deserializer: &mut impl CanonicalDeserializer) -> Result + where + Self: Sized; +} + +/// Trait for deserializers that implement LCS +pub trait CanonicalDeserializer { + fn decode_bool(&mut self) -> Result; + + fn decode_btreemap( + &mut self, + ) -> Result>; + + // decode a byte array with the given length as input + fn decode_bytes_with_len(&mut self, len: u32) -> Result>; + + fn decode_i8(&mut self) -> Result; + + fn decode_i16(&mut self) -> Result; + + fn decode_i32(&mut self) -> Result; + + fn decode_i64(&mut self) -> Result; + + fn decode_optional(&mut self) -> Result>; + + fn decode_string(&mut self) -> Result; + + fn decode_struct(&mut self) -> Result + where + T: CanonicalDeserialize, + Self: Sized, + { + T::deserialize(self) + } + + fn decode_tuple2(&mut self) -> Result<(T0, T1)> + where + Self: Sized, + T0: CanonicalDeserialize, + T1: CanonicalDeserialize, + { + Ok((T0::deserialize(self)?, T1::deserialize(self)?)) + } + + fn decode_tuple3(&mut self) -> Result<(T0, T1, T2)> + where + Self: Sized, + T0: CanonicalDeserialize, + T1: CanonicalDeserialize, + T2: CanonicalDeserialize, + { + Ok(( + T0::deserialize(self)?, + T1::deserialize(self)?, + T2::deserialize(self)?, + )) + } + + fn decode_u8(&mut self) -> Result; + + fn decode_u16(&mut self) -> Result; + + fn decode_u32(&mut self) -> Result; + + fn decode_u64(&mut self) -> Result; + + fn decode_variable_length_bytes(&mut self) -> Result>; + + fn decode_vec(&mut self) -> Result>; +} + +impl CanonicalDeserialize for BTreeMap, Vec> { + fn deserialize(deserializer: &mut impl CanonicalDeserializer) -> Result + where + Self: Sized, + { + Ok(deserializer.decode_btreemap()?) 
+ } +} + +impl CanonicalDeserialize for i8 { + fn deserialize(deserializer: &mut impl CanonicalDeserializer) -> Result + where + Self: Sized, + { + let num = deserializer.decode_i8()?; + Ok(num) + } +} + +impl CanonicalDeserialize for i16 { + fn deserialize(deserializer: &mut impl CanonicalDeserializer) -> Result + where + Self: Sized, + { + let num = deserializer.decode_i16()?; + Ok(num) + } +} + +impl CanonicalDeserialize for i32 { + fn deserialize(deserializer: &mut impl CanonicalDeserializer) -> Result + where + Self: Sized, + { + let num = deserializer.decode_i32()?; + Ok(num) + } +} + +impl CanonicalDeserialize for i64 { + fn deserialize(deserializer: &mut impl CanonicalDeserializer) -> Result + where + Self: Sized, + { + let num = deserializer.decode_i64()?; + Ok(num) + } +} + +impl CanonicalDeserialize for String { + fn deserialize(deserializer: &mut impl CanonicalDeserializer) -> Result + where + Self: Sized, + { + Ok(deserializer.decode_string()?) + } +} + +impl CanonicalDeserialize for (T0, T1) +where + T0: CanonicalDeserialize, + T1: CanonicalDeserialize, +{ + fn deserialize(deserializer: &mut impl CanonicalDeserializer) -> Result + where + Self: Sized, + { + deserializer.decode_tuple2() + } +} + +impl CanonicalDeserialize for (T0, T1, T2) +where + T0: CanonicalDeserialize, + T1: CanonicalDeserialize, + T2: CanonicalDeserialize, +{ + fn deserialize(deserializer: &mut impl CanonicalDeserializer) -> Result + where + Self: Sized, + { + deserializer.decode_tuple3() + } +} + +impl CanonicalDeserialize for u8 { + fn deserialize(deserializer: &mut impl CanonicalDeserializer) -> Result + where + Self: Sized, + { + let num = deserializer.decode_u8()?; + Ok(num) + } +} + +impl CanonicalDeserialize for u16 { + fn deserialize(deserializer: &mut impl CanonicalDeserializer) -> Result { + deserializer.decode_u16() + } +} + +impl CanonicalDeserialize for u32 { + fn deserialize(deserializer: &mut impl CanonicalDeserializer) -> Result + where + Self: Sized, + { + 
deserializer.decode_u32() + } +} + +impl CanonicalDeserialize for u64 { + fn deserialize(deserializer: &mut impl CanonicalDeserializer) -> Result + where + Self: Sized, + { + let num = deserializer.decode_u64()?; + Ok(num) + } +} + +impl CanonicalDeserialize for Option +where + T: CanonicalDeserialize, +{ + fn deserialize(deserializer: &mut impl CanonicalDeserializer) -> Result + where + Self: Sized, + { + deserializer.decode_optional() + } +} + +/// usize is dependent on architecture. LCS encodes it as a 64-bit unsigned integer. The serializer +/// enforces that usize is smaller than or equal to the largest 64-bit unsigned integer. +impl CanonicalDeserialize for usize { + fn deserialize(deserializer: &mut impl CanonicalDeserializer) -> Result + where + Self: Sized, + { + Ok(deserializer.decode_u64()? as usize) + } +} + +impl CanonicalDeserialize for Vec +where + T: CanonicalDeserialize, +{ + fn deserialize(deserializer: &mut impl CanonicalDeserializer) -> Result + where + Self: Sized, + { + deserializer.decode_vec() + } +} diff --git a/common/canonical_serialization/src/canonical_serialization_test.rs b/common/canonical_serialization/src/canonical_serialization_test.rs index de5e0d226bef..dd09c98531e5 100644 --- a/common/canonical_serialization/src/canonical_serialization_test.rs +++ b/common/canonical_serialization/src/canonical_serialization_test.rs @@ -6,10 +6,10 @@ #![allow(clippy::blacklisted_name)] #![allow(clippy::many_single_char_names)] -use super::*; +use crate::*; use byteorder::WriteBytesExt; use failure::Result; -use std::u32; +use std::collections::BTreeMap; // Do not change the test vectors. Please read the comment below. 
const TEST_VECTOR_1: &str = "ffffffffffffffff060000006463584d4237640000000000000009000000000102\ @@ -125,7 +125,7 @@ fn test_btreemap_encode() { let mut deserializer = SimpleDeserializer::new(&serialized_bytes); // ensure the order was encoded in lexicographic order - assert_eq!(deserializer.raw_bytes.read_u32::().unwrap(), 4); + assert_eq!(deserializer.decode_u32().unwrap(), 4); assert_eq!(deserializer.decode_variable_length_bytes().unwrap(), key1); assert_eq!(deserializer.decode_variable_length_bytes().unwrap(), value); assert_eq!(deserializer.decode_variable_length_bytes().unwrap(), key3); @@ -165,10 +165,8 @@ fn test_serialization_roundtrip() { let mut deserializer = SimpleDeserializer::new(&serialized_bytes); let deserialized_foo = Foo::deserialize(&mut deserializer).unwrap(); assert_eq!(foo, deserialized_foo); - assert_eq!( - deserializer.raw_bytes.position(), - deserializer.raw_bytes.get_ref().len() as u64 - ); + assert_eq!(deserializer.position(), deserializer.len() as u64); + assert!(deserializer.is_empty()); } #[test] diff --git a/common/canonical_serialization/src/canonical_serialize.rs b/common/canonical_serialization/src/canonical_serialize.rs new file mode 100644 index 000000000000..2bcfc5d7fb46 --- /dev/null +++ b/common/canonical_serialization/src/canonical_serialize.rs @@ -0,0 +1,217 @@ +// Copyright (c) The Libra Core Contributors +// SPDX-License-Identifier: Apache-2.0 + +use failure::prelude::*; +use std::collections::BTreeMap; + +///! In order to guarantee consistency of object representation to byte representation for +///! signature generation and verification, Libra leverages Libra Canonical Serialization (LCS) +///! documented in `README.md`. + +/// Interface that all types must implement to support LCS serialization. 
+pub trait CanonicalSerialize { + fn serialize(&self, serializer: &mut impl CanonicalSerializer) -> Result<()>; +} + +/// Trait for serializers that implement LCS +pub trait CanonicalSerializer { + fn encode_bool(&mut self, b: bool) -> Result<&mut Self>; + + fn encode_btreemap( + &mut self, + v: &BTreeMap, + ) -> Result<&mut Self>; + + fn encode_i8(&mut self, v: i8) -> Result<&mut Self>; + + fn encode_i16(&mut self, v: i16) -> Result<&mut Self>; + + fn encode_i32(&mut self, v: i32) -> Result<&mut Self>; + + fn encode_i64(&mut self, v: i64) -> Result<&mut Self>; + + fn encode_optional(&mut self, v: &Option) -> Result<&mut Self>; + + // Use this encoder when the length of the array is known to be fixed and always known at + // deserialization time. The raw bytes of the array without length prefix are encoded. + // For deserialization, use decode_bytes_with_len() which requires giving the length + // as input + fn encode_raw_bytes(&mut self, bytes: &[u8]) -> Result<&mut Self>; + + fn encode_string(&mut self, s: &str) -> Result<&mut Self>; + + fn encode_struct(&mut self, structure: &impl CanonicalSerialize) -> Result<&mut Self> + where + Self: std::marker::Sized, + { + structure.serialize(self)?; + Ok(self) + } + + fn encode_tuple2(&mut self, v: &(T0, T1)) -> Result<&mut Self> + where + Self: Sized, + T0: CanonicalSerialize, + T1: CanonicalSerialize, + { + v.0.serialize(self)?; + v.1.serialize(self)?; + Ok(self) + } + + fn encode_tuple3(&mut self, v: &(T0, T1, T2)) -> Result<&mut Self> + where + Self: Sized, + T0: CanonicalSerialize, + T1: CanonicalSerialize, + T2: CanonicalSerialize, + { + v.0.serialize(self)?; + v.1.serialize(self)?; + v.2.serialize(self)?; + Ok(self) + } + + fn encode_u8(&mut self, v: u8) -> Result<&mut Self>; + + fn encode_u16(&mut self, v: u16) -> Result<&mut Self>; + + fn encode_u32(&mut self, v: u32) -> Result<&mut Self>; + + fn encode_u64(&mut self, v: u64) -> Result<&mut Self>; + + // Use this encoder to encode variable length byte arrays 
whose length may not be known at + // deserialization time. + fn encode_variable_length_bytes(&mut self, v: &[u8]) -> Result<&mut Self>; + + fn encode_vec(&mut self, v: &[T]) -> Result<&mut Self>; +} + +impl CanonicalSerialize for BTreeMap, Vec> { + fn serialize(&self, serializer: &mut impl CanonicalSerializer) -> Result<()> { + serializer.encode_btreemap(self)?; + Ok(()) + } +} + +impl CanonicalSerialize for i8 { + fn serialize(&self, serializer: &mut impl CanonicalSerializer) -> Result<()> { + serializer.encode_i8(*self)?; + Ok(()) + } +} + +impl CanonicalSerialize for i16 { + fn serialize(&self, serializer: &mut impl CanonicalSerializer) -> Result<()> { + serializer.encode_i16(*self)?; + Ok(()) + } +} + +impl CanonicalSerialize for i32 { + fn serialize(&self, serializer: &mut impl CanonicalSerializer) -> Result<()> { + serializer.encode_i32(*self)?; + Ok(()) + } +} + +impl CanonicalSerialize for i64 { + fn serialize(&self, serializer: &mut impl CanonicalSerializer) -> Result<()> { + serializer.encode_i64(*self)?; + Ok(()) + } +} + +impl CanonicalSerialize for Option +where + T: CanonicalSerialize, +{ + fn serialize(&self, serializer: &mut impl CanonicalSerializer) -> Result<()> { + serializer.encode_optional(self)?; + Ok(()) + } +} + +impl CanonicalSerialize for &str { + fn serialize(&self, serializer: &mut impl CanonicalSerializer) -> Result<()> { + serializer.encode_string(self)?; + Ok(()) + } +} + +impl CanonicalSerialize for String { + fn serialize(&self, serializer: &mut impl CanonicalSerializer) -> Result<()> { + serializer.encode_string(self.as_str())?; + Ok(()) + } +} + +impl CanonicalSerialize for (T0, T1) +where + T0: CanonicalSerialize, + T1: CanonicalSerialize, +{ + fn serialize(&self, serializer: &mut impl CanonicalSerializer) -> Result<()> { + serializer.encode_tuple2(self)?; + Ok(()) + } +} + +impl CanonicalSerialize for (T0, T1, T2) +where + T0: CanonicalSerialize, + T1: CanonicalSerialize, + T2: CanonicalSerialize, +{ + fn serialize(&self, 
serializer: &mut impl CanonicalSerializer) -> Result<()> { + serializer.encode_tuple3(self)?; + Ok(()) + } +} + +impl CanonicalSerialize for u8 { + fn serialize(&self, serializer: &mut impl CanonicalSerializer) -> Result<()> { + serializer.encode_u8(*self)?; + Ok(()) + } +} + +impl CanonicalSerialize for u16 { + fn serialize(&self, serializer: &mut impl CanonicalSerializer) -> Result<()> { + serializer.encode_u16(*self)?; + Ok(()) + } +} + +impl CanonicalSerialize for u32 { + fn serialize(&self, serializer: &mut impl CanonicalSerializer) -> Result<()> { + serializer.encode_u32(*self)?; + Ok(()) + } +} + +impl CanonicalSerialize for u64 { + fn serialize(&self, serializer: &mut impl CanonicalSerializer) -> Result<()> { + serializer.encode_u64(*self)?; + Ok(()) + } +} + +/// usize is architecture dependent, LCS encodes it as a 64-bit unsigned integer and fails +/// if usize is larger than a 64-bit integer. +impl CanonicalSerialize for usize { + fn serialize(&self, serializer: &mut impl CanonicalSerializer) -> Result<()> { + serializer.encode_u64(*self as u64)?; + Ok(()) + } +} + +impl CanonicalSerialize for Vec +where + T: CanonicalSerialize, +{ + fn serialize(&self, serializer: &mut impl CanonicalSerializer) -> Result<()> { + serializer.encode_vec(self.as_ref())?; + Ok(()) + } +} diff --git a/common/canonical_serialization/src/lib.rs b/common/canonical_serialization/src/lib.rs index f90ae1957a85..fb74189b128a 100644 --- a/common/canonical_serialization/src/lib.rs +++ b/common/canonical_serialization/src/lib.rs @@ -1,664 +1,19 @@ // Copyright (c) The Libra Core Contributors // SPDX-License-Identifier: Apache-2.0 -//! This module defines traits and implementations of canonical serialization mechanism. -//! -//! A struct can implement the CanonicalSerialize trait to specify how to serialize itself, -//! and the CanonicalDeserialize trait to specify deserialization, if it needs to. One design -//! goal of this serialization format is to optimize for simplicity. 
It is not designed to be -//! another full-fledged network serialization as Protobuf or Thrift. It is designed -//! for doing only one thing right, which is to deterministically generate consistent bytes -//! from a data structure. -//! -//! A good example of how to use this framework is described in -//! ./canonical_serialization_test.rs -//! -//! An extremely simple implementation of CanonicalSerializer is also provided, the encoding -//! rules are: -//! (All unsigned integers are encoded in little-endian representation unless specified otherwise) -//! -//! 1. The encoding of an unsigned 64-bit integer is defined as its little-endian representation -//! in 8 bytes -//! -//! 2. The encoding of an item (byte array) is defined as: -//! [length in bytes, represented as 4-byte integer] || [item in bytes] -//! -//! -//! 3. The encoding of a list of items is defined as: (This is not implemented yet because -//! there is no known struct that needs it yet, but can be added later easily) -//! [No. of items in the list, represented as 4-byte integer] || encoding(item_0) || .... -//! -//! 4. The encoding of an ordered map where the keys are ordered by lexicographic order. -//! Currently, we only support key and value of type Vec. The encoding is defined as: -//! [No. of key value pairs in the map, represented as 4-byte integer] || encode(key1) || -//! encode(value1) || encode(key2) || encode(value2)... -//! where the pairs are appended following the lexicographic order of the key -//! -//! What is canonical serialization? -//! -//! Canonical serialization guarantees byte consistency when serializing an in-memory -//! data structure. It is useful for situations where two parties want to efficiently compare -//! data structures they independently maintain. It happens in consensus where -//! independent validators need to agree on the state they independently compute. A cryptographic -//! hash of the serialized data structure is what ultimately gets compared. In order for -//! 
this to work, the serialization of the same data structures must be identical when computed -//! by independent validators potentially running different implementations -//! of the same spec in different languages. - -use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; -use failure::prelude::*; -use std::{ - collections::BTreeMap, - io::{Cursor, Read}, - mem::size_of, -}; +const ARRAY_MAX_LENGTH: usize = i32::max_value() as usize; +type Endianness = byteorder::LittleEndian; +mod canonical_deserialize; +mod canonical_serialize; +mod simple_deserializer; +mod simple_serializer; pub mod test_helper; +pub use canonical_deserialize::{CanonicalDeserialize, CanonicalDeserializer}; +pub use canonical_serialize::{CanonicalSerialize, CanonicalSerializer}; +pub use simple_deserializer::SimpleDeserializer; +pub use simple_serializer::SimpleSerializer; + #[cfg(test)] mod canonical_serialization_test; - -// use the signed 32-bit integer's max value as the maximum array length instead of -// unsigned 32-bit integer. 
This gives us the opportunity to use the additional sign bit -// to signal a length extension to support arrays longer than 2^31 in the future -const ARRAY_MAX_LENGTH: usize = i32::max_value() as usize; - -pub trait CanonicalSerialize { - fn serialize(&self, serializer: &mut impl CanonicalSerializer) -> Result<()>; -} - -pub trait CanonicalSerializer { - fn encode_struct(&mut self, structure: &impl CanonicalSerialize) -> Result<&mut Self> - where - Self: std::marker::Sized, - { - structure.serialize(self)?; - Ok(self) - } - - fn encode_optional(&mut self, v: &Option) -> Result<&mut Self>; - - fn encode_u64(&mut self, v: u64) -> Result<&mut Self>; - - fn encode_u32(&mut self, v: u32) -> Result<&mut Self>; - - fn encode_u16(&mut self, v: u16) -> Result<&mut Self>; - - fn encode_u8(&mut self, v: u8) -> Result<&mut Self>; - - fn encode_bool(&mut self, b: bool) -> Result<&mut Self>; - - // Use this encoder when the length of the array is known to be fixed and always known at - // deserialization time. The raw bytes of the array without length prefix are encoded. - // For deserialization, use decode_bytes_with_len() which requires giving the length - // as input - fn encode_raw_bytes(&mut self, bytes: &[u8]) -> Result<&mut Self>; - - // Use this encoder to encode variable length byte arrays whose length may not be known at - // deserialization time. - fn encode_variable_length_bytes(&mut self, v: &[u8]) -> Result<&mut Self>; - - fn encode_btreemap( - &mut self, - v: &BTreeMap, - ) -> Result<&mut Self>; - - fn encode_vec(&mut self, v: &[T]) -> Result<&mut Self>; - - fn encode_string(&mut self, s: &str) -> Result<&mut Self>; - - fn encode_tuple2( - &mut self, - v: &(T0, T1), - ) -> Result<&mut Self>; -} - -type Endianness = LittleEndian; - -/// An implementation of a simple canonical serialization format that implements the -/// CanonicalSerializer trait using a byte vector. 
-#[derive(Clone)] -pub struct SimpleSerializer { - output: W, -} - -impl Default for SimpleSerializer -where - W: Default + std::io::Write, -{ - fn default() -> Self { - SimpleSerializer::new() - } -} - -impl SimpleSerializer -where - W: Default + std::io::Write, -{ - pub fn new() -> Self { - SimpleSerializer { - output: W::default(), - } - } - - /// Create a SimpleSerializer on the fly and serialize `object` - pub fn serialize(object: &impl CanonicalSerialize) -> Result { - let mut serializer = Self::default(); - object.serialize(&mut serializer)?; - Ok(serializer.get_output()) - } - - /// Consume the SimpleSerializer and return the output - pub fn get_output(self) -> W { - self.output - } -} - -impl CanonicalSerializer for SimpleSerializer -where - W: std::io::Write, -{ - fn encode_optional(&mut self, v: &Option) -> Result<&mut Self> { - match v.as_ref() { - Some(val) => { - self.encode_bool(true)?; - self.encode_struct(val)?; - } - None => { - self.encode_bool(false)?; - } - } - Ok(self) - } - - fn encode_u64(&mut self, v: u64) -> Result<&mut Self> { - self.output.write_u64::(v)?; - Ok(self) - } - - fn encode_u32(&mut self, v: u32) -> Result<&mut Self> { - self.output.write_u32::(v)?; - Ok(self) - } - - fn encode_u16(&mut self, v: u16) -> Result<&mut Self> { - self.output.write_u16::(v)?; - Ok(self) - } - - fn encode_u8(&mut self, v: u8) -> Result<&mut Self> { - self.output.write_u8(v)?; - Ok(self) - } - - fn encode_bool(&mut self, b: bool) -> Result<&mut Self> { - let byte: u8 = if b { 1 } else { 0 }; - self.output.write_u8(byte)?; - Ok(self) - } - - fn encode_raw_bytes(&mut self, bytes: &[u8]) -> Result<&mut Self> { - self.output.write_all(bytes.as_ref())?; - Ok(self) - } - - fn encode_variable_length_bytes(&mut self, v: &[u8]) -> Result<&mut Self> { - ensure!( - v.len() <= ARRAY_MAX_LENGTH, - "array length exceeded the maximum length limit. 
\ - length: {}, Max length limit: {}", - v.len(), - ARRAY_MAX_LENGTH, - ); - - // first add the length as a 4-byte integer - self.output.write_u32::(v.len() as u32)?; - self.output.write_all(v)?; - Ok(self) - } - - fn encode_btreemap( - &mut self, - v: &BTreeMap, - ) -> Result<&mut Self> { - ensure!( - v.len() <= ARRAY_MAX_LENGTH, - "map size exceeded the maximum limit. length: {}, max length limit: {}", - v.len(), - ARRAY_MAX_LENGTH, - ); - - // add the number of pairs in the map - self.output.write_u32::(v.len() as u32)?; - - // Regardless of the order defined for K of the map, write in the order of the lexicographic - // order of the canonical serialized bytes of K - let mut map = BTreeMap::new(); - for (key, value) in v { - map.insert( - SimpleSerializer::>::serialize(key)?, - SimpleSerializer::>::serialize(value)?, - ); - } - - for (key, value) in map { - self.encode_raw_bytes(&key)?; - self.encode_raw_bytes(&value)?; - } - Ok(self) - } - - fn encode_vec(&mut self, v: &[T]) -> Result<&mut Self> { - ensure!( - v.len() <= ARRAY_MAX_LENGTH, - "map size exceeded the maximum limit. 
length: {}, max length limit: {}", - v.len(), - ARRAY_MAX_LENGTH, - ); - - // add the number of items in the vec - self.output.write_u32::(v.len() as u32)?; - for value in v { - self.encode_struct(value)?; - } - Ok(self) - } - - fn encode_string(&mut self, s: &str) -> Result<&mut Self> { - // String::as_bytes returns the UTF-8 encoded byte array - self.encode_variable_length_bytes(s.as_bytes()) - } - - fn encode_tuple2( - &mut self, - v: &(T0, T1), - ) -> Result<&mut Self> { - self.encode_struct(&v.0)?; - self.encode_struct(&v.1)?; - Ok(self) - } -} - -pub trait CanonicalDeserializer { - fn decode_struct(&mut self) -> Result - where - T: CanonicalDeserialize, - Self: Sized, - { - T::deserialize(self) - } - - fn decode_optional(&mut self) -> Result>; - - fn decode_u64(&mut self) -> Result; - - fn decode_u32(&mut self) -> Result; - - fn decode_u16(&mut self) -> Result; - - fn decode_u8(&mut self) -> Result; - - fn decode_bool(&mut self) -> Result; - - // decode a byte array with the given length as input - fn decode_bytes_with_len(&mut self, len: u32) -> Result>; - - fn decode_variable_length_bytes(&mut self) -> Result>; - - fn decode_btreemap( - &mut self, - ) -> Result>; - - fn decode_vec(&mut self) -> Result>; - - fn decode_string(&mut self) -> Result; - - fn decode_tuple2( - &mut self, - ) -> Result<(T0, T1)>; -} - -pub trait CanonicalDeserialize { - fn deserialize(deserializer: &mut impl CanonicalDeserializer) -> Result - where - Self: Sized; -} - -#[derive(Clone)] -pub struct SimpleDeserializer<'a> { - raw_bytes: Cursor<&'a [u8]>, -} - -impl<'a> SimpleDeserializer<'a> { - pub fn new(raw_bytes: &'a T) -> Self - where - T: AsRef<[u8]> + ?Sized, - { - Self { - raw_bytes: Cursor::new(raw_bytes.as_ref()), - } - } - - pub fn deserialize(data: &'a [u8]) -> Result - where - T: CanonicalDeserialize, - { - let mut deserializer = Self::new(data); - T::deserialize(&mut deserializer) - } -} - -impl<'a> CanonicalDeserializer for SimpleDeserializer<'a> { - fn 
decode_optional(&mut self) -> Result> { - if self.decode_bool()? { - Ok(Some(T::deserialize(self)?)) - } else { - Ok(None) - } - } - - fn decode_u64(&mut self) -> Result { - let num = self.raw_bytes.read_u64::()?; - Ok(num) - } - - fn decode_u32(&mut self) -> Result { - let num = self.raw_bytes.read_u32::()?; - Ok(num) - } - - fn decode_u16(&mut self) -> Result { - let num = self.raw_bytes.read_u16::()?; - Ok(num) - } - - fn decode_u8(&mut self) -> Result { - let num = self.raw_bytes.read_u8()?; - Ok(num) - } - - fn decode_bool(&mut self) -> Result { - let b = self.raw_bytes.read_u8()?; - ensure!(b == 0 || b == 1, "bool must be 0 or 1, found {}", b,); - Ok(b != 0) - } - - fn decode_bytes_with_len(&mut self, len: u32) -> Result> { - // make sure there is enough bytes left in the buffer - let remain = self.raw_bytes.get_ref().len() as u64 - self.raw_bytes.position(); - ensure!( - remain >= len.into(), - "not enough bytes left. input size: {}, remaining: {}", - len, - remain - ); - - let mut buffer = vec![0; len as usize]; - self.raw_bytes.read_exact(&mut buffer)?; - Ok(buffer) - } - - fn decode_variable_length_bytes(&mut self) -> Result> { - let len = self.raw_bytes.read_u32::()?; - ensure!( - len as usize <= ARRAY_MAX_LENGTH, - "array length longer than max allowed length. len: {}, max: {}", - len, - ARRAY_MAX_LENGTH - ); - - // make sure there is enough bytes left in the buffer - let remain = self.raw_bytes.get_ref().len() - self.raw_bytes.position() as usize; - ensure!( - remain >= (len as usize), - "not enough bytes left. len: {}, remaining: {}", - len, - remain - ); - - let mut vec = vec![0; len as usize]; - self.raw_bytes.read_exact(&mut vec)?; - Ok(vec) - } - - fn decode_btreemap( - &mut self, - ) -> Result> { - let len = self.raw_bytes.read_u32::()?; - ensure!( - len as usize <= ARRAY_MAX_LENGTH, - "map size bigger than max allowed. 
size: {}, max: {}", - len, - ARRAY_MAX_LENGTH - ); - - let mut map = BTreeMap::new(); - for _i in 0..len { - let key = K::deserialize(self)?; - let value = V::deserialize(self)?; - map.insert(key, value); - } - Ok(map) - } - - fn decode_vec(&mut self) -> Result> { - let len = self.raw_bytes.read_u32::()?; - ensure!( - len as usize <= ARRAY_MAX_LENGTH, - "map size bigger than max allowed. size: {}, max: {}", - len, - ARRAY_MAX_LENGTH - ); - - let mut vec = Vec::new(); - for _i in 0..len { - let v = T::deserialize(self)?; - vec.push(v); - } - Ok(vec) - } - - fn decode_string(&mut self) -> Result { - Ok(String::from_utf8(self.decode_variable_length_bytes()?)?) - } - - fn decode_tuple2( - &mut self, - ) -> Result<(T0, T1)> { - Ok((T0::deserialize(self)?, T1::deserialize(self)?)) - } -} - -impl CanonicalSerialize for Vec -where - T: CanonicalSerialize, -{ - fn serialize(&self, serializer: &mut impl CanonicalSerializer) -> Result<()> { - serializer.encode_vec(self.as_ref())?; - Ok(()) - } -} - -impl CanonicalDeserialize for Vec -where - T: CanonicalDeserialize, -{ - fn deserialize(deserializer: &mut impl CanonicalDeserializer) -> Result - where - Self: Sized, - { - deserializer.decode_vec() - } -} - -impl CanonicalSerialize for u16 { - fn serialize(&self, serializer: &mut impl CanonicalSerializer) -> Result<()> { - serializer.encode_u16(*self)?; - Ok(()) - } -} - -impl CanonicalDeserialize for u16 { - fn deserialize(deserializer: &mut impl CanonicalDeserializer) -> Result { - deserializer.decode_u16() - } -} - -impl CanonicalSerialize for u32 { - fn serialize(&self, serializer: &mut impl CanonicalSerializer) -> Result<()> { - serializer.encode_u32(*self)?; - Ok(()) - } -} - -impl CanonicalDeserialize for u32 { - fn deserialize(deserializer: &mut impl CanonicalDeserializer) -> Result - where - Self: Sized, - { - deserializer.decode_u32() - } -} - -impl CanonicalSerialize for i32 { - fn serialize(&self, serializer: &mut impl CanonicalSerializer) -> Result<()> { - 
serializer.encode_u32(*self as u32)?; - Ok(()) - } -} - -impl CanonicalDeserialize for i32 { - fn deserialize(deserializer: &mut impl CanonicalDeserializer) -> Result - where - Self: Sized, - { - let num = deserializer.decode_u32()? as i32; - Ok(num) - } -} - -impl CanonicalSerialize for u64 { - fn serialize(&self, serializer: &mut impl CanonicalSerializer) -> Result<()> { - serializer.encode_u64(*self)?; - Ok(()) - } -} - -impl CanonicalDeserialize for u64 { - fn deserialize(deserializer: &mut impl CanonicalDeserializer) -> Result - where - Self: Sized, - { - let num = deserializer.decode_u64()?; - Ok(num) - } -} - -impl CanonicalSerialize for i64 { - fn serialize(&self, serializer: &mut impl CanonicalSerializer) -> Result<()> { - serializer.encode_u64(*self as u64)?; - Ok(()) - } -} - -impl CanonicalDeserialize for i64 { - fn deserialize(deserializer: &mut impl CanonicalDeserializer) -> Result - where - Self: Sized, - { - let num = deserializer.decode_u64()? as i64; - Ok(num) - } -} - -impl CanonicalSerialize for usize { - fn serialize(&self, serializer: &mut impl CanonicalSerializer) -> Result<()> { - assert_eq!(8, size_of::()); - serializer.encode_u64(*self as u64)?; - Ok(()) - } -} - -impl CanonicalDeserialize for usize { - fn deserialize(deserializer: &mut impl CanonicalDeserializer) -> Result - where - Self: Sized, - { - assert_eq!(8, size_of::()); - let num = deserializer.decode_u64()? 
as usize; - Ok(num) - } -} - -impl CanonicalSerialize for u8 { - fn serialize(&self, serializer: &mut impl CanonicalSerializer) -> Result<()> { - serializer.encode_u8(*self)?; - Ok(()) - } -} - -impl CanonicalDeserialize for u8 { - fn deserialize(deserializer: &mut impl CanonicalDeserializer) -> Result - where - Self: Sized, - { - let num = deserializer.decode_u8()?; - Ok(num) - } -} - -impl CanonicalSerialize for BTreeMap, Vec> { - fn serialize(&self, serializer: &mut impl CanonicalSerializer) -> Result<()> { - serializer.encode_btreemap(self)?; - Ok(()) - } -} - -impl CanonicalDeserialize for BTreeMap, Vec> { - fn deserialize(deserializer: &mut impl CanonicalDeserializer) -> Result - where - Self: Sized, - { - Ok(deserializer.decode_btreemap()?) - } -} - -impl CanonicalSerialize for &str { - fn serialize(&self, serializer: &mut impl CanonicalSerializer) -> Result<()> { - serializer.encode_string(self)?; - Ok(()) - } -} - -impl CanonicalDeserialize for String { - fn deserialize(deserializer: &mut impl CanonicalDeserializer) -> Result - where - Self: Sized, - { - Ok(deserializer.decode_string()?) 
- } -} - -impl CanonicalSerialize for (T0, T1) -where - T0: CanonicalSerialize, - T1: CanonicalSerialize, -{ - fn serialize(&self, serializer: &mut impl CanonicalSerializer) -> Result<()> { - serializer.encode_tuple2(self)?; - Ok(()) - } -} - -impl CanonicalDeserialize for (T0, T1) -where - T0: CanonicalDeserialize, - T1: CanonicalDeserialize, -{ - fn deserialize(deserializer: &mut impl CanonicalDeserializer) -> Result - where - Self: Sized, - { - deserializer.decode_tuple2() - } -} diff --git a/common/canonical_serialization/src/simple_deserializer.rs b/common/canonical_serialization/src/simple_deserializer.rs new file mode 100644 index 000000000000..709f3deff759 --- /dev/null +++ b/common/canonical_serialization/src/simple_deserializer.rs @@ -0,0 +1,180 @@ +// Copyright (c) The Libra Core Contributors +// SPDX-License-Identifier: Apache-2.0 + +use crate::{ + canonical_deserialize::{CanonicalDeserialize, CanonicalDeserializer}, + Endianness, ARRAY_MAX_LENGTH, +}; +use byteorder::ReadBytesExt; +use failure::prelude::*; +use std::{ + collections::BTreeMap, + io::{Cursor, Read}, +}; + +/// An implementation of LCS deserializer (CanonicalDeserialize) for [u8]. 
+#[derive(Clone)] +pub struct SimpleDeserializer<'a> { + raw_bytes: Cursor<&'a [u8]>, +} + +impl<'a> SimpleDeserializer<'a> { + pub fn new(raw_bytes: &'a T) -> Self + where + T: AsRef<[u8]> + ?Sized, + { + Self { + raw_bytes: Cursor::new(raw_bytes.as_ref()), + } + } + + pub fn deserialize(data: &'a [u8]) -> Result + where + T: CanonicalDeserialize, + { + let mut deserializer = Self::new(data); + T::deserialize(&mut deserializer) + } + + /// Returns true if the deserializer has no remaining bytes to deserialize + pub fn is_empty(&self) -> bool { + (self.len() as u64) - self.position() == 0 + } + + /// Returns the total length of the underlying bytes used by the deserializer + pub fn len(&self) -> usize { + self.raw_bytes.get_ref().len() + } + + /// Returns the current index into the bytes used by the deserializer + pub fn position(&self) -> u64 { + self.raw_bytes.position() + } +} + +impl<'a> CanonicalDeserializer for SimpleDeserializer<'a> { + fn decode_bool(&mut self) -> Result { + let b = self.raw_bytes.read_u8()?; + ensure!(b == 0 || b == 1, "bool must be 0 or 1, found {}", b,); + Ok(b != 0) + } + + fn decode_btreemap( + &mut self, + ) -> Result> { + let len = self.decode_u32()?; + ensure!( + len as usize <= ARRAY_MAX_LENGTH, + "array length longer than max allowed. size: {}, max: {}", + len, + ARRAY_MAX_LENGTH + ); + + let mut map = BTreeMap::new(); + for _i in 0..len { + let key = K::deserialize(self)?; + let value = V::deserialize(self)?; + map.insert(key, value); + } + Ok(map) + } + + fn decode_bytes_with_len(&mut self, len: u32) -> Result> { + // make sure there is enough bytes left in the buffer + let remain = self.raw_bytes.get_ref().len() as u64 - self.raw_bytes.position(); + ensure!( + remain >= len.into(), + "not enough bytes left. 
input size: {}, remaining: {}", + len, + remain + ); + + let mut buffer = vec![0; len as usize]; + self.raw_bytes.read_exact(&mut buffer)?; + Ok(buffer) + } + + fn decode_i8(&mut self) -> Result { + Ok(self.raw_bytes.read_i8()?) + } + + fn decode_i16(&mut self) -> Result { + Ok(self.raw_bytes.read_i16::()?) + } + + fn decode_i32(&mut self) -> Result { + Ok(self.raw_bytes.read_i32::()?) + } + + fn decode_i64(&mut self) -> Result { + Ok(self.raw_bytes.read_i64::()?) + } + + fn decode_optional(&mut self) -> Result> { + if self.decode_bool()? { + Ok(Some(T::deserialize(self)?)) + } else { + Ok(None) + } + } + + fn decode_u8(&mut self) -> Result { + Ok(self.raw_bytes.read_u8()?) + } + + fn decode_u16(&mut self) -> Result { + Ok(self.raw_bytes.read_u16::()?) + } + + fn decode_u32(&mut self) -> Result { + Ok(self.raw_bytes.read_u32::()?) + } + + fn decode_u64(&mut self) -> Result { + Ok(self.raw_bytes.read_u64::()?) + } + + fn decode_string(&mut self) -> Result { + Ok(String::from_utf8(self.decode_variable_length_bytes()?)?) + } + + fn decode_variable_length_bytes(&mut self) -> Result> { + let len = self.decode_u32()?; + ensure!( + len as usize <= ARRAY_MAX_LENGTH, + "array length longer than max allowed length. len: {}, max: {}", + len, + ARRAY_MAX_LENGTH + ); + + // make sure there is enough bytes left in the buffer + let remain = self.raw_bytes.get_ref().len() - self.raw_bytes.position() as usize; + ensure!( + remain >= (len as usize), + "not enough bytes left. len: {}, remaining: {}", + len, + remain + ); + + let mut vec = vec![0; len as usize]; + self.raw_bytes.read_exact(&mut vec)?; + Ok(vec) + } + + fn decode_vec(&mut self) -> Result> { + let len = self.decode_u32()?; + ensure!( + len as usize <= ARRAY_MAX_LENGTH, + "array length longer than max allowed. 
size: {}, max: {}", + len, + ARRAY_MAX_LENGTH + ); + + let mut vec = Vec::new(); + for _i in 0..len { + let v = T::deserialize(self)?; + vec.push(v); + } + Ok(vec) + } +} diff --git a/common/canonical_serialization/src/simple_serializer.rs b/common/canonical_serialization/src/simple_serializer.rs new file mode 100644 index 000000000000..28df70cb247b --- /dev/null +++ b/common/canonical_serialization/src/simple_serializer.rs @@ -0,0 +1,185 @@ +// Copyright (c) The Libra Core Contributors +// SPDX-License-Identifier: Apache-2.0 + +use crate::{ + canonical_serialize::{CanonicalSerialize, CanonicalSerializer}, + Endianness, ARRAY_MAX_LENGTH, +}; +use byteorder::WriteBytesExt; +use failure::prelude::*; +use std::collections::BTreeMap; + +/// An implementation of LCS serializer (CanonicalSerializer) for std::io::Write, which includes +/// Vec. +#[derive(Clone)] +pub struct SimpleSerializer { + output: W, +} + +impl Default for SimpleSerializer +where + W: Default + std::io::Write, +{ + fn default() -> Self { + SimpleSerializer::new() + } +} + +impl SimpleSerializer +where + W: Default + std::io::Write, +{ + pub fn new() -> Self { + SimpleSerializer { + output: W::default(), + } + } + + /// Create a SimpleSerializer on the fly and serialize `object` + pub fn serialize(object: &impl CanonicalSerialize) -> Result { + let mut serializer = Self::default(); + object.serialize(&mut serializer)?; + Ok(serializer.get_output()) + } + + /// Consume the SimpleSerializer and return the output + pub fn get_output(self) -> W { + self.output + } +} + +impl CanonicalSerializer for SimpleSerializer +where + W: std::io::Write, +{ + fn encode_bool(&mut self, b: bool) -> Result<&mut Self> { + let byte: u8 = if b { 1 } else { 0 }; + self.output.write_u8(byte)?; + Ok(self) + } + + fn encode_btreemap( + &mut self, + v: &BTreeMap, + ) -> Result<&mut Self> { + ensure!( + v.len() <= ARRAY_MAX_LENGTH, + "array length exceeded the maximum limit. 
length: {}, max length limit: {}", + v.len(), + ARRAY_MAX_LENGTH, + ); + + // add the number of pairs in the map + self.encode_u32(v.len() as u32)?; + + // Regardless of the order defined for K of the map, write in the order of the lexicographic + // order of the canonical serialized bytes of K + let mut map = BTreeMap::new(); + for (key, value) in v { + map.insert( + SimpleSerializer::>::serialize(key)?, + SimpleSerializer::>::serialize(value)?, + ); + } + + for (key, value) in map { + self.encode_raw_bytes(&key)?; + self.encode_raw_bytes(&value)?; + } + Ok(self) + } + + fn encode_i8(&mut self, v: i8) -> Result<&mut Self> { + self.output.write_i8(v)?; + Ok(self) + } + + fn encode_i16(&mut self, v: i16) -> Result<&mut Self> { + self.output.write_i16::(v)?; + Ok(self) + } + + fn encode_i32(&mut self, v: i32) -> Result<&mut Self> { + self.output.write_i32::(v)?; + Ok(self) + } + + fn encode_i64(&mut self, v: i64) -> Result<&mut Self> { + self.output.write_i64::(v)?; + Ok(self) + } + + fn encode_optional(&mut self, v: &Option) -> Result<&mut Self> { + match v.as_ref() { + Some(val) => { + self.encode_bool(true)?; + self.encode_struct(val)?; + } + None => { + self.encode_bool(false)?; + } + } + Ok(self) + } + + fn encode_raw_bytes(&mut self, bytes: &[u8]) -> Result<&mut Self> { + self.output.write_all(bytes.as_ref())?; + Ok(self) + } + + fn encode_string(&mut self, s: &str) -> Result<&mut Self> { + // String::as_bytes returns the UTF-8 encoded byte array + self.encode_variable_length_bytes(s.as_bytes()) + } + + fn encode_u8(&mut self, v: u8) -> Result<&mut Self> { + self.output.write_u8(v)?; + Ok(self) + } + + fn encode_u16(&mut self, v: u16) -> Result<&mut Self> { + self.output.write_u16::(v)?; + Ok(self) + } + + fn encode_u32(&mut self, v: u32) -> Result<&mut Self> { + self.output.write_u32::(v)?; + Ok(self) + } + + fn encode_u64(&mut self, v: u64) -> Result<&mut Self> { + self.output.write_u64::(v)?; + Ok(self) + } + + fn encode_variable_length_bytes(&mut self, v: 
&[u8]) -> Result<&mut Self> { + ensure!( + v.len() <= ARRAY_MAX_LENGTH, + "array length exceeded the maximum length limit. \ + length: {}, Max length limit: {}", + v.len(), + ARRAY_MAX_LENGTH, + ); + + // first add the length as a 4-byte integer + self.encode_u32(v.len() as u32)?; + self.output.write_all(v)?; + Ok(self) + } + + fn encode_vec(&mut self, v: &[T]) -> Result<&mut Self> { + ensure!( + v.len() <= ARRAY_MAX_LENGTH, + "array length exceeded the maximum limit. length: {}, max length limit: {}", + v.len(), + ARRAY_MAX_LENGTH, + ); + + // add the number of items in the vec + self.encode_u32(v.len() as u32)?; + for value in v { + self.encode_struct(value)?; + } + Ok(self) + } +} From ca4616cf12897c35db8d8a5f75cf8edafe64c9c8 Mon Sep 17 00:00:00 2001 From: David Wolinsky Date: Wed, 21 Aug 2019 23:42:41 -0700 Subject: [PATCH 3/4] [common] Refactor canonical serialization to use macros Macros reduce the duplicate code. Some challenges arise though: - macros cannot exist within a trait or an impl therefore the macro has to wrap around it so we avoid macros for traits - macros do not support concatenation of inputs, so the entire function name must be specified - macros involving indexes of tuples are just plain complicated and probably less readable for our purposes - tuple of size 1 is colliding with all types -- needs more investigation --- .../src/canonical_deserialize.rs | 194 +++++------------ .../src/canonical_serialize.rs | 195 +++++++----------- 2 files changed, 136 insertions(+), 253 deletions(-) diff --git a/common/canonical_serialization/src/canonical_deserialize.rs b/common/canonical_serialization/src/canonical_deserialize.rs index 48f8f1c63c38..a28f852fced7 100644 --- a/common/canonical_serialization/src/canonical_deserialize.rs +++ b/common/canonical_serialization/src/canonical_deserialize.rs @@ -19,13 +19,6 @@ pub trait CanonicalDeserialize { pub trait CanonicalDeserializer { fn decode_bool(&mut self) -> Result; - fn decode_btreemap( - &mut self, - ) 
-> Result>; - - // decode a byte array with the given length as input - fn decode_bytes_with_len(&mut self, len: u32) -> Result>; - fn decode_i8(&mut self) -> Result; fn decode_i16(&mut self) -> Result; @@ -34,17 +27,15 @@ pub trait CanonicalDeserializer { fn decode_i64(&mut self) -> Result; - fn decode_optional(&mut self) -> Result>; - fn decode_string(&mut self) -> Result; - fn decode_struct(&mut self) -> Result - where - T: CanonicalDeserialize, - Self: Sized, - { - T::deserialize(self) - } + fn decode_u8(&mut self) -> Result; + + fn decode_u16(&mut self) -> Result; + + fn decode_u32(&mut self) -> Result; + + fn decode_u64(&mut self) -> Result; fn decode_tuple2(&mut self) -> Result<(T0, T1)> where @@ -69,138 +60,67 @@ pub trait CanonicalDeserializer { )) } - fn decode_u8(&mut self) -> Result; - - fn decode_u16(&mut self) -> Result; - - fn decode_u32(&mut self) -> Result; - - fn decode_u64(&mut self) -> Result; - - fn decode_variable_length_bytes(&mut self) -> Result>; - - fn decode_vec(&mut self) -> Result>; -} - -impl CanonicalDeserialize for BTreeMap, Vec> { - fn deserialize(deserializer: &mut impl CanonicalDeserializer) -> Result - where - Self: Sized, - { - Ok(deserializer.decode_btreemap()?) 
- } -} - -impl CanonicalDeserialize for i8 { - fn deserialize(deserializer: &mut impl CanonicalDeserializer) -> Result - where - Self: Sized, - { - let num = deserializer.decode_i8()?; - Ok(num) - } -} - -impl CanonicalDeserialize for i16 { - fn deserialize(deserializer: &mut impl CanonicalDeserializer) -> Result - where - Self: Sized, - { - let num = deserializer.decode_i16()?; - Ok(num) - } -} - -impl CanonicalDeserialize for i32 { - fn deserialize(deserializer: &mut impl CanonicalDeserializer) -> Result - where - Self: Sized, - { - let num = deserializer.decode_i32()?; - Ok(num) - } -} - -impl CanonicalDeserialize for i64 { - fn deserialize(deserializer: &mut impl CanonicalDeserializer) -> Result - where - Self: Sized, - { - let num = deserializer.decode_i64()?; - Ok(num) - } -} - -impl CanonicalDeserialize for String { - fn deserialize(deserializer: &mut impl CanonicalDeserializer) -> Result - where - Self: Sized, - { - Ok(deserializer.decode_string()?) - } -} + fn decode_btreemap( + &mut self, + ) -> Result>; -impl CanonicalDeserialize for (T0, T1) -where - T0: CanonicalDeserialize, - T1: CanonicalDeserialize, -{ - fn deserialize(deserializer: &mut impl CanonicalDeserializer) -> Result - where - Self: Sized, - { - deserializer.decode_tuple2() - } -} + // decode a byte array with the given length as input + fn decode_bytes_with_len(&mut self, len: u32) -> Result>; -impl CanonicalDeserialize for (T0, T1, T2) -where - T0: CanonicalDeserialize, - T1: CanonicalDeserialize, - T2: CanonicalDeserialize, -{ - fn deserialize(deserializer: &mut impl CanonicalDeserializer) -> Result - where - Self: Sized, - { - deserializer.decode_tuple3() - } -} + fn decode_optional(&mut self) -> Result>; -impl CanonicalDeserialize for u8 { - fn deserialize(deserializer: &mut impl CanonicalDeserializer) -> Result + fn decode_struct(&mut self) -> Result where + T: CanonicalDeserialize, Self: Sized, { - let num = deserializer.decode_u8()?; - Ok(num) + T::deserialize(self) } -} -impl 
CanonicalDeserialize for u16 { - fn deserialize(deserializer: &mut impl CanonicalDeserializer) -> Result { - deserializer.decode_u16() - } -} + fn decode_variable_length_bytes(&mut self) -> Result>; -impl CanonicalDeserialize for u32 { - fn deserialize(deserializer: &mut impl CanonicalDeserializer) -> Result - where - Self: Sized, - { - deserializer.decode_u32() - } + fn decode_vec(&mut self) -> Result>; } -impl CanonicalDeserialize for u64 { - fn deserialize(deserializer: &mut impl CanonicalDeserializer) -> Result - where - Self: Sized, - { - let num = deserializer.decode_u64()?; - Ok(num) - } -} +macro_rules! impl_canonical_deserialize { + ($function:ident, $type:ty) => { + impl CanonicalDeserialize for $type { + fn deserialize(deserializer: &mut impl CanonicalDeserializer) -> Result<(Self)> { + deserializer.$function() + } + } + }; +} + +macro_rules! impl_canonical_deserialize_for_tuple { + ($function:ident, $($type:ident)+) => ( + impl<$($type), +> CanonicalDeserialize for ($($type), +) + where + $($type: CanonicalDeserialize,) + + { + fn deserialize(deserializer: &mut impl CanonicalDeserializer) -> Result<(Self)> + where + Self: Sized, + { + deserializer.$function() + } + } + ); +} + +impl_canonical_deserialize!(decode_bool, bool); +impl_canonical_deserialize!(decode_btreemap, BTreeMap, Vec>); +impl_canonical_deserialize!(decode_i8, i8); +impl_canonical_deserialize!(decode_i16, i16); +impl_canonical_deserialize!(decode_i32, i32); +impl_canonical_deserialize!(decode_i64, i64); +impl_canonical_deserialize!(decode_string, String); +impl_canonical_deserialize_for_tuple!(decode_tuple2, T0 T1); +impl_canonical_deserialize_for_tuple!(decode_tuple3, T0 T1 T2); +impl_canonical_deserialize!(decode_u8, u8); +impl_canonical_deserialize!(decode_u16, u16); +impl_canonical_deserialize!(decode_u32, u32); +impl_canonical_deserialize!(decode_u64, u64); impl CanonicalDeserialize for Option where diff --git a/common/canonical_serialization/src/canonical_serialize.rs 
b/common/canonical_serialization/src/canonical_serialize.rs index 2bcfc5d7fb46..113f4a6a9d5b 100644 --- a/common/canonical_serialization/src/canonical_serialize.rs +++ b/common/canonical_serialization/src/canonical_serialize.rs @@ -15,12 +15,7 @@ pub trait CanonicalSerialize { /// Trait for serializers that implement LCS pub trait CanonicalSerializer { - fn encode_bool(&mut self, b: bool) -> Result<&mut Self>; - - fn encode_btreemap( - &mut self, - v: &BTreeMap, - ) -> Result<&mut Self>; + fn encode_bool(&mut self, v: bool) -> Result<&mut Self>; fn encode_i8(&mut self, v: i8) -> Result<&mut Self>; @@ -30,23 +25,15 @@ pub trait CanonicalSerializer { fn encode_i64(&mut self, v: i64) -> Result<&mut Self>; - fn encode_optional(&mut self, v: &Option) -> Result<&mut Self>; + fn encode_string(&mut self, v: &str) -> Result<&mut Self>; - // Use this encoder when the length of the array is known to be fixed and always known at - // deserialization time. The raw bytes of the array without length prefix are encoded. 
- // For deserialization, use decode_bytes_with_len() which requires giving the length - // as input - fn encode_raw_bytes(&mut self, bytes: &[u8]) -> Result<&mut Self>; + fn encode_u8(&mut self, v: u8) -> Result<&mut Self>; - fn encode_string(&mut self, s: &str) -> Result<&mut Self>; + fn encode_u16(&mut self, v: u16) -> Result<&mut Self>; - fn encode_struct(&mut self, structure: &impl CanonicalSerialize) -> Result<&mut Self> - where - Self: std::marker::Sized, - { - structure.serialize(self)?; - Ok(self) - } + fn encode_u32(&mut self, v: u32) -> Result<&mut Self>; + + fn encode_u64(&mut self, v: u64) -> Result<&mut Self>; fn encode_tuple2(&mut self, v: &(T0, T1)) -> Result<&mut Self> where @@ -72,13 +59,26 @@ pub trait CanonicalSerializer { Ok(self) } - fn encode_u8(&mut self, v: u8) -> Result<&mut Self>; + fn encode_btreemap( + &mut self, + v: &BTreeMap, + ) -> Result<&mut Self>; - fn encode_u16(&mut self, v: u16) -> Result<&mut Self>; + fn encode_optional(&mut self, v: &Option) -> Result<&mut Self>; - fn encode_u32(&mut self, v: u32) -> Result<&mut Self>; + // Use this encoder when the length of the array is known to be fixed and always known at + // deserialization time. The raw bytes of the array without length prefix are encoded. + // For deserialization, use decode_bytes_with_len() which requires giving the length + // as input + fn encode_raw_bytes(&mut self, bytes: &[u8]) -> Result<&mut Self>; - fn encode_u64(&mut self, v: u64) -> Result<&mut Self>; + fn encode_struct(&mut self, structure: &impl CanonicalSerialize) -> Result<&mut Self> + where + Self: std::marker::Sized, + { + structure.serialize(self)?; + Ok(self) + } // Use this encoder to encode variable length byte arrays whose length may not be known at // deserialization time. 
@@ -87,40 +87,55 @@ pub trait CanonicalSerializer { fn encode_vec(&mut self, v: &[T]) -> Result<&mut Self>; } -impl CanonicalSerialize for BTreeMap, Vec> { - fn serialize(&self, serializer: &mut impl CanonicalSerializer) -> Result<()> { - serializer.encode_btreemap(self)?; - Ok(()) - } -} - -impl CanonicalSerialize for i8 { - fn serialize(&self, serializer: &mut impl CanonicalSerializer) -> Result<()> { - serializer.encode_i8(*self)?; - Ok(()) - } -} - -impl CanonicalSerialize for i16 { - fn serialize(&self, serializer: &mut impl CanonicalSerializer) -> Result<()> { - serializer.encode_i16(*self)?; - Ok(()) - } -} - -impl CanonicalSerialize for i32 { - fn serialize(&self, serializer: &mut impl CanonicalSerializer) -> Result<()> { - serializer.encode_i32(*self)?; - Ok(()) - } -} - -impl CanonicalSerialize for i64 { - fn serialize(&self, serializer: &mut impl CanonicalSerializer) -> Result<()> { - serializer.encode_i64(*self)?; - Ok(()) - } -} +macro_rules! impl_canonical_serialize_for_complex { + ($function:ident, $type:ty) => { + impl CanonicalSerialize for $type { + fn serialize(&self, serializer: &mut impl CanonicalSerializer) -> Result<()> { + serializer.$function(self)?; + Ok(()) + } + } + }; +} + +macro_rules! impl_canonical_serialize_for_primitive { + ($function:ident, $type:ty) => { + impl CanonicalSerialize for $type { + fn serialize(&self, serializer: &mut impl CanonicalSerializer) -> Result<()> { + serializer.$function(*self)?; + Ok(()) + } + } + }; +} + +macro_rules! 
impl_canonical_serialize_for_tuple { + ($function:ident,$($type:ident)+) => ( + impl<$($type), +> CanonicalSerialize for ($($type),+) + where + $($type: CanonicalSerialize,) + + { + fn serialize(&self, serializer: &mut impl CanonicalSerializer) -> Result<()> { + serializer.$function(self)?; + Ok(()) + } + } + ); +} + +impl_canonical_serialize_for_primitive!(encode_bool, bool); +impl_canonical_serialize_for_complex!(encode_btreemap, BTreeMap, Vec>); +impl_canonical_serialize_for_primitive!(encode_i8, i8); +impl_canonical_serialize_for_primitive!(encode_i16, i16); +impl_canonical_serialize_for_primitive!(encode_i32, i32); +impl_canonical_serialize_for_primitive!(encode_i64, i64); +impl_canonical_serialize_for_complex!(encode_string, &str); +impl_canonical_serialize_for_tuple!(encode_tuple2, T0 T1); +impl_canonical_serialize_for_tuple!(encode_tuple3, T0 T1 T2); +impl_canonical_serialize_for_primitive!(encode_u8, u8); +impl_canonical_serialize_for_primitive!(encode_u16, u16); +impl_canonical_serialize_for_primitive!(encode_u32, u32); +impl_canonical_serialize_for_primitive!(encode_u64, u64); impl CanonicalSerialize for Option where @@ -132,13 +147,6 @@ where } } -impl CanonicalSerialize for &str { - fn serialize(&self, serializer: &mut impl CanonicalSerializer) -> Result<()> { - serializer.encode_string(self)?; - Ok(()) - } -} - impl CanonicalSerialize for String { fn serialize(&self, serializer: &mut impl CanonicalSerializer) -> Result<()> { serializer.encode_string(self.as_str())?; @@ -146,61 +154,16 @@ impl CanonicalSerialize for String { } } -impl CanonicalSerialize for (T0, T1) -where - T0: CanonicalSerialize, - T1: CanonicalSerialize, -{ - fn serialize(&self, serializer: &mut impl CanonicalSerializer) -> Result<()> { - serializer.encode_tuple2(self)?; - Ok(()) - } -} - -impl CanonicalSerialize for (T0, T1, T2) -where - T0: CanonicalSerialize, - T1: CanonicalSerialize, - T2: CanonicalSerialize, -{ - fn serialize(&self, serializer: &mut impl CanonicalSerializer) -> 
Result<()> { - serializer.encode_tuple3(self)?; - Ok(()) - } -} - -impl CanonicalSerialize for u8 { - fn serialize(&self, serializer: &mut impl CanonicalSerializer) -> Result<()> { - serializer.encode_u8(*self)?; - Ok(()) - } -} - -impl CanonicalSerialize for u16 { - fn serialize(&self, serializer: &mut impl CanonicalSerializer) -> Result<()> { - serializer.encode_u16(*self)?; - Ok(()) - } -} - -impl CanonicalSerialize for u32 { - fn serialize(&self, serializer: &mut impl CanonicalSerializer) -> Result<()> { - serializer.encode_u32(*self)?; - Ok(()) - } -} - -impl CanonicalSerialize for u64 { - fn serialize(&self, serializer: &mut impl CanonicalSerializer) -> Result<()> { - serializer.encode_u64(*self)?; - Ok(()) - } -} - /// usize is architecture dependent, LCS encodes it as a 64-bit unsigned integer and fails /// if usize is larger than a 64-bit integer. impl CanonicalSerialize for usize { fn serialize(&self, serializer: &mut impl CanonicalSerializer) -> Result<()> { + ensure!( + *self <= u64::max_value() as usize, + "usize bigger than max allowed. Expected <= {}, found: {}", + u64::max_value(), + *self, + ); serializer.encode_u64(*self as u64)?; Ok(()) } From a101ebf5253a4c5da2cc1cf289147e562cd8c01c Mon Sep 17 00:00:00 2001 From: David Wolinsky Date: Thu, 22 Aug 2019 11:52:41 -0700 Subject: [PATCH 4/4] [common] LCS tests are now based upon proptest Proptest allows for broader testing than fixed sets of test vectors. This diff migrates to proptest and also covers all of the types within LCS which did not have coverage before. 
--- common/canonical_serialization/Cargo.toml | 3 +- .../src/canonical_serialization_test.rs | 390 ++++++++---------- .../src/test_helper.rs | 6 +- 3 files changed, 172 insertions(+), 227 deletions(-) diff --git a/common/canonical_serialization/Cargo.toml b/common/canonical_serialization/Cargo.toml index 171a3daf96bc..90f3190abc64 100644 --- a/common/canonical_serialization/Cargo.toml +++ b/common/canonical_serialization/Cargo.toml @@ -8,8 +8,9 @@ edition = "2018" [dependencies] byteorder = "1.3.2" - failure = { path = "../failure_ext", package = "failure_ext" } +proptest = "0.9" +proptest-derive = "0.1.1" [dev-dependencies] hex = "0.3" diff --git a/common/canonical_serialization/src/canonical_serialization_test.rs b/common/canonical_serialization/src/canonical_serialization_test.rs index dd09c98531e5..0d94819f9f6a 100644 --- a/common/canonical_serialization/src/canonical_serialization_test.rs +++ b/common/canonical_serialization/src/canonical_serialization_test.rs @@ -1,6 +1,9 @@ // Copyright (c) The Libra Core Contributors // SPDX-License-Identifier: Apache-2.0 +// Required to allow Arbitrary +#![allow(clippy::unit_arg)] + //https://rust-lang.github.io/rust-clippy/master/index.html#blacklisted_name //disable it in test so that we can use variable names such as 'foo' and 'bar' #![allow(clippy::blacklisted_name)] @@ -9,27 +12,17 @@ use crate::*; use byteorder::WriteBytesExt; use failure::Result; +use proptest::prelude::*; +use proptest_derive::Arbitrary; use std::collections::BTreeMap; -// Do not change the test vectors. Please read the comment below. -const TEST_VECTOR_1: &str = "ffffffffffffffff060000006463584d4237640000000000000009000000000102\ - 03040506070805050505050505050505050505050505050505050505050505050505\ - 05050505630000000103000000010000000103000000161543030000000038150300\ - 0000160a05040000001415596903000000c9175a"; - -// Why do we need test vectors? -// -// 1. 
Sometimes it helps to catch common bugs between serialization and -// deserialization functions that would have been missed by a simple round trip test. -// For example, if there's a bug in a shared procedure that serializes and -// deserialize both calls then roundtrip might miss it. -// -// 2. It helps to catch code changes that inadvertently introduce breaking changes -// in the serialization format that is incompatible with what generated in the -// past which would be missed by roundtrip tests, or changes that are not backward -// compatible in the sense that it may fail to deserialize bytes generated in the past. - -#[derive(Clone, Debug, Eq, PartialEq)] +// Do not change this test vector. It is used to verify correctness of the serializer. +const TEST_VECTOR: &str = "ffffffffffffffff060000006463584d4237640000000000000009000000000102\ + 03040506070805050505050505050505050505050505050505050505050505050505\ + 05050505630000000103000000010000000103000000161543030000000038150300\ + 0000160a05040000001415596903000000c9175a"; + +#[derive(Arbitrary, Clone, Debug, Eq, PartialEq)] pub struct Addr(pub [u8; 32]); impl Addr { @@ -38,16 +31,23 @@ impl Addr { } } -#[derive(Clone, Debug, Eq, PartialEq)] -struct Foo { - a: u64, - b: Vec, - c: Bar, - d: bool, - e: BTreeMap, Vec>, +impl CanonicalDeserialize for Addr { + fn deserialize(deserializer: &mut impl CanonicalDeserializer) -> Result { + let mut data_slice: [u8; 32] = [0; 32]; + let data_decoded = deserializer.decode_bytes_with_len(32)?; + data_slice.copy_from_slice(data_decoded.as_slice()); + Ok(Addr::new(data_slice)) + } +} + +impl CanonicalSerialize for Addr { + fn serialize(&self, serializer: &mut impl CanonicalSerializer) -> Result<()> { + serializer.encode_raw_bytes(&self.0)?; + Ok(()) + } } -#[derive(Clone, Debug, Eq, PartialEq)] +#[derive(Arbitrary, Clone, Debug, Eq, PartialEq)] struct Bar { a: u64, b: Vec, @@ -55,15 +55,14 @@ struct Bar { d: u32, } -impl CanonicalSerialize for Foo { - fn serialize(&self, 
serializer: &mut impl CanonicalSerializer) -> Result<()> { - serializer - .encode_u64(self.a)? - .encode_variable_length_bytes(&self.b)? - .encode_struct(&self.c)? - .encode_bool(self.d)? - .encode_btreemap(&self.e)?; - Ok(()) +impl CanonicalDeserialize for Bar { + fn deserialize(deserializer: &mut impl CanonicalDeserializer) -> Result { + Ok(Bar { + a: deserializer.decode_u64()?, + b: deserializer.decode_variable_length_bytes()?, + c: deserializer.decode_struct()?, + d: deserializer.decode_u32()?, + }) } } @@ -72,179 +71,128 @@ impl CanonicalSerialize for Bar { serializer .encode_u64(self.a)? .encode_variable_length_bytes(&self.b)? - .encode_raw_bytes(&self.c.0)? + .encode_struct(&self.c)? .encode_u32(self.d)?; Ok(()) } } -impl CanonicalDeserialize for Foo { - fn deserialize(deserializer: &mut impl CanonicalDeserializer) -> Result { - let a = deserializer.decode_u64()?; - let b = deserializer.decode_variable_length_bytes()?; - let c: Bar = deserializer.decode_struct::()?; - let d: bool = deserializer.decode_bool()?; - let e: BTreeMap, Vec> = deserializer.decode_btreemap()?; - Ok(Foo { a, b, c, d, e }) - } +#[derive(Arbitrary, Clone, Debug, Eq, PartialEq)] +struct Foo { + a: u64, + b: Vec, + c: Bar, + d: bool, + e: BTreeMap, Vec>, } -impl CanonicalDeserialize for Bar { +impl CanonicalDeserialize for Foo { fn deserialize(deserializer: &mut impl CanonicalDeserializer) -> Result { - let a = deserializer.decode_u64()?; - let b = deserializer.decode_variable_length_bytes()?; - let c = deserializer.decode_bytes_with_len(32)?; - let mut cc: [u8; 32] = [0; 32]; - cc.copy_from_slice(c.as_slice()); - - let d = deserializer.decode_u32()?; - Ok(Bar { - a, - b, - c: Addr::new(cc), - d, + Ok(Foo { + a: deserializer.decode_u64()?, + b: deserializer.decode_variable_length_bytes()?, + c: deserializer.decode_struct()?, + d: deserializer.decode_bool()?, + e: deserializer.decode_btreemap()?, }) } } -#[test] -fn test_btreemap_encode() { - let mut map = BTreeMap::new(); - let value = 
vec![54, 20, 21, 200]; - let key1 = vec![0]; // after serialization: [1, 0] - let key2 = vec![0, 6]; // after serialization: [2, 0, 6] - let key3 = vec![1]; // after serialization: [1, 1] - let key4 = vec![2]; // after serialization: [1, 2] - map.insert(key1.clone(), value.clone()); - map.insert(key2.clone(), value.clone()); - map.insert(key3.clone(), value.clone()); - map.insert(key4.clone(), value.clone()); - - let serialized_bytes = SimpleSerializer::>::serialize(&map).unwrap(); - - let mut deserializer = SimpleDeserializer::new(&serialized_bytes); - - // ensure the order was encoded in lexicographic order - assert_eq!(deserializer.decode_u32().unwrap(), 4); - assert_eq!(deserializer.decode_variable_length_bytes().unwrap(), key1); - assert_eq!(deserializer.decode_variable_length_bytes().unwrap(), value); - assert_eq!(deserializer.decode_variable_length_bytes().unwrap(), key3); - assert_eq!(deserializer.decode_variable_length_bytes().unwrap(), value); - assert_eq!(deserializer.decode_variable_length_bytes().unwrap(), key4); - assert_eq!(deserializer.decode_variable_length_bytes().unwrap(), value); - assert_eq!(deserializer.decode_variable_length_bytes().unwrap(), key2); - assert_eq!(deserializer.decode_variable_length_bytes().unwrap(), value); +impl CanonicalSerialize for Foo { + fn serialize(&self, serializer: &mut impl CanonicalSerializer) -> Result<()> { + serializer + .encode_u64(self.a)? + .encode_variable_length_bytes(&self.b)? + .encode_struct(&self.c)? + .encode_bool(self.d)? 
+ .encode_btreemap(&self.e)?; + Ok(()) + } } -#[test] -fn test_serialization_roundtrip() { - let bar = Bar { - a: 50, - b: vec![10u8; 100], - c: Addr::new([3u8; 32]), - d: 12, - }; - - let mut map = BTreeMap::new(); - map.insert(vec![0, 56, 21], vec![22, 10, 5]); - map.insert(vec![1], vec![22, 21, 67]); - map.insert(vec![20, 21, 89, 105], vec![201, 23, 90]); - - let foo = Foo { - a: 1, - b: vec![32, 41, 190, 200, 2, 5, 90, 100, 123, 234, 159, 159, 101], - c: bar, - d: false, - e: map, - }; +proptest! { + #[test] + fn serializer_bar(value in any::()) { + test_helper::assert_canonical_encode_decode(&value); + } + #[test] + fn serializer_bool(value in any::()) { + test_helper::assert_canonical_encode_decode(&value); + } - let mut serializer = SimpleSerializer::>::new(); - foo.serialize(&mut serializer).unwrap(); - let serialized_bytes = serializer.get_output(); + #[test] + fn serialize_btreemap(value in any::, Vec>>()) { + test_helper::assert_canonical_encode_decode(&value); + } - let mut deserializer = SimpleDeserializer::new(&serialized_bytes); - let deserialized_foo = Foo::deserialize(&mut deserializer).unwrap(); - assert_eq!(foo, deserialized_foo); - assert_eq!(deserializer.position(), deserializer.len() as u64); - assert!(deserializer.is_empty()); -} + #[test] + fn serialize_byte_array(value in any::>()) { + test_helper::assert_canonical_encode_decode(&value); + } -#[test] -fn test_serialization_optional() { - let bar1: Option = Some(42); - let mut serializer = SimpleSerializer::>::new(); - serializer.encode_optional(&bar1).unwrap(); - let serialized_bytes = serializer.get_output(); + #[test] + fn serializer_foo(value in any::()) { + test_helper::assert_canonical_encode_decode(&value); + } - let mut deserializer = SimpleDeserializer::new(&serialized_bytes); - let de_bar1: Option = deserializer.decode_optional().unwrap(); - assert_eq!(de_bar1, bar1); + #[test] + fn serialize_i8(value in any::()) { + test_helper::assert_canonical_encode_decode(&value); + } - let 
bar2: Option = None; - let mut serializer2 = SimpleSerializer::>::new(); - serializer2.encode_optional(&bar2).unwrap(); - let serialized_bytes2 = serializer2.get_output(); + #[test] + fn serialize_i16(value in any::()) { + test_helper::assert_canonical_encode_decode(&value); + } - let mut deserializer2 = SimpleDeserializer::new(&serialized_bytes2); - let de_bar2: Option = deserializer2.decode_optional().unwrap(); - assert_eq!(de_bar2, bar2); -} + #[test] + fn serialize_i32(value in any::()) { + test_helper::assert_canonical_encode_decode(&value); + } -#[test] -fn test_encode_vec() { - let bar1 = Bar { - a: 55, - b: vec![10u8; 100], - c: Addr::new([3u8; 32]), - d: 77, - }; - let bar2 = Bar { - a: 123, - b: vec![1, 5, 20], - c: Addr::new([8u8; 32]), - d: 127, - }; + #[test] + fn serialize_i64(value in any::()) { + test_helper::assert_canonical_encode_decode(&value); + } - let mut vec = Vec::new(); - vec.push(bar1.clone()); - vec.push(bar2.clone()); - let mut serializer = SimpleSerializer::>::new(); - serializer.encode_vec(&vec).unwrap(); - let serialized_bytes = serializer.get_output(); + #[test] + fn serialize_string(value in any::()) { + test_helper::assert_canonical_encode_decode(&value); + } - let de_vec: Vec = SimpleDeserializer::deserialize(&serialized_bytes).unwrap(); + #[test] + fn serialize_tuple2(value in any::<(i16, String)>()) { + test_helper::assert_canonical_encode_decode(&value); + } - assert_eq!(2, de_vec.len()); - assert_eq!(bar1, de_vec[0]); - assert_eq!(bar2, de_vec[1]); + #[test] + fn serialize_tuple3(value in any::<(bool, u32, String)>()) { + test_helper::assert_canonical_encode_decode(&value); + } - // test Vec implementation - let mut serializer = SimpleSerializer::>::new(); - serializer.encode_struct(&vec).unwrap(); - let serialized_bytes = serializer.get_output(); - let de_vec: Vec = SimpleDeserializer::deserialize(&serialized_bytes).unwrap(); + #[test] + fn serialize_u8(value in any::()) { + 
test_helper::assert_canonical_encode_decode(&value); + } - assert_eq!(2, de_vec.len()); - assert_eq!(bar1, de_vec[0]); - assert_eq!(bar2, de_vec[1]); -} + #[test] + fn serialize_u16(value in any::()) { + test_helper::assert_canonical_encode_decode(&value); + } -#[test] -fn test_vec_impl() { - let mut vec: Vec = Vec::new(); - vec.push(std::i32::MIN); - vec.push(std::i32::MAX); - vec.push(100); + #[test] + fn serialize_u32(value in any::()) { + test_helper::assert_canonical_encode_decode(&value); + } - let mut serializer = SimpleSerializer::>::new(); - serializer.encode_struct(&vec).unwrap(); - let serialized_bytes = serializer.get_output(); - let de_vec: Vec = SimpleDeserializer::deserialize(&serialized_bytes).unwrap(); - assert_eq!(vec, de_vec); + #[test] + fn serialize_u64(value in any::()) { + test_helper::assert_canonical_encode_decode(&value); + } } #[test] -fn test_vectors_1() { +fn test_serialization_correctness_using_known_vector() { let bar = Bar { a: 100, b: vec![0, 1, 2, 3, 4, 5, 6, 7, 8], @@ -270,14 +218,52 @@ fn test_vectors_1() { let serialized_bytes = serializer.get_output(); // make sure we serialize into exact same bytes as before - assert_eq!(TEST_VECTOR_1, hex::encode(serialized_bytes)); + assert_eq!(TEST_VECTOR, hex::encode(serialized_bytes)); // make sure we can deserialize the test vector into expected struct - let test_vector_bytes = hex::decode(TEST_VECTOR_1).unwrap(); + let test_vector_bytes = hex::decode(TEST_VECTOR).unwrap(); let deserialized_foo: Foo = SimpleDeserializer::deserialize(&test_vector_bytes).unwrap(); assert_eq!(foo, deserialized_foo); } +#[test] +fn test_btreemap_lexicographic_order() { + let mut map = BTreeMap::new(); + let value = vec![54, 20, 21, 200]; + let key1 = vec![0]; // after serialization: [1, 0] + let key2 = vec![0, 6]; // after serialization: [2, 0, 6] + let key3 = vec![1]; // after serialization: [1, 1] + let key4 = vec![2]; // after serialization: [1, 2] + map.insert(key1.clone(), value.clone()); + 
map.insert(key2.clone(), value.clone()); + map.insert(key3.clone(), value.clone()); + map.insert(key4.clone(), value.clone()); + + let serialized_bytes = SimpleSerializer::>::serialize(&map).unwrap(); + + let mut deserializer = SimpleDeserializer::new(&serialized_bytes); + + // ensure the entries were encoded in lexicographic order of their serialized keys + assert_eq!(deserializer.decode_u32().unwrap(), 4); + assert_eq!(deserializer.decode_variable_length_bytes().unwrap(), key1); + assert_eq!(deserializer.decode_variable_length_bytes().unwrap(), value); + assert_eq!(deserializer.decode_variable_length_bytes().unwrap(), key3); + assert_eq!(deserializer.decode_variable_length_bytes().unwrap(), value); + assert_eq!(deserializer.decode_variable_length_bytes().unwrap(), key4); + assert_eq!(deserializer.decode_variable_length_bytes().unwrap(), value); + assert_eq!(deserializer.decode_variable_length_bytes().unwrap(), key2); + assert_eq!(deserializer.decode_variable_length_bytes().unwrap(), value); +} + +#[test] +fn test_serialization_optional() { + let bar1: Option = Some(42); + test_helper::assert_canonical_encode_decode(&bar1); + + let bar2: Option = None; + test_helper::assert_canonical_encode_decode(&bar2); +} + #[test] fn test_serialization_failure_cases() { // a vec longer than representable range should result in failure @@ -338,47 +324,3 @@ fn test_deserialization_failure_cases() { deserializer = SimpleDeserializer::new(&bool_bytes); assert!(deserializer.clone().decode_bool().is_err()); } - -#[test] -fn test_tuples() { - let input: (u32, u32) = (123, 456); - let mut serializer = SimpleSerializer::>::new(); - serializer.encode_tuple2(&input).unwrap(); - let serialized_bytes = serializer.get_output(); - - let mut deserializer = SimpleDeserializer::new(&serialized_bytes); - let output: Result<(u32, u32)> = deserializer.decode_tuple2(); - assert!(output.is_ok()); - assert_eq!(output.unwrap(), input); - - let bad_output: Result<(u32, u32)> = deserializer.decode_tuple2(); - 
assert!(bad_output.is_err()); -} - -#[test] -fn test_nested_tuples() { - let input: Vec<(u32, u32)> = vec![(123, 456)]; - let mut serializer = SimpleSerializer::>::new(); - serializer.encode_vec(&input).unwrap(); - let serialized_bytes = serializer.get_output(); - - let mut deserializer = SimpleDeserializer::new(&serialized_bytes); - let output: Result> = deserializer.decode_vec(); - assert!(output.is_ok()); - assert_eq!(output.unwrap(), input); - - let bad_output: Result<(u32, u32)> = deserializer.decode_tuple2(); - assert!(bad_output.is_err()); -} - -#[test] -fn test_strings() { - let input: &'static str = "Hello, World!"; - let mut serializer = SimpleSerializer::>::new(); - serializer.encode_string(input).unwrap(); - let serialized_bytes = serializer.get_output(); - - let mut deserializer = SimpleDeserializer::new(&serialized_bytes); - let output: Result = deserializer.decode_string(); - assert_eq!(output.unwrap(), input); -} diff --git a/common/canonical_serialization/src/test_helper.rs b/common/canonical_serialization/src/test_helper.rs index bb0dda1479ac..1dd76813c782 100644 --- a/common/canonical_serialization/src/test_helper.rs +++ b/common/canonical_serialization/src/test_helper.rs @@ -10,7 +10,9 @@ where { let serialized: Vec = SimpleSerializer::serialize(object).expect("Serialization should work"); - let deserialized: T = - SimpleDeserializer::deserialize(&serialized).expect("Deserialization should work"); + let mut deserializer = SimpleDeserializer::new(&serialized); + let deserialized = T::deserialize(&mut deserializer).expect("Deserialization should work"); assert_eq!(*object, deserialized); + assert_eq!(deserializer.position(), deserializer.len() as u64); + assert!(deserializer.is_empty()); }