From 62856a46eaa398c03bef7144a3f57d2c5a54a29d Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Thu, 20 Mar 2025 19:11:38 -0700 Subject: [PATCH 1/2] Parsing and binary writing for custom descriptors Implement text and binary parsing as well as binary writing for `descriptor` and `describes` clauses, as specified in the custom-descriptors proposal. Also simplify some neighboring code dealing with shared types as a drive-by. --- scripts/test/fuzzing.py | 2 + src/parser/contexts.h | 13 +++--- src/parser/parsers.h | 47 +++++++++++++++++--- src/wasm-binary.h | 6 ++- src/wasm/wasm-binary.cpp | 46 ++++++++++++++------ test/lit/basic/custom-descriptors.wast | 60 ++++++++++++++++++++++++++ 6 files changed, 147 insertions(+), 27 deletions(-) create mode 100644 test/lit/basic/custom-descriptors.wast diff --git a/scripts/test/fuzzing.py b/scripts/test/fuzzing.py index abb7da98002..ba36f41bcf5 100644 --- a/scripts/test/fuzzing.py +++ b/scripts/test/fuzzing.py @@ -109,6 +109,8 @@ 'coalesce-locals-exact.wast', 'remove-unused-brs-exact.wast', 'exact.wast', + # TODO: fuzzer support for custom descriptors + 'custom-descriptors.wast', ] diff --git a/src/parser/contexts.h b/src/parser/contexts.h index 3b9cb21c3e0..fa96c270670 100644 --- a/src/parser/contexts.h +++ b/src/parser/contexts.h @@ -982,7 +982,9 @@ struct ParseDeclsCtx : NullTypeParserCtx, NullInstrParserCtx { void addArrayType(ArrayT) {} void setOpen() {} void setShared() {} - Result<> addSubtype(HeapTypeT) { return Ok{}; } + void setDescribes(HeapTypeT) {} + void setDescriptor(HeapTypeT) {} + void setSupertype(HeapTypeT) {} void finishTypeDef(Name name, Index pos) { // TODO: type annotations typeDefs.push_back({name, pos, Index(typeDefs.size()), {}}); @@ -1157,10 +1159,11 @@ struct ParseTypeDefsCtx : TypeParserCtx { void setShared() { builder[index].setShared(); } - Result<> addSubtype(HeapTypeT super) { - builder[index].subTypeOf(super); - return Ok{}; - } + void setDescribes(HeapTypeT desc) { builder[index].describes(desc); } + + void setDescriptor(HeapTypeT desc) { builder[index].descriptor(desc); } + + void setSupertype(HeapTypeT super) { builder[index].subTypeOf(super); } void finishTypeDef(Name name, Index pos) { names[index++].name = name; } diff --git a/src/parser/parsers.h b/src/parser/parsers.h index b54de9979c6..26c20767938 100644 --- a/src/parser/parsers.h +++ b/src/parser/parsers.h @@ -357,6 +357,8 @@ Result typeuse(Ctx&, bool allowNames = true); MaybeResult inlineImport(Lexer&); Result> inlineExports(Lexer&); template Result<> comptype(Ctx&); +template Result<> describedcomptype(Ctx&); +template Result<> describingcomptype(Ctx&); template Result<> sharecomptype(Ctx&); template Result<> subtype(Ctx&); template MaybeResult<> typedef_(Ctx&); @@ -2940,19 +2942,50 @@ template Result<> comptype(Ctx& ctx) { return ctx.in.err("expected type description"); } -// sharecomptype ::= '(' 'shared' t:comptype ')' => shared t -// | t:comptype => unshared t +// describedcomptype ::= '(' 'descriptor' typeidx ct:comptype ')' +// | ct:comptype +template Result<> describedcomptype(Ctx& ctx) { + if (ctx.in.takeSExprStart("descriptor"sv)) { + auto x = typeidx(ctx); + CHECK_ERR(x); + ctx.setDescriptor(*x); + CHECK_ERR(comptype(ctx)); + if (!ctx.in.takeRParen()) { + return ctx.in.err("expected end of described type"); + } + return Ok{}; + } + return comptype(ctx); +} + +// describingcomptype ::= '(' 'describes' typeidx ct:describedcomptype ')' +// | ct: describedcomptype +template Result<> describingcomptype(Ctx& ctx) { + if (ctx.in.takeSExprStart("describes"sv)) { + auto x = typeidx(ctx); + CHECK_ERR(x); + ctx.setDescribes(*x); + CHECK_ERR(describedcomptype(ctx)); + if (!ctx.in.takeRParen()) { + return ctx.in.err("expected end of describing type"); + } + return Ok{}; + } + return describedcomptype(ctx); +} + +// sharecomptype ::= '(' 'shared' t:describingcomptype ')' => shared t +// | t:describingcomptype => unshared t template Result<> sharecomptype(Ctx& ctx) { if (ctx.in.takeSExprStart("shared"sv)) { ctx.setShared(); - CHECK_ERR(comptype(ctx)); + CHECK_ERR(describingcomptype(ctx)); if (!ctx.in.takeRParen()) { return ctx.in.err("expected end of shared comptype"); } - } else { - CHECK_ERR(comptype(ctx)); + return Ok{}; } - return Ok{}; + return describingcomptype(ctx); } // subtype ::= '(' 'sub' typeidx? sharecomptype ')' | sharecomptype @@ -2963,7 +2996,7 @@ template Result<> subtype(Ctx& ctx) { } if (auto super = maybeTypeidx(ctx)) { CHECK_ERR(super); - CHECK_ERR(ctx.addSubtype(*super)); + ctx.setSupertype(*super); } CHECK_ERR(sharecomptype(ctx)); diff --git a/src/wasm-binary.h b/src/wasm-binary.h index 77e13326999..9d99e20ce75 100644 --- a/src/wasm-binary.h +++ b/src/wasm-binary.h @@ -349,9 +349,11 @@ enum EncodedType { Array = 0x5e, Sub = 0x50, SubFinal = 0x4f, - SharedDef = 0x65, - Shared = -0x1b, // Also 0x65 as an SLEB128 + Shared = 0x65, + SharedLEB = -0x1b, // Also 0x65 as an SLEB128 Rec = 0x4e, + Descriptor = 0x4d, + Describes = 0x4c, // block_type Empty = -0x40, // 0x40 }; diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index de6a5867b93..bd0c474ca34 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -285,7 +285,15 @@ void WasmBinaryWriter::writeTypes() { } } if (type.isShared()) { - o << uint8_t(BinaryConsts::EncodedType::SharedDef); + o << uint8_t(BinaryConsts::EncodedType::Shared); + } + if (auto desc = type.getDescribedType()) { + o << uint8_t(BinaryConsts::EncodedType::Describes); + writeHeapType(*desc); + } + if (auto desc = type.getDescriptorType()) { + o << uint8_t(BinaryConsts::EncodedType::Descriptor); + writeHeapType(*desc); } switch (type.getKind()) { case HeapTypeKind::Func: { @@ -1680,7 +1688,7 @@ void WasmBinaryWriter::writeHeapType(HeapType type) { int ret = 0; if (type.isShared()) { - o << S32LEB(BinaryConsts::EncodedType::Shared); + o << uint8_t(BinaryConsts::EncodedType::Shared); } switch (type.getBasic(Unshared)) { case HeapType::ext: @@ -2206,7 +2214,7 @@ HeapType WasmBinaryReader::getHeapType() { return types[type]; } auto share = Unshared; - if (type == BinaryConsts::EncodedType::Shared) { + if (type == BinaryConsts::EncodedType::SharedLEB) { share = Shared; type = getS64LEB(); // TODO: Actually s33 } @@ -2467,7 +2475,6 @@ void WasmBinaryReader::readTypes() { builder.createRecGroup(i, groupSize); form = getInt8(); } - std::optional superIndex; if (form == BinaryConsts::EncodedType::Sub || form == BinaryConsts::EncodedType::SubFinal) { if (form == BinaryConsts::EncodedType::Sub) { @@ -2479,14 +2486,34 @@ void WasmBinaryReader::readTypes() { throwError("Invalid type definition with " + std::to_string(supers) + " supertypes"); } - superIndex = getU32LEB(); + auto superIdx = getU32LEB(); + if (superIdx >= builder.size()) { + throwError("invalid supertype index: " + std::to_string(superIdx)); + } + builder[i].subTypeOf(builder[superIdx]); } form = getInt8(); } - if (form == BinaryConsts::SharedDef) { + if (form == BinaryConsts::EncodedType::Shared) { builder[i].setShared(); form = getInt8(); } + if (form == BinaryConsts::EncodedType::Describes) { + auto descIdx = getU32LEB(); + if (descIdx >= builder.size()) { + throwError("invalid described type index: " + std::to_string(descIdx)); + } + builder[i].describes(builder[descIdx]); + form = getInt8(); + } + if (form == BinaryConsts::EncodedType::Descriptor) { + auto descIdx = getU32LEB(); + if (descIdx >= builder.size()) { + throwError("invalid descriptor type index: " + std::to_string(descIdx)); + } + builder[i].descriptor(builder[descIdx]); + form = getInt8(); + } if (form == BinaryConsts::EncodedType::Func) { builder[i] = readSignatureDef(); } else if (form == BinaryConsts::EncodedType::Cont) { @@ -2498,13 +2525,6 @@ void WasmBinaryReader::readTypes() { } else { throwError("Bad type form " + std::to_string(form)); } - if (superIndex) { - if (*superIndex > builder.size()) { - throwError("Out of bounds supertype index " + - std::to_string(*superIndex)); - } - builder[i].subTypeOf(builder[*superIndex]); - } } auto result = builder.build(); diff --git a/test/lit/basic/custom-descriptors.wast b/test/lit/basic/custom-descriptors.wast new file mode 100644 index 00000000000..dfec0b22413 --- /dev/null +++ b/test/lit/basic/custom-descriptors.wast @@ -0,0 +1,60 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. + +;; RUN: wasm-opt %s -all -o %t.text.wast -g -S +;; RUN: wasm-as %s -all -g -o %t.wasm +;; RUN: wasm-dis %t.wasm -all -o %t.bin.wast +;; RUN: wasm-as %s -all -o %t.nodebug.wasm +;; RUN: wasm-dis %t.nodebug.wasm -all -o %t.bin.nodebug.wast +;; RUN: cat %t.text.wast | filecheck %s --check-prefix=CHECK-TEXT +;; RUN: cat %t.bin.wast | filecheck %s --check-prefix=CHECK-BIN +;; RUN: cat %t.bin.nodebug.wast | filecheck %s --check-prefix=CHECK-BIN-NODEBUG + +(module + (rec + ;; CHECK-TEXT: (rec + ;; CHECK-TEXT-NEXT: (type $described (descriptor $middle (struct))) + ;; CHECK-BIN: (rec + ;; CHECK-BIN-NEXT: (type $described (descriptor $middle (struct))) + (type $described (descriptor $middle (struct))) + ;; CHECK-TEXT: (type $middle (describes $described (descriptor $describing (struct)))) + ;; CHECK-BIN: (type $middle (describes $described (descriptor $describing (struct)))) + (type $middle (describes $described (descriptor $describing (struct)))) + ;; CHECK-TEXT: (type $describing (describes $middle (struct))) + ;; CHECK-BIN: (type $describing (describes $middle (struct))) + (type $describing (describes $middle (struct))) + ) + + (rec + ;; CHECK-TEXT: (rec + ;; CHECK-TEXT-NEXT: (type $shared-described (shared (descriptor $shared-describing (struct)))) + ;; CHECK-BIN: (rec + ;; CHECK-BIN-NEXT: (type $shared-described (shared (descriptor $shared-describing (struct)))) + (type $shared-described (shared (descriptor $shared-describing (struct)))) + ;; CHECK-TEXT: (type $shared-describing (shared (describes $shared-described (struct)))) + ;; CHECK-BIN: (type $shared-describing (shared (describes $shared-described (struct)))) + (type $shared-describing (shared (describes $shared-described (struct)))) + ) + + + ;; CHECK-TEXT: (global $g (ref null $described) (ref.null none)) + ;; CHECK-BIN: (global $g (ref null $described) (ref.null none)) + (global $g (ref null $described) (ref.null none)) + ;; CHECK-TEXT: (global $shared (ref null $shared-describing) (ref.null (shared none))) + ;; CHECK-BIN: (global $shared (ref null $shared-describing) (ref.null (shared none))) + (global $shared (ref null $shared-describing) (ref.null (shared none))) +) +;; CHECK-BIN-NODEBUG: (rec +;; CHECK-BIN-NODEBUG-NEXT: (type $0 (descriptor $1 (struct))) + +;; CHECK-BIN-NODEBUG: (type $1 (describes $0 (descriptor $2 (struct)))) + +;; CHECK-BIN-NODEBUG: (type $2 (describes $1 (struct))) + +;; CHECK-BIN-NODEBUG: (rec +;; CHECK-BIN-NODEBUG-NEXT: (type $3 (shared (descriptor $4 (struct)))) + +;; CHECK-BIN-NODEBUG: (type $4 (shared (describes $3 (struct)))) + +;; CHECK-BIN-NODEBUG: (global $global$0 (ref null $0) (ref.null none)) + +;; CHECK-BIN-NODEBUG: (global $global$1 (ref null $4) (ref.null (shared none))) From c79dfec3ea29777dfce1009a554fe02c98d4366e Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Fri, 21 Mar 2025 15:33:26 -0700 Subject: [PATCH 2/2] fix --- src/wasm/wasm-binary.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index bd0c474ca34..c3b6d310d12 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -2349,7 +2349,7 @@ void WasmBinaryReader::readTypes() { auto readHeapType = [&]() -> HeapType { int64_t htCode = getS64LEB(); // TODO: Actually s33 auto share = Unshared; - if (htCode == BinaryConsts::EncodedType::Shared) { + if (htCode == BinaryConsts::EncodedType::SharedLEB) { share = Shared; htCode = getS64LEB(); // TODO: Actually s33 }