Skip to content
Open
2 changes: 0 additions & 2 deletions check.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,8 +221,6 @@ def check_expected(actual, expected):

check_expected(actual, expected)

run_spec_test(wast)

# check binary format. here we can verify execution of the final
# result, no need for an output verification
actual = ''
Expand Down
9 changes: 5 additions & 4 deletions scripts/test/shared.py
Original file line number Diff line number Diff line change
Expand Up @@ -443,13 +443,14 @@ def get_tests(test_dir, extensions=[], recursive=False):
'imports.wast', # Missing validation of missing function on instantiation
'proposals/threads/imports.wast', # Missing memory type validation on instantiation
'linking.wast', # Missing function type validation on instantiation
'memory.wast', # Requires wast `module definition` support
'proposals/threads/memory.wast', # Missing memory type validation on instantiation
'memory64-imports.wast', # Missing validation on instantiation
'annotations.wast', # String annotations IDs should be allowed
'id.wast', # Empty IDs should be disallowed
'instance.wast', # Requires wast `module definition` support
'table64.wast', # Requires wast `module definition` support
# Requires correct handling of tag imports from different instances of the same module,
# ref.null wast constants, and splitting for module instances
'instance.wast',
'table64.wast', # Requires validations for table size
'table_grow.wast', # Incorrect table linking semantics in interpreter
'tag.wast', # Non-empty tag results allowed by stack switching
'try_table.wast', # Requires try_table interpretation
Expand All @@ -473,7 +474,7 @@ def get_tests(test_dir, extensions=[], recursive=False):
'type-rec.wast', # Missing function type validation on instantiation
'type-subtyping.wast', # ShellExternalInterface::callTable does not handle subtyping
'call_indirect.wast', # Bug with 64-bit inline element segment parsing
'memory64.wast', # Requires wast `module definition` support
'memory64.wast', # Requires validations for memory size
'imports0.wast', # Missing memory type validation on instantiation
'imports2.wast', # Missing memory type validation on instantiation
'imports3.wast', # Missing memory type validation on instantiation
Expand Down
21 changes: 13 additions & 8 deletions scripts/test/support.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,14 @@ def untar(tarfile, outdir):

QUOTED = re.compile(r'\(module\s*(\$\S*)?\s+(quote|binary)')

MODULE_DEFINITION_OR_INSTANCE = re.compile(r'(?m)\(module\s+(instance|definition)')


def split_wast(wastFile):
'''
Returns a list of pairs of module definitions and assertions.
Module invalidity tests, as well as (module definition ...) and (module instance ...) are skipped.
'''
# if it's a binary, leave it as is, we can't split it
wast = None
if not wastFile.endswith('.wasm'):
Expand Down Expand Up @@ -128,7 +134,7 @@ def to_end(j):
return j

i = 0
ignoring_quoted = False
ignoring_assertions = False
while i >= 0:
start = wast.find('(', i)
if start >= 0 and wast[start + 1] == ';':
Expand All @@ -146,17 +152,17 @@ def to_end(j):
break
i = to_end(start + 1)
chunk = wast[start:i]
if QUOTED.match(chunk):
if QUOTED.match(chunk) or MODULE_DEFINITION_OR_INSTANCE.match(chunk):
# There may be assertions after this quoted module, but we aren't
# returning the module, so we need to skip the assertions as well.
ignoring_quoted = True
ignoring_assertions = True
continue
if chunk.startswith('(module'):
ignoring_quoted = False
ignoring_assertions = False
ret += [(chunk, [])]
elif chunk.startswith('(assert_invalid'):
continue
elif chunk.startswith(('(assert', '(invoke', '(register')) and not ignoring_quoted:
elif chunk.startswith(('(assert', '(invoke', '(register')) and not ignoring_assertions:
# ret may be empty if there are some asserts before the first
# module. in that case these are asserts *without* a module, which
# are valid (they may check something that doesn't refer to a module
Expand Down Expand Up @@ -190,14 +196,13 @@ def run_command(cmd, expected_status=0, stderr=None,
out, err = proc.communicate()
code = proc.returncode
if expected_status is not None and code != expected_status:
raise Exception(('run_command failed (%s)' % code, out + str(err or '')))
raise Exception(f"run_command `{' '.join(cmd)}` failed ({code}) {err or ''}")
if expected_err is not None:
if err_ignore is not None:
err = "\n".join([line for line in err.split('\n') if err_ignore not in line])
err_correct = expected_err in err if err_contains else expected_err == err
if not err_correct:
raise Exception(('run_command unexpected stderr',
"expected '%s', actual '%s'" % (expected_err, err)))
raise Exception(f"run_command unexpected stderr. Expected '{expected_err}', actual '{err}'")
return out


Expand Down
4 changes: 3 additions & 1 deletion src/parser/parse-1-decls.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@

namespace wasm::WATParser {

Result<> parseDecls(ParseDeclsCtx& decls) { return module(decls); }
Result<> parseModule(ParseDeclsCtx& decls) { return module(decls); }

Result<> parseModuleBody(ParseDeclsCtx& decls) { return moduleBody(decls); }

} // namespace wasm::WATParser
13 changes: 10 additions & 3 deletions src/parser/parsers.h
Original file line number Diff line number Diff line change
Expand Up @@ -3881,6 +3881,15 @@ template<typename Ctx> MaybeResult<> modulefield(Ctx& ctx) {
return ctx.in.err("unrecognized module field");
}

// (m:modulefield)*
template<typename Ctx> Result<> moduleBody(Ctx& ctx) {
while (auto field = modulefield(ctx)) {
CHECK_ERR(field);
}

return Ok{};
}

// module ::= '(' 'module' id? (m:modulefield)* ')'
// | (m:modulefield)* eof
template<typename Ctx> Result<> module(Ctx& ctx) {
Expand All @@ -3892,9 +3901,7 @@ template<typename Ctx> Result<> module(Ctx& ctx) {
}
}

while (auto field = modulefield(ctx)) {
CHECK_ERR(field);
}
CHECK_ERR(moduleBody(ctx));

if (outer && !ctx.in.takeRParen()) {
return ctx.in.err("expected end of module");
Expand Down
107 changes: 87 additions & 20 deletions src/parser/wast-parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,16 +88,21 @@ Result<Action> action(Lexer& in) {
return in.err("expected action");
}

// (module id? binary string*)
// (module id? quote string*)
// (module ...)
// (module id binary string*)
// (module id quote string*)
// (module definition id? ...)
// (module definition id? binary ...)
Result<WASTModule> wastModule(Lexer& in, bool maybeInvalid = false) {
Lexer reset = in;
if (!in.takeSExprStart("module"sv)) {
return in.err("expected module");
}
// TODO: use ID?
[[maybe_unused]] auto id = in.takeID();

bool isDefinition = in.takeKeyword("definition"sv);

// We'll read this again in the 'inline module' case
(void)in.takeID();

QuotedModuleType type;
if (in.takeKeyword("quote"sv)) {
type = QuotedModuleType::Text;
Expand All @@ -118,15 +123,31 @@ Result<WASTModule> wastModule(Lexer& in, bool maybeInvalid = false) {
}
}
std::string mod(reset.next().substr(0, in.getPos() - reset.getPos()));
return QuotedModule{QuotedModuleType::Text, mod};
return WASTModule{isDefinition, QuotedModule{QuotedModuleType::Text, mod}};
} else {
// This is a normal inline module that should be parseable. Reset to the
// start and parse it normally.
// In this case the module is mostly valid WAT, unless it is a module
// definition in which case it will begin with (module definition ...)
in = std::move(reset);

// We already checked this before resetting
if (!in.takeSExprStart("module"sv)) {
return in.err("expected module");
}

bool isDefinition = in.takeKeyword("definition"sv);

auto wasm = std::make_shared<Module>();
if (auto id = in.takeID()) {
wasm->name = *id;
}

wasm->features = FeatureSet::All;
CHECK_ERR(parseModule(*wasm, in));
return wasm;
CHECK_ERR(parseModuleBody(*wasm, in));
if (!in.takeRParen()) {
return in.err("expected end of module");
}

return WASTModule{isDefinition, wasm};
}

// We have a quote or binary module. Collect its contents.
Expand All @@ -139,7 +160,7 @@ Result<WASTModule> wastModule(Lexer& in, bool maybeInvalid = false) {
return in.err("expected end of module");
}

return QuotedModule{type, ss.str()};
return WASTModule{isDefinition, QuotedModule{type, ss.str()}};
}

Result<NaNKind> nan(Lexer& in) {
Expand Down Expand Up @@ -440,17 +461,57 @@ MaybeResult<Register> register_(Lexer& in) {
return in.err("expected name");
}

// TODO: Do we need to use this optional id?
in.takeID();
auto instanceName = in.takeID();

if (!in.takeRParen()) {
// TODO: handle optional module id.
return in.err("expected end of register command");
}
return Register{*name};

return Register{*name, instanceName};
}

// (module instance instance_name? module_name?)
MaybeResult<ModuleInstantiation> instantiation(Lexer& in) {
if (!in.takeSExprStart("module"sv)) {
std::optional<std::string_view> actual = in.peekKeyword();
return in.err((std::stringstream() << "expected `module` keyword but got "
<< actual.value_or("<not a keyword>"))
.str());
}

if (!in.takeKeyword("instance"sv)) {
// This is not a module instance and probably a module instead.
return {};
Comment on lines +483 to +484
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's move the lexer reset to back before the (module from the caller to here.

}

auto instanceId = in.takeID();
auto moduleId = in.takeID();

if (!in.takeRParen()) {
return in.err("expected end of module instantiation");
}

return ModuleInstantiation{moduleId, instanceId};
}

// module | register | action | assertion
using ModuleOrInstantiation = std::variant<ModuleInstantiation, WASTModule>;

// (module instance ...) | (module ...)
Result<ModuleOrInstantiation> moduleOrInstantiation(Lexer& in) {
auto reset = in;

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Even though they both start with (module, modules and instantiations are parsed very differently and behave very differently, so I think it would make sense to treat them as different things rather than bundling them together like this. That will simplify command below by removing the need for the std::visit variant conversion.

Copy link
Member Author

@stevenfontanella stevenfontanella Nov 25, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mostly agree, but I chose to bundle them together since we must attempt to parse (module instance ...) before attempting (module name? ...), otherwise we'll try to parse the word instance as a name and fail rather than backtracking. Does it make sense to bundle them to make the ordering clear? I agree it's a little unfortunate that the module / module instance parsers need to be aware of each other a little bit but I don't see a way around it.

The only alternative I see is to remove the ordering dependency by letting the module (not instance) parser backtrack if it sees the word instance. That lets the ordering work either way, but that means that both parsers still need to know about each other in which case it makes sense to me to bundle them in the same function.

if (auto inst = instantiation(in)) {
CHECK_ERR(inst);
return *inst;
}
in = reset;

auto module = wastModule(in);
CHECK_ERR(module);
return *module;
}

// instantiate | module | register | action | assertion
Result<WASTCommand> command(Lexer& in) {
if (auto cmd = register_(in)) {
CHECK_ERR(cmd);
Expand All @@ -464,9 +525,14 @@ Result<WASTCommand> command(Lexer& in) {
CHECK_ERR(cmd);
return *cmd;
}
auto mod = wastModule(in);
CHECK_ERR(mod);
return *mod;
auto cmd = moduleOrInstantiation(in);
CHECK_ERR(cmd);

return std::visit(
[](const auto& modOrInstantiation) -> WASTCommand {
return modOrInstantiation;
},
*cmd);
}

#pragma GCC diagnostic push
Expand All @@ -487,7 +553,8 @@ Result<WASTScript> wast(Lexer& in) {
// No, that wasn't the problem. Return the original error.
return Err{err->msg};
}
cmds.push_back({WASTModule{std::move(wasm)}, line});
cmds.push_back(
{WASTModule{/*isDefinition=*/false, std::move(wasm)}, line});
return cmds;
}
CHECK_ERR(cmd);
Expand Down
4 changes: 3 additions & 1 deletion src/parser/wat-parser-internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@

namespace wasm::WATParser {

Result<> parseDecls(ParseDeclsCtx& decls);
Result<> parseModule(ParseDeclsCtx& decls);

Result<> parseModuleBody(ParseDeclsCtx& decls);

Result<> parseTypeDefs(
ParseDeclsCtx& decls,
Expand Down
Loading
Loading