Skip to content

Commit

Permalink
add support for source phase imports
Browse files Browse the repository at this point in the history
  • Loading branch information
guybedford committed Mar 25, 2024
1 parent f44438c commit 96347a4
Show file tree
Hide file tree
Showing 7 changed files with 234 additions and 115 deletions.
21 changes: 18 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ A JS module syntax lexer used in [es-module-shims](https://github.com/guybedford

Outputs the list of exports and locations of import specifiers, including dynamic import and import meta handling.

Supports new syntax features including import attributes and source phase imports.

A very small single JS file (4KiB gzipped) that includes inlined Web Assembly for very fast source analysis of ECMAScript module syntax only.

For an example of the performance, Angular 1 (720KiB) is fully parsed in 5ms, in comparison to the fastest JS parser, Acorn, which takes over 100ms.
Expand All @@ -20,6 +22,8 @@ _Comprehensively handles the JS language grammar while remaining small and fast.
npm install es-module-lexer
```

See [types/lexer.d.ts](types/lexer.d.ts) for the type definitions.

For use in CommonJS:

```js
Expand Down Expand Up @@ -60,6 +64,10 @@ import { init, parse } from 'es-module-lexer';
// Comments provided to demonstrate edge cases
import /*comment!*/ ( 'asdf', { assert: { type: 'json' }});
import /*comment!*/.meta.asdf;
// Source phase imports:
import source mod from './mod.wasm';
import.source('./mod.wasm');
`;

const [imports, exports] = parse(source, 'optional-sourcename');
Expand Down Expand Up @@ -98,10 +106,10 @@ import { init, parse } from 'es-module-lexer';
// Returns -1
exports[2].le;

// Dynamic imports are indicated by imports[2].d > -1
// In this case the "d" index is the start of the dynamic import bracket
// Import type is provided by `t` value
// (1 for static, 2 for dynamic)
// Returns true
imports[2].d > -1;
imports[2].t == 2;

// Returns "asdf" (only for string literal dynamic imports)
imports[2].n
Expand All @@ -128,6 +136,13 @@ import { init, parse } from 'es-module-lexer';
// Returns "import /*comment!*/.meta"
source.slice(imports[4].s, imports[4].e);
// ss and se are the same for import meta

// Returns "'./mod.wasm'"
source.slice(imports[5].s, imports[5].e);

// Import types 4 and 5 indicate static and dynamic source phase imports, respectively
imports[5].t === 4;
imports[6].t === 5;
})();
```

Expand Down
4 changes: 2 additions & 2 deletions chompfile.toml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ dep = 'src/lexer.ts'
# even when we set "source-maps = false", so for now we have ejected the
# template to its raw "run" command, and added an "rm" step.
run = '''
node ./node_modules/@swc/cli/bin/swc.js $DEP -o $TARGET --no-swcrc -C jsc.parser.syntax=typescript -C jsc.parser.importAssertions=true -C jsc.parser.topLevelAwait=true -C jsc.parser.importMeta=true -C jsc.parser.privateMethod=true -C jsc.parser.dynamicImport=true -C jsc.target=es2016 -C jsc.experimental.keepImportAttributes=true
node ./node_modules/@swc/cli/bin/swc.js $DEP -o $TARGET --no-swcrc -C jsc.parser.syntax=typescript -C jsc.parser.importAssertions=true -C jsc.parser.topLevelAwait=true -C jsc.parser.importMeta=true -C jsc.parser.privateMethod=true -C jsc.parser.dynamicImport=true -C jsc.target=es2016 -C jsc.experimental.keepImportAssertions=true
'''

[[task]]
Expand Down Expand Up @@ -96,7 +96,7 @@ deps = ['src/lexer.h', 'src/lexer.c']
run = """
${{ WASI_PATH }}/bin/clang src/lexer.c --sysroot=${{ WASI_PATH }}/share/wasi-sysroot -o lib/lexer.wasm -nostartfiles \
"-Wl,-z,stack-size=13312,--no-entry,--compress-relocations,--strip-all,\
--export=parse,--export=sa,--export=e,--export=ri,--export=re,--export=is,--export=ie,--export=ss,--export=ip,--export=se,--export=ai,--export=id,--export=es,--export=ee,--export=els,--export=ele,--export=f,--export=ms,--export=__heap_base" \
--export=parse,--export=sa,--export=e,--export=ri,--export=re,--export=is,--export=ie,--export=it,--export=ss,--export=ip,--export=se,--export=ai,--export=id,--export=es,--export=ee,--export=els,--export=ele,--export=f,--export=ms,--export=__heap_base" \
-Wno-logical-op-parentheses -Wno-parentheses \
-Oz
"""
Expand Down
Binary file modified lib/lexer.wasm
Binary file not shown.
224 changes: 120 additions & 104 deletions src/lexer.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ static const char16_t BREA[] = { 'b', 'r', 'e', 'a' };
static const char16_t CONTIN[] = { 'c', 'o', 'n', 't', 'i', 'n' };
static const char16_t SYNC[] = {'s', 'y', 'n', 'c'};
static const char16_t UNCTION[] = {'u', 'n', 'c', 't', 'i', 'o', 'n'};
static const char16_t OURCE[] = {'o', 'u', 'r', 'c', 'e'};

// Note: parsing is based on the _assumption_ that the source is already valid
bool parse () {
Expand Down Expand Up @@ -239,124 +240,136 @@ void tryParseImportStatement () {

char16_t ch = commentWhitespace(true);

switch (ch) {
// dynamic import
case '(':
openTokenStack[openTokenDepth].token = ImportParen;
openTokenStack[openTokenDepth++].pos = pos;
if (*lastTokenPos == '.')
return;
// dynamic import indicated by positive d
char16_t* dynamicPos = pos;
// try parse a string, to record a safe dynamic import string
pos++;
ch = commentWhitespace(true);
addImport(startPos, pos, 0, dynamicPos);
dynamicImportStack[dynamicImportStackDepth++] = import_write_head;
if (ch == '\'') {
stringLiteral(ch);
}
else if (ch == '"') {
stringLiteral(ch);
}
else {
pos--;
return;
}
pos++;
char16_t* endPos = pos;
bool source_keyword = false;

if (ch == '.') {
// import.meta
pos++;
ch = commentWhitespace(true);
// import.meta indicated by d == -2
if (ch == 'm' && memcmp(pos + 1, &ETA[0], 3 * 2) == 0 && (isSpread(lastTokenPos) || *lastTokenPos != '.')) {
addImport(startPos, startPos, pos + 4, IMPORT_META);
return;
}
else if (ch == 's' && memcmp(pos + 1, &OURCE[0], 5 * 2) == 0 && (isSpread(lastTokenPos) || *lastTokenPos != '.')) {
source_keyword = true;
pos += 6;
ch = commentWhitespace(true);
if (ch == ',') {
pos++;
ch = commentWhitespace(true);
import_write_head->end = endPos;
import_write_head->assert_index = pos;
import_write_head->safe = true;
pos--;
}
else if (ch == ')') {
openTokenDepth--;
import_write_head->end = endPos;
import_write_head->statement_end = pos + 1;
import_write_head->safe = true;
dynamicImportStackDepth--;
}
else {
pos--;
}
}
else {
return;
// import.meta
case '.':
}
}
else if (pos > startPos + 6 && ch == 's' && memcmp(pos + 1, &OURCE[0], 5 * 2) == 0 && isBrOrWs(*(pos + 6))) {
source_keyword = true;
pos += 6;
ch = commentWhitespace(true);
}

// dynamic import
if (ch == '(') {
openTokenStack[openTokenDepth].token = ImportParen;
openTokenStack[openTokenDepth++].pos = pos;
if (*lastTokenPos == '.')
return;
// dynamic import indicated by positive d
char16_t* dynamicPos = pos;
// try parse a string, to record a safe dynamic import string
pos++;
ch = commentWhitespace(true);
addImport(startPos, pos, 0, dynamicPos);
if (source_keyword)
import_write_head->import_ty = DynamicSourcePhase;
dynamicImportStack[dynamicImportStackDepth++] = import_write_head;
if (ch == '\'') {
stringLiteral(ch);
}
else if (ch == '"') {
stringLiteral(ch);
}
else {
pos--;
return;
}
pos++;
char16_t* endPos = pos;
ch = commentWhitespace(true);
if (ch == ',') {
pos++;
ch = commentWhitespace(true);
// import.meta indicated by d == -2
if (ch == 'm' && memcmp(pos + 1, &ETA[0], 3 * 2) == 0 && (isSpread(lastTokenPos) || *lastTokenPos != '.'))
addImport(startPos, startPos, pos + 4, IMPORT_META);
import_write_head->end = endPos;
import_write_head->assert_index = pos;
import_write_head->safe = true;
pos--;
}
else if (ch == ')') {
openTokenDepth--;
import_write_head->end = endPos;
import_write_head->statement_end = pos + 1;
import_write_head->safe = true;
dynamicImportStackDepth--;
}
else {
pos--;
}
return;
}

if (ch == '{' && !source_keyword) {
// import statement only permitted at base-level
if (openTokenDepth != 0) {
pos--;
return;
}

default:
// no space after "import" -> not an import keyword
if (pos == startPos + 6) {
pos--;
break;
}
case '"':
case '\'':
case '*': {
// import statement only permitted at base-level
if (openTokenDepth != 0) {
pos--;
return;
}
while (pos < end) {
ch = *pos;
if (isQuote(ch)) {
readImportString(startPos, ch);
return;
}
while (pos < end) {
ch = commentWhitespace(true);
if (isQuote(ch)) {
stringLiteral(ch);
} else if (ch == '}') {
pos++;
break;
}
syntaxError();
break;
pos++;
}

case '{': {
// import statement only permitted at base-level
if (openTokenDepth != 0) {
pos--;
return;
}

while (pos < end) {
ch = commentWhitespace(true);
ch = commentWhitespace(true);
if (ch == 'f' && memcmp(pos + 1, &ROM[0], 3 * 2) != 0) {
syntaxError();
return;
}

if (isQuote(ch)) {
stringLiteral(ch);
} else if (ch == '}') {
pos++;
break;
}
pos += 4;
ch = commentWhitespace(true);

pos++;
}
if (!isQuote(ch)) {
return syntaxError();
}

ch = commentWhitespace(true);
if (ch == 'f' && memcmp(pos + 1, &ROM[0], 3 * 2) != 0) {
syntaxError();
break;
readImportString(startPos, ch, false);
}
else {
if (source_keyword || !(ch == '"' || ch == '\'' || ch == '*')) {
// no space after "import" -> not an import keyword
if (pos == startPos + (source_keyword ? 12 : 6)) {
pos--;
return;
}

pos += 4;
ch = commentWhitespace(true);

if (!isQuote(ch)) {
return syntaxError();
}
// import statement only permitted at base-level
if (openTokenDepth != 0 ) {
pos--;
return;
}
while (pos < end) {
ch = *pos;
if (isQuote(ch)) {
readImportString(startPos, ch, source_keyword);
return;
}

readImportString(startPos, ch);

break;
pos++;
}
syntaxError();
}
}

Expand Down Expand Up @@ -572,7 +585,7 @@ void tryParseExportStatement () {
// from ...
if (ch == 'f' && memcmp(pos + 1, &ROM[0], 3 * 2) == 0) {
pos += 4;
readImportString(sStartPos, commentWhitespace(true));
readImportString(sStartPos, commentWhitespace(true), false);

// There were no local names.
for (Export* exprt = prev_export_write_head == NULL ? first_export : prev_export_write_head->next; exprt != NULL; exprt = exprt->next) {
Expand Down Expand Up @@ -619,7 +632,7 @@ char16_t readExportAs (char16_t* startPos, char16_t* endPos) {
return ch;
}

void readImportString (const char16_t* ss, char16_t ch) {
void readImportString (const char16_t* ss, char16_t ch, bool source_phase) {
const char16_t* startPos = pos + 1;
if (ch == '\'') {
stringLiteral(ch);
Expand All @@ -632,6 +645,9 @@ void readImportString (const char16_t* ss, char16_t ch) {
return;
}
addImport(ss, startPos, pos, STANDARD_IMPORT);
if (source_phase) {
import_write_head->import_ty = StaticSourcePhase;
}
pos++;
ch = commentWhitespace(false);
if (!(ch == 'a' && memcmp(pos + 1, &SSERT[0], 5 * 2) == 0) && !(ch == 'w' && *(pos + 1) == 'i' && *(pos + 2) == 't' && *(pos + 3) == 'h')) {
Expand Down
Loading

0 comments on commit 96347a4

Please sign in to comment.