-
Notifications
You must be signed in to change notification settings - Fork 323
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Prepare the repo for working on rust code (#841)
- Loading branch information
1 parent
2f404b7
commit f7d4ef5
Showing
26 changed files
with
1,482 additions
and
38 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
|
||
[build] | ||
target-dir = "target/rust/" | ||
rustflags = [] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
name: Parser CI | ||
|
||
on: | ||
push: | ||
branches: [ main ] | ||
pull_request: | ||
branches: [ "*" ] | ||
|
||
env: | ||
wasmpackVersion: 0.8.1 | ||
|
||
jobs: | ||
test: | ||
name: Test Parser | ||
runs-on: ${{ matrix.os }} | ||
timeout-minutes: 10 | ||
strategy: | ||
matrix: | ||
os: [macOS-latest, ubuntu-latest, windows-latest] | ||
fail-fast: false | ||
steps: | ||
- name: Checkout Parser Sources | ||
uses: actions/checkout@v2 | ||
|
||
# Install Tooling | ||
- name: Install Rust | ||
uses: actions-rs/toolchain@v1 | ||
with: | ||
toolchain: nightly-2020-06-09 | ||
override: true | ||
- name: Install wasm-pack | ||
uses: actions-rs/cargo@v1 | ||
with: | ||
command: install | ||
args: wasm-pack --version ${{ env.wasmpackVersion }} | ||
|
||
# Caches | ||
- name: Cache Cargo Registry | ||
uses: actions/cache@v2 | ||
with: | ||
path: ~/.cargo/registry | ||
key: ${{ runner.os }}-cargo-registry-${{ hashFiles('**/Cargo.toml') }} | ||
restore-keys: ${{ runner.os }}-cargo-registry | ||
- name: Cache Cargo Test | ||
uses: actions/cache@v2 | ||
with: | ||
path: ./target/rust | ||
key: ${{ runner.os }}-cargo-build-${{ hashFiles('**/Cargo.toml') }} | ||
restore-keys: ${{ runner.os }}-cargo-build | ||
|
||
# Tests | ||
- name: Test Parser | ||
uses: actions-rs/cargo@v1 | ||
with: | ||
command: test |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
|
||
# General Configuration | ||
unstable_features = true | ||
max_width = 80 | ||
error_on_line_overflow = true | ||
newline_style = "Unix" | ||
|
||
# Operators | ||
binop_separator = "Front" | ||
|
||
# Whitespace | ||
blank_lines_upper_bound = 1 | ||
|
||
# Code Layout | ||
brace_style = "SameLineWhere" | ||
combine_control_expr = true | ||
empty_item_single_line = true | ||
fn_single_line = true | ||
format_strings = true | ||
inline_attribute_width = 80 | ||
space_before_colon = false | ||
space_after_colon = false | ||
type_punctuation_density = "Wide" | ||
|
||
# Comments | ||
comment_width = 80 | ||
wrap_comments = true | ||
format_code_in_doc_comments = true | ||
normalize_comments = true | ||
|
||
# Macros | ||
format_macro_matchers = true | ||
format_macro_bodies = true |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
[workspace] | ||
|
||
members = [ | ||
"parser/flexer" | ||
] | ||
|
||
[profile.dev] | ||
opt-level = 0 | ||
lto = false | ||
debug = true | ||
|
||
[profile.release] | ||
opt-level = 3 | ||
lto = true | ||
debug = false | ||
|
||
[profile.bench] | ||
opt-level = 3 | ||
lto = true | ||
debug = false | ||
|
||
[profile.test] | ||
opt-level = 0 | ||
lto = false | ||
debug = true |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
--- | ||
layout: docs-index | ||
title: Enso's Parser | ||
category: summary | ||
tags: [parser, readme] | ||
order: 0 | ||
--- | ||
|
||
# Enso's Parser | ||
The parser is one of the most crucial components of the Enso runtime in that | ||
_all_ code that a user writes must be parsed. This means that a good parser is | ||
fast, responsive, and lightweight; it shouldn't baulk at having thousands of | ||
lines of code thrown at it. | ||
|
||
Enso's parser, however, is very special. In order to support interactive use it | ||
has to narrow down the scope of a syntax error as much as possible, while still | ||
providing useful output for the compiler around the rest of the parse errors. | ||
This feature makes it more complex than many common parsers, so making this work | ||
while still preserving performance is of paramount importance. | ||
|
||
The various components of the parser's design and architecture are described | ||
below: | ||
|
||
- [**Tech Analysis:**](./tech-analysis.md) A brief overview of the reasons for | ||
the choice of implementation technologies for the parser. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
--- | ||
layout: developer-doc | ||
title: Technology Analysis | ||
category: syntax | ||
tags: [parser, tech-analysis] | ||
order: 1 | ||
--- | ||
|
||
# Parser Technology Analysis | ||
As the Enso parser has some fairly unique requirements placed upon it, the | ||
choice of implementation technology is of paramount importance. Choosing the | ||
correct technology ensures that we can meet all of the requirements placed upon | ||
the parser. | ||
|
||
<!-- MarkdownTOC levels="2,3" autolink="true" --> | ||
|
||
- [Technology Requirements for the Parser](#technology-requirements-for-the-parser) | ||
- [Issues With the Previous Implementation](#issues-with-the-previous-implementation) | ||
- [Choosing Rust](#choosing-rust) | ||
- [Downsides of Rust](#downsides-of-rust) | ||
|
||
<!-- /MarkdownTOC --> | ||
|
||
## Technology Requirements for the Parser | ||
As the parser has to work both for the Engine and for the IDE, it has a strange | ||
set of requirements: | ||
|
||
- The implementation language must be able to run on native platforms, as well | ||
as in the browser via WASM (not JavaScript due to the marshalling overhead). | ||
- The implementation language should permit _excellent_ performance on | ||
both native and web platforms, by giving implementers fine-grained control | ||
over memory usage. | ||
- The implementation language must be able to target all primary platforms: | ||
macOS, Linux and Windows. | ||
|
||
## Issues With the Previous Implementation | ||
The previous implementation of the parser was implemented in Scala, and had some | ||
serious issues that have necessitated this rewrite: | ||
|
||
- **Performance:** The structures used to implement the parser proved inherently | ||
difficult for a JIT to optimise, making performance far worse than expected on | ||
the JVM. | ||
- **ScalaJS Sub-Optimal Code Generation:** The JavaScript generated by ScalaJS | ||
was very suboptimal for these structures, making the parser _even_ slower when | ||
run in the browser. | ||
- **JS as a Browser Target:** Transferring textual data between WASM and JS | ||
incurs a significant marshalling overhead. As the IDE primarily works with | ||
textual operations under the hood, this proved to be a significant slowdown. | ||
|
||
## Choosing Rust | ||
Rust, then, is an obvious choice for the following reasons: | ||
|
||
- It can be compiled _natively_ into the IDE binary, providing it with | ||
excellent performance. | ||
- As a native language it can use JNI to directly create JVM objects on the JVM | ||
heap, for use by the compiler. | ||
- As a native language it can be called directly via JNI. | ||
- There is potential in the future for employing Graal's LLVM bitcode | ||
interpreter to execute the parser safely in a non-native context. | ||
|
||
### Downsides of Rust | ||
This is not to say that choosing Rust doesn't come with some compromises: | ||
|
||
- It significantly complicates the CI pipeline for the engine, as we will have | ||
to build native artefacts for use by the runtime itself. | ||
- As a non-JVM language, the complexity of working with it from Scala and Java | ||
is increased. We will need to maintain a full definition of the AST in Scala | ||
to permit the compiler to work properly with it. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
[package] | ||
name = "flexer" | ||
version = "0.0.1" | ||
authors = [ | ||
"Enso Team <enso-dev@enso.org>", | ||
"Ara Adkins <ara.adkins@enso.org>" | ||
] | ||
edition = "2018" | ||
|
||
description = "A finite-automata-based lexing engine." | ||
readme = "README.md" | ||
homepage = "https://github.com/luna/enso" | ||
repository = "https://github.com/luna/enso" | ||
license-file = "../../LICENSE" | ||
|
||
keywords = ["lexer", "finite-automata"] | ||
|
||
publish = false | ||
|
||
[lib] | ||
name = "flexer" | ||
crate-type = ["dylib", "rlib"] | ||
test = true | ||
bench = true | ||
|
||
[dependencies] | ||
itertools = "0.8" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
# Flexer | ||
This library provides a finite-automata-based lexing engine that can flexibly | ||
tokenize an input stream. | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
/// Entry point with an empty body; it performs no work when run.
fn main() {}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
//! Exports API for construction of Nondeterministic and Deterministic Finite | ||
//! State Automata. | ||
|
||
pub mod alphabet; | ||
pub mod dfa; | ||
pub mod nfa; | ||
pub mod pattern; | ||
pub mod state; |
Oops, something went wrong.