Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement JavaScript Rewriter API #100

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 4 additions & 2 deletions js-api/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 6 additions & 4 deletions js-api/Cargo.toml
Expand Up @@ -5,11 +5,13 @@ authors = ["Ivan Nikulin <inikulin@cloudflare.com>"]
edition = "2018"

[dependencies]
js-sys = "0.3.33"
js-sys = "0.3.51"
lol_html = { path = "../" }
serde = { version = "1.0.104", features = ["derive"] }
serde-wasm-bindgen = "0.1.3"
wasm-bindgen = "0.2"
serde = { version = "1.0.126", features = ["derive"] }
serde-wasm-bindgen = "0.3.0"
wasm-bindgen = "0.2.74"
thiserror = "1.0.26"
encoding_rs = "0.8.13"

[lib]
crate-type = ["cdylib", "rlib"]
36 changes: 36 additions & 0 deletions js-api/README.md
@@ -0,0 +1,36 @@
# LOL HTML JavaScript API

## Example

```js
'use strict';

const { HTMLRewriter } = require('lol_html');

// Every chunk of rewritten output is delivered to the sink callback passed as
// the constructor's second argument; collect the chunks so they can be joined later.
const chunks = [];
const rewriter = new HTMLRewriter('utf8', (chunk) => {
chunks.push(chunk);
});

// Register handlers before the first write(): the rewriter rejects new
// handlers once writing has started.
rewriter.on('a[href]', {
element(el) {
const href = el
.getAttribute('href')
.replace('http:', 'https:');
el.setAttribute('href', href);
},
});

// Input may be fed in arbitrary pieces; here the <a> tag itself is split
// across two separate writes.
[
'<div><a href=',
'http://example.com>',
'</a></div>',
].forEach((part) => {
rewriter.write(Buffer.from(part));
});

// Signal the end of input once everything has been written.
rewriter.end();

const output = Buffer.concat(chunks).toString('utf8');
console.log(output);
```
Comment on lines +5 to +36
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this tested somewhere? I'm worried it will regress otherwise - maybe you could put it in examples/ and use include_str! from a doctest?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think ideally we should get some tests going with Node, but I don't know how to set that up with Travis.

2 changes: 1 addition & 1 deletion js-api/src/comment.rs
Expand Up @@ -11,6 +11,6 @@ impl_mutations!(Comment);
impl Comment {
#[wasm_bindgen(method, getter)]
pub fn text(&self) -> JsResult<String> {
self.0.get().map(|c| c.text().into())
self.0.get().map(|c| c.text())
}
}
2 changes: 1 addition & 1 deletion js-api/src/element.rs
Expand Up @@ -42,7 +42,7 @@ impl Element {
}

#[wasm_bindgen(method, getter)]
pub fn attributes(&self, name: &str) -> JsResult<JsValue> {
pub fn attributes(&self) -> JsResult<JsValue> {
self.0
.get()
.map(|e| {
Expand Down
113 changes: 113 additions & 0 deletions js-api/src/handlers.rs
@@ -0,0 +1,113 @@
use super::comment::Comment;
use super::doctype::Doctype;
use super::document_end::DocumentEnd;
use super::element::Element;
use super::text_chunk::TextChunk;
use super::*;
use js_sys::Function as JsFunction;
use lol_html::{
DocumentContentHandlers as NativeDocumentContentHandlers,
ElementContentHandlers as NativeElementContentHandlers,
};
use std::mem;
use thiserror::Error;

/// Wraps the value thrown by a JavaScript content handler so it can travel
/// through the native rewriter as a Rust error and later be recovered by
/// downcasting (see `map_err` in `html_rewriter.rs`).
#[derive(Error, Debug)]
#[error("JS handler error")]
pub struct HandlerJsErrorWrap(pub JsValue);

// SAFETY: The exposed js-api only supports single-threaded usage.
// NOTE(review): these impls are presumably required because the native handler
// error type demands `Send + Sync` while `JsValue` does not provide them —
// confirm against the lol_html handler result type before relying on this.
unsafe impl Send for HandlerJsErrorWrap {}
unsafe impl Sync for HandlerJsErrorWrap {}

// Builds a native content-handler closure that forwards to the JS function
// `$handler`.
//
// The native argument is wrapped into its JS-facing counterpart `$JsArgType`
// via `from_native`, which also yields an anchor value. The JS function is
// invoked with `null` as `this`; a thrown value is wrapped in
// `HandlerJsErrorWrap` and converted into the handler's error type. The anchor
// is dropped explicitly only after the JS call has returned.
macro_rules! make_handler {
    ($handler:ident, $JsArgType:ident) => {
        move |native_arg: &mut _| {
            let (wrapped, anchor) = $JsArgType::from_native(native_arg);

            let outcome = $handler
                .call1(&JsValue::NULL, &JsValue::from(wrapped))
                .map(|_| ())
                .map_err(|thrown| HandlerJsErrorWrap(thrown).into());

            // Release the anchor only now that the JS call is finished.
            mem::drop(anchor);

            outcome
        }
    };
}

// JS-side "handlers bag" passed for a selector. Each property is read through
// a getter and is optional: a missing handler comes back as `None`.
#[wasm_bindgen]
extern "C" {
pub type ElementContentHandlers;

// Handler called with an `Element` wrapper.
#[wasm_bindgen(method, getter)]
fn element(this: &ElementContentHandlers) -> Option<JsFunction>;

// Handler called with a `Comment` wrapper.
#[wasm_bindgen(method, getter)]
fn comments(this: &ElementContentHandlers) -> Option<JsFunction>;

// Handler called with a `TextChunk` wrapper.
#[wasm_bindgen(method, getter)]
fn text(this: &ElementContentHandlers) -> Option<JsFunction>;
}

/// Converts the JS handlers bag into lol_html's native element handlers.
impl IntoNative<NativeElementContentHandlers<'static>> for ElementContentHandlers {
    /// Reads the `element`, `comments` and `text` properties (in that order)
    /// and registers a forwarding closure for each one that is present.
    fn into_native(self) -> NativeElementContentHandlers<'static> {
        let native = NativeElementContentHandlers::default();

        let native = match self.element() {
            Some(handler) => native.element(make_handler!(handler, Element)),
            None => native,
        };

        let native = match self.comments() {
            Some(handler) => native.comments(make_handler!(handler, Comment)),
            None => native,
        };

        match self.text() {
            Some(handler) => native.text(make_handler!(handler, TextChunk)),
            None => native,
        }
    }
}

// JS-side "handlers bag" for document-level content. Each property is read
// through a getter and is optional: a missing handler comes back as `None`.
#[wasm_bindgen]
extern "C" {
pub type DocumentContentHandlers;

// Handler called with a `Doctype` wrapper.
#[wasm_bindgen(method, getter)]
fn doctype(this: &DocumentContentHandlers) -> Option<JsFunction>;

// Handler called with a `Comment` wrapper.
#[wasm_bindgen(method, getter)]
fn comments(this: &DocumentContentHandlers) -> Option<JsFunction>;

// Handler called with a `TextChunk` wrapper.
#[wasm_bindgen(method, getter)]
fn text(this: &DocumentContentHandlers) -> Option<JsFunction>;

// Handler called with a `DocumentEnd` wrapper.
#[wasm_bindgen(method, getter)]
fn end(this: &DocumentContentHandlers) -> Option<JsFunction>;
}

/// Converts the JS handlers bag into lol_html's native document handlers.
impl IntoNative<NativeDocumentContentHandlers<'static>> for DocumentContentHandlers {
    /// Reads the `doctype`, `comments`, `text` and `end` properties (in that
    /// order) and registers a forwarding closure for each one that is present.
    fn into_native(self) -> NativeDocumentContentHandlers<'static> {
        let native = NativeDocumentContentHandlers::default();

        let native = match self.doctype() {
            Some(handler) => native.doctype(make_handler!(handler, Doctype)),
            None => native,
        };

        let native = match self.comments() {
            Some(handler) => native.comments(make_handler!(handler, Comment)),
            None => native,
        };

        let native = match self.text() {
            Some(handler) => native.text(make_handler!(handler, TextChunk)),
            None => native,
        };

        match self.end() {
            Some(handler) => native.end(make_handler!(handler, DocumentEnd)),
            None => native,
        }
    }
}
113 changes: 107 additions & 6 deletions js-api/src/html_rewriter.rs
@@ -1,9 +1,19 @@
use super::handlers::{DocumentContentHandlers, ElementContentHandlers, HandlerJsErrorWrap};
use super::*;
use js_sys::{Function as JsFunction, Uint8Array};
use encoding_rs::Encoding;
use js_sys::{Error as JsError, Function as JsFunction, Uint8Array};
use lol_html::errors::RewritingError;
use lol_html::{
DocumentContentHandlers, ElementContentHandlers, HtmlRewriter as NativeHTMLRewriter,
OutputSink, Selector,
AsciiCompatibleEncoding, HtmlRewriter as NativeHTMLRewriter, OutputSink, Selector, Settings,
};
use std::borrow::Cow;

/// Translates a native rewriting error into the value thrown to JavaScript.
///
/// A content-handler error that wraps a JS exception (`HandlerJsErrorWrap`) is
/// unwrapped so the caller receives the very value their handler threw. Any
/// other error, including a content-handler error of an unexpected type, is
/// reported as its display string.
fn map_err(err: RewritingError) -> JsValue {
    match err {
        RewritingError::ContentHandlerError(handler_err) => {
            match handler_err.downcast::<HandlerJsErrorWrap>() {
                Ok(wrapped) => wrapped.0,
                // Do not panic (which would trap the wasm instance) if the
                // boxed error is not the wrapper; surface its message instead.
                Err(other) => JsValue::from(other.to_string()),
            }
        }
        _ => JsValue::from(err.to_string()),
    }
}

struct JsOutputSink(JsFunction);

Expand All @@ -24,8 +34,99 @@ impl OutputSink for JsOutputSink {
}
}

/// Lifecycle of an `HTMLRewriter`: configuration, streaming, finished.
enum RewriterState {
// Nothing has been written yet. Handlers are still being collected into
// `settings`, and the native rewriter has not been constructed.
Before {
output_sink: JsOutputSink,
settings: Settings<'static, 'static>,
},
// The native rewriter has been built from the collected settings and sink.
During(NativeHTMLRewriter<'static, JsOutputSink>),
// The rewriter has been ended; no native rewriter is held any more.
After,
}

#[wasm_bindgen]
pub struct HTMLRewriterBuilder {
element_content_handlers: (Selector, ElementContentHandlers<'static>),
document_content_handlers: DocumentContentHandlers<'static>,
pub struct HTMLRewriter(RewriterState);

#[wasm_bindgen]
impl HTMLRewriter {
/// Creates a rewriter in its configuration state.
///
/// `encoding` is an encoding label that must resolve to an ASCII-compatible
/// encoding; otherwise an `Error("Invalid encoding")` is returned.
/// `output_sink` is the JS function that receives the output chunks.
#[wasm_bindgen(constructor)]
pub fn new(encoding: String, output_sink: &JsFunction) -> JsResult<HTMLRewriter> {
    let resolved = Encoding::for_label(encoding.as_bytes())
        .and_then(AsciiCompatibleEncoding::new);

    let encoding = match resolved {
        Some(ascii_compatible) => ascii_compatible,
        None => return Err(JsError::new("Invalid encoding").into()),
    };

    let output_sink = JsOutputSink::new(output_sink);
    let settings = Settings {
        encoding,
        // TODO: accept options bag and parse out here
        ..Settings::default()
    };

    Ok(HTMLRewriter(RewriterState::Before {
        output_sink,
        settings,
    }))
}

/// Returns the running native rewriter, constructing it on first use.
///
/// * `Before`: the collected settings and output sink are moved out and used
///   to build the native rewriter; the state becomes `During`.
/// * `During`: the existing rewriter is returned.
/// * `After`: fails with `Error("Rewriter is ended")`.
fn inner_mut(&mut self) -> JsResult<&mut NativeHTMLRewriter<'static, JsOutputSink>> {
    if let RewriterState::Before { .. } = self.0 {
        // Park the state in `After` for a moment so the settings and sink can
        // be taken by value out of `self`.
        match std::mem::replace(&mut self.0, RewriterState::After) {
            RewriterState::Before {
                settings,
                output_sink,
            } => {
                self.0 = RewriterState::During(NativeHTMLRewriter::new(settings, output_sink));
            }
            // A safe panic is sufficient here; no need for
            // `unreachable_unchecked` and its undefined-behaviour risk.
            _ => unreachable!("state was just observed to be `Before`"),
        }
    }

    match self.0 {
        RewriterState::During(ref mut inner) => Ok(inner),
        RewriterState::After => Err(JsError::new("Rewriter is ended").into()),
        RewriterState::Before { .. } => {
            unreachable!("`Before` is always promoted to `During` above")
        }
    }
}

/// Registers element content handlers for a CSS selector.
///
/// Only allowed while the rewriter is still in its configuration state;
/// otherwise fails with `Error("Handlers cannot be added after write")`.
/// A selector that does not parse is reported as a JS error and nothing is
/// registered.
pub fn on(&mut self, selector: &str, handlers: ElementContentHandlers) -> JsResult<()> {
    let settings = match self.0 {
        RewriterState::Before {
            ref mut settings, ..
        } => settings,
        _ => return Err(JsError::new("Handlers cannot be added after write").into()),
    };

    // Parse first: the handlers bag is only converted for a valid selector.
    let parsed = selector.parse::<Selector>().into_js_result()?;
    let entry = (Cow::Owned(parsed), handlers.into_native());

    settings.element_content_handlers.push(entry);

    Ok(())
}

/// Registers document-level content handlers (exposed to JS as `onDocument`).
///
/// Only allowed while the rewriter is still in its configuration state;
/// otherwise fails with `Error("Handlers cannot be added after write")`.
#[wasm_bindgen(method, js_name=onDocument)]
pub fn on_document(&mut self, handlers: DocumentContentHandlers) -> JsResult<()> {
    if let RewriterState::Before {
        ref mut settings, ..
    } = self.0
    {
        let native_handlers = handlers.into_native();
        settings.document_content_handlers.push(native_handlers);
        return Ok(());
    }

    Err(JsError::new("Handlers cannot be added after write").into())
}

/// Feeds a chunk of input to the rewriter, starting it on the first call.
///
/// Fails if the rewriter has already been ended, or with the mapped rewriting
/// error (see `map_err`) if processing the chunk fails.
pub fn write(&mut self, chunk: &[u8]) -> JsResult<()> {
    let rewriter = self.inner_mut()?;

    rewriter.write(chunk).map_err(map_err)
}

/// Finishes rewriting and moves the rewriter into its ended state.
///
/// If a native rewriter is running, it is consumed and finalized, and any
/// rewriting error is mapped via `map_err`. If nothing was ever written, or
/// the rewriter was already ended, this is a no-op that returns `Ok(())`.
pub fn end(&mut self) -> JsResult<()> {
    let previous = std::mem::replace(&mut self.0, RewriterState::After);

    if let RewriterState::During(rewriter) = previous {
        rewriter.end().map_err(map_err)
    } else {
        Ok(())
    }
}
}
1 change: 1 addition & 0 deletions js-api/src/lib.rs
Expand Up @@ -171,5 +171,6 @@ mod comment;
mod doctype;
mod document_end;
mod element;
mod handlers;
mod html_rewriter;
mod text_chunk;