Skip to content

Commit

Permalink
feat: support images
Browse files Browse the repository at this point in the history
  • Loading branch information
dmeijboom committed Jun 8, 2023
1 parent d7c0e95 commit 3ebccbe
Show file tree
Hide file tree
Showing 4 changed files with 120 additions and 35 deletions.
26 changes: 26 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
name: Test

on:
push:
branches: ["main"]
pull_request:
branches: ["main"]

jobs:
test:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Set up cargo cache
uses: actions/cache@v3
continue-on-error: false
with:
path: |
~/.cargo
target
key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
restore-keys: ${{ runner.os }}-cargo-
- uses: dtolnay/rust-toolchain@stable
with:
toolchain: nightly-x86_64-unknown-linux-gnu
- run: cargo test
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ fn main() {

The following HTML elements are supported (other elements will be stripped):

- `a`
- `h1`
- `h2`
- `h3`
Expand All @@ -31,3 +30,5 @@ The following HTML elements are supported (other elements will be stripped):
- `li`
- `em`/`i`
- `strong`/`b`
- `a`
- `img`
22 changes: 22 additions & 0 deletions src/attributes.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
use html5ever::Attribute;

/// Name-based lookup over a list of HTML attributes.
pub trait AttributeList {
/// Looks up the attribute called `name` and returns its value as a string
/// slice, or `None` when no such attribute is found.
fn get(&self, name: &str) -> Option<&str>;
/// Same lookup as `get`, but returns an owned `String`.
///
/// The `&[Attribute]` implementation in this file yields an empty string
/// when the attribute is missing.
fn get_or_default(&self, name: &str) -> String;
}

/// Attribute lookup for a borrowed slice of parsed `html5ever` attributes.
impl AttributeList for &[Attribute] {
    /// Returns the value of the first attribute whose local name equals
    /// `name`.
    ///
    /// Attributes carrying a namespace prefix are skipped, so only plain,
    /// unprefixed attributes can match.
    fn get(&self, name: &str) -> Option<&str> {
        self.iter()
            .find(|attr| attr.name.prefix.is_none() && &attr.name.local == name)
            .map(|attr| attr.value.as_ref())
    }

    /// Returns an owned copy of the attribute value, or an empty `String`
    /// when the attribute is absent.
    fn get_or_default(&self, name: &str) -> String {
        self.get(name).map_or_else(String::new, str::to_owned)
    }
}
104 changes: 70 additions & 34 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
//! # HTML to CommonMark
//!
//!
//! Convert HTML to markdown (CommonMark). Uses [html5ever](https://crates.io/crates/html5ever) for parsing HTML and [comrak](https://crates.io/crates/comrak) for generating markdown output. It generates a comrak AST based on the HTML input and then converts it to markdown using `comrak::format_commonmark`.
//!
//! ## Usage
Expand All @@ -13,11 +13,11 @@
//! println!("{}", markdown); // # Hello World
//! }
//! ```
//!
//!
//! ## Features
//!
//!
//! The following HTML elements are supported (other elements will be stripped):
//!
//!
//! - `a`
//! - `h1`
//! - `h2`
Expand Down Expand Up @@ -45,8 +45,10 @@ use html5ever::{
Attribute,
};

mod attributes;
mod error;

use attributes::AttributeList;
pub use error::Error;

struct Sink<'a> {
Expand Down Expand Up @@ -99,50 +101,62 @@ fn heading<'a>(level: u8, line: usize) -> AstNode<'a> {
}

/// Returns the value of the first unprefixed attribute in `attrs` whose local
/// name equals `name`, or an empty `String` when there is no such attribute.
#[inline]
fn attr_or_default(name: &str, attrs: &[Attribute]) -> String {
    for attr in attrs {
        // Namespaced attributes (those with a prefix) never match.
        if attr.name.prefix.is_none() && &attr.name.local == name {
            return attr.value.to_string();
        }
    }

    String::new()
}

#[inline]
fn create_node<'a>(name: &str, attrs: &[Attribute], line: usize) -> Option<AstNode<'a>> {
fn create_node<'a>(
arena: &'a Arena<AstNode<'a>>,
name: &str,
attrs: &[Attribute],
line: usize,
) -> Option<&'a AstNode<'a>> {
Some(match name {
"a" => node(
"a" => arena.alloc(node(
NodeValue::Link(NodeLink {
url: attr_or_default("href", attrs),
title: attr_or_default("title", attrs),
url: attrs.get_or_default("href"),
title: attrs.get_or_default("title"),
}),
1,
),
"h1" => heading(1, line),
"h2" => heading(2, line),
"h3" => heading(3, line),
"h4" => heading(4, line),
"h5" => heading(5, line),
"h6" => heading(6, line),
"p" => node(NodeValue::Paragraph, line),
"ul" => node(
)),
"h1" => arena.alloc(heading(1, line)),
"h2" => arena.alloc(heading(2, line)),
"h3" => arena.alloc(heading(3, line)),
"h4" => arena.alloc(heading(4, line)),
"h5" => arena.alloc(heading(5, line)),
"h6" => arena.alloc(heading(6, line)),
"p" => arena.alloc(node(NodeValue::Paragraph, line)),
"ul" => arena.alloc(node(
NodeValue::List(NodeList {
list_type: ListType::Bullet,
bullet_char: b'-',
..NodeList::default()
}),
line,
),
"ol" => node(
)),
"ol" => arena.alloc(node(
NodeValue::List(NodeList {
list_type: ListType::Ordered,
start: 1,
..NodeList::default()
}),
line,
),
"li" => node(NodeValue::Item(NodeList::default()), line),
"b" | "strong" => node(NodeValue::Strong, line),
"i" | "em" => node(NodeValue::Emph, line),
)),
"li" => arena.alloc(node(NodeValue::Item(NodeList::default()), line)),
"b" | "strong" => arena.alloc(node(NodeValue::Strong, line)),
"i" | "em" => arena.alloc(node(NodeValue::Emph, line)),
"img" => {
let image = arena.alloc(node(
NodeValue::Image(NodeLink {
url: attrs.get_or_default("src"),
title: attrs.get_or_default("title"),
}),
line,
));

if let Some(alt) = AttributeList::get(&attrs, "alt") {
let text_node = arena.alloc(node(NodeValue::Text(alt.to_string()), line));
image.append(text_node);
}

image
}
_ => return None,
})
}
Expand All @@ -165,6 +179,7 @@ fn valid_elem(name: &str) -> bool {
| "strong"
| "i"
| "em"
| "img"
)
}

Expand All @@ -177,8 +192,16 @@ impl<'a> TokenSink for Sink<'a> {
Token::DoctypeToken(_) => {}
Token::TagToken(tag) => match tag.kind {
TagKind::StartTag => {
if let Some(node) = create_node(&tag.name, &tag.attrs, line as usize) {
self.stack.push(self.arena.alloc(node));
if let Some(node) =
create_node(self.arena, &tag.name, &tag.attrs, line as usize)
{
if tag.self_closing {
let parent = self.cur()?;

parent.append(node);
} else {
self.stack.push(node);
}
}
}
TagKind::EndTag if !valid_elem(&tag.name) => {}
Expand Down Expand Up @@ -311,4 +334,17 @@ mod tests {
assert_render("<em>hello world</em>", "*hello world*\n");
assert_render("<i>hello world</i>", "*hello world*\n");
}

/// Checks `<img>` rendering: source only, source with title, and source with
/// both title and alt text.
#[test]
fn test_img() {
    // Each pair is (HTML input, expected CommonMark output).
    let cases = [
        ("<img src=\"test.jpg\" />", "![](test.jpg)\n"),
        (
            "<img src=\"test.jpg\" title=\"this is a test\" />",
            "![](test.jpg \"this is a test\")\n",
        ),
        (
            "<img src=\"test.jpg\" title=\"this is a test\" alt=\"alt test\" />",
            "![alt test](test.jpg \"this is a test\")\n",
        ),
    ];

    for (html, expected) in cases {
        assert_render(html, expected);
    }
}
}

0 comments on commit 3ebccbe

Please sign in to comment.