Skip to content

Commit

Permalink
.
Browse files Browse the repository at this point in the history
  • Loading branch information
brson committed May 8, 2020
1 parent f9b4681 commit 581211d
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 8 deletions.
2 changes: 1 addition & 1 deletion src/convert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use std::mem;
use anyhow::Result;
use crate::html::SubDom;
use markup5ever_rcdom as rcdom;
use rcdom::{Node as Node, NodeData};
use rcdom::{Node, NodeData};
use crate::doc;
use log::{warn, debug, error};
use crate::config::BlogPost;
Expand Down
2 changes: 1 addition & 1 deletion src/doc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ pub struct Heading {
pub level: HeadingLevel,
}

#[derive(Debug, Clone, Copy)]
/// Heading level, `H1` through `H6`.
///
/// `PartialEq`/`Eq` are derived so levels can be compared directly
/// (e.g. `level != HeadingLevel::H1`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HeadingLevel {
    H1,
    H2,
    H3,
    H4,
    H5,
    H6,
}
Expand Down
10 changes: 5 additions & 5 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ fn run_render_article(cmd: CmdOpts<RenderArticle>) -> Result<()> {
match html::extract_article(&post) {
Ok(dom) => {
let doc = convert::from_dom(&meta, &dom);
let doc = sanitize::sanitize(doc);
let doc = sanitize::sanitize(doc, &dom);
let title = extract::title(&doc);
let doc = render::to_string(&assets, &doc)?;
if !cmd.cmd.to_file {
Expand Down Expand Up @@ -310,7 +310,7 @@ fn run_extract_title(cmd: CmdOpts<ExtractTitle>) -> Result<()> {
match html::extract_article(&post) {
Ok(dom) => {
let doc = convert::from_dom(&meta, &dom);
let doc = sanitize::sanitize(doc);
let doc = sanitize::sanitize(doc, &dom);
let title = extract::title(&doc);
match title {
Some(title) => {
Expand All @@ -334,7 +334,7 @@ fn run_generate_slug(cmd: CmdOpts<GenerateSlug>) -> Result<()> {
match html::extract_article(&post) {
Ok(dom) => {
let doc = convert::from_dom(&meta, &dom);
let doc = sanitize::sanitize(doc);
let doc = sanitize::sanitize(doc, &dom);
let title = extract::title(&doc);
match title {
Some(title) => {
Expand Down Expand Up @@ -364,7 +364,7 @@ fn run_write_index(cmd: CmdOpts<WriteIndex>) -> Result<()> {
match html::extract_article(&post) {
Ok(dom) => {
let doc = convert::from_dom(&meta, &dom);
let doc = sanitize::sanitize(doc);
let doc = sanitize::sanitize(doc, &dom);
let title = extract::title(&doc);
match title {
Some(title) => {
Expand Down Expand Up @@ -410,7 +410,7 @@ fn run_write_author_pages(cmd: CmdOpts<WriteAuthorPages>) -> Result<()> {
match html::extract_article(&post) {
Ok(dom) => {
let doc = convert::from_dom(&meta, &dom);
let doc = sanitize::sanitize(doc);
let doc = sanitize::sanitize(doc, &dom);
let title = extract::title(&doc);
match title {
Some(title) => {
Expand Down
43 changes: 42 additions & 1 deletion src/sanitize.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
use log::warn;
use anyhow::Result;
use crate::doc::*;
use markup5ever_rcdom as rcdom;
use rcdom::{Node, NodeData};
use crate::html::SubDom;
use crate::doc::{Block, HeadingLevel};

pub fn sanitize(doc: Document) -> Document {
/// Cleans up a converted document, using the dom it was extracted from.
///
/// The only pass applied at the moment is `maybe_add_h1`; the resulting
/// document is returned to the caller.
pub fn sanitize(doc: Document, dom: &SubDom) -> Document {
    maybe_add_h1(doc, dom)
}

Expand Down Expand Up @@ -65,3 +71,38 @@ fn remove_leading_and_trailing_dashes(mut s: String) -> String {
}
s.to_string()
}

/// Prepends a recovered h1 heading to docs whose first heading is not an h1.
///
/// Some blogs don't put their h1 title inside the `article` tag (e.g.
/// burntsushi), so the extracted doc can contain no h1 before its other
/// headings. When `missing_h1` reports that case, a warning is logged and
/// `find_h1` is asked to look for an h1 in the dom; if it returns one, that
/// heading is inserted as the first block of the doc. In every other case
/// the doc is returned unchanged.
fn maybe_add_h1(mut doc: Document, dom: &SubDom) -> Document {
    // Nothing to repair: hand the doc straight back.
    if !missing_h1(&doc) {
        return doc;
    }

    warn!("missing h1 in {:?}", doc.meta.origin_url);

    let recovered = find_h1(dom);
    if let Some(heading) = recovered {
        doc.body.blocks.insert(0, Block::Heading(heading));
    }
    doc
}

/// Reports whether the first heading in the doc is something other than an h1.
///
/// Only the first `Block::Heading` encountered is inspected; non-heading
/// blocks that precede it are skipped. A doc that contains no headings at
/// all yields `false`.
fn missing_h1(doc: &Document) -> bool {
    doc.body
        .blocks
        .iter()
        // The first heading decides the answer; every other block is ignored.
        .find_map(|block| match block {
            Block::Heading(heading) => Some(heading.level != HeadingLevel::H1),
            _ => None,
        })
        .unwrap_or(false)
}

/// Looks for an h1 heading in the dom.
///
/// NOTE(review): this is currently a stub — `dom` is not inspected and the
/// result is always `None`, so `maybe_add_h1` never inserts a heading yet.
/// TODO: implement the actual dom search.
fn find_h1(dom: &SubDom) -> Option<Heading> {
None
}

0 comments on commit 581211d

Please sign in to comment.