Permalink
Browse files

document conversion

  • Loading branch information...
flying-sheep committed Dec 10, 2018
1 parent d019d0b commit 6d995f698f580aba9e67b847432899ce841e6e7d
@@ -4,7 +4,7 @@ use failure::{Error,bail,format_err};
use serde_derive::Serialize;
use regex::Regex;

#[derive(Debug,Serialize)]
#[derive(Debug,PartialEq,Serialize)]
pub enum EnumeratedListType {
Arabic,
LowerAlpha,
@@ -13,17 +13,17 @@ pub enum EnumeratedListType {
UpperRoman,
}

/// Two-variant enum; it is the type of the `space` field of `Raw`.
///
/// Note that the `Default` trait impl below yields `Preserve`, not the
/// variant that happens to be named `Default`.
#[derive(Debug,Serialize)]
#[derive(Debug,PartialEq,Serialize)]
pub enum FixedSpace { Default, Preserve } // yes, default really is not “Default”
// `FixedSpace::default()` deliberately returns `Preserve`.
impl Default for FixedSpace { fn default() -> FixedSpace { FixedSpace::Preserve } }

#[derive(Debug,Serialize)] pub enum AlignH { Left, Center, Right}
#[derive(Debug,Serialize)] pub enum AlignHV { Top, Middle, Bottom, Left, Center, Right }
#[derive(Debug,PartialEq,Serialize)] pub enum AlignH { Left, Center, Right}
#[derive(Debug,PartialEq,Serialize)] pub enum AlignHV { Top, Middle, Bottom, Left, Center, Right }

#[derive(Debug,Serialize)] pub struct ID(pub String);
#[derive(Debug,Serialize)] pub struct NameToken(pub String);
#[derive(Debug,PartialEq,Serialize)] pub struct ID(pub String);
#[derive(Debug,PartialEq,Serialize)] pub struct NameToken(pub String);

#[derive(Debug,Serialize)]
#[derive(Debug,PartialEq,Serialize)]
pub enum Measure { // http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#length-units
Em(f64),
Ex(f64),
@@ -43,7 +43,7 @@ macro_rules! synonymous_enum {
cartesian!(impl_into, [ $( ($subcat::$entry) ),+ ], [ $($supcat),+ ]);
};
( $name:ident { $( $entry:ident ),+ $(,)* } ) => {
#[derive(Serialize)]
#[derive(PartialEq,Serialize)]
pub enum $name { $(
$entry(Box<$entry>),
)* }
@@ -87,7 +87,6 @@ synonymous_enum!(TextOrInlineElement {
//Content Models\\
//--------------\\

synonymous_enum!(SubSection { Title, Subtitle, Docinfo, Decoration, SubStructure });
synonymous_enum!(AuthorInfo { Author, Organization, Address, Contact });
synonymous_enum!(DecorationElement { Header, Footer });
synonymous_enum!(SubTopic { Title, BodyElement });
@@ -26,7 +26,7 @@ pub trait Element {
fn classes_mut(&mut self) -> &mut Vec<String>;
}

#[derive(Debug,Default,Serialize)]
#[derive(Debug,Default,PartialEq,Serialize)]
pub struct CommonAttributes {
ids: Vec<ID>,
names: Vec<NameToken>,
@@ -78,7 +78,7 @@ macro_rules! impl_new {(
),* $(,)* }
) => (
$(#[$attr])*
#[derive(Debug,Serialize)]
#[derive(Debug,PartialEq,Serialize)]
pub struct $name { $(
$(#[$fattr])* $field: $typ,
)* }
@@ -136,7 +136,7 @@ impl_children!(Document, StructuralSubElement);

impl_elems!(
//structural elements
(Section, SubSection)
(Section, StructuralSubElement)
(Topic, SubTopic)
(Sidebar, SubSidebar)

@@ -20,7 +20,7 @@ macro_rules! skip {
macro_rules! impl_extra {
( $name:ident { $( $(#[$pattr:meta])* $param:ident : $type:ty ),* $(,)* } ) => (
impl_extra!(
#[derive(Default,Debug,Serialize)]
#[derive(Default,Debug,PartialEq,Serialize)]
$name { $( $(#[$pattr])* $param : $type, )* }
);
);
@@ -49,7 +49,7 @@ impl_extra!(Target {
anonymous: bool,
});
impl_extra!(Raw { space: FixedSpace, format: Vec<NameToken> });
impl_extra!(#[derive(Debug,Serialize)] Image {
impl_extra!(#[derive(Debug,PartialEq,Serialize)] Image {
uri: target::Target,
align: Option<AlignHV>,
alt: Option<String>,
@@ -7,14 +7,138 @@ use pest::iterators::Pairs;
use crate::document_tree::{
HasChildren,
elements as e,
element_categories as c,
};

use super::pest_rst::Rule;


/// Borrows the `Section` wrapped inside a structural sub-element, mutably.
///
/// The caller must already know that `ssubel` is a
/// `StructuralSubElement::SubStructure` holding a `SubStructure::Section`;
/// any other shape hits `unreachable!()` and panics.
fn ssubel_to_section_unchecked_mut(ssubel: &mut c::StructuralSubElement) -> &mut e::Section {
    // Peel the outer category first ...
    let sub_structure = match ssubel {
        c::StructuralSubElement::SubStructure(boxed) => &mut **boxed,
        _ => unreachable!(),
    };
    // ... then the inner one, leaving only the section itself.
    match sub_structure {
        c::SubStructure::Section(section) => section,
        _ => unreachable!(),
    }
}


/// Walks from `toplevel` down the section tree and returns the child list
/// of the section addressed by `section_idxs`.
///
/// Every `Some(i)` descends into the children of the `i`-th element of the
/// current list (which must be a section, see
/// `ssubel_to_section_unchecked_mut`); `None` entries are skipped without
/// descending. An empty (or all-`None`) path returns `toplevel` itself.
///
/// Panics if an index is out of bounds or does not point at a section.
fn get_level<'tl>(toplevel: &'tl mut Vec<c::StructuralSubElement>, section_idxs: &[Option<usize>]) -> &'tl mut Vec<c::StructuralSubElement> {
    section_idxs
        .iter()
        .flatten()
        .fold(toplevel, |current, &idx| {
            ssubel_to_section_unchecked_mut(&mut current[idx]).children_mut()
        })
}


/// Converts the top-level parser pairs into an `e::Document`.
///
/// Every pair is passed to `block::convert_ssubel`; a conversion error is
/// returned immediately via `?`, and pairs that convert to `None` are ignored.
/// Titles open a new `e::Section` whose nesting is decided by the title's
/// `TitleKind`: the first time a kind is seen it is appended to `kinds`, and a
/// kind that was seen before re-opens a section at the position it has in
/// `kinds`. All other elements are appended to the children of the section
/// that is currently active (or to the document itself before any title).
///
/// NOTE(review): the four statements directly below the signature appear to be
/// the pre-change body that the diff view shows next to the new one.
pub fn convert_document(pairs: Pairs<Rule>) -> Result<e::Document, Error> {
let structural_elems = pairs.map(block::convert_ssubel)
.filter_map(|elem| match elem { Ok(Some(e)) => Some(Ok(e)), Err(e) => Some(Err(e)), Ok(None) => None })
.collect::<Result<_,_>>()?;
Ok(e::Document::with_children(structural_elems))
use self::block::TitleOrSsubel::*;

// Children of the document; sections get nested below these.
let mut toplevel: Vec<c::StructuralSubElement> = vec![];
// The kinds of section titles encountered.
// `section_idx[x]` has the kind `kinds[x]`, but `kinds` can be longer
let mut kinds: Vec<block::TitleKind> = vec![];
// Recursive indices into the tree, pointing at the active sections.
// `None`s indicate skipped section levels:
// toplevel[section_idxs.flatten()[0]].children[section_idxs.flatten()[1]]...
let mut section_idxs: Vec<Option<usize>> = vec![];

for pair in pairs {
if let Some(ssubel) = block::convert_ssubel(pair)? { match ssubel {
Title(title, kind) => {
match kinds.iter().position(|k| k == &kind) {
// Idx points to the level we want to add,
// so idx-1 needs to be the last valid index.
Some(idx) => {
// If idx < len: Remove found section and all below
section_idxs.truncate(idx);
// If idx > len: Add None for skipped levels
// TODO: test skipped levels
while section_idxs.len() < idx { section_idxs.push(None) }
},
// NOTE(review): a kind seen for the first time is only recorded here;
// no `None` padding happens, so if `section_idxs` is currently shorter
// than `kinds` the new section's depth will not equal its index in
// `kinds` — confirm this is intended.
None => kinds.push(kind),
}
// Append the new section (containing only its title so far) to the
// parent level and make it the active section at that depth.
let super_level = get_level(&mut toplevel, &section_idxs);
super_level.push(e::Section::with_children(vec![title.into()]).into());
section_idxs.push(Some(super_level.len() - 1));
},
// Non-title elements become children of the innermost active section.
Ssubel(elem) => get_level(&mut toplevel, &section_idxs).push(elem),
}}
}
Ok(e::Document::with_children(toplevel))
}


// Tests for the section nesting performed by the document conversion.
#[cfg(test)]
mod tests {
use crate::{
parser::parse,
document_tree::{
elements as e,
element_categories as c,
HasChildren,
}
};

// Immutable test helper: unwraps a structural sub-element into the section
// it holds and panics with a descriptive message for any other shape.
fn ssubel_to_section(ssubel: &c::StructuralSubElement) -> &e::Section {
match ssubel {
c::StructuralSubElement::SubStructure(ref b) => match **b {
c::SubStructure::Section(ref s) => s,
ref c => panic!("Expected section, not {:?}", c),
},
ref c => panic!("Expected SubStructure, not {:?}", c),
}
}

// Input document: an intro paragraph followed by titles with three different
// adornments, where the last title uses the third adornment directly below a
// first-level title.
// NOTE(review): whitespace inside this literal is significant to the parser;
// verify blank lines against the repository before editing.
const SECTIONS: &str = "\
Intro before first section title
Level 1
*******
-------
Level 2
-------
Level 3
=======
L1 again
********
L3 again, skipping L2
=====================
";

// Parses `SECTIONS` and checks the resulting nesting by counting children at
// each level; the first child of every section is its title.
#[test]
fn convert_skipped_section() {
let doctree = parse(SECTIONS).unwrap();
let lvl0 = doctree.children();
assert_eq!(lvl0.len(), 3, "Should be a paragraph and 2 sections: {:?}", lvl0);

assert_eq!(lvl0[0], e::Paragraph::with_children(vec![
"Intro before first section title".to_owned().into()
]).into(), "The intro text should fit");

// First level-1 section: title, then one nested level-2 section.
let lvl1a = ssubel_to_section(&lvl0[1]).children();
assert_eq!(lvl1a.len(), 2, "The 1st lvl1 section should have (a title and) a single lvl2 section as child: {:?}", lvl1a);
//TODO: test title lvl1a[0]
let lvl2 = ssubel_to_section(&lvl1a[1]).children();
assert_eq!(lvl2.len(), 2, "The lvl2 section should have (a title and) a single lvl3 section as child: {:?}", lvl2);
//TODO: test title lvl2[0]
let lvl3a = ssubel_to_section(&lvl2[1]).children();
assert_eq!(lvl3a.len(), 1, "The 1st lvl3 section should just a title: {:?}", lvl3a);
//TODO: test title lvl3a[0]

// Second level-1 section: its only section child is the level-3 section,
// attached directly because level 2 was skipped.
let lvl1b = ssubel_to_section(&lvl0[2]).children();
assert_eq!(lvl1b.len(), 2, "The 2nd lvl1 section should have (a title and) a single lvl2 section as child: {:?}", lvl1b);
//TODO: test title lvl1b[0]
let lvl3b = ssubel_to_section(&lvl1b[1]).children();
assert_eq!(lvl3b.len(), 1, "The 2nd lvl3 section should have just a title: {:?}", lvl3b);
//TODO: test title lvl3b[0]
}
}
@@ -15,35 +15,54 @@ use crate::parser::{
use super::inline::convert_inline;


pub fn convert_ssubel(pair: Pair<Rule>) -> Result<Option<c::StructuralSubElement>, Error> {
// TODO: This is just a proof of concept. Keep closely to DTD in final version!
/// Adornment style of a section title, used to tell section levels apart.
///
/// `Double` is produced for the `title_double` grammar rule and `Single` for
/// `title_single`; the `char` is the first character of the adornment line.
#[derive(PartialEq)]
pub(super) enum TitleKind { Double(char), Single(char) }

/// Result of converting one top-level pair: either a section title together
/// with its adornment kind, or any other structural sub-element.
pub(super) enum TitleOrSsubel {
Title(e::Title, TitleKind),
Ssubel(c::StructuralSubElement),
}


/// Converts a single top-level pair, dispatching on its grammar rule.
///
/// Returns `Ok(None)` for `Rule::EOI`, `Title(..)` for `Rule::title`, and
/// `Ssubel(..)` for paragraphs, targets, substitution definitions, generic
/// admonitions and images. Errors from the individual converters are
/// propagated with `?`.
///
/// Panics with "unknown rule" for any rule not listed in the match.
pub(super) fn convert_ssubel(pair: Pair<Rule>) -> Result<Option<TitleOrSsubel>, Error> {
use self::TitleOrSsubel::*;
Ok(Some(match pair.as_rule() {
Rule::title => convert_title(pair).into(),
Rule::paragraph => convert_paragraph(pair)?.into(),
Rule::target => convert_target(pair)?.into(),
Rule::substitution_def => convert_substitution_def(pair)?.into(),
Rule::admonition_gen => convert_admonition_gen(pair)?.into(),
Rule::image => convert_image::<e::Image>(pair)?.into(),
// Titles are kept separate from other elements so the caller can see their kind.
Rule::title => { let (t, k) = convert_title(pair); Title(t, k) },
Rule::paragraph => Ssubel(convert_paragraph(pair)?.into()),
Rule::target => Ssubel(convert_target(pair)?.into()),
Rule::substitution_def => Ssubel(convert_substitution_def(pair)?.into()),
Rule::admonition_gen => Ssubel(convert_admonition_gen(pair)?.into()),
Rule::image => Ssubel(convert_image::<e::Image>(pair)?.into()),
// End of input carries no element.
Rule::EOI => return Ok(None),
rule => panic!("unknown rule {:?}", rule),
}))
}


/// Converts a `title` pair into an `e::Title` plus the `TitleKind` describing
/// its adornment.
///
/// The single inner pair is expected to be `title_double` or `title_single`;
/// its children supply the title text (`line`) and the adornment line, of
/// which only the first character is kept.
///
/// Panics if the title has no inner pair, no text or no adornment, if the
/// adornment is empty, or if an unexpected rule is encountered.
fn convert_title(pair: Pair<Rule>) -> e::Title {
fn convert_title(pair: Pair<Rule>) -> (e::Title, TitleKind) {
let mut title: Option<&str> = None;
let mut _adornment_char: Option<char> = None;
for p in pair.into_inner() {
let mut adornment_char: Option<char> = None;
// title_double or title_single. Extract kind before consuming
let inner_pair = pair.into_inner().next().unwrap();
let kind = inner_pair.as_rule();
for p in inner_pair.into_inner() {
match p.as_rule() {
Rule::line => title = Some(p.as_str()),
Rule::adornments => _adornment_char = Some(p.as_str().chars().next().expect("Empty adornment?")),
Rule::line => title = Some(p.as_str()), // TODO: can contain other stuff?
Rule::adornments => adornment_char = Some(p.as_str().chars().next().expect("Empty adornment?")),
rule => unimplemented!("Unexpected rule in title: {:?}", rule),
};
}
// TODO adornment char
e::Title::with_children(vec![
// now we encountered one line of text and one of adornments
// TODO: emit error if the adornment line is too short (has to match title length)
let elem = e::Title::with_children(vec![
title.expect("No text in title").into()
])
]);
// Map the grammar rule of the inner pair onto the matching kind.
let title_kind = match kind {
Rule::title_double => TitleKind::Double(adornment_char.unwrap()),
Rule::title_single => TitleKind::Single(adornment_char.unwrap()),
_ => unreachable!(),
};
(elem, title_kind)
}


@@ -26,10 +26,10 @@ Title
",
rule: Rule::title,
tokens: [
title(0, 12, [
title(0, 12, [ title_single(0, 12, [
line(0, 6, [ str(0, 5) ]),
adornments(6, 11),
])
]) ])
]
};
}
@@ -45,10 +45,10 @@ Title
",
rule: Rule::title,
tokens: [
title(0, 17, [
title(0, 17, [ title_double(0, 17, [
adornments(0, 5),
line(6, 12, [ str(6, 11) ]),
])
]) ])
]
};
}
@@ -52,10 +52,9 @@ target_name_qu = { ( !( ":"|"`") ~ !NEWLINE ~ ANY )* }
link_target = { nonspacechar+ }

// Title. A block type
title = {
PUSH(adornments) ~ NEWLINE ~ PEEK[..-1] ~ " "* ~ line ~ PEEK[..-1] ~ POP
| line ~ PEEK[..] ~ adornments ~ NEWLINE
}
title = { title_double | title_single }
title_double = { PUSH(adornments) ~ NEWLINE ~ PEEK[..-1] ~ " "* ~ line ~ PEEK[..-1] ~ POP }
title_single = { line ~ PEEK[..] ~ adornments ~ NEWLINE }

// Bullet list. A block type.
bullet_list = { bullet_item ~ (PEEK[..] ~ bullet_item)* }
@@ -6,7 +6,7 @@ use url::{self,Url};
use serde_derive::Serialize;


#[derive(Debug, Serialize)]
#[derive(Debug,PartialEq,Serialize)]
#[serde(untagged)]
pub enum Target {
#[serde(serialize_with = "serialize_url")]

0 comments on commit 6d995f6

Please sign in to comment.