From 8d1d15e303d73369f312b006a2278ababe6538f8 Mon Sep 17 00:00:00 2001 From: Konstantin Potapov Date: Sun, 22 Sep 2019 12:30:41 +0600 Subject: [PATCH 01/11] docx support --- cr3gui/CMakeLists.txt | 1 + cr3qt/data/docx.css | 103 ++ cr3qt/data/fb3.css | 136 ++ cr3qt/src/cr3widget.cpp | 6 + cr3qt/src/mainwindow.cpp | 5 +- crengine/CMakeLists.txt | 5 +- crengine/include/bookformats.h | 2 + crengine/include/docxfmt.h | 11 + crengine/include/fb3fmt.h | 10 + crengine/include/lvopc.h | 81 + crengine/include/lvtinydom.h | 2 +- crengine/src/docxdtd.inc | 148 ++ crengine/src/docxfmt.cpp | 3065 ++++++++++++++++++++++++++++++++ crengine/src/fb3fmt.cpp | 244 +++ crengine/src/lvdocview.cpp | 72 + crengine/src/lvopc.cpp | 147 ++ crengine/src/lvrend.cpp | 2 +- crengine/src/lvtextfm.cpp | 7 +- crengine/src/lvtinydom.cpp | 16 +- 19 files changed, 4053 insertions(+), 10 deletions(-) create mode 100644 cr3qt/data/docx.css create mode 100644 cr3qt/data/fb3.css create mode 100644 crengine/include/docxfmt.h create mode 100644 crengine/include/fb3fmt.h create mode 100644 crengine/include/lvopc.h create mode 100644 crengine/src/docxdtd.inc create mode 100644 crengine/src/docxfmt.cpp create mode 100644 crengine/src/fb3fmt.cpp create mode 100644 crengine/src/lvopc.cpp diff --git a/cr3gui/CMakeLists.txt b/cr3gui/CMakeLists.txt index b15a024a14..1990cca06f 100644 --- a/cr3gui/CMakeLists.txt +++ b/cr3gui/CMakeLists.txt @@ -97,6 +97,7 @@ elseif ( ${GUI} STREQUAL CRGUI_QT ) SET(CR3_SOURCES ${CR3_SOURCES} src/cr3qt.cpp) SET (EXTRA_LIBS ${QT_LIBRARIES} ${STD_LIBS} ) + SET(CR3_STYLES ${CR3_STYLES} data/fb3.css) INSTALL( FILES ${CR3_STYLES} DESTINATION root/crengine) INSTALL( DIRECTORY data/hyph/ DESTINATION root/crengine/hyph diff --git a/cr3qt/data/docx.css b/cr3qt/data/docx.css new file mode 100644 index 0000000000..a4b7ad369f --- /dev/null +++ b/cr3qt/data/docx.css @@ -0,0 +1,103 @@ +body { text-align: left; margin: 0; text-indent: 0px } + +p { $def.all } + +empty-line { height: 1em } + +a { display: inline; $link.all } +a[type="note"] { $footnote-link.all } + +image { text-align: center; text-indent: 0px; display: block } +p image { display: inline } +li image { display: inline } + +li { display: list-item; text-indent: 0em; } +ol { display: block; list-style-type: decimal; margin-left: 1em } + +title.h1 p, title.h2 h2 p { + $title.all +} + +title.h3 p, title.h4 p, title.h5 p, title.h6 p { + $subtitle.all +} + +title.h1, title.h2, title.h3, title.h4, title.h5, title.h6 { + hyphenate: none; +} + +title.h1, title.h2, title.h3, title.h4, title.h5, title.h6 { + display: block; + margin-top: 0.5em; + margin-bottom: 0.3em; + padding: 10px ; + margin-top: 0.5em; + margin-bottom: 0.5em; +} +title.h1, title.h2 { + page-break-inside: avoid; + page-break-after: avoid; +} + +title.h3, title.h4, title.h5, title.h6 { + page-break-inside: avoid; + page-break-after: avoid; +} + +title.h1 { font-size: 150% } +title.h2 { font-size: 140% } +title.h3 { font-size: 130% } +title.h4 { font-size: 120% } +title.h5 { font-size: 110% } + +table { font-size: 80% } +td, th { text-indent: 0px; padding: 3px } +th { font-weight: bold; text-align: center; background-color: #DDD } +/* #808080; */ +table caption { text-indent: 0px; padding: 4px; background-color: #EEE } + +body[name="notes"] { $footnote.all } +body[name="notes"] section title { display: run-in; text-align: left; $footnote-title.all page-break-before: auto; page-break-inside: auto; page-break-after: auto; } +body[name="notes"] section title p { display: inline } + +body[name="comments"] { $footnote.all } +body[name="comments"] section title { display: run-in; text-align: left; $footnote-title.all page-break-before: auto; page-break-inside: auto; page-break-after: auto; } +body[name="comments"] section title p { display: inline } + +description { display: block; } +title-info { display: block; } +annotation { $annotation.all } +date { display: block; font-size: 80%; font-style: italic; text-align: center } +genre { display: none; } +author { display: none; } +book-title { display: none; } +keywords { display: none; } +lang { display: none; } +src-lang { display: none; } +translator { display: none; } +document-info { display: none; } +publish-info { display: none; } +custom-info { display: none; } +coverpage { display: none } + +strong,emphasis,u,strike,sub,sup { + display: inline; +} + +sub { vertical-align: sub; font-size: 70% } +sup { vertical-align: super; font-size: 70% } + +strong { font-weight: bold } +emphasis { font-style: italic } + +u { text-decoration: underline; } +strike { text-decoration: line-through; } + + +img { + margin: 0.5em; + text-align: center; + text-indent: 0em; + border-style: solid; + border-width: medium; +} diff --git a/cr3qt/data/fb3.css b/cr3qt/data/fb3.css new file mode 100644 index 0000000000..c52186f481 --- /dev/null +++ b/cr3qt/data/fb3.css @@ -0,0 +1,136 @@ +body { text-align: left; margin: 0; text-indent: 0px } + +p { $def.all } + +empty-line { height: 1em } + +hr { height: 1px; background-color: #808080; margin-top: 0.5em; margin-bottom: 0.5em; /* 2px */ } + +a { display: inline; $link.all } +a[type="note"] { $footnote-link.all } + +image { text-align: center; text-indent: 0px; display: block } +p image { display: inline } +li image { display: inline } + +li { display: list-item; text-indent: 0em; } +ul { display: block; list-style-type: disc; margin-left: 1em } +ol { display: block; list-style-type: decimal; margin-left: 1em } + +v { text-align: left; text-align-last: right; text-indent: 1em hanging } + +stanza { $poem.all } +stanza + stanza { margin-top: 1em; } +poem { margin-top: 1em; margin-bottom: 1em; text-indent: 0px } +text-author { $text-author.all } + +epigraph, epigraph p { $epigraph.all } +cite, cite p { $cite.all } + +title p, h1 p, h2 p { + $title.all +} + +subtitle, subtitle p, h3 p, h4 p, h5 p, h6 p { + $subtitle.all +} + +title, h1, h2, h3, h4, h5, h6, subtitle { + hyphenate: none; +} + +h1, h2, h3, h4, h5, h6 { + display: block; + margin-top: 0.5em; + margin-bottom: 0.3em; + padding: 10px ; + margin-top: 0.5em; + margin-bottom: 0.5em; +} +title, h1, h2 { + page-break-before: always; + page-break-inside: avoid; + page-break-after: avoid; +} +ol title, ul title { + page-break-before: auto; +} +subtitle, h3, h4, h5, h6 { + page-break-inside: avoid; + page-break-after: avoid; +} +h1 { font-size: 150% } +h2 { font-size: 140% } +h3 { font-size: 130% } +h4 { font-size: 120% } +h5 { font-size: 110% } + +table { font-size: 80% } +td, th { text-indent: 0px; padding: 3px } +th { font-weight: bold; text-align: center; background-color: #DDD } +/* #808080; */ +table caption { text-indent: 0px; padding: 4px; background-color: #EEE } + +tt, samp, kbd, code, pre { font-family: "Courier New", "Courier", monospace; } +code, pre { + display: block; + white-space: pre; + $pre.all +} + +body[name="notes"] { $footnote.all } +body[name="notes"] section title { display: run-in; text-align: left; $footnote-title.all page-break-before: auto; page-break-inside: auto; page-break-after: auto; } +body[name="notes"] section title p { display: inline } + +body[name="comments"] { $footnote.all } +body[name="comments"] section title { display: run-in; text-align: left; $footnote-title.all page-break-before: auto; page-break-inside: auto; page-break-after: auto; } +body[name="comments"] section title p { display: inline } + +description { display: block; } +title-info { display: block; } +annotation { $annotation.all } +date { display: block; font-size: 80%; font-style: italic; text-align: center } +genre { display: none; } +author { display: none; } +book-title { display: none; } +keywords { display: none; } +lang { display: none; } +src-lang { display: none; } +translator { display: none; } +document-info { display: none; } +publish-info { display: none; } +custom-info { display: none; } +coverpage { display: none } + +head, form, script { display: none; } + +b,strong,i,em,dfn,var,q,u,underline,del,s,strike,small,big,sub,sup,acronym,tt,sa mp,kbd,code { + display: inline; +} + +spacing { display: inline; letter-spacing: 5px } + +sub { vertical-align: sub; font-size: 70% } +sup { vertical-align: super; font-size: 70% } + +strong, b { font-weight: bold } +emphasis, i, em, dfn, var { font-style: italic } +u,underline { text-decoration: underline; } +del, s, strike, strikethrough { text-decoration: line-through; } + +small { font-size: 80%; } +big { font-size: 130%; } + +nobr { display: inline; hyphenate: none; white-space: nowrap; } + +dl { margin-left: 0em; } +dt { display: block; margin-left: 0em; margin-top:0.3em; font-weight: bold; } +dd { display: block; margin-left: 1.3em; } + +img { + margin: 0.5em; + text-align: center; + text-indent: 0em; + border-style: solid; + border-width: medium; +} \ No newline at end of file diff --git a/cr3qt/src/cr3widget.cpp b/cr3qt/src/cr3widget.cpp index 394d160d68..7d14a9eea4 100644 --- a/cr3qt/src/cr3widget.cpp +++ b/cr3qt/src/cr3widget.cpp @@ -1226,6 +1226,9 @@ void CR3View::OnLoadFileFormatDetected( doc_format_t fileFormat ) case doc_format_txt: filename = "txt.css"; break; + case doc_format_fb3: + filename = "fb3.css"; + break; case doc_format_rtf: filename = "rtf.css"; break; @@ -1238,6 +1241,9 @@ void CR3View::OnLoadFileFormatDetected( doc_format_t fileFormat ) case doc_format_doc: filename = "doc.css"; break; + case doc_format_docx: + filename = "docx.css"; + break; case doc_format_chm: filename = "chm.css"; break; diff --git a/cr3qt/src/mainwindow.cpp b/cr3qt/src/mainwindow.cpp index 3d6edcab35..40c1eaa7ca 100644 --- a/cr3qt/src/mainwindow.cpp +++ b/cr3qt/src/mainwindow.cpp @@ -231,11 +231,12 @@ void MainWindow::on_actionOpen_triggered() } QString fileName = QFileDialog::getOpenFileName(this, tr("Open book file"), lastPath, - QString(tr("All supported formats")) + QString(" (*.fb2 *.txt *.tcr *.rtf *.doc *.epub *.html *.shtml *.htm *.chm *.zip *.pdb *.pml *.prc *.pml *.mobi);;") + QString(tr("All supported formats")) + QString(" (*.fb2 *.fb3 *.txt *.tcr *.rtf *.doc *.docx *.epub *.html *.shtml *.htm *.chm *.zip *.pdb *.pml *.prc *.pml *.mobi);;") + QString(tr("FB2 books")) + QString(" (*.fb2 *.fb2.zip);;") + + QString(tr("FB3 books")) + QString(" (*.fb3);;") + QString(tr("Text files")) + QString(" (*.txt);;") + QString(tr("Rich text")) + QString(" (*.rtf);;") - + QString(tr("MS Word document")) + QString(" (*.doc);;") + + QString(tr("MS Word document")) + QString(" (*.doc *.docx);;") + QString(tr("HTML files")) + QString(" (*.shtml *.htm *.html);;") + QString(tr("EPUB files")) + QString(" (*.epub);;") + QString(tr("CHM files")) + QString(" (*.chm);;") diff --git a/crengine/CMakeLists.txt b/crengine/CMakeLists.txt index f63efd0778..0120736e3c 100644 --- a/crengine/CMakeLists.txt +++ b/crengine/CMakeLists.txt @@ -42,7 +42,10 @@ if ( NOT ${GUI} STREQUAL FB2PROPS ) src/chmfmt.cpp src/epubfmt.cpp src/pdbfmt.cpp - src/wordfmt.cpp + src/wordfmt.cpp + src/lvopc.cpp + src/docxfmt.cpp + src/fb3fmt.cpp src/crconcurrent.cpp #src/xutils.cpp ) diff --git a/crengine/include/bookformats.h b/crengine/include/bookformats.h index dc63a070d3..22d56d9fd5 100644 --- a/crengine/include/bookformats.h +++ b/crengine/include/bookformats.h @@ -7,6 +7,7 @@ typedef enum { doc_format_none, doc_format_fb2, + doc_format_fb3, doc_format_txt, doc_format_rtf, doc_format_epub, @@ -14,6 +15,7 @@ typedef enum { doc_format_txt_bookmark, // coolreader TXT format bookmark doc_format_chm, doc_format_doc, + doc_format_docx, doc_format_pdb, doc_format_max = doc_format_pdb // don't forget update getDocFormatName() when changing this enum diff --git a/crengine/include/docxfmt.h b/crengine/include/docxfmt.h new file mode 100644 index 0000000000..7328763c13 --- /dev/null +++ b/crengine/include/docxfmt.h @@ -0,0 +1,11 @@ +#ifndef DOCXFMT_H +#define DOCXFMT_H + +#include "../include/crsetup.h" +#include "../include/lvtinydom.h" + + +bool DetectDocXFormat( LVStreamRef stream ); +bool ImportDocXDocument( LVStreamRef stream, ldomDocument * doc, LVDocViewCallback * progressCallback, CacheLoadingCallback * formatCallback ); + +#endif // DOCXFMT_H diff --git a/crengine/include/fb3fmt.h b/crengine/include/fb3fmt.h new file mode 100644 index 0000000000..dd35816a18 --- /dev/null +++ b/crengine/include/fb3fmt.h @@ -0,0 +1,10 @@ +#ifndef FB3FMT_H +#define FB3FMT_H + +#include "../include/crsetup.h" +#include "../include/lvtinydom.h" + +bool DetectFb3Format( LVStreamRef stream ); +bool ImportFb3Document( LVStreamRef stream, ldomDocument * doc, LVDocViewCallback * progressCallback, CacheLoadingCallback * formatCallback ); + +#endif // FB3FMT_H diff --git a/crengine/include/lvopc.h b/crengine/include/lvopc.h new file mode 100644 index 0000000000..b41168528c --- /dev/null +++ b/crengine/include/lvopc.h @@ -0,0 +1,81 @@ +#ifndef LVOPC_H +#define LVOPC_H + +#include "lvstream.h" +#include "lvhashtable.h" +#include "props.h" + +/* + * Open Packaging Conventions (OPC) + * The OPC is specified in Part 2 of the Office Open XML standards ISO/IEC 29500:2008 and ECMA-376 +*/ + +class OpcPart; +typedef LVFastRef OpcPartRef; +class OpcPackage; + +class OpcPart : public LVRefCounter +{ +public: + ~OpcPart(); + LVStreamRef open(); + lString16 getRelatedPartName(const lChar16 * const relationType, const lString16 id = lString16()); + OpcPartRef getRelatedPart(const lChar16 * const relationType, const lString16 id = lString16()); +protected: + OpcPart(OpcPackage* package, lString16 name): + m_relations(16), m_package(package), m_name(name), m_relationsValid(false) + { + } + void readRelations(); + lString16 getTargetPath(const lString16 srcPath, const lString16 targetMode, lString16 target); + OpcPart* createPart(OpcPackage* package, lString16 name) { + return new OpcPart(package, name); + } +private: + LVHashTable *> m_relations; + OpcPackage* m_package; + lString16 m_name; + bool m_relationsValid; +private: + // non copyable + OpcPart(); + OpcPart( const OpcPart& ); + OpcPart& operator=( const OpcPart& ); +}; + + +class OpcPackage : public OpcPart +{ +private: + bool m_contentTypesValid; + LVContainerRef m_container; + LVHashTable m_contentTypes; +private: + // non copyable + OpcPackage(); + OpcPackage( const OpcPackage& ); + OpcPackage& operator=( const OpcPart& ); +public: + OpcPackage(LVContainerRef container) : OpcPart(this, L"/"), + m_contentTypesValid(false), m_container(container), + m_contentTypes(16) + { + } + LVStreamRef open(lString16 partName) { + return m_container->OpenStream(partName.c_str(), LVOM_READ); + } + lString16 getContentPartName(const lChar16* contentType); + OpcPartRef getContentPart(const lChar16* contentType) { + return getPart(getContentPartName(contentType)); + } + LVStreamRef openContentPart(const lChar16* contentType) { + return open(getContentPartName(contentType)); + } + OpcPartRef getPart(const lString16 partName); + bool partExist(const lString16 partName); + void readCoreProperties(CRPropRef doc_props); +private: + void readContentTypes(); +}; + +#endif // LVOPC_H diff --git a/crengine/include/lvtinydom.h b/crengine/include/lvtinydom.h index 30549a0921..e5bd05813d 100644 --- a/crengine/include/lvtinydom.h +++ b/crengine/include/lvtinydom.h @@ -851,7 +851,7 @@ class ldomNode /// returns object image source LVImageSourceRef getObjectImageSource(); /// returns object image ref name - lString16 getObjectImageRefName(); + lString16 getObjectImageRefName(bool percentDecode = true); /// returns object image stream LVStreamRef getObjectImageStream(); /// formats final block diff --git a/crengine/src/docxdtd.inc b/crengine/src/docxdtd.inc new file mode 100644 index 0000000000..7f14fea007 --- /dev/null +++ b/crengine/src/docxdtd.inc @@ -0,0 +1,148 @@ +DOCX_TAG(abstractNum) +DOCX_TAG(abstractNumId) +DOCX_TAG(anchor) +DOCX_TAG(document) +DOCX_TAG(body) +DOCX_TAG(br) +DOCX_TAG(blip) +DOCX_TAG(blipFill) +DOCX_TAG(footnote) +DOCX_TAG(footnotes) +DOCX_TAG(footnoteReference) +DOCX_TAG(footnoteRef) +DOCX_TAG(endnote) +DOCX_TAG(endnotes) +DOCX_TAG(endnoteReference) +DOCX_TAG(endnoteRef) +DOCX_TAG(hyperlink) +DOCX_TAG(bookmarkStart) +DOCX_TAG(bookmarkEnd) +DOCX_TAG(drawing) +DOCX_TAG(fldChar) +DOCX_TAG(graphic) +DOCX_TAG(graphicData) +DOCX_TAG(gridCol) +DOCX_TAG(gridSpan) +DOCX_TAG(numPr) +DOCX_TAG(ilvl) +DOCX_TAG(inline) +DOCX_TAG(instrText) +DOCX_TAG(isLgl) +DOCX_TAG(lang) +DOCX_TAG(lvl) +DOCX_TAG(lvlOverride) +DOCX_TAG(lvlJc) +DOCX_TAG(lvlRestart) +DOCX_TAG(lvlText) +DOCX_TAG(num) +DOCX_TAG(numFmt) +DOCX_TAG(numbering) +DOCX_TAG(nvPicPr) +DOCX_TAG(outlineLvl) +DOCX_TAG(numId) +DOCX_TAG(pic) +DOCX_TAG(pPr) +DOCX_TAG(pPrDefault) +DOCX_TAG(rPr) +DOCX_TAG(spPr) +DOCX_TAG(start) +DOCX_TAG(rPrDefault) +DOCX_TAG(tab) +DOCX_TAG(tbl) +DOCX_TAG(tblGrid) +DOCX_TAG(tblPr) +DOCX_TAG(tc) +DOCX_TAG(tcPr) +DOCX_TAG(tr) +DOCX_TAG(trPr) +DOCX_TAG(b) +DOCX_TAG(i) +DOCX_TAG(u) +DOCX_TAG(strike) +DOCX_TAG(color) +DOCX_TAG(p) +DOCX_TAG(r) +DOCX_TAG(t) +DOCX_TAG(jc) +DOCX_TAG(rFonts) +DOCX_TAG(vertAlign) +DOCX_TAG(sz) +DOCX_TAG(vanish) +DOCX_TAG(pStyle) +DOCX_TAG(rStyle) +DOCX_TAG(ind) +DOCX_TAG(spacing) +DOCX_TAG(suff) +DOCX_TAG(textAlignment) +DOCX_TAG(suppressAutoHyphens) +DOCX_TAG(style) +DOCX_TAG(styles) +DOCX_TAG(docDefaults) +DOCX_TAG(name) +DOCX_TAG(basedOn) +DOCX_TAG(pageBreakBefore) +DOCX_TAG(keepNext) +DOCX_TAG(mirrorIndents) +DOCX_TAG(vMerge) +DOCX_NUM_FMT(aiueo) +DOCX_NUM_FMT(aiueoFullWidth) +DOCX_NUM_FMT(arabicAbjad) +DOCX_NUM_FMT(arabicAlpha) +DOCX_NUM_FMT(bullet) +DOCX_NUM_FMT(cardinalText) +DOCX_NUM_FMT(chicago) +DOCX_NUM_FMT(chineseCounting) +DOCX_NUM_FMT(chineseCountingThousand) +DOCX_NUM_FMT(chineseLegalSimplified) +DOCX_NUM_FMT(chosung) +DOCX_NUM_FMT(decimal) +DOCX_NUM_FMT(decimalEnclosedCircle) +DOCX_NUM_FMT(decimalEnclosedCircleChinese) +DOCX_NUM_FMT(decimalEnclosedFullstop) +DOCX_NUM_FMT(decimalEnclosedParen) +DOCX_NUM_FMT(decimalFullWidth) +DOCX_NUM_FMT(decimalFullWidth2) +DOCX_NUM_FMT(decimalHalfWidth) +DOCX_NUM_FMT(decimalZero) +DOCX_NUM_FMT(ganada) +DOCX_NUM_FMT(hebrew1) +DOCX_NUM_FMT(hebrew2) +DOCX_NUM_FMT(hex) +DOCX_NUM_FMT(hindiConsonants) +DOCX_NUM_FMT(hindiCounting) +DOCX_NUM_FMT(hindiNumbers) +DOCX_NUM_FMT(hindiVowels) +DOCX_NUM_FMT(ideographDigital) +DOCX_NUM_FMT(ideographEnclosedCircle) +DOCX_NUM_FMT(ideographLegalTraditional) +DOCX_NUM_FMT(ideographTraditional) +DOCX_NUM_FMT(ideographZodiac) +DOCX_NUM_FMT(ideographZodiacTraditional) +DOCX_NUM_FMT(iroha) +DOCX_NUM_FMT(irohaFullWidth) +DOCX_NUM_FMT(japaneseCounting) +DOCX_NUM_FMT(japaneseDigitalTenThousand) +DOCX_NUM_FMT(japaneseLegal) +DOCX_NUM_FMT(koreanCounting) +DOCX_NUM_FMT(koreanDigital) +DOCX_NUM_FMT(koreanDigital2) +DOCX_NUM_FMT(koreanLegal) +DOCX_NUM_FMT(lowerLetter) +DOCX_NUM_FMT(lowerRoman) +DOCX_NUM_FMT(none) +DOCX_NUM_FMT(numberInDash) +DOCX_NUM_FMT(ordinal) +DOCX_NUM_FMT(ordinalText) +DOCX_NUM_FMT(russianLower) +DOCX_NUM_FMT(russianUpper) +DOCX_NUM_FMT(taiwaneseCounting) +DOCX_NUM_FMT(taiwaneseCountingThousand) +DOCX_NUM_FMT(taiwaneseDigital) +DOCX_NUM_FMT(thaiCounting) +DOCX_NUM_FMT(thaiLetters) +DOCX_NUM_FMT(thaiNumbers) +DOCX_NUM_FMT(upperLetter) +DOCX_NUM_FMT(upperRoman) +DOCX_NUM_FMT(vietnameseCounting) +#undef DOCX_NUM_FMT +#undef DOCX_TAG diff --git a/crengine/src/docxfmt.cpp b/crengine/src/docxfmt.cpp new file mode 100644 index 0000000000..b916490987 --- /dev/null +++ b/crengine/src/docxfmt.cpp @@ -0,0 +1,3065 @@ +#include "../include/docxfmt.h" +#include "../include/lvtinydom.h" +#include "../include/fb2def.h" +#include "../include/lvopc.h" + +#define DOCX_TAG_NAME(itm) docx_el_##itm##_name +#define DOCX_TAG_ID(itm) docx_el_##itm +#define DOCX_TAG_CHILD(itm) { DOCX_TAG_ID(itm), DOCX_TAG_NAME(itm) } +#define DOCX_LAST_ITEM { -1, NULL } + +/// known docx items name and identifier +struct item_def_t { + int id; + const lChar16 * name; +}; + +static const lChar16* const docx_DocumentContentType = L"application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"; +static const lChar16* const docx_NumberingContentType = L"application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml"; +static const lChar16* const docx_StylesContentType = L"application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml"; +static const lChar16* const docx_ImageRelationship = L"http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"; +static const lChar16* const docx_HyperlinkRelationship = L"http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"; +static const lChar16* const docx_FootNotesRelationShip = L"http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes"; +static const lChar16* const docx_EndNotesRelationShip = L"http://schemas.openxmlformats.org/officeDocument/2006/relationships/endnotes"; + +enum { +#define DOCX_NUM_FMT(itm) +#define DOCX_TAG(itm) DOCX_TAG_ID(itm), + docx_el_NULL = 0, + #include "docxdtd.inc" + docx_el_MAX_ID +}; + +#define DOCX_NUM_FMT(itm) +#define DOCX_TAG(itm) static const lChar16 * const DOCX_TAG_NAME(itm) = L ## #itm; + #include "docxdtd.inc" + +const struct item_def_t styles_elements[] = { + DOCX_TAG_CHILD(styles), + DOCX_TAG_CHILD(style), + DOCX_TAG_CHILD(docDefaults), + DOCX_LAST_ITEM +}; + + +enum docx_lineRule_type { + docx_lineRule_atLeast, + docx_lineRule_auto, + docx_lineRule_exact +}; + +enum docx_style_type { + docx_paragraph_style, + docx_character_style, + docx_table_style, + docx_numbering_style +}; + +enum docx_multilevel_type { + docx_hybrid_multilevel, + docx_multilevel, + docx_singlelevel +}; + +#define DOCX_NUM_FMT(itm) docx_numFormat_##itm , +#define DOCX_TAG(itm) +enum docx_numFormat_type { + #include "docxdtd.inc" + docx_numFormat_MAX_ID +}; + +enum docx_LevelSuffix_type { + docx_level_suffix_tab, + docx_level_suffix_space, + docx_level_suffix_nothing +}; + +const struct item_def_t style_elements[] = { + DOCX_TAG_CHILD(name), + DOCX_TAG_CHILD(basedOn), + DOCX_TAG_CHILD(pPr), + DOCX_TAG_CHILD(rPr), + DOCX_TAG_CHILD(tblPr), + DOCX_TAG_CHILD(trPr), + DOCX_TAG_CHILD(tcPr), + DOCX_LAST_ITEM +}; + +const struct item_def_t docDefaults_elements[] = { + DOCX_TAG_CHILD(pPr), + DOCX_TAG_CHILD(pPrDefault), + DOCX_TAG_CHILD(rPrDefault), + DOCX_TAG_CHILD(rPr), + DOCX_LAST_ITEM +}; + +const struct item_def_t rPr_elements[] = { + DOCX_TAG_CHILD(b), + DOCX_TAG_CHILD(i), + DOCX_TAG_CHILD(color), + DOCX_TAG_CHILD(jc), + DOCX_TAG_CHILD(lang), + DOCX_TAG_CHILD(rFonts), + DOCX_TAG_CHILD(rStyle), + DOCX_TAG_CHILD(u), + DOCX_TAG_CHILD(vertAlign), + DOCX_TAG_CHILD(sz), + DOCX_TAG_CHILD(vanish), + DOCX_TAG_CHILD(strike), + DOCX_LAST_ITEM +}; + +const struct item_def_t numPr_elements[] = { + DOCX_TAG_CHILD(ilvl), + DOCX_TAG_CHILD(numId), + DOCX_LAST_ITEM +}; + +const struct item_def_t pPr_elements[] = { + DOCX_TAG_CHILD(pageBreakBefore), + DOCX_TAG_CHILD(keepNext), + DOCX_TAG_CHILD(pStyle), + DOCX_TAG_CHILD(jc), + DOCX_TAG_CHILD(spacing), + DOCX_TAG_CHILD(numPr), + DOCX_TAG_CHILD(textAlignment), + DOCX_TAG_CHILD(ind), + DOCX_TAG_CHILD(suppressAutoHyphens), +// DOCX_TAG_CHILD(rPr), don't care about Paragraph merker formatting + DOCX_TAG_CHILD(outlineLvl), + DOCX_LAST_ITEM +}; + +const struct item_def_t p_elements[] = { + DOCX_TAG_CHILD(r), + DOCX_TAG_CHILD(pPr), + DOCX_TAG_CHILD(hyperlink), + DOCX_TAG_CHILD(bookmarkStart), + DOCX_LAST_ITEM +}; + +const struct item_def_t r_elements[] = { + DOCX_TAG_CHILD(br), + DOCX_TAG_CHILD(t), + DOCX_TAG_CHILD(tab), + DOCX_TAG_CHILD(drawing), + DOCX_TAG_CHILD(rPr), + DOCX_TAG_CHILD(footnoteReference), + DOCX_TAG_CHILD(footnoteRef), + DOCX_TAG_CHILD(endnoteReference), + DOCX_TAG_CHILD(endnoteRef), + DOCX_TAG_CHILD(fldChar), + DOCX_TAG_CHILD(instrText), + DOCX_LAST_ITEM +}; + +const struct item_def_t hyperlink_elements[] = { + DOCX_TAG_CHILD(r), + DOCX_LAST_ITEM +}; + +const struct item_def_t drawing_elements[] = { + DOCX_TAG_CHILD(blipFill), + DOCX_TAG_CHILD(blip), + DOCX_TAG_CHILD(graphic), + DOCX_TAG_CHILD(graphicData), + DOCX_TAG_CHILD(inline), + DOCX_TAG_CHILD(anchor), + DOCX_TAG_CHILD(pic), + DOCX_LAST_ITEM +}; + +const struct item_def_t tbl_elements[] = { + DOCX_TAG_CHILD(bookmarkStart), + DOCX_TAG_CHILD(tblPr), + DOCX_TAG_CHILD(tblGrid), + DOCX_TAG_CHILD(tcPr), + DOCX_TAG_CHILD(gridCol), + DOCX_TAG_CHILD(gridSpan), + DOCX_TAG_CHILD(tr), + DOCX_TAG_CHILD(tc), + DOCX_TAG_CHILD(p), + DOCX_TAG_CHILD(vMerge), + DOCX_LAST_ITEM +}; + +const struct item_def_t lvl_elements[] = { + DOCX_TAG_CHILD(isLgl), + DOCX_TAG_CHILD(lvlJc), + DOCX_TAG_CHILD(lvlRestart), + DOCX_TAG_CHILD(lvlText), + DOCX_TAG_CHILD(numFmt), + DOCX_TAG_CHILD(pPr), + DOCX_TAG_CHILD(pStyle), + DOCX_TAG_CHILD(rPr), + DOCX_TAG_CHILD(start), + DOCX_TAG_CHILD(suff), + DOCX_LAST_ITEM +}; + +const struct item_def_t numbering_elements[] = { + DOCX_TAG_CHILD(numbering), + DOCX_TAG_CHILD(abstractNum), + DOCX_TAG_CHILD(num), + DOCX_LAST_ITEM +}; + +const struct item_def_t abstractNum_elements[] = { + DOCX_TAG_CHILD(lvl), + DOCX_LAST_ITEM +}; + +const struct item_def_t num_elements[] = { + DOCX_TAG_CHILD(abstractNumId), + DOCX_TAG_CHILD(lvlOverride), + DOCX_LAST_ITEM +}; + + +const struct item_def_t document_elements[] = { + DOCX_TAG_CHILD(document), + DOCX_TAG_CHILD(body), + DOCX_TAG_CHILD(p), + DOCX_TAG_CHILD(tbl), + DOCX_LAST_ITEM +}; + +const struct item_def_t footnotes_elements[] = { + DOCX_TAG_CHILD(footnotes), + DOCX_TAG_CHILD(footnote), + DOCX_TAG_CHILD(endnotes), + DOCX_TAG_CHILD(endnote), + DOCX_TAG_CHILD(p), + DOCX_LAST_ITEM +}; + +const struct item_def_t no_elements[] = { + DOCX_LAST_ITEM +}; + +const struct item_def_t jc_attr_values[] = { + { css_ta_left, L"left"}, + { css_ta_right, L"right" }, + { css_ta_center, L"center" }, + { css_ta_justify, L"both" }, + DOCX_LAST_ITEM +}; + +const struct item_def_t vertAlign_attr_values[] = { + { css_va_baseline, L"baseline"}, + { css_va_super, L"superscript" }, + { css_va_sub, L"subscript" }, + DOCX_LAST_ITEM +}; + +const struct item_def_t textAlignment_attr_values[] = { + { css_va_inherit, L"auto" }, + { css_va_baseline, L"baseline"}, + { css_va_bottom, L"bottom"}, + { css_va_middle, L"center" }, + { css_va_top, L"top" }, + DOCX_LAST_ITEM +}; + +const struct item_def_t lineRule_attr_values[] = { + { docx_lineRule_atLeast, L"atLeast" }, + { docx_lineRule_auto, L"auto"}, + { docx_lineRule_exact, L"exact"}, + DOCX_LAST_ITEM +}; + +const struct item_def_t styleType_attr_values[] = { + { docx_paragraph_style, L"paragraph" }, + { docx_character_style, L"character"}, + { docx_numbering_style, L"numbering"}, + { docx_table_style, L"table"}, + DOCX_LAST_ITEM +}; + +const struct item_def_t lvlSuff_attr_values[] = { + { docx_level_suffix_tab, L"tab" }, + { docx_level_suffix_space, L"space" }, + { docx_level_suffix_nothing, L"nothing" }, + DOCX_LAST_ITEM +}; + +#define DOCX_TAG(itm) +#define DOCX_NUM_FMT(itm) { docx_numFormat_##itm , L ## #itm }, +const struct item_def_t numFmt_attr_values[] = { + #include "docxdtd.inc" + DOCX_LAST_ITEM +}; + +bool DetectDocXFormat( LVStreamRef stream ) +{ + LVContainerRef m_arc = LVOpenArchieve( stream ); + if ( m_arc.isNull() ) + return false; // not a ZIP archive + + OpcPackage package(m_arc); + + return package.partExist(package.getContentPartName(docx_DocumentContentType)); +} + +class docxImportContext; +class docxStyle; + +template +class docx_PropertiesContainer +{ +public: + static const int PROP_COUNT = N; + + virtual void reset() { + for(int i = 0; i < N; i++) { + m_properties[i].type = css_val_unspecified; + m_properties[i].value = 0; + } + } + virtual ~docx_PropertiesContainer() {} + + docx_PropertiesContainer() { + reset(); + } + + css_length_t get(int index) const { + if( index < N ) { + return m_properties[index]; + } + return css_length_t(css_val_unspecified, 0); + } + + void set(int index, int value) { + if ( index < N ) { + m_properties[index].type = css_val_pt; + m_properties[index].value = value; + } + } + + void set(int index, css_length_t& value) { + if ( index < N ) { + m_properties[index] = value; + } + } + + template + T getValue(int index, T defaultValue) const { + css_length_t property = get(index); + if(property.type != css_val_unspecified) + return (T)property.value; + return defaultValue; + } + + template<> + bool getValue(int index, bool defaultValue) const { + css_length_t property = get(index); + if(property.type != css_val_unspecified) + return (property.value != 0); + return defaultValue; + } + + void combineWith(const docx_PropertiesContainer* other) + { + for(int i = 0; i < PROP_COUNT; i++) { + css_length_t baseValue = other->get(i); + if( get(i).type == css_val_unspecified && + baseValue.type != css_val_unspecified) + set(i, baseValue); + } + } + +protected: + css_length_t m_properties[N]; +}; + +enum docx_run_properties +{ + docx_run_italic_prop, + docx_run_bold_prop, + docx_run_underline_prop, + docx_run_strikethrough_prop, + docx_run_hidden_prop, + docx_run_halign_prop, + docx_run_valign_prop, + docx_run_font_size_prop, + docx_run_max_prop +}; + +class docx_rPr : public docx_PropertiesContainer +{ + friend class docx_rPrHandler; +private: + lString16 m_rStyle; +public: + docx_rPr(); + void reset() { m_rStyle.clear(); docx_PropertiesContainer::reset(); } + ///properties + inline bool isBold() const { return getValue(docx_run_bold_prop, false); } + inline void setBold(bool value) { set(docx_run_bold_prop, value); } + inline bool isItalic() const { return getValue(docx_run_italic_prop, false); } + inline void setItalic(bool value) { set(docx_run_italic_prop, value); } + inline bool isUnderline() const { return getValue(docx_run_underline_prop, false); } + inline void setUnderline(bool value) { set(docx_run_underline_prop, value); } + inline bool isStrikeThrough() const { return getValue(docx_run_strikethrough_prop, false); } + inline void setStrikeThrough(bool value) { set(docx_run_strikethrough_prop, value); } + inline bool isSubScript() const { return (getVertAlign() == css_va_sub); } + inline bool isSuperScript() const { return (getVertAlign() == css_va_super); } + inline bool isHidden() const { return getValue(docx_run_hidden_prop, false); } + inline void setHidden(bool value) { set(docx_run_hidden_prop, value); } + inline css_text_align_t getTextAlign() const { + return getValue(docx_run_halign_prop, css_ta_inherit); + } + inline void setTextAlign( css_text_align_t value ) { set(docx_run_halign_prop, value); } + inline css_vertical_align_t getVertAlign() const { + return getValue(docx_run_valign_prop, css_va_inherit); + } + inline void setVertAlign(css_vertical_align_t value) { set(docx_run_valign_prop,value); } + lString16 getCss(); +}; + +enum docx_p_properties { + docx_p_page_break_before_prop, + docx_p_keep_next_prop, + docx_p_mirror_indents_prop, + docx_p_halign_prop, + docx_p_valign_prop, + docx_p_line_rule_prop, + docx_p_hyphenate_prop, + docx_p_before_spacing_prop, + docx_p_after_spacing_prop, + docx_p_before_auto_spacing_prop, + docx_p_after_auto_spacing_prop, + docx_p_line_spacing_prop, + docx_p_line_height_prop, + docx_p_left_margin_prop, + docx_p_right_margin_prop, + docx_p_indent_prop, + docx_p_hanging_prop, + docx_p_outline_level_prop, + docx_p_num_id_prop, + docx_p_ilvl_prop, + docx_p_max_prop +}; + +class docx_pPr : public docx_PropertiesContainer +{ + friend class docx_pPrHandler; +private: + lString16 m_pStyleId; +public: + docx_pPr(); + + void reset() { + m_pStyleId.clear(); + docx_PropertiesContainer::reset(); + } + ///properties + inline css_text_align_t getTextAlign() const { + return getValue(docx_p_halign_prop, css_ta_inherit); + } + inline void setTextAlign( css_text_align_t value ) { set(docx_p_halign_prop, value); } + inline css_vertical_align_t getVertAlign() const { + return getValue(docx_p_valign_prop, css_va_inherit); + } + inline void setVertAlign(css_vertical_align_t value) { set(docx_p_valign_prop, value); } + inline css_hyphenate_t getHyphenate() const { + return getValue(docx_p_hyphenate_prop, css_hyph_inherit); + } + inline void setHyphenate( css_hyphenate_t value ) { set(docx_p_hyphenate_prop, value); } + // page-break-before:always + inline bool isPageBreakBefore() const { return getValue(docx_p_page_break_before_prop, false); } + inline void setPageBreakBefore(bool value) { set(docx_p_page_break_before_prop, value); } + // page-break-after:avoid + inline bool isKeepNext() const { return getValue(docx_p_keep_next_prop, false); } + inline void setKeepNext(bool value) { set(docx_p_keep_next_prop, value); } + inline bool isMirrorIndents() const { return getValue(docx_p_mirror_indents_prop, false); } + inline void setMirrorIndents(bool value) { set(docx_p_mirror_indents_prop, value); } + inline docx_lineRule_type getLineRule() const { return getValue(docx_p_line_rule_prop, docx_lineRule_auto); } + inline void setLineRule(docx_lineRule_type value) { set(docx_p_line_rule_prop, value); } + inline int getNumberingId() { return getValue(docx_p_num_id_prop, 0); } + css_length_t getOutlineLvl() { return get(docx_p_outline_level_prop); } + inline int getNumberingLevel() { return get(docx_p_ilvl_prop).value; } + docxStyle* getStyle(docxImportContext* context); + lString16 getCss(); +}; + +class docxNumLevel : public LVRefCounter +{ +private: + bool m_isLgl; + css_text_align_t m_lvlJc; + css_length_t m_ilvl; + css_length_t m_lvlRestart; + lString16 m_lvlText; + bool m_lvlTextNull; + docx_numFormat_type m_lvlNumFormat; + docx_pPr m_pPr; + docx_rPr m_rPr; + lString16 m_pStyle; + css_length_t m_lvlStart; + docx_LevelSuffix_type m_suffix; +public: + docxNumLevel(); + virtual ~docxNumLevel() {} + void reset(); + ///properties + inline bool isLgl() const { return m_isLgl; } + inline void setLgl(bool value) { m_isLgl = value; } + + inline css_text_align_t getLevelAlign() const { return m_lvlJc; } + inline void setLevelAlign( css_text_align_t value ) { m_lvlJc = value; } + inline css_length_t getLevel() const { return m_ilvl; } + inline void setLevel(const css_length_t &value) { m_ilvl = value; } + inline css_length_t getLevelRestart() const { return m_lvlRestart; } + inline void setLevelRestart(const css_length_t &value) { m_lvlRestart = value; } + inline lString16 getLevelText() const { return m_lvlText; } + inline void setLevelText(const lString16 value) { m_lvlText = value; } + inline bool getLevelTextNull() const { return m_lvlTextNull; } + inline void setLevelTextNull(const bool value) { m_lvlTextNull = value; } + inline docx_numFormat_type getNumberFormat() const { return m_lvlNumFormat; } + inline void setNumberFormat(const docx_numFormat_type value) { m_lvlNumFormat = value; } + inline lString16 getReferencedStyleId() const { return m_pStyle; } + inline void setReferencedStyleId(const lString16 value) { m_pStyle = value; } + inline css_length_t getLevelStart() const { return m_lvlStart; } + inline void setLevelStart(const css_length_t &value) { m_lvlStart = value; } + inline docx_LevelSuffix_type getLevelSuffix() const { return m_suffix; } + inline void setLevelSuffix(const docx_LevelSuffix_type value) { m_suffix = value; } + inline docx_rPr * get_rPr() { return &m_rPr; } + inline docx_pPr * get_pPr() { return &m_pPr; } + css_list_style_type_t getListType() const; +}; + +typedef LVFastRef< docxNumLevel > docxNumLevelRef; + +class docxAbstractNum : public LVRefCounter +{ +private: + docx_multilevel_type m_multilevel; + css_length_t m_abstractNumId; + LVHashTable m_levels; +public: + docxAbstractNum(); + docxNumLevel* getLevel(int level); + void addLevel(docxNumLevelRef docxLevel); + void setId(int id) { m_abstractNumId.value = id; m_abstractNumId.type = css_val_in; } + int getId() { return m_abstractNumId.value; } + virtual ~docxAbstractNum() {} + void reset(); +}; + +typedef LVFastRef< docxAbstractNum > docxAbstractNumRef; + +class docxNum : public LVRefCounter +{ +private: + css_length_t m_id; + css_length_t m_abstractNumId; + LVHashTable m_overrides; +public: + docxNum() : m_id(css_val_unspecified, 0), m_abstractNumId(css_val_unspecified, 0), + m_overrides(10) { + } + const docxAbstractNumRef getBase(docxImportContext &context) const; + void setId(int id) { m_id.value = id; m_id.type = css_val_in; } + int getId() const { return m_id.value; } + void setBaseId(int id) { m_abstractNumId.value = id; m_abstractNumId.type = css_val_in; } + int getBaseId() const { return m_abstractNumId.value; } + void overrideLevel(docxNumLevelRef docxLevel); + docxNumLevel* getDocxLevel(docxImportContext &context, int level); + bool isValid() const; + void reset(); +}; + +typedef LVFastRef< docxNum > docxNumRef; + +class docxStyle : public LVRefCounter +{ + friend class docx_styleHandler; +private: + lString16 m_Name; + lString16 m_Id; + lString16 m_basedOn; + docx_style_type m_type; + docx_pPr m_pPr; + docx_rPr m_rPr; + bool m_pPrMerged; + bool m_rPrMerged; +public: + docxStyle(); + + inline lString16 getName() const { return m_Name; } + inline void setName(const lChar16 * value) { m_Name = value; } + + inline lString16 getId() const { return m_Id; } + inline void setId(const lChar16 * value) { m_Id = value; } + + inline lString16 getBasedOn() const { return m_basedOn; } + inline void setBasedOn(const lChar16 * value) { m_basedOn = value; } + bool isValid() const; + + inline docx_style_type getStyleType() const { return m_type; } + inline void setStyleType(docx_style_type value) { m_type = value; } + docxStyle* getBaseStyle(docxImportContext* context); + inline docx_pPr * get_pPr(docxImportContext* context); + inline docx_rPr * get_rPr(docxImportContext* context); +}; + +typedef LVFastRef< docxStyle > docxStyleRef; + +class docxImportContext +{ +private: + LVHashTable m_styles; + LVHashTable m_abstractNumbers; + LVHashTable m_Numbers; + LVArray m_ListLevels; + docx_rPr m_rPrDefault; + docx_pPr m_pPrDefault; + OpcPartRef m_docPart; + OpcPartRef m_relatedPart; + OpcPackage* m_package; + ldomDocument* m_doc; +public: + docxImportContext(OpcPackage *package, ldomDocument * doc); + virtual ~docxImportContext(); + docxStyle * getStyle( lString16 id ); + void addStyle( docxStyleRef style ); + void addNum( docxNumRef num ); + void addAbstractNum(docxAbstractNumRef abstractNum ); + docxNumRef getNum(lUInt32 id) { return m_Numbers.get(id); } + docxAbstractNumRef getAbstractNum(lUInt32 id) { return m_abstractNumbers.get(id); } + lString16 getImageTarget(lString16 id) { + return getRelationTarget(docx_ImageRelationship, id); + } + lString16 getLinkTarget(lString16 id) { + return getRelationTarget(docx_HyperlinkRelationship, id); + } + lString16 getRelationTarget(const lChar16 * const relationType, lString16 id) { + if ( !m_relatedPart.isNull() ) + return m_relatedPart->getRelatedPartName(relationType, id); + return m_docPart->getRelatedPartName(relationType, id); + } + LVStreamRef openContentPart(const lChar16 * const contentType); + LVStreamRef openRelatedPart(const lChar16 * const relationshipType); + void closeRelatedPart(); + void openList(int level, int numid, ldomDocumentWriter *writer); + void closeList(int level, ldomDocumentWriter *writer); + inline docx_rPr * get_rPrDefault() { return &m_rPrDefault; } + inline docx_pPr * get_pPrDefault() { return &m_pPrDefault; } + inline int getListLevel() { return m_ListLevels.length(); } + inline bool isInList() { return m_ListLevels.length() != 0; } + void setLanguage(const lChar16 *lang); + lString16 m_footNoteId; + int m_footNoteCount; + int m_endNoteCount; + bool m_inField; + ldomNode *m_linkNode; + docxStyle* m_pStyle; +private: + lString16 getListStyle(css_list_style_type_t listType); +}; + +class docx_ElementHandler; + +class docXMLreader : public LVXMLParserCallback +{ +private: + enum docx_reader_state { + docx_in_start, + docx_in_xml_declaration, + docx_in_body, + docx_in_document + }; + int m_skipTag; + docx_reader_state m_state; +protected: + docx_ElementHandler *m_handler; + ldomDocumentWriter *m_writer; + + inline bool isSkipping() + { + return (m_skipTag != 0); + } + + inline void skipped() + { + m_skipTag--; + } + +public: + /// constructor + docXMLreader(ldomDocumentWriter *writer) : m_skipTag(0), m_state(docx_in_start), + m_handler(NULL), m_writer(writer) + { + } + + /// destructor + virtual ~docXMLreader() { } + /// called on parsing start + virtual void OnStart(LVFileFormatParser *); + /// called on parsing end + virtual void OnStop() { } + + inline void skip() + { + m_skipTag++; + } + + /// called on opening tag < + ldomNode * OnTagOpen( const lChar16 * nsname, const lChar16 * tagname); + + /// called after > of opening tag (when entering tag body) + void OnTagBody(); + + /// called on tag close + void OnTagClose( const lChar16 * nsname, const lChar16 * tagname ); + + /// called on element attribute + void OnAttribute( const lChar16 * nsname, const lChar16 * attrname, const lChar16 * attrvalue ); + + /// called on text + void OnText( const lChar16 * text, int len, lUInt32 flags ); + + /// add named BLOB data to document + bool OnBlob(lString16 name, const lUInt8 * data, int size); + + docx_ElementHandler * getHandler() + { + return m_handler; + } + + void setHandler(docx_ElementHandler *a_handler) + { + m_handler = a_handler; + } + + void setWriter(ldomDocumentWriter *writer) + { + m_writer = writer; + } +}; + +class docx_ElementHandler +{ +protected: + docXMLreader * m_reader; + ldomDocumentWriter *m_writer; + docx_ElementHandler *m_savedHandler; + docxImportContext *m_importContext; + const item_def_t *m_children; + int m_element; + int m_state; +protected: + static bool parse_OnOff_attribute(const lChar16 * attrValue); + static int parse_name(const struct item_def_t *tags, const lChar16 * nameValue); + static void parse_int(const lChar16 * attrValue, css_length_t & result); + void generateLink(const lChar16 * target, const lChar16 * type, const lChar16 *text); + void setChildrenInfo(const struct item_def_t *tags); + docx_ElementHandler(docXMLreader * reader, ldomDocumentWriter *writer, docxImportContext *context, + int element, const struct item_def_t *children) : + m_reader(reader), m_writer(writer), m_importContext(context), m_children(children), + m_element(element), m_state(element) + { + } + virtual ~docx_ElementHandler() {} +public: + ldomNode * handleTagOpen(const lChar16 * nsname, const lChar16 * tagname); + virtual ldomNode * handleTagOpen(int tagId); + void handleAttribute(const lChar16 * nsname, const lChar16 * attrname, const lChar16 * attrvalue) + { + CR_UNUSED(nsname); + + handleAttribute(attrname, attrvalue); + } + virtual void handleAttribute(const lChar16 * attrname, const lChar16 * attrvalue) { + CR_UNUSED2(attrname, attrvalue); + } + virtual void handleTagBody() {} + virtual void handleText( const lChar16 * text, int len, lUInt32 flags ) { + CR_UNUSED3(text,len,flags); + } + virtual void handleTagClose( const lChar16 * nsname, const lChar16 * tagname ) + { + CR_UNUSED2(nsname, tagname); + + if(m_state == m_element) + stop(); + else + m_state = m_element; + } + virtual void start(); + virtual void stop(); + virtual void reset(); +}; + +class docx_SkipElementHandler : public docx_ElementHandler +{ +public: + docx_SkipElementHandler(docXMLreader * reader, ldomDocumentWriter *writer, docxImportContext *context, + int element) : docx_ElementHandler(reader, writer, context, element, no_elements) {} + void skipElement(int element) { + m_state = element; + start(); + } +}; + +class docx_rPrHandler : public docx_ElementHandler +{ +private: + docx_rPr *m_rPr; +public: + docx_rPrHandler(docXMLreader * reader, ldomDocumentWriter *writer, docxImportContext *context) : + docx_ElementHandler(reader, writer, context, docx_el_rPr, rPr_elements), m_rPr(NULL) + { + } + ldomNode * handleTagOpen(int tagId); + void handleAttribute(const lChar16 * attrname, const lChar16 * attrvalue); + void start(docx_rPr *rPr); + void reset(); +}; + +class docx_drawingHandler : public docx_ElementHandler +{ +private: + int m_level; +public: + docx_drawingHandler(docXMLreader * reader, ldomDocumentWriter *writer, docxImportContext *context) : + docx_ElementHandler(reader, writer, context, docx_el_drawing, drawing_elements), m_level(0) + { + } + ldomNode * handleTagOpen(int tagId); + void handleAttribute(const lChar16 * attrname, const lChar16 * attrvalue); + void handleTagClose( const lChar16 * nsname, const lChar16 * tagname ); + void reset() { m_level = 1; } +}; + +class docx_pHandler; +class docx_rHandler : public docx_ElementHandler +{ +private: + docx_rPr m_rPr; + docx_pHandler* m_pHandler; + docx_rPrHandler m_rPrHandler; + lString16 m_footnoteId; + lString16 m_instruction; + docx_drawingHandler m_drawingHandler; + bool m_content; +private: + void handleInstruction(lString16& instruction, lString16 parameters); +public: + docx_rHandler(docXMLreader * reader, ldomDocumentWriter *writer, docxImportContext *context, docx_pHandler* pHandler) : + docx_ElementHandler(reader, writer, context, docx_el_r, r_elements), m_pHandler(pHandler), + m_rPrHandler(reader, writer, context), + m_drawingHandler(reader, writer, context), + m_content(false) + { + } + ldomNode * handleTagOpen(int tagId); + void handleAttribute(const lChar16 * attrname, const lChar16 * attrvalue); + void handleText( const lChar16 * text, int len, lUInt32 flags ); + void handleTagClose( const lChar16 * nsname, const lChar16 * tagname ); + void reset(); +}; + +class docx_pPrHandler : public docx_ElementHandler +{ +private: + docx_pPr *m_pPr; +public: + docx_pPrHandler(docXMLreader * reader, ldomDocumentWriter *writer, docxImportContext *context) : + docx_ElementHandler(reader, writer, context, docx_el_pPr, pPr_elements), m_pPr(NULL) + { + } + ldomNode * handleTagOpen(int tagId); + void handleAttribute(const lChar16 * attrname, const lChar16 * attrvalue); + void handleTagClose( const lChar16 * nsname, const lChar16 * tagname ); + void start(docx_pPr *pPr); + void reset(); +}; + +class docx_hyperlinkHandler : public docx_ElementHandler +{ + docx_rHandler m_rHandler; + lString16 m_target; +public: + docx_hyperlinkHandler(docXMLreader * reader, ldomDocumentWriter *writer, docxImportContext *context, docx_pHandler* pHandler) : + docx_ElementHandler(reader, writer, context, docx_el_hyperlink, hyperlink_elements), + m_rHandler(reader, writer, context, pHandler) + { + } + ldomNode * handleTagOpen(int tagId); + void handleAttribute(const lChar16 * attrname, const lChar16 * attrvalue); + void handleTagClose( const lChar16 * nsname, const lChar16 * tagname ); + void reset() { m_target.clear(); m_rHandler.reset(); } +}; + +class docx_documentHandler; + +class docx_pHandler : public docx_ElementHandler +{ +private: + docx_pPrHandler m_pPrHandler; + docx_pPr m_pPr; + docx_rHandler m_rHandler; + docx_documentHandler* m_documentHandler; + docx_hyperlinkHandler m_hyperlinkHandler; + int m_runCount; + lString16 m_styleTags; + bool m_inTitle; +private: + int styleTagPos(lChar16 ch) + { + for (int i=0; i < m_styleTags.length(); i++) + if (m_styleTags[i] == ch) + return i; + return -1; + } + const lChar16 * getStyleTagName( lChar16 ch ); + void closeStyleTag( lChar16 ch); + void openStyleTag( lChar16 ch); +public: + docx_pHandler(docXMLreader * reader, ldomDocumentWriter *writer, docxImportContext *context, docx_documentHandler* p_documentHandler) : + docx_ElementHandler(reader, writer, context, docx_el_p, p_elements), + m_pPrHandler(reader, writer, context), + m_rHandler(reader, writer, context, this), + m_documentHandler(p_documentHandler), + m_hyperlinkHandler(reader, writer, context, this), m_inTitle(false) + { + } + ldomNode * handleTagOpen(int tagId); + void handleAttribute(const lChar16 * attrname, const lChar16 * attrvalue); + void handleTagClose( const lChar16 * nsname, const lChar16 * tagname ); + void reset(); + void openStyleTags(docx_rPr* runProps); + void closeStyleTags(docx_rPr* runProps); + void closeStyleTags(); +}; + +struct docx_row_span_info { + ldomNode *column; + int rows; + docx_row_span_info() : column(NULL), rows(1) {} + docx_row_span_info(ldomNode *column) : column(column), rows(1) {} +}; + +class docx_tblHandler : public docx_ElementHandler +{ +private: + LVArray m_levels; + LVArray m_rowSpaninfo; + int m_rowCount; + docx_pHandler m_pHandler; + docx_SkipElementHandler m_skipHandler; + docx_ElementHandler* m_pHandler_; + int m_colSpan; + int m_column; + int m_columnCount; + enum vMergeState_tyep { + VMERGE_NONE, + VMERGE_RESET, + VMERGE_CONTINUE + }; + int m_vMergeState; + void endRowSpan(int column); +public: + docx_tblHandler(docXMLreader * reader, ldomDocumentWriter *writer, docxImportContext *context) : + docx_ElementHandler(reader, writer, context, docx_el_tbl, tbl_elements), + m_rowCount(0), m_pHandler(reader, writer, context, NULL), + m_skipHandler(reader, writer, context, docx_el_p), m_colSpan(1), + m_column(0), m_columnCount(0), m_vMergeState(VMERGE_NONE) + { + } + ldomNode * handleTagOpen(int tagId); + void handleAttribute(const lChar16 * attrname, const lChar16 * attrvalue); + void handleTagClose( const lChar16 * nsname, const lChar16 * tagname ); + void reset(); +}; + +class docx_footnotesHandler : public docx_ElementHandler +{ +private: + bool m_normal; + int m_pCount; + docx_pHandler paragraphHandler; +private: + bool isEndNote() { return m_element == docx_el_endnotes; } +public: + docx_footnotesHandler(docXMLreader * reader, ldomDocumentWriter *writer, docxImportContext *context, int element) : + docx_ElementHandler(reader, writer, context, element, footnotes_elements), m_normal(), m_pCount(), + paragraphHandler(reader, writer, context, NULL) + { + } + ldomNode * handleTagOpen(int tagId); + void handleAttribute(const lChar16 * attrname, const lChar16 * attrvalue); + void handleTagClose( const lChar16 * nsname, const lChar16 * tagname ); +}; + +class docx_documentHandler : public docx_ElementHandler +{ +private: + docx_pHandler paragraphHandler; + docx_tblHandler m_tableHandler; + ldomNode *m_section; + int m_sectionLevel; + int m_pCount; + bool m_hasTitle; +private: + void makeSection(int startIndex); + void openSection(int level); + void closeSection(int level); +public: + docx_documentHandler(docXMLreader * reader, ldomDocumentWriter *writer, docxImportContext *context) : + docx_ElementHandler(reader, writer, context, docx_el_document, document_elements), + paragraphHandler(reader, writer, context, this), + m_tableHandler(reader, writer, context), m_section(), m_sectionLevel(), m_pCount() + { + } + void onTitleStart(int level); + void onTitleEnd(); + ldomNode * handleTagOpen(int tagId); + void handleAttribute(const lChar16 * nsname, const lChar16 * attrname, const lChar16 * attrvalue); + void handleTagClose( const lChar16 * nsname, const lChar16 * tagname ); +}; + +class docx_styleHandler : public docx_ElementHandler +{ +private: + docx_pPrHandler m_pPrHandler; + docx_rPrHandler m_rPrHandler; + docxStyleRef m_styleRef; + docxStyle *m_style; +public: + /// constructor + docx_styleHandler(docXMLreader * reader, ldomDocumentWriter *writer, docxImportContext *context) : + docx_ElementHandler(reader, writer, context, docx_el_style, style_elements), + m_pPrHandler(reader, writer, context), + m_rPrHandler(reader, writer, context) + { + } + ldomNode * handleTagOpen(int tagId); + void handleAttribute(const lChar16 * attrname, const lChar16 * attrvalue); + void handleTagClose( const lChar16 * nsname, const lChar16 * tagname ); + void start(); +}; + +class docx_stylesHandler : public docx_ElementHandler +{ +private: + docx_styleHandler m_styleHandler; + docx_pPrHandler m_pPrHandler; + docx_rPrHandler m_rPrHandler; +public: + /// constructor + docx_stylesHandler(docXMLreader * reader, ldomDocumentWriter *writer, docxImportContext *context) : + docx_ElementHandler(reader, writer, context, docx_el_styles, styles_elements), + m_styleHandler(reader, writer, context), + m_pPrHandler(reader, writer, context), + m_rPrHandler(reader, writer, context) + { + } + /// destructor + ldomNode * handleTagOpen(int tagId); + void handleTagClose( const lChar16 * nsname, const lChar16 * tagname ); + void reset(); +}; + +class docx_lvlHandler : public docx_ElementHandler +{ +private: + docxNumLevel *m_lvl; + docx_pPrHandler m_pPrHandler; + docx_rPrHandler m_rPrHandler; +public: + /// constructor + docx_lvlHandler(docXMLreader * reader, ldomDocumentWriter *writer, docxImportContext *context) : + docx_ElementHandler(reader, writer, context, docx_el_lvl, lvl_elements), + m_pPrHandler(reader, writer, context), + m_rPrHandler(reader, writer, context) + { + } + void start(docxNumLevel* level) { + m_lvl = level; + docx_ElementHandler::start(); + } + void handleAttribute(const lChar16 * attrname, const lChar16 * attrvalue); + ldomNode * handleTagOpen(int tagId); + void reset(); +}; + +class docx_numHandler : public docx_ElementHandler +{ + docx_lvlHandler m_lvlHandler; + docxNumRef m_numRef; + docxNumLevelRef m_levelRef; +public: + docx_numHandler(docXMLreader * reader, ldomDocumentWriter *writer, docxImportContext *context) : + docx_ElementHandler(reader, writer, context, docx_el_num, num_elements), + m_lvlHandler(reader, writer, context) + { + } + void handleAttribute(const lChar16 * attrname, const lChar16 * attrvalue); + ldomNode * handleTagOpen(int tagId); + void handleTagClose( const lChar16 * nsname, const lChar16 * tagname ); + void start(); +}; + +class docx_abstractNumHandler : public docx_ElementHandler +{ + docx_lvlHandler m_lvlHandler; + docxNumLevelRef m_levelRef; + docxAbstractNumRef m_abstractNumRef; +public: + docx_abstractNumHandler(docXMLreader * reader, ldomDocumentWriter *writer, docxImportContext *context) : + docx_ElementHandler(reader, writer, context, docx_el_abstractNum, abstractNum_elements), + m_lvlHandler(reader, writer, context) + { + } + void handleAttribute(const lChar16 * attrname, const lChar16 * attrvalue); + ldomNode * handleTagOpen(int tagId); + void handleTagClose( const lChar16 * nsname, const lChar16 * tagname ); + void start(); +}; + +class docx_numberingHandler : public docx_ElementHandler +{ +private: + docx_numHandler m_numHandler; + docx_abstractNumHandler m_abstractNumHandler; +public: + /// constructor + docx_numberingHandler(docXMLreader * reader, ldomDocumentWriter *writer, docxImportContext *context) : + docx_ElementHandler(reader, writer, context, docx_el_numbering, numbering_elements), + m_numHandler(reader, writer, context), + m_abstractNumHandler(reader, writer, context) + { + } + ldomNode * handleTagOpen(int tagId); + void handleTagClose( const lChar16 * nsname, const lChar16 * tagname ); +}; + +docx_rPr::docx_rPr() +{ +} + +lString16 docx_rPr::getCss() +{ + lString16 style; + + if( isBold() ) + style << " font-weight: bold;"; + if( isItalic() ) + style << " font-style: italic;"; + if( isUnderline() ) + style << " text-decoration: underline;"; + if( isStrikeThrough() ) + style << " text-decoration: line-through;"; + return style; +} + +docx_pPr::docx_pPr() +{ +} + +docxStyle *docx_pPr::getStyle(docxImportContext *context) +{ + docxStyle* ret = NULL; + + if (!m_pStyleId.empty() ) { + ret = context->getStyle(m_pStyleId); + } + return ret; +} + +lString16 docx_pPr::getCss() +{ + lString16 style; + + css_text_align_t align = getTextAlign(); + if(align != css_ta_inherit) + { + style << "text-align: "; + switch(align) + { + case css_ta_left: + style << "left;"; + break; + case css_ta_right: + style << "right"; + break; + case css_ta_center: + style << "center;"; + break; + case css_ta_justify: + default: + style << "justify"; + break; + } + } + if( isPageBreakBefore() ) + style << "page-break-before: always;"; + else if ( isKeepNext() ) + style << "page-break-before: avoid;"; + return style; +} + +docxNumLevel::docxNumLevel() : + m_isLgl(false), m_lvlJc(css_ta_inherit), m_ilvl(css_val_unspecified, 0), + m_lvlRestart(css_val_unspecified, 0), m_lvlTextNull(false), m_lvlNumFormat(docx_numFormat_ordinal), + m_lvlStart(css_val_unspecified, 0), m_suffix(docx_level_suffix_space) +{ +} + +void docxNumLevel::reset() +{ + m_isLgl = false; + m_lvlJc = css_ta_inherit; + m_ilvl.type = css_val_unspecified; + m_lvlRestart.type = css_val_unspecified; + m_lvlText.clear(); + m_lvlTextNull = false; + m_lvlNumFormat = docx_numFormat_ordinal; + m_pPr.reset(); + m_rPr.reset(); + m_pStyle.clear(); + m_lvlStart.type = css_val_unspecified; + m_suffix = docx_level_suffix_space; +} + +css_list_style_type_t docxNumLevel::getListType() const +{ + if(m_isLgl) + return css_lst_decimal; + switch(m_lvlNumFormat) { + case docx_numFormat_lowerLetter: + return css_lst_lower_alpha; + case docx_numFormat_lowerRoman: + return css_lst_lower_roman; + case docx_numFormat_upperLetter: + return css_lst_upper_alpha; + case docx_numFormat_upperRoman: + return css_lst_upper_roman; + case docx_numFormat_bullet: + if ( getLevelText() == L"\xf0a7" ) + return css_lst_square; + return css_lst_disc; + case docx_numFormat_decimal: + return css_lst_decimal; + default: + return css_lst_none; + } +} + + +ldomNode * docXMLreader::OnTagOpen( const lChar16 * nsname, const lChar16 * tagname) +{ + if ( m_state == docx_in_start && !lStr_cmp(tagname, "?xml") ) + m_state = docx_in_xml_declaration; + else if( !isSkipping() ) { + if ( m_handler ) + return m_handler->handleTagOpen(nsname, tagname); + } else + // skip nested tag + skip(); + return NULL; +} + +void docXMLreader::OnStart(LVFileFormatParser *) +{ + m_skipTag = 0; + m_state = docx_in_start; +} + +void docXMLreader::OnTagBody() +{ + if( m_state != docx_in_xml_declaration && !isSkipping() && m_handler ) + m_handler->handleTagBody(); +} + +void docXMLreader::OnTagClose( const lChar16 * nsname, const lChar16 * tagname ) +{ + CR_UNUSED(nsname); + + switch(m_state) { + case docx_in_xml_declaration: + m_state = docx_in_document; + break; + case docx_in_document: + if( isSkipping() ) + skipped(); + else if ( m_handler ) + m_handler->handleTagClose(L"", tagname); + break; + default: + CRLog::error("Unexpected state"); + break; + } +} + +void docXMLreader::OnAttribute( const lChar16 * nsname, const lChar16 * attrname, const lChar16 * attrvalue ) +{ + switch(m_state) { + case docx_in_xml_declaration: + if ( m_writer ) + m_writer->OnAttribute(nsname, attrname, attrvalue); + break; + case docx_in_document: + if ( !isSkipping() && m_handler ) + m_handler->handleAttribute(nsname, attrname, attrvalue); + break; + default: + CRLog::error("Unexpected state"); + } +} + +void docXMLreader::OnText( const lChar16 * text, int len, lUInt32 flags ) +{ + if( !isSkipping() && m_handler ) + m_handler->handleText(text, len, flags); +} + +bool docXMLreader::OnBlob(lString16 name, const lUInt8 * data, int size) +{ + if ( !isSkipping() && m_writer ) + return m_writer->OnBlob(name, data, size); + return false; +} + +void docx_ElementHandler::start() +{ + m_savedHandler = m_reader->getHandler(); + reset(); + m_reader->setHandler(this); +} + +void docx_ElementHandler::reset() +{ +} + +void docx_ElementHandler::stop() +{ + m_reader->setHandler(m_savedHandler); + m_savedHandler = NULL; +} + +bool docx_ElementHandler::parse_OnOff_attribute(const lChar16 * attrValue) +{ + if ( !lStr_cmp(attrValue, "1") || !lStr_cmp(attrValue, "on") || !lStr_cmp(attrValue, "true") ) + return true; + return false; +} + +int docx_ElementHandler::parse_name(const struct item_def_t *tags, const lChar16 * nameValue) +{ + for (int i=0; tags[i].name; i++) { + if ( !lStr_cmp( tags[i].name, nameValue )) { + // found! + return tags[i].id; + } + } + return -1; +} + +void docx_ElementHandler::parse_int(const lChar16 * attrValue, css_length_t & result) +{ + lString16 value = attrValue; + + result.type = css_val_unspecified; + if ( value.atoi(result.value) ) + result.type = css_val_pt; //just to distinguish with unspecified value +} + +void docx_ElementHandler::generateLink(const lChar16 *target, const lChar16 *type, const lChar16 *text) +{ + m_writer->OnTagOpen(L"", L"a"); + m_writer->OnAttribute(L"", L"href", target ); + if(type) + m_writer->OnAttribute(L"", L"type", type); + m_writer->OnTagBody(); + lString16 t(text); + m_writer->OnText(t.c_str(), t.length(), 0); + m_writer->OnTagClose(L"", L"a"); +} + +void docx_ElementHandler::setChildrenInfo(const struct item_def_t *tags) +{ + m_children = tags; +} + +ldomNode * docx_ElementHandler::handleTagOpen(int tagId) +{ + m_state = tagId; + return NULL; +} + +ldomNode * docx_ElementHandler::handleTagOpen(const lChar16 * nsname, const lChar16 * tagname) +{ + int tag = parse_name(m_children, tagname); + + CR_UNUSED(nsname); + if( -1 == tag) { + // skip the tag we are not interested in + m_reader->skip(); + return NULL; + } + return handleTagOpen(tag); +} + +ldomNode * docx_rPrHandler::handleTagOpen(int tagId) +{ + m_state = tagId; + switch(tagId) { + case docx_el_b: + m_rPr->setBold(true); + break; + case docx_el_i: + m_rPr->setItalic(true); + break; + case docx_el_u: + m_rPr->setUnderline(true); + break; + case docx_el_vanish: + m_rPr->setHidden(true); + break; + case docx_el_strike: + m_rPr->setStrikeThrough(true); + break; + default: + break; + } + return NULL; +} + +void docx_rPrHandler::handleAttribute(const lChar16 * attrname, const lChar16 * attrvalue) +{ + int attr_value; + switch(m_state) { + case docx_el_lang: + if( !lStr_cmp(attrname, "val") ) { + if( m_rPr == m_importContext->get_rPrDefault() ) { + m_importContext->setLanguage(attrvalue); + } + } + break; + case docx_el_color: + // todo + break; + case docx_el_b: + if( !lStr_cmp(attrname, "val") ) + m_rPr->setBold(parse_OnOff_attribute( attrvalue )); + break; + case docx_el_i: + if( !lStr_cmp(attrname, "val") ) + m_rPr->setItalic(parse_OnOff_attribute( attrvalue )); + break; + case docx_el_jc: + if( !lStr_cmp(attrname, "val") ) { + attr_value = parse_name(jc_attr_values, attrvalue); + if(attr_value != -1) + m_rPr->setTextAlign((css_text_align_t)attr_value); + } + break; + case docx_el_rFonts: + //todo + break; + case docx_el_rStyle: + m_rPr->m_rStyle = attrvalue; + if ( !m_rPr->m_rStyle.empty() ) { + docxStyle *style = m_importContext->getStyle(m_rPr->m_rStyle); + if( style && (docx_character_style == style->getStyleType()) ) { + m_rPr->combineWith(style->get_rPr(m_importContext)); + } + } + break; + case docx_el_strike: + if( !lStr_cmp(attrname, "val") ) + m_rPr->setStrikeThrough(parse_OnOff_attribute(attrvalue)); + break; + case docx_el_vertAlign: + if( !lStr_cmp(attrname, "val") ) { + attr_value = parse_name(vertAlign_attr_values, attrvalue); + if(attr_value != -1) + m_rPr->setVertAlign((css_vertical_align_t)attr_value); + } + break; + case docx_el_sz: + //todo + break; + case docx_el_vanish: + if ( !lStr_cmp(attrname, "val") ) + m_rPr->setHidden(parse_OnOff_attribute(attrvalue)); + break; + default: + break; + } +} + +void docx_rPrHandler::reset() +{ + m_state = m_element; + if (m_rPr) + m_rPr->reset(); +} + +void docx_rPrHandler::start(docx_rPr * const rPr) +{ + m_rPr = rPr; + docx_ElementHandler::start(); +} + +void docx_rHandler::handleInstruction(lString16 &instruction, lString16 parameters) +{ + if( instruction == cs16("REF") || instruction == cs16("NOTEREF") || instruction == cs16("PAGEREF") ) { + lString16 argument, switches; + if( parameters.split2( cs16(" "), argument, switches) && !argument.empty() ) + { + m_importContext->m_linkNode = m_writer->OnTagOpen(L"", L"a"); + lString16 target = L"#"; + target << argument; + m_writer->OnAttribute(L"", L"href", target.c_str()); + m_writer->OnTagBody(); + } + } +} + +ldomNode *docx_rHandler::handleTagOpen(int tagId) +{ + switch(tagId) { + case docx_el_br: + case docx_el_t: + case docx_el_tab: + if( !m_content ) { + if( m_importContext->m_pStyle ) + m_rPr.combineWith(m_importContext->m_pStyle->get_rPr(m_importContext)); + m_rPr.combineWith(m_importContext->get_rPrDefault()); + m_pHandler->closeStyleTags(&m_rPr); + m_pHandler->openStyleTags(&m_rPr); + m_content = true; + } + m_state = tagId; + break; + case docx_el_rPr: + m_rPrHandler.start(&m_rPr); + break; + case docx_el_footnoteRef: + case docx_el_endnoteRef: + m_state = tagId; + break; + case docx_el_drawing: + m_drawingHandler.start(); + break; + case docx_el_footnoteReference: + case docx_el_endnoteReference: + m_footnoteId.clear(); + default: + m_state = tagId; + break; + } + return NULL; +} + +void docx_rHandler::handleAttribute(const lChar16 *attrname, const lChar16 *attrvalue) +{ + if( (docx_el_footnoteReference == m_state || docx_el_endnoteReference == m_state) && + !lStr_cmp(attrname, "id") ) { + m_footnoteId = attrvalue; + } + if( docx_el_fldChar == m_state && !lStr_cmp(attrname, "fldCharType") ) { + if( !lStr_cmp(attrvalue, "begin") ) { + m_importContext->m_inField = true; + } else if( !lStr_cmp(attrvalue, "end") ) { + if( m_importContext->m_linkNode ) { + m_writer->OnTagClose(L"", L"a"); + m_importContext->m_linkNode = NULL; + } + m_importContext->m_inField = false; + } + } +} + +void docx_rHandler::handleText(const lChar16 *text, int len, lUInt32 flags) +{ + switch(m_state) { + case docx_el_t: + m_writer->OnText(text, len, flags); + break; + case docx_el_instrText: + m_instruction = text; + break; + default: + break; + } +} + +void docx_rHandler::handleTagClose(const lChar16 *nsname, const lChar16 *tagname) +{ + lChar16 nobsp = 0x00A0; + CR_UNUSED2(nsname, tagname); + + switch(m_state) { + case docx_el_br: + m_writer->OnTagOpenAndClose(L"", L"br"); + m_state = docx_el_r; + break; + case docx_el_r: + stop(); + break; + case docx_el_tab: + m_writer->OnText(&nobsp, 1, 0); + m_state = docx_el_r; + break; + case docx_el_footnoteReference: + if( !m_footnoteId.empty() ) { + m_importContext->m_footNoteCount++; + lString16 target = L"#n_"; + target << m_footnoteId; + generateLink(target.c_str(), L"note", m_footnoteId.c_str()); + } + m_state = docx_el_r; + break; + case docx_el_instrText: + if( m_importContext->m_inField ) { + m_instruction.trim(); + if ( !m_instruction.empty() ) { + lString16 instruction, parameters; + if ( m_instruction.split2(cs16(" "), instruction, parameters) ) + handleInstruction(instruction, parameters); + } + } + m_state = docx_el_r; + break; + case docx_el_endnoteReference: + if( !m_footnoteId.empty() ) { + m_importContext->m_endNoteCount++; + lString16 target = L"#c_"; + target << m_footnoteId; + generateLink(target.c_str(), L"comment", m_footnoteId.c_str()); + } + m_state = docx_el_r; + break; + case docx_el_footnoteRef: + case docx_el_endnoteRef: + if(!m_importContext->m_footNoteId.empty()) { + m_writer->OnTagOpen(L"", L"sup"); + m_writer->OnTagBody(); + m_writer->OnText(m_importContext->m_footNoteId.c_str(), m_importContext->m_footNoteId.length(), 0); + m_writer->OnTagClose(L"", L"sup"); + } + default: + m_state = docx_el_r; + break; + } +} + +void docx_rHandler::reset() +{ + m_rPrHandler.reset(); + m_state = docx_el_r; + m_content = false; +} + +ldomNode * docx_pPrHandler::handleTagOpen(int tagId) +{ + switch(tagId) { + case docx_el_rPr: + break; + case docx_el_numPr: + m_state = tagId; + setChildrenInfo(numPr_elements); + break; + case docx_el_pageBreakBefore: + m_state = tagId; + m_pPr->setPageBreakBefore(true); + break; + case docx_el_keepNext: + m_state = tagId; + m_pPr->setKeepNext(true); + break; + case docx_el_mirrorIndents: + m_state = tagId; + m_pPr->setMirrorIndents(true); + break; + case docx_el_suppressAutoHyphens: + m_pPr->setHyphenate(css_hyph_none); + //fallthrough + default: + m_state = tagId; + } + return NULL; +} + +void docx_pPrHandler::handleAttribute(const lChar16 * attrname, const lChar16 * attrvalue) +{ + switch(m_state) { + case docx_el_pStyle: + if( !lStr_cmp(attrname, "val") ) { + m_pPr->m_pStyleId = attrvalue; + if ( !m_pPr->m_pStyleId.empty() ) { + docxStyle* style = m_importContext->getStyle(m_pPr->m_pStyleId); + if( style && (docx_paragraph_style == style->getStyleType()) ) { + m_pPr->combineWith(style->get_pPr(m_importContext)); + } + } + } + break; + case docx_el_jc: + if( !lStr_cmp(attrname, "val") ) { + int attr_value = parse_name(jc_attr_values, attrvalue); + if(attr_value != -1) + m_pPr->setTextAlign((css_text_align_t)attr_value); + } + break; + case docx_el_spacing: + if( !lStr_cmp(attrname, "line") ) { + css_length_t val; + parse_int(attrvalue, val); + m_pPr->set(docx_p_line_spacing_prop, val); + } else if( !lStr_cmp(attrname, "lineRule") ) { + int attr_value = parse_name(lineRule_attr_values, attrvalue); + if( -1 != attr_value ) + m_pPr->setLineRule((docx_lineRule_type)attr_value); + } else if ( !lStr_cmp(attrname, "afterAutospacing") ) { + m_pPr->set(docx_p_after_auto_spacing_prop, parse_OnOff_attribute(attrvalue)); + } else if ( !lStr_cmp(attrname, "beforeAutospacing") ) { + m_pPr->set(docx_p_before_auto_spacing_prop, parse_OnOff_attribute(attrvalue)); + } else { + //todo + } + break; + case docx_el_textAlignment: + if( !lStr_cmp(attrname, "val") ) { + int attr_value = parse_name(textAlignment_attr_values, attrvalue); + if(attr_value != -1) + m_pPr->setVertAlign((css_vertical_align_t)attr_value); + } + break; + case docx_el_ind: + //todo + break; + case docx_el_ilvl: + if( !lStr_cmp(attrname, "val") ) { + css_length_t val; + parse_int(attrvalue, val); + m_pPr->set(docx_p_ilvl_prop, val.value); + } + break; + case docx_el_numId: + if( !lStr_cmp(attrname, "val") ) { + css_length_t val; + parse_int(attrvalue, val); + m_pPr->set(docx_p_num_id_prop, val); + } + break; + case docx_el_outlineLvl: + if( !lStr_cmp(attrname, "val") ) { + css_length_t val; + parse_int(attrvalue, val); + m_pPr->set(docx_p_outline_level_prop, val); + } + break; + case docx_el_pageBreakBefore: + if( !lStr_cmp(attrname, "val") ) + m_pPr->setPageBreakBefore(parse_OnOff_attribute(attrvalue)); + break; + case docx_el_keepNext: + if( !lStr_cmp(attrname, "val") ) + m_pPr->setKeepNext(parse_OnOff_attribute(attrvalue)); + break; + case docx_el_mirrorIndents: + if( !lStr_cmp(attrname, "val") ) + m_pPr->setMirrorIndents(parse_OnOff_attribute(attrvalue)); + break; + case docx_el_suppressAutoHyphens: + if( !lStr_cmp(attrname, "val") && !parse_OnOff_attribute(attrvalue) ) + m_pPr->setHyphenate(css_hyph_auto); + break; + default: + break; + } +} + +void docx_pPrHandler::handleTagClose( const lChar16 * nsname, const lChar16 * tagname ) +{ + switch(m_state) { + case docx_el_ilvl: + case docx_el_numId: + m_state = docx_el_numPr; + break; + case docx_el_numPr: + setChildrenInfo(pPr_elements); + //falltrrough + default: + docx_ElementHandler::handleTagClose(nsname, tagname); + break; + } +} + +void docx_pPrHandler::reset() +{ + if(m_pPr) + m_pPr->reset(); +} + +void docx_pPrHandler::start(docx_pPr *pPr) +{ + m_pPr = pPr; + docx_ElementHandler::start(); +} + +const lChar16 *docx_pHandler::getStyleTagName(lChar16 ch) +{ + switch ( ch ) { + case 'b': + return L"strong"; + case 'i': + return L"emphasis"; + case 'u': + return L"u"; + case 's': + return L"strike"; + case 't': + return L"sup"; + case 'd': + return L"sub"; + default: + return NULL; + } +} + +void docx_pHandler::closeStyleTag(lChar16 ch) +{ + int pos = styleTagPos( ch ); + if (pos >= 0) { + for (int i = m_styleTags.length() - 1; i >= pos; i--) { + const lChar16 * tag = getStyleTagName(m_styleTags[i]); + m_styleTags.erase(m_styleTags.length() - 1, 1); + if ( tag ) { + m_writer->OnTagClose(L"", tag); + } + } + } +} + +void docx_pHandler::openStyleTag(lChar16 ch) +{ + int pos = styleTagPos( ch ); + if (pos < 0) { + const lChar16 * tag = getStyleTagName(ch); + if ( tag ) { + m_writer->OnTagOpenNoAttr(L"", tag); + m_styleTags.append( 1, ch ); + } + } +} + +ldomNode * docx_pHandler::handleTagOpen(int tagId) +{ + switch(tagId) { + case docx_el_r: + if ( 0 == m_runCount ) { + lString16 className; + + m_pPr.combineWith(m_importContext->get_pPrDefault()); + css_length_t outlineLevel = m_pPr.getOutlineLvl(); + m_importContext->m_pStyle = m_pPr.getStyle(m_importContext); + if ( outlineLevel.type != css_val_unspecified ) { + if (m_documentHandler) { + m_inTitle = true; + m_documentHandler->onTitleStart(outlineLevel.value + 1); + } else { + className = cs16("h") + lString16::itoa(outlineLevel.value + 1); + } + } + + int numId = m_pPr.getNumberingId(); + if( numId != 0 ) { + int level = m_pPr.getNumberingLevel() + 1; + if( level > m_importContext->getListLevel() ) + m_importContext->openList(level, numId, m_writer); + else if( level < m_importContext->getListLevel() ) + m_importContext->closeList(level, m_writer); + else + m_writer->OnTagClose(L"", L"li"); + m_writer->OnTagOpenNoAttr(L"", L"li"); + } else { + if( m_importContext->isInList() ) + m_importContext->closeList(0, m_writer); + m_writer->OnTagOpen(L"", L"p"); + if ( !className.empty() ) + m_writer->OnAttribute(L"", L"class", className.c_str()); + lString16 style = m_pPr.getCss(); + if(!style.empty()) + m_writer->OnAttribute(L"", L"style", style.c_str()); + m_writer->OnTagBody(); + } + } + m_rHandler.start(); + m_runCount++; + break; + case docx_el_bookmarkStart: + m_state = tagId; + break; + case docx_el_hyperlink: + m_hyperlinkHandler.start(); + break; + case docx_el_pPr: + m_pPrHandler.start(&m_pPr); + break; + default: + m_state = tagId; + break; + } + return NULL; +} + +void docx_pHandler::handleAttribute(const lChar16 *attrname, const lChar16 *attrvalue) +{ + if( docx_el_bookmarkStart == m_state && !lStr_cmp(attrname, "name") ) { + m_writer->OnTagOpen(L"", L"a"); + m_writer->OnAttribute(L"", L"id", attrvalue); + m_writer->OnTagBody(); + m_writer->OnTagClose(L"", L"a"); + } +} + +void docx_pHandler::handleTagClose( const lChar16 * nsname, const lChar16 * tagname ) +{ + CR_UNUSED2(nsname, tagname); + + switch(m_state) { + case docx_el_p: + closeStyleTags(); + if( m_pPr.getNumberingId() == 0 ) + m_writer->OnTagClose(L"", L"p"); + stop(); + if( m_inTitle ) { + m_inTitle = false; + m_documentHandler->onTitleEnd(); + } + break; + default: + m_state = docx_el_p; + break; + } +} + +void docx_pHandler::reset() +{ + m_pPrHandler.reset(); + m_rHandler.reset(); + m_state = docx_el_p; + m_runCount = 0; +} + +void docx_pHandler::openStyleTags(docx_rPr *runProps) +{ + if(runProps->isBold()) + openStyleTag('b'); + if(runProps->isItalic()) + openStyleTag('i'); + if(runProps->isUnderline()) + openStyleTag('u'); + if(runProps->isStrikeThrough()) + openStyleTag('s'); + if(runProps->isSubScript()) + openStyleTag('d'); + if(runProps->isSuperScript()) + openStyleTag('t'); +} + +void docx_pHandler::closeStyleTags(docx_rPr *runProps) +{ + if(!runProps->isBold()) + closeStyleTag('b'); + if(!runProps->isItalic()) + closeStyleTag('i'); + if(!runProps->isUnderline()) + closeStyleTag('u'); + if(!runProps->isStrikeThrough()) + closeStyleTag('s'); + if(!runProps->isSubScript()) + closeStyleTag('d'); + if(!runProps->isSuperScript()) + closeStyleTag('t'); + +} + +void docx_pHandler::closeStyleTags() +{ + for(int i = m_styleTags.length() - 1; i >= 0; i--) + closeStyleTag(m_styleTags[i]); + m_styleTags.clear(); +} + +void docx_documentHandler::makeSection(int startIndex) +{ + ldomNode *newSection = m_section->insertChildElement(startIndex, LXML_NS_NONE, el_section); + newSection->initNodeStyle(); + m_section->moveItemsTo(newSection, startIndex + 1, m_section->getChildCount() - 1); + newSection->initNodeRendMethod( ); + m_section = newSection; +} + +void docx_documentHandler::openSection(int level) +{ + for(int i = m_sectionLevel; i < level; i++) { + m_section = m_writer->OnTagOpen(L"", L"section"); + m_writer->OnTagBody(); + } + m_sectionLevel = level; + m_pCount = 0; + m_hasTitle = false; +} + +void docx_documentHandler::closeSection(int level) +{ + for(int i = 0; i < level; i++) { + m_writer->OnTagClose(L"", L"section"); + m_sectionLevel--; + } + m_pCount = 0; + m_hasTitle = false; +} + +void docx_documentHandler::onTitleStart(int level) +{ + if(m_sectionLevel < level) { + int startIndex = m_hasTitle ? 1 : 0; + int contentCount = m_section->getChildCount(); + if(contentCount > startIndex) { + makeSection(startIndex); + } + } else { + closeSection(m_sectionLevel - level + 1); + } + openSection(level); + m_writer->OnTagOpen(L"", L"title"); + lString16 className = cs16("h") + lString16::itoa(level); + m_writer->OnAttribute(L"", L"class", className.c_str()); + m_writer->OnTagBody(); +} + +void docx_documentHandler::onTitleEnd() +{ + m_writer->OnTagClose(L"", L"title"); + m_hasTitle = true; +} + +ldomNode * docx_documentHandler::handleTagOpen(int tagId) +{ + if( tagId != docx_el_p && m_importContext->isInList() ) + m_importContext->closeList(0, m_writer); + switch(tagId) { + case docx_el_p: + m_pCount++; + paragraphHandler.start(); + break; + case docx_el_tbl: + m_tableHandler.start(); + break; + case docx_el_body: + m_section = m_writer->OnTagOpen(L"", docx_el_body_name); + m_writer->OnTagBody(); + //fallthrough + default: + m_state = tagId; + break; + } + return NULL; +} + +void docx_documentHandler::handleAttribute(const lChar16 * nsname, const lChar16 * attrname, const lChar16 * attrvalue) +{ + if (m_state == docx_el_document && !lStr_cmp(nsname, "xmlns") ) + CRLog::debug("namespace declaration %s:%s", LCSTR(attrname), LCSTR(attrvalue)); +} + +void docx_documentHandler::handleTagClose( const lChar16 * nsname, const lChar16 * tagname ) +{ + switch(m_state) { + case docx_el_body: + closeSection(m_sectionLevel); + m_writer->OnTagClose(nsname, tagname); + break; + default: + break; + } +} + +ldomNode * docx_styleHandler::handleTagOpen(int tagId) +{ + switch(tagId) { + case docx_el_pPr: + m_pPrHandler.start(&m_style->m_pPr); + break; + case docx_el_rPr: + m_rPrHandler.start(&m_style->m_rPr); + break; + case docx_el_tblPr: + case docx_el_trPr: + case docx_el_tcPr: + m_state = tagId; + break; + default: + m_state = tagId; + break; + } + return NULL; +} + +void docx_styleHandler::handleAttribute(const lChar16 * attrname, const lChar16 * attrvalue) +{ + switch(m_state) { + case docx_el_style: + if ( !lStr_cmp(attrname, "type") ) { + int attr_value = parse_name(styleType_attr_values, attrvalue); + if( -1 != attr_value ) + m_style->setStyleType((docx_style_type)attr_value); + } else if ( !lStr_cmp(attrname, "styleId") ) { + m_style->setId(attrvalue); + } + break; + case docx_el_name: + if ( !lStr_cmp(attrname, "val") ) + m_style->setName(attrvalue); + break; + case docx_el_basedOn: + if ( !lStr_cmp(attrname, "val") ) + m_style->setBasedOn(attrvalue); + break; + case docx_el_pPr: + break; + case docx_el_rPr: + break; + } +} + +void docx_styleHandler::handleTagClose( const lChar16 * nsname, const lChar16 * tagname ) +{ + CR_UNUSED2(nsname, tagname); + + switch(m_state) { + case docx_el_style: + if ( m_style->isValid() ) + m_importContext->addStyle(m_styleRef); + stop(); + break; + default: + m_state = docx_el_style; + break; + } +} + +void docx_styleHandler::start() +{ + docx_ElementHandler::start(); + m_styleRef = docxStyleRef( new docxStyle ); + m_style = m_styleRef.get(); + m_state = docx_el_style; +} + +ldomNode * docx_stylesHandler::handleTagOpen(int tagId) +{ + switch(tagId) { + case docx_el_pPr: + m_pPrHandler.start(m_importContext->get_pPrDefault()); + break; + case docx_el_rPr: + m_rPrHandler.start(m_importContext->get_rPrDefault()); + break; + case docx_el_style: + m_styleHandler.start(); + break; + case docx_el_docDefaults: + setChildrenInfo(docDefaults_elements); + //falltrough + default: + m_state = tagId; + break; + } + return NULL; +} + +void docx_stylesHandler::handleTagClose( const lChar16 * nsname, const lChar16 * tagname ) +{ + switch(m_state) { + case docx_el_rPrDefault: + case docx_el_pPrDefault: + m_state = docx_el_docDefaults; + break; + case docx_el_docDefaults: + setChildrenInfo(styles_elements); + //fallthrough + case docx_el_style: + m_state = docx_el_styles; + break; + case docx_el_styles: + stop(); + break; + default: + CRLog::error("Unexpected tag(%s:%)", nsname, tagname); + break; + } +} + +void docx_stylesHandler::reset() +{ + m_styleHandler.reset(); + m_state = docx_el_NULL; +} + +bool parseStyles(docxImportContext *importContext) +{ + LVStreamRef m_stream = importContext->openContentPart(docx_StylesContentType); + if ( m_stream.isNull() ) + return false; + + docXMLreader docReader(NULL); + docx_stylesHandler stylesHandler(&docReader, NULL, importContext); + docReader.setHandler(&stylesHandler); + + LVXMLParser parser(m_stream, &docReader); + + if ( !parser.Parse() ) + return false; + return true; +} + +bool parseNumbering(docxImportContext *importContext) +{ + LVStreamRef m_stream = importContext->openContentPart(docx_NumberingContentType); + if ( m_stream.isNull() ) + return false; + + docXMLreader docReader(NULL); + docx_numberingHandler numberingHandler(&docReader, NULL, importContext); + docReader.setHandler(&numberingHandler); + + LVXMLParser parser(m_stream, &docReader); + + if ( !parser.Parse() ) + return false; + return true; +} + +void parseFootnotes(ldomDocumentWriter& writer, docxImportContext& context, int element) +{ + LVStreamRef m_stream; + + if( element == docx_el_footnotes ) + m_stream = context.openRelatedPart(docx_FootNotesRelationShip); + else + m_stream = context.openRelatedPart(docx_EndNotesRelationShip); + + if ( !m_stream.isNull() ) { + docXMLreader docReader(&writer); + docx_footnotesHandler footnotesHandler(&docReader, &writer, &context, element); + docReader.setHandler(&footnotesHandler); + + LVXMLParser parser(m_stream, &docReader); + + if(parser.Parse()) + writer.OnTagClose(L"", docx_el_body_name); + } + context.closeRelatedPart(); +} + +bool ImportDocXDocument( LVStreamRef stream, ldomDocument * doc, LVDocViewCallback * progressCallback, CacheLoadingCallback * formatCallback ) +{ + LVContainerRef arc = LVOpenArchieve( stream ); + if ( arc.isNull() ) + return false; // not a ZIP archive + + doc->setContainer(arc); + OpcPackage package(arc); + + docxImportContext importContext(&package, doc); + + package.readCoreProperties(doc->getProps()); + +#if BUILD_LITE!=1 + if ( doc->openFromCache(formatCallback) ) { + if ( progressCallback ) { + progressCallback->OnLoadFileEnd( ); + } + return true; + } +#endif + + parseNumbering(&importContext); + + if ( !parseStyles(&importContext) ) + return false; + + LVStreamRef m_stream = importContext.openContentPart(docx_DocumentContentType); + if ( m_stream.isNull() ) + return false; + + ldomDocumentWriter writer(doc); + docXMLreader docReader(&writer); + + writer.OnStart(NULL); + writer.OnTagOpen(NULL, L"?xml"); + writer.OnAttribute(NULL, L"version", L"1.0"); + writer.OnAttribute(NULL, L"encoding", L"utf-8"); + writer.OnEncoding(L"utf-8", NULL); + writer.OnTagBody(); + writer.OnTagClose(NULL, L"?xml"); + writer.OnTagOpenNoAttr(NULL, L"FictionBook"); + // DESCRIPTION + writer.OnTagOpenNoAttr(NULL, L"description"); + writer.OnTagOpenNoAttr(NULL, L"title-info"); + writer.OnTagOpenNoAttr(NULL, L"book-title"); + writer.OnTagClose(NULL, L"book-title"); + writer.OnTagClose(NULL, L"title-info"); + writer.OnTagClose(NULL, L"description"); + + docx_documentHandler documentHandler(&docReader, &writer, &importContext); + docReader.setHandler(&documentHandler); + + + LVXMLParser parser(m_stream, &docReader); + + if ( !parser.Parse() ) + return false; + + if(importContext.m_footNoteCount > 0) { + parseFootnotes(writer, importContext, docx_el_footnotes); + } + if(importContext.m_endNoteCount > 0) { + parseFootnotes(writer, importContext, docx_el_endnotes); + } + writer.OnTagClose(NULL, L"FictionBook"); + writer.OnStop(); + + if ( progressCallback ) { + progressCallback->OnLoadFileEnd( ); + doc->compact(); + doc->dumpStatistics(); + } + + return true; + +} + +docxStyle::docxStyle() : m_type(docx_paragraph_style), + m_pPrMerged(false), m_rPrMerged(false) +{ +} + +bool docxStyle::isValid() const +{ + return ( !(m_Name.empty() || m_Id.empty()) ); +} + +docxStyle *docxStyle::getBaseStyle(docxImportContext *context) +{ + lString16 basedOn = getBasedOn(); + if ( !basedOn.empty() ) { + docxStyle *pStyle = context->getStyle(basedOn); + if( pStyle && pStyle->getStyleType() == getStyleType() ) + return pStyle; + } + return NULL; +} + +docx_pPr *docxStyle::get_pPr(docxImportContext *context) +{ + if( !m_pPrMerged ) { + docxStyle* pStyle = getBaseStyle(context); + if (pStyle ) { + m_pPr.combineWith(pStyle->get_pPr(context)); + } + m_pPrMerged = true; + } + return &m_pPr; +} + +docx_rPr *docxStyle::get_rPr(docxImportContext *context) +{ + if( !m_rPrMerged ) { + docxStyle* pStyle = getBaseStyle(context); + if (pStyle ) { + m_rPr.combineWith(pStyle->get_rPr(context)); + } + m_rPrMerged = true; + } + return &m_rPr; +} + +docxImportContext::docxImportContext(OpcPackage *package, ldomDocument *doc) : m_styles(64), m_abstractNumbers(16), + m_Numbers(16), m_footNoteCount(0), m_endNoteCount(0), + m_inField(false), m_linkNode(NULL), m_pStyle(NULL), + m_package(package), m_doc(doc) +{ +} + +docxImportContext::~docxImportContext() +{ +} + +docxStyle * docxImportContext::getStyle( lString16 id ) +{ + return m_styles.get(id).get(); +} + +void docxImportContext::addStyle( docxStyleRef style ) +{ + docxStyle *referenced = style.get(); + if ( NULL != referenced) + { + m_styles.set(referenced->getId(), style); + } +} + +void docxImportContext::addNum(docxNumRef num) +{ + if ( !num.isNull() ) { + m_Numbers.set(num->getId(), num); + } +} + +void docxImportContext::addAbstractNum(docxAbstractNumRef abstractNum) +{ + if ( !abstractNum.isNull() ) { + m_abstractNumbers.set(abstractNum->getId(), abstractNum); + } +} + +LVStreamRef docxImportContext::openContentPart(const lChar16 * const contentType) +{ + m_docPart = m_package->getContentPart(contentType); + if( !m_docPart.isNull() ) { + return m_docPart->open(); + } + return LVStreamRef(); +} + +LVStreamRef docxImportContext::openRelatedPart(const lChar16 * const relationshipType) +{ + if ( !m_docPart.isNull() ) { + m_relatedPart = m_docPart->getRelatedPart(relationshipType); + if ( !m_relatedPart.isNull()) + return m_relatedPart->open(); + } + return LVStreamRef(); +} + +void docxImportContext::closeRelatedPart() +{ + if ( !m_relatedPart.isNull() ) { + m_relatedPart.Clear(); + } +} + +void docxImportContext::openList(int level, int numid, ldomDocumentWriter *writer) +{ + const docxNumRef num = getNum(numid); + + for(int i = getListLevel(); i < level; i++) { + const docxNumLevel* listLevel = NULL; + css_list_style_type_t listType = css_lst_disc; + if ( !num.isNull() ) + listLevel = num->getDocxLevel(const_cast(*this), level - 1); + if (listLevel) + listType = listLevel->getListType(); + writer->OnTagOpen(L"", L"ol"); + lString16 listStyle = getListStyle(listType); + m_ListLevels.add(listType); + if ( !listStyle.empty() ) + writer->OnAttribute(L"", L"style", listStyle.c_str()); + writer->OnTagBody(); + if ( i != level - 1 ) + writer->OnTagOpenNoAttr(L"", L"li"); + } +} + +void docxImportContext::closeList(int level, ldomDocumentWriter *writer) +{ + for(int i = getListLevel(); i > level; i--) { + writer->OnTagClose(L"", L"li"); + writer->OnTagClose(L"", L"ol"); + m_ListLevels.remove(getListLevel() - 1); + } +} + +void docxImportContext::setLanguage(const lChar16 *lang) +{ + lString16 language(lang); + + int p = language.pos(cs16("-")); + if ( p > 0 ) { + language = language.substr(0, p); + } + m_doc->getProps()->setString(DOC_PROP_LANGUAGE, language); +} + +lString16 docxImportContext::getListStyle(css_list_style_type_t listType) +{ + switch(listType) { + case css_lst_disc: + return lString16("list-style-type: disc;"); + case css_lst_circle: + return lString16("list-style-type: circle;"); + case css_lst_square: + return lString16("list-style-type: square;"); + case css_lst_decimal: + return lString16("list-style-type: decimal;"); + case css_lst_lower_roman: + return lString16("list-style-type: lower-roman;"); + case css_lst_upper_roman: + return lString16("list-style-type: upper-roman;"); + case css_lst_lower_alpha: + return lString16("list-style-type: lower-alpha;"); + case css_lst_upper_alpha: + return lString16("list-style-type: upper-alpha;"); + default: + return lString16(); + } +} + +ldomNode * docx_lvlHandler::handleTagOpen(int tagId) +{ + switch(tagId) { + case docx_el_pPr: + m_pPrHandler.start(m_lvl->get_pPr()); + break; + case docx_el_rPr: + m_rPrHandler.start(m_lvl->get_rPr()); + break; + case docx_el_isLgl: + m_lvl->setLgl(true); + //fallthrough + case docx_el_lvlJc: + case docx_el_lvlRestart: + case docx_el_lvlText: + case docx_el_numFmt: + case docx_el_pStyle: + case docx_el_start: + default: + m_state = tagId; + break; + } + return NULL; +} + +void docx_lvlHandler::handleAttribute(const lChar16 *attrname, const lChar16 *attrvalue) +{ + css_length_t result; + + if( !lStr_cmp(attrname, "val") ) { + int attr_value; + + switch(m_state) { + case docx_el_pStyle: + m_lvl->setReferencedStyleId(attrvalue); + break; + case docx_el_lvlJc: + attr_value = parse_name(jc_attr_values, attrvalue); + if(attr_value != -1) + m_lvl->setLevelAlign((css_text_align_t)attr_value); + break; + case docx_el_isLgl: + m_lvl->setLgl(parse_OnOff_attribute( attrvalue )); + break; + case docx_el_lvlRestart: + parse_int(attrvalue, result); + m_lvl->setLevelRestart(result); + break; + case docx_el_lvlText: + m_lvl->setLevelText(attrvalue); + break; + case docx_el_numFmt: + attr_value = parse_name(numFmt_attr_values, attrvalue); + if( -1 != attr_value ) + m_lvl->setNumberFormat((docx_numFormat_type)attr_value); + break; + case docx_el_start: + parse_int(attrvalue, result); + m_lvl->setLevelStart(result); + break; + case docx_el_suff: + attr_value = parse_name(lvlSuff_attr_values, attrvalue); + if( -1 != attr_value ) + m_lvl->setLevelSuffix((docx_LevelSuffix_type)attr_value); + break; + } + } else if( !lStr_cmp(attrname, "ilvl") ) { + // m_state should be docx_el_lvl + parse_int(attrvalue, result); + m_lvl->setLevel(result); + } else if( !lStr_cmp(attrname, "null") ) { + // m_state should be docx_el_lvl + m_lvl->setLevelTextNull(parse_OnOff_attribute( attrvalue )); + } +} + +void docx_lvlHandler::reset() +{ + m_rPrHandler.reset(); + m_pPrHandler.reset(); + if(m_lvl) + m_lvl->reset(); +} + +ldomNode *docx_footnotesHandler::handleTagOpen(int tagId) +{ + switch(tagId) { + case docx_el_p: + if( m_normal && !m_importContext->m_footNoteId.empty() ) { + if( m_pCount == 0 ) { + m_writer->OnTagOpen(L"", L"section"); + lString16 id = isEndNote() ? L"c_" : L"n_"; + id << m_importContext->m_footNoteId.c_str(); + m_writer->OnAttribute(L"", L"id", id.c_str()); + m_writer->OnTagBody(); + } + paragraphHandler.start(); + } else { + m_state = tagId; + } + m_pCount++; + break; + case docx_el_footnote: + case docx_el_endnote: + m_normal = true; + m_importContext->m_footNoteId.clear(); + m_pCount = 0; + m_state = tagId; + break; + case docx_el_footnotes: + case docx_el_endnotes: + m_writer->OnTagOpen(L"", docx_el_body_name); + if(isEndNote()) { + m_writer->OnAttribute(L"", L"name", L"comments"); + m_writer->OnTagBody(); + m_writer->OnTagOpen(L"", L"subtitle"); + m_writer->OnTagBody(); + m_writer->OnText(L"* * *", 5, 0); + m_writer->OnTagClose(L"", L"subtitle"); + } else { + m_writer->OnAttribute(L"", L"name", L"notes"); + m_writer->OnTagBody(); + } + //fallthrough + default: + m_state = tagId; + break; + } + return NULL; +} + +void docx_footnotesHandler::handleAttribute(const lChar16 *attrname, const lChar16 *attrvalue) +{ + switch(m_state) { + case docx_el_footnote: + case docx_el_endnote: + if( !lStr_cmp(attrname, "type") ) { + if( lStr_cmp(attrvalue, "normal") ) + m_normal = false; + } else if( !lStr_cmp(attrname, "id") ) + m_importContext->m_footNoteId.append(attrvalue); + break; + default: + break; + } +} + +void docx_footnotesHandler::handleTagClose(const lChar16 *nsname, const lChar16 *tagname) +{ + switch (m_state) { + case docx_el_p: + m_state = isEndNote() ? docx_el_endnote : docx_el_footnote; + break; + case docx_el_endnote: + case docx_el_footnote: + m_writer->OnTagClose(L"", L"section"); + default: + docx_ElementHandler::handleTagClose(nsname, tagname); + break; + } +} + +ldomNode *docx_hyperlinkHandler::handleTagOpen(int tagId) +{ + switch(tagId) { + case docx_el_r: + m_rHandler.start(); + break; + default: + m_state = tagId; + break; + } + return NULL; +} + +void docx_hyperlinkHandler::handleAttribute(const lChar16 *attrname, const lChar16 *attrvalue) +{ + if( docx_el_hyperlink == m_state && !lStr_cmp(attrname, "id") ) { + m_target = m_importContext->getLinkTarget(lString16(attrvalue)); + if( !m_target.empty() ) { + m_writer->OnTagOpen(L"", L"a"); + m_writer->OnAttribute(L"", L"href", m_target.c_str()); + m_writer->OnTagBody(); + } + } +} + +void docx_hyperlinkHandler::handleTagClose(const lChar16 *nsname, const lChar16 *tagname) +{ + switch (m_state) { + case docx_el_hyperlink: + if ( !m_target.empty() ) { + m_writer->OnTagClose(L"", L"a"); + } + default: + docx_ElementHandler::handleTagClose(nsname, tagname); + break; + } +} + +ldomNode *docx_drawingHandler::handleTagOpen(int tagId) +{ + m_level++; + m_state = tagId; + return NULL; +} + +void docx_drawingHandler::handleAttribute(const lChar16 *attrname, const lChar16 *attrvalue) +{ + if( m_state == docx_el_blip && !lStr_cmp(attrname, "embed") ) { + lString16 imgPath = m_importContext->getImageTarget(lString16(attrvalue)); + if( !imgPath.empty() ) { + m_writer->OnTagOpen(L"", L"img"); + m_writer->OnAttribute(L"", L"src", imgPath.c_str()); + m_writer->OnTagBody(); + m_writer->OnTagClose(L"", L"img"); + } + } +} + +void docx_drawingHandler::handleTagClose(const lChar16 *nsname, const lChar16 *tagname) +{ + CR_UNUSED2(nsname, tagname); + + if(m_level <= 1) + stop(); + m_level--; +} + +void docx_tblHandler::endRowSpan(int column) +{ + docx_row_span_info rowSpan = m_rowSpaninfo[column]; + if( rowSpan.rows > 1 ) { + CRLog::warn("Row span on column: %d, end: %d", column, rowSpan.rows); + if( rowSpan.column ) { + rowSpan.column->setAttributeValue(LXML_NS_NONE, + rowSpan.column->getDocument()->getAttrNameIndex(L"rowspan"), + lString16::itoa(rowSpan.rows).c_str()); + } else { + CRLog::error("No column node"); + } + } +} + +ldomNode *docx_tblHandler::handleTagOpen(int tagId) +{ + bool elementHandled = false; + switch(tagId) { + case docx_el_p: + m_pHandler_->start(); + elementHandled = true; + break; + case docx_el_tc: + m_colSpan = 1; + CRLog::warn("Column: %d", m_column); + m_vMergeState = VMERGE_NONE; + break; + case docx_el_vMerge: + m_vMergeState = VMERGE_CONTINUE; + break; + case docx_el_tr: + m_column = 0; + m_writer->OnTagOpenNoAttr(L"", L"tr"); + break; + default: + break; + } + if( !elementHandled ) { + m_state = tagId; + m_levels.add(tagId); + } + return NULL; +} + +void docx_tblHandler::handleAttribute(const lChar16 *attrname, const lChar16 *attrvalue) +{ + if( m_state == docx_el_gridSpan && !lStr_cmp( attrname, "val" ) ) { + m_colSpan = lString16(attrvalue).atoi(); + } else if( m_state == docx_el_vMerge && !lStr_cmp( attrname, "val" ) ) { + if( !lStr_cmp( attrvalue, "restart" ) ) + m_vMergeState = VMERGE_RESET; + } +} + +void docx_tblHandler::handleTagClose(const lChar16 *nsname, const lChar16 *tagname) +{ + CR_UNUSED2(nsname, tagname); + + if( !m_levels.empty() ) { + switch(m_state) { + case docx_el_tblPr: + m_writer->OnTagOpenNoAttr(L"", L"table"); + break; + case docx_el_tr: + m_writer->OnTagClose(L"", L"tr"); + m_rowCount++; + break; + case docx_el_tc: + m_column++; + if( m_pHandler_ == &m_pHandler ) + m_writer->OnTagClose(L"", L"td"); + break; + case docx_el_gridCol: + m_columnCount++; + break; + case docx_el_tblGrid: + if( m_columnCount ) + m_rowSpaninfo.reserve(m_columnCount); + break; + case docx_el_tcPr: + if( VMERGE_NONE == m_vMergeState || VMERGE_RESET == m_vMergeState) { + m_pHandler_ = &m_pHandler; + ldomNode *columnNode = m_writer->OnTagOpen(L"", L"td"); + for(int i = 0; i < m_colSpan; i++) { + if( m_column + i >= m_columnCount ) + break; // shouldn't happen + endRowSpan(m_column + i); + } + m_rowSpaninfo[m_column] = docx_row_span_info(columnNode); + if( m_colSpan > 1) + m_writer->OnAttribute(L"", L"colspan", lString16::itoa(m_colSpan).c_str() ); + m_writer->OnTagBody(); + } else if ( VMERGE_CONTINUE == m_vMergeState ) { + m_pHandler_ = &m_skipHandler; + m_rowSpaninfo[m_column].rows++; + } + m_column += m_colSpan - 1; + break; + default: + break; + } + m_levels.erase(m_levels.length() - 1, 1); + if( !m_levels.empty() ) { + m_state = m_levels[m_levels.length() - 1]; + } else { + m_state = docx_el_tbl; + } + } else { + for(int i = 0; i < m_columnCount; i++) { + endRowSpan(i); + } + m_writer->OnTagClose(L"", L"table"); + stop(); + } + +} + +void docx_tblHandler::reset() +{ + m_levels.clear(); + m_rowSpaninfo.clear(); + m_rowCount = 0; + m_columnCount = 0; +} + +ldomNode *docx_numberingHandler::handleTagOpen(int tagId) +{ + switch(tagId) { + case docx_el_abstractNum: + m_abstractNumHandler.start(); + break; + case docx_el_num: + m_numHandler.start(); + break; + default: + m_state = tagId; + } + return NULL; +} + +void docx_numberingHandler::handleTagClose(const lChar16 *nsname, const lChar16 *tagname) +{ + switch(m_state) { + case docx_el_num: + case docx_el_abstractNum: + m_state = docx_el_numbering; + break; + case docx_el_numbering: + stop(); + break; + default: + CRLog::error("Unexpected tag(%s:%)", nsname, tagname); + break; + } +} + +ldomNode *docx_abstractNumHandler::handleTagOpen(int tagId) +{ + switch(tagId) { + case docx_el_lvl: + if ( !m_levelRef.isNull() ) + m_abstractNumRef->addLevel( m_levelRef ); + m_levelRef = docxNumLevelRef( new docxNumLevel ); + m_lvlHandler.start( m_levelRef.get() ); + break; + default: + m_state = tagId; + } + return NULL; +} + +void docx_abstractNumHandler::handleAttribute(const lChar16 * attrname, const lChar16 * attrvalue) +{ + switch(m_state) { + case docx_el_abstractNum: + if ( !lStr_cmp(attrname, "abstractNumId") ) + m_abstractNumRef->setId(lString16(attrvalue).atoi()); + break; + default: + break; + } +} + +void docx_abstractNumHandler::handleTagClose(const lChar16 *nsname, const lChar16 *tagname) +{ + CR_UNUSED2(nsname, tagname); + + switch(m_state) { + case docx_el_abstractNum: + if ( !m_levelRef.isNull() ) + m_abstractNumRef->addLevel( m_levelRef ); + if ( !m_abstractNumRef.isNull() ) + m_importContext->addAbstractNum( m_abstractNumRef ); + stop(); + break; + default: + m_state = docx_el_abstractNum; + break; + } +} + +void docx_abstractNumHandler::start() +{ + m_abstractNumRef = docxAbstractNumRef( new docxAbstractNum ); + docx_ElementHandler::start(); +} + +void docx_numHandler::handleAttribute(const lChar16 *attrname, const lChar16 *attrvalue) +{ + switch(m_state) { + case docx_el_num: + if ( !lStr_cmp(attrname, "numId") ) + m_numRef->setId( lString16(attrvalue).atoi() ); + break; + case docx_el_abstractNumId: + if ( !lStr_cmp(attrname, "val") ) + m_numRef->setBaseId( lString16(attrvalue).atoi() ); + break; + default: + break; + } +} + +ldomNode *docx_numHandler::handleTagOpen(int tagId) +{ + switch(tagId) { + case docx_el_lvl: + if ( !m_levelRef.isNull() ) + m_numRef->overrideLevel( m_levelRef ); + m_levelRef = docxNumLevelRef( new docxNumLevel ); + m_lvlHandler.start( m_levelRef.get() ); + break; + default: + m_state = tagId; + } + return NULL; +} + +void docx_numHandler::handleTagClose(const lChar16 *nsname, const lChar16 *tagname) +{ + CR_UNUSED2(nsname, tagname); + + switch(m_state) { + case docx_el_num: + if ( !m_levelRef.isNull() ) + m_numRef->overrideLevel( m_levelRef ); + if ( m_numRef->isValid() ) + m_importContext->addNum( m_numRef ); + stop(); + break; + default: + m_state = docx_el_num; + break; + } +} + +void docx_numHandler::start() +{ + m_numRef = docxNumRef( new docxNum ); + docx_ElementHandler::start(); +} + +const docxAbstractNumRef docxNum::getBase(docxImportContext &context) const +{ + return context.getAbstractNum(getBaseId()); +} + +void docxNum::overrideLevel(docxNumLevelRef docxLevel) +{ + if( !docxLevel.isNull() ) + m_overrides.set(docxLevel->getLevel().value, docxLevel); +} + +docxNumLevel *docxNum::getDocxLevel(docxImportContext &context, int level) +{ + docxNumLevelRef levelRef = m_overrides.get(level); + if( !levelRef.isNull() ) + return levelRef.get(); + docxAbstractNumRef baseRef = getBase(context); + if( !baseRef.isNull() ) + return baseRef->getLevel(level); + return NULL; +} + +bool docxNum::isValid() const +{ + return (m_id.type != css_val_unspecified + && m_abstractNumId.type != css_val_unspecified); +} + +void docxNum::reset() +{ + m_id.type = css_val_unspecified; + m_abstractNumId.type = css_val_unspecified; + m_overrides.clear(); +} + +void docxAbstractNum::addLevel(docxNumLevelRef docxLevel) +{ + m_levels.set(docxLevel->getLevel().value, docxLevel); +} + +docxAbstractNum::docxAbstractNum() : m_multilevel(docx_singlelevel), + m_abstractNumId(css_val_unspecified, 0), m_levels(10) +{ +} + +docxNumLevel *docxAbstractNum::getLevel(int level) +{ + return m_levels.get(level).get(); +} + +void docxAbstractNum::reset() +{ + m_levels.clear(); +} diff --git a/crengine/src/fb3fmt.cpp b/crengine/src/fb3fmt.cpp new file mode 100644 index 0000000000..a1e94e6a1e --- /dev/null +++ b/crengine/src/fb3fmt.cpp @@ -0,0 +1,244 @@ +#include "../include/fb3fmt.h" +#include "../include/lvtinydom.h" +#include "../include/fb2def.h" +#include "../include/lvopc.h" + +static const lChar16 * const fb3_BodyContentType = L"application/fb3-body+xml"; +static const lChar16 * const fb3_DescriptionContentType = L"application/fb3-description+xml"; +static const lChar16 * const fb3_CoverRelationship = L"http://schemas.openxmlformats.org/package/2006/relationships/metadata/thumbnail"; +static const lChar16 * const fb3_ImageRelationship = L"http://www.fictionbook.org/FictionBook3/relationships/image"; + +class fb3ImportContext +{ +private: + OpcPackage *m_package; + OpcPartRef m_bookPart; + ldomDocument *m_descDoc; +public: + fb3ImportContext(OpcPackage *package); + virtual ~fb3ImportContext(); + + lString16 geImageTarget(const lString16 relationId) { + return m_bookPart->getRelatedPartName(fb3_ImageRelationship, relationId); + } + LVStreamRef openBook() { + m_bookPart = m_package->getContentPart(fb3_BodyContentType); + m_coverImage = m_package->getRelatedPartName(fb3_CoverRelationship); + return m_bookPart->open(); + } + ldomDocument *getDescription(); +public: + lString16 m_coverImage; +}; + +bool DetectFb3Format( LVStreamRef stream ) +{ + LVContainerRef m_arc = LVOpenArchieve( stream ); + if ( m_arc.isNull() ) + return false; // not a ZIP archive + + OpcPackage package(m_arc); + + return package.partExist(package.getContentPartName(fb3_BodyContentType)); +} + +class fb3DomWriter : public LVXMLParserCallback +{ +private: + fb3ImportContext *m_context; + ldomDocumentWriter *m_parent; + bool m_checkRole; +protected: + void writeDescription(); +public: + /// constructor + fb3DomWriter(ldomDocumentWriter * parent, fb3ImportContext *importContext ) : + m_context(importContext), m_parent(parent), m_checkRole(false) + { + } + // LVXMLParserCallback interface +public: + ldomNode *OnTagOpen(const lChar16 *nsname, const lChar16 *tagname); + /// called on closing tag + void OnTagClose( const lChar16 * nsname, const lChar16 * tagname ); + void OnTagBody(); + void OnAttribute(const lChar16 *nsname, const lChar16 *attrname, const lChar16 *attrvalue); + + lUInt32 getFlags() { return m_parent->getFlags(); } + void setFlags(lUInt32 flags) { m_parent->setFlags(flags); } + void OnEncoding(const lChar16 *name, const lChar16 *table) { m_parent->OnEncoding(name, table); } + void OnStart(LVFileFormatParser *parser) { m_parent->OnStart(parser); } + void OnStop() { m_parent->OnStop(); } + void OnText(const lChar16 *text, int len, lUInt32 flags) { m_parent->OnText(text, len, flags); } + bool OnBlob(lString16 name, const lUInt8 *data, int size) { return m_parent->OnBlob(name, data, size); } + void OnDocProperty(const char *name, lString8 value) { m_parent->OnDocProperty(name, value); } +}; + +bool ImportFb3Document( LVStreamRef stream, ldomDocument * doc, LVDocViewCallback * progressCallback, CacheLoadingCallback * formatCallback ) +{ + LVContainerRef arc = LVOpenArchieve( stream ); + if ( arc.isNull() ) + return false; // not a ZIP archive + + OpcPackage package(arc); + + fb3ImportContext context(&package); + + doc->setContainer(arc); + + package.readCoreProperties(doc->getProps()); + + ldomDocument * descDoc = context.getDescription(); + + if ( descDoc ) { + lString16 language = descDoc->textFromXPath( cs16("fb3-description/lang") ); + doc->getProps()->setString(DOC_PROP_LANGUAGE, language); + } else { + CRLog::error("Couldn't parse description doc"); + } + +#if BUILD_LITE!=1 + if ( doc->openFromCache(formatCallback) ) { + if ( progressCallback ) { + progressCallback->OnLoadFileEnd( ); + } + return true; + } +#endif + + LVStreamRef bookStream = context.openBook(); + if ( bookStream.isNull() ) { + CRLog::error("Couldn't read a book"); + return false; + } + + ldomDocumentWriter writer(doc); + fb3DomWriter fb3Writer(&writer, &context); + LVFileFormatParser * parser = new LVXMLParser(bookStream, &fb3Writer); + + bool ret = parser->Parse(); + delete parser; + + if ( !ret ) { + CRLog::error("Couldn't parse a book"); + } + + if ( progressCallback ) { + progressCallback->OnLoadFileEnd( ); + doc->compact(); + doc->dumpStatistics(); + } + + return ret; +} + +fb3ImportContext::fb3ImportContext(OpcPackage *package) : m_package(package), m_descDoc(NULL) +{ +} + +fb3ImportContext::~fb3ImportContext() +{ + if(m_descDoc) + delete m_descDoc; +} + +ldomDocument *fb3ImportContext::getDescription() +{ + if( !m_descDoc ) { + LVStreamRef descStream = m_package->openContentPart(fb3_DescriptionContentType); + + if ( !descStream.isNull() ) { + m_descDoc = LVParseXMLStream( descStream ); + } + } + return m_descDoc; +} + +void fb3DomWriter::writeDescription() +{ + //TODO extended FB3 description + m_parent->OnTagOpenNoAttr( NULL, L"description" ); + m_parent->OnTagOpenNoAttr( NULL, L"title-info" ); + m_parent->OnTagOpenNoAttr( NULL, L"book-title" ); + m_parent->OnTagClose( NULL, L"book-title" ); + if ( !m_context->m_coverImage.empty() ) { + m_parent->OnTagOpenNoAttr( NULL, L"coverpage" ); + m_parent->OnTagOpen(NULL, L"image"); + m_parent->OnAttribute(L"l", L"href", m_context->m_coverImage.c_str()); + m_parent->OnTagClose( NULL, L"image" ); + m_parent->OnTagClose( NULL, L"coverpage" ); + } + m_parent->OnTagClose( NULL, L"title-info" ); + m_parent->OnTagClose( NULL, L"description" ); +} + +ldomNode *fb3DomWriter::OnTagOpen(const lChar16 *nsname, const lChar16 *tagname) +{ + if( !lStr_cmp(tagname, "fb3-body") ) { + m_parent->OnTagOpenNoAttr(NULL, L"FictionBook"); + writeDescription(); + tagname = L"body"; + } else if ( !lStr_cmp(tagname, "notes" )) { + m_parent->OnTagClose(NULL, L"body"); + ldomNode *footnotesBody = m_parent->OnTagOpen(NULL,L"body"); + m_parent->OnAttribute(NULL, L"name", L"notes"); + m_parent->OnTagBody(); + return footnotesBody; + } else if( !lStr_cmp(tagname, "notebody") ) { + tagname = L"section"; + } else if( !lStr_cmp(tagname, "note") ) { + m_checkRole = true; + return m_parent->OnTagOpen(nsname, L"a"); + } + return m_parent->OnTagOpen(nsname, tagname); +} + +void fb3DomWriter::OnTagClose(const lChar16 *nsname, const lChar16 *tagname) +{ + if ( !lStr_cmp(tagname, "fb3-body") ) { + m_parent->OnTagClose(NULL, L"body"); + tagname = L"FictionBook"; + } else if ( !lStr_cmp(tagname, "notebody") ) { + tagname = L"section"; + } else if( !lStr_cmp(tagname, "note") ) { + tagname = L"a"; + } else if ( !lStr_cmp(tagname, "notes" )) { + tagname = L"body"; + } + m_parent->OnTagClose(nsname, tagname); +} + +void fb3DomWriter::OnTagBody() +{ + m_checkRole = false; + m_parent->OnTagBody(); +} + +void fb3DomWriter::OnAttribute(const lChar16 *nsname, const lChar16 *attrname, const lChar16 *attrvalue) +{ + bool pass = true; + + if( !lStr_cmp(attrname, "href") ) { + lString16 href(attrvalue); + + if ( href.pos(":") == -1 && href[0] != '#') { + href = cs16("#") + href; + m_parent->OnAttribute(nsname, attrname, href.c_str()); + pass = false; + } + } else if ( m_checkRole && !lStr_cmp(attrname, "role") ) { + if( !lStr_cmp(attrvalue, "footnote") ) + m_parent->OnAttribute(NULL, L"type", L"note"); + else + m_parent->OnAttribute(NULL, L"type", L"comment"); + } else if ( !lStr_cmp(attrname, "src") ) { + lString16 target = m_context->geImageTarget(attrvalue); + if( !target.empty() ) { + m_parent->OnAttribute(nsname, attrname, target.c_str()); + pass = false; + } + } + if ( pass) { + m_parent->OnAttribute(nsname, attrname, attrvalue); + } +} diff --git a/crengine/src/lvdocview.cpp b/crengine/src/lvdocview.cpp index 1a2212076b..ecb317d53b 100644 --- a/crengine/src/lvdocview.cpp +++ b/crengine/src/lvdocview.cpp @@ -27,6 +27,9 @@ #include "../include/chmfmt.h" #include "../include/wordfmt.h" #include "../include/pdbfmt.h" +#include "../include/fb3fmt.h" +#include "../include/docxfmt.h" + /// to show page bounds rectangles //#define SHOW_PAGE_RECT @@ -3890,6 +3893,71 @@ bool LVDocView::LoadDocument(LVStreamRef stream) { return true; } } + + if( DetectFb3Format(m_stream) ) { + CRLog::info("FB3 format detected"); + createEmptyDocument(); + m_doc->setProps( m_doc_props ); + setRenderProps( 0, 0 ); + setDocFormat( doc_format_fb3 ); + if ( m_callback ) + m_callback->OnLoadFileFormatDetected(doc_format_fb3); + updateDocStyleSheet(); + bool res = ImportFb3Document( m_stream, m_doc, m_callback, this ); + if ( !res ) { + setDocFormat( doc_format_none ); + createDefaultDocument( cs16("ERROR: Error reading FB3 format"), cs16("Cannot open document") ); + if ( m_callback ) { + m_callback->OnLoadFileError( cs16("Error reading FB3 document") ); + } + return false; + } else { + m_container = m_doc->getContainer(); + m_doc_props = m_doc->getProps(); + setRenderProps( 0, 0 ); + REQUEST_RENDER("loadDocument") + if ( m_callback ) { + m_callback->OnLoadFileEnd( ); + //m_doc->compact(); + m_doc->dumpStatistics(); + } + m_arc = m_doc->getContainer(); + return true; + } + } + + if( DetectDocXFormat(m_stream) ) { + CRLog::info("DOCX format detected"); + createEmptyDocument(); + m_doc->setProps( m_doc_props ); + setRenderProps( 0, 0 ); + setDocFormat( doc_format_docx ); + if ( m_callback ) + m_callback->OnLoadFileFormatDetected(doc_format_docx); + updateDocStyleSheet(); + bool res = ImportDocXDocument( m_stream, m_doc, m_callback, this ); + if ( !res ) { + setDocFormat( doc_format_none ); + createDefaultDocument( cs16("ERROR: Error reading DOCX format"), cs16("Cannot open document") ); + if ( m_callback ) { + m_callback->OnLoadFileError( cs16("Error reading DOCX document") ); + } + return false; + } else { + m_container = m_doc->getContainer(); + m_doc_props = m_doc->getProps(); + setRenderProps( 0, 0 ); + REQUEST_RENDER("loadDocument") + if ( m_callback ) { + m_callback->OnLoadFileEnd( ); + //m_doc->compact(); + m_doc->dumpStatistics(); + } + m_arc = m_doc->getContainer(); + return true; + } + } + #if CHM_SUPPORT_ENABLED==1 if ( DetectCHMFormat( m_stream ) ) { // CHM @@ -4111,6 +4179,8 @@ const lChar16 * getDocFormatName(doc_format_t fmt) { switch (fmt) { case doc_format_fb2: return L"FictionBook (FB2)"; + case doc_format_fb3: + return L"FictionBook (FB3)"; case doc_format_txt: return L"Plain text (TXT)"; case doc_format_rtf: @@ -4125,6 +4195,8 @@ const lChar16 * getDocFormatName(doc_format_t fmt) { return L"CR3 TXT Bookmark"; case doc_format_doc: return L"DOC"; + case doc_format_docx: + return L"DOCX"; default: return L"Unknown format"; } diff --git a/crengine/src/lvopc.cpp b/crengine/src/lvopc.cpp new file mode 100644 index 0000000000..ab5c4b7086 --- /dev/null +++ b/crengine/src/lvopc.cpp @@ -0,0 +1,147 @@ +#include "../include/lvopc.h" +#include "../include/lvtinydom.h" + +static const lChar16 * const OPC_PropertiesContentType = L"application/vnd.openxmlformats-package.core-properties+xml"; + +OpcPart::~OpcPart() +{ + m_relations.clear(); +} + +LVStreamRef OpcPart::open() +{ + return m_package->open(m_name); +} + +lString16 OpcPart::getRelatedPartName(const lChar16 * const relationType, const lString16 id) +{ + if( !m_relationsValid ) { + readRelations(); + m_relationsValid = true; + } + LVHashTable *relationsTable = m_relations.get(relationType); + if( relationsTable ) { + if( id.empty() ) { + LVHashTable::iterator it = relationsTable->forwardIterator(); + LVHashTable::pair *p = it.next(); + if( p ) { + return p->value; // return first value + } + } else { + return relationsTable->get(id); + } + } + return lString16(); +} + +OpcPartRef OpcPart::getRelatedPart(const lChar16 * const relationType, const lString16 id) +{ + return m_package->getPart( getRelatedPartName(relationType, id) ); +} + +void OpcPart::readRelations() +{ + lString16 relsPath = LVExtractPath(m_name) + cs16("_rels/") + LVExtractFilename(m_name) + cs16(".rels"); + LVStreamRef container_stream = m_package->open(relsPath); + + if ( !container_stream.isNull() ) { + ldomDocument * doc = LVParseXMLStream( container_stream ); + lString16 srcPath = LVExtractPath(m_name); + + if ( doc ) { + ldomNode *root = doc->nodeFromXPath(cs16("Relationships")); + if( root ) { + for(int i = 0; i < root->getChildCount(); i++) { + ldomNode * relationshipNode = root->getChildNode((lUInt32)i); + const lString16 relType = relationshipNode->getAttributeValue(L"Type"); + LVHashTable *relationsTable = m_relations.get(relType); + if( !relationsTable ) { + relationsTable = new LVHashTable(16); + m_relations.set(relType, relationsTable); + } + const lString16 id = relationshipNode->getAttributeValue(L"Id"); + relationsTable->set( id, getTargetPath(srcPath, relationshipNode->getAttributeValue(L"TargetMode"), + relationshipNode->getAttributeValue(L"Target")) ); + } + } + delete doc; + } + } +} + +lString16 OpcPart::getTargetPath(const lString16 srcPath, const lString16 targetMode, lString16 target) +{ + if( !target.empty() ) { + if ( targetMode == L"External" || target.pos(L":") != -1 ) + return target; + + if( !LVIsAbsolutePath(target) ) { + target = LVCombinePaths(srcPath, target); + } + if( LVIsAbsolutePath(target) ) { + return target.substr(1); + } + } + return target; +} + +lString16 OpcPackage::getContentPartName(const lChar16 *contentType) +{ + if ( !m_contentTypesValid ) { + readContentTypes(); + m_contentTypesValid = true; + } + return m_contentTypes.get(contentType); +} + +OpcPartRef OpcPackage::getPart(const lString16 partName) +{ + return OpcPartRef(createPart(this, partName)); +} + +bool OpcPackage::partExist(const lString16 partName) +{ + LVStreamRef partStream = open(partName); + return !partStream.isNull(); +} + +void OpcPackage::readCoreProperties(CRPropRef doc_props) +{ + LVStreamRef propStream = openContentPart(OPC_PropertiesContentType); + + if ( !propStream.isNull() ) { + ldomDocument * propertiesDoc = LVParseXMLStream( propStream ); + if ( propertiesDoc ) { + lString16 author = propertiesDoc->textFromXPath( cs16("coreProperties/creator") ); + lString16 title = propertiesDoc->textFromXPath( cs16("coreProperties/title") ); + doc_props->setString(DOC_PROP_TITLE, title); + doc_props->setString(DOC_PROP_AUTHORS, author ); + delete propertiesDoc; + } else { + CRLog::error("Couldn't parse core properties"); + } + } else { + CRLog::error("Couldn't read core properties"); + } +} + +void OpcPackage::readContentTypes() +{ + LVStreamRef mtStream = m_container->OpenStream(L"[Content_Types].xml", LVOM_READ ); + if ( !mtStream.isNull() ) { + ldomDocument * doc = LVParseXMLStream( mtStream ); + if( doc ) { + ldomNode *root = doc->nodeFromXPath(cs16("Types")); + if(root) { + for(int i = 0; i < root->getChildCount(); i++) { + ldomNode * typeNode = root->getChildNode(i); + + if(typeNode->getNodeName() == cs16("Override")) //Don't care about Extensions + m_contentTypes.set( typeNode->getAttributeValue(L"ContentType"), + typeNode->getAttributeValue(L"PartName") ); + } + } + delete doc; + } + } +} diff --git a/crengine/src/lvrend.cpp b/crengine/src/lvrend.cpp index 88ec072186..21b3426b0e 100644 --- a/crengine/src/lvrend.cpp +++ b/crengine/src/lvrend.cpp @@ -1269,7 +1269,7 @@ void renderFinalBlock( ldomNode * enode, LFormattedText * txform, RenderRectAcce } */ //int offs = 0; - if ( txform->GetSrcCount()==0 && style->white_space!=css_ws_pre ) { + if ( (txform->GetSrcCount()==0 || (tflags & LTEXT_IS_LINK)) && style->white_space!=css_ws_pre ) { // clear leading spaces for first text of paragraph int i=0; for ( ;txt.length()>i && (txt[i]==' ' || txt[i]=='\t'); i++ ) diff --git a/crengine/src/lvtextfm.cpp b/crengine/src/lvtextfm.cpp index c1e0255ff4..348e4a0b27 100644 --- a/crengine/src/lvtextfm.cpp +++ b/crengine/src/lvtextfm.cpp @@ -505,16 +505,21 @@ class LVFormatter { } bool isObject = false; bool prevCharIsObject = false; + bool isLetterSpacingChanged = false; if ( it.font; + if (i > 0 && m_srcs[i]->letter_spacing != m_srcs[i -1]->letter_spacing) { + isLetterSpacingChanged = true; + } } if (i > 0) prevCharIsObject = m_charindex[i - 1] == OBJECT_CHAR_INDEX; if ( !lastFont ) lastFont = newFont; - if ( i>start && (newFont!=lastFont || isObject || prevCharIsObject || i>=start+MAX_TEXT_CHUNK_SIZE || (m_flags[i]&LCHAR_MANDATORY_NEWLINE)) ) { + if ( i>start && (newFont!=lastFont || isObject || prevCharIsObject || isLetterSpacingChanged + || i>=start+MAX_TEXT_CHUNK_SIZE || (m_flags[i]&LCHAR_MANDATORY_NEWLINE)) ) { // measure start..i-1 chars if ( m_charindex[i-1]!=OBJECT_CHAR_INDEX ) { // measure text diff --git a/crengine/src/lvtinydom.cpp b/crengine/src/lvtinydom.cpp index 1f7daa7c74..96e497b6b4 100644 --- a/crengine/src/lvtinydom.cpp +++ b/crengine/src/lvtinydom.cpp @@ -10612,7 +10612,7 @@ bool ldomNode::getNodeListMarker( int & counterValue, lString16 & marker, int & for (int i = 0; i < parent->getChildCount(); i++) { ldomNode * child = parent->getChildNode(i); css_style_ref_t cs = child->getStyle(); - if ( cs.isNull() ) + if ( cs.isNull() || cs->display != css_d_list_item ) continue; switch ( cs->list_style_type ) { case css_lst_decimal: @@ -11035,7 +11035,7 @@ class NodeImageProxy : public LVImageSource }; /// returns object image ref name -lString16 ldomNode::getObjectImageRefName() +lString16 ldomNode::getObjectImageRefName(bool percentDecode) { if (!isElement()) return lString16::empty_str; @@ -11072,7 +11072,8 @@ lString16 ldomNode::getObjectImageRefName() } if ( refName.length()<2 ) return lString16::empty_str; - refName = DecodeHTMLUrlString(refName); + if (percentDecode) + refName = DecodeHTMLUrlString(refName); return refName; } @@ -11090,11 +11091,18 @@ LVStreamRef ldomNode::getObjectImageStream() /// returns object image source LVImageSourceRef ldomNode::getObjectImageSource() { - lString16 refName = getObjectImageRefName(); + lString16 refName = getObjectImageRefName(true); LVImageSourceRef ref; if ( refName.empty() ) return ref; ref = getDocument()->getObjectImageSource( refName ); + if (ref.isNull()) { + // try again without percent decoding (for fb3) + refName = getObjectImageRefName(false); + if ( refName.empty() ) + return ref; + ref = getDocument()->getObjectImageSource( refName ); + } if ( !ref.isNull() ) { int dx = ref->GetWidth(); int dy = ref->GetHeight(); From 856b4bb4ecfa214ad09b3d1e83fcee474ad2e70b Mon Sep 17 00:00:00 2001 From: pkb Date: Mon, 30 Sep 2019 13:38:32 +0600 Subject: [PATCH 02/11] Fixed C++ compilation error --- crengine/src/docxfmt.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crengine/src/docxfmt.cpp b/crengine/src/docxfmt.cpp index b916490987..9fafc6b2ab 100644 --- a/crengine/src/docxfmt.cpp +++ b/crengine/src/docxfmt.cpp @@ -342,7 +342,7 @@ class docx_PropertiesContainer } } - template + template T getValue(int index, T defaultValue) const { css_length_t property = get(index); if(property.type != css_val_unspecified) @@ -350,7 +350,7 @@ class docx_PropertiesContainer return defaultValue; } - template<> + template bool getValue(int index, bool defaultValue) const { css_length_t property = get(index); if(property.type != css_val_unspecified) From 8f80a1caaa980b359f33a341c8d20ca3e27111cf Mon Sep 17 00:00:00 2001 From: Konstantin Potapov Date: Tue, 1 Oct 2019 08:56:59 +0600 Subject: [PATCH 03/11] dont consider numbering inside title to fix Extraneous numbers in a document, if at some doc it will be required will have to reconsider conditions --- crengine/src/docxfmt.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crengine/src/docxfmt.cpp b/crengine/src/docxfmt.cpp index 9fafc6b2ab..a5d47b8f2e 100644 --- a/crengine/src/docxfmt.cpp +++ b/crengine/src/docxfmt.cpp @@ -1871,7 +1871,7 @@ ldomNode * docx_pHandler::handleTagOpen(int tagId) } int numId = m_pPr.getNumberingId(); - if( numId != 0 ) { + if( numId != 0 && !m_inTitle ) { int level = m_pPr.getNumberingLevel() + 1; if( level > m_importContext->getListLevel() ) m_importContext->openList(level, numId, m_writer); From b4b407f5f3fb672ce8571dccb08d82bc4ba6b1f0 Mon Sep 17 00:00:00 2001 From: Konstantin Potapov Date: Tue, 1 Oct 2019 21:30:19 +0600 Subject: [PATCH 04/11] fixed underline processing --- crengine/src/docxfmt.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crengine/src/docxfmt.cpp b/crengine/src/docxfmt.cpp index a5d47b8f2e..54559af108 100644 --- a/crengine/src/docxfmt.cpp +++ b/crengine/src/docxfmt.cpp @@ -1451,6 +1451,10 @@ void docx_rPrHandler::handleAttribute(const lChar16 * attrname, const lChar16 * if( !lStr_cmp(attrname, "val") ) m_rPr->setItalic(parse_OnOff_attribute( attrvalue )); break; + case docx_el_u: + if( !lStr_cmp(attrname, "val") ) + m_rPr->setUnderline( lStr_cmp(attrvalue, "none") != 0); + break; case docx_el_jc: if( !lStr_cmp(attrname, "val") ) { attr_value = parse_name(jc_attr_values, attrvalue); From bfdc88214dc1df45c01ca316e430b59a3af3f71f Mon Sep 17 00:00:00 2001 From: pkb Date: Wed, 9 Oct 2019 08:14:55 +0600 Subject: [PATCH 05/11] Fixed typo (extra h2 in css selector) --- cr3qt/data/docx.css | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cr3qt/data/docx.css b/cr3qt/data/docx.css index a4b7ad369f..43a4f71f2a 100644 --- a/cr3qt/data/docx.css +++ b/cr3qt/data/docx.css @@ -14,7 +14,7 @@ li image { display: inline } li { display: list-item; text-indent: 0em; } ol { display: block; list-style-type: decimal; margin-left: 1em } -title.h1 p, title.h2 h2 p { +title.h1 p, title.h2 p { $title.all } From 34a8ad9a88574a9876ae9e29dc8e3276d06e47b8 Mon Sep 17 00:00:00 2001 From: Konstantin Potapov Date: Sun, 13 Oct 2019 14:40:15 +0600 Subject: [PATCH 06/11] Refactored titles import handling to have either FB2 or HTML like import --- cr3qt/data/docx.css | 6 +- crengine/src/docxfmt.cpp | 294 ++++++++++++++++++++++++++------------- 2 files changed, 197 insertions(+), 103 deletions(-) diff --git a/cr3qt/data/docx.css b/cr3qt/data/docx.css index a4b7ad369f..0bb00d8df2 100644 --- a/cr3qt/data/docx.css +++ b/cr3qt/data/docx.css @@ -80,7 +80,7 @@ publish-info { display: none; } custom-info { display: none; } coverpage { display: none } -strong,emphasis,u,strike,sub,sup { +strong,em,u,s,sub,sup { display: inline; } @@ -88,10 +88,10 @@ sub { vertical-align: sub; font-size: 70% } sup { vertical-align: super; font-size: 70% } strong { font-weight: bold } -emphasis { font-style: italic } +em { font-style: italic } u { text-decoration: underline; } -strike { text-decoration: line-through; } +s { text-decoration: line-through; } img { diff --git a/crengine/src/docxfmt.cpp b/crengine/src/docxfmt.cpp index 54559af108..a736021ec0 100644 --- a/crengine/src/docxfmt.cpp +++ b/crengine/src/docxfmt.cpp @@ -8,6 +8,9 @@ #define DOCX_TAG_CHILD(itm) { DOCX_TAG_ID(itm), DOCX_TAG_NAME(itm) } #define DOCX_LAST_ITEM { -1, NULL } +// comment this out to disable in-page footnotes +#define DOCX_CRENGINE_IN_PAGE_FOOTNOTES 1 + /// known docx items name and identifier struct item_def_t { int id; @@ -877,6 +880,42 @@ class docx_pPrHandler : public docx_ElementHandler void reset(); }; +class docx_titleHandler +{ +public: + docx_titleHandler(ldomDocumentWriter *writer, docxImportContext *context, bool useClassName=false) : + m_writer(writer), m_importContext(context), m_titleLevel(), m_useClassName(useClassName) {} + virtual ~docx_titleHandler() {} + virtual void onBodyStart(); + virtual void onTitleStart(int level, bool noSection = false); + virtual void onTitleEnd(); + virtual void onBodyEnd() {} + bool useClassForTitle() { return m_useClassName; } +protected: + ldomDocumentWriter *m_writer; + docxImportContext *m_importContext; + int m_titleLevel; + bool m_useClassName; +}; + +class docx_fb2TitleHandler : public docx_titleHandler +{ +public: + docx_fb2TitleHandler(ldomDocumentWriter *writer, docxImportContext *context) : + docx_titleHandler(writer, context, true) + {} + void onBodyStart(); + void onTitleStart(int level, bool noSection = false); + void onTitleEnd(); +private: + void makeSection(int startIndex); + void openSection(int level); + void closeSection(int level); +private: + ldomNode *m_section; + bool m_hasTitle; +}; + class docx_hyperlinkHandler : public docx_ElementHandler { docx_rHandler m_rHandler; @@ -901,7 +940,7 @@ class docx_pHandler : public docx_ElementHandler docx_pPrHandler m_pPrHandler; docx_pPr m_pPr; docx_rHandler m_rHandler; - docx_documentHandler* m_documentHandler; + docx_titleHandler* m_titleHandler; docx_hyperlinkHandler m_hyperlinkHandler; int m_runCount; lString16 m_styleTags; @@ -918,11 +957,11 @@ class docx_pHandler : public docx_ElementHandler void closeStyleTag( lChar16 ch); void openStyleTag( lChar16 ch); public: - docx_pHandler(docXMLreader * reader, ldomDocumentWriter *writer, docxImportContext *context, docx_documentHandler* p_documentHandler) : + docx_pHandler(docXMLreader * reader, ldomDocumentWriter *writer, docxImportContext *context, docx_titleHandler* p_documentHandler) : docx_ElementHandler(reader, writer, context, docx_el_p, p_elements), m_pPrHandler(reader, writer, context), m_rHandler(reader, writer, context, this), - m_documentHandler(p_documentHandler), + m_titleHandler(p_documentHandler), m_hyperlinkHandler(reader, writer, context, this), m_inTitle(false) { } @@ -948,6 +987,7 @@ class docx_tblHandler : public docx_ElementHandler LVArray m_levels; LVArray m_rowSpaninfo; int m_rowCount; + docx_titleHandler m_titleHandler; docx_pHandler m_pHandler; docx_SkipElementHandler m_skipHandler; docx_ElementHandler* m_pHandler_; @@ -962,9 +1002,10 @@ class docx_tblHandler : public docx_ElementHandler int m_vMergeState; void endRowSpan(int column); public: - docx_tblHandler(docXMLreader * reader, ldomDocumentWriter *writer, docxImportContext *context) : + docx_tblHandler(docXMLreader * reader, ldomDocumentWriter *writer, docxImportContext *context, docx_titleHandler* titleHandler) : docx_ElementHandler(reader, writer, context, docx_el_tbl, tbl_elements), - m_rowCount(0), m_pHandler(reader, writer, context, NULL), + m_rowCount(0), m_titleHandler(writer, context, titleHandler->useClassForTitle()), + m_pHandler(reader, writer, context, &m_titleHandler), m_skipHandler(reader, writer, context, docx_el_p), m_colSpan(1), m_column(0), m_columnCount(0), m_vMergeState(VMERGE_NONE) { @@ -999,23 +1040,15 @@ class docx_documentHandler : public docx_ElementHandler private: docx_pHandler paragraphHandler; docx_tblHandler m_tableHandler; - ldomNode *m_section; - int m_sectionLevel; - int m_pCount; - bool m_hasTitle; -private: - void makeSection(int startIndex); - void openSection(int level); - void closeSection(int level); +protected: + docx_titleHandler* m_titleHandler; public: - docx_documentHandler(docXMLreader * reader, ldomDocumentWriter *writer, docxImportContext *context) : + docx_documentHandler(docXMLreader * reader, ldomDocumentWriter *writer, docxImportContext *context, docx_titleHandler* titleHandler) : docx_ElementHandler(reader, writer, context, docx_el_document, document_elements), - paragraphHandler(reader, writer, context, this), - m_tableHandler(reader, writer, context), m_section(), m_sectionLevel(), m_pCount() + paragraphHandler(reader, writer, context, titleHandler), + m_tableHandler(reader, writer, context, titleHandler), m_titleHandler(titleHandler) { } - void onTitleStart(int level); - void onTitleEnd(); ldomNode * handleTagOpen(int tagId); void handleAttribute(const lChar16 * nsname, const lChar16 * attrname, const lChar16 * attrvalue); void handleTagClose( const lChar16 * nsname, const lChar16 * tagname ); @@ -1374,9 +1407,25 @@ void docx_ElementHandler::generateLink(const lChar16 *target, const lChar16 *typ m_writer->OnAttribute(L"", L"href", target ); if(type) m_writer->OnAttribute(L"", L"type", type); + // Add classic role=doc-noteref attribute to allow popup/in-page footnotes + m_writer->OnAttribute(L"", L"role", L"doc-noteref"); m_writer->OnTagBody(); +#ifndef DOCX_CRENGINE_IN_PAGE_FOOTNOTES + if( !lStr_cmp(type, "note") ) { + // For footnotes (but not endnotes), wrap in (to get the + // same effect upstream gets with the following in docx.css: + // a[type="note"] { vertical-align: super; font-size: 70%; } + m_writer->OnTagOpen(L"", L"sup"); + m_writer->OnTagBody(); + } +#endif lString16 t(text); m_writer->OnText(t.c_str(), t.length(), 0); +#ifndef DOCX_CRENGINE_IN_PAGE_FOOTNOTES + if( !lStr_cmp(type, "note") ) { + m_writer->OnTagClose(L"", L"sup"); + } +#endif m_writer->OnTagClose(L"", L"a"); } @@ -1815,11 +1864,11 @@ const lChar16 *docx_pHandler::getStyleTagName(lChar16 ch) case 'b': return L"strong"; case 'i': - return L"emphasis"; + return L"em"; // upstream uses L"emphasis"; case 'u': return L"u"; case 's': - return L"strike"; + return L"s"; // upstream uses L"strike"; case 't': return L"sup"; case 'd': @@ -1860,20 +1909,12 @@ ldomNode * docx_pHandler::handleTagOpen(int tagId) switch(tagId) { case docx_el_r: if ( 0 == m_runCount ) { - lString16 className; - m_pPr.combineWith(m_importContext->get_pPrDefault()); css_length_t outlineLevel = m_pPr.getOutlineLvl(); m_importContext->m_pStyle = m_pPr.getStyle(m_importContext); if ( outlineLevel.type != css_val_unspecified ) { - if (m_documentHandler) { - m_inTitle = true; - m_documentHandler->onTitleStart(outlineLevel.value + 1); - } else { - className = cs16("h") + lString16::itoa(outlineLevel.value + 1); - } + m_inTitle = true; } - int numId = m_pPr.getNumberingId(); if( numId != 0 && !m_inTitle ) { int level = m_pPr.getNumberingLevel() + 1; @@ -1883,18 +1924,19 @@ ldomNode * docx_pHandler::handleTagOpen(int tagId) m_importContext->closeList(level, m_writer); else m_writer->OnTagClose(L"", L"li"); - m_writer->OnTagOpenNoAttr(L"", L"li"); + m_writer->OnTagOpen(L"", L"li"); } else { if( m_importContext->isInList() ) m_importContext->closeList(0, m_writer); - m_writer->OnTagOpen(L"", L"p"); - if ( !className.empty() ) - m_writer->OnAttribute(L"", L"class", className.c_str()); - lString16 style = m_pPr.getCss(); - if(!style.empty()) - m_writer->OnAttribute(L"", L"style", style.c_str()); - m_writer->OnTagBody(); + if( m_inTitle ) + m_titleHandler->onTitleStart(outlineLevel.value + 1); + else + m_writer->OnTagOpen(L"", L"p"); } + lString16 style = m_pPr.getCss(); + if( !style.empty() ) + m_writer->OnAttribute(L"", L"style", style.c_str()); + m_writer->OnTagBody(); } m_rHandler.start(); m_runCount++; @@ -1932,12 +1974,15 @@ void docx_pHandler::handleTagClose( const lChar16 * nsname, const lChar16 * tagn switch(m_state) { case docx_el_p: closeStyleTags(); - if( m_pPr.getNumberingId() == 0 ) - m_writer->OnTagClose(L"", L"p"); + if( m_pPr.getNumberingId() == 0 ) { + if( !m_inTitle ) { + m_writer->OnTagClose(L"", L"p"); + } + } stop(); if( m_inTitle ) { m_inTitle = false; - m_documentHandler->onTitleEnd(); + m_titleHandler->onTitleEnd(); } break; default: @@ -1994,74 +2039,19 @@ void docx_pHandler::closeStyleTags() m_styleTags.clear(); } -void docx_documentHandler::makeSection(int startIndex) -{ - ldomNode *newSection = m_section->insertChildElement(startIndex, LXML_NS_NONE, el_section); - newSection->initNodeStyle(); - m_section->moveItemsTo(newSection, startIndex + 1, m_section->getChildCount() - 1); - newSection->initNodeRendMethod( ); - m_section = newSection; -} - -void docx_documentHandler::openSection(int level) -{ - for(int i = m_sectionLevel; i < level; i++) { - m_section = m_writer->OnTagOpen(L"", L"section"); - m_writer->OnTagBody(); - } - m_sectionLevel = level; - m_pCount = 0; - m_hasTitle = false; -} - -void docx_documentHandler::closeSection(int level) -{ - for(int i = 0; i < level; i++) { - m_writer->OnTagClose(L"", L"section"); - m_sectionLevel--; - } - m_pCount = 0; - m_hasTitle = false; -} - -void docx_documentHandler::onTitleStart(int level) -{ - if(m_sectionLevel < level) { - int startIndex = m_hasTitle ? 1 : 0; - int contentCount = m_section->getChildCount(); - if(contentCount > startIndex) { - makeSection(startIndex); - } - } else { - closeSection(m_sectionLevel - level + 1); - } - openSection(level); - m_writer->OnTagOpen(L"", L"title"); - lString16 className = cs16("h") + lString16::itoa(level); - m_writer->OnAttribute(L"", L"class", className.c_str()); - m_writer->OnTagBody(); -} - -void docx_documentHandler::onTitleEnd() -{ - m_writer->OnTagClose(L"", L"title"); - m_hasTitle = true; -} - ldomNode * docx_documentHandler::handleTagOpen(int tagId) { if( tagId != docx_el_p && m_importContext->isInList() ) m_importContext->closeList(0, m_writer); switch(tagId) { case docx_el_p: - m_pCount++; paragraphHandler.start(); break; case docx_el_tbl: m_tableHandler.start(); break; case docx_el_body: - m_section = m_writer->OnTagOpen(L"", docx_el_body_name); + m_titleHandler->onBodyStart(); m_writer->OnTagBody(); //fallthrough default: @@ -2081,7 +2071,7 @@ void docx_documentHandler::handleTagClose( const lChar16 * nsname, const lChar16 { switch(m_state) { case docx_el_body: - closeSection(m_sectionLevel); + m_titleHandler->onBodyEnd(); m_writer->OnTagClose(nsname, tagname); break; default: @@ -2262,7 +2252,13 @@ void parseFootnotes(ldomDocumentWriter& writer, docxImportContext& context, int LVXMLParser parser(m_stream, &docReader); if(parser.Parse()) +#ifdef DOCX_CRENGINE_IN_PAGE_FOOTNOTES writer.OnTagClose(L"", docx_el_body_name); +#else + // We didn't add to not trigger crengine auto-in-page-foonotes + // mechanism, so we can tweak them with style tweaks. We used a simple
instead. + writer.OnTagClose(L"", L"div"); +#endif } context.closeRelatedPart(); } @@ -2317,7 +2313,10 @@ bool ImportDocXDocument( LVStreamRef stream, ldomDocument * doc, LVDocViewCallba writer.OnTagClose(NULL, L"title-info"); writer.OnTagClose(NULL, L"description"); - docx_documentHandler documentHandler(&docReader, &writer, &importContext); + //Two options when dealing with titles: (FB2|HTML) + docx_fb2TitleHandler titleHandler(&writer, &importContext); //
..
+ //docx_titleHandler titleHandler(&writer, &importContext); //.. + docx_documentHandler documentHandler(&docReader, &writer, &importContext, &titleHandler); docReader.setHandler(&documentHandler); @@ -2340,9 +2339,7 @@ bool ImportDocXDocument( LVStreamRef stream, ldomDocument * doc, LVDocViewCallba doc->compact(); doc->dumpStatistics(); } - return true; - } docxStyle::docxStyle() : m_type(docx_paragraph_style), @@ -2615,6 +2612,7 @@ ldomNode *docx_footnotesHandler::handleTagOpen(int tagId) lString16 id = isEndNote() ? L"c_" : L"n_"; id << m_importContext->m_footNoteId.c_str(); m_writer->OnAttribute(L"", L"id", id.c_str()); + m_writer->OnAttribute(L"", L"role", isEndNote() ? L"doc-rearnote" : L"doc-footnote"); m_writer->OnTagBody(); } paragraphHandler.start(); @@ -2632,6 +2630,7 @@ ldomNode *docx_footnotesHandler::handleTagOpen(int tagId) break; case docx_el_footnotes: case docx_el_endnotes: +#ifdef DOCX_CRENGINE_IN_PAGE_FOOTNOTES m_writer->OnTagOpen(L"", docx_el_body_name); if(isEndNote()) { m_writer->OnAttribute(L"", L"name", L"comments"); @@ -2644,6 +2643,13 @@ ldomNode *docx_footnotesHandler::handleTagOpen(int tagId) m_writer->OnAttribute(L"", L"name", L"notes"); m_writer->OnTagBody(); } +#else + // We don't add to not trigger crengine auto-in-page-foonotes + // mechanism, so we can tweak them with style tweaks. We use a simple
instead. + m_writer->OnTagOpen(L"", L"div"); + m_writer->OnAttribute(L"", L"style", L"page-break-before: always"); + m_writer->OnTagBody(); +#endif //fallthrough default: m_state = tagId; @@ -3067,3 +3073,91 @@ void docxAbstractNum::reset() { m_levels.clear(); } + +void docx_titleHandler::onBodyStart() +{ + m_writer->OnTagOpen(L"", docx_el_body_name); +} + +void docx_titleHandler::onTitleStart(int level, bool noSection) +{ + CR_UNUSED(noSection); + + m_titleLevel = level; + lString16 name = cs16("h") + lString16::itoa(m_titleLevel); + if( m_useClassName ) { + m_writer->OnTagOpen(L"", L"p"); + m_writer->OnAttribute(L"", L"class", name.c_str()); + } else + m_writer->OnTagOpen(L"", name.c_str()); +} + +void docx_titleHandler::onTitleEnd() +{ + if( !m_useClassName ) { + lString16 tagName = cs16("h") + lString16::itoa(m_titleLevel); + m_writer->OnTagClose(L"", tagName.c_str()); + } else + m_writer->OnTagClose(L"", L"p"); +} + +void docx_fb2TitleHandler::onBodyStart() +{ + m_section = m_writer->OnTagOpen(L"", docx_el_body_name); +} + +void docx_fb2TitleHandler::onTitleStart(int level, bool noSection) +{ + if( noSection ) + docx_titleHandler::onTitleStart(level, true); + else { + if( m_titleLevel < level ) { + int startIndex = m_hasTitle ? 1 : 0; + int contentCount = m_section->getChildCount(); + if(contentCount > startIndex) + makeSection(startIndex); + } else + closeSection(m_titleLevel - level + 1); + openSection(level); + m_writer->OnTagOpen(L"", L"title"); + lString16 className = cs16("h") + lString16::itoa(level); + m_writer->OnAttribute(L"", L"class", className.c_str()); + m_writer->OnTagBody(); + m_writer->OnTagOpen(L"", L"p"); + } +} + +void docx_fb2TitleHandler::onTitleEnd() +{ + m_writer->OnTagClose(L"", L"title"); + m_writer->OnTagClose(L"", L"p"); + m_hasTitle = true; +} + +void docx_fb2TitleHandler::makeSection(int startIndex) +{ + ldomNode *newSection = m_section->insertChildElement(startIndex, LXML_NS_NONE, el_section); + newSection->initNodeStyle(); + m_section->moveItemsTo(newSection, startIndex + 1, m_section->getChildCount() - 1); + newSection->initNodeRendMethod( ); + m_section = newSection; +} + +void docx_fb2TitleHandler::openSection(int level) +{ + for(int i = m_titleLevel; i < level; i++) { + m_section = m_writer->OnTagOpen(L"", L"section"); + m_writer->OnTagBody(); + } + m_titleLevel = level; + m_hasTitle = false; +} + +void docx_fb2TitleHandler::closeSection(int level) +{ + for(int i = 0; i < level; i++) { + m_writer->OnTagClose(L"", L"section"); + m_titleLevel--; + } + m_hasTitle = false; +} From 85044caebb7263d0df5a99e5529ae5a3bdea9a10 Mon Sep 17 00:00:00 2001 From: Konstantin Potapov Date: Sun, 13 Oct 2019 21:14:11 +0600 Subject: [PATCH 07/11] Add preprocessor define to switch between FB2 and HTML DOM --- crengine/src/docxfmt.cpp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/crengine/src/docxfmt.cpp b/crengine/src/docxfmt.cpp index a736021ec0..a897ea0f77 100644 --- a/crengine/src/docxfmt.cpp +++ b/crengine/src/docxfmt.cpp @@ -10,6 +10,8 @@ // comment this out to disable in-page footnotes #define DOCX_CRENGINE_IN_PAGE_FOOTNOTES 1 +// build FB2 DOM, comment out to build HTML DOM +#define DOCX_FB2_DOM_STRUCTURE 1 /// known docx items name and identifier struct item_def_t { @@ -1413,7 +1415,7 @@ void docx_ElementHandler::generateLink(const lChar16 *target, const lChar16 *typ #ifndef DOCX_CRENGINE_IN_PAGE_FOOTNOTES if( !lStr_cmp(type, "note") ) { // For footnotes (but not endnotes), wrap in (to get the - // same effect upstream gets with the following in docx.css: + // same effect as the following in docx.css: // a[type="note"] { vertical-align: super; font-size: 70%; } m_writer->OnTagOpen(L"", L"sup"); m_writer->OnTagBody(); @@ -1864,11 +1866,11 @@ const lChar16 *docx_pHandler::getStyleTagName(lChar16 ch) case 'b': return L"strong"; case 'i': - return L"em"; // upstream uses L"emphasis"; + return L"em"; case 'u': return L"u"; case 's': - return L"s"; // upstream uses L"strike"; + return L"s"; case 't': return L"sup"; case 'd': @@ -2313,13 +2315,15 @@ bool ImportDocXDocument( LVStreamRef stream, ldomDocument * doc, LVDocViewCallba writer.OnTagClose(NULL, L"title-info"); writer.OnTagClose(NULL, L"description"); +#ifdef DOCX_FB2_DOM_STRUCTURE //Two options when dealing with titles: (FB2|HTML) docx_fb2TitleHandler titleHandler(&writer, &importContext); //
..
- //docx_titleHandler titleHandler(&writer, &importContext); //.. +#else + docx_titleHandler titleHandler(&writer, &importContext); //.. +#endif docx_documentHandler documentHandler(&docReader, &writer, &importContext, &titleHandler); docReader.setHandler(&documentHandler); - LVXMLParser parser(m_stream, &docReader); if ( !parser.Parse() ) From 515ac882f53deee32a603cd191649d0e351ffbf4 Mon Sep 17 00:00:00 2001 From: Konstantin Potapov Date: Sun, 13 Oct 2019 22:15:45 +0600 Subject: [PATCH 08/11] Added setting for usage of calss in title --- crengine/src/docxfmt.cpp | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/crengine/src/docxfmt.cpp b/crengine/src/docxfmt.cpp index a897ea0f77..90445f2673 100644 --- a/crengine/src/docxfmt.cpp +++ b/crengine/src/docxfmt.cpp @@ -12,6 +12,8 @@ #define DOCX_CRENGINE_IN_PAGE_FOOTNOTES 1 // build FB2 DOM, comment out to build HTML DOM #define DOCX_FB2_DOM_STRUCTURE 1 +//If true <p>...</p> else <hx>..</hx> +#define DOCX_USE_CLASS_FOR_HEADING true /// known docx items name and identifier struct item_def_t { @@ -903,8 +905,8 @@ class docx_titleHandler class docx_fb2TitleHandler : public docx_titleHandler { public: - docx_fb2TitleHandler(ldomDocumentWriter *writer, docxImportContext *context) : - docx_titleHandler(writer, context, true) + docx_fb2TitleHandler(ldomDocumentWriter *writer, docxImportContext *context, bool useClassName) : + docx_titleHandler(writer, context, useClassName) {} void onBodyStart(); void onTitleStart(int level, bool noSection = false); @@ -2317,7 +2319,7 @@ bool ImportDocXDocument( LVStreamRef stream, ldomDocument * doc, LVDocViewCallba #ifdef DOCX_FB2_DOM_STRUCTURE //Two options when dealing with titles: (FB2|HTML) - docx_fb2TitleHandler titleHandler(&writer, &importContext); //
..
+ docx_fb2TitleHandler titleHandler(&writer, &importContext, DOCX_USE_CLASS_FOR_HEADING); //
..
#else docx_titleHandler titleHandler(&writer, &importContext); //.. #endif @@ -3124,17 +3126,27 @@ void docx_fb2TitleHandler::onTitleStart(int level, bool noSection) closeSection(m_titleLevel - level + 1); openSection(level); m_writer->OnTagOpen(L"", L"title"); - lString16 className = cs16("h") + lString16::itoa(level); - m_writer->OnAttribute(L"", L"class", className.c_str()); - m_writer->OnTagBody(); - m_writer->OnTagOpen(L"", L"p"); + lString16 headingName = cs16("h") + lString16::itoa(level); + if( m_useClassName ) { + m_writer->OnAttribute(L"", L"class", headingName.c_str()); + m_writer->OnTagBody(); + m_writer->OnTagOpen(L"", L"p"); + } else { + m_writer->OnTagBody(); + m_writer->OnTagOpen(L"", headingName.c_str()); + } } } void docx_fb2TitleHandler::onTitleEnd() { + if( !m_useClassName ) { + lString16 headingName = cs16("h") + lString16::itoa(m_titleLevel); + m_writer->OnTagClose(L"", headingName.c_str()); + } else + m_writer->OnTagClose(L"", L"p"); + m_writer->OnTagClose(L"", L"title"); - m_writer->OnTagClose(L"", L"p"); m_hasTitle = true; } From 04c3e13e0908c461fe0929bb245099a1c98c8fa9 Mon Sep 17 00:00:00 2001 From: Konstantin Potapov Date: Mon, 14 Oct 2019 08:35:05 +0600 Subject: [PATCH 09/11] Cosmetic change for HTML DOM --- crengine/src/docxfmt.cpp | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/crengine/src/docxfmt.cpp b/crengine/src/docxfmt.cpp index 90445f2673..dba86391f6 100644 --- a/crengine/src/docxfmt.cpp +++ b/crengine/src/docxfmt.cpp @@ -2301,6 +2301,7 @@ bool ImportDocXDocument( LVStreamRef stream, ldomDocument * doc, LVDocViewCallba ldomDocumentWriter writer(doc); docXMLreader docReader(&writer); +#ifdef DOCX_FB2_DOM_STRUCTURE writer.OnStart(NULL); writer.OnTagOpen(NULL, L"?xml"); writer.OnAttribute(NULL, L"version", L"1.0"); @@ -2316,6 +2317,16 @@ bool ImportDocXDocument( LVStreamRef stream, ldomDocument * doc, LVDocViewCallba writer.OnTagClose(NULL, L"book-title"); writer.OnTagClose(NULL, L"title-info"); writer.OnTagClose(NULL, L"description"); +#else + writer.OnStart(NULL); + writer.OnTagOpen(NULL, L"?xml"); + writer.OnAttribute(NULL, L"version", L"1.0"); + writer.OnAttribute(NULL, L"encoding", L"utf-8"); + writer.OnEncoding(L"utf-8", NULL); + writer.OnTagBody(); + writer.OnTagClose(NULL, L"?xml"); + writer.OnTagOpenNoAttr(NULL, L"html"); +#endif #ifdef DOCX_FB2_DOM_STRUCTURE //Two options when dealing with titles: (FB2|HTML) @@ -2337,7 +2348,11 @@ bool ImportDocXDocument( LVStreamRef stream, ldomDocument * doc, LVDocViewCallba if(importContext.m_endNoteCount > 0) { parseFootnotes(writer, importContext, docx_el_endnotes); } +#ifdef DOCX_FB2_DOM_STRUCTURE writer.OnTagClose(NULL, L"FictionBook"); +#else + writer.OnTagClose(NULL, L"html"); +#endif writer.OnStop(); if ( progressCallback ) { From 5f0a53058a8a886024acb596b481ad4d1c631f9a Mon Sep 17 00:00:00 2001 From: Konstantin Potapov Date: Mon, 14 Oct 2019 08:38:35 +0600 Subject: [PATCH 10/11] Simplified docx.css --- cr3qt/data/docx.css | 48 ++++++++++++++++++++-------------------- crengine/src/docxfmt.cpp | 2 +- 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/cr3qt/data/docx.css b/cr3qt/data/docx.css index f3ba09dcc3..96f68ea700 100644 --- a/cr3qt/data/docx.css +++ b/cr3qt/data/docx.css @@ -1,6 +1,6 @@ body { text-align: left; margin: 0; text-indent: 0px } -p { $def.all } +p { $def.all } empty-line { height: 1em } @@ -14,45 +14,45 @@ li image { display: inline } li { display: list-item; text-indent: 0em; } ol { display: block; list-style-type: decimal; margin-left: 1em } -title.h1 p, title.h2 p { +.h1, .h2 { $title.all } -title.h3 p, title.h4 p, title.h5 p, title.h6 p { +.h3, .h4, .h5, .h6 p { $subtitle.all } -title.h1, title.h2, title.h3, title.h4, title.h5, title.h6 { - hyphenate: none; +.h1, .h2, .h3, .h4, .h5, .h6 { + hyphenate: none; } -title.h1, title.h2, title.h3, title.h4, title.h5, title.h6 { - display: block; - margin-top: 0.5em; - margin-bottom: 0.3em; - padding: 10px ; - margin-top: 0.5em; - margin-bottom: 0.5em; +.h1, .h2, .h3, .h4, .h5, .h6 { + display: block; + margin-top: 0.5em; + margin-bottom: 0.3em; + padding: 10px ; + margin-top: 0.5em; + margin-bottom: 0.5em; } -title.h1, title.h2 { - page-break-inside: avoid; - page-break-after: avoid; +.h1, .h2 { + page-break-inside: avoid; + page-break-after: avoid; } -title.h3, title.h4, title.h5, title.h6 { - page-break-inside: avoid; - page-break-after: avoid; +.h3, .h4, .h5, .h6 { + page-break-inside: avoid; + page-break-after: avoid; } -title.h1 { font-size: 150% } -title.h2 { font-size: 140% } -title.h3 { font-size: 130% } -title.h4 { font-size: 120% } -title.h5 { font-size: 110% } +.h1 { font-size: 150% } +.h2 { font-size: 140% } +.h3 { font-size: 130% } +.h4 { font-size: 120% } +.h5 { font-size: 110% } table { font-size: 80% } td, th { text-indent: 0px; padding: 3px } -th { font-weight: bold; text-align: center; background-color: #DDD } +th { font-weight: bold; text-align: center; background-color: #DDD } /* #808080; */ table caption { text-indent: 0px; padding: 4px; background-color: #EEE } diff --git a/crengine/src/docxfmt.cpp b/crengine/src/docxfmt.cpp index dba86391f6..8bb9fa8d9b 100644 --- a/crengine/src/docxfmt.cpp +++ b/crengine/src/docxfmt.cpp @@ -3143,9 +3143,9 @@ void docx_fb2TitleHandler::onTitleStart(int level, bool noSection) m_writer->OnTagOpen(L"", L"title"); lString16 headingName = cs16("h") + lString16::itoa(level); if( m_useClassName ) { - m_writer->OnAttribute(L"", L"class", headingName.c_str()); m_writer->OnTagBody(); m_writer->OnTagOpen(L"", L"p"); + m_writer->OnAttribute(L"", L"class", headingName.c_str()); } else { m_writer->OnTagBody(); m_writer->OnTagOpen(L"", headingName.c_str()); From a5ec1d5530ca1aebb65e64355f294c6f9d86c5e1 Mon Sep 17 00:00:00 2001 From: pkb Date: Mon, 14 Oct 2019 15:20:40 +0600 Subject: [PATCH 11/11] Fixed css selector for h6 --- cr3qt/data/docx.css | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cr3qt/data/docx.css b/cr3qt/data/docx.css index 96f68ea700..a4ac8db8f3 100644 --- a/cr3qt/data/docx.css +++ b/cr3qt/data/docx.css @@ -18,7 +18,7 @@ ol { display: block; list-style-type: decimal; margin-left: 1em } $title.all } -.h3, .h4, .h5, .h6 p { +.h3, .h4, .h5, .h6 { $subtitle.all }