From 2a4d339e53d6438e1b6dfc2ed892f6a0f61f9eab Mon Sep 17 00:00:00 2001 From: Jeroen Hellingman Date: Sat, 2 Jul 2016 21:23:39 +0200 Subject: [PATCH] Move some experimental files out of the way. --- .gitignore | 3 + inclusions.xsl | 4 +- sandbox/README.md | 2 + breaklines.pl => sandbox/breaklines.pl | 22 +- breaklines.xsl => sandbox/breaklines.xsl | 288 +++---- css2xml.xsl => sandbox/css2xml.xsl | 638 +++++++-------- extract-page.xsl => sandbox/extract-page.xsl | 554 ++++++------- extract-segs.xsl => sandbox/extract-segs.xsl | 72 +- fb2tei.xsl => sandbox/fb2tei.xsl | 786 +++++++++---------- html2tei.xsl => sandbox/html2tei.xsl | 694 ++++++++-------- 10 files changed, 1534 insertions(+), 1529 deletions(-) create mode 100644 sandbox/README.md rename breaklines.pl => sandbox/breaklines.pl (97%) rename breaklines.xsl => sandbox/breaklines.xsl (97%) rename css2xml.xsl => sandbox/css2xml.xsl (96%) rename extract-page.xsl => sandbox/extract-page.xsl (97%) rename extract-segs.xsl => sandbox/extract-segs.xsl (96%) rename fb2tei.xsl => sandbox/fb2tei.xsl (96%) rename html2tei.xsl => sandbox/html2tei.xsl (96%) diff --git a/.gitignore b/.gitignore index 05389ea4..221f0c8a 100644 --- a/.gitignore +++ b/.gitignore @@ -58,3 +58,6 @@ images/stock_book_yellow-16.png images/stock_book-16.png images/stock_volume-16.png images/thumbs/cover500.jpg +attic/ +test-kwic.html +metadata.xml diff --git a/inclusions.xsl b/inclusions.xsl index e964318e..f3a1ca37 100644 --- a/inclusions.xsl +++ b/inclusions.xsl @@ -9,8 +9,8 @@ > - Stylesheet to align paragraphs in two TEI documents. -

Stylesheet to align paragraphs in two TEI documents.

+ Stylesheet to include external files into TEI documents. +

Stylesheet to include external files into TEI documents.

diff --git a/sandbox/README.md b/sandbox/README.md new file mode 100644 index 00000000..b7bfc690 --- /dev/null +++ b/sandbox/README.md @@ -0,0 +1,2 @@ + +This directory contains unfinished or experimental stylesheets. \ No newline at end of file diff --git a/breaklines.pl b/sandbox/breaklines.pl similarity index 97% rename from breaklines.pl rename to sandbox/breaklines.pl index 29e31c32..f23c93a3 100644 --- a/breaklines.pl +++ b/sandbox/breaklines.pl @@ -1,11 +1,11 @@ -# Perl script to test the the tei2wl.xsl stylesheet with Saxon. - -use strict; - -my $xsldir = "C:\\Users\\Jeroen\\Documents\\eLibrary\\Tools\\tei2html"; # location of xsl stylesheets -my $saxon = "\"C:\\Program Files\\Java\\jre6\\bin\\java.exe\" -jar C:\\bin\\saxonhe9\\saxon9he.jar "; - -my $filename = $ARGV[0]; - -system ("$saxon $filename $xsldir/breaklines.xsl"); - +# Perl script to test the the tei2wl.xsl stylesheet with Saxon. + +use strict; + +my $xsldir = "C:\\Users\\Jeroen\\Documents\\eLibrary\\Tools\\tei2html"; # location of xsl stylesheets +my $saxon = "\"C:\\Program Files\\Java\\jre6\\bin\\java.exe\" -jar C:\\bin\\saxonhe9\\saxon9he.jar "; + +my $filename = $ARGV[0]; + +system ("$saxon $filename $xsldir/breaklines.xsl"); + diff --git a/breaklines.xsl b/sandbox/breaklines.xsl similarity index 97% rename from breaklines.xsl rename to sandbox/breaklines.xsl index 3add5bc0..fee3253a 100644 --- a/breaklines.xsl +++ b/sandbox/breaklines.xsl @@ -1,144 +1,144 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/css2xml.xsl b/sandbox/css2xml.xsl similarity index 96% rename from css2xml.xsl rename to sandbox/css2xml.xsl index e0343fec..af032700 100644 --- a/css2xml.xsl +++ b/sandbox/css2xml.xsl @@ -1,320 +1,320 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # - - - - - - - - - @ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -{ - - -} - - - - - - - : - - ; - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + # + + + + + + + + + @ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +{ + + +} + + + + + + + : + + ; + + \ No newline at end of file diff --git a/extract-page.xsl b/sandbox/extract-page.xsl similarity index 97% rename from extract-page.xsl rename to sandbox/extract-page.xsl index 61a39aa4..7a2a1006 100644 --- a/extract-page.xsl +++ b/sandbox/extract-page.xsl @@ -1,277 +1,277 @@ - - - - - Extract page from a TEI document. - -

This stylesheet extracts a page from a TEI document, that is, all content between two pb-elements.

- -

This is somewhat more complicated than it appears at first because page-break elements are milestones, that - do not follow any particular structure, and - sometimes footnotes spread out over more than one page, and we only want the content that actually occurs on - the page indicated, that is, including any parts of footnotes on a previous page that are carried over to - the page being extracted, and excluding parts of footnotes that have been carried over to following pages.

- -

To make the code to achieve this somewhat readable, we first pre-process the TEI document, such that - page-breaks in footnotes use a different element (fnpb), and all page-breaks have a @p attribute, indicating - their position (we clean this up later-on). Then we split-up the horrendous test required to achieve this - into several parts.

-
- Jeroen Hellingman - 2011, Jeroen Hellingman -
- - - - - - Number of page to extract (based on @n attribute). - - - - - Number of page following the page to extract, determined by code. - - - - - - - Number of page to extract (based on position). - - - - - - - - - - - Extracting page '' at position - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Extracting a fragment between milestones from a complex structure. - -I am looking for a template which allows me to extract a single page, as marked by milestone -elements from a complex structure. Note that the milestone elements may occur in -different parents. I want all the parent nodes, and everything between the and the next - element - -For example, when applying the extract method with as parameter "12" on the following sample - - - ... - - - Blah - Blah blah blah blah. - - - Blah blah blah blah. - - - Blah blah blah blah. - - - ... - - -It should return: - - - - - blah blah. - - - Blah blah blah blah. - - - Blah blah - - - - -The idea is that it will copy only elements that are a ancestor of either and follow -the first and precede the second . The problem I face is formulating a nice XPath to select -those elements. - - - -
+ + + + + Extract page from a TEI document. + +

This stylesheet extracts a page from a TEI document, that is, all content between two pb-elements.

+ +

This is somewhat more complicated than it appears at first because page-break elements are milestones, that + do not follow any particular structure, and + sometimes footnotes spread out over more than one page, and we only want the content that actually occurs on + the page indicated, that is, including any parts of footnotes on a previous page that are carried over to + the page being extracted, and excluding parts of footnotes that have been carried over to following pages.

+ +

To make the code to achieve this somewhat readable, we first pre-process the TEI document, such that + page-breaks in footnotes use a different element (fnpb), and all page-breaks have a @p attribute, indicating + their position (we clean this up later-on). Then we split-up the horrendous test required to achieve this + into several parts.

+
+ Jeroen Hellingman + 2011, Jeroen Hellingman +
+ + + + + + Number of page to extract (based on @n attribute). + + + + + Number of page following the page to extract, determined by code. + + + + + + + Number of page to extract (based on position). + + + + + + + + + + + Extracting page '' at position + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Extracting a fragment between milestones from a complex structure. + +I am looking for a template which allows me to extract a single page, as marked by milestone +elements from a complex structure. Note that the milestone elements may occur in +different parents. I want all the parent nodes, and everything between the and the next + element + +For example, when applying the extract method with as parameter "12" on the following sample + + + ... + + + Blah + Blah blah blah blah. + + + Blah blah blah blah. + + + Blah blah blah blah. + + + ... + + +It should return: + + + + + blah blah. + + + Blah blah blah blah. + + + Blah blah + + + + +The idea is that it will copy only elements that are a ancestor of either and follow +the first and precede the second . The problem I face is formulating a nice XPath to select +those elements. + + + +
diff --git a/extract-segs.xsl b/sandbox/extract-segs.xsl similarity index 96% rename from extract-segs.xsl rename to sandbox/extract-segs.xsl index c02da819..26f5f11b 100644 --- a/extract-segs.xsl +++ b/sandbox/extract-segs.xsl @@ -1,36 +1,36 @@ - - - - - Extract segments from a TEI document. - -

This stylesheet extracts segments from an XHTML document.

-
- Jeroen Hellingman - 2014, Jeroen Hellingman -
- - - - - - - - - - | - - - - -
+ + + + + Extract segments from a TEI document. + +

This stylesheet extracts segments from an XHTML document.

+
+ Jeroen Hellingman + 2014, Jeroen Hellingman +
+ + + + + + + + + + | + + + + +
diff --git a/fb2tei.xsl b/sandbox/fb2tei.xsl similarity index 96% rename from fb2tei.xsl rename to sandbox/fb2tei.xsl index 595fcc10..99dac6af 100644 --- a/fb2tei.xsl +++ b/sandbox/fb2tei.xsl @@ -1,394 +1,394 @@ - - - - -]> - - - - XSLT stylesheet to convert a fictionbook format text to TEI. - This stylesheet converts a fictionbook (.fb2) file to TEI. - Jeroen Hellingman - 2015, Jeroen Hellingman - - - - - - - - - &lf; - -
-

- -

- - image() - - Front Cover. -
-

-
-
-
- &lf; - - &lf; - - - - -
- -
-
- - - - - - - - <xsl:value-of select="fb2:title-info/fb2:book-title"/> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <xsl:value-of select="fb2:title-info/fb2:book-title"/> - - - - - - - - - - - - - - - TODO: lookup main language name. - - - TODO: lookup language name. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Unhandled fb2 element: - - - - - - - &lf; - - - - - - - - &lf; -
- -
-
- - - - - - - - - &lf; -

- - - - - -

-
- - - - &lf; - - - - - - - - &lf; -

- -

-
- - - - &lf;&lf; -

- -

-
- - - &lf; -

- -

-
- - - - - - - - - - - - - - &lf; - - - - - - - - &lf;&lf; - - &lf; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - &lf; - - - - - - - &lf; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - &lf; -
- - image( - - ) - -
-
- - - - - Extracted binary file: - - - - - - - - - - - - .jpg - .png - .gif - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + +]> + + + + XSLT stylesheet to convert a fictionbook format text to TEI. + This stylesheet converts a fictionbook (.fb2) file to TEI. + Jeroen Hellingman + 2015, Jeroen Hellingman + + + + + + + + + &lf; + +
+

+ +

+ + image() + + Front Cover. +
+

+
+
+
+ &lf; + + &lf; + + + + +
+ +
+
+ + + + + + + + <xsl:value-of select="fb2:title-info/fb2:book-title"/> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + <xsl:value-of select="fb2:title-info/fb2:book-title"/> + + + + + + + + + + + + + + + TODO: lookup main language name. + + + TODO: lookup language name. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Unhandled fb2 element: + + + + + + + &lf; + + + + + + + + &lf; +
+ +
+
+ + + + + + + + + &lf; +

+ + + + + +

+
+ + + + &lf; + + + + + + + + &lf; +

+ +

+
+ + + + &lf;&lf; +

+ +

+
+ + + &lf; +

+ +

+
+ + + + + + + + + + + + + + &lf; + + + + + + + + &lf;&lf; + + &lf; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + &lf; + + + + + + + &lf; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + &lf; +
+ + image( + + ) + +
+
+ + + + + Extracted binary file: + + + + + + + + + + + + .jpg + .png + .gif + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
\ No newline at end of file diff --git a/html2tei.xsl b/sandbox/html2tei.xsl similarity index 96% rename from html2tei.xsl rename to sandbox/html2tei.xsl index 7517276b..ed5b1810 100644 --- a/html2tei.xsl +++ b/sandbox/html2tei.xsl @@ -1,348 +1,348 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

- -

- - -
-
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Unhandled element - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+ +

+ + +
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Unhandled element + + + + +
\ No newline at end of file