Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP

Loading…

Initial support for FictionBook 2 output format #2

Merged
merged 7 commits into from

2 participants

@mgorny

FictionBook 2 is the primary file format supported by FBReader and is very popular in Russia. It is based on clean XML and is very strict, so converting the current WLXML to it correctly is not easy.

This (initial) version should be able to convert basic (simple) prose. I've been able to successfully convert (corrected) 'Syzyfowe prace' and read it using FBReader. I will still work on improving the converter with other books.

@rczajka rczajka merged commit 2258dea into fnp:master
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Commits on Apr 24, 2012
  1. @mgorny
  2. @mgorny

    Initial FictionBook 2 output support.

    mgorny authored
    This is a work-in-progress and still needs a lot of polishing.
Commits on May 6, 2012
  1. @mgorny
  2. @mgorny

    FB2: a few more elements.

    mgorny authored
  3. @mgorny
Commits on May 12, 2012
  1. @mgorny
  2. @mgorny
This page is out of date. Refresh to see the latest.
View
35 librarian/fb2.py
@@ -0,0 +1,35 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
+import os.path
+from copy import deepcopy
+from lxml import etree
+
+from librarian import functions, OutputFile
+
+
+functions.reg_substitute_entities()
+
+def transform(wldoc, verbose=False,
+ cover=None, flags=None):
+ """ produces a FB2 file
+
+ cover: a cover.Cover object or True for default
+ flags: less-advertising, working-copy
+ """
+
+ document = deepcopy(wldoc)
+ del wldoc
+
+ if flags:
+ for flag in flags:
+ document.edoc.getroot().set(flag, 'yes')
+
+ style_filename = os.path.join(os.path.dirname(__file__), 'fb2/fb2.xslt')
+ style = etree.parse(style_filename)
+
+ result = document.transform(style)
+
+ return OutputFile.from_string(unicode(result).encode('utf-8'))
View
83 librarian/fb2/description.xslt
@@ -0,0 +1,83 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!--
+
+ This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+ Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+
+-->
+<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+    xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+    xmlns:dc="http://purl.org/dc/elements/1.1/"
+    xmlns="http://www.gribuser.ru/xml/fictionbook/2.0"
+    exclude-result-prefixes="rdf dc">
+  <!--
+    rdf and dc are only used to match the source metadata, so they are
+    excluded from the result; otherwise their declarations would be copied
+    onto the generated <description> element.
+  -->
+
+  <!-- description parsing -->
+  <!--
+    Builds the FB2 <description> from an rdf:Description element.
+    Each Dublin Core child is converted by the matching template in
+    "description" mode below.
+  -->
+  <xsl:template match="rdf:Description" mode="outer">
+    <description>
+      <!-- need to keep ordering here... -->
+
+      <title-info>
+        <!-- obligatory: genre, author, book-title, lang -->
+
+        <!-- XXX -->
+        <genre>literature</genre>
+        <xsl:apply-templates mode="description"
+            select="dc:creator"/>
+        <xsl:apply-templates mode="description"
+            select="dc:title"/>
+        <xsl:apply-templates mode="description"
+            select="dc:date.pd"/>
+        <xsl:apply-templates mode="description"
+            select="dc:language"/>
+      </title-info>
+      <document-info>
+        <!-- obligatory: author, date, id, version -->
+
+        <xsl:apply-templates mode="description"
+            select="dc:contributor.editor"/>
+        <xsl:apply-templates mode="description"
+            select="dc:contributor.technical_editor"/>
+        <program-used>book2fb2</program-used>
+        <!-- maybe today's date instead? -->
+        <xsl:apply-templates mode="description"
+            select="dc:date"/>
+        <xsl:apply-templates mode="description"
+            select="dc:identifier.url"/>
+        <!-- XXX -->
+        <version>0</version>
+      </document-info>
+      <publish-info>
+        <xsl:apply-templates mode="description"
+            select="dc:publisher"/>
+      </publish-info>
+    </description>
+  </xsl:template>
+
+  <!--
+    People (author, editors) become FB2 <author> elements.
+
+    The source convention is "last name, first name". A value without a
+    comma (a single-word name, an institution, ...) cannot be split; the
+    comma-based substring functions would then return two empty strings,
+    so such a value is emitted whole as <nickname> instead, which FB2
+    accepts as the alternative form of an author.
+  -->
+  <xsl:template mode="description"
+      match="dc:creator|dc:contributor.editor|dc:contributor.technical_editor">
+    <author>
+      <xsl:choose>
+        <xsl:when test="contains(., ',')">
+          <!-- last name, first name -->
+          <first-name>
+            <xsl:value-of
+                select="normalize-space(substring-after(., ','))"/>
+          </first-name>
+          <last-name>
+            <xsl:value-of
+                select="normalize-space(substring-before(., ','))"/>
+          </last-name>
+        </xsl:when>
+        <xsl:otherwise>
+          <nickname><xsl:value-of select="normalize-space(.)"/></nickname>
+        </xsl:otherwise>
+      </xsl:choose>
+    </author>
+  </xsl:template>
+
+  <!-- The remaining fields are copied verbatim into their FB2 elements. -->
+  <xsl:template mode="description" match="dc:title">
+    <book-title><xsl:value-of select="."/></book-title>
+  </xsl:template>
+  <xsl:template mode="description" match="dc:language">
+    <lang><xsl:value-of select="."/></lang>
+  </xsl:template>
+  <xsl:template mode="description" match="dc:date.pd|dc:date">
+    <date><xsl:value-of select="."/></date>
+  </xsl:template>
+  <xsl:template mode="description" match="dc:publisher">
+    <publisher><xsl:value-of select="."/></publisher>
+  </xsl:template>
+  <xsl:template mode="description" match="dc:identifier.url">
+    <id><xsl:value-of select="."/></id>
+  </xsl:template>
+</xsl:stylesheet>
View
84 librarian/fb2/fb2.xslt
@@ -0,0 +1,84 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!--
+
+ This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+ Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+
+-->
+<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+ xmlns:wl="http://wolnelektury.pl/functions"
+ xmlns="http://www.gribuser.ru/xml/fictionbook/2.0"
+ xmlns:l="http://www.w3.org/1999/xlink">
+
+ <!--
+ Entry stylesheet of the FB2 converter. The included modules supply the
+ templates for the modes used here: outer, title, para, sections, inline
+ and footnotes.
+ -->
+ <xsl:include href="description.xslt"/>
+ <xsl:include href="footnotes.xslt"/>
+ <xsl:include href="inline.xslt"/>
+ <xsl:include href="paragraphs.xslt"/>
+ <xsl:include href="poems.xslt"/>
+ <xsl:include href="sections.xslt"/>
+
+ <!-- whitespace-only text nodes of the source document are dropped -->
+ <xsl:strip-space elements="*"/>
+ <xsl:output encoding="utf-8" method="xml" indent="yes"/>
+
+ <!--
+ Root of the source document. The children are processed twice: once in
+ "outer" mode (description and main body) and once in "footnotes" mode,
+ which collects the footnote bodies into a separate body element.
+ -->
+ <xsl:template match="utwor">
+ <FictionBook>
+ <xsl:apply-templates mode="outer"/>
+
+ <body name="footnotes">
+ <xsl:apply-templates mode="footnotes"/>
+ </body>
+ </FictionBook>
+ </xsl:template>
+
+ <!-- we can't handle lyrics nicely yet -->
+ <!--
+ Main text container. Emits, in this order: an optional title, a fixed
+ credit epigraph, one leading section, and then whatever the children
+ produce in "sections" mode.
+ NOTE(review): liryka_lp is matched here but only liryka_l gets the poem
+ wrapper below; liryka_lp goes through the prose branch. Confirm intended.
+ -->
+ <xsl:template match="powiesc|opowiadanie|liryka_l|liryka_lp" mode="outer">
+ <body> <!-- main body for main book flow -->
+ <xsl:if test="autor_utworu or nazwa_utworu">
+ <title>
+ <xsl:apply-templates mode="title"
+ select="autor_utworu|dzielo_nadrzedne|nazwa_utworu"/>
+ </title>
+ </xsl:if>
+
+ <epigraph>
+ <p>
+ Utwór opracowany został w&#160;ramach projektu
+ <a l:href="http://www.wolnelektury.pl/">Wolne Lektury</a>
+ przez <a l:href="http://www.nowoczesnapolska.org.pl/">fundację
+ Nowoczesna Polska</a>.
+ </p>
+ </epigraph>
+
+ <!-- total number of chapter headings among the direct children -->
+ <xsl:variable name="sections" select="count(naglowek_rozdzial)"/>
+ <section>
+ <xsl:choose>
+ <xsl:when test="local-name() = 'liryka_l'">
+ <poem>
+ <xsl:apply-templates mode="para"/>
+ </poem>
+ </xsl:when>
+
+ <xsl:otherwise>
+ <!--
+ A child that still has every heading after it precedes the first
+ heading, so this selects the text before the first chapter.
+ NOTE(review): if nothing is selected, or nothing selected has a
+ "para" template, this section element comes out empty.
+ -->
+ <xsl:apply-templates mode="para"
+ select="*[count(following-sibling::naglowek_rozdzial)
+ = $sections]"/>
+ </xsl:otherwise>
+ </xsl:choose>
+ </section>
+
+ <!-- chapters (and epigraph-like elements) are produced in this mode -->
+ <xsl:apply-templates mode="sections"/>
+ </body>
+ </xsl:template>
+
+ <!-- editorial remarks and extras produce no output in outer mode -->
+ <xsl:template match="uwaga" mode="outer"/>
+ <xsl:template match="extra" mode="outer"/>
+
+ <!-- each selected title element becomes one paragraph of the title -->
+ <xsl:template mode="title" match="*">
+ <!-- title -->
+
+ <p><xsl:apply-templates mode="inline"/></p>
+ </xsl:template>
+
+ <xsl:template match="uwaga" mode="title"/>
+ <xsl:template match="extra" mode="title"/>
+</xsl:stylesheet>
View
37 librarian/fb2/footnotes.xslt
@@ -0,0 +1,37 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!--
+
+ This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+ Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+
+-->
+<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+    xmlns:wl="http://wolnelektury.pl/functions"
+    xmlns:dc="http://purl.org/dc/elements/1.1/"
+    xmlns="http://www.gribuser.ru/xml/fictionbook/2.0"
+    xmlns:l="http://www.w3.org/1999/xlink">
+
+  <!--
+    Footnotes are rendered twice: as a link in the running text ("inline"
+    mode) and as a section holding the note body ("footnotes" mode).
+    Both templates compute the same number, so link and target agree.
+  -->
+
+  <!-- footnote body mode -->
+  <xsl:template match="pe" mode="footnotes">
+    <!-- we number them absolutely -->
+    <xsl:variable name="n" select="count(preceding::pe) + 1"/>
+
+    <xsl:element name="section">
+      <xsl:attribute name="id">fn<xsl:value-of select="$n"/></xsl:attribute>
+
+      <p><xsl:apply-templates mode="inline"/></p>
+    </xsl:element>
+  </xsl:template>
+  <!-- all text outside footnotes is dropped in this mode -->
+  <xsl:template match="text()" mode="footnotes"/>
+
+  <!-- footnote links -->
+  <xsl:template match="pe" mode="inline">
+    <xsl:variable name="n" select="count(preceding::pe) + 1"/>
+    <xsl:element name="a">
+      <xsl:attribute name="type">note</xsl:attribute>
+      <xsl:attribute name="l:href">#fn<xsl:value-of select="$n"/></xsl:attribute>
+
+      <!--
+        The brackets are wrapped in xsl:text on purpose: a bare "[" or "]"
+        in the stylesheet forms a text node together with the surrounding
+        newlines and indentation, all of which would end up in the link.
+      -->
+      <xsl:text>[</xsl:text>
+      <xsl:value-of select="$n"/>
+      <xsl:text>]</xsl:text>
+    </xsl:element>
+  </xsl:template>
+</xsl:stylesheet>
View
38 librarian/fb2/inline.xslt
@@ -0,0 +1,38 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!--
+
+ This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+ Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+
+-->
+<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+    xmlns:wl="http://wolnelektury.pl/functions"
+    xmlns:dc="http://purl.org/dc/elements/1.1/"
+    xmlns="http://www.gribuser.ru/xml/fictionbook/2.0"
+    xmlns:l="http://www.w3.org/1999/xlink">
+
+  <!-- inline elements -->
+  <!--
+    Every template here must carry mode="inline": callers always use
+    apply-templates with mode="inline", and a template without that mode
+    is never considered for it (the built-in rule would then just recurse
+    and drop the formatting).
+  -->
+
+  <!-- ignored -->
+  <xsl:template match="motyw" mode="inline"/>
+
+  <!-- formatting -->
+  <!-- foreign words and titles of works are emphasised -->
+  <xsl:template match="slowo_obce|tytul_dziela" mode="inline">
+    <emphasis>
+      <xsl:apply-templates mode="inline"/>
+    </emphasis>
+  </xsl:template>
+  <!-- highlighted text is rendered strong -->
+  <xsl:template match="wyroznienie" mode="inline">
+    <strong>
+      <xsl:apply-templates mode="inline"/>
+    </strong>
+  </xsl:template>
+
+  <!-- text -->
+  <!-- text is passed through the wl:substitute_entities extension function -->
+  <xsl:template match="text()" mode="inline">
+    <xsl:value-of select="wl:substitute_entities(.)"/>
+  </xsl:template>
+
+  <!-- editorial remarks and extras produce no inline output -->
+  <xsl:template match="uwaga" mode="inline"/>
+  <xsl:template match="extra" mode="inline"/>
+</xsl:stylesheet>
View
24 librarian/fb2/paragraphs.xslt
@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!--
+
+ This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+ Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+
+-->
+<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+ xmlns:wl="http://wolnelektury.pl/functions"
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
+ xmlns="http://www.gribuser.ru/xml/fictionbook/2.0"
+ xmlns:l="http://www.w3.org/1999/xlink">
+
+ <!-- in paragraph mode -->
+
+ <!-- plain and dialogue paragraphs both become an FB2 <p> -->
+ <xsl:template mode="para" match="akap|akap_dialog">
+ <!-- paragraphs & similar -->
+
+ <p><xsl:apply-templates mode="inline"/></p>
+ </xsl:template>
+
+ <!--
+ Catch-all rules: any element without a more specific template in "para"
+ or "sections" mode produces no output and its children are not visited.
+ -->
+ <xsl:template mode="para" match="*"/>
+ <xsl:template mode="sections" match="*"/>
+</xsl:stylesheet>
View
53 librarian/fb2/poems.xslt
@@ -0,0 +1,53 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!--
+
+ This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+ Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+
+-->
+<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+    xmlns:wl="http://wolnelektury.pl/functions"
+    xmlns:dc="http://purl.org/dc/elements/1.1/"
+    xmlns="http://www.gribuser.ru/xml/fictionbook/2.0"
+    xmlns:l="http://www.w3.org/1999/xlink">
+
+  <!-- poems -->
+
+  <!-- match poem citations -->
+  <xsl:template mode="para" match="poezja_cyt">
+    <cite>
+      <poem>
+        <xsl:apply-templates mode="para"/>
+      </poem>
+    </cite>
+  </xsl:template>
+
+  <!-- regular poem elements -->
+  <!--
+    A stanza is handed to split-poem as a whole, so only its string value
+    is used.
+    NOTE(review): this flattens any child elements of the stanza (inline
+    formatting, footnotes) into plain verse text.
+  -->
+  <xsl:template mode="para" match="strofa">
+    <stanza>
+      <xsl:call-template name="split-poem">
+        <xsl:with-param name="list" select="."/>
+      </xsl:call-template>
+    </stanza>
+  </xsl:template>
+
+  <!-- split into verses -->
+  <!--
+    Recursively cuts $list at each "/" and emits one <v> per piece.
+
+    list: the remaining stanza text; defaults to the empty string.
+
+    Recursion stops once only whitespace is left, so a stanza that ends
+    with "/" followed by a line break does not get a trailing empty verse.
+    Each verse is whitespace-normalised because the text after a "/"
+    normally starts with the line break and indentation of the source.
+  -->
+  <xsl:template name="split-poem">
+    <xsl:param name="list"/>
+
+    <xsl:if test="normalize-space($list) != ''">
+      <!-- appending "/" makes substring-before work for the last verse -->
+      <xsl:variable name="before"
+          select="substring-before(concat($list, '/'), '/')"/>
+      <xsl:variable name="after"
+          select="substring-after($list, '/')"/>
+
+      <v>
+        <xsl:value-of select="normalize-space($before)"/>
+      </v>
+
+      <xsl:call-template name="split-poem">
+        <xsl:with-param name="list" select="$after"/>
+      </xsl:call-template>
+    </xsl:if>
+  </xsl:template>
+</xsl:stylesheet>
View
44 librarian/fb2/sections.xslt
@@ -0,0 +1,44 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!--
+
+ This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+ Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+
+-->
+<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+ xmlns:wl="http://wolnelektury.pl/functions"
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
+ xmlns="http://www.gribuser.ru/xml/fictionbook/2.0"
+ xmlns:l="http://www.w3.org/1999/xlink">
+
+ <!-- a nice epigraph section -->
+ <!-- dedications and notes: their children are rendered in "para" mode -->
+ <xsl:template match="dedykacja|nota|nota_red" mode="sections">
+ <epigraph>
+ <xsl:apply-templates mode="para"/>
+ <!-- XXX: <strofa/> can be here as well -->
+ </epigraph>
+ </xsl:template>
+
+ <!-- main text is split by headings -->
+ <xsl:template match="naglowek_rozdzial" mode="sections">
+ <!--
+
+ This one's tricky - we need to split the text into sections.
+ In order to do that, all elements belonging to a single section
+ must have something in common. We assume that this common factor
+ is having the same number of following section headings.
+
+ The selection below therefore picks every sibling (the heading itself
+ included) that has as many headings after it as the current heading.
+
+ -->
+
+ <section>
+ <xsl:apply-templates mode="para"
+ select="../*[count(following-sibling::naglowek_rozdzial)
+ = count(current()/following-sibling::naglowek_rozdzial)]"/>
+ </section>
+ </xsl:template>
+
+ <!-- actual headings -->
+ <!-- in "para" mode the heading itself becomes the section title -->
+ <xsl:template match="naglowek_rozdzial" mode="para">
+ <title><p><xsl:apply-templates mode="inline"/></p></title>
+ </xsl:template>
+</xsl:stylesheet>
View
4 librarian/parser.py
@@ -185,6 +185,10 @@ def as_mobi(self, *args, **kwargs):
from librarian import mobi
return mobi.transform(self, *args, **kwargs)
+ def as_fb2(self, *args, **kwargs):
+ """Convert this document with librarian.fb2.transform.
+
+ All positional and keyword arguments are forwarded unchanged and the
+ value returned by the transform is returned as-is.
+ """
+ # Imported at call time, mirroring as_mobi above.
+ from librarian import fb2
+ return fb2.transform(self, *args, **kwargs)
+
def save_output_file(self, output_file, output_path=None,
output_dir_path=None, make_author_dir=False, ext=None):
if output_dir_path:
View
2  scripts/book2epub
@@ -23,7 +23,7 @@ if __name__ == '__main__':
parser.add_option('-c', '--with-cover', action='store_true', dest='with_cover', default=False,
help='create default cover')
parser.add_option('-w', '--working-copy', action='store_true', dest='working_copy', default=False,
- help='specifies the directory for output')
+ help='mark the output as a working copy')
parser.add_option('-d', '--make-dir', action='store_true', dest='make_dir', default=False,
help='create a directory for author and put the PDF in it')
parser.add_option('-o', '--output-file', dest='output_file', metavar='FILE',
View
68 scripts/book2fb2
@@ -0,0 +1,68 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
+import os.path
+import optparse
+
+from librarian import DirDocProvider, ParseError
+from librarian.parser import WLDocument
+
+
+if __name__ == '__main__':
+ # Parse commandline arguments
+ usage = """Usage: %prog [options] SOURCE [SOURCE...]
+ Convert SOURCE files to FB2 format."""
+
+ parser = optparse.OptionParser(usage=usage)
+
+ parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False,
+ help='print status messages to stdout')
+ parser.add_option('-c', '--with-cover', action='store_true', dest='with_cover', default=False,
+ help='create default cover')
+ parser.add_option('-w', '--working-copy', action='store_true', dest='working_copy', default=False,
+ help='mark the output as a working copy')
+ parser.add_option('-d', '--make-dir', action='store_true', dest='make_dir', default=False,
+ help='create a directory for author and put the PDF in it')
+ parser.add_option('-o', '--output-file', dest='output_file', metavar='FILE',
+ help='specifies the output file')
+ parser.add_option('-O', '--output-dir', dest='output_dir', metavar='DIR',
+ help='specifies the directory for output')
+
+ options, input_filenames = parser.parse_args()
+
+ if len(input_filenames) < 1:
+ parser.print_help()
+ exit(1)
+
+ flags = []
+ if options.working_copy:
+ flags.append('working-copy')
+
+ # Do some real work
+ try:
+ for main_input in input_filenames:
+ if options.verbose:
+ print main_input
+
+ path, fname = os.path.realpath(main_input).rsplit('/', 1)
+ provider = DirDocProvider(path)
+ if not (options.output_file or options.output_dir):
+ output_file = os.path.splitext(main_input)[0] + '.fb2'
+ else:
+ output_file = None
+
+ doc = WLDocument.from_file(main_input, provider=provider)
+ fb2 = doc.as_fb2(cover=options.with_cover, flags=flags)
+
+ doc.save_output_file(fb2,
+ output_file, options.output_dir, options.make_dir, 'fb2')
+
+ except ParseError, e:
+ print '%(file)s:%(name)s:%(message)s' % {
+ 'file': main_input,
+ 'name': e.__class__.__name__,
+ 'message': e
+ }
Something went wrong with that request. Please try again.