From 890726285fc24a948bfc4ca4afd079c0e27fd91a Mon Sep 17 00:00:00 2001 From: Dylan Jay Date: Mon, 26 Nov 2012 15:21:50 +1100 Subject: [PATCH] add funnelweb with pandoc converter to buildout for anyone to convert html manuals to .rst --- buildout-cache/downloads/README.txt | 1 - buildout-cache/eggs/README.txt | 1 - buildout.cfg | 48 ++++++++++++++++++++++++++--- pandoc.cfg | 47 ++++++++++++++++++++++++++++ pipeline.cfg | 47 ++++++++++++++++++++++++++++ 5 files changed, 137 insertions(+), 7 deletions(-) delete mode 100644 buildout-cache/downloads/README.txt delete mode 100644 buildout-cache/eggs/README.txt create mode 100644 pandoc.cfg create mode 100644 pipeline.cfg diff --git a/buildout-cache/downloads/README.txt b/buildout-cache/downloads/README.txt deleted file mode 100644 index 55407f58..00000000 --- a/buildout-cache/downloads/README.txt +++ /dev/null @@ -1 +0,0 @@ -Needed for Travis CI integration diff --git a/buildout-cache/eggs/README.txt b/buildout-cache/eggs/README.txt deleted file mode 100644 index 55407f58..00000000 --- a/buildout-cache/eggs/README.txt +++ /dev/null @@ -1 +0,0 @@ -Needed for Travis CI integration diff --git a/buildout.cfg b/buildout.cfg index b7670d6d..e0420d39 100644 --- a/buildout.cfg +++ b/buildout.cfg @@ -9,6 +9,7 @@ parts = download install sphinx + funnelweb allow-hosts = *.plone.org @@ -22,11 +23,10 @@ extensions = mr.developer buildout.dumppickedversions -eggs-directory = buildout-cache/eggs -download-cache = buildout-cache/downloads - extends = https://raw.github.com/plone/buildout.coredev/4.2/sources.cfg + http://dist.plone.org/release/4.2-latest/versions.cfg + dump-picked-versions-file = picked.cfg sources = sources @@ -50,6 +50,17 @@ auto-checkout = Products.TinyMCE tutorials.todoapp +# these can be removed after upcoming release + transmogrify.webcrawler + transmogrify.htmlcontentextractor + transmogrify.ploneremote + transmogrify.htmltesting + transmogrify.siteanalyser + transmogrify.command + funnelweb + mr.migrator + + 
allow-picked-versions = true [download] @@ -60,7 +71,12 @@ url = https://launchpad.net/plone/4.2/4.2.1/+download/Plone-4.2.1-UnifiedInstall [install] recipe = collective.recipe.cmd on_install = true -cmds = tar jxf ${download:location}/Plone-4.2.1-UnifiedInstaller/packages/buildout-cache.tar.bz2 1>/dev/null +cmds = + tar jxf ${download:location}/Plone-4.2.1-UnifiedInstaller/packages/buildout-cache.tar.bz2 1>/dev/null + cp -R buildout-cache/eggs ${buildout:eggs-directory} + cp -R buildout-cache/downloads ${buildout:download-cache} + rm -r buildout-cache/eggs buildout-cache/downloads + [sphinx] recipe = collective.recipe.sphinxbuilder @@ -75,19 +91,41 @@ eggs = collective.sphinx.includedoc collective.sphinx.autoatschema +# +# To run funnelweb use $ bin/funnelweb --pipeline=pandoc.cfg +# or create another pipeline to use +# + +[funnelweb] +recipe = zc.recipe.egg +eggs = + funnelweb + transmogrify.command + [versions] roadrunner = 0.2.3.1 zc.recipe.egg = 1.2.0 -Products.CMFPlone = 4.2.1 +#Products.CMFPlone = 4.2.1 # Some pindowns to make sure Sphinx + dependencies don't go havoc collective.recipe.sphinxbuilder = 0.7.0 Sphinx = 1.1.3 +docutils=0.9.1 [sources] Products.TinyMCE = git git://github.com/plone/Products.TinyMCE plone.api = git git://github.com/plone/plone.api.git tutorials.todoapp = git git://github.com/collective/tutorial.todoapp.git +# funnelweb +transmogrify.webcrawler = git git@github.com:djay/transmogrify.webcrawler.git +transmogrify.htmlcontentextractor = git git@github.com:djay/transmogrify.htmlcontentextractor.git +transmogrify.ploneremote = git git@github.com:djay/transmogrify.ploneremote.git +transmogrify.htmltesting = git git@github.com:djay/transmogrify.htmltesting.git +transmogrify.siteanalyser = git git@github.com:djay/transmogrify.siteanalyser.git +transmogrify.command = git git@github.com:djay/transmogrify.command.git +funnelweb = git git@github.com:collective/funnelweb.git +mr.migrator = git git@github.com:collective/mr.migrator.git + diff 
--git a/pandoc.cfg b/pandoc.cfg new file mode 100644 index 00000000..5de6f12c --- /dev/null +++ b/pandoc.cfg @@ -0,0 +1,47 @@ + +[transmogrifier] +include = funnelweb.remote + +pipeline = + crawler + cache + typeguess +# drop + template1 + urltidy + pandoc + localupload + +[crawler] +#url=http://plone.org/documentation/manual/theme-reference +url = http://plone.org/documentation/manual/developer-manual/generic-setup +ignore= + .css + .js + -all-pages +[cache] +[typeguess] + +[template1] +#title= text //h1[@class="documentFirstHeading"] +text= html //div[@id="content"] +_delete= optional //div[@class="visualNoPrint"] +_delete2= optional //div[@class="documentByLine"] + +[pandoc] +blueprint = transmogrify.command +commands = pandoc --from=html --to=rst +input-key = text +output-key = text + +[urltidy] +link_expr = python:(item['_path'].rsplit('.',1)[-1] in ['html','asp','php'] and item['_path'].rsplit('.',1)[0] or item['_path'])+".rst" +use_title = python:False +invalid_ids = + security + sharing + + +# Save locally for debugging purposes +[localupload] +output=manual diff --git a/pipeline.cfg b/pipeline.cfg new file mode 100644 index 00000000..8cb91c0c --- /dev/null +++ b/pipeline.cfg @@ -0,0 +1,47 @@ +[transmogrifier] +include = funnelweb.remote + +[crawler] +url=file:build/html +ignore= + cgi-bin + javascript: + _static + _sources + genindex\.html + search\.html + searchindex\.js + +[template1] +title = text //div[@class='body']//h1[1] +description = optional //div[contains(@class,'admonition-description')]/p[@class='last']/text() +text = html //div[@class='body'] +# Fields with '_' won't be uploaded to Plone so will be effectively removed +_permalink = text //div[@class='body']//a[@class='headerlink'] +_label = optional //p[contains(@class,'admonition-title')] +_remove_useless_links = optional //div[@id = 'indices-and-tables'] + +# Images will get titles from backlink text +[titleguess] +condition = python:True + +# Pages linked to content will be moved together 
+[indexguess] +condition = python:False + +# Hide the images folder from navigation +[hideguess] +condition = python:item.get("_path","").startswith('_images') and item.get('_type')=='Folder' + +# Upload as PHC instead of Folders and Pages +[changetype] +value=python:{'Folder':'HelpCenterReferenceManualSection','Document':'HelpCenterLeafPage'}.get(item['_type'],item['_type']) + +# Save locally for debugging purposes +[localupload] +output=manual + +# All folderish content should be checked to see whether it contains +# any items on the remote site which are not present locally, including the base folder +[ploneprune] +condition=python:item.get('_type') in ['HelpCenterReferenceManualSection','HelpCenterReferenceManual'] or item['_path'] == '' \ No newline at end of file