Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

initial checkin for funnelweb recipe and runner

  • Loading branch information...
commit 99dc7e89fdaa09994bdde4520e3386f78fce6a05 0 parents
Dylan Jay authored
7 .gitignore
@@ -0,0 +1,7 @@
+*.pyc
+*.egg-info
+parts/
+var/
+develop-eggs/
+src/
+bin/
5 CHANGES.txt
@@ -0,0 +1,5 @@
+1.0 (xxxx-xx-xx)
+----------------
+
+- Created recipe with ZopeSkel
+ [""]
2  CONTRIBUTORS.txt
@@ -0,0 +1,2 @@
+"", Author
+
17 README.txt
@@ -0,0 +1,17 @@
+.. contents::
+
+.. Note to recipe author!
+ ---------------------
+ Update the following URLs to point to your:
+
+ - code repository
+ - bug tracker
+ - questions/comments feedback mail
+ (do not use a real e-mail address, to avoid spam)
+
+ Or remove it if not used.
+
+- Code repository: http://svn.somewhere.com/...
+- Questions and comments to somemailing_list
+- Report bugs at http://bug.somewhere.com/..
+
260 bootstrap.py
@@ -0,0 +1,260 @@
+##############################################################################
+#
+# Copyright (c) 2006 Zope Foundation and Contributors.
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE.
+#
+##############################################################################
+"""Bootstrap a buildout-based project
+
+Simply run this script in a directory containing a buildout.cfg.
+The script accepts buildout command-line options, so you can
+use the -c option to specify an alternate configuration file.
+"""
+
+import os, shutil, sys, tempfile, textwrap, urllib, urllib2, subprocess
+from optparse import OptionParser
+
def _quote_windows(c):
    """Return *c* wrapped in double quotes when it contains a space."""
    # work around spawn lamosity on windows
    return '"%s"' % c if ' ' in c else c


# Only Windows needs the quoting work-around; everywhere else command
# line arguments are passed through ``str`` unchanged.
quote = _quote_windows if sys.platform == 'win32' else str
+
+# See zc.buildout.easy_install._has_broken_dash_S for motivation and comments.
+stdout, stderr = subprocess.Popen(
+ [sys.executable, '-Sc',
+ 'try:\n'
+ ' import ConfigParser\n'
+ 'except ImportError:\n'
+ ' print 1\n'
+ 'else:\n'
+ ' print 0\n'],
+ stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
+has_broken_dash_S = bool(int(stdout.strip()))
+
+# In order to be more robust in the face of system Pythons, we want to
+# run without site-packages loaded. This is somewhat tricky, in
+# particular because Python 2.6's distutils imports site, so starting
+# with the -S flag is not sufficient. However, we'll start with that:
+if not has_broken_dash_S and 'site' in sys.modules:
+ # We will restart with python -S.
+ args = sys.argv[:]
+ args[0:0] = [sys.executable, '-S']
+ args = map(quote, args)
+ os.execv(sys.executable, args)
+# Now we are running with -S. We'll get the clean sys.path, import site
+# because distutils will do it later, and then reset the path and clean
+# out any namespace packages from site-packages that might have been
+# loaded by .pth files.
+clean_path = sys.path[:]
+import site
+sys.path[:] = clean_path
+for k, v in sys.modules.items():
+ if k in ('setuptools', 'pkg_resources') or (
+ hasattr(v, '__path__') and
+ len(v.__path__)==1 and
+ not os.path.exists(os.path.join(v.__path__[0],'__init__.py'))):
+ # This is a namespace package. Remove it.
+ sys.modules.pop(k)
+
+is_jython = sys.platform.startswith('java')
+
+setuptools_source = 'http://peak.telecommunity.com/dist/ez_setup.py'
+distribute_source = 'http://python-distribute.org/distribute_setup.py'
+
+# parsing arguments
def normalize_to_url(option, opt_str, value, parser):
    """optparse callback that stores *value* as a URL on ``parser.values``.

    A value without ``://`` is taken to be a local path and turned into
    a ``file://`` URL.  The ``--download-base`` option always ends with a
    slash.  An empty value is stored as ``None``.  The attribute name is
    derived from *opt_str* (``--setup-source`` -> ``setup_source``).
    """
    if not value:
        value = None
    else:
        if '://' not in value:  # It doesn't smell like a URL.
            local_path = os.path.abspath(os.path.expanduser(value))
            value = 'file://%s' % (urllib.pathname2url(local_path),)
        is_download_base = opt_str == '--download-base'
        if is_download_base and not value.endswith('/'):
            # Download base needs a trailing slash to make the world happy.
            value += '/'
    dest_name = opt_str[2:].replace('-', '_')
    setattr(parser.values, dest_name, value)
+
+usage = '''\
+[DESIRED PYTHON FOR BUILDOUT] bootstrap.py [options]
+
+Bootstraps a buildout-based project.
+
+Simply run this script in a directory containing a buildout.cfg, using the
+Python that you want bin/buildout to use.
+
+Note that by using --setup-source and --download-base to point to
+local resources, you can keep this script from going over the network.
+'''
+
+parser = OptionParser(usage=usage)
+parser.add_option("-v", "--version", dest="version",
+ help="use a specific zc.buildout version")
+parser.add_option("-d", "--distribute",
+ action="store_true", dest="use_distribute", default=False,
+ help="Use Distribute rather than Setuptools.")
+parser.add_option("--setup-source", action="callback", dest="setup_source",
+ callback=normalize_to_url, nargs=1, type="string",
+ help=("Specify a URL or file location for the setup file. "
+ "If you use Setuptools, this will default to " +
+ setuptools_source + "; if you use Distribute, this "
+ "will default to " + distribute_source +"."))
+parser.add_option("--download-base", action="callback", dest="download_base",
+ callback=normalize_to_url, nargs=1, type="string",
+ help=("Specify a URL or directory for downloading "
+ "zc.buildout and either Setuptools or Distribute. "
+ "Defaults to PyPI."))
+parser.add_option("--eggs",
+ help=("Specify a directory for storing eggs. Defaults to "
+ "a temporary directory that is deleted when the "
+ "bootstrap script completes."))
+parser.add_option("-t", "--accept-buildout-test-releases",
+ dest='accept_buildout_test_releases',
+ action="store_true", default=False,
+ help=("Normally, if you do not specify a --version, the "
+ "bootstrap script and buildout gets the newest "
+ "*final* versions of zc.buildout and its recipes and "
+ "extensions for you. If you use this flag, "
+ "bootstrap and buildout will get the newest releases "
+ "even if they are alphas or betas."))
+parser.add_option("-c", None, action="store", dest="config_file",
+ help=("Specify the path to the buildout configuration "
+ "file to be used."))
+
+options, args = parser.parse_args()
+
+# if -c was provided, we push it back into args for buildout's main function
+if options.config_file is not None:
+ args += ['-c', options.config_file]
+
+if options.eggs:
+ eggs_dir = os.path.abspath(os.path.expanduser(options.eggs))
+else:
+ eggs_dir = tempfile.mkdtemp()
+
+if options.setup_source is None:
+ if options.use_distribute:
+ options.setup_source = distribute_source
+ else:
+ options.setup_source = setuptools_source
+
+if options.accept_buildout_test_releases:
+ args.append('buildout:accept-buildout-test-releases=true')
+args.append('bootstrap')
+
+try:
+ import pkg_resources
+ import setuptools # A flag. Sometimes pkg_resources is installed alone.
+ if not hasattr(pkg_resources, '_distribute'):
+ raise ImportError
+except ImportError:
+ ez_code = urllib2.urlopen(
+ options.setup_source).read().replace('\r\n', '\n')
+ ez = {}
+ exec ez_code in ez
+ setup_args = dict(to_dir=eggs_dir, download_delay=0)
+ if options.download_base:
+ setup_args['download_base'] = options.download_base
+ if options.use_distribute:
+ setup_args['no_fake'] = True
+ ez['use_setuptools'](**setup_args)
+ if 'pkg_resources' in sys.modules:
+ reload(sys.modules['pkg_resources'])
+ import pkg_resources
+ # This does not (always?) update the default working set. We will
+ # do it.
+ for path in sys.path:
+ if path not in pkg_resources.working_set.entries:
+ pkg_resources.working_set.add_entry(path)
+
+cmd = [quote(sys.executable),
+ '-c',
+ quote('from setuptools.command.easy_install import main; main()'),
+ '-mqNxd',
+ quote(eggs_dir)]
+
+if not has_broken_dash_S:
+ cmd.insert(1, '-S')
+
+find_links = options.download_base
+if not find_links:
+ find_links = os.environ.get('bootstrap-testing-find-links')
+if find_links:
+ cmd.extend(['-f', quote(find_links)])
+
+if options.use_distribute:
+ setup_requirement = 'distribute'
+else:
+ setup_requirement = 'setuptools'
+ws = pkg_resources.working_set
+setup_requirement_path = ws.find(
+ pkg_resources.Requirement.parse(setup_requirement)).location
+env = dict(
+ os.environ,
+ PYTHONPATH=setup_requirement_path)
+
+requirement = 'zc.buildout'
+version = options.version
+if version is None and not options.accept_buildout_test_releases:
+ # Figure out the most recent final version of zc.buildout.
+ import setuptools.package_index
+ _final_parts = '*final-', '*final'
+ def _final_version(parsed_version):
+ for part in parsed_version:
+ if (part[:1] == '*') and (part not in _final_parts):
+ return False
+ return True
+ index = setuptools.package_index.PackageIndex(
+ search_path=[setup_requirement_path])
+ if find_links:
+ index.add_find_links((find_links,))
+ req = pkg_resources.Requirement.parse(requirement)
+ if index.obtain(req) is not None:
+ best = []
+ bestv = None
+ for dist in index[req.project_name]:
+ distv = dist.parsed_version
+ if _final_version(distv):
+ if bestv is None or distv > bestv:
+ best = [dist]
+ bestv = distv
+ elif distv == bestv:
+ best.append(dist)
+ if best:
+ best.sort()
+ version = best[-1].version
+if version:
+ requirement = '=='.join((requirement, version))
+cmd.append(requirement)
+
+if is_jython:
+ import subprocess
+ exitcode = subprocess.Popen(cmd, env=env).wait()
+else: # Windows prefers this, apparently; otherwise we would prefer subprocess
+ exitcode = os.spawnle(*([os.P_WAIT, sys.executable] + cmd + [env]))
+if exitcode != 0:
+ sys.stdout.flush()
+ sys.stderr.flush()
+ print ("An error occurred when trying to install zc.buildout. "
+ "Look above this message for any errors that "
+ "were output by easy_install.")
+ sys.exit(exitcode)
+
+ws.add_entry(eggs_dir)
+ws.require(requirement)
+import zc.buildout.buildout
+zc.buildout.buildout.main(args)
+if not options.eggs: # clean up temporary egg directory
+ shutil.rmtree(eggs_dir)
102 buildout.cfg
@@ -0,0 +1,102 @@
+[buildout]
+develop =
+ .
+ src/transmogrify.webcrawler
+ src/transmogrify.pathsorter
+ src/transmogrify.htmltesting
+ src/transmogrify.siteanalyser
+ src/transmogrify.ploneremote
+ src/transmogrify.htmlcontentextractor
+
+parts = test funnelweb instance
+#extends =
+# http://download.zope.org/Zope2/index/2.12.6/versions.cfg
+# http://dist.plone.org/release/4.1/versions.cfg
+versions = versions
+find-links =
+ http://dist.plone.org/release/4.0
+ http://dist.plone.org/thirdparty
+
+extensions =
+ mr.developer
+ buildout.dumppickedversions
+dump-picked-versions-file = picked.cfg
+sources = sources
+sources-dir = ${buildout:directory}/src
+auto-checkout =
+
+
+[test]
+recipe = zc.recipe.testrunner
+eggs = funnelweb [tests]
+
+
+[funnelweb]
+recipe = funnelweb.recipe
+crawler-url=http://www.whitehouse.gov
+#upload-url=http://localhost:8080/Plone
+crawler-ignore=
+ \.mp4
+ \.mp3
+ cgi-bin
+ javascript:
+template1-text= html //div[re:test(@class,"^extend-page body-text clearfix clear page node-content$","i")]//div[re:test(@id,"^content$","i")]//p
+ //div[re:test(@class,"^extend-page body-text clearfix clear page node-content$","i")]//div[re:test(@id,"^content$","i")]//h2
+ //div[re:test(@class,"^extend-page body-text clearfix clear page node-content$","i")]//div[re:test(@id,"^content$","i")]//p
+ //div[re:test(@class,"^extend-page body-text clearfix clear page node-content$","i")]//div[re:test(@id,"^content$","i")]//h2
+ //div[re:test(@class,"^extend-page body-text clearfix clear page node-content$","i")]//div[re:test(@id,"^content$","i")]//p
+template1-title= html //div[re:test(@class,"^extend-page body-text clearfix clear page node-content$","i")]//div[re:test(@id,"^content$","i")]//h1
+
+
+
+
+
+
+[lxml]
+recipe = z3c.recipe.staticlxml
+egg = lxml == 2.2.6
+
+[PIL]
+# Build egg with Chris McDonough's custom packaging of setuptools-compatible PIL
+# http://article.gmane.org/gmane.comp.web.zope.devel/13999
+recipe = zc.recipe.egg
+egg = PIL==1.1.6
+find-links = http://dist.repoze.org/PIL-1.1.6.tar.gz
+
+
+[instance]
+recipe = plone.recipe.zope2instance
+debug-mode = on
+verbose-security = on
+user = admin:admin
+zcml-eggs =
+# pretaweb.funnelweb
+# plone.reload
+eggs =
+ ${PIL:egg}
+ Plone
+ plone.i18n
+ ${instance:zcml-eggs}
+ ${lxml:egg}
+zcml =
+ ${instance:zcml-eggs}
+
+
+[versions]
+#zc.buildout=1.4.1
+
+
+# Set transmogrify package repository pointer here to allow easy switching between private and public repos
+# Final repo URLs have the form git@github.com:djay/transmogrify.htmltesting.git (SSH, note the colon
+# after the host) or git://github.com/djay/transmogrify.htmltesting.git (read-only)
+[transmogrify]
+git-repo = git@github.com:djay
+
+[sources]
+transmogrify.webcrawler = git ${transmogrify:git-repo}/transmogrify.webcrawler.git
+transmogrify.pathsorter = git ${transmogrify:git-repo}/transmogrify.pathsorter.git
+transmogrify.htmltesting = git ${transmogrify:git-repo}/transmogrify.htmltesting.git
+transmogrify.siteanalyser = git ${transmogrify:git-repo}/transmogrify.siteanalyser.git
+transmogrify.ploneremote = git ${transmogrify:git-repo}/transmogrify.ploneremote.git
+transmogrify.htmlcontentextractor = git ${transmogrify:git-repo}/transmogrify.htmlcontentextractor.git
+
+
6 funnelweb/__init__.py
@@ -0,0 +1,6 @@
+# See http://peak.telecommunity.com/DevCenter/setuptools#namespace-packages
+try:
+ __import__('pkg_resources').declare_namespace(__name__)
+except ImportError:
+ from pkgutil import extend_path
+ __path__ = extend_path(__path__, __name__)
61 funnelweb/recipe/README.txt
@@ -0,0 +1,61 @@
+Supported options
+=================
+
+The recipe supports the following options:
+
+.. Note to recipe author!
+ ----------------------
+ For each option the recipe uses you should include a description
+ about the purpose of the option, the format and semantics of the
+ values it accepts, whether it is mandatory or optional and what the
+ default value is if it is omitted.
+
+option1
+ Description for ``option1``...
+
+option2
+ Description for ``option2``...
+
+
+Example usage
+=============
+
+.. Note to recipe author!
+ ----------------------
+ zc.buildout provides a nice testing environment which makes it
+ relatively easy to write doctests that both demonstrate the use of
+ the recipe and test it.
+ You can find examples of recipe doctests from the PyPI, e.g.
+
+ http://pypi.python.org/pypi/zc.recipe.egg
+
+ The PyPI page for zc.buildout contains documentation about the test
+ environment.
+
+ http://pypi.python.org/pypi/zc.buildout#testing-support
+
+ Below is a skeleton doctest that you can start with when building
+ your own tests.
+
+We'll start by creating a buildout that uses the recipe::
+
+ >>> write('buildout.cfg',
+ ... """
+ ... [buildout]
+ ... parts = test1
+ ...
+ ... [test1]
+ ... recipe = funnelweb
+ ... option1 = %(foo)s
+ ... option2 = %(bar)s
+ ... """ % { 'foo' : 'value1', 'bar' : 'value2'})
+
+Running the buildout gives us::
+
+ >>> print 'start', system(buildout)
+ start...
+ Installing test1.
+ Unused options for test1: 'option2' 'option1'.
+ <BLANKLINE>
+
+
63 funnelweb/recipe/__init__.py
@@ -0,0 +1,63 @@
+# -*- coding: utf-8 -*-
+"""Recipe funnelweb"""
+
+import z3c.recipe.scripts
+from urllib import pathname2url as url
+from sys import argv
+import logging
+from pkg_resources import resource_string, resource_filename
+
+
+logging.basicConfig(level=logging.DEBUG)
+
class Recipe(z3c.recipe.scripts.scripts.Scripts):
    """zc.buildout recipe that sets up the funnelweb script part.

    Options written as ``<section>-<key> = value`` (for example
    ``crawler-url``) are grouped per section and stored, as the string
    form of a nested dictionary, in the ``arguments`` option before the
    base scripts recipe is initialised.
    """

    def __init__(self, buildout, name, options):
        """Collect the per-section options and configure the part.

        buildout -- the buildout configuration mapping
        name -- name of the part that uses this recipe
        options -- options of that part; ``cache-output`` gets a default
            and ``eggs`` and ``arguments`` are overwritten here
        """
        self.buildout, self.name, self.options = buildout, name, options
        self.options.setdefault(
            'cache-output',
            "%s/var/funnelwebcache" % buildout['buildout']['directory'])

        # Group "section-key" options by section.  Options without a
        # dash are not pipeline overrides and are skipped.
        args = {}
        for k, v in self.options.items():
            if '-' not in k:
                continue
            part, key = k.split('-', 1)
            args.setdefault(part, {})[key] = v

        # A part may define no crawler-* or upload-* option at all, so
        # create each section on demand instead of indexing into args.
        crawler = args.setdefault('crawler', {})
        crawler.setdefault('site_url', crawler.get('url', ''))
        upload = args.setdefault('upload', {})
        upload.setdefault('target', upload.get('url', ''))

        self.options['eggs'] = """transmogrify.htmltesting
            transmogrify.webcrawler
            transmogrify.siteanalyser
            transmogrify.htmlcontentextractor
            transmogrify.pathsorter
            transmogrify.ploneremote
            Products.ZSQLMethods
            Products.CMFCore
            zope.app.pagetemplate
            funnelweb
            zope.app.component"""
        self.options['arguments'] = str(args)
        z3c.recipe.scripts.scripts.Scripts.__init__(
            self, buildout, name, options)

    def install(self):
        """Install the part by delegating to the base scripts recipe.

        Returns whatever the base recipe returns: the files created by
        the recipe, which buildout removes again upon reinstall.
        """
        return z3c.recipe.scripts.scripts.Scripts.install(self)

    def update(self):
        """Update the part by delegating to the base scripts recipe."""
        return z3c.recipe.scripts.scripts.Scripts.update(self)
1  funnelweb/recipe/tests/__init__.py
@@ -0,0 +1 @@
+# package
47 funnelweb/recipe/tests/test_docs.py
@@ -0,0 +1,47 @@
+# -*- coding: utf-8 -*-
+"""
+Doctest runner for 'funnelweb'.
+"""
+__docformat__ = 'restructuredtext'
+
+import unittest
+import zc.buildout.tests
+import zc.buildout.testing
+
+from zope.testing import doctest, renormalizing
+
+optionflags = (doctest.ELLIPSIS |
+ doctest.NORMALIZE_WHITESPACE |
+ doctest.REPORT_ONLY_FIRST_FAILURE)
+
def setUp(test):
    """Prepare the buildout test environment for the README doctest."""
    testing = zc.buildout.testing
    testing.buildoutSetUp(test)

    # Make the recipe under test available in develop mode.
    testing.install_develop('funnelweb', test)

    # Any other recipe the tests need can be installed here, e.g.
    # testing.install('collective.recipe.foobar', test)
+
def test_suite():
    """Return the suite that runs the recipe's ``../README.txt`` doctest."""
    # If you want to clean up the doctest output you can register
    # additional regexp normalizers here.  Each one is a two-tuple with
    # the RE as the first item and the replacement as the second, e.g.
    # (re.compile('my-[rR]eg[eE]ps'), 'my-regexps')
    normalizers = [
        zc.buildout.testing.normalize_path,
    ]
    checker = renormalizing.RENormalizing(normalizers)
    readme = doctest.DocFileSuite(
        '../README.txt',
        setUp=setUp,
        tearDown=zc.buildout.testing.buildoutTearDown,
        optionflags=optionflags,
        checker=checker,
    )
    return unittest.TestSuite((readme,))
+
+if __name__ == '__main__':
+ unittest.main(defaultTest='test_suite')
92 funnelweb/runner/__init__.py
@@ -0,0 +1,92 @@
+from collective.transmogrifier.tests import registerConfig
+from collective.transmogrifier.transmogrifier import Transmogrifier
+from pkg_resources import resource_string, resource_filename
+from collective.transmogrifier.transmogrifier import configuration_registry
+from Products.Five import zcml
+from zope.component import provideUtility
+from zope.interface import classProvides, implements
+import transmogrify.htmltesting
+import zope.app.component
+import re
+from pkg_resources import resource_filename
+import sys
+
+class Context:
+ pass
+
+
+CONFIG = """
+
+[clean]
+blueprint = collective.transmogrifier.sections.manipulator
+delete =
+ %(strip)s
+
+[printer]
+blueprint = collective.transmogrifier.sections.tests.pprinter
+
+"""
+
+
+
+def runner(args={}):
+
+ for k,v in [a.split('=') for a in sys.argv[1:]]:
+ k = k.lstrip('--')
+ part,key = k.split(':')
+ args.setdefault(part, {})[key] = v
+
+ config = resource_filename(__name__,'pipeline.cfg')
+
+ if args.get('pipeline') == '':
+ f = open(config)
+ print f.read()
+ f.close()
+ return
+ else:
+ config = args.get('pipeline', config)
+
+ from collective.transmogrifier.transmogrifier import Transmogrifier
+# test.globs['transmogrifier'] = Transmogrifier(test.globs['plone'])
+
+ import zope.component
+ import collective.transmogrifier.sections
+ zcml.load_config('meta.zcml', zope.app.component)
+
+ zcml.load_config('meta.zcml', collective.transmogrifier)
+ zcml.load_config('configure.zcml', collective.transmogrifier.sections)
+ zcml.load_config('configure.zcml', transmogrify.htmltesting)
+
+
+ context = Context()
+ configuration_registry.registerConfiguration(
+ u'transmogrify.config.funnelweb',
+ u"",
+ u'', config)
+
+ transmogrifier = Transmogrifier(context)
+ overrides = {}
+ if type(args) == type(''):
+ for arg in args:
+ section,keyvalue = arg.split(':',1)
+ key,value = keyvalue.split('=',1)
+ overrides.setdefault('section',{})[key] = value
+ else:
+ overrides = args
+
+ transmogrifier(u'transmogrify.config.funnelweb', **overrides)
+
+
+
+
+def testtransmogrifier(config, strip=['_content']):
+ strip = '\t'+'\n\t'.join(strip)
+
+ config = re.sub('\.\.\.',config, 'clean\n\tprinter\n')
+ config += CONFIG
+ config = config % locals()
+
+ runner(config)
+
+if __name__ == '__main__':
+ main()
207 funnelweb/runner/pipeline.cfg
@@ -0,0 +1,207 @@
+[transmogrifier]
+pipeline =
+ crawler
+ cache
+ typeguess
+ drop-resources
+ template1
+ template2
+ template3
+ template4
+ templateauto
+# isindex is before moves. not sure why but it has to be
+ set-folder-default-page
+ titleguess
+ relinker
+# treeserializer before constructor to put items back in the right order
+# treeserializer
+ encode-all
+ topublish
+ ploneupload
+ ploneupdate
+ plonehide
+ plonepublish
+ localupload
+
+#
+# WebCrawler will emit items like
+# item = dict(_site_url = "Original site_url used",
+# _path = "The url crawled without _site_url",
+# _content = "The raw content returned by the url",
+# _content_info = "Headers returned with content",
+# _backlinks = names,
+# _sortorder = "An integer representing the order the url was found within the page/site"
+# )
+#
+# site_url - the top url to crawl
+# ignore - list of regex for urls to not crawl
+# cache - local directory to read crawled items from instead of accessing the site directly
+#
+
+[crawler]
+blueprint = transmogrify.webcrawler
+site_url = %s
+ignore =
+ cgi-bin
+ javascript:
+cache = ${cache:output}
+
+[cache]
+blueprint = transmogrify.webcrawler.cache
+#target = ploneout
+output = funnelwebcache
+
+
+
+#
+# Typerecognitor adds two more attributes
+# _type = 'Page', 'Image', or 'File' depending on the mime_type from the headers
+# _mime_type = the mime_type from the headers
+#
+[typeguess]
+blueprint = transmogrify.webcrawler.typerecognitor
+
+
+
+[drop-resources]
+blueprint = collective.transmogrifier.sections.condition
+condition: python:item.get('_mimetype') not in ['application/x-javascript','text/css','text/plain','application/x-java-byte-code'] and item.get('_path','').split('.')[-1] not in ['class']
+
+
+#[treeserializer]
+#blueprint = transmogrify.pathsorter
+
+
+
+[relinker]
+blueprint = transmogrify.siteanalyser.relinker
+#link_expr = python:item.get('_type',None)=='Image' and item['_path'][:-1].endswith('image') and item['_path']+'_medium' or item.get('_type',None)=='Image' and item['_path']+'/image_web' or item['_path']
+
+#
+# Extract title, description and content text from Sphinx generated HTML page
+#
+# Title is the first <h1> element
+#
+# Description is reST "admonition" with name Description
+#
+# Text is what is left to <body> after removing title and description
+#
+# Note that spaces in XPaths must be escaped as &#32;
+#
+# See
+# http://www.w3schools.com/xpath/default.asp
+# http://blog.browsermob.com/2009/04/test-your-selenium-xpath-easily-with-firebug/
+# for XPath info
+#
+[template1]
+blueprint = transmogrify.htmlcontentextractor
+title = text //div[@class='body']//h1[1]
+# Permalinks are not used in Plone. We have this dummy rule here to parse
+# them out from body so that they do not corrupt "text" field
+#delete1 = optional //div[@class='body']//a[@class='headerlink']
+#delete2 = optional //div[contains(@class,'admonition-description')]
+#description = text //div[contains(@class,'admonition-description')]//p[@class='last']
+#text = html //div[@class='body']
+
+[template2]
+blueprint = transmogrify.htmlcontentextractor
+
+[template3]
+blueprint = transmogrify.htmlcontentextractor
+
+[template4]
+blueprint = transmogrify.htmlcontentextractor
+
+
+[templateauto]
+blueprint = transmogrify.htmlcontentextractor.auto
+disable = False
+
+
+[titleguess]
+blueprint = transmogrify.siteanalyser.title
+ignore =
+ click
+ read more
+
+
+#
+# Set the workflow transition key hint which will be used to publish the item.
+# This value is later read by [publish] and we can have conditions
+# to have different WF transitions for different content types
+#
+# Note that images do not have workflow and they inherit
+# permission settings of the parent container
+#
+[topublish]
+blueprint = collective.transmogrifier.sections.inserter
+key = string:_transitions
+value = python:["publish"]
+condition = python:item.get('_type') != 'Image' and not options.get('disabled')
+
+[encode-all]
+blueprint = collective.transmogrifier.sections.codec
+keys =
+ text
+to = utf8
+
+
+
+#
+# Set index.html as the default page of the folder
+#
+# Determines an item is a default page for a container if it has many links
+# to items in that container.
+#
+[set-folder-default-page]
+blueprint = transmogrify.siteanalyser.defaultpage
+
+#
+# Turn items with attachments into folders with a default view
+#
+[makeattachments]
+blueprint = transmogrify.siteanalyser.attach
+condition = python: subitem.get('_type') in ['Image']
+defaultpage = index-html
+
+
+
+
+#
+# Create remote item on Plone site
+#
+[ploneupload]
+blueprint = transmogrify.ploneremote.remoteconstructor
+target =
+
+#
+# Update the remote item with new extracted content from Sphinx documentation
+#
+[ploneupdate]
+blueprint = transmogrify.ploneremote.remoteschemaupdater
+target = ${upload:target}
+
+#
+# Hide items from the navigation
+# (hints to which items should be hidden are set earlier in pipeline)
+#
+[plonehide]
+blueprint = transmogrify.ploneremote.remotenavigationexcluder
+target = ${upload:target}
+
+#
+# Publish the uploaded documentation (if not public yet)
+#
+[plonepublish]
+blueprint = transmogrify.ploneremote.remoteworkflowupdater
+transitions = submit publish
+transitions-key = _transitions
+target = ${upload:target}
+
+
+#
+# Save transformed site locally
+#
+[localupload]
+blueprint = transmogrify.webcrawler.cache
+output =
0  funnelweb/runner/runner.py
No changes.
81 setup.py
@@ -0,0 +1,81 @@
+# -*- coding: utf-8 -*-
+"""
+This module contains the tool of funnelweb
+"""
+import os
+from setuptools import setup, find_packages
+
def read(*rnames):
    """Return the contents of a file located relative to this script.

    rnames -- path components, joined below the directory of this file
    """
    path = os.path.join(os.path.dirname(__file__), *rnames)
    # Close the handle deterministically instead of leaving it to the
    # garbage collector.
    with open(path) as source:
        return source.read()
+
+version = '1.0'
+
# Package description: the project README followed by the recipe
# documentation, the contributors and the change history.
long_description = (
    read('README.txt')
    + '\n' +
    'Detailed Documentation\n'
    '**********************\n'
    + '\n' +
    # The recipe documentation lives in funnelweb/recipe/README.txt;
    # there is no funnelweb/recipe/funnelweb directory.
    read('funnelweb', 'recipe', 'README.txt')
    + '\n' +
    'Contributors\n'
    '************\n'
    + '\n' +
    read('CONTRIBUTORS.txt')
    + '\n' +
    'Change history\n'
    '**************\n'
    + '\n' +
    read('CHANGES.txt')
    + '\n' +
    'Download\n'
    '********\n'
    )
+entry_point = 'funnelweb.recipe:Recipe'
+entry_points = {"zc.buildout": ["default = %s" % entry_point],
+ 'console_scripts': ['funnelweb = funnelweb.runner:runner']}
+
+tests_require=['zope.testing', 'zc.buildout']
+
# Package metadata.  The ``tests`` extra mirrors ``tests_require``.
setup(name='funnelweb',
      version=version,
      description="Parse static sites and import to CMS",
      long_description=long_description,
      # Get more strings from http://pypi.python.org/pypi?%3Aaction=list_classifiers
      classifiers=[
        'Framework :: Buildout',
        'Intended Audience :: Developers',
        'Topic :: Software Development :: Build Tools',
        'License :: OSI Approved :: Zope Public License',
        ],
      keywords='buildout crawler spider',
      author='Dylan Jay',
      author_email='software@pretaweb.com',
      url='http://svn.plone.org/svn/collective/',
      license='ZPL',
      packages=find_packages(exclude=['ez_setup']),
      namespace_packages=['funnelweb'],
      include_package_data=True,
      zip_safe=False,
      install_requires=['setuptools',
                        'zc.buildout',
                        'z3c.recipe.scripts',
#                        'Zope2',
                        'collective.transmogrifier',
                        'transmogrify.htmltesting',
                        'transmogrify.webcrawler',
                        'transmogrify.siteanalyser',
                        'transmogrify.htmlcontentextractor',
                        'transmogrify.pathsorter',
                        'transmogrify.ploneremote',
                        'Products.CMFCore',
                        'zope.app.pagetemplate',
                        'zope.app.component',
                        'z3c.autoinclude'
                        # -*- Extra requirements: -*-
                        ],
      tests_require=tests_require,
      extras_require=dict(tests=tests_require),
      # The doctest runner is funnelweb/recipe/tests/test_docs.py.
      test_suite='funnelweb.recipe.tests.test_docs.test_suite',
      entry_points=entry_points,
      )
Please sign in to comment.
Something went wrong with that request. Please try again.