Skip to content

Commit

Permalink
- cdx handler refactoring: factor out CDXHandler and init to
Browse files Browse the repository at this point in the history
separate cdx_handler module
- Make wsgi app a class, add port as an optional field in wsgi app
and router. (not required to be specified)
  • Loading branch information
ikreymer committed Mar 3, 2014
1 parent 0bf651c commit 2d4ae62
Show file tree
Hide file tree
Showing 9 changed files with 102 additions and 64 deletions.
21 changes: 4 additions & 17 deletions pywb/apps/cdx_server.py
@@ -1,27 +1,14 @@
from pywb.cdx.cdxserver import create_cdx_server

from pywb.framework.wsgi_wrappers import init_app, start_wsgi_server
from pywb.framework.archivalrouter import ArchivalRouter, Route

from pywb.core.handlers import CDXHandler
from pywb.core.cdx_handler import create_cdx_server_app

DEFAULT_RULES = 'pywb/rules.yaml'
#=================================================================
# init cdx server app
#=================================================================

# cdx-server only config
DEFAULT_CONFIG = 'pywb/cdx/config.yaml'

#=================================================================
# create simple cdx server under '/cdx' using config file
# TODO: support multiple collections like full wayback?

def create_cdx_server_app(config):
    """
    Build the router for a cdx-server-only application.

    A cdx server is created from ``config`` and DEFAULT_RULES, wrapped
    in a CDXHandler and exposed through a single 'cdx' route.
    """
    handler = CDXHandler(create_cdx_server(config, DEFAULT_RULES))
    return ArchivalRouter([Route('cdx', handler)])

#=================================================================
# init pywb app
#=================================================================
application = init_app(create_cdx_server_app,
load_yaml=True,
config_file=DEFAULT_CONFIG)
Expand Down
43 changes: 43 additions & 0 deletions pywb/core/cdx_handler.py
@@ -0,0 +1,43 @@
from pywb.cdx.query import CDXQuery
from pywb.cdx.cdxserver import create_cdx_server

from pywb.framework.archivalrouter import ArchivalRouter, Route
from pywb.framework.basehandlers import BaseHandler

from views import TextCapturesView


#=================================================================
class CDXHandler(BaseHandler):
    """
    Request handler backed by a cdx index reader.

    Query parameters are extracted from the wsgi environment of the
    request and passed to the index reader; the resulting cdx lines are
    rendered by the view (a TextCapturesView unless one is supplied).
    """
    def __init__(self, index_reader, view=None):
        # any falsy view falls back to the default text view
        self.view = view or TextCapturesView()
        self.index_reader = index_reader

    def __call__(self, wbrequest):
        query_args = CDXQuery.extract_params_from_wsgi_env(wbrequest.env)
        captures = self.index_reader.load_cdx(**query_args)
        return self.view.render_response(wbrequest, captures)

    def __str__(self):
        return ''.join(('CDX Handler: ', str(self.index_reader)))


#=================================================================
DEFAULT_RULES = 'pywb/rules.yaml'

#=================================================================
def create_cdx_server_app(config):
    """
    Build the router for a standalone cdx server, to be wrapped in a
    wsgi app.

    The cdx server is created from ``config`` and DEFAULT_RULES and is
    reachable through a single 'cdx' route. The optional 'port' entry
    of ``config`` is handed to the router (None when not configured).

    TODO: more complex example with multiple collections?
    """
    server = create_cdx_server(config, DEFAULT_RULES)
    listen_port = config.get('port')
    cdx_route = Route('cdx', CDXHandler(server))
    return ArchivalRouter([cdx_route], port=listen_port)
20 changes: 0 additions & 20 deletions pywb/core/handlers.py
@@ -1,9 +1,7 @@
import urlparse
import pkgutil
import mimetypes
import time

from pywb.cdx.query import CDXQuery
from pywb.framework.basehandlers import BaseHandler, WbUrlHandler
from pywb.framework.wbrequestresponse import WbResponse
from pywb.framework.wbexceptions import WbException, NotFoundException
Expand Down Expand Up @@ -58,24 +56,6 @@ def __str__(self):
return 'WBHandler: ' + str(self.index_reader) + ', ' + str(self.replay)


#=================================================================
# CDX-Server Handler -- pass all params to cdx server
#=================================================================
class CDXHandler(BaseHandler):
    """
    Handler that forwards all request parameters to the cdx server
    (index reader) and renders the returned cdx lines with the view,
    defaulting to a TextCapturesView.
    """
    def __init__(self, index_reader, view=None):
        # any falsy view falls back to the default text view
        self.view = view or TextCapturesView()
        self.index_reader = index_reader

    def __call__(self, wbrequest):
        query_args = CDXQuery.extract_params_from_wsgi_env(wbrequest.env)
        captures = self.index_reader.load_cdx(**query_args)
        return self.view.render_response(wbrequest, captures)

    def __str__(self):
        return ''.join(('Index Reader: ', str(self.index_reader)))


#=================================================================
# Static Content Handler
#=================================================================
Expand Down
6 changes: 5 additions & 1 deletion pywb/core/pywb_init.py
Expand Up @@ -11,7 +11,8 @@
from replay_views import ReplayView

from handlers import WBHandler
from handlers import CDXHandler, StaticHandler
from handlers import StaticHandler
from cdx_handler import CDXHandler
from handlers import DebugEchoHandler, DebugEchoEnvHandler


Expand Down Expand Up @@ -115,6 +116,8 @@ def create_wb_router(passed_config = {}):

hostpaths = config.get('hostpaths')

port = config.get('port')

# collections based on cdx source
collections = config.get('collections')

Expand Down Expand Up @@ -169,6 +172,7 @@ def create_wb_router(passed_config = {}):
# This will help catch occasionally missed rewrites that fall-through to the host
# (See archivalrouter.ReferRedirect)
hostpaths = hostpaths,
port = port,

abs_path = config.get('absolute_paths', True),

Expand Down
11 changes: 9 additions & 2 deletions pywb/framework/archivalrouter.py
Expand Up @@ -9,11 +9,18 @@
# ArchivalRouter -- route WB requests in archival mode
#=================================================================
class ArchivalRouter(object):
def __init__(self, routes, hostpaths=None, abs_path=True,
home_view=None, error_view=None):
def __init__(self, routes,
hostpaths=None,
port=None,
abs_path=True,
home_view=None,
error_view=None):

self.routes = routes

# optional port setting may be ignored by wsgi container
self.port = port

if hostpaths:
self.fallback = ReferRedirect(hostpaths)
else:
Expand Down
26 changes: 18 additions & 8 deletions pywb/framework/proxy.py
Expand Up @@ -8,21 +8,31 @@
# http proxy mode support is very simple so far:
# only latest capture is available currently
#=================================================================
class ProxyArchivalRouter:
def __init__(self, routes, hostpaths=None, abs_path=True,
home_view=None, error_view=None):
class ProxyArchivalRouter(ArchivalRouter):
def __init__(self, routes,
hostpaths=None,
port=None,
abs_path=True,
home_view=None,
error_view=None):

(super(ProxyArchivalRouter, self).
__init__(routes,
hostpaths=hostpaths,
port=port,
abs_path=abs_path,
home_view=home_view,
error_view=error_view))

self.archival = ArchivalRouter(routes, hostpaths, abs_path,
home_view, error_view)
self.proxy = ProxyRouter(routes[0].handler, hostpaths, error_view)
self.error_view = error_view
#self.error_view = error_view

def __call__(self, env):
response = self.archival(env)
response = self.proxy(env)
if response:
return response

response = self.proxy(env)
response = super(ProxyArchivalRouter, self).__call__(env)
if response:
return response

Expand Down
5 changes: 3 additions & 2 deletions pywb/framework/wbexceptions.py
Expand Up @@ -5,17 +5,18 @@ class NotFoundException(WbException):
def status(self):
return '404 Not Found'


# Exceptions that affect a specific capture and result in a retry
class CaptureException(WbException):
    """Capture-level error, reported with a 500 status line."""
    def status(self):
        return '500 Internal Server Error'


class InternalRedirect(WbException):
    """
    Exception signalling a redirect to ``location``.

    The exception message is 'Redirecting -> <location>' and
    ``httpHeaders`` carries the matching Location header.

    The status line is kept in a private attribute so that ``status()``
    remains callable, in line with the other WbException subclasses
    that expose ``status()`` as a method.
    """
    def __init__(self, location, status='302 Internal Redirect'):
        WbException.__init__(self, 'Redirecting -> ' + location)
        # Stored as _status: assigning to self.status would shadow the
        # status() method with a plain string on the instance, making
        # exc.status() fail with "'str' object is not callable".
        self._status = status
        self.httpHeaders = [('Location', location)]

    def status(self):
        """Return the status line supplied at construction time."""
        return self._status

31 changes: 17 additions & 14 deletions pywb/framework/wsgi_wrappers.py
Expand Up @@ -10,6 +10,8 @@
import logging


DEFAULT_PORT = 8080

#=================================================================
# adapted from wsgiref.request_uri, but doesn't include domain name
# and allows all characters which are allowed in the path segment
Expand All @@ -18,6 +20,7 @@
# http://stackoverflow.com/questions/4669692/
# valid-characters-for-directory-part-of-a-url-for-short-links


def rel_request_uri(environ, include_query=1):
"""
Return the requested path, optionally including the query string
Expand All @@ -40,14 +43,21 @@ def rel_request_uri(environ, include_query=1):


#=================================================================
def create_wb_app(wb_router):
class WSGIApp(object):
def __init__(self, wb_router):
self.wb_router = wb_router
self.port = DEFAULT_PORT
if hasattr(wb_router, 'port'):
self.port = wb_router.port

# Top-level wsgi application
def application(env, start_response):
def __call__(self, env, start_response):
if env.get('SCRIPT_NAME') or not env.get('REQUEST_URI'):
env['REL_REQUEST_URI'] = rel_request_uri(env)
else:
env['REL_REQUEST_URI'] = env['REQUEST_URI']

wb_router = self.wb_router
response = None

try:
Expand All @@ -68,8 +78,6 @@ def application(env, start_response):

return response(env, start_response)

return application


#=================================================================
def handle_exception(env, error_view, exc, print_trace):
Expand Down Expand Up @@ -126,13 +134,10 @@ def init_app(init_func, load_yaml=True, config_file=None):
msg = '*** pywb app inited with config from "%s"!\n'
logging.info(msg, init_func.__name__)

return create_wb_app(wb_router)
return WSGIApp(wb_router)


#=================================================================
DEFAULT_PORT = 8080


def start_wsgi_server(the_app):
from wsgiref.simple_server import make_server
from optparse import OptionParser
Expand All @@ -144,12 +149,10 @@ def start_wsgi_server(the_app):

port = options.port

if port is None:
try:
config = load_default_config()
port = config.get('port', DEFAULT_PORT)
except:
port = DEFAULT_PORT
port = the_app.port

if not port:
port = DEFAULT_PORT

logging.debug('Starting CDX Server on port %s', port)

Expand Down
3 changes: 3 additions & 0 deletions test_config.yaml
Expand Up @@ -90,6 +90,9 @@ enable_http_proxy: true
# enable cdx server api for querying cdx directly (experimental)
enable_cdx_api: true

# test different port
port: 9000

# optional reporter callback func
# if set, called with request and cdx object
reporter: !!python/object/new:tests.fixture.PrintReporter []
Expand Down

0 comments on commit 2d4ae62

Please sign in to comment.