Permalink
Browse files

pywb 0.2!

move to distinct packages: pywb.utils, pywb.cdx, pywb.warc, pywb.rewrite!
each package will have its own README and tests
shared sample_data and install
  • Loading branch information...
1 parent 2528ee0 commit 5345459298d5fea399ac9def52bb6b4af6e532c5 @ikreymer committed Feb 17, 2014
Showing with 2,950 additions and 2,184 deletions.
  1. +0 −2 __init__.py
  2. +3 −2 pywb/__init__.py
  3. +2 −95 pywb/archivalrouter.py
  4. +0 −461 pywb/archiveloader.py
  5. +0 −123 pywb/binsearch/binsearch.py
  6. +36 −0 pywb/cdx/README.md
  7. 0 pywb/{cdxserver → cdx}/__init__.py
  8. +13 −9 pywb/{cdxserver → cdx}/cdxobject.py
  9. +30 −36 pywb/{cdxserver → cdx}/cdxops.py
  10. +75 −49 pywb/{cdxserver → cdx}/cdxserver.py
  11. +92 −0 pywb/cdx/cdxsource.py
  12. +3 −0 pywb/cdx/config.yaml
  13. +163 −0 pywb/cdx/test/cdxserver_test.py
  14. +72 −0 pywb/cdx/wsgi_cdxserver.py
  15. +0 −42 pywb/cdxserver/cdxapp.py
  16. +13 −38 pywb/config_utils.py
  17. +33 −20 pywb/handlers.py
  18. +31 −144 pywb/indexreader.py
  19. +11 −22 pywb/pywb_init.py
  20. +0 −269 pywb/regex_rewriters.py
  21. +58 −320 pywb/replay_views.py
  22. +47 −0 pywb/rewrite/README.md
  23. 0 pywb/{binsearch → rewrite}/__init__.py
  24. +1 −49 pywb/{ → rewrite}/header_rewriter.py
  25. +2 −81 pywb/{ → rewrite}/html_rewriter.py
  26. +156 −0 pywb/rewrite/regex_rewriters.py
  27. +151 −0 pywb/rewrite/rewrite_content.py
  28. +68 −0 pywb/rewrite/rewrite_live.py
  29. +266 −0 pywb/rewrite/test/test_rewrite.py
  30. +32 −0 pywb/rewrite/test/test_rewrite_live.py
  31. +18 −20 pywb/{ → rewrite}/url_rewriter.py
  32. +50 −9 pywb/{ → rewrite}/wburl.py
  33. +0 −122 pywb/utils.py
  34. +16 −0 pywb/utils/README.md
  35. 0 pywb/utils/__init__.py
  36. +110 −0 pywb/utils/binsearch.py
  37. +204 −0 pywb/utils/bufferedreaders.py
  38. +152 −0 pywb/utils/loaders.py
  39. +107 −0 pywb/utils/statusandheaders.py
  40. +52 −0 pywb/utils/test/binsearch_test.py
  41. +69 −0 pywb/utils/test/loaders_test.py
  42. +18 −9 pywb/{cdxserver → utils}/timeutils.py
  43. +1 −1 pywb/views.py
  44. +22 −0 pywb/warc/README.md
  45. 0 pywb/warc/__init__.py
  46. +39 −24 pywb/{replay_resolvers.py → warc/pathresolvers.py}
  47. +161 −0 pywb/warc/recordloader.py
  48. +176 −0 pywb/warc/resolvingloader.py
  49. +199 −0 pywb/warc/test/test_loading.py
  50. +27 −4 pywb/wbapp.py
  51. +3 −33 pywb/wbrequestresponse.py
  52. +3 −0 run-tests.py
  53. BIN sample_archive/cdx/iana.cdx.gz
  54. BIN sample_archive/warcs/example-wget-1-14.warc.gz
  55. +69 −0 sample_archive/warcs/example.arc
  56. BIN sample_archive/warcs/example.arc.gz
  57. +5 −5 setup.py
  58. +88 −0 tests/test_archivalrouter.py
  59. +0 −43 tests/test_binsearch.py
  60. +0 −149 tests/test_cdxserve.py
  61. +3 −3 tests/test_integration.py
View
@@ -1,2 +0,0 @@
-#Allow importing
-
View
@@ -1,3 +1,4 @@
-#Allow importing
-
+import os
+def get_test_dir():
+ return os.path.dirname(os.path.realpath(__file__)) + '/../sample_archive/'
@@ -3,8 +3,8 @@
import wbexceptions
from wbrequestresponse import WbRequest, WbResponse
-from url_rewriter import UrlRewriter
-from wburl import WbUrl
+from pywb.rewrite.url_rewriter import UrlRewriter
+from pywb.rewrite.wburl import WbUrl
#=================================================================
# ArchivalRouter -- route WB requests in archival mode
@@ -45,20 +45,6 @@ def render_home_page(self):
# of request uri (excluding first '/')
#=================================================================
class Route:
- """
- # route with relative path
- >>> Route('web', handlers.BaseHandler())({'REL_REQUEST_URI': '/web/test.example.com', 'SCRIPT_NAME': ''}, False)
- {'wb_url': ('latest_replay', '', '', 'http://test.example.com', 'http://test.example.com'), 'coll': 'web', 'wb_prefix': '/web/', 'request_uri': '/web/test.example.com'}
-
- # route with absolute path, running at script /my_pywb
- >>> Route('web', handlers.BaseHandler())({'REL_REQUEST_URI': '/web/2013im_/test.example.com', 'SCRIPT_NAME': '/my_pywb', 'HTTP_HOST': 'localhost:8081', 'wsgi.url_scheme': 'https'}, True)
- {'wb_url': ('replay', '2013', 'im_', 'http://test.example.com', '2013im_/http://test.example.com'), 'coll': 'web', 'wb_prefix': 'https://localhost:8081/my_pywb/web/', 'request_uri': '/web/2013im_/test.example.com'}
-
-
- # not matching route -- skipped
- >>> Route('web', handlers.BaseHandler())({'REL_REQUEST_URI': '/other/test.example.com', 'SCRIPT_NAME': ''}, False)
- """
-
# match up to the next / or ? or end
SLASH_QUERY_LOOKAHEAD ='(?=/|$|\?)'
@@ -127,57 +113,6 @@ def __str__(self):
# ReferRedirect -- redirect urls that have 'fallen through' based on the referrer settings
#=================================================================
class ReferRedirect:
-
- """
- >>> ReferRedirect('http://localhost:8080/').match_prefixs
- ['http://localhost:8080/']
-
- >>> ReferRedirect(['http://example:9090/']).match_prefixs
- ['http://example:9090/']
-
- >>> test_redir('http://localhost:8080/', '/diff_path/other.html', 'http://localhost:8080/coll/20131010/http://example.com/path/page.html')
- 'http://localhost:8080/coll/20131010/http://example.com/diff_path/other.html'
-
- >>> test_redir('http://localhost:8080/', '/../other.html', 'http://localhost:8080/coll/20131010/http://example.com/path/page.html')
- 'http://localhost:8080/coll/20131010/http://example.com/other.html'
-
- >>> test_redir('http://localhost:8080/', '/../../other.html', 'http://localhost:8080/coll/20131010/http://example.com/index.html')
- 'http://localhost:8080/coll/20131010/http://example.com/other.html'
-
- # Custom collection
- >>> test_redir('http://localhost:8080/', '/other.html', 'http://localhost:8080/complex/123/20131010/http://example.com/path/page.html', coll='complex/123')
- 'http://localhost:8080/complex/123/20131010/http://example.com/other.html'
-
- # With timestamp included
- >>> test_redir('http://localhost:8080/', '/20131010/other.html', 'http://localhost:8080/coll/20131010/http://example.com/index.html')
- 'http://localhost:8080/coll/20131010/http://example.com/other.html'
-
- # With timestamp included
- >>> test_redir('http://localhost:8080/', '/20131010/path/other.html', 'http://localhost:8080/coll/20131010/http://example.com/some/index.html')
- 'http://localhost:8080/coll/20131010/http://example.com/path/other.html'
-
- # Wrong Host
- >>> test_redir('http://example:8080/', '/other.html', 'http://localhost:8080/coll/20131010/http://example.com/path/page.html')
- False
-
- # Right Host
- >>> test_redir('http://localhost:8080/', '/other.html', 'http://example.com:8080/coll/20131010/http://example.com/path/page.html', http_host = 'example.com:8080')
- 'http://example.com:8080/coll/20131010/http://example.com/other.html'
-
- # With custom SCRIPT_NAME
- >>> test_redir('http://localhost:8080/', '/../other.html', 'http://localhost:8080/extra/coll/20131010/http://example.com/path/page.html', '/extra')
- 'http://localhost:8080/extra/coll/20131010/http://example.com/other.html'
-
- # With custom SCRIPT_NAME + timestamp
- >>> test_redir('http://localhost:8080/', '/20131010/other.html', 'http://localhost:8080/extra/coll/20131010/http://example.com/path/page.html', '/extra')
- 'http://localhost:8080/extra/coll/20131010/http://example.com/other.html'
-
- # With custom SCRIPT_NAME, bad match
- >>> test_redir('http://localhost:8080/', '/../other.html', 'http://localhost:8080/extra/coll/20131010/http://example.com/path/page.html', '/extr')
- False
-
- """
-
def __init__(self, match_prefixs):
if isinstance(match_prefixs, list):
self.match_prefixs = match_prefixs
@@ -240,31 +175,3 @@ def __call__(self, env, routes):
final_url = urlparse.urlunsplit((ref_split.scheme, ref_split.netloc, rewriter.rewrite(rel_request_uri), '', ''))
return WbResponse.redir_response(final_url)
-
-
-import utils
-if __name__ == "__main__" or utils.enable_doctests():
-
- import handlers
-
- def test_redir(match_host, request_uri, referrer, script_name = '', coll = 'coll', http_host = None):
- env = {'REL_REQUEST_URI': request_uri, 'HTTP_REFERER': referrer, 'SCRIPT_NAME': script_name}
-
- if http_host:
- env['HTTP_HOST'] = http_host
-
- routes = [Route(coll, handlers.BaseHandler())]
-
- redir = ReferRedirect(match_host)
- #req = WbRequest.from_uri(request_uri, env)
- rep = redir(env, routes)
- if not rep:
- return False
-
- return rep.status_headers.get_header('Location')
-
-
- import doctest
- doctest.testmod()
-
-
Oops, something went wrong.

0 comments on commit 5345459

Please sign in to comment.