Permalink
Browse files

rewrite: fix WbUrl parsing for URLs that start with a digit, e.g. 1234.example.com

split latest replay url from timestamped replay regex
add additional rewrite tests
  • Loading branch information...
1 parent 6acac67 commit 4aa6512b05d5083d2d841da81e14adba14e8ad61 @ikreymer committed Mar 23, 2015
Showing with 24 additions and 3 deletions.
  1. +2 −1 .coveragerc
  2. +12 −0 pywb/rewrite/test/test_wburl.py
  3. +10 −2 pywb/rewrite/wburl.py
View
@@ -1,10 +1,11 @@
[run]
-omit =
+omit =
*/test/*
*/tests/*
*.html
*.js
*.css
+ pywb/__init__.py
[report]
exclude_lines =
@@ -20,6 +20,18 @@
>>> repr(WbUrl('cs_/example.com'))
"('latest_replay', '', 'cs_', 'http://example.com', 'cs_/http://example.com')"
+>>> repr(WbUrl('im_/20130102.org'))
+"('latest_replay', '', 'im_', 'http://20130102.org', 'im_/http://20130102.org')"
+
+>>> repr(WbUrl('20130102.example.com'))
+"('latest_replay', '', '', 'http://20130102.example.com', 'http://20130102.example.com')"
+
+>>> repr(WbUrl('20130102.org/1'))
+"('latest_replay', '', '', 'http://20130102.org/1', 'http://20130102.org/1')"
+
+>>> repr(WbUrl('20130102/1.com'))
+"('replay', '20130102', '', 'http://1.com', '20130102/http://1.com')"
+
>>> repr(WbUrl('https://example.com/xyz'))
"('latest_replay', '', '', 'https://example.com/xyz', 'https://example.com/xyz')"
View
@@ -86,7 +86,8 @@ class WbUrl(BaseWbUrl):
# Regexs
# ======================
QUERY_REGEX = re.compile('^(?:([\w\-:]+)/)?(\d*)(?:-(\d+))?\*/?(.+)$')
- REPLAY_REGEX = re.compile('^(\d*)([a-z]+_)?/{0,3}(.+)$')
+ REPLAY_REGEX = re.compile('^(\d*)([a-z]+_)?/{1,3}(.+)$')
+ #LATEST_REPLAY_REGEX = re.compile('^\w_)')
DEFAULT_SCHEME = 'http://'
@@ -221,7 +222,14 @@ def _init_query(self, url):
def _init_replay(self, url):
replay = self.REPLAY_REGEX.match(url)
if not replay:
- return None
+ if not url:
+ return None
+
+ self.timestamp = ''
+ self.mod = ''
+ self.url = url
+ self.type = self.LATEST_REPLAY
+ return True
res = replay.groups('')

0 comments on commit 4aa6512

Please sign in to comment.