Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fuzzy match: add support for specifying regex and args separately for
fuzzy_lookup match
- Loading branch information
Showing
3 changed files
with
62 additions
and
22 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
r""" | ||
Load Rules | ||
>>> (canon, fuzzy) = load_domain_specific_cdx_rules(None, True) | ||
>>> canon('http://test.example.example/path/index.html?a=b&id=value&c=d') | ||
'example,example,test)/path/index.html?id=value' | ||
# Fuzzy Query Args Builder | ||
>>> CDXDomainSpecificRule.make_query_match_regex(['para', 'id', 'abc']) | ||
'[?&](abc=[^&]+).*[?&](id=[^&]+).*[?&](para=[^&]+)' | ||
>>> CDXDomainSpecificRule.make_query_match_regex(['id[0]', 'abc()']) | ||
'[?&](abc\\(\\)=[^&]+).*[?&](id\\[0\\]=[^&]+)' | ||
# Fuzzy Match Query + Args | ||
# list | ||
>>> CDXDomainSpecificRule.make_regex(['para', 'id', 'abc']).pattern | ||
'[?&](abc=[^&]+).*[?&](id=[^&]+).*[?&](para=[^&]+)' | ||
# dict | ||
>>> CDXDomainSpecificRule.make_regex(dict(regex='com,test,.*\)/', args=['para', 'id', 'abc'])).pattern | ||
'com,test,.*\\)/[?&](abc=[^&]+).*[?&](id=[^&]+).*[?&](para=[^&]+)' | ||
# string | ||
>>> CDXDomainSpecificRule.make_regex('com,test,.*\)/[?&](abc=[^&]+).*[?&](id=[^&]+).*[?&](para=[^&]+)').pattern | ||
'com,test,.*\\)/[?&](abc=[^&]+).*[?&](id=[^&]+).*[?&](para=[^&]+)' | ||
""" | ||
|
||
|
||
from pywb.cdx.cdxdomainspecific import CDXDomainSpecificRule | ||
from pywb.cdx.cdxdomainspecific import load_domain_specific_cdx_rules | ||
|
||
|
||
if __name__ == "__main__": | ||
import doctest | ||
doctest.testmod() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters