Skip to content

Commit

Permalink
cherrypicked lexisnexis fix
Browse files Browse the repository at this point in the history
  • Loading branch information
Toon Alfrink committed Apr 9, 2014
1 parent 5456728 commit a5cb344
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 2 deletions.
4 changes: 2 additions & 2 deletions amcat/scripts/article_upload/lexisnexis.py
Expand Up @@ -42,8 +42,8 @@

# Regular expressions used for parsing document
class RES:
# Match at least 20 whitespace characters, followed by # of # DOCUMENTS.
DOCUMENT_COUNT = re.compile(" {20,}(FOCUS -)? *\d* of \d* DOCUMENT")
# Match at least 20 spaces or at least 7 tabs, then an optional "FOCUS -" prefix, followed by "# of # DOCUMENT" or "# of # DOCUMENTS".
DOCUMENT_COUNT = re.compile("( {20,}|\t{7,})(FOCUS -)? *\d* of \d* DOCUMENTS?")

# Header meta information group match
HEADER_META = re.compile("([\w -]*):(.*)", re.UNICODE)
Expand Down
1 change: 1 addition & 0 deletions scrapers
Submodule scrapers added at a4f06b

0 comments on commit a5cb344

Please sign in to comment.