From ab8988ebfa9e4557411f3d4c0f4ccda139e18875 Mon Sep 17 00:00:00 2001 From: Santhosh Thottingal Date: Fri, 5 May 2023 11:59:50 +0530 Subject: [PATCH] Make the regex python 3.11 compatible --- wikiextractor/extract.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/wikiextractor/extract.py b/wikiextractor/extract.py index a00e23d..ff7b982 100644 --- a/wikiextractor/extract.py +++ b/wikiextractor/extract.py @@ -380,11 +380,11 @@ def dropSpans(spans, text): # as well as U+3000 is IDEOGRAPHIC SPACE for bug 19052 EXT_LINK_URL_CLASS = r'[^][<>"\x00-\x20\x7F\s]' ExtLinkBracketedRegex = re.compile( - '\[(((?i)' + '|'.join(wgUrlProtocols) + ')' + EXT_LINK_URL_CLASS + r'+)\s*([^\]\x00-\x08\x0a-\x1F]*?)\]', + r'(?i)\[((' + '|'.join(wgUrlProtocols) + ')' + EXT_LINK_URL_CLASS + r'+)\s*([^\]\x00-\x08\x0a-\x1F]*?)\]', re.S | re.U) EXT_IMAGE_REGEX = re.compile( - r"""^(http://|https://)([^][<>"\x00-\x20\x7F\s]+) - /([A-Za-z0-9_.,~%\-+&;#*?!=()@\x80-\xFF]+)\.((?i)gif|png|jpg|jpeg)$""", + r"""(?i)^(http://|https://)([^][<>"\x00-\x20\x7F\s]+) + /([A-Za-z0-9_.,~%\-+&;#*?!=()@\x80-\xFF]+)\.(gif|png|jpg|jpeg)$""", re.X | re.S | re.U)