From b4eac55da67d9c2c521e0ece1d4b65a1a6bd14cc Mon Sep 17 00:00:00 2001
From: Kapil Kukreja <37610242+Kapilhk@users.noreply.github.com>
Date: Fri, 25 Jun 2021 14:35:41 +0530
Subject: [PATCH] Corrected the logic to avoid redirect pages

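The original condition accepted every title in which no colon was found
(colon < 0). Because `and` binds more tightly than `or`, the
`id != last_id`, `not redirect` and template-namespace checks were only
applied to titles whose prefix is in acceptedNamespaces, so redirect
pages without a colon in their title were still queued. Parenthesizing
the namespace test makes those checks apply to all titles.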
---
 wikiextractor/WikiExtractor.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/wikiextractor/WikiExtractor.py b/wikiextractor/WikiExtractor.py
index 509158a0..7a86874a 100755
--- a/wikiextractor/WikiExtractor.py
+++ b/wikiextractor/WikiExtractor.py
@@ -421,7 +421,7 @@ def process_dump(input_file, template_file, out_file, file_size, file_compress,
             page.append(line)
         elif tag == '/page':
             colon = title.find(':')
-            if (colon < 0 or (title[:colon] in acceptedNamespaces) and id != last_id and
+            if (colon < 0 or (title[:colon] in acceptedNamespaces)) and (id != last_id and
                     not redirect and not title.startswith(templateNamespace)):
                 job = (id, revid, urlbase, title, page, ordinal)
                 jobs_queue.put(job)  # goes to any available extract_process