diff --git a/wikiextractor/WikiExtractor.py b/wikiextractor/WikiExtractor.py index 509158a0..7a86874a 100755 --- a/wikiextractor/WikiExtractor.py +++ b/wikiextractor/WikiExtractor.py @@ -421,7 +421,7 @@ def process_dump(input_file, template_file, out_file, file_size, file_compress, page.append(line) elif tag == '/page': colon = title.find(':') - if (colon < 0 or (title[:colon] in acceptedNamespaces) and id != last_id and + if (colon < 0 or (title[:colon] in acceptedNamespaces)) and (id != last_id and not redirect and not title.startswith(templateNamespace)): job = (id, revid, urlbase, title, page, ordinal) jobs_queue.put(job) # goes to any available extract_process