Skip to content

Commit

Permalink
Process URLs in multiple threads
Browse files Browse the repository at this point in the history
  • Loading branch information
amgedr committed Feb 26, 2018
1 parent 7b476be commit f692211
Showing 1 changed file with 22 additions and 10 deletions.
32 changes: 22 additions & 10 deletions webchk/__main__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,24 @@
import sys
import threading
from .utils import get_parser, read_input_file
from .http import http_response
from . import __version__


def _process_url(url, parse_xml):
    """Check one URL: print its response, any redirect chain, and any
    sitemap URLs found in its body.

    Runs as a thread target, so all results go straight to stdout.
    """
    resp = http_response(url, parse=parse_xml)
    print(resp)

    # Walk the redirect chain, printing each hop indented under the response.
    hop = resp.redirect
    while hop:
        print(' {}'.format(hop))
        hop = hop.redirect

    # sitemap_urls may be falsy (empty/None) when the response is not a sitemap.
    found = resp.sitemap_urls
    if found:
        for entry in found:
            print(' {}'.format(entry))


def process_urls(urls, list_only=False, parse_xml=False):
    """Loops through the list of URLs and performs the checks.

    With list_only set, each URL is simply printed and no request is made.
    Otherwise every URL is checked concurrently in its own thread via
    _process_url, and this function blocks until all checks finish.
    """
    workers = []

    for url in urls:
        # In list-only mode nothing is fetched; just echo the URL.
        if list_only:
            print(url)
            continue

        worker = threading.Thread(target=_process_url, args=(url, parse_xml))
        worker.start()
        workers.append(worker)

    # Wait for every spawned check before returning.
    for worker in workers:
        worker.join()


def main():
Expand Down

0 comments on commit f692211

Please sign in to comment.