Commit 33f8e80

remove multithreading
gravelcycles committed Jun 12, 2021
1 parent 2852d18 commit 33f8e80
Showing 5 changed files with 27 additions and 48 deletions.
7 changes: 0 additions & 7 deletions pywebcopy/core.py
@@ -15,7 +15,6 @@
 import shutil
 import zipfile
 from datetime import datetime
-import threading
 
 from .configs import config, SESSION
 from .globals import MARK, __version__, lru_cache
@@ -32,12 +31,6 @@ def zip_project(timeout=10):
     """
     # wait for the threads to finish downloading files
 
-    for thread in threading.enumerate():
-        if not thread or isinstance(thread, threading._MainThread):
-            continue
-        if thread.is_alive():
-            thread.join(timeout=timeout)
-
     zip_fn = os.path.abspath(config['project_folder']) + '.zip'
 
     with zipfile.ZipFile(zip_fn, 'w', zipfile.ZIP_DEFLATED) as archive:
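For reference, the deleted block is the standard join-all idiom: enumerate every live thread, skip the main one, and wait (with a timeout) for the rest to finish before zipping the project. A minimal standalone sketch of the same pattern, using the public threading.main_thread() rather than the private threading._MainThread the original touched:

    import threading

    def wait_for_workers(timeout=10):
        # Block until every worker thread has finished (or timed out),
        # so the archive is not written while downloads are in flight.
        for thread in threading.enumerate():
            if thread is threading.main_thread():
                continue
            if thread.is_alive():
                thread.join(timeout=timeout)

With no worker threads left anywhere in the package, this wait is pointless, which is why the commit drops it.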
23 changes: 9 additions & 14 deletions pywebcopy/crawler.py
@@ -17,7 +17,6 @@
 """
 import os
-import threading
 import warnings
 
 from .elements import TagBase
@@ -26,8 +25,6 @@
 
 
 INDEX = set()
-INDEX_LOCK = threading.Lock()
-
 
 class SubPage(TagBase):
     """Custom anchor tag handler.
@@ -93,13 +90,12 @@ def run(self):
         if _sub_page is None or not getattr(_sub_page, '_stack'):
             return
 
-        with INDEX_LOCK:
-            elements = list(_sub_page.elements)
-            for elem in elements:
-                if elem.url not in INDEX:
-                    INDEX.add(elem.url)
-                else:
-                    _sub_page.elements.remove(elem)
+        elements = list(_sub_page.elements)
+        for elem in elements:
+            if elem.url not in INDEX:
+                INDEX.add(elem.url)
+            else:
+                _sub_page.elements.remove(elem)
 
         _sub_page.save_complete()
 
@@ -150,10 +146,9 @@ def run(self):
 
         self.parse()
 
-        with INDEX_LOCK:
-            elements = list(self.elements)
-            for elem in elements:
-                INDEX.add(elem.url)
+        elements = list(self.elements)
+        for elem in elements:
+            INDEX.add(elem.url)
 
         self.save_complete()
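INDEX is the shared set that deduplicates asset URLs across sub-pages; the lock existed because several crawler threads mutated it concurrently, and it becomes dead weight once everything runs on one thread. A minimal sketch of the test-and-add pattern the lock was protecting (simplified, names hypothetical):

    import threading

    INDEX = set()
    INDEX_LOCK = threading.Lock()

    def claim_url(url):
        # Test-and-add atomically, so two threads can never both claim a URL.
        with INDEX_LOCK:
            if url in INDEX:
                return False
            INDEX.add(url)
            return True

Single-threaded, the plain `if url not in INDEX: INDEX.add(url)` in the new code is equivalent and needs no lock.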
36 changes: 17 additions & 19 deletions pywebcopy/elements.py
@@ -21,7 +21,7 @@
 from six.moves.urllib.request import pathname2url
 from .configs import config, SESSION
 from .core import is_allowed
-from .globals import CSS_IMPORTS_RE, CSS_URLS_RE, POOL_LIMIT, MARK, __version__, lru_cache
+from .globals import CSS_IMPORTS_RE, CSS_URLS_RE, MARK, __version__, lru_cache
 from .urls import URLTransformer, relate
 
 __all__ = ['TagBase', 'AnchorTag', 'ImgTag', 'ScriptTag', 'LinkTag', '_ElementFactory']
@@ -53,8 +53,7 @@ def __repr__(self):
 
     def run(self):
         # XXX: This could wait for any condition
-        with POOL_LIMIT:
-            self.download_file()
+        self.download_file()
 
     save_file = run
 
@@ -302,23 +301,22 @@ def run(self):
         Thus css file content needs to be searched for urls and then it will proceed
         as usual.
         """
-        with POOL_LIMIT:
-            if os.path.exists(self.file_path):
-                if not config['over_write']:
-                    LOGGER.info("File already exists at location: [%r]" % self.file_path)
-                    return
-            # LinkTags can also be specified for elements like favicon etc.
-            # Thus a check is necessary to validate it is a proper css file or not.
-            if not self._url.endswith('.css'):
-                super(LinkTag, self).run()
-
-            # Custom request object creation
-            req = SESSION.get(self.url, stream=True)
-
-            # if some error occurs
-            if not req or not req.ok:
-                LOGGER.error("URL returned an unknown response: [%s]" % self.url)
-                return
+        if os.path.exists(self.file_path):
+            if not config['over_write']:
+                LOGGER.info("File already exists at location: [%r]" % self.file_path)
+                return
+        # LinkTags can also be specified for elements like favicon etc.
+        # Thus a check is necessary to validate it is a proper css file or not.
+        if not self._url.endswith('.css'):
+            super(LinkTag, self).run()
+
+        # Custom request object creation
+        req = SESSION.get(self.url, stream=True)
+
+        # if some error occurs
+        if not req or not req.ok:
+            LOGGER.error("URL returned an unknown response: [%s]" % self.url)
+            return
 
         # Try to avoid pulling the contents in the ram
         # while substituting urls in the contents would NOT
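The stream=True request and the "avoid pulling the contents in the ram" comment refer to requests' streaming mode: the response body is consumed in chunks rather than loaded whole. A minimal sketch of that idiom, independent of pywebcopy (download is a hypothetical helper, not the library's API):

    import requests

    def download(url, file_path, chunk_size=8192):
        # stream=True defers the body; iter_content() then yields it chunk
        # by chunk, so a large asset never has to fit in memory at once.
        with requests.get(url, stream=True) as resp:
            if not resp.ok:
                return False
            with open(file_path, 'wb') as fh:
                for chunk in resp.iter_content(chunk_size=chunk_size):
                    fh.write(chunk)
        return True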
2 changes: 0 additions & 2 deletions pywebcopy/globals.py
@@ -1,7 +1,6 @@
 # -*- coding: utf-8 -*-
 import re
 import textwrap
-import threading
 
 from . import __version__
 
@@ -94,7 +93,6 @@ def _cache_wrapper(self, caller, *args, **kwargs):
 # under control so that the resource overloading could
 # be prevented and the program remains memory efficient
 #: new in version: 6.0.0
-POOL_LIMIT = threading.Semaphore(5)
 
 MARK = textwrap.dedent("""
 {0}
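POOL_LIMIT was a counting semaphore capping concurrent workers at five, which is what the surrounding comment about keeping resource overloading under control describes. The throttling pattern itself, as a minimal standalone sketch (fetch is a hypothetical download function):

    import threading

    POOL_LIMIT = threading.Semaphore(5)  # at most 5 holders at a time

    def download_one(url):
        # Each caller blocks here until one of the 5 slots frees up.
        with POOL_LIMIT:
            fetch(url)

With the thread pool gone there is never more than one caller, so the semaphore no longer limits anything and can be deleted.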
7 changes: 1 addition & 6 deletions pywebcopy/webpage.py
@@ -32,10 +32,8 @@
 
 import logging
 import os
-import threading
 from operator import attrgetter
 
-from .globals import POOL_LIMIT
 from .configs import SESSION, config
 from .elements import _ElementFactory, LinkTag, ScriptTag, ImgTag, AnchorTag, TagBase
 from .exceptions import ParseError
@@ -228,10 +226,7 @@ def save_assets(self):
         LOGGER.log(100, "Queueing download of <%d> asset files." % len(elms))
 
         for elem in elms:
-            with POOL_LIMIT:
-                t = threading.Thread(name=repr(elem), target=elem.run)
-                t.start()
-                self._threads.append(t)
+            elem.run()
 
     def save_html(self, file_name=None, raw_html=False):
         """Saves the html of the page to a default or specified file.
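This is the core of the change: save_assets() used to start one thread per asset and now downloads each asset in order on the calling thread. A simplified sketch of the two shapes (elements stands in for the page's asset list; not pywebcopy's exact code):

    import threading

    def save_assets_threaded(elements, pool_limit):
        # Old shape: one thread per asset. The semaphore around start()
        # gates the spawn rate; the matching acquire inside run()
        # (see elements.py above) capped the actual concurrency.
        threads = []
        for elem in elements:
            with pool_limit:
                t = threading.Thread(name=repr(elem), target=elem.run)
                t.start()
                threads.append(t)
        return threads

    def save_assets_sequential(elements):
        # New shape: plain loop, no threads to track or join afterwards.
        for elem in elements:
            elem.run()

The sequential shape is also what lets core.py drop its join loop: there are no outstanding threads left to wait on before zipping.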
