prepare for 0.1.1 release
bmjjr committed Nov 17, 2018
1 parent b5562c4 · commit 70113c6
Showing 8 changed files with 34 additions and 18 deletions.
3 changes: 3 additions & 0 deletions CHANGES
@@ -2,6 +2,9 @@
 transistor: CHANGES
 ========================

+Nov 17, 2018
+    - pypi 0.1.1 release
+
 Nov 16, 2018
     - standardized SplashScraper attributes: 'auth', 'baseurl', 'browser', 'cookies',
       'crawlera_user', 'http_session_timeout', 'http_session_valid', 'LUA_SOURCE',
4 changes: 3 additions & 1 deletion MANIFEST.in
@@ -1,5 +1,7 @@
-include LICENSE README.rst
+include LICENSE AUTHORS CHANGES README.rst
 recursive-include tests *.py
 recursive-include tests *.xlsx
+recursive-include tests *.html
+recursive-include examples/*.py
 include requirements/dev.txt requirements/prod.txt
 include docs/*
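
The practical effect of these manifest changes is that AUTHORS, CHANGES, the HTML test fixtures, and the examples now ship with the source distribution. A quick sanity check is to list the sdist contents; a minimal sketch in Python, assuming the dist/transistor-0.1.1.tar.gz artifact added in this commit is present locally:

    # Sketch: list the sdist contents and verify the newly-included files.
    import tarfile

    with tarfile.open("dist/transistor-0.1.1.tar.gz") as sdist:
        names = sdist.getnames()

    # Paths inside the sdist carry a 'transistor-0.1.1/' prefix.
    for expected in ("AUTHORS", "CHANGES", "LICENSE", "README.rst"):
        assert any(name.endswith(expected) for name in names), expected
    print(len(names), "files in sdist")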
8 changes: 5 additions & 3 deletions README.rst
@@ -3,10 +3,12 @@

 **Web data collection and storage for intelligent use cases.**

-.. image:: https://img.shields.io/badge/Python-3.5%20%7C%203.6%20%7C%203.7-blue.svg
-.. image:: https://img.shields.io/badge/pypi-0.1-blue.svg
-    :target: https://pypi.org/project/transistor/0.1.0/
+.. image:: https://img.shields.io/badge/Python-3.6%20%7C%203.7-blue.svg
+    :target: https://github.com/bomquote/transistor
+.. image:: https://img.shields.io/badge/pypi%20package-0.1.1-blue.svg
+    :target: https://pypi.org/project/transistor/0.1.1/
 .. image:: https://img.shields.io/badge/Status-Beta-blue.svg
     :target: https://github.com/bomquote/transistor
 .. image:: https://ci.appveyor.com/api/projects/status/xfg2yedwyrbyxysy/branch/master?svg=true
     :target: https://ci.appveyor.com/project/bomquote/transistor
+.. image:: https://pyup.io/repos/github/bomquote/transistor/shield.svg?t=1542037265283
Binary file added dist/transistor-0.1.1-py3-none-any.whl
Binary file not shown.
Binary file added dist/transistor-0.1.1.tar.gz
Binary file not shown.
10 changes: 5 additions & 5 deletions setup.py
@@ -13,8 +13,8 @@
 URL = 'https://github.com/bomquote/transistor'
 EMAIL = 'bmjjr@bomquote.com'
 AUTHOR = 'Bob Jordan'
-REQUIRES_PYTHON = '>=3.6.0'
-VERSION = '0.1.0'
+REQUIRES_PYTHON = '>=3.5.0'
+VERSION = '0.1.1'

 # What packages are required for this module to be executed?
 REQUIRED = [
@@ -124,12 +124,12 @@ def run(self):
     author_email=EMAIL,
     python_requires=REQUIRES_PYTHON,
     url=URL,
-    download_url='https://github.com/bomquote/transistor/archive/v0.1.0.tar.gz',
+    download_url='https://github.com/bomquote/transistor/archive/v0.1.1.tar.gz',
     keywords=['scraping', 'crawling', 'spiders', 'requests', 'beautifulsoup4',
               'mechanicalsoup', 'framework', 'headless-browser'],
-    # packages=find_packages(exclude=('tests',)),
+    packages=find_packages(exclude=('tests',)),
     # If your package is a single module, use this instead of 'packages':
-    py_modules=['transistor'],
+    # py_modules=['transistor'],

     # entry_points={
     #     'console_scripts': ['mycli=mymodule:cli'],
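
The substantive packaging fix in this hunk is swapping the commented-out py_modules=['transistor'] for packages=find_packages(exclude=('tests',)): 0.1.0 was built as if transistor were a single top-level module, while 0.1.1 ships the full package tree. A minimal sketch to confirm what find_packages() picks up, run from the repository root (subpackage names are inferred from the transistor/__init__.py imports below, not verified against the repo):

    # Sketch: show the packages that setup() will now include in the build.
    from setuptools import find_packages

    packages = find_packages(exclude=('tests',))
    print(packages)
    # Expected to include 'transistor' and subpackages such as
    # 'transistor.browsers', 'transistor.managers', 'transistor.scrapers',
    # and 'transistor.workers' (inferred from the imports in __init__.py).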
5 changes: 4 additions & 1 deletion transistor/__init__.py
@@ -13,4 +13,7 @@
 from .browsers import SplashBrowser
 from .managers import BaseWorkGroupManager
 from .scrapers import SplashScraper
-from .workers import BaseWorker, BaseGroup, WorkGroup
\ No newline at end of file
+from .workers import BaseWorker, BaseGroup, WorkGroup
+
+__all__ = [BaseGroup, BaseWorker, BaseWorkGroupManager, SplashBrowser,
+           SplashScraper, StatefulBook, WorkGroup]
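
With these exports, the package's public names are importable straight from transistor. One caveat: __all__ conventionally lists names as strings; binding the objects themselves works for ordinary imports but makes from transistor import * raise TypeError. A sketch of the conventional string form, for comparison:

    # Sketch: the string form of __all__ that star-imports expect.
    __all__ = ['BaseGroup', 'BaseWorker', 'BaseWorkGroupManager', 'SplashBrowser',
               'SplashScraper', 'StatefulBook', 'WorkGroup']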
22 changes: 14 additions & 8 deletions transistor/managers/base_manager.py
@@ -31,6 +31,9 @@ class BaseWorkGroupManager:
"""
Base class for a WorkGroupManager.
"""
__attrs__ = [
'book', 'job_id', 'groups', 'trackers', 'pool', 'qitems', 'workgroups',
]

def __init__(self, job_id, book, groups:list, pool:int=20):
"""
@@ -40,13 +43,16 @@ def __init__(self, job_id, book, groups:list, pool:int=20):
         :param job_id: will save the result of the workers Scrapes to `job_id` list.
          If this job_id is "NONE" then it will pass on the save.
         :param book: a StatefulBook instance
-        :param pool: size of the greenlets pool, should be at least the total number
-         of all workers + 1 for the manager
-        :param groups: a list WorkGroup(<WorkGroup> class object,
-         number_of_workers, **kwargs)
+        :param pool: size of the greenlets pool. If you want to utilize all the
+         workers concurrently, it should be at least the total number
+         of all workers + 1 for the manager. Otherwise, the pool is useful to
+         constrain concurrency to help stay within Crawlera subscription limits.
+        :param groups: a list of class: `WorkGroup()` objects.
         Example:
-        groups = [WorkGroup(class_=MouserGroup, workers=10, kwargs={"china":True}),
-         WorkGroup(class_=MouserGroup, workers=0, kwargs={})]
+        >>> groups = [
+        >>>     WorkGroup(class_=MouseKeyGroup, workers=5, kwargs={"china":True}),
+        >>>     WorkGroup(class_=MouseKeyGroup, workers=5, kwargs={})
+        >>> ]
         :param pool: number of greenlets to create
         """
         self.job_id = job_id
@@ -113,7 +119,7 @@ def _init_workers(self):
     def spawn_list(self):
         """"
         The spawn() method begins a new greenlet with the given arguments
-        (which are passed to the greenlet constructor) and add it to the
+        (which are passed to the greenlet constructor) and adds it to the
         collection of greenlets this group is monitoring.

         We return a list of the newly started greenlets, used in a later
@@ -138,7 +144,7 @@ def monitor(self, target):
         This method actually spawns the scraper and then the purpose is to allow
         some additional final actions to be performed on the scraper object after
         the worker completes the scrape job, but before it shuts down and the object
-        instance is lost (though the ScraperNewt object will exist in the db).
+        instance is lost (though the ScraperShell object will exist in the db).

         The simplest example which must be implemented:
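
Taken together with the docstring changes above, the intended wiring is: one WorkGroup entry per group of workers, and a pool of at least total workers + 1 if every worker should run concurrently. A hedged end-to-end sketch (MouseKeyGroup is the docstring's example name and must be defined elsewhere; the StatefulBook arguments and the main() entry point are assumptions, not shown in this diff):

    # Sketch of wiring a BaseWorkGroupManager per the revised docstring.
    from transistor import BaseWorkGroupManager, StatefulBook, WorkGroup
    from my_scrapers import MouseKeyGroup  # hypothetical module providing the group

    # StatefulBook arguments are hypothetical; the diff only says "a StatefulBook instance".
    book = StatefulBook(file_name='part_numbers.xlsx', trackers=['mousekey.cn'])

    groups = [
        WorkGroup(class_=MouseKeyGroup, workers=5, kwargs={'china': True}),
        WorkGroup(class_=MouseKeyGroup, workers=5, kwargs={}),
    ]

    # 10 workers total + 1 manager greenlet; a smaller pool throttles
    # concurrency, e.g. to stay inside Crawlera subscription limits.
    manager = BaseWorkGroupManager(job_id='mousekey_job', book=book, groups=groups, pool=11)
    manager.main()  # assumed entry point; not shown in this diff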
