prepare for 0.1.1 release
bmjjr committed Nov 17, 2018
1 parent b5562c4 · commit 70113c6
Showing 8 changed files with 34 additions and 18 deletions.
3 changes: 3 additions & 0 deletions CHANGES
@@ -2,6 +2,9 @@
 transistor: CHANGES
 ========================

+Nov 17, 2018
+    - pypi 0.1.1 release
+
 Nov 16, 2018
     - standardized SplashScraper attributes: 'auth', 'baseurl', 'browser', 'cookies',
       'crawlera_user', 'http_session_timeout', 'http_session_valid', 'LUA_SOURCE',
4 changes: 3 additions & 1 deletion MANIFEST.in
@@ -1,5 +1,7 @@
-include LICENSE README.rst
+include LICENSE AUTHORS CHANGES README.rst
 recursive-include tests *.py
 recursive-include tests *.xlsx
+recursive-include tests *.html
+recursive-include examples/*.py
 include requirements/dev.txt requirements/prod.txt
 include docs/*
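
The practical effect of these manifest changes is that AUTHORS, CHANGES, the HTML test fixtures, and the examples now ship with the source distribution. A quick sanity check is to list the sdist contents; a minimal sketch in Python, assuming the dist/transistor-0.1.1.tar.gz artifact added in this commit is present locally:

    # Sketch: list the sdist contents and verify the newly-included files.
    import tarfile

    with tarfile.open("dist/transistor-0.1.1.tar.gz") as sdist:
        names = sdist.getnames()

    # Paths inside the sdist carry a 'transistor-0.1.1/' prefix.
    for expected in ("AUTHORS", "CHANGES", "LICENSE", "README.rst"):
        assert any(name.endswith(expected) for name in names), expected
    print(len(names), "files in sdist")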
8 changes: 5 additions & 3 deletions README.rst
@@ -3,10 +3,12 @@

 **Web data collection and storage for intelligent use cases.**

-.. image:: https://img.shields.io/badge/Python-3.5%20%7C%203.6%20%7C%203.7-blue.svg
-.. image:: https://img.shields.io/badge/pypi-0.1-blue.svg
-    :target: https://pypi.org/project/transistor/0.1.0/
+.. image:: https://img.shields.io/badge/Python-3.6%20%7C%203.7-blue.svg
+    :target: https://github.com/bomquote/transistor
+.. image:: https://img.shields.io/badge/pypi%20package-0.1.1-blue.svg
+    :target: https://pypi.org/project/transistor/0.1.1/
 .. image:: https://img.shields.io/badge/Status-Beta-blue.svg
     :target: https://github.com/bomquote/transistor
 .. image:: https://ci.appveyor.com/api/projects/status/xfg2yedwyrbyxysy/branch/master?svg=true
     :target: https://ci.appveyor.com/project/bomquote/transistor
+.. image:: https://pyup.io/repos/github/bomquote/transistor/shield.svg?t=1542037265283
Binary file added dist/transistor-0.1.1-py3-none-any.whl
Binary file not shown.
Binary file added dist/transistor-0.1.1.tar.gz
Binary file not shown.
10 changes: 5 additions & 5 deletions setup.py
@@ -13,8 +13,8 @@
 URL = 'https://github.com/bomquote/transistor'
 EMAIL = 'bmjjr@bomquote.com'
 AUTHOR = 'Bob Jordan'
-REQUIRES_PYTHON = '>=3.6.0'
-VERSION = '0.1.0'
+REQUIRES_PYTHON = '>=3.5.0'
+VERSION = '0.1.1'

 # What packages are required for this module to be executed?
 REQUIRED = [
@@ -124,12 +124,12 @@ def run(self):
     author_email=EMAIL,
     python_requires=REQUIRES_PYTHON,
     url=URL,
-    download_url='https://github.com/bomquote/transistor/archive/v0.1.0.tar.gz',
+    download_url='https://github.com/bomquote/transistor/archive/v0.1.1.tar.gz',
     keywords=['scraping', 'crawling', 'spiders', 'requests', 'beautifulsoup4',
               'mechanicalsoup', 'framework', 'headless-browser'],
-    # packages=find_packages(exclude=('tests',)),
+    packages=find_packages(exclude=('tests',)),
     # If your package is a single module, use this instead of 'packages':
-    py_modules=['transistor'],
+    # py_modules=['transistor'],

     # entry_points={
     #     'console_scripts': ['mycli=mymodule:cli'],
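
The substantive packaging fix in this hunk is swapping the commented-out py_modules=['transistor'] for packages=find_packages(exclude=('tests',)): 0.1.0 was built as if transistor were a single top-level module, while 0.1.1 ships the full package tree. A minimal sketch to confirm what find_packages() picks up, run from the repository root (subpackage names are inferred from the transistor/__init__.py imports below, not verified against the repo):

    # Sketch: show the packages that setup() will now include in the build.
    from setuptools import find_packages

    packages = find_packages(exclude=('tests',))
    print(packages)
    # Expected to include 'transistor' and subpackages such as
    # 'transistor.browsers', 'transistor.managers', 'transistor.scrapers',
    # and 'transistor.workers' (inferred from the imports in __init__.py).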
5 changes: 4 additions & 1 deletion transistor/__init__.py
@@ -13,4 +13,7 @@
 from .browsers import SplashBrowser
 from .managers import BaseWorkGroupManager
 from .scrapers import SplashScraper
-from .workers import BaseWorker, BaseGroup, WorkGroup
\ No newline at end of file
+from .workers import BaseWorker, BaseGroup, WorkGroup
+
+__all__ = [BaseGroup, BaseWorker, BaseWorkGroupManager, SplashBrowser,
+           SplashScraper, StatefulBook, WorkGroup]
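
With these exports, the package's public names are importable straight from transistor. One caveat: __all__ conventionally lists names as strings; binding the objects themselves works for ordinary imports but makes from transistor import * raise TypeError. A sketch of the conventional string form, for comparison:

    # Sketch: the string form of __all__ that star-imports expect.
    __all__ = ['BaseGroup', 'BaseWorker', 'BaseWorkGroupManager', 'SplashBrowser',
               'SplashScraper', 'StatefulBook', 'WorkGroup']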
22 changes: 14 additions & 8 deletions transistor/managers/base_manager.py
@@ -31,6 +31,9 @@ class BaseWorkGroupManager:
"""
Base class for a WorkGroupManager.
"""
__attrs__ = [
'book', 'job_id', 'groups', 'trackers', 'pool', 'qitems', 'workgroups',
]

def __init__(self, job_id, book, groups:list, pool:int=20):
"""
@@ -40,13 +43,16 @@ def __init__(self, job_id, book, groups:list, pool:int=20):
         :param job_id: will save the result of the workers Scrapes to `job_id` list.
          If this job_id is "NONE" then it will pass on the save.
         :param book: a StatefulBook instance
-        :param pool: size of the greenlets pool, should be at least the total number
-         of all workers + 1 for the manager
-        :param groups: a list WorkGroup(<WorkGroup> class object,
-         number_of_workers, **kwargs)
+        :param pool: size of the greenlets pool. If you want to utilize all the
+         workers concurrently, it should be at least the total number
+         of all workers + 1 for the manager. Otherwise, the pool is useful to
+         constrain concurrency to help stay within Crawlera subscription limits.
+        :param groups: a list of class: `WorkGroup()` objects.
         Example:
-        groups = [WorkGroup(class_=MouserGroup, workers=10, kwargs={"china":True}),
-         WorkGroup(class_=MouserGroup, workers=0, kwargs={})]
+        >>> groups = [
+        >>>     WorkGroup(class_=MouseKeyGroup, workers=5, kwargs={"china":True}),
+        >>>     WorkGroup(class_=MouseKeyGroup, workers=5, kwargs={})
+        >>> ]
         :param pool: number of greenlets to create
         """
         self.job_id = job_id
@@ -113,7 +119,7 @@ def _init_workers(self):
     def spawn_list(self):
         """"
         The spawn() method begins a new greenlet with the given arguments
-        (which are passed to the greenlet constructor) and add it to the
+        (which are passed to the greenlet constructor) and adds it to the
         collection of greenlets this group is monitoring.

         We return a list of the newly started greenlets, used in a later
@@ -138,7 +144,7 @@ def monitor(self, target):
         This method actually spawns the scraper and then the purpose is to allow
         some additional final actions to be performed on the scraper object after
         the worker completes the scrape job, but before it shuts down and the object
-        instance is lost (though the ScraperNewt object will exist in the db).
+        instance is lost (though the ScraperShell object will exist in the db).

         The simplest example which must be implemented:
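
Taken together with the docstring changes above, the intended wiring is: one WorkGroup entry per group of workers, and a pool of at least total workers + 1 if every worker should run concurrently. A hedged end-to-end sketch (MouseKeyGroup is the docstring's example name and must be defined elsewhere; the StatefulBook arguments and the main() entry point are assumptions, not shown in this diff):

    # Sketch of wiring a BaseWorkGroupManager per the revised docstring.
    from transistor import BaseWorkGroupManager, StatefulBook, WorkGroup
    from my_scrapers import MouseKeyGroup  # hypothetical module providing the group

    # StatefulBook arguments are hypothetical; the diff only says "a StatefulBook instance".
    book = StatefulBook(file_name='part_numbers.xlsx', trackers=['mousekey.cn'])

    groups = [
        WorkGroup(class_=MouseKeyGroup, workers=5, kwargs={'china': True}),
        WorkGroup(class_=MouseKeyGroup, workers=5, kwargs={}),
    ]

    # 10 workers total + 1 manager greenlet; a smaller pool throttles
    # concurrency, e.g. to stay inside Crawlera subscription limits.
    manager = BaseWorkGroupManager(job_id='mousekey_job', book=book, groups=groups, pool=11)
    manager.main()  # assumed entry point; not shown in this diff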
