Skip to content

Commit

Permalink
Merge branch 'master' into puppeteer
Browse files — browse the repository at this point in the history
  • Loading branch information
binux committed Feb 24, 2019
2 parents e8e5b9b + 440e7eb commit c7396bb
Show file tree
Hide file tree
Showing 6 changed files with 23 additions and 28 deletions.
26 changes: 12 additions & 14 deletions .travis.yml
Expand Up @@ -2,11 +2,15 @@ sudo: required
language: python
cache: pip
python:
- "2.7"
- "3.3"
- "3.4"
- "3.5"
- "3.6"
- 2.7
- 3.3
- 3.4
- 3.5
- 3.6
matrix:
include:
- python: 3.7
dist: xenial
services:
- docker
- mongodb
Expand All @@ -16,11 +20,6 @@ services:
- postgresql
addons:
postgresql: "9.4"
apt:
packages:
- mysql-server-5.6
- mysql-client-core-5.6
- mysql-client-5.6
before_install:
- sudo apt-get update -qq
- sudo apt-get install -y beanstalkd
Expand All @@ -35,11 +34,10 @@ before_script:
- psql -c "CREATE DATABASE pyspider_test_resultdb ENCODING 'UTF8' TEMPLATE=template0;" -U postgres
- sleep 10
install:
- pip install mysql-connector-python
- pip install https://github.com/marcus67/easywebdav/archive/master.zip

- if [[ $TRAVIS_PYTHON_VERSION != '3.5' ]]; then pip install --no-use-wheel lxml; else pip install lxml; fi
- if [[ $TRAVIS_PYTHON_VERSION != '3.5' ]]; then pip install --allow-all-external -e .[all,test]; else pip install -e .[all,test]; fi
- if [[ $TRAVIS_PYTHON_VERSION == '2.7' ]]; then sudo apt-get install libc6; fi
- if [[ $TRAVIS_PYTHON_VERSION == '3.7' ]]; then sudo apt-get install libgnutls28-dev; fi
- pip install -e .[all,test]
- pip install coveralls
script:
- coverage run setup.py test
Expand Down
5 changes: 1 addition & 4 deletions pyspider/fetcher/puppeteer_fetcher.js
Expand Up @@ -123,9 +123,7 @@ async function _fetch(page, options) {
page_settings["waitUntil"] = ["domcontentloaded", "networkidle0"];

console.log('goto ', options.url)
await page.goto(options.url, page_settings);

var response = await page.waitForResponse(() => true);
var response = await page.goto(options.url, page_settings);

if (error_message) {
throw error_message
Expand Down Expand Up @@ -207,7 +205,6 @@ app.post("/", async (request, response) => {
} else {
opened_page_nums += 1;
let options = request.body;
console.log('post ', options);
result = await fetch(options);
opened_page_nums -= 1;
response.send(result)
Expand Down
6 changes: 3 additions & 3 deletions pyspider/fetcher/tornado_fetcher.py
Expand Up @@ -86,13 +86,13 @@ def __init__(self, inqueue, outqueue, poolsize=100, proxy=None, async_mode=True)
self._running = False
self._quit = False
self.proxy = proxy
self.async = async_mode
self.async_mode = async_mode
self.ioloop = tornado.ioloop.IOLoop()

self.robots_txt_cache = {}

# binding io_loop to http_client here
if self.async:
if self.async_mode:
self.http_client = MyCurlAsyncHTTPClient(max_clients=self.poolsize,
io_loop=self.ioloop)
else:
Expand All @@ -114,7 +114,7 @@ def send_result(self, type, task, result):
logger.exception(e)

def fetch(self, task, callback=None):
if self.async:
if self.async_mode:
return self.async_fetch(task, callback)
else:
return self.async_fetch(task, callback).result()
Expand Down
2 changes: 1 addition & 1 deletion pyspider/webui/app.py
Expand Up @@ -92,7 +92,7 @@ def quit(self):
app.jinja_env.globals.update(builtins.__dict__)

app.config.update({
'fetch': lambda x: tornado_fetcher.Fetcher(None, None, async=False).fetch(x),
'fetch': lambda x: tornado_fetcher.Fetcher(None, None, async_mode=False).fetch(x),
'taskdb': None,
'projectdb': None,
'scheduler_rpc': None,
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Expand Up @@ -22,7 +22,7 @@
'Jinja2>=2.7',
'chardet>=2.2',
'cssselect>=0.9',
'lxml',
'lxml' if sys.version_info[:2] != (3, 3) else "lxml<=4.2.6",
'pycurl',
'requests>=2.2',
'Flask-Login>=0.2.11',
Expand Down
10 changes: 5 additions & 5 deletions tests/test_task_queue.py
Expand Up @@ -31,15 +31,15 @@ def test_task_queue_in_time_order(self):
q = queues[it.priority] # type:Queue.Queue
q.put(it)
tasks[it.taskid] = it
six.print_('put, taskid=', it.taskid, 'priority=', it.priority, 'exetime=', it.exetime)
# six.print_('put, taskid=', it.taskid, 'priority=', it.priority, 'exetime=', it.exetime)
for i in range(0, 100):
task_id = tq.get()
task = tasks[task_id]
q = queues[task.priority] # type: Queue.Queue
expect_task = q.get()
self.assertEqual(task_id, expect_task.taskid)
self.assertEqual(task.priority, int(9 - i // 10))
six.print_('get, taskid=', task.taskid, 'priority=', task.priority, 'exetime=', task.exetime)
# six.print_('get, taskid=', task.taskid, 'priority=', task.priority, 'exetime=', task.exetime)

self.assertEqual(tq.size(), 100)
self.assertEqual(tq.priority_queue.qsize(), 0)
Expand All @@ -54,7 +54,7 @@ def test_task_queue_in_time_order(self):
class TestTimeQueue(unittest.TestCase):
def test_time_queue(self):

six.print_('Test time queue order by time only')
# six.print_('Test time queue order by time only')

tq = TaskQueue(rate=300, burst=1000)

Expand All @@ -66,7 +66,7 @@ def test_time_queue(self):
it = InQueueTask(str(i), priority=int(i // 10), exetime=time.time() + (i + 1) * interval)
tq.put(it.taskid, it.priority, it.exetime)
fifo_queue.put(it)
six.print_('put, taskid=', it.taskid, 'priority=', it.priority, 'exetime=', it.exetime)
# six.print_('put, taskid=', it.taskid, 'priority=', it.priority, 'exetime=', it.exetime)

self.assertEqual(tq.priority_queue.qsize(), 0)
self.assertEqual(tq.processing.qsize(), 0)
Expand All @@ -76,7 +76,7 @@ def test_time_queue(self):
t1 = fifo_queue.get()
t2 = tq.time_queue.get()
self.assertEqual(t1.taskid, t2.taskid)
six.print_('get, taskid=', t2.taskid, 'priority=', t2.priority, 'exetime=', t2.exetime)
# six.print_('get, taskid=', t2.taskid, 'priority=', t2.priority, 'exetime=', t2.exetime)
self.assertEqual(tq.priority_queue.qsize(), 0)
self.assertEqual(tq.processing.qsize(), 0)
self.assertEqual(tq.time_queue.qsize(), 0)
Expand Down

0 comments on commit c7396bb

Please sign in to comment.