diff --git a/.travis.yml b/.travis.yml index 168991ae6..3da5c937e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,11 +2,15 @@ sudo: required language: python cache: pip python: - - "2.7" - - "3.3" - - "3.4" - - "3.5" - - "3.6" + - 2.7 + - 3.3 + - 3.4 + - 3.5 + - 3.6 +matrix: + include: + - python: 3.7 + dist: xenial services: - docker - mongodb @@ -16,11 +20,6 @@ services: - postgresql addons: postgresql: "9.4" - apt: - packages: - - mysql-server-5.6 - - mysql-client-core-5.6 - - mysql-client-5.6 before_install: - sudo apt-get update -qq - sudo apt-get install -y beanstalkd @@ -35,11 +34,10 @@ before_script: - psql -c "CREATE DATABASE pyspider_test_resultdb ENCODING 'UTF8' TEMPLATE=template0;" -U postgres - sleep 10 install: - - pip install mysql-connector-python - pip install https://github.com/marcus67/easywebdav/archive/master.zip - - - if [[ $TRAVIS_PYTHON_VERSION != '3.5' ]]; then pip install --no-use-wheel lxml; else pip install lxml; fi - - if [[ $TRAVIS_PYTHON_VERSION != '3.5' ]]; then pip install --allow-all-external -e .[all,test]; else pip install -e .[all,test]; fi + - if [[ $TRAVIS_PYTHON_VERSION == '2.7' ]]; then sudo apt-get install libc6; fi + - if [[ $TRAVIS_PYTHON_VERSION == '3.7' ]]; then sudo apt-get install libgnutls28-dev; fi + - pip install -e .[all,test] - pip install coveralls script: - coverage run setup.py test diff --git a/pyspider/fetcher/puppeteer_fetcher.js b/pyspider/fetcher/puppeteer_fetcher.js index 110afc1f2..1bd117157 100644 --- a/pyspider/fetcher/puppeteer_fetcher.js +++ b/pyspider/fetcher/puppeteer_fetcher.js @@ -123,9 +123,7 @@ async function _fetch(page, options) { page_settings["waitUntil"] = ["domcontentloaded", "networkidle0"]; console.log('goto ', options.url) - await page.goto(options.url, page_settings); - - var response = await page.waitForResponse(() => true); + var response = await page.goto(options.url, page_settings); if (error_message) { throw error_message @@ -207,7 +205,6 @@ app.post("/", async (request, response) => { } else { opened_page_nums += 1; let options = request.body; - console.log('post ', options); result = await fetch(options); opened_page_nums -= 1; response.send(result) diff --git a/pyspider/fetcher/tornado_fetcher.py b/pyspider/fetcher/tornado_fetcher.py index 112afd962..d64169351 100644 --- a/pyspider/fetcher/tornado_fetcher.py +++ b/pyspider/fetcher/tornado_fetcher.py @@ -86,13 +86,13 @@ def __init__(self, inqueue, outqueue, poolsize=100, proxy=None, async_mode=True) self._running = False self._quit = False self.proxy = proxy - self.async = async_mode + self.async_mode = async_mode self.ioloop = tornado.ioloop.IOLoop() self.robots_txt_cache = {} # binding io_loop to http_client here - if self.async: + if self.async_mode: self.http_client = MyCurlAsyncHTTPClient(max_clients=self.poolsize, io_loop=self.ioloop) else: @@ -114,7 +114,7 @@ def send_result(self, type, task, result): logger.exception(e) def fetch(self, task, callback=None): - if self.async: + if self.async_mode: return self.async_fetch(task, callback) else: return self.async_fetch(task, callback).result() diff --git a/pyspider/webui/app.py b/pyspider/webui/app.py index e596337e1..2261fd6e6 100644 --- a/pyspider/webui/app.py +++ b/pyspider/webui/app.py @@ -92,7 +92,7 @@ def quit(self): app.jinja_env.globals.update(builtins.__dict__) app.config.update({ - 'fetch': lambda x: tornado_fetcher.Fetcher(None, None, async=False).fetch(x), + 'fetch': lambda x: tornado_fetcher.Fetcher(None, None, async_mode=False).fetch(x), 'taskdb': None, 'projectdb': None, 'scheduler_rpc': None, diff --git a/setup.py b/setup.py index 265526133..8ecdaa08a 100644 --- a/setup.py +++ b/setup.py @@ -22,7 +22,7 @@ 'Jinja2>=2.7', 'chardet>=2.2', 'cssselect>=0.9', - 'lxml', + 'lxml' if sys.version_info[:2] != (3, 3) else "lxml<=4.2.6", 'pycurl', 'requests>=2.2', 'Flask-Login>=0.2.11', diff --git a/tests/test_task_queue.py b/tests/test_task_queue.py index 813ea065c..a84fc98e6 100644 --- a/tests/test_task_queue.py +++ b/tests/test_task_queue.py @@ -31,7 +31,7 @@ def test_task_queue_in_time_order(self): q = queues[it.priority] # type:Queue.Queue q.put(it) tasks[it.taskid] = it - six.print_('put, taskid=', it.taskid, 'priority=', it.priority, 'exetime=', it.exetime) + # six.print_('put, taskid=', it.taskid, 'priority=', it.priority, 'exetime=', it.exetime) for i in range(0, 100): task_id = tq.get() task = tasks[task_id] @@ -39,7 +39,7 @@ def test_task_queue_in_time_order(self): expect_task = q.get() self.assertEqual(task_id, expect_task.taskid) self.assertEqual(task.priority, int(9 - i // 10)) - six.print_('get, taskid=', task.taskid, 'priority=', task.priority, 'exetime=', task.exetime) + # six.print_('get, taskid=', task.taskid, 'priority=', task.priority, 'exetime=', task.exetime) self.assertEqual(tq.size(), 100) self.assertEqual(tq.priority_queue.qsize(), 0) @@ -54,7 +54,7 @@ def test_task_queue_in_time_order(self): class TestTimeQueue(unittest.TestCase): def test_time_queue(self): - six.print_('Test time queue order by time only') + # six.print_('Test time queue order by time only') tq = TaskQueue(rate=300, burst=1000) @@ -66,7 +66,7 @@ def test_time_queue(self): it = InQueueTask(str(i), priority=int(i // 10), exetime=time.time() + (i + 1) * interval) tq.put(it.taskid, it.priority, it.exetime) fifo_queue.put(it) - six.print_('put, taskid=', it.taskid, 'priority=', it.priority, 'exetime=', it.exetime) + # six.print_('put, taskid=', it.taskid, 'priority=', it.priority, 'exetime=', it.exetime) self.assertEqual(tq.priority_queue.qsize(), 0) self.assertEqual(tq.processing.qsize(), 0) @@ -76,7 +76,7 @@ def test_time_queue(self): t1 = fifo_queue.get() t2 = tq.time_queue.get() self.assertEqual(t1.taskid, t2.taskid) - six.print_('get, taskid=', t2.taskid, 'priority=', t2.priority, 'exetime=', t2.exetime) + # six.print_('get, taskid=', t2.taskid, 'priority=', t2.priority, 'exetime=', t2.exetime) self.assertEqual(tq.priority_queue.qsize(), 0) self.assertEqual(tq.processing.qsize(), 0) self.assertEqual(tq.time_queue.qsize(), 0)