From 984b8ca215ef88bcc1fbf4bae34886fa5d69a55e Mon Sep 17 00:00:00 2001 From: farmercode Date: Mon, 15 Oct 2018 17:28:22 +0800 Subject: [PATCH 01/21] modify async to async_mode to support python3.7 --- pyspider/fetcher/tornado_fetcher.py | 6 +++--- pyspider/webui/app.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pyspider/fetcher/tornado_fetcher.py b/pyspider/fetcher/tornado_fetcher.py index 716db4ebb..7f1b21b87 100644 --- a/pyspider/fetcher/tornado_fetcher.py +++ b/pyspider/fetcher/tornado_fetcher.py @@ -86,13 +86,13 @@ def __init__(self, inqueue, outqueue, poolsize=100, proxy=None, async_mode=True) self._running = False self._quit = False self.proxy = proxy - self.async = async_mode + self.async_mode = async_mode self.ioloop = tornado.ioloop.IOLoop() self.robots_txt_cache = {} # binding io_loop to http_client here - if self.async: + if self.async_mode: self.http_client = MyCurlAsyncHTTPClient(max_clients=self.poolsize, io_loop=self.ioloop) else: @@ -114,7 +114,7 @@ def send_result(self, type, task, result): logger.exception(e) def fetch(self, task, callback=None): - if self.async: + if self.async_mode: return self.async_fetch(task, callback) else: return self.async_fetch(task, callback).result() diff --git a/pyspider/webui/app.py b/pyspider/webui/app.py index e596337e1..2261fd6e6 100644 --- a/pyspider/webui/app.py +++ b/pyspider/webui/app.py @@ -92,7 +92,7 @@ def quit(self): app.jinja_env.globals.update(builtins.__dict__) app.config.update({ - 'fetch': lambda x: tornado_fetcher.Fetcher(None, None, async=False).fetch(x), + 'fetch': lambda x: tornado_fetcher.Fetcher(None, None, async_mode=False).fetch(x), 'taskdb': None, 'projectdb': None, 'scheduler_rpc': None, From 501380696ca49cd4a2291bb92bd038c8ebf1e6a6 Mon Sep 17 00:00:00 2001 From: farmercode Date: Mon, 19 Nov 2018 15:22:12 +0800 Subject: [PATCH 02/21] add python3.7 CI test --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 8b264a044..a1f9e1ba2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,6 +8,7 @@ python: - "3.4" - "3.5" - "3.6" + - "3.7" services: - docker - mongodb From b51e7455eb8d96a63bb4cff320ef3abac150d681 Mon Sep 17 00:00:00 2001 From: farmercode Date: Mon, 19 Nov 2018 15:47:42 +0800 Subject: [PATCH 03/21] add python3.7 CI test --- .travis.yml | 6 +++++- .travis_py37_workaround.sh | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 .travis_py37_workaround.sh diff --git a/.travis.yml b/.travis.yml index a1f9e1ba2..bcac0875d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,4 +1,5 @@ sudo: required +dist: xenial language: python cache: pip python: @@ -8,7 +9,7 @@ python: - "3.4" - "3.5" - "3.6" - - "3.7" + - "3.7-dev" services: - docker - mongodb @@ -24,6 +25,8 @@ addons: - mysql-client-core-5.6 - mysql-client-5.6 before_install: + - if [[ $TRAVIS_PYTHON_VERSION == '3.7-dev' ]]; then sudo add-apt-repository ppa:deadsnakes/ppa -y; fi + - if [[ $TRAVIS_PYTHON_VERSION == '3.7-dev' ]]; then sudo sudo apt-get update; fi - sudo apt-get update -qq - sudo apt-get install -y beanstalkd - echo "START=yes" | sudo tee -a /etc/default/beanstalkd > /dev/null @@ -37,6 +40,7 @@ before_script: - psql -c "CREATE DATABASE pyspider_test_resultdb ENCODING 'UTF8' TEMPLATE=template0;" -U postgres - sleep 10 install: + - if [[ $TRAVIS_PYTHON_VERSION == '3.7-dev' ]]; then source .travis_py37_workaround.sh; fi - pip install mysql-connector-python - pip install https://github.com/marcus67/easywebdav/archive/master.zip - pip install --no-use-wheel lxml diff --git a/.travis_py37_workaround.sh b/.travis_py37_workaround.sh new file mode 100644 index 000000000..676600414 --- /dev/null +++ b/.travis_py37_workaround.sh @@ -0,0 +1,37 @@ +# The MIT License (MIT) +# +# Copyright (c) 2018 Łukasz Langa +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +echo "The ready-made virtualenv is not the one we want. Deactivating..." +deactivate + +echo "Installing 3.7 from deadsnakes..." +sudo apt-get --yes install python3.7 + +echo "Creating a fresh virtualenv. We can't use `ensurepip` because Debian." +python3.7 -m venv ~/virtualenv/python3.7-deadsnakes --without-pip +source ~/virtualenv/python3.7-deadsnakes/bin/activate + +echo "We ensure our own pip." +curl -sSL https://bootstrap.pypa.io/get-pip.py | python3.7 + +echo +echo "Python version:" +python3.7 -c "import sys; print(sys.version)" \ No newline at end of file From 6502e1fb9fc5a520e5fe8a0fa92ffc61b913ea28 Mon Sep 17 00:00:00 2001 From: farmercode Date: Mon, 19 Nov 2018 15:50:18 +0800 Subject: [PATCH 04/21] add python3.7 CI test --- .travis.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index bcac0875d..24c589cfa 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,7 +9,7 @@ python: - "3.4" - "3.5" - "3.6" - - "3.7-dev" + - "3.7" services: - docker - mongodb @@ -25,8 +25,8 @@ addons: - mysql-client-core-5.6 - mysql-client-5.6 before_install: - - if [[ $TRAVIS_PYTHON_VERSION == '3.7-dev' ]]; then sudo add-apt-repository ppa:deadsnakes/ppa -y; fi - - if [[ $TRAVIS_PYTHON_VERSION == '3.7-dev' ]]; then sudo sudo apt-get update; fi +# - if [[ $TRAVIS_PYTHON_VERSION == '3.7-dev' ]]; then sudo add-apt-repository ppa:deadsnakes/ppa -y; fi +# - if [[ $TRAVIS_PYTHON_VERSION == '3.7-dev' ]]; then sudo sudo apt-get update; fi - sudo apt-get update -qq - sudo apt-get install -y beanstalkd - echo "START=yes" | sudo tee -a /etc/default/beanstalkd > /dev/null @@ -40,7 +40,7 @@ before_script: - psql -c "CREATE DATABASE pyspider_test_resultdb ENCODING 'UTF8' TEMPLATE=template0;" -U postgres - sleep 10 install: - - if [[ $TRAVIS_PYTHON_VERSION == '3.7-dev' ]]; then source .travis_py37_workaround.sh; fi +# - if [[ $TRAVIS_PYTHON_VERSION == '3.7-dev' ]]; then source .travis_py37_workaround.sh; fi - pip install mysql-connector-python - pip install https://github.com/marcus67/easywebdav/archive/master.zip - pip install --no-use-wheel lxml From e379d30c60718953e94c190838eab4d957a8ef83 Mon Sep 17 00:00:00 2001 From: farmercode Date: Mon, 19 Nov 2018 15:59:41 +0800 Subject: [PATCH 05/21] add python3.7 CI test --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 24c589cfa..4000d30cf 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,5 +1,4 @@ sudo: required -dist: xenial language: python cache: pip python: @@ -10,6 +9,7 @@ python: - "3.5" - "3.6" - "3.7" + dist: xenial services: - docker - mongodb From cbc3e462251f8cae1a2754e2d80d90ff61eef7c5 Mon Sep 17 00:00:00 2001 From: farmercode Date: Mon, 19 Nov 2018 16:02:03 +0800 Subject: [PATCH 06/21] remove python3.7 CI test --- .travis.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 4000d30cf..f093c1ec9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,8 +8,7 @@ python: - "3.4" - "3.5" - "3.6" - - "3.7" - dist: xenial +# - "3.7" services: - docker - mongodb From ba30efe3ba80a46acabfb01fbcbd3204da0897df Mon Sep 17 00:00:00 2001 From: farmercode Date: Mon, 19 Nov 2018 16:15:53 +0800 Subject: [PATCH 07/21] add py3.7-dev CI test --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index f093c1ec9..31f2b0416 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,6 +8,7 @@ python: - "3.4" - "3.5" - "3.6" + - "3.7-dev" # - "3.7" services: - docker From 329cadbfd99750a61e9c214438a30aaa1fe389df Mon Sep 17 00:00:00 2001 From: farmercode Date: Mon, 19 Nov 2018 17:05:33 +0800 Subject: [PATCH 08/21] add support py3.7-dev CI test --- .travis.yml | 5 +---- .travis_py37_workaround.sh | 37 ------------------------------------- 2 files changed, 1 insertion(+), 41 deletions(-) delete mode 100644 .travis_py37_workaround.sh diff --git a/.travis.yml b/.travis.yml index 31f2b0416..2761a07cb 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,7 +9,7 @@ python: - "3.5" - "3.6" - "3.7-dev" -# - "3.7" + # - "3.7" # TODO: Re-enable after https://github.com/travis-ci/travis-ci/issues/9815 is fixed services: - docker - mongodb @@ -25,8 +25,6 @@ addons: - mysql-client-core-5.6 - mysql-client-5.6 before_install: -# - if [[ $TRAVIS_PYTHON_VERSION == '3.7-dev' ]]; then sudo add-apt-repository ppa:deadsnakes/ppa -y; fi -# - if [[ $TRAVIS_PYTHON_VERSION == '3.7-dev' ]]; then sudo sudo apt-get update; fi - sudo apt-get update -qq - sudo apt-get install -y beanstalkd - echo "START=yes" | sudo tee -a /etc/default/beanstalkd > /dev/null @@ -40,7 +38,6 @@ before_script: - psql -c "CREATE DATABASE pyspider_test_resultdb ENCODING 'UTF8' TEMPLATE=template0;" -U postgres - sleep 10 install: -# - if [[ $TRAVIS_PYTHON_VERSION == '3.7-dev' ]]; then source .travis_py37_workaround.sh; fi - pip install mysql-connector-python - pip install https://github.com/marcus67/easywebdav/archive/master.zip - pip install --no-use-wheel lxml diff --git a/.travis_py37_workaround.sh b/.travis_py37_workaround.sh deleted file mode 100644 index 676600414..000000000 --- a/.travis_py37_workaround.sh +++ /dev/null @@ -1,37 +0,0 @@ -# The MIT License (MIT) -# -# Copyright (c) 2018 Łukasz Langa -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. -echo "The ready-made virtualenv is not the one we want. Deactivating..." -deactivate - -echo "Installing 3.7 from deadsnakes..." -sudo apt-get --yes install python3.7 - -echo "Creating a fresh virtualenv. We can't use `ensurepip` because Debian." -python3.7 -m venv ~/virtualenv/python3.7-deadsnakes --without-pip -source ~/virtualenv/python3.7-deadsnakes/bin/activate - -echo "We ensure our own pip." -curl -sSL https://bootstrap.pypa.io/get-pip.py | python3.7 - -echo -echo "Python version:" -python3.7 -c "import sys; print(sys.version)" \ No newline at end of file From 0bc3c7f238fd8a8e3a67b09381886e1c70679c3d Mon Sep 17 00:00:00 2001 From: feiyang Date: Sat, 5 Jan 2019 17:25:29 +0800 Subject: [PATCH 09/21] feature puppeteer js engine --- pyspider/fetcher/puppeteer_fetcher.js | 204 ++++++++++++++++++++++++++ 1 file changed, 204 insertions(+) create mode 100644 pyspider/fetcher/puppeteer_fetcher.js diff --git a/pyspider/fetcher/puppeteer_fetcher.js b/pyspider/fetcher/puppeteer_fetcher.js new file mode 100644 index 000000000..2c26f91cf --- /dev/null +++ b/pyspider/fetcher/puppeteer_fetcher.js @@ -0,0 +1,204 @@ +const express = require("express"); +const puppeteer = require('puppeteer'); +const bodyParser = require('body-parser'); + +const app = express(); + +app.use(bodyParser.json()); +app.use(bodyParser.urlencoded({extended: false})); + +let init_browser = true; +let browser_settings = {}; + +app.use(async (req, res, next) => { + if (init_browser) { + var options = req.body; + if (options.proxy) { + if (options.proxy.indexOf("://") == -1) { + options.proxy = "http://" + options.proxy; + } + browser_settings["args"] = ['--no-sandbox', "--disable-setuid-sandbox", "--proxy-server="+options.proxy]; + } + browser_settings["headless"] = options.headless === "false"? false:true + browser = await puppeteer.launch(browser_settings); + init_browser=false; + console.log("init browser success!"); + next(); + } else { + next(); + }; +}); + + +async function fetch(options) { + var page = await browser.newPage(); + options.start_time = Date.now(); + try { + await _fetch(page, options); + var result = await make_result(page, options); + await page.close(); + return result + } catch (error) { + var result = await make_result(page, options, error); + await page.close(); + return result + } +} + +async function _fetch(page, options) { + + width = options.js_viewport_width || 1024; + height = options.js_viewport_height || 768 * 3; + await page.setViewport({ + "width": width, + "height": height + }); + + if (options.headers) { + options.headers = JSON.parse(options.headers); + await page.setExtraHTTPHeaders(options.headers); + } + + if (options.headers && options.headers["User-Agent"]) { + page.setUserAgent(options.headers["User-Agent"]); + } + + page.on("console", msg => { + console.log('console: ' + msg.args()); + }); + + // Http post method + let first_request = true; + let request_reseted = false; + await page.setRequestInterception(true); + if (options.method && options.method.toLowerCase() === "post") { + page.on("request", interceptedRequest => { + request_reseted = false; + end_time = null; + if (first_request) { + first_request = false; + var data = { + "method": "POST", + "postData": options.data + }; + console.log(data); + interceptedRequest.continue(data); + request_reseted = true + } + }) + } else { + page.on("request", interceptedRequest => { + request_reseted = false; + end_time = null; + }) + } + + // load images or not + if (options.load_images && options.load_images.toLowerCase() === "false") { + page.on("request", request => { + if (!!!request_reseted) { + if (request.resourceType() === 'image') + request.abort(); + else + request.continue(); + } + }) + } else { + page.on("request", request => { + if (!!!request_reseted) + request.continue() + }) + } + + let error_message = null; + page.on("error", e => { + error_message = e + }); + page.on("pageerror", e => { + error_message = e + }); + + let page_settings = {}; + var page_timeout = options.timeout ? options.timeout * 1000 : 20 * 1000; + page_settings["timeout"] = page_timeout + page_settings["waitUntil"] = ["domcontentloaded", "networkidle0"]; + + var response = await page.goto(options.url, page_settings); + + if (error_message) { + throw error_message + } + + if (options.js_script) { + console.log('running document-end script.'); + script_result = await page.evaluate(options.js_script); + console.log("end script_result is: ", script_result); + options.script_result = script_result + } + + if (options.screenshot_path) { + await page.screenshot({path: options.screenshot_path}); + } + + options.response = response +} + +async function make_result(page, options, error) { + response = options.response; + + var cookies = {}; + var tmp_cookies = await page.cookies(); + tmp_cookies.forEach(function (e) { + cookies[e.name] = e.value; + }); + + let status_code = null; + let headers = null; + let page_content = null; + + if (!!!error) { + response = options.response; + status_code = response.status(); + headers = response.headers(); + page_content = await page.content(); + } + + return { + orig_url: options.url, + status_code: status_code || 599, + error: error, + content: page_content, + headers: headers, + url: page.url(), + cookies: cookies, + time: (Date.now() - options.start_time) / 1000, + js_script_result: options.script_result, + save: options.save + } +} + +app.get("/", function (request, response) { + body = "method not allowed!"; + response.status(403); + response.set({ + "cache": "no-cache", + "Content-Length": body.length + }); + response.send(body); +}); + +app.post("/", async (request, response) => { + var options = request.body; + result = await fetch(options); + response.send(result) +}); + + +var port = 22222; +if (process.argv.length === 3) { + port = parseInt(process.argv[2]) +} + +app.listen(port, function () { + console.log("server listen: " + port); +}); \ No newline at end of file From 99c9fb5170dc31bf531b54e79cda0c125405952d Mon Sep 17 00:00:00 2001 From: feiyang Date: Tue, 8 Jan 2019 18:01:55 +0800 Subject: [PATCH 10/21] features: add opened pages maximum limit, default 5 --- pyspider/fetcher/puppeteer_fetcher.js | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/pyspider/fetcher/puppeteer_fetcher.js b/pyspider/fetcher/puppeteer_fetcher.js index 2c26f91cf..8fd5e70c5 100644 --- a/pyspider/fetcher/puppeteer_fetcher.js +++ b/pyspider/fetcher/puppeteer_fetcher.js @@ -187,14 +187,33 @@ app.get("/", function (request, response) { response.send(body); }); + + +let max_open_pages = 5; +let opened_page_nums = 0; + app.post("/", async (request, response) => { - var options = request.body; - result = await fetch(options); - response.send(result) + console.log("opened pages: " + opened_page_nums); + if (opened_page_nums >= max_open_pages){ + body = "browser pages is too many, open new browser process!"; + response.status(403); + response.set({ + "cache": "no-cache", + "Content-Length": body.length + }); + response.send(body); + } else { + opened_page_nums += 1; + let options = request.body; + result = await fetch(options); + opened_page_nums -= 1; + response.send(result) + } }); -var port = 22222; +let port = 22222; + if (process.argv.length === 3) { port = parseInt(process.argv[2]) } From 563b5194fed34067c1dc5c00339ecefbf588014d Mon Sep 17 00:00:00 2001 From: feiyang Date: Tue, 15 Jan 2019 20:09:02 +0800 Subject: [PATCH 11/21] fix: python3.5 install lxml error --- .travis.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 168991ae6..fb36041e7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,7 +3,6 @@ language: python cache: pip python: - "2.7" - - "3.3" - "3.4" - "3.5" - "3.6" @@ -38,7 +37,7 @@ install: - pip install mysql-connector-python - pip install https://github.com/marcus67/easywebdav/archive/master.zip - - if [[ $TRAVIS_PYTHON_VERSION != '3.5' ]]; then pip install --no-use-wheel lxml; else pip install lxml; fi + - if [[ $TRAVIS_PYTHON_VERSION != '3.5' ]]; then pip install lxml --no-binary :all:; else pip install lxml; fi - if [[ $TRAVIS_PYTHON_VERSION != '3.5' ]]; then pip install --allow-all-external -e .[all,test]; else pip install -e .[all,test]; fi - pip install coveralls script: From e29441724e39549d102f91614aa2484479b489fa Mon Sep 17 00:00:00 2001 From: binux Date: Sat, 23 Feb 2019 15:47:16 -0800 Subject: [PATCH 12/21] use suggested python3.7 build --- .travis.yml | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/.travis.yml b/.travis.yml index 443e9c76b..9e7d1279d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,14 +1,15 @@ sudo: required language: python cache: pip -python: - - "2.7" - - "3.3" - - "3.4" - - "3.5" - - "3.6" - - "3.7-dev" - # - "3.7" # TODO: Re-enable after https://github.com/travis-ci/travis-ci/issues/9815 is fixed +matrix: + include: + - python: 2.7 + - python: 3.4 + - python: 3.5 + - python: 3.6 + - python: 3.7 + dist: xenial + sudo: true services: - docker - mongodb @@ -40,8 +41,13 @@ install: - pip install mysql-connector-python - pip install https://github.com/marcus67/easywebdav/archive/master.zip - - if [[ $TRAVIS_PYTHON_VERSION != '3.5' ]]; then pip install --no-use-wheel lxml; else pip install lxml; fi - - if [[ $TRAVIS_PYTHON_VERSION != '3.5' ]]; then pip install --allow-all-external -e .[all,test]; else pip install -e .[all,test]; fi + - | + if [[ $TRAVIS_PYTHON_VERSION == '3.3' ]]; then + pip install lxml==4.2.6 + else + pip install lxml + fi + - pip install -e .[all,test] - pip install coveralls script: - coverage run setup.py test From 4a5d243840a41a92395622c3a8b7f881d05f6d48 Mon Sep 17 00:00:00 2001 From: binux Date: Sat, 23 Feb 2019 15:50:38 -0800 Subject: [PATCH 13/21] fix build for 3.3 --- .travis.yml | 1 + setup.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 9e7d1279d..afa8dfc34 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,6 +4,7 @@ cache: pip matrix: include: - python: 2.7 + - python: 3.3 - python: 3.4 - python: 3.5 - python: 3.6 diff --git a/setup.py b/setup.py index 265526133..75098269b 100644 --- a/setup.py +++ b/setup.py @@ -22,7 +22,7 @@ 'Jinja2>=2.7', 'chardet>=2.2', 'cssselect>=0.9', - 'lxml', + 'lxml' if sys.version != (3, 3) else "lxml<=4.2.6", 'pycurl', 'requests>=2.2', 'Flask-Login>=0.2.11', From 53f9de5cb1e6f7dcd559e40d068ff9178989bae6 Mon Sep 17 00:00:00 2001 From: binux Date: Sat, 23 Feb 2019 16:06:21 -0800 Subject: [PATCH 14/21] 1. python2.7 image is different when using metrix 2. pip install just works now days --- .travis.yml | 11 ++--------- tests/test_task_queue.py | 10 +++++----- 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/.travis.yml b/.travis.yml index afa8dfc34..ed5aed73f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,9 +1,10 @@ sudo: required language: python cache: pip +python: + - "2.7" matrix: include: - - python: 2.7 - python: 3.3 - python: 3.4 - python: 3.5 @@ -39,15 +40,7 @@ before_script: - psql -c "CREATE DATABASE pyspider_test_resultdb ENCODING 'UTF8' TEMPLATE=template0;" -U postgres - sleep 10 install: - - pip install mysql-connector-python - pip install https://github.com/marcus67/easywebdav/archive/master.zip - - - | - if [[ $TRAVIS_PYTHON_VERSION == '3.3' ]]; then - pip install lxml==4.2.6 - else - pip install lxml - fi - pip install -e .[all,test] - pip install coveralls script: diff --git a/tests/test_task_queue.py b/tests/test_task_queue.py index 813ea065c..a84fc98e6 100644 --- a/tests/test_task_queue.py +++ b/tests/test_task_queue.py @@ -31,7 +31,7 @@ def test_task_queue_in_time_order(self): q = queues[it.priority] # type:Queue.Queue q.put(it) tasks[it.taskid] = it - six.print_('put, taskid=', it.taskid, 'priority=', it.priority, 'exetime=', it.exetime) + # six.print_('put, taskid=', it.taskid, 'priority=', it.priority, 'exetime=', it.exetime) for i in range(0, 100): task_id = tq.get() task = tasks[task_id] @@ -39,7 +39,7 @@ def test_task_queue_in_time_order(self): expect_task = q.get() self.assertEqual(task_id, expect_task.taskid) self.assertEqual(task.priority, int(9 - i // 10)) - six.print_('get, taskid=', task.taskid, 'priority=', task.priority, 'exetime=', task.exetime) + # six.print_('get, taskid=', task.taskid, 'priority=', task.priority, 'exetime=', task.exetime) self.assertEqual(tq.size(), 100) self.assertEqual(tq.priority_queue.qsize(), 0) @@ -54,7 +54,7 @@ def test_task_queue_in_time_order(self): class TestTimeQueue(unittest.TestCase): def test_time_queue(self): - six.print_('Test time queue order by time only') + # six.print_('Test time queue order by time only') tq = TaskQueue(rate=300, burst=1000) @@ -66,7 +66,7 @@ def test_time_queue(self): it = InQueueTask(str(i), priority=int(i // 10), exetime=time.time() + (i + 1) * interval) tq.put(it.taskid, it.priority, it.exetime) fifo_queue.put(it) - six.print_('put, taskid=', it.taskid, 'priority=', it.priority, 'exetime=', it.exetime) + # six.print_('put, taskid=', it.taskid, 'priority=', it.priority, 'exetime=', it.exetime) self.assertEqual(tq.priority_queue.qsize(), 0) self.assertEqual(tq.processing.qsize(), 0) @@ -76,7 +76,7 @@ def test_time_queue(self): t1 = fifo_queue.get() t2 = tq.time_queue.get() self.assertEqual(t1.taskid, t2.taskid) - six.print_('get, taskid=', t2.taskid, 'priority=', t2.priority, 'exetime=', t2.exetime) + # six.print_('get, taskid=', t2.taskid, 'priority=', t2.priority, 'exetime=', t2.exetime) self.assertEqual(tq.priority_queue.qsize(), 0) self.assertEqual(tq.processing.qsize(), 0) self.assertEqual(tq.time_queue.qsize(), 0) From 578664f27c1b0115bc86b4b28eaa80e36ebada41 Mon Sep 17 00:00:00 2001 From: binux Date: Sat, 23 Feb 2019 16:10:23 -0800 Subject: [PATCH 15/21] sudo not required any more? --- .travis.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index ed5aed73f..09309c7d2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,7 +11,6 @@ matrix: - python: 3.6 - python: 3.7 dist: xenial - sudo: true services: - docker - mongodb From 74874e216a052a97ed03ace68b58d69c6fc68b1a Mon Sep 17 00:00:00 2001 From: binux Date: Sat, 23 Feb 2019 16:19:32 -0800 Subject: [PATCH 16/21] try not to specify a version for apt-get --- .travis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 09309c7d2..5c20d413d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -22,9 +22,9 @@ addons: postgresql: "9.4" apt: packages: - - mysql-server-5.6 - - mysql-client-core-5.6 - - mysql-client-5.6 + - mysql-server + - mysql-client-core + - mysql-client before_install: - sudo apt-get update -qq - sudo apt-get install -y beanstalkd From 0d65272b8e862ccb9e93b4562397ed80b4f7e29d Mon Sep 17 00:00:00 2001 From: binux Date: Sat, 23 Feb 2019 16:23:49 -0800 Subject: [PATCH 17/21] fix setup.py test for py3.3 --- .travis.yml | 3 +-- setup.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 5c20d413d..65c8eb793 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,10 +1,9 @@ sudo: required language: python cache: pip -python: - - "2.7" matrix: include: + - python: 2.7 - python: 3.3 - python: 3.4 - python: 3.5 diff --git a/setup.py b/setup.py index 75098269b..8ecdaa08a 100644 --- a/setup.py +++ b/setup.py @@ -22,7 +22,7 @@ 'Jinja2>=2.7', 'chardet>=2.2', 'cssselect>=0.9', - 'lxml' if sys.version != (3, 3) else "lxml<=4.2.6", + 'lxml' if sys.version_info[:2] != (3, 3) else "lxml<=4.2.6", 'pycurl', 'requests>=2.2', 'Flask-Login>=0.2.11', From 40669065d5e1fb4eea738aaf473114bfbad81c86 Mon Sep 17 00:00:00 2001 From: binux Date: Sat, 23 Feb 2019 16:32:37 -0800 Subject: [PATCH 18/21] try manually install --- .travis.yml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/.travis.yml b/.travis.yml index 65c8eb793..9f58b53f9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -19,14 +19,15 @@ services: - postgresql addons: postgresql: "9.4" - apt: - packages: - - mysql-server - - mysql-client-core - - mysql-client before_install: - sudo apt-get update -qq - sudo apt-get install -y beanstalkd + - | + if [[ $TRAVIS_PYTHON_VERSION == '3.7' ]]; then + sudo apt-get install -y mysql-server-5.7 mysql-client-core-5.7 mysql-client-5.7 + else + sudo apt-get install -y mysql-server-5.6 mysql-client-core-5.6 mysql-client-5.6 + fi - echo "START=yes" | sudo tee -a /etc/default/beanstalkd > /dev/null - sudo service beanstalkd start - curl -O https://download.elastic.co/elasticsearch/release/org/elasticsearch/distribution/deb/elasticsearch/2.4.0/elasticsearch-2.4.0.deb && sudo dpkg -i --force-confnew elasticsearch-2.4.0.deb && sudo service elasticsearch restart From 44a4dda64b35819687dabc52c269958845ff5dd9 Mon Sep 17 00:00:00 2001 From: binux Date: Sat, 23 Feb 2019 16:46:26 -0800 Subject: [PATCH 19/21] try again --- .travis.yml | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/.travis.yml b/.travis.yml index 9f58b53f9..04b76ea64 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,13 +1,14 @@ sudo: required language: python cache: pip +python: + - 2.7 + - 3.3 + - 3.4 + - 3.5 + - 3.6 matrix: include: - - python: 2.7 - - python: 3.3 - - python: 3.4 - - python: 3.5 - - python: 3.6 - python: 3.7 dist: xenial services: @@ -22,12 +23,6 @@ addons: before_install: - sudo apt-get update -qq - sudo apt-get install -y beanstalkd - - | - if [[ $TRAVIS_PYTHON_VERSION == '3.7' ]]; then - sudo apt-get install -y mysql-server-5.7 mysql-client-core-5.7 mysql-client-5.7 - else - sudo apt-get install -y mysql-server-5.6 mysql-client-core-5.6 mysql-client-5.6 - fi - echo "START=yes" | sudo tee -a /etc/default/beanstalkd > /dev/null - sudo service beanstalkd start - curl -O https://download.elastic.co/elasticsearch/release/org/elasticsearch/distribution/deb/elasticsearch/2.4.0/elasticsearch-2.4.0.deb && sudo dpkg -i --force-confnew elasticsearch-2.4.0.deb && sudo service elasticsearch restart From 398211ddc93265619bb39e49d23c7cc081763824 Mon Sep 17 00:00:00 2001 From: binux Date: Sat, 23 Feb 2019 16:58:13 -0800 Subject: [PATCH 20/21] fix for 3.7 --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 04b76ea64..08c1afb55 100644 --- a/.travis.yml +++ b/.travis.yml @@ -35,6 +35,7 @@ before_script: - sleep 10 install: - pip install https://github.com/marcus67/easywebdav/archive/master.zip + - if [[ $TRAVIS_PYTHON_VERSION == '3.7' ]]; then sudo apt-get install libgnutls28-dev; fi - pip install -e .[all,test] - pip install coveralls script: From 3fb99bd24aa3b516e5091599c8c0b7d93663971f Mon Sep 17 00:00:00 2001 From: binux Date: Sat, 23 Feb 2019 16:59:55 -0800 Subject: [PATCH 21/21] try install librt --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 08c1afb55..3da5c937e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -35,6 +35,7 @@ before_script: - sleep 10 install: - pip install https://github.com/marcus67/easywebdav/archive/master.zip + - if [[ $TRAVIS_PYTHON_VERSION == '2.7' ]]; then sudo apt-get install libc6; fi - if [[ $TRAVIS_PYTHON_VERSION == '3.7' ]]; then sudo apt-get install libgnutls28-dev; fi - pip install -e .[all,test] - pip install coveralls