From 12a9da5a5fa4503047a4f7bc7fb559de43db349e Mon Sep 17 00:00:00 2001 From: gas1121 Date: Wed, 5 Jul 2017 20:42:02 +0800 Subject: [PATCH 01/10] Add python 3 support to scrapy-cluster. We use decode_responses option on Redis client and value_deserializer,value_serializer option on Kafka client to handle unicode problem. We also fix several syntax error and update several test cases. --- .gitignore | 2 ++ .travis.yml | 3 ++ crawler/crawling/distributed_scheduler.py | 5 +-- crawler/crawling/log_retry_middleware.py | 3 +- crawler/crawling/pipelines.py | 3 +- crawler/crawling/redis_stats_middleware.py | 3 +- crawler/tests/online.py | 6 ++-- docker/crawler/Dockerfile.py3 | 35 +++++++++++++++++++++ docker/kafka-monitor/Dockerfile.py3 | 27 ++++++++++++++++ docker/redis-monitor/Dockerfile.py3 | 27 ++++++++++++++++ docker/rest/Dockerfile.py3 | 27 ++++++++++++++++ kafka-monitor/kafka_monitor.py | 6 ++-- kafka-monitor/kafkadump.py | 1 + kafka-monitor/plugins/action_handler.py | 3 +- kafka-monitor/plugins/scraper_handler.py | 3 +- kafka-monitor/plugins/stats_handler.py | 3 +- kafka-monitor/plugins/zookeeper_handler.py | 3 +- kafka-monitor/tests/online.py | 3 +- redis-monitor/plugins/kafka_base_monitor.py | 2 +- redis-monitor/redis_monitor.py | 10 ++++-- redis-monitor/tests/online.py | 8 ++++- rest/rest_service.py | 7 +++-- rest/tests/test_rest_service.py | 17 ++++++---- utils/examples/example_rq.py | 2 +- utils/examples/example_rtq.py | 2 +- utils/scutils/redis_queue.py | 7 +++-- utils/scutils/stats_collector.py | 3 +- utils/tests/online.py | 6 ++-- utils/tests/test_redis_queue.py | 4 +-- utils/tests/throttled_queue.py | 2 +- 30 files changed, 195 insertions(+), 38 deletions(-) create mode 100644 docker/crawler/Dockerfile.py3 create mode 100644 docker/kafka-monitor/Dockerfile.py3 create mode 100644 docker/redis-monitor/Dockerfile.py3 create mode 100644 docker/rest/Dockerfile.py3 diff --git a/.gitignore b/.gitignore index b8047dfd..8fd3a28e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ +.vscode + # Python binaries *.pyc diff --git a/.travis.yml b/.travis.yml index b94e790d..657aabdb 100644 --- a/.travis.yml +++ b/.travis.yml @@ -20,6 +20,9 @@ env: - docker: 1 dockerfile_name: Dockerfile.py2alpine docker_tag_suffix: dev-alpine + - docker: 1 + dockerfile_name: Dockerfile.py3 + docker_tag_suffix: dev-py3 install: true diff --git a/crawler/crawling/distributed_scheduler.py b/crawler/crawling/distributed_scheduler.py index 03376be6..14cf2818 100644 --- a/crawler/crawling/distributed_scheduler.py +++ b/crawler/crawling/distributed_scheduler.py @@ -282,7 +282,7 @@ def update_ipaddress(self): try: obj = urllib.request.urlopen(settings.get('PUBLIC_IP_URL', 'http://ip.42.pl/raw')) - results = self.ip_regex.findall(obj.read()) + results = self.ip_regex.findall(obj.read().decode('utf-8')) if len(results) > 0: self.my_ip = results[0] else: @@ -313,7 +313,8 @@ def report_self(self): def from_settings(cls, settings): server = redis.Redis(host=settings.get('REDIS_HOST'), port=settings.get('REDIS_PORT'), - db=settings.get('REDIS_DB')) + db=settings.get('REDIS_DB'), + decode_responses=True) persist = settings.get('SCHEDULER_PERSIST', True) up_int = settings.get('SCHEDULER_QUEUE_REFRESH', 10) hits = settings.get('QUEUE_HITS', 10) diff --git a/crawler/crawling/log_retry_middleware.py b/crawler/crawling/log_retry_middleware.py index 3f49177b..29592c9a 100644 --- a/crawler/crawling/log_retry_middleware.py +++ b/crawler/crawling/log_retry_middleware.py @@ -61,7 +61,8 @@ def setup(self, settings): if self.settings['STATS_STATUS_CODES']: self.redis_conn = redis.Redis(host=self.settings.get('REDIS_HOST'), port=self.settings.get('REDIS_PORT'), - db=settings.get('REDIS_DB')) + db=settings.get('REDIS_DB'), + decode_responses=True) try: self.redis_conn.info() diff --git a/crawler/crawling/pipelines.py b/crawler/crawling/pipelines.py index 22f468f7..5baa710a 100644 --- a/crawler/crawling/pipelines.py +++ b/crawler/crawling/pipelines.py @@ -111,7 +111,8 @@ def from_settings(cls, settings): producer = KafkaProducer(bootstrap_servers=settings['KAFKA_HOSTS'], retries=3, linger_ms=settings['KAFKA_PRODUCER_BATCH_LINGER_MS'], - buffer_memory=settings['KAFKA_PRODUCER_BUFFER_BYTES']) + buffer_memory=settings['KAFKA_PRODUCER_BUFFER_BYTES'], + value_serializer=lambda m: m.encode('utf-8')) except Exception as e: logger.error("Unable to connect to Kafka in Pipeline"\ ", raising exit flag.") diff --git a/crawler/crawling/redis_stats_middleware.py b/crawler/crawling/redis_stats_middleware.py index 415d29c8..6b3a0951 100644 --- a/crawler/crawling/redis_stats_middleware.py +++ b/crawler/crawling/redis_stats_middleware.py @@ -41,7 +41,8 @@ def setup(self, settings): # set up redis self.redis_conn = redis.Redis(host=settings.get('REDIS_HOST'), port=settings.get('REDIS_PORT'), - db=settings.get('REDIS_DB')) + db=settings.get('REDIS_DB'), + decode_responses=True) try: self.redis_conn.info() diff --git a/crawler/tests/online.py b/crawler/tests/online.py index be3e9d03..3296cf02 100644 --- a/crawler/tests/online.py +++ b/crawler/tests/online.py @@ -46,7 +46,8 @@ def setUp(self): # set up redis self.redis_conn = redis.Redis(host=self.settings['REDIS_HOST'], port=self.settings['REDIS_PORT'], - db=self.settings['REDIS_DB']) + db=self.settings['REDIS_DB'], + decode_responses=True) try: self.redis_conn.info() except ConnectionError: @@ -66,7 +67,8 @@ def setUp(self): group_id="demo-id", auto_commit_interval_ms=10, consumer_timeout_ms=5000, - auto_offset_reset='earliest' + auto_offset_reset='earliest', + value_deserializer=lambda m: m.decode('utf-8') ) time.sleep(1) diff --git a/docker/crawler/Dockerfile.py3 b/docker/crawler/Dockerfile.py3 new file mode 100644 index 00000000..58874164 --- /dev/null +++ b/docker/crawler/Dockerfile.py3 @@ -0,0 +1,35 @@ +FROM python:3.6 +MAINTAINER Madison Bahmer + +# os setup +RUN apt-get update && apt-get -y install \ + python-lxml \ + build-essential \ + libssl-dev \ + libffi-dev \ + python-dev \ + libxml2-dev \ + libxslt1-dev \ + && rm -rf /var/lib/apt/lists/* +RUN mkdir -p /usr/src/app +WORKDIR /usr/src/app + +# install requirements +COPY utils /usr/src/utils +COPY crawler/requirements.txt /usr/src/app/ +RUN pip install --no-cache-dir -r requirements.txt +RUN rm -rf /usr/src/utils + +# move codebase over +COPY crawler /usr/src/app + +# override settings via localsettings.py +COPY docker/crawler/settings.py /usr/src/app/crawling/localsettings.py + +# copy testing script into container +COPY docker/run_docker_tests.sh /usr/src/app/run_docker_tests.sh + +# set up environment variables + +# run the spider +CMD ["scrapy", "runspider", "crawling/spiders/link_spider.py"] \ No newline at end of file diff --git a/docker/kafka-monitor/Dockerfile.py3 b/docker/kafka-monitor/Dockerfile.py3 new file mode 100644 index 00000000..acb3ddd2 --- /dev/null +++ b/docker/kafka-monitor/Dockerfile.py3 @@ -0,0 +1,27 @@ +FROM python:3.6 +MAINTAINER Madison Bahmer + +# os setup +RUN apt-get update +RUN mkdir -p /usr/src/app +WORKDIR /usr/src/app + +# install requirements +COPY utils /usr/src/utils +COPY kafka-monitor/requirements.txt /usr/src/app/ +RUN pip install --no-cache-dir -r requirements.txt +RUN rm -rf /usr/src/utils + +# move codebase over +COPY kafka-monitor /usr/src/app + +# override settings via localsettings.py +COPY docker/kafka-monitor/settings.py /usr/src/app/localsettings.py + +# copy testing script into container +COPY docker/run_docker_tests.sh /usr/src/app/run_docker_tests.sh + +# set up environment variables + +# run command +CMD ["python", "kafka_monitor.py", "run"] \ No newline at end of file diff --git a/docker/redis-monitor/Dockerfile.py3 b/docker/redis-monitor/Dockerfile.py3 new file mode 100644 index 00000000..e2c28d34 --- /dev/null +++ b/docker/redis-monitor/Dockerfile.py3 @@ -0,0 +1,27 @@ +FROM python:3.6 +MAINTAINER Madison Bahmer + +# os setup +RUN apt-get update +RUN mkdir -p /usr/src/app +WORKDIR /usr/src/app + +# install requirements +COPY utils /usr/src/utils +COPY redis-monitor/requirements.txt /usr/src/app/ +RUN pip install --no-cache-dir -r requirements.txt +RUN rm -rf /usr/src/utils + +# move codebase over +COPY redis-monitor /usr/src/app + +# override settings via localsettings.py +COPY docker/redis-monitor/settings.py /usr/src/app/localsettings.py + +# copy testing script into container +COPY docker/run_docker_tests.sh /usr/src/app/run_docker_tests.sh + +# set up environment variables + +# run command +CMD ["python", "redis_monitor.py"] \ No newline at end of file diff --git a/docker/rest/Dockerfile.py3 b/docker/rest/Dockerfile.py3 new file mode 100644 index 00000000..15725fc2 --- /dev/null +++ b/docker/rest/Dockerfile.py3 @@ -0,0 +1,27 @@ +FROM python:3.6 +MAINTAINER Madison Bahmer + +# os setup +RUN apt-get update +RUN mkdir -p /usr/src/app +WORKDIR /usr/src/app + +# install requirements +COPY utils /usr/src/utils +COPY rest/requirements.txt /usr/src/app/ +RUN pip install --no-cache-dir -r requirements.txt +RUN rm -rf /usr/src/utils + +# move codebase over +COPY rest /usr/src/app + +# override settings via localsettings.py +COPY docker/rest/settings.py /usr/src/app/localsettings.py + +# copy testing script into container +COPY docker/run_docker_tests.sh /usr/src/app/run_docker_tests.sh + +# set up environment variables + +# run command +CMD ["python", "rest_service.py"] \ No newline at end of file diff --git a/kafka-monitor/kafka_monitor.py b/kafka-monitor/kafka_monitor.py index 19642ddb..c793caa5 100644 --- a/kafka-monitor/kafka_monitor.py +++ b/kafka-monitor/kafka_monitor.py @@ -123,7 +123,8 @@ def _setup_stats(self): redis_conn = redis.Redis(host=self.settings['REDIS_HOST'], port=self.settings['REDIS_PORT'], - db=self.settings.get('REDIS_DB')) + db=self.settings.get('REDIS_DB'), + decode_responses=True) try: redis_conn.info() @@ -455,6 +456,7 @@ def _create_consumer(self): self.settings['KAFKA_INCOMING_TOPIC'], group_id=self.settings['KAFKA_GROUP'], bootstrap_servers=brokers, + value_deserializer=lambda m: m.decode('utf-8'), consumer_timeout_ms=self.settings['KAFKA_CONSUMER_TIMEOUT'], auto_offset_reset=self.settings['KAFKA_CONSUMER_AUTO_OFFSET_RESET'], auto_commit_interval_ms=self.settings['KAFKA_CONSUMER_COMMIT_INTERVAL_MS'], @@ -478,7 +480,7 @@ def _create_producer(self): str(brokers)) return KafkaProducer(bootstrap_servers=brokers, - value_serializer=lambda m: json.dumps(m), + value_serializer=lambda m: json.dumps(m).encode('utf-8'), retries=3, linger_ms=self.settings['KAFKA_PRODUCER_BATCH_LINGER_MS'], buffer_memory=self.settings['KAFKA_PRODUCER_BUFFER_BYTES']) diff --git a/kafka-monitor/kafkadump.py b/kafka-monitor/kafkadump.py index acc847c7..ff77cc29 100644 --- a/kafka-monitor/kafkadump.py +++ b/kafka-monitor/kafkadump.py @@ -102,6 +102,7 @@ def main(): topic, group_id=consumer_id, bootstrap_servers=kafka_host, + value_deserializer=lambda m: m.decode('utf-8'), consumer_timeout_ms=settings['KAFKA_CONSUMER_TIMEOUT'], auto_offset_reset=offset, auto_commit_interval_ms=settings['KAFKA_CONSUMER_COMMIT_INTERVAL_MS'], diff --git a/kafka-monitor/plugins/action_handler.py b/kafka-monitor/plugins/action_handler.py index 1e476b32..acad9b65 100644 --- a/kafka-monitor/plugins/action_handler.py +++ b/kafka-monitor/plugins/action_handler.py @@ -17,7 +17,8 @@ def setup(self, settings): self.extract = tldextract.TLDExtract() self.redis_conn = redis.Redis(host=settings['REDIS_HOST'], port=settings['REDIS_PORT'], - db=settings.get('REDIS_DB')) + db=settings.get('REDIS_DB'), + decode_responses=True) try: self.redis_conn.info() diff --git a/kafka-monitor/plugins/scraper_handler.py b/kafka-monitor/plugins/scraper_handler.py index a7687b46..aca5ab45 100644 --- a/kafka-monitor/plugins/scraper_handler.py +++ b/kafka-monitor/plugins/scraper_handler.py @@ -18,7 +18,8 @@ def setup(self, settings): self.extract = tldextract.TLDExtract() self.redis_conn = redis.Redis(host=settings['REDIS_HOST'], port=settings['REDIS_PORT'], - db=settings.get('REDIS_DB')) + db=settings.get('REDIS_DB'), + decode_responses=True) try: self.redis_conn.info() diff --git a/kafka-monitor/plugins/stats_handler.py b/kafka-monitor/plugins/stats_handler.py index 21de7078..b053c2c9 100644 --- a/kafka-monitor/plugins/stats_handler.py +++ b/kafka-monitor/plugins/stats_handler.py @@ -15,7 +15,8 @@ def setup(self, settings): ''' self.redis_conn = redis.Redis(host=settings['REDIS_HOST'], port=settings['REDIS_PORT'], - db=settings.get('REDIS_DB')) + db=settings.get('REDIS_DB'), + decode_responses=True) try: self.redis_conn.info() diff --git a/kafka-monitor/plugins/zookeeper_handler.py b/kafka-monitor/plugins/zookeeper_handler.py index 2996d702..4df0da53 100644 --- a/kafka-monitor/plugins/zookeeper_handler.py +++ b/kafka-monitor/plugins/zookeeper_handler.py @@ -18,7 +18,8 @@ def setup(self, settings): self.extract = tldextract.TLDExtract() self.redis_conn = redis.Redis(host=settings['REDIS_HOST'], port=settings['REDIS_PORT'], - db=settings.get('REDIS_DB')) + db=settings.get('REDIS_DB'), + decode_responses=True) try: self.redis_conn.info() diff --git a/kafka-monitor/tests/online.py b/kafka-monitor/tests/online.py index 3deb9b67..e79762d4 100644 --- a/kafka-monitor/tests/online.py +++ b/kafka-monitor/tests/online.py @@ -59,7 +59,8 @@ def timer(): self.redis_conn = redis.Redis( host=self.kafka_monitor.settings['REDIS_HOST'], port=self.kafka_monitor.settings['REDIS_PORT'], - db=self.kafka_monitor.settings['REDIS_DB']) + db=self.kafka_monitor.settings['REDIS_DB'], + decode_responses=True) def test_feed(self): json_req = "{\"uuid\":\"mytestid\"," \ diff --git a/redis-monitor/plugins/kafka_base_monitor.py b/redis-monitor/plugins/kafka_base_monitor.py index fa13ae72..217f732d 100644 --- a/redis-monitor/plugins/kafka_base_monitor.py +++ b/redis-monitor/plugins/kafka_base_monitor.py @@ -39,7 +39,7 @@ def _create_producer(self, settings): str(brokers)) return KafkaProducer(bootstrap_servers=brokers, - value_serializer=lambda m: json.dumps(m), + value_serializer=lambda m: json.dumps(m).encode('utf-8'), retries=3, linger_ms=settings['KAFKA_PRODUCER_BATCH_LINGER_MS'], buffer_memory=settings['KAFKA_PRODUCER_BUFFER_BYTES']) diff --git a/redis-monitor/redis_monitor.py b/redis-monitor/redis_monitor.py index 6b721c63..af2022af 100644 --- a/redis-monitor/redis_monitor.py +++ b/redis-monitor/redis_monitor.py @@ -60,7 +60,13 @@ def setup(self, level=None, log_file=None, json=None): self.redis_conn = redis.StrictRedis(host=self.settings['REDIS_HOST'], port=self.settings['REDIS_PORT'], - db=self.settings['REDIS_DB']) + db=self.settings['REDIS_DB'], + decode_responses=True) + # redis_lock needs a redis connection without setting decode_responses + # to True + self.lock_redis_conn = redis.StrictRedis(host=self.settings['REDIS_HOST'], + port=self.settings['REDIS_PORT'], + db=self.settings['REDIS_DB']) try: self.redis_conn.info() @@ -182,7 +188,7 @@ def _create_lock_object(self, key): ''' Returns a lock object, split for testing ''' - return redis_lock.Lock(self.redis_conn, key, + return redis_lock.Lock(self.lock_redis_conn, key, expire=self.settings['REDIS_LOCK_EXPIRATION'], auto_renewal=True) diff --git a/redis-monitor/tests/online.py b/redis-monitor/tests/online.py index c12132ee..4db06aef 100644 --- a/redis-monitor/tests/online.py +++ b/redis-monitor/tests/online.py @@ -59,6 +59,11 @@ def setUp(self): 'tests.online.CustomMonitor': 100, } self.redis_monitor.redis_conn = redis.Redis( + host=self.redis_monitor.settings['REDIS_HOST'], + port=self.redis_monitor.settings['REDIS_PORT'], + db=self.redis_monitor.settings['REDIS_DB'], + decode_responses=True) + self.redis_monitor.lock_redis_conn = redis.Redis( host=self.redis_monitor.settings['REDIS_HOST'], port=self.redis_monitor.settings['REDIS_PORT'], db=self.redis_monitor.settings['REDIS_DB']) @@ -72,7 +77,8 @@ def setUp(self): group_id="demo-id", auto_commit_interval_ms=10, consumer_timeout_ms=5000, - auto_offset_reset='earliest' + auto_offset_reset='earliest', + value_deserializer=lambda m: m.decode('utf-8') ) sleep(1) diff --git a/rest/rest_service.py b/rest/rest_service.py index 06ddeb3b..342a4f7f 100644 --- a/rest/rest_service.py +++ b/rest/rest_service.py @@ -63,7 +63,7 @@ def wrapper(*args, **kw): True, instance.UNKNOWN_ERROR) log_dict = deepcopy(ret_dict) - log_dict['error']['cause'] = e.message + log_dict['error']['cause'] = "" log_dict['error']['exception'] = str(e) log_dict['error']['ex'] = traceback.format_exc() instance.logger.error("Uncaught Exception Thrown", log_dict) @@ -102,7 +102,7 @@ def wrapper(*args, **kw): instance = args[0] try: instance.validator(instance.schemas[schema_name]).validate(request.get_json()) - except ValidationError, e: + except ValidationError as e: ret_dict = instance._create_ret_object(instance.FAILURE, None, True, instance.BAD_SCHEMA, @@ -352,7 +352,8 @@ def _setup_redis(self): str(self.settings['REDIS_HOST'])) self.redis_conn = redis.StrictRedis(host=self.settings['REDIS_HOST'], port=self.settings['REDIS_PORT'], - db=self.settings['REDIS_DB']) + db=self.settings['REDIS_DB'], + decode_responses=True) self.redis_conn.info() self.redis_connected = True self.logger.info("Successfully connected to redis") diff --git a/rest/tests/test_rest_service.py b/rest/tests/test_rest_service.py index efc15e52..2f2772f7 100644 --- a/rest/tests/test_rest_service.py +++ b/rest/tests/test_rest_service.py @@ -10,6 +10,7 @@ import mock import json import flask +import six from kafka.common import OffsetOutOfRangeError from kafka.conn import ConnectionStates @@ -51,13 +52,13 @@ def setUp(self): self.rest_service.logger = MagicMock() @mock.patch('os.listdir', MagicMock(return_value=['hey.json'])) - @mock.patch('__builtin__.open', mock_open(read_data='bibble'), create=True) + @mock.patch('six.moves.builtins.open', mock_open(read_data='bibble'), create=True) def test_load_schemas_bad(self): with self.assertRaises(ValueError): self.rest_service._load_schemas() @mock.patch('os.listdir', MagicMock(return_value=['hey2.json'])) - @mock.patch('__builtin__.open', mock_open(read_data='{\"stuff\":\"value\"}'), create=True) + @mock.patch('six.moves.builtins.open', mock_open(read_data='{\"stuff\":\"value\"}'), create=True) def test_load_schemas_bad(self): self.rest_service._load_schemas() self.assertEquals(self.rest_service.schemas, @@ -475,7 +476,7 @@ def test_validate_schema(self): self.assertEquals(results, 'data') # invalid schema - data = '{"otherkey": "bad data"}' + data = u'{"value": "data here", "otherkey": "bad data"}' with self.rest_service.app.test_request_context(data=data, content_type='application/json'): results = override.test_schema() @@ -483,11 +484,15 @@ def test_validate_schema(self): self.assertEquals(override.logger.error.call_args[0][0], "Invalid Schema") + if six.PY3: + cause_text = u"Additional properties are not allowed ('otherkey' was unexpected)" + else: + cause_text = u"Additional properties are not allowed (u'otherkey' was unexpected)" d = { u'data': None, u'error': { u'message': u"JSON did not validate against schema.", - u'cause': u"Additional properties are not allowed (u'otherkey' was unexpected)" + u'cause': cause_text }, u'status': u'FAILURE' } @@ -587,7 +592,7 @@ def fancy_get_time(): data = json.loads(results[0].data) self.assertEquals(data, d) self.assertEquals(results[1], 200) - self.assertFalse(self.rest_service.uuids.has_key('key')) + self.assertFalse('key' in self.rest_service.uuids) # test with uuid, no response time_list = [0, 1, 2, 3, 4, 5, 6] @@ -606,7 +611,7 @@ def fancy_get_time2(): data = json.loads(results[0].data) self.assertEquals(data, d) self.assertEquals(results[1], 200) - self.assertTrue(self.rest_service.uuids.has_key('key')) + self.assertTrue('key' in self.rest_service.uuids) self.assertEquals(self.rest_service.uuids['key'], 'poll') def test_poll(self): diff --git a/utils/examples/example_rq.py b/utils/examples/example_rq.py index 1a753d4b..3075ddc4 100644 --- a/utils/examples/example_rq.py +++ b/utils/examples/example_rq.py @@ -5,7 +5,7 @@ # change these for your Redis host host = 'scdev' port = 6379 -redis_conn = redis.Redis(host=host, port=port) +redis_conn = redis.Redis(host=host, port=port, decode_responses=True) parser = argparse.ArgumentParser(description='Example Redis Queues.') group = parser.add_mutually_exclusive_group(required=True) diff --git a/utils/examples/example_rtq.py b/utils/examples/example_rtq.py index dd5dfb4e..c70e771e 100644 --- a/utils/examples/example_rtq.py +++ b/utils/examples/example_rtq.py @@ -44,7 +44,7 @@ def main(): queue = args['queue'] elastic = args['elastic'] - conn = redis.Redis(host=host, port=port) + conn = redis.Redis(host=host, port=port, decode_responses=True) q = RedisPriorityQueue(conn, queue) t = RedisThrottledQueue(conn, q, window, num, mod, elastic=elastic) diff --git a/utils/scutils/redis_queue.py b/utils/scutils/redis_queue.py index 09bc51c5..8bd325fa 100644 --- a/utils/scutils/redis_queue.py +++ b/utils/scutils/redis_queue.py @@ -39,7 +39,7 @@ def _encode_item(self, item): @requires: The object be serializable ''' if self.encoding.__name__ == 'pickle': - return self.encoding.dumps(item, protocol=-1) + return self.encoding.dumps(item, protocol=-1).decode('latin1') else: return self.encoding.dumps(item) @@ -47,7 +47,10 @@ def _decode_item(self, encoded_item): ''' Decode an item previously encoded ''' - return self.encoding.loads(encoded_item) + if self.encoding.__name__ == 'pickle': + return self.encoding.loads(encoded_item.encode('latin1')) + else: + return self.encoding.loads(encoded_item) def __len__(self): ''' diff --git a/utils/scutils/stats_collector.py b/utils/scutils/stats_collector.py index 36db9eb3..33b8f110 100644 --- a/utils/scutils/stats_collector.py +++ b/utils/scutils/stats_collector.py @@ -209,7 +209,8 @@ def setup(self, redis_conn=None, host='localhost', port=6379): ''' if redis_conn is None: if host is not None and port is not None: - self.redis_conn = redis.Redis(host=host, port=port) + self.redis_conn = redis.Redis(host=host, port=port, + decode_responses=True) else: raise Exception("Please specify some form of connection " "to Redis") diff --git a/utils/tests/online.py b/utils/tests/online.py index 76239334..ac7c44c4 100644 --- a/utils/tests/online.py +++ b/utils/tests/online.py @@ -10,7 +10,6 @@ from mock import MagicMock import time import sys -import six import redis import argparse @@ -129,7 +128,7 @@ def test_purge_old(self): self.redis_conn.set('default_counter:2015-10', 'stuff2') tc.purge_old() - self.assertEqual([six.b('default_counter:2015-10')], + self.assertEqual(['default_counter:2015-10'], self.redis_conn.keys(tc.get_key() + ':*')) self.redis_conn.delete('default_counter:2015-10') @@ -377,7 +376,8 @@ def test_roll_bitmap_counter(self): help="The Redis port") args = vars(parser.parse_args()) - redis_conn = redis.Redis(host=args['redis_host'], port=args['redis_port']) + redis_conn = redis.Redis(host=args['redis_host'], port=args['redis_port'], + decode_responses=True) # build testing suite suite = unittest.TestSuite() diff --git a/utils/tests/test_redis_queue.py b/utils/tests/test_redis_queue.py index 507eaa6e..14c152f2 100644 --- a/utils/tests/test_redis_queue.py +++ b/utils/tests/test_redis_queue.py @@ -29,14 +29,14 @@ def test_init(self): def test_encode(self): q = Base(MagicMock(), 'key', pickle) # python pickling is different between versions - data = pickle.dumps('cool', protocol=-1) + data = pickle.dumps('cool', protocol=-1).decode('latin1') self.assertEquals(q._encode_item('cool'), data) q2 = Base(MagicMock(), 'key', ujson) self.assertEquals(q2._encode_item('cool2'), '"cool2"') def test_decode(self): q = Base(MagicMock(), 'key', pickle) - self.assertEquals(q._decode_item(b"\x80\x02U\x04coolq\x00."), 'cool') + self.assertEquals(q._decode_item(u"\x80\x02U\x04coolq\x00."), 'cool') q2 = Base(MagicMock(), 'key', ujson) self.assertEquals(q2._decode_item('"cool2"'), 'cool2') diff --git a/utils/tests/throttled_queue.py b/utils/tests/throttled_queue.py index 57664a18..a2dc7cc4 100644 --- a/utils/tests/throttled_queue.py +++ b/utils/tests/throttled_queue.py @@ -44,7 +44,7 @@ def main(): mod = args['moderate'] queue = args['queue'] - conn = redis.Redis(host=host, port=port) + conn = redis.Redis(host=host, port=port, decode_responses=True) q = RedisPriorityQueue(conn, queue) t = RedisThrottledQueue(conn, q, window, num, mod) From 909c65d9219429d873911f7f6da7243275bedbc0 Mon Sep 17 00:00:00 2001 From: gas1121 Date: Thu, 20 Jul 2017 19:55:34 +0800 Subject: [PATCH 02/10] update example_rq to make it easier to work in docker container --- utils/examples/example_rq.py | 82 ++++++++++++++++++++---------------- 1 file changed, 46 insertions(+), 36 deletions(-) diff --git a/utils/examples/example_rq.py b/utils/examples/example_rq.py index 3075ddc4..f75781d0 100644 --- a/utils/examples/example_rq.py +++ b/utils/examples/example_rq.py @@ -1,40 +1,50 @@ +import sys import redis from scutils.redis_queue import RedisStack, RedisQueue, RedisPriorityQueue import argparse -# change these for your Redis host -host = 'scdev' -port = 6379 -redis_conn = redis.Redis(host=host, port=port, decode_responses=True) - -parser = argparse.ArgumentParser(description='Example Redis Queues.') -group = parser.add_mutually_exclusive_group(required=True) -group.add_argument('-q', '--queue', action='store_true', help="Use a RedisQueue") -group.add_argument('-s', '--stack', action='store_true', - help="Use a RedisStack") -group.add_argument('-p', '--priority', action='store_true', - help="Use a RedisPriorityQueue") - -args = vars(parser.parse_args()) - -if args['queue']: - queue = RedisQueue(redis_conn, "my_key") -elif args['stack']: - queue = RedisStack(redis_conn, "my_key") -elif args['priority']: - queue = RedisPriorityQueue(redis_conn, "my_key") - -print("Using " + queue.__class__.__name__) - -if isinstance(queue, RedisPriorityQueue): - queue.push("item1", 50) - queue.push("item2", 100) - queue.push("item3", 20) -else: - queue.push("item1") - queue.push("item2") - queue.push("item3") - -print("Pop 1 " + queue.pop()) -print("Pop 2 " + queue.pop()) -print("Pop 3 " + queue.pop()) \ No newline at end of file + +def main(): + parser = argparse.ArgumentParser(description='Example Redis Queues.') + parser.add_argument('-r', '--redis-host', action='store', default='scdev', + help="The Redis host ip") + parser.add_argument('-rp', '--redis-port', action='store', default='6379', + help="The Redis port") + group = parser.add_mutually_exclusive_group(required=True) + group.add_argument('-q', '--queue', action='store_true', help="Use a RedisQueue") + group.add_argument('-s', '--stack', action='store_true', + help="Use a RedisStack") + group.add_argument('-p', '--priority', action='store_true', + help="Use a RedisPriorityQueue") + + args = vars(parser.parse_args()) + + host = args['redis_host'] + port = args['redis_port'] + redis_conn = redis.Redis(host=host, port=port, decode_responses=True) + + if args['queue']: + queue = RedisQueue(redis_conn, "my_key") + elif args['stack']: + queue = RedisStack(redis_conn, "my_key") + elif args['priority']: + queue = RedisPriorityQueue(redis_conn, "my_key") + + print("Using " + queue.__class__.__name__) + + if isinstance(queue, RedisPriorityQueue): + queue.push("item1", 50) + queue.push("item2", 100) + queue.push("item3", 20) + else: + queue.push("item1") + queue.push("item2") + queue.push("item3") + + print("Pop 1 " + queue.pop()) + print("Pop 2 " + queue.pop()) + print("Pop 3 " + queue.pop()) + + +if __name__ == "__main__": + sys.exit(main()) \ No newline at end of file From 04c432a372d8d3ee019b078c8a975d1d56e7a5da Mon Sep 17 00:00:00 2001 From: gas1121 Date: Thu, 20 Jul 2017 20:18:04 +0800 Subject: [PATCH 03/10] update document for Redis Queue --- docs/topics/utils/redisqueue.rst | 82 ++++++++++++++++++-------------- 1 file changed, 46 insertions(+), 36 deletions(-) diff --git a/docs/topics/utils/redisqueue.rst b/docs/topics/utils/redisqueue.rst index c986bc63..175594dc 100644 --- a/docs/topics/utils/redisqueue.rst +++ b/docs/topics/utils/redisqueue.rst @@ -141,46 +141,56 @@ In this example lets create a simple script that changes what type of Queue we u :: + import sys import redis from scutils.redis_queue import RedisStack, RedisQueue, RedisPriorityQueue import argparse - # change these for your Redis host - host = 'scdev' - port = 6379 - redis_conn = redis.Redis(host=host, port=port) - - parser = argparse.ArgumentParser(description='Example Redis Queues.') - group = parser.add_mutually_exclusive_group(required=True) - group.add_argument('-q', '--queue', action='store_true', help="Use a RedisQueue") - group.add_argument('-s', '--stack', action='store_true', - help="Use a RedisStack") - group.add_argument('-p', '--priority', action='store_true', - help="Use a RedisPriorityQueue") - - args = vars(parser.parse_args()) - - if args['queue']: - queue = RedisQueue(redis_conn, "my_key") - elif args['stack']: - queue = RedisStack(redis_conn, "my_key") - elif args['priority']: - queue = RedisPriorityQueue(redis_conn, "my_key") - - print("Using " + queue.__class__.__name__) - - if isinstance(queue, RedisPriorityQueue): - queue.push("item1", 50) - queue.push("item2", 100) - queue.push("item3", 20) - else: - queue.push("item1") - queue.push("item2") - queue.push("item3") - - print("Pop 1 " + queue.pop()) - print("Pop 2 " + queue.pop()) - print("Pop 3 " + queue.pop()) + + def main(): + parser = argparse.ArgumentParser(description='Example Redis Queues.') + parser.add_argument('-r', '--redis-host', action='store', default='scdev', + help="The Redis host ip") + parser.add_argument('-rp', '--redis-port', action='store', default='6379', + help="The Redis port") + group = parser.add_mutually_exclusive_group(required=True) + group.add_argument('-q', '--queue', action='store_true', help="Use a RedisQueue") + group.add_argument('-s', '--stack', action='store_true', + help="Use a RedisStack") + group.add_argument('-p', '--priority', action='store_true', + help="Use a RedisPriorityQueue") + + args = vars(parser.parse_args()) + + host = args['redis_host'] + port = args['redis_port'] + redis_conn = redis.Redis(host=host, port=port, decode_responses=True) + + if args['queue']: + queue = RedisQueue(redis_conn, "my_key") + elif args['stack']: + queue = RedisStack(redis_conn, "my_key") + elif args['priority']: + queue = RedisPriorityQueue(redis_conn, "my_key") + + print("Using " + queue.__class__.__name__) + + if isinstance(queue, RedisPriorityQueue): + queue.push("item1", 50) + queue.push("item2", 100) + queue.push("item3", 20) + else: + queue.push("item1") + queue.push("item2") + queue.push("item3") + + print("Pop 1 " + queue.pop()) + print("Pop 2 " + queue.pop()) + print("Pop 3 " + queue.pop()) + + + if __name__ == "__main__": + sys.exit(main()) Save the file as ``example_rq.py`` or use the one located at ``utils/examples/example_rq.py``, and now lets run the different tests. From 28fadc253aa8f4cb3ef9dbe4f3f7430dbdbf2016 Mon Sep 17 00:00:00 2001 From: gas1121 Date: Thu, 20 Jul 2017 20:50:11 +0800 Subject: [PATCH 04/10] update documentation --- docs/topics/utils/redisqueue.rst | 2 +- docs/topics/utils/redisthrottledqueue.rst | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/topics/utils/redisqueue.rst b/docs/topics/utils/redisqueue.rst index 175594dc..493dd656 100644 --- a/docs/topics/utils/redisqueue.rst +++ b/docs/topics/utils/redisqueue.rst @@ -123,7 +123,7 @@ You can use any of the three classes in the following way, you just need to have >>> import redis >>> import ujson >>> from scutils.redis_queue import RedisStack - >>> redis_conn = redis.Redis(host='scdev', port=6379) + >>> redis_conn = redis.Redis(host='scdev', port=6379, decode_responses=True) >>> queue = RedisStack(redis_conn, "stack_key", encoding=ujson)) >>> queue.push('item1') >>> queue.push(['my', 'array', 'here']) diff --git a/docs/topics/utils/redisthrottledqueue.rst b/docs/topics/utils/redisthrottledqueue.rst index 687eac23..187253ad 100644 --- a/docs/topics/utils/redisthrottledqueue.rst +++ b/docs/topics/utils/redisthrottledqueue.rst @@ -56,7 +56,7 @@ If you would like to throttle your Redis queue, you need to pass the queue in as >>> import redis >>> from scutils.redis_queue import RedisPriorityQueue >>> from scutils.redis_throttled_queue import RedisThrottledQueue - >>> redis_conn = redis.Redis(host='scdev', port=6379) + >>> redis_conn = redis.Redis(host='scdev', port=6379, decode_responses=True) >>> queue = RedisPriorityQueue(redis_conn, 'my_key') >>> t = RedisThrottledQueue(redis_conn, queue, 10, 5) >>> t.push('item', 5) @@ -123,7 +123,7 @@ The Redis Throttled Queue really shines when multiple processes are trying to po queue = args['queue'] elastic = args['elastic'] - conn = redis.Redis(host=host, port=port) + conn = redis.Redis(host=host, port=port, decode_responses=True) q = RedisPriorityQueue(conn, queue) t = RedisThrottledQueue(conn, q, window, num, mod, elastic=elastic) From 64b12f5f57163b91d91bea404a84d1421da2fe04 Mon Sep 17 00:00:00 2001 From: gas1121 Date: Thu, 20 Jul 2017 20:55:53 +0800 Subject: [PATCH 05/10] begin to update tests for ZookeeperWatcher --- utils/tests/online.py | 15 +++++++++++++++ utils/tests/test_zookeeper_watcher.py | 2 +- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/utils/tests/online.py b/utils/tests/online.py index ac7c44c4..e844806e 100644 --- a/utils/tests/online.py +++ b/utils/tests/online.py @@ -13,6 +13,7 @@ import redis import argparse +from kazoo.client import KazooClient from scutils.redis_queue import RedisQueue, RedisPriorityQueue, RedisStack @@ -367,6 +368,20 @@ def test_roll_bitmap_counter(self): counter.stop() self.clean_keys(counter.key) + +class TestZookeeperWatcher(TestCase): + def __init__(self, hosts): + self.hosts = hosts + + def setUp(self): + self.zoo_client = KazooClient(hosts=self.hosts) + self.zoo_client.start() + + def tearDown(self): + self.zoo_client.stop() + self.zoo_client.close() + + if __name__ == '__main__': parser = argparse.ArgumentParser(description="Online deployment Test" " Script for Utils") diff --git a/utils/tests/test_zookeeper_watcher.py b/utils/tests/test_zookeeper_watcher.py index 6a291577..fc5b52c8 100644 --- a/utils/tests/test_zookeeper_watcher.py +++ b/utils/tests/test_zookeeper_watcher.py @@ -9,7 +9,7 @@ class TestZookeeperWatcher(TestCase): def setUp(self): zoo_client = MagicMock() - zoo_client.get = MagicMock(return_value=('data', 'blah')) + zoo_client.get = MagicMock(return_value=(b'data', 'blah')) with patch('scutils.zookeeper_watcher.KazooClient') as k: k.return_value = zoo_client From ca11e82e530fc423f1100fe2968eef55d1f4a21d Mon Sep 17 00:00:00 2001 From: gas1121 Date: Fri, 21 Jul 2017 13:45:49 +0800 Subject: [PATCH 06/10] fix unicode issue for ZookeeperWatcher on python 3 and add online tests --- utils/scutils/zookeeper_watcher.py | 2 ++ utils/tests/online.py | 50 +++++++++++++++++++++++++++++- 2 files changed, 51 insertions(+), 1 deletion(-) diff --git a/utils/scutils/zookeeper_watcher.py b/utils/scutils/zookeeper_watcher.py index 3d935cd2..04b22ce9 100644 --- a/utils/scutils/zookeeper_watcher.py +++ b/utils/scutils/zookeeper_watcher.py @@ -202,6 +202,7 @@ def update_file(self, path): try: # grab the file result, stat = self.zoo_client.get(path, watch=self.watch_file) + result = result.decode('utf-8') except ZookeeperError: self.set_valid(False) self.call_error(self.INVALID_GET) @@ -243,6 +244,7 @@ def update_pointed(self): try: conf_string, stat2 = self.zoo_client.get(self.point_path, watch=self.watch_pointed) + conf_string = conf_string.decode('utf-8') except ZookeeperError: self.old_data = '' self.set_valid(False) diff --git a/utils/tests/online.py b/utils/tests/online.py index e844806e..ffa06be7 100644 --- a/utils/tests/online.py +++ b/utils/tests/online.py @@ -20,6 +20,7 @@ from scutils.stats_collector import (ThreadedCounter, TimeWindow, RollingTimeWindow, Counter, UniqueCounter, HyperLogLogCounter, BitMapCounter) +from scutils.zookeeper_watcher import ZookeeperWatcher class RedisMixin(object): @@ -370,14 +371,55 @@ def test_roll_bitmap_counter(self): class TestZookeeperWatcher(TestCase): - def __init__(self, hosts): + def __init__(self, name, hosts): self.hosts = hosts + self.file_path = '/test_path' + self.file_data = 'test_data' + self.pointer_path = '/test_pointer' + self.pointer_data = self.file_path + TestCase.__init__(self, name) def setUp(self): self.zoo_client = KazooClient(hosts=self.hosts) self.zoo_client.start() + # prepare data + self.zoo_client.ensure_path(self.file_path) + self.zoo_client.set(self.file_path, self.file_data.encode('utf-8')) + self.zoo_client.ensure_path(self.pointer_path) + self.zoo_client.set(self.pointer_path, + self.pointer_data.encode('utf-8')) + + self.zoo_watcher = ZookeeperWatcher(hosts=self.hosts, + filepath=self.file_path, + pointer=False, + ensure=False, + valid_init=True) + + def test_ping(self): + self.assertTrue(self.zoo_watcher.ping()) + + def test_get_file_contents(self): + pointer_zoo_watcher = ZookeeperWatcher(hosts=self.hosts, + filepath=self.pointer_path, + pointer=True, + ensure=False, + valid_init=True) + + self.assertEquals(self.zoo_watcher.get_file_contents(), self.file_data) + self.assertEquals(pointer_zoo_watcher.get_file_contents(), + self.file_data) + self.assertEquals(pointer_zoo_watcher.get_file_contents(True), + self.pointer_data) + + pointer_zoo_watcher.close() def tearDown(self): + self.zoo_watcher.close() + + self.zoo_client.ensure_path(self.file_path) + self.zoo_client.delete(self.file_path) + self.zoo_client.ensure_path(self.pointer_path) + self.zoo_client.delete(self.pointer_path) self.zoo_client.stop() self.zoo_client.close() @@ -389,6 +431,9 @@ def tearDown(self): default='localhost', help="The Redis host ip") parser.add_argument('-p', '--redis-port', action='store', default='6379', help="The Redis port") + parser.add_argument('-z', '--zoo-keeper', action='store', + default='localhost:2181', + help="The Zookeeper connection :") args = vars(parser.parse_args()) redis_conn = redis.Redis(host=args['redis_host'], port=args['redis_port'], @@ -426,6 +471,9 @@ def tearDown(self): suite.addTest(TestStatsBitMapCounter('test_bitmap_counter', redis_conn)) suite.addTest(TestStatsBitMapCounter('test_roll_bitmap_counter', redis_conn)) + suite.addTest(TestZookeeperWatcher('test_ping', args['zoo_keeper'])) + suite.addTest(TestZookeeperWatcher('test_get_file_contents', + args['zoo_keeper'])) result = unittest.TextTestRunner(verbosity=2).run(suite) From d95e8b3c91c9f84ed95460b95bfea837539ba74f Mon Sep 17 00:00:00 2001 From: gas1121 Date: Fri, 21 Jul 2017 16:08:29 +0800 Subject: [PATCH 07/10] fix unicode issue in ZookeeperMonitor and update related tests --- redis-monitor/plugins/zookeeper_monitor.py | 3 ++- redis-monitor/tests/test_plugins.py | 16 ++++++++-------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/redis-monitor/plugins/zookeeper_monitor.py b/redis-monitor/plugins/zookeeper_monitor.py index b691b0bd..014995ef 100644 --- a/redis-monitor/plugins/zookeeper_monitor.py +++ b/redis-monitor/plugins/zookeeper_monitor.py @@ -56,6 +56,7 @@ def handle(self, key, value): data = None try: data = self.zoo_client.get(self.path)[0] + data = data.decode('utf-8') except ZookeeperError: e = "Unable to load Zookeeper config" self.logger.error(e) @@ -92,7 +93,7 @@ def handle(self, key, value): # write the configuration back to zookeeper the_string = yaml.dump(the_dict, default_flow_style=False) try: - self.zoo_client.set(self.path, the_string) + self.zoo_client.set(self.path, the_string.encode('utf-8')) except ZookeeperError: e = "Unable to store Zookeeper config" self.logger.error(e) diff --git a/redis-monitor/tests/test_plugins.py b/redis-monitor/tests/test_plugins.py index 41143ab3..3242ae40 100644 --- a/redis-monitor/tests/test_plugins.py +++ b/redis-monitor/tests/test_plugins.py @@ -395,36 +395,36 @@ def test_zk_regex(self): def test_zk_handle_du(self): # domain update - s = 'blacklist: []\ndomains:\n dmoz.org: {hits: 60, scale: 1.0, window: 60}\n' + s = b'blacklist: []\ndomains:\n dmoz.org: {hits: 60, scale: 1.0, window: 60}\n' val = '{"uuid":"blah123","hits":15,"scale":0.9,"window":60}' - expected = 'blacklist: []\ndomains:\n cnn.com:\n hits: 15\n scale: 0.9\n window: 60\n dmoz.org:\n hits: 60\n scale: 1.0\n window: 60\n' + expected = b'blacklist: []\ndomains:\n cnn.com:\n hits: 15\n scale: 0.9\n window: 60\n dmoz.org:\n hits: 60\n scale: 1.0\n window: 60\n' self.plugin.zoo_client.get = MagicMock(return_value=(s,)) self.plugin.handle(key="zk:domain-update:cnn.com:testapp", value=val) self.plugin.zoo_client.set.assert_called_once_with("/some/path", expected) def test_zk_handle_dr(self): # domain remove - s = 'blacklist: []\ndomains:\n dmoz.org: {hits: 60, scale: 1.0, window: 60}\n' + s = b'blacklist: []\ndomains:\n dmoz.org: {hits: 60, scale: 1.0, window: 60}\n' val = '{"uuid":"blah123"}' - expected = 'blacklist: []\ndomains: {}\n' + expected = b'blacklist: []\ndomains: {}\n' self.plugin.zoo_client.get = MagicMock(return_value=(s,)) self.plugin.handle(key="zk:domain-remove:dmoz.org:testapp", value=val) self.plugin.zoo_client.set.assert_called_once_with("/some/path", expected) def test_zk_handle_bu(self): # blacklist update - s = 'blacklist: []\ndomains: {}\n' + s = b'blacklist: []\ndomains: {}\n' val = '{"uuid":"blah123"}' - expected = 'blacklist:\n- bingo.com\ndomains: {}\n' + expected = b'blacklist:\n- bingo.com\ndomains: {}\n' self.plugin.zoo_client.get = MagicMock(return_value=(s,)) self.plugin.handle(key="zk:blacklist-update:bingo.com:testapp", value=val) self.plugin.zoo_client.set.assert_called_once_with("/some/path", expected) def test_zk_handle_br(self): # blacklist remove - s = 'blacklist: [bingo.com]\ndomains: {}\n' + s = b'blacklist: [bingo.com]\ndomains: {}\n' val = '{"uuid":"blah123"}' - expected = 'blacklist: []\ndomains: {}\n' + expected = b'blacklist: []\ndomains: {}\n' self.plugin.zoo_client.get = MagicMock(return_value=(s,)) self.plugin.handle(key="zk:blacklist-remove:bingo.com:testapp", value=val) self.plugin.zoo_client.set.assert_called_once_with("/some/path", expected) From 2f3ec46d144800c622c217f85ad24370abfdbaa5 Mon Sep 17 00:00:00 2001 From: gas1121 Date: Fri, 21 Jul 2017 19:59:31 +0800 Subject: [PATCH 08/10] fix yaml dump issue in ZookeeperMonitor --- redis-monitor/plugins/zookeeper_monitor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/redis-monitor/plugins/zookeeper_monitor.py b/redis-monitor/plugins/zookeeper_monitor.py index 014995ef..02308664 100644 --- a/redis-monitor/plugins/zookeeper_monitor.py +++ b/redis-monitor/plugins/zookeeper_monitor.py @@ -91,7 +91,7 @@ def handle(self, key, value): self.logger.warn("Unknown command given to Zookeeper Monitor") # write the configuration back to zookeeper - the_string = yaml.dump(the_dict, default_flow_style=False) + the_string = yaml.safe_dump(the_dict, default_flow_style=False) try: self.zoo_client.set(self.path, the_string.encode('utf-8')) except ZookeeperError: From 25eb5fbc0dffa39f0d66e3129fa3163a4be452ab Mon Sep 17 00:00:00 2001 From: Madison Bahmer Date: Sun, 23 Jul 2017 17:03:44 -0400 Subject: [PATCH 09/10] Add scutils install from source --- travis/ansible.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/travis/ansible.sh b/travis/ansible.sh index 76e49f44..37eddeb4 100755 --- a/travis/ansible.sh +++ b/travis/ansible.sh @@ -19,6 +19,9 @@ sudo docker exec --tty "$(cat ${container_id})" env TERM=xterm /bin/bash -c "ans # Install coveralls and other pip requiremnts sudo docker exec --tty "$(cat ${container_id})" env TERM=xterm /bin/bash -c "virtualenv ${PWD}/sc; source ${PWD}/sc/bin/activate; cd ${PWD}; pip install -r requirements.txt; find . -name "*.pyc" -type f -delete;" +# Install scutils from source +sudo docker exec --tty "$(cat ${container_id})" env TERM=xterm /bin/bash -c "virtualenv ${PWD}/sc; source ${PWD}/sc/bin/activate; cd ${PWD}; pip uninstall scutils -y; cd utils; python setup.py install; cd ../; find . -name "*.pyc" -type f -delete;" + # Run offline tests sudo docker exec --tty "$(cat ${container_id})" env TERM=xterm /bin/bash -c "source ${PWD}/sc/bin/activate; cd ${PWD}; ./run_offline_tests.sh" From e6d273f53830867a8031b8d523a0b145d26aa7b9 Mon Sep 17 00:00:00 2001 From: gas1121 Date: Mon, 24 Jul 2017 13:51:33 +0800 Subject: [PATCH 10/10] add scutils tests into travis's docker test job --- docker/run_docker_tests.sh | 17 ++++++++++++---- docker/utils/Dockerfile | 20 +++++++++++++++++++ docker/utils/Dockerfile.py2alpine | 33 +++++++++++++++++++++++++++++++ docker/utils/Dockerfile.py3 | 20 +++++++++++++++++++ run_online_tests.sh | 12 ++++++----- travis/docker-compose.test.yml | 6 ++++++ travis/docker.sh | 3 +++ 7 files changed, 102 insertions(+), 9 deletions(-) create mode 100644 docker/utils/Dockerfile create mode 100644 docker/utils/Dockerfile.py2alpine create mode 100644 docker/utils/Dockerfile.py3 diff --git a/docker/run_docker_tests.sh b/docker/run_docker_tests.sh index 5deeb9e4..f3880c7c 100755 --- a/docker/run_docker_tests.sh +++ b/docker/run_docker_tests.sh @@ -13,8 +13,17 @@ if [ $? -eq 1 ]; then exit 1 fi -python tests/online.py -v -if [ $? -eq 1 ]; then - echo "integration tests failed" - exit 1 +# if 3 parameters passed in, then it's util's test +if [ $# -eq 3 ]; then + python tests/online.py -r $1 -p $2 -z $3 + if [ $? -eq 1 ]; then + echo "integration tests failed" + exit 1 + fi +else + python tests/online.py -v + if [ $? -eq 1 ]; then + echo "integration tests failed" + exit 1 + fi fi diff --git a/docker/utils/Dockerfile b/docker/utils/Dockerfile new file mode 100644 index 00000000..ac8c1ab8 --- /dev/null +++ b/docker/utils/Dockerfile @@ -0,0 +1,20 @@ +FROM python:2.7 +MAINTAINER Madison Bahmer + +# os setup +RUN apt-get update +RUN mkdir -p /usr/src/app +WORKDIR /usr/src/app + +# move codebase over and install requirements +COPY utils /usr/src/app +RUN pip install . +RUN pip install nose + +# copy testing script into container +COPY docker/run_docker_tests.sh /usr/src/app/run_docker_tests.sh + +# set up environment variables + +# run command +CMD ["ping", "localhost"] \ No newline at end of file diff --git a/docker/utils/Dockerfile.py2alpine b/docker/utils/Dockerfile.py2alpine new file mode 100644 index 00000000..55290fc8 --- /dev/null +++ b/docker/utils/Dockerfile.py2alpine @@ -0,0 +1,33 @@ +FROM python:2.7.12-alpine +MAINTAINER Madison Bahmer + +# move codebase over +WORKDIR /usr/src/app +COPY utils /usr/src/app + +# Combine run command to create single intermeiate image layer +# This MANDATORY because developments dependencies are huge. +RUN mkdir -p /usr/src/app \ + && cd /usr/src/app \ +# Installing runtime dependencies + && apk --no-cache add \ + curl \ +# Installing buildtime dependencies. They will be removed at end of this +# commands sequence. + && apk --no-cache add --virtual build-dependencies \ + build-base \ +# Updating pip itself before installing packages + && pip install --no-cache-dir pip setuptools \ +# Installing scutils from local codebase + && pip install . \ +# Removing build dependencies leaving image layer clean and neat + && apk del build-dependencies +RUN pip install nose + +# copy testing script into container +COPY docker/run_docker_tests.sh /usr/src/app/run_docker_tests.sh + +# set up environment variables + +# run command +CMD ["ping", "localhost"] \ No newline at end of file diff --git a/docker/utils/Dockerfile.py3 b/docker/utils/Dockerfile.py3 new file mode 100644 index 00000000..02c6b693 --- /dev/null +++ b/docker/utils/Dockerfile.py3 @@ -0,0 +1,20 @@ +FROM python:3.6 +MAINTAINER Madison Bahmer + +# os setup +RUN apt-get update +RUN mkdir -p /usr/src/app +WORKDIR /usr/src/app + +# move codebase over and install requirements +COPY utils /usr/src/app +RUN pip install . +RUN pip install nose + +# copy testing script into container +COPY docker/run_docker_tests.sh /usr/src/app/run_docker_tests.sh + +# set up environment variables + +# run command +CMD ["ping", "localhost"] \ No newline at end of file diff --git a/run_online_tests.sh b/run_online_tests.sh index 36c04c17..b08e103b 100755 --- a/run_online_tests.sh +++ b/run_online_tests.sh @@ -6,20 +6,22 @@ HOST='localhost' PORT=6379 +ZOOKEEPER_HOST='localhost:2181' -if [ $# -ne 2 ] +if [ $# -ne 3 ] then - echo "---- Running utils online test with localhost 6379" + echo "---- Running utils online test with redis on localhost:6379 and zookeeper on localhost:2181" echo "Other usage:" - echo " ./bundle.sh " + echo " ./bundle.sh " else - echo "---- Using custom redis host and port for utils online test" + echo "---- Using custom redis and zookeeper host and port for utils online test" HOST=$1 PORT=$2 + ZOOKEEPER_HOST=$3 fi cd utils -python tests/online.py -r $HOST -p $PORT +python tests/online.py -r $HOST -p $PORT -z $ZOOKEEPER_HOST if [ $? -eq 1 ]; then echo "utils tests failed" exit 1 diff --git a/travis/docker-compose.test.yml b/travis/docker-compose.test.yml index 8ef9d1e8..41af4e5d 100644 --- a/travis/docker-compose.test.yml +++ b/travis/docker-compose.test.yml @@ -2,6 +2,12 @@ version: '2' # this file is used for travis ci testing, and is built in .travis.yml services: + utils: + image: istresearch/scrapy-cluster:utils-test + depends_on: + - kafka + - redis + - zookeeper kafka_monitor: image: istresearch/scrapy-cluster:kafka-monitor-test depends_on: diff --git a/travis/docker.sh b/travis/docker.sh index 05656c17..00a33ea7 100755 --- a/travis/docker.sh +++ b/travis/docker.sh @@ -3,6 +3,7 @@ set -e # Build Dockerfiles for Scrapy Cluster +sudo docker build --rm=true --file docker/utils/$dockerfile_name --tag=istresearch/scrapy-cluster:utils-test . sudo docker build --rm=true --file docker/kafka-monitor/$dockerfile_name --tag=istresearch/scrapy-cluster:kafka-monitor-test . sudo docker build --rm=true --file docker/redis-monitor/$dockerfile_name --tag=istresearch/scrapy-cluster:redis-monitor-test . sudo docker build --rm=true --file docker/crawler/$dockerfile_name --tag=istresearch/scrapy-cluster:crawler-test . @@ -15,6 +16,7 @@ sudo docker-compose -f travis/docker-compose.test.yml up -d sleep 10 # run docker unit and integration tests for each component +sudo docker-compose -f travis/docker-compose.test.yml exec utils ./run_docker_tests.sh redis 6379 zookeeper:2181 sudo docker-compose -f travis/docker-compose.test.yml exec kafka_monitor ./run_docker_tests.sh sudo docker-compose -f travis/docker-compose.test.yml exec redis_monitor ./run_docker_tests.sh sudo docker-compose -f travis/docker-compose.test.yml exec crawler ./run_docker_tests.sh @@ -33,6 +35,7 @@ if [ "$TRAVIS_BRANCH" = "dev" ] && [ "$TRAVIS_PULL_REQUEST" = "false" ] && [ "$T sudo docker build --rm=true --file docker/rest/$dockerfile_name --tag=istresearch/scrapy-cluster:rest-$docker_tag_suffix . # remove 'test' images + sudo docker rmi istresearch/scrapy-cluster:utils-test sudo docker rmi istresearch/scrapy-cluster:kafka-monitor-test sudo docker rmi istresearch/scrapy-cluster:redis-monitor-test sudo docker rmi istresearch/scrapy-cluster:crawler-test