Skip to content

Commit

Permalink
Merge branch 'dev' into rest
Browse files: browse the repository at this point in the history
  • Loading branch information
Madison Bahmer committed Oct 16, 2016
2 parents 9642425 + 2712b12 commit 3ef24a2
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 3 deletions.
13 changes: 12 additions & 1 deletion crawler/crawling/log_retry_middleware.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,20 @@
import redis
import socket
import time
import sys
from scrapy.utils.response import response_status_message

from scrapy.xlib.tx import ResponseFailed
from twisted.internet import defer
from twisted.internet.error import TimeoutError, DNSLookupError, \
ConnectionRefusedError, ConnectionDone, ConnectError, \
ConnectionLost, TCPTimedOutError
from redis.exceptions import ConnectionError

from scutils.stats_collector import StatsCollector
from scutils.log_factory import LogFactory


class LogRetryMiddleware(object):

EXCEPTIONS_TO_RETRY = (defer.TimeoutError, TimeoutError, DNSLookupError,
Expand Down Expand Up @@ -48,7 +51,6 @@ def setup(self, settings):
bytes=my_bytes,
backups=my_backups)

#self.logger.setLevel(logging.DEBUG)
self.retry_http_codes = set(int(x) for x in
settings.getlist('RETRY_HTTP_CODES'))

Expand All @@ -60,6 +62,15 @@ def setup(self, settings):
self.redis_conn = redis.Redis(host=self.settings.get('REDIS_HOST'),
port=self.settings.get('REDIS_PORT'),
db=settings.get('REDIS_DB'))

try:
self.redis_conn.info()
self.logger.debug("Connected to Redis in LogRetryMiddleware")
except ConnectionError:
self.logger.error("Failed to connect to Redis in LogRetryMiddleware")
# plugin is essential to functionality
sys.exit(1)

self._setup_stats_status_codes()

@classmethod
Expand Down
2 changes: 1 addition & 1 deletion kafka-monitor/kafkadump.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def main():
group_id=consumer_id,
bootstrap_servers=kafka_host,
consumer_timeout_ms=settings['KAFKA_CONSUMER_TIMEOUT'],
auto_offset_reset=settings['KAFKA_CONSUMER_AUTO_OFFSET_RESET'],
auto_offset_reset=offset,
auto_commit_interval_ms=settings['KAFKA_CONSUMER_COMMIT_INTERVAL_MS'],
enable_auto_commit=settings['KAFKA_CONSUMER_AUTO_COMMIT_ENABLE'],
max_partition_fetch_bytes=settings['KAFKA_CONSUMER_FETCH_MESSAGE_MAX_BYTES'])
Expand Down
10 changes: 9 additions & 1 deletion redis-monitor/plugins/stats_monitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,14 @@ def _get_plugin_stats(self, name):
if main not in the_dict:
the_dict[main] = {}
the_dict[main][end] = self._get_key_value(key, end == 'lifetime')

elif main == 'self':
if 'nodes' not in the_dict:
# main is self, end is machine, true_tail is uuid
the_dict['nodes'] = {}
true_tail = elements[4]
if end not in the_dict['nodes']:
the_dict['nodes'][end] = []
the_dict['nodes'][end].append(true_tail)
else:
if 'plugins' not in the_dict:
the_dict['plugins'] = {}
Expand Down Expand Up @@ -194,6 +201,7 @@ def get_machine_stats(self):
self.logger.debug("Gathering machine stats")
the_dict = {}
keys = self.redis_conn.keys('stats:crawler:*:*:*:*')

for key in keys:
# break down key
elements = key.split(":")
Expand Down

0 comments on commit 3ef24a2

Please sign in to comment.