From 1797c89377cf5c69a7471cf775151199c5fe09e4 Mon Sep 17 00:00:00 2001 From: Jiang Bian Date: Wed, 17 Dec 2014 10:39:10 -0600 Subject: [PATCH] allow crawling without proxy --- tweetf0rm/bootstrap.py | 1 - tweetf0rm/scheduler.py | 11 ++--------- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/tweetf0rm/bootstrap.py b/tweetf0rm/bootstrap.py index cc8c8d2..050929b 100644 --- a/tweetf0rm/bootstrap.py +++ b/tweetf0rm/bootstrap.py @@ -158,7 +158,6 @@ def start_server(config, proxies): parser = argparse.ArgumentParser() parser.add_argument('-c', '--config', help="config.json that contains a) twitter api keys; b) redis connection string;", required = True) parser.add_argument('-p', '--proxies', help="the proxies.json file") - parser.add_argument('-m', '--mode', help="mode of the cralwer (streaming or normal crawler)", default='crawler') args = parser.parse_args() diff --git a/tweetf0rm/scheduler.py b/tweetf0rm/scheduler.py index c99f5be..f683710 100644 --- a/tweetf0rm/scheduler.py +++ b/tweetf0rm/scheduler.py @@ -26,7 +26,7 @@ class Scheduler(object): def __init__(self, node_id, config={}, proxies=[]): self.node_id = node_id self.config = config - if (len(proxies) > 0): + if (proxies and len(proxies) > 0): self.proxy_list = proxy_checker(proxies) @@ -70,9 +70,6 @@ def new_crawler(self, node_id, apikeys, config, crawler_proxies = None): } } - # try: - #crawler_id = md5('%s:%s'%(self.node_id, idx)) - #apikeys = self.config['apikeys'][apikey_list[idx]] crawler_id = apikeys['app_key'] logger.debug('creating a new crawler: %s'%crawler_id) if (not crawler_proxies): @@ -91,11 +88,7 @@ def new_crawler(self, node_id, apikeys, config, crawler_proxies = None): 'crawler_proxies': crawler_proxies } crawler.start() - # except twython.exceptions.TwythonAuthError as exc: - # logger.error('%s: %s'%(exc, apikeys)) - # except Exception as exc: - # logger.error(exc) - # raise + def is_alive(self):