Permalink
Browse files

allow crawling without proxy

  • Loading branch information...
1 parent e5c1183 commit 1797c89377cf5c69a7471cf775151199c5fe09e4 @bianjiang committed Dec 17, 2014
Showing with 2 additions and 10 deletions.
  1. +0 −1 tweetf0rm/bootstrap.py
  2. +2 −9 tweetf0rm/scheduler.py
View
@@ -158,7 +158,6 @@ def start_server(config, proxies):
parser = argparse.ArgumentParser()
parser.add_argument('-c', '--config', help="config.json that contains a) twitter api keys; b) redis connection string;", required = True)
parser.add_argument('-p', '--proxies', help="the proxies.json file")
- parser.add_argument('-m', '--mode', help="mode of the crawler (streaming or normal crawler)", default='crawler')
args = parser.parse_args()
View
@@ -26,7 +26,7 @@ class Scheduler(object):
def __init__(self, node_id, config={}, proxies=[]):
self.node_id = node_id
self.config = config
- if (len(proxies) > 0):
+ if (proxies and len(proxies) > 0):
self.proxy_list = proxy_checker(proxies)
@@ -70,9 +70,6 @@ def new_crawler(self, node_id, apikeys, config, crawler_proxies = None):
}
}
- # try:
- #crawler_id = md5('%s:%s'%(self.node_id, idx))
- #apikeys = self.config['apikeys'][apikey_list[idx]]
crawler_id = apikeys['app_key']
logger.debug('creating a new crawler: %s'%crawler_id)
if (not crawler_proxies):
@@ -91,11 +88,7 @@ def new_crawler(self, node_id, apikeys, config, crawler_proxies = None):
'crawler_proxies': crawler_proxies
}
crawler.start()
- # except twython.exceptions.TwythonAuthError as exc:
- # logger.error('%s: %s'%(exc, apikeys))
- # except Exception as exc:
- # logger.error(exc)
- # raise
+
def is_alive(self):

0 comments on commit 1797c89

Please sign in to comment.