Permalink
Browse files
allow crawling without proxy
- Loading branch information...
Showing
with
2 additions
and
10 deletions.
-
+0
−1
tweetf0rm/bootstrap.py
-
+2
−9
tweetf0rm/scheduler.py
|
|
@@ -158,7 +158,6 @@ def start_server(config, proxies): |
|
|
parser = argparse.ArgumentParser()
|
|
|
parser.add_argument('-c', '--config', help="config.json that contains a) twitter api keys; b) redis connection string;", required = True)
|
|
|
parser.add_argument('-p', '--proxies', help="the proxies.json file")
|
|
|
- parser.add_argument('-m', '--mode', help="mode of the crawler (streaming or normal crawler)", default='crawler')
|
|
|
|
|
|
args = parser.parse_args()
|
|
|
|
|
|
|
|
|
@@ -26,7 +26,7 @@ class Scheduler(object): |
|
|
def __init__(self, node_id, config={}, proxies=[]):
|
|
|
self.node_id = node_id
|
|
|
self.config = config
|
|
|
- if (len(proxies) > 0):
|
|
|
+ if (proxies and len(proxies) > 0):
|
|
|
|
|
|
self.proxy_list = proxy_checker(proxies)
|
|
|
|
|
|
@@ -70,9 +70,6 @@ def new_crawler(self, node_id, apikeys, config, crawler_proxies = None): |
|
|
}
|
|
|
}
|
|
|
|
|
|
- # try:
|
|
|
- #crawler_id = md5('%s:%s'%(self.node_id, idx))
|
|
|
- #apikeys = self.config['apikeys'][apikey_list[idx]]
|
|
|
crawler_id = apikeys['app_key']
|
|
|
logger.debug('creating a new crawler: %s'%crawler_id)
|
|
|
if (not crawler_proxies):
|
|
|
@@ -91,11 +88,7 @@ def new_crawler(self, node_id, apikeys, config, crawler_proxies = None): |
|
|
'crawler_proxies': crawler_proxies
|
|
|
}
|
|
|
crawler.start()
|
|
|
- # except twython.exceptions.TwythonAuthError as exc:
|
|
|
- # logger.error('%s: %s'%(exc, apikeys))
|
|
|
- # except Exception as exc:
|
|
|
- # logger.error(exc)
|
|
|
- # raise
|
|
|
+
|
|
|
|
|
|
|
|
|
def is_alive(self):
|
|
|
|
0 comments on commit
1797c89