Permalink
Please sign in to comment.
Browse files
rewrite local crawler queue using redis; using lifo queue so that con…
…trol commands can be grabbed asap;
- Loading branch information...
Showing
with
222 additions
and 152 deletions.
- +0 −1 bootstrap.sh
- +36 −0 scripts/trim_proxies.py
- +13 −26 tweetf0rm/bootstrap.py
- +27 −8 tweetf0rm/client.py
- +1 −1 tweetf0rm/handler/file_handler.py
- +16 −6 tweetf0rm/process/crawler_process.py
- +8 −10 tweetf0rm/process/user_relationship_crawler.py
- +7 −4 tweetf0rm/proxies.py
- +39 −16 tweetf0rm/redis_helper.py
- +75 −80 tweetf0rm/scheduler.py
| @@ -1,4 +1,3 @@ | ||
| #!/bin/bash | ||
| -ulimit -S n 4096 | ||
| PYTHONPATH=$PYTHONPATH:./tweetf0rm python ./tweetf0rm/bootstrap.py "$@" |
| @@ -0,0 +1,36 @@ | ||
| +#!/usr/bin/env python | ||
| +# -*- coding: utf-8 -*- | ||
| + | ||
| +import logging | ||
| + | ||
| +logger = logging.getLogger(__name__) | ||
| +logging.basicConfig(level=logging.INFO, format='%(levelname)s-[%(asctime)s][%(module)s][%(funcName)s][%(lineno)d]: %(message)s') | ||
| +requests_log = logging.getLogger("requests") | ||
| +requests_log.setLevel(logging.WARNING) | ||
| + | ||
| +import argparse, pickle, os, json, sys, time | ||
| +sys.path.append("..") | ||
| + | ||
| + | ||
| +from tweetf0rm.proxies import proxy_checker | ||
| + | ||
| +if __name__=="__main__": | ||
| + | ||
| + parser = argparse.ArgumentParser() | ||
| + parser.add_argument('-p', '--proxies', help="define the location of the output;", default="proxies.json") | ||
| + args = parser.parse_args() | ||
| + | ||
| + with open(os.path.abspath(args.proxies), 'rb') as proxy_f: | ||
| + proxies = json.load(proxy_f)['proxies'] | ||
| + | ||
| + proxies = [proxy['proxy'] for proxy in proxy_checker(proxies)] | ||
| + | ||
| + logger.info('%d live proxies left'%(len(proxies))) | ||
| + | ||
| + with open(os.path.abspath(args.proxies), 'wb') as proxy_f: | ||
| + json.dump({'proxies':proxies}, proxy_f) | ||
| + | ||
| + | ||
| + | ||
| + | ||
| + |
Oops, something went wrong.
0 comments on commit
4439f2f