-
Notifications
You must be signed in to change notification settings - Fork 208
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #44 from damklis/develop
Develop
- Loading branch information
Showing
11 changed files
with
150 additions
and
80 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
[run] | ||
omit = | ||
*test*.py | ||
*main.py | ||
*operator.py | ||
|
||
[report] | ||
exclude_lines = | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
from custom_operators.proxypool_operator import ProxyPoolOperator | ||
from custom_operators.rss_news_operator import RSSNewsOperator |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
import json | ||
from concurrent.futures import ThreadPoolExecutor | ||
from retry import RetryOnException as retry | ||
from proxypool import ( | ||
ProxyPoolValidator, | ||
ProxyPoolScraper, | ||
RedisProxyPoolClient | ||
) | ||
|
||
from airflow.models.baseoperator import BaseOperator | ||
from airflow.utils.decorators import apply_defaults | ||
|
||
|
||
class ProxyPoolOperator(BaseOperator):
    """Airflow operator that refreshes the proxy pool stored in Redis.

    On execution it scrapes candidate proxies from ``proxy_webpage``,
    validates them concurrently against ``testing_url`` and overwrites the
    proxies stored under ``redis_key`` with the healthiest valid ones.
    """

    @apply_defaults
    def __init__(
            self,
            proxy_webpage,
            number_of_proxies,
            testing_url,
            max_workers,
            redis_config,
            redis_key,
            *args,
            proxies_to_store=5,
            **kwargs):
        """Store the operator configuration.

        Args:
            proxy_webpage: Passed to ``ProxyPoolScraper``.
            number_of_proxies: Passed to ``get_proxy_stream`` of the scraper.
            testing_url: Passed to ``ProxyPoolValidator``.
            max_workers: Size of the thread pool used for validation.
            redis_config: Passed to ``RedisProxyPoolClient``.
            redis_key: Passed to ``RedisProxyPoolClient``.
            proxies_to_store: Keyword-only. How many of the best valid
                proxies are written to Redis. Defaults to 5, the value that
                used to be hard-coded.
            *args, **kwargs: Forwarded to ``BaseOperator``.
        """
        super().__init__(*args, **kwargs)
        self.proxy_webpage = proxy_webpage
        self.testing_url = testing_url
        self.number_of_proxies = number_of_proxies
        self.max_workers = max_workers
        self.redis_config = redis_config
        self.redis_key = redis_key
        self.proxies_to_store = proxies_to_store

    @retry(5)
    def execute(self, context):
        """Scrape, validate and store proxies.

        Args:
            context: Airflow task context (unused here).
        """
        scraper = ProxyPoolScraper(self.proxy_webpage)
        validator = ProxyPoolValidator(self.testing_url)
        proxy_stream = scraper.get_proxy_stream(self.number_of_proxies)

        # Validation runs one proxy per worker thread; results come back
        # in the order the proxies were streamed.
        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            results = executor.map(validator.validate_proxy, proxy_stream)
            valid_records = [
                record for record in results if record.is_valid is True
            ]

        # Highest health first; the sort is stable, so ties keep stream order.
        valid_records.sort(key=lambda record: record.health, reverse=True)
        best_records = valid_records[:self.proxies_to_store]

        with RedisProxyPoolClient(self.redis_key, self.redis_config) as client:
            client.override_existing_proxies(
                [json.dumps(record.proxy) for record in best_records]
            )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
from log import log | ||
from retry import RetryOnException as retry | ||
from proxypool import RedisProxyPoolClient | ||
from rss_news import ( | ||
NewsProducer, | ||
NewsExporter, | ||
NewsValidator | ||
) | ||
|
||
from airflow.models.baseoperator import BaseOperator | ||
from airflow.utils.decorators import apply_defaults | ||
|
||
|
||
@log
class RSSNewsOperator(BaseOperator):
    """Airflow operator that streams news from an RSS feed to a broker.

    On execution it takes a proxy from the Redis proxy pool, reads the news
    stream of ``rss_feed`` through that proxy, validates each item and
    exports it to ``topic``.
    """

    @apply_defaults
    def __init__(
            self,
            validator_config,
            rss_feed,
            language,
            redis_config,
            redis_key,
            bootstrap_servers,
            topic,
            *args, **kwargs):
        """Store the operator configuration.

        Args:
            validator_config: Passed to ``NewsValidator``.
            rss_feed: Passed to ``NewsProducer``.
            language: Passed to ``NewsProducer``.
            redis_config: Passed to ``RedisProxyPoolClient``.
            redis_key: Passed to ``RedisProxyPoolClient``.
            bootstrap_servers: Passed to ``NewsExporter``.
            topic: Topic name given to ``export_news_to_broker``.
            *args, **kwargs: Forwarded to ``BaseOperator``.
        """
        super().__init__(*args, **kwargs)
        self.validator_config = validator_config
        self.rss_feed = rss_feed
        self.language = language
        self.redis_config = redis_config
        self.redis_key = redis_key
        self.bootstrap_servers = bootstrap_servers
        self.topic = topic

    @retry(5)
    def execute(self, context):
        """Validate and export every news item of the configured feed.

        If anything fails while streaming, the proxy in use is popped from
        the pool, the error is logged and the exception is re-raised.

        Args:
            context: Airflow task context (unused here).
        """
        validator = NewsValidator(self.validator_config)
        producer = NewsProducer(self.rss_feed, self.language)
        proxy_client = RedisProxyPoolClient(self.redis_key, self.redis_config)

        # Enter the Redis client as a context manager so it is released on
        # exit; the same object is kept for the method calls below.
        # NOTE(review): ``self.logger`` is presumably injected by ``@log``.
        with proxy_client, NewsExporter(self.bootstrap_servers) as exporter:
            proxy = proxy_client.get_proxy()
            self.logger.info(proxy)
            try:
                for news in producer.get_news_stream(proxy):
                    self.logger.info(news)
                    validator.validate_news(news)
                    exporter.export_news_to_broker(
                        self.topic,
                        news.as_dict()
                    )
            except Exception as err:
                # Drop the proxy that was in use before re-raising.
                proxy_client.lpop_proxy()
                self.logger.error(f"Exception: {err}")
                raise
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,3 @@ | ||
from proxypool.redis_proxypool_client import RedisProxyPoolClient | ||
from proxypool.main import update_proxypool | ||
from proxypool.proxypool_scraper import ProxyPoolScraper, ProxyRecord | ||
from proxypool.proxypool_validator import ProxyPoolValidator |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,3 @@ | ||
from rss_news.main import export_news_to_broker | ||
from rss_news.rss_news_producer import NewsProducer, NewsFormatter, News | ||
from rss_news.rss_news_exporter import NewsExporter | ||
from rss_news.rss_news_validator import NewsValidator | ||
from rss_news.rss_news_validator import NewsValidator |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,12 @@ | ||
#!/bin/sh | ||
|
||
pip install -r airflow/requirements.txt \ | ||
&& py.test airflow/modules/tests/ --doctest-modules --cov airflow/modules --show-capture=no -v \ | ||
&& flake8 -v \ | ||
&& docker-compose up -d mongo \ | ||
&& sleep 10 \ | ||
&& docker exec -it mongo /usr/local/bin/init.sh \ | ||
&& docker-compose up -d api \ | ||
&& sleep 30 \ | ||
&& docker exec -it api ./manage.py test -k \ | ||
&& docker-compose down | ||
&& py.test airflow/modules/tests/ --doctest-modules --cov airflow/modules --show-capture=no -v \ | ||
&& flake8 -v \ | ||
&& docker-compose up -d mongo \ | ||
&& sleep 10 \ | ||
&& docker exec -it mongo /usr/local/bin/init.sh \ | ||
&& docker-compose up -d api \ | ||
&& sleep 30 \ | ||
&& docker exec -it api ./manage.py test -k \ | ||
&& docker-compose down |