diff --git a/build_deployment.sh b/build_deployment.sh index 6810945..1ce8c01 100644 --- a/build_deployment.sh +++ b/build_deployment.sh @@ -1,6 +1,6 @@ ORGANIZATION=brighthive IMAGE_NAME=data-trust-logger -VERSION=0.1.0-76e27 +VERSION=1.0.1 AWS_ECR_REPO=396527728813.dkr.ecr.us-east-2.amazonaws.com docker build -t $ORGANIZATION/$IMAGE_NAME:$VERSION -f Dockerfile . diff --git a/data_trust_logger/health_audit/data_resources_collector.py b/data_trust_logger/health_audit/data_resources_collector.py index 2d619da..d363853 100644 --- a/data_trust_logger/health_audit/data_resources_collector.py +++ b/data_trust_logger/health_audit/data_resources_collector.py @@ -1,25 +1,22 @@ -import logging - -from sqlalchemy import create_engine from sqlalchemy.exc import OperationalError from data_trust_logger.config import ConfigurationFactory from data_trust_logger.health_audit.metrics_collector import \ HealthMetricsCollector +from data_trust_logger.utilities.basic_logger import basic_logger config = ConfigurationFactory.from_env() -logger = logging.getLogger(__name__) -def instantiate_data_resources_collector(): +def instantiate_data_resources_collector(data_resources_engine): try: # create_engine() itself does not establish a DB connection. # We call `connect()` to assess the database health early on. - data_resources_engine = create_engine(config.dr_psql_uri) - data_resources_engine.connect() + connection = data_resources_engine.connect() + connection.close() except (ValueError, OperationalError) as error: - logger.error("Data Resources HealthMetricsCollector cannot connect to database.") - logger.error(error) + basic_logger.error("Data Resources HealthMetricsCollector cannot connect to database.") + basic_logger.error(error) data_resources_engine = None table_names = [] else: diff --git a/data_trust_logger/health_audit/health_auditor.py b/data_trust_logger/health_audit/health_auditor.py index 276d569..878f4ca 100644 --- a/data_trust_logger/health_audit/health_auditor.py +++ b/data_trust_logger/health_audit/health_auditor.py @@ -2,23 +2,29 @@ import os from threading import Thread from time import sleep +from sqlalchemy import create_engine +from data_trust_logger.config import ConfigurationFactory from data_trust_logger.health_audit.data_resources_collector import \ instantiate_data_resources_collector from data_trust_logger.health_audit.mci_collector import \ instantiate_mci_collector +config = ConfigurationFactory.from_env() class HealthAuditor(Thread): def __init__(self): Thread.__init__(self) def audit(self): + mci_engine = create_engine(config.mci_psql_uri) + data_resources_engine = create_engine(config.dr_psql_uri) + while True: - mci_collector = instantiate_mci_collector() + mci_collector = instantiate_mci_collector(mci_engine) mci_metrics = mci_collector.collect_metrics() - data_resources_collector = instantiate_data_resources_collector() + data_resources_collector = instantiate_data_resources_collector(data_resources_engine) data_resources_metrics = data_resources_collector.collect_metrics() metrics_blob = { diff --git a/data_trust_logger/health_audit/mci_collector.py b/data_trust_logger/health_audit/mci_collector.py index 74e4771..7a4807d 100644 --- a/data_trust_logger/health_audit/mci_collector.py +++ b/data_trust_logger/health_audit/mci_collector.py @@ -1,25 +1,23 @@ -import logging - from sqlalchemy import create_engine from sqlalchemy.exc import OperationalError from data_trust_logger.config import ConfigurationFactory from data_trust_logger.health_audit.metrics_collector import \ HealthMetricsCollector +from data_trust_logger.utilities.basic_logger import basic_logger -logger = logging.getLogger(__name__) config = ConfigurationFactory.from_env() -def instantiate_mci_collector(): +def instantiate_mci_collector(mci_engine): try: # create_engine() itself does not establish a DB connection. # We call `connect()` to assess the database health early on. - mci_engine = create_engine(config.mci_psql_uri) - mci_engine.connect() + connection = mci_engine.connect() + connection.close() except (ValueError, OperationalError) as error: - logger.error("MCI HealthMetricsCollector cannot connect to database.") - logger.error(error) + basic_logger.error("MCI HealthMetricsCollector cannot connect to database.") + basic_logger.error(error) mci_engine = None mci_tablenames = ['individual', 'source', 'gender', 'address', 'disposition', 'ethnicity_race', 'employment_status', 'education_level'] diff --git a/data_trust_logger/health_audit/metrics_collector.py b/data_trust_logger/health_audit/metrics_collector.py index e09fa85..c9a8237 100644 --- a/data_trust_logger/health_audit/metrics_collector.py +++ b/data_trust_logger/health_audit/metrics_collector.py @@ -2,7 +2,7 @@ import requests -from data_trust_logger.utilities import get_access_token, secure_requests +from data_trust_logger.utilities import get_access_token, read_token, secure_requests class HealthMetricsCollector(object): @@ -25,8 +25,10 @@ def _get_endpoint_status(self, api_ep: str, token: str): def _get_endpoint_record_count(self, engine: object, endpoint: str): try: - result = engine.execute(f"SELECT COUNT(*) from {endpoint}") + connection = engine.connect() + result = connection.execute(f"SELECT COUNT(*) from {endpoint}") count, = result.fetchone() + connection.close() except Exception: count = -1 @@ -34,7 +36,7 @@ def _get_endpoint_record_count(self, engine: object, endpoint: str): def collect_metrics(self): metrics_list = [] - token = get_access_token() + token = read_token() for table in self.tablenames: try: @@ -42,9 +44,15 @@ def collect_metrics(self): except KeyError: endpoint = table - status, last_accessed = self._get_endpoint_status(f"{self.api_url}/{endpoint}", token) count = self._get_endpoint_record_count(self.engine, table) + # Request a new access token if the token has expired, i.e., the endpoint raises a 401. + status, last_accessed = self._get_endpoint_status(f"{self.api_url}/{endpoint}", token) + if status == 401: + get_access_token() + token = read_token() + status, last_accessed = self._get_endpoint_status(f"{self.api_url}/{endpoint}", token) + metrics_list.append({ 'endpoint': endpoint, 'record_count': count, diff --git a/data_trust_logger/utilities/__init__.py b/data_trust_logger/utilities/__init__.py index 9eb239c..daa1ca4 100644 --- a/data_trust_logger/utilities/__init__.py +++ b/data_trust_logger/utilities/__init__.py @@ -1 +1,2 @@ -from data_trust_logger.utilities.secure_requests import get_access_token, secure_get \ No newline at end of file +from data_trust_logger.utilities.secure_requests import get_access_token, read_token, secure_get +from data_trust_logger.utilities.basic_logger import basic_logger \ No newline at end of file diff --git a/data_trust_logger/utilities/basic_logger.py b/data_trust_logger/utilities/basic_logger.py new file mode 100644 index 0000000..91ef7d9 --- /dev/null +++ b/data_trust_logger/utilities/basic_logger.py @@ -0,0 +1,11 @@ +import logging + + +basic_logger = logging.getLogger("basic_logger") + +handler = logging.StreamHandler() +handler.setLevel(logging.DEBUG) +formatter = logging.Formatter(fmt='[%(asctime)s] [%(levelname)s] %(message)s', datefmt="%a, %d %b %Y %H:%M:%S") +handler.setFormatter(formatter) + +basic_logger.addHandler(handler) \ No newline at end of file diff --git a/data_trust_logger/utilities/secure_requests.py b/data_trust_logger/utilities/secure_requests.py index b38e530..5a36fde 100644 --- a/data_trust_logger/utilities/secure_requests.py +++ b/data_trust_logger/utilities/secure_requests.py @@ -9,16 +9,20 @@ import requests from data_trust_logger.config import ConfigurationFactory +from data_trust_logger.utilities.basic_logger import basic_logger config = ConfigurationFactory.from_env() +location_of_token = '/tmp/token.txt' def get_access_token(): - """ Retrieves an OAuth 2.0 access token from the OAuth 2.0 provider. - Note: - At present, we use Auth0 as our OAuth 2.0 provider. - Returns: - str: OAuth 2.0 access token. + """ + Retrieves an OAuth 2.0 access token from the OAuth 2.0 provider, and + write the access token to a `tmp` file. + + It is possible that OAuth cannot return a token, e.g., due to a service outage. + In this case, we set the token value to an empty string, which eventually gets passed to + `_get_endpoint_status` and, ultimately, returns a 401 status to the Logger API health endpoint. """ headers = {'content-type': 'application/json'} data = { @@ -30,13 +34,36 @@ def get_access_token(): try: response = requests.post(config.oauth2_url, headers=headers, data=json.dumps(data)) - except requests.exceptions.ConnectionError: - return None - else: token = response.json()['access_token'] + except Exception as e: + basic_logger.error(e) + token = "invalid token" + + with open(location_of_token, 'w+') as f: + f.write(token) + + +def read_token(): + """ + Reads the access token from a temporary file, and returns the value. + + If the file does not exist, then we call `get_access_token`, and try again. + """ + token = "invalid token" + + for _ in range(2): + try: + with open(location_of_token) as f: + basic_logger.info(f"Token read from {location_of_token}") + token = f.read() + break + except FileNotFoundError as e: + basic_logger.error(e) + get_access_token() + pass return token - + def secure_get(url: str, token: str): """Convenience method for GET requests against API resources. diff --git a/tests/test_health_metrics_collector.py b/tests/test_health_metrics_collector.py index 5ecc2d9..bea8b19 100644 --- a/tests/test_health_metrics_collector.py +++ b/tests/test_health_metrics_collector.py @@ -70,7 +70,7 @@ def test_get_data_resources_count(mocker): for count in record_counts: assert count == seeded_count -def test_get_mci_count_fail(): +def test_get_data_resources_count_fail(): collector = instantiate_data_resources_collector() metrics = collector.collect_metrics() @@ -79,7 +79,7 @@ def test_get_mci_count_fail(): for count in record_counts: assert count == -1 -def test_get_mci_statuses(client): +def test_get_data_resources_statuses(client): with requests_mock.Mocker() as m: matcher = re.compile('/programs') m.get(matcher, json={}, status_code=200) @@ -92,4 +92,4 @@ def test_get_mci_statuses(client): if entry['endpoint'] == 'programs': expect(entry['endpoint_health']).to(equal(200)) else: - expect(entry['endpoint_health']).to(equal(503)) + expect(entry['endpoint_health']).to(equal(503)) \ No newline at end of file diff --git a/tests/test_secure_requests.py b/tests/test_secure_requests.py new file mode 100644 index 0000000..e7ac01a --- /dev/null +++ b/tests/test_secure_requests.py @@ -0,0 +1,19 @@ +import re +from unittest.mock import mock_open + +from expects import be, equal, expect, have_key, have_len + +from data_trust_logger.health_audit.mci_collector import \ + instantiate_mci_collector + + +def test_get_statuses_with_invalid_token(client, mocker): + """ + Test that endpoints return a 401 status code when `/tmp/token.txt` does not contain a valid token. + """ + mocker.patch('data_trust_logger.utilities.secure_requests.open', mock_open(read_data='invalid token')) + collector = instantiate_mci_collector() + metrics = collector.collect_metrics() + + for entry in metrics: + expect(entry['endpoint_health']).to(equal(401))