diff --git a/env.template b/env.template index 81a7307161..97c72d31f7 100644 --- a/env.template +++ b/env.template @@ -23,15 +23,16 @@ AIRFLOW_VAR_ENVIRONMENT=dev ######################################################################################## # API Keys ######################################################################################## -WALTERS_ART_MUSEUEM_KEY=not_set -BROOKLYN_MUSEUM_API_KEY=not_set -DATA_GOV_API_KEY=not_set -EUROPEANA_API_KEY=not_set -FLICKR_API_KEY=not_set -JAMENDO_APP_KEY=not_set -NYPL_API_KEY=not_set -THINGIVERSE_TOKEN=not_set -FREESOUND_API_KEY=not_set +# See: https://airflow.apache.org/docs/apache-airflow/stable/howto/variable.html#storing-variables-in-environment-variables +AIRFLOW_VAR_API_KEY_WALTERS_ART_MUSEUM=not_set +AIRFLOW_VAR_API_KEY_BROOKLYN_MUSEUM=not_set +AIRFLOW_VAR_API_KEY_DATA_GOV=not_set +AIRFLOW_VAR_API_KEY_EUROPEANA=not_set +AIRFLOW_VAR_API_KEY_FLICKR=not_set +AIRFLOW_VAR_API_KEY_JAMENDO=not_set +AIRFLOW_VAR_API_KEY_NYPL=not_set +AIRFLOW_VAR_API_KEY_THINGIVERSE=not_set +AIRFLOW_VAR_API_KEY_FREESOUND=not_set ######################################################################################## # Connection/Variable info diff --git a/openverse_catalog/dags/providers/provider_api_scripts/Thingiverse.py b/openverse_catalog/dags/providers/provider_api_scripts/Thingiverse.py index f629f41906..13e583b858 100644 --- a/openverse_catalog/dags/providers/provider_api_scripts/Thingiverse.py +++ b/openverse_catalog/dags/providers/provider_api_scripts/Thingiverse.py @@ -13,12 +13,13 @@ import argparse +from airflow.models import Variable from modules.etlMods import * MAX_THINGS = 30 LICENSE = "pd0" -TOKEN = os.environ["THINGIVERSE_TOKEN"] +TOKEN = Variable.get("API_KEY_THINGIVERSE", default_var=None) DELAY = 5.0 # seconds FILE = "thingiverse_{}.tsv".format(int(time.time())) diff --git a/openverse_catalog/dags/providers/provider_api_scripts/brooklyn_museum.py 
b/openverse_catalog/dags/providers/provider_api_scripts/brooklyn_museum.py index ddb48b7a96..63b9c8e5ca 100644 --- a/openverse_catalog/dags/providers/provider_api_scripts/brooklyn_museum.py +++ b/openverse_catalog/dags/providers/provider_api_scripts/brooklyn_museum.py @@ -1,7 +1,7 @@ import logging -import os import lxml.html as html +from airflow.models import Variable from common.licenses import get_license_info from common.loader import provider_details as prov from common.requester import DelayedRequester @@ -18,7 +18,7 @@ RETRIES = 3 PROVIDER = prov.BROOKLYN_DEFAULT_PROVIDER ENDPOINT = "https://www.brooklynmuseum.org/api/v2/object/" -API_KEY = os.getenv("BROOKLYN_MUSEUM_API_KEY", "nokeyprovided") +API_KEY = Variable.get("API_KEY_BROOKLYN_MUSEUM", default_var="nokeyprovided") delay_request = DelayedRequester(delay=DELAY) image_store = ImageStore(provider=PROVIDER) diff --git a/openverse_catalog/dags/providers/provider_api_scripts/europeana.py b/openverse_catalog/dags/providers/provider_api_scripts/europeana.py index 6a137ee0a3..2897ec228b 100644 --- a/openverse_catalog/dags/providers/provider_api_scripts/europeana.py +++ b/openverse_catalog/dags/providers/provider_api_scripts/europeana.py @@ -11,9 +11,9 @@ import argparse import logging -import os from datetime import datetime, timedelta, timezone +from airflow.models import Variable from common.licenses import get_license_info from common.loader import provider_details as prov from common.requester import DelayedRequester @@ -28,7 +28,7 @@ DELAY = 30.0 RESOURCES_PER_REQUEST = "100" PROVIDER = prov.EUROPEANA_DEFAULT_PROVIDER -API_KEY = os.getenv("EUROPEANA_API_KEY") +API_KEY = Variable.get("API_KEY_EUROPEANA", default_var=None) ENDPOINT = "https://www.europeana.eu/api/v2/search.json?" 
# SUB_PROVIDERS is a collection of providers within europeana which are # valuable to a broad audience diff --git a/openverse_catalog/dags/providers/provider_api_scripts/flickr.py b/openverse_catalog/dags/providers/provider_api_scripts/flickr.py index 9ceb553028..ededbbec1c 100644 --- a/openverse_catalog/dags/providers/provider_api_scripts/flickr.py +++ b/openverse_catalog/dags/providers/provider_api_scripts/flickr.py @@ -12,10 +12,10 @@ import argparse import logging -import os from datetime import datetime, timedelta, timezone import lxml.html as html +from airflow.models import Variable from common.licenses import get_license_info from common.loader import provider_details as prov from common.requester import DelayedRequester @@ -32,7 +32,7 @@ MAX_TAG_STRING_LENGTH = 2000 MAX_DESCRIPTION_LENGTH = 2000 PROVIDER = prov.FLICKR_DEFAULT_PROVIDER -API_KEY = os.getenv("FLICKR_API_KEY") +API_KEY = Variable.get("API_KEY_FLICKR", default_var=None) ENDPOINT = "https://api.flickr.com/services/rest/" PHOTO_URL_BASE = prov.FLICKR_PHOTO_URL_BASE DATE_TYPE = "upload" diff --git a/openverse_catalog/dags/providers/provider_api_scripts/freesound.py b/openverse_catalog/dags/providers/provider_api_scripts/freesound.py index c90f412348..63f0ebaaa8 100644 --- a/openverse_catalog/dags/providers/provider_api_scripts/freesound.py +++ b/openverse_catalog/dags/providers/provider_api_scripts/freesound.py @@ -12,10 +12,10 @@ import copy import functools import logging -import os from datetime import datetime import requests +from airflow.models import Variable from common.licenses.licenses import get_license_info from common.loader import provider_details as prov from common.requester import DelayedRequester @@ -32,7 +32,7 @@ PROVIDER = prov.FREESOUND_DEFAULT_PROVIDER # Freesound only has 'sounds' FREESOUND_CATEGORY = "sound" -API_KEY = os.getenv("FREESOUND_API_KEY", "not_set") +API_KEY = Variable.get("API_KEY_FREESOUND", default_var="not_set") HEADERS = { "Accept": "application/json", diff 
--git a/openverse_catalog/dags/providers/provider_api_scripts/jamendo.py b/openverse_catalog/dags/providers/provider_api_scripts/jamendo.py index 181a6e0fa1..083d01d096 100644 --- a/openverse_catalog/dags/providers/provider_api_scripts/jamendo.py +++ b/openverse_catalog/dags/providers/provider_api_scripts/jamendo.py @@ -16,12 +16,12 @@ channels: 1/2 """ import logging -import os from functools import lru_cache from typing import Optional from urllib.parse import parse_qs, urlencode, urlsplit import common +from airflow.models import Variable from common.licenses import get_license_info from common.loader import provider_details as prov from common.requester import DelayedRequester @@ -42,7 +42,7 @@ HOST = "jamendo.com" ENDPOINT = f"https://api.{HOST}/v3.0/tracks" PROVIDER = prov.JAMENDO_DEFAULT_PROVIDER -APP_KEY = os.getenv("JAMENDO_APP_KEY", "not_set") +APP_KEY = Variable.get("API_KEY_JAMENDO", default_var="not_set") HEADERS = { "Accept": "application/json", diff --git a/openverse_catalog/dags/providers/provider_api_scripts/nypl.py b/openverse_catalog/dags/providers/provider_api_scripts/nypl.py index cce2d15e2a..e7beed35c2 100644 --- a/openverse_catalog/dags/providers/provider_api_scripts/nypl.py +++ b/openverse_catalog/dags/providers/provider_api_scripts/nypl.py @@ -1,7 +1,7 @@ import logging -import os from urllib.parse import parse_qs, urlparse +from airflow.models import Variable from common.licenses import get_license_info from common.loader import provider_details as prov from common.requester import DelayedRequester @@ -19,7 +19,7 @@ PROVIDER = prov.NYPL_DEFAULT_PROVIDER BASE_ENDPOINT = "http://api.repo.nypl.org/api/v1/items/search" METADATA_ENDPOINT = "http://api.repo.nypl.org/api/v1/items/item_details/" -NYPL_API = os.getenv("NYPL_API_KEY") +NYPL_API = Variable.get("API_KEY_NYPL", default_var=None) TOKEN = f"Token token={NYPL_API}" delay_request = DelayedRequester(delay=DELAY) diff --git a/openverse_catalog/dags/providers/provider_api_scripts/smithsonian.py 
b/openverse_catalog/dags/providers/provider_api_scripts/smithsonian.py index 304c6d7f7b..498a1680cc 100644 --- a/openverse_catalog/dags/providers/provider_api_scripts/smithsonian.py +++ b/openverse_catalog/dags/providers/provider_api_scripts/smithsonian.py @@ -13,6 +13,7 @@ import os from datetime import datetime +from airflow.models import Variable from common.licenses import LicenseInfo from common.loader import provider_details as prov from common.requester import DelayedRequester @@ -24,7 +25,7 @@ format="%(asctime)s - %(name)s - %(levelname)s: %(message)s", level=logging.INFO ) -API_KEY = os.getenv("DATA_GOV_API_KEY") +API_KEY = Variable.get("API_KEY_DATA_GOV", default_var=None) DELAY = 5.0 HASH_PREFIX_LENGTH = 2 LIMIT = 1000 # number of rows to pull at once diff --git a/openverse_catalog/dags/providers/provider_api_scripts/walters_art_museum.py b/openverse_catalog/dags/providers/provider_api_scripts/walters_art_museum.py index f85bdd72db..b50a3f35fe 100644 --- a/openverse_catalog/dags/providers/provider_api_scripts/walters_art_museum.py +++ b/openverse_catalog/dags/providers/provider_api_scripts/walters_art_museum.py @@ -11,8 +11,8 @@ """ import logging -import os +from airflow.models import Variable from common.loader import provider_details as prov from common.requester import DelayedRequester from common.storage.image import ImageStore @@ -28,7 +28,7 @@ PROVIDER = prov.WALTERS_DEFAULT_PROVIDER REQUEST_TYPE = "objects" ENDPOINT = f"https://api.thewalters.org/v1/{REQUEST_TYPE}" -API_KEY = os.getenv("WALTERS_ART_MUSEUEM_KEY") +API_KEY = Variable.get("API_KEY_WALTERS_ART_MUSEUM", default_var=None) MUSEUM_SITE = "https://art.thewalters.org" LICENSE = "CC0 1.0"