Skip to content

Commit

Permalink
Use Airflow Variables for storing API keys (#362)
Browse files (browse the repository at this point in the history)
  • Loading branch information
AetherUnbound committed Feb 16, 2022
1 parent 9555374 commit cb19f83
Show file tree
Hide file tree
Showing 10 changed files with 28 additions and 25 deletions.
19 changes: 10 additions & 9 deletions env.template
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,16 @@ AIRFLOW_VAR_ENVIRONMENT=dev
########################################################################################
# API Keys
########################################################################################
WALTERS_ART_MUSEUEM_KEY=not_set
BROOKLYN_MUSEUM_API_KEY=not_set
DATA_GOV_API_KEY=not_set
EUROPEANA_API_KEY=not_set
FLICKR_API_KEY=not_set
JAMENDO_APP_KEY=not_set
NYPL_API_KEY=not_set
THINGIVERSE_TOKEN=not_set
FREESOUND_API_KEY=not_set
# See: https://airflow.apache.org/docs/apache-airflow/stable/howto/variable.html#storing-variables-in-environment-variables
AIRFLOW_VAR_API_KEY_WALTERS_ART_MUSEUM=not_set
AIRFLOW_VAR_API_KEY_BROOKLYN_MUSEUM=not_set
AIRFLOW_VAR_API_KEY_DATA_GOV=not_set
AIRFLOW_VAR_API_KEY_EUROPEANA=not_set
AIRFLOW_VAR_API_KEY_FLICKR=not_set
AIRFLOW_VAR_API_KEY_JAMENDO=not_set
AIRFLOW_VAR_API_KEY_NYPL=not_set
AIRFLOW_VAR_API_KEY_THINGIVERSE=not_set
AIRFLOW_VAR_API_KEY_FREESOUND=not_set

########################################################################################
# Connection/Variable info
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,13 @@

import argparse

from airflow.models import Variable
from modules.etlMods import *


MAX_THINGS = 30
LICENSE = "pd0"
TOKEN = os.environ["THINGIVERSE_TOKEN"]
TOKEN = Variable.get("API_KEY_THINGIVERSE", default_var=None)
DELAY = 5.0 # seconds
FILE = "thingiverse_{}.tsv".format(int(time.time()))

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging
import os

import lxml.html as html
from airflow.models import Variable
from common.licenses import get_license_info
from common.loader import provider_details as prov
from common.requester import DelayedRequester
Expand All @@ -18,7 +18,7 @@
RETRIES = 3
PROVIDER = prov.BROOKLYN_DEFAULT_PROVIDER
ENDPOINT = "https://www.brooklynmuseum.org/api/v2/object/"
API_KEY = os.getenv("BROOKLYN_MUSEUM_API_KEY", "nokeyprovided")
API_KEY = Variable.get("API_KEY_BROOKLYN_MUSEUM", default_var="nokeyprovided")

delay_request = DelayedRequester(delay=DELAY)
image_store = ImageStore(provider=PROVIDER)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@

import argparse
import logging
import os
from datetime import datetime, timedelta, timezone

from airflow.models import Variable
from common.licenses import get_license_info
from common.loader import provider_details as prov
from common.requester import DelayedRequester
Expand All @@ -28,7 +28,7 @@
DELAY = 30.0
RESOURCES_PER_REQUEST = "100"
PROVIDER = prov.EUROPEANA_DEFAULT_PROVIDER
API_KEY = os.getenv("EUROPEANA_API_KEY")
API_KEY = Variable.get("API_KEY_EUROPEANA", default_var=None)
ENDPOINT = "https://www.europeana.eu/api/v2/search.json?"
# SUB_PROVIDERS is a collection of providers within europeana which are
# valuable to a broad audience
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@

import argparse
import logging
import os
from datetime import datetime, timedelta, timezone

import lxml.html as html
from airflow.models import Variable
from common.licenses import get_license_info
from common.loader import provider_details as prov
from common.requester import DelayedRequester
Expand All @@ -32,7 +32,7 @@
MAX_TAG_STRING_LENGTH = 2000
MAX_DESCRIPTION_LENGTH = 2000
PROVIDER = prov.FLICKR_DEFAULT_PROVIDER
API_KEY = os.getenv("FLICKR_API_KEY")
API_KEY = Variable.get("API_KEY_FLICKR", default_var=None)
ENDPOINT = "https://api.flickr.com/services/rest/"
PHOTO_URL_BASE = prov.FLICKR_PHOTO_URL_BASE
DATE_TYPE = "upload"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@
import copy
import functools
import logging
import os
from datetime import datetime

import requests
from airflow.models import Variable
from common.licenses.licenses import get_license_info
from common.loader import provider_details as prov
from common.requester import DelayedRequester
Expand All @@ -32,7 +32,7 @@
PROVIDER = prov.FREESOUND_DEFAULT_PROVIDER
# Freesound only has 'sounds'
FREESOUND_CATEGORY = "sound"
API_KEY = os.getenv("FREESOUND_API_KEY", "not_set")
API_KEY = Variable.get("API_KEY_FREESOUND", default_var="not_set")

HEADERS = {
"Accept": "application/json",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@
channels: 1/2
"""
import logging
import os
from functools import lru_cache
from typing import Optional
from urllib.parse import parse_qs, urlencode, urlsplit

import common
from airflow.models import Variable
from common.licenses import get_license_info
from common.loader import provider_details as prov
from common.requester import DelayedRequester
Expand All @@ -42,7 +42,7 @@
HOST = "jamendo.com"
ENDPOINT = f"https://api.{HOST}/v3.0/tracks"
PROVIDER = prov.JAMENDO_DEFAULT_PROVIDER
APP_KEY = os.getenv("JAMENDO_APP_KEY", "not_set")
APP_KEY = Variable.get("API_KEY_JAMENDO", "not_set")

HEADERS = {
"Accept": "application/json",
Expand Down
4 changes: 2 additions & 2 deletions openverse_catalog/dags/providers/provider_api_scripts/nypl.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging
import os
from urllib.parse import parse_qs, urlparse

from airflow.models import Variable
from common.licenses import get_license_info
from common.loader import provider_details as prov
from common.requester import DelayedRequester
Expand All @@ -19,7 +19,7 @@
PROVIDER = prov.NYPL_DEFAULT_PROVIDER
BASE_ENDPOINT = "http://api.repo.nypl.org/api/v1/items/search"
METADATA_ENDPOINT = "http://api.repo.nypl.org/api/v1/items/item_details/"
NYPL_API = os.getenv("NYPL_API_KEY")
NYPL_API = Variable.get("API_KEY_NYPL", default_var=None)
TOKEN = f"Token token={NYPL_API}"

delay_request = DelayedRequester(delay=DELAY)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import os
from datetime import datetime

from airflow.models import Variable
from common.licenses import LicenseInfo
from common.loader import provider_details as prov
from common.requester import DelayedRequester
Expand All @@ -24,7 +25,7 @@
format="%(asctime)s - %(name)s - %(levelname)s: %(message)s", level=logging.INFO
)

API_KEY = os.getenv("DATA_GOV_API_KEY")
API_KEY = Variable.get("API_KEY_DATA_GOV", default_var=None)
DELAY = 5.0
HASH_PREFIX_LENGTH = 2
LIMIT = 1000 # number of rows to pull at once
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
"""

import logging
import os

from airflow.models import Variable
from common.loader import provider_details as prov
from common.requester import DelayedRequester
from common.storage.image import ImageStore
Expand All @@ -28,7 +28,7 @@
PROVIDER = prov.WALTERS_DEFAULT_PROVIDER
REQUEST_TYPE = "objects"
ENDPOINT = f"https://api.thewalters.org/v1/{REQUEST_TYPE}"
API_KEY = os.getenv("WALTERS_ART_MUSEUEM_KEY")
API_KEY = Variable.get("API_KEY_WALTERS_ART_MUSEUM", default_var=None)
MUSEUM_SITE = "https://art.thewalters.org"
LICENSE = "CC0 1.0"

Expand Down

0 comments on commit cb19f83

Please sign in to comment.