Skip to content

Commit

Permalink
Update API requests for Museum Victoria DAG (#414)
Browse files Browse the repository at this point in the history
* Update query parameter to 'hasimages'

* Pull out User Agent string and add to Museum Victoria API requests

* Update param in test
  • Loading branch information
stacimc committed Mar 17, 2022
1 parent 8712f32 commit 3034e31
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 11 deletions.
5 changes: 5 additions & 0 deletions openverse_catalog/dags/common/loader/provider_details.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
might be useful for retrieving sub-providers at the database level and the
API level.
"""
+import os
from enum import Enum


Expand Down Expand Up @@ -111,6 +112,10 @@
"smithsonian_libraries": {"SIL"}, # Smithsonian Libraries
}

+# User-Agent header for APIs that require it
+CONTACT_EMAIL = os.getenv("CONTACT_EMAIL")
+UA_STRING = f"Openverse/0.1 (https://wordpress.com/openverse; {CONTACT_EMAIL})"


class ImageCategory(Enum):
PHOTOGRAPH = "photograph"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@
delay_request = DelayedRequester(delay=DELAY)
image_store = ImageStore(provider=PROVIDER)

-HEADERS = {"Accept": "application/json"}
+HEADERS = {"User-Agent": prov.UA_STRING, "Accept": "application/json"}

DEFAULT_QUERY_PARAMS = {
-"has_image": "yes",
+"hasimages": "yes",
"perpage": LIMIT,
"imagelicence": "cc by",
"page": 0,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@

import argparse
import logging
-import os
from copy import deepcopy
from datetime import datetime, timedelta, timezone
from typing import Optional
Expand Down Expand Up @@ -43,9 +42,7 @@
ENDPOINT = f"https://{HOST}/w/api.php"
PROVIDER = prov.WIKIMEDIA_DEFAULT_PROVIDER
AUDIO_PROVIDER = prov.WIKIMEDIA_AUDIO_PROVIDER
-CONTACT_EMAIL = os.getenv("CONTACT_EMAIL")
-UA_STRING = f"Openverse/0.1 (https://wordpress.com/openverse; {CONTACT_EMAIL})"
-DEFAULT_REQUEST_HEADERS = {"User-Agent": UA_STRING}
+DEFAULT_REQUEST_HEADERS = {"User-Agent": prov.UA_STRING}
DEFAULT_QUERY_PARAMS = {
"action": "query",
"generator": "allimages",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def _get_resource_json(json_name):
def test_get_query_param_default():
actual_param = mv._get_query_params()
expected_param = {
-"has_image": "yes",
+"hasimages": "yes",
"perpage": 100,
"imagelicence": "cc by",
"page": 0,
Expand All @@ -38,7 +38,7 @@ def test_get_query_param_offset():
actual_param = mv._get_query_params(license_type="public domain", page=10)

expected_param = {
-"has_image": "yes",
+"hasimages": "yes",
"perpage": 100,
"imagelicence": "public domain",
"page": 10,
Expand All @@ -49,7 +49,7 @@ def test_get_query_param_offset():

def test_get_batch_objects_success():
query_param = {
-"has_image": "yes",
+"hasimages": "yes",
"perpage": 100,
"imagelicence": "cc+by",
"page": 0,
Expand All @@ -71,7 +71,7 @@ def test_get_batch_objects_success():

def test_get_batch_objects_empty():
query_param = {
-"has_image": "yes",
+"hasimages": "yes",
"perpage": 1,
"imagelicence": "cc by",
"page": 1000,
Expand All @@ -87,7 +87,7 @@ def test_get_batch_objects_empty():


def test_get_batch_objects_error():
-query_param = {"has_image": "yes", "perpage": 1, "imagelicence": "cc by", "page": 0}
+query_param = {"hasimages": "yes", "perpage": 1, "imagelicence": "cc by", "page": 0}

r = requests.Response()
r.status_code = 404
Expand Down

0 comments on commit 3034e31

Please sign in to comment.