diff --git a/openverse_catalog/dags/providers/provider_api_scripts/nypl.py b/openverse_catalog/dags/providers/provider_api_scripts/nypl.py index e7beed35c2..b82ade81ba 100644 --- a/openverse_catalog/dags/providers/provider_api_scripts/nypl.py +++ b/openverse_catalog/dags/providers/provider_api_scripts/nypl.py @@ -194,30 +194,23 @@ def _get_metadata(mods): metadata = {} type_of_resource = mods.get("typeOfResource") - if type(type_of_resource) == list and ( + if isinstance(type_of_resource, list) and ( type_of_resource[0].get("usage") == "primary" ): metadata["type_of_resource"] = type_of_resource[0].get("$") - if type(mods.get("genre")) == dict: + if isinstance(mods.get("genre"), dict): metadata["genre"] = mods.get("genre").get("$") origin_info = mods.get("originInfo") - try: - metadata["date_issued"] = origin_info.get("dateIssued").get("$") - except AttributeError as e: - logger.warning(f"date_issued not found due to {e}") - - try: - metadata["publisher"] = origin_info.get("publisher").get("$") - except AttributeError as e: - logger.warning(f"publisher not found due to {e}") + if date_issued := origin_info.get("dateIssued", {}).get("$"): + metadata["date_issued"] = date_issued + if publisher := origin_info.get("publisher", {}).get("$"): + metadata["publisher"] = publisher physical_description = mods.get("physicalDescription") - try: - metadata["description"] = physical_description.get("note").get("$") - except AttributeError as e: - logger.warning(f"description not found, due to {e}") + if description := physical_description.get("note", {}).get("$"): + metadata["description"] = description return metadata diff --git a/tests/dags/providers/provider_api_scripts/test_nypl.py b/tests/dags/providers/provider_api_scripts/test_nypl.py index eda4dd05f7..6d73601c7b 100644 --- a/tests/dags/providers/provider_api_scripts/test_nypl.py +++ b/tests/dags/providers/provider_api_scripts/test_nypl.py @@ -141,6 +141,23 @@ def test_get_metadata(): assert actual_metadata == 
def test_get_metadata_missing_attrs():
    """Check ``_get_metadata`` output when optional source nodes are absent."""
    payload = _get_resource_json("response_itemdetails_success.json")
    mods = payload.get("nyplAPI").get("response").get("mods")

    # Strip the source nodes so the fixture looks like a sparse record.
    origin_info = mods["originInfo"]
    origin_info.pop("dateIssued")
    origin_info.pop("publisher")
    mods["physicalDescription"].pop("note")

    # The matching keys must be dropped from the expected output as well.
    expected = _get_resource_json("metadata.json")
    for missing_key in ("date_issued", "publisher", "description"):
        expected.pop(missing_key)

    assert np._get_metadata(mods) == expected