diff --git a/CITATION.cff b/CITATION.cff index e626019..e8e6679 100755 --- a/CITATION.cff +++ b/CITATION.cff @@ -11,6 +11,7 @@ authors: abstract: Python wrapper for CaltechDATA API. repository-code: "https://github.com/caltechlibrary/caltechdata_api" type: software +doi: 10.22002/wfjr5-kw507 version: 1.8.2 license-url: "https://data.caltech.edu/license" keywords: @@ -18,4 +19,4 @@ keywords: - metadata - software - InvenioRDM -date-released: 2024-11-06 +date-released: 2024-11-08 diff --git a/caltechdata_api/caltechdata_write.py b/caltechdata_api/caltechdata_write.py index e0cb0dd..2a46365 100644 --- a/caltechdata_api/caltechdata_write.py +++ b/caltechdata_api/caltechdata_write.py @@ -248,7 +248,10 @@ def caltechdata_write( # Make draft and publish result = requests.post(url + "/api/records", headers=headers, json=data) if result.status_code != 201: - raise Exception(result.text) + if result.status_code == 400 and "Referer checking failed" in result.text: + raise Exception("Token is incorrect or missing referer.") + else: + raise Exception(result.text) idv = result.json()["id"] publish_link = result.json()["links"]["publish"] diff --git a/caltechdata_api/cli.py b/caltechdata_api/cli.py index ecb54a3..a0b46ae 100644 --- a/caltechdata_api/cli.py +++ b/caltechdata_api/cli.py @@ -2,7 +2,7 @@ import requests import s3fs from caltechdata_api import caltechdata_write, caltechdata_edit -from .md_to_json import parse_readme_to_json +from md_to_json import parse_readme_to_json import json import os from cryptography.fernet import Fernet @@ -469,6 +469,7 @@ def create_record(production): "descriptions": [ {"description": args["description"], "descriptionType": "Abstract"} ], + "publisher": "CaltechDATA", "creators": [ { "affiliation": [ diff --git a/caltechdata_api/customize_schema.py b/caltechdata_api/customize_schema.py index 5403244..3199b43 100644 --- a/caltechdata_api/customize_schema.py +++ b/caltechdata_api/customize_schema.py @@ -393,20 +393,12 @@ def 
validate_metadata(json_record): """ errors = [] - # Check for 'types' and 'resourceTypeGeneral' - if "types" not in json_record: - errors.append("'types' field is missing.") - elif not isinstance(json_record["types"], dict): - errors.append("'types' field should be a dictionary.") - elif "resourceTypeGeneral" not in json_record["types"]: - errors.append("'resourceTypeGeneral' field is missing in 'types'.") - - # Check for 'title' if "titles" not in json_record: errors.append("'titles' field is missing.") elif not isinstance(json_record["titles"], list) or len(json_record["titles"]) == 0: errors.append("'titles' should be a non-empty list.") else: + # Ensure each title is a dictionary with 'title' field for title in json_record["titles"]: if not isinstance(title, dict) or "title" not in title: @@ -480,6 +472,7 @@ def validate_metadata(json_record): errors.append("'relatedIdentifiers' should be a list.") else: for related_id in json_record["relatedIdentifiers"]: + if ( not isinstance(related_id, dict) or "relatedIdentifier" not in related_id @@ -493,6 +486,76 @@ def validate_metadata(json_record): if not isinstance(json_record["rightsList"], list): errors.append("'rightsList' should be a list.") else: + + for right in json_record["rightsList"]: + if not isinstance(right, dict) or "rights" not in right: + errors.append("Each 'rightsList' entry must have 'rights'.") + if "rightsURI" in right and not isinstance(right["rightsURI"], str): + errors.append("'rightsURI' should be a string.") + + # Check for 'subjects' + if "subjects" in json_record: + if not isinstance(json_record["subjects"], list): + errors.append("'subjects' should be a list.") + else: + for subject in json_record["subjects"]: + if not isinstance(subject, dict) or "subject" not in subject: + errors.append("Each 'subject' must have a 'subject' key.") + + # Check for 'dates' + if "dates" not in json_record: + errors.append("'dates' field is missing.") + elif not isinstance(json_record["dates"], list) or 
len(json_record["dates"]) == 0: + errors.append("'dates' should be a non-empty list.") + else: + for date in json_record["dates"]: + if ( + not isinstance(date, dict) + or "date" not in date + or "dateType" not in date + ): + errors.append("Each 'date' must have 'date' and 'dateType'.") + + # Check for 'identifiers' + if "identifiers" not in json_record: + errors.append("'identifiers' field is missing.") + elif ( + not isinstance(json_record["identifiers"], list) + or len(json_record["identifiers"]) == 0 + ): + errors.append("'identifiers' should be a non-empty list.") + else: + for identifier in json_record["identifiers"]: + if ( + not isinstance(identifier, dict) + or "identifier" not in identifier + or "identifierType" not in identifier + ): + errors.append( + "Each 'identifier' must have 'identifier' and 'identifierType'." + ) + + # Check for 'creators' + if "creators" not in json_record: + errors.append("'creators' field is missing.") + elif ( + not isinstance(json_record["creators"], list) + or len(json_record["creators"]) == 0 + ): + errors.append("'creators' should be a non-empty list.") + else: + for creator in json_record["creators"]: + if not isinstance(creator, dict) or "name" not in creator: + errors.append("Each 'creator' must have 'name'.") + if "affiliation" in creator: + if not isinstance(creator["affiliation"], list): + errors.append("'affiliation' in 'creators' should be a list.") + for affiliation in creator["affiliation"]: + if not isinstance(affiliation, dict) or "name" not in affiliation: + errors.append( + "Each 'affiliation' in 'creators' must have a 'name'." 
+ ) + for rights in json_record["rightsList"]: if not isinstance(rights, dict) or "rights" not in rights: errors.append( @@ -504,6 +567,60 @@ def validate_metadata(json_record): if not isinstance(json_record["geoLocations"], list): errors.append("'geoLocations' should be a list.") else: + + for geo_loc in json_record["geoLocations"]: + if not isinstance(geo_loc, dict) or "geoLocationPlace" not in geo_loc: + errors.append("Each 'geoLocation' must have 'geoLocationPlace'.") + if "geoLocationPoint" in geo_loc: + point = geo_loc["geoLocationPoint"] + if ( + not isinstance(point, dict) + or "pointLatitude" not in point + or "pointLongitude" not in point + ): + errors.append( + "'geoLocationPoint' must have 'pointLatitude' and 'pointLongitude'." + ) + + # Check for 'formats' + if "formats" in json_record and ( + not isinstance(json_record["formats"], list) or len(json_record["formats"]) == 0 + ): + errors.append("'formats' should be a non-empty list.") + + # Check for 'language' + if "language" in json_record: + if not isinstance(json_record["language"], str): + errors.append("'language' should be a string.") + + # Check for 'version' + if "version" in json_record and not isinstance(json_record["version"], str): + errors.append("'version' should be a string.") + + # Check for 'publisher' + if "publisher" not in json_record: + errors.append("'publisher' field is missing.") + elif not isinstance(json_record["publisher"], str): + errors.append("'publisher' should be a string.") + + # Check for 'publicationYear' + if "publicationYear" not in json_record: + errors.append("'publicationYear' field is missing.") + elif not isinstance(json_record["publicationYear"], str): + errors.append("'publicationYear' should be a string.") + + # Check for 'types' + if "types" not in json_record: + errors.append("'types' field is missing.") + elif not isinstance(json_record["types"], dict): + errors.append("'types' should be a dictionary.") + else: + if "resourceTypeGeneral" not in 
json_record["types"]: + errors.append("'types' must have 'resourceTypeGeneral'.") + if "resourceType" in json_record["types"] and not isinstance( + json_record["types"]["resourceType"], str + ): + errors.append("'resourceType' should be a string if provided.") for location in json_record["geoLocations"]: if not isinstance(location, dict): errors.append("Each entry in 'geoLocations' must be a dictionary.") diff --git a/tests/bot.py b/tests/bot.py new file mode 100644 index 0000000..b98360e --- /dev/null +++ b/tests/bot.py @@ -0,0 +1,196 @@ +import subprocess +import time +from unittest.mock import patch +import sys +import os +import json +import requests +from datetime import datetime +import pytest +from customize_schema import validate_metadata as validator43 # Import validator + + +class CaltechDataTester: + def __init__(self): + self.test_dir = "caltech_test_data" + self.timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + if not os.path.exists(self.test_dir): + os.makedirs(self.test_dir) + + # Create test data directory with timestamp + self.test_run_dir = os.path.join(self.test_dir, f"test_run_{self.timestamp}") + os.makedirs(self.test_run_dir) + + # Initialize logging + self.log_file = os.path.join(self.test_run_dir, "test_log.txt") + + def log(self, message): + """Log message to both console and file""" + print(message) + with open(self.log_file, "a") as f: + f.write(f"{datetime.now()}: {message}\n") + + def create_test_files(self): + """Create necessary test files""" + # Create a dummy CSV file + csv_path = os.path.join(self.test_run_dir, "test_data.csv") + with open(csv_path, "w") as f: + f.write("date,temperature,humidity\n") + f.write("2023-01-01,25.5,60\n") + f.write("2023-01-02,26.0,62\n") + f.write("2023-01-03,24.8,65\n") + + self.log(f"Created test CSV file: {csv_path}") + return csv_path + + def generate_test_responses(self): + """Generate test responses for CLI prompts""" + return { + "Do you want to create or edit a CaltechDATA record? 
(create/edit): ": "create", + "Do you want to use metadata from an existing file or create new metadata? (existing/create): ": "create", + "Enter the title of the dataset: ": f"Test Dataset {self.timestamp}", + "Enter the abstract or description of the dataset: ": "This is an automated test dataset containing sample climate data for validation purposes.", + "Enter the number corresponding to the desired license: ": "1", + "Enter your ORCID identifier: ": "0000-0002-1825-0097", + "How many funding entries do you want to provide? ": "1", + "Enter the award number for funding: ": "NSF-1234567", + "Enter the award title for funding: ": "Automated Testing Grant", + "Enter the funder ROR (https://ror.org): ": "021nxhr62", + "Do you want to upload or link data files? (upload/link/n): ": "upload", + "Enter the filename to upload as a supporting file (or 'n' to finish): ": "test_data.csv", + "Do you want to add more files? (y/n): ": "n", + "Do you want to send this record to CaltechDATA? (y/n): ": "y", + } + + def extract_record_id(self, output_text): + """Extract record ID from CLI output""" + try: + for line in output_text.split("\n"): + if "uploads/" in line: + return line.strip().split("/")[-1] + except Exception as e: + self.log(f"Error extracting record ID: {e}") + return None + + def download_and_validate_record(self, record_id): + """Download and validate the record""" + try: + # Wait for record to be available + time.sleep(5) + + # Download metadata + url = f"https://data.caltechlibrary.dev/records/{record_id}/export/datacite-json" + response = requests.get(url) + response.raise_for_status() + + # Save metadata + json_path = os.path.join(self.test_run_dir, f"{record_id}.json") + with open(json_path, "w") as f: + json.dump(response.json(), f, indent=2) + + self.log(f"Downloaded metadata to: {json_path}") + + # Validate metadata using the imported validator + validation_errors = validator43(response.json()) + + if validation_errors: + self.log("āŒ Validation errors 
found:") + for error in validation_errors: + self.log(f" - {error}") + return False + else: + self.log("āœ… Validation passed successfully") + return True + + except Exception as e: + self.log(f"Error in download and validation: {e}") + return False + + def run_test_submission(self): + """Run the complete test submission process""" + try: + self.log("Starting test submission process...") + + # Create test files + test_csv = self.create_test_files() + + # Generate responses + responses = self.generate_test_responses() + + # Setup output capture + class OutputCapture: + def __init__(self): + self.output = [] + + def write(self, text): + self.output.append(text) + sys.__stdout__.write(text) + + def flush(self): + pass + + def get_output(self): + return "".join(self.output) + + output_capture = OutputCapture() + sys.stdout = output_capture + + # Mock input and run CLI + def mock_input(prompt): + self.log(f"Prompt: {prompt}") + if prompt in responses: + response = responses[prompt] + self.log(f"Response: {response}") + return response + return "" + + with patch("builtins.input", side_effect=mock_input): + try: + import cli + + cli.main() + except Exception as e: + self.log(f"Error during CLI execution: {e}") + return False + + # Restore stdout + sys.stdout = sys.__stdout__ + + # Get output and extract record ID + cli_output = output_capture.get_output() + record_id = self.extract_record_id(cli_output) + + if not record_id: + self.log("Failed to extract record ID") + return False + + self.log(f"Successfully created record with ID: {record_id}") + + # Validate the record + return self.download_and_validate_record(record_id) + + except Exception as e: + self.log(f"Error in test submission: {e}") + return False + finally: + # Cleanup + if os.path.exists(test_csv): + os.remove(test_csv) + self.log("Test files cleaned up") + + +def main(): + tester = CaltechDataTester() + + success = tester.run_test_submission() + + if success: + tester.log("\nšŸŽ‰ Test submission and 
validation completed successfully!") + else: + tester.log("\nāŒ Test submission or validation failed - check logs for details") + + tester.log(f"\nTest logs available at: {tester.log_file}") + + +if __name__ == "__main__": + main() diff --git a/tests/data/invalid_datacite43/invalid_metadata_1.json b/tests/data/invalid_datacite43/invalid_metadata_1.json new file mode 100644 index 0000000..1bba16b --- /dev/null +++ b/tests/data/invalid_datacite43/invalid_metadata_1.json @@ -0,0 +1,12 @@ +{ + "creators": [ + { + "name": "John Doe" + } + ], + "publisher": "Caltech", + "publicationYear": "2023", + "types": { + "resourceTypeGeneral": "Dataset" + } +} \ No newline at end of file diff --git a/tests/data/invalid_datacite43/invalid_metadata_10.json b/tests/data/invalid_datacite43/invalid_metadata_10.json new file mode 100644 index 0000000..759757d --- /dev/null +++ b/tests/data/invalid_datacite43/invalid_metadata_10.json @@ -0,0 +1,18 @@ +{ + "titles": [ + { + "title": "Sample Title" + } + ], + "creators": [ + { + "name": "John Doe" + } + ], + "version": 1, + "publisher": "Caltech", + "publicationYear": "2023", + "types": { + "resourceTypeGeneral": "Dataset" + } +} \ No newline at end of file diff --git a/tests/data/invalid_datacite43/invalid_metadata_2.json b/tests/data/invalid_datacite43/invalid_metadata_2.json new file mode 100644 index 0000000..3899136 --- /dev/null +++ b/tests/data/invalid_datacite43/invalid_metadata_2.json @@ -0,0 +1,13 @@ +{ + "titles": [], + "creators": [ + { + "name": "John Doe" + } + ], + "publisher": "Caltech", + "publicationYear": "2023", + "types": { + "resourceTypeGeneral": "Dataset" + } +} \ No newline at end of file diff --git a/tests/data/invalid_datacite43/invalid_metadata_3.json b/tests/data/invalid_datacite43/invalid_metadata_3.json new file mode 100644 index 0000000..707dbab --- /dev/null +++ b/tests/data/invalid_datacite43/invalid_metadata_3.json @@ -0,0 +1,12 @@ +{ + "titles": [ + { + "title": "Sample Title" + } + ], + "publisher": 
"Caltech", + "publicationYear": "2023", + "types": { + "resourceTypeGeneral": "Dataset" + } +} \ No newline at end of file diff --git a/tests/data/invalid_datacite43/invalid_metadata_4.json b/tests/data/invalid_datacite43/invalid_metadata_4.json new file mode 100644 index 0000000..f7d2fe4 --- /dev/null +++ b/tests/data/invalid_datacite43/invalid_metadata_4.json @@ -0,0 +1,20 @@ +{ + "titles": [ + { + "title": "Sample Title" + } + ], + "creators": [ + { + "name": "John Doe" + } + ], + "contributors": [ + {} + ], + "publisher": "Caltech", + "publicationYear": "2023", + "types": { + "resourceTypeGeneral": "Dataset" + } +} \ No newline at end of file diff --git a/tests/data/invalid_datacite43/invalid_metadata_5.json b/tests/data/invalid_datacite43/invalid_metadata_5.json new file mode 100644 index 0000000..deeff7f --- /dev/null +++ b/tests/data/invalid_datacite43/invalid_metadata_5.json @@ -0,0 +1,22 @@ +{ + "titles": [ + { + "title": "Sample Title" + } + ], + "creators": [ + { + "name": "John Doe" + } + ], + "descriptions": [ + { + "description": "Sample Description" + } + ], + "publisher": "Caltech", + "publicationYear": "2023", + "types": { + "resourceTypeGeneral": "Dataset" + } +} \ No newline at end of file diff --git a/tests/data/invalid_datacite43/invalid_metadata_6.json b/tests/data/invalid_datacite43/invalid_metadata_6.json new file mode 100644 index 0000000..8fa14f1 --- /dev/null +++ b/tests/data/invalid_datacite43/invalid_metadata_6.json @@ -0,0 +1,22 @@ +{ + "titles": [ + { + "title": "Sample Title" + } + ], + "creators": [ + { + "name": "John Doe" + } + ], + "fundingReferences": [ + { + "funderIdentifier": "1234" + } + ], + "publisher": "Caltech", + "publicationYear": "2023", + "types": { + "resourceTypeGeneral": "Dataset" + } +} \ No newline at end of file diff --git a/tests/data/invalid_datacite43/invalid_metadata_7.json b/tests/data/invalid_datacite43/invalid_metadata_7.json new file mode 100644 index 0000000..bae4d11 --- /dev/null +++ 
b/tests/data/invalid_datacite43/invalid_metadata_7.json @@ -0,0 +1,20 @@ +{ + "titles": [ + { + "title": "Sample Title" + } + ], + "creators": [ + { + "name": "John Doe" + } + ], + "identifiers": [ + {} + ], + "publisher": "Caltech", + "publicationYear": "2023", + "types": { + "resourceTypeGeneral": "Dataset" + } +} \ No newline at end of file diff --git a/tests/data/invalid_datacite43/invalid_metadata_8.json b/tests/data/invalid_datacite43/invalid_metadata_8.json new file mode 100644 index 0000000..247f3ff --- /dev/null +++ b/tests/data/invalid_datacite43/invalid_metadata_8.json @@ -0,0 +1,20 @@ +{ + "titles": [ + { + "title": "Sample Title" + } + ], + "creators": [ + { + "name": "John Doe" + } + ], + "dates": [ + {} + ], + "publisher": "Caltech", + "publicationYear": "2023", + "types": { + "resourceTypeGeneral": "Dataset" + } +} \ No newline at end of file diff --git a/tests/data/invalid_datacite43/invalid_metadata_9.json b/tests/data/invalid_datacite43/invalid_metadata_9.json new file mode 100644 index 0000000..2eddcf1 --- /dev/null +++ b/tests/data/invalid_datacite43/invalid_metadata_9.json @@ -0,0 +1,16 @@ +{ + "titles": [ + { + "title": "Sample Title" + } + ], + "creators": [ + { + "name": "John Doe" + } + ], + "publicationYear": "2023", + "types": { + "resourceTypeGeneral": "Dataset" + } +} \ No newline at end of file diff --git a/tests/data/invalid_datacite43/missing_creators.json b/tests/data/invalid_datacite43/missing_creators.json new file mode 100644 index 0000000..0d0f1a1 --- /dev/null +++ b/tests/data/invalid_datacite43/missing_creators.json @@ -0,0 +1,263 @@ +{ + "contributors": [ + { + "nameIdentifiers": [ + { + "nameIdentifier": "grid.20861.3d", + "nameIdentifierScheme": "GRID" + } + ], + "name": "California Institute of Techonolgy, Pasadena, CA (US)", + "contributorType": "HostingInstitution" + }, + { + "affiliation": [ + { + "name": "California Institute of Technology, Pasadena, CA (US)" + } + ], + "nameIdentifiers": [ + { + "nameIdentifier": 
"0000-0001-5383-8462", + "nameIdentifierScheme": "ORCID" + } + ], + "name": "Roehl, C. M.", + "contributorType": "DataCurator" + }, + { + "affiliation": [ + { + "name": "Department of Physics, University of Toronto, Toronto, ON (CA)" + } + ], + "nameIdentifiers": [ + { + "nameIdentifier": "0000-0001-9947-1053", + "nameIdentifierScheme": "ORCID" + }, + { + "nameIdentifier": "D-2563-2012", + "nameIdentifierScheme": "ResearcherID" + } + ], + "name": "Kimberly Strong", + "contributorType": "ContactPerson" + }, + { + "name": "TCCON", + "contributorType": "ResearchGroup" + } + ], + "descriptions": [ + { + "descriptionType": "Abstract", + "description": "
The Total Carbon Column Observing Network (TCCON) is a network of ground-based Fourier Transform Spectrometers that record direct solar absorption spectra of the atmosphere in the near-infrared. From these spectra, accurate and precise column-averaged abundances of atmospheric constituents including CO2, CH4, N2O, HF, CO, H2O, and HDO, are retrieved. This data set contains observations from the TCCON station at Eureka, Canada." + }, + { + "descriptionType": "Other", + "description": "
Cite this record as:
Strong, K., Roche, S., Franklin, J. E., Mendonca, J., Lutsch, E., Weaver, D., \u2026 Lindenmaier, R. (2019). TCCON data from Eureka (CA), Release GGG2014.R3 [Data set]. CaltechDATA. https://doi.org/10.14291/tccon.ggg2014.eureka01.r3
or choose a different citation style.
Download Citation
" + }, + { + "descriptionType": "Other", + "description": "
Unique Views: 161
Unique Downloads: 7
between January 31, 2019 and July 02, 2020
More info on how stats are collected
" + } + ], + "fundingReferences": [ + { + "funderName": "Atlantic Innovation Fund" + }, + { + "funderName": "Canada Foundation for Innovation", + "funderIdentifierType": "GRID", + "funderIdentifier": "grid.439998.6" + }, + { + "funderName": "Canadian Foundation for Climate and Atmospheric Sciences" + }, + { + "funderName": "Canadian Space Agency", + "funderIdentifierType": "GRID", + "funderIdentifier": "grid.236846.d" + }, + { + "funderName": "Environment and Climate Change Canada", + "funderIdentifierType": "GRID", + "funderIdentifier": "grid.410334.1" + }, + { + "funderName": "Government of Canada (International Polar Year funding)", + "funderIdentifierType": "GRID", + "funderIdentifier": "grid.451254.3" + }, + { + "funderName": "Natural Sciences and Engineering Research Council of Canada", + "funderIdentifierType": "GRID", + "funderIdentifier": "grid.452912.9" + }, + { + "funderName": "Polar Commission (Northern Scientific Training Program)", + "funderIdentifierType": "GRID", + "funderIdentifier": "grid.465477.3" + }, + { + "funderName": "Nova Scotia Research Innovation Trust" + }, + { + "funderName": "Ministry of Research and Innovation (Ontario Innovation Trust and Ontario Research Fund)", + "funderIdentifierType": "GRID", + "funderIdentifier": "grid.451078.f" + }, + { + "funderName": "Natural Resources Canada (Polar Continental Shelf Program)", + "funderIdentifierType": "GRID", + "funderIdentifier": "grid.202033.0" + } + ], + "language": "eng", + "relatedIdentifiers": [ + { + "relatedIdentifier": "10.14291/tccon.ggg2014.documentation.R0/1221662", + "relationType": "IsDocumentedBy", + "relatedIdentifierType": "DOI" + }, + { + "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R0/1149271", + "relationType": "IsNewVersionOf", + "relatedIdentifierType": "DOI" + }, + { + "relatedIdentifier": "https://tccon-wiki.caltech.edu/Network_Policy/Data_Use_Policy/Data_Description", + "relationType": "IsDocumentedBy", + "relatedIdentifierType": "URL" + }, + { + 
"relatedIdentifier": "https://tccon-wiki.caltech.edu/Sites", + "relationType": "IsDocumentedBy", + "relatedIdentifierType": "URL" + }, + { + "relatedIdentifier": "10.14291/TCCON.GGG2014", + "relationType": "IsPartOf", + "relatedIdentifierType": "DOI" + }, + { + "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R1/1325515", + "relationType": "IsNewVersionOf", + "relatedIdentifierType": "DOI" + }, + { + "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R2", + "relationType": "IsNewVersionOf", + "relatedIdentifierType": "DOI" + } + ], + "rightsList": [ + { + "rights": "TCCON Data License", + "rightsURI": "https://data.caltech.edu/tindfiles/serve/8298981c-6613-4ed9-9c54-5ef8fb5180f4/" + } + ], + "subjects": [ + { + "subject": "atmospheric trace gases" + }, + { + "subject": "CO2" + }, + { + "subject": "CH4" + }, + { + "subject": "CO" + }, + { + "subject": "N2O" + }, + { + "subject": "column-averaged dry-air mole fractions" + }, + { + "subject": "remote sensing" + }, + { + "subject": "FTIR spectroscopy" + }, + { + "subject": "TCCON" + } + ], + "version": "R3", + "titles": [ + { + "title": "TCCON data from Eureka (CA), Release GGG2014.R3" + } + ], + "formats": [ + "application/x-netcdf" + ], + "dates": [ + { + "date": "2019-01-31", + "dateType": "Created" + }, + { + "date": "2020-07-01", + "dateType": "Updated" + }, + { + "date": "2010-07-24/2019-08-15", + "dateType": "Collected" + }, + { + "date": "2019-01-31", + "dateType": "Submitted" + }, + { + "date": "2019-01-31", + "dateType": "Issued" + } + ], + "publicationYear": "2019", + "publisher": "CaltechDATA", + "types": { + "resourceTypeGeneral": "Dataset", + "resourceType": "Dataset" + }, + "identifiers": [ + { + "identifier": "10.14291/tccon.ggg2014.eureka01.R3", + "identifierType": "DOI" + }, + { + "identifier": "1171", + "identifierType": "CaltechDATA_Identifier" + }, + { + "identifier": "GGG2014", + "identifierType": "Software_Version" + }, + { + "identifier": "eu", + "identifierType": "id" + }, + { + 
"identifier": "eureka01", + "identifierType": "longName" + }, + { + "identifier": "R1", + "identifierType": "Data_Revision" + } + ], + "geoLocations": [ + { + "geoLocationPlace": "Eureka, NU (CA)", + "geoLocationPoint": { + "pointLatitude": "80.05", + "pointLongitude": "-86.42" + } + } + ], + "schemaVersion": "http://datacite.org/schema/kernel-4" +} \ No newline at end of file diff --git a/tests/data/invalid_datacite43/missing_publisher.json b/tests/data/invalid_datacite43/missing_publisher.json new file mode 100644 index 0000000..9035027 --- /dev/null +++ b/tests/data/invalid_datacite43/missing_publisher.json @@ -0,0 +1,350 @@ +{ + "contributors": [ + { + "nameIdentifiers": [ + { + "nameIdentifier": "grid.20861.3d", + "nameIdentifierScheme": "GRID" + } + ], + "name": "California Institute of Techonolgy, Pasadena, CA (US)", + "contributorType": "HostingInstitution" + }, + { + "affiliation": [ + { + "name": "California Institute of Technology, Pasadena, CA (US)" + } + ], + "nameIdentifiers": [ + { + "nameIdentifier": "0000-0001-5383-8462", + "nameIdentifierScheme": "ORCID" + } + ], + "name": "Roehl, C. M.", + "contributorType": "DataCurator" + }, + { + "affiliation": [ + { + "name": "Department of Physics, University of Toronto, Toronto, ON (CA)" + } + ], + "nameIdentifiers": [ + { + "nameIdentifier": "0000-0001-9947-1053", + "nameIdentifierScheme": "ORCID" + }, + { + "nameIdentifier": "D-2563-2012", + "nameIdentifierScheme": "ResearcherID" + } + ], + "name": "Kimberly Strong", + "contributorType": "ContactPerson" + }, + { + "name": "TCCON", + "contributorType": "ResearchGroup" + } + ], + "descriptions": [ + { + "descriptionType": "Abstract", + "description": "
The Total Carbon Column Observing Network (TCCON) is a network of ground-based Fourier Transform Spectrometers that record direct solar absorption spectra of the atmosphere in the near-infrared. From these spectra, accurate and precise column-averaged abundances of atmospheric constituents including CO2, CH4, N2O, HF, CO, H2O, and HDO, are retrieved. This data set contains observations from the TCCON station at Eureka, Canada." + }, + { + "descriptionType": "Other", + "description": "
Cite this record as:
Strong, K., Roche, S., Franklin, J. E., Mendonca, J., Lutsch, E., Weaver, D., \u2026 Lindenmaier, R. (2019). TCCON data from Eureka (CA), Release GGG2014.R3 [Data set]. CaltechDATA. https://doi.org/10.14291/tccon.ggg2014.eureka01.r3
or choose a different citation style.
Download Citation
" + }, + { + "descriptionType": "Other", + "description": "
Unique Views: 161
Unique Downloads: 7
between January 31, 2019 and July 02, 2020
More info on how stats are collected
" + } + ], + "fundingReferences": [ + { + "funderName": "Atlantic Innovation Fund" + }, + { + "funderName": "Canada Foundation for Innovation", + "funderIdentifierType": "GRID", + "funderIdentifier": "grid.439998.6" + }, + { + "funderName": "Canadian Foundation for Climate and Atmospheric Sciences" + }, + { + "funderName": "Canadian Space Agency", + "funderIdentifierType": "GRID", + "funderIdentifier": "grid.236846.d" + }, + { + "funderName": "Environment and Climate Change Canada", + "funderIdentifierType": "GRID", + "funderIdentifier": "grid.410334.1" + }, + { + "funderName": "Government of Canada (International Polar Year funding)", + "funderIdentifierType": "GRID", + "funderIdentifier": "grid.451254.3" + }, + { + "funderName": "Natural Sciences and Engineering Research Council of Canada", + "funderIdentifierType": "GRID", + "funderIdentifier": "grid.452912.9" + }, + { + "funderName": "Polar Commission (Northern Scientific Training Program)", + "funderIdentifierType": "GRID", + "funderIdentifier": "grid.465477.3" + }, + { + "funderName": "Nova Scotia Research Innovation Trust" + }, + { + "funderName": "Ministry of Research and Innovation (Ontario Innovation Trust and Ontario Research Fund)", + "funderIdentifierType": "GRID", + "funderIdentifier": "grid.451078.f" + }, + { + "funderName": "Natural Resources Canada (Polar Continental Shelf Program)", + "funderIdentifierType": "GRID", + "funderIdentifier": "grid.202033.0" + } + ], + "language": "eng", + "relatedIdentifiers": [ + { + "relatedIdentifier": "10.14291/tccon.ggg2014.documentation.R0/1221662", + "relationType": "IsDocumentedBy", + "relatedIdentifierType": "DOI" + }, + { + "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R0/1149271", + "relationType": "IsNewVersionOf", + "relatedIdentifierType": "DOI" + }, + { + "relatedIdentifier": "https://tccon-wiki.caltech.edu/Network_Policy/Data_Use_Policy/Data_Description", + "relationType": "IsDocumentedBy", + "relatedIdentifierType": "URL" + }, + { + 
"relatedIdentifier": "https://tccon-wiki.caltech.edu/Sites", + "relationType": "IsDocumentedBy", + "relatedIdentifierType": "URL" + }, + { + "relatedIdentifier": "10.14291/TCCON.GGG2014", + "relationType": "IsPartOf", + "relatedIdentifierType": "DOI" + }, + { + "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R1/1325515", + "relationType": "IsNewVersionOf", + "relatedIdentifierType": "DOI" + }, + { + "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R2", + "relationType": "IsNewVersionOf", + "relatedIdentifierType": "DOI" + } + ], + "rightsList": [ + { + "rights": "TCCON Data License", + "rightsURI": "https://data.caltech.edu/tindfiles/serve/8298981c-6613-4ed9-9c54-5ef8fb5180f4/" + } + ], + "subjects": [ + { + "subject": "atmospheric trace gases" + }, + { + "subject": "CO2" + }, + { + "subject": "CH4" + }, + { + "subject": "CO" + }, + { + "subject": "N2O" + }, + { + "subject": "column-averaged dry-air mole fractions" + }, + { + "subject": "remote sensing" + }, + { + "subject": "FTIR spectroscopy" + }, + { + "subject": "TCCON" + } + ], + "version": "R3", + "titles": [ + { + "title": "TCCON data from Eureka (CA), Release GGG2014.R3" + } + ], + "formats": [ + "application/x-netcdf" + ], + "dates": [ + { + "date": "2019-01-31", + "dateType": "Created" + }, + { + "date": "2020-07-01", + "dateType": "Updated" + }, + { + "date": "2010-07-24/2019-08-15", + "dateType": "Collected" + }, + { + "date": "2019-01-31", + "dateType": "Submitted" + }, + { + "date": "2019-01-31", + "dateType": "Issued" + } + ], + "publicationYear": "2019", + "types": { + "resourceTypeGeneral": "Dataset", + "resourceType": "Dataset" + }, + "identifiers": [ + { + "identifier": "10.14291/tccon.ggg2014.eureka01.R3", + "identifierType": "DOI" + }, + { + "identifier": "1171", + "identifierType": "CaltechDATA_Identifier" + }, + { + "identifier": "GGG2014", + "identifierType": "Software_Version" + }, + { + "identifier": "eu", + "identifierType": "id" + }, + { + "identifier": "eureka01", + 
"identifierType": "longName" + }, + { + "identifier": "R1", + "identifierType": "Data_Revision" + } + ], + "creators": [ + { + "affiliation": [ + { + "name": "Department of Physics, University of Toronto, Toronto, ON (CA)" + } + ], + "name": "Strong, K." + }, + { + "affiliation": [ + { + "name": "Department of Physics, University of Toronto, Toronto, ON (CA)" + } + ], + "name": "Roche, S." + }, + { + "affiliation": [ + { + "name": "School of Engineering and Applied Sciences, Harvard University, Cambridge, MA (USA)" + } + ], + "name": "Franklin, J. E." + }, + { + "affiliation": [ + { + "name": "Environment and Climate Change Canada, Downsview, ON (CA)" + } + ], + "name": "Mendonca, J." + }, + { + "affiliation": [ + { + "name": "Department of Physics, University of Toronto, Toronto, ON (CA)" + } + ], + "name": "Lutsch, E." + }, + { + "affiliation": [ + { + "name": "Department of Physics, University of Toronto, Toronto, ON (CA)" + } + ], + "name": "Weaver, D." + }, + { + "affiliation": [ + { + "name": "Department of Physics, University of Toronto, Toronto, ON (CA)" + } + ], + "name": "Fogal, P. F." + }, + { + "affiliation": [ + { + "name": "Department of Physics & Atmospheric Science, Dalhousie University, Halifax, NS, CA" + } + ], + "name": "Drummond, J. R." + }, + { + "affiliation": [ + { + "name": "Department of Physics, University of Toronto, Toronto, ON (CA)" + }, + { + "name": "UCAR Center for Science Education, Boulder, CO (US)" + } + ], + "name": "Batchelor, R." + }, + { + "affiliation": [ + { + "name": "Department of Physics, University of Toronto, Toronto, ON (CA)" + }, + { + "name": "Pacific Northwest National Laboratory, Richland, WA (US)" + } + ], + "name": "Lindenmaier, R." 
+ } + ], + "geoLocations": [ + { + "geoLocationPlace": "Eureka, NU (CA)", + "geoLocationPoint": { + "pointLatitude": "80.05", + "pointLongitude": "-86.42" + } + } + ], + "schemaVersion": "http://datacite.org/schema/kernel-4" +} \ No newline at end of file diff --git a/tests/data/invalid_datacite43/multiple_errors.json b/tests/data/invalid_datacite43/multiple_errors.json new file mode 100644 index 0000000..c18931b --- /dev/null +++ b/tests/data/invalid_datacite43/multiple_errors.json @@ -0,0 +1,263 @@ +{ + "contributors": [ + { + "nameIdentifiers": [ + { + "nameIdentifier": "grid.20861.3d", + "nameIdentifierScheme": "GRID" + } + ], + "name": "California Institute of Techonolgy, Pasadena, CA (US)", + "contributorType": "HostingInstitution" + }, + { + "affiliation": [ + { + "name": "California Institute of Technology, Pasadena, CA (US)" + } + ], + "nameIdentifiers": [ + { + "nameIdentifier": "0000-0001-5383-8462", + "nameIdentifierScheme": "ORCID" + } + ], + "name": "Roehl, C. M.", + "contributorType": "DataCurator" + }, + { + "affiliation": [ + { + "name": "Department of Physics, University of Toronto, Toronto, ON (CA)" + } + ], + "nameIdentifiers": [ + { + "nameIdentifier": "0000-0001-9947-1053", + "nameIdentifierScheme": "ORCID" + }, + { + "nameIdentifier": "D-2563-2012", + "nameIdentifierScheme": "ResearcherID" + } + ], + "name": "Kimberly Strong", + "contributorType": "ContactPerson" + }, + { + "name": "TCCON", + "contributorType": "ResearchGroup" + } + ], + "descriptions": [ + { + "descriptionType": "Abstract", + "description": "
The Total Carbon Column Observing Network (TCCON) is a network of ground-based Fourier Transform Spectrometers that record direct solar absorption spectra of the atmosphere in the near-infrared. From these spectra, accurate and precise column-averaged abundances of atmospheric constituents including CO2, CH4, N2O, HF, CO, H2O, and HDO, are retrieved. This data set contains observations from the TCCON station at Eureka, Canada." + }, + { + "descriptionType": "Other", + "description": "
Cite this record as:
Strong, K., Roche, S., Franklin, J. E., Mendonca, J., Lutsch, E., Weaver, D., \u2026 Lindenmaier, R. (2019). TCCON data from Eureka (CA), Release GGG2014.R3 [Data set]. CaltechDATA. https://doi.org/10.14291/tccon.ggg2014.eureka01.r3
or choose a different citation style.
Download Citation
" + }, + { + "descriptionType": "Other", + "description": "
Unique Views: 161
Unique Downloads: 7
between January 31, 2019 and July 02, 2020
More info on how stats are collected
" + } + ], + "fundingReferences": [ + { + "funderName": "Atlantic Innovation Fund" + }, + { + "funderName": "Canada Foundation for Innovation", + "funderIdentifierType": "GRID", + "funderIdentifier": "grid.439998.6" + }, + { + "funderName": "Canadian Foundation for Climate and Atmospheric Sciences" + }, + { + "funderName": "Canadian Space Agency", + "funderIdentifierType": "GRID", + "funderIdentifier": "grid.236846.d" + }, + { + "funderName": "Environment and Climate Change Canada", + "funderIdentifierType": "GRID", + "funderIdentifier": "grid.410334.1" + }, + { + "funderName": "Government of Canada (International Polar Year funding)", + "funderIdentifierType": "GRID", + "funderIdentifier": "grid.451254.3" + }, + { + "funderName": "Natural Sciences and Engineering Research Council of Canada", + "funderIdentifierType": "GRID", + "funderIdentifier": "grid.452912.9" + }, + { + "funderName": "Polar Commission (Northern Scientific Training Program)", + "funderIdentifierType": "GRID", + "funderIdentifier": "grid.465477.3" + }, + { + "funderName": "Nova Scotia Research Innovation Trust" + }, + { + "funderName": "Ministry of Research and Innovation (Ontario Innovation Trust and Ontario Research Fund)", + "funderIdentifierType": "GRID", + "funderIdentifier": "grid.451078.f" + }, + { + "funderName": "Natural Resources Canada (Polar Continental Shelf Program)", + "funderIdentifierType": "GRID", + "funderIdentifier": "grid.202033.0" + } + ], + "language": "eng", + "relatedIdentifiers": [ + { + "relatedIdentifier": "10.14291/tccon.ggg2014.documentation.R0/1221662", + "relationType": "IsDocumentedBy", + "relatedIdentifierType": "DOI" + }, + { + "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R0/1149271", + "relationType": "IsNewVersionOf", + "relatedIdentifierType": "DOI" + }, + { + "relatedIdentifier": "https://tccon-wiki.caltech.edu/Network_Policy/Data_Use_Policy/Data_Description", + "relationType": "IsDocumentedBy", + "relatedIdentifierType": "URL" + }, + { + 
"relatedIdentifier": "https://tccon-wiki.caltech.edu/Sites", + "relationType": "IsDocumentedBy", + "relatedIdentifierType": "URL" + }, + { + "relatedIdentifier": "10.14291/TCCON.GGG2014", + "relationType": "IsPartOf", + "relatedIdentifierType": "DOI" + }, + { + "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R1/1325515", + "relationType": "IsNewVersionOf", + "relatedIdentifierType": "DOI" + }, + { + "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R2", + "relationType": "IsNewVersionOf", + "relatedIdentifierType": "DOI" + } + ], + "rightsList": [ + { + "rights": "TCCON Data License", + "rightsURI": "https://data.caltech.edu/tindfiles/serve/8298981c-6613-4ed9-9c54-5ef8fb5180f4/" + } + ], + "subjects": [ + { + "subject": "atmospheric trace gases" + }, + { + "subject": "CO2" + }, + { + "subject": "CH4" + }, + { + "subject": "CO" + }, + { + "subject": "N2O" + }, + { + "subject": "column-averaged dry-air mole fractions" + }, + { + "subject": "remote sensing" + }, + { + "subject": "FTIR spectroscopy" + }, + { + "subject": "TCCON" + } + ], + "version": "R3", + "titles": [ + { + "title": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + } + ], + "formats": [ + "application/x-netcdf" + ], + "dates": [ + { + "date": "31-01-2019", + "dateType": "Created" + }, + { + "date": "2020-07-01", + "dateType": "Updated" + }, + { + "date": "2010-07-24/2019-08-15", + "dateType": "Collected" + }, + { + "date": "2019-01-31", + "dateType": "Submitted" + }, + { + "date": "2019-01-31", + "dateType": "Issued" + } + ], + "publicationYear": "2019", + "publisher": "CaltechDATA", + "types": { + "resourceTypeGeneral": "Dataset", + "resourceType": "Dataset" + }, + "identifiers": [ + { + "identifier": "10.14291/tccon.ggg2014.eureka01.R3", 
+ "identifierType": "DOI" + }, + { + "identifier": "1171", + "identifierType": "CaltechDATA_Identifier" + }, + { + "identifier": "GGG2014", + "identifierType": "Software_Version" + }, + { + "identifier": "eu", + "identifierType": "id" + }, + { + "identifier": "eureka01", + "identifierType": "longName" + }, + { + "identifier": "R1", + "identifierType": "Data_Revision" + } + ], + "geoLocations": [ + { + "geoLocationPlace": "Eureka, NU (CA)", + "geoLocationPoint": { + "pointLatitude": "80.05", + "pointLongitude": "-86.42" + } + } + ], + "schemaVersion": "http://datacite.org/schema/kernel-4" +} \ No newline at end of file diff --git a/tests/data/invalid_datacite43/type_error_creators.json b/tests/data/invalid_datacite43/type_error_creators.json new file mode 100644 index 0000000..6200870 --- /dev/null +++ b/tests/data/invalid_datacite43/type_error_creators.json @@ -0,0 +1,264 @@ +{ + "contributors": [ + { + "nameIdentifiers": [ + { + "nameIdentifier": "grid.20861.3d", + "nameIdentifierScheme": "GRID" + } + ], + "name": "California Institute of Techonolgy, Pasadena, CA (US)", + "contributorType": "HostingInstitution" + }, + { + "affiliation": [ + { + "name": "California Institute of Technology, Pasadena, CA (US)" + } + ], + "nameIdentifiers": [ + { + "nameIdentifier": "0000-0001-5383-8462", + "nameIdentifierScheme": "ORCID" + } + ], + "name": "Roehl, C. M.", + "contributorType": "DataCurator" + }, + { + "affiliation": [ + { + "name": "Department of Physics, University of Toronto, Toronto, ON (CA)" + } + ], + "nameIdentifiers": [ + { + "nameIdentifier": "0000-0001-9947-1053", + "nameIdentifierScheme": "ORCID" + }, + { + "nameIdentifier": "D-2563-2012", + "nameIdentifierScheme": "ResearcherID" + } + ], + "name": "Kimberly Strong", + "contributorType": "ContactPerson" + }, + { + "name": "TCCON", + "contributorType": "ResearchGroup" + } + ], + "descriptions": [ + { + "descriptionType": "Abstract", + "description": "
The Total Carbon Column Observing Network (TCCON) is a network of ground-based Fourier Transform Spectrometers that record direct solar absorption spectra of the atmosphere in the near-infrared. From these spectra, accurate and precise column-averaged abundances of atmospheric constituents including CO2, CH4, N2O, HF, CO, H2O, and HDO, are retrieved. This data set contains observations from the TCCON station at Eureka, Canada." + }, + { + "descriptionType": "Other", + "description": "
Cite this record as:
Strong, K., Roche, S., Franklin, J. E., Mendonca, J., Lutsch, E., Weaver, D., \u2026 Lindenmaier, R. (2019). TCCON data from Eureka (CA), Release GGG2014.R3 [Data set]. CaltechDATA. https://doi.org/10.14291/tccon.ggg2014.eureka01.r3
or choose a different citation style.
Download Citation
" + }, + { + "descriptionType": "Other", + "description": "
Unique Views: 161
Unique Downloads: 7
between January 31, 2019 and July 02, 2020
More info on how stats are collected
" + } + ], + "fundingReferences": [ + { + "funderName": "Atlantic Innovation Fund" + }, + { + "funderName": "Canada Foundation for Innovation", + "funderIdentifierType": "GRID", + "funderIdentifier": "grid.439998.6" + }, + { + "funderName": "Canadian Foundation for Climate and Atmospheric Sciences" + }, + { + "funderName": "Canadian Space Agency", + "funderIdentifierType": "GRID", + "funderIdentifier": "grid.236846.d" + }, + { + "funderName": "Environment and Climate Change Canada", + "funderIdentifierType": "GRID", + "funderIdentifier": "grid.410334.1" + }, + { + "funderName": "Government of Canada (International Polar Year funding)", + "funderIdentifierType": "GRID", + "funderIdentifier": "grid.451254.3" + }, + { + "funderName": "Natural Sciences and Engineering Research Council of Canada", + "funderIdentifierType": "GRID", + "funderIdentifier": "grid.452912.9" + }, + { + "funderName": "Polar Commission (Northern Scientific Training Program)", + "funderIdentifierType": "GRID", + "funderIdentifier": "grid.465477.3" + }, + { + "funderName": "Nova Scotia Research Innovation Trust" + }, + { + "funderName": "Ministry of Research and Innovation (Ontario Innovation Trust and Ontario Research Fund)", + "funderIdentifierType": "GRID", + "funderIdentifier": "grid.451078.f" + }, + { + "funderName": "Natural Resources Canada (Polar Continental Shelf Program)", + "funderIdentifierType": "GRID", + "funderIdentifier": "grid.202033.0" + } + ], + "language": "eng", + "relatedIdentifiers": [ + { + "relatedIdentifier": "10.14291/tccon.ggg2014.documentation.R0/1221662", + "relationType": "IsDocumentedBy", + "relatedIdentifierType": "DOI" + }, + { + "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R0/1149271", + "relationType": "IsNewVersionOf", + "relatedIdentifierType": "DOI" + }, + { + "relatedIdentifier": "https://tccon-wiki.caltech.edu/Network_Policy/Data_Use_Policy/Data_Description", + "relationType": "IsDocumentedBy", + "relatedIdentifierType": "URL" + }, + { + 
"relatedIdentifier": "https://tccon-wiki.caltech.edu/Sites", + "relationType": "IsDocumentedBy", + "relatedIdentifierType": "URL" + }, + { + "relatedIdentifier": "10.14291/TCCON.GGG2014", + "relationType": "IsPartOf", + "relatedIdentifierType": "DOI" + }, + { + "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R1/1325515", + "relationType": "IsNewVersionOf", + "relatedIdentifierType": "DOI" + }, + { + "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R2", + "relationType": "IsNewVersionOf", + "relatedIdentifierType": "DOI" + } + ], + "rightsList": [ + { + "rights": "TCCON Data License", + "rightsURI": "https://data.caltech.edu/tindfiles/serve/8298981c-6613-4ed9-9c54-5ef8fb5180f4/" + } + ], + "subjects": [ + { + "subject": "atmospheric trace gases" + }, + { + "subject": "CO2" + }, + { + "subject": "CH4" + }, + { + "subject": "CO" + }, + { + "subject": "N2O" + }, + { + "subject": "column-averaged dry-air mole fractions" + }, + { + "subject": "remote sensing" + }, + { + "subject": "FTIR spectroscopy" + }, + { + "subject": "TCCON" + } + ], + "version": "R3", + "titles": [ + { + "title": "TCCON data from Eureka (CA), Release GGG2014.R3" + } + ], + "formats": [ + "application/x-netcdf" + ], + "dates": [ + { + "date": "2019-01-31", + "dateType": "Created" + }, + { + "date": "2020-07-01", + "dateType": "Updated" + }, + { + "date": "2010-07-24/2019-08-15", + "dateType": "Collected" + }, + { + "date": "2019-01-31", + "dateType": "Submitted" + }, + { + "date": "2019-01-31", + "dateType": "Issued" + } + ], + "publicationYear": "2019", + "publisher": "CaltechDATA", + "types": { + "resourceTypeGeneral": "Dataset", + "resourceType": "Dataset" + }, + "identifiers": [ + { + "identifier": "10.14291/tccon.ggg2014.eureka01.R3", + "identifierType": "DOI" + }, + { + "identifier": "1171", + "identifierType": "CaltechDATA_Identifier" + }, + { + "identifier": "GGG2014", + "identifierType": "Software_Version" + }, + { + "identifier": "eu", + "identifierType": "id" + }, + { + 
"identifier": "eureka01", + "identifierType": "longName" + }, + { + "identifier": "R1", + "identifierType": "Data_Revision" + } + ], + "creators": "Incorrect type", + "geoLocations": [ + { + "geoLocationPlace": "Eureka, NU (CA)", + "geoLocationPoint": { + "pointLatitude": "80.05", + "pointLongitude": "-86.42" + } + } + ], + "schemaVersion": "http://datacite.org/schema/kernel-4" +} \ No newline at end of file diff --git a/tests/test_unit.py b/tests/test_unit.py new file mode 100644 index 0000000..c9b57d2 --- /dev/null +++ b/tests/test_unit.py @@ -0,0 +1,152 @@ +import os +import pytest +from customize_schema import validate_metadata as validator43 +from helpers import load_json_path +import logging +from tqdm import tqdm + +# Directories for valid and invalid JSON files +VALID_DATACITE43_DIR = "../tests/data/datacite43/" +INVALID_DATACITE43_DIR = "../tests/data/invalid_datacite43/" + + +# Function to get all JSON files in the directory +def get_all_json_files(directory): + return [ + os.path.join(directory, f) for f in os.listdir(directory) if f.endswith(".json") + ] + + +# Get list of all valid JSON files in the directory +VALID_DATACITE43_FILES = get_all_json_files(VALID_DATACITE43_DIR) +INVALID_DATACITE43_FILES = get_all_json_files(INVALID_DATACITE43_DIR) + + +@pytest.mark.parametrize("valid_file", VALID_DATACITE43_FILES) +def test_valid_json(valid_file): + """Test that valid example files validate successfully.""" + print(f"\nValidating file: {valid_file}") # Log for file being tested + json_data = load_json_path(valid_file) + validation_errors = None + try: + validation_errors = validator43(json_data) + except ValueError as e: + pytest.fail(f"Validation failed for: {valid_file}\nErrors: {str(e)}") + + if validation_errors: + pytest.fail(f"Validation failed for: {valid_file}\nErrors: {validation_errors}") + else: + print(f"Validation passed for: {valid_file}") + + +@pytest.mark.parametrize("invalid_file", INVALID_DATACITE43_FILES) +def 
test_invalid_json(invalid_file): + """Test that invalid example files do not validate successfully.""" + print(f"\nValidating file: {invalid_file}") # Log for file being tested + json_data = load_json_path(invalid_file) + validation_errors = None + try: + validation_errors = validator43(json_data) + except ValueError: + print(f"Validation failed as expected for: {invalid_file}") + return # Test passes if validation raises a ValueError + + if validation_errors: + print(f"Validation failed as expected for: {invalid_file}") + else: + pytest.fail(f"Validation passed unexpectedly for: {invalid_file}") + + +@pytest.mark.parametrize( + "missing_field_file", + [ + {"file": "../tests/data/missing_creators.json", "missing_field": "creators"}, + {"file": "../tests/data/missing_titles.json", "missing_field": "titles"}, + ], +) +def test_missing_required_fields(missing_field_file): + """Test that JSON files missing required fields fail validation.""" + print( + f"\nTesting missing field: {missing_field_file['missing_field']} in file: {missing_field_file['file']}" + ) + json_data = load_json_path(missing_field_file["file"]) + with pytest.raises( + ValueError, + match=f"Missing required metadata field: {missing_field_file['missing_field']}", + ): + validator43(json_data) + + +@pytest.mark.parametrize( + "type_error_file", + [ + {"file": "../tests/data/type_error_creators.json", "field": "creators"}, + {"file": "../tests/data/type_error_dates.json", "field": "dates"}, + ], +) +def test_incorrect_field_types(type_error_file): + """Test that JSON files with incorrect field types fail validation.""" + print( + f"\nTesting incorrect type in field: {type_error_file['field']} for file: {type_error_file['file']}" + ) + json_data = load_json_path(type_error_file["file"]) + with pytest.raises( + ValueError, match=f"Incorrect type for field: {type_error_file['field']}" + ): + validator43(json_data) + + +def test_multiple_errors(): + """Test JSON file with multiple issues to check all errors 
are raised.""" + json_data = load_json_path("../tests/data/multiple_errors.json") + with pytest.raises(ValueError, match="Multiple validation errors"): + validator43(json_data) + + +def test_error_logging(caplog): + """Test that errors are logged correctly during validation.""" + json_data = load_json_path( + "../tests/data/invalid_datacite43/some_invalid_file.json" + ) + with caplog.at_level(logging.ERROR): + with pytest.raises(ValueError): + validator43(json_data) + assert "Validation failed" in caplog.text + + +if __name__ == "__main__": + # Manual test runner for valid files + failed_valid_files = [] + print("\nRunning validation for valid files...") + for file in tqdm(VALID_DATACITE43_FILES, desc="Valid files"): + try: + test_valid_json(file) + except AssertionError as e: + failed_valid_files.append(file) + print(f"Error occurred in valid file: {file}\nError details: {e}") + + if not failed_valid_files: + print("\n✅ All valid files passed validation. Test complete.") + else: + print("\n❌ The following valid files failed validation:") + for failed_file in failed_valid_files: + print(f"- {failed_file}") + + # Manual test runner for invalid files + passed_invalid_files = [] + print("\nRunning validation for invalid files...") + for file in tqdm(INVALID_DATACITE43_FILES, desc="Invalid files"): + try: + test_invalid_json(file) + except AssertionError as e: + passed_invalid_files.append(file) + print(f"Error occurred in invalid file: {file}\nError details: {e}") + + if not passed_invalid_files: + print( + "\n✅ All invalid files failed validation as expected. Test is a success."
+ ) + else: + print("\n❌ The following invalid files unexpectedly passed validation:") + for passed_file in passed_invalid_files: + print(f"- {passed_file}") diff --git a/tests/tester.py b/tests/tester.py new file mode 100644 index 0000000..13e8250 --- /dev/null +++ b/tests/tester.py @@ -0,0 +1,56 @@ +import os +import pytest +from customize_schema import validate_metadata as validator43 +from helpers import load_json_path + +# Define the directory containing the test JSON files +VALID_DATACITE43_DIR = "../tests/data/datacite43/" # Directory for valid JSON files + + +# Function to get all JSON files in the directory +def get_all_json_files(directory): + return [ + os.path.join(directory, f) for f in os.listdir(directory) if f.endswith(".json") + ] + + +# Get list of all valid JSON files in the directory +VALID_DATACITE43_FILES = get_all_json_files(VALID_DATACITE43_DIR) + + +@pytest.mark.parametrize("valid_file", VALID_DATACITE43_FILES) +def test_valid_json(valid_file): + """Test that valid example files validate successfully.""" + print(f"Validating file: {valid_file}") # Added log for file being tested + json_data = load_json_path(valid_file) + validation_errors = None + try: + validation_errors = validator43(json_data) + except ValueError as e: + pytest.fail(f"Validation failed for: {valid_file}\nErrors: {str(e)}") + + if validation_errors: + pytest.fail(f"Validation failed for: {valid_file}\nErrors: {validation_errors}") + else: + print(f"Validation passed for: {valid_file}") + + +if __name__ == "__main__": + # Track failures for manual testing + failed_files = [] + + # Run the tests and print results for each file + for file in VALID_DATACITE43_FILES: + try: + test_valid_json(file) + except AssertionError as e: + failed_files.append(file) + print(f"Error occurred in file: {file}\nError details: {e}") + + # Print a summary of all failed files + if failed_files: + print("\nThe following files failed validation:") + for failed_file in failed_files: + print(f"- 
{failed_file}") + else: + print("\nAll files passed validation.")