diff --git a/CITATION.cff b/CITATION.cff
index e626019..e8e6679 100755
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -11,6 +11,7 @@ authors:
abstract: Python wrapper for CaltechDATA API.
repository-code: "https://github.com/caltechlibrary/caltechdata_api"
type: software
+doi: 10.22002/wfjr5-kw507
version: 1.8.2
license-url: "https://data.caltech.edu/license"
keywords:
@@ -18,4 +19,4 @@ keywords:
- metadata
- software
- InvenioRDM
-date-released: 2024-11-06
+date-released: 2024-11-08
diff --git a/caltechdata_api/caltechdata_write.py b/caltechdata_api/caltechdata_write.py
index e0cb0dd..2a46365 100644
--- a/caltechdata_api/caltechdata_write.py
+++ b/caltechdata_api/caltechdata_write.py
@@ -248,7 +248,10 @@ def caltechdata_write(
# Make draft and publish
result = requests.post(url + "/api/records", headers=headers, json=data)
if result.status_code != 201:
- raise Exception(result.text)
+ if result.status_code == 400 and "Referer checking failed" in result.text:
+ raise Exception("Token is incorrect or missing referer.")
+ else:
+ raise Exception(result.text)
idv = result.json()["id"]
publish_link = result.json()["links"]["publish"]
diff --git a/caltechdata_api/cli.py b/caltechdata_api/cli.py
index ecb54a3..a0b46ae 100644
--- a/caltechdata_api/cli.py
+++ b/caltechdata_api/cli.py
@@ -2,7 +2,7 @@
import requests
import s3fs
from caltechdata_api import caltechdata_write, caltechdata_edit
-from .md_to_json import parse_readme_to_json
+from caltechdata_api.md_to_json import parse_readme_to_json  # package-qualified: resolves wherever the caltechdata_api import above does
import json
import os
from cryptography.fernet import Fernet
@@ -469,6 +469,7 @@ def create_record(production):
"descriptions": [
{"description": args["description"], "descriptionType": "Abstract"}
],
+ "publisher": "CaltechDATA",
"creators": [
{
"affiliation": [
diff --git a/caltechdata_api/customize_schema.py b/caltechdata_api/customize_schema.py
index 5403244..3199b43 100644
--- a/caltechdata_api/customize_schema.py
+++ b/caltechdata_api/customize_schema.py
@@ -393,20 +393,12 @@ def validate_metadata(json_record):
"""
errors = []
- # Check for 'types' and 'resourceTypeGeneral'
- if "types" not in json_record:
- errors.append("'types' field is missing.")
- elif not isinstance(json_record["types"], dict):
- errors.append("'types' field should be a dictionary.")
- elif "resourceTypeGeneral" not in json_record["types"]:
- errors.append("'resourceTypeGeneral' field is missing in 'types'.")
-
- # Check for 'title'
if "titles" not in json_record:
errors.append("'titles' field is missing.")
elif not isinstance(json_record["titles"], list) or len(json_record["titles"]) == 0:
errors.append("'titles' should be a non-empty list.")
else:
+
# Ensure each title is a dictionary with 'title' field
for title in json_record["titles"]:
if not isinstance(title, dict) or "title" not in title:
@@ -480,6 +472,7 @@ def validate_metadata(json_record):
errors.append("'relatedIdentifiers' should be a list.")
else:
for related_id in json_record["relatedIdentifiers"]:
+
if (
not isinstance(related_id, dict)
or "relatedIdentifier" not in related_id
@@ -493,6 +486,76 @@ def validate_metadata(json_record):
if not isinstance(json_record["rightsList"], list):
errors.append("'rightsList' should be a list.")
else:
+
+ for right in json_record["rightsList"]:
+ if not isinstance(right, dict) or "rights" not in right:
+ errors.append("Each 'rightsList' entry must have 'rights'.")
+ if "rightsURI" in right and not isinstance(right["rightsURI"], str):
+ errors.append("'rightsURI' should be a string.")
+
+ # Check for 'subjects'
+ if "subjects" in json_record:
+ if not isinstance(json_record["subjects"], list):
+ errors.append("'subjects' should be a list.")
+ else:
+ for subject in json_record["subjects"]:
+ if not isinstance(subject, dict) or "subject" not in subject:
+ errors.append("Each 'subject' must have a 'subject' key.")
+
+ # Check for 'dates'
+ if "dates" not in json_record:
+ errors.append("'dates' field is missing.")
+ elif not isinstance(json_record["dates"], list) or len(json_record["dates"]) == 0:
+ errors.append("'dates' should be a non-empty list.")
+ else:
+ for date in json_record["dates"]:
+ if (
+ not isinstance(date, dict)
+ or "date" not in date
+ or "dateType" not in date
+ ):
+ errors.append("Each 'date' must have 'date' and 'dateType'.")
+
+ # Check for 'identifiers'
+ if "identifiers" not in json_record:
+ errors.append("'identifiers' field is missing.")
+ elif (
+ not isinstance(json_record["identifiers"], list)
+ or len(json_record["identifiers"]) == 0
+ ):
+ errors.append("'identifiers' should be a non-empty list.")
+ else:
+ for identifier in json_record["identifiers"]:
+ if (
+ not isinstance(identifier, dict)
+ or "identifier" not in identifier
+ or "identifierType" not in identifier
+ ):
+ errors.append(
+ "Each 'identifier' must have 'identifier' and 'identifierType'."
+ )
+
+ # Check for 'creators'
+ if "creators" not in json_record:
+ errors.append("'creators' field is missing.")
+ elif (
+ not isinstance(json_record["creators"], list)
+ or len(json_record["creators"]) == 0
+ ):
+ errors.append("'creators' should be a non-empty list.")
+ else:
+ for creator in json_record["creators"]:
+ if not isinstance(creator, dict) or "name" not in creator:
+ errors.append("Each 'creator' must have 'name'.")
+ if "affiliation" in creator:
+ if not isinstance(creator["affiliation"], list):
+ errors.append("'affiliation' in 'creators' should be a list.")
+ for affiliation in creator["affiliation"]:
+ if not isinstance(affiliation, dict) or "name" not in affiliation:
+ errors.append(
+ "Each 'affiliation' in 'creators' must have a 'name'."
+ )
+
for rights in json_record["rightsList"]:
if not isinstance(rights, dict) or "rights" not in rights:
errors.append(
@@ -504,6 +567,60 @@ def validate_metadata(json_record):
if not isinstance(json_record["geoLocations"], list):
errors.append("'geoLocations' should be a list.")
else:
+
+ for geo_loc in json_record["geoLocations"]:
+ if not isinstance(geo_loc, dict) or "geoLocationPlace" not in geo_loc:
+ errors.append("Each 'geoLocation' must have 'geoLocationPlace'.")
+ if "geoLocationPoint" in geo_loc:
+ point = geo_loc["geoLocationPoint"]
+ if (
+ not isinstance(point, dict)
+ or "pointLatitude" not in point
+ or "pointLongitude" not in point
+ ):
+ errors.append(
+ "'geoLocationPoint' must have 'pointLatitude' and 'pointLongitude'."
+ )
+
+ # Check for 'formats'
+ if "formats" in json_record and (
+ not isinstance(json_record["formats"], list) or len(json_record["formats"]) == 0
+ ):
+ errors.append("'formats' should be a non-empty list.")
+
+ # Check for 'language'
+ if "language" in json_record:
+ if not isinstance(json_record["language"], str):
+ errors.append("'language' should be a string.")
+
+ # Check for 'version'
+ if "version" in json_record and not isinstance(json_record["version"], str):
+ errors.append("'version' should be a string.")
+
+ # Check for 'publisher'
+ if "publisher" not in json_record:
+ errors.append("'publisher' field is missing.")
+ elif not isinstance(json_record["publisher"], str):
+ errors.append("'publisher' should be a string.")
+
+ # Check for 'publicationYear'
+ if "publicationYear" not in json_record:
+ errors.append("'publicationYear' field is missing.")
+ elif not isinstance(json_record["publicationYear"], str):
+ errors.append("'publicationYear' should be a string.")
+
+ # Check for 'types'
+ if "types" not in json_record:
+ errors.append("'types' field is missing.")
+ elif not isinstance(json_record["types"], dict):
+ errors.append("'types' should be a dictionary.")
+ else:
+ if "resourceTypeGeneral" not in json_record["types"]:
+ errors.append("'types' must have 'resourceTypeGeneral'.")
+ if "resourceType" in json_record["types"] and not isinstance(
+ json_record["types"]["resourceType"], str
+ ):
+ errors.append("'resourceType' should be a string if provided.")
for location in json_record["geoLocations"]:
if not isinstance(location, dict):
errors.append("Each entry in 'geoLocations' must be a dictionary.")
diff --git a/tests/bot.py b/tests/bot.py
new file mode 100644
index 0000000..b98360e
--- /dev/null
+++ b/tests/bot.py
@@ -0,0 +1,234 @@
+import subprocess
+import time
+from unittest.mock import patch
+import sys
+import os
+import json
+import requests
+from datetime import datetime
+import pytest
+from customize_schema import validate_metadata as validator43  # Import validator
+
+
+class CaltechDataTester:
+    """Drive the CaltechDATA CLI end to end and validate the resulting record.
+
+    Every instance works in its own timestamped directory under
+    ``caltech_test_data``, which holds the generated CSV, the downloaded
+    metadata and the log of the run.
+    """
+
+    def __init__(self):
+        self.test_dir = "caltech_test_data"
+        self.timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+
+        # Create test data directory with timestamp. makedirs() also creates
+        # the parent directory; exist_ok avoids a crash when two runs start
+        # within the same second.
+        self.test_run_dir = os.path.join(self.test_dir, f"test_run_{self.timestamp}")
+        os.makedirs(self.test_run_dir, exist_ok=True)
+
+        # Initialize logging
+        self.log_file = os.path.join(self.test_run_dir, "test_log.txt")
+
+    def log(self, message):
+        """Log message to both console and file"""
+        print(message)
+        # Explicit UTF-8: some messages contain emoji, which a platform
+        # default encoding may not be able to represent.
+        with open(self.log_file, "a", encoding="utf-8") as f:
+            f.write(f"{datetime.now()}: {message}\n")
+
+    def create_test_files(self):
+        """Create necessary test files"""
+        # Create a dummy CSV file
+        # NOTE(review): the file is written to the run directory, while the
+        # upload prompt is answered with the bare name "test_data.csv" -
+        # confirm that the CLI resolves it from there.
+        csv_path = os.path.join(self.test_run_dir, "test_data.csv")
+        with open(csv_path, "w") as f:
+            f.write("date,temperature,humidity\n")
+            f.write("2023-01-01,25.5,60\n")
+            f.write("2023-01-02,26.0,62\n")
+            f.write("2023-01-03,24.8,65\n")
+
+        self.log(f"Created test CSV file: {csv_path}")
+        return csv_path
+
+    def generate_test_responses(self):
+        """Generate test responses for CLI prompts"""
+        return {
+            "Do you want to create or edit a CaltechDATA record? (create/edit): ": "create",
+            "Do you want to use metadata from an existing file or create new metadata? (existing/create): ": "create",
+            "Enter the title of the dataset: ": f"Test Dataset {self.timestamp}",
+            "Enter the abstract or description of the dataset: ": "This is an automated test dataset containing sample climate data for validation purposes.",
+            "Enter the number corresponding to the desired license: ": "1",
+            "Enter your ORCID identifier: ": "0000-0002-1825-0097",
+            "How many funding entries do you want to provide? ": "1",
+            "Enter the award number for funding: ": "NSF-1234567",
+            "Enter the award title for funding: ": "Automated Testing Grant",
+            "Enter the funder ROR (https://ror.org): ": "021nxhr62",
+            "Do you want to upload or link data files? (upload/link/n): ": "upload",
+            "Enter the filename to upload as a supporting file (or 'n' to finish): ": "test_data.csv",
+            "Do you want to add more files? (y/n): ": "n",
+            "Do you want to send this record to CaltechDATA? (y/n): ": "y",
+        }
+
+    def extract_record_id(self, output_text):
+        """Extract record ID from CLI output.
+
+        Returns the text after the last "/" of the first line that contains
+        "uploads/", or None when there is no such line.
+        """
+        try:
+            for line in output_text.split("\n"):
+                if "uploads/" in line:
+                    return line.strip().split("/")[-1]
+        except Exception as e:
+            self.log(f"Error extracting record ID: {e}")
+        return None
+
+    def download_and_validate_record(self, record_id):
+        """Download the record's DataCite JSON export and validate it.
+
+        Returns True when the validator reports no errors; False when it
+        reports errors or when any step raises.
+        """
+        try:
+            # Wait for record to be available
+            time.sleep(5)
+
+            # Download metadata
+            url = f"https://data.caltechlibrary.dev/records/{record_id}/export/datacite-json"
+            # The timeout keeps a stalled connection from hanging the run.
+            response = requests.get(url, timeout=60)
+            response.raise_for_status()
+            metadata = response.json()
+
+            # Save metadata
+            json_path = os.path.join(self.test_run_dir, f"{record_id}.json")
+            with open(json_path, "w") as f:
+                json.dump(metadata, f, indent=2)
+
+            self.log(f"Downloaded metadata to: {json_path}")
+
+            # Validate metadata using the imported validator
+            validation_errors = validator43(metadata)
+
+            if validation_errors:
+                self.log("❌ Validation errors found:")
+                for error in validation_errors:
+                    self.log(f"  - {error}")
+                return False
+
+            self.log("✅ Validation passed successfully")
+            return True
+
+        except Exception as e:
+            self.log(f"Error in download and validation: {e}")
+            return False
+
+    def run_test_submission(self):
+        """Run the complete test submission process.
+
+        Returns True when the record was created and passed validation,
+        False otherwise.
+        """
+        # Bound before the try block so the cleanup in ``finally`` works even
+        # when create_test_files() itself fails.
+        test_csv = None
+        try:
+            self.log("Starting test submission process...")
+
+            # Create test files
+            test_csv = self.create_test_files()
+
+            # Generate responses
+            responses = self.generate_test_responses()
+
+            # Setup output capture
+            class OutputCapture:
+                def __init__(self):
+                    self.output = []
+
+                def write(self, text):
+                    self.output.append(text)
+                    sys.__stdout__.write(text)
+
+                def flush(self):
+                    pass
+
+                def get_output(self):
+                    return "".join(self.output)
+
+            # Mock input
+            def mock_input(prompt):
+                self.log(f"Prompt: {prompt}")
+                if prompt in responses:
+                    response = responses[prompt]
+                    self.log(f"Response: {response}")
+                    return response
+                return ""
+
+            output_capture = OutputCapture()
+            previous_stdout = sys.stdout
+            sys.stdout = output_capture
+            try:
+                # Run CLI with the mocked input
+                with patch("builtins.input", side_effect=mock_input):
+                    try:
+                        import cli
+
+                        cli.main()
+                    except Exception as e:
+                        self.log(f"Error during CLI execution: {e}")
+                        return False
+            finally:
+                # Restore stdout on every path, including the early return
+                # above, so later output is no longer captured.
+                sys.stdout = previous_stdout
+
+            # Get output and extract record ID
+            cli_output = output_capture.get_output()
+            record_id = self.extract_record_id(cli_output)
+
+            if not record_id:
+                self.log("Failed to extract record ID")
+                return False
+
+            self.log(f"Successfully created record with ID: {record_id}")
+
+            # Validate the record
+            return self.download_and_validate_record(record_id)
+
+        except Exception as e:
+            self.log(f"Error in test submission: {e}")
+            return False
+        finally:
+            # Cleanup
+            if test_csv and os.path.exists(test_csv):
+                os.remove(test_csv)
+            self.log("Test files cleaned up")
+
+
+def main():
+    """Run one test submission and log the outcome.
+
+    Returns True when submission and validation succeeded, False otherwise.
+    """
+    tester = CaltechDataTester()
+
+    success = tester.run_test_submission()
+
+    if success:
+        tester.log("\n🎉 Test submission and validation completed successfully!")
+    else:
+        tester.log("\n❌ Test submission or validation failed - check logs for details")
+
+    tester.log(f"\nTest logs available at: {tester.log_file}")
+    return success
+
+
+if __name__ == "__main__":
+    # A non-zero exit status lets callers such as CI detect a failed run.
+    sys.exit(0 if main() else 1)
diff --git a/tests/data/invalid_datacite43/invalid_metadata_1.json b/tests/data/invalid_datacite43/invalid_metadata_1.json
new file mode 100644
index 0000000..1bba16b
--- /dev/null
+++ b/tests/data/invalid_datacite43/invalid_metadata_1.json
@@ -0,0 +1,12 @@
+{
+ "creators": [
+ {
+ "name": "John Doe"
+ }
+ ],
+ "publisher": "Caltech",
+ "publicationYear": "2023",
+ "types": {
+ "resourceTypeGeneral": "Dataset"
+ }
+}
\ No newline at end of file
diff --git a/tests/data/invalid_datacite43/invalid_metadata_10.json b/tests/data/invalid_datacite43/invalid_metadata_10.json
new file mode 100644
index 0000000..759757d
--- /dev/null
+++ b/tests/data/invalid_datacite43/invalid_metadata_10.json
@@ -0,0 +1,18 @@
+{
+ "titles": [
+ {
+ "title": "Sample Title"
+ }
+ ],
+ "creators": [
+ {
+ "name": "John Doe"
+ }
+ ],
+ "version": 1,
+ "publisher": "Caltech",
+ "publicationYear": "2023",
+ "types": {
+ "resourceTypeGeneral": "Dataset"
+ }
+}
\ No newline at end of file
diff --git a/tests/data/invalid_datacite43/invalid_metadata_2.json b/tests/data/invalid_datacite43/invalid_metadata_2.json
new file mode 100644
index 0000000..3899136
--- /dev/null
+++ b/tests/data/invalid_datacite43/invalid_metadata_2.json
@@ -0,0 +1,13 @@
+{
+ "titles": [],
+ "creators": [
+ {
+ "name": "John Doe"
+ }
+ ],
+ "publisher": "Caltech",
+ "publicationYear": "2023",
+ "types": {
+ "resourceTypeGeneral": "Dataset"
+ }
+}
\ No newline at end of file
diff --git a/tests/data/invalid_datacite43/invalid_metadata_3.json b/tests/data/invalid_datacite43/invalid_metadata_3.json
new file mode 100644
index 0000000..707dbab
--- /dev/null
+++ b/tests/data/invalid_datacite43/invalid_metadata_3.json
@@ -0,0 +1,12 @@
+{
+ "titles": [
+ {
+ "title": "Sample Title"
+ }
+ ],
+ "publisher": "Caltech",
+ "publicationYear": "2023",
+ "types": {
+ "resourceTypeGeneral": "Dataset"
+ }
+}
\ No newline at end of file
diff --git a/tests/data/invalid_datacite43/invalid_metadata_4.json b/tests/data/invalid_datacite43/invalid_metadata_4.json
new file mode 100644
index 0000000..f7d2fe4
--- /dev/null
+++ b/tests/data/invalid_datacite43/invalid_metadata_4.json
@@ -0,0 +1,20 @@
+{
+ "titles": [
+ {
+ "title": "Sample Title"
+ }
+ ],
+ "creators": [
+ {
+ "name": "John Doe"
+ }
+ ],
+ "contributors": [
+ {}
+ ],
+ "publisher": "Caltech",
+ "publicationYear": "2023",
+ "types": {
+ "resourceTypeGeneral": "Dataset"
+ }
+}
\ No newline at end of file
diff --git a/tests/data/invalid_datacite43/invalid_metadata_5.json b/tests/data/invalid_datacite43/invalid_metadata_5.json
new file mode 100644
index 0000000..deeff7f
--- /dev/null
+++ b/tests/data/invalid_datacite43/invalid_metadata_5.json
@@ -0,0 +1,22 @@
+{
+ "titles": [
+ {
+ "title": "Sample Title"
+ }
+ ],
+ "creators": [
+ {
+ "name": "John Doe"
+ }
+ ],
+ "descriptions": [
+ {
+ "description": "Sample Description"
+ }
+ ],
+ "publisher": "Caltech",
+ "publicationYear": "2023",
+ "types": {
+ "resourceTypeGeneral": "Dataset"
+ }
+}
\ No newline at end of file
diff --git a/tests/data/invalid_datacite43/invalid_metadata_6.json b/tests/data/invalid_datacite43/invalid_metadata_6.json
new file mode 100644
index 0000000..8fa14f1
--- /dev/null
+++ b/tests/data/invalid_datacite43/invalid_metadata_6.json
@@ -0,0 +1,22 @@
+{
+ "titles": [
+ {
+ "title": "Sample Title"
+ }
+ ],
+ "creators": [
+ {
+ "name": "John Doe"
+ }
+ ],
+ "fundingReferences": [
+ {
+ "funderIdentifier": "1234"
+ }
+ ],
+ "publisher": "Caltech",
+ "publicationYear": "2023",
+ "types": {
+ "resourceTypeGeneral": "Dataset"
+ }
+}
\ No newline at end of file
diff --git a/tests/data/invalid_datacite43/invalid_metadata_7.json b/tests/data/invalid_datacite43/invalid_metadata_7.json
new file mode 100644
index 0000000..bae4d11
--- /dev/null
+++ b/tests/data/invalid_datacite43/invalid_metadata_7.json
@@ -0,0 +1,20 @@
+{
+ "titles": [
+ {
+ "title": "Sample Title"
+ }
+ ],
+ "creators": [
+ {
+ "name": "John Doe"
+ }
+ ],
+ "identifiers": [
+ {}
+ ],
+ "publisher": "Caltech",
+ "publicationYear": "2023",
+ "types": {
+ "resourceTypeGeneral": "Dataset"
+ }
+}
\ No newline at end of file
diff --git a/tests/data/invalid_datacite43/invalid_metadata_8.json b/tests/data/invalid_datacite43/invalid_metadata_8.json
new file mode 100644
index 0000000..247f3ff
--- /dev/null
+++ b/tests/data/invalid_datacite43/invalid_metadata_8.json
@@ -0,0 +1,20 @@
+{
+ "titles": [
+ {
+ "title": "Sample Title"
+ }
+ ],
+ "creators": [
+ {
+ "name": "John Doe"
+ }
+ ],
+ "dates": [
+ {}
+ ],
+ "publisher": "Caltech",
+ "publicationYear": "2023",
+ "types": {
+ "resourceTypeGeneral": "Dataset"
+ }
+}
\ No newline at end of file
diff --git a/tests/data/invalid_datacite43/invalid_metadata_9.json b/tests/data/invalid_datacite43/invalid_metadata_9.json
new file mode 100644
index 0000000..2eddcf1
--- /dev/null
+++ b/tests/data/invalid_datacite43/invalid_metadata_9.json
@@ -0,0 +1,16 @@
+{
+ "titles": [
+ {
+ "title": "Sample Title"
+ }
+ ],
+ "creators": [
+ {
+ "name": "John Doe"
+ }
+ ],
+ "publicationYear": "2023",
+ "types": {
+ "resourceTypeGeneral": "Dataset"
+ }
+}
\ No newline at end of file
diff --git a/tests/data/invalid_datacite43/missing_creators.json b/tests/data/invalid_datacite43/missing_creators.json
new file mode 100644
index 0000000..0d0f1a1
--- /dev/null
+++ b/tests/data/invalid_datacite43/missing_creators.json
@@ -0,0 +1,263 @@
+{
+ "contributors": [
+ {
+ "nameIdentifiers": [
+ {
+ "nameIdentifier": "grid.20861.3d",
+ "nameIdentifierScheme": "GRID"
+ }
+ ],
+ "name": "California Institute of Technology, Pasadena, CA (US)",
+ "contributorType": "HostingInstitution"
+ },
+ {
+ "affiliation": [
+ {
+ "name": "California Institute of Technology, Pasadena, CA (US)"
+ }
+ ],
+ "nameIdentifiers": [
+ {
+ "nameIdentifier": "0000-0001-5383-8462",
+ "nameIdentifierScheme": "ORCID"
+ }
+ ],
+ "name": "Roehl, C. M.",
+ "contributorType": "DataCurator"
+ },
+ {
+ "affiliation": [
+ {
+ "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
+ }
+ ],
+ "nameIdentifiers": [
+ {
+ "nameIdentifier": "0000-0001-9947-1053",
+ "nameIdentifierScheme": "ORCID"
+ },
+ {
+ "nameIdentifier": "D-2563-2012",
+ "nameIdentifierScheme": "ResearcherID"
+ }
+ ],
+ "name": "Kimberly Strong",
+ "contributorType": "ContactPerson"
+ },
+ {
+ "name": "TCCON",
+ "contributorType": "ResearchGroup"
+ }
+ ],
+ "descriptions": [
+ {
+ "descriptionType": "Abstract",
+ "description": "
The Total Carbon Column Observing Network (TCCON) is a network of ground-based Fourier Transform Spectrometers that record direct solar absorption spectra of the atmosphere in the near-infrared. From these spectra, accurate and precise column-averaged abundances of atmospheric constituents including CO2, CH4, N2O, HF, CO, H2O, and HDO, are retrieved. This data set contains observations from the TCCON station at Eureka, Canada."
+ },
+ {
+ "descriptionType": "Other",
+ "description": "
Cite this record as:
Strong, K., Roche, S., Franklin, J. E., Mendonca, J., Lutsch, E., Weaver, D., \u2026 Lindenmaier, R. (2019). TCCON data from Eureka (CA), Release GGG2014.R3 [Data set]. CaltechDATA. https://doi.org/10.14291/tccon.ggg2014.eureka01.r3
or choose a different citation style.
Download Citation
"
+ },
+ {
+ "descriptionType": "Other",
+ "description": "
Unique Views: 161
Unique Downloads: 7
between January 31, 2019 and July 02, 2020
More info on how stats are collected
"
+ }
+ ],
+ "fundingReferences": [
+ {
+ "funderName": "Atlantic Innovation Fund"
+ },
+ {
+ "funderName": "Canada Foundation for Innovation",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.439998.6"
+ },
+ {
+ "funderName": "Canadian Foundation for Climate and Atmospheric Sciences"
+ },
+ {
+ "funderName": "Canadian Space Agency",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.236846.d"
+ },
+ {
+ "funderName": "Environment and Climate Change Canada",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.410334.1"
+ },
+ {
+ "funderName": "Government of Canada (International Polar Year funding)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.451254.3"
+ },
+ {
+ "funderName": "Natural Sciences and Engineering Research Council of Canada",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.452912.9"
+ },
+ {
+ "funderName": "Polar Commission (Northern Scientific Training Program)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.465477.3"
+ },
+ {
+ "funderName": "Nova Scotia Research Innovation Trust"
+ },
+ {
+ "funderName": "Ministry of Research and Innovation (Ontario Innovation Trust and Ontario Research Fund)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.451078.f"
+ },
+ {
+ "funderName": "Natural Resources Canada (Polar Continental Shelf Program)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.202033.0"
+ }
+ ],
+ "language": "eng",
+ "relatedIdentifiers": [
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.documentation.R0/1221662",
+ "relationType": "IsDocumentedBy",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R0/1149271",
+ "relationType": "IsNewVersionOf",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "https://tccon-wiki.caltech.edu/Network_Policy/Data_Use_Policy/Data_Description",
+ "relationType": "IsDocumentedBy",
+ "relatedIdentifierType": "URL"
+ },
+ {
+ "relatedIdentifier": "https://tccon-wiki.caltech.edu/Sites",
+ "relationType": "IsDocumentedBy",
+ "relatedIdentifierType": "URL"
+ },
+ {
+ "relatedIdentifier": "10.14291/TCCON.GGG2014",
+ "relationType": "IsPartOf",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R1/1325515",
+ "relationType": "IsNewVersionOf",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R2",
+ "relationType": "IsNewVersionOf",
+ "relatedIdentifierType": "DOI"
+ }
+ ],
+ "rightsList": [
+ {
+ "rights": "TCCON Data License",
+ "rightsURI": "https://data.caltech.edu/tindfiles/serve/8298981c-6613-4ed9-9c54-5ef8fb5180f4/"
+ }
+ ],
+ "subjects": [
+ {
+ "subject": "atmospheric trace gases"
+ },
+ {
+ "subject": "CO2"
+ },
+ {
+ "subject": "CH4"
+ },
+ {
+ "subject": "CO"
+ },
+ {
+ "subject": "N2O"
+ },
+ {
+ "subject": "column-averaged dry-air mole fractions"
+ },
+ {
+ "subject": "remote sensing"
+ },
+ {
+ "subject": "FTIR spectroscopy"
+ },
+ {
+ "subject": "TCCON"
+ }
+ ],
+ "version": "R3",
+ "titles": [
+ {
+ "title": "TCCON data from Eureka (CA), Release GGG2014.R3"
+ }
+ ],
+ "formats": [
+ "application/x-netcdf"
+ ],
+ "dates": [
+ {
+ "date": "2019-01-31",
+ "dateType": "Created"
+ },
+ {
+ "date": "2020-07-01",
+ "dateType": "Updated"
+ },
+ {
+ "date": "2010-07-24/2019-08-15",
+ "dateType": "Collected"
+ },
+ {
+ "date": "2019-01-31",
+ "dateType": "Submitted"
+ },
+ {
+ "date": "2019-01-31",
+ "dateType": "Issued"
+ }
+ ],
+ "publicationYear": "2019",
+ "publisher": "CaltechDATA",
+ "types": {
+ "resourceTypeGeneral": "Dataset",
+ "resourceType": "Dataset"
+ },
+ "identifiers": [
+ {
+ "identifier": "10.14291/tccon.ggg2014.eureka01.R3",
+ "identifierType": "DOI"
+ },
+ {
+ "identifier": "1171",
+ "identifierType": "CaltechDATA_Identifier"
+ },
+ {
+ "identifier": "GGG2014",
+ "identifierType": "Software_Version"
+ },
+ {
+ "identifier": "eu",
+ "identifierType": "id"
+ },
+ {
+ "identifier": "eureka01",
+ "identifierType": "longName"
+ },
+ {
+ "identifier": "R1",
+ "identifierType": "Data_Revision"
+ }
+ ],
+ "geoLocations": [
+ {
+ "geoLocationPlace": "Eureka, NU (CA)",
+ "geoLocationPoint": {
+ "pointLatitude": "80.05",
+ "pointLongitude": "-86.42"
+ }
+ }
+ ],
+ "schemaVersion": "http://datacite.org/schema/kernel-4"
+}
\ No newline at end of file
diff --git a/tests/data/invalid_datacite43/missing_publisher.json b/tests/data/invalid_datacite43/missing_publisher.json
new file mode 100644
index 0000000..9035027
--- /dev/null
+++ b/tests/data/invalid_datacite43/missing_publisher.json
@@ -0,0 +1,350 @@
+{
+ "contributors": [
+ {
+ "nameIdentifiers": [
+ {
+ "nameIdentifier": "grid.20861.3d",
+ "nameIdentifierScheme": "GRID"
+ }
+ ],
+ "name": "California Institute of Technology, Pasadena, CA (US)",
+ "contributorType": "HostingInstitution"
+ },
+ {
+ "affiliation": [
+ {
+ "name": "California Institute of Technology, Pasadena, CA (US)"
+ }
+ ],
+ "nameIdentifiers": [
+ {
+ "nameIdentifier": "0000-0001-5383-8462",
+ "nameIdentifierScheme": "ORCID"
+ }
+ ],
+ "name": "Roehl, C. M.",
+ "contributorType": "DataCurator"
+ },
+ {
+ "affiliation": [
+ {
+ "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
+ }
+ ],
+ "nameIdentifiers": [
+ {
+ "nameIdentifier": "0000-0001-9947-1053",
+ "nameIdentifierScheme": "ORCID"
+ },
+ {
+ "nameIdentifier": "D-2563-2012",
+ "nameIdentifierScheme": "ResearcherID"
+ }
+ ],
+ "name": "Kimberly Strong",
+ "contributorType": "ContactPerson"
+ },
+ {
+ "name": "TCCON",
+ "contributorType": "ResearchGroup"
+ }
+ ],
+ "descriptions": [
+ {
+ "descriptionType": "Abstract",
+ "description": "
The Total Carbon Column Observing Network (TCCON) is a network of ground-based Fourier Transform Spectrometers that record direct solar absorption spectra of the atmosphere in the near-infrared. From these spectra, accurate and precise column-averaged abundances of atmospheric constituents including CO2, CH4, N2O, HF, CO, H2O, and HDO, are retrieved. This data set contains observations from the TCCON station at Eureka, Canada."
+ },
+ {
+ "descriptionType": "Other",
+ "description": "
Cite this record as:
Strong, K., Roche, S., Franklin, J. E., Mendonca, J., Lutsch, E., Weaver, D., \u2026 Lindenmaier, R. (2019). TCCON data from Eureka (CA), Release GGG2014.R3 [Data set]. CaltechDATA. https://doi.org/10.14291/tccon.ggg2014.eureka01.r3
or choose a different citation style.
Download Citation
"
+ },
+ {
+ "descriptionType": "Other",
+ "description": "
Unique Views: 161
Unique Downloads: 7
between January 31, 2019 and July 02, 2020
More info on how stats are collected
"
+ }
+ ],
+ "fundingReferences": [
+ {
+ "funderName": "Atlantic Innovation Fund"
+ },
+ {
+ "funderName": "Canada Foundation for Innovation",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.439998.6"
+ },
+ {
+ "funderName": "Canadian Foundation for Climate and Atmospheric Sciences"
+ },
+ {
+ "funderName": "Canadian Space Agency",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.236846.d"
+ },
+ {
+ "funderName": "Environment and Climate Change Canada",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.410334.1"
+ },
+ {
+ "funderName": "Government of Canada (International Polar Year funding)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.451254.3"
+ },
+ {
+ "funderName": "Natural Sciences and Engineering Research Council of Canada",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.452912.9"
+ },
+ {
+ "funderName": "Polar Commission (Northern Scientific Training Program)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.465477.3"
+ },
+ {
+ "funderName": "Nova Scotia Research Innovation Trust"
+ },
+ {
+ "funderName": "Ministry of Research and Innovation (Ontario Innovation Trust and Ontario Research Fund)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.451078.f"
+ },
+ {
+ "funderName": "Natural Resources Canada (Polar Continental Shelf Program)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.202033.0"
+ }
+ ],
+ "language": "eng",
+ "relatedIdentifiers": [
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.documentation.R0/1221662",
+ "relationType": "IsDocumentedBy",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R0/1149271",
+ "relationType": "IsNewVersionOf",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "https://tccon-wiki.caltech.edu/Network_Policy/Data_Use_Policy/Data_Description",
+ "relationType": "IsDocumentedBy",
+ "relatedIdentifierType": "URL"
+ },
+ {
+ "relatedIdentifier": "https://tccon-wiki.caltech.edu/Sites",
+ "relationType": "IsDocumentedBy",
+ "relatedIdentifierType": "URL"
+ },
+ {
+ "relatedIdentifier": "10.14291/TCCON.GGG2014",
+ "relationType": "IsPartOf",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R1/1325515",
+ "relationType": "IsNewVersionOf",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R2",
+ "relationType": "IsNewVersionOf",
+ "relatedIdentifierType": "DOI"
+ }
+ ],
+ "rightsList": [
+ {
+ "rights": "TCCON Data License",
+ "rightsURI": "https://data.caltech.edu/tindfiles/serve/8298981c-6613-4ed9-9c54-5ef8fb5180f4/"
+ }
+ ],
+ "subjects": [
+ {
+ "subject": "atmospheric trace gases"
+ },
+ {
+ "subject": "CO2"
+ },
+ {
+ "subject": "CH4"
+ },
+ {
+ "subject": "CO"
+ },
+ {
+ "subject": "N2O"
+ },
+ {
+ "subject": "column-averaged dry-air mole fractions"
+ },
+ {
+ "subject": "remote sensing"
+ },
+ {
+ "subject": "FTIR spectroscopy"
+ },
+ {
+ "subject": "TCCON"
+ }
+ ],
+ "version": "R3",
+ "titles": [
+ {
+ "title": "TCCON data from Eureka (CA), Release GGG2014.R3"
+ }
+ ],
+ "formats": [
+ "application/x-netcdf"
+ ],
+ "dates": [
+ {
+ "date": "2019-01-31",
+ "dateType": "Created"
+ },
+ {
+ "date": "2020-07-01",
+ "dateType": "Updated"
+ },
+ {
+ "date": "2010-07-24/2019-08-15",
+ "dateType": "Collected"
+ },
+ {
+ "date": "2019-01-31",
+ "dateType": "Submitted"
+ },
+ {
+ "date": "2019-01-31",
+ "dateType": "Issued"
+ }
+ ],
+ "publicationYear": "2019",
+ "types": {
+ "resourceTypeGeneral": "Dataset",
+ "resourceType": "Dataset"
+ },
+ "identifiers": [
+ {
+ "identifier": "10.14291/tccon.ggg2014.eureka01.R3",
+ "identifierType": "DOI"
+ },
+ {
+ "identifier": "1171",
+ "identifierType": "CaltechDATA_Identifier"
+ },
+ {
+ "identifier": "GGG2014",
+ "identifierType": "Software_Version"
+ },
+ {
+ "identifier": "eu",
+ "identifierType": "id"
+ },
+ {
+ "identifier": "eureka01",
+ "identifierType": "longName"
+ },
+ {
+ "identifier": "R1",
+ "identifierType": "Data_Revision"
+ }
+ ],
+ "creators": [
+ {
+ "affiliation": [
+ {
+ "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
+ }
+ ],
+ "name": "Strong, K."
+ },
+ {
+ "affiliation": [
+ {
+ "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
+ }
+ ],
+ "name": "Roche, S."
+ },
+ {
+ "affiliation": [
+ {
+ "name": "School of Engineering and Applied Sciences, Harvard University, Cambridge, MA (USA)"
+ }
+ ],
+ "name": "Franklin, J. E."
+ },
+ {
+ "affiliation": [
+ {
+ "name": "Environment and Climate Change Canada, Downsview, ON (CA)"
+ }
+ ],
+ "name": "Mendonca, J."
+ },
+ {
+ "affiliation": [
+ {
+ "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
+ }
+ ],
+ "name": "Lutsch, E."
+ },
+ {
+ "affiliation": [
+ {
+ "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
+ }
+ ],
+ "name": "Weaver, D."
+ },
+ {
+ "affiliation": [
+ {
+ "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
+ }
+ ],
+ "name": "Fogal, P. F."
+ },
+ {
+ "affiliation": [
+ {
+ "name": "Department of Physics & Atmospheric Science, Dalhousie University, Halifax, NS, CA"
+ }
+ ],
+ "name": "Drummond, J. R."
+ },
+ {
+ "affiliation": [
+ {
+ "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
+ },
+ {
+ "name": "UCAR Center for Science Education, Boulder, CO (US)"
+ }
+ ],
+ "name": "Batchelor, R."
+ },
+ {
+ "affiliation": [
+ {
+ "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
+ },
+ {
+ "name": "Pacific Northwest National Laboratory, Richland, WA (US)"
+ }
+ ],
+ "name": "Lindenmaier, R."
+ }
+ ],
+ "geoLocations": [
+ {
+ "geoLocationPlace": "Eureka, NU (CA)",
+ "geoLocationPoint": {
+ "pointLatitude": "80.05",
+ "pointLongitude": "-86.42"
+ }
+ }
+ ],
+ "schemaVersion": "http://datacite.org/schema/kernel-4"
+}
\ No newline at end of file
diff --git a/tests/data/invalid_datacite43/multiple_errors.json b/tests/data/invalid_datacite43/multiple_errors.json
new file mode 100644
index 0000000..c18931b
--- /dev/null
+++ b/tests/data/invalid_datacite43/multiple_errors.json
@@ -0,0 +1,263 @@
+{
+ "contributors": [
+ {
+ "nameIdentifiers": [
+ {
+ "nameIdentifier": "grid.20861.3d",
+ "nameIdentifierScheme": "GRID"
+ }
+ ],
+ "name": "California Institute of Techonolgy, Pasadena, CA (US)",
+ "contributorType": "HostingInstitution"
+ },
+ {
+ "affiliation": [
+ {
+ "name": "California Institute of Technology, Pasadena, CA (US)"
+ }
+ ],
+ "nameIdentifiers": [
+ {
+ "nameIdentifier": "0000-0001-5383-8462",
+ "nameIdentifierScheme": "ORCID"
+ }
+ ],
+ "name": "Roehl, C. M.",
+ "contributorType": "DataCurator"
+ },
+ {
+ "affiliation": [
+ {
+ "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
+ }
+ ],
+ "nameIdentifiers": [
+ {
+ "nameIdentifier": "0000-0001-9947-1053",
+ "nameIdentifierScheme": "ORCID"
+ },
+ {
+ "nameIdentifier": "D-2563-2012",
+ "nameIdentifierScheme": "ResearcherID"
+ }
+ ],
+ "name": "Kimberly Strong",
+ "contributorType": "ContactPerson"
+ },
+ {
+ "name": "TCCON",
+ "contributorType": "ResearchGroup"
+ }
+ ],
+ "descriptions": [
+ {
+ "descriptionType": "Abstract",
+ "description": "\nThe Total Carbon Column Observing Network (TCCON) is a network of ground-based Fourier Transform Spectrometers that record direct solar absorption spectra of the atmosphere in the near-infrared. From these spectra, accurate and precise column-averaged abundances of atmospheric constituents including CO2, CH4, N2O, HF, CO, H2O, and HDO, are retrieved. This data set contains observations from the TCCON station at Eureka, Canada."
+ },
+ {
+ "descriptionType": "Other",
+ "description": "\nCite this record as:\nStrong, K., Roche, S., Franklin, J. E., Mendonca, J., Lutsch, E., Weaver, D., \u2026 Lindenmaier, R. (2019). TCCON data from Eureka (CA), Release GGG2014.R3 [Data set]. CaltechDATA. https://doi.org/10.14291/tccon.ggg2014.eureka01.r3\nor choose a different citation style.\nDownload Citation\n"
+ },
+ {
+ "descriptionType": "Other",
+ "description": "\nUnique Views: 161\nUnique Downloads: 7\nbetween January 31, 2019 and July 02, 2020\nMore info on how stats are collected\n"
+ }
+ ],
+ "fundingReferences": [
+ {
+ "funderName": "Atlantic Innovation Fund"
+ },
+ {
+ "funderName": "Canada Foundation for Innovation",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.439998.6"
+ },
+ {
+ "funderName": "Canadian Foundation for Climate and Atmospheric Sciences"
+ },
+ {
+ "funderName": "Canadian Space Agency",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.236846.d"
+ },
+ {
+ "funderName": "Environment and Climate Change Canada",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.410334.1"
+ },
+ {
+ "funderName": "Government of Canada (International Polar Year funding)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.451254.3"
+ },
+ {
+ "funderName": "Natural Sciences and Engineering Research Council of Canada",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.452912.9"
+ },
+ {
+ "funderName": "Polar Commission (Northern Scientific Training Program)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.465477.3"
+ },
+ {
+ "funderName": "Nova Scotia Research Innovation Trust"
+ },
+ {
+ "funderName": "Ministry of Research and Innovation (Ontario Innovation Trust and Ontario Research Fund)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.451078.f"
+ },
+ {
+ "funderName": "Natural Resources Canada (Polar Continental Shelf Program)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.202033.0"
+ }
+ ],
+ "language": "eng",
+ "relatedIdentifiers": [
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.documentation.R0/1221662",
+ "relationType": "IsDocumentedBy",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R0/1149271",
+ "relationType": "IsNewVersionOf",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "https://tccon-wiki.caltech.edu/Network_Policy/Data_Use_Policy/Data_Description",
+ "relationType": "IsDocumentedBy",
+ "relatedIdentifierType": "URL"
+ },
+ {
+ "relatedIdentifier": "https://tccon-wiki.caltech.edu/Sites",
+ "relationType": "IsDocumentedBy",
+ "relatedIdentifierType": "URL"
+ },
+ {
+ "relatedIdentifier": "10.14291/TCCON.GGG2014",
+ "relationType": "IsPartOf",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R1/1325515",
+ "relationType": "IsNewVersionOf",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R2",
+ "relationType": "IsNewVersionOf",
+ "relatedIdentifierType": "DOI"
+ }
+ ],
+ "rightsList": [
+ {
+ "rights": "TCCON Data License",
+ "rightsURI": "https://data.caltech.edu/tindfiles/serve/8298981c-6613-4ed9-9c54-5ef8fb5180f4/"
+ }
+ ],
+ "subjects": [
+ {
+ "subject": "atmospheric trace gases"
+ },
+ {
+ "subject": "CO2"
+ },
+ {
+ "subject": "CH4"
+ },
+ {
+ "subject": "CO"
+ },
+ {
+ "subject": "N2O"
+ },
+ {
+ "subject": "column-averaged dry-air mole fractions"
+ },
+ {
+ "subject": "remote sensing"
+ },
+ {
+ "subject": "FTIR spectroscopy"
+ },
+ {
+ "subject": "TCCON"
+ }
+ ],
+ "version": "R3",
+ "titles": [
+ {
+ "title": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+ }
+ ],
+ "formats": [
+ "application/x-netcdf"
+ ],
+ "dates": [
+ {
+ "date": "31-01-2019",
+ "dateType": "Created"
+ },
+ {
+ "date": "2020-07-01",
+ "dateType": "Updated"
+ },
+ {
+ "date": "2010-07-24/2019-08-15",
+ "dateType": "Collected"
+ },
+ {
+ "date": "2019-01-31",
+ "dateType": "Submitted"
+ },
+ {
+ "date": "2019-01-31",
+ "dateType": "Issued"
+ }
+ ],
+ "publicationYear": "2019",
+ "publisher": "CaltechDATA",
+ "types": {
+ "resourceTypeGeneral": "Dataset",
+ "resourceType": "Dataset"
+ },
+ "identifiers": [
+ {
+ "identifier": "10.14291/tccon.ggg2014.eureka01.R3",
+ "identifierType": "DOI"
+ },
+ {
+ "identifier": "1171",
+ "identifierType": "CaltechDATA_Identifier"
+ },
+ {
+ "identifier": "GGG2014",
+ "identifierType": "Software_Version"
+ },
+ {
+ "identifier": "eu",
+ "identifierType": "id"
+ },
+ {
+ "identifier": "eureka01",
+ "identifierType": "longName"
+ },
+ {
+ "identifier": "R1",
+ "identifierType": "Data_Revision"
+ }
+ ],
+ "geoLocations": [
+ {
+ "geoLocationPlace": "Eureka, NU (CA)",
+ "geoLocationPoint": {
+ "pointLatitude": "80.05",
+ "pointLongitude": "-86.42"
+ }
+ }
+ ],
+ "schemaVersion": "http://datacite.org/schema/kernel-4"
+}
\ No newline at end of file
diff --git a/tests/data/invalid_datacite43/type_error_creators.json b/tests/data/invalid_datacite43/type_error_creators.json
new file mode 100644
index 0000000..6200870
--- /dev/null
+++ b/tests/data/invalid_datacite43/type_error_creators.json
@@ -0,0 +1,264 @@
+{
+ "contributors": [
+ {
+ "nameIdentifiers": [
+ {
+ "nameIdentifier": "grid.20861.3d",
+ "nameIdentifierScheme": "GRID"
+ }
+ ],
+ "name": "California Institute of Techonolgy, Pasadena, CA (US)",
+ "contributorType": "HostingInstitution"
+ },
+ {
+ "affiliation": [
+ {
+ "name": "California Institute of Technology, Pasadena, CA (US)"
+ }
+ ],
+ "nameIdentifiers": [
+ {
+ "nameIdentifier": "0000-0001-5383-8462",
+ "nameIdentifierScheme": "ORCID"
+ }
+ ],
+ "name": "Roehl, C. M.",
+ "contributorType": "DataCurator"
+ },
+ {
+ "affiliation": [
+ {
+ "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
+ }
+ ],
+ "nameIdentifiers": [
+ {
+ "nameIdentifier": "0000-0001-9947-1053",
+ "nameIdentifierScheme": "ORCID"
+ },
+ {
+ "nameIdentifier": "D-2563-2012",
+ "nameIdentifierScheme": "ResearcherID"
+ }
+ ],
+ "name": "Kimberly Strong",
+ "contributorType": "ContactPerson"
+ },
+ {
+ "name": "TCCON",
+ "contributorType": "ResearchGroup"
+ }
+ ],
+ "descriptions": [
+ {
+ "descriptionType": "Abstract",
+ "description": "\nThe Total Carbon Column Observing Network (TCCON) is a network of ground-based Fourier Transform Spectrometers that record direct solar absorption spectra of the atmosphere in the near-infrared. From these spectra, accurate and precise column-averaged abundances of atmospheric constituents including CO2, CH4, N2O, HF, CO, H2O, and HDO, are retrieved. This data set contains observations from the TCCON station at Eureka, Canada."
+ },
+ {
+ "descriptionType": "Other",
+ "description": "\nCite this record as:\nStrong, K., Roche, S., Franklin, J. E., Mendonca, J., Lutsch, E., Weaver, D., \u2026 Lindenmaier, R. (2019). TCCON data from Eureka (CA), Release GGG2014.R3 [Data set]. CaltechDATA. https://doi.org/10.14291/tccon.ggg2014.eureka01.r3\nor choose a different citation style.\nDownload Citation\n"
+ },
+ {
+ "descriptionType": "Other",
+ "description": "\nUnique Views: 161\nUnique Downloads: 7\nbetween January 31, 2019 and July 02, 2020\nMore info on how stats are collected\n"
+ }
+ ],
+ "fundingReferences": [
+ {
+ "funderName": "Atlantic Innovation Fund"
+ },
+ {
+ "funderName": "Canada Foundation for Innovation",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.439998.6"
+ },
+ {
+ "funderName": "Canadian Foundation for Climate and Atmospheric Sciences"
+ },
+ {
+ "funderName": "Canadian Space Agency",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.236846.d"
+ },
+ {
+ "funderName": "Environment and Climate Change Canada",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.410334.1"
+ },
+ {
+ "funderName": "Government of Canada (International Polar Year funding)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.451254.3"
+ },
+ {
+ "funderName": "Natural Sciences and Engineering Research Council of Canada",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.452912.9"
+ },
+ {
+ "funderName": "Polar Commission (Northern Scientific Training Program)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.465477.3"
+ },
+ {
+ "funderName": "Nova Scotia Research Innovation Trust"
+ },
+ {
+ "funderName": "Ministry of Research and Innovation (Ontario Innovation Trust and Ontario Research Fund)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.451078.f"
+ },
+ {
+ "funderName": "Natural Resources Canada (Polar Continental Shelf Program)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.202033.0"
+ }
+ ],
+ "language": "eng",
+ "relatedIdentifiers": [
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.documentation.R0/1221662",
+ "relationType": "IsDocumentedBy",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R0/1149271",
+ "relationType": "IsNewVersionOf",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "https://tccon-wiki.caltech.edu/Network_Policy/Data_Use_Policy/Data_Description",
+ "relationType": "IsDocumentedBy",
+ "relatedIdentifierType": "URL"
+ },
+ {
+ "relatedIdentifier": "https://tccon-wiki.caltech.edu/Sites",
+ "relationType": "IsDocumentedBy",
+ "relatedIdentifierType": "URL"
+ },
+ {
+ "relatedIdentifier": "10.14291/TCCON.GGG2014",
+ "relationType": "IsPartOf",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R1/1325515",
+ "relationType": "IsNewVersionOf",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R2",
+ "relationType": "IsNewVersionOf",
+ "relatedIdentifierType": "DOI"
+ }
+ ],
+ "rightsList": [
+ {
+ "rights": "TCCON Data License",
+ "rightsURI": "https://data.caltech.edu/tindfiles/serve/8298981c-6613-4ed9-9c54-5ef8fb5180f4/"
+ }
+ ],
+ "subjects": [
+ {
+ "subject": "atmospheric trace gases"
+ },
+ {
+ "subject": "CO2"
+ },
+ {
+ "subject": "CH4"
+ },
+ {
+ "subject": "CO"
+ },
+ {
+ "subject": "N2O"
+ },
+ {
+ "subject": "column-averaged dry-air mole fractions"
+ },
+ {
+ "subject": "remote sensing"
+ },
+ {
+ "subject": "FTIR spectroscopy"
+ },
+ {
+ "subject": "TCCON"
+ }
+ ],
+ "version": "R3",
+ "titles": [
+ {
+ "title": "TCCON data from Eureka (CA), Release GGG2014.R3"
+ }
+ ],
+ "formats": [
+ "application/x-netcdf"
+ ],
+ "dates": [
+ {
+ "date": "2019-01-31",
+ "dateType": "Created"
+ },
+ {
+ "date": "2020-07-01",
+ "dateType": "Updated"
+ },
+ {
+ "date": "2010-07-24/2019-08-15",
+ "dateType": "Collected"
+ },
+ {
+ "date": "2019-01-31",
+ "dateType": "Submitted"
+ },
+ {
+ "date": "2019-01-31",
+ "dateType": "Issued"
+ }
+ ],
+ "publicationYear": "2019",
+ "publisher": "CaltechDATA",
+ "types": {
+ "resourceTypeGeneral": "Dataset",
+ "resourceType": "Dataset"
+ },
+ "identifiers": [
+ {
+ "identifier": "10.14291/tccon.ggg2014.eureka01.R3",
+ "identifierType": "DOI"
+ },
+ {
+ "identifier": "1171",
+ "identifierType": "CaltechDATA_Identifier"
+ },
+ {
+ "identifier": "GGG2014",
+ "identifierType": "Software_Version"
+ },
+ {
+ "identifier": "eu",
+ "identifierType": "id"
+ },
+ {
+ "identifier": "eureka01",
+ "identifierType": "longName"
+ },
+ {
+ "identifier": "R1",
+ "identifierType": "Data_Revision"
+ }
+ ],
+ "creators": "Incorrect type",
+ "geoLocations": [
+ {
+ "geoLocationPlace": "Eureka, NU (CA)",
+ "geoLocationPoint": {
+ "pointLatitude": "80.05",
+ "pointLongitude": "-86.42"
+ }
+ }
+ ],
+ "schemaVersion": "http://datacite.org/schema/kernel-4"
+}
\ No newline at end of file
diff --git a/tests/test_unit.py b/tests/test_unit.py
new file mode 100644
index 0000000..c9b57d2
--- /dev/null
+++ b/tests/test_unit.py
@@ -0,0 +1,152 @@
+import os
+import pytest
+from customize_schema import validate_metadata as validator43
+from helpers import load_json_path
+import logging
+from tqdm import tqdm
+
+# Directories for valid and invalid JSON files.
+# Anchored to this file's own location so the tests can be started from any
+# working directory; the previous "../tests/data/..." form only resolved when
+# the current directory was a sibling of tests/.
+_TESTS_DIR = os.path.dirname(os.path.abspath(__file__))
+VALID_DATACITE43_DIR = os.path.join(_TESTS_DIR, "data", "datacite43")
+INVALID_DATACITE43_DIR = os.path.join(_TESTS_DIR, "data", "invalid_datacite43")
+
+
+def get_all_json_files(directory):
+    """Return the paths of the ``.json`` files directly inside *directory*.
+
+    Each returned path is *directory* joined with the file name. The list is
+    sorted because ``os.listdir`` yields entries in arbitrary order, and the
+    result feeds pytest parametrization, which should be stable between runs.
+
+    Raises whatever ``os.listdir`` raises (e.g. ``FileNotFoundError``) when
+    *directory* cannot be listed.
+    """
+    return sorted(
+        os.path.join(directory, name)
+        for name in os.listdir(directory)
+        if name.endswith(".json")
+    )
+
+
+# Resolve the valid and invalid fixture paths once, at import time, so that
+# pytest can parametrize the tests below over them.
+VALID_DATACITE43_FILES, INVALID_DATACITE43_FILES = (
+    get_all_json_files(fixture_dir)
+    for fixture_dir in (VALID_DATACITE43_DIR, INVALID_DATACITE43_DIR)
+)
+
+
+@pytest.mark.parametrize("valid_file", VALID_DATACITE43_FILES)
+def test_valid_json(valid_file):
+    """Check that a valid example file is accepted by the validator.
+
+    The test fails when the validator raises ``ValueError`` or when it
+    returns a truthy value, which is treated as a collection of errors.
+    """
+    print(f"\nValidating file: {valid_file}")  # Log for file being tested
+    record = load_json_path(valid_file)
+    try:
+        problems = validator43(record)
+    except ValueError as e:
+        # pytest.fail raises, so execution never continues past this branch
+        pytest.fail(f"Validation failed for: {valid_file}\nErrors: {str(e)}")
+
+    if problems:
+        pytest.fail(f"Validation failed for: {valid_file}\nErrors: {problems}")
+    print(f"Validation passed for: {valid_file}")
+
+
+@pytest.mark.parametrize("invalid_file", INVALID_DATACITE43_FILES)
+def test_invalid_json(invalid_file):
+    """Check that an invalid example file is rejected by the validator.
+
+    Rejection is either a ``ValueError`` raised by the validator or a truthy
+    return value; a falsy return means the file slipped through and the test
+    fails.
+    """
+    print(f"\nValidating file: {invalid_file}")  # Log for file being tested
+    record = load_json_path(invalid_file)
+    try:
+        problems = validator43(record)
+    except ValueError:
+        print(f"Validation failed as expected for: {invalid_file}")
+        return  # A raised ValueError counts as a correct rejection
+
+    if not problems:
+        pytest.fail(f"Validation passed unexpectedly for: {invalid_file}")
+    print(f"Validation failed as expected for: {invalid_file}")
+
+
+# NOTE(review): these fixtures are referenced directly under tests/data/, not
+# under tests/data/invalid_datacite43/ -- confirm that the files exist there.
+@pytest.mark.parametrize(
+    "missing_field_file",
+    [
+        {"file": "../tests/data/missing_creators.json", "missing_field": "creators"},
+        {"file": "../tests/data/missing_titles.json", "missing_field": "titles"},
+    ],
+)
+def test_missing_required_fields(missing_field_file):
+    """Check that a record lacking a required field is rejected.
+
+    Each case names a fixture file and the field it omits; the validator is
+    expected to raise ``ValueError`` with a message naming that field.
+    """
+    field = missing_field_file["missing_field"]
+    path = missing_field_file["file"]
+    print(f"\nTesting missing field: {field} in file: {path}")
+    record = load_json_path(path)
+    expected_message = f"Missing required metadata field: {field}"
+    with pytest.raises(ValueError, match=expected_message):
+        validator43(record)
+
+
+@pytest.mark.parametrize(
+    "type_error_file",
+    [
+        # This fixture is added under invalid_datacite43/, not directly under
+        # tests/data/, so the path must include that directory.
+        {
+            "file": "../tests/data/invalid_datacite43/type_error_creators.json",
+            "field": "creators",
+        },
+        # NOTE(review): no type_error_dates.json fixture is visible in this
+        # change -- confirm the file exists at this path or add it.
+        {"file": "../tests/data/type_error_dates.json", "field": "dates"},
+    ],
+)
+def test_incorrect_field_types(type_error_file):
+    """Test that JSON files with incorrect field types fail validation.
+
+    Each case names a fixture file and the field holding a wrongly typed
+    value; the validator is expected to raise ``ValueError`` with a message
+    naming that field.
+    """
+    print(
+        f"\nTesting incorrect type in field: {type_error_file['field']} for file: {type_error_file['file']}"
+    )
+    json_data = load_json_path(type_error_file["file"])
+    with pytest.raises(
+        ValueError, match=f"Incorrect type for field: {type_error_file['field']}"
+    ):
+        validator43(json_data)
+
+
+def test_multiple_errors():
+    """Test JSON file with multiple issues to check all errors are raised.
+
+    The fixture is stored under ``tests/data/invalid_datacite43/``; the
+    validator is expected to raise ``ValueError`` whose message matches
+    "Multiple validation errors".
+    """
+    json_data = load_json_path(
+        "../tests/data/invalid_datacite43/multiple_errors.json"
+    )
+    with pytest.raises(ValueError, match="Multiple validation errors"):
+        validator43(json_data)
+
+
+def test_error_logging(caplog):
+    """Test that errors are logged correctly during validation.
+
+    Uses a fixture that exists under ``tests/data/invalid_datacite43/``
+    (the earlier ``some_invalid_file.json`` name does not correspond to any
+    fixture visible alongside this test). The validator is expected to raise
+    ``ValueError`` and to have logged text containing "Validation failed"
+    while capture is set to ERROR level.
+    """
+    json_data = load_json_path(
+        "../tests/data/invalid_datacite43/multiple_errors.json"
+    )
+    with caplog.at_level(logging.ERROR):
+        with pytest.raises(ValueError):
+            validator43(json_data)
+    assert "Validation failed" in caplog.text
+
+
+if __name__ == "__main__":
+    # pytest.fail() signals failure with pytest.fail.Exception, which is not an
+    # AssertionError; catching only AssertionError would let the first failing
+    # file abort the run instead of being recorded in the summary.
+    failure_types = (AssertionError, pytest.fail.Exception)
+
+    # Manual test runner for valid files
+    failed_valid_files = []
+    print("\nRunning validation for valid files...")
+    for file in tqdm(VALID_DATACITE43_FILES, desc="Valid files"):
+        try:
+            test_valid_json(file)
+        except failure_types as e:
+            failed_valid_files.append(file)
+            print(f"Error occurred in valid file: {file}\nError details: {e}")
+
+    if not failed_valid_files:
+        print("\n✅ All valid files passed validation. Test complete.")
+    else:
+        print("\n❌ The following valid files failed validation:")
+        for failed_file in failed_valid_files:
+            print(f"- {failed_file}")
+
+    # Manual test runner for invalid files
+    passed_invalid_files = []
+    print("\nRunning validation for invalid files...")
+    for file in tqdm(INVALID_DATACITE43_FILES, desc="Invalid files"):
+        try:
+            test_invalid_json(file)
+        except failure_types as e:
+            # test_invalid_json fails when a file validates unexpectedly
+            passed_invalid_files.append(file)
+            print(f"Error occurred in invalid file: {file}\nError details: {e}")
+
+    if not passed_invalid_files:
+        print(
+            "\n✅ All invalid files failed validation as expected. Test is a success."
+        )
+    else:
+        print("\n❌ The following invalid files unexpectedly passed validation:")
+        for passed_file in passed_invalid_files:
+            print(f"- {passed_file}")
diff --git a/tests/tester.py b/tests/tester.py
new file mode 100644
index 0000000..13e8250
--- /dev/null
+++ b/tests/tester.py
@@ -0,0 +1,56 @@
+import os
+import pytest
+from customize_schema import validate_metadata as validator43
+from helpers import load_json_path
+
+# Directory containing the valid test JSON files, anchored to this file's own
+# location so the script works from any working directory (the previous
+# "../tests/data/..." form only resolved from a sibling directory of tests/).
+VALID_DATACITE43_DIR = os.path.join(
+    os.path.dirname(os.path.abspath(__file__)), "data", "datacite43"
+)
+
+
+def get_all_json_files(directory):
+    """Return the paths of the ``.json`` files directly inside *directory*.
+
+    Each returned path is *directory* joined with the file name. The list is
+    sorted because ``os.listdir`` yields entries in arbitrary order, and the
+    result feeds pytest parametrization, which should be stable between runs.
+
+    Raises whatever ``os.listdir`` raises (e.g. ``FileNotFoundError``) when
+    *directory* cannot be listed.
+    """
+    return sorted(
+        os.path.join(directory, name)
+        for name in os.listdir(directory)
+        if name.endswith(".json")
+    )
+
+
+# Resolve the valid fixture paths once, at import time, so that pytest can
+# parametrize test_valid_json over them.
+VALID_DATACITE43_FILES = get_all_json_files(directory=VALID_DATACITE43_DIR)
+
+
+@pytest.mark.parametrize("valid_file", VALID_DATACITE43_FILES)
+def test_valid_json(valid_file):
+    """Check that a valid example file is accepted by the validator.
+
+    The test fails when the validator raises ``ValueError`` or when it
+    returns a truthy value, which is treated as a collection of errors.
+    """
+    print(f"Validating file: {valid_file}")  # Added log for file being tested
+    record = load_json_path(valid_file)
+    try:
+        problems = validator43(record)
+    except ValueError as e:
+        # pytest.fail raises, so execution never continues past this branch
+        pytest.fail(f"Validation failed for: {valid_file}\nErrors: {str(e)}")
+
+    if problems:
+        pytest.fail(f"Validation failed for: {valid_file}\nErrors: {problems}")
+    print(f"Validation passed for: {valid_file}")
+
+
+if __name__ == "__main__":
+    # Track failures for manual testing
+    failed_files = []
+
+    # Run the tests and print results for each file
+    for file in VALID_DATACITE43_FILES:
+        try:
+            test_valid_json(file)
+        # pytest.fail() signals failure with pytest.fail.Exception, which is
+        # not an AssertionError; without catching it explicitly the first
+        # failing file would abort the run before the summary is printed.
+        except (AssertionError, pytest.fail.Exception) as e:
+            failed_files.append(file)
+            print(f"Error occurred in file: {file}\nError details: {e}")
+
+    # Print a summary of all failed files
+    if failed_files:
+        print("\nThe following files failed validation:")
+        for failed_file in failed_files:
+            print(f"- {failed_file}")
+    else:
+        print("\nAll files passed validation.")