From ba1be711bd6b947ff8eed7705ea7721e4c5e58d4 Mon Sep 17 00:00:00 2001
From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com>
Date: Tue, 24 Sep 2024 01:23:48 +0545
Subject: [PATCH 01/42] Update cli.py
---
caltechdata_api/cli.py | 17 ++++++++++-------
1 file changed, 10 insertions(+), 7 deletions(-)
diff --git a/caltechdata_api/cli.py b/caltechdata_api/cli.py
index 4fbdea9..ed4a7a6 100644
--- a/caltechdata_api/cli.py
+++ b/caltechdata_api/cli.py
@@ -60,10 +60,13 @@ def decrypt_token(encrypted_token, key):
# Function to get or set token
-def get_or_set_token():
-
+def get_or_set_token(production=True):
key = load_or_generate_key()
- token_file = os.path.join(caltechdata_directory, "token.txt")
+
+ # Use different token files for production and test environments
+ token_filename = "token.txt" if production else "token_test.txt"
+ token_file = os.path.join(caltechdata_directory, token_filename)
+
try:
with open(token_file, "rb") as f:
encrypted_token = f.read()
@@ -71,8 +74,8 @@ def get_or_set_token():
return token
except FileNotFoundError:
while True:
- token = input("Enter your CaltechDATA token: ").strip()
- confirm_token = input("Confirm your CaltechDATA token: ").strip()
+ token = input(f"Enter your {'Production' if production else 'Test'} CaltechDATA token: ").strip()
+ confirm_token = input(f"Confirm your {'Production' if production else 'Test'} CaltechDATA token: ").strip()
if token == confirm_token:
encrypted_token = encrypt_token(token, key)
with open(token_file, "wb") as f:
@@ -403,7 +406,7 @@ def main():
def create_record(production):
- token = get_or_set_token()
+ token = get_or_set_token(production)
print("Using CaltechDATA token:", token)
while True:
choice = get_user_input(
@@ -526,7 +529,7 @@ def print_upload_message(rec_id, production):
def edit_record(production):
record_id = input("Enter the CaltechDATA record ID: ")
- token = get_or_set_token()
+ token = get_or_set_token(production)
file_name = download_file_by_id(record_id, token)
if file_name:
From 1bff778de59dbdd834fb7aa948b8339d09576340 Mon Sep 17 00:00:00 2001
From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com>
Date: Tue, 24 Sep 2024 02:03:26 +0545
Subject: [PATCH 02/42] Update caltechdata_write.py
---
caltechdata_api/caltechdata_write.py | 36 +++++++++++-----------------
1 file changed, 14 insertions(+), 22 deletions(-)
diff --git a/caltechdata_api/caltechdata_write.py b/caltechdata_api/caltechdata_write.py
index 68a1da9..d14d80d 100644
--- a/caltechdata_api/caltechdata_write.py
+++ b/caltechdata_api/caltechdata_write.py
@@ -1,7 +1,7 @@
import copy
import json
-import os, requests
-
+import os
+import requests
import s3fs
from requests import session
from json.decoder import JSONDecodeError
@@ -49,8 +49,6 @@ def write_files_rdm(files, file_link, headers, f_headers, s3=None, keepfiles=Fal
infile = open(name, "rb")
else:
infile = open(f_list[name], "rb")
- # size = infile.seek(0, 2)
- # infile.seek(0, 0) # reset at beginning
result = requests.put(link, headers=f_headers, data=infile)
if result.status_code != 200:
raise Exception(result.text)
@@ -65,10 +63,11 @@ def write_files_rdm(files, file_link, headers, f_headers, s3=None, keepfiles=Fal
raise Exception(result.text)
+
def add_file_links(
metadata, file_links, file_descriptions=[], additional_descriptions="", s3_link=None
):
- # Currently configured for S3 links, assuming all are at same endpoint
+ # Currently configured for S3 links, assuming all are at the same endpoint
link_string = ""
endpoint = "https://" + file_links[0].split("/")[2]
s3 = s3fs.S3FileSystem(anon=True, client_kwargs={"endpoint_url": endpoint})
@@ -152,13 +151,8 @@ def caltechdata_write(
s3_link=None,
default_preview=None,
review_message=None,
+ keep_file=False, # New parameter
):
- """
- File links are links to files existing in external systems that will
- be added directly in a CaltechDATA record, instead of uploading the file.
-
- S3 is a s3sf object for directly opening files
- """
# Make a copy so that none of our changes leak out
metadata = copy.deepcopy(metadata)
@@ -167,7 +161,7 @@ def caltechdata_write(
token = os.environ["RDMTOK"]
# If files is a string - change to single value array
- if isinstance(files, str) == True:
+ if isinstance(files, str):
files = [files]
if file_links:
@@ -176,14 +170,13 @@ def caltechdata_write(
)
# Pull out pid information
- if production == True:
+ if production:
repo_prefix = "10.22002"
else:
repo_prefix = "10.33569"
pids = {}
identifiers = []
if "metadata" in metadata:
- # we have rdm schema
if "identifiers" in metadata["metadata"]:
identifiers = metadata["metadata"]["identifiers"]
elif "identifiers" in metadata:
@@ -200,11 +193,10 @@ def caltechdata_write(
"provider": "oai",
}
elif "scheme" in identifier:
- # We have RDM internal metadata
if identifier["scheme"] == "doi":
doi = identifier["identifier"]
prefix = doi.split("/")[0]
- if doi != False:
+ if doi:
if prefix == repo_prefix:
pids["doi"] = {
"identifier": doi,
@@ -220,25 +212,25 @@ def caltechdata_write(
if "pids" not in metadata:
metadata["pids"] = pids
- if authors == False:
+ if not authors:
data = customize_schema.customize_schema(metadata, schema=schema)
- if production == True:
+ if production:
url = "https://data.caltech.edu/"
else:
url = "https://data.caltechlibrary.dev/"
else:
data = metadata
- if production == True:
+ if production:
url = "https://authors.library.caltech.edu/"
else:
url = "https://authors.caltechlibrary.dev/"
headers = {
- "Authorization": "Bearer %s" % token,
+ "Authorization": f"Bearer {token}",
"Content-type": "application/json",
}
f_headers = {
- "Authorization": "Bearer %s" % token,
+ "Authorization": f"Bearer {token}",
"Content-type": "application/octet-stream",
}
@@ -256,7 +248,7 @@ def caltechdata_write(
if files:
file_link = result.json()["links"]["files"]
- write_files_rdm(files, file_link, headers, f_headers, s3)
+ write_files_rdm(files, file_link, headers, f_headers, s3, keep_file)
if community:
review_link = result.json()["links"]["review"]
From a31d86f1c6c67648ff8aca705e1e6f697470777d Mon Sep 17 00:00:00 2001
From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com>
Date: Tue, 24 Sep 2024 22:11:46 +0545
Subject: [PATCH 03/42] Update caltechdata_write.py
---
caltechdata_api/caltechdata_write.py | 27 +++++++++++++++++----------
1 file changed, 17 insertions(+), 10 deletions(-)
diff --git a/caltechdata_api/caltechdata_write.py b/caltechdata_api/caltechdata_write.py
index d14d80d..1b25f80 100644
--- a/caltechdata_api/caltechdata_write.py
+++ b/caltechdata_api/caltechdata_write.py
@@ -151,8 +151,13 @@ def caltechdata_write(
s3_link=None,
default_preview=None,
review_message=None,
- keep_file=False, # New parameter
):
+ """
+ File links are links to files existing in external systems that will
+ be added directly in a CaltechDATA record, instead of uploading the file.
+
+ S3 is a s3sf object for directly opening files
+ """
# Make a copy so that none of our changes leak out
metadata = copy.deepcopy(metadata)
@@ -161,7 +166,7 @@ def caltechdata_write(
token = os.environ["RDMTOK"]
# If files is a string - change to single value array
- if isinstance(files, str):
+ if isinstance(files, str) == True:
files = [files]
if file_links:
@@ -170,13 +175,14 @@ def caltechdata_write(
)
# Pull out pid information
- if production:
+ if production == True:
repo_prefix = "10.22002"
else:
repo_prefix = "10.33569"
pids = {}
identifiers = []
if "metadata" in metadata:
+ # we have rdm schema
if "identifiers" in metadata["metadata"]:
identifiers = metadata["metadata"]["identifiers"]
elif "identifiers" in metadata:
@@ -193,10 +199,11 @@ def caltechdata_write(
"provider": "oai",
}
elif "scheme" in identifier:
+ # We have RDM internal metadata
if identifier["scheme"] == "doi":
doi = identifier["identifier"]
prefix = doi.split("/")[0]
- if doi:
+ if doi != False:
if prefix == repo_prefix:
pids["doi"] = {
"identifier": doi,
@@ -212,25 +219,25 @@ def caltechdata_write(
if "pids" not in metadata:
metadata["pids"] = pids
- if not authors:
+ if authors == False:
data = customize_schema.customize_schema(metadata, schema=schema)
- if production:
+ if production == True:
url = "https://data.caltech.edu/"
else:
url = "https://data.caltechlibrary.dev/"
else:
data = metadata
- if production:
+ if production == True:
url = "https://authors.library.caltech.edu/"
else:
url = "https://authors.caltechlibrary.dev/"
headers = {
- "Authorization": f"Bearer {token}",
+ "Authorization": "Bearer %s" % token,
"Content-type": "application/json",
}
f_headers = {
- "Authorization": f"Bearer {token}",
+ "Authorization": "Bearer %s" % token,
"Content-type": "application/octet-stream",
}
@@ -248,7 +255,7 @@ def caltechdata_write(
if files:
file_link = result.json()["links"]["files"]
- write_files_rdm(files, file_link, headers, f_headers, s3, keep_file)
+ write_files_rdm(files, file_link, headers, f_headers, s3)
if community:
review_link = result.json()["links"]["review"]
From 8b742ccf532c42f0df0dec22f1d91cf786131d58 Mon Sep 17 00:00:00 2001
From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com>
Date: Thu, 26 Sep 2024 01:22:42 +0545
Subject: [PATCH 04/42] Update cli.py
---
caltechdata_api/cli.py | 115 +++++++++++++++++++++++------------------
1 file changed, 66 insertions(+), 49 deletions(-)
diff --git a/caltechdata_api/cli.py b/caltechdata_api/cli.py
index ed4a7a6..e63db47 100644
--- a/caltechdata_api/cli.py
+++ b/caltechdata_api/cli.py
@@ -59,7 +59,7 @@ def decrypt_token(encrypted_token, key):
return f.decrypt(encrypted_token).decode()
-# Function to get or set token
+# Function to get or set token with support for test system
def get_or_set_token(production=True):
key = load_or_generate_key()
@@ -85,6 +85,7 @@ def get_or_set_token(production=True):
print("Tokens do not match. Please try again.")
+
def welcome_message():
print("Welcome to CaltechDATA CLI")
@@ -378,22 +379,22 @@ def upload_data_from_file():
except json.JSONDecodeError as e:
print(f"Error: Invalid JSON format in the file '{filename}'. {str(e)}")
-
def parse_args():
"""Parse command-line arguments."""
parser = argparse.ArgumentParser(description="CaltechDATA CLI tool.")
parser.add_argument(
- "-test", action="store_true", help="Use test mode, sets production to False"
+ "-test",
+ action="store_true",
+ help="Use test mode, sets production to False"
)
args = parser.parse_args()
return args
-
def main():
args = parse_args()
-
+
production = not args.test # Set production to False if -test flag is provided
-
+
choice = get_user_input(
"Do you want to create or edit a CaltechDATA record? (create/edit): "
).lower()
@@ -407,6 +408,7 @@ def main():
def create_record(production):
token = get_or_set_token(production)
+ #keep_file = input("Do you want to keep your existing files? (yes/no): ").lower() == "yes"
print("Using CaltechDATA token:", token)
while True:
choice = get_user_input(
@@ -418,11 +420,7 @@ def create_record(production):
if existing_data:
if filepath != "":
response = caltechdata_write(
- existing_data,
- token,
- filepath,
- production=production,
- publish=False,
+ existing_data, token, filepath, production=production, publish=False
)
elif file_link != "":
response = caltechdata_write(
@@ -499,6 +497,7 @@ def create_record(production):
metadata, token, production=production, publish=False
)
rec_id = response
+
print_upload_message(rec_id, production)
with open(response + ".json", "w") as file:
@@ -509,29 +508,20 @@ def create_record(production):
else:
print("Invalid choice. Please enter 'existing' or 'create'.")
-
def print_upload_message(rec_id, production):
- base_url = (
- "https://data.caltech.edu/uploads/"
- if production
- else "https://data.caltechlibrary.dev/uploads/"
- )
+ base_url = "https://data.caltech.edu/uploads/" if production else "https://data.caltechlibrary.dev/uploads/"
print(
- f"""
- You can view and publish this record at
-
+ f"""You can view and publish this record at
{base_url}{rec_id}
-
- If you need to upload large files to S3, you can type `s3cmd put DATA_FILE s3://ini230004-bucket01/{rec_id}/`
- """
+ If you need to upload large files to S3, you can type
+ `s3cmd put DATA_FILE s3://ini230004-bucket01/{rec_id}/`"""
)
-
def edit_record(production):
record_id = input("Enter the CaltechDATA record ID: ")
token = get_or_set_token(production)
file_name = download_file_by_id(record_id, token)
-
+
if file_name:
try:
# Read the edited metadata file
@@ -548,38 +538,51 @@ def edit_record(production):
print(f"An error occurred during metadata editing: {e}")
else:
print("No metadata file found.")
-
choice = get_user_input("Do you want to add files? (y/n): ").lower()
if choice == "y":
if production:
API_URL_TEMPLATE = "https://data.caltech.edu/api/records/{record_id}/files"
- API_URL_TEMPLATE_DRAFT = (
- "https://data.caltech.edu/api/records/{record_id}/draft/files"
- )
+ API_URL_TEMPLATE_DRAFT = "https://data.caltech.edu/api/records/{record_id}/draft/files"
else:
- API_URL_TEMPLATE = (
- "https://data.caltechlibrary.dev/api/records/{record_id}/files"
- )
- API_URL_TEMPLATE_DRAFT = (
- "https://data.caltechlibrary.dev/api/records/{record_id}/draft/files"
- )
-
+ API_URL_TEMPLATE = "https://data.caltechlibrary.dev/api/records/{record_id}/files"
+ API_URL_TEMPLATE_DRAFT = "https://data.caltechlibrary.dev/api/records/{record_id}/draft/files"
+
url = API_URL_TEMPLATE.format(record_id=record_id)
url_draft = API_URL_TEMPLATE_DRAFT.format(record_id=record_id)
+
+ headers = {
+ "accept": "application/json",
+ }
- response = requests.get(url)
- response_draft = requests.get(url_draft)
+ if token:
+ headers["Authorization"] = "Bearer %s" % token
- filepath, file_link = upload_supporting_file(record_id)
- print(file_link)
+ response = requests.get(url, headers=headers)
+ response_draft = requests.get(url_draft, headers=headers)
+
+ #print(production, response, response_draft)
+ #print(response.status_code, response_draft.status_code)
- if response.status_code == 404 and response_draft.status_code == 404:
+ data = response.json()
+ data_draft = response_draft.json()
+
+ #print(data_draft)
+ # Check if 'entries' exists and its length
+ if len(data.get('entries', [])) == 0 and len(data_draft.get('entries', [])) == 0:
keepfile = False
else:
- keepfile = (
- input("Do you want to keep existing files? (y/n): ").lower() == "y"
- )
-
+ keepfile = input("Do you want to keep existing files? (y/n): ").lower() == "y"
+
+ # if response.status_code == 404 and response_draft.status_code == 404:
+ # keepfile = False
+ # else:
+
+ # keepfile = input("Do you want to keep existing files? (y/n): ").lower() == "y"
+
+ filepath, file_link = upload_supporting_file(record_id)
+ if file_link:
+ print(file_link)
+
if filepath != "":
response = caltechdata_edit(
record_id,
@@ -599,9 +602,12 @@ def edit_record(production):
publish=False,
keepfile=keepfile,
)
-
+
rec_id = response
print_upload_message(rec_id, production)
+
+
+
def download_file_by_id(record_id, token=None):
@@ -616,15 +622,26 @@ def download_file_by_id(record_id, token=None):
try:
response = requests.get(url, headers=headers)
-
if response.status_code != 200:
# Might have a draft
response = requests.get(
url + "/draft",
headers=headers,
)
- if response.status_code != 200:
- raise Exception(f"Record {record_id} does not exist, cannot edit")
+ if response.status_code != 200:
+ url = f"https://data.caltechlibrary.dev/api/records/{record_id}"
+ response = requests.get(
+ url,
+ headers=headers,
+ )
+ if response.status_code != 200:
+ # Might have a draft
+ response = requests.get(
+ url + "/draft",
+ headers=headers,
+ )
+ if response.status_code != 200:
+ raise Exception(f"Record {record_id} does not exist, cannot edit")
file_content = response.content
file_name = f"downloaded_data_{record_id}.json"
with open(file_name, "wb") as file:
From bf5ca3c98b93247e497637c0cd1b665995d7262d Mon Sep 17 00:00:00 2001
From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com>
Date: Thu, 26 Sep 2024 02:14:25 +0545
Subject: [PATCH 05/42] Update customize_schema.py
---
caltechdata_api/customize_schema.py | 126 +++++++++++++++++++++++++++-
1 file changed, 125 insertions(+), 1 deletion(-)
diff --git a/caltechdata_api/customize_schema.py b/caltechdata_api/customize_schema.py
index c379e58..1e7e18c 100644
--- a/caltechdata_api/customize_schema.py
+++ b/caltechdata_api/customize_schema.py
@@ -134,8 +134,9 @@ def rdm_creators_contributors(person_list, peopleroles):
def customize_schema_rdm(json_record):
# Get vocabularies used in InvenioRDM
+
vocabularies = get_vocabularies()
-
+ validate_metadata(json_record)
peopleroles = vocabularies["crr"]
resourcetypes = vocabularies["rsrct"]
descriptiontypes = vocabularies["dty"]
@@ -385,6 +386,129 @@ def customize_schema_rdm(json_record):
return final
+def validate_metadata(json_record):
+ """
+ Validates the presence and structure of required fields in a CaltechDATA JSON record.
+ Raises an exception if any required field is missing or structured incorrectly.
+ """
+ errors = []
+
+ # Check for 'types' and 'resourceTypeGeneral'
+ if 'types' not in json_record:
+ errors.append("'types' field is missing.")
+ elif not isinstance(json_record['types'], dict):
+ errors.append("'types' field should be a dictionary.")
+ elif 'resourceTypeGeneral' not in json_record['types']:
+ errors.append("'resourceTypeGeneral' field is missing in 'types'.")
+
+ # Check for 'title'
+ if 'titles' not in json_record:
+ errors.append("'titles' field is missing.")
+ elif not isinstance(json_record['titles'], list) or len(json_record['titles']) == 0:
+ errors.append("'titles' should be a non-empty list.")
+ else:
+ # Ensure each title is a dictionary with 'title' field
+ for title in json_record['titles']:
+ if not isinstance(title, dict) or 'title' not in title:
+ errors.append("Each entry in 'titles' must be a dictionary with a 'title' key.")
+
+ # Check for 'publication_date'
+ if 'publicationYear' not in json_record and 'dates' not in json_record:
+ errors.append("A publication date is required ('publicationYear' or 'dates' field is missing).")
+ if 'dates' in json_record:
+ if not isinstance(json_record['dates'], list):
+ errors.append("'dates' should be a list.")
+ else:
+ for date_entry in json_record['dates']:
+ if not isinstance(date_entry, dict) or 'dateType' not in date_entry or 'date' not in date_entry:
+ errors.append("Each entry in 'dates' must be a dictionary with 'dateType' and 'date' keys.")
+
+ # Check for 'creators'
+ if 'creators' not in json_record:
+ errors.append("'creators' field is missing.")
+ elif not isinstance(json_record['creators'], list) or len(json_record['creators']) == 0:
+ errors.append("'creators' should be a non-empty list.")
+ else:
+ for creator in json_record['creators']:
+ if not isinstance(creator, dict) or 'name' not in creator:
+ errors.append("Each creator in 'creators' must be a dictionary with a 'name' key.")
+
+ # Check for 'contributors'
+ if 'contributors' in json_record:
+ if not isinstance(json_record['contributors'], list):
+ errors.append("'contributors' should be a list.")
+ else:
+ for contributor in json_record['contributors']:
+ if not isinstance(contributor, dict) or 'name' not in contributor:
+ errors.append("Each contributor must be a dictionary with a 'name' key.")
+
+ # Check for 'resourceType'
+ if 'resourceType' not in json_record['types']:
+ errors.append("'resourceType' field is missing in 'types'.")
+ elif not isinstance(json_record['types']['resourceType'], str):
+ errors.append("'resourceType' should be a string.")
+
+ # Check for 'identifiers'
+ if 'identifiers' in json_record:
+ if not isinstance(json_record['identifiers'], list):
+ errors.append("'identifiers' should be a list.")
+ else:
+ for identifier in json_record['identifiers']:
+ if not isinstance(identifier, dict) or 'identifier' not in identifier or 'identifierType' not in identifier:
+ errors.append("Each identifier must be a dictionary with 'identifier' and 'identifierType' keys.")
+
+ # Check for 'subjects'
+ if 'subjects' in json_record:
+ if not isinstance(json_record['subjects'], list):
+ errors.append("'subjects' should be a list.")
+ else:
+ for subject in json_record['subjects']:
+ if not isinstance(subject, dict) or 'subject' not in subject:
+ errors.append("Each subject must be a dictionary with a 'subject' key.")
+
+ # Check for 'relatedIdentifiers'
+ if 'relatedIdentifiers' in json_record:
+ if not isinstance(json_record['relatedIdentifiers'], list):
+ errors.append("'relatedIdentifiers' should be a list.")
+ else:
+ for related_id in json_record['relatedIdentifiers']:
+ if not isinstance(related_id, dict) or 'relatedIdentifier' not in related_id:
+ errors.append("Each relatedIdentifier must be a dictionary with a 'relatedIdentifier' key.")
+
+ # Check for 'rightsList'
+ if 'rightsList' in json_record:
+ if not isinstance(json_record['rightsList'], list):
+ errors.append("'rightsList' should be a list.")
+ else:
+ for rights in json_record['rightsList']:
+ if not isinstance(rights, dict) or 'rights' not in rights:
+ errors.append("Each entry in 'rightsList' must be a dictionary with a 'rights' key.")
+
+ # Check for 'geoLocations'
+ if 'geoLocations' in json_record:
+ if not isinstance(json_record['geoLocations'], list):
+ errors.append("'geoLocations' should be a list.")
+ else:
+ for location in json_record['geoLocations']:
+ if not isinstance(location, dict):
+ errors.append("Each entry in 'geoLocations' must be a dictionary.")
+ elif 'geoLocationPoint' not in location and 'geoLocationBox' not in location and 'geoLocationPlace' not in location:
+ errors.append("Each geoLocation entry must contain at least one of 'geoLocationPoint', 'geoLocationBox', or 'geoLocationPlace'.")
+
+ # Check for 'fundingReferences'
+ if 'fundingReferences' in json_record:
+ if not isinstance(json_record['fundingReferences'], list):
+ errors.append("'fundingReferences' should be a list.")
+ else:
+ for funding in json_record['fundingReferences']:
+ if not isinstance(funding, dict):
+ errors.append("Each funding reference must be a dictionary.")
+ if 'funderName' not in funding:
+ errors.append("Each funding reference must contain 'funderName'.")
+
+ # Return errors if any are found
+ if errors:
+ raise ValueError(f"Validation errors in metadata: {', '.join(errors)}")
if __name__ == "__main__":
# Read in from file for demo purposes
From f46700a919708cff55dad0491daa6a5b57d438e7 Mon Sep 17 00:00:00 2001
From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com>
Date: Thu, 26 Sep 2024 22:52:02 +0545
Subject: [PATCH 06/42] Update customize_schema.py
---
caltechdata_api/customize_schema.py | 154 ++++++++++++++++++----------
1 file changed, 97 insertions(+), 57 deletions(-)
diff --git a/caltechdata_api/customize_schema.py b/caltechdata_api/customize_schema.py
index 1e7e18c..b3ff9ab 100644
--- a/caltechdata_api/customize_schema.py
+++ b/caltechdata_api/customize_schema.py
@@ -134,7 +134,7 @@ def rdm_creators_contributors(person_list, peopleroles):
def customize_schema_rdm(json_record):
# Get vocabularies used in InvenioRDM
-
+
vocabularies = get_vocabularies()
validate_metadata(json_record)
peopleroles = vocabularies["crr"]
@@ -386,6 +386,7 @@ def customize_schema_rdm(json_record):
return final
+
def validate_metadata(json_record):
"""
Validates the presence and structure of required fields in a CaltechDATA JSON record.
@@ -394,122 +395,161 @@ def validate_metadata(json_record):
errors = []
# Check for 'types' and 'resourceTypeGeneral'
- if 'types' not in json_record:
+ if "types" not in json_record:
errors.append("'types' field is missing.")
- elif not isinstance(json_record['types'], dict):
+ elif not isinstance(json_record["types"], dict):
errors.append("'types' field should be a dictionary.")
- elif 'resourceTypeGeneral' not in json_record['types']:
+ elif "resourceTypeGeneral" not in json_record["types"]:
errors.append("'resourceTypeGeneral' field is missing in 'types'.")
# Check for 'title'
- if 'titles' not in json_record:
+ if "titles" not in json_record:
errors.append("'titles' field is missing.")
- elif not isinstance(json_record['titles'], list) or len(json_record['titles']) == 0:
+ elif not isinstance(json_record["titles"], list) or len(json_record["titles"]) == 0:
errors.append("'titles' should be a non-empty list.")
else:
# Ensure each title is a dictionary with 'title' field
- for title in json_record['titles']:
- if not isinstance(title, dict) or 'title' not in title:
- errors.append("Each entry in 'titles' must be a dictionary with a 'title' key.")
+ for title in json_record["titles"]:
+ if not isinstance(title, dict) or "title" not in title:
+ errors.append(
+ "Each entry in 'titles' must be a dictionary with a 'title' key."
+ )
# Check for 'publication_date'
- if 'publicationYear' not in json_record and 'dates' not in json_record:
- errors.append("A publication date is required ('publicationYear' or 'dates' field is missing).")
- if 'dates' in json_record:
- if not isinstance(json_record['dates'], list):
+ if "publicationYear" not in json_record and "dates" not in json_record:
+ errors.append(
+ "A publication date is required ('publicationYear' or 'dates' field is missing)."
+ )
+ if "dates" in json_record:
+ if not isinstance(json_record["dates"], list):
errors.append("'dates' should be a list.")
else:
- for date_entry in json_record['dates']:
- if not isinstance(date_entry, dict) or 'dateType' not in date_entry or 'date' not in date_entry:
- errors.append("Each entry in 'dates' must be a dictionary with 'dateType' and 'date' keys.")
+ for date_entry in json_record["dates"]:
+ if (
+ not isinstance(date_entry, dict)
+ or "dateType" not in date_entry
+ or "date" not in date_entry
+ ):
+ errors.append(
+ "Each entry in 'dates' must be a dictionary with 'dateType' and 'date' keys."
+ )
# Check for 'creators'
- if 'creators' not in json_record:
+ if "creators" not in json_record:
errors.append("'creators' field is missing.")
- elif not isinstance(json_record['creators'], list) or len(json_record['creators']) == 0:
+ elif (
+ not isinstance(json_record["creators"], list)
+ or len(json_record["creators"]) == 0
+ ):
errors.append("'creators' should be a non-empty list.")
else:
- for creator in json_record['creators']:
- if not isinstance(creator, dict) or 'name' not in creator:
- errors.append("Each creator in 'creators' must be a dictionary with a 'name' key.")
+ for creator in json_record["creators"]:
+ if not isinstance(creator, dict) or "name" not in creator:
+ errors.append(
+ "Each creator in 'creators' must be a dictionary with a 'name' key."
+ )
# Check for 'contributors'
- if 'contributors' in json_record:
- if not isinstance(json_record['contributors'], list):
+ if "contributors" in json_record:
+ if not isinstance(json_record["contributors"], list):
errors.append("'contributors' should be a list.")
else:
- for contributor in json_record['contributors']:
- if not isinstance(contributor, dict) or 'name' not in contributor:
- errors.append("Each contributor must be a dictionary with a 'name' key.")
+ for contributor in json_record["contributors"]:
+ if not isinstance(contributor, dict) or "name" not in contributor:
+ errors.append(
+ "Each contributor must be a dictionary with a 'name' key."
+ )
# Check for 'resourceType'
- if 'resourceType' not in json_record['types']:
+ if "resourceType" not in json_record["types"]:
errors.append("'resourceType' field is missing in 'types'.")
- elif not isinstance(json_record['types']['resourceType'], str):
+ elif not isinstance(json_record["types"]["resourceType"], str):
errors.append("'resourceType' should be a string.")
# Check for 'identifiers'
- if 'identifiers' in json_record:
- if not isinstance(json_record['identifiers'], list):
+ if "identifiers" in json_record:
+ if not isinstance(json_record["identifiers"], list):
errors.append("'identifiers' should be a list.")
else:
- for identifier in json_record['identifiers']:
- if not isinstance(identifier, dict) or 'identifier' not in identifier or 'identifierType' not in identifier:
- errors.append("Each identifier must be a dictionary with 'identifier' and 'identifierType' keys.")
+ for identifier in json_record["identifiers"]:
+ if (
+ not isinstance(identifier, dict)
+ or "identifier" not in identifier
+ or "identifierType" not in identifier
+ ):
+ errors.append(
+ "Each identifier must be a dictionary with 'identifier' and 'identifierType' keys."
+ )
# Check for 'subjects'
- if 'subjects' in json_record:
- if not isinstance(json_record['subjects'], list):
+ if "subjects" in json_record:
+ if not isinstance(json_record["subjects"], list):
errors.append("'subjects' should be a list.")
else:
- for subject in json_record['subjects']:
- if not isinstance(subject, dict) or 'subject' not in subject:
- errors.append("Each subject must be a dictionary with a 'subject' key.")
+ for subject in json_record["subjects"]:
+ if not isinstance(subject, dict) or "subject" not in subject:
+ errors.append(
+ "Each subject must be a dictionary with a 'subject' key."
+ )
# Check for 'relatedIdentifiers'
- if 'relatedIdentifiers' in json_record:
- if not isinstance(json_record['relatedIdentifiers'], list):
+ if "relatedIdentifiers" in json_record:
+ if not isinstance(json_record["relatedIdentifiers"], list):
errors.append("'relatedIdentifiers' should be a list.")
else:
- for related_id in json_record['relatedIdentifiers']:
- if not isinstance(related_id, dict) or 'relatedIdentifier' not in related_id:
- errors.append("Each relatedIdentifier must be a dictionary with a 'relatedIdentifier' key.")
+ for related_id in json_record["relatedIdentifiers"]:
+ if (
+ not isinstance(related_id, dict)
+ or "relatedIdentifier" not in related_id
+ ):
+ errors.append(
+ "Each relatedIdentifier must be a dictionary with a 'relatedIdentifier' key."
+ )
# Check for 'rightsList'
- if 'rightsList' in json_record:
- if not isinstance(json_record['rightsList'], list):
+ if "rightsList" in json_record:
+ if not isinstance(json_record["rightsList"], list):
errors.append("'rightsList' should be a list.")
else:
- for rights in json_record['rightsList']:
- if not isinstance(rights, dict) or 'rights' not in rights:
- errors.append("Each entry in 'rightsList' must be a dictionary with a 'rights' key.")
+ for rights in json_record["rightsList"]:
+ if not isinstance(rights, dict) or "rights" not in rights:
+ errors.append(
+ "Each entry in 'rightsList' must be a dictionary with a 'rights' key."
+ )
# Check for 'geoLocations'
- if 'geoLocations' in json_record:
- if not isinstance(json_record['geoLocations'], list):
+ if "geoLocations" in json_record:
+ if not isinstance(json_record["geoLocations"], list):
errors.append("'geoLocations' should be a list.")
else:
- for location in json_record['geoLocations']:
+ for location in json_record["geoLocations"]:
if not isinstance(location, dict):
errors.append("Each entry in 'geoLocations' must be a dictionary.")
- elif 'geoLocationPoint' not in location and 'geoLocationBox' not in location and 'geoLocationPlace' not in location:
- errors.append("Each geoLocation entry must contain at least one of 'geoLocationPoint', 'geoLocationBox', or 'geoLocationPlace'.")
+ elif (
+ "geoLocationPoint" not in location
+ and "geoLocationBox" not in location
+ and "geoLocationPlace" not in location
+ ):
+ errors.append(
+ "Each geoLocation entry must contain at least one of 'geoLocationPoint', 'geoLocationBox', or 'geoLocationPlace'."
+ )
# Check for 'fundingReferences'
- if 'fundingReferences' in json_record:
- if not isinstance(json_record['fundingReferences'], list):
+ if "fundingReferences" in json_record:
+ if not isinstance(json_record["fundingReferences"], list):
errors.append("'fundingReferences' should be a list.")
else:
- for funding in json_record['fundingReferences']:
+ for funding in json_record["fundingReferences"]:
if not isinstance(funding, dict):
errors.append("Each funding reference must be a dictionary.")
- if 'funderName' not in funding:
+ if "funderName" not in funding:
errors.append("Each funding reference must contain 'funderName'.")
# Return errors if any are found
if errors:
raise ValueError(f"Validation errors in metadata: {', '.join(errors)}")
+
if __name__ == "__main__":
# Read in from file for demo purposes
From d62c2781f35f5ad5d54366560b19b33ac168045f Mon Sep 17 00:00:00 2001
From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com>
Date: Thu, 26 Sep 2024 22:52:40 +0545
Subject: [PATCH 07/42] Update cli.py
---
caltechdata_api/cli.py | 92 +++++++++++++++++++++++++-----------------
1 file changed, 55 insertions(+), 37 deletions(-)
diff --git a/caltechdata_api/cli.py b/caltechdata_api/cli.py
index 2286497..cd7cb26 100644
--- a/caltechdata_api/cli.py
+++ b/caltechdata_api/cli.py
@@ -89,7 +89,6 @@ def get_or_set_token(production=True):
print("Tokens do not match. Please try again.")
-
def welcome_message():
print("Welcome to CaltechDATA CLI")
@@ -383,22 +382,22 @@ def upload_data_from_file():
except json.JSONDecodeError as e:
print(f"Error: Invalid JSON format in the file '{filename}'. {str(e)}")
+
def parse_args():
"""Parse command-line arguments."""
parser = argparse.ArgumentParser(description="CaltechDATA CLI tool.")
parser.add_argument(
- "-test",
- action="store_true",
- help="Use test mode, sets production to False"
+ "-test", action="store_true", help="Use test mode, sets production to False"
)
args = parser.parse_args()
return args
+
def main():
args = parse_args()
-
+
production = not args.test # Set production to False if -test flag is provided
-
+
choice = get_user_input(
"Do you want to create or edit a CaltechDATA record? (create/edit): "
).lower()
@@ -412,7 +411,7 @@ def main():
def create_record(production):
token = get_or_set_token(production)
- #keep_file = input("Do you want to keep your existing files? (yes/no): ").lower() == "yes"
+ # keep_file = input("Do you want to keep your existing files? (yes/no): ").lower() == "yes"
print("Using CaltechDATA token:", token)
while True:
choice = get_user_input(
@@ -424,7 +423,11 @@ def create_record(production):
if existing_data:
if filepath != "":
response = caltechdata_write(
- existing_data, token, filepath, production=production, publish=False
+ existing_data,
+ token,
+ filepath,
+ production=production,
+ publish=False,
)
elif file_link != "":
response = caltechdata_write(
@@ -501,7 +504,6 @@ def create_record(production):
metadata, token, production=production, publish=False
)
rec_id = response
-
print_upload_message(rec_id, production)
with open(response + ".json", "w") as file:
@@ -512,8 +514,13 @@ def create_record(production):
else:
print("Invalid choice. Please enter 'existing' or 'create'.")
+
def print_upload_message(rec_id, production):
- base_url = "https://data.caltech.edu/uploads/" if production else "https://data.caltechlibrary.dev/uploads/"
+ base_url = (
+ "https://data.caltech.edu/uploads/"
+ if production
+ else "https://data.caltechlibrary.dev/uploads/"
+ )
print(
f"""You can view and publish this record at
{base_url}{rec_id}
@@ -521,11 +528,12 @@ def print_upload_message(rec_id, production):
`s3cmd put DATA_FILE s3://ini230004-bucket01/{rec_id}/`"""
)
+
def edit_record(production):
record_id = input("Enter the CaltechDATA record ID: ")
token = get_or_set_token(production)
file_name = download_file_by_id(record_id, token)
-
+
if file_name:
try:
# Read the edited metadata file
@@ -546,16 +554,22 @@ def edit_record(production):
if choice == "y":
if production:
API_URL_TEMPLATE = "https://data.caltech.edu/api/records/{record_id}/files"
- API_URL_TEMPLATE_DRAFT = "https://data.caltech.edu/api/records/{record_id}/draft/files"
+ API_URL_TEMPLATE_DRAFT = (
+ "https://data.caltech.edu/api/records/{record_id}/draft/files"
+ )
else:
- API_URL_TEMPLATE = "https://data.caltechlibrary.dev/api/records/{record_id}/files"
- API_URL_TEMPLATE_DRAFT = "https://data.caltechlibrary.dev/api/records/{record_id}/draft/files"
-
+ API_URL_TEMPLATE = (
+ "https://data.caltechlibrary.dev/api/records/{record_id}/files"
+ )
+ API_URL_TEMPLATE_DRAFT = (
+ "https://data.caltechlibrary.dev/api/records/{record_id}/draft/files"
+ )
+
url = API_URL_TEMPLATE.format(record_id=record_id)
url_draft = API_URL_TEMPLATE_DRAFT.format(record_id=record_id)
-
+
headers = {
- "accept": "application/json",
+ "accept": "application/json",
}
if token:
@@ -563,30 +577,35 @@ def edit_record(production):
response = requests.get(url, headers=headers)
response_draft = requests.get(url_draft, headers=headers)
-
- #print(production, response, response_draft)
- #print(response.status_code, response_draft.status_code)
+
+ # print(production, response, response_draft)
+ # print(response.status_code, response_draft.status_code)
data = response.json()
data_draft = response_draft.json()
- #print(data_draft)
+ # print(data_draft)
# Check if 'entries' exists and its length
- if len(data.get('entries', [])) == 0 and len(data_draft.get('entries', [])) == 0:
+ if (
+ len(data.get("entries", [])) == 0
+ and len(data_draft.get("entries", [])) == 0
+ ):
keepfile = False
else:
- keepfile = input("Do you want to keep existing files? (y/n): ").lower() == "y"
-
+ keepfile = (
+ input("Do you want to keep existing files? (y/n): ").lower() == "y"
+ )
+
# if response.status_code == 404 and response_draft.status_code == 404:
# keepfile = False
# else:
-
+
# keepfile = input("Do you want to keep existing files? (y/n): ").lower() == "y"
-
+
filepath, file_link = upload_supporting_file(record_id)
if file_link:
print(file_link)
-
+
if filepath != "":
response = caltechdata_edit(
record_id,
@@ -604,14 +623,11 @@ def edit_record(production):
file_links=file_link,
production=production,
publish=False,
- keepfile=keepfile,
+ keepfiles=keepfile,
)
-
+
rec_id = response
print_upload_message(rec_id, production)
-
-
-
def download_file_by_id(record_id, token=None):
@@ -632,12 +648,12 @@ def download_file_by_id(record_id, token=None):
url + "/draft",
headers=headers,
)
- if response.status_code != 200:
+ if response.status_code != 200:
url = f"https://data.caltechlibrary.dev/api/records/{record_id}"
response = requests.get(
- url,
- headers=headers,
- )
+ url,
+ headers=headers,
+ )
if response.status_code != 200:
# Might have a draft
response = requests.get(
@@ -645,7 +661,9 @@ def download_file_by_id(record_id, token=None):
headers=headers,
)
if response.status_code != 200:
- raise Exception(f"Record {record_id} does not exist, cannot edit")
+ raise Exception(
+ f"Record {record_id} does not exist, cannot edit"
+ )
file_content = response.content
file_name = f"downloaded_data_{record_id}.json"
with open(file_name, "wb") as file:
From a57a075f922e6e660e5c3dc5ff0c1117bbe7a74f Mon Sep 17 00:00:00 2001
From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com>
Date: Thu, 26 Sep 2024 22:53:15 +0545
Subject: [PATCH 08/42] Update caltechdata_write.py
---
caltechdata_api/caltechdata_write.py | 1 -
1 file changed, 1 deletion(-)
diff --git a/caltechdata_api/caltechdata_write.py b/caltechdata_api/caltechdata_write.py
index 1b25f80..e0cb0dd 100644
--- a/caltechdata_api/caltechdata_write.py
+++ b/caltechdata_api/caltechdata_write.py
@@ -63,7 +63,6 @@ def write_files_rdm(files, file_link, headers, f_headers, s3=None, keepfiles=Fal
raise Exception(result.text)
-
def add_file_links(
metadata, file_links, file_descriptions=[], additional_descriptions="", s3_link=None
):
From f0f40e3631b5c840f15d4db695e033eee8b4457a Mon Sep 17 00:00:00 2001
From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com>
Date: Thu, 26 Sep 2024 22:56:28 +0545
Subject: [PATCH 09/42] Update cli.py
---
caltechdata_api/cli.py | 12 ------------
1 file changed, 12 deletions(-)
diff --git a/caltechdata_api/cli.py b/caltechdata_api/cli.py
index cd7cb26..3222c09 100644
--- a/caltechdata_api/cli.py
+++ b/caltechdata_api/cli.py
@@ -577,14 +577,8 @@ def edit_record(production):
response = requests.get(url, headers=headers)
response_draft = requests.get(url_draft, headers=headers)
-
- # print(production, response, response_draft)
- # print(response.status_code, response_draft.status_code)
-
data = response.json()
data_draft = response_draft.json()
-
- # print(data_draft)
# Check if 'entries' exists and its length
if (
len(data.get("entries", [])) == 0
@@ -596,12 +590,6 @@ def edit_record(production):
input("Do you want to keep existing files? (y/n): ").lower() == "y"
)
- # if response.status_code == 404 and response_draft.status_code == 404:
- # keepfile = False
- # else:
-
- # keepfile = input("Do you want to keep existing files? (y/n): ").lower() == "y"
-
filepath, file_link = upload_supporting_file(record_id)
if file_link:
print(file_link)
From 687e098d637dadf4ac01728df4975bc822df9081 Mon Sep 17 00:00:00 2001
From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com>
Date: Thu, 24 Oct 2024 18:15:30 -0700
Subject: [PATCH 10/42] Update customize_schema.py
---
caltechdata_api/customize_schema.py | 260 +++++++++++++++-------------
1 file changed, 139 insertions(+), 121 deletions(-)
diff --git a/caltechdata_api/customize_schema.py b/caltechdata_api/customize_schema.py
index b3ff9ab..b07064d 100644
--- a/caltechdata_api/customize_schema.py
+++ b/caltechdata_api/customize_schema.py
@@ -394,103 +394,57 @@ def validate_metadata(json_record):
"""
errors = []
- # Check for 'types' and 'resourceTypeGeneral'
- if "types" not in json_record:
- errors.append("'types' field is missing.")
- elif not isinstance(json_record["types"], dict):
- errors.append("'types' field should be a dictionary.")
- elif "resourceTypeGeneral" not in json_record["types"]:
- errors.append("'resourceTypeGeneral' field is missing in 'types'.")
-
- # Check for 'title'
if "titles" not in json_record:
errors.append("'titles' field is missing.")
elif not isinstance(json_record["titles"], list) or len(json_record["titles"]) == 0:
errors.append("'titles' should be a non-empty list.")
else:
- # Ensure each title is a dictionary with 'title' field
for title in json_record["titles"]:
if not isinstance(title, dict) or "title" not in title:
- errors.append(
- "Each entry in 'titles' must be a dictionary with a 'title' key."
- )
-
- # Check for 'publication_date'
- if "publicationYear" not in json_record and "dates" not in json_record:
- errors.append(
- "A publication date is required ('publicationYear' or 'dates' field is missing)."
- )
- if "dates" in json_record:
- if not isinstance(json_record["dates"], list):
- errors.append("'dates' should be a list.")
- else:
- for date_entry in json_record["dates"]:
- if (
- not isinstance(date_entry, dict)
- or "dateType" not in date_entry
- or "date" not in date_entry
- ):
- errors.append(
- "Each entry in 'dates' must be a dictionary with 'dateType' and 'date' keys."
- )
-
- # Check for 'creators'
- if "creators" not in json_record:
- errors.append("'creators' field is missing.")
- elif (
- not isinstance(json_record["creators"], list)
- or len(json_record["creators"]) == 0
- ):
- errors.append("'creators' should be a non-empty list.")
- else:
- for creator in json_record["creators"]:
- if not isinstance(creator, dict) or "name" not in creator:
- errors.append(
- "Each creator in 'creators' must be a dictionary with a 'name' key."
- )
-
+ errors.append("Each entry in 'titles' must be a dictionary with a 'title' key.")
+
# Check for 'contributors'
- if "contributors" in json_record:
- if not isinstance(json_record["contributors"], list):
- errors.append("'contributors' should be a list.")
- else:
- for contributor in json_record["contributors"]:
- if not isinstance(contributor, dict) or "name" not in contributor:
- errors.append(
- "Each contributor must be a dictionary with a 'name' key."
- )
-
- # Check for 'resourceType'
- if "resourceType" not in json_record["types"]:
- errors.append("'resourceType' field is missing in 'types'.")
- elif not isinstance(json_record["types"]["resourceType"], str):
- errors.append("'resourceType' should be a string.")
-
- # Check for 'identifiers'
- if "identifiers" in json_record:
- if not isinstance(json_record["identifiers"], list):
- errors.append("'identifiers' should be a list.")
- else:
- for identifier in json_record["identifiers"]:
- if (
- not isinstance(identifier, dict)
- or "identifier" not in identifier
- or "identifierType" not in identifier
- ):
- errors.append(
- "Each identifier must be a dictionary with 'identifier' and 'identifierType' keys."
- )
+ if "contributors" not in json_record:
+ errors.append("'contributors' field is missing.")
+ elif not isinstance(json_record["contributors"], list) or len(json_record["contributors"]) == 0:
+ errors.append("'contributors' should be a non-empty list.")
+ else:
+ for contributor in json_record["contributors"]:
+ if not isinstance(contributor, dict) or "name" not in contributor or "contributorType" not in contributor:
+ errors.append("Each 'contributor' must have 'name' and 'contributorType'.")
+ if "nameIdentifiers" in contributor:
+ if not isinstance(contributor["nameIdentifiers"], list):
+ errors.append("'nameIdentifiers' should be a list.")
+ for name_id in contributor["nameIdentifiers"]:
+ if not isinstance(name_id, dict) or "nameIdentifier" not in name_id or "nameIdentifierScheme" not in name_id:
+ errors.append("Each 'nameIdentifier' should have 'nameIdentifier' and 'nameIdentifierScheme'.")
+ if "affiliation" in contributor:
+ if not isinstance(contributor["affiliation"], list):
+ errors.append("'affiliation' should be a list.")
+ for affiliation in contributor["affiliation"]:
+ if not isinstance(affiliation, dict) or "name" not in affiliation:
+ errors.append("Each 'affiliation' should have a 'name' key.")
+
+ # Check for 'descriptions'
+ if "descriptions" not in json_record:
+ errors.append("'descriptions' field is missing.")
+ elif not isinstance(json_record["descriptions"], list) or len(json_record["descriptions"]) == 0:
+ errors.append("'descriptions' should be a non-empty list.")
+ else:
+ for description in json_record["descriptions"]:
+ if not isinstance(description, dict) or "description" not in description or "descriptionType" not in description:
+ errors.append("Each 'description' must have 'description' and 'descriptionType'.")
- # Check for 'subjects'
- if "subjects" in json_record:
- if not isinstance(json_record["subjects"], list):
- errors.append("'subjects' should be a list.")
+ # Check for 'fundingReferences'
+ if "fundingReferences" in json_record:
+ if not isinstance(json_record["fundingReferences"], list):
+ errors.append("'fundingReferences' should be a list.")
else:
- for subject in json_record["subjects"]:
- if not isinstance(subject, dict) or "subject" not in subject:
- errors.append(
- "Each subject must be a dictionary with a 'subject' key."
- )
+ for fund_ref in json_record["fundingReferences"]:
+ if not isinstance(fund_ref, dict) or "funderName" not in fund_ref:
+ errors.append("Each 'fundingReference' must have 'funderName'.")
+ if "funderIdentifier" in fund_ref and "funderIdentifierType" not in fund_ref:
+ errors.append("'funderIdentifier' should have an associated 'funderIdentifierType'.")
# Check for 'relatedIdentifiers'
if "relatedIdentifiers" in json_record:
@@ -498,52 +452,116 @@ def validate_metadata(json_record):
errors.append("'relatedIdentifiers' should be a list.")
else:
for related_id in json_record["relatedIdentifiers"]:
- if (
- not isinstance(related_id, dict)
- or "relatedIdentifier" not in related_id
- ):
- errors.append(
- "Each relatedIdentifier must be a dictionary with a 'relatedIdentifier' key."
- )
+ if not isinstance(related_id, dict) or "relatedIdentifier" not in related_id or "relationType" not in related_id:
+ errors.append("Each 'relatedIdentifier' must have 'relatedIdentifier' and 'relationType'.")
+ if "relatedIdentifierType" not in related_id:
+ errors.append("Each 'relatedIdentifier' must have 'relatedIdentifierType'.")
# Check for 'rightsList'
if "rightsList" in json_record:
if not isinstance(json_record["rightsList"], list):
errors.append("'rightsList' should be a list.")
else:
- for rights in json_record["rightsList"]:
- if not isinstance(rights, dict) or "rights" not in rights:
- errors.append(
- "Each entry in 'rightsList' must be a dictionary with a 'rights' key."
- )
+ for right in json_record["rightsList"]:
+ if not isinstance(right, dict) or "rights" not in right:
+ errors.append("Each 'rightsList' entry must have 'rights'.")
+ if "rightsURI" in right and not isinstance(right["rightsURI"], str):
+ errors.append("'rightsURI' should be a string.")
+
+ # Check for 'subjects'
+ if "subjects" in json_record:
+ if not isinstance(json_record["subjects"], list):
+ errors.append("'subjects' should be a list.")
+ else:
+ for subject in json_record["subjects"]:
+ if not isinstance(subject, dict) or "subject" not in subject:
+ errors.append("Each 'subject' must have a 'subject' key.")
+
+ # Check for 'dates'
+ if "dates" not in json_record:
+ errors.append("'dates' field is missing.")
+ elif not isinstance(json_record["dates"], list) or len(json_record["dates"]) == 0:
+ errors.append("'dates' should be a non-empty list.")
+ else:
+ for date in json_record["dates"]:
+ if not isinstance(date, dict) or "date" not in date or "dateType" not in date:
+ errors.append("Each 'date' must have 'date' and 'dateType'.")
+
+ # Check for 'identifiers'
+ if "identifiers" not in json_record:
+ errors.append("'identifiers' field is missing.")
+ elif not isinstance(json_record["identifiers"], list) or len(json_record["identifiers"]) == 0:
+ errors.append("'identifiers' should be a non-empty list.")
+ else:
+ for identifier in json_record["identifiers"]:
+ if not isinstance(identifier, dict) or "identifier" not in identifier or "identifierType" not in identifier:
+ errors.append("Each 'identifier' must have 'identifier' and 'identifierType'.")
+
+ # Check for 'creators'
+ if "creators" not in json_record:
+ errors.append("'creators' field is missing.")
+ elif not isinstance(json_record["creators"], list) or len(json_record["creators"]) == 0:
+ errors.append("'creators' should be a non-empty list.")
+ else:
+ for creator in json_record["creators"]:
+ if not isinstance(creator, dict) or "name" not in creator:
+ errors.append("Each 'creator' must have 'name'.")
+ if "affiliation" in creator:
+ if not isinstance(creator["affiliation"], list):
+ errors.append("'affiliation' in 'creators' should be a list.")
+ for affiliation in creator["affiliation"]:
+ if not isinstance(affiliation, dict) or "name" not in affiliation:
+ errors.append("Each 'affiliation' in 'creators' must have a 'name'.")
# Check for 'geoLocations'
if "geoLocations" in json_record:
if not isinstance(json_record["geoLocations"], list):
errors.append("'geoLocations' should be a list.")
else:
- for location in json_record["geoLocations"]:
- if not isinstance(location, dict):
- errors.append("Each entry in 'geoLocations' must be a dictionary.")
- elif (
- "geoLocationPoint" not in location
- and "geoLocationBox" not in location
- and "geoLocationPlace" not in location
- ):
- errors.append(
- "Each geoLocation entry must contain at least one of 'geoLocationPoint', 'geoLocationBox', or 'geoLocationPlace'."
- )
-
- # Check for 'fundingReferences'
- if "fundingReferences" in json_record:
- if not isinstance(json_record["fundingReferences"], list):
- errors.append("'fundingReferences' should be a list.")
- else:
- for funding in json_record["fundingReferences"]:
- if not isinstance(funding, dict):
- errors.append("Each funding reference must be a dictionary.")
- if "funderName" not in funding:
- errors.append("Each funding reference must contain 'funderName'.")
+ for geo_loc in json_record["geoLocations"]:
+ if not isinstance(geo_loc, dict) or "geoLocationPlace" not in geo_loc:
+ errors.append("Each 'geoLocation' must have 'geoLocationPlace'.")
+ if "geoLocationPoint" in geo_loc:
+ point = geo_loc["geoLocationPoint"]
+ if not isinstance(point, dict) or "pointLatitude" not in point or "pointLongitude" not in point:
+ errors.append("'geoLocationPoint' must have 'pointLatitude' and 'pointLongitude'.")
+
+ # Check for 'formats'
+ if "formats" in json_record and (not isinstance(json_record["formats"], list) or len(json_record["formats"]) == 0):
+ errors.append("'formats' should be a non-empty list.")
+
+ # Check for 'language'
+ if "language" not in json_record:
+ errors.append("'language' field is missing.")
+ elif not isinstance(json_record["language"], str):
+ errors.append("'language' should be a string.")
+
+ # Check for 'version'
+ if "version" in json_record and not isinstance(json_record["version"], str):
+ errors.append("'version' should be a string.")
+
+ # Check for 'publisher'
+ if "publisher" not in json_record:
+ errors.append("'publisher' field is missing.")
+ elif not isinstance(json_record["publisher"], str):
+ errors.append("'publisher' should be a string.")
+
+ # Check for 'publicationYear'
+ if "publicationYear" not in json_record:
+ errors.append("'publicationYear' field is missing.")
+ elif not isinstance(json_record["publicationYear"], str):
+ errors.append("'publicationYear' should be a string.")
+
+ # Check for 'types'
+ if "types" not in json_record:
+ errors.append("'types' field is missing.")
+ elif not isinstance(json_record["types"], dict):
+ errors.append("'types' should be a dictionary.")
+ else:
+ if "resourceTypeGeneral" not in json_record["types"]:
+ errors.append("'types' must have 'resourceTypeGeneral'.")
+ if "resourceType" in json_record["types"] and not isinstance(json_record["types"]["resourceType"], str):
+ errors.append("'resourceType' should be a string if provided.")
# Return errors if any are found
if errors:
From 8693d66d0afac439aa031efeb54e8e58dc484d87 Mon Sep 17 00:00:00 2001
From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com>
Date: Thu, 24 Oct 2024 18:32:14 -0700
Subject: [PATCH 11/42] Update customize_schema.py
---
caltechdata_api/customize_schema.py | 46 ++++++++++++++---------------
1 file changed, 22 insertions(+), 24 deletions(-)
diff --git a/caltechdata_api/customize_schema.py b/caltechdata_api/customize_schema.py
index b07064d..8990529 100644
--- a/caltechdata_api/customize_schema.py
+++ b/caltechdata_api/customize_schema.py
@@ -404,26 +404,25 @@ def validate_metadata(json_record):
errors.append("Each entry in 'titles' must be a dictionary with a 'title' key.")
# Check for 'contributors'
- if "contributors" not in json_record:
- errors.append("'contributors' field is missing.")
- elif not isinstance(json_record["contributors"], list) or len(json_record["contributors"]) == 0:
- errors.append("'contributors' should be a non-empty list.")
- else:
- for contributor in json_record["contributors"]:
- if not isinstance(contributor, dict) or "name" not in contributor or "contributorType" not in contributor:
- errors.append("Each 'contributor' must have 'name' and 'contributorType'.")
- if "nameIdentifiers" in contributor:
- if not isinstance(contributor["nameIdentifiers"], list):
- errors.append("'nameIdentifiers' should be a list.")
- for name_id in contributor["nameIdentifiers"]:
- if not isinstance(name_id, dict) or "nameIdentifier" not in name_id or "nameIdentifierScheme" not in name_id:
- errors.append("Each 'nameIdentifier' should have 'nameIdentifier' and 'nameIdentifierScheme'.")
- if "affiliation" in contributor:
- if not isinstance(contributor["affiliation"], list):
- errors.append("'affiliation' should be a list.")
- for affiliation in contributor["affiliation"]:
- if not isinstance(affiliation, dict) or "name" not in affiliation:
- errors.append("Each 'affiliation' should have a 'name' key.")
+ if "contributors" in json_record:
+ if not isinstance(json_record["contributors"], list) or len(json_record["contributors"]) == 0:
+ errors.append("'contributors' should be a non-empty list.")
+ else:
+ for contributor in json_record["contributors"]:
+ if not isinstance(contributor, dict) or "name" not in contributor or "contributorType" not in contributor:
+ errors.append("Each 'contributor' must have 'name' and 'contributorType'.")
+ if "nameIdentifiers" in contributor:
+ if not isinstance(contributor["nameIdentifiers"], list):
+ errors.append("'nameIdentifiers' should be a list.")
+ for name_id in contributor["nameIdentifiers"]:
+ if not isinstance(name_id, dict) or "nameIdentifier" not in name_id or "nameIdentifierScheme" not in name_id:
+ errors.append("Each 'nameIdentifier' should have 'nameIdentifier' and 'nameIdentifierScheme'.")
+ if "affiliation" in contributor:
+ if not isinstance(contributor["affiliation"], list):
+ errors.append("'affiliation' should be a list.")
+ for affiliation in contributor["affiliation"]:
+ if not isinstance(affiliation, dict) or "name" not in affiliation:
+ errors.append("Each 'affiliation' should have a 'name' key.")
# Check for 'descriptions'
if "descriptions" not in json_record:
@@ -531,10 +530,9 @@ def validate_metadata(json_record):
errors.append("'formats' should be a non-empty list.")
# Check for 'language'
- if "language" not in json_record:
- errors.append("'language' field is missing.")
- elif not isinstance(json_record["language"], str):
- errors.append("'language' should be a string.")
+ if "language" in json_record:
+ if not isinstance(json_record["language"], str):
+ errors.append("'language' should be a string.")
# Check for 'version'
if "version" in json_record and not isinstance(json_record["version"], str):
From 3426b405a3883411edbd97bc564d0265e354a06b Mon Sep 17 00:00:00 2001
From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com>
Date: Thu, 24 Oct 2024 18:52:18 -0700
Subject: [PATCH 12/42] Create generator.py
---
caltechdata_api/tester/generator.py | 1 +
1 file changed, 1 insertion(+)
create mode 100644 caltechdata_api/tester/generator.py
diff --git a/caltechdata_api/tester/generator.py b/caltechdata_api/tester/generator.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/caltechdata_api/tester/generator.py
@@ -0,0 +1 @@
+
From f54d1e43cffd439293caf03c38de44f35588c624 Mon Sep 17 00:00:00 2001
From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com>
Date: Thu, 24 Oct 2024 18:53:01 -0700
Subject: [PATCH 13/42] Add files via upload
---
caltechdata_api/tester/fulltest.py | 83 ++++++++++++++
caltechdata_api/tester/helpers.py | 36 ++++++
caltechdata_api/tester/invalid_generator.py | 116 ++++++++++++++++++++
caltechdata_api/tester/tester.py | 50 +++++++++
4 files changed, 285 insertions(+)
create mode 100644 caltechdata_api/tester/fulltest.py
create mode 100644 caltechdata_api/tester/helpers.py
create mode 100644 caltechdata_api/tester/invalid_generator.py
create mode 100644 caltechdata_api/tester/tester.py
diff --git a/caltechdata_api/tester/fulltest.py b/caltechdata_api/tester/fulltest.py
new file mode 100644
index 0000000..1f1b6eb
--- /dev/null
+++ b/caltechdata_api/tester/fulltest.py
@@ -0,0 +1,83 @@
+import os
+import pytest
+from customize_schema import validate_metadata as validator43
+from helpers import load_json_path
+
+# Directories for valid and invalid JSON files
+VALID_DATACITE43_DIR = "../tests/data/datacite43/" # Directory for valid JSON files
+INVALID_DATACITE43_DIR = "../tests/data/invalid_datacite43/" # Directory for invalid JSON files
+
+# Function to get all JSON files in the directory
+def get_all_json_files(directory):
+ return [os.path.join(directory, f) for f in os.listdir(directory) if f.endswith('.json')]
+
+# Get lists of all valid and all invalid JSON files in their respective directories
+VALID_DATACITE43_FILES = get_all_json_files(VALID_DATACITE43_DIR)
+INVALID_DATACITE43_FILES = get_all_json_files(INVALID_DATACITE43_DIR)
+
+@pytest.mark.parametrize("valid_file", VALID_DATACITE43_FILES)
+def test_valid_json(valid_file):
+ """Test that valid example files validate successfully."""
+ print(f"Validating file: {valid_file}") # Added log for file being tested
+ json_data = load_json_path(valid_file)
+ validation_errors = None
+ try:
+ validation_errors = validator43(json_data)
+ except ValueError as e:
+ pytest.fail(f"Validation failed for: {valid_file}\nErrors: {str(e)}")
+
+ if validation_errors:
+ pytest.fail(f"Validation failed for: {valid_file}\nErrors: {validation_errors}")
+ else:
+ print(f"Validation passed for: {valid_file}")
+
+@pytest.mark.parametrize("invalid_file", INVALID_DATACITE43_FILES)
+def test_invalid_json(invalid_file):
+ """Test that invalid example files do not validate successfully."""
+ print(f"Validating file: {invalid_file}") # Added log for file being tested
+ json_data = load_json_path(invalid_file)
+ validation_errors = None
+ try:
+ validation_errors = validator43(json_data)
+ except ValueError:
+ print(f"Validation failed as expected for: {invalid_file}")
+ return # Test passes if validation raises a ValueError
+
+ # If no errors, the test fails because the file is expected to be invalid
+ if validation_errors:
+ print(f"Validation failed as expected for: {invalid_file}")
+ else:
+ pytest.fail(f"Validation passed unexpectedly for: {invalid_file}")
+
+if __name__ == "__main__":
+ # Manual test runner for valid files
+ failed_valid_files = []
+ for file in VALID_DATACITE43_FILES:
+ try:
+ test_valid_json(file)
+ except AssertionError as e:
+ failed_valid_files.append(file)
+ print(f"Error occurred in valid file: {file}\nError details: {e}")
+
+ if not failed_valid_files:
+ print("\nAll valid files passed validation. Test complete.")
+ else:
+ print("\nThe following valid files failed validation:")
+ for failed_file in failed_valid_files:
+ print(f"- {failed_file}")
+
+ # Manual test runner for invalid files
+ passed_invalid_files = []
+ for file in INVALID_DATACITE43_FILES:
+ try:
+ test_invalid_json(file)
+ except AssertionError as e:
+ passed_invalid_files.append(file)
+ print(f"Error occurred in invalid file: {file}\nError details: {e}")
+
+ if not passed_invalid_files:
+ print("\nAll invalid files failed validation as expected. Test is a success.")
+ else:
+ print("\nThe following invalid files unexpectedly passed validation:")
+ for passed_file in passed_invalid_files:
+ print(f"- {passed_file}")
diff --git a/caltechdata_api/tester/helpers.py b/caltechdata_api/tester/helpers.py
new file mode 100644
index 0000000..19aa5be
--- /dev/null
+++ b/caltechdata_api/tester/helpers.py
@@ -0,0 +1,36 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of DataCite.
+#
+# Copyright (C) 2015, 2016 CERN.
+#
+# DataCite is free software; you can redistribute it and/or modify it
+# under the terms of the Revised BSD License; see LICENSE file for
+# more details.
+
+"""Test helpers."""
+
+from __future__ import absolute_import, print_function
+
+import io
+import json
+import os
+from os.path import dirname, join
+
+
+def load_json_path(path):
+ """Helper method for loading a JSON example file from a path."""
+ path_base = dirname(__file__)
+ with io.open(join(path_base, path), encoding="utf-8") as file:
+ content = file.read()
+ return json.loads(content)
+
+
+def write_json_path(path, metadata):
+ """Helper method for writing a JSON example file to a path."""
+ path_base = dirname(__file__)
+ path_full = join(path_base, path)
+ print(path_full)
+ print(metadata)
+ with io.open(path_full, "w", encoding="utf-8") as file:
+ json.dump(metadata, file)
\ No newline at end of file
diff --git a/caltechdata_api/tester/invalid_generator.py b/caltechdata_api/tester/invalid_generator.py
new file mode 100644
index 0000000..02a0da3
--- /dev/null
+++ b/caltechdata_api/tester/invalid_generator.py
@@ -0,0 +1,116 @@
+import json
+import os
+
+# Directory to save invalid metadata JSON files
+INVALID_DATACITE43_DIR = "../tests/data/invalid_datacite43/"
+
+# Ensure the directory exists
+os.makedirs(INVALID_DATACITE43_DIR, exist_ok=True)
+
+# Helper function to save a dictionary as a JSON file
+def save_invalid_json(data, filename):
+ with open(os.path.join(INVALID_DATACITE43_DIR, filename), 'w') as f:
+ json.dump(data, f, indent=4)
+
+# Generate different invalid JSON examples
+invalid_metadata_examples = [
+ # Missing 'titles' field
+ {
+ "creators": [{"name": "John Doe"}],
+ "publisher": "Caltech",
+ "publicationYear": "2023",
+ "types": {"resourceTypeGeneral": "Dataset"}
+ },
+
+ # Empty 'titles' list
+ {
+ "titles": [],
+ "creators": [{"name": "John Doe"}],
+ "publisher": "Caltech",
+ "publicationYear": "2023",
+ "types": {"resourceTypeGeneral": "Dataset"}
+ },
+
+ # Missing 'creators' field
+ {
+ "titles": [{"title": "Sample Title"}],
+ "publisher": "Caltech",
+ "publicationYear": "2023",
+ "types": {"resourceTypeGeneral": "Dataset"}
+ },
+
+ # 'contributors' missing 'name' and 'contributorType'
+ {
+ "titles": [{"title": "Sample Title"}],
+ "creators": [{"name": "John Doe"}],
+ "contributors": [{}],
+ "publisher": "Caltech",
+ "publicationYear": "2023",
+ "types": {"resourceTypeGeneral": "Dataset"}
+ },
+
+ # Invalid 'descriptions' structure
+ {
+ "titles": [{"title": "Sample Title"}],
+ "creators": [{"name": "John Doe"}],
+ "descriptions": [{"description": "Sample Description"}], # Missing 'descriptionType'
+ "publisher": "Caltech",
+ "publicationYear": "2023",
+ "types": {"resourceTypeGeneral": "Dataset"}
+ },
+
+ # 'fundingReferences' missing 'funderName'
+ {
+ "titles": [{"title": "Sample Title"}],
+ "creators": [{"name": "John Doe"}],
+ "fundingReferences": [{"funderIdentifier": "1234"}], # Missing 'funderName'
+ "publisher": "Caltech",
+ "publicationYear": "2023",
+ "types": {"resourceTypeGeneral": "Dataset"}
+ },
+
+ # 'identifiers' missing 'identifier' and 'identifierType'
+ {
+ "titles": [{"title": "Sample Title"}],
+ "creators": [{"name": "John Doe"}],
+ "identifiers": [{}], # Missing 'identifier' and 'identifierType'
+ "publisher": "Caltech",
+ "publicationYear": "2023",
+ "types": {"resourceTypeGeneral": "Dataset"}
+ },
+
+ # 'dates' missing 'date' and 'dateType'
+ {
+ "titles": [{"title": "Sample Title"}],
+ "creators": [{"name": "John Doe"}],
+ "dates": [{}], # Missing 'date' and 'dateType'
+ "publisher": "Caltech",
+ "publicationYear": "2023",
+ "types": {"resourceTypeGeneral": "Dataset"}
+ },
+
+ # Missing 'publisher'
+ {
+ "titles": [{"title": "Sample Title"}],
+ "creators": [{"name": "John Doe"}],
+ "publicationYear": "2023",
+ "types": {"resourceTypeGeneral": "Dataset"}
+ },
+
+ # Invalid 'version' type (should be a string)
+ {
+ "titles": [{"title": "Sample Title"}],
+ "creators": [{"name": "John Doe"}],
+ "version": 1, # Incorrect type, should be a string
+ "publisher": "Caltech",
+ "publicationYear": "2023",
+ "types": {"resourceTypeGeneral": "Dataset"}
+ }
+]
+
+# Save each invalid example as a JSON file
+for i, invalid_json in enumerate(invalid_metadata_examples, start=1):
+ filename = f"invalid_metadata_{i}.json"
+ save_invalid_json(invalid_json, filename)
+
+print(f"Generated {len(invalid_metadata_examples)} invalid metadata files in {INVALID_DATACITE43_DIR}")
diff --git a/caltechdata_api/tester/tester.py b/caltechdata_api/tester/tester.py
new file mode 100644
index 0000000..72efe49
--- /dev/null
+++ b/caltechdata_api/tester/tester.py
@@ -0,0 +1,50 @@
+import os
+import pytest
+from customize_schema import validate_metadata as validator43
+from helpers import load_json_path
+
+# Define the directory containing the test JSON files
+VALID_DATACITE43_DIR = "../tests/data/datacite43/" # Directory for valid JSON files
+
+# Function to get all JSON files in the directory
+def get_all_json_files(directory):
+ return [os.path.join(directory, f) for f in os.listdir(directory) if f.endswith('.json')]
+
+# Get list of all valid JSON files in the directory
+VALID_DATACITE43_FILES = get_all_json_files(VALID_DATACITE43_DIR)
+
+@pytest.mark.parametrize("valid_file", VALID_DATACITE43_FILES)
+def test_valid_json(valid_file):
+ """Test that valid example files validate successfully."""
+ print(f"Validating file: {valid_file}") # Added log for file being tested
+ json_data = load_json_path(valid_file)
+ validation_errors = None
+ try:
+ validation_errors = validator43(json_data)
+ except ValueError as e:
+ pytest.fail(f"Validation failed for: {valid_file}\nErrors: {str(e)}")
+
+ if validation_errors:
+ pytest.fail(f"Validation failed for: {valid_file}\nErrors: {validation_errors}")
+ else:
+ print(f"Validation passed for: {valid_file}")
+
+if __name__ == "__main__":
+ # Track failures for manual testing
+ failed_files = []
+
+ # Run the tests and print results for each file
+ for file in VALID_DATACITE43_FILES:
+ try:
+ test_valid_json(file)
+ except AssertionError as e:
+ failed_files.append(file)
+ print(f"Error occurred in file: {file}\nError details: {e}")
+
+ # Print a summary of all failed files
+ if failed_files:
+ print("\nThe following files failed validation:")
+ for failed_file in failed_files:
+ print(f"- {failed_file}")
+ else:
+ print("\nAll files passed validation.")
From 08a092f9441fbcb2c828ca9334308dcb9da6aabc Mon Sep 17 00:00:00 2001
From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com>
Date: Fri, 25 Oct 2024 00:29:46 -0700
Subject: [PATCH 14/42] Delete caltechdata_api/tester directory
---
caltechdata_api/tester/fulltest.py | 83 --------------
caltechdata_api/tester/generator.py | 1 -
caltechdata_api/tester/helpers.py | 36 ------
caltechdata_api/tester/invalid_generator.py | 116 --------------------
caltechdata_api/tester/tester.py | 50 ---------
5 files changed, 286 deletions(-)
delete mode 100644 caltechdata_api/tester/fulltest.py
delete mode 100644 caltechdata_api/tester/generator.py
delete mode 100644 caltechdata_api/tester/helpers.py
delete mode 100644 caltechdata_api/tester/invalid_generator.py
delete mode 100644 caltechdata_api/tester/tester.py
diff --git a/caltechdata_api/tester/fulltest.py b/caltechdata_api/tester/fulltest.py
deleted file mode 100644
index 1f1b6eb..0000000
--- a/caltechdata_api/tester/fulltest.py
+++ /dev/null
@@ -1,83 +0,0 @@
-import os
-import pytest
-from customize_schema import validate_metadata as validator43
-from helpers import load_json_path
-
-# Directories for valid and invalid JSON files
-VALID_DATACITE43_DIR = "../tests/data/datacite43/" # Directory for valid JSON files
-INVALID_DATACITE43_DIR = "../tests/data/invalid_datacite43/" # Directory for invalid JSON files
-
-# Function to get all JSON files in the directory
-def get_all_json_files(directory):
- return [os.path.join(directory, f) for f in os.listdir(directory) if f.endswith('.json')]
-
-# Get list of all valid JSON files in the directory
-VALID_DATACITE43_FILES = get_all_json_files(VALID_DATACITE43_DIR)
-INVALID_DATACITE43_FILES = get_all_json_files(INVALID_DATACITE43_DIR)
-
-@pytest.mark.parametrize("valid_file", VALID_DATACITE43_FILES)
-def test_valid_json(valid_file):
- """Test that valid example files validate successfully."""
- print(f"Validating file: {valid_file}") # Added log for file being tested
- json_data = load_json_path(valid_file)
- validation_errors = None
- try:
- validation_errors = validator43(json_data)
- except ValueError as e:
- pytest.fail(f"Validation failed for: {valid_file}\nErrors: {str(e)}")
-
- if validation_errors:
- pytest.fail(f"Validation failed for: {valid_file}\nErrors: {validation_errors}")
- else:
- print(f"Validation passed for: {valid_file}")
-
-@pytest.mark.parametrize("invalid_file", INVALID_DATACITE43_FILES)
-def test_invalid_json(invalid_file):
- """Test that invalid example files do not validate successfully."""
- print(f"Validating file: {invalid_file}") # Added log for file being tested
- json_data = load_json_path(invalid_file)
- validation_errors = None
- try:
- validation_errors = validator43(json_data)
- except ValueError:
- print(f"Validation failed as expected for: {invalid_file}")
- return # Test passes if validation raises a ValueError
-
- # If no errors, the test fails because the file is expected to be invalid
- if validation_errors:
- print(f"Validation failed as expected for: {invalid_file}")
- else:
- pytest.fail(f"Validation passed unexpectedly for: {invalid_file}")
-
-if __name__ == "__main__":
- # Manual test runner for valid files
- failed_valid_files = []
- for file in VALID_DATACITE43_FILES:
- try:
- test_valid_json(file)
- except AssertionError as e:
- failed_valid_files.append(file)
- print(f"Error occurred in valid file: {file}\nError details: {e}")
-
- if not failed_valid_files:
- print("\nAll valid files passed validation. Test complete.")
- else:
- print("\nThe following valid files failed validation:")
- for failed_file in failed_valid_files:
- print(f"- {failed_file}")
-
- # Manual test runner for invalid files
- passed_invalid_files = []
- for file in INVALID_DATACITE43_FILES:
- try:
- test_invalid_json(file)
- except AssertionError as e:
- passed_invalid_files.append(file)
- print(f"Error occurred in invalid file: {file}\nError details: {e}")
-
- if not passed_invalid_files:
- print("\nAll invalid files failed validation as expected. Test is a success.")
- else:
- print("\nThe following invalid files unexpectedly passed validation:")
- for passed_file in passed_invalid_files:
- print(f"- {passed_file}")
diff --git a/caltechdata_api/tester/generator.py b/caltechdata_api/tester/generator.py
deleted file mode 100644
index 8b13789..0000000
--- a/caltechdata_api/tester/generator.py
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/caltechdata_api/tester/helpers.py b/caltechdata_api/tester/helpers.py
deleted file mode 100644
index 19aa5be..0000000
--- a/caltechdata_api/tester/helpers.py
+++ /dev/null
@@ -1,36 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# This file is part of DataCite.
-#
-# Copyright (C) 2015, 2016 CERN.
-#
-# DataCite is free software; you can redistribute it and/or modify it
-# under the terms of the Revised BSD License; see LICENSE file for
-# more details.
-
-"""Test helpers."""
-
-from __future__ import absolute_import, print_function
-
-import io
-import json
-import os
-from os.path import dirname, join
-
-
-def load_json_path(path):
- """Helper method for loading a JSON example file from a path."""
- path_base = dirname(__file__)
- with io.open(join(path_base, path), encoding="utf-8") as file:
- content = file.read()
- return json.loads(content)
-
-
-def write_json_path(path, metadata):
- """Helper method for writing a JSON example file to a path."""
- path_base = dirname(__file__)
- path_full = join(path_base, path)
- print(path_full)
- print(metadata)
- with io.open(path_full, "w", encoding="utf-8") as file:
- json.dump(metadata, file)
\ No newline at end of file
diff --git a/caltechdata_api/tester/invalid_generator.py b/caltechdata_api/tester/invalid_generator.py
deleted file mode 100644
index 02a0da3..0000000
--- a/caltechdata_api/tester/invalid_generator.py
+++ /dev/null
@@ -1,116 +0,0 @@
-import json
-import os
-
-# Directory to save invalid metadata JSON files
-INVALID_DATACITE43_DIR = "../tests/data/invalid_datacite43/"
-
-# Ensure the directory exists
-os.makedirs(INVALID_DATACITE43_DIR, exist_ok=True)
-
-# Helper function to save a dictionary as a JSON file
-def save_invalid_json(data, filename):
- with open(os.path.join(INVALID_DATACITE43_DIR, filename), 'w') as f:
- json.dump(data, f, indent=4)
-
-# Generate different invalid JSON examples
-invalid_metadata_examples = [
- # Missing 'titles' field
- {
- "creators": [{"name": "John Doe"}],
- "publisher": "Caltech",
- "publicationYear": "2023",
- "types": {"resourceTypeGeneral": "Dataset"}
- },
-
- # Empty 'titles' list
- {
- "titles": [],
- "creators": [{"name": "John Doe"}],
- "publisher": "Caltech",
- "publicationYear": "2023",
- "types": {"resourceTypeGeneral": "Dataset"}
- },
-
- # Missing 'creators' field
- {
- "titles": [{"title": "Sample Title"}],
- "publisher": "Caltech",
- "publicationYear": "2023",
- "types": {"resourceTypeGeneral": "Dataset"}
- },
-
- # 'contributors' missing 'name' and 'contributorType'
- {
- "titles": [{"title": "Sample Title"}],
- "creators": [{"name": "John Doe"}],
- "contributors": [{}],
- "publisher": "Caltech",
- "publicationYear": "2023",
- "types": {"resourceTypeGeneral": "Dataset"}
- },
-
- # Invalid 'descriptions' structure
- {
- "titles": [{"title": "Sample Title"}],
- "creators": [{"name": "John Doe"}],
- "descriptions": [{"description": "Sample Description"}], # Missing 'descriptionType'
- "publisher": "Caltech",
- "publicationYear": "2023",
- "types": {"resourceTypeGeneral": "Dataset"}
- },
-
- # 'fundingReferences' missing 'funderName'
- {
- "titles": [{"title": "Sample Title"}],
- "creators": [{"name": "John Doe"}],
- "fundingReferences": [{"funderIdentifier": "1234"}], # Missing 'funderName'
- "publisher": "Caltech",
- "publicationYear": "2023",
- "types": {"resourceTypeGeneral": "Dataset"}
- },
-
- # 'identifiers' missing 'identifier' and 'identifierType'
- {
- "titles": [{"title": "Sample Title"}],
- "creators": [{"name": "John Doe"}],
- "identifiers": [{}], # Missing 'identifier' and 'identifierType'
- "publisher": "Caltech",
- "publicationYear": "2023",
- "types": {"resourceTypeGeneral": "Dataset"}
- },
-
- # 'dates' missing 'date' and 'dateType'
- {
- "titles": [{"title": "Sample Title"}],
- "creators": [{"name": "John Doe"}],
- "dates": [{}], # Missing 'date' and 'dateType'
- "publisher": "Caltech",
- "publicationYear": "2023",
- "types": {"resourceTypeGeneral": "Dataset"}
- },
-
- # Missing 'publisher'
- {
- "titles": [{"title": "Sample Title"}],
- "creators": [{"name": "John Doe"}],
- "publicationYear": "2023",
- "types": {"resourceTypeGeneral": "Dataset"}
- },
-
- # Invalid 'version' type (should be a string)
- {
- "titles": [{"title": "Sample Title"}],
- "creators": [{"name": "John Doe"}],
- "version": 1, # Incorrect type, should be a string
- "publisher": "Caltech",
- "publicationYear": "2023",
- "types": {"resourceTypeGeneral": "Dataset"}
- }
-]
-
-# Save each invalid example as a JSON file
-for i, invalid_json in enumerate(invalid_metadata_examples, start=1):
- filename = f"invalid_metadata_{i}.json"
- save_invalid_json(invalid_json, filename)
-
-print(f"Generated {len(invalid_metadata_examples)} invalid metadata files in {INVALID_DATACITE43_DIR}")
diff --git a/caltechdata_api/tester/tester.py b/caltechdata_api/tester/tester.py
deleted file mode 100644
index 72efe49..0000000
--- a/caltechdata_api/tester/tester.py
+++ /dev/null
@@ -1,50 +0,0 @@
-import os
-import pytest
-from customize_schema import validate_metadata as validator43
-from helpers import load_json_path
-
-# Define the directory containing the test JSON files
-VALID_DATACITE43_DIR = "../tests/data/datacite43/" # Directory for valid JSON files
-
-# Function to get all JSON files in the directory
-def get_all_json_files(directory):
- return [os.path.join(directory, f) for f in os.listdir(directory) if f.endswith('.json')]
-
-# Get list of all valid JSON files in the directory
-VALID_DATACITE43_FILES = get_all_json_files(VALID_DATACITE43_DIR)
-
-@pytest.mark.parametrize("valid_file", VALID_DATACITE43_FILES)
-def test_valid_json(valid_file):
- """Test that valid example files validate successfully."""
- print(f"Validating file: {valid_file}") # Added log for file being tested
- json_data = load_json_path(valid_file)
- validation_errors = None
- try:
- validation_errors = validator43(json_data)
- except ValueError as e:
- pytest.fail(f"Validation failed for: {valid_file}\nErrors: {str(e)}")
-
- if validation_errors:
- pytest.fail(f"Validation failed for: {valid_file}\nErrors: {validation_errors}")
- else:
- print(f"Validation passed for: {valid_file}")
-
-if __name__ == "__main__":
- # Track failures for manual testing
- failed_files = []
-
- # Run the tests and print results for each file
- for file in VALID_DATACITE43_FILES:
- try:
- test_valid_json(file)
- except AssertionError as e:
- failed_files.append(file)
- print(f"Error occurred in file: {file}\nError details: {e}")
-
- # Print a summary of all failed files
- if failed_files:
- print("\nThe following files failed validation:")
- for failed_file in failed_files:
- print(f"- {failed_file}")
- else:
- print("\nAll files passed validation.")
From 586f7bf944a6bcf26f06900fffd51baede0e5989 Mon Sep 17 00:00:00 2001
From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com>
Date: Fri, 25 Oct 2024 00:30:02 -0700
Subject: [PATCH 15/42] Create code.py
---
caltechdata_api/tester/code.py | 1 +
1 file changed, 1 insertion(+)
create mode 100644 caltechdata_api/tester/code.py
diff --git a/caltechdata_api/tester/code.py b/caltechdata_api/tester/code.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/caltechdata_api/tester/code.py
@@ -0,0 +1 @@
+
From 055292e202c85e4ce2da5d5bec1ffb814c3b65db Mon Sep 17 00:00:00 2001
From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com>
Date: Fri, 25 Oct 2024 00:32:50 -0700
Subject: [PATCH 16/42] Validator
---
caltechdata_api/tester/invalid_generator.py | 116 +++++
.../tester/missing_fields_generator.py | 403 ++++++++++++++++++
caltechdata_api/tester/test_unit.py | 122 ++++++
.../tester/validatorfordownload.py | 54 +++
4 files changed, 695 insertions(+)
create mode 100644 caltechdata_api/tester/invalid_generator.py
create mode 100644 caltechdata_api/tester/missing_fields_generator.py
create mode 100644 caltechdata_api/tester/test_unit.py
create mode 100644 caltechdata_api/tester/validatorfordownload.py
diff --git a/caltechdata_api/tester/invalid_generator.py b/caltechdata_api/tester/invalid_generator.py
new file mode 100644
index 0000000..02a0da3
--- /dev/null
+++ b/caltechdata_api/tester/invalid_generator.py
@@ -0,0 +1,116 @@
+import json
+import os
+
+# Directory to save invalid metadata JSON files
+INVALID_DATACITE43_DIR = "../tests/data/invalid_datacite43/"
+
+# Ensure the directory exists
+os.makedirs(INVALID_DATACITE43_DIR, exist_ok=True)
+
+# Write `data` as indented JSON to `filename` inside INVALID_DATACITE43_DIR (an existing file is overwritten)
+def save_invalid_json(data, filename):
+    with open(os.path.join(INVALID_DATACITE43_DIR, filename), 'w') as out_file:
+        json.dump(data, out_file, indent=4)
+
+# Generate different invalid JSON examples
+invalid_metadata_examples = [
+ # Missing 'titles' field
+ {
+ "creators": [{"name": "John Doe"}],
+ "publisher": "Caltech",
+ "publicationYear": "2023",
+ "types": {"resourceTypeGeneral": "Dataset"}
+ },
+
+ # Empty 'titles' list
+ {
+ "titles": [],
+ "creators": [{"name": "John Doe"}],
+ "publisher": "Caltech",
+ "publicationYear": "2023",
+ "types": {"resourceTypeGeneral": "Dataset"}
+ },
+
+ # Missing 'creators' field
+ {
+ "titles": [{"title": "Sample Title"}],
+ "publisher": "Caltech",
+ "publicationYear": "2023",
+ "types": {"resourceTypeGeneral": "Dataset"}
+ },
+
+ # 'contributors' missing 'name' and 'contributorType'
+ {
+ "titles": [{"title": "Sample Title"}],
+ "creators": [{"name": "John Doe"}],
+ "contributors": [{}],
+ "publisher": "Caltech",
+ "publicationYear": "2023",
+ "types": {"resourceTypeGeneral": "Dataset"}
+ },
+
+ # Invalid 'descriptions' structure
+ {
+ "titles": [{"title": "Sample Title"}],
+ "creators": [{"name": "John Doe"}],
+ "descriptions": [{"description": "Sample Description"}], # Missing 'descriptionType'
+ "publisher": "Caltech",
+ "publicationYear": "2023",
+ "types": {"resourceTypeGeneral": "Dataset"}
+ },
+
+ # 'fundingReferences' missing 'funderName'
+ {
+ "titles": [{"title": "Sample Title"}],
+ "creators": [{"name": "John Doe"}],
+ "fundingReferences": [{"funderIdentifier": "1234"}], # Missing 'funderName'
+ "publisher": "Caltech",
+ "publicationYear": "2023",
+ "types": {"resourceTypeGeneral": "Dataset"}
+ },
+
+ # 'identifiers' missing 'identifier' and 'identifierType'
+ {
+ "titles": [{"title": "Sample Title"}],
+ "creators": [{"name": "John Doe"}],
+ "identifiers": [{}], # Missing 'identifier' and 'identifierType'
+ "publisher": "Caltech",
+ "publicationYear": "2023",
+ "types": {"resourceTypeGeneral": "Dataset"}
+ },
+
+ # 'dates' missing 'date' and 'dateType'
+ {
+ "titles": [{"title": "Sample Title"}],
+ "creators": [{"name": "John Doe"}],
+ "dates": [{}], # Missing 'date' and 'dateType'
+ "publisher": "Caltech",
+ "publicationYear": "2023",
+ "types": {"resourceTypeGeneral": "Dataset"}
+ },
+
+ # Missing 'publisher'
+ {
+ "titles": [{"title": "Sample Title"}],
+ "creators": [{"name": "John Doe"}],
+ "publicationYear": "2023",
+ "types": {"resourceTypeGeneral": "Dataset"}
+ },
+
+ # Invalid 'version' type (should be a string)
+ {
+ "titles": [{"title": "Sample Title"}],
+ "creators": [{"name": "John Doe"}],
+ "version": 1, # Incorrect type, should be a string
+ "publisher": "Caltech",
+ "publicationYear": "2023",
+ "types": {"resourceTypeGeneral": "Dataset"}
+ }
+]
+
+# Save each invalid example as a JSON file
+for i, invalid_json in enumerate(invalid_metadata_examples, start=1):
+ filename = f"invalid_metadata_{i}.json"
+ save_invalid_json(invalid_json, filename)
+
+print(f"Generated {len(invalid_metadata_examples)} invalid metadata files in {INVALID_DATACITE43_DIR}")
diff --git a/caltechdata_api/tester/missing_fields_generator.py b/caltechdata_api/tester/missing_fields_generator.py
new file mode 100644
index 0000000..2ce14b3
--- /dev/null
+++ b/caltechdata_api/tester/missing_fields_generator.py
@@ -0,0 +1,403 @@
+import json
+import os
+import copy
+
+# Directory to store invalid test files
+INVALID_DATA_DIR = "../tests/data/invalid_datacite43"
+os.makedirs(INVALID_DATA_DIR, exist_ok=True)
+
+# Known-good metadata record, defined inline, used as the base for every invalid variant below
+valid_metadata = {
+ "contributors": [
+ {
+ "nameIdentifiers": [
+ {
+ "nameIdentifier": "grid.20861.3d",
+ "nameIdentifierScheme": "GRID"
+ }
+ ],
+ "name": "California Institute of Techonolgy, Pasadena, CA (US)",
+ "contributorType": "HostingInstitution"
+ },
+ {
+ "affiliation": [
+ {
+ "name": "California Institute of Technology, Pasadena, CA (US)"
+ }
+ ],
+ "nameIdentifiers": [
+ {
+ "nameIdentifier": "0000-0001-5383-8462",
+ "nameIdentifierScheme": "ORCID"
+ }
+ ],
+ "name": "Roehl, C. M.",
+ "contributorType": "DataCurator"
+ },
+ {
+ "affiliation": [
+ {
+ "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
+ }
+ ],
+ "nameIdentifiers": [
+ {
+ "nameIdentifier": "0000-0001-9947-1053",
+ "nameIdentifierScheme": "ORCID"
+ },
+ {
+ "nameIdentifier": "D-2563-2012",
+ "nameIdentifierScheme": "ResearcherID"
+ }
+ ],
+ "name": "Kimberly Strong",
+ "contributorType": "ContactPerson"
+ },
+ {
+ "name": "TCCON",
+ "contributorType": "ResearchGroup"
+ }
+ ],
+ "descriptions": [
+ {
+ "descriptionType": "Abstract",
+ "description": "<br>The Total Carbon Column Observing Network (TCCON) is a network of ground-based Fourier Transform Spectrometers that record direct solar absorption spectra of the atmosphere in the near-infrared. From these spectra, accurate and precise column-averaged abundances of atmospheric constituents including CO2, CH4, N2O, HF, CO, H2O, and HDO, are retrieved. This data set contains observations from the TCCON station at Eureka, Canada."
+ },
+ {
+ "descriptionType": "Other",
+ "description": "<br>Cite this record as:<br>Strong, K., Roche, S., Franklin, J. E., Mendonca, J., Lutsch, E., Weaver, D., \u2026 Lindenmaier, R. (2019). TCCON data from Eureka (CA), Release GGG2014.R3 [Data set]. CaltechDATA. https://doi.org/10.14291/tccon.ggg2014.eureka01.r3<br>or choose a different citation style.<br>Download Citation<br>"
+ },
+ {
+ "descriptionType": "Other",
+ "description": "<br>Unique Views: 161<br>Unique Downloads: 7<br>between January 31, 2019 and July 02, 2020<br>More info on how stats are collected<br>"
+ }
+ ],
+ "fundingReferences": [
+ {
+ "funderName": "Atlantic Innovation Fund"
+ },
+ {
+ "funderName": "Canada Foundation for Innovation",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.439998.6"
+ },
+ {
+ "funderName": "Canadian Foundation for Climate and Atmospheric Sciences"
+ },
+ {
+ "funderName": "Canadian Space Agency",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.236846.d"
+ },
+ {
+ "funderName": "Environment and Climate Change Canada",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.410334.1"
+ },
+ {
+ "funderName": "Government of Canada (International Polar Year funding)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.451254.3"
+ },
+ {
+ "funderName": "Natural Sciences and Engineering Research Council of Canada",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.452912.9"
+ },
+ {
+ "funderName": "Polar Commission (Northern Scientific Training Program)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.465477.3"
+ },
+ {
+ "funderName": "Nova Scotia Research Innovation Trust"
+ },
+ {
+ "funderName": "Ministry of Research and Innovation (Ontario Innovation Trust and Ontario Research Fund)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.451078.f"
+ },
+ {
+ "funderName": "Natural Resources Canada (Polar Continental Shelf Program)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.202033.0"
+ }
+ ],
+ "language": "eng",
+ "relatedIdentifiers": [
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.documentation.R0/1221662",
+ "relationType": "IsDocumentedBy",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R0/1149271",
+ "relationType": "IsNewVersionOf",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "https://tccon-wiki.caltech.edu/Network_Policy/Data_Use_Policy/Data_Description",
+ "relationType": "IsDocumentedBy",
+ "relatedIdentifierType": "URL"
+ },
+ {
+ "relatedIdentifier": "https://tccon-wiki.caltech.edu/Sites",
+ "relationType": "IsDocumentedBy",
+ "relatedIdentifierType": "URL"
+ },
+ {
+ "relatedIdentifier": "10.14291/TCCON.GGG2014",
+ "relationType": "IsPartOf",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R1/1325515",
+ "relationType": "IsNewVersionOf",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R2",
+ "relationType": "IsNewVersionOf",
+ "relatedIdentifierType": "DOI"
+ }
+ ],
+ "rightsList": [
+ {
+ "rights": "TCCON Data License",
+ "rightsURI": "https://data.caltech.edu/tindfiles/serve/8298981c-6613-4ed9-9c54-5ef8fb5180f4/"
+ }
+ ],
+ "subjects": [
+ {
+ "subject": "atmospheric trace gases"
+ },
+ {
+ "subject": "CO2"
+ },
+ {
+ "subject": "CH4"
+ },
+ {
+ "subject": "CO"
+ },
+ {
+ "subject": "N2O"
+ },
+ {
+ "subject": "column-averaged dry-air mole fractions"
+ },
+ {
+ "subject": "remote sensing"
+ },
+ {
+ "subject": "FTIR spectroscopy"
+ },
+ {
+ "subject": "TCCON"
+ }
+ ],
+ "version": "R3",
+ "titles": [
+ {
+ "title": "TCCON data from Eureka (CA), Release GGG2014.R3"
+ }
+ ],
+ "formats": [
+ "application/x-netcdf"
+ ],
+ "dates": [
+ {
+ "date": "2019-01-31",
+ "dateType": "Created"
+ },
+ {
+ "date": "2020-07-01",
+ "dateType": "Updated"
+ },
+ {
+ "date": "2010-07-24/2019-08-15",
+ "dateType": "Collected"
+ },
+ {
+ "date": "2019-01-31",
+ "dateType": "Submitted"
+ },
+ {
+ "date": "2019-01-31",
+ "dateType": "Issued"
+ }
+ ],
+ "publicationYear": "2019",
+ "publisher": "CaltechDATA",
+ "types": {
+ "resourceTypeGeneral": "Dataset",
+ "resourceType": "Dataset"
+ },
+ "identifiers": [
+ {
+ "identifier": "10.14291/tccon.ggg2014.eureka01.R3",
+ "identifierType": "DOI"
+ },
+ {
+ "identifier": "1171",
+ "identifierType": "CaltechDATA_Identifier"
+ },
+ {
+ "identifier": "GGG2014",
+ "identifierType": "Software_Version"
+ },
+ {
+ "identifier": "eu",
+ "identifierType": "id"
+ },
+ {
+ "identifier": "eureka01",
+ "identifierType": "longName"
+ },
+ {
+ "identifier": "R1",
+ "identifierType": "Data_Revision"
+ }
+ ],
+ "creators": [
+ {
+ "affiliation": [
+ {
+ "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
+ }
+ ],
+ "name": "Strong, K."
+ },
+ {
+ "affiliation": [
+ {
+ "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
+ }
+ ],
+ "name": "Roche, S."
+ },
+ {
+ "affiliation": [
+ {
+ "name": "School of Engineering and Applied Sciences, Harvard University, Cambridge, MA (USA)"
+ }
+ ],
+ "name": "Franklin, J. E."
+ },
+ {
+ "affiliation": [
+ {
+ "name": "Environment and Climate Change Canada, Downsview, ON (CA)"
+ }
+ ],
+ "name": "Mendonca, J."
+ },
+ {
+ "affiliation": [
+ {
+ "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
+ }
+ ],
+ "name": "Lutsch, E."
+ },
+ {
+ "affiliation": [
+ {
+ "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
+ }
+ ],
+ "name": "Weaver, D."
+ },
+ {
+ "affiliation": [
+ {
+ "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
+ }
+ ],
+ "name": "Fogal, P. F."
+ },
+ {
+ "affiliation": [
+ {
+ "name": "Department of Physics & Atmospheric Science, Dalhousie University, Halifax, NS, CA"
+ }
+ ],
+ "name": "Drummond, J. R."
+ },
+ {
+ "affiliation": [
+ {
+ "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
+ },
+ {
+ "name": "UCAR Center for Science Education, Boulder, CO (US)"
+ }
+ ],
+ "name": "Batchelor, R."
+ },
+ {
+ "affiliation": [
+ {
+ "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
+ },
+ {
+ "name": "Pacific Northwest National Laboratory, Richland, WA (US)"
+ }
+ ],
+ "name": "Lindenmaier, R."
+ }
+ ],
+ "geoLocations": [
+ {
+ "geoLocationPlace": "Eureka, NU (CA)",
+ "geoLocationPoint": {
+ "pointLatitude": "80.05",
+ "pointLongitude": "-86.42"
+ }
+ }
+ ],
+ "schemaVersion": "http://datacite.org/schema/kernel-4"
+}
+
+# Dump `metadata` as indented JSON to `filename` inside INVALID_DATA_DIR, then print the path that was written
+def save_invalid_file(metadata, filename):
+    target = os.path.join(INVALID_DATA_DIR, filename)
+    with open(target, 'w') as out_file:
+        json.dump(metadata, out_file, indent=4)
+    print(f"Created: {target}")
+
+# Build each invalid variant from its own deep copy of valid_metadata, alter it, and save it under a descriptive file name
+
+missing_creators = copy.deepcopy(valid_metadata)
+missing_creators.pop("creators", None)
+save_invalid_file(missing_creators, "missing_creators.json")
+
+type_error_creators = copy.deepcopy(valid_metadata)
+type_error_creators["creators"] = "Incorrect type"
+save_invalid_file(type_error_creators, "type_error_creators.json")
+
+unmapped_vocab_contributor = copy.deepcopy(valid_metadata)
+unmapped_vocab_contributor["contributors"][0]["contributorType"] = "UnknownType"
+save_invalid_file(unmapped_vocab_contributor, "unmapped_vocab_contributor.json")
+
+invalid_date_format = copy.deepcopy(valid_metadata)
+invalid_date_format["dates"][0]["date"] = "31-01-2019" # DD-MM-YYYY; the base record writes its dates as YYYY-MM-DD
+save_invalid_file(invalid_date_format, "invalid_date_format.json")
+
+missing_publisher = copy.deepcopy(valid_metadata)
+missing_publisher.pop("publisher", None)
+save_invalid_file(missing_publisher, "missing_publisher.json")
+
+type_error_publication_year = copy.deepcopy(valid_metadata)
+type_error_publication_year["publicationYear"] = "Two Thousand Nineteen"
+save_invalid_file(type_error_publication_year, "type_error_publication_year.json")
+
+unmapped_vocab_related_identifier = copy.deepcopy(valid_metadata)
+unmapped_vocab_related_identifier["relatedIdentifiers"][0]["relatedIdentifierType"] = "UNKNOWN_TYPE"
+save_invalid_file(unmapped_vocab_related_identifier, "unmapped_vocab_related_identifier.json")
+
+multiple_errors = copy.deepcopy(valid_metadata)
+multiple_errors.pop("creators", None)
+multiple_errors["dates"][0]["date"] = "31-01-2019" # DD-MM-YYYY; the base record writes its dates as YYYY-MM-DD
+multiple_errors["titles"][0]["title"] = "A" * 300
+save_invalid_file(multiple_errors, "multiple_errors.json")
diff --git a/caltechdata_api/tester/test_unit.py b/caltechdata_api/tester/test_unit.py
new file mode 100644
index 0000000..5d1cad6
--- /dev/null
+++ b/caltechdata_api/tester/test_unit.py
@@ -0,0 +1,122 @@
+import os
+import pytest
+from customize_schema import validate_metadata as validator43
+from helpers import load_json_path
+import logging
+from tqdm import tqdm
+
+# Directories for valid and invalid JSON files
+# NOTE(review): both paths are relative, so they are resolved against the
+# current working directory of the process, not against this file's location.
+VALID_DATACITE43_DIR = "../tests/data/datacite43/"
+INVALID_DATACITE43_DIR = "../tests/data/invalid_datacite43/"
+
+# Function to get all JSON files in the directory
+def get_all_json_files(directory):
+    """Return the paths of every ``.json`` entry directly inside *directory*.
+
+    The result is sorted because ``os.listdir`` returns entries in arbitrary
+    order; a stable order keeps the parametrized test ids and their run order
+    reproducible between runs and machines.
+
+    Args:
+        directory: Path of the directory to scan (not recursive).
+
+    Returns:
+        A sorted list of ``os.path.join(directory, name)`` for each entry
+        whose name ends with ``.json``.
+
+    Raises:
+        OSError: Propagated from ``os.listdir`` (for example when the
+            directory does not exist).
+    """
+    return sorted(
+        os.path.join(directory, entry)
+        for entry in os.listdir(directory)
+        if entry.endswith(".json")
+    )
+
+# Get list of all valid JSON files in the directory
+# Both lists are built when this module is imported (os.listdir runs at
+# import time) and are the argument values handed to pytest.mark.parametrize.
+VALID_DATACITE43_FILES = get_all_json_files(VALID_DATACITE43_DIR)
+INVALID_DATACITE43_FILES = get_all_json_files(INVALID_DATACITE43_DIR)
+
+@pytest.mark.parametrize("valid_file", VALID_DATACITE43_FILES)
+def test_valid_json(valid_file):
+    """Check that a known-good example file is accepted by the validator.
+
+    The test fails when the validator raises ``ValueError`` or returns a
+    truthy value (treated as a collection of validation errors).
+    """
+    print(f"\nValidating file: {valid_file}")  # Log for file being tested
+    metadata = load_json_path(valid_file)
+
+    try:
+        errors = validator43(metadata)
+    except ValueError as exc:
+        # pytest.fail raises, so nothing below runs on this path.
+        pytest.fail(f"Validation failed for: {valid_file}\nErrors: {str(exc)}")
+
+    if not errors:
+        print(f"Validation passed for: {valid_file}")
+        return
+    pytest.fail(f"Validation failed for: {valid_file}\nErrors: {errors}")
+
+@pytest.mark.parametrize("invalid_file", INVALID_DATACITE43_FILES)
+def test_invalid_json(invalid_file):
+    """Check that a known-bad example file is rejected by the validator.
+
+    Rejection means either a raised ``ValueError`` or a truthy return value;
+    anything else fails the test.
+    """
+    print(f"\nValidating file: {invalid_file}")  # Log for file being tested
+    metadata = load_json_path(invalid_file)
+
+    try:
+        errors = validator43(metadata)
+    except ValueError:
+        raised = True
+        errors = None
+    else:
+        raised = False
+
+    if raised or errors:
+        print(f"Validation failed as expected for: {invalid_file}")
+    else:
+        pytest.fail(f"Validation passed unexpectedly for: {invalid_file}")
+
+@pytest.mark.parametrize("missing_field_file", [
+    {"file": "../tests/data/missing_creators.json", "missing_field": "creators"},
+    {"file": "../tests/data/missing_titles.json", "missing_field": "titles"},
+])
+def test_missing_required_fields(missing_field_file):
+    """Expect a ``ValueError`` naming the required field the file lacks.
+
+    Each parameter is a dict giving the fixture path (``file``) and the name
+    of the field that was removed from it (``missing_field``).
+    """
+    field = missing_field_file["missing_field"]
+    path = missing_field_file["file"]
+    print(f"\nTesting missing field: {field} in file: {path}")
+
+    metadata = load_json_path(path)
+    expected_message = f"Missing required metadata field: {field}"
+    with pytest.raises(ValueError, match=expected_message):
+        validator43(metadata)
+
+@pytest.mark.parametrize("type_error_file", [
+    {"file": "../tests/data/type_error_creators.json", "field": "creators"},
+    {"file": "../tests/data/type_error_dates.json", "field": "dates"},
+])
+def test_incorrect_field_types(type_error_file):
+    """Expect a ``ValueError`` naming the field whose value has the wrong type.
+
+    Each parameter is a dict giving the fixture path (``file``) and the name
+    of the wrongly typed field (``field``).
+    """
+    field = type_error_file["field"]
+    path = type_error_file["file"]
+    print(f"\nTesting incorrect type in field: {field} for file: {path}")
+
+    metadata = load_json_path(path)
+    expected_message = f"Incorrect type for field: {field}"
+    with pytest.raises(ValueError, match=expected_message):
+        validator43(metadata)
+
+def test_multiple_errors():
+    """Expect one ``ValueError`` reporting multiple problems for a file with several defects."""
+    fixture_path = "../tests/data/multiple_errors.json"
+    metadata = load_json_path(fixture_path)
+    with pytest.raises(ValueError, match="Multiple validation errors"):
+        validator43(metadata)
+
+def test_error_logging(caplog):
+    """Expect a failed validation to raise ``ValueError`` and log "Validation failed".
+
+    Log capture is limited to ERROR level and above for the duration of the
+    validator call.
+    """
+    metadata = load_json_path("../tests/data/invalid_datacite43/some_invalid_file.json")
+    with caplog.at_level(logging.ERROR), pytest.raises(ValueError):
+        validator43(metadata)
+    assert "Validation failed" in caplog.text
+
+if __name__ == "__main__":
+    # pytest.fail() signals failure by raising pytest.fail.Exception, which
+    # derives from BaseException and is therefore NOT caught by
+    # "except AssertionError". Catch both so that one bad file is recorded
+    # instead of aborting the whole manual run.
+    _failure_types = (AssertionError, pytest.fail.Exception)
+
+    # Manual test runner for valid files
+    failed_valid_files = []
+    print("\nRunning validation for valid files...")
+    for file in tqdm(VALID_DATACITE43_FILES, desc="Valid files"):
+        try:
+            test_valid_json(file)
+        except _failure_types as e:
+            failed_valid_files.append(file)
+            print(f"Error occurred in valid file: {file}\nError details: {e}")
+
+    if not failed_valid_files:
+        print("\n✅ All valid files passed validation. Test complete.")
+    else:
+        print("\n❌ The following valid files failed validation:")
+        for failed_file in failed_valid_files:
+            print(f"- {failed_file}")
+
+    # Manual test runner for invalid files
+    passed_invalid_files = []
+    print("\nRunning validation for invalid files...")
+    for file in tqdm(INVALID_DATACITE43_FILES, desc="Invalid files"):
+        try:
+            test_invalid_json(file)
+        except _failure_types as e:
+            # test_invalid_json fails only when the file was accepted.
+            passed_invalid_files.append(file)
+            print(f"Error occurred in invalid file: {file}\nError details: {e}")
+
+    if not passed_invalid_files:
+        print("\n✅ All invalid files failed validation as expected. Test is a success.")
+    else:
+        print("\n❌ The following invalid files unexpectedly passed validation:")
+        for passed_file in passed_invalid_files:
+            print(f"- {passed_file}")
diff --git a/caltechdata_api/tester/validatorfordownload.py b/caltechdata_api/tester/validatorfordownload.py
new file mode 100644
index 0000000..ffd1ddc
--- /dev/null
+++ b/caltechdata_api/tester/validatorfordownload.py
@@ -0,0 +1,54 @@
+import subprocess
+import requests
+import pytest
+import json
+from customize_schema import validate_metadata as validator43
+from helpers import load_json_path
+
+def run_caltechdata_write(metadata_path):
+    """Run the caltechdata_write.py script with the given metadata file.
+
+    The script is expected to print a line containing
+    ``Record created with URL: <url>``; the text after that marker is
+    returned with surrounding whitespace removed.
+
+    Args:
+        metadata_path: Path passed to the script's ``--metadata`` option.
+
+    Returns:
+        The record URL as a string, or ``None`` when the script exits with a
+        non-zero status or its output does not contain the marker.
+    """
+    import sys  # local import: only this helper needs the interpreter path
+
+    marker = "Record created with URL: "
+    try:
+        result = subprocess.run(
+            # sys.executable keeps the child in the same interpreter and
+            # virtualenv as the caller, whatever "python" means on PATH.
+            [sys.executable, "caltechdata_write.py", "--metadata", metadata_path],
+            capture_output=True,
+            text=True,
+            check=True,
+        )
+    except subprocess.CalledProcessError as e:
+        print("Error running caltechdata_write:", e.stderr)
+        return None
+
+    pieces = result.stdout.split(marker)
+    if len(pieces) < 2:
+        # The script succeeded but never reported a URL; report it the same
+        # way as a failed run instead of raising IndexError.
+        print("Error running caltechdata_write: no record URL found in output")
+        return None
+    return pieces[1].strip()
+
+def fetch_datacite_json(record_url, timeout=30):
+    """Fetch the JSON metadata from the export endpoint.
+
+    Args:
+        record_url: Base URL of the record; ``/export/datacite-json`` is
+            appended to it.
+        timeout: Seconds to wait for the server before giving up. Without a
+            timeout ``requests`` may block indefinitely on a stalled server.
+
+    Returns:
+        The decoded JSON body, or ``None`` when the request fails (connection
+        error, timeout, or an HTTP error status).
+    """
+    export_url = f"{record_url}/export/datacite-json"
+    try:
+        response = requests.get(export_url, timeout=timeout)
+        response.raise_for_status()
+        return response.json()
+    except requests.RequestException as e:
+        print("Error fetching JSON data:", e)
+        return None
+
+def test_validator(metadata_path):
+    """Upload *metadata_path*, fetch the exported DataCite JSON, and validate it.
+
+    Fails through ``pytest.fail`` when the upload yields no record URL, when
+    the export cannot be retrieved, or when the validator returns a truthy
+    value. Returns ``True`` when validation passes.
+    """
+    record_url = run_caltechdata_write(metadata_path)
+    if not record_url:
+        pytest.fail("Failed to upload metadata and get record URL")
+
+    exported = fetch_datacite_json(record_url)
+    if not exported:
+        pytest.fail("Failed to retrieve JSON data from export endpoint")
+
+    problems = validator43(exported)
+    if not problems:
+        print("Validation passed")
+        return True
+    pytest.fail(f"Validation failed for {record_url}:\n{problems}")
+
+if __name__ == "__main__":
+ # Script entry point: run the upload-and-validate round trip once for the
+ # hard-coded metadata file name below.
+ metadata_file = "1171.json"
+ test_validator(metadata_file)
From 8804e3dc9115121c39ee11c65b3cff6b363b118d Mon Sep 17 00:00:00 2001
From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com>
Date: Fri, 25 Oct 2024 00:34:05 -0700
Subject: [PATCH 17/42] Delete caltechdata_api/tester/code.py
---
caltechdata_api/tester/code.py | 1 -
1 file changed, 1 deletion(-)
delete mode 100644 caltechdata_api/tester/code.py
diff --git a/caltechdata_api/tester/code.py b/caltechdata_api/tester/code.py
deleted file mode 100644
index 8b13789..0000000
--- a/caltechdata_api/tester/code.py
+++ /dev/null
@@ -1 +0,0 @@
-
From f2ede3b0bd38503546f1a793d2301688325663fb Mon Sep 17 00:00:00 2001
From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com>
Date: Fri, 25 Oct 2024 00:34:48 -0700
Subject: [PATCH 18/42] edit in caltech_write.py to test the function
---
caltechdata_api/tester/caltechdata_write.py | 224 ++++++++++++++++++++
1 file changed, 224 insertions(+)
create mode 100644 caltechdata_api/tester/caltechdata_write.py
diff --git a/caltechdata_api/tester/caltechdata_write.py b/caltechdata_api/tester/caltechdata_write.py
new file mode 100644
index 0000000..332135e
--- /dev/null
+++ b/caltechdata_api/tester/caltechdata_write.py
@@ -0,0 +1,224 @@
+import copy
+import json
+import os
+import requests
+import s3fs
+from requests import session
+from json.decoder import JSONDecodeError
+from caltechdata_api import customize_schema
+from caltechdata_api.utils import humanbytes
+
+
+def write_files_rdm(files, file_link, headers, f_headers, s3=None, keepfiles=False):
+    """Upload *files* to the draft record whose files endpoint is *file_link*.
+
+    Existing draft entries with the same key are deleted first, upload slots
+    are then registered for every file, and each file's content is sent and
+    committed.
+
+    Args:
+        files: Iterable of "/"-separated paths. The last path component is
+            used as the file key; on a name clash the parent component is
+            prefixed to make the key unique.
+        file_link: URL of the record's files collection.
+        headers: Headers sent with the JSON requests (registration, commit).
+        f_headers: Headers sent with listing, deletion and content upload.
+        s3: Optional object exposing ``download(remote, local)``. When given,
+            each file is first downloaded into the working directory under
+            its key and uploaded from there.
+        keepfiles: When false, entries returned by the server that are not
+            part of *files* are deleted.
+
+    Raises:
+        Exception: Carrying the response text whenever a request returns an
+            unexpected status code.
+    """
+    f_json = []
+    f_list = {}
+    for f in files:
+        split = f.split("/")
+        filename = split[-1]
+        if filename in f_list:
+            # We can't have a duplicate filename
+            # Assume that the previous path value makes a unique name
+            filename = f"{split[-2]}-{split[-1]}"
+        f_json.append({"key": filename})
+        f_list[filename] = f
+
+    # Now we see if any existing draft files need to be replaced
+    result = requests.get(file_link, headers=f_headers)
+    if result.status_code == 200:
+        for ex in result.json()["entries"]:
+            if ex["key"] in f_list:
+                result = requests.delete(ex["links"]["self"], headers=f_headers)
+                if result.status_code != 204:
+                    raise Exception(result.text)
+
+    # Create new file upload links
+    result = requests.post(file_link, headers=headers, json=f_json)
+    if result.status_code != 201:
+        raise Exception(result.text)
+
+    # Now we have the upload links
+    for entry in result.json()["entries"]:
+        self_link = entry["links"]["self"]
+        link = entry["links"]["content"]
+        commit = entry["links"]["commit"]
+        name = entry["key"]
+        if name in f_list:
+            local_path = f_list[name]
+            if s3:
+                print("Downloading", f_list[name])
+                s3.download(f_list[name], name)
+                local_path = name
+            # The context manager closes the handle even when the upload
+            # raises, so no descriptor is leaked per uploaded file.
+            with open(local_path, "rb") as infile:
+                result = requests.put(link, headers=f_headers, data=infile)
+            if result.status_code != 200:
+                raise Exception(result.text)
+            result = requests.post(commit, headers=headers)
+            if result.status_code != 200:
+                raise Exception(result.text)
+        elif not keepfiles:
+            # Delete any files not included in this write command
+            result = requests.delete(self_link, headers=f_headers)
+            if result.status_code != 204:
+                raise Exception(result.text)
+
+
+def add_file_links(
+ metadata, file_links, file_descriptions=[], additional_descriptions="", s3_link=None
+):
+ """Append an HTML description of descriptionType "files" listing *file_links*.
+
+ For every link the file name (last "/" component), the optional description
+ at the same index in *file_descriptions*, and the size reported by S3
+ (formatted with humanbytes) are added to one string. That string, followed
+ by *additional_descriptions* when non-empty, is appended to
+ metadata["descriptions"]. *metadata* is modified in place and returned.
+
+ NOTE(review): the f-string literals in this function are broken across
+ lines as shown here (the first one is unterminated) -- the markup inside
+ them looks damaged; confirm against the original file.
+ """
+ # Currently configured for S3 links, assuming all are at the same endpoint
+ link_string = ""
+ # Scheme plus host of the first link; file_links must be non-empty.
+ endpoint = "https://" + file_links[0].split("/")[2]
+ s3 = s3fs.S3FileSystem(anon=True, client_kwargs={"endpoint_url": endpoint})
+ index = 0
+ for link in file_links:
+ file = link.split("/")[-1]
+ # Everything after the endpoint is used as the S3 path for the size lookup.
+ path = link.split(endpoint)[1]
+ size = s3.info(path)["size"]
+ size = humanbytes(size)
+ # A link without a matching description gets an empty description.
+ try:
+ desc = file_descriptions[index] + ","
+ except IndexError:
+ desc = ""
+ # The "Files available via S3" header is written once, before the first file.
+ if link_string == "":
+ if s3_link:
+ link_string = f"Files available via S3 at {s3_link}</p>
"
+ else:
+ # NOTE(review): str.strip(file) removes any leading/trailing characters
+ # that occur in the file name, not the file name as a suffix -- confirm
+ # this yields the intended folder URL.
+ cleaned = link.strip(file)
+ link_string = f"Files available via S3 at {cleaned}</p>"
+ link_string += f"""{file}, {desc} {size}
+ <a role="button" class="ui compact mini button" href="{link}"
+ > <i class="download icon"></i> Download </a>
</p>
+ """
+ index += 1
+ # Tack on any additional descriptions
+ if additional_descriptions != "":
+ link_string += additional_descriptions
+
+ description = {"description": link_string, "descriptionType": "files"}
+ metadata["descriptions"].append(description)
+ return metadata
+
+
+def send_to_community(review_link, data, headers, publish, community, message=None):
+    """Submit a draft to *community* for review and optionally accept it.
+
+    Args:
+        review_link: URL of the draft's review endpoint. The submit URL is
+            derived from it by replacing "/review" with
+            "/actions/submit-review".
+        data: Kept for interface compatibility; it is not used, because every
+            request payload is built inside this function.
+        headers: Headers sent with every request.
+        publish: When true, the review request is accepted right after it is
+            submitted.
+        community: Identifier of the receiving community.
+        message: Comment attached to the submission; a default text is used
+            when empty or ``None``.
+
+    Returns:
+        The response of the last request made: the accept call when *publish*
+        is true, otherwise the submit call.
+
+    Raises:
+        Exception: Carrying the response text whenever a request returns an
+            unexpected status code.
+    """
+    if not message:
+        message = "This record is submitted automatically with the CaltechDATA API"
+
+    # Step 1: attach the draft to the community as a submission request.
+    review_request = {
+        "receiver": {"community": community},
+        "type": "community-submission",
+    }
+    result = requests.put(review_link, json=review_request, headers=headers)
+    if result.status_code != 200:
+        raise Exception(result.text)
+
+    # Step 2: submit the request for review together with the comment.
+    submit_link = review_link.replace("/review", "/actions/submit-review")
+    submit_comment = {
+        "payload": {
+            "content": message,
+            "format": "html",
+        }
+    }
+    result = requests.post(submit_link, json=submit_comment, headers=headers)
+    if result.status_code != 202:
+        raise Exception(result.text)
+
+    # Step 3 (optional): accept the request on behalf of the community.
+    if publish:
+        accept_link = result.json()["links"]["actions"]["accept"]
+        accept_comment = {
+            "payload": {
+                "content": "This record is accepted automatically with the CaltechDATA API",
+                "format": "html",
+            }
+        }
+        result = requests.post(accept_link, json=accept_comment, headers=headers)
+        if result.status_code != 200:
+            raise Exception(result.text)
+    return result
+
+def caltechdata_write(
+    metadata,
+    token=None,
+    files=[],
+    production=False,
+    schema="43",
+    publish=False,
+    file_links=[],
+    s3=None,
+    community=None,
+    authors=False,
+    file_descriptions=[],
+    s3_link=None,
+    default_preview=None,
+    review_message=None,
+):
+    """Create a draft record from *metadata* and return its URL.
+
+    A deep copy of *metadata* is posted to the production or test instance
+    (chosen by *production*). Files are uploaded when given; the draft is
+    then sent to *community* for review, or published directly when
+    *publish* is true and no community is set.
+
+    The token falls back to the ``RDMTOK`` environment variable. *schema*,
+    *authors* and *default_preview* are accepted but not used here.
+
+    Returns:
+        The URL of the created record (``<base>records/<id>``).
+
+    Raises:
+        Exception: Carrying the response text when record creation or
+            publishing returns an unexpected status code.
+    """
+    metadata = copy.deepcopy(metadata)
+    token = token or os.environ["RDMTOK"]
+
+    if isinstance(files, str):
+        files = [files]
+    if file_links:
+        metadata = add_file_links(metadata, file_links, file_descriptions, s3_link=s3_link)
+
+    if production:
+        url = "https://data.caltech.edu/"
+    else:
+        url = "https://data.caltechlibrary.dev/"
+
+    bearer = f"Bearer {token}"
+    headers = {"Authorization": bearer, "Content-type": "application/json"}
+    f_headers = {"Authorization": bearer, "Content-type": "application/octet-stream"}
+
+    if not files:
+        metadata["files"] = {"enabled": False}
+
+    result = requests.post(f"{url}api/records", headers=headers, json=metadata)
+    if result.status_code != 201:
+        raise Exception(result.text)
+
+    draft = result.json()
+    record_url = f"{url}records/{draft['id']}"
+
+    if files:
+        write_files_rdm(files, draft["links"]["files"], headers, f_headers, s3)
+
+    if community:
+        send_to_community(
+            draft["links"]["review"], metadata, headers, publish, community, review_message
+        )
+    elif publish:
+        result = requests.post(draft["links"]["publish"], json=metadata, headers=headers)
+        if result.status_code != 202:
+            raise Exception(result.text)
+
+    return record_url
+
+
+def main():
+    """Command-line entry point: upload a record described by a JSON file.
+
+    Parses the command-line options, loads the metadata file given with
+    ``--metadata``, calls ``caltechdata_write`` and prints the resulting
+    record URL as ``Record created with URL: <url>``.
+    """
+    # argparse is not imported at module level in this file, so import it
+    # here; without it the first line below raises NameError.
+    import argparse
+
+    parser = argparse.ArgumentParser(description="Upload files to CaltechDATA with metadata")
+    parser.add_argument("--metadata", required=True, type=str, help="Path to JSON file with metadata")
+    parser.add_argument("--token", default=os.environ.get("RDMTOK"), type=str, help="API token for authentication (defaults to RDMTOK environment variable)")
+    parser.add_argument("--files", nargs="*", default=[], help="List of file paths to upload (default: empty list)")
+    parser.add_argument("--production", action="store_true", help="Use production environment (default: False)")
+    parser.add_argument("--schema", default="43", help="Metadata schema version (default: '43')")
+    parser.add_argument("--publish", action="store_true", help="Publish the record after upload (default: False)")
+    parser.add_argument("--file_links", nargs="*", default=[], help="List of file links to add (default: empty list)")
+    parser.add_argument("--community", type=str, default=None, help="Community ID for submission (default: None)")
+    parser.add_argument("--file_descriptions", nargs="*", default=[], help="Descriptions for each file link (default: empty list)")
+    parser.add_argument("--s3_link", type=str, default=None, help="Link to S3 bucket (default: None)")
+    parser.add_argument("--review_message", type=str, default="This record is submitted automatically with the CaltechDATA API", help="Message for review process (default message)")
+
+    args = parser.parse_args()
+
+    with open(args.metadata, "r") as f:
+        metadata = json.load(f)
+
+    record_url = caltechdata_write(
+        metadata=metadata,
+        token=args.token,
+        files=args.files,
+        production=args.production,
+        schema=args.schema,
+        publish=args.publish,
+        file_links=args.file_links,
+        community=args.community,
+        file_descriptions=args.file_descriptions,
+        s3_link=args.s3_link,
+        review_message=args.review_message,
+    )
+
+    print(f"Record created with URL: {record_url}")
+
+if __name__ == "__main__":
+ # Run the command-line interface only when this file is executed as a script.
+ main()
\ No newline at end of file
From 757cfb020984ebbe87e40e06adfc4a1b02817fbf Mon Sep 17 00:00:00 2001
From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com>
Date: Fri, 25 Oct 2024 00:35:51 -0700
Subject: [PATCH 19/42] Create files
---
caltechdata_api/tester/datacite43/files | 1 +
1 file changed, 1 insertion(+)
create mode 100644 caltechdata_api/tester/datacite43/files
diff --git a/caltechdata_api/tester/datacite43/files b/caltechdata_api/tester/datacite43/files
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/caltechdata_api/tester/datacite43/files
@@ -0,0 +1 @@
+
From 098913e7fbc5c202aa697cf2c929bc54388095c4 Mon Sep 17 00:00:00 2001
From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com>
Date: Fri, 25 Oct 2024 00:36:14 -0700
Subject: [PATCH 20/42] Create files
---
caltechdata_api/tester/invalid_datacite/files | 1 +
1 file changed, 1 insertion(+)
create mode 100644 caltechdata_api/tester/invalid_datacite/files
diff --git a/caltechdata_api/tester/invalid_datacite/files b/caltechdata_api/tester/invalid_datacite/files
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/caltechdata_api/tester/invalid_datacite/files
@@ -0,0 +1 @@
+
From fc9546044e8363076e3b0bcb457e2ce193a71421 Mon Sep 17 00:00:00 2001
From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com>
Date: Fri, 25 Oct 2024 00:36:28 -0700
Subject: [PATCH 21/42] Delete caltechdata_api/tester/datacite43 directory
---
caltechdata_api/tester/datacite43/files | 1 -
1 file changed, 1 deletion(-)
delete mode 100644 caltechdata_api/tester/datacite43/files
diff --git a/caltechdata_api/tester/datacite43/files b/caltechdata_api/tester/datacite43/files
deleted file mode 100644
index 8b13789..0000000
--- a/caltechdata_api/tester/datacite43/files
+++ /dev/null
@@ -1 +0,0 @@
-
From 613f194b15191c8fcfe68532679c33e662f0a44d Mon Sep 17 00:00:00 2001
From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com>
Date: Fri, 25 Oct 2024 00:37:15 -0700
Subject: [PATCH 22/42] Add files via upload
---
.../invalid_datacite/invalid_metadata_1.json | 12 +
.../invalid_datacite/invalid_metadata_10.json | 18 +
.../invalid_datacite/invalid_metadata_2.json | 13 +
.../invalid_datacite/invalid_metadata_3.json | 12 +
.../invalid_datacite/invalid_metadata_4.json | 20 +
.../invalid_datacite/invalid_metadata_5.json | 22 ++
.../invalid_datacite/invalid_metadata_6.json | 22 ++
.../invalid_datacite/invalid_metadata_7.json | 20 +
.../invalid_datacite/invalid_metadata_8.json | 20 +
.../invalid_datacite/invalid_metadata_9.json | 16 +
.../invalid_datacite/missing_creators.json | 263 +++++++++++++
.../invalid_datacite/missing_publisher.json | 350 ++++++++++++++++++
.../invalid_datacite/multiple_errors.json | 263 +++++++++++++
.../invalid_datacite/type_error_creators.json | 264 +++++++++++++
14 files changed, 1315 insertions(+)
create mode 100644 caltechdata_api/tester/invalid_datacite/invalid_metadata_1.json
create mode 100644 caltechdata_api/tester/invalid_datacite/invalid_metadata_10.json
create mode 100644 caltechdata_api/tester/invalid_datacite/invalid_metadata_2.json
create mode 100644 caltechdata_api/tester/invalid_datacite/invalid_metadata_3.json
create mode 100644 caltechdata_api/tester/invalid_datacite/invalid_metadata_4.json
create mode 100644 caltechdata_api/tester/invalid_datacite/invalid_metadata_5.json
create mode 100644 caltechdata_api/tester/invalid_datacite/invalid_metadata_6.json
create mode 100644 caltechdata_api/tester/invalid_datacite/invalid_metadata_7.json
create mode 100644 caltechdata_api/tester/invalid_datacite/invalid_metadata_8.json
create mode 100644 caltechdata_api/tester/invalid_datacite/invalid_metadata_9.json
create mode 100644 caltechdata_api/tester/invalid_datacite/missing_creators.json
create mode 100644 caltechdata_api/tester/invalid_datacite/missing_publisher.json
create mode 100644 caltechdata_api/tester/invalid_datacite/multiple_errors.json
create mode 100644 caltechdata_api/tester/invalid_datacite/type_error_creators.json
diff --git a/caltechdata_api/tester/invalid_datacite/invalid_metadata_1.json b/caltechdata_api/tester/invalid_datacite/invalid_metadata_1.json
new file mode 100644
index 0000000..1bba16b
--- /dev/null
+++ b/caltechdata_api/tester/invalid_datacite/invalid_metadata_1.json
@@ -0,0 +1,12 @@
+{
+ "creators": [
+ {
+ "name": "John Doe"
+ }
+ ],
+ "publisher": "Caltech",
+ "publicationYear": "2023",
+ "types": {
+ "resourceTypeGeneral": "Dataset"
+ }
+}
\ No newline at end of file
diff --git a/caltechdata_api/tester/invalid_datacite/invalid_metadata_10.json b/caltechdata_api/tester/invalid_datacite/invalid_metadata_10.json
new file mode 100644
index 0000000..759757d
--- /dev/null
+++ b/caltechdata_api/tester/invalid_datacite/invalid_metadata_10.json
@@ -0,0 +1,18 @@
+{
+ "titles": [
+ {
+ "title": "Sample Title"
+ }
+ ],
+ "creators": [
+ {
+ "name": "John Doe"
+ }
+ ],
+ "version": 1,
+ "publisher": "Caltech",
+ "publicationYear": "2023",
+ "types": {
+ "resourceTypeGeneral": "Dataset"
+ }
+}
\ No newline at end of file
diff --git a/caltechdata_api/tester/invalid_datacite/invalid_metadata_2.json b/caltechdata_api/tester/invalid_datacite/invalid_metadata_2.json
new file mode 100644
index 0000000..3899136
--- /dev/null
+++ b/caltechdata_api/tester/invalid_datacite/invalid_metadata_2.json
@@ -0,0 +1,13 @@
+{
+ "titles": [],
+ "creators": [
+ {
+ "name": "John Doe"
+ }
+ ],
+ "publisher": "Caltech",
+ "publicationYear": "2023",
+ "types": {
+ "resourceTypeGeneral": "Dataset"
+ }
+}
\ No newline at end of file
diff --git a/caltechdata_api/tester/invalid_datacite/invalid_metadata_3.json b/caltechdata_api/tester/invalid_datacite/invalid_metadata_3.json
new file mode 100644
index 0000000..707dbab
--- /dev/null
+++ b/caltechdata_api/tester/invalid_datacite/invalid_metadata_3.json
@@ -0,0 +1,12 @@
+{
+ "titles": [
+ {
+ "title": "Sample Title"
+ }
+ ],
+ "publisher": "Caltech",
+ "publicationYear": "2023",
+ "types": {
+ "resourceTypeGeneral": "Dataset"
+ }
+}
\ No newline at end of file
diff --git a/caltechdata_api/tester/invalid_datacite/invalid_metadata_4.json b/caltechdata_api/tester/invalid_datacite/invalid_metadata_4.json
new file mode 100644
index 0000000..f7d2fe4
--- /dev/null
+++ b/caltechdata_api/tester/invalid_datacite/invalid_metadata_4.json
@@ -0,0 +1,20 @@
+{
+ "titles": [
+ {
+ "title": "Sample Title"
+ }
+ ],
+ "creators": [
+ {
+ "name": "John Doe"
+ }
+ ],
+ "contributors": [
+ {}
+ ],
+ "publisher": "Caltech",
+ "publicationYear": "2023",
+ "types": {
+ "resourceTypeGeneral": "Dataset"
+ }
+}
\ No newline at end of file
diff --git a/caltechdata_api/tester/invalid_datacite/invalid_metadata_5.json b/caltechdata_api/tester/invalid_datacite/invalid_metadata_5.json
new file mode 100644
index 0000000..deeff7f
--- /dev/null
+++ b/caltechdata_api/tester/invalid_datacite/invalid_metadata_5.json
@@ -0,0 +1,22 @@
+{
+ "titles": [
+ {
+ "title": "Sample Title"
+ }
+ ],
+ "creators": [
+ {
+ "name": "John Doe"
+ }
+ ],
+ "descriptions": [
+ {
+ "description": "Sample Description"
+ }
+ ],
+ "publisher": "Caltech",
+ "publicationYear": "2023",
+ "types": {
+ "resourceTypeGeneral": "Dataset"
+ }
+}
\ No newline at end of file
diff --git a/caltechdata_api/tester/invalid_datacite/invalid_metadata_6.json b/caltechdata_api/tester/invalid_datacite/invalid_metadata_6.json
new file mode 100644
index 0000000..8fa14f1
--- /dev/null
+++ b/caltechdata_api/tester/invalid_datacite/invalid_metadata_6.json
@@ -0,0 +1,22 @@
+{
+ "titles": [
+ {
+ "title": "Sample Title"
+ }
+ ],
+ "creators": [
+ {
+ "name": "John Doe"
+ }
+ ],
+ "fundingReferences": [
+ {
+ "funderIdentifier": "1234"
+ }
+ ],
+ "publisher": "Caltech",
+ "publicationYear": "2023",
+ "types": {
+ "resourceTypeGeneral": "Dataset"
+ }
+}
\ No newline at end of file
diff --git a/caltechdata_api/tester/invalid_datacite/invalid_metadata_7.json b/caltechdata_api/tester/invalid_datacite/invalid_metadata_7.json
new file mode 100644
index 0000000..bae4d11
--- /dev/null
+++ b/caltechdata_api/tester/invalid_datacite/invalid_metadata_7.json
@@ -0,0 +1,20 @@
+{
+ "titles": [
+ {
+ "title": "Sample Title"
+ }
+ ],
+ "creators": [
+ {
+ "name": "John Doe"
+ }
+ ],
+ "identifiers": [
+ {}
+ ],
+ "publisher": "Caltech",
+ "publicationYear": "2023",
+ "types": {
+ "resourceTypeGeneral": "Dataset"
+ }
+}
\ No newline at end of file
diff --git a/caltechdata_api/tester/invalid_datacite/invalid_metadata_8.json b/caltechdata_api/tester/invalid_datacite/invalid_metadata_8.json
new file mode 100644
index 0000000..247f3ff
--- /dev/null
+++ b/caltechdata_api/tester/invalid_datacite/invalid_metadata_8.json
@@ -0,0 +1,20 @@
+{
+ "titles": [
+ {
+ "title": "Sample Title"
+ }
+ ],
+ "creators": [
+ {
+ "name": "John Doe"
+ }
+ ],
+ "dates": [
+ {}
+ ],
+ "publisher": "Caltech",
+ "publicationYear": "2023",
+ "types": {
+ "resourceTypeGeneral": "Dataset"
+ }
+}
\ No newline at end of file
diff --git a/caltechdata_api/tester/invalid_datacite/invalid_metadata_9.json b/caltechdata_api/tester/invalid_datacite/invalid_metadata_9.json
new file mode 100644
index 0000000..2eddcf1
--- /dev/null
+++ b/caltechdata_api/tester/invalid_datacite/invalid_metadata_9.json
@@ -0,0 +1,16 @@
+{
+ "titles": [
+ {
+ "title": "Sample Title"
+ }
+ ],
+ "creators": [
+ {
+ "name": "John Doe"
+ }
+ ],
+ "publicationYear": "2023",
+ "types": {
+ "resourceTypeGeneral": "Dataset"
+ }
+}
\ No newline at end of file
diff --git a/caltechdata_api/tester/invalid_datacite/missing_creators.json b/caltechdata_api/tester/invalid_datacite/missing_creators.json
new file mode 100644
index 0000000..0d0f1a1
--- /dev/null
+++ b/caltechdata_api/tester/invalid_datacite/missing_creators.json
@@ -0,0 +1,263 @@
+{
+ "contributors": [
+ {
+ "nameIdentifiers": [
+ {
+ "nameIdentifier": "grid.20861.3d",
+ "nameIdentifierScheme": "GRID"
+ }
+ ],
+ "name": "California Institute of Techonolgy, Pasadena, CA (US)",
+ "contributorType": "HostingInstitution"
+ },
+ {
+ "affiliation": [
+ {
+ "name": "California Institute of Technology, Pasadena, CA (US)"
+ }
+ ],
+ "nameIdentifiers": [
+ {
+ "nameIdentifier": "0000-0001-5383-8462",
+ "nameIdentifierScheme": "ORCID"
+ }
+ ],
+ "name": "Roehl, C. M.",
+ "contributorType": "DataCurator"
+ },
+ {
+ "affiliation": [
+ {
+ "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
+ }
+ ],
+ "nameIdentifiers": [
+ {
+ "nameIdentifier": "0000-0001-9947-1053",
+ "nameIdentifierScheme": "ORCID"
+ },
+ {
+ "nameIdentifier": "D-2563-2012",
+ "nameIdentifierScheme": "ResearcherID"
+ }
+ ],
+ "name": "Kimberly Strong",
+ "contributorType": "ContactPerson"
+ },
+ {
+ "name": "TCCON",
+ "contributorType": "ResearchGroup"
+ }
+ ],
+ "descriptions": [
+ {
+ "descriptionType": "Abstract",
+ "description": "\nThe Total Carbon Column Observing Network (TCCON) is a network of ground-based Fourier Transform Spectrometers that record direct solar absorption spectra of the atmosphere in the near-infrared. From these spectra, accurate and precise column-averaged abundances of atmospheric constituents including CO2, CH4, N2O, HF, CO, H2O, and HDO, are retrieved. This data set contains observations from the TCCON station at Eureka, Canada."
+ },
+ {
+ "descriptionType": "Other",
+ "description": "\nCite this record as:\nStrong, K., Roche, S., Franklin, J. E., Mendonca, J., Lutsch, E., Weaver, D., \u2026 Lindenmaier, R. (2019). TCCON data from Eureka (CA), Release GGG2014.R3 [Data set]. CaltechDATA. https://doi.org/10.14291/tccon.ggg2014.eureka01.r3\nor choose a different citation style.\nDownload Citation\n"
+ },
+ {
+ "descriptionType": "Other",
+ "description": "\nUnique Views: 161\nUnique Downloads: 7\nbetween January 31, 2019 and July 02, 2020\nMore info on how stats are collected\n"
+ }
+ ],
+ "fundingReferences": [
+ {
+ "funderName": "Atlantic Innovation Fund"
+ },
+ {
+ "funderName": "Canada Foundation for Innovation",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.439998.6"
+ },
+ {
+ "funderName": "Canadian Foundation for Climate and Atmospheric Sciences"
+ },
+ {
+ "funderName": "Canadian Space Agency",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.236846.d"
+ },
+ {
+ "funderName": "Environment and Climate Change Canada",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.410334.1"
+ },
+ {
+ "funderName": "Government of Canada (International Polar Year funding)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.451254.3"
+ },
+ {
+ "funderName": "Natural Sciences and Engineering Research Council of Canada",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.452912.9"
+ },
+ {
+ "funderName": "Polar Commission (Northern Scientific Training Program)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.465477.3"
+ },
+ {
+ "funderName": "Nova Scotia Research Innovation Trust"
+ },
+ {
+ "funderName": "Ministry of Research and Innovation (Ontario Innovation Trust and Ontario Research Fund)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.451078.f"
+ },
+ {
+ "funderName": "Natural Resources Canada (Polar Continental Shelf Program)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.202033.0"
+ }
+ ],
+ "language": "eng",
+ "relatedIdentifiers": [
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.documentation.R0/1221662",
+ "relationType": "IsDocumentedBy",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R0/1149271",
+ "relationType": "IsNewVersionOf",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "https://tccon-wiki.caltech.edu/Network_Policy/Data_Use_Policy/Data_Description",
+ "relationType": "IsDocumentedBy",
+ "relatedIdentifierType": "URL"
+ },
+ {
+ "relatedIdentifier": "https://tccon-wiki.caltech.edu/Sites",
+ "relationType": "IsDocumentedBy",
+ "relatedIdentifierType": "URL"
+ },
+ {
+ "relatedIdentifier": "10.14291/TCCON.GGG2014",
+ "relationType": "IsPartOf",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R1/1325515",
+ "relationType": "IsNewVersionOf",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R2",
+ "relationType": "IsNewVersionOf",
+ "relatedIdentifierType": "DOI"
+ }
+ ],
+ "rightsList": [
+ {
+ "rights": "TCCON Data License",
+ "rightsURI": "https://data.caltech.edu/tindfiles/serve/8298981c-6613-4ed9-9c54-5ef8fb5180f4/"
+ }
+ ],
+ "subjects": [
+ {
+ "subject": "atmospheric trace gases"
+ },
+ {
+ "subject": "CO2"
+ },
+ {
+ "subject": "CH4"
+ },
+ {
+ "subject": "CO"
+ },
+ {
+ "subject": "N2O"
+ },
+ {
+ "subject": "column-averaged dry-air mole fractions"
+ },
+ {
+ "subject": "remote sensing"
+ },
+ {
+ "subject": "FTIR spectroscopy"
+ },
+ {
+ "subject": "TCCON"
+ }
+ ],
+ "version": "R3",
+ "titles": [
+ {
+ "title": "TCCON data from Eureka (CA), Release GGG2014.R3"
+ }
+ ],
+ "formats": [
+ "application/x-netcdf"
+ ],
+ "dates": [
+ {
+ "date": "2019-01-31",
+ "dateType": "Created"
+ },
+ {
+ "date": "2020-07-01",
+ "dateType": "Updated"
+ },
+ {
+ "date": "2010-07-24/2019-08-15",
+ "dateType": "Collected"
+ },
+ {
+ "date": "2019-01-31",
+ "dateType": "Submitted"
+ },
+ {
+ "date": "2019-01-31",
+ "dateType": "Issued"
+ }
+ ],
+ "publicationYear": "2019",
+ "publisher": "CaltechDATA",
+ "types": {
+ "resourceTypeGeneral": "Dataset",
+ "resourceType": "Dataset"
+ },
+ "identifiers": [
+ {
+ "identifier": "10.14291/tccon.ggg2014.eureka01.R3",
+ "identifierType": "DOI"
+ },
+ {
+ "identifier": "1171",
+ "identifierType": "CaltechDATA_Identifier"
+ },
+ {
+ "identifier": "GGG2014",
+ "identifierType": "Software_Version"
+ },
+ {
+ "identifier": "eu",
+ "identifierType": "id"
+ },
+ {
+ "identifier": "eureka01",
+ "identifierType": "longName"
+ },
+ {
+ "identifier": "R1",
+ "identifierType": "Data_Revision"
+ }
+ ],
+ "geoLocations": [
+ {
+ "geoLocationPlace": "Eureka, NU (CA)",
+ "geoLocationPoint": {
+ "pointLatitude": "80.05",
+ "pointLongitude": "-86.42"
+ }
+ }
+ ],
+ "schemaVersion": "http://datacite.org/schema/kernel-4"
+}
\ No newline at end of file
diff --git a/caltechdata_api/tester/invalid_datacite/missing_publisher.json b/caltechdata_api/tester/invalid_datacite/missing_publisher.json
new file mode 100644
index 0000000..9035027
--- /dev/null
+++ b/caltechdata_api/tester/invalid_datacite/missing_publisher.json
@@ -0,0 +1,350 @@
+{
+ "contributors": [
+ {
+ "nameIdentifiers": [
+ {
+ "nameIdentifier": "grid.20861.3d",
+ "nameIdentifierScheme": "GRID"
+ }
+ ],
+ "name": "California Institute of Techonolgy, Pasadena, CA (US)",
+ "contributorType": "HostingInstitution"
+ },
+ {
+ "affiliation": [
+ {
+ "name": "California Institute of Technology, Pasadena, CA (US)"
+ }
+ ],
+ "nameIdentifiers": [
+ {
+ "nameIdentifier": "0000-0001-5383-8462",
+ "nameIdentifierScheme": "ORCID"
+ }
+ ],
+ "name": "Roehl, C. M.",
+ "contributorType": "DataCurator"
+ },
+ {
+ "affiliation": [
+ {
+ "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
+ }
+ ],
+ "nameIdentifiers": [
+ {
+ "nameIdentifier": "0000-0001-9947-1053",
+ "nameIdentifierScheme": "ORCID"
+ },
+ {
+ "nameIdentifier": "D-2563-2012",
+ "nameIdentifierScheme": "ResearcherID"
+ }
+ ],
+ "name": "Kimberly Strong",
+ "contributorType": "ContactPerson"
+ },
+ {
+ "name": "TCCON",
+ "contributorType": "ResearchGroup"
+ }
+ ],
+ "descriptions": [
+ {
+ "descriptionType": "Abstract",
+ "description": "
The Total Carbon Column Observing Network (TCCON) is a network of ground-based Fourier Transform Spectrometers that record direct solar absorption spectra of the atmosphere in the near-infrared. From these spectra, accurate and precise column-averaged abundances of atmospheric constituents including CO2, CH4, N2O, HF, CO, H2O, and HDO, are retrieved. This data set contains observations from the TCCON station at Eureka, Canada."
+ },
+ {
+ "descriptionType": "Other",
+ "description": "
Cite this record as:
Strong, K., Roche, S., Franklin, J. E., Mendonca, J., Lutsch, E., Weaver, D., \u2026 Lindenmaier, R. (2019). TCCON data from Eureka (CA), Release GGG2014.R3 [Data set]. CaltechDATA. https://doi.org/10.14291/tccon.ggg2014.eureka01.r3
or choose a different citation style.
Download Citation
"
+ },
+ {
+ "descriptionType": "Other",
+ "description": "
Unique Views: 161
Unique Downloads: 7
between January 31, 2019 and July 02, 2020
More info on how stats are collected
"
+ }
+ ],
+ "fundingReferences": [
+ {
+ "funderName": "Atlantic Innovation Fund"
+ },
+ {
+ "funderName": "Canada Foundation for Innovation",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.439998.6"
+ },
+ {
+ "funderName": "Canadian Foundation for Climate and Atmospheric Sciences"
+ },
+ {
+ "funderName": "Canadian Space Agency",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.236846.d"
+ },
+ {
+ "funderName": "Environment and Climate Change Canada",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.410334.1"
+ },
+ {
+ "funderName": "Government of Canada (International Polar Year funding)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.451254.3"
+ },
+ {
+ "funderName": "Natural Sciences and Engineering Research Council of Canada",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.452912.9"
+ },
+ {
+ "funderName": "Polar Commission (Northern Scientific Training Program)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.465477.3"
+ },
+ {
+ "funderName": "Nova Scotia Research Innovation Trust"
+ },
+ {
+ "funderName": "Ministry of Research and Innovation (Ontario Innovation Trust and Ontario Research Fund)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.451078.f"
+ },
+ {
+ "funderName": "Natural Resources Canada (Polar Continental Shelf Program)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.202033.0"
+ }
+ ],
+ "language": "eng",
+ "relatedIdentifiers": [
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.documentation.R0/1221662",
+ "relationType": "IsDocumentedBy",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R0/1149271",
+ "relationType": "IsNewVersionOf",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "https://tccon-wiki.caltech.edu/Network_Policy/Data_Use_Policy/Data_Description",
+ "relationType": "IsDocumentedBy",
+ "relatedIdentifierType": "URL"
+ },
+ {
+ "relatedIdentifier": "https://tccon-wiki.caltech.edu/Sites",
+ "relationType": "IsDocumentedBy",
+ "relatedIdentifierType": "URL"
+ },
+ {
+ "relatedIdentifier": "10.14291/TCCON.GGG2014",
+ "relationType": "IsPartOf",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R1/1325515",
+ "relationType": "IsNewVersionOf",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R2",
+ "relationType": "IsNewVersionOf",
+ "relatedIdentifierType": "DOI"
+ }
+ ],
+ "rightsList": [
+ {
+ "rights": "TCCON Data License",
+ "rightsURI": "https://data.caltech.edu/tindfiles/serve/8298981c-6613-4ed9-9c54-5ef8fb5180f4/"
+ }
+ ],
+ "subjects": [
+ {
+ "subject": "atmospheric trace gases"
+ },
+ {
+ "subject": "CO2"
+ },
+ {
+ "subject": "CH4"
+ },
+ {
+ "subject": "CO"
+ },
+ {
+ "subject": "N2O"
+ },
+ {
+ "subject": "column-averaged dry-air mole fractions"
+ },
+ {
+ "subject": "remote sensing"
+ },
+ {
+ "subject": "FTIR spectroscopy"
+ },
+ {
+ "subject": "TCCON"
+ }
+ ],
+ "version": "R3",
+ "titles": [
+ {
+ "title": "TCCON data from Eureka (CA), Release GGG2014.R3"
+ }
+ ],
+ "formats": [
+ "application/x-netcdf"
+ ],
+ "dates": [
+ {
+ "date": "2019-01-31",
+ "dateType": "Created"
+ },
+ {
+ "date": "2020-07-01",
+ "dateType": "Updated"
+ },
+ {
+ "date": "2010-07-24/2019-08-15",
+ "dateType": "Collected"
+ },
+ {
+ "date": "2019-01-31",
+ "dateType": "Submitted"
+ },
+ {
+ "date": "2019-01-31",
+ "dateType": "Issued"
+ }
+ ],
+ "publicationYear": "2019",
+ "types": {
+ "resourceTypeGeneral": "Dataset",
+ "resourceType": "Dataset"
+ },
+ "identifiers": [
+ {
+ "identifier": "10.14291/tccon.ggg2014.eureka01.R3",
+ "identifierType": "DOI"
+ },
+ {
+ "identifier": "1171",
+ "identifierType": "CaltechDATA_Identifier"
+ },
+ {
+ "identifier": "GGG2014",
+ "identifierType": "Software_Version"
+ },
+ {
+ "identifier": "eu",
+ "identifierType": "id"
+ },
+ {
+ "identifier": "eureka01",
+ "identifierType": "longName"
+ },
+ {
+ "identifier": "R1",
+ "identifierType": "Data_Revision"
+ }
+ ],
+ "creators": [
+ {
+ "affiliation": [
+ {
+ "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
+ }
+ ],
+ "name": "Strong, K."
+ },
+ {
+ "affiliation": [
+ {
+ "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
+ }
+ ],
+ "name": "Roche, S."
+ },
+ {
+ "affiliation": [
+ {
+ "name": "School of Engineering and Applied Sciences, Harvard University, Cambridge, MA (USA)"
+ }
+ ],
+ "name": "Franklin, J. E."
+ },
+ {
+ "affiliation": [
+ {
+ "name": "Environment and Climate Change Canada, Downsview, ON (CA)"
+ }
+ ],
+ "name": "Mendonca, J."
+ },
+ {
+ "affiliation": [
+ {
+ "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
+ }
+ ],
+ "name": "Lutsch, E."
+ },
+ {
+ "affiliation": [
+ {
+ "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
+ }
+ ],
+ "name": "Weaver, D."
+ },
+ {
+ "affiliation": [
+ {
+ "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
+ }
+ ],
+ "name": "Fogal, P. F."
+ },
+ {
+ "affiliation": [
+ {
+ "name": "Department of Physics & Atmospheric Science, Dalhousie University, Halifax, NS, CA"
+ }
+ ],
+ "name": "Drummond, J. R."
+ },
+ {
+ "affiliation": [
+ {
+ "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
+ },
+ {
+ "name": "UCAR Center for Science Education, Boulder, CO (US)"
+ }
+ ],
+ "name": "Batchelor, R."
+ },
+ {
+ "affiliation": [
+ {
+ "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
+ },
+ {
+ "name": "Pacific Northwest National Laboratory, Richland, WA (US)"
+ }
+ ],
+ "name": "Lindenmaier, R."
+ }
+ ],
+ "geoLocations": [
+ {
+ "geoLocationPlace": "Eureka, NU (CA)",
+ "geoLocationPoint": {
+ "pointLatitude": "80.05",
+ "pointLongitude": "-86.42"
+ }
+ }
+ ],
+ "schemaVersion": "http://datacite.org/schema/kernel-4"
+}
\ No newline at end of file
diff --git a/caltechdata_api/tester/invalid_datacite/multiple_errors.json b/caltechdata_api/tester/invalid_datacite/multiple_errors.json
new file mode 100644
index 0000000..c18931b
--- /dev/null
+++ b/caltechdata_api/tester/invalid_datacite/multiple_errors.json
@@ -0,0 +1,263 @@
+{
+ "contributors": [
+ {
+ "nameIdentifiers": [
+ {
+ "nameIdentifier": "grid.20861.3d",
+ "nameIdentifierScheme": "GRID"
+ }
+ ],
+ "name": "California Institute of Techonolgy, Pasadena, CA (US)",
+ "contributorType": "HostingInstitution"
+ },
+ {
+ "affiliation": [
+ {
+ "name": "California Institute of Technology, Pasadena, CA (US)"
+ }
+ ],
+ "nameIdentifiers": [
+ {
+ "nameIdentifier": "0000-0001-5383-8462",
+ "nameIdentifierScheme": "ORCID"
+ }
+ ],
+ "name": "Roehl, C. M.",
+ "contributorType": "DataCurator"
+ },
+ {
+ "affiliation": [
+ {
+ "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
+ }
+ ],
+ "nameIdentifiers": [
+ {
+ "nameIdentifier": "0000-0001-9947-1053",
+ "nameIdentifierScheme": "ORCID"
+ },
+ {
+ "nameIdentifier": "D-2563-2012",
+ "nameIdentifierScheme": "ResearcherID"
+ }
+ ],
+ "name": "Kimberly Strong",
+ "contributorType": "ContactPerson"
+ },
+ {
+ "name": "TCCON",
+ "contributorType": "ResearchGroup"
+ }
+ ],
+ "descriptions": [
+ {
+ "descriptionType": "Abstract",
+ "description": "
The Total Carbon Column Observing Network (TCCON) is a network of ground-based Fourier Transform Spectrometers that record direct solar absorption spectra of the atmosphere in the near-infrared. From these spectra, accurate and precise column-averaged abundances of atmospheric constituents including CO2, CH4, N2O, HF, CO, H2O, and HDO, are retrieved. This data set contains observations from the TCCON station at Eureka, Canada."
+ },
+ {
+ "descriptionType": "Other",
+ "description": "
Cite this record as:
Strong, K., Roche, S., Franklin, J. E., Mendonca, J., Lutsch, E., Weaver, D., \u2026 Lindenmaier, R. (2019). TCCON data from Eureka (CA), Release GGG2014.R3 [Data set]. CaltechDATA. https://doi.org/10.14291/tccon.ggg2014.eureka01.r3
or choose a different citation style.
Download Citation
"
+ },
+ {
+ "descriptionType": "Other",
+ "description": "
Unique Views: 161
Unique Downloads: 7
between January 31, 2019 and July 02, 2020
More info on how stats are collected
"
+ }
+ ],
+ "fundingReferences": [
+ {
+ "funderName": "Atlantic Innovation Fund"
+ },
+ {
+ "funderName": "Canada Foundation for Innovation",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.439998.6"
+ },
+ {
+ "funderName": "Canadian Foundation for Climate and Atmospheric Sciences"
+ },
+ {
+ "funderName": "Canadian Space Agency",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.236846.d"
+ },
+ {
+ "funderName": "Environment and Climate Change Canada",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.410334.1"
+ },
+ {
+ "funderName": "Government of Canada (International Polar Year funding)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.451254.3"
+ },
+ {
+ "funderName": "Natural Sciences and Engineering Research Council of Canada",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.452912.9"
+ },
+ {
+ "funderName": "Polar Commission (Northern Scientific Training Program)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.465477.3"
+ },
+ {
+ "funderName": "Nova Scotia Research Innovation Trust"
+ },
+ {
+ "funderName": "Ministry of Research and Innovation (Ontario Innovation Trust and Ontario Research Fund)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.451078.f"
+ },
+ {
+ "funderName": "Natural Resources Canada (Polar Continental Shelf Program)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.202033.0"
+ }
+ ],
+ "language": "eng",
+ "relatedIdentifiers": [
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.documentation.R0/1221662",
+ "relationType": "IsDocumentedBy",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R0/1149271",
+ "relationType": "IsNewVersionOf",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "https://tccon-wiki.caltech.edu/Network_Policy/Data_Use_Policy/Data_Description",
+ "relationType": "IsDocumentedBy",
+ "relatedIdentifierType": "URL"
+ },
+ {
+ "relatedIdentifier": "https://tccon-wiki.caltech.edu/Sites",
+ "relationType": "IsDocumentedBy",
+ "relatedIdentifierType": "URL"
+ },
+ {
+ "relatedIdentifier": "10.14291/TCCON.GGG2014",
+ "relationType": "IsPartOf",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R1/1325515",
+ "relationType": "IsNewVersionOf",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R2",
+ "relationType": "IsNewVersionOf",
+ "relatedIdentifierType": "DOI"
+ }
+ ],
+ "rightsList": [
+ {
+ "rights": "TCCON Data License",
+ "rightsURI": "https://data.caltech.edu/tindfiles/serve/8298981c-6613-4ed9-9c54-5ef8fb5180f4/"
+ }
+ ],
+ "subjects": [
+ {
+ "subject": "atmospheric trace gases"
+ },
+ {
+ "subject": "CO2"
+ },
+ {
+ "subject": "CH4"
+ },
+ {
+ "subject": "CO"
+ },
+ {
+ "subject": "N2O"
+ },
+ {
+ "subject": "column-averaged dry-air mole fractions"
+ },
+ {
+ "subject": "remote sensing"
+ },
+ {
+ "subject": "FTIR spectroscopy"
+ },
+ {
+ "subject": "TCCON"
+ }
+ ],
+ "version": "R3",
+ "titles": [
+ {
+ "title": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+ }
+ ],
+ "formats": [
+ "application/x-netcdf"
+ ],
+ "dates": [
+ {
+ "date": "31-01-2019",
+ "dateType": "Created"
+ },
+ {
+ "date": "2020-07-01",
+ "dateType": "Updated"
+ },
+ {
+ "date": "2010-07-24/2019-08-15",
+ "dateType": "Collected"
+ },
+ {
+ "date": "2019-01-31",
+ "dateType": "Submitted"
+ },
+ {
+ "date": "2019-01-31",
+ "dateType": "Issued"
+ }
+ ],
+ "publicationYear": "2019",
+ "publisher": "CaltechDATA",
+ "types": {
+ "resourceTypeGeneral": "Dataset",
+ "resourceType": "Dataset"
+ },
+ "identifiers": [
+ {
+ "identifier": "10.14291/tccon.ggg2014.eureka01.R3",
+ "identifierType": "DOI"
+ },
+ {
+ "identifier": "1171",
+ "identifierType": "CaltechDATA_Identifier"
+ },
+ {
+ "identifier": "GGG2014",
+ "identifierType": "Software_Version"
+ },
+ {
+ "identifier": "eu",
+ "identifierType": "id"
+ },
+ {
+ "identifier": "eureka01",
+ "identifierType": "longName"
+ },
+ {
+ "identifier": "R1",
+ "identifierType": "Data_Revision"
+ }
+ ],
+ "geoLocations": [
+ {
+ "geoLocationPlace": "Eureka, NU (CA)",
+ "geoLocationPoint": {
+ "pointLatitude": "80.05",
+ "pointLongitude": "-86.42"
+ }
+ }
+ ],
+ "schemaVersion": "http://datacite.org/schema/kernel-4"
+}
\ No newline at end of file
diff --git a/caltechdata_api/tester/invalid_datacite/type_error_creators.json b/caltechdata_api/tester/invalid_datacite/type_error_creators.json
new file mode 100644
index 0000000..6200870
--- /dev/null
+++ b/caltechdata_api/tester/invalid_datacite/type_error_creators.json
@@ -0,0 +1,264 @@
+{
+ "contributors": [
+ {
+ "nameIdentifiers": [
+ {
+ "nameIdentifier": "grid.20861.3d",
+ "nameIdentifierScheme": "GRID"
+ }
+ ],
+ "name": "California Institute of Techonolgy, Pasadena, CA (US)",
+ "contributorType": "HostingInstitution"
+ },
+ {
+ "affiliation": [
+ {
+ "name": "California Institute of Technology, Pasadena, CA (US)"
+ }
+ ],
+ "nameIdentifiers": [
+ {
+ "nameIdentifier": "0000-0001-5383-8462",
+ "nameIdentifierScheme": "ORCID"
+ }
+ ],
+ "name": "Roehl, C. M.",
+ "contributorType": "DataCurator"
+ },
+ {
+ "affiliation": [
+ {
+ "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
+ }
+ ],
+ "nameIdentifiers": [
+ {
+ "nameIdentifier": "0000-0001-9947-1053",
+ "nameIdentifierScheme": "ORCID"
+ },
+ {
+ "nameIdentifier": "D-2563-2012",
+ "nameIdentifierScheme": "ResearcherID"
+ }
+ ],
+ "name": "Kimberly Strong",
+ "contributorType": "ContactPerson"
+ },
+ {
+ "name": "TCCON",
+ "contributorType": "ResearchGroup"
+ }
+ ],
+ "descriptions": [
+ {
+ "descriptionType": "Abstract",
+ "description": "
The Total Carbon Column Observing Network (TCCON) is a network of ground-based Fourier Transform Spectrometers that record direct solar absorption spectra of the atmosphere in the near-infrared. From these spectra, accurate and precise column-averaged abundances of atmospheric constituents including CO2, CH4, N2O, HF, CO, H2O, and HDO, are retrieved. This data set contains observations from the TCCON station at Eureka, Canada."
+ },
+ {
+ "descriptionType": "Other",
+ "description": "
Cite this record as:
Strong, K., Roche, S., Franklin, J. E., Mendonca, J., Lutsch, E., Weaver, D., \u2026 Lindenmaier, R. (2019). TCCON data from Eureka (CA), Release GGG2014.R3 [Data set]. CaltechDATA. https://doi.org/10.14291/tccon.ggg2014.eureka01.r3
or choose a different citation style.
Download Citation
"
+ },
+ {
+ "descriptionType": "Other",
+ "description": "
Unique Views: 161
Unique Downloads: 7
between January 31, 2019 and July 02, 2020
More info on how stats are collected
"
+ }
+ ],
+ "fundingReferences": [
+ {
+ "funderName": "Atlantic Innovation Fund"
+ },
+ {
+ "funderName": "Canada Foundation for Innovation",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.439998.6"
+ },
+ {
+ "funderName": "Canadian Foundation for Climate and Atmospheric Sciences"
+ },
+ {
+ "funderName": "Canadian Space Agency",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.236846.d"
+ },
+ {
+ "funderName": "Environment and Climate Change Canada",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.410334.1"
+ },
+ {
+ "funderName": "Government of Canada (International Polar Year funding)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.451254.3"
+ },
+ {
+ "funderName": "Natural Sciences and Engineering Research Council of Canada",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.452912.9"
+ },
+ {
+ "funderName": "Polar Commission (Northern Scientific Training Program)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.465477.3"
+ },
+ {
+ "funderName": "Nova Scotia Research Innovation Trust"
+ },
+ {
+ "funderName": "Ministry of Research and Innovation (Ontario Innovation Trust and Ontario Research Fund)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.451078.f"
+ },
+ {
+ "funderName": "Natural Resources Canada (Polar Continental Shelf Program)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.202033.0"
+ }
+ ],
+ "language": "eng",
+ "relatedIdentifiers": [
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.documentation.R0/1221662",
+ "relationType": "IsDocumentedBy",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R0/1149271",
+ "relationType": "IsNewVersionOf",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "https://tccon-wiki.caltech.edu/Network_Policy/Data_Use_Policy/Data_Description",
+ "relationType": "IsDocumentedBy",
+ "relatedIdentifierType": "URL"
+ },
+ {
+ "relatedIdentifier": "https://tccon-wiki.caltech.edu/Sites",
+ "relationType": "IsDocumentedBy",
+ "relatedIdentifierType": "URL"
+ },
+ {
+ "relatedIdentifier": "10.14291/TCCON.GGG2014",
+ "relationType": "IsPartOf",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R1/1325515",
+ "relationType": "IsNewVersionOf",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R2",
+ "relationType": "IsNewVersionOf",
+ "relatedIdentifierType": "DOI"
+ }
+ ],
+ "rightsList": [
+ {
+ "rights": "TCCON Data License",
+ "rightsURI": "https://data.caltech.edu/tindfiles/serve/8298981c-6613-4ed9-9c54-5ef8fb5180f4/"
+ }
+ ],
+ "subjects": [
+ {
+ "subject": "atmospheric trace gases"
+ },
+ {
+ "subject": "CO2"
+ },
+ {
+ "subject": "CH4"
+ },
+ {
+ "subject": "CO"
+ },
+ {
+ "subject": "N2O"
+ },
+ {
+ "subject": "column-averaged dry-air mole fractions"
+ },
+ {
+ "subject": "remote sensing"
+ },
+ {
+ "subject": "FTIR spectroscopy"
+ },
+ {
+ "subject": "TCCON"
+ }
+ ],
+ "version": "R3",
+ "titles": [
+ {
+ "title": "TCCON data from Eureka (CA), Release GGG2014.R3"
+ }
+ ],
+ "formats": [
+ "application/x-netcdf"
+ ],
+ "dates": [
+ {
+ "date": "2019-01-31",
+ "dateType": "Created"
+ },
+ {
+ "date": "2020-07-01",
+ "dateType": "Updated"
+ },
+ {
+ "date": "2010-07-24/2019-08-15",
+ "dateType": "Collected"
+ },
+ {
+ "date": "2019-01-31",
+ "dateType": "Submitted"
+ },
+ {
+ "date": "2019-01-31",
+ "dateType": "Issued"
+ }
+ ],
+ "publicationYear": "2019",
+ "publisher": "CaltechDATA",
+ "types": {
+ "resourceTypeGeneral": "Dataset",
+ "resourceType": "Dataset"
+ },
+ "identifiers": [
+ {
+ "identifier": "10.14291/tccon.ggg2014.eureka01.R3",
+ "identifierType": "DOI"
+ },
+ {
+ "identifier": "1171",
+ "identifierType": "CaltechDATA_Identifier"
+ },
+ {
+ "identifier": "GGG2014",
+ "identifierType": "Software_Version"
+ },
+ {
+ "identifier": "eu",
+ "identifierType": "id"
+ },
+ {
+ "identifier": "eureka01",
+ "identifierType": "longName"
+ },
+ {
+ "identifier": "R1",
+ "identifierType": "Data_Revision"
+ }
+ ],
+ "creators": "Incorrect type",
+ "geoLocations": [
+ {
+ "geoLocationPlace": "Eureka, NU (CA)",
+ "geoLocationPoint": {
+ "pointLatitude": "80.05",
+ "pointLongitude": "-86.42"
+ }
+ }
+ ],
+ "schemaVersion": "http://datacite.org/schema/kernel-4"
+}
\ No newline at end of file
From 5e154b8588f07a22cb10544155b22228d82b5e26 Mon Sep 17 00:00:00 2001
From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com>
Date: Fri, 25 Oct 2024 00:37:38 -0700
Subject: [PATCH 23/42] Create file.py
---
caltechdata_api/tester/validfiles/file.py | 1 +
1 file changed, 1 insertion(+)
create mode 100644 caltechdata_api/tester/validfiles/file.py
diff --git a/caltechdata_api/tester/validfiles/file.py b/caltechdata_api/tester/validfiles/file.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/caltechdata_api/tester/validfiles/file.py
@@ -0,0 +1 @@
+
From 782480e7c3e2a341b485e4ec78929280964ab007 Mon Sep 17 00:00:00 2001
From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com>
Date: Fri, 25 Oct 2024 00:38:03 -0700
Subject: [PATCH 24/42] Add files via upload
---
caltechdata_api/tester/validfiles/1171.json | 351 ++++++++++++++++++++
caltechdata_api/tester/validfiles/1235.json | 91 +++++
caltechdata_api/tester/validfiles/1250.json | 1 +
caltechdata_api/tester/validfiles/1259.json | 1 +
caltechdata_api/tester/validfiles/1300.json | 1 +
caltechdata_api/tester/validfiles/210.json | 1 +
caltechdata_api/tester/validfiles/266.json | 1 +
caltechdata_api/tester/validfiles/267.json | 1 +
caltechdata_api/tester/validfiles/268.json | 1 +
caltechdata_api/tester/validfiles/283.json | 1 +
caltechdata_api/tester/validfiles/293.json | 1 +
caltechdata_api/tester/validfiles/301.json | 1 +
caltechdata_api/tester/validfiles/970.json | 1 +
13 files changed, 453 insertions(+)
create mode 100644 caltechdata_api/tester/validfiles/1171.json
create mode 100644 caltechdata_api/tester/validfiles/1235.json
create mode 100644 caltechdata_api/tester/validfiles/1250.json
create mode 100644 caltechdata_api/tester/validfiles/1259.json
create mode 100644 caltechdata_api/tester/validfiles/1300.json
create mode 100644 caltechdata_api/tester/validfiles/210.json
create mode 100644 caltechdata_api/tester/validfiles/266.json
create mode 100644 caltechdata_api/tester/validfiles/267.json
create mode 100644 caltechdata_api/tester/validfiles/268.json
create mode 100644 caltechdata_api/tester/validfiles/283.json
create mode 100644 caltechdata_api/tester/validfiles/293.json
create mode 100644 caltechdata_api/tester/validfiles/301.json
create mode 100644 caltechdata_api/tester/validfiles/970.json
diff --git a/caltechdata_api/tester/validfiles/1171.json b/caltechdata_api/tester/validfiles/1171.json
new file mode 100644
index 0000000..eea6d9b
--- /dev/null
+++ b/caltechdata_api/tester/validfiles/1171.json
@@ -0,0 +1,351 @@
+{
+ "contributors": [
+ {
+ "nameIdentifiers": [
+ {
+ "nameIdentifier": "grid.20861.3d",
+ "nameIdentifierScheme": "GRID"
+ }
+ ],
+ "name": "California Institute of Techonolgy, Pasadena, CA (US)",
+ "contributorType": "HostingInstitution"
+ },
+ {
+ "affiliation": [
+ {
+ "name": "California Institute of Technology, Pasadena, CA (US)"
+ }
+ ],
+ "nameIdentifiers": [
+ {
+ "nameIdentifier": "0000-0001-5383-8462",
+ "nameIdentifierScheme": "ORCID"
+ }
+ ],
+ "name": "Roehl, C. M.",
+ "contributorType": "DataCurator"
+ },
+ {
+ "affiliation": [
+ {
+ "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
+ }
+ ],
+ "nameIdentifiers": [
+ {
+ "nameIdentifier": "0000-0001-9947-1053",
+ "nameIdentifierScheme": "ORCID"
+ },
+ {
+ "nameIdentifier": "D-2563-2012",
+ "nameIdentifierScheme": "ResearcherID"
+ }
+ ],
+ "name": "Kimberly Strong",
+ "contributorType": "ContactPerson"
+ },
+ {
+ "name": "TCCON",
+ "contributorType": "ResearchGroup"
+ }
+ ],
+ "descriptions": [
+ {
+ "descriptionType": "Abstract",
+ "description": "
The Total Carbon Column Observing Network (TCCON) is a network of ground-based Fourier Transform Spectrometers that record direct solar absorption spectra of the atmosphere in the near-infrared. From these spectra, accurate and precise column-averaged abundances of atmospheric constituents including CO2, CH4, N2O, HF, CO, H2O, and HDO, are retrieved. This data set contains observations from the TCCON station at Eureka, Canada."
+ },
+ {
+ "descriptionType": "Other",
+ "description": "
Cite this record as:
Strong, K., Roche, S., Franklin, J. E., Mendonca, J., Lutsch, E., Weaver, D., \u2026 Lindenmaier, R. (2019). TCCON data from Eureka (CA), Release GGG2014.R3 [Data set]. CaltechDATA. https://doi.org/10.14291/tccon.ggg2014.eureka01.r3
or choose a different citation style.
Download Citation
"
+ },
+ {
+ "descriptionType": "Other",
+ "description": "
Unique Views: 161
Unique Downloads: 7
between January 31, 2019 and July 02, 2020
More info on how stats are collected
"
+ }
+ ],
+ "fundingReferences": [
+ {
+ "funderName": "Atlantic Innovation Fund"
+ },
+ {
+ "funderName": "Canada Foundation for Innovation",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.439998.6"
+ },
+ {
+ "funderName": "Canadian Foundation for Climate and Atmospheric Sciences"
+ },
+ {
+ "funderName": "Canadian Space Agency",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.236846.d"
+ },
+ {
+ "funderName": "Environment and Climate Change Canada",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.410334.1"
+ },
+ {
+ "funderName": "Government of Canada (International Polar Year funding)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.451254.3"
+ },
+ {
+ "funderName": "Natural Sciences and Engineering Research Council of Canada",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.452912.9"
+ },
+ {
+ "funderName": "Polar Commission (Northern Scientific Training Program)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.465477.3"
+ },
+ {
+ "funderName": "Nova Scotia Research Innovation Trust"
+ },
+ {
+ "funderName": "Ministry of Research and Innovation (Ontario Innovation Trust and Ontario Research Fund)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.451078.f"
+ },
+ {
+ "funderName": "Natural Resources Canada (Polar Continental Shelf Program)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.202033.0"
+ }
+ ],
+ "language": "eng",
+ "relatedIdentifiers": [
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.documentation.R0/1221662",
+ "relationType": "IsDocumentedBy",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R0/1149271",
+ "relationType": "IsNewVersionOf",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "https://tccon-wiki.caltech.edu/Network_Policy/Data_Use_Policy/Data_Description",
+ "relationType": "IsDocumentedBy",
+ "relatedIdentifierType": "URL"
+ },
+ {
+ "relatedIdentifier": "https://tccon-wiki.caltech.edu/Sites",
+ "relationType": "IsDocumentedBy",
+ "relatedIdentifierType": "URL"
+ },
+ {
+ "relatedIdentifier": "10.14291/TCCON.GGG2014",
+ "relationType": "IsPartOf",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R1/1325515",
+ "relationType": "IsNewVersionOf",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R2",
+ "relationType": "IsNewVersionOf",
+ "relatedIdentifierType": "DOI"
+ }
+ ],
+ "rightsList": [
+ {
+ "rights": "TCCON Data License",
+ "rightsURI": "https://data.caltech.edu/tindfiles/serve/8298981c-6613-4ed9-9c54-5ef8fb5180f4/"
+ }
+ ],
+ "subjects": [
+ {
+ "subject": "atmospheric trace gases"
+ },
+ {
+ "subject": "CO2"
+ },
+ {
+ "subject": "CH4"
+ },
+ {
+ "subject": "CO"
+ },
+ {
+ "subject": "N2O"
+ },
+ {
+ "subject": "column-averaged dry-air mole fractions"
+ },
+ {
+ "subject": "remote sensing"
+ },
+ {
+ "subject": "FTIR spectroscopy"
+ },
+ {
+ "subject": "TCCON"
+ }
+ ],
+ "version": "R3",
+ "titles": [
+ {
+ "title": "TCCON data from Eureka (CA), Release GGG2014.R3"
+ }
+ ],
+ "formats": [
+ "application/x-netcdf"
+ ],
+ "dates": [
+ {
+ "date": "2019-01-31",
+ "dateType": "Created"
+ },
+ {
+ "date": "2020-07-01",
+ "dateType": "Updated"
+ },
+ {
+ "date": "2010-07-24/2019-08-15",
+ "dateType": "Collected"
+ },
+ {
+ "date": "2019-01-31",
+ "dateType": "Submitted"
+ },
+ {
+ "date": "2019-01-31",
+ "dateType": "Issued"
+ }
+ ],
+ "publicationYear": "2019",
+ "publisher": "CaltechDATA",
+ "types": {
+ "resourceTypeGeneral": "Dataset",
+ "resourceType": "Dataset"
+ },
+ "identifiers": [
+ {
+ "identifier": "10.14291/tccon.ggg2014.eureka01.R3",
+ "identifierType": "DOI"
+ },
+ {
+ "identifier": "1171",
+ "identifierType": "CaltechDATA_Identifier"
+ },
+ {
+ "identifier": "GGG2014",
+ "identifierType": "Software_Version"
+ },
+ {
+ "identifier": "eu",
+ "identifierType": "id"
+ },
+ {
+ "identifier": "eureka01",
+ "identifierType": "longName"
+ },
+ {
+ "identifier": "R1",
+ "identifierType": "Data_Revision"
+ }
+ ],
+ "creators": [
+ {
+ "affiliation": [
+ {
+ "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
+ }
+ ],
+ "name": "Strong, K."
+ },
+ {
+ "affiliation": [
+ {
+ "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
+ }
+ ],
+ "name": "Roche, S."
+ },
+ {
+ "affiliation": [
+ {
+ "name": "School of Engineering and Applied Sciences, Harvard University, Cambridge, MA (USA)"
+ }
+ ],
+ "name": "Franklin, J. E."
+ },
+ {
+ "affiliation": [
+ {
+ "name": "Environment and Climate Change Canada, Downsview, ON (CA)"
+ }
+ ],
+ "name": "Mendonca, J."
+ },
+ {
+ "affiliation": [
+ {
+ "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
+ }
+ ],
+ "name": "Lutsch, E."
+ },
+ {
+ "affiliation": [
+ {
+ "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
+ }
+ ],
+ "name": "Weaver, D."
+ },
+ {
+ "affiliation": [
+ {
+ "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
+ }
+ ],
+ "name": "Fogal, P. F."
+ },
+ {
+ "affiliation": [
+ {
+ "name": "Department of Physics & Atmospheric Science, Dalhousie University, Halifax, NS, CA"
+ }
+ ],
+ "name": "Drummond, J. R."
+ },
+ {
+ "affiliation": [
+ {
+ "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
+ },
+ {
+ "name": "UCAR Center for Science Education, Boulder, CO (US)"
+ }
+ ],
+ "name": "Batchelor, R."
+ },
+ {
+ "affiliation": [
+ {
+ "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
+ },
+ {
+ "name": "Pacific Northwest National Laboratory, Richland, WA (US)"
+ }
+ ],
+ "name": "Lindenmaier, R."
+ }
+ ],
+ "geoLocations": [
+ {
+ "geoLocationPlace": "Eureka, NU (CA)",
+ "geoLocationPoint": {
+ "pointLatitude": "80.05",
+ "pointLongitude": "-86.42"
+ }
+ }
+ ],
+ "schemaVersion": "http://datacite.org/schema/kernel-4"
+}
\ No newline at end of file
diff --git a/caltechdata_api/tester/validfiles/1235.json b/caltechdata_api/tester/validfiles/1235.json
new file mode 100644
index 0000000..ebda909
--- /dev/null
+++ b/caltechdata_api/tester/validfiles/1235.json
@@ -0,0 +1,91 @@
+{
+ "descriptions": [
+ {
+ "descriptionType": "Abstract",
+ "description": "First included in ames, this notebook dynamically shows how many records are in CaltechDATA and where they come from (GitHub, Deposit Form, or API). This repository is set to work with MyBinder so you can easily reproduce the plot and include new records. "
+ },
+ {
+ "descriptionType": "Other",
+ "description": "
Cite this record as:
Morrell, T. E. (2019, April 29). caltechlibrary/caltechdata_usage: First release of CaltechDATA Usage notebook (Version v0.0.1). CaltechDATA. https://doi.org/10.22002/d1.1235
or choose a different citation style.
Download Citation
"
+ },
+ {
+ "descriptionType": "Other",
+ "description": "
Unique Views: 4
Unique Downloads: 1
between April 29, 2019 and July 02, 2020
More info on how stats are collected
"
+ }
+ ],
+ "relatedIdentifiers": [
+ {
+ "relatedIdentifier": "https://github.com/caltechlibrary/caltechdata_usage/releases/tag/v0.0.1",
+ "relationType": "IsIdenticalTo",
+ "relatedIdentifierType": "URL"
+ }
+ ],
+ "rightsList": [
+ {
+ "rights": "license",
+ "rightsURI": "https://data.caltech.edu/license"
+ }
+ ],
+ "subjects": [
+ {
+ "subject": "CaltechDATA"
+ },
+ {
+ "subject": "reporitory"
+ },
+ {
+ "subject": "usage"
+ },
+ {
+ "subject": "Jupyter"
+ },
+ {
+ "subject": "GitHub"
+ }
+ ],
+ "version": "v0.0.1",
+ "titles": [
+ {
+ "title": "caltechlibrary/caltechdata_usage: First release of CaltechDATA Usage notebook"
+ }
+ ],
+ "dates": [
+ {
+ "date": "2019-04-29",
+ "dateType": "Issued"
+ }
+ ],
+ "publicationYear": "2019",
+ "publisher": "CaltechDATA",
+ "types": {
+ "resourceTypeGeneral": "Software",
+ "resourceType": "Software"
+ },
+ "identifiers": [
+ {
+ "identifier": "10.22002/D1.1235",
+ "identifierType": "DOI"
+ },
+ {
+ "identifier": "1235",
+ "identifierType": "CaltechDATA_Identifier"
+ }
+ ],
+ "creators": [
+ {
+ "affiliation": [
+ {
+ "name": "Caltech Library"
+ }
+ ],
+ "nameIdentifiers": [
+ {
+ "nameIdentifier": "0000-0001-9266-5146",
+ "nameIdentifierScheme": "ORCID"
+ }
+ ],
+ "name": "Morrell, Thomas E"
+ }
+ ],
+ "schemaVersion": "http://datacite.org/schema/kernel-4"
+}
\ No newline at end of file
diff --git a/caltechdata_api/tester/validfiles/1250.json b/caltechdata_api/tester/validfiles/1250.json
new file mode 100644
index 0000000..29c72df
--- /dev/null
+++ b/caltechdata_api/tester/validfiles/1250.json
@@ -0,0 +1 @@
+{"descriptions": [{"descriptionType": "Abstract", "description": "This release includes two months more data and has some dependency updates."}, {"descriptionType": "Other", "description": "Jupyter notebooks highlighting usage of CaltechDATA"}, {"descriptionType": "Other", "description": "
Click to run this software: 
"}, {"descriptionType": "Other", "description": "
Cite this record as:
Morrell, T. E. (2019, June 19). caltechlibrary/caltechdata_usage: Jupyter notebook with visualization of submissions to CaltechDATA (Version v0.0.2). CaltechDATA. https://doi.org/10.22002/d1.1250
or choose a different citation style.
Download Citation
"}, {"descriptionType": "Other", "description": "
Unique Views: 85
Unique Downloads: 2
between June 19, 2019 and July 02, 2020
More info on how stats are collected
"}], "relatedIdentifiers": [{"relatedIdentifier": "https://github.com/caltechlibrary/caltechdata_usage/releases/tag/v0.0.2", "relationType": "IsIdenticalTo", "relatedIdentifierType": "URL"}], "rightsList": [{"rights": "license", "rightsURI": "https://data.caltech.edu/license"}], "subjects": [{"subject": "CaltechDATA"}, {"subject": "reporitory"}, {"subject": "usage"}, {"subject": "Jupyter"}, {"subject": "GitHub"}], "version": "v0.0.2", "titles": [{"title": "caltechlibrary/caltechdata_usage: Jupyter notebook with visualization of submissions to CaltechDATA"}], "dates": [{"date": "2019-06-19", "dateType": "Issued"}], "publicationYear": "2019", "publisher": "CaltechDATA", "types": {"resourceTypeGeneral": "Software", "resourceType": "Software"}, "identifiers": [{"identifier": "10.22002/D1.1250", "identifierType": "DOI"}, {"identifier": "1250", "identifierType": "CaltechDATA_Identifier"}], "creators": [{"affiliation": [{"name": "Caltech Library"}], "nameIdentifiers": [{"nameIdentifier": "0000-0001-9266-5146", "nameIdentifierScheme": "ORCID"}], "name": "Morrell, Thomas E"}], "schemaVersion": "http://datacite.org/schema/kernel-4"}
\ No newline at end of file
diff --git a/caltechdata_api/tester/validfiles/1259.json b/caltechdata_api/tester/validfiles/1259.json
new file mode 100644
index 0000000..09fe197
--- /dev/null
+++ b/caltechdata_api/tester/validfiles/1259.json
@@ -0,0 +1 @@
+{"descriptions": [{"descriptionType": "Abstract", "description": "This release includes a new notebook that determines the use of ORCID iDs across Caltech Library DOIs. It also updates all notebooks to use the latest version of ames and streamlines dependencies."}, {"descriptionType": "Other", "description": "Jupyter notebooks highlighting usage of CaltechDATA"}, {"descriptionType": "Other", "description": "
Click to run this software: 
"}, {"descriptionType": "Other", "description": "
Cite this record as:
Morrell, T. E. (2019, July 16). caltechlibrary/caltechdata_usage: Addition of ORCID analysis notebook and update for new ames version (Version v0.1.0). CaltechDATA. https://doi.org/10.22002/d1.1259
or choose a different citation style.
Download Citation
"}, {"descriptionType": "Other", "description": "
Unique Views: 86
Unique Downloads: 1
between July 16, 2019 and July 02, 2020
More info on how stats are collected
"}], "relatedIdentifiers": [{"relatedIdentifier": "https://github.com/caltechlibrary/caltechdata_usage/releases/tag/v0.1.0", "relationType": "IsIdenticalTo", "relatedIdentifierType": "URL"}], "rightsList": [{"rights": "license", "rightsURI": "https://data.caltech.edu/license"}], "subjects": [{"subject": "CaltechDATA"}, {"subject": "reporitory"}, {"subject": "usage"}, {"subject": "Jupyter"}, {"subject": "GitHub"}], "version": "v0.1.0", "titles": [{"title": "caltechlibrary/caltechdata_usage: Addition of ORCID analysis notebook and update for new ames version"}], "dates": [{"date": "2019-07-16", "dateType": "Issued"}], "publicationYear": "2019", "publisher": "CaltechDATA", "types": {"resourceTypeGeneral": "Software", "resourceType": "Software"}, "identifiers": [{"identifier": "10.22002/D1.1259", "identifierType": "DOI"}, {"identifier": "1259", "identifierType": "CaltechDATA_Identifier"}], "creators": [{"affiliation": [{"name": "Caltech Library"}], "nameIdentifiers": [{"nameIdentifier": "0000-0001-9266-5146", "nameIdentifierScheme": "ORCID"}], "name": "Morrell, Thomas E"}], "schemaVersion": "http://datacite.org/schema/kernel-4"}
\ No newline at end of file
diff --git a/caltechdata_api/tester/validfiles/1300.json b/caltechdata_api/tester/validfiles/1300.json
new file mode 100644
index 0000000..3c027c6
--- /dev/null
+++ b/caltechdata_api/tester/validfiles/1300.json
@@ -0,0 +1 @@
+{"contributors": [{"nameIdentifiers": [{"nameIdentifier": "grid.20861.3d", "nameIdentifierScheme": "GRID"}], "name": "California Institute of Techonolgy, Pasadena, CA (US)", "contributorType": "HostingInstitution"}, {"affiliation": [{"name": "California Institute of Technology, Pasadena, CA (US)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0001-5383-8462", "nameIdentifierScheme": "ORCID"}], "name": "Roehl, C. M.", "contributorType": "DataCurator"}, {"affiliation": [{"name": "AeroMeteo Service, Bia\u0142ystok (PL)"}], "name": "Katry\u0144ski, K.", "contributorType": "Other"}, {"name": "Christof Petri", "contributorType": "ContactPerson"}, {"name": "TCCON", "contributorType": "ResearchGroup"}], "descriptions": [{"descriptionType": "Abstract", "description": "The Total Carbon Column Observing Network (TCCON) is a network of ground-based Fourier Transform Spectrometers that record direct solar absorption spectra of the atmosphere in the near-infrared. From these spectra, accurate and precise column-averaged abundances of atmospheric constituents including CO2, CH4, N2O, HF, CO, H2O, and HDO, are retrieved. This data set contains observations from the TCCON station at Bialystok, Poland."}, {"descriptionType": "Other", "description": "
Cite this record as:
Deutscher, N. M., Notholt, J., Messerschmidt, J., Weinzierl, C., Warneke, T., Petri, C., & Grupe, P. (2019). TCCON data from Bialystok (PL), Release GGG2014.R2 [Data set]. CaltechDATA. https://doi.org/10.14291/tccon.ggg2014.bialystok01.r2
or choose a different citation style.
Download Citation
"}, {"descriptionType": "Other", "description": "
Unique Views: 52
Unique Downloads: 3
between October 21, 2019 and July 02, 2020
More info on how stats are collected
"}], "fundingReferences": [{"awardTitle": "Infrastructure for Measurement of the European Carbon Cycle (IMECC)", "funderName": "European Union", "funderIdentifierType": "GRID", "funderIdentifier": "grid.453396.e", "awardURI": "http://cordis.europa.eu/project/rcn/81606_en.html", "awardNumber": "26188"}, {"awardTitle": "Global Earth observation and monitoring (GEOMON)", "funderName": "European Union", "funderIdentifierType": "GRID", "funderIdentifier": "grid.453396.e", "awardURI": "http://cordis.europa.eu/project/rcn/84619_en.html", "awardNumber": "36677"}, {"awardTitle": "Integrated non-CO2 Greenhouse gas Observing System (INGOS)", "funderName": "European Union", "funderIdentifierType": "GRID", "funderIdentifier": "grid.453396.e", "awardURI": "http://cordis.europa.eu/project/rcn/101549_en.html", "awardNumber": "284274"}, {"awardTitle": "ICOS improved sensors, network and interoperability for GMES (ICOS-INWIRE)", "funderName": "European Union", "funderIdentifierType": "GRID", "funderIdentifier": "grid.453396.e", "awardURI": "http://cordis.europa.eu/project/rcn/106570_en.html", "awardNumber": "313169"}, {"awardTitle": "Gap Analysis for Integrated Atmospheric ECV CLImate Monitoring (GAIA-CLIM)", "funderName": "European Union", "funderIdentifierType": "GRID", "funderIdentifier": "grid.453396.e", "awardURI": "http://cordis.europa.eu/project/rcn/193710_en.html", "awardNumber": "640276"}, {"funderName": "Senate of Bremen"}, {"funderName": "University of Bremen", "funderIdentifierType": "GRID", "funderIdentifier": "grid.7704.4"}], "language": "eng", "relatedIdentifiers": [{"relatedIdentifier": "10.14291/tccon.ggg2014.documentation.R0/1221662", "relationType": "IsDocumentedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-9-683-2016", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/acp-16-14003-2016", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": 
"10.5194/amt-9-3491-2016", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.3390/rs8050414", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.14291/tccon.ggg2014.bialystok01.R0/1149277", "relationType": "IsNewVersionOf", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "https://tccon-wiki.caltech.edu/Network_Policy/Data_Use_Policy/Data_Description", "relationType": "IsDocumentedBy", "relatedIdentifierType": "URL"}, {"relatedIdentifier": "https://tccon-wiki.caltech.edu/Sites", "relationType": "IsDocumentedBy", "relatedIdentifierType": "URL"}, {"relatedIdentifier": "10.14291/TCCON.GGG2014", "relationType": "IsPartOf", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "http://tccondata.org", "relationType": "IsPartOf", "relatedIdentifierType": "URL"}, {"relatedIdentifier": "10.3390/rs9101033", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/acp-17-4781-2017", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.3390/atmos9050175", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.3390/rs10030469", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-10-4135-2017", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-11-1251-2018", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-11-3111-2018", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.14291/tccon.ggg2014.bialystok01.R1/1183984", "relationType": "IsNewVersionOf", "relatedIdentifierType": "DOI"}], "rightsList": [{"rights": "TCCON Data License", "rightsURI": "https://data.caltech.edu/tindfiles/serve/7a5e834c-39e9-4d13-9c55-f50a4532885d/"}], "subjects": [{"subject": "atmospheric trace gases"}, {"subject": "CO2"}, {"subject": "CH4"}, 
{"subject": "CO"}, {"subject": "N2O"}, {"subject": "column-averaged dry-air mole fractions"}, {"subject": "remote sensing"}, {"subject": "FTIR spectroscopy"}, {"subject": "TCCON"}], "version": "R2", "titles": [{"title": "TCCON data from Bialystok (PL), Release GGG2014.R2"}], "formats": ["application/x-netcdf"], "dates": [{"date": "2019-10-21", "dateType": "Created"}, {"date": "2020-07-01", "dateType": "Updated"}, {"date": "2009-03-01/2018-10-01", "dateType": "Collected"}, {"date": "2019-10-21", "dateType": "Submitted"}, {"date": "2019-10-21", "dateType": "Issued"}], "publicationYear": "2019", "publisher": "CaltechDATA", "types": {"resourceTypeGeneral": "Dataset", "resourceType": "Dataset"}, "identifiers": [{"identifier": "10.14291/tccon.ggg2014.bialystok01.R2", "identifierType": "DOI"}, {"identifier": "1300", "identifierType": "CaltechDATA_Identifier"}, {"identifier": "GGG2014", "identifierType": "Software_Version"}, {"identifier": "bi", "identifierType": "id"}, {"identifier": "bialystok01", "identifierType": "longName"}, {"identifier": "R1", "identifierType": "Data_Revision"}], "creators": [{"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}, {"name": "Centre for Atmospheric Chemistry, School of Chemistry, University of Wollongong, Wollongong, NSW (AU)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0002-2906-2577", "nameIdentifierScheme": "ORCID"}, {"nameIdentifier": "E-3683-2015", "nameIdentifierScheme": "ResearcherID"}], "name": "Deutscher, N. 
M."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0002-3324-885X", "nameIdentifierScheme": "ORCID"}, {"nameIdentifier": "P-4520-2016", "nameIdentifierScheme": "ResearcherID"}], "name": "Notholt, J."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "name": "Messerschmidt, J."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "name": "Weinzierl, C."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0001-5185-3415", "nameIdentifierScheme": "ORCID"}, {"nameIdentifier": "K-1884-2012", "nameIdentifierScheme": "ResearcherID"}], "name": "Warneke, T."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0002-7010-5532", "nameIdentifierScheme": "ORCID"}], "name": "Petri, C."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "name": "Grupe, P."}], "geoLocations": [{"geoLocationPlace": "Bia\u0142ystok (PL)", "geoLocationPoint": {"pointLatitude": "53.23", "pointLongitude": "23.025"}}], "schemaVersion": "http://datacite.org/schema/kernel-4"}
\ No newline at end of file
diff --git a/caltechdata_api/tester/validfiles/210.json b/caltechdata_api/tester/validfiles/210.json
new file mode 100644
index 0000000..927730f
--- /dev/null
+++ b/caltechdata_api/tester/validfiles/210.json
@@ -0,0 +1 @@
+{"contributors": [{"nameIdentifiers": [{"nameIdentifier": "grid.20861.3d", "nameIdentifierScheme": "GRID"}], "name": "California Institute of Techonolgy, Pasadena, CA (US)", "contributorType": "HostingInstitution"}, {"affiliation": [{"name": "California Institute of Technology, Pasadena, CA (US)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0001-5383-8462", "nameIdentifierScheme": "ORCID"}], "name": "Roehl, C. M.", "contributorType": "DataCurator"}, {"name": "Dietrich Feist", "contributorType": "ContactPerson"}, {"name": "TCCON", "contributorType": "ResearchGroup"}], "creators": [{"affiliation": [{"name": "Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen, Lehrstuhl f\u00fcr Physik der Atmosph\u00e4re, Munich (DE)"}, {"name": "Deutsches Zentrum f\u00fcr Luft- und Raumfahrt, Institut f\u00fcr Physik der Atmosph\u00e4re, Oberpfaffenhofen (DE)"}, {"name": "Max Planck Institute for Biogeochemistry, Jena (DE)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0002-5890-6687", "nameIdentifierScheme": "ORCID"}, {"nameIdentifier": "B-6489-2013", "nameIdentifierScheme": "ResearcherID"}], "name": "Feist, D. G."}, {"affiliation": [{"name": "Max Planck Institute for Biogeochemistry, Jena (DE)"}], "name": "Arnold, S. G."}, {"affiliation": [{"name": "Ariane Tracking Station, Ascension Island (SH)"}], "name": "John, N."}, {"affiliation": [{"name": "Stockholm University, Stockholm (SE)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0002-7369-0781", "nameIdentifierScheme": "ORCID"}, {"nameIdentifier": "B-8591-2015", "nameIdentifierScheme": "ResearcherID"}], "name": "Geibel, M. C."}], "descriptions": [{"descriptionType": "Abstract", "description": "The Total Carbon Column Observing Network (TCCON) is a network of ground-based Fourier Transform Spectrometers that record direct solar absorption spectra of the atmosphere in the near-infrared. 
From these spectra, accurate and precise column-averaged abundances of atmospheric constituents including CO2, CH4, N2O, HF, CO, H2O, and HDO, are retrieved. This data set contains observations from the TCCON station on Ascension Island."}, {"descriptionType": "Other", "description": "
Cite this record as:
Feist, D. G., Arnold, S. G., John, N., & Geibel, M. C. (2014). TCCON data from Ascension Island (SH), Release GGG2014.R0 [Data set]. CaltechDATA. https://doi.org/10.14291/tccon.ggg2014.ascension01.r0/1149285
or choose a different citation style.
Download Citation
"}, {"descriptionType": "Other", "description": "
Unique Views: 673
Unique Downloads: 28
between February 21, 2017 and July 02, 2020
More info on how stats are collected
"}], "fundingReferences": [{"funderName": "Bundesministerium f\u00fcr Wirtschaft und Energie", "funderIdentifierType": "GRID", "funderIdentifier": "grid.424440.2", "awardNumber": "50EE1711E"}, {"funderName": "Bundesministerium f\u00fcr Wirtschaft und Energie", "funderIdentifierType": "GRID", "funderIdentifier": "grid.424440.2", "awardNumber": "50EE1711C"}, {"funderName": "European Space Agency", "funderIdentifierType": "GRID", "funderIdentifier": "grid.410379.8", "awardNumber": "3-14737"}, {"funderName": "Max Planck Institute for Biogeochemistry", "funderIdentifierType": "GRID", "funderIdentifier": "grid.419500.9"}, {"funderName": "Max Planck Society", "funderIdentifierType": "GRID", "funderIdentifier": "grid.4372.2"}], "language": "eng", "publicationYear": "2014", "publisher": "CaltechDATA", "relatedIdentifiers": [{"relatedIdentifier": "10.14291/tccon.ggg2014.documentation.R0/1221662", "relationType": "IsDocumentedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "https://tccon-wiki.caltech.edu/Network_Policy/Data_Use_Policy/Data_Description", "relationType": "IsDocumentedBy", "relatedIdentifierType": "URL"}, {"relatedIdentifier": "https://tccon-wiki.caltech.edu/Sites", "relationType": "IsDocumentedBy", "relatedIdentifierType": "URL"}, {"relatedIdentifier": "10.14291/TCCON.GGG2014", "relationType": "IsPartOf", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "http://tccondata.org", "relationType": "IsPartOf", "relatedIdentifierType": "URL"}, {"relatedIdentifier": "10.5194/acp-19-9797-2019", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/acp-19-7347-2019", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-12-2241-2019", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-12-1495-2019", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-11-6539-2018", "relationType": 
"IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-11-5507-2018", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-11-3111-2018", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.3390/rs10010155", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.1002/2017JD026453", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.1109/jstars.2017.2650942", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/gmd-10-1261-2017", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/gmd-10-1-2017", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/acp-17-4781-2017", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-10-4135-2017", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-10-2209-2017", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.3390/rs9101033", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.1038/s41598-017-13459-0", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-9-3491-2016", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-9-2381-2016", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-9-1415-2016", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-9-683-2016", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/acp-16-1653-2016", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.1002/2016JD026164", "relationType": 
"IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.1002/2015JD023389", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.1002/2015JD024157", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.3390/atmos10070354", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}], "rightsList": [{"rights": "TCCON Data Use Policy", "rightsURI": "https://data.caltech.edu/tindfiles/serve/cb9b01e4-56ea-4b8c-9543-0c61d0c72148/"}], "subjects": [{"subject": "atmospheric trace gases"}, {"subject": "CO2"}, {"subject": "CH4"}, {"subject": "CO"}, {"subject": "N2O"}, {"subject": "column-averaged dry-air mole fractions"}, {"subject": "remote sensing"}, {"subject": "FTIR spectroscopy"}, {"subject": "TCCON"}], "titles": [{"title": "TCCON data from Ascension Island (SH), Release GGG2014.R0"}], "version": "GGG2014.R0", "formats": ["application/x-netcdf"], "dates": [{"date": "2014-10-01", "dateType": "Created"}, {"date": "2014-10-10", "dateType": "Issued"}, {"date": "2020-07-01", "dateType": "Updated"}, {"date": "2012-05-22/2018-10-31", "dateType": "Collected"}, {"date": "2017-02-21", "dateType": "Submitted"}], "types": {"resourceTypeGeneral": "Dataset", "resourceType": "Dataset"}, "identifiers": [{"identifier": "10.14291/tccon.ggg2014.ascension01.R0/1149285", "identifierType": "DOI"}, {"identifier": "210", "identifierType": "CaltechDATA_Identifier"}, {"identifier": "GGG2014", "identifierType": "Software_Version"}, {"identifier": "ae", "identifierType": "id"}, {"identifier": "ascension01", "identifierType": "longName"}, {"identifier": "R0", "identifierType": "Data_Revision"}], "geoLocations": [{"geoLocationPlace": "Ariane Tracking Station (AC)", "geoLocationPoint": {"pointLatitude": "-7.9165", "pointLongitude": "-14.3325"}}], "schemaVersion": "http://datacite.org/schema/kernel-4"}
\ No newline at end of file
diff --git a/caltechdata_api/tester/validfiles/266.json b/caltechdata_api/tester/validfiles/266.json
new file mode 100644
index 0000000..c7c9945
--- /dev/null
+++ b/caltechdata_api/tester/validfiles/266.json
@@ -0,0 +1 @@
+{"contributors": [{"nameIdentifiers": [{"nameIdentifier": "grid.20861.3d", "nameIdentifierScheme": "GRID"}], "name": "California Institute of Techonolgy, Pasadena, CA (US)", "contributorType": "HostingInstitution"}, {"affiliation": [{"name": "California Institute of Technology, Pasadena, CA (US)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0001-5383-8462", "nameIdentifierScheme": "ORCID"}], "name": "Roehl, C. M.", "contributorType": "DataCurator"}, {"name": "Young-Suk Oh", "contributorType": "ContactPerson"}, {"name": "TCCON", "contributorType": "ResearchGroup"}], "creators": [{"affiliation": [{"name": "National Institute of Meteorological Sciences, Seogwipo-si (KR)"}], "name": "Goo, T.-Y."}, {"affiliation": [{"name": "National Institute of Meteorological Sciences, Seogwipo-si (KR)"}], "name": "Oh, Y.-S."}, {"affiliation": [{"name": "Centre for Atmospheric Chemistry, School of Chemistry, University of Wollongong, Wollongong, NSW (AU)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0002-1376-438X", "nameIdentifierScheme": "ORCID"}, {"nameIdentifier": "H-2280-2011", "nameIdentifierScheme": "ResearcherID"}], "name": "Velazco, V. A."}], "descriptions": [{"descriptionType": "Abstract", "description": "The Total Carbon Column Observing Network (TCCON) is a network of ground-based Fourier Transform Spectrometers that record direct solar absorption spectra of the atmosphere in the near-infrared. From these spectra, accurate and precise column-averaged abundances of atmospheric constituents including CO2, CH4, N2O, HF, CO, H2O, and HDO, are retrieved. This data set contains observations from the TCCON station at Anmeyondo, South Korea."}, {"descriptionType": "Other", "description": "
Cite this record as:
Goo, T.-Y., Oh, Y.-S., & Velazco, V. A. (2014). TCCON data from Anmeyondo (KR), Release GGG2014.R0 [Data set]. CaltechDATA. https://doi.org/10.14291/tccon.ggg2014.anmeyondo01.r0/1149284
or choose a different citation style.
Download Citation
"}, {"descriptionType": "Other", "description": "
Unique Views: 270
Unique Downloads: 23
between September 08, 2017 and July 02, 2020
More info on how stats are collected
"}], "language": "eng", "publicationYear": "2014", "publisher": "CaltechDATA", "relatedIdentifiers": [{"relatedIdentifier": "10.14291/tccon.ggg2014.documentation.R0/1221662", "relationType": "IsDocumentedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "https://tccon-wiki.caltech.edu/Network_Policy/Data_Use_Policy/Data_Description", "relationType": "IsDocumentedBy", "relatedIdentifierType": "URL"}, {"relatedIdentifier": "https://tccon-wiki.caltech.edu/Sites", "relationType": "IsDocumentedBy", "relatedIdentifierType": "URL"}, {"relatedIdentifier": "10.14291/TCCON.GGG2014", "relationType": "IsPartOf", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "http://tccondata.org", "relationType": "IsPartOf", "relatedIdentifierType": "URL"}, {"relatedIdentifier": "10.3390/atmos10070354", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}], "rightsList": [{"rights": "TCCON Data Use Policy", "rightsURI": "https://data.caltech.edu/tindfiles/serve/1f568dd3-02e4-4020-a146-12ee8b53f78a/"}], "subjects": [{"subject": "atmospheric trace gases"}, {"subject": "CO2"}, {"subject": "CH4"}, {"subject": "CO"}, {"subject": "N2O"}, {"subject": "column-averaged dry-air mole fractions"}, {"subject": "remote sensing"}, {"subject": "FTIR spectroscopy"}, {"subject": "TCCON"}], "titles": [{"title": "TCCON data from Anmeyondo (KR), Release GGG2014.R0"}], "version": "GGG2014.R0", "formats": ["application/x-netcdf"], "dates": [{"date": "2014-10-10", "dateType": "Created"}, {"date": "2020-07-01", "dateType": "Updated"}, {"date": "2015-02-02/2018-04-18", "dateType": "Collected"}, {"date": "2017-09-08", "dateType": "Submitted"}, {"date": "2014-10-10", "dateType": "Issued"}], "types": {"resourceTypeGeneral": "Dataset", "resourceType": "Dataset"}, "identifiers": [{"identifier": "10.14291/tccon.ggg2014.anmeyondo01.R0/1149284", "identifierType": "DOI"}, {"identifier": "266", "identifierType": "CaltechDATA_Identifier"}, {"identifier": "GGG2014", "identifierType": "Software_Version"}, 
{"identifier": "an", "identifierType": "id"}, {"identifier": "anmeyondo01", "identifierType": "longName"}, {"identifier": "R0", "identifierType": "Data_Revision"}], "schemaVersion": "http://datacite.org/schema/kernel-4"}
\ No newline at end of file
diff --git a/caltechdata_api/tester/validfiles/267.json b/caltechdata_api/tester/validfiles/267.json
new file mode 100644
index 0000000..9ed0911
--- /dev/null
+++ b/caltechdata_api/tester/validfiles/267.json
@@ -0,0 +1 @@
+{"contributors": [{"nameIdentifiers": [{"nameIdentifier": "grid.20861.3d", "nameIdentifierScheme": "GRID"}], "name": "California Institute of Techonolgy, Pasadena, CA (US)", "contributorType": "HostingInstitution"}, {"affiliation": [{"name": "California Institute of Technology, Pasadena, CA (US)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0001-5383-8462", "nameIdentifierScheme": "ORCID"}], "name": "Roehl, C. M.", "contributorType": "DataCurator"}, {"affiliation": [{"name": "AeroMeteo Service, Bia\u0142ystok (PL)"}], "name": "Katry\u0144ski, K.", "contributorType": "Other"}, {"name": "Christof Petri", "contributorType": "ContactPerson"}, {"name": "TCCON", "contributorType": "ResearchGroup"}], "creators": [{"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}, {"name": "Centre for Atmospheric Chemistry, School of Chemistry, University of Wollongong, Wollongong, NSW (AU)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0002-2906-2577", "nameIdentifierScheme": "ORCID"}, {"nameIdentifier": "E-3683-2015", "nameIdentifierScheme": "ResearcherID"}], "name": "Deutscher, N. 
M."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0002-3324-885X", "nameIdentifierScheme": "ORCID"}, {"nameIdentifier": "P-4520-2016", "nameIdentifierScheme": "ResearcherID"}], "name": "Notholt, J."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "name": "Messerschmidt, J."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "name": "Weinzierl, C."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0001-5185-3415", "nameIdentifierScheme": "ORCID"}, {"nameIdentifier": "K-1884-2012", "nameIdentifierScheme": "ResearcherID"}], "name": "Warneke, T."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0002-7010-5532", "nameIdentifierScheme": "ORCID"}], "name": "Petri, C."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "name": "Grupe, P."}], "descriptions": [{"descriptionType": "Abstract", "description": "
These data are now obsolete and should be replaced by the most recent data: https://doi.org/10.14291/tccon.ggg2014.bialystok01.R2
The Total Carbon Column Observing Network (TCCON) is a network of ground-based Fourier Transform Spectrometers that record direct solar absorption spectra of the atmosphere in the near-infrared. From these spectra, accurate and precise column-averaged abundances of atmospheric constituents including CO2, CH4, N2O, HF, CO, H2O, and HDO, are retrieved. This data set contains observations from the TCCON station at Bialystok, Poland."}, {"descriptionType": "Other", "description": "
Cite this record as:
Deutscher, N. M., Notholt, J., Messerschmidt, J., Weinzierl, C., Warneke, T., Petri, C., & Grupe, P. (2015). TCCON data from Bialystok (PL), Release GGG2014.R1 [Data set]. CaltechDATA. https://doi.org/10.14291/tccon.ggg2014.bialystok01.r1/1183984
or choose a different citation style.
Download Citation
"}, {"descriptionType": "Other", "description": "
Unique Views: 252
Unique Downloads: 7
between September 08, 2017 and July 02, 2020
More info on how stats are collected
"}], "fundingReferences": [{"awardTitle": "Infrastructure for Measurement of the European Carbon Cycle (IMECC)", "funderName": "European Union", "funderIdentifierType": "GRID", "funderIdentifier": "grid.453396.e", "awardURI": "http://cordis.europa.eu/project/rcn/81606_en.html", "awardNumber": "26188"}, {"awardTitle": "Global Earth observation and monitoring (GEOMON)", "funderName": "European Union", "funderIdentifierType": "GRID", "funderIdentifier": "grid.453396.e", "awardURI": "http://cordis.europa.eu/project/rcn/84619_en.html", "awardNumber": "36677"}, {"awardTitle": "Integrated non-CO2 Greenhouse gas Observing System (INGOS)", "funderName": "European Union", "funderIdentifierType": "GRID", "funderIdentifier": "grid.453396.e", "awardURI": "http://cordis.europa.eu/project/rcn/101549_en.html", "awardNumber": "284274"}, {"awardTitle": "ICOS improved sensors, network and interoperability for GMES (ICOS-INWIRE)", "funderName": "European Union", "funderIdentifierType": "GRID", "funderIdentifier": "grid.453396.e", "awardURI": "http://cordis.europa.eu/project/rcn/106570_en.html", "awardNumber": "313169"}, {"awardTitle": "Gap Analysis for Integrated Atmospheric ECV CLImate Monitoring (GAIA-CLIM)", "funderName": "European Union", "funderIdentifierType": "GRID", "funderIdentifier": "grid.453396.e", "awardURI": "http://cordis.europa.eu/project/rcn/193710_en.html", "awardNumber": "640276"}, {"funderName": "Senate of Bremen"}, {"funderName": "University of Bremen", "funderIdentifierType": "GRID", "funderIdentifier": "grid.7704.4"}], "geoLocations": [{"geoLocationPlace": "Bia\u0142ystok (PL)", "geoLocationPoint": {"pointLatitude": "53.23", "pointLongitude": "23.025"}}], "language": "eng", "publicationYear": "2015", "publisher": "CaltechDATA", "relatedIdentifiers": [{"relatedIdentifier": "10.14291/tccon.ggg2014.documentation.R0/1221662", "relationType": "IsDocumentedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-9-683-2016", "relationType": 
"IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/acp-16-14003-2016", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-9-3491-2016", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.3390/rs8050414", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.14291/tccon.ggg2014.bialystok01.R0/1149277", "relationType": "IsNewVersionOf", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "https://tccon-wiki.caltech.edu/Network_Policy/Data_Use_Policy/Data_Description", "relationType": "IsDocumentedBy", "relatedIdentifierType": "URL"}, {"relatedIdentifier": "https://tccon-wiki.caltech.edu/Sites", "relationType": "IsDocumentedBy", "relatedIdentifierType": "URL"}, {"relatedIdentifier": "10.14291/TCCON.GGG2014", "relationType": "IsPartOf", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "http://tccondata.org", "relationType": "IsPartOf", "relatedIdentifierType": "URL"}, {"relatedIdentifier": "10.3390/rs9101033", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/acp-17-4781-2017", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.3390/atmos9050175", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.3390/rs10030469", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-10-4135-2017", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-11-1251-2018", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-11-3111-2018", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.14291/tccon.ggg2014.bialystok01.R2", "relationType": "IsPreviousVersionOf", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/gmd-10-1261-2017", "relationType": 
"IsCitedBy", "relatedIdentifierType": "DOI"}], "rightsList": [{"rights": "TCCON Data Use Policy", "rightsURI": "https://cd-sandbox.tind.io/tindfiles/serve/ce27a3a2-14f1-40ea-a898-3c6c5adba935/"}], "subjects": [{"subject": "atmospheric trace gases"}, {"subject": "CO2"}, {"subject": "CH4"}, {"subject": "CO"}, {"subject": "N2O"}, {"subject": "column-averaged dry-air mole fractions"}, {"subject": "remote sensing"}, {"subject": "FTIR spectroscopy"}, {"subject": "TCCON"}], "titles": [{"title": "TCCON data from Bialystok (PL), Release GGG2014.R1"}], "version": "GGG2014.R1", "formats": ["application/x-netcdf"], "dates": [{"date": "2015-06-10", "dateType": "Created"}, {"date": "2018-12-01", "dateType": "Updated"}, {"date": "2009-03-01/2017-11-28", "dateType": "Collected"}, {"date": "2017-09-08", "dateType": "Submitted"}, {"date": "2015-06-10", "dateType": "Issued"}], "types": {"resourceTypeGeneral": "Dataset", "resourceType": "Dataset"}, "identifiers": [{"identifier": "10.14291/tccon.ggg2014.bialystok01.R1/1183984", "identifierType": "DOI"}, {"identifier": "267", "identifierType": "CaltechDATA_Identifier"}, {"identifier": "GGG2014", "identifierType": "Software_Version"}, {"identifier": "bi", "identifierType": "id"}, {"identifier": "bialystok01", "identifierType": "longName"}, {"identifier": "R1", "identifierType": "Data_Revision"}], "schemaVersion": "http://datacite.org/schema/kernel-4"}
\ No newline at end of file
diff --git a/caltechdata_api/tester/validfiles/268.json b/caltechdata_api/tester/validfiles/268.json
new file mode 100644
index 0000000..c979248
--- /dev/null
+++ b/caltechdata_api/tester/validfiles/268.json
@@ -0,0 +1 @@
+{"contributors": [{"nameIdentifiers": [{"nameIdentifier": "grid.20861.3d", "nameIdentifierScheme": "GRID"}], "name": "California Institute of Techonolgy, Pasadena, CA (US)", "contributorType": "HostingInstitution"}, {"affiliation": [{"name": "California Institute of Technology, Pasadena, CA (US)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0001-5383-8462", "nameIdentifierScheme": "ORCID"}], "name": "Roehl, C. M.", "contributorType": "DataCurator"}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "name": "Kowalewski, S.", "contributorType": "DataCollector"}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "name": "Wang, Y.", "contributorType": "DataCollector"}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "name": "Wang, Z.", "contributorType": "DataCollector"}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "name": "Messerschmidt, J.", "contributorType": "DataCollector"}, {"name": "Nicholas Deutscher", "contributorType": "ContactPerson"}, {"name": "TCCON", "contributorType": "ResearchGroup"}], "creators": [{"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0002-3324-885X", "nameIdentifierScheme": "ORCID"}, {"nameIdentifier": "P-4520-2016", "nameIdentifierScheme": "ResearcherID"}], "name": "Notholt, J."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0002-7010-5532", "nameIdentifierScheme": "ORCID"}], "name": "Petri, C."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0001-5185-3415", "nameIdentifierScheme": "ORCID"}, {"nameIdentifier": "K-1884-2012", 
"nameIdentifierScheme": "ResearcherID"}], "name": "Warneke, T."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}, {"name": "Centre for Atmospheric Chemistry, School of Chemistry, University of Wollongong, Wollongong, NSW (AU)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0002-2906-2577", "nameIdentifierScheme": "ORCID"}, {"nameIdentifier": "E-3683-2015", "nameIdentifierScheme": "ResearcherID"}], "name": "Deutscher, N. M."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0001-7191-6911", "nameIdentifierScheme": "ORCID"}], "name": "Palm, M."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0001-5077-9524", "nameIdentifierScheme": "ORCID"}], "name": "Buschmann, M."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "name": "Weinzierl, C."}, {"affiliation": [{"name": "National Astronomical Research Institute of Thailand, Chiang Mai (TH)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0001-8020-8642", "nameIdentifierScheme": "ORCID"}], "name": "Macatangay, R. C."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "name": "Grupe, P."}], "descriptions": [{"descriptionType": "Abstract", "description": "
These data are now obsolete and should be replaced by the most recent data: https://doi.org/10.14291/tccon.ggg2014.bremen01.R1
The Total Carbon Column Observing Network (TCCON) is a network of ground-based Fourier Transform Spectrometers that record direct solar absorption spectra of the atmosphere in the near-infrared. From these spectra, accurate and precise column-averaged abundances of atmospheric constituents including CO2, CH4, N2O, HF, CO, H2O, and HDO, are retrieved. This data set contains observations from the TCCON station at Bremen, Germany."}, {"descriptionType": "Other", "description": "
Cite this record as:
Notholt, J., Petri, C., Warneke, T., Deutscher, N. M., Palm, M., Buschmann, M., \u2026 Grupe, P. (2014). TCCON data from Bremen (DE), Release GGG2014.R0 [Data set]. CaltechDATA. https://doi.org/10.14291/tccon.ggg2014.bremen01.r0/1149275
or choose a different citation style.
Download Citation
"}, {"descriptionType": "Other", "description": "
Unique Views: 252
Unique Downloads: 9
between September 08, 2017 and July 02, 2020
More info on how stats are collected
"}], "fundingReferences": [{"awardTitle": "Infrastructure for Measurement of the European Carbon Cycle (IMECC)", "funderName": "European Union", "funderIdentifierType": "GRID", "funderIdentifier": "grid.453396.e", "awardURI": "http://cordis.europa.eu/project/rcn/81606_en.html", "awardNumber": "26188"}, {"awardTitle": "Global Earth observation and monitoring (GEOMON)", "funderName": "European Union", "funderIdentifierType": "GRID", "funderIdentifier": "grid.453396.e", "awardURI": "http://cordis.europa.eu/project/rcn/84619_en.html", "awardNumber": "36677"}, {"awardTitle": "Integrated non-CO2 Greenhouse gas Observing System (INGOS)", "funderName": "European Union", "funderIdentifierType": "GRID", "funderIdentifier": "grid.453396.e", "awardURI": "http://cordis.europa.eu/project/rcn/101549_en.html", "awardNumber": "284274"}, {"awardTitle": "ICOS improved sensors, network and interoperability for GMES (ICOS-INWIRE)", "funderName": "European Union", "funderIdentifierType": "GRID", "funderIdentifier": "grid.453396.e", "awardURI": "http://cordis.europa.eu/project/rcn/106570_en.html", "awardNumber": "313169"}, {"awardTitle": "Gap Analysis for Integrated Atmospheric ECV CLImate Monitoring (GAIA-CLIM)", "funderName": "European Union", "funderIdentifierType": "GRID", "funderIdentifier": "grid.453396.e", "awardURI": "http://cordis.europa.eu/project/rcn/193710_en.html", "awardNumber": "640276"}, {"funderName": "Senate of Bremen"}, {"funderName": "University of Bremen", "funderIdentifierType": "GRID", "funderIdentifier": "grid.7704.4"}], "geoLocations": [{"geoLocationPlace": "Bremen (DE)", "geoLocationPoint": {"pointLatitude": "53.1", "pointLongitude": "8.85"}}], "language": "eng", "publicationYear": "2014", "publisher": "CaltechDATA", "relatedIdentifiers": [{"relatedIdentifier": "10.14291/tccon.ggg2014.documentation.R0/1221662", "relationType": "IsDocumentedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-9-683-2016", "relationType": "IsCitedBy", 
"relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/acp-16-5043-2016", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/acp-16-14003-2016", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/acp-15-13023-2015", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/acp-16-12005-2016", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/acp-16-1653-2016", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-9-3491-2016", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.3390/rs8050414", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "https://tccon-wiki.caltech.edu/Network_Policy/Data_Use_Policy/Data_Description", "relationType": "IsDocumentedBy", "relatedIdentifierType": "URL"}, {"relatedIdentifier": "https://tccon-wiki.caltech.edu/Sites", "relationType": "IsDocumentedBy", "relatedIdentifierType": "URL"}, {"relatedIdentifier": "10.14291/TCCON.GGG2014", "relationType": "IsPartOf", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "http://tccondata.org", "relationType": "IsPartOf", "relatedIdentifierType": "URL"}, {"relatedIdentifier": "10.3390/rs9101033", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/acp-17-4781-2017", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-10-2209-2017", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.3390/rs10030469", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-11-3111-2018", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.3390/atmos9050175", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, 
{"relatedIdentifier": "10.5194/amt-10-4135-2017", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.3390/atmos10070354", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.14291/tccon.ggg2014.bremen01.R1", "relationType": "IsPreviousVersionOf", "relatedIdentifierType": "DOI"}], "rightsList": [{"rights": "TCCON Data Use Policy", "rightsURI": "https://data.caltech.edu/tindfiles/serve/b6002cc3-520a-42aa-bc63-81c97ab5982a/"}], "subjects": [{"subject": "atmospheric trace gases"}, {"subject": "CO2"}, {"subject": "CH4"}, {"subject": "CO"}, {"subject": "N2O"}, {"subject": "column-averaged dry-air mole fractions"}, {"subject": "remote sensing"}, {"subject": "FTIR spectroscopy"}, {"subject": "TCCON"}], "titles": [{"title": "TCCON data from Bremen (DE), Release GGG2014.R0"}], "version": "GGG2014.R0", "formats": ["application/x-netcdf"], "dates": [{"date": "2014-10-10", "dateType": "Created"}, {"date": "2019-06-01", "dateType": "Updated"}, {"date": "2007-01-15/2018-04-20", "dateType": "Collected"}, {"date": "2017-09-08", "dateType": "Submitted"}, {"date": "2014-10-10", "dateType": "Issued"}], "types": {"resourceTypeGeneral": "Dataset", "resourceType": "Dataset"}, "identifiers": [{"identifier": "10.14291/tccon.ggg2014.bremen01.R0/1149275", "identifierType": "DOI"}, {"identifier": "268", "identifierType": "CaltechDATA_Identifier"}, {"identifier": "GGG2014", "identifierType": "Software_Version"}, {"identifier": "br", "identifierType": "id"}, {"identifier": "bremen01", "identifierType": "longName"}, {"identifier": "R0", "identifierType": "Data_Revision"}], "schemaVersion": "http://datacite.org/schema/kernel-4"}
\ No newline at end of file
diff --git a/caltechdata_api/tester/validfiles/283.json b/caltechdata_api/tester/validfiles/283.json
new file mode 100644
index 0000000..b68bc15
--- /dev/null
+++ b/caltechdata_api/tester/validfiles/283.json
@@ -0,0 +1 @@
+{"contributors": [{"nameIdentifiers": [{"nameIdentifier": "grid.20861.3d", "nameIdentifierScheme": "GRID"}], "name": "California Institute of Techonolgy, Pasadena, CA (US)", "contributorType": "HostingInstitution"}, {"affiliation": [{"name": "California Institute of Technology, Pasadena, CA (US)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0001-5383-8462", "nameIdentifierScheme": "ORCID"}], "name": "Roehl, C. M.", "contributorType": "DataCurator"}, {"affiliation": [{"name": "Laboratoire des Sciences du Climat et de l'Environnement, Gif-sur-Yvette (FR)"}], "name": "Vuillemin, C.", "contributorType": "ProjectMember"}, {"affiliation": [{"name": "Laboratoire des Sciences du Climat et de l'Environnement, Gif-sur-Yvette (FR)"}], "name": "Truong, F.\u00e7.", "contributorType": "ProjectMember"}, {"affiliation": [{"name": "Laboratoire des Sciences du Climat et de l'Environnement, Gif-sur-Yvette (FR)"}], "name": "Schmidt, M.", "contributorType": "ProjectMember"}, {"affiliation": [{"name": "Laboratoire des Sciences du Climat et de l'Environnement, Gif-sur-Yvette (FR)"}], "name": "Ramonet, M.", "contributorType": "ProjectMember"}, {"affiliation": [{"name": "Institut de Physique du Globe de Paris, Observatoire magn\u00e9tique de Chambon la For\u00eat, Cambon la For\u00eat (FR)"}], "name": "Parmentier, E.", "contributorType": "RelatedPerson"}, {"name": "Thorsten Warneke", "contributorType": "ContactPerson"}, {"name": "TCCON", "contributorType": "ResearchGroup"}], "creators": [{"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0001-5185-3415", "nameIdentifierScheme": "ORCID"}, {"nameIdentifier": "K-1884-2012", "nameIdentifierScheme": "ResearcherID"}], "name": "Warneke, T."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "name": "Messerschmidt, J."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of 
Bremen, Bremen (DE)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0002-3324-885X", "nameIdentifierScheme": "ORCID"}, {"nameIdentifier": "P-4520-2016", "nameIdentifierScheme": "ResearcherID"}], "name": "Notholt, J."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "name": "Weinzierl, C."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}, {"name": "Centre for Atmospheric Chemistry, School of Chemistry, University of Wollongong, Wollongong, NSW (AU)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0002-2906-2577", "nameIdentifierScheme": "ORCID"}, {"nameIdentifier": "E-3683-2015", "nameIdentifierScheme": "ResearcherID"}], "name": "Deutscher, N. M."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0002-7010-5532", "nameIdentifierScheme": "ORCID"}], "name": "Petri, C."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "name": "Grupe, P."}], "descriptions": [{"descriptionType": "Abstract", "description": "
These data are now obsolete and should be replaced by the most recent data: https://doi.org/10.14291/tccon.ggg2014.orleans01.R1
The Total Carbon Column Observing Network (TCCON) is a network of ground-based Fourier Transform Spectrometers that record direct solar absorption spectra of the atmosphere in the near-infrared. From these spectra, accurate and precise column-averaged abundances of atmospheric constituents including CO2, CH4, N2O, HF, CO, H2O, and HDO, are retrieved. This data set contains observations from the TCCON station at Orl\u00e9ans, France."}, {"descriptionType": "Other", "description": "
Cite this record as:
Warneke, T., Messerschmidt, J., Notholt, J., Weinzierl, C., Deutscher, N. M., Petri, C., & Grupe, P. (2014). TCCON data from Orl\u00e9ans (FR), Release GGG2014.R0 [Data set]. CaltechDATA. https://doi.org/10.14291/tccon.ggg2014.orleans01.r0/1149276
or choose a different citation style.
Download Citation
"}, {"descriptionType": "Other", "description": "
Unique Views: 222
Unique Downloads: 5
between September 08, 2017 and July 02, 2020
More info on how stats are collected
"}], "fundingReferences": [{"awardTitle": "Infrastructure for Measurement of the European Carbon Cycle (IMECC)", "funderName": "European Union", "funderIdentifierType": "GRID", "funderIdentifier": "grid.453396.e", "awardURI": "http://cordis.europa.eu/project/rcn/81606_en.html", "awardNumber": "26188"}, {"awardTitle": "Global Earth observation and monitoring (GEOMON)", "funderName": "European Union", "funderIdentifierType": "GRID", "funderIdentifier": "grid.453396.e", "awardURI": "http://cordis.europa.eu/project/rcn/84619_en.html", "awardNumber": "36677"}, {"awardTitle": "Integrated non-CO2 Greenhouse gas Observing System (INGOS)", "funderName": "European Union", "funderIdentifierType": "GRID", "funderIdentifier": "grid.453396.e", "awardURI": "http://cordis.europa.eu/project/rcn/101549_en.html", "awardNumber": "284274"}, {"awardTitle": "ICOS improved sensors, network and interoperability for GMES (ICOS-INWIRE)", "funderName": "European Union", "funderIdentifierType": "GRID", "funderIdentifier": "grid.453396.e", "awardURI": "http://cordis.europa.eu/project/rcn/106570_en.html", "awardNumber": "313169"}, {"awardTitle": "Gap Analysis for Integrated Atmospheric ECV CLImate Monitoring (GAIA-CLIM)", "funderName": "European Union", "funderIdentifierType": "GRID", "funderIdentifier": "grid.453396.e", "awardURI": "http://cordis.europa.eu/project/rcn/193710_en.html", "awardNumber": "640276"}, {"funderName": "Senate of Bremen", "funderIdentifierType": "GRID", "funderIdentifier": "grid.425996.5"}, {"funderName": "Laboratoire des Sciences du Climat et de l'Environnement", "funderIdentifierType": "GRID", "funderIdentifier": "grid.457340.1"}, {"funderName": "University of Bremen", "funderIdentifierType": "GRID", "funderIdentifier": "grid.7704.4"}], "geoLocations": [{"geoLocationPlace": "Tra\u00eenou, Orl\u00e9ans (FR)", "geoLocationPoint": {"pointLatitude": "47.97", "pointLongitude": "2.113"}}], "language": "eng", "publicationYear": "2014", "publisher": "CaltechDATA", 
"relatedIdentifiers": [{"relatedIdentifier": "10.14291/tccon.ggg2014.documentation.R0/1221662", "relationType": "IsDocumentedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.3390/rs9010064", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-9-683-2016", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-9-227-2016", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/acp-16-5043-2016", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/acp-16-14003-2016", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-8-4785-2015", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/acp-15-13023-2015", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/acp-16-12005-2016", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-9-4843-2016", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/acp-16-1653-2016", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-9-3491-2016", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.3390/rs8050414", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "https://tccon-wiki.caltech.edu/Network_Policy/Data_Use_Policy/Data_Description", "relationType": "IsDocumentedBy", "relatedIdentifierType": "URL"}, {"relatedIdentifier": "https://tccon-wiki.caltech.edu/Sites", "relationType": "IsDocumentedBy", "relatedIdentifierType": "URL"}, {"relatedIdentifier": "10.14291/TCCON.GGG2014", "relationType": "IsPartOf", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "http://tccondata.org", "relationType": "IsPartOf", "relatedIdentifierType": 
"URL"}, {"relatedIdentifier": "10.3390/rs9101033", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/acp-17-4781-2017", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.3390/atmos9050175", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-11-3111-2018", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-11-1251-2018", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-10-4135-2017", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-10-2209-2017", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.3390/atmos10070354", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.14291/tccon.ggg2014.orleans01.R1", "relationType": "IsPreviousVersionOf", "relatedIdentifierType": "DOI"}], "rightsList": [{"rights": "TCCON Data Use Policy", "rightsURI": "https://data.caltech.edu/tindfiles/serve/d0bf0bd6-739b-4aad-9e5d-45338391727f/"}], "subjects": [{"subject": "atmospheric trace gases"}, {"subject": "CO2"}, {"subject": "CH4"}, {"subject": "CO"}, {"subject": "N2O"}, {"subject": "column-averaged dry-air mole fractions"}, {"subject": "remote sensing"}, {"subject": "FTIR spectroscopy"}, {"subject": "TCCON"}], "titles": [{"title": "TCCON data from Orl\u00e9ans (FR), Release GGG2014.R0"}], "version": "GGG2014.R0", "formats": ["application/x-netcdf"], "dates": [{"date": "2014-10-10", "dateType": "Created"}, {"date": "2018-12-01", "dateType": "Updated"}, {"date": "2009-08-29/2017-11-28", "dateType": "Collected"}, {"date": "2017-09-08", "dateType": "Submitted"}, {"date": "2014-10-10", "dateType": "Issued"}], "types": {"resourceTypeGeneral": "Dataset", "resourceType": "Dataset"}, "identifiers": [{"identifier": "10.14291/tccon.ggg2014.orleans01.R0/1149276", 
"identifierType": "DOI"}, {"identifier": "283", "identifierType": "CaltechDATA_Identifier"}, {"identifier": "GGG2014", "identifierType": "Software_Version"}, {"identifier": "or", "identifierType": "id"}, {"identifier": "orleans01", "identifierType": "longName"}, {"identifier": "R0", "identifierType": "Data_Revision"}], "schemaVersion": "http://datacite.org/schema/kernel-4"}
\ No newline at end of file
diff --git a/caltechdata_api/tester/validfiles/293.json b/caltechdata_api/tester/validfiles/293.json
new file mode 100644
index 0000000..cbf8145
--- /dev/null
+++ b/caltechdata_api/tester/validfiles/293.json
@@ -0,0 +1 @@
+{"contributors": [{"affiliation": [{"name": "California Institute of Technology, Pasadena, CA, U.S.A."}], "nameIdentifiers": [{"nameIdentifier": "0000-0002-4924-0377", "nameIdentifierScheme": "ORCID"}], "name": "Wunch, Debra", "contributorType": "ContactPerson"}, {"affiliation": [{"name": "California Institute of Technology, Pasadena, CA (US)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0002-6126-3854", "nameIdentifierScheme": "ORCID"}], "name": "Wennberg, P. O. ", "contributorType": "ContactPerson"}, {"affiliation": [{"name": "Centre for Atmospheric Chemistry, School of Chemistry, University of Wollongong, Wollongong, NSW (AU)"}], "nameIdentifiers": [{"nameIdentifier": " 0000-0002-7986-1924", "nameIdentifierScheme": "ORCID"}], "name": "Griffith, D. W.T.", "contributorType": "ContactPerson"}, {"affiliation": [{"name": " Institute of Environmental Physics, University of Bremen, Bremen (DE), Centre for Atmospheric Chemistry, School of Chemistry, University of Wollongong, Wollongong, NSW (AU) "}], "nameIdentifiers": [{"nameIdentifier": "0000-0002-2906-2577", "nameIdentifierScheme": "ORCID"}], "name": "Deutscher, N. M.", "contributorType": "ContactPerson"}, {"affiliation": [{"name": "Max Planck Institute for Biogeochemistry, Jena (DE)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0002-5890-6687", "nameIdentifierScheme": "ORCID"}], "name": "Feist, D. G.", "contributorType": "ContactPerson"}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0002-3324-885X", "nameIdentifierScheme": "ORCID"}], "name": "Notholt, J.", "contributorType": "ContactPerson"}], "descriptions": [{"descriptionType": "Other", "description": "The Total Carbon Column Observing Network (TCCON) is a network of ground-based Fourier Transform Spectrometers that record direct solar absorption spectra of the atmosphere in the near-infrared. 
From these spectra, accurate and precise column-averaged abundances of atmospheric constituents including CO2, CH4, N2O, HF, CO, H2O, and HDO, are retrieved. This is the 2014 data release."}, {"descriptionType": "Other", "description": "
Unique Views: 953
Unique Downloads: 98
between September 13, 2017 and July 02, 2020
More info on how stats are collected
"}, {"descriptionType": "Other", "description": "
Cite this record as:
Total Carbon Column Observing Network (TCCON) Team. (2017). 2014 TCCON Data Release (Version GGG2014) [Data set]. CaltechDATA. https://doi.org/10.14291/TCCON.GGG2014
or choose a different citation style.
Download Citation
"}], "language": "eng", "relatedIdentifiers": [{"relatedIdentifier": "10.14291/TCCON.GGG2014.DOCUMENTATION.R0/1221662", "relationType": "IsDocumentedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "https://tccon-wiki.caltech.edu/Network_Policy/Data_Use_Policy/Data_Description", "relationType": "IsDocumentedBy", "relatedIdentifierType": "URL"}, {"relatedIdentifier": "https://tccon-wiki.caltech.edu/Sites", "relationType": "IsDocumentedBy", "relatedIdentifierType": "DOI"}], "rightsList": [{"rights": "TCCON Data Use Policy", "rightsURI": "https://data.caltech.edu/tindfiles/serve/24d2401d-d2b7-42e1-83b1-1ee01839d84d/"}], "subjects": [{"subject": "atmospheric trace gases"}, {"subject": " CO2"}, {"subject": " CH4"}, {"subject": " CO"}, {"subject": " N2O"}, {"subject": " column-averaged dry-air mole fractions"}, {"subject": " remote sensing"}, {"subject": " FTIR spectroscopy"}, {"subject": " TCCON"}], "version": "GGG2014", "titles": [{"title": "2014 TCCON Data Release"}], "formats": [".tgz", ".nc"], "dates": [{"date": "2020-07-01", "dateType": "Updated"}, {"date": "2017-09-13", "dateType": "Submitted"}, {"date": "2017-09-13", "dateType": "Issued"}], "publicationYear": "2017", "publisher": "CaltechDATA", "types": {"resourceTypeGeneral": "Dataset", "resourceType": "Dataset"}, "identifiers": [{"identifier": "10.14291/TCCON.GGG2014", "identifierType": "DOI"}, {"identifier": "293", "identifierType": "CaltechDATA_Identifier"}], "creators": [{"affiliation": [{"name": "TCCON Consortium"}], "name": "Total Carbon Column Observing Network (TCCON) Team"}], "schemaVersion": "http://datacite.org/schema/kernel-4"}
\ No newline at end of file
diff --git a/caltechdata_api/tester/validfiles/301.json b/caltechdata_api/tester/validfiles/301.json
new file mode 100644
index 0000000..186bbec
--- /dev/null
+++ b/caltechdata_api/tester/validfiles/301.json
@@ -0,0 +1 @@
+{"contributors": [{"nameIdentifiers": [{"nameIdentifier": "grid.20861.3d", "nameIdentifierScheme": "GRID"}], "name": "CaltechDATA, California Institute of Technology, CA (US)", "contributorType": "HostingInstitution"}, {"affiliation": [{"name": "California Institute of Technology, Pasadena, CA (US)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0001-5383-8462", "nameIdentifierScheme": "ORCID"}], "name": "Roehl, C. M.", "contributorType": "DataCurator"}, {"name": "AWIPEV Arctic Research Base, Ny-\u00c5lesund, Spitsbergen (NO)", "contributorType": "DataCollector"}, {"name": "Justus Notholt", "contributorType": "ContactPerson"}, {"name": "TCCON", "contributorType": "ResearchGroup"}], "descriptions": [{"descriptionType": "Abstract", "description": "
These data are now obsolete and should be replaced by the most recent data: https://doi.org/10.14291/tccon.ggg2014.nyalesund01.R1
The Total Carbon Column Observing Network (TCCON) is a network of ground-based Fourier Transform Spectrometers that record direct solar absorption spectra of the atmosphere in the near-infrared. From these spectra, accurate and precise column-averaged abundances of atmospheric constituents including CO2, CH4, N2O, HF, CO, H2O, and HDO, are retrieved. This data set contains observations from the TCCON station Ny \u00c5lesund, Spitsbergen, Norway."}, {"descriptionType": "Other", "description": "
Cite this record as:
Notholt, J., Warneke, T., Petri, C., Deutscher, N. M., Weinzierl, C., Palm, M., & Buschmann, M. (2014). TCCON data from Ny \u00c5lesund, Spitsbergen (NO), Release GGG2014.R0 [Data set]. CaltechDATA. https://doi.org/10.14291/tccon.ggg2014.nyalesund01.r0/1149278
or choose a different citation style.
Download Citation
"}, {"descriptionType": "Other", "description": "
Unique Views: 196
Unique Downloads: 5
between October 31, 2017 and July 02, 2020
More info on how stats are collected
"}], "language": "eng", "relatedIdentifiers": [{"relatedIdentifier": "10.14291/tccon.archive/1348407", "relationType": "IsPartOf", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.14291/tccon.ggg2014.documentation.R0/1221662", "relationType": "IsDocumentedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-9-3491-2016", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "https://tccon-wiki.caltech.edu/Network_Policy/Data_Use_Policy/Data_Description", "relationType": "IsDocumentedBy", "relatedIdentifierType": "URL"}, {"relatedIdentifier": "https://tccon-wiki.caltech.edu/Sites", "relationType": "IsDocumentedBy", "relatedIdentifierType": "URL"}, {"relatedIdentifier": "http://tccondata.org", "relationType": "IsPartOf", "relatedIdentifierType": "URL"}, {"relatedIdentifier": "10.14291/TCCON.GGG2014", "relationType": "IsPartOf", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.14291/tccon.ggg2014.nyalesund01.R1", "relationType": "IsPreviousVersionOf", "relatedIdentifierType": "DOI"}], "rightsList": [{"rights": "TCCON Data Use Policy", "rightsURI": "https://data.caltech.edu/tindfiles/serve/90348ea4-f340-4f43-8db2-b9beb7845519/"}], "subjects": [{"subject": "atmospheric trace gases"}, {"subject": "CO2"}, {"subject": "CH4"}, {"subject": "CO"}, {"subject": "N2O"}, {"subject": "column-averaged dry-air mole fractions"}, {"subject": "remote sensing"}, {"subject": "FTIR spectroscopy"}, {"subject": "TCCON"}], "version": "GGG2014.R0", "titles": [{"title": "TCCON data from Ny \u00c5lesund, Spitsbergen (NO), Release GGG2014.R0"}], "formats": ["application/x-netcdf"], "dates": [{"date": "2017-10-31", "dateType": "Created"}, {"date": "2019-06-01", "dateType": "Updated"}, {"date": "2006-03-28/2018-04-27", "dateType": "Collected"}, {"date": "2017-10-31", "dateType": "Submitted"}, {"date": "2014-10-10", "dateType": "Issued"}], "publicationYear": "2014", "publisher": "CaltechDATA", "types": {"resourceTypeGeneral": 
"Dataset", "resourceType": "Dataset"}, "identifiers": [{"identifier": "10.14291/tccon.ggg2014.nyalesund01.R0/1149278", "identifierType": "DOI"}, {"identifier": "301", "identifierType": "CaltechDATA_Identifier"}, {"identifier": "GGG2014", "identifierType": "Software_Version"}, {"identifier": "sp", "identifierType": "id"}, {"identifier": "nyalesund01", "identifierType": "longName"}, {"identifier": "R0", "identifierType": "Data_Revision"}], "creators": [{"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0002-3324-885X", "nameIdentifierScheme": "ORCID"}, {"nameIdentifier": "P-4520-2016", "nameIdentifierScheme": "ResearcherID"}], "name": "Notholt, J."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0001-5185-3415", "nameIdentifierScheme": "ORCID"}, {"nameIdentifier": "K-1884-2012", "nameIdentifierScheme": "ResearcherID"}], "name": "Warneke, T."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0002-7010-5532", "nameIdentifierScheme": "ORCID"}], "name": "Petri, C."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}, {"name": "Centre for Atmospheric Chemistry, School of Chemistry, University of Wollongong, Wollongong, NSW (AU)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0002-2906-2577", "nameIdentifierScheme": "ORCID"}, {"nameIdentifier": "E-3683-2015", "nameIdentifierScheme": "ResearcherID"}], "name": "Deutscher, N. 
M."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "name": "Weinzierl, C."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0001-7191-6911", "nameIdentifierScheme": "ORCID"}], "name": "Palm, M."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0001-5077-9524", "nameIdentifierScheme": "ORCID"}], "name": "Buschmann, M."}], "geoLocations": [{"geoLocationPlace": "Ny \u00c5lesund (SJ)", "geoLocationPoint": {"pointLatitude": "78.9", "pointLongitude": "11.9"}}], "schemaVersion": "http://datacite.org/schema/kernel-4"}
\ No newline at end of file
diff --git a/caltechdata_api/tester/validfiles/970.json b/caltechdata_api/tester/validfiles/970.json
new file mode 100644
index 0000000..31600d9
--- /dev/null
+++ b/caltechdata_api/tester/validfiles/970.json
@@ -0,0 +1 @@
+{"contributors": [{"nameIdentifiers": [{"nameIdentifier": "grid.20861.3d", "nameIdentifierScheme": "GRID"}], "name": "California Institute of Techonolgy, Pasadena, CA (US)", "contributorType": "HostingInstitution"}, {"affiliation": [{"name": "California Institute of Technology, Pasadena, CA (US)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0001-5383-8462", "nameIdentifierScheme": "ORCID"}], "name": "Roehl, C. M.", "contributorType": "DataCurator"}, {"affiliation": [{"name": "Department of Physics, University of Toronto, Toronto, ON (CA)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0001-9947-1053", "nameIdentifierScheme": "ORCID"}, {"nameIdentifier": "D-2563-2012", "nameIdentifierScheme": "ResearcherID"}], "name": "Kimberly Strong", "contributorType": "ContactPerson"}, {"name": "TCCON", "contributorType": "ResearchGroup"}], "descriptions": [{"descriptionType": "Abstract", "description": "
These data are now obsolete and should be replaced by the most recent data: https://doi.org/10.14291/tccon.ggg2014.eureka01.R3
The Total Carbon Column Observing Network (TCCON) is a network of ground-based Fourier Transform Spectrometers that record direct solar absorption spectra of the atmosphere in the near-infrared. From these spectra, accurate and precise column-averaged abundances of atmospheric constituents including CO2, CH4, N2O, HF, CO, H2O, and HDO, are retrieved. This data set contains observations from the TCCON station at Eureka, Canada."}, {"descriptionType": "Other", "description": "
Cite this record as:
Strong, K., Roche, S., Franklin, J. E., Mendonca, J., Lutsch, E., Weaver, D., \u2026 Lindenmaier, R. (2017). TCCON data from Eureka (CA), Release GGG2014.R2 (Version R2) [Data set]. CaltechDATA. https://doi.org/10.14291/tccon.ggg2014.eureka01.r2
or choose a different citation style.
Download Citation
"}, {"descriptionType": "Other", "description": "
Unique Views: 41
Unique Downloads: 3
between September 20, 2017 and July 02, 2020
More info on how stats are collected
"}], "fundingReferences": [{"funderName": "Atlantic Innovation Fund"}, {"funderName": "Canada Foundation for Innovation", "funderIdentifierType": "GRID", "funderIdentifier": "grid.439998.6"}, {"funderName": "Canadian Foundation for Climate and Atmospheric Sciences"}, {"funderName": "Canadian Space Agency", "funderIdentifierType": "GRID", "funderIdentifier": "grid.236846.d"}, {"funderName": "Environment and Climate Change Canada", "funderIdentifierType": "GRID", "funderIdentifier": "grid.410334.1"}, {"funderName": "Government of Canada (International Polar Year funding)", "funderIdentifierType": "GRID", "funderIdentifier": "grid.451254.3"}, {"funderName": "Natural Sciences and Engineering Research Council of Canada", "funderIdentifierType": "GRID", "funderIdentifier": "grid.452912.9"}, {"funderName": "Polar Commission (Northern Scientific Training Program)", "funderIdentifierType": "GRID", "funderIdentifier": "grid.465477.3"}, {"funderName": "Nova Scotia Research Innovation Trust"}, {"funderName": "Ministry of Research and Innovation (Ontario Innovation Trust and Ontario Research Fund)", "funderIdentifierType": "GRID", "funderIdentifier": "grid.451078.f"}, {"funderName": "Natural Resources Canada (Polar Continental Shelf Program)", "funderIdentifierType": "GRID", "funderIdentifier": "grid.202033.0"}], "language": "eng", "relatedIdentifiers": [{"relatedIdentifier": "10.14291/tccon.ggg2014.documentation.R0/1221662", "relationType": "IsDocumentedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R0/1149271", "relationType": "IsNewVersionOf", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "https://tccon-wiki.caltech.edu/Network_Policy/Data_Use_Policy/Data_Description", "relationType": "IsDocumentedBy", "relatedIdentifierType": "URL"}, {"relatedIdentifier": "https://tccon-wiki.caltech.edu/Sites", "relationType": "IsDocumentedBy", "relatedIdentifierType": "URL"}, {"relatedIdentifier": "10.14291/TCCON.GGG2014", 
"relationType": "IsPartOf", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R1/1325515", "relationType": "IsNewVersionOf", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R3", "relationType": "IsPreviousVersionOf", "relatedIdentifierType": "DOI"}], "rightsList": [{"rights": "TCCON Data License", "rightsURI": "https://data.caltech.edu/tindfiles/serve/91de6fb9-18a5-4221-bd6b-41a9db8abc7c/"}], "subjects": [{"subject": "atmospheric trace gases"}, {"subject": "CO2"}, {"subject": "CH4"}, {"subject": "CO"}, {"subject": "N2O"}, {"subject": "column-averaged dry-air mole fractions"}, {"subject": "remote sensing"}, {"subject": "FTIR spectroscopy"}, {"subject": "TCCON"}], "version": "R2", "titles": [{"title": "TCCON data from Eureka (CA), Release GGG2014.R2"}], "formats": ["application/x-netcdf"], "dates": [{"date": "2017-09-20", "dateType": "Created"}, {"date": "2018-11-01", "dateType": "Updated"}, {"date": "2010-07-24/2017-09-10", "dateType": "Collected"}, {"date": "2017-09-20", "dateType": "Submitted"}, {"date": "2017-09-20", "dateType": "Issued"}], "publicationYear": "2017", "publisher": "CaltechDATA", "types": {"resourceTypeGeneral": "Dataset", "resourceType": "Dataset"}, "identifiers": [{"identifier": "10.14291/tccon.ggg2014.eureka01.R2", "identifierType": "DOI"}, {"identifier": "970", "identifierType": "CaltechDATA_Identifier"}, {"identifier": "GGG2014", "identifierType": "Software_Version"}, {"identifier": "eu", "identifierType": "id"}, {"identifier": "eureka01", "identifierType": "longName"}, {"identifier": "R1", "identifierType": "Data_Revision"}], "creators": [{"affiliation": [{"name": "Department of Physics, University of Toronto, Toronto, ON (CA)"}], "name": "Strong, K."}, {"affiliation": [{"name": "Department of Physics, University of Toronto, Toronto, ON (CA)"}], "name": "Roche, S."}, {"affiliation": [{"name": "School of Engineering and Applied Sciences, Harvard University, 
Cambridge, MA (USA)"}], "name": "Franklin, J. E."}, {"affiliation": [{"name": "Environment and Climate Change Canada, Downsview, ON (CA)"}], "name": "Mendonca, J."}, {"affiliation": [{"name": "Department of Physics, University of Toronto, Toronto, ON (CA)"}], "name": "Lutsch, E."}, {"affiliation": [{"name": "Department of Physics, University of Toronto, Toronto, ON (CA)"}], "name": "Weaver, D."}, {"affiliation": [{"name": "Department of Physics, University of Toronto, Toronto, ON (CA)"}], "name": "Fogal, P. F."}, {"affiliation": [{"name": "Department of Physics & Atmospheric Science, Dalhousie University, Halifax, NS, CA"}], "name": "Drummond, J. R."}, {"affiliation": [{"name": "Department of Physics, University of Toronto, Toronto, ON (CA)"}, {"name": "UCAR Center for Science Education, Boulder, CO (US)"}], "name": "Batchelor, R."}, {"affiliation": [{"name": "Department of Physics, University of Toronto, Toronto, ON (CA)"}, {"name": "Pacific Northwest National Laboratory, Richland, WA (US)"}], "name": "Lindenmaier, R."}], "geoLocations": [{"geoLocationPlace": "Eureka, NU (CA)", "geoLocationPoint": {"pointLatitude": "80.05", "pointLongitude": "-86.42"}}], "schemaVersion": "http://datacite.org/schema/kernel-4"}
\ No newline at end of file
From 005d9cbaf0441f036a07195bd1f71ba396299497 Mon Sep 17 00:00:00 2001
From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com>
Date: Sun, 3 Nov 2024 10:39:59 -0800
Subject: [PATCH 25/42] Add files via upload
---
tests/test_unit.py | 122 +++++++++++++++++++++++++++++++++++++++++++++
tests/tester.py | 50 +++++++++++++++++++
2 files changed, 172 insertions(+)
create mode 100644 tests/test_unit.py
create mode 100644 tests/tester.py
diff --git a/tests/test_unit.py b/tests/test_unit.py
new file mode 100644
index 0000000..5d1cad6
--- /dev/null
+++ b/tests/test_unit.py
@@ -0,0 +1,122 @@
+import os
+import pytest
+from customize_schema import validate_metadata as validator43
+from helpers import load_json_path
+import logging
+from tqdm import tqdm
+
+# Directories of valid and invalid example JSON files (paths are relative to the current working directory)
+VALID_DATACITE43_DIR = "../tests/data/datacite43/"
+INVALID_DATACITE43_DIR = "../tests/data/invalid_datacite43/"
+
+# Return the *.json paths directly inside `directory`, sorted because os.listdir() order is arbitrary
+def get_all_json_files(directory):
+    return sorted(os.path.join(directory, f) for f in os.listdir(directory) if f.endswith('.json'))
+
+# Collect the valid and invalid example files once, at import time, for use in parametrization
+VALID_DATACITE43_FILES = get_all_json_files(VALID_DATACITE43_DIR)
+INVALID_DATACITE43_FILES = get_all_json_files(INVALID_DATACITE43_DIR)
+
+@pytest.mark.parametrize("valid_file", VALID_DATACITE43_FILES)
+def test_valid_json(valid_file):
+    """Check that a known-good example file is accepted by the validator."""
+    print(f"\nValidating file: {valid_file}")  # Log for file being tested
+    metadata = load_json_path(valid_file)
+    try:
+        problems = validator43(metadata)
+    except ValueError as exc:
+        pytest.fail(f"Validation failed for: {valid_file}\nErrors: {str(exc)}")
+
+    # A truthy return value from the validator is treated as reported errors.
+    if not problems:
+        print(f"Validation passed for: {valid_file}")
+    else:
+        pytest.fail(f"Validation failed for: {valid_file}\nErrors: {problems}")
+
+@pytest.mark.parametrize("invalid_file", INVALID_DATACITE43_FILES)
+def test_invalid_json(invalid_file):
+    """Check that a known-bad example file is rejected by the validator."""
+    print(f"\nValidating file: {invalid_file}")  # Log for file being tested
+    metadata = load_json_path(invalid_file)
+    try:
+        problems = validator43(metadata)
+    except ValueError:
+        # Rejection by exception counts as the expected outcome.
+        problems = True
+
+    # Rejection may also be signalled by a truthy return value.
+    if not problems:
+        pytest.fail(f"Validation passed unexpectedly for: {invalid_file}")
+    else:
+        print(f"Validation failed as expected for: {invalid_file}")
+
+@pytest.mark.parametrize("missing_field_file", [
+    {"file": "../tests/data/missing_creators.json", "missing_field": "creators"},
+    {"file": "../tests/data/missing_titles.json", "missing_field": "titles"}])
+def test_missing_required_fields(missing_field_file):
+    """Check that omitting a required field raises ValueError naming that field."""
+    path, field = missing_field_file['file'], missing_field_file['missing_field']
+    print(f"\nTesting missing field: {field} in file: {path}")
+    metadata = load_json_path(path)
+    with pytest.raises(ValueError, match=f"Missing required metadata field: {field}"):
+        validator43(metadata)
+
+@pytest.mark.parametrize("type_error_file", [
+    {"file": "../tests/data/type_error_creators.json", "field": "creators"},
+    {"file": "../tests/data/type_error_dates.json", "field": "dates"}])
+def test_incorrect_field_types(type_error_file):
+    """Check that a wrongly typed field raises ValueError naming that field."""
+    path, field = type_error_file['file'], type_error_file['field']
+    print(f"\nTesting incorrect type in field: {field} for file: {path}")
+    metadata = load_json_path(path)
+    with pytest.raises(ValueError, match=f"Incorrect type for field: {field}"):
+        validator43(metadata)
+
+def test_multiple_errors():
+    """Check that a file with several problems is reported as multiple validation errors."""
+    metadata = load_json_path("../tests/data/multiple_errors.json")
+    with pytest.raises(ValueError, match="Multiple validation errors"):
+        validator43(metadata)
+
+def test_error_logging(caplog):
+    """Check that rejecting a file also leaves "Validation failed" in the captured log."""
+    bad_metadata = load_json_path("../tests/data/invalid_datacite43/some_invalid_file.json")
+    # Capture ERROR-and-above records emitted while the validator rejects the file.
+    with caplog.at_level(logging.ERROR), pytest.raises(ValueError):
+        validator43(bad_metadata)
+    assert "Validation failed" in caplog.text
+
+if __name__ == "__main__":
+    # Manual runner for valid files; pytest.fail() raises pytest.fail.Exception, not AssertionError
+    failed_valid_files = []
+    print("\nRunning validation for valid files...")
+    for file in tqdm(VALID_DATACITE43_FILES, desc="Valid files"):
+        try:
+            test_valid_json(file)
+        except (AssertionError, pytest.fail.Exception) as e:
+            failed_valid_files.append(file)
+            print(f"Error occurred in valid file: {file}\nError details: {e}")
+
+    if not failed_valid_files:
+        print("\n✅ All valid files passed validation. Test complete.")
+    else:
+        print("\n❌ The following valid files failed validation:")
+        for failed_file in failed_valid_files:
+            print(f"- {failed_file}")
+
+    # Manual runner for invalid files; a failure here means the file unexpectedly passed validation
+    passed_invalid_files = []
+    print("\nRunning validation for invalid files...")
+    for file in tqdm(INVALID_DATACITE43_FILES, desc="Invalid files"):
+        try:
+            test_invalid_json(file)
+        except (AssertionError, pytest.fail.Exception) as e:
+            passed_invalid_files.append(file)
+            print(f"Error occurred in invalid file: {file}\nError details: {e}")
+
+    if not passed_invalid_files:
+        print("\n✅ All invalid files failed validation as expected. Test is a success.")
+    else:
+        print("\n❌ The following invalid files unexpectedly passed validation:")
+        for passed_file in passed_invalid_files:
+            print(f"- {passed_file}")
diff --git a/tests/tester.py b/tests/tester.py
new file mode 100644
index 0000000..72efe49
--- /dev/null
+++ b/tests/tester.py
@@ -0,0 +1,50 @@
+import os
+import pytest
+from customize_schema import validate_metadata as validator43
+from helpers import load_json_path
+
+# Directory containing the valid example JSON files (path is relative to the current working directory)
+VALID_DATACITE43_DIR = "../tests/data/datacite43/" # Directory for valid JSON files
+
+# Return the *.json paths directly inside `directory`, sorted because os.listdir() order is arbitrary
+def get_all_json_files(directory):
+    return sorted(os.path.join(directory, f) for f in os.listdir(directory) if f.endswith('.json'))
+
+# Collect the valid example files once, at import time, for use in parametrization
+VALID_DATACITE43_FILES = get_all_json_files(VALID_DATACITE43_DIR)
+
+@pytest.mark.parametrize("valid_file", VALID_DATACITE43_FILES)
+def test_valid_json(valid_file):
+    """Check that a known-good example file is accepted by the validator."""
+    print(f"Validating file: {valid_file}")  # Added log for file being tested
+    metadata = load_json_path(valid_file)
+    try:
+        problems = validator43(metadata)
+    except ValueError as exc:
+        pytest.fail(f"Validation failed for: {valid_file}\nErrors: {str(exc)}")
+
+    # A truthy return value from the validator is treated as reported errors.
+    if not problems:
+        print(f"Validation passed for: {valid_file}")
+    else:
+        pytest.fail(f"Validation failed for: {valid_file}\nErrors: {problems}")
+
+if __name__ == "__main__":
+    # Track failures for manual testing
+    failed_files = []
+
+    # pytest.fail() raises pytest.fail.Exception rather than AssertionError, so catch both
+    for file in VALID_DATACITE43_FILES:
+        try:
+            test_valid_json(file)
+        except (AssertionError, pytest.fail.Exception) as e:
+            failed_files.append(file)
+            print(f"Error occurred in file: {file}\nError details: {e}")
+
+    # Print a summary of all failed files
+    if failed_files:
+        print("\nThe following files failed validation:")
+        for failed_file in failed_files:
+            print(f"- {failed_file}")
+    else:
+        print("\nAll files passed validation.")
From 4f08a54b7f69849a209516fa82304e4fbb628286 Mon Sep 17 00:00:00 2001
From: RohanBhattaraiNP
Date: Fri, 8 Nov 2024 05:59:57 +0000
Subject: [PATCH 26/42] Add updated CITATION.cff from codemeta.json file
---
CITATION.cff | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/CITATION.cff b/CITATION.cff
index e626019..e8e6679 100755
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -11,6 +11,7 @@ authors:
abstract: Python wrapper for CaltechDATA API.
repository-code: "https://github.com/caltechlibrary/caltechdata_api"
type: software
+doi: 10.22002/wfjr5-kw507
version: 1.8.2
license-url: "https://data.caltech.edu/license"
keywords:
@@ -18,4 +19,4 @@ keywords:
- metadata
- software
- InvenioRDM
-date-released: 2024-11-06
+date-released: 2024-11-08
From 735e74a22a2c74a9bf7f203897c20d1e9877c194 Mon Sep 17 00:00:00 2001
From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com>
Date: Thu, 7 Nov 2024 22:51:43 -0800
Subject: [PATCH 27/42] Update cli.py
---
caltechdata_api/cli.py | 1008 ++++++++++------------------------------
1 file changed, 254 insertions(+), 754 deletions(-)
diff --git a/caltechdata_api/cli.py b/caltechdata_api/cli.py
index ecb54a3..ffb2cba 100644
--- a/caltechdata_api/cli.py
+++ b/caltechdata_api/cli.py
@@ -1,774 +1,274 @@
-import argparse
-import requests
-import s3fs
-from caltechdata_api import caltechdata_write, caltechdata_edit
-from .md_to_json import parse_readme_to_json
+import copy
import json
import os
-from cryptography.fernet import Fernet
-
-CALTECHDATA_API = "https://data.caltech.edu/api/names?q=identifiers.identifier:{}"
-ORCID_API = "https://orcid.org/"
-HEADERS = {"Accept": "application/json"}
-
-name = ""
-affiliationIdentifierScheme = ""
-affiliation_identifier = ""
-
-awardNumber = ""
-awardTitle = ""
-funderIdentifier = ""
-funderIdentifierType = ""
-funderName = ""
-
-
-home_directory = os.path.expanduser("~")
-caltechdata_directory = os.path.join(home_directory, ".caltechdata")
-
-
-if not os.path.exists(caltechdata_directory):
- os.makedirs(caltechdata_directory)
-
-
-def generate_key():
- return Fernet.generate_key()
-
-
-# Load the key from a file or generate a new one if not present
-def load_or_generate_key():
- key_file = os.path.join(caltechdata_directory, "key.key")
- if os.path.exists(key_file):
- with open(key_file, "rb") as f:
- return f.read()
- else:
- key = generate_key()
- with open(key_file, "wb") as f:
- f.write(key)
- return key
-
-
-# Encrypt the token
-def encrypt_token(token, key):
- f = Fernet(key)
- return f.encrypt(token.encode())
-
-
-# Decrypt the token
-def decrypt_token(encrypted_token, key):
- f = Fernet(key)
- return f.decrypt(encrypted_token).decode()
-
-
-# Function to get or set token with support for test system
-def get_or_set_token(production=True):
- key = load_or_generate_key()
-
- # Use different token files for production and test environments
- token_filename = "token.txt" if production else "token_test.txt"
- token_file = os.path.join(caltechdata_directory, token_filename)
-
- try:
- with open(token_file, "rb") as f:
- encrypted_token = f.read()
- token = decrypt_token(encrypted_token, key)
- print(
- "Using saved CaltechDATA production token."
- if production
- else "Using saved CaltechDATA test token."
- )
- return token
- except FileNotFoundError:
- while True:
- token = input(
- f"Enter your {'Production' if production else 'Test'} CaltechDATA token: "
- ).strip()
- confirm_token = input(
- f"Confirm your {'Production' if production else 'Test'} CaltechDATA token: "
- ).strip()
- if token == confirm_token:
- encrypted_token = encrypt_token(token, key)
- with open(token_file, "wb") as f:
- f.write(encrypted_token)
- return token
+import requests
+import s3fs
+from requests import session
+from json.decoder import JSONDecodeError
+from caltechdata_api import customize_schema
+from caltechdata_api.utils import humanbytes
+
+
+def write_files_rdm(files, file_link, headers, f_headers, s3=None, keepfiles=False):
+ f_json = []
+ f_list = {}
+ fnames = []
+ for f in files:
+ split = f.split("/")
+ filename = split[-1]
+ if filename in fnames:
+ # We can't have a duplicate filename
+ # Assume that the previous path value makes a unique name
+ filename = f"{split[-2]}-{split[-1]}"
+ fnames.append(filename)
+ f_json.append({"key": filename})
+ f_list[filename] = f
+ # Now we see if any existing draft files need to be replaced
+ result = requests.get(file_link, headers=f_headers)
+ if result.status_code == 200:
+ ex_files = result.json()["entries"]
+ for ex in ex_files:
+ if ex["key"] in f_list:
+ result = requests.delete(ex["links"]["self"], headers=f_headers)
+ if result.status_code != 204:
+ raise Exception(result.text)
+ # Create new file upload links
+ result = requests.post(file_link, headers=headers, json=f_json)
+ if result.status_code != 201:
+ raise Exception(result.text)
+ # Now we have the upload links
+ for entry in result.json()["entries"]:
+ self = entry["links"]["self"]
+ link = entry["links"]["content"]
+ commit = entry["links"]["commit"]
+ name = entry["key"]
+ if name in f_list:
+ if s3:
+ print("Downloading", f_list[name])
+ s3.download(f_list[name], name)
+ infile = open(name, "rb")
else:
- print("Tokens do not match. Please try again.")
-
-
-def welcome_message():
- print("Welcome to CaltechDATA CLI")
-
-
-def get_user_input(prompt, required=True):
- while True:
- user_input = input(prompt)
- if required and not user_input:
- print("This field is required. Please provide a value.")
- else:
- return user_input
-
-
-def confirm_upload():
- while True:
- user_input = input("Do you want to send this record to CaltechDATA? (y/n): ")
- if user_input.lower() == "y":
- return True
- elif user_input.lower() == "n":
- print("Upload canceled.")
- return False
+ infile = open(f_list[name], "rb")
+ result = requests.put(link, headers=f_headers, data=infile)
+ if result.status_code != 200:
+ raise Exception(result.text)
+ result = requests.post(commit, headers=headers)
+ if result.status_code != 200:
+ raise Exception(result.text)
else:
- print("Invalid input. Please enter 'y' or 'n'.")
-
-
-def check_award_number(award_number):
- response = requests.get(
- f"https://data.caltech.edu/api/awards?q=number:{award_number}"
- )
- data = response.json()
- total_hits = data.get("hits", {}).get("total", 0)
- return total_hits > 0
-
-
-def get_funding_entries():
- while True:
+ # Delete any files not included in this write command
+ if keepfiles == False:
+ result = requests.delete(self, headers=f_headers)
+ if result.status_code != 204:
+ raise Exception(result.text)
+
+
+
+def add_file_links(
+ metadata, file_links, file_descriptions=[], additional_descriptions="", s3_link=None
+):
+ # Currently configured for S3 links, assuming all are at the same endpoint
+ link_string = ""
+ endpoint = "https://" + file_links[0].split("/")[2]
+ s3 = s3fs.S3FileSystem(anon=True, client_kwargs={"endpoint_url": endpoint})
+ index = 0
+ for link in file_links:
+ file = link.split("/")[-1]
+ path = link.split(endpoint)[1]
+ size = s3.info(path)["size"]
+ size = humanbytes(size)
try:
- num_entries = int(
- input("How many funding entries do you want to provide? ")
- )
- if num_entries >= 0:
- return num_entries
+ desc = file_descriptions[index] + ","
+ except IndexError:
+ desc = ""
+ if link_string == "":
+ if s3_link:
+ link_string = f"Files available via S3 at {s3_link}</p>"
else:
- print("Please enter a non-negative integer.")
- except ValueError:
- print("Please enter a valid integer.")
-
-
-def validate_funder_identifier(funder_identifier):
- response = requests.get(f"https://api.ror.org/organizations/{funder_identifier}")
- if response.status_code == 200:
- return response.json().get("name")
- else:
- return False
-
-
-def get_funding_details():
- award_number = get_user_input("Enter the award number for funding: ")
- award_exists = check_award_number(award_number)
- if not award_exists:
- print(
- f"""Error: No award with number '{award_number}' found in
- CaltechDATA. You will need to provide more details about the
- funding."""
- )
- award_title = get_user_input("Enter the award title for funding: ")
- while True:
- funder_identifier = get_user_input("Enter the funder ROR (https://ror.org): ")
- name = validate_funder_identifier(funder_identifier)
- if name:
- break
- else:
- print(
- """This funder identifier is not a ROR. Please enter a valid
- ROR identifier (without the url). For example the ROR for the
- NSF is 021nxhr62."""
- )
- print("-" * 10)
- return {
- "awardNumber": award_number,
- "awardTitle": award_title,
- "funderName": name,
- "funderIdentifier": funder_identifier,
- "funderIdentifierType": "ROR",
+ cleaned = link.strip(file)
+ link_string = f"Files available via S3 at {cleaned}</p>"
+ link_string += f"""{file}, {desc} {size}
+ <a role="button" class="ui compact mini button" href="{link}"
+ > <i class="download icon"></i> Download </a>
</p>
+ """
+ index += 1
+ # Tack on any additional descriptions
+ if additional_descriptions != "":
+ link_string += additional_descriptions
+
+ description = {"description": link_string, "descriptionType": "files"}
+ metadata["descriptions"].append(description)
+ return metadata
+
+
+def send_to_community(review_link, data, headers, publish, community, message=None):
+ if not message:
+ message = "This record is submitted automatically with the CaltechDATA API"
+
+ data = {
+ "receiver": {"community": community},
+ "type": "community-submission",
}
-
-
-def parse_arguments():
- welcome_message()
- args = {}
- args["title"] = get_user_input("Enter the title of the dataset: ")
- args["description"] = get_user_input(
- "Enter the abstract or description of the dataset: "
- )
- print("License options:")
- print("1. Creative Commons Zero Waiver (cc-zero)")
- print("2. Creative Commons Attribution (cc-by)")
- print("3. Creative Commons Attribution Non Commercial (cc-by-nc)")
-
- # Prompt user to select a license
- while True:
- license_number = input(
- "Enter the number corresponding to the desired license: "
- )
- if license_number.isdigit() and 1 <= int(license_number) <= 8:
- # Valid license number selected
- args["license"] = {
- "1": {
- "rights": "Creative Commons Zero v1.0 Universal",
- "rightsIdentifier": "cc0-1.0",
- },
- "2": {
- "rights": "Creative Commons Attribution v4.0 Universal",
- "rightsIdentifier": "cc-by-4.0",
- },
- "3": {
- "rights": "Creative Commons Attribution Non-Commercial v4.0 Universal",
- "rightsIdentifier": "cc-by-nc-4.0",
- },
- }[license_number]
- break
- else:
- print("Invalid input. Please enter a number between 1 and 8.")
-
- while True:
- orcid = get_user_input("Enter your ORCID identifier: ")
- family_name, given_name = get_names(orcid)
- if family_name is not None and given_name is not None:
- args["orcid"] = orcid
- break # Break out of the loop if names are successfully retrieved
- retry = input("Do you want to try again? (y/n): ")
- if retry.lower() != "y":
- print("Exiting program.")
- return
- # Optional arguments
- num_funding_entries = get_funding_entries()
- funding_references = []
- for _ in range(num_funding_entries):
- funding_references.append(get_funding_details())
- args["fundingReferences"] = funding_references
- return args
-
-
-def query_caltechdata_api(orcid):
- response = requests.get(CALTECHDATA_API.format(orcid), headers=HEADERS)
- return response.json()
-
-
-def query_orcid_api(orcid):
- response = requests.get(ORCID_API + orcid, headers=HEADERS)
- return response.json()
-
-
-def get_names(orcid):
- caltechdata_response = query_caltechdata_api(orcid)
- global affiliationIdentifierScheme, affiliation_identifier, name
- if caltechdata_response.get("hits", {}).get("hits"):
- hit = caltechdata_response["hits"]["hits"][0]
- family_name = hit.get("family_name", "")
- given_name = hit.get("given_name", "")
- affiliation_identifier = "05dxps055"
- affiliationIdentifierScheme = "ROR"
- name = "California Institute of Technology"
-
- else:
- orcid_link = "https://orcid.org/"
- headers = {"Accept": "application/json"}
- orcid_response = requests.get(orcid_link + orcid, headers=headers)
- try:
- orcid_data = orcid_response.json()
- name_info = orcid_data.get("person", {}).get("name", {})
- family_name = name_info.get("family-name", {}).get("value", "")
- given_name = name_info.get("given-names", {}).get("value", "")
- except json.decoder.JSONDecodeError:
- print(
- f"Error: ORCID identifier not found or invalid. Please check the ORCID identifier and try again."
- )
- return None, None
- return family_name, given_name
-
-
-def write_s3cmd_config(endpoint):
- configf = os.path.join(home_directory, ".s3cfg")
- if not os.path.exists(configf):
- access_key = get_user_input("Enter the access key: ")
- secret_key = get_user_input("Enter the secret key: ")
- with open(configf, "w") as file:
- file.write(
- f"""[default]
- access_key = {access_key}
- host_base = {endpoint}
- host_bucket = %(bucket).{endpoint}
- secret_key = {secret_key}
- """
- )
-
-
-def upload_supporting_file(record_id=None):
- filepath = ""
- filepaths = []
- file_link = ""
- file_links = []
- while True:
- choice = get_user_input(
- "Do you want to upload or link data files? (upload/link/n): "
- ).lower()
- if choice == "link":
- endpoint = "sdsc.osn.xsede.org"
- path = "ini230004-bucket01/"
- if not record_id:
- write_s3cmd_config(endpoint)
- print("""S3 connection configured.""")
- break
- endpoint = f"https://{endpoint}/"
- s3 = s3fs.S3FileSystem(anon=True, client_kwargs={"endpoint_url": endpoint})
- # Find the files
- files = s3.glob(path + record_id + "/*")
- for link in files:
- fname = link.split("/")[-1]
- if "." not in fname:
- # If there is a directory, get files
- folder_files = s3.glob(link + "/*")
- for file in folder_files:
- name = file.split("/")[-1]
- if "." not in name:
- level_2_files = s3.glob(file + "/*")
- for f in level_2_files:
- name = f.split("/")[-1]
- if "." not in name:
- level_3_files = s3.glob(f + "/*")
- for l3 in level_3_files:
- file_links.append(endpoint + l3)
- else:
- file_links.append(endpoint + f)
- else:
- file_links.append(endpoint + file)
- else:
- file_links.append(endpoint + link)
- return filepath, file_links
- elif choice == "upload":
- print("Current files in the directory:")
- files = [
- f for f in os.listdir() if not f.endswith(".json") and os.path.isfile(f)
- ]
- print("\n".join(files))
- while True:
- filename = get_user_input(
- "Enter the filename to upload as a supporting file (or 'n' to finish): "
- )
- if filename == "n":
- break
- if filename in files:
- file_size = os.path.getsize(filename)
- if file_size > 1024 * 1024 * 1024:
- print(
- """The file is greater than 1 GB. Please upload the
- metadata to CaltechDATA, and you'll be provided
- instructions to upload the files to S3 directly."""
- )
- else:
- filepath = os.path.abspath(filename)
- filepaths.append(filepath)
- else:
- print(
- f"Error: File '{filename}' not found. Please enter a valid filename."
- )
- add_more = get_user_input(
- "Do you want to add more files? (y/n): "
- ).lower()
- if add_more != "y":
- break
- break
- elif choice == "n":
- break
- else:
- print("Invalid input. Please enter 'link' or 'upload' or 'n'.")
- return filepaths, file_links
-
-
-def upload_data_from_file():
- while True:
- print("Current JSON files in the directory:")
- files = [f for f in os.listdir() if f.endswith(".json") and os.path.isfile(f)]
- print("\n".join(files))
-
- filename = get_user_input(
- "Enter a README.md or JSON filename to upload to CaltechDATA (or type 'exit' to go back): "
+ result = requests.put(review_link, json=data, headers=headers)
+ if result.status_code != 200:
+ raise Exception(result.text)
+ submit_link = review_link.replace("/review", "/actions/submit-review")
+ data = comment = {
+ "payload": {
+ "content": message,
+ "format": "html",
+ }
+ }
+ result = requests.post(submit_link, json=data, headers=headers)
+ if result.status_code != 202:
+ raise Exception(result.text)
+ if publish:
+ accept_link = result.json()["links"]["actions"]["accept"]
+ data = comment = {
+ "payload": {
+ "content": "This record is accepted automatically with the CaltechDATA API",
+ "format": "html",
+ }
+ }
+ result = requests.post(accept_link, json=data, headers=headers)
+ if result.status_code != 200:
+ raise Exception(result.text)
+ return result
+
+
+def caltechdata_write(
+ metadata,
+ token=None,
+ files=[],
+ production=False,
+ schema="43",
+ publish=False,
+ file_links=[],
+ s3=None,
+ community=None,
+ authors=False,
+ file_descriptions=[],
+ s3_link=None,
+ default_preview=None,
+ review_message=None,
+):
+ """
+ File links are links to files existing in external systems that will
+ be added directly in a CaltechDATA record, instead of uploading the file.
+
+ S3 is an s3fs object for directly opening files
+ """
+ # Make a copy so that none of our changes leak out
+ metadata = copy.deepcopy(metadata)
+
+ # If no token is provided, get from RDMTOK environment variable
+ if not token:
+ token = os.environ["RDMTOK"]
+
+ # If files is a string - change to single value array
+ if isinstance(files, str) == True:
+ files = [files]
+
+ if file_links:
+ metadata = add_file_links(
+ metadata, file_links, file_descriptions, s3_link=s3_link
)
- if filename.lower() == "exit":
- return None
-
- if filename == "README.md":
- data = parse_readme_to_json(filename)
- return data
- else:
- try:
- with open(filename, "r") as file:
- data = json.load(file)
- return data
-
- except json.JSONDecodeError as e:
- print(f"Error: Invalid JSON format in the file '{filename}'. {str(e)}")
-
-
-def parse_args():
- """Parse command-line arguments."""
- parser = argparse.ArgumentParser(description="CaltechDATA CLI tool.")
- parser.add_argument(
- "-test", action="store_true", help="Use test mode, sets production to False"
- )
- args = parser.parse_args()
- return args
-
-
-def main():
- args = parse_args()
-
- production = not args.test # Set production to False if -test flag is provided
-
- choice = get_user_input(
- "Do you want to create or edit a CaltechDATA record? (create/edit): "
- ).lower()
- if choice == "create":
- create_record(production)
- elif choice == "edit":
- edit_record(production)
+ # Pull out pid information
+ if production == True:
+ repo_prefix = "10.22002"
else:
- print("Invalid choice. Please enter 'create' or 'edit'.")
-
-
-def create_record(production):
- token = get_or_set_token(production)
- while True:
- choice = get_user_input(
- "Do you want to use metadata from an existing file or create new metadata? (existing/create): "
- ).lower()
- if choice == "existing":
- existing_data = upload_data_from_file()
- filepath, file_link = upload_supporting_file()
- if existing_data:
- if filepath != "":
- response = caltechdata_write(
- existing_data,
- token,
- filepath,
- production=production,
- publish=False,
- )
- elif file_link != "":
- response = caltechdata_write(
- existing_data,
- token,
- file_links=[file_link],
- s3_link=file_link,
- production=True,
- publish=False,
- )
- else:
- response = caltechdata_write(
- existing_data, token, production=production, publish=False
- )
- rec_id = response
- print_upload_message(rec_id, production)
- break
+ repo_prefix = "10.33569"
+ pids = {}
+ identifiers = []
+ if "metadata" in metadata:
+ # we have rdm schema
+ if "identifiers" in metadata["metadata"]:
+ identifiers = metadata["metadata"]["identifiers"]
+ elif "identifiers" in metadata:
+ identifiers = metadata["identifiers"]
+ for identifier in identifiers:
+ doi = False
+ if "identifierType" in identifier:
+ if identifier["identifierType"] == "DOI":
+ doi = identifier["identifier"]
+ prefix = doi.split("/")[0]
+ elif identifier["identifierType"] == "oai":
+ pids["oai"] = {
+ "identifier": identifier["identifier"],
+ "provider": "oai",
+ }
+ elif "scheme" in identifier:
+ # We have RDM internal metadata
+ if identifier["scheme"] == "doi":
+ doi = identifier["identifier"]
+ prefix = doi.split("/")[0]
+ if doi != False:
+ if prefix == repo_prefix:
+ pids["doi"] = {
+ "identifier": doi,
+ "provider": "datacite",
+ "client": "datacite",
+ }
else:
- print("Going back to the main menu.")
- elif choice == "create":
- args = parse_arguments()
- family_name, given_name = get_names(args["orcid"])
- metadata = {
- "titles": [{"title": args["title"]}],
- "descriptions": [
- {"description": args["description"], "descriptionType": "Abstract"}
- ],
- "creators": [
- {
- "affiliation": [
- {
- "affiliationIdentifier": affiliation_identifier,
- "affiliationIdentifierScheme": affiliationIdentifierScheme,
- "name": name,
- }
- ],
- "familyName": family_name,
- "givenName": given_name,
- "name": f"{family_name}, {given_name}",
- "nameIdentifiers": [
- {
- "nameIdentifier": args["orcid"],
- "nameIdentifierScheme": "ORCID",
- }
- ],
- "nameType": "Personal",
- }
- ],
- "types": {"resourceType": "", "resourceTypeGeneral": "Dataset"},
- "rightsList": [
- args["license"],
- ],
- "fundingReferences": args["fundingReferences"],
- "schemaVersion": "http://datacite.org/schema/kernel-4",
- }
- filepath, file_link = upload_supporting_file()
- if confirm_upload():
- if filepath != "":
- response = caltechdata_write(
- metadata, token, filepath, production=production, publish=False
- )
- elif file_link != "":
- response = caltechdata_write(
- metadata,
- token,
- file_links=[file_link],
- production=production,
- publish=False,
- )
- else:
- response = caltechdata_write(
- metadata, token, production=production, publish=False
- )
- rec_id = response
-
- print_upload_message(rec_id, production)
- with open(response + ".json", "w") as file:
- json.dump(metadata, file, indent=2)
- break
- else:
- break
+ pids["doi"] = {
+ "identifier": doi,
+ "provider": "external",
+ }
+
+ if "pids" not in metadata:
+ metadata["pids"] = pids
+
+ if authors == False:
+ data = customize_schema.customize_schema(metadata, schema=schema)
+ if production == True:
+ url = "https://data.caltech.edu/"
else:
- print("Invalid choice. Please enter 'existing' or 'create'.")
-
-
-def print_upload_message(rec_id, production):
- base_url = (
- "https://data.caltech.edu/uploads/"
- if production
- else "https://data.caltechlibrary.dev/uploads/"
- )
- print(
- f"""You can view and publish this record at
- {base_url}{rec_id}
- If you need to upload large files to S3, you can type
- `s3cmd put DATA_FILE s3://ini230004-bucket01/{rec_id}/`"""
- )
-
-
-def edit_record(production):
- record_id = input("Enter the CaltechDATA record ID: ")
- token = get_or_set_token(production)
- file_name = download_file_by_id(record_id, token)
-
- if file_name:
- try:
- # Read the edited metadata file
- with open(file_name, "r") as file:
- metadata = json.load(file)
- response = caltechdata_edit(
- record_id, metadata, token, production=production, publish=False
- )
- if response:
- print("Metadata edited successfully.")
- else:
- print("Failed to edit metadata.")
- except Exception as e:
- print(f"An error occurred during metadata editing: {e}")
+ url = "https://data.caltechlibrary.dev/"
else:
- print("No metadata file found.")
- choice = get_user_input("Do you want to add files? (y/n): ").lower()
- if choice == "y":
- if production:
- API_URL_TEMPLATE = "https://data.caltech.edu/api/records/{record_id}/files"
- API_URL_TEMPLATE_DRAFT = (
- "https://data.caltech.edu/api/records/{record_id}/draft/files"
- )
+ data = metadata
+ if production == True:
+ url = "https://authors.library.caltech.edu/"
else:
- API_URL_TEMPLATE = (
- "https://data.caltechlibrary.dev/api/records/{record_id}/files"
- )
- API_URL_TEMPLATE_DRAFT = (
- "https://data.caltechlibrary.dev/api/records/{record_id}/draft/files"
- )
-
- url = API_URL_TEMPLATE.format(record_id=record_id)
- url_draft = API_URL_TEMPLATE_DRAFT.format(record_id=record_id)
-
- headers = {
- "accept": "application/json",
- }
-
- if token:
- headers["Authorization"] = "Bearer %s" % token
-
- response = requests.get(url, headers=headers)
- response_draft = requests.get(url_draft, headers=headers)
- data = response.json()
- data_draft = response_draft.json()
- # Check if 'entries' exists and its length
- if (
- len(data.get("entries", [])) == 0
- and len(data_draft.get("entries", [])) == 0
- ):
- keepfile = False
- else:
- keepfile = (
- input("Do you want to keep existing files? (y/n): ").lower() == "y"
- )
-
- filepath, file_link = upload_supporting_file(record_id)
- if file_link:
- print(file_link)
-
- if filepath != "":
- response = caltechdata_edit(
- record_id,
- token=token,
- files=filepath,
- production=production,
- publish=False,
- keepfiles=keepfile,
- )
- elif file_link != "":
- response = caltechdata_edit(
- record_id,
- metadata,
- token=token,
- file_links=file_link,
- production=production,
- publish=False,
- keepfiles=keepfile,
- )
-
- rec_id = response
- print_upload_message(rec_id, production)
-
-
-def download_file_by_id(record_id, token=None):
- url = f"https://data.caltech.edu/api/records/{record_id}"
+ url = "https://authors.caltechlibrary.dev/"
headers = {
- "accept": "application/vnd.datacite.datacite+json",
+ "Authorization": "Bearer %s" % token,
+ "Content-type": "application/json",
+ }
+ f_headers = {
+ "Authorization": "Bearer %s" % token,
+ "Content-type": "application/octet-stream",
}
- if token:
- headers["Authorization"] = "Bearer %s" % token
-
- try:
- response = requests.get(url, headers=headers)
- if response.status_code != 200:
- # Might have a draft
- response = requests.get(
- url + "/draft",
- headers=headers,
- )
- if response.status_code != 200:
- url = f"https://data.caltechlibrary.dev/api/records/{record_id}"
- response = requests.get(
- url,
- headers=headers,
- )
- if response.status_code != 200:
- # Might have a draft
- response = requests.get(
- url + "/draft",
- headers=headers,
- )
- if response.status_code != 200:
- raise Exception(
- f"Record {record_id} does not exist, cannot edit"
- )
- file_content = response.content
- file_name = f"downloaded_data_{record_id}.json"
- with open(file_name, "wb") as file:
- file.write(file_content)
- print(f"Metadata downloaded successfully: {file_name}")
- with open(file_name, "r") as file:
- metadata = json.load(file)
- while True:
- print("Fields:")
- for i, field in enumerate(metadata.keys()):
- print(f"{i + 1}. {field}")
-
- field_choice = int(
- input(
- "Enter the number of the field you want to edit (or 0 to skip, 'exit' to exit): "
- )
- )
-
- if field_choice == 0:
- break
-
- selected_field = list(metadata.keys())[field_choice - 1]
-
- if isinstance(metadata[selected_field], list):
- while True:
- print(f"Items in {selected_field}:")
- for i, item in enumerate(metadata[selected_field]):
- print(f"{i + 1}. {item}")
-
- item_choice = int(
- input(
- "Enter the number of the item you want to edit (or 0 to go back): "
- )
- )
-
- if item_choice == 0:
- break
-
- selected_item = metadata[selected_field][item_choice - 1]
-
- while True:
- print(f"Subfields for {selected_field}:")
- for i, subfield in enumerate(selected_item.keys()):
- print(f"{i + 1}. {subfield}")
-
- subfield_choice = int(
- input(
- "Enter the number of the subfield you want to edit (or 0 to go back): "
- )
- )
-
- if subfield_choice == 0:
- break
-
- selected_subfield = list(selected_item.keys())[
- subfield_choice - 1
- ]
-
- new_value = input(
- f"Enter the new value for {selected_subfield}: "
- )
-
- metadata[selected_field][item_choice - 1][
- selected_subfield
- ] = new_value
-
- with open(file_name, "w") as file:
- json.dump(metadata, file, indent=2)
-
- print(f"File updated successfully.")
-
- else:
- while True:
- print(f"Subfields for {selected_field}:")
- for i, subfield in enumerate(metadata[selected_field].keys()):
- print(f"{i + 1}. {subfield}")
-
- subfield_choice = int(
- input(
- "Enter the number of the subfield you want to edit (or 0 to go back): "
- )
- )
-
- if subfield_choice == 0:
- break
-
- selected_subfield = list(metadata[selected_field].keys())[
- subfield_choice - 1
- ]
-
- new_value = input(
- f"Enter the new value for {selected_subfield}: "
- )
-
- metadata[selected_field][selected_subfield] = new_value
-
- with open(file_name, "w") as file:
- json.dump(metadata, file, indent=2)
-
- print(f"File updated successfully.")
-
- except Exception as e:
- print(f"An error occurred: {e}")
- return file_name
+ if not files:
+ data["files"] = {"enabled": False}
+ elif default_preview:
+ data["files"] = {"enabled": True, "default_preview": default_preview}
+ # Make draft and publish
+ result = requests.post(url + "/api/records", headers=headers, json=data)
+ if result.status_code != 201:
+ if result.status_code == 400 and "Referer checking failed" in result.text:
+ raise Exception("Token is incorrect or missing referer.")
+ else:
+ raise Exception(result.text)
+ idv = result.json()["id"]
+ publish_link = result.json()["links"]["publish"]
+
+ if files:
+ file_link = result.json()["links"]["files"]
+ write_files_rdm(files, file_link, headers, f_headers, s3)
+
+ if community:
+ review_link = result.json()["links"]["review"]
+ send_to_community(
+ review_link, data, headers, publish, community, review_message
+ )
-if __name__ == "__main__":
- main()
+ else:
+ if publish:
+ result = requests.post(publish_link, json=data, headers=headers)
+ if result.status_code != 202:
+ raise Exception(result.text)
+ return idv
From 6c4a5e5b7da0027c350fbe7d4fc59596421423d4 Mon Sep 17 00:00:00 2001
From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com>
Date: Thu, 7 Nov 2024 23:20:25 -0800
Subject: [PATCH 28/42] Update test_unit.py
From e6172966620a19b3db44bf5eee6aff313121b917 Mon Sep 17 00:00:00 2001
From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com>
Date: Thu, 7 Nov 2024 23:20:56 -0800
Subject: [PATCH 29/42] Create hehe.py
---
tests/data/invalid_datacite43/hehe.py | 1 +
1 file changed, 1 insertion(+)
create mode 100644 tests/data/invalid_datacite43/hehe.py
diff --git a/tests/data/invalid_datacite43/hehe.py b/tests/data/invalid_datacite43/hehe.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/tests/data/invalid_datacite43/hehe.py
@@ -0,0 +1 @@
+
From ccf9552acfe2aa34791ec03e2b2a6e253116e7c8 Mon Sep 17 00:00:00 2001
From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com>
Date: Thu, 7 Nov 2024 23:24:43 -0800
Subject: [PATCH 30/42] Add files via upload
---
.../invalid_metadata_1.json | 12 +
.../invalid_metadata_10.json | 18 +
.../invalid_metadata_2.json | 13 +
.../invalid_metadata_3.json | 12 +
.../invalid_metadata_4.json | 20 +
.../invalid_metadata_5.json | 22 ++
.../invalid_metadata_6.json | 22 ++
.../invalid_metadata_7.json | 20 +
.../invalid_metadata_8.json | 20 +
.../invalid_metadata_9.json | 16 +
.../invalid_datacite43/missing_creators.json | 263 +++++++++++++
.../invalid_datacite43/missing_publisher.json | 350 ++++++++++++++++++
.../invalid_datacite43/multiple_errors.json | 263 +++++++++++++
.../type_error_creators.json | 264 +++++++++++++
14 files changed, 1315 insertions(+)
create mode 100644 tests/data/invalid_datacite43/invalid_metadata_1.json
create mode 100644 tests/data/invalid_datacite43/invalid_metadata_10.json
create mode 100644 tests/data/invalid_datacite43/invalid_metadata_2.json
create mode 100644 tests/data/invalid_datacite43/invalid_metadata_3.json
create mode 100644 tests/data/invalid_datacite43/invalid_metadata_4.json
create mode 100644 tests/data/invalid_datacite43/invalid_metadata_5.json
create mode 100644 tests/data/invalid_datacite43/invalid_metadata_6.json
create mode 100644 tests/data/invalid_datacite43/invalid_metadata_7.json
create mode 100644 tests/data/invalid_datacite43/invalid_metadata_8.json
create mode 100644 tests/data/invalid_datacite43/invalid_metadata_9.json
create mode 100644 tests/data/invalid_datacite43/missing_creators.json
create mode 100644 tests/data/invalid_datacite43/missing_publisher.json
create mode 100644 tests/data/invalid_datacite43/multiple_errors.json
create mode 100644 tests/data/invalid_datacite43/type_error_creators.json
diff --git a/tests/data/invalid_datacite43/invalid_metadata_1.json b/tests/data/invalid_datacite43/invalid_metadata_1.json
new file mode 100644
index 0000000..1bba16b
--- /dev/null
+++ b/tests/data/invalid_datacite43/invalid_metadata_1.json
@@ -0,0 +1,12 @@
+{
+ "creators": [
+ {
+ "name": "John Doe"
+ }
+ ],
+ "publisher": "Caltech",
+ "publicationYear": "2023",
+ "types": {
+ "resourceTypeGeneral": "Dataset"
+ }
+}
\ No newline at end of file
diff --git a/tests/data/invalid_datacite43/invalid_metadata_10.json b/tests/data/invalid_datacite43/invalid_metadata_10.json
new file mode 100644
index 0000000..759757d
--- /dev/null
+++ b/tests/data/invalid_datacite43/invalid_metadata_10.json
@@ -0,0 +1,18 @@
+{
+ "titles": [
+ {
+ "title": "Sample Title"
+ }
+ ],
+ "creators": [
+ {
+ "name": "John Doe"
+ }
+ ],
+ "version": 1,
+ "publisher": "Caltech",
+ "publicationYear": "2023",
+ "types": {
+ "resourceTypeGeneral": "Dataset"
+ }
+}
\ No newline at end of file
diff --git a/tests/data/invalid_datacite43/invalid_metadata_2.json b/tests/data/invalid_datacite43/invalid_metadata_2.json
new file mode 100644
index 0000000..3899136
--- /dev/null
+++ b/tests/data/invalid_datacite43/invalid_metadata_2.json
@@ -0,0 +1,13 @@
+{
+ "titles": [],
+ "creators": [
+ {
+ "name": "John Doe"
+ }
+ ],
+ "publisher": "Caltech",
+ "publicationYear": "2023",
+ "types": {
+ "resourceTypeGeneral": "Dataset"
+ }
+}
\ No newline at end of file
diff --git a/tests/data/invalid_datacite43/invalid_metadata_3.json b/tests/data/invalid_datacite43/invalid_metadata_3.json
new file mode 100644
index 0000000..707dbab
--- /dev/null
+++ b/tests/data/invalid_datacite43/invalid_metadata_3.json
@@ -0,0 +1,12 @@
+{
+ "titles": [
+ {
+ "title": "Sample Title"
+ }
+ ],
+ "publisher": "Caltech",
+ "publicationYear": "2023",
+ "types": {
+ "resourceTypeGeneral": "Dataset"
+ }
+}
\ No newline at end of file
diff --git a/tests/data/invalid_datacite43/invalid_metadata_4.json b/tests/data/invalid_datacite43/invalid_metadata_4.json
new file mode 100644
index 0000000..f7d2fe4
--- /dev/null
+++ b/tests/data/invalid_datacite43/invalid_metadata_4.json
@@ -0,0 +1,20 @@
+{
+ "titles": [
+ {
+ "title": "Sample Title"
+ }
+ ],
+ "creators": [
+ {
+ "name": "John Doe"
+ }
+ ],
+ "contributors": [
+ {}
+ ],
+ "publisher": "Caltech",
+ "publicationYear": "2023",
+ "types": {
+ "resourceTypeGeneral": "Dataset"
+ }
+}
\ No newline at end of file
diff --git a/tests/data/invalid_datacite43/invalid_metadata_5.json b/tests/data/invalid_datacite43/invalid_metadata_5.json
new file mode 100644
index 0000000..deeff7f
--- /dev/null
+++ b/tests/data/invalid_datacite43/invalid_metadata_5.json
@@ -0,0 +1,22 @@
+{
+ "titles": [
+ {
+ "title": "Sample Title"
+ }
+ ],
+ "creators": [
+ {
+ "name": "John Doe"
+ }
+ ],
+ "descriptions": [
+ {
+ "description": "Sample Description"
+ }
+ ],
+ "publisher": "Caltech",
+ "publicationYear": "2023",
+ "types": {
+ "resourceTypeGeneral": "Dataset"
+ }
+}
\ No newline at end of file
diff --git a/tests/data/invalid_datacite43/invalid_metadata_6.json b/tests/data/invalid_datacite43/invalid_metadata_6.json
new file mode 100644
index 0000000..8fa14f1
--- /dev/null
+++ b/tests/data/invalid_datacite43/invalid_metadata_6.json
@@ -0,0 +1,22 @@
+{
+ "titles": [
+ {
+ "title": "Sample Title"
+ }
+ ],
+ "creators": [
+ {
+ "name": "John Doe"
+ }
+ ],
+ "fundingReferences": [
+ {
+ "funderIdentifier": "1234"
+ }
+ ],
+ "publisher": "Caltech",
+ "publicationYear": "2023",
+ "types": {
+ "resourceTypeGeneral": "Dataset"
+ }
+}
\ No newline at end of file
diff --git a/tests/data/invalid_datacite43/invalid_metadata_7.json b/tests/data/invalid_datacite43/invalid_metadata_7.json
new file mode 100644
index 0000000..bae4d11
--- /dev/null
+++ b/tests/data/invalid_datacite43/invalid_metadata_7.json
@@ -0,0 +1,20 @@
+{
+ "titles": [
+ {
+ "title": "Sample Title"
+ }
+ ],
+ "creators": [
+ {
+ "name": "John Doe"
+ }
+ ],
+ "identifiers": [
+ {}
+ ],
+ "publisher": "Caltech",
+ "publicationYear": "2023",
+ "types": {
+ "resourceTypeGeneral": "Dataset"
+ }
+}
\ No newline at end of file
diff --git a/tests/data/invalid_datacite43/invalid_metadata_8.json b/tests/data/invalid_datacite43/invalid_metadata_8.json
new file mode 100644
index 0000000..247f3ff
--- /dev/null
+++ b/tests/data/invalid_datacite43/invalid_metadata_8.json
@@ -0,0 +1,20 @@
+{
+ "titles": [
+ {
+ "title": "Sample Title"
+ }
+ ],
+ "creators": [
+ {
+ "name": "John Doe"
+ }
+ ],
+ "dates": [
+ {}
+ ],
+ "publisher": "Caltech",
+ "publicationYear": "2023",
+ "types": {
+ "resourceTypeGeneral": "Dataset"
+ }
+}
\ No newline at end of file
diff --git a/tests/data/invalid_datacite43/invalid_metadata_9.json b/tests/data/invalid_datacite43/invalid_metadata_9.json
new file mode 100644
index 0000000..2eddcf1
--- /dev/null
+++ b/tests/data/invalid_datacite43/invalid_metadata_9.json
@@ -0,0 +1,16 @@
+{
+ "titles": [
+ {
+ "title": "Sample Title"
+ }
+ ],
+ "creators": [
+ {
+ "name": "John Doe"
+ }
+ ],
+ "publicationYear": "2023",
+ "types": {
+ "resourceTypeGeneral": "Dataset"
+ }
+}
\ No newline at end of file
diff --git a/tests/data/invalid_datacite43/missing_creators.json b/tests/data/invalid_datacite43/missing_creators.json
new file mode 100644
index 0000000..0d0f1a1
--- /dev/null
+++ b/tests/data/invalid_datacite43/missing_creators.json
@@ -0,0 +1,263 @@
+{
+ "contributors": [
+ {
+ "nameIdentifiers": [
+ {
+ "nameIdentifier": "grid.20861.3d",
+ "nameIdentifierScheme": "GRID"
+ }
+ ],
+ "name": "California Institute of Technology, Pasadena, CA (US)",
+ "contributorType": "HostingInstitution"
+ },
+ {
+ "affiliation": [
+ {
+ "name": "California Institute of Technology, Pasadena, CA (US)"
+ }
+ ],
+ "nameIdentifiers": [
+ {
+ "nameIdentifier": "0000-0001-5383-8462",
+ "nameIdentifierScheme": "ORCID"
+ }
+ ],
+ "name": "Roehl, C. M.",
+ "contributorType": "DataCurator"
+ },
+ {
+ "affiliation": [
+ {
+ "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
+ }
+ ],
+ "nameIdentifiers": [
+ {
+ "nameIdentifier": "0000-0001-9947-1053",
+ "nameIdentifierScheme": "ORCID"
+ },
+ {
+ "nameIdentifier": "D-2563-2012",
+ "nameIdentifierScheme": "ResearcherID"
+ }
+ ],
+ "name": "Kimberly Strong",
+ "contributorType": "ContactPerson"
+ },
+ {
+ "name": "TCCON",
+ "contributorType": "ResearchGroup"
+ }
+ ],
+ "descriptions": [
+ {
+ "descriptionType": "Abstract",
+ "description": "\nThe Total Carbon Column Observing Network (TCCON) is a network of ground-based Fourier Transform Spectrometers that record direct solar absorption spectra of the atmosphere in the near-infrared. From these spectra, accurate and precise column-averaged abundances of atmospheric constituents including CO2, CH4, N2O, HF, CO, H2O, and HDO, are retrieved. This data set contains observations from the TCCON station at Eureka, Canada."
+ },
+ {
+ "descriptionType": "Other",
+ "description": "\nCite this record as:\nStrong, K., Roche, S., Franklin, J. E., Mendonca, J., Lutsch, E., Weaver, D., \u2026 Lindenmaier, R. (2019). TCCON data from Eureka (CA), Release GGG2014.R3 [Data set]. CaltechDATA. https://doi.org/10.14291/tccon.ggg2014.eureka01.r3\nor choose a different citation style.\nDownload Citation\n"
+ },
+ {
+ "descriptionType": "Other",
+ "description": "\nUnique Views: 161\nUnique Downloads: 7\nbetween January 31, 2019 and July 02, 2020\nMore info on how stats are collected\n"
+ }
+ ],
+ "fundingReferences": [
+ {
+ "funderName": "Atlantic Innovation Fund"
+ },
+ {
+ "funderName": "Canada Foundation for Innovation",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.439998.6"
+ },
+ {
+ "funderName": "Canadian Foundation for Climate and Atmospheric Sciences"
+ },
+ {
+ "funderName": "Canadian Space Agency",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.236846.d"
+ },
+ {
+ "funderName": "Environment and Climate Change Canada",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.410334.1"
+ },
+ {
+ "funderName": "Government of Canada (International Polar Year funding)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.451254.3"
+ },
+ {
+ "funderName": "Natural Sciences and Engineering Research Council of Canada",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.452912.9"
+ },
+ {
+ "funderName": "Polar Commission (Northern Scientific Training Program)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.465477.3"
+ },
+ {
+ "funderName": "Nova Scotia Research Innovation Trust"
+ },
+ {
+ "funderName": "Ministry of Research and Innovation (Ontario Innovation Trust and Ontario Research Fund)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.451078.f"
+ },
+ {
+ "funderName": "Natural Resources Canada (Polar Continental Shelf Program)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.202033.0"
+ }
+ ],
+ "language": "eng",
+ "relatedIdentifiers": [
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.documentation.R0/1221662",
+ "relationType": "IsDocumentedBy",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R0/1149271",
+ "relationType": "IsNewVersionOf",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "https://tccon-wiki.caltech.edu/Network_Policy/Data_Use_Policy/Data_Description",
+ "relationType": "IsDocumentedBy",
+ "relatedIdentifierType": "URL"
+ },
+ {
+ "relatedIdentifier": "https://tccon-wiki.caltech.edu/Sites",
+ "relationType": "IsDocumentedBy",
+ "relatedIdentifierType": "URL"
+ },
+ {
+ "relatedIdentifier": "10.14291/TCCON.GGG2014",
+ "relationType": "IsPartOf",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R1/1325515",
+ "relationType": "IsNewVersionOf",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R2",
+ "relationType": "IsNewVersionOf",
+ "relatedIdentifierType": "DOI"
+ }
+ ],
+ "rightsList": [
+ {
+ "rights": "TCCON Data License",
+ "rightsURI": "https://data.caltech.edu/tindfiles/serve/8298981c-6613-4ed9-9c54-5ef8fb5180f4/"
+ }
+ ],
+ "subjects": [
+ {
+ "subject": "atmospheric trace gases"
+ },
+ {
+ "subject": "CO2"
+ },
+ {
+ "subject": "CH4"
+ },
+ {
+ "subject": "CO"
+ },
+ {
+ "subject": "N2O"
+ },
+ {
+ "subject": "column-averaged dry-air mole fractions"
+ },
+ {
+ "subject": "remote sensing"
+ },
+ {
+ "subject": "FTIR spectroscopy"
+ },
+ {
+ "subject": "TCCON"
+ }
+ ],
+ "version": "R3",
+ "titles": [
+ {
+ "title": "TCCON data from Eureka (CA), Release GGG2014.R3"
+ }
+ ],
+ "formats": [
+ "application/x-netcdf"
+ ],
+ "dates": [
+ {
+ "date": "2019-01-31",
+ "dateType": "Created"
+ },
+ {
+ "date": "2020-07-01",
+ "dateType": "Updated"
+ },
+ {
+ "date": "2010-07-24/2019-08-15",
+ "dateType": "Collected"
+ },
+ {
+ "date": "2019-01-31",
+ "dateType": "Submitted"
+ },
+ {
+ "date": "2019-01-31",
+ "dateType": "Issued"
+ }
+ ],
+ "publicationYear": "2019",
+ "publisher": "CaltechDATA",
+ "types": {
+ "resourceTypeGeneral": "Dataset",
+ "resourceType": "Dataset"
+ },
+ "identifiers": [
+ {
+ "identifier": "10.14291/tccon.ggg2014.eureka01.R3",
+ "identifierType": "DOI"
+ },
+ {
+ "identifier": "1171",
+ "identifierType": "CaltechDATA_Identifier"
+ },
+ {
+ "identifier": "GGG2014",
+ "identifierType": "Software_Version"
+ },
+ {
+ "identifier": "eu",
+ "identifierType": "id"
+ },
+ {
+ "identifier": "eureka01",
+ "identifierType": "longName"
+ },
+ {
+ "identifier": "R1",
+ "identifierType": "Data_Revision"
+ }
+ ],
+ "geoLocations": [
+ {
+ "geoLocationPlace": "Eureka, NU (CA)",
+ "geoLocationPoint": {
+ "pointLatitude": "80.05",
+ "pointLongitude": "-86.42"
+ }
+ }
+ ],
+ "schemaVersion": "http://datacite.org/schema/kernel-4"
+}
\ No newline at end of file
diff --git a/tests/data/invalid_datacite43/missing_publisher.json b/tests/data/invalid_datacite43/missing_publisher.json
new file mode 100644
index 0000000..9035027
--- /dev/null
+++ b/tests/data/invalid_datacite43/missing_publisher.json
@@ -0,0 +1,350 @@
+{
+ "contributors": [
+ {
+ "nameIdentifiers": [
+ {
+ "nameIdentifier": "grid.20861.3d",
+ "nameIdentifierScheme": "GRID"
+ }
+ ],
+ "name": "California Institute of Technology, Pasadena, CA (US)",
+ "contributorType": "HostingInstitution"
+ },
+ {
+ "affiliation": [
+ {
+ "name": "California Institute of Technology, Pasadena, CA (US)"
+ }
+ ],
+ "nameIdentifiers": [
+ {
+ "nameIdentifier": "0000-0001-5383-8462",
+ "nameIdentifierScheme": "ORCID"
+ }
+ ],
+ "name": "Roehl, C. M.",
+ "contributorType": "DataCurator"
+ },
+ {
+ "affiliation": [
+ {
+ "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
+ }
+ ],
+ "nameIdentifiers": [
+ {
+ "nameIdentifier": "0000-0001-9947-1053",
+ "nameIdentifierScheme": "ORCID"
+ },
+ {
+ "nameIdentifier": "D-2563-2012",
+ "nameIdentifierScheme": "ResearcherID"
+ }
+ ],
+ "name": "Kimberly Strong",
+ "contributorType": "ContactPerson"
+ },
+ {
+ "name": "TCCON",
+ "contributorType": "ResearchGroup"
+ }
+ ],
+ "descriptions": [
+ {
+ "descriptionType": "Abstract",
+ "description": "\nThe Total Carbon Column Observing Network (TCCON) is a network of ground-based Fourier Transform Spectrometers that record direct solar absorption spectra of the atmosphere in the near-infrared. From these spectra, accurate and precise column-averaged abundances of atmospheric constituents including CO2, CH4, N2O, HF, CO, H2O, and HDO, are retrieved. This data set contains observations from the TCCON station at Eureka, Canada."
+ },
+ {
+ "descriptionType": "Other",
+ "description": "\nCite this record as:\nStrong, K., Roche, S., Franklin, J. E., Mendonca, J., Lutsch, E., Weaver, D., \u2026 Lindenmaier, R. (2019). TCCON data from Eureka (CA), Release GGG2014.R3 [Data set]. CaltechDATA. https://doi.org/10.14291/tccon.ggg2014.eureka01.r3\nor choose a different citation style.\nDownload Citation\n"
+ },
+ {
+ "descriptionType": "Other",
+ "description": "\nUnique Views: 161\nUnique Downloads: 7\nbetween January 31, 2019 and July 02, 2020\nMore info on how stats are collected\n"
+ }
+ ],
+ "fundingReferences": [
+ {
+ "funderName": "Atlantic Innovation Fund"
+ },
+ {
+ "funderName": "Canada Foundation for Innovation",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.439998.6"
+ },
+ {
+ "funderName": "Canadian Foundation for Climate and Atmospheric Sciences"
+ },
+ {
+ "funderName": "Canadian Space Agency",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.236846.d"
+ },
+ {
+ "funderName": "Environment and Climate Change Canada",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.410334.1"
+ },
+ {
+ "funderName": "Government of Canada (International Polar Year funding)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.451254.3"
+ },
+ {
+ "funderName": "Natural Sciences and Engineering Research Council of Canada",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.452912.9"
+ },
+ {
+ "funderName": "Polar Commission (Northern Scientific Training Program)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.465477.3"
+ },
+ {
+ "funderName": "Nova Scotia Research Innovation Trust"
+ },
+ {
+ "funderName": "Ministry of Research and Innovation (Ontario Innovation Trust and Ontario Research Fund)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.451078.f"
+ },
+ {
+ "funderName": "Natural Resources Canada (Polar Continental Shelf Program)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.202033.0"
+ }
+ ],
+ "language": "eng",
+ "relatedIdentifiers": [
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.documentation.R0/1221662",
+ "relationType": "IsDocumentedBy",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R0/1149271",
+ "relationType": "IsNewVersionOf",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "https://tccon-wiki.caltech.edu/Network_Policy/Data_Use_Policy/Data_Description",
+ "relationType": "IsDocumentedBy",
+ "relatedIdentifierType": "URL"
+ },
+ {
+ "relatedIdentifier": "https://tccon-wiki.caltech.edu/Sites",
+ "relationType": "IsDocumentedBy",
+ "relatedIdentifierType": "URL"
+ },
+ {
+ "relatedIdentifier": "10.14291/TCCON.GGG2014",
+ "relationType": "IsPartOf",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R1/1325515",
+ "relationType": "IsNewVersionOf",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R2",
+ "relationType": "IsNewVersionOf",
+ "relatedIdentifierType": "DOI"
+ }
+ ],
+ "rightsList": [
+ {
+ "rights": "TCCON Data License",
+ "rightsURI": "https://data.caltech.edu/tindfiles/serve/8298981c-6613-4ed9-9c54-5ef8fb5180f4/"
+ }
+ ],
+ "subjects": [
+ {
+ "subject": "atmospheric trace gases"
+ },
+ {
+ "subject": "CO2"
+ },
+ {
+ "subject": "CH4"
+ },
+ {
+ "subject": "CO"
+ },
+ {
+ "subject": "N2O"
+ },
+ {
+ "subject": "column-averaged dry-air mole fractions"
+ },
+ {
+ "subject": "remote sensing"
+ },
+ {
+ "subject": "FTIR spectroscopy"
+ },
+ {
+ "subject": "TCCON"
+ }
+ ],
+ "version": "R3",
+ "titles": [
+ {
+ "title": "TCCON data from Eureka (CA), Release GGG2014.R3"
+ }
+ ],
+ "formats": [
+ "application/x-netcdf"
+ ],
+ "dates": [
+ {
+ "date": "2019-01-31",
+ "dateType": "Created"
+ },
+ {
+ "date": "2020-07-01",
+ "dateType": "Updated"
+ },
+ {
+ "date": "2010-07-24/2019-08-15",
+ "dateType": "Collected"
+ },
+ {
+ "date": "2019-01-31",
+ "dateType": "Submitted"
+ },
+ {
+ "date": "2019-01-31",
+ "dateType": "Issued"
+ }
+ ],
+ "publicationYear": "2019",
+ "types": {
+ "resourceTypeGeneral": "Dataset",
+ "resourceType": "Dataset"
+ },
+ "identifiers": [
+ {
+ "identifier": "10.14291/tccon.ggg2014.eureka01.R3",
+ "identifierType": "DOI"
+ },
+ {
+ "identifier": "1171",
+ "identifierType": "CaltechDATA_Identifier"
+ },
+ {
+ "identifier": "GGG2014",
+ "identifierType": "Software_Version"
+ },
+ {
+ "identifier": "eu",
+ "identifierType": "id"
+ },
+ {
+ "identifier": "eureka01",
+ "identifierType": "longName"
+ },
+ {
+ "identifier": "R1",
+ "identifierType": "Data_Revision"
+ }
+ ],
+ "creators": [
+ {
+ "affiliation": [
+ {
+ "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
+ }
+ ],
+ "name": "Strong, K."
+ },
+ {
+ "affiliation": [
+ {
+ "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
+ }
+ ],
+ "name": "Roche, S."
+ },
+ {
+ "affiliation": [
+ {
+ "name": "School of Engineering and Applied Sciences, Harvard University, Cambridge, MA (USA)"
+ }
+ ],
+ "name": "Franklin, J. E."
+ },
+ {
+ "affiliation": [
+ {
+ "name": "Environment and Climate Change Canada, Downsview, ON (CA)"
+ }
+ ],
+ "name": "Mendonca, J."
+ },
+ {
+ "affiliation": [
+ {
+ "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
+ }
+ ],
+ "name": "Lutsch, E."
+ },
+ {
+ "affiliation": [
+ {
+ "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
+ }
+ ],
+ "name": "Weaver, D."
+ },
+ {
+ "affiliation": [
+ {
+ "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
+ }
+ ],
+ "name": "Fogal, P. F."
+ },
+ {
+ "affiliation": [
+ {
+ "name": "Department of Physics & Atmospheric Science, Dalhousie University, Halifax, NS, CA"
+ }
+ ],
+ "name": "Drummond, J. R."
+ },
+ {
+ "affiliation": [
+ {
+ "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
+ },
+ {
+ "name": "UCAR Center for Science Education, Boulder, CO (US)"
+ }
+ ],
+ "name": "Batchelor, R."
+ },
+ {
+ "affiliation": [
+ {
+ "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
+ },
+ {
+ "name": "Pacific Northwest National Laboratory, Richland, WA (US)"
+ }
+ ],
+ "name": "Lindenmaier, R."
+ }
+ ],
+ "geoLocations": [
+ {
+ "geoLocationPlace": "Eureka, NU (CA)",
+ "geoLocationPoint": {
+ "pointLatitude": "80.05",
+ "pointLongitude": "-86.42"
+ }
+ }
+ ],
+ "schemaVersion": "http://datacite.org/schema/kernel-4"
+}
\ No newline at end of file
diff --git a/tests/data/invalid_datacite43/multiple_errors.json b/tests/data/invalid_datacite43/multiple_errors.json
new file mode 100644
index 0000000..c18931b
--- /dev/null
+++ b/tests/data/invalid_datacite43/multiple_errors.json
@@ -0,0 +1,263 @@
+{
+ "contributors": [
+ {
+ "nameIdentifiers": [
+ {
+ "nameIdentifier": "grid.20861.3d",
+ "nameIdentifierScheme": "GRID"
+ }
+ ],
+ "name": "California Institute of Technology, Pasadena, CA (US)",
+ "contributorType": "HostingInstitution"
+ },
+ {
+ "affiliation": [
+ {
+ "name": "California Institute of Technology, Pasadena, CA (US)"
+ }
+ ],
+ "nameIdentifiers": [
+ {
+ "nameIdentifier": "0000-0001-5383-8462",
+ "nameIdentifierScheme": "ORCID"
+ }
+ ],
+ "name": "Roehl, C. M.",
+ "contributorType": "DataCurator"
+ },
+ {
+ "affiliation": [
+ {
+ "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
+ }
+ ],
+ "nameIdentifiers": [
+ {
+ "nameIdentifier": "0000-0001-9947-1053",
+ "nameIdentifierScheme": "ORCID"
+ },
+ {
+ "nameIdentifier": "D-2563-2012",
+ "nameIdentifierScheme": "ResearcherID"
+ }
+ ],
+ "name": "Kimberly Strong",
+ "contributorType": "ContactPerson"
+ },
+ {
+ "name": "TCCON",
+ "contributorType": "ResearchGroup"
+ }
+ ],
+ "descriptions": [
+ {
+ "descriptionType": "Abstract",
+ "description": "\nThe Total Carbon Column Observing Network (TCCON) is a network of ground-based Fourier Transform Spectrometers that record direct solar absorption spectra of the atmosphere in the near-infrared. From these spectra, accurate and precise column-averaged abundances of atmospheric constituents including CO2, CH4, N2O, HF, CO, H2O, and HDO, are retrieved. This data set contains observations from the TCCON station at Eureka, Canada."
+ },
+ {
+ "descriptionType": "Other",
+ "description": "\nCite this record as:\nStrong, K., Roche, S., Franklin, J. E., Mendonca, J., Lutsch, E., Weaver, D., \u2026 Lindenmaier, R. (2019). TCCON data from Eureka (CA), Release GGG2014.R3 [Data set]. CaltechDATA. https://doi.org/10.14291/tccon.ggg2014.eureka01.r3\nor choose a different citation style.\nDownload Citation\n"
+ },
+ {
+ "descriptionType": "Other",
+ "description": "\nUnique Views: 161\nUnique Downloads: 7\nbetween January 31, 2019 and July 02, 2020\nMore info on how stats are collected\n"
+ }
+ ],
+ "fundingReferences": [
+ {
+ "funderName": "Atlantic Innovation Fund"
+ },
+ {
+ "funderName": "Canada Foundation for Innovation",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.439998.6"
+ },
+ {
+ "funderName": "Canadian Foundation for Climate and Atmospheric Sciences"
+ },
+ {
+ "funderName": "Canadian Space Agency",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.236846.d"
+ },
+ {
+ "funderName": "Environment and Climate Change Canada",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.410334.1"
+ },
+ {
+ "funderName": "Government of Canada (International Polar Year funding)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.451254.3"
+ },
+ {
+ "funderName": "Natural Sciences and Engineering Research Council of Canada",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.452912.9"
+ },
+ {
+ "funderName": "Polar Commission (Northern Scientific Training Program)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.465477.3"
+ },
+ {
+ "funderName": "Nova Scotia Research Innovation Trust"
+ },
+ {
+ "funderName": "Ministry of Research and Innovation (Ontario Innovation Trust and Ontario Research Fund)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.451078.f"
+ },
+ {
+ "funderName": "Natural Resources Canada (Polar Continental Shelf Program)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.202033.0"
+ }
+ ],
+ "language": "eng",
+ "relatedIdentifiers": [
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.documentation.R0/1221662",
+ "relationType": "IsDocumentedBy",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R0/1149271",
+ "relationType": "IsNewVersionOf",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "https://tccon-wiki.caltech.edu/Network_Policy/Data_Use_Policy/Data_Description",
+ "relationType": "IsDocumentedBy",
+ "relatedIdentifierType": "URL"
+ },
+ {
+ "relatedIdentifier": "https://tccon-wiki.caltech.edu/Sites",
+ "relationType": "IsDocumentedBy",
+ "relatedIdentifierType": "URL"
+ },
+ {
+ "relatedIdentifier": "10.14291/TCCON.GGG2014",
+ "relationType": "IsPartOf",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R1/1325515",
+ "relationType": "IsNewVersionOf",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R2",
+ "relationType": "IsNewVersionOf",
+ "relatedIdentifierType": "DOI"
+ }
+ ],
+ "rightsList": [
+ {
+ "rights": "TCCON Data License",
+ "rightsURI": "https://data.caltech.edu/tindfiles/serve/8298981c-6613-4ed9-9c54-5ef8fb5180f4/"
+ }
+ ],
+ "subjects": [
+ {
+ "subject": "atmospheric trace gases"
+ },
+ {
+ "subject": "CO2"
+ },
+ {
+ "subject": "CH4"
+ },
+ {
+ "subject": "CO"
+ },
+ {
+ "subject": "N2O"
+ },
+ {
+ "subject": "column-averaged dry-air mole fractions"
+ },
+ {
+ "subject": "remote sensing"
+ },
+ {
+ "subject": "FTIR spectroscopy"
+ },
+ {
+ "subject": "TCCON"
+ }
+ ],
+ "version": "R3",
+ "titles": [
+ {
+ "title": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+ }
+ ],
+ "formats": [
+ "application/x-netcdf"
+ ],
+ "dates": [
+ {
+ "date": "31-01-2019",
+ "dateType": "Created"
+ },
+ {
+ "date": "2020-07-01",
+ "dateType": "Updated"
+ },
+ {
+ "date": "2010-07-24/2019-08-15",
+ "dateType": "Collected"
+ },
+ {
+ "date": "2019-01-31",
+ "dateType": "Submitted"
+ },
+ {
+ "date": "2019-01-31",
+ "dateType": "Issued"
+ }
+ ],
+ "publicationYear": "2019",
+ "publisher": "CaltechDATA",
+ "types": {
+ "resourceTypeGeneral": "Dataset",
+ "resourceType": "Dataset"
+ },
+ "identifiers": [
+ {
+ "identifier": "10.14291/tccon.ggg2014.eureka01.R3",
+ "identifierType": "DOI"
+ },
+ {
+ "identifier": "1171",
+ "identifierType": "CaltechDATA_Identifier"
+ },
+ {
+ "identifier": "GGG2014",
+ "identifierType": "Software_Version"
+ },
+ {
+ "identifier": "eu",
+ "identifierType": "id"
+ },
+ {
+ "identifier": "eureka01",
+ "identifierType": "longName"
+ },
+ {
+ "identifier": "R1",
+ "identifierType": "Data_Revision"
+ }
+ ],
+ "geoLocations": [
+ {
+ "geoLocationPlace": "Eureka, NU (CA)",
+ "geoLocationPoint": {
+ "pointLatitude": "80.05",
+ "pointLongitude": "-86.42"
+ }
+ }
+ ],
+ "schemaVersion": "http://datacite.org/schema/kernel-4"
+}
\ No newline at end of file
diff --git a/tests/data/invalid_datacite43/type_error_creators.json b/tests/data/invalid_datacite43/type_error_creators.json
new file mode 100644
index 0000000..6200870
--- /dev/null
+++ b/tests/data/invalid_datacite43/type_error_creators.json
@@ -0,0 +1,264 @@
+{
+ "contributors": [
+ {
+ "nameIdentifiers": [
+ {
+ "nameIdentifier": "grid.20861.3d",
+ "nameIdentifierScheme": "GRID"
+ }
+ ],
+ "name": "California Institute of Technology, Pasadena, CA (US)",
+ "contributorType": "HostingInstitution"
+ },
+ {
+ "affiliation": [
+ {
+ "name": "California Institute of Technology, Pasadena, CA (US)"
+ }
+ ],
+ "nameIdentifiers": [
+ {
+ "nameIdentifier": "0000-0001-5383-8462",
+ "nameIdentifierScheme": "ORCID"
+ }
+ ],
+ "name": "Roehl, C. M.",
+ "contributorType": "DataCurator"
+ },
+ {
+ "affiliation": [
+ {
+ "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
+ }
+ ],
+ "nameIdentifiers": [
+ {
+ "nameIdentifier": "0000-0001-9947-1053",
+ "nameIdentifierScheme": "ORCID"
+ },
+ {
+ "nameIdentifier": "D-2563-2012",
+ "nameIdentifierScheme": "ResearcherID"
+ }
+ ],
+ "name": "Kimberly Strong",
+ "contributorType": "ContactPerson"
+ },
+ {
+ "name": "TCCON",
+ "contributorType": "ResearchGroup"
+ }
+ ],
+ "descriptions": [
+ {
+ "descriptionType": "Abstract",
+ "description": "\nThe Total Carbon Column Observing Network (TCCON) is a network of ground-based Fourier Transform Spectrometers that record direct solar absorption spectra of the atmosphere in the near-infrared. From these spectra, accurate and precise column-averaged abundances of atmospheric constituents including CO2, CH4, N2O, HF, CO, H2O, and HDO, are retrieved. This data set contains observations from the TCCON station at Eureka, Canada."
+ },
+ {
+ "descriptionType": "Other",
+ "description": "\nCite this record as:\nStrong, K., Roche, S., Franklin, J. E., Mendonca, J., Lutsch, E., Weaver, D., \u2026 Lindenmaier, R. (2019). TCCON data from Eureka (CA), Release GGG2014.R3 [Data set]. CaltechDATA. https://doi.org/10.14291/tccon.ggg2014.eureka01.r3\nor choose a different citation style.\nDownload Citation\n"
+ },
+ {
+ "descriptionType": "Other",
+ "description": "\nUnique Views: 161\nUnique Downloads: 7\nbetween January 31, 2019 and July 02, 2020\nMore info on how stats are collected\n"
+ }
+ ],
+ "fundingReferences": [
+ {
+ "funderName": "Atlantic Innovation Fund"
+ },
+ {
+ "funderName": "Canada Foundation for Innovation",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.439998.6"
+ },
+ {
+ "funderName": "Canadian Foundation for Climate and Atmospheric Sciences"
+ },
+ {
+ "funderName": "Canadian Space Agency",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.236846.d"
+ },
+ {
+ "funderName": "Environment and Climate Change Canada",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.410334.1"
+ },
+ {
+ "funderName": "Government of Canada (International Polar Year funding)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.451254.3"
+ },
+ {
+ "funderName": "Natural Sciences and Engineering Research Council of Canada",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.452912.9"
+ },
+ {
+ "funderName": "Polar Commission (Northern Scientific Training Program)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.465477.3"
+ },
+ {
+ "funderName": "Nova Scotia Research Innovation Trust"
+ },
+ {
+ "funderName": "Ministry of Research and Innovation (Ontario Innovation Trust and Ontario Research Fund)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.451078.f"
+ },
+ {
+ "funderName": "Natural Resources Canada (Polar Continental Shelf Program)",
+ "funderIdentifierType": "GRID",
+ "funderIdentifier": "grid.202033.0"
+ }
+ ],
+ "language": "eng",
+ "relatedIdentifiers": [
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.documentation.R0/1221662",
+ "relationType": "IsDocumentedBy",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R0/1149271",
+ "relationType": "IsNewVersionOf",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "https://tccon-wiki.caltech.edu/Network_Policy/Data_Use_Policy/Data_Description",
+ "relationType": "IsDocumentedBy",
+ "relatedIdentifierType": "URL"
+ },
+ {
+ "relatedIdentifier": "https://tccon-wiki.caltech.edu/Sites",
+ "relationType": "IsDocumentedBy",
+ "relatedIdentifierType": "URL"
+ },
+ {
+ "relatedIdentifier": "10.14291/TCCON.GGG2014",
+ "relationType": "IsPartOf",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R1/1325515",
+ "relationType": "IsNewVersionOf",
+ "relatedIdentifierType": "DOI"
+ },
+ {
+ "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R2",
+ "relationType": "IsNewVersionOf",
+ "relatedIdentifierType": "DOI"
+ }
+ ],
+ "rightsList": [
+ {
+ "rights": "TCCON Data License",
+ "rightsURI": "https://data.caltech.edu/tindfiles/serve/8298981c-6613-4ed9-9c54-5ef8fb5180f4/"
+ }
+ ],
+ "subjects": [
+ {
+ "subject": "atmospheric trace gases"
+ },
+ {
+ "subject": "CO2"
+ },
+ {
+ "subject": "CH4"
+ },
+ {
+ "subject": "CO"
+ },
+ {
+ "subject": "N2O"
+ },
+ {
+ "subject": "column-averaged dry-air mole fractions"
+ },
+ {
+ "subject": "remote sensing"
+ },
+ {
+ "subject": "FTIR spectroscopy"
+ },
+ {
+ "subject": "TCCON"
+ }
+ ],
+ "version": "R3",
+ "titles": [
+ {
+ "title": "TCCON data from Eureka (CA), Release GGG2014.R3"
+ }
+ ],
+ "formats": [
+ "application/x-netcdf"
+ ],
+ "dates": [
+ {
+ "date": "2019-01-31",
+ "dateType": "Created"
+ },
+ {
+ "date": "2020-07-01",
+ "dateType": "Updated"
+ },
+ {
+ "date": "2010-07-24/2019-08-15",
+ "dateType": "Collected"
+ },
+ {
+ "date": "2019-01-31",
+ "dateType": "Submitted"
+ },
+ {
+ "date": "2019-01-31",
+ "dateType": "Issued"
+ }
+ ],
+ "publicationYear": "2019",
+ "publisher": "CaltechDATA",
+ "types": {
+ "resourceTypeGeneral": "Dataset",
+ "resourceType": "Dataset"
+ },
+ "identifiers": [
+ {
+ "identifier": "10.14291/tccon.ggg2014.eureka01.R3",
+ "identifierType": "DOI"
+ },
+ {
+ "identifier": "1171",
+ "identifierType": "CaltechDATA_Identifier"
+ },
+ {
+ "identifier": "GGG2014",
+ "identifierType": "Software_Version"
+ },
+ {
+ "identifier": "eu",
+ "identifierType": "id"
+ },
+ {
+ "identifier": "eureka01",
+ "identifierType": "longName"
+ },
+ {
+ "identifier": "R1",
+ "identifierType": "Data_Revision"
+ }
+ ],
+ "creators": "Incorrect type",
+ "geoLocations": [
+ {
+ "geoLocationPlace": "Eureka, NU (CA)",
+ "geoLocationPoint": {
+ "pointLatitude": "80.05",
+ "pointLongitude": "-86.42"
+ }
+ }
+ ],
+ "schemaVersion": "http://datacite.org/schema/kernel-4"
+}
\ No newline at end of file
From 947c91b84fa7fd7effaf8749e7f3bf23da9618e6 Mon Sep 17 00:00:00 2001
From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com>
Date: Thu, 7 Nov 2024 23:25:10 -0800
Subject: [PATCH 31/42] Delete tests/data/invalid_datacite43/hehe.py
---
tests/data/invalid_datacite43/hehe.py | 1 -
1 file changed, 1 deletion(-)
delete mode 100644 tests/data/invalid_datacite43/hehe.py
diff --git a/tests/data/invalid_datacite43/hehe.py b/tests/data/invalid_datacite43/hehe.py
deleted file mode 100644
index 8b13789..0000000
--- a/tests/data/invalid_datacite43/hehe.py
+++ /dev/null
@@ -1 +0,0 @@
-
From 596651c12666a0abe42cd41e1835e32ef351fc2d Mon Sep 17 00:00:00 2001
From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com>
Date: Fri, 8 Nov 2024 02:14:19 -0800
Subject: [PATCH 32/42] Create bot.py
---
tests/bot.py | 189 +++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 189 insertions(+)
create mode 100644 tests/bot.py
diff --git a/tests/bot.py b/tests/bot.py
new file mode 100644
index 0000000..936f1d2
--- /dev/null
+++ b/tests/bot.py
@@ -0,0 +1,189 @@
+import subprocess
+import time
+from unittest.mock import patch
+import sys
+import os
+import json
+import requests
+from datetime import datetime
+import pytest
+from customize_schema import validate_metadata as validator43 # Import validator
+
+class CaltechDataTester:
+    """Drive the CaltechDATA CLI with scripted answers and validate the result.
+
+    Each instance creates caltech_test_data/test_run_<timestamp>/ and appends
+    every logged message to test_log.txt inside that directory.
+    """
+
+    def __init__(self):
+        self.test_dir = "caltech_test_data"
+        self.timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        os.makedirs(self.test_dir, exist_ok=True)
+
+        # Create test data directory with timestamp
+        self.test_run_dir = os.path.join(self.test_dir, f"test_run_{self.timestamp}")
+        os.makedirs(self.test_run_dir)
+
+        self.log_file = os.path.join(self.test_run_dir, "test_log.txt")
+
+    def log(self, message):
+        """Print message and append it, timestamped, to the run's log file."""
+        print(message)
+        # Explicit UTF-8: messages contain emoji, which a locale-dependent
+        # default encoding may be unable to write.
+        with open(self.log_file, "a", encoding="utf-8") as f:
+            f.write(f"{datetime.now()}: {message}\n")
+
+    def create_test_files(self):
+        """Write a small sample CSV into the run directory and return its path."""
+        csv_path = os.path.join(self.test_run_dir, "test_data.csv")
+        with open(csv_path, "w") as f:
+            f.write("date,temperature,humidity\n")
+            f.write("2023-01-01,25.5,60\n")
+            f.write("2023-01-02,26.0,62\n")
+            f.write("2023-01-03,24.8,65\n")
+
+        self.log(f"Created test CSV file: {csv_path}")
+        return csv_path
+
+    def generate_test_responses(self):
+        """Return a dict mapping each exact CLI prompt string to its scripted answer."""
+        return {
+            "Do you want to create or edit a CaltechDATA record? (create/edit): ": "create",
+            "Do you want to use metadata from an existing file or create new metadata? (existing/create): ": "create",
+            "Enter the title of the dataset: ": f"Test Dataset {self.timestamp}",
+            "Enter the abstract or description of the dataset: ": "This is an automated test dataset containing sample climate data for validation purposes.",
+            "Enter the number corresponding to the desired license: ": "1",
+            "Enter your ORCID identifier: ": "0000-0002-1825-0097",
+            "How many funding entries do you want to provide? ": "1",
+            "Enter the award number for funding: ": "NSF-1234567",
+            "Enter the award title for funding: ": "Automated Testing Grant",
+            "Enter the funder ROR (https://ror.org): ": "021nxhr62",
+            "Do you want to upload or link data files? (upload/link/n): ": "upload",
+            "Enter the filename to upload as a supporting file (or 'n' to finish): ": "test_data.csv",
+            "Do you want to add more files? (y/n): ": "n",
+            "Do you want to send this record to CaltechDATA? (y/n): ": "y",
+        }
+
+    def extract_record_id(self, output_text):
+        """Return the text after the last '/' on the first line containing 'uploads/'.
+
+        Returns None when no line matches or when an error occurs (it is logged).
+        """
+        try:
+            for line in output_text.split("\n"):
+                if "uploads/" in line:
+                    return line.strip().split("/")[-1]
+        except Exception as e:
+            self.log(f"Error extracting record ID: {e}")
+        return None
+
+    def download_and_validate_record(self, record_id):
+        """Download the record's DataCite JSON export, save it, and validate it.
+
+        Returns True when the validator reports no errors; returns False on
+        validation errors or on any exception, which is logged.
+        """
+        try:
+            # Wait for record to be available
+            time.sleep(5)
+
+            url = f"https://data.caltech.edu/records/{record_id}/export/datacite-json?preview=1"
+            # A timeout keeps an unresponsive server from hanging the run.
+            response = requests.get(url, timeout=30)
+            response.raise_for_status()
+            metadata = response.json()  # parse once; reused for saving and validating
+
+            json_path = os.path.join(self.test_run_dir, f"{record_id}.json")
+            with open(json_path, "w") as f:
+                json.dump(metadata, f, indent=2)
+            self.log(f"Downloaded metadata to: {json_path}")
+
+            validation_errors = validator43(metadata)
+            if validation_errors:
+                self.log("❌ Validation errors found:")
+                for error in validation_errors:
+                    self.log(f" - {error}")
+                return False
+            self.log("✅ Validation passed successfully")
+            return True
+        except Exception as e:
+            self.log(f"Error in download and validation: {e}")
+            return False
+
+    def run_test_submission(self):
+        """Create a record through the CLI with scripted input, then validate it.
+
+        Returns True only when the CLI ran, a record ID was found in its
+        output, and the downloaded metadata validated; otherwise False.
+        """
+        test_csv = None  # bound before try so the cleanup in finally never sees an unbound name
+        try:
+            self.log("Starting test submission process...")
+            test_csv = self.create_test_files()
+            responses = self.generate_test_responses()
+
+            class OutputCapture:
+                """Stand-in for stdout that records text and echoes it to the real stdout."""
+                def __init__(self):
+                    self.output = []
+                def write(self, text):
+                    self.output.append(text)
+                    sys.__stdout__.write(text)
+                def flush(self):
+                    pass
+                def get_output(self):
+                    return "".join(self.output)
+
+            def mock_input(prompt):
+                self.log(f"Prompt: {prompt}")
+                if prompt in responses:
+                    response = responses[prompt]
+                    self.log(f"Response: {response}")
+                    return response
+                return ""  # prompts without a scripted answer get an empty reply
+
+            output_capture = OutputCapture()
+            sys.stdout = output_capture
+            try:
+                with patch("builtins.input", side_effect=mock_input):
+                    import cli
+                    cli.main()
+            except Exception as e:
+                self.log(f"Error during CLI execution: {e}")
+                return False
+            finally:
+                # Always undo the redirect, including on the early return above.
+                sys.stdout = sys.__stdout__
+
+            record_id = self.extract_record_id(output_capture.get_output())
+            if not record_id:
+                self.log("Failed to extract record ID")
+                return False
+            self.log(f"Successfully created record with ID: {record_id}")
+
+            return self.download_and_validate_record(record_id)
+        except Exception as e:
+            self.log(f"Error in test submission: {e}")
+            return False
+        finally:
+            # Remove the sample CSV if it was created.
+            if test_csv and os.path.exists(test_csv):
+                os.remove(test_csv)
+                self.log("Test files cleaned up")
+
+def main():
+    """Run one scripted submission and log whether it succeeded."""
+    tester = CaltechDataTester()
+    success = tester.run_test_submission()
+
+    if success:
+        tester.log("\n🎉 Test submission and validation completed successfully!")
+    else:
+        tester.log("\n❌ Test submission or validation failed - check logs for details")
+
+    tester.log(f"\nTest logs available at: {tester.log_file}")
+
+if __name__ == "__main__":
+    main()
From 88bee6bd055236bfee5c58f3ea9948ccf2bcc48a Mon Sep 17 00:00:00 2001
From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com>
Date: Fri, 8 Nov 2024 02:14:52 -0800
Subject: [PATCH 33/42] Delete caltechdata_api/tester directory
---
caltechdata_api/tester/caltechdata_write.py | 224 ----------
caltechdata_api/tester/invalid_datacite/files | 1 -
.../invalid_datacite/invalid_metadata_1.json | 12 -
.../invalid_datacite/invalid_metadata_10.json | 18 -
.../invalid_datacite/invalid_metadata_2.json | 13 -
.../invalid_datacite/invalid_metadata_3.json | 12 -
.../invalid_datacite/invalid_metadata_4.json | 20 -
.../invalid_datacite/invalid_metadata_5.json | 22 -
.../invalid_datacite/invalid_metadata_6.json | 22 -
.../invalid_datacite/invalid_metadata_7.json | 20 -
.../invalid_datacite/invalid_metadata_8.json | 20 -
.../invalid_datacite/invalid_metadata_9.json | 16 -
.../invalid_datacite/missing_creators.json | 263 ------------
.../invalid_datacite/missing_publisher.json | 350 ---------------
.../invalid_datacite/multiple_errors.json | 263 ------------
.../invalid_datacite/type_error_creators.json | 264 ------------
caltechdata_api/tester/invalid_generator.py | 116 -----
.../tester/missing_fields_generator.py | 403 ------------------
caltechdata_api/tester/test_unit.py | 122 ------
.../tester/validatorfordownload.py | 54 ---
caltechdata_api/tester/validfiles/1171.json | 351 ---------------
caltechdata_api/tester/validfiles/1235.json | 91 ----
caltechdata_api/tester/validfiles/1250.json | 1 -
caltechdata_api/tester/validfiles/1259.json | 1 -
caltechdata_api/tester/validfiles/1300.json | 1 -
caltechdata_api/tester/validfiles/210.json | 1 -
caltechdata_api/tester/validfiles/266.json | 1 -
caltechdata_api/tester/validfiles/267.json | 1 -
caltechdata_api/tester/validfiles/268.json | 1 -
caltechdata_api/tester/validfiles/283.json | 1 -
caltechdata_api/tester/validfiles/293.json | 1 -
caltechdata_api/tester/validfiles/301.json | 1 -
caltechdata_api/tester/validfiles/970.json | 1 -
caltechdata_api/tester/validfiles/file.py | 1 -
34 files changed, 2689 deletions(-)
delete mode 100644 caltechdata_api/tester/caltechdata_write.py
delete mode 100644 caltechdata_api/tester/invalid_datacite/files
delete mode 100644 caltechdata_api/tester/invalid_datacite/invalid_metadata_1.json
delete mode 100644 caltechdata_api/tester/invalid_datacite/invalid_metadata_10.json
delete mode 100644 caltechdata_api/tester/invalid_datacite/invalid_metadata_2.json
delete mode 100644 caltechdata_api/tester/invalid_datacite/invalid_metadata_3.json
delete mode 100644 caltechdata_api/tester/invalid_datacite/invalid_metadata_4.json
delete mode 100644 caltechdata_api/tester/invalid_datacite/invalid_metadata_5.json
delete mode 100644 caltechdata_api/tester/invalid_datacite/invalid_metadata_6.json
delete mode 100644 caltechdata_api/tester/invalid_datacite/invalid_metadata_7.json
delete mode 100644 caltechdata_api/tester/invalid_datacite/invalid_metadata_8.json
delete mode 100644 caltechdata_api/tester/invalid_datacite/invalid_metadata_9.json
delete mode 100644 caltechdata_api/tester/invalid_datacite/missing_creators.json
delete mode 100644 caltechdata_api/tester/invalid_datacite/missing_publisher.json
delete mode 100644 caltechdata_api/tester/invalid_datacite/multiple_errors.json
delete mode 100644 caltechdata_api/tester/invalid_datacite/type_error_creators.json
delete mode 100644 caltechdata_api/tester/invalid_generator.py
delete mode 100644 caltechdata_api/tester/missing_fields_generator.py
delete mode 100644 caltechdata_api/tester/test_unit.py
delete mode 100644 caltechdata_api/tester/validatorfordownload.py
delete mode 100644 caltechdata_api/tester/validfiles/1171.json
delete mode 100644 caltechdata_api/tester/validfiles/1235.json
delete mode 100644 caltechdata_api/tester/validfiles/1250.json
delete mode 100644 caltechdata_api/tester/validfiles/1259.json
delete mode 100644 caltechdata_api/tester/validfiles/1300.json
delete mode 100644 caltechdata_api/tester/validfiles/210.json
delete mode 100644 caltechdata_api/tester/validfiles/266.json
delete mode 100644 caltechdata_api/tester/validfiles/267.json
delete mode 100644 caltechdata_api/tester/validfiles/268.json
delete mode 100644 caltechdata_api/tester/validfiles/283.json
delete mode 100644 caltechdata_api/tester/validfiles/293.json
delete mode 100644 caltechdata_api/tester/validfiles/301.json
delete mode 100644 caltechdata_api/tester/validfiles/970.json
delete mode 100644 caltechdata_api/tester/validfiles/file.py
diff --git a/caltechdata_api/tester/caltechdata_write.py b/caltechdata_api/tester/caltechdata_write.py
deleted file mode 100644
index 332135e..0000000
--- a/caltechdata_api/tester/caltechdata_write.py
+++ /dev/null
@@ -1,224 +0,0 @@
-import copy
-import json
-import os
-import requests
-import s3fs
-from requests import session
-from json.decoder import JSONDecodeError
-from caltechdata_api import customize_schema
-from caltechdata_api.utils import humanbytes
-
-
-def write_files_rdm(files, file_link, headers, f_headers, s3=None, keepfiles=False):
- f_json = []
- f_list = {}
- fnames = []
- for f in files:
- split = f.split("/")
- filename = split[-1]
- if filename in fnames:
- # We can't have a duplicate filename
- # Assume that the previous path value makes a unique name
- filename = f"{split[-2]}-{split[-1]}"
- fnames.append(filename)
- f_json.append({"key": filename})
- f_list[filename] = f
- # Now we see if any existing draft files need to be replaced
- result = requests.get(file_link, headers=f_headers)
- if result.status_code == 200:
- ex_files = result.json()["entries"]
- for ex in ex_files:
- if ex["key"] in f_list:
- result = requests.delete(ex["links"]["self"], headers=f_headers)
- if result.status_code != 204:
- raise Exception(result.text)
- # Create new file upload links
- result = requests.post(file_link, headers=headers, json=f_json)
- if result.status_code != 201:
- raise Exception(result.text)
- # Now we have the upload links
- for entry in result.json()["entries"]:
- self = entry["links"]["self"]
- link = entry["links"]["content"]
- commit = entry["links"]["commit"]
- name = entry["key"]
- if name in f_list:
- if s3:
- print("Downloading", f_list[name])
- s3.download(f_list[name], name)
- infile = open(name, "rb")
- else:
- infile = open(f_list[name], "rb")
- result = requests.put(link, headers=f_headers, data=infile)
- if result.status_code != 200:
- raise Exception(result.text)
- result = requests.post(commit, headers=headers)
- if result.status_code != 200:
- raise Exception(result.text)
- else:
- # Delete any files not included in this write command
- if keepfiles == False:
- result = requests.delete(self, headers=f_headers)
- if result.status_code != 204:
- raise Exception(result.text)
-
-
-def add_file_links(
- metadata, file_links, file_descriptions=[], additional_descriptions="", s3_link=None
-):
- # Currently configured for S3 links, assuming all are at the same endpoint
- link_string = ""
- endpoint = "https://" + file_links[0].split("/")[2]
- s3 = s3fs.S3FileSystem(anon=True, client_kwargs={"endpoint_url": endpoint})
- index = 0
- for link in file_links:
- file = link.split("/")[-1]
- path = link.split(endpoint)[1]
- size = s3.info(path)["size"]
- size = humanbytes(size)
- try:
- desc = file_descriptions[index] + ","
- except IndexError:
- desc = ""
- if link_string == "":
- if s3_link:
- link_string = f"Files available via S3 at {s3_link}</p>"
- else:
- cleaned = link.strip(file)
- link_string = f"Files available via S3 at {cleaned}</p>"
- link_string += f"""{file}, {desc} {size}
- <a role="button" class="ui compact mini button" href="{link}"
- > <i class="download icon"></i> Download </a>
</p>
- """
- index += 1
- # Tack on any additional descriptions
- if additional_descriptions != "":
- link_string += additional_descriptions
-
- description = {"description": link_string, "descriptionType": "files"}
- metadata["descriptions"].append(description)
- return metadata
-
-
-def send_to_community(review_link, data, headers, publish, community, message=None):
- if not message:
- message = "This record is submitted automatically with the CaltechDATA API"
-
- data = {
- "receiver": {"community": community},
- "type": "community-submission",
- }
- result = requests.put(review_link, json=data, headers=headers)
- if result.status_code != 200:
- raise Exception(result.text)
- submit_link = review_link.replace("/review", "/actions/submit-review")
- data = comment = {
- "payload": {
- "content": message,
- "format": "html",
- }
- }
- result = requests.post(submit_link, json=data, headers=headers)
- if result.status_code != 202:
- raise Exception(result.text)
- if publish:
- accept_link = result.json()["links"]["actions"]["accept"]
- data = comment = {
- "payload": {
- "content": "This record is accepted automatically with the CaltechDATA API",
- "format": "html",
- }
- }
- result = requests.post(accept_link, json=data, headers=headers)
- if result.status_code != 200:
- raise Exception(result.text)
- return result
-
-def caltechdata_write(metadata, token=None, files=[], production=False, schema="43", publish=False, file_links=[],
- s3=None, community=None, authors=False, file_descriptions=[], s3_link=None,
- default_preview=None, review_message=None):
- metadata = copy.deepcopy(metadata)
-
- if not token:
- token = os.environ["RDMTOK"]
-
- if isinstance(files, str):
- files = [files]
-
- if file_links:
- metadata = add_file_links(metadata, file_links, file_descriptions, s3_link=s3_link)
-
- url = "https://data.caltech.edu/" if production else "https://data.caltechlibrary.dev/"
-
- headers = {
- "Authorization": f"Bearer {token}",
- "Content-type": "application/json",
- }
- f_headers = {
- "Authorization": f"Bearer {token}",
- "Content-type": "application/octet-stream",
- }
-
- if not files:
- metadata["files"] = {"enabled": False}
-
- result = requests.post(f"{url}api/records", headers=headers, json=metadata)
- if result.status_code != 201:
- raise Exception(result.text)
-
- idv = result.json()["id"]
- record_url = f"{url}records/{idv}"
-
- if files:
- file_link = result.json()["links"]["files"]
- write_files_rdm(files, file_link, headers, f_headers, s3)
-
- if community:
- review_link = result.json()["links"]["review"]
- send_to_community(review_link, metadata, headers, publish, community, review_message)
- elif publish:
- publish_link = result.json()["links"]["publish"]
- result = requests.post(publish_link, json=metadata, headers=headers)
- if result.status_code != 202:
- raise Exception(result.text)
-
- return record_url
-
-
-def main():
- parser = argparse.ArgumentParser(description="Upload files to CaltechDATA with metadata")
- parser.add_argument("--metadata", required=True, type=str, help="Path to JSON file with metadata")
- parser.add_argument("--token", default=os.environ.get("RDMTOK"), type=str, help="API token for authentication (defaults to RDMTOK environment variable)")
- parser.add_argument("--files", nargs="*", default=[], help="List of file paths to upload (default: empty list)")
- parser.add_argument("--production", action="store_true", help="Use production environment (default: False)")
- parser.add_argument("--schema", default="43", help="Metadata schema version (default: '43')")
- parser.add_argument("--publish", action="store_true", help="Publish the record after upload (default: False)")
- parser.add_argument("--file_links", nargs="*", default=[], help="List of file links to add (default: empty list)")
- parser.add_argument("--community", type=str, default=None, help="Community ID for submission (default: None)")
- parser.add_argument("--file_descriptions", nargs="*", default=[], help="Descriptions for each file link (default: empty list)")
- parser.add_argument("--s3_link", type=str, default=None, help="Link to S3 bucket (default: None)")
- parser.add_argument("--review_message", type=str, default="This record is submitted automatically with the CaltechDATA API", help="Message for review process (default message)")
-
- args = parser.parse_args()
-
- with open(args.metadata, "r") as f:
- metadata = json.load(f)
-
- record_url = caltechdata_write(
- metadata=metadata,
- token=args.token,
- files=args.files,
- production=args.production,
- schema=args.schema,
- publish=args.publish,
- file_links=args.file_links,
- community=args.community,
- file_descriptions=args.file_descriptions,
- s3_link=args.s3_link,
- review_message=args.review_message,
- )
-
- print(f"Record created with URL: {record_url}")
-
-if __name__ == "__main__":
- main()
\ No newline at end of file
diff --git a/caltechdata_api/tester/invalid_datacite/files b/caltechdata_api/tester/invalid_datacite/files
deleted file mode 100644
index 8b13789..0000000
--- a/caltechdata_api/tester/invalid_datacite/files
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/caltechdata_api/tester/invalid_datacite/invalid_metadata_1.json b/caltechdata_api/tester/invalid_datacite/invalid_metadata_1.json
deleted file mode 100644
index 1bba16b..0000000
--- a/caltechdata_api/tester/invalid_datacite/invalid_metadata_1.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
- "creators": [
- {
- "name": "John Doe"
- }
- ],
- "publisher": "Caltech",
- "publicationYear": "2023",
- "types": {
- "resourceTypeGeneral": "Dataset"
- }
-}
\ No newline at end of file
diff --git a/caltechdata_api/tester/invalid_datacite/invalid_metadata_10.json b/caltechdata_api/tester/invalid_datacite/invalid_metadata_10.json
deleted file mode 100644
index 759757d..0000000
--- a/caltechdata_api/tester/invalid_datacite/invalid_metadata_10.json
+++ /dev/null
@@ -1,18 +0,0 @@
-{
- "titles": [
- {
- "title": "Sample Title"
- }
- ],
- "creators": [
- {
- "name": "John Doe"
- }
- ],
- "version": 1,
- "publisher": "Caltech",
- "publicationYear": "2023",
- "types": {
- "resourceTypeGeneral": "Dataset"
- }
-}
\ No newline at end of file
diff --git a/caltechdata_api/tester/invalid_datacite/invalid_metadata_2.json b/caltechdata_api/tester/invalid_datacite/invalid_metadata_2.json
deleted file mode 100644
index 3899136..0000000
--- a/caltechdata_api/tester/invalid_datacite/invalid_metadata_2.json
+++ /dev/null
@@ -1,13 +0,0 @@
-{
- "titles": [],
- "creators": [
- {
- "name": "John Doe"
- }
- ],
- "publisher": "Caltech",
- "publicationYear": "2023",
- "types": {
- "resourceTypeGeneral": "Dataset"
- }
-}
\ No newline at end of file
diff --git a/caltechdata_api/tester/invalid_datacite/invalid_metadata_3.json b/caltechdata_api/tester/invalid_datacite/invalid_metadata_3.json
deleted file mode 100644
index 707dbab..0000000
--- a/caltechdata_api/tester/invalid_datacite/invalid_metadata_3.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
- "titles": [
- {
- "title": "Sample Title"
- }
- ],
- "publisher": "Caltech",
- "publicationYear": "2023",
- "types": {
- "resourceTypeGeneral": "Dataset"
- }
-}
\ No newline at end of file
diff --git a/caltechdata_api/tester/invalid_datacite/invalid_metadata_4.json b/caltechdata_api/tester/invalid_datacite/invalid_metadata_4.json
deleted file mode 100644
index f7d2fe4..0000000
--- a/caltechdata_api/tester/invalid_datacite/invalid_metadata_4.json
+++ /dev/null
@@ -1,20 +0,0 @@
-{
- "titles": [
- {
- "title": "Sample Title"
- }
- ],
- "creators": [
- {
- "name": "John Doe"
- }
- ],
- "contributors": [
- {}
- ],
- "publisher": "Caltech",
- "publicationYear": "2023",
- "types": {
- "resourceTypeGeneral": "Dataset"
- }
-}
\ No newline at end of file
diff --git a/caltechdata_api/tester/invalid_datacite/invalid_metadata_5.json b/caltechdata_api/tester/invalid_datacite/invalid_metadata_5.json
deleted file mode 100644
index deeff7f..0000000
--- a/caltechdata_api/tester/invalid_datacite/invalid_metadata_5.json
+++ /dev/null
@@ -1,22 +0,0 @@
-{
- "titles": [
- {
- "title": "Sample Title"
- }
- ],
- "creators": [
- {
- "name": "John Doe"
- }
- ],
- "descriptions": [
- {
- "description": "Sample Description"
- }
- ],
- "publisher": "Caltech",
- "publicationYear": "2023",
- "types": {
- "resourceTypeGeneral": "Dataset"
- }
-}
\ No newline at end of file
diff --git a/caltechdata_api/tester/invalid_datacite/invalid_metadata_6.json b/caltechdata_api/tester/invalid_datacite/invalid_metadata_6.json
deleted file mode 100644
index 8fa14f1..0000000
--- a/caltechdata_api/tester/invalid_datacite/invalid_metadata_6.json
+++ /dev/null
@@ -1,22 +0,0 @@
-{
- "titles": [
- {
- "title": "Sample Title"
- }
- ],
- "creators": [
- {
- "name": "John Doe"
- }
- ],
- "fundingReferences": [
- {
- "funderIdentifier": "1234"
- }
- ],
- "publisher": "Caltech",
- "publicationYear": "2023",
- "types": {
- "resourceTypeGeneral": "Dataset"
- }
-}
\ No newline at end of file
diff --git a/caltechdata_api/tester/invalid_datacite/invalid_metadata_7.json b/caltechdata_api/tester/invalid_datacite/invalid_metadata_7.json
deleted file mode 100644
index bae4d11..0000000
--- a/caltechdata_api/tester/invalid_datacite/invalid_metadata_7.json
+++ /dev/null
@@ -1,20 +0,0 @@
-{
- "titles": [
- {
- "title": "Sample Title"
- }
- ],
- "creators": [
- {
- "name": "John Doe"
- }
- ],
- "identifiers": [
- {}
- ],
- "publisher": "Caltech",
- "publicationYear": "2023",
- "types": {
- "resourceTypeGeneral": "Dataset"
- }
-}
\ No newline at end of file
diff --git a/caltechdata_api/tester/invalid_datacite/invalid_metadata_8.json b/caltechdata_api/tester/invalid_datacite/invalid_metadata_8.json
deleted file mode 100644
index 247f3ff..0000000
--- a/caltechdata_api/tester/invalid_datacite/invalid_metadata_8.json
+++ /dev/null
@@ -1,20 +0,0 @@
-{
- "titles": [
- {
- "title": "Sample Title"
- }
- ],
- "creators": [
- {
- "name": "John Doe"
- }
- ],
- "dates": [
- {}
- ],
- "publisher": "Caltech",
- "publicationYear": "2023",
- "types": {
- "resourceTypeGeneral": "Dataset"
- }
-}
\ No newline at end of file
diff --git a/caltechdata_api/tester/invalid_datacite/invalid_metadata_9.json b/caltechdata_api/tester/invalid_datacite/invalid_metadata_9.json
deleted file mode 100644
index 2eddcf1..0000000
--- a/caltechdata_api/tester/invalid_datacite/invalid_metadata_9.json
+++ /dev/null
@@ -1,16 +0,0 @@
-{
- "titles": [
- {
- "title": "Sample Title"
- }
- ],
- "creators": [
- {
- "name": "John Doe"
- }
- ],
- "publicationYear": "2023",
- "types": {
- "resourceTypeGeneral": "Dataset"
- }
-}
\ No newline at end of file
diff --git a/caltechdata_api/tester/invalid_datacite/missing_creators.json b/caltechdata_api/tester/invalid_datacite/missing_creators.json
deleted file mode 100644
index 0d0f1a1..0000000
--- a/caltechdata_api/tester/invalid_datacite/missing_creators.json
+++ /dev/null
@@ -1,263 +0,0 @@
-{
- "contributors": [
- {
- "nameIdentifiers": [
- {
- "nameIdentifier": "grid.20861.3d",
- "nameIdentifierScheme": "GRID"
- }
- ],
- "name": "California Institute of Techonolgy, Pasadena, CA (US)",
- "contributorType": "HostingInstitution"
- },
- {
- "affiliation": [
- {
- "name": "California Institute of Technology, Pasadena, CA (US)"
- }
- ],
- "nameIdentifiers": [
- {
- "nameIdentifier": "0000-0001-5383-8462",
- "nameIdentifierScheme": "ORCID"
- }
- ],
- "name": "Roehl, C. M.",
- "contributorType": "DataCurator"
- },
- {
- "affiliation": [
- {
- "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
- }
- ],
- "nameIdentifiers": [
- {
- "nameIdentifier": "0000-0001-9947-1053",
- "nameIdentifierScheme": "ORCID"
- },
- {
- "nameIdentifier": "D-2563-2012",
- "nameIdentifierScheme": "ResearcherID"
- }
- ],
- "name": "Kimberly Strong",
- "contributorType": "ContactPerson"
- },
- {
- "name": "TCCON",
- "contributorType": "ResearchGroup"
- }
- ],
- "descriptions": [
- {
- "descriptionType": "Abstract",
- "description": "
The Total Carbon Column Observing Network (TCCON) is a network of ground-based Fourier Transform Spectrometers that record direct solar absorption spectra of the atmosphere in the near-infrared. From these spectra, accurate and precise column-averaged abundances of atmospheric constituents including CO2, CH4, N2O, HF, CO, H2O, and HDO, are retrieved. This data set contains observations from the TCCON station at Eureka, Canada."
- },
- {
- "descriptionType": "Other",
- "description": "
Cite this record as:
Strong, K., Roche, S., Franklin, J. E., Mendonca, J., Lutsch, E., Weaver, D., \u2026 Lindenmaier, R. (2019). TCCON data from Eureka (CA), Release GGG2014.R3 [Data set]. CaltechDATA. https://doi.org/10.14291/tccon.ggg2014.eureka01.r3
or choose a different citation style.
Download Citation
"
- },
- {
- "descriptionType": "Other",
- "description": "
Unique Views: 161
Unique Downloads: 7
between January 31, 2019 and July 02, 2020
More info on how stats are collected
"
- }
- ],
- "fundingReferences": [
- {
- "funderName": "Atlantic Innovation Fund"
- },
- {
- "funderName": "Canada Foundation for Innovation",
- "funderIdentifierType": "GRID",
- "funderIdentifier": "grid.439998.6"
- },
- {
- "funderName": "Canadian Foundation for Climate and Atmospheric Sciences"
- },
- {
- "funderName": "Canadian Space Agency",
- "funderIdentifierType": "GRID",
- "funderIdentifier": "grid.236846.d"
- },
- {
- "funderName": "Environment and Climate Change Canada",
- "funderIdentifierType": "GRID",
- "funderIdentifier": "grid.410334.1"
- },
- {
- "funderName": "Government of Canada (International Polar Year funding)",
- "funderIdentifierType": "GRID",
- "funderIdentifier": "grid.451254.3"
- },
- {
- "funderName": "Natural Sciences and Engineering Research Council of Canada",
- "funderIdentifierType": "GRID",
- "funderIdentifier": "grid.452912.9"
- },
- {
- "funderName": "Polar Commission (Northern Scientific Training Program)",
- "funderIdentifierType": "GRID",
- "funderIdentifier": "grid.465477.3"
- },
- {
- "funderName": "Nova Scotia Research Innovation Trust"
- },
- {
- "funderName": "Ministry of Research and Innovation (Ontario Innovation Trust and Ontario Research Fund)",
- "funderIdentifierType": "GRID",
- "funderIdentifier": "grid.451078.f"
- },
- {
- "funderName": "Natural Resources Canada (Polar Continental Shelf Program)",
- "funderIdentifierType": "GRID",
- "funderIdentifier": "grid.202033.0"
- }
- ],
- "language": "eng",
- "relatedIdentifiers": [
- {
- "relatedIdentifier": "10.14291/tccon.ggg2014.documentation.R0/1221662",
- "relationType": "IsDocumentedBy",
- "relatedIdentifierType": "DOI"
- },
- {
- "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R0/1149271",
- "relationType": "IsNewVersionOf",
- "relatedIdentifierType": "DOI"
- },
- {
- "relatedIdentifier": "https://tccon-wiki.caltech.edu/Network_Policy/Data_Use_Policy/Data_Description",
- "relationType": "IsDocumentedBy",
- "relatedIdentifierType": "URL"
- },
- {
- "relatedIdentifier": "https://tccon-wiki.caltech.edu/Sites",
- "relationType": "IsDocumentedBy",
- "relatedIdentifierType": "URL"
- },
- {
- "relatedIdentifier": "10.14291/TCCON.GGG2014",
- "relationType": "IsPartOf",
- "relatedIdentifierType": "DOI"
- },
- {
- "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R1/1325515",
- "relationType": "IsNewVersionOf",
- "relatedIdentifierType": "DOI"
- },
- {
- "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R2",
- "relationType": "IsNewVersionOf",
- "relatedIdentifierType": "DOI"
- }
- ],
- "rightsList": [
- {
- "rights": "TCCON Data License",
- "rightsURI": "https://data.caltech.edu/tindfiles/serve/8298981c-6613-4ed9-9c54-5ef8fb5180f4/"
- }
- ],
- "subjects": [
- {
- "subject": "atmospheric trace gases"
- },
- {
- "subject": "CO2"
- },
- {
- "subject": "CH4"
- },
- {
- "subject": "CO"
- },
- {
- "subject": "N2O"
- },
- {
- "subject": "column-averaged dry-air mole fractions"
- },
- {
- "subject": "remote sensing"
- },
- {
- "subject": "FTIR spectroscopy"
- },
- {
- "subject": "TCCON"
- }
- ],
- "version": "R3",
- "titles": [
- {
- "title": "TCCON data from Eureka (CA), Release GGG2014.R3"
- }
- ],
- "formats": [
- "application/x-netcdf"
- ],
- "dates": [
- {
- "date": "2019-01-31",
- "dateType": "Created"
- },
- {
- "date": "2020-07-01",
- "dateType": "Updated"
- },
- {
- "date": "2010-07-24/2019-08-15",
- "dateType": "Collected"
- },
- {
- "date": "2019-01-31",
- "dateType": "Submitted"
- },
- {
- "date": "2019-01-31",
- "dateType": "Issued"
- }
- ],
- "publicationYear": "2019",
- "publisher": "CaltechDATA",
- "types": {
- "resourceTypeGeneral": "Dataset",
- "resourceType": "Dataset"
- },
- "identifiers": [
- {
- "identifier": "10.14291/tccon.ggg2014.eureka01.R3",
- "identifierType": "DOI"
- },
- {
- "identifier": "1171",
- "identifierType": "CaltechDATA_Identifier"
- },
- {
- "identifier": "GGG2014",
- "identifierType": "Software_Version"
- },
- {
- "identifier": "eu",
- "identifierType": "id"
- },
- {
- "identifier": "eureka01",
- "identifierType": "longName"
- },
- {
- "identifier": "R1",
- "identifierType": "Data_Revision"
- }
- ],
- "geoLocations": [
- {
- "geoLocationPlace": "Eureka, NU (CA)",
- "geoLocationPoint": {
- "pointLatitude": "80.05",
- "pointLongitude": "-86.42"
- }
- }
- ],
- "schemaVersion": "http://datacite.org/schema/kernel-4"
-}
\ No newline at end of file
diff --git a/caltechdata_api/tester/invalid_datacite/missing_publisher.json b/caltechdata_api/tester/invalid_datacite/missing_publisher.json
deleted file mode 100644
index 9035027..0000000
--- a/caltechdata_api/tester/invalid_datacite/missing_publisher.json
+++ /dev/null
@@ -1,350 +0,0 @@
-{
- "contributors": [
- {
- "nameIdentifiers": [
- {
- "nameIdentifier": "grid.20861.3d",
- "nameIdentifierScheme": "GRID"
- }
- ],
- "name": "California Institute of Techonolgy, Pasadena, CA (US)",
- "contributorType": "HostingInstitution"
- },
- {
- "affiliation": [
- {
- "name": "California Institute of Technology, Pasadena, CA (US)"
- }
- ],
- "nameIdentifiers": [
- {
- "nameIdentifier": "0000-0001-5383-8462",
- "nameIdentifierScheme": "ORCID"
- }
- ],
- "name": "Roehl, C. M.",
- "contributorType": "DataCurator"
- },
- {
- "affiliation": [
- {
- "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
- }
- ],
- "nameIdentifiers": [
- {
- "nameIdentifier": "0000-0001-9947-1053",
- "nameIdentifierScheme": "ORCID"
- },
- {
- "nameIdentifier": "D-2563-2012",
- "nameIdentifierScheme": "ResearcherID"
- }
- ],
- "name": "Kimberly Strong",
- "contributorType": "ContactPerson"
- },
- {
- "name": "TCCON",
- "contributorType": "ResearchGroup"
- }
- ],
- "descriptions": [
- {
- "descriptionType": "Abstract",
- "description": "
The Total Carbon Column Observing Network (TCCON) is a network of ground-based Fourier Transform Spectrometers that record direct solar absorption spectra of the atmosphere in the near-infrared. From these spectra, accurate and precise column-averaged abundances of atmospheric constituents including CO2, CH4, N2O, HF, CO, H2O, and HDO, are retrieved. This data set contains observations from the TCCON station at Eureka, Canada."
- },
- {
- "descriptionType": "Other",
- "description": "
Cite this record as:
Strong, K., Roche, S., Franklin, J. E., Mendonca, J., Lutsch, E., Weaver, D., \u2026 Lindenmaier, R. (2019). TCCON data from Eureka (CA), Release GGG2014.R3 [Data set]. CaltechDATA. https://doi.org/10.14291/tccon.ggg2014.eureka01.r3
or choose a different citation style.
Download Citation
"
- },
- {
- "descriptionType": "Other",
- "description": "
Unique Views: 161
Unique Downloads: 7
between January 31, 2019 and July 02, 2020
More info on how stats are collected
"
- }
- ],
- "fundingReferences": [
- {
- "funderName": "Atlantic Innovation Fund"
- },
- {
- "funderName": "Canada Foundation for Innovation",
- "funderIdentifierType": "GRID",
- "funderIdentifier": "grid.439998.6"
- },
- {
- "funderName": "Canadian Foundation for Climate and Atmospheric Sciences"
- },
- {
- "funderName": "Canadian Space Agency",
- "funderIdentifierType": "GRID",
- "funderIdentifier": "grid.236846.d"
- },
- {
- "funderName": "Environment and Climate Change Canada",
- "funderIdentifierType": "GRID",
- "funderIdentifier": "grid.410334.1"
- },
- {
- "funderName": "Government of Canada (International Polar Year funding)",
- "funderIdentifierType": "GRID",
- "funderIdentifier": "grid.451254.3"
- },
- {
- "funderName": "Natural Sciences and Engineering Research Council of Canada",
- "funderIdentifierType": "GRID",
- "funderIdentifier": "grid.452912.9"
- },
- {
- "funderName": "Polar Commission (Northern Scientific Training Program)",
- "funderIdentifierType": "GRID",
- "funderIdentifier": "grid.465477.3"
- },
- {
- "funderName": "Nova Scotia Research Innovation Trust"
- },
- {
- "funderName": "Ministry of Research and Innovation (Ontario Innovation Trust and Ontario Research Fund)",
- "funderIdentifierType": "GRID",
- "funderIdentifier": "grid.451078.f"
- },
- {
- "funderName": "Natural Resources Canada (Polar Continental Shelf Program)",
- "funderIdentifierType": "GRID",
- "funderIdentifier": "grid.202033.0"
- }
- ],
- "language": "eng",
- "relatedIdentifiers": [
- {
- "relatedIdentifier": "10.14291/tccon.ggg2014.documentation.R0/1221662",
- "relationType": "IsDocumentedBy",
- "relatedIdentifierType": "DOI"
- },
- {
- "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R0/1149271",
- "relationType": "IsNewVersionOf",
- "relatedIdentifierType": "DOI"
- },
- {
- "relatedIdentifier": "https://tccon-wiki.caltech.edu/Network_Policy/Data_Use_Policy/Data_Description",
- "relationType": "IsDocumentedBy",
- "relatedIdentifierType": "URL"
- },
- {
- "relatedIdentifier": "https://tccon-wiki.caltech.edu/Sites",
- "relationType": "IsDocumentedBy",
- "relatedIdentifierType": "URL"
- },
- {
- "relatedIdentifier": "10.14291/TCCON.GGG2014",
- "relationType": "IsPartOf",
- "relatedIdentifierType": "DOI"
- },
- {
- "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R1/1325515",
- "relationType": "IsNewVersionOf",
- "relatedIdentifierType": "DOI"
- },
- {
- "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R2",
- "relationType": "IsNewVersionOf",
- "relatedIdentifierType": "DOI"
- }
- ],
- "rightsList": [
- {
- "rights": "TCCON Data License",
- "rightsURI": "https://data.caltech.edu/tindfiles/serve/8298981c-6613-4ed9-9c54-5ef8fb5180f4/"
- }
- ],
- "subjects": [
- {
- "subject": "atmospheric trace gases"
- },
- {
- "subject": "CO2"
- },
- {
- "subject": "CH4"
- },
- {
- "subject": "CO"
- },
- {
- "subject": "N2O"
- },
- {
- "subject": "column-averaged dry-air mole fractions"
- },
- {
- "subject": "remote sensing"
- },
- {
- "subject": "FTIR spectroscopy"
- },
- {
- "subject": "TCCON"
- }
- ],
- "version": "R3",
- "titles": [
- {
- "title": "TCCON data from Eureka (CA), Release GGG2014.R3"
- }
- ],
- "formats": [
- "application/x-netcdf"
- ],
- "dates": [
- {
- "date": "2019-01-31",
- "dateType": "Created"
- },
- {
- "date": "2020-07-01",
- "dateType": "Updated"
- },
- {
- "date": "2010-07-24/2019-08-15",
- "dateType": "Collected"
- },
- {
- "date": "2019-01-31",
- "dateType": "Submitted"
- },
- {
- "date": "2019-01-31",
- "dateType": "Issued"
- }
- ],
- "publicationYear": "2019",
- "types": {
- "resourceTypeGeneral": "Dataset",
- "resourceType": "Dataset"
- },
- "identifiers": [
- {
- "identifier": "10.14291/tccon.ggg2014.eureka01.R3",
- "identifierType": "DOI"
- },
- {
- "identifier": "1171",
- "identifierType": "CaltechDATA_Identifier"
- },
- {
- "identifier": "GGG2014",
- "identifierType": "Software_Version"
- },
- {
- "identifier": "eu",
- "identifierType": "id"
- },
- {
- "identifier": "eureka01",
- "identifierType": "longName"
- },
- {
- "identifier": "R1",
- "identifierType": "Data_Revision"
- }
- ],
- "creators": [
- {
- "affiliation": [
- {
- "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
- }
- ],
- "name": "Strong, K."
- },
- {
- "affiliation": [
- {
- "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
- }
- ],
- "name": "Roche, S."
- },
- {
- "affiliation": [
- {
- "name": "School of Engineering and Applied Sciences, Harvard University, Cambridge, MA (USA)"
- }
- ],
- "name": "Franklin, J. E."
- },
- {
- "affiliation": [
- {
- "name": "Environment and Climate Change Canada, Downsview, ON (CA)"
- }
- ],
- "name": "Mendonca, J."
- },
- {
- "affiliation": [
- {
- "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
- }
- ],
- "name": "Lutsch, E."
- },
- {
- "affiliation": [
- {
- "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
- }
- ],
- "name": "Weaver, D."
- },
- {
- "affiliation": [
- {
- "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
- }
- ],
- "name": "Fogal, P. F."
- },
- {
- "affiliation": [
- {
- "name": "Department of Physics & Atmospheric Science, Dalhousie University, Halifax, NS, CA"
- }
- ],
- "name": "Drummond, J. R."
- },
- {
- "affiliation": [
- {
- "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
- },
- {
- "name": "UCAR Center for Science Education, Boulder, CO (US)"
- }
- ],
- "name": "Batchelor, R."
- },
- {
- "affiliation": [
- {
- "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
- },
- {
- "name": "Pacific Northwest National Laboratory, Richland, WA (US)"
- }
- ],
- "name": "Lindenmaier, R."
- }
- ],
- "geoLocations": [
- {
- "geoLocationPlace": "Eureka, NU (CA)",
- "geoLocationPoint": {
- "pointLatitude": "80.05",
- "pointLongitude": "-86.42"
- }
- }
- ],
- "schemaVersion": "http://datacite.org/schema/kernel-4"
-}
\ No newline at end of file
diff --git a/caltechdata_api/tester/invalid_datacite/multiple_errors.json b/caltechdata_api/tester/invalid_datacite/multiple_errors.json
deleted file mode 100644
index c18931b..0000000
--- a/caltechdata_api/tester/invalid_datacite/multiple_errors.json
+++ /dev/null
@@ -1,263 +0,0 @@
-{
- "contributors": [
- {
- "nameIdentifiers": [
- {
- "nameIdentifier": "grid.20861.3d",
- "nameIdentifierScheme": "GRID"
- }
- ],
- "name": "California Institute of Techonolgy, Pasadena, CA (US)",
- "contributorType": "HostingInstitution"
- },
- {
- "affiliation": [
- {
- "name": "California Institute of Technology, Pasadena, CA (US)"
- }
- ],
- "nameIdentifiers": [
- {
- "nameIdentifier": "0000-0001-5383-8462",
- "nameIdentifierScheme": "ORCID"
- }
- ],
- "name": "Roehl, C. M.",
- "contributorType": "DataCurator"
- },
- {
- "affiliation": [
- {
- "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
- }
- ],
- "nameIdentifiers": [
- {
- "nameIdentifier": "0000-0001-9947-1053",
- "nameIdentifierScheme": "ORCID"
- },
- {
- "nameIdentifier": "D-2563-2012",
- "nameIdentifierScheme": "ResearcherID"
- }
- ],
- "name": "Kimberly Strong",
- "contributorType": "ContactPerson"
- },
- {
- "name": "TCCON",
- "contributorType": "ResearchGroup"
- }
- ],
- "descriptions": [
- {
- "descriptionType": "Abstract",
- "description": "
The Total Carbon Column Observing Network (TCCON) is a network of ground-based Fourier Transform Spectrometers that record direct solar absorption spectra of the atmosphere in the near-infrared. From these spectra, accurate and precise column-averaged abundances of atmospheric constituents including CO2, CH4, N2O, HF, CO, H2O, and HDO, are retrieved. This data set contains observations from the TCCON station at Eureka, Canada."
- },
- {
- "descriptionType": "Other",
- "description": "
Cite this record as:
Strong, K., Roche, S., Franklin, J. E., Mendonca, J., Lutsch, E., Weaver, D., \u2026 Lindenmaier, R. (2019). TCCON data from Eureka (CA), Release GGG2014.R3 [Data set]. CaltechDATA. https://doi.org/10.14291/tccon.ggg2014.eureka01.r3
or choose a different citation style.
Download Citation
"
- },
- {
- "descriptionType": "Other",
- "description": "
Unique Views: 161
Unique Downloads: 7
between January 31, 2019 and July 02, 2020
More info on how stats are collected
"
- }
- ],
- "fundingReferences": [
- {
- "funderName": "Atlantic Innovation Fund"
- },
- {
- "funderName": "Canada Foundation for Innovation",
- "funderIdentifierType": "GRID",
- "funderIdentifier": "grid.439998.6"
- },
- {
- "funderName": "Canadian Foundation for Climate and Atmospheric Sciences"
- },
- {
- "funderName": "Canadian Space Agency",
- "funderIdentifierType": "GRID",
- "funderIdentifier": "grid.236846.d"
- },
- {
- "funderName": "Environment and Climate Change Canada",
- "funderIdentifierType": "GRID",
- "funderIdentifier": "grid.410334.1"
- },
- {
- "funderName": "Government of Canada (International Polar Year funding)",
- "funderIdentifierType": "GRID",
- "funderIdentifier": "grid.451254.3"
- },
- {
- "funderName": "Natural Sciences and Engineering Research Council of Canada",
- "funderIdentifierType": "GRID",
- "funderIdentifier": "grid.452912.9"
- },
- {
- "funderName": "Polar Commission (Northern Scientific Training Program)",
- "funderIdentifierType": "GRID",
- "funderIdentifier": "grid.465477.3"
- },
- {
- "funderName": "Nova Scotia Research Innovation Trust"
- },
- {
- "funderName": "Ministry of Research and Innovation (Ontario Innovation Trust and Ontario Research Fund)",
- "funderIdentifierType": "GRID",
- "funderIdentifier": "grid.451078.f"
- },
- {
- "funderName": "Natural Resources Canada (Polar Continental Shelf Program)",
- "funderIdentifierType": "GRID",
- "funderIdentifier": "grid.202033.0"
- }
- ],
- "language": "eng",
- "relatedIdentifiers": [
- {
- "relatedIdentifier": "10.14291/tccon.ggg2014.documentation.R0/1221662",
- "relationType": "IsDocumentedBy",
- "relatedIdentifierType": "DOI"
- },
- {
- "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R0/1149271",
- "relationType": "IsNewVersionOf",
- "relatedIdentifierType": "DOI"
- },
- {
- "relatedIdentifier": "https://tccon-wiki.caltech.edu/Network_Policy/Data_Use_Policy/Data_Description",
- "relationType": "IsDocumentedBy",
- "relatedIdentifierType": "URL"
- },
- {
- "relatedIdentifier": "https://tccon-wiki.caltech.edu/Sites",
- "relationType": "IsDocumentedBy",
- "relatedIdentifierType": "URL"
- },
- {
- "relatedIdentifier": "10.14291/TCCON.GGG2014",
- "relationType": "IsPartOf",
- "relatedIdentifierType": "DOI"
- },
- {
- "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R1/1325515",
- "relationType": "IsNewVersionOf",
- "relatedIdentifierType": "DOI"
- },
- {
- "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R2",
- "relationType": "IsNewVersionOf",
- "relatedIdentifierType": "DOI"
- }
- ],
- "rightsList": [
- {
- "rights": "TCCON Data License",
- "rightsURI": "https://data.caltech.edu/tindfiles/serve/8298981c-6613-4ed9-9c54-5ef8fb5180f4/"
- }
- ],
- "subjects": [
- {
- "subject": "atmospheric trace gases"
- },
- {
- "subject": "CO2"
- },
- {
- "subject": "CH4"
- },
- {
- "subject": "CO"
- },
- {
- "subject": "N2O"
- },
- {
- "subject": "column-averaged dry-air mole fractions"
- },
- {
- "subject": "remote sensing"
- },
- {
- "subject": "FTIR spectroscopy"
- },
- {
- "subject": "TCCON"
- }
- ],
- "version": "R3",
- "titles": [
- {
- "title": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
- }
- ],
- "formats": [
- "application/x-netcdf"
- ],
- "dates": [
- {
- "date": "31-01-2019",
- "dateType": "Created"
- },
- {
- "date": "2020-07-01",
- "dateType": "Updated"
- },
- {
- "date": "2010-07-24/2019-08-15",
- "dateType": "Collected"
- },
- {
- "date": "2019-01-31",
- "dateType": "Submitted"
- },
- {
- "date": "2019-01-31",
- "dateType": "Issued"
- }
- ],
- "publicationYear": "2019",
- "publisher": "CaltechDATA",
- "types": {
- "resourceTypeGeneral": "Dataset",
- "resourceType": "Dataset"
- },
- "identifiers": [
- {
- "identifier": "10.14291/tccon.ggg2014.eureka01.R3",
- "identifierType": "DOI"
- },
- {
- "identifier": "1171",
- "identifierType": "CaltechDATA_Identifier"
- },
- {
- "identifier": "GGG2014",
- "identifierType": "Software_Version"
- },
- {
- "identifier": "eu",
- "identifierType": "id"
- },
- {
- "identifier": "eureka01",
- "identifierType": "longName"
- },
- {
- "identifier": "R1",
- "identifierType": "Data_Revision"
- }
- ],
- "geoLocations": [
- {
- "geoLocationPlace": "Eureka, NU (CA)",
- "geoLocationPoint": {
- "pointLatitude": "80.05",
- "pointLongitude": "-86.42"
- }
- }
- ],
- "schemaVersion": "http://datacite.org/schema/kernel-4"
-}
\ No newline at end of file
diff --git a/caltechdata_api/tester/invalid_datacite/type_error_creators.json b/caltechdata_api/tester/invalid_datacite/type_error_creators.json
deleted file mode 100644
index 6200870..0000000
--- a/caltechdata_api/tester/invalid_datacite/type_error_creators.json
+++ /dev/null
@@ -1,264 +0,0 @@
-{
- "contributors": [
- {
- "nameIdentifiers": [
- {
- "nameIdentifier": "grid.20861.3d",
- "nameIdentifierScheme": "GRID"
- }
- ],
- "name": "California Institute of Techonolgy, Pasadena, CA (US)",
- "contributorType": "HostingInstitution"
- },
- {
- "affiliation": [
- {
- "name": "California Institute of Technology, Pasadena, CA (US)"
- }
- ],
- "nameIdentifiers": [
- {
- "nameIdentifier": "0000-0001-5383-8462",
- "nameIdentifierScheme": "ORCID"
- }
- ],
- "name": "Roehl, C. M.",
- "contributorType": "DataCurator"
- },
- {
- "affiliation": [
- {
- "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
- }
- ],
- "nameIdentifiers": [
- {
- "nameIdentifier": "0000-0001-9947-1053",
- "nameIdentifierScheme": "ORCID"
- },
- {
- "nameIdentifier": "D-2563-2012",
- "nameIdentifierScheme": "ResearcherID"
- }
- ],
- "name": "Kimberly Strong",
- "contributorType": "ContactPerson"
- },
- {
- "name": "TCCON",
- "contributorType": "ResearchGroup"
- }
- ],
- "descriptions": [
- {
- "descriptionType": "Abstract",
- "description": "
The Total Carbon Column Observing Network (TCCON) is a network of ground-based Fourier Transform Spectrometers that record direct solar absorption spectra of the atmosphere in the near-infrared. From these spectra, accurate and precise column-averaged abundances of atmospheric constituents including CO2, CH4, N2O, HF, CO, H2O, and HDO, are retrieved. This data set contains observations from the TCCON station at Eureka, Canada."
- },
- {
- "descriptionType": "Other",
- "description": "
Cite this record as:
Strong, K., Roche, S., Franklin, J. E., Mendonca, J., Lutsch, E., Weaver, D., \u2026 Lindenmaier, R. (2019). TCCON data from Eureka (CA), Release GGG2014.R3 [Data set]. CaltechDATA. https://doi.org/10.14291/tccon.ggg2014.eureka01.r3
or choose a different citation style.
Download Citation
"
- },
- {
- "descriptionType": "Other",
- "description": "
Unique Views: 161
Unique Downloads: 7
between January 31, 2019 and July 02, 2020
More info on how stats are collected
"
- }
- ],
- "fundingReferences": [
- {
- "funderName": "Atlantic Innovation Fund"
- },
- {
- "funderName": "Canada Foundation for Innovation",
- "funderIdentifierType": "GRID",
- "funderIdentifier": "grid.439998.6"
- },
- {
- "funderName": "Canadian Foundation for Climate and Atmospheric Sciences"
- },
- {
- "funderName": "Canadian Space Agency",
- "funderIdentifierType": "GRID",
- "funderIdentifier": "grid.236846.d"
- },
- {
- "funderName": "Environment and Climate Change Canada",
- "funderIdentifierType": "GRID",
- "funderIdentifier": "grid.410334.1"
- },
- {
- "funderName": "Government of Canada (International Polar Year funding)",
- "funderIdentifierType": "GRID",
- "funderIdentifier": "grid.451254.3"
- },
- {
- "funderName": "Natural Sciences and Engineering Research Council of Canada",
- "funderIdentifierType": "GRID",
- "funderIdentifier": "grid.452912.9"
- },
- {
- "funderName": "Polar Commission (Northern Scientific Training Program)",
- "funderIdentifierType": "GRID",
- "funderIdentifier": "grid.465477.3"
- },
- {
- "funderName": "Nova Scotia Research Innovation Trust"
- },
- {
- "funderName": "Ministry of Research and Innovation (Ontario Innovation Trust and Ontario Research Fund)",
- "funderIdentifierType": "GRID",
- "funderIdentifier": "grid.451078.f"
- },
- {
- "funderName": "Natural Resources Canada (Polar Continental Shelf Program)",
- "funderIdentifierType": "GRID",
- "funderIdentifier": "grid.202033.0"
- }
- ],
- "language": "eng",
- "relatedIdentifiers": [
- {
- "relatedIdentifier": "10.14291/tccon.ggg2014.documentation.R0/1221662",
- "relationType": "IsDocumentedBy",
- "relatedIdentifierType": "DOI"
- },
- {
- "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R0/1149271",
- "relationType": "IsNewVersionOf",
- "relatedIdentifierType": "DOI"
- },
- {
- "relatedIdentifier": "https://tccon-wiki.caltech.edu/Network_Policy/Data_Use_Policy/Data_Description",
- "relationType": "IsDocumentedBy",
- "relatedIdentifierType": "URL"
- },
- {
- "relatedIdentifier": "https://tccon-wiki.caltech.edu/Sites",
- "relationType": "IsDocumentedBy",
- "relatedIdentifierType": "URL"
- },
- {
- "relatedIdentifier": "10.14291/TCCON.GGG2014",
- "relationType": "IsPartOf",
- "relatedIdentifierType": "DOI"
- },
- {
- "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R1/1325515",
- "relationType": "IsNewVersionOf",
- "relatedIdentifierType": "DOI"
- },
- {
- "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R2",
- "relationType": "IsNewVersionOf",
- "relatedIdentifierType": "DOI"
- }
- ],
- "rightsList": [
- {
- "rights": "TCCON Data License",
- "rightsURI": "https://data.caltech.edu/tindfiles/serve/8298981c-6613-4ed9-9c54-5ef8fb5180f4/"
- }
- ],
- "subjects": [
- {
- "subject": "atmospheric trace gases"
- },
- {
- "subject": "CO2"
- },
- {
- "subject": "CH4"
- },
- {
- "subject": "CO"
- },
- {
- "subject": "N2O"
- },
- {
- "subject": "column-averaged dry-air mole fractions"
- },
- {
- "subject": "remote sensing"
- },
- {
- "subject": "FTIR spectroscopy"
- },
- {
- "subject": "TCCON"
- }
- ],
- "version": "R3",
- "titles": [
- {
- "title": "TCCON data from Eureka (CA), Release GGG2014.R3"
- }
- ],
- "formats": [
- "application/x-netcdf"
- ],
- "dates": [
- {
- "date": "2019-01-31",
- "dateType": "Created"
- },
- {
- "date": "2020-07-01",
- "dateType": "Updated"
- },
- {
- "date": "2010-07-24/2019-08-15",
- "dateType": "Collected"
- },
- {
- "date": "2019-01-31",
- "dateType": "Submitted"
- },
- {
- "date": "2019-01-31",
- "dateType": "Issued"
- }
- ],
- "publicationYear": "2019",
- "publisher": "CaltechDATA",
- "types": {
- "resourceTypeGeneral": "Dataset",
- "resourceType": "Dataset"
- },
- "identifiers": [
- {
- "identifier": "10.14291/tccon.ggg2014.eureka01.R3",
- "identifierType": "DOI"
- },
- {
- "identifier": "1171",
- "identifierType": "CaltechDATA_Identifier"
- },
- {
- "identifier": "GGG2014",
- "identifierType": "Software_Version"
- },
- {
- "identifier": "eu",
- "identifierType": "id"
- },
- {
- "identifier": "eureka01",
- "identifierType": "longName"
- },
- {
- "identifier": "R1",
- "identifierType": "Data_Revision"
- }
- ],
- "creators": "Incorrect type",
- "geoLocations": [
- {
- "geoLocationPlace": "Eureka, NU (CA)",
- "geoLocationPoint": {
- "pointLatitude": "80.05",
- "pointLongitude": "-86.42"
- }
- }
- ],
- "schemaVersion": "http://datacite.org/schema/kernel-4"
-}
\ No newline at end of file
diff --git a/caltechdata_api/tester/invalid_generator.py b/caltechdata_api/tester/invalid_generator.py
deleted file mode 100644
index 02a0da3..0000000
--- a/caltechdata_api/tester/invalid_generator.py
+++ /dev/null
@@ -1,116 +0,0 @@
-import json
-import os
-
-# Directory to save invalid metadata JSON files
-INVALID_DATACITE43_DIR = "../tests/data/invalid_datacite43/"
-
-# Ensure the directory exists
-os.makedirs(INVALID_DATACITE43_DIR, exist_ok=True)
-
-# Helper function to save a dictionary as a JSON file
-def save_invalid_json(data, filename):
- with open(os.path.join(INVALID_DATACITE43_DIR, filename), 'w') as f:
- json.dump(data, f, indent=4)
-
-# Generate different invalid JSON examples
-invalid_metadata_examples = [
- # Missing 'titles' field
- {
- "creators": [{"name": "John Doe"}],
- "publisher": "Caltech",
- "publicationYear": "2023",
- "types": {"resourceTypeGeneral": "Dataset"}
- },
-
- # Empty 'titles' list
- {
- "titles": [],
- "creators": [{"name": "John Doe"}],
- "publisher": "Caltech",
- "publicationYear": "2023",
- "types": {"resourceTypeGeneral": "Dataset"}
- },
-
- # Missing 'creators' field
- {
- "titles": [{"title": "Sample Title"}],
- "publisher": "Caltech",
- "publicationYear": "2023",
- "types": {"resourceTypeGeneral": "Dataset"}
- },
-
- # 'contributors' missing 'name' and 'contributorType'
- {
- "titles": [{"title": "Sample Title"}],
- "creators": [{"name": "John Doe"}],
- "contributors": [{}],
- "publisher": "Caltech",
- "publicationYear": "2023",
- "types": {"resourceTypeGeneral": "Dataset"}
- },
-
- # Invalid 'descriptions' structure
- {
- "titles": [{"title": "Sample Title"}],
- "creators": [{"name": "John Doe"}],
- "descriptions": [{"description": "Sample Description"}], # Missing 'descriptionType'
- "publisher": "Caltech",
- "publicationYear": "2023",
- "types": {"resourceTypeGeneral": "Dataset"}
- },
-
- # 'fundingReferences' missing 'funderName'
- {
- "titles": [{"title": "Sample Title"}],
- "creators": [{"name": "John Doe"}],
- "fundingReferences": [{"funderIdentifier": "1234"}], # Missing 'funderName'
- "publisher": "Caltech",
- "publicationYear": "2023",
- "types": {"resourceTypeGeneral": "Dataset"}
- },
-
- # 'identifiers' missing 'identifier' and 'identifierType'
- {
- "titles": [{"title": "Sample Title"}],
- "creators": [{"name": "John Doe"}],
- "identifiers": [{}], # Missing 'identifier' and 'identifierType'
- "publisher": "Caltech",
- "publicationYear": "2023",
- "types": {"resourceTypeGeneral": "Dataset"}
- },
-
- # 'dates' missing 'date' and 'dateType'
- {
- "titles": [{"title": "Sample Title"}],
- "creators": [{"name": "John Doe"}],
- "dates": [{}], # Missing 'date' and 'dateType'
- "publisher": "Caltech",
- "publicationYear": "2023",
- "types": {"resourceTypeGeneral": "Dataset"}
- },
-
- # Missing 'publisher'
- {
- "titles": [{"title": "Sample Title"}],
- "creators": [{"name": "John Doe"}],
- "publicationYear": "2023",
- "types": {"resourceTypeGeneral": "Dataset"}
- },
-
- # Invalid 'version' type (should be a string)
- {
- "titles": [{"title": "Sample Title"}],
- "creators": [{"name": "John Doe"}],
- "version": 1, # Incorrect type, should be a string
- "publisher": "Caltech",
- "publicationYear": "2023",
- "types": {"resourceTypeGeneral": "Dataset"}
- }
-]
-
-# Save each invalid example as a JSON file
-for i, invalid_json in enumerate(invalid_metadata_examples, start=1):
- filename = f"invalid_metadata_{i}.json"
- save_invalid_json(invalid_json, filename)
-
-print(f"Generated {len(invalid_metadata_examples)} invalid metadata files in {INVALID_DATACITE43_DIR}")
diff --git a/caltechdata_api/tester/missing_fields_generator.py b/caltechdata_api/tester/missing_fields_generator.py
deleted file mode 100644
index 2ce14b3..0000000
--- a/caltechdata_api/tester/missing_fields_generator.py
+++ /dev/null
@@ -1,403 +0,0 @@
-import json
-import os
-import copy
-
-# Directory to store invalid test files
-INVALID_DATA_DIR = "../tests/data/invalid_datacite43"
-os.makedirs(INVALID_DATA_DIR, exist_ok=True)
-
-# Load the valid metadata as a base
-valid_metadata = {
- "contributors": [
- {
- "nameIdentifiers": [
- {
- "nameIdentifier": "grid.20861.3d",
- "nameIdentifierScheme": "GRID"
- }
- ],
- "name": "California Institute of Techonolgy, Pasadena, CA (US)",
- "contributorType": "HostingInstitution"
- },
- {
- "affiliation": [
- {
- "name": "California Institute of Technology, Pasadena, CA (US)"
- }
- ],
- "nameIdentifiers": [
- {
- "nameIdentifier": "0000-0001-5383-8462",
- "nameIdentifierScheme": "ORCID"
- }
- ],
- "name": "Roehl, C. M.",
- "contributorType": "DataCurator"
- },
- {
- "affiliation": [
- {
- "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
- }
- ],
- "nameIdentifiers": [
- {
- "nameIdentifier": "0000-0001-9947-1053",
- "nameIdentifierScheme": "ORCID"
- },
- {
- "nameIdentifier": "D-2563-2012",
- "nameIdentifierScheme": "ResearcherID"
- }
- ],
- "name": "Kimberly Strong",
- "contributorType": "ContactPerson"
- },
- {
- "name": "TCCON",
- "contributorType": "ResearchGroup"
- }
- ],
- "descriptions": [
- {
- "descriptionType": "Abstract",
- "description": "
The Total Carbon Column Observing Network (TCCON) is a network of ground-based Fourier Transform Spectrometers that record direct solar absorption spectra of the atmosphere in the near-infrared. From these spectra, accurate and precise column-averaged abundances of atmospheric constituents including CO2, CH4, N2O, HF, CO, H2O, and HDO, are retrieved. This data set contains observations from the TCCON station at Eureka, Canada."
- },
- {
- "descriptionType": "Other",
- "description": "
Cite this record as:
Strong, K., Roche, S., Franklin, J. E., Mendonca, J., Lutsch, E., Weaver, D., \u2026 Lindenmaier, R. (2019). TCCON data from Eureka (CA), Release GGG2014.R3 [Data set]. CaltechDATA. https://doi.org/10.14291/tccon.ggg2014.eureka01.r3
or choose a different citation style.
Download Citation
"
- },
- {
- "descriptionType": "Other",
- "description": "
Unique Views: 161
Unique Downloads: 7
between January 31, 2019 and July 02, 2020
More info on how stats are collected
"
- }
- ],
- "fundingReferences": [
- {
- "funderName": "Atlantic Innovation Fund"
- },
- {
- "funderName": "Canada Foundation for Innovation",
- "funderIdentifierType": "GRID",
- "funderIdentifier": "grid.439998.6"
- },
- {
- "funderName": "Canadian Foundation for Climate and Atmospheric Sciences"
- },
- {
- "funderName": "Canadian Space Agency",
- "funderIdentifierType": "GRID",
- "funderIdentifier": "grid.236846.d"
- },
- {
- "funderName": "Environment and Climate Change Canada",
- "funderIdentifierType": "GRID",
- "funderIdentifier": "grid.410334.1"
- },
- {
- "funderName": "Government of Canada (International Polar Year funding)",
- "funderIdentifierType": "GRID",
- "funderIdentifier": "grid.451254.3"
- },
- {
- "funderName": "Natural Sciences and Engineering Research Council of Canada",
- "funderIdentifierType": "GRID",
- "funderIdentifier": "grid.452912.9"
- },
- {
- "funderName": "Polar Commission (Northern Scientific Training Program)",
- "funderIdentifierType": "GRID",
- "funderIdentifier": "grid.465477.3"
- },
- {
- "funderName": "Nova Scotia Research Innovation Trust"
- },
- {
- "funderName": "Ministry of Research and Innovation (Ontario Innovation Trust and Ontario Research Fund)",
- "funderIdentifierType": "GRID",
- "funderIdentifier": "grid.451078.f"
- },
- {
- "funderName": "Natural Resources Canada (Polar Continental Shelf Program)",
- "funderIdentifierType": "GRID",
- "funderIdentifier": "grid.202033.0"
- }
- ],
- "language": "eng",
- "relatedIdentifiers": [
- {
- "relatedIdentifier": "10.14291/tccon.ggg2014.documentation.R0/1221662",
- "relationType": "IsDocumentedBy",
- "relatedIdentifierType": "DOI"
- },
- {
- "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R0/1149271",
- "relationType": "IsNewVersionOf",
- "relatedIdentifierType": "DOI"
- },
- {
- "relatedIdentifier": "https://tccon-wiki.caltech.edu/Network_Policy/Data_Use_Policy/Data_Description",
- "relationType": "IsDocumentedBy",
- "relatedIdentifierType": "URL"
- },
- {
- "relatedIdentifier": "https://tccon-wiki.caltech.edu/Sites",
- "relationType": "IsDocumentedBy",
- "relatedIdentifierType": "URL"
- },
- {
- "relatedIdentifier": "10.14291/TCCON.GGG2014",
- "relationType": "IsPartOf",
- "relatedIdentifierType": "DOI"
- },
- {
- "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R1/1325515",
- "relationType": "IsNewVersionOf",
- "relatedIdentifierType": "DOI"
- },
- {
- "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R2",
- "relationType": "IsNewVersionOf",
- "relatedIdentifierType": "DOI"
- }
- ],
- "rightsList": [
- {
- "rights": "TCCON Data License",
- "rightsURI": "https://data.caltech.edu/tindfiles/serve/8298981c-6613-4ed9-9c54-5ef8fb5180f4/"
- }
- ],
- "subjects": [
- {
- "subject": "atmospheric trace gases"
- },
- {
- "subject": "CO2"
- },
- {
- "subject": "CH4"
- },
- {
- "subject": "CO"
- },
- {
- "subject": "N2O"
- },
- {
- "subject": "column-averaged dry-air mole fractions"
- },
- {
- "subject": "remote sensing"
- },
- {
- "subject": "FTIR spectroscopy"
- },
- {
- "subject": "TCCON"
- }
- ],
- "version": "R3",
- "titles": [
- {
- "title": "TCCON data from Eureka (CA), Release GGG2014.R3"
- }
- ],
- "formats": [
- "application/x-netcdf"
- ],
- "dates": [
- {
- "date": "2019-01-31",
- "dateType": "Created"
- },
- {
- "date": "2020-07-01",
- "dateType": "Updated"
- },
- {
- "date": "2010-07-24/2019-08-15",
- "dateType": "Collected"
- },
- {
- "date": "2019-01-31",
- "dateType": "Submitted"
- },
- {
- "date": "2019-01-31",
- "dateType": "Issued"
- }
- ],
- "publicationYear": "2019",
- "publisher": "CaltechDATA",
- "types": {
- "resourceTypeGeneral": "Dataset",
- "resourceType": "Dataset"
- },
- "identifiers": [
- {
- "identifier": "10.14291/tccon.ggg2014.eureka01.R3",
- "identifierType": "DOI"
- },
- {
- "identifier": "1171",
- "identifierType": "CaltechDATA_Identifier"
- },
- {
- "identifier": "GGG2014",
- "identifierType": "Software_Version"
- },
- {
- "identifier": "eu",
- "identifierType": "id"
- },
- {
- "identifier": "eureka01",
- "identifierType": "longName"
- },
- {
- "identifier": "R1",
- "identifierType": "Data_Revision"
- }
- ],
- "creators": [
- {
- "affiliation": [
- {
- "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
- }
- ],
- "name": "Strong, K."
- },
- {
- "affiliation": [
- {
- "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
- }
- ],
- "name": "Roche, S."
- },
- {
- "affiliation": [
- {
- "name": "School of Engineering and Applied Sciences, Harvard University, Cambridge, MA (USA)"
- }
- ],
- "name": "Franklin, J. E."
- },
- {
- "affiliation": [
- {
- "name": "Environment and Climate Change Canada, Downsview, ON (CA)"
- }
- ],
- "name": "Mendonca, J."
- },
- {
- "affiliation": [
- {
- "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
- }
- ],
- "name": "Lutsch, E."
- },
- {
- "affiliation": [
- {
- "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
- }
- ],
- "name": "Weaver, D."
- },
- {
- "affiliation": [
- {
- "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
- }
- ],
- "name": "Fogal, P. F."
- },
- {
- "affiliation": [
- {
- "name": "Department of Physics & Atmospheric Science, Dalhousie University, Halifax, NS, CA"
- }
- ],
- "name": "Drummond, J. R."
- },
- {
- "affiliation": [
- {
- "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
- },
- {
- "name": "UCAR Center for Science Education, Boulder, CO (US)"
- }
- ],
- "name": "Batchelor, R."
- },
- {
- "affiliation": [
- {
- "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
- },
- {
- "name": "Pacific Northwest National Laboratory, Richland, WA (US)"
- }
- ],
- "name": "Lindenmaier, R."
- }
- ],
- "geoLocations": [
- {
- "geoLocationPlace": "Eureka, NU (CA)",
- "geoLocationPoint": {
- "pointLatitude": "80.05",
- "pointLongitude": "-86.42"
- }
- }
- ],
- "schemaVersion": "http://datacite.org/schema/kernel-4"
-}
-
-# Function to save invalid files
-def save_invalid_file(metadata, filename):
- filepath = os.path.join(INVALID_DATA_DIR, filename)
- with open(filepath, 'w') as f:
- json.dump(metadata, f, indent=4)
- print(f"Created: {filepath}")
-
-# Create invalid files
-
-missing_creators = copy.deepcopy(valid_metadata)
-missing_creators.pop("creators", None)
-save_invalid_file(missing_creators, "missing_creators.json")
-
-type_error_creators = copy.deepcopy(valid_metadata)
-type_error_creators["creators"] = "Incorrect type"
-save_invalid_file(type_error_creators, "type_error_creators.json")
-
-unmapped_vocab_contributor = copy.deepcopy(valid_metadata)
-unmapped_vocab_contributor["contributors"][0]["contributorType"] = "UnknownType"
-save_invalid_file(unmapped_vocab_contributor, "unmapped_vocab_contributor.json")
-
-invalid_date_format = copy.deepcopy(valid_metadata)
-invalid_date_format["dates"][0]["date"] = "31-01-2019" # Incorrect format
-save_invalid_file(invalid_date_format, "invalid_date_format.json")
-
-missing_publisher = copy.deepcopy(valid_metadata)
-missing_publisher.pop("publisher", None)
-save_invalid_file(missing_publisher, "missing_publisher.json")
-
-type_error_publication_year = copy.deepcopy(valid_metadata)
-type_error_publication_year["publicationYear"] = "Two Thousand Nineteen"
-save_invalid_file(type_error_publication_year, "type_error_publication_year.json")
-
-unmapped_vocab_related_identifier = copy.deepcopy(valid_metadata)
-unmapped_vocab_related_identifier["relatedIdentifiers"][0]["relatedIdentifierType"] = "UNKNOWN_TYPE"
-save_invalid_file(unmapped_vocab_related_identifier, "unmapped_vocab_related_identifier.json")
-
-multiple_errors = copy.deepcopy(valid_metadata)
-multiple_errors.pop("creators", None)
-multiple_errors["dates"][0]["date"] = "31-01-2019" # Incorrect format
-multiple_errors["titles"][0]["title"] = "A" * 300
-save_invalid_file(multiple_errors, "multiple_errors.json")
diff --git a/caltechdata_api/tester/test_unit.py b/caltechdata_api/tester/test_unit.py
deleted file mode 100644
index 5d1cad6..0000000
--- a/caltechdata_api/tester/test_unit.py
+++ /dev/null
@@ -1,122 +0,0 @@
-import os
-import pytest
-from customize_schema import validate_metadata as validator43
-from helpers import load_json_path
-import logging
-from tqdm import tqdm
-
-# Directories for valid and invalid JSON files
-VALID_DATACITE43_DIR = "../tests/data/datacite43/"
-INVALID_DATACITE43_DIR = "../tests/data/invalid_datacite43/"
-
-# Function to get all JSON files in the directory
-def get_all_json_files(directory):
- return [os.path.join(directory, f) for f in os.listdir(directory) if f.endswith('.json')]
-
-# Get list of all valid JSON files in the directory
-VALID_DATACITE43_FILES = get_all_json_files(VALID_DATACITE43_DIR)
-INVALID_DATACITE43_FILES = get_all_json_files(INVALID_DATACITE43_DIR)
-
-@pytest.mark.parametrize("valid_file", VALID_DATACITE43_FILES)
-def test_valid_json(valid_file):
- """Test that valid example files validate successfully."""
- print(f"\nValidating file: {valid_file}") # Log for file being tested
- json_data = load_json_path(valid_file)
- validation_errors = None
- try:
- validation_errors = validator43(json_data)
- except ValueError as e:
- pytest.fail(f"Validation failed for: {valid_file}\nErrors: {str(e)}")
-
- if validation_errors:
- pytest.fail(f"Validation failed for: {valid_file}\nErrors: {validation_errors}")
- else:
- print(f"Validation passed for: {valid_file}")
-
-@pytest.mark.parametrize("invalid_file", INVALID_DATACITE43_FILES)
-def test_invalid_json(invalid_file):
- """Test that invalid example files do not validate successfully."""
- print(f"\nValidating file: {invalid_file}") # Log for file being tested
- json_data = load_json_path(invalid_file)
- validation_errors = None
- try:
- validation_errors = validator43(json_data)
- except ValueError:
- print(f"Validation failed as expected for: {invalid_file}")
- return # Test passes if validation raises a ValueError
-
- if validation_errors:
- print(f"Validation failed as expected for: {invalid_file}")
- else:
- pytest.fail(f"Validation passed unexpectedly for: {invalid_file}")
-
-@pytest.mark.parametrize("missing_field_file", [
- {"file": "../tests/data/missing_creators.json", "missing_field": "creators"},
- {"file": "../tests/data/missing_titles.json", "missing_field": "titles"},
-])
-def test_missing_required_fields(missing_field_file):
- """Test that JSON files missing required fields fail validation."""
- print(f"\nTesting missing field: {missing_field_file['missing_field']} in file: {missing_field_file['file']}")
- json_data = load_json_path(missing_field_file['file'])
- with pytest.raises(ValueError, match=f"Missing required metadata field: {missing_field_file['missing_field']}"):
- validator43(json_data)
-
-@pytest.mark.parametrize("type_error_file", [
- {"file": "../tests/data/type_error_creators.json", "field": "creators"},
- {"file": "../tests/data/type_error_dates.json", "field": "dates"},
-])
-def test_incorrect_field_types(type_error_file):
- """Test that JSON files with incorrect field types fail validation."""
- print(f"\nTesting incorrect type in field: {type_error_file['field']} for file: {type_error_file['file']}")
- json_data = load_json_path(type_error_file['file'])
- with pytest.raises(ValueError, match=f"Incorrect type for field: {type_error_file['field']}"):
- validator43(json_data)
-
-def test_multiple_errors():
- """Test JSON file with multiple issues to check all errors are raised."""
- json_data = load_json_path("../tests/data/multiple_errors.json")
- with pytest.raises(ValueError, match="Multiple validation errors"):
- validator43(json_data)
-
-def test_error_logging(caplog):
- """Test that errors are logged correctly during validation."""
- json_data = load_json_path("../tests/data/invalid_datacite43/some_invalid_file.json")
- with caplog.at_level(logging.ERROR):
- with pytest.raises(ValueError):
- validator43(json_data)
- assert "Validation failed" in caplog.text
-
-if __name__ == "__main__":
- # Manual test runner for valid files
- failed_valid_files = []
- print("\nRunning validation for valid files...")
- for file in tqdm(VALID_DATACITE43_FILES, desc="Valid files"):
- try:
- test_valid_json(file)
- except AssertionError as e:
- failed_valid_files.append(file)
- print(f"Error occurred in valid file: {file}\nError details: {e}")
-
- if not failed_valid_files:
- print("\n✅ All valid files passed validation. Test complete.")
- else:
- print("\n❌ The following valid files failed validation:")
- for failed_file in failed_valid_files:
- print(f"- {failed_file}")
-
- # Manual test runner for invalid files
- passed_invalid_files = []
- print("\nRunning validation for invalid files...")
- for file in tqdm(INVALID_DATACITE43_FILES, desc="Invalid files"):
- try:
- test_invalid_json(file)
- except AssertionError as e:
- passed_invalid_files.append(file)
- print(f"Error occurred in invalid file: {file}\nError details: {e}")
-
- if not passed_invalid_files:
- print("\n✅ All invalid files failed validation as expected. Test is a success.")
- else:
- print("\n❌ The following invalid files unexpectedly passed validation:")
- for passed_file in passed_invalid_files:
- print(f"- {passed_file}")
diff --git a/caltechdata_api/tester/validatorfordownload.py b/caltechdata_api/tester/validatorfordownload.py
deleted file mode 100644
index ffd1ddc..0000000
--- a/caltechdata_api/tester/validatorfordownload.py
+++ /dev/null
@@ -1,54 +0,0 @@
-import subprocess
-import requests
-import pytest
-import json
-from customize_schema import validate_metadata as validator43
-from helpers import load_json_path
-
-def run_caltechdata_write(metadata_path):
- """Run the caltechdata_write.py script with the given metadata file."""
- try:
- result = subprocess.run(
- ["python", "caltechdata_write.py", "--metadata", metadata_path],
- capture_output=True,
- text=True,
- check=True
- )
- output = result.stdout
- record_url = output.split("Record created with URL: ")[1].strip()
- return record_url
- except subprocess.CalledProcessError as e:
- print("Error running caltechdata_write:", e.stderr)
- return None
-
-def fetch_datacite_json(record_url):
- """Fetch the JSON metadata from the export endpoint."""
- try:
- export_url = f"{record_url}/export/datacite-json"
- response = requests.get(export_url)
- response.raise_for_status()
- return response.json()
- except requests.RequestException as e:
- print("Error fetching JSON data:", e)
- return None
-
-def test_validator(metadata_path):
- """Test the validator by uploading metadata and validating the returned JSON."""
- record_url = run_caltechdata_write(metadata_path)
- if not record_url:
- pytest.fail("Failed to upload metadata and get record URL")
-
- json_data = fetch_datacite_json(record_url)
- if not json_data:
- pytest.fail("Failed to retrieve JSON data from export endpoint")
-
- validation_errors = validator43(json_data)
- if validation_errors:
- pytest.fail(f"Validation failed for {record_url}:\n{validation_errors}")
- else:
- print("Validation passed")
- return True
-
-if __name__ == "__main__":
- metadata_file = "1171.json"
- test_validator(metadata_file)
diff --git a/caltechdata_api/tester/validfiles/1171.json b/caltechdata_api/tester/validfiles/1171.json
deleted file mode 100644
index eea6d9b..0000000
--- a/caltechdata_api/tester/validfiles/1171.json
+++ /dev/null
@@ -1,351 +0,0 @@
-{
- "contributors": [
- {
- "nameIdentifiers": [
- {
- "nameIdentifier": "grid.20861.3d",
- "nameIdentifierScheme": "GRID"
- }
- ],
- "name": "California Institute of Techonolgy, Pasadena, CA (US)",
- "contributorType": "HostingInstitution"
- },
- {
- "affiliation": [
- {
- "name": "California Institute of Technology, Pasadena, CA (US)"
- }
- ],
- "nameIdentifiers": [
- {
- "nameIdentifier": "0000-0001-5383-8462",
- "nameIdentifierScheme": "ORCID"
- }
- ],
- "name": "Roehl, C. M.",
- "contributorType": "DataCurator"
- },
- {
- "affiliation": [
- {
- "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
- }
- ],
- "nameIdentifiers": [
- {
- "nameIdentifier": "0000-0001-9947-1053",
- "nameIdentifierScheme": "ORCID"
- },
- {
- "nameIdentifier": "D-2563-2012",
- "nameIdentifierScheme": "ResearcherID"
- }
- ],
- "name": "Kimberly Strong",
- "contributorType": "ContactPerson"
- },
- {
- "name": "TCCON",
- "contributorType": "ResearchGroup"
- }
- ],
- "descriptions": [
- {
- "descriptionType": "Abstract",
- "description": "
The Total Carbon Column Observing Network (TCCON) is a network of ground-based Fourier Transform Spectrometers that record direct solar absorption spectra of the atmosphere in the near-infrared. From these spectra, accurate and precise column-averaged abundances of atmospheric constituents including CO2, CH4, N2O, HF, CO, H2O, and HDO, are retrieved. This data set contains observations from the TCCON station at Eureka, Canada."
- },
- {
- "descriptionType": "Other",
- "description": "
Cite this record as:
Strong, K., Roche, S., Franklin, J. E., Mendonca, J., Lutsch, E., Weaver, D., \u2026 Lindenmaier, R. (2019). TCCON data from Eureka (CA), Release GGG2014.R3 [Data set]. CaltechDATA. https://doi.org/10.14291/tccon.ggg2014.eureka01.r3
or choose a different citation style.
Download Citation
"
- },
- {
- "descriptionType": "Other",
- "description": "
Unique Views: 161
Unique Downloads: 7
between January 31, 2019 and July 02, 2020
More info on how stats are collected
"
- }
- ],
- "fundingReferences": [
- {
- "funderName": "Atlantic Innovation Fund"
- },
- {
- "funderName": "Canada Foundation for Innovation",
- "funderIdentifierType": "GRID",
- "funderIdentifier": "grid.439998.6"
- },
- {
- "funderName": "Canadian Foundation for Climate and Atmospheric Sciences"
- },
- {
- "funderName": "Canadian Space Agency",
- "funderIdentifierType": "GRID",
- "funderIdentifier": "grid.236846.d"
- },
- {
- "funderName": "Environment and Climate Change Canada",
- "funderIdentifierType": "GRID",
- "funderIdentifier": "grid.410334.1"
- },
- {
- "funderName": "Government of Canada (International Polar Year funding)",
- "funderIdentifierType": "GRID",
- "funderIdentifier": "grid.451254.3"
- },
- {
- "funderName": "Natural Sciences and Engineering Research Council of Canada",
- "funderIdentifierType": "GRID",
- "funderIdentifier": "grid.452912.9"
- },
- {
- "funderName": "Polar Commission (Northern Scientific Training Program)",
- "funderIdentifierType": "GRID",
- "funderIdentifier": "grid.465477.3"
- },
- {
- "funderName": "Nova Scotia Research Innovation Trust"
- },
- {
- "funderName": "Ministry of Research and Innovation (Ontario Innovation Trust and Ontario Research Fund)",
- "funderIdentifierType": "GRID",
- "funderIdentifier": "grid.451078.f"
- },
- {
- "funderName": "Natural Resources Canada (Polar Continental Shelf Program)",
- "funderIdentifierType": "GRID",
- "funderIdentifier": "grid.202033.0"
- }
- ],
- "language": "eng",
- "relatedIdentifiers": [
- {
- "relatedIdentifier": "10.14291/tccon.ggg2014.documentation.R0/1221662",
- "relationType": "IsDocumentedBy",
- "relatedIdentifierType": "DOI"
- },
- {
- "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R0/1149271",
- "relationType": "IsNewVersionOf",
- "relatedIdentifierType": "DOI"
- },
- {
- "relatedIdentifier": "https://tccon-wiki.caltech.edu/Network_Policy/Data_Use_Policy/Data_Description",
- "relationType": "IsDocumentedBy",
- "relatedIdentifierType": "URL"
- },
- {
- "relatedIdentifier": "https://tccon-wiki.caltech.edu/Sites",
- "relationType": "IsDocumentedBy",
- "relatedIdentifierType": "URL"
- },
- {
- "relatedIdentifier": "10.14291/TCCON.GGG2014",
- "relationType": "IsPartOf",
- "relatedIdentifierType": "DOI"
- },
- {
- "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R1/1325515",
- "relationType": "IsNewVersionOf",
- "relatedIdentifierType": "DOI"
- },
- {
- "relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R2",
- "relationType": "IsNewVersionOf",
- "relatedIdentifierType": "DOI"
- }
- ],
- "rightsList": [
- {
- "rights": "TCCON Data License",
- "rightsURI": "https://data.caltech.edu/tindfiles/serve/8298981c-6613-4ed9-9c54-5ef8fb5180f4/"
- }
- ],
- "subjects": [
- {
- "subject": "atmospheric trace gases"
- },
- {
- "subject": "CO2"
- },
- {
- "subject": "CH4"
- },
- {
- "subject": "CO"
- },
- {
- "subject": "N2O"
- },
- {
- "subject": "column-averaged dry-air mole fractions"
- },
- {
- "subject": "remote sensing"
- },
- {
- "subject": "FTIR spectroscopy"
- },
- {
- "subject": "TCCON"
- }
- ],
- "version": "R3",
- "titles": [
- {
- "title": "TCCON data from Eureka (CA), Release GGG2014.R3"
- }
- ],
- "formats": [
- "application/x-netcdf"
- ],
- "dates": [
- {
- "date": "2019-01-31",
- "dateType": "Created"
- },
- {
- "date": "2020-07-01",
- "dateType": "Updated"
- },
- {
- "date": "2010-07-24/2019-08-15",
- "dateType": "Collected"
- },
- {
- "date": "2019-01-31",
- "dateType": "Submitted"
- },
- {
- "date": "2019-01-31",
- "dateType": "Issued"
- }
- ],
- "publicationYear": "2019",
- "publisher": "CaltechDATA",
- "types": {
- "resourceTypeGeneral": "Dataset",
- "resourceType": "Dataset"
- },
- "identifiers": [
- {
- "identifier": "10.14291/tccon.ggg2014.eureka01.R3",
- "identifierType": "DOI"
- },
- {
- "identifier": "1171",
- "identifierType": "CaltechDATA_Identifier"
- },
- {
- "identifier": "GGG2014",
- "identifierType": "Software_Version"
- },
- {
- "identifier": "eu",
- "identifierType": "id"
- },
- {
- "identifier": "eureka01",
- "identifierType": "longName"
- },
- {
- "identifier": "R1",
- "identifierType": "Data_Revision"
- }
- ],
- "creators": [
- {
- "affiliation": [
- {
- "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
- }
- ],
- "name": "Strong, K."
- },
- {
- "affiliation": [
- {
- "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
- }
- ],
- "name": "Roche, S."
- },
- {
- "affiliation": [
- {
- "name": "School of Engineering and Applied Sciences, Harvard University, Cambridge, MA (USA)"
- }
- ],
- "name": "Franklin, J. E."
- },
- {
- "affiliation": [
- {
- "name": "Environment and Climate Change Canada, Downsview, ON (CA)"
- }
- ],
- "name": "Mendonca, J."
- },
- {
- "affiliation": [
- {
- "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
- }
- ],
- "name": "Lutsch, E."
- },
- {
- "affiliation": [
- {
- "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
- }
- ],
- "name": "Weaver, D."
- },
- {
- "affiliation": [
- {
- "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
- }
- ],
- "name": "Fogal, P. F."
- },
- {
- "affiliation": [
- {
- "name": "Department of Physics & Atmospheric Science, Dalhousie University, Halifax, NS, CA"
- }
- ],
- "name": "Drummond, J. R."
- },
- {
- "affiliation": [
- {
- "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
- },
- {
- "name": "UCAR Center for Science Education, Boulder, CO (US)"
- }
- ],
- "name": "Batchelor, R."
- },
- {
- "affiliation": [
- {
- "name": "Department of Physics, University of Toronto, Toronto, ON (CA)"
- },
- {
- "name": "Pacific Northwest National Laboratory, Richland, WA (US)"
- }
- ],
- "name": "Lindenmaier, R."
- }
- ],
- "geoLocations": [
- {
- "geoLocationPlace": "Eureka, NU (CA)",
- "geoLocationPoint": {
- "pointLatitude": "80.05",
- "pointLongitude": "-86.42"
- }
- }
- ],
- "schemaVersion": "http://datacite.org/schema/kernel-4"
-}
\ No newline at end of file
diff --git a/caltechdata_api/tester/validfiles/1235.json b/caltechdata_api/tester/validfiles/1235.json
deleted file mode 100644
index ebda909..0000000
--- a/caltechdata_api/tester/validfiles/1235.json
+++ /dev/null
@@ -1,91 +0,0 @@
-{
- "descriptions": [
- {
- "descriptionType": "Abstract",
- "description": "First included in ames, this notebook dynamically shows how many records are in CaltechDATA and where they come from (GitHub, Deposit Form, or API). This repository is set to work with MyBinder so you can easily reproduce the plot and include new records. "
- },
- {
- "descriptionType": "Other",
- "description": "
Cite this record as:
Morrell, T. E. (2019, April 29). caltechlibrary/caltechdata_usage: First release of CaltechDATA Usage notebook (Version v0.0.1). CaltechDATA. https://doi.org/10.22002/d1.1235
or choose a different citation style.
Download Citation
"
- },
- {
- "descriptionType": "Other",
- "description": "
Unique Views: 4
Unique Downloads: 1
between April 29, 2019 and July 02, 2020
More info on how stats are collected
"
- }
- ],
- "relatedIdentifiers": [
- {
- "relatedIdentifier": "https://github.com/caltechlibrary/caltechdata_usage/releases/tag/v0.0.1",
- "relationType": "IsIdenticalTo",
- "relatedIdentifierType": "URL"
- }
- ],
- "rightsList": [
- {
- "rights": "license",
- "rightsURI": "https://data.caltech.edu/license"
- }
- ],
- "subjects": [
- {
- "subject": "CaltechDATA"
- },
- {
- "subject": "reporitory"
- },
- {
- "subject": "usage"
- },
- {
- "subject": "Jupyter"
- },
- {
- "subject": "GitHub"
- }
- ],
- "version": "v0.0.1",
- "titles": [
- {
- "title": "caltechlibrary/caltechdata_usage: First release of CaltechDATA Usage notebook"
- }
- ],
- "dates": [
- {
- "date": "2019-04-29",
- "dateType": "Issued"
- }
- ],
- "publicationYear": "2019",
- "publisher": "CaltechDATA",
- "types": {
- "resourceTypeGeneral": "Software",
- "resourceType": "Software"
- },
- "identifiers": [
- {
- "identifier": "10.22002/D1.1235",
- "identifierType": "DOI"
- },
- {
- "identifier": "1235",
- "identifierType": "CaltechDATA_Identifier"
- }
- ],
- "creators": [
- {
- "affiliation": [
- {
- "name": "Caltech Library"
- }
- ],
- "nameIdentifiers": [
- {
- "nameIdentifier": "0000-0001-9266-5146",
- "nameIdentifierScheme": "ORCID"
- }
- ],
- "name": "Morrell, Thomas E"
- }
- ],
- "schemaVersion": "http://datacite.org/schema/kernel-4"
-}
\ No newline at end of file
diff --git a/caltechdata_api/tester/validfiles/1250.json b/caltechdata_api/tester/validfiles/1250.json
deleted file mode 100644
index 29c72df..0000000
--- a/caltechdata_api/tester/validfiles/1250.json
+++ /dev/null
@@ -1 +0,0 @@
-{"descriptions": [{"descriptionType": "Abstract", "description": "This release includes two months more data and has some dependency updates."}, {"descriptionType": "Other", "description": "Jupyter notebooks highlighting usage of CaltechDATA"}, {"descriptionType": "Other", "description": "
Click to run this software: 
"}, {"descriptionType": "Other", "description": "
Cite this record as:
Morrell, T. E. (2019, June 19). caltechlibrary/caltechdata_usage: Jupyter notebook with visualization of submissions to CaltechDATA (Version v0.0.2). CaltechDATA. https://doi.org/10.22002/d1.1250
or choose a different citation style.
Download Citation
"}, {"descriptionType": "Other", "description": "
Unique Views: 85
Unique Downloads: 2
between June 19, 2019 and July 02, 2020
More info on how stats are collected
"}], "relatedIdentifiers": [{"relatedIdentifier": "https://github.com/caltechlibrary/caltechdata_usage/releases/tag/v0.0.2", "relationType": "IsIdenticalTo", "relatedIdentifierType": "URL"}], "rightsList": [{"rights": "license", "rightsURI": "https://data.caltech.edu/license"}], "subjects": [{"subject": "CaltechDATA"}, {"subject": "reporitory"}, {"subject": "usage"}, {"subject": "Jupyter"}, {"subject": "GitHub"}], "version": "v0.0.2", "titles": [{"title": "caltechlibrary/caltechdata_usage: Jupyter notebook with visualization of submissions to CaltechDATA"}], "dates": [{"date": "2019-06-19", "dateType": "Issued"}], "publicationYear": "2019", "publisher": "CaltechDATA", "types": {"resourceTypeGeneral": "Software", "resourceType": "Software"}, "identifiers": [{"identifier": "10.22002/D1.1250", "identifierType": "DOI"}, {"identifier": "1250", "identifierType": "CaltechDATA_Identifier"}], "creators": [{"affiliation": [{"name": "Caltech Library"}], "nameIdentifiers": [{"nameIdentifier": "0000-0001-9266-5146", "nameIdentifierScheme": "ORCID"}], "name": "Morrell, Thomas E"}], "schemaVersion": "http://datacite.org/schema/kernel-4"}
\ No newline at end of file
diff --git a/caltechdata_api/tester/validfiles/1259.json b/caltechdata_api/tester/validfiles/1259.json
deleted file mode 100644
index 09fe197..0000000
--- a/caltechdata_api/tester/validfiles/1259.json
+++ /dev/null
@@ -1 +0,0 @@
-{"descriptions": [{"descriptionType": "Abstract", "description": "This release includes a new notebook that determines the use of ORCID iDs across Caltech Library DOIs. It also updates all notebooks to use the latest version of ames and streamlines dependencies."}, {"descriptionType": "Other", "description": "Jupyter notebooks highlighting usage of CaltechDATA"}, {"descriptionType": "Other", "description": "
Click to run this software: 
"}, {"descriptionType": "Other", "description": "
Cite this record as:
Morrell, T. E. (2019, July 16). caltechlibrary/caltechdata_usage: Addition of ORCID analysis notebook and update for new ames version (Version v0.1.0). CaltechDATA. https://doi.org/10.22002/d1.1259
or choose a different citation style.
Download Citation
"}, {"descriptionType": "Other", "description": "
Unique Views: 86
Unique Downloads: 1
between July 16, 2019 and July 02, 2020
More info on how stats are collected
"}], "relatedIdentifiers": [{"relatedIdentifier": "https://github.com/caltechlibrary/caltechdata_usage/releases/tag/v0.1.0", "relationType": "IsIdenticalTo", "relatedIdentifierType": "URL"}], "rightsList": [{"rights": "license", "rightsURI": "https://data.caltech.edu/license"}], "subjects": [{"subject": "CaltechDATA"}, {"subject": "reporitory"}, {"subject": "usage"}, {"subject": "Jupyter"}, {"subject": "GitHub"}], "version": "v0.1.0", "titles": [{"title": "caltechlibrary/caltechdata_usage: Addition of ORCID analysis notebook and update for new ames version"}], "dates": [{"date": "2019-07-16", "dateType": "Issued"}], "publicationYear": "2019", "publisher": "CaltechDATA", "types": {"resourceTypeGeneral": "Software", "resourceType": "Software"}, "identifiers": [{"identifier": "10.22002/D1.1259", "identifierType": "DOI"}, {"identifier": "1259", "identifierType": "CaltechDATA_Identifier"}], "creators": [{"affiliation": [{"name": "Caltech Library"}], "nameIdentifiers": [{"nameIdentifier": "0000-0001-9266-5146", "nameIdentifierScheme": "ORCID"}], "name": "Morrell, Thomas E"}], "schemaVersion": "http://datacite.org/schema/kernel-4"}
\ No newline at end of file
diff --git a/caltechdata_api/tester/validfiles/1300.json b/caltechdata_api/tester/validfiles/1300.json
deleted file mode 100644
index 3c027c6..0000000
--- a/caltechdata_api/tester/validfiles/1300.json
+++ /dev/null
@@ -1 +0,0 @@
-{"contributors": [{"nameIdentifiers": [{"nameIdentifier": "grid.20861.3d", "nameIdentifierScheme": "GRID"}], "name": "California Institute of Techonolgy, Pasadena, CA (US)", "contributorType": "HostingInstitution"}, {"affiliation": [{"name": "California Institute of Technology, Pasadena, CA (US)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0001-5383-8462", "nameIdentifierScheme": "ORCID"}], "name": "Roehl, C. M.", "contributorType": "DataCurator"}, {"affiliation": [{"name": "AeroMeteo Service, Bia\u0142ystok (PL)"}], "name": "Katry\u0144ski, K.", "contributorType": "Other"}, {"name": "Christof Petri", "contributorType": "ContactPerson"}, {"name": "TCCON", "contributorType": "ResearchGroup"}], "descriptions": [{"descriptionType": "Abstract", "description": "The Total Carbon Column Observing Network (TCCON) is a network of ground-based Fourier Transform Spectrometers that record direct solar absorption spectra of the atmosphere in the near-infrared. From these spectra, accurate and precise column-averaged abundances of atmospheric constituents including CO2, CH4, N2O, HF, CO, H2O, and HDO, are retrieved. This data set contains observations from the TCCON station at Bialystok, Poland."}, {"descriptionType": "Other", "description": "
Cite this record as:
Deutscher, N. M., Notholt, J., Messerschmidt, J., Weinzierl, C., Warneke, T., Petri, C., & Grupe, P. (2019). TCCON data from Bialystok (PL), Release GGG2014.R2 [Data set]. CaltechDATA. https://doi.org/10.14291/tccon.ggg2014.bialystok01.r2
or choose a different citation style.
Download Citation
"}, {"descriptionType": "Other", "description": "
Unique Views: 52
Unique Downloads: 3
between October 21, 2019 and July 02, 2020
More info on how stats are collected
"}], "fundingReferences": [{"awardTitle": "Infrastructure for Measurement of the European Carbon Cycle (IMECC)", "funderName": "European Union", "funderIdentifierType": "GRID", "funderIdentifier": "grid.453396.e", "awardURI": "http://cordis.europa.eu/project/rcn/81606_en.html", "awardNumber": "26188"}, {"awardTitle": "Global Earth observation and monitoring (GEOMON)", "funderName": "European Union", "funderIdentifierType": "GRID", "funderIdentifier": "grid.453396.e", "awardURI": "http://cordis.europa.eu/project/rcn/84619_en.html", "awardNumber": "36677"}, {"awardTitle": "Integrated non-CO2 Greenhouse gas Observing System (INGOS)", "funderName": "European Union", "funderIdentifierType": "GRID", "funderIdentifier": "grid.453396.e", "awardURI": "http://cordis.europa.eu/project/rcn/101549_en.html", "awardNumber": "284274"}, {"awardTitle": "ICOS improved sensors, network and interoperability for GMES (ICOS-INWIRE)", "funderName": "European Union", "funderIdentifierType": "GRID", "funderIdentifier": "grid.453396.e", "awardURI": "http://cordis.europa.eu/project/rcn/106570_en.html", "awardNumber": "313169"}, {"awardTitle": "Gap Analysis for Integrated Atmospheric ECV CLImate Monitoring (GAIA-CLIM)", "funderName": "European Union", "funderIdentifierType": "GRID", "funderIdentifier": "grid.453396.e", "awardURI": "http://cordis.europa.eu/project/rcn/193710_en.html", "awardNumber": "640276"}, {"funderName": "Senate of Bremen"}, {"funderName": "University of Bremen", "funderIdentifierType": "GRID", "funderIdentifier": "grid.7704.4"}], "language": "eng", "relatedIdentifiers": [{"relatedIdentifier": "10.14291/tccon.ggg2014.documentation.R0/1221662", "relationType": "IsDocumentedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-9-683-2016", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/acp-16-14003-2016", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": 
"10.5194/amt-9-3491-2016", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.3390/rs8050414", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.14291/tccon.ggg2014.bialystok01.R0/1149277", "relationType": "IsNewVersionOf", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "https://tccon-wiki.caltech.edu/Network_Policy/Data_Use_Policy/Data_Description", "relationType": "IsDocumentedBy", "relatedIdentifierType": "URL"}, {"relatedIdentifier": "https://tccon-wiki.caltech.edu/Sites", "relationType": "IsDocumentedBy", "relatedIdentifierType": "URL"}, {"relatedIdentifier": "10.14291/TCCON.GGG2014", "relationType": "IsPartOf", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "http://tccondata.org", "relationType": "IsPartOf", "relatedIdentifierType": "URL"}, {"relatedIdentifier": "10.3390/rs9101033", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/acp-17-4781-2017", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.3390/atmos9050175", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.3390/rs10030469", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-10-4135-2017", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-11-1251-2018", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-11-3111-2018", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.14291/tccon.ggg2014.bialystok01.R1/1183984", "relationType": "IsNewVersionOf", "relatedIdentifierType": "DOI"}], "rightsList": [{"rights": "TCCON Data License", "rightsURI": "https://data.caltech.edu/tindfiles/serve/7a5e834c-39e9-4d13-9c55-f50a4532885d/"}], "subjects": [{"subject": "atmospheric trace gases"}, {"subject": "CO2"}, {"subject": "CH4"}, 
{"subject": "CO"}, {"subject": "N2O"}, {"subject": "column-averaged dry-air mole fractions"}, {"subject": "remote sensing"}, {"subject": "FTIR spectroscopy"}, {"subject": "TCCON"}], "version": "R2", "titles": [{"title": "TCCON data from Bialystok (PL), Release GGG2014.R2"}], "formats": ["application/x-netcdf"], "dates": [{"date": "2019-10-21", "dateType": "Created"}, {"date": "2020-07-01", "dateType": "Updated"}, {"date": "2009-03-01/2018-10-01", "dateType": "Collected"}, {"date": "2019-10-21", "dateType": "Submitted"}, {"date": "2019-10-21", "dateType": "Issued"}], "publicationYear": "2019", "publisher": "CaltechDATA", "types": {"resourceTypeGeneral": "Dataset", "resourceType": "Dataset"}, "identifiers": [{"identifier": "10.14291/tccon.ggg2014.bialystok01.R2", "identifierType": "DOI"}, {"identifier": "1300", "identifierType": "CaltechDATA_Identifier"}, {"identifier": "GGG2014", "identifierType": "Software_Version"}, {"identifier": "bi", "identifierType": "id"}, {"identifier": "bialystok01", "identifierType": "longName"}, {"identifier": "R1", "identifierType": "Data_Revision"}], "creators": [{"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}, {"name": "Centre for Atmospheric Chemistry, School of Chemistry, University of Wollongong, Wollongong, NSW (AU)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0002-2906-2577", "nameIdentifierScheme": "ORCID"}, {"nameIdentifier": "E-3683-2015", "nameIdentifierScheme": "ResearcherID"}], "name": "Deutscher, N. 
M."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0002-3324-885X", "nameIdentifierScheme": "ORCID"}, {"nameIdentifier": "P-4520-2016", "nameIdentifierScheme": "ResearcherID"}], "name": "Notholt, J."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "name": "Messerschmidt, J."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "name": "Weinzierl, C."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0001-5185-3415", "nameIdentifierScheme": "ORCID"}, {"nameIdentifier": "K-1884-2012", "nameIdentifierScheme": "ResearcherID"}], "name": "Warneke, T."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0002-7010-5532", "nameIdentifierScheme": "ORCID"}], "name": "Petri, C."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "name": "Grupe, P."}], "geoLocations": [{"geoLocationPlace": "Bia\u0142ystok (PL)", "geoLocationPoint": {"pointLatitude": "53.23", "pointLongitude": "23.025"}}], "schemaVersion": "http://datacite.org/schema/kernel-4"}
\ No newline at end of file
diff --git a/caltechdata_api/tester/validfiles/210.json b/caltechdata_api/tester/validfiles/210.json
deleted file mode 100644
index 927730f..0000000
--- a/caltechdata_api/tester/validfiles/210.json
+++ /dev/null
@@ -1 +0,0 @@
-{"contributors": [{"nameIdentifiers": [{"nameIdentifier": "grid.20861.3d", "nameIdentifierScheme": "GRID"}], "name": "California Institute of Techonolgy, Pasadena, CA (US)", "contributorType": "HostingInstitution"}, {"affiliation": [{"name": "California Institute of Technology, Pasadena, CA (US)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0001-5383-8462", "nameIdentifierScheme": "ORCID"}], "name": "Roehl, C. M.", "contributorType": "DataCurator"}, {"name": "Dietrich Feist", "contributorType": "ContactPerson"}, {"name": "TCCON", "contributorType": "ResearchGroup"}], "creators": [{"affiliation": [{"name": "Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen, Lehrstuhl f\u00fcr Physik der Atmosph\u00e4re, Munich (DE)"}, {"name": "Deutsches Zentrum f\u00fcr Luft- und Raumfahrt, Institut f\u00fcr Physik der Atmosph\u00e4re, Oberpfaffenhofen (DE)"}, {"name": "Max Planck Institute for Biogeochemistry, Jena (DE)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0002-5890-6687", "nameIdentifierScheme": "ORCID"}, {"nameIdentifier": "B-6489-2013", "nameIdentifierScheme": "ResearcherID"}], "name": "Feist, D. G."}, {"affiliation": [{"name": "Max Planck Institute for Biogeochemistry, Jena (DE)"}], "name": "Arnold, S. G."}, {"affiliation": [{"name": "Ariane Tracking Station, Ascension Island (SH)"}], "name": "John, N."}, {"affiliation": [{"name": "Stockholm University, Stockholm (SE)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0002-7369-0781", "nameIdentifierScheme": "ORCID"}, {"nameIdentifier": "B-8591-2015", "nameIdentifierScheme": "ResearcherID"}], "name": "Geibel, M. C."}], "descriptions": [{"descriptionType": "Abstract", "description": "The Total Carbon Column Observing Network (TCCON) is a network of ground-based Fourier Transform Spectrometers that record direct solar absorption spectra of the atmosphere in the near-infrared. 
From these spectra, accurate and precise column-averaged abundances of atmospheric constituents including CO2, CH4, N2O, HF, CO, H2O, and HDO, are retrieved. This data set contains observations from the TCCON station on Ascension Island."}, {"descriptionType": "Other", "description": "
Cite this record as:
Feist, D. G., Arnold, S. G., John, N., & Geibel, M. C. (2014). TCCON data from Ascension Island (SH), Release GGG2014.R0 [Data set]. CaltechDATA. https://doi.org/10.14291/tccon.ggg2014.ascension01.r0/1149285
or choose a different citation style.
Download Citation
"}, {"descriptionType": "Other", "description": "
Unique Views: 673
Unique Downloads: 28
between February 21, 2017 and July 02, 2020
More info on how stats are collected
"}], "fundingReferences": [{"funderName": "Bundesministerium f\u00fcr Wirtschaft und Energie", "funderIdentifierType": "GRID", "funderIdentifier": "grid.424440.2", "awardNumber": "50EE1711E"}, {"funderName": "Bundesministerium f\u00fcr Wirtschaft und Energie", "funderIdentifierType": "GRID", "funderIdentifier": "grid.424440.2", "awardNumber": "50EE1711C"}, {"funderName": "European Space Agency", "funderIdentifierType": "GRID", "funderIdentifier": "grid.410379.8", "awardNumber": "3-14737"}, {"funderName": "Max Planck Institute for Biogeochemistry", "funderIdentifierType": "GRID", "funderIdentifier": "grid.419500.9"}, {"funderName": "Max Planck Society", "funderIdentifierType": "GRID", "funderIdentifier": "grid.4372.2"}], "language": "eng", "publicationYear": "2014", "publisher": "CaltechDATA", "relatedIdentifiers": [{"relatedIdentifier": "10.14291/tccon.ggg2014.documentation.R0/1221662", "relationType": "IsDocumentedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "https://tccon-wiki.caltech.edu/Network_Policy/Data_Use_Policy/Data_Description", "relationType": "IsDocumentedBy", "relatedIdentifierType": "URL"}, {"relatedIdentifier": "https://tccon-wiki.caltech.edu/Sites", "relationType": "IsDocumentedBy", "relatedIdentifierType": "URL"}, {"relatedIdentifier": "10.14291/TCCON.GGG2014", "relationType": "IsPartOf", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "http://tccondata.org", "relationType": "IsPartOf", "relatedIdentifierType": "URL"}, {"relatedIdentifier": "10.5194/acp-19-9797-2019", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/acp-19-7347-2019", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-12-2241-2019", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-12-1495-2019", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-11-6539-2018", "relationType": 
"IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-11-5507-2018", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-11-3111-2018", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.3390/rs10010155", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.1002/2017JD026453", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.1109/jstars.2017.2650942", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/gmd-10-1261-2017", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/gmd-10-1-2017", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/acp-17-4781-2017", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-10-4135-2017", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-10-2209-2017", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.3390/rs9101033", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.1038/s41598-017-13459-0", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-9-3491-2016", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-9-2381-2016", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-9-1415-2016", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-9-683-2016", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/acp-16-1653-2016", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.1002/2016JD026164", "relationType": 
"IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.1002/2015JD023389", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.1002/2015JD024157", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.3390/atmos10070354", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}], "rightsList": [{"rights": "TCCON Data Use Policy", "rightsURI": "https://data.caltech.edu/tindfiles/serve/cb9b01e4-56ea-4b8c-9543-0c61d0c72148/"}], "subjects": [{"subject": "atmospheric trace gases"}, {"subject": "CO2"}, {"subject": "CH4"}, {"subject": "CO"}, {"subject": "N2O"}, {"subject": "column-averaged dry-air mole fractions"}, {"subject": "remote sensing"}, {"subject": "FTIR spectroscopy"}, {"subject": "TCCON"}], "titles": [{"title": "TCCON data from Ascension Island (SH), Release GGG2014.R0"}], "version": "GGG2014.R0", "formats": ["application/x-netcdf"], "dates": [{"date": "2014-10-01", "dateType": "Created"}, {"date": "2014-10-10", "dateType": "Issued"}, {"date": "2020-07-01", "dateType": "Updated"}, {"date": "2012-05-22/2018-10-31", "dateType": "Collected"}, {"date": "2017-02-21", "dateType": "Submitted"}], "types": {"resourceTypeGeneral": "Dataset", "resourceType": "Dataset"}, "identifiers": [{"identifier": "10.14291/tccon.ggg2014.ascension01.R0/1149285", "identifierType": "DOI"}, {"identifier": "210", "identifierType": "CaltechDATA_Identifier"}, {"identifier": "GGG2014", "identifierType": "Software_Version"}, {"identifier": "ae", "identifierType": "id"}, {"identifier": "ascension01", "identifierType": "longName"}, {"identifier": "R0", "identifierType": "Data_Revision"}], "geoLocations": [{"geoLocationPlace": "Ariane Tracking Station (AC)", "geoLocationPoint": {"pointLatitude": "-7.9165", "pointLongitude": "-14.3325"}}], "schemaVersion": "http://datacite.org/schema/kernel-4"}
\ No newline at end of file
diff --git a/caltechdata_api/tester/validfiles/266.json b/caltechdata_api/tester/validfiles/266.json
deleted file mode 100644
index c7c9945..0000000
--- a/caltechdata_api/tester/validfiles/266.json
+++ /dev/null
@@ -1 +0,0 @@
-{"contributors": [{"nameIdentifiers": [{"nameIdentifier": "grid.20861.3d", "nameIdentifierScheme": "GRID"}], "name": "California Institute of Techonolgy, Pasadena, CA (US)", "contributorType": "HostingInstitution"}, {"affiliation": [{"name": "California Institute of Technology, Pasadena, CA (US)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0001-5383-8462", "nameIdentifierScheme": "ORCID"}], "name": "Roehl, C. M.", "contributorType": "DataCurator"}, {"name": "Young-Suk Oh", "contributorType": "ContactPerson"}, {"name": "TCCON", "contributorType": "ResearchGroup"}], "creators": [{"affiliation": [{"name": "National Institute of Meteorological Sciences, Seogwipo-si (KR)"}], "name": "Goo, T.-Y."}, {"affiliation": [{"name": "National Institute of Meteorological Sciences, Seogwipo-si (KR)"}], "name": "Oh, Y.-S."}, {"affiliation": [{"name": "Centre for Atmospheric Chemistry, School of Chemistry, University of Wollongong, Wollongong, NSW (AU)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0002-1376-438X", "nameIdentifierScheme": "ORCID"}, {"nameIdentifier": "H-2280-2011", "nameIdentifierScheme": "ResearcherID"}], "name": "Velazco, V. A."}], "descriptions": [{"descriptionType": "Abstract", "description": "The Total Carbon Column Observing Network (TCCON) is a network of ground-based Fourier Transform Spectrometers that record direct solar absorption spectra of the atmosphere in the near-infrared. From these spectra, accurate and precise column-averaged abundances of atmospheric constituents including CO2, CH4, N2O, HF, CO, H2O, and HDO, are retrieved. This data set contains observations from the TCCON station at Anmeyondo, South Korea."}, {"descriptionType": "Other", "description": "
Cite this record as:
Goo, T.-Y., Oh, Y.-S., & Velazco, V. A. (2014). TCCON data from Anmeyondo (KR), Release GGG2014.R0 [Data set]. CaltechDATA. https://doi.org/10.14291/tccon.ggg2014.anmeyondo01.r0/1149284
or choose a different citation style.
Download Citation
"}, {"descriptionType": "Other", "description": "
Unique Views: 270
Unique Downloads: 23
between September 08, 2017 and July 02, 2020
More info on how stats are collected
"}], "language": "eng", "publicationYear": "2014", "publisher": "CaltechDATA", "relatedIdentifiers": [{"relatedIdentifier": "10.14291/tccon.ggg2014.documentation.R0/1221662", "relationType": "IsDocumentedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "https://tccon-wiki.caltech.edu/Network_Policy/Data_Use_Policy/Data_Description", "relationType": "IsDocumentedBy", "relatedIdentifierType": "URL"}, {"relatedIdentifier": "https://tccon-wiki.caltech.edu/Sites", "relationType": "IsDocumentedBy", "relatedIdentifierType": "URL"}, {"relatedIdentifier": "10.14291/TCCON.GGG2014", "relationType": "IsPartOf", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "http://tccondata.org", "relationType": "IsPartOf", "relatedIdentifierType": "URL"}, {"relatedIdentifier": "10.3390/atmos10070354", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}], "rightsList": [{"rights": "TCCON Data Use Policy", "rightsURI": "https://data.caltech.edu/tindfiles/serve/1f568dd3-02e4-4020-a146-12ee8b53f78a/"}], "subjects": [{"subject": "atmospheric trace gases"}, {"subject": "CO2"}, {"subject": "CH4"}, {"subject": "CO"}, {"subject": "N2O"}, {"subject": "column-averaged dry-air mole fractions"}, {"subject": "remote sensing"}, {"subject": "FTIR spectroscopy"}, {"subject": "TCCON"}], "titles": [{"title": "TCCON data from Anmeyondo (KR), Release GGG2014.R0"}], "version": "GGG2014.R0", "formats": ["application/x-netcdf"], "dates": [{"date": "2014-10-10", "dateType": "Created"}, {"date": "2020-07-01", "dateType": "Updated"}, {"date": "2015-02-02/2018-04-18", "dateType": "Collected"}, {"date": "2017-09-08", "dateType": "Submitted"}, {"date": "2014-10-10", "dateType": "Issued"}], "types": {"resourceTypeGeneral": "Dataset", "resourceType": "Dataset"}, "identifiers": [{"identifier": "10.14291/tccon.ggg2014.anmeyondo01.R0/1149284", "identifierType": "DOI"}, {"identifier": "266", "identifierType": "CaltechDATA_Identifier"}, {"identifier": "GGG2014", "identifierType": "Software_Version"}, 
{"identifier": "an", "identifierType": "id"}, {"identifier": "anmeyondo01", "identifierType": "longName"}, {"identifier": "R0", "identifierType": "Data_Revision"}], "schemaVersion": "http://datacite.org/schema/kernel-4"}
\ No newline at end of file
diff --git a/caltechdata_api/tester/validfiles/267.json b/caltechdata_api/tester/validfiles/267.json
deleted file mode 100644
index 9ed0911..0000000
--- a/caltechdata_api/tester/validfiles/267.json
+++ /dev/null
@@ -1 +0,0 @@
-{"contributors": [{"nameIdentifiers": [{"nameIdentifier": "grid.20861.3d", "nameIdentifierScheme": "GRID"}], "name": "California Institute of Techonolgy, Pasadena, CA (US)", "contributorType": "HostingInstitution"}, {"affiliation": [{"name": "California Institute of Technology, Pasadena, CA (US)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0001-5383-8462", "nameIdentifierScheme": "ORCID"}], "name": "Roehl, C. M.", "contributorType": "DataCurator"}, {"affiliation": [{"name": "AeroMeteo Service, Bia\u0142ystok (PL)"}], "name": "Katry\u0144ski, K.", "contributorType": "Other"}, {"name": "Christof Petri", "contributorType": "ContactPerson"}, {"name": "TCCON", "contributorType": "ResearchGroup"}], "creators": [{"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}, {"name": "Centre for Atmospheric Chemistry, School of Chemistry, University of Wollongong, Wollongong, NSW (AU)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0002-2906-2577", "nameIdentifierScheme": "ORCID"}, {"nameIdentifier": "E-3683-2015", "nameIdentifierScheme": "ResearcherID"}], "name": "Deutscher, N. 
M."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0002-3324-885X", "nameIdentifierScheme": "ORCID"}, {"nameIdentifier": "P-4520-2016", "nameIdentifierScheme": "ResearcherID"}], "name": "Notholt, J."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "name": "Messerschmidt, J."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "name": "Weinzierl, C."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0001-5185-3415", "nameIdentifierScheme": "ORCID"}, {"nameIdentifier": "K-1884-2012", "nameIdentifierScheme": "ResearcherID"}], "name": "Warneke, T."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0002-7010-5532", "nameIdentifierScheme": "ORCID"}], "name": "Petri, C."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "name": "Grupe, P."}], "descriptions": [{"descriptionType": "Abstract", "description": "
These data are now obsolete and should be replaced by the most recent data: https://doi.org/10.14291/tccon.ggg2014.bialystok01.R2
The Total Carbon Column Observing Network (TCCON) is a network of ground-based Fourier Transform Spectrometers that record direct solar absorption spectra of the atmosphere in the near-infrared. From these spectra, accurate and precise column-averaged abundances of atmospheric constituents including CO2, CH4, N2O, HF, CO, H2O, and HDO, are retrieved. This data set contains observations from the TCCON station at Bialystok, Poland."}, {"descriptionType": "Other", "description": "
Cite this record as:
Deutscher, N. M., Notholt, J., Messerschmidt, J., Weinzierl, C., Warneke, T., Petri, C., & Grupe, P. (2015). TCCON data from Bialystok (PL), Release GGG2014.R1 [Data set]. CaltechDATA. https://doi.org/10.14291/tccon.ggg2014.bialystok01.r1/1183984
or choose a different citation style.
Download Citation
"}, {"descriptionType": "Other", "description": "
Unique Views: 252
Unique Downloads: 7
between September 08, 2017 and July 02, 2020
More info on how stats are collected
"}], "fundingReferences": [{"awardTitle": "Infrastructure for Measurement of the European Carbon Cycle (IMECC)", "funderName": "European Union", "funderIdentifierType": "GRID", "funderIdentifier": "grid.453396.e", "awardURI": "http://cordis.europa.eu/project/rcn/81606_en.html", "awardNumber": "26188"}, {"awardTitle": "Global Earth observation and monitoring (GEOMON)", "funderName": "European Union", "funderIdentifierType": "GRID", "funderIdentifier": "grid.453396.e", "awardURI": "http://cordis.europa.eu/project/rcn/84619_en.html", "awardNumber": "36677"}, {"awardTitle": "Integrated non-CO2 Greenhouse gas Observing System (INGOS)", "funderName": "European Union", "funderIdentifierType": "GRID", "funderIdentifier": "grid.453396.e", "awardURI": "http://cordis.europa.eu/project/rcn/101549_en.html", "awardNumber": "284274"}, {"awardTitle": "ICOS improved sensors, network and interoperability for GMES (ICOS-INWIRE)", "funderName": "European Union", "funderIdentifierType": "GRID", "funderIdentifier": "grid.453396.e", "awardURI": "http://cordis.europa.eu/project/rcn/106570_en.html", "awardNumber": "313169"}, {"awardTitle": "Gap Analysis for Integrated Atmospheric ECV CLImate Monitoring (GAIA-CLIM)", "funderName": "European Union", "funderIdentifierType": "GRID", "funderIdentifier": "grid.453396.e", "awardURI": "http://cordis.europa.eu/project/rcn/193710_en.html", "awardNumber": "640276"}, {"funderName": "Senate of Bremen"}, {"funderName": "University of Bremen", "funderIdentifierType": "GRID", "funderIdentifier": "grid.7704.4"}], "geoLocations": [{"geoLocationPlace": "Bia\u0142ystok (PL)", "geoLocationPoint": {"pointLatitude": "53.23", "pointLongitude": "23.025"}}], "language": "eng", "publicationYear": "2015", "publisher": "CaltechDATA", "relatedIdentifiers": [{"relatedIdentifier": "10.14291/tccon.ggg2014.documentation.R0/1221662", "relationType": "IsDocumentedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-9-683-2016", "relationType": 
"IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/acp-16-14003-2016", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-9-3491-2016", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.3390/rs8050414", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.14291/tccon.ggg2014.bialystok01.R0/1149277", "relationType": "IsNewVersionOf", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "https://tccon-wiki.caltech.edu/Network_Policy/Data_Use_Policy/Data_Description", "relationType": "IsDocumentedBy", "relatedIdentifierType": "URL"}, {"relatedIdentifier": "https://tccon-wiki.caltech.edu/Sites", "relationType": "IsDocumentedBy", "relatedIdentifierType": "URL"}, {"relatedIdentifier": "10.14291/TCCON.GGG2014", "relationType": "IsPartOf", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "http://tccondata.org", "relationType": "IsPartOf", "relatedIdentifierType": "URL"}, {"relatedIdentifier": "10.3390/rs9101033", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/acp-17-4781-2017", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.3390/atmos9050175", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.3390/rs10030469", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-10-4135-2017", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-11-1251-2018", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-11-3111-2018", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.14291/tccon.ggg2014.bialystok01.R2", "relationType": "IsPreviousVersionOf", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/gmd-10-1261-2017", "relationType": 
"IsCitedBy", "relatedIdentifierType": "DOI"}], "rightsList": [{"rights": "TCCON Data Use Policy", "rightsURI": "https://cd-sandbox.tind.io/tindfiles/serve/ce27a3a2-14f1-40ea-a898-3c6c5adba935/"}], "subjects": [{"subject": "atmospheric trace gases"}, {"subject": "CO2"}, {"subject": "CH4"}, {"subject": "CO"}, {"subject": "N2O"}, {"subject": "column-averaged dry-air mole fractions"}, {"subject": "remote sensing"}, {"subject": "FTIR spectroscopy"}, {"subject": "TCCON"}], "titles": [{"title": "TCCON data from Bialystok (PL), Release GGG2014.R1"}], "version": "GGG2014.R1", "formats": ["application/x-netcdf"], "dates": [{"date": "2015-06-10", "dateType": "Created"}, {"date": "2018-12-01", "dateType": "Updated"}, {"date": "2009-03-01/2017-11-28", "dateType": "Collected"}, {"date": "2017-09-08", "dateType": "Submitted"}, {"date": "2015-06-10", "dateType": "Issued"}], "types": {"resourceTypeGeneral": "Dataset", "resourceType": "Dataset"}, "identifiers": [{"identifier": "10.14291/tccon.ggg2014.bialystok01.R1/1183984", "identifierType": "DOI"}, {"identifier": "267", "identifierType": "CaltechDATA_Identifier"}, {"identifier": "GGG2014", "identifierType": "Software_Version"}, {"identifier": "bi", "identifierType": "id"}, {"identifier": "bialystok01", "identifierType": "longName"}, {"identifier": "R1", "identifierType": "Data_Revision"}], "schemaVersion": "http://datacite.org/schema/kernel-4"}
\ No newline at end of file
diff --git a/caltechdata_api/tester/validfiles/268.json b/caltechdata_api/tester/validfiles/268.json
deleted file mode 100644
index c979248..0000000
--- a/caltechdata_api/tester/validfiles/268.json
+++ /dev/null
@@ -1 +0,0 @@
-{"contributors": [{"nameIdentifiers": [{"nameIdentifier": "grid.20861.3d", "nameIdentifierScheme": "GRID"}], "name": "California Institute of Techonolgy, Pasadena, CA (US)", "contributorType": "HostingInstitution"}, {"affiliation": [{"name": "California Institute of Technology, Pasadena, CA (US)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0001-5383-8462", "nameIdentifierScheme": "ORCID"}], "name": "Roehl, C. M.", "contributorType": "DataCurator"}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "name": "Kowalewski, S.", "contributorType": "DataCollector"}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "name": "Wang, Y.", "contributorType": "DataCollector"}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "name": "Wang, Z.", "contributorType": "DataCollector"}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "name": "Messerschmidt, J.", "contributorType": "DataCollector"}, {"name": "Nicholas Deutscher", "contributorType": "ContactPerson"}, {"name": "TCCON", "contributorType": "ResearchGroup"}], "creators": [{"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0002-3324-885X", "nameIdentifierScheme": "ORCID"}, {"nameIdentifier": "P-4520-2016", "nameIdentifierScheme": "ResearcherID"}], "name": "Notholt, J."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0002-7010-5532", "nameIdentifierScheme": "ORCID"}], "name": "Petri, C."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0001-5185-3415", "nameIdentifierScheme": "ORCID"}, {"nameIdentifier": "K-1884-2012", 
"nameIdentifierScheme": "ResearcherID"}], "name": "Warneke, T."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}, {"name": "Centre for Atmospheric Chemistry, School of Chemistry, University of Wollongong, Wollongong, NSW (AU)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0002-2906-2577", "nameIdentifierScheme": "ORCID"}, {"nameIdentifier": "E-3683-2015", "nameIdentifierScheme": "ResearcherID"}], "name": "Deutscher, N. M."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0001-7191-6911", "nameIdentifierScheme": "ORCID"}], "name": "Palm, M."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0001-5077-9524", "nameIdentifierScheme": "ORCID"}], "name": "Buschmann, M."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "name": "Weinzierl, C."}, {"affiliation": [{"name": "National Astronomical Research Institute of Thailand, Chiang Mai (TH)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0001-8020-8642", "nameIdentifierScheme": "ORCID"}], "name": "Macatangay, R. C."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "name": "Grupe, P."}], "descriptions": [{"descriptionType": "Abstract", "description": "
These data are now obsolete and should be replaced by the most recent data: https://doi.org/10.14291/tccon.ggg2014.bremen01.R1
The Total Carbon Column Observing Network (TCCON) is a network of ground-based Fourier Transform Spectrometers that record direct solar absorption spectra of the atmosphere in the near-infrared. From these spectra, accurate and precise column-averaged abundances of atmospheric constituents including CO2, CH4, N2O, HF, CO, H2O, and HDO, are retrieved. This data set contains observations from the TCCON station at Bremen, Germany."}, {"descriptionType": "Other", "description": "
Cite this record as:
Notholt, J., Petri, C., Warneke, T., Deutscher, N. M., Palm, M., Buschmann, M., \u2026 Grupe, P. (2014). TCCON data from Bremen (DE), Release GGG2014.R0 [Data set]. CaltechDATA. https://doi.org/10.14291/tccon.ggg2014.bremen01.r0/1149275
or choose a different citation style.
Download Citation
"}, {"descriptionType": "Other", "description": "
Unique Views: 252
Unique Downloads: 9
between September 08, 2017 and July 02, 2020
More info on how stats are collected
"}], "fundingReferences": [{"awardTitle": "Infrastructure for Measurement of the European Carbon Cycle (IMECC)", "funderName": "European Union", "funderIdentifierType": "GRID", "funderIdentifier": "grid.453396.e", "awardURI": "http://cordis.europa.eu/project/rcn/81606_en.html", "awardNumber": "26188"}, {"awardTitle": "Global Earth observation and monitoring (GEOMON)", "funderName": "European Union", "funderIdentifierType": "GRID", "funderIdentifier": "grid.453396.e", "awardURI": "http://cordis.europa.eu/project/rcn/84619_en.html", "awardNumber": "36677"}, {"awardTitle": "Integrated non-CO2 Greenhouse gas Observing System (INGOS)", "funderName": "European Union", "funderIdentifierType": "GRID", "funderIdentifier": "grid.453396.e", "awardURI": "http://cordis.europa.eu/project/rcn/101549_en.html", "awardNumber": "284274"}, {"awardTitle": "ICOS improved sensors, network and interoperability for GMES (ICOS-INWIRE)", "funderName": "European Union", "funderIdentifierType": "GRID", "funderIdentifier": "grid.453396.e", "awardURI": "http://cordis.europa.eu/project/rcn/106570_en.html", "awardNumber": "313169"}, {"awardTitle": "Gap Analysis for Integrated Atmospheric ECV CLImate Monitoring (GAIA-CLIM)", "funderName": "European Union", "funderIdentifierType": "GRID", "funderIdentifier": "grid.453396.e", "awardURI": "http://cordis.europa.eu/project/rcn/193710_en.html", "awardNumber": "640276"}, {"funderName": "Senate of Bremen"}, {"funderName": "University of Bremen", "funderIdentifierType": "GRID", "funderIdentifier": "grid.7704.4"}], "geoLocations": [{"geoLocationPlace": "Bremen (DE)", "geoLocationPoint": {"pointLatitude": "53.1", "pointLongitude": "8.85"}}], "language": "eng", "publicationYear": "2014", "publisher": "CaltechDATA", "relatedIdentifiers": [{"relatedIdentifier": "10.14291/tccon.ggg2014.documentation.R0/1221662", "relationType": "IsDocumentedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-9-683-2016", "relationType": "IsCitedBy", 
"relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/acp-16-5043-2016", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/acp-16-14003-2016", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/acp-15-13023-2015", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/acp-16-12005-2016", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/acp-16-1653-2016", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-9-3491-2016", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.3390/rs8050414", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "https://tccon-wiki.caltech.edu/Network_Policy/Data_Use_Policy/Data_Description", "relationType": "IsDocumentedBy", "relatedIdentifierType": "URL"}, {"relatedIdentifier": "https://tccon-wiki.caltech.edu/Sites", "relationType": "IsDocumentedBy", "relatedIdentifierType": "URL"}, {"relatedIdentifier": "10.14291/TCCON.GGG2014", "relationType": "IsPartOf", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "http://tccondata.org", "relationType": "IsPartOf", "relatedIdentifierType": "URL"}, {"relatedIdentifier": "10.3390/rs9101033", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/acp-17-4781-2017", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-10-2209-2017", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.3390/rs10030469", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-11-3111-2018", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.3390/atmos9050175", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, 
{"relatedIdentifier": "10.5194/amt-10-4135-2017", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.3390/atmos10070354", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.14291/tccon.ggg2014.bremen01.R1", "relationType": "IsPreviousVersionOf", "relatedIdentifierType": "DOI"}], "rightsList": [{"rights": "TCCON Data Use Policy", "rightsURI": "https://data.caltech.edu/tindfiles/serve/b6002cc3-520a-42aa-bc63-81c97ab5982a/"}], "subjects": [{"subject": "atmospheric trace gases"}, {"subject": "CO2"}, {"subject": "CH4"}, {"subject": "CO"}, {"subject": "N2O"}, {"subject": "column-averaged dry-air mole fractions"}, {"subject": "remote sensing"}, {"subject": "FTIR spectroscopy"}, {"subject": "TCCON"}], "titles": [{"title": "TCCON data from Bremen (DE), Release GGG2014.R0"}], "version": "GGG2014.R0", "formats": ["application/x-netcdf"], "dates": [{"date": "2014-10-10", "dateType": "Created"}, {"date": "2019-06-01", "dateType": "Updated"}, {"date": "2007-01-15/2018-04-20", "dateType": "Collected"}, {"date": "2017-09-08", "dateType": "Submitted"}, {"date": "2014-10-10", "dateType": "Issued"}], "types": {"resourceTypeGeneral": "Dataset", "resourceType": "Dataset"}, "identifiers": [{"identifier": "10.14291/tccon.ggg2014.bremen01.R0/1149275", "identifierType": "DOI"}, {"identifier": "268", "identifierType": "CaltechDATA_Identifier"}, {"identifier": "GGG2014", "identifierType": "Software_Version"}, {"identifier": "br", "identifierType": "id"}, {"identifier": "bremen01", "identifierType": "longName"}, {"identifier": "R0", "identifierType": "Data_Revision"}], "schemaVersion": "http://datacite.org/schema/kernel-4"}
\ No newline at end of file
diff --git a/caltechdata_api/tester/validfiles/283.json b/caltechdata_api/tester/validfiles/283.json
deleted file mode 100644
index b68bc15..0000000
--- a/caltechdata_api/tester/validfiles/283.json
+++ /dev/null
@@ -1 +0,0 @@
-{"contributors": [{"nameIdentifiers": [{"nameIdentifier": "grid.20861.3d", "nameIdentifierScheme": "GRID"}], "name": "California Institute of Techonolgy, Pasadena, CA (US)", "contributorType": "HostingInstitution"}, {"affiliation": [{"name": "California Institute of Technology, Pasadena, CA (US)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0001-5383-8462", "nameIdentifierScheme": "ORCID"}], "name": "Roehl, C. M.", "contributorType": "DataCurator"}, {"affiliation": [{"name": "Laboratoire des Sciences du Climat et de l'Environnement, Gif-sur-Yvette (FR)"}], "name": "Vuillemin, C.", "contributorType": "ProjectMember"}, {"affiliation": [{"name": "Laboratoire des Sciences du Climat et de l'Environnement, Gif-sur-Yvette (FR)"}], "name": "Truong, F.\u00e7.", "contributorType": "ProjectMember"}, {"affiliation": [{"name": "Laboratoire des Sciences du Climat et de l'Environnement, Gif-sur-Yvette (FR)"}], "name": "Schmidt, M.", "contributorType": "ProjectMember"}, {"affiliation": [{"name": "Laboratoire des Sciences du Climat et de l'Environnement, Gif-sur-Yvette (FR)"}], "name": "Ramonet, M.", "contributorType": "ProjectMember"}, {"affiliation": [{"name": "Institut de Physique du Globe de Paris, Observatoire magn\u00e9tique de Chambon la For\u00eat, Cambon la For\u00eat (FR)"}], "name": "Parmentier, E.", "contributorType": "RelatedPerson"}, {"name": "Thorsten Warneke", "contributorType": "ContactPerson"}, {"name": "TCCON", "contributorType": "ResearchGroup"}], "creators": [{"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0001-5185-3415", "nameIdentifierScheme": "ORCID"}, {"nameIdentifier": "K-1884-2012", "nameIdentifierScheme": "ResearcherID"}], "name": "Warneke, T."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "name": "Messerschmidt, J."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of 
Bremen, Bremen (DE)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0002-3324-885X", "nameIdentifierScheme": "ORCID"}, {"nameIdentifier": "P-4520-2016", "nameIdentifierScheme": "ResearcherID"}], "name": "Notholt, J."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "name": "Weinzierl, C."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}, {"name": "Centre for Atmospheric Chemistry, School of Chemistry, University of Wollongong, Wollongong, NSW (AU)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0002-2906-2577", "nameIdentifierScheme": "ORCID"}, {"nameIdentifier": "E-3683-2015", "nameIdentifierScheme": "ResearcherID"}], "name": "Deutscher, N. M."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0002-7010-5532", "nameIdentifierScheme": "ORCID"}], "name": "Petri, C."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "name": "Grupe, P."}], "descriptions": [{"descriptionType": "Abstract", "description": "
These data are now obsolete and should be replaced by the most recent data: https://doi.org/10.14291/tccon.ggg2014.orleans01.R1
The Total Carbon Column Observing Network (TCCON) is a network of ground-based Fourier Transform Spectrometers that record direct solar absorption spectra of the atmosphere in the near-infrared. From these spectra, accurate and precise column-averaged abundances of atmospheric constituents including CO2, CH4, N2O, HF, CO, H2O, and HDO, are retrieved. This data set contains observations from the TCCON station at Orl\u00e9ans, France."}, {"descriptionType": "Other", "description": "
Cite this record as:
Warneke, T., Messerschmidt, J., Notholt, J., Weinzierl, C., Deutscher, N. M., Petri, C., & Grupe, P. (2014). TCCON data from Orl\u00e9ans (FR), Release GGG2014.R0 [Data set]. CaltechDATA. https://doi.org/10.14291/tccon.ggg2014.orleans01.r0/1149276
or choose a different citation style.
Download Citation
"}, {"descriptionType": "Other", "description": "
Unique Views: 222
Unique Downloads: 5
between September 08, 2017 and July 02, 2020
More info on how stats are collected
"}], "fundingReferences": [{"awardTitle": "Infrastructure for Measurement of the European Carbon Cycle (IMECC)", "funderName": "European Union", "funderIdentifierType": "GRID", "funderIdentifier": "grid.453396.e", "awardURI": "http://cordis.europa.eu/project/rcn/81606_en.html", "awardNumber": "26188"}, {"awardTitle": "Global Earth observation and monitoring (GEOMON)", "funderName": "European Union", "funderIdentifierType": "GRID", "funderIdentifier": "grid.453396.e", "awardURI": "http://cordis.europa.eu/project/rcn/84619_en.html", "awardNumber": "36677"}, {"awardTitle": "Integrated non-CO2 Greenhouse gas Observing System (INGOS)", "funderName": "European Union", "funderIdentifierType": "GRID", "funderIdentifier": "grid.453396.e", "awardURI": "http://cordis.europa.eu/project/rcn/101549_en.html", "awardNumber": "284274"}, {"awardTitle": "ICOS improved sensors, network and interoperability for GMES (ICOS-INWIRE)", "funderName": "European Union", "funderIdentifierType": "GRID", "funderIdentifier": "grid.453396.e", "awardURI": "http://cordis.europa.eu/project/rcn/106570_en.html", "awardNumber": "313169"}, {"awardTitle": "Gap Analysis for Integrated Atmospheric ECV CLImate Monitoring (GAIA-CLIM)", "funderName": "European Union", "funderIdentifierType": "GRID", "funderIdentifier": "grid.453396.e", "awardURI": "http://cordis.europa.eu/project/rcn/193710_en.html", "awardNumber": "640276"}, {"funderName": "Senate of Bremen", "funderIdentifierType": "GRID", "funderIdentifier": "grid.425996.5"}, {"funderName": "Laboratoire des Sciences du Climat et de l'Environnement", "funderIdentifierType": "GRID", "funderIdentifier": "grid.457340.1"}, {"funderName": "University of Bremen", "funderIdentifierType": "GRID", "funderIdentifier": "grid.7704.4"}], "geoLocations": [{"geoLocationPlace": "Tra\u00eenou, Orl\u00e9ans (FR)", "geoLocationPoint": {"pointLatitude": "47.97", "pointLongitude": "2.113"}}], "language": "eng", "publicationYear": "2014", "publisher": "CaltechDATA", 
"relatedIdentifiers": [{"relatedIdentifier": "10.14291/tccon.ggg2014.documentation.R0/1221662", "relationType": "IsDocumentedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.3390/rs9010064", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-9-683-2016", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-9-227-2016", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/acp-16-5043-2016", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/acp-16-14003-2016", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-8-4785-2015", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/acp-15-13023-2015", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/acp-16-12005-2016", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-9-4843-2016", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/acp-16-1653-2016", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-9-3491-2016", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.3390/rs8050414", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "https://tccon-wiki.caltech.edu/Network_Policy/Data_Use_Policy/Data_Description", "relationType": "IsDocumentedBy", "relatedIdentifierType": "URL"}, {"relatedIdentifier": "https://tccon-wiki.caltech.edu/Sites", "relationType": "IsDocumentedBy", "relatedIdentifierType": "URL"}, {"relatedIdentifier": "10.14291/TCCON.GGG2014", "relationType": "IsPartOf", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "http://tccondata.org", "relationType": "IsPartOf", "relatedIdentifierType": 
"URL"}, {"relatedIdentifier": "10.3390/rs9101033", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/acp-17-4781-2017", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.3390/atmos9050175", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-11-3111-2018", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-11-1251-2018", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-10-4135-2017", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-10-2209-2017", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.3390/atmos10070354", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.14291/tccon.ggg2014.orleans01.R1", "relationType": "IsPreviousVersionOf", "relatedIdentifierType": "DOI"}], "rightsList": [{"rights": "TCCON Data Use Policy", "rightsURI": "https://data.caltech.edu/tindfiles/serve/d0bf0bd6-739b-4aad-9e5d-45338391727f/"}], "subjects": [{"subject": "atmospheric trace gases"}, {"subject": "CO2"}, {"subject": "CH4"}, {"subject": "CO"}, {"subject": "N2O"}, {"subject": "column-averaged dry-air mole fractions"}, {"subject": "remote sensing"}, {"subject": "FTIR spectroscopy"}, {"subject": "TCCON"}], "titles": [{"title": "TCCON data from Orl\u00e9ans (FR), Release GGG2014.R0"}], "version": "GGG2014.R0", "formats": ["application/x-netcdf"], "dates": [{"date": "2014-10-10", "dateType": "Created"}, {"date": "2018-12-01", "dateType": "Updated"}, {"date": "2009-08-29/2017-11-28", "dateType": "Collected"}, {"date": "2017-09-08", "dateType": "Submitted"}, {"date": "2014-10-10", "dateType": "Issued"}], "types": {"resourceTypeGeneral": "Dataset", "resourceType": "Dataset"}, "identifiers": [{"identifier": "10.14291/tccon.ggg2014.orleans01.R0/1149276", 
"identifierType": "DOI"}, {"identifier": "283", "identifierType": "CaltechDATA_Identifier"}, {"identifier": "GGG2014", "identifierType": "Software_Version"}, {"identifier": "or", "identifierType": "id"}, {"identifier": "orleans01", "identifierType": "longName"}, {"identifier": "R0", "identifierType": "Data_Revision"}], "schemaVersion": "http://datacite.org/schema/kernel-4"}
\ No newline at end of file
diff --git a/caltechdata_api/tester/validfiles/293.json b/caltechdata_api/tester/validfiles/293.json
deleted file mode 100644
index cbf8145..0000000
--- a/caltechdata_api/tester/validfiles/293.json
+++ /dev/null
@@ -1 +0,0 @@
-{"contributors": [{"affiliation": [{"name": "California Institute of Technology, Pasadena, CA, U.S.A."}], "nameIdentifiers": [{"nameIdentifier": "0000-0002-4924-0377", "nameIdentifierScheme": "ORCID"}], "name": "Wunch, Debra", "contributorType": "ContactPerson"}, {"affiliation": [{"name": "California Institute of Technology, Pasadena, CA (US)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0002-6126-3854", "nameIdentifierScheme": "ORCID"}], "name": "Wennberg, P. O. ", "contributorType": "ContactPerson"}, {"affiliation": [{"name": "Centre for Atmospheric Chemistry, School of Chemistry, University of Wollongong, Wollongong, NSW (AU)"}], "nameIdentifiers": [{"nameIdentifier": " 0000-0002-7986-1924", "nameIdentifierScheme": "ORCID"}], "name": "Griffith, D. W.T.", "contributorType": "ContactPerson"}, {"affiliation": [{"name": " Institute of Environmental Physics, University of Bremen, Bremen (DE), Centre for Atmospheric Chemistry, School of Chemistry, University of Wollongong, Wollongong, NSW (AU) "}], "nameIdentifiers": [{"nameIdentifier": "0000-0002-2906-2577", "nameIdentifierScheme": "ORCID"}], "name": "Deutscher, N. M.", "contributorType": "ContactPerson"}, {"affiliation": [{"name": "Max Planck Institute for Biogeochemistry, Jena (DE)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0002-5890-6687", "nameIdentifierScheme": "ORCID"}], "name": "Feist, D. G.", "contributorType": "ContactPerson"}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0002-3324-885X", "nameIdentifierScheme": "ORCID"}], "name": "Notholt, J.", "contributorType": "ContactPerson"}], "descriptions": [{"descriptionType": "Other", "description": "The Total Carbon Column Observing Network (TCCON) is a network of ground-based Fourier Transform Spectrometers that record direct solar absorption spectra of the atmosphere in the near-infrared. 
From these spectra, accurate and precise column-averaged abundances of atmospheric constituents including CO2, CH4, N2O, HF, CO, H2O, and HDO, are retrieved. This is the 2014 data release."}, {"descriptionType": "Other", "description": "
Unique Views: 953
Unique Downloads: 98
between September 13, 2017 and July 02, 2020
More info on how stats are collected
"}, {"descriptionType": "Other", "description": "
Cite this record as:
Total Carbon Column Observing Network (TCCON) Team. (2017). 2014 TCCON Data Release (Version GGG2014) [Data set]. CaltechDATA. https://doi.org/10.14291/TCCON.GGG2014
or choose a different citation style.
Download Citation
"}], "language": "eng", "relatedIdentifiers": [{"relatedIdentifier": "10.14291/TCCON.GGG2014.DOCUMENTATION.R0/1221662", "relationType": "IsDocumentedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "https://tccon-wiki.caltech.edu/Network_Policy/Data_Use_Policy/Data_Description", "relationType": "IsDocumentedBy", "relatedIdentifierType": "URL"}, {"relatedIdentifier": "https://tccon-wiki.caltech.edu/Sites", "relationType": "IsDocumentedBy", "relatedIdentifierType": "DOI"}], "rightsList": [{"rights": "TCCON Data Use Policy", "rightsURI": "https://data.caltech.edu/tindfiles/serve/24d2401d-d2b7-42e1-83b1-1ee01839d84d/"}], "subjects": [{"subject": "atmospheric trace gases"}, {"subject": " CO2"}, {"subject": " CH4"}, {"subject": " CO"}, {"subject": " N2O"}, {"subject": " column-averaged dry-air mole fractions"}, {"subject": " remote sensing"}, {"subject": " FTIR spectroscopy"}, {"subject": " TCCON"}], "version": "GGG2014", "titles": [{"title": "2014 TCCON Data Release"}], "formats": [".tgz", ".nc"], "dates": [{"date": "2020-07-01", "dateType": "Updated"}, {"date": "2017-09-13", "dateType": "Submitted"}, {"date": "2017-09-13", "dateType": "Issued"}], "publicationYear": "2017", "publisher": "CaltechDATA", "types": {"resourceTypeGeneral": "Dataset", "resourceType": "Dataset"}, "identifiers": [{"identifier": "10.14291/TCCON.GGG2014", "identifierType": "DOI"}, {"identifier": "293", "identifierType": "CaltechDATA_Identifier"}], "creators": [{"affiliation": [{"name": "TCCON Consortium"}], "name": "Total Carbon Column Observing Network (TCCON) Team"}], "schemaVersion": "http://datacite.org/schema/kernel-4"}
\ No newline at end of file
diff --git a/caltechdata_api/tester/validfiles/301.json b/caltechdata_api/tester/validfiles/301.json
deleted file mode 100644
index 186bbec..0000000
--- a/caltechdata_api/tester/validfiles/301.json
+++ /dev/null
@@ -1 +0,0 @@
-{"contributors": [{"nameIdentifiers": [{"nameIdentifier": "grid.20861.3d", "nameIdentifierScheme": "GRID"}], "name": "CaltechDATA, California Institute of Technology, CA (US)", "contributorType": "HostingInstitution"}, {"affiliation": [{"name": "California Institute of Technology, Pasadena, CA (US)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0001-5383-8462", "nameIdentifierScheme": "ORCID"}], "name": "Roehl, C. M.", "contributorType": "DataCurator"}, {"name": "AWIPEV Arctic Research Base, Ny-\u00c5lesund, Spitsbergen (NO)", "contributorType": "DataCollector"}, {"name": "Justus Notholt", "contributorType": "ContactPerson"}, {"name": "TCCON", "contributorType": "ResearchGroup"}], "descriptions": [{"descriptionType": "Abstract", "description": "
These data are now obsolete and should be replaced by the most recent data: https://doi.org/10.14291/tccon.ggg2014.nyalesund01.R1
The Total Carbon Column Observing Network (TCCON) is a network of ground-based Fourier Transform Spectrometers that record direct solar absorption spectra of the atmosphere in the near-infrared. From these spectra, accurate and precise column-averaged abundances of atmospheric constituents including CO2, CH4, N2O, HF, CO, H2O, and HDO, are retrieved. This data set contains observations from the TCCON station Ny \u00c5lesund, Spitsbergen, Norway."}, {"descriptionType": "Other", "description": "
Cite this record as:
Notholt, J., Warneke, T., Petri, C., Deutscher, N. M., Weinzierl, C., Palm, M., & Buschmann, M. (2014). TCCON data from Ny \u00c5lesund, Spitsbergen (NO), Release GGG2014.R0 [Data set]. CaltechDATA. https://doi.org/10.14291/tccon.ggg2014.nyalesund01.r0/1149278
or choose a different citation style.
Download Citation
"}, {"descriptionType": "Other", "description": "
Unique Views: 196
Unique Downloads: 5
between October 31, 2017 and July 02, 2020
More info on how stats are collected
"}], "language": "eng", "relatedIdentifiers": [{"relatedIdentifier": "10.14291/tccon.archive/1348407", "relationType": "IsPartOf", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.14291/tccon.ggg2014.documentation.R0/1221662", "relationType": "IsDocumentedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.5194/amt-9-3491-2016", "relationType": "IsCitedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "https://tccon-wiki.caltech.edu/Network_Policy/Data_Use_Policy/Data_Description", "relationType": "IsDocumentedBy", "relatedIdentifierType": "URL"}, {"relatedIdentifier": "https://tccon-wiki.caltech.edu/Sites", "relationType": "IsDocumentedBy", "relatedIdentifierType": "URL"}, {"relatedIdentifier": "http://tccondata.org", "relationType": "IsPartOf", "relatedIdentifierType": "URL"}, {"relatedIdentifier": "10.14291/TCCON.GGG2014", "relationType": "IsPartOf", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.14291/tccon.ggg2014.nyalesund01.R1", "relationType": "IsPreviousVersionOf", "relatedIdentifierType": "DOI"}], "rightsList": [{"rights": "TCCON Data Use Policy", "rightsURI": "https://data.caltech.edu/tindfiles/serve/90348ea4-f340-4f43-8db2-b9beb7845519/"}], "subjects": [{"subject": "atmospheric trace gases"}, {"subject": "CO2"}, {"subject": "CH4"}, {"subject": "CO"}, {"subject": "N2O"}, {"subject": "column-averaged dry-air mole fractions"}, {"subject": "remote sensing"}, {"subject": "FTIR spectroscopy"}, {"subject": "TCCON"}], "version": "GGG2014.R0", "titles": [{"title": "TCCON data from Ny \u00c5lesund, Spitsbergen (NO), Release GGG2014.R0"}], "formats": ["application/x-netcdf"], "dates": [{"date": "2017-10-31", "dateType": "Created"}, {"date": "2019-06-01", "dateType": "Updated"}, {"date": "2006-03-28/2018-04-27", "dateType": "Collected"}, {"date": "2017-10-31", "dateType": "Submitted"}, {"date": "2014-10-10", "dateType": "Issued"}], "publicationYear": "2014", "publisher": "CaltechDATA", "types": {"resourceTypeGeneral": 
"Dataset", "resourceType": "Dataset"}, "identifiers": [{"identifier": "10.14291/tccon.ggg2014.nyalesund01.R0/1149278", "identifierType": "DOI"}, {"identifier": "301", "identifierType": "CaltechDATA_Identifier"}, {"identifier": "GGG2014", "identifierType": "Software_Version"}, {"identifier": "sp", "identifierType": "id"}, {"identifier": "nyalesund01", "identifierType": "longName"}, {"identifier": "R0", "identifierType": "Data_Revision"}], "creators": [{"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0002-3324-885X", "nameIdentifierScheme": "ORCID"}, {"nameIdentifier": "P-4520-2016", "nameIdentifierScheme": "ResearcherID"}], "name": "Notholt, J."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0001-5185-3415", "nameIdentifierScheme": "ORCID"}, {"nameIdentifier": "K-1884-2012", "nameIdentifierScheme": "ResearcherID"}], "name": "Warneke, T."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0002-7010-5532", "nameIdentifierScheme": "ORCID"}], "name": "Petri, C."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}, {"name": "Centre for Atmospheric Chemistry, School of Chemistry, University of Wollongong, Wollongong, NSW (AU)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0002-2906-2577", "nameIdentifierScheme": "ORCID"}, {"nameIdentifier": "E-3683-2015", "nameIdentifierScheme": "ResearcherID"}], "name": "Deutscher, N. 
M."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "name": "Weinzierl, C."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0001-7191-6911", "nameIdentifierScheme": "ORCID"}], "name": "Palm, M."}, {"affiliation": [{"name": "Institute of Environmental Physics, University of Bremen, Bremen (DE)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0001-5077-9524", "nameIdentifierScheme": "ORCID"}], "name": "Buschmann, M."}], "geoLocations": [{"geoLocationPlace": "Ny \u00c5lesund (SJ)", "geoLocationPoint": {"pointLatitude": "78.9", "pointLongitude": "11.9"}}], "schemaVersion": "http://datacite.org/schema/kernel-4"}
\ No newline at end of file
diff --git a/caltechdata_api/tester/validfiles/970.json b/caltechdata_api/tester/validfiles/970.json
deleted file mode 100644
index 31600d9..0000000
--- a/caltechdata_api/tester/validfiles/970.json
+++ /dev/null
@@ -1 +0,0 @@
-{"contributors": [{"nameIdentifiers": [{"nameIdentifier": "grid.20861.3d", "nameIdentifierScheme": "GRID"}], "name": "California Institute of Techonolgy, Pasadena, CA (US)", "contributorType": "HostingInstitution"}, {"affiliation": [{"name": "California Institute of Technology, Pasadena, CA (US)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0001-5383-8462", "nameIdentifierScheme": "ORCID"}], "name": "Roehl, C. M.", "contributorType": "DataCurator"}, {"affiliation": [{"name": "Department of Physics, University of Toronto, Toronto, ON (CA)"}], "nameIdentifiers": [{"nameIdentifier": "0000-0001-9947-1053", "nameIdentifierScheme": "ORCID"}, {"nameIdentifier": "D-2563-2012", "nameIdentifierScheme": "ResearcherID"}], "name": "Kimberly Strong", "contributorType": "ContactPerson"}, {"name": "TCCON", "contributorType": "ResearchGroup"}], "descriptions": [{"descriptionType": "Abstract", "description": "
These data are now obsolete and should be replaced by the most recent data: https://doi.org/10.14291/tccon.ggg2014.eureka01.R3
The Total Carbon Column Observing Network (TCCON) is a network of ground-based Fourier Transform Spectrometers that record direct solar absorption spectra of the atmosphere in the near-infrared. From these spectra, accurate and precise column-averaged abundances of atmospheric constituents including CO2, CH4, N2O, HF, CO, H2O, and HDO, are retrieved. This data set contains observations from the TCCON station at Eureka, Canada."}, {"descriptionType": "Other", "description": "
Cite this record as:
Strong, K., Roche, S., Franklin, J. E., Mendonca, J., Lutsch, E., Weaver, D., \u2026 Lindenmaier, R. (2017). TCCON data from Eureka (CA), Release GGG2014.R2 (Version R2) [Data set]. CaltechDATA. https://doi.org/10.14291/tccon.ggg2014.eureka01.r2
or choose a different citation style.
Download Citation
"}, {"descriptionType": "Other", "description": "
Unique Views: 41
Unique Downloads: 3
between September 20, 2017 and July 02, 2020
More info on how stats are collected
"}], "fundingReferences": [{"funderName": "Atlantic Innovation Fund"}, {"funderName": "Canada Foundation for Innovation", "funderIdentifierType": "GRID", "funderIdentifier": "grid.439998.6"}, {"funderName": "Canadian Foundation for Climate and Atmospheric Sciences"}, {"funderName": "Canadian Space Agency", "funderIdentifierType": "GRID", "funderIdentifier": "grid.236846.d"}, {"funderName": "Environment and Climate Change Canada", "funderIdentifierType": "GRID", "funderIdentifier": "grid.410334.1"}, {"funderName": "Government of Canada (International Polar Year funding)", "funderIdentifierType": "GRID", "funderIdentifier": "grid.451254.3"}, {"funderName": "Natural Sciences and Engineering Research Council of Canada", "funderIdentifierType": "GRID", "funderIdentifier": "grid.452912.9"}, {"funderName": "Polar Commission (Northern Scientific Training Program)", "funderIdentifierType": "GRID", "funderIdentifier": "grid.465477.3"}, {"funderName": "Nova Scotia Research Innovation Trust"}, {"funderName": "Ministry of Research and Innovation (Ontario Innovation Trust and Ontario Research Fund)", "funderIdentifierType": "GRID", "funderIdentifier": "grid.451078.f"}, {"funderName": "Natural Resources Canada (Polar Continental Shelf Program)", "funderIdentifierType": "GRID", "funderIdentifier": "grid.202033.0"}], "language": "eng", "relatedIdentifiers": [{"relatedIdentifier": "10.14291/tccon.ggg2014.documentation.R0/1221662", "relationType": "IsDocumentedBy", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R0/1149271", "relationType": "IsNewVersionOf", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "https://tccon-wiki.caltech.edu/Network_Policy/Data_Use_Policy/Data_Description", "relationType": "IsDocumentedBy", "relatedIdentifierType": "URL"}, {"relatedIdentifier": "https://tccon-wiki.caltech.edu/Sites", "relationType": "IsDocumentedBy", "relatedIdentifierType": "URL"}, {"relatedIdentifier": "10.14291/TCCON.GGG2014", 
"relationType": "IsPartOf", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R1/1325515", "relationType": "IsNewVersionOf", "relatedIdentifierType": "DOI"}, {"relatedIdentifier": "10.14291/tccon.ggg2014.eureka01.R3", "relationType": "IsPreviousVersionOf", "relatedIdentifierType": "DOI"}], "rightsList": [{"rights": "TCCON Data License", "rightsURI": "https://data.caltech.edu/tindfiles/serve/91de6fb9-18a5-4221-bd6b-41a9db8abc7c/"}], "subjects": [{"subject": "atmospheric trace gases"}, {"subject": "CO2"}, {"subject": "CH4"}, {"subject": "CO"}, {"subject": "N2O"}, {"subject": "column-averaged dry-air mole fractions"}, {"subject": "remote sensing"}, {"subject": "FTIR spectroscopy"}, {"subject": "TCCON"}], "version": "R2", "titles": [{"title": "TCCON data from Eureka (CA), Release GGG2014.R2"}], "formats": ["application/x-netcdf"], "dates": [{"date": "2017-09-20", "dateType": "Created"}, {"date": "2018-11-01", "dateType": "Updated"}, {"date": "2010-07-24/2017-09-10", "dateType": "Collected"}, {"date": "2017-09-20", "dateType": "Submitted"}, {"date": "2017-09-20", "dateType": "Issued"}], "publicationYear": "2017", "publisher": "CaltechDATA", "types": {"resourceTypeGeneral": "Dataset", "resourceType": "Dataset"}, "identifiers": [{"identifier": "10.14291/tccon.ggg2014.eureka01.R2", "identifierType": "DOI"}, {"identifier": "970", "identifierType": "CaltechDATA_Identifier"}, {"identifier": "GGG2014", "identifierType": "Software_Version"}, {"identifier": "eu", "identifierType": "id"}, {"identifier": "eureka01", "identifierType": "longName"}, {"identifier": "R1", "identifierType": "Data_Revision"}], "creators": [{"affiliation": [{"name": "Department of Physics, University of Toronto, Toronto, ON (CA)"}], "name": "Strong, K."}, {"affiliation": [{"name": "Department of Physics, University of Toronto, Toronto, ON (CA)"}], "name": "Roche, S."}, {"affiliation": [{"name": "School of Engineering and Applied Sciences, Harvard University, 
Cambridge, MA (USA)"}], "name": "Franklin, J. E."}, {"affiliation": [{"name": "Environment and Climate Change Canada, Downsview, ON (CA)"}], "name": "Mendonca, J."}, {"affiliation": [{"name": "Department of Physics, University of Toronto, Toronto, ON (CA)"}], "name": "Lutsch, E."}, {"affiliation": [{"name": "Department of Physics, University of Toronto, Toronto, ON (CA)"}], "name": "Weaver, D."}, {"affiliation": [{"name": "Department of Physics, University of Toronto, Toronto, ON (CA)"}], "name": "Fogal, P. F."}, {"affiliation": [{"name": "Department of Physics & Atmospheric Science, Dalhousie University, Halifax, NS, CA"}], "name": "Drummond, J. R."}, {"affiliation": [{"name": "Department of Physics, University of Toronto, Toronto, ON (CA)"}, {"name": "UCAR Center for Science Education, Boulder, CO (US)"}], "name": "Batchelor, R."}, {"affiliation": [{"name": "Department of Physics, University of Toronto, Toronto, ON (CA)"}, {"name": "Pacific Northwest National Laboratory, Richland, WA (US)"}], "name": "Lindenmaier, R."}], "geoLocations": [{"geoLocationPlace": "Eureka, NU (CA)", "geoLocationPoint": {"pointLatitude": "80.05", "pointLongitude": "-86.42"}}], "schemaVersion": "http://datacite.org/schema/kernel-4"}
\ No newline at end of file
diff --git a/caltechdata_api/tester/validfiles/file.py b/caltechdata_api/tester/validfiles/file.py
deleted file mode 100644
index 8b13789..0000000
--- a/caltechdata_api/tester/validfiles/file.py
+++ /dev/null
@@ -1 +0,0 @@
-
From d1230dc736381e5d5db73d36aabd4cd884cb35a3 Mon Sep 17 00:00:00 2001
From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com>
Date: Fri, 8 Nov 2024 13:08:00 -0800
Subject: [PATCH 34/42] Update cli.py
---
caltechdata_api/cli.py | 1008 ++++++++++++++++++++++++++++++----------
1 file changed, 754 insertions(+), 254 deletions(-)
diff --git a/caltechdata_api/cli.py b/caltechdata_api/cli.py
index ffb2cba..0df3c50 100644
--- a/caltechdata_api/cli.py
+++ b/caltechdata_api/cli.py
@@ -1,274 +1,774 @@
-import copy
-import json
-import os
+import argparse
import requests
import s3fs
-from requests import session
-from json.decoder import JSONDecodeError
-from caltechdata_api import customize_schema
-from caltechdata_api.utils import humanbytes
-
-
-def write_files_rdm(files, file_link, headers, f_headers, s3=None, keepfiles=False):
- f_json = []
- f_list = {}
- fnames = []
- for f in files:
- split = f.split("/")
- filename = split[-1]
- if filename in fnames:
- # We can't have a duplicate filename
- # Assume that the previous path value makes a unique name
- filename = f"{split[-2]}-{split[-1]}"
- fnames.append(filename)
- f_json.append({"key": filename})
- f_list[filename] = f
- # Now we see if any existing draft files need to be replaced
- result = requests.get(file_link, headers=f_headers)
- if result.status_code == 200:
- ex_files = result.json()["entries"]
- for ex in ex_files:
- if ex["key"] in f_list:
- result = requests.delete(ex["links"]["self"], headers=f_headers)
- if result.status_code != 204:
- raise Exception(result.text)
- # Create new file upload links
- result = requests.post(file_link, headers=headers, json=f_json)
- if result.status_code != 201:
- raise Exception(result.text)
- # Now we have the upload links
- for entry in result.json()["entries"]:
- self = entry["links"]["self"]
- link = entry["links"]["content"]
- commit = entry["links"]["commit"]
- name = entry["key"]
- if name in f_list:
- if s3:
- print("Downloading", f_list[name])
- s3.download(f_list[name], name)
- infile = open(name, "rb")
+from caltechdata_api import caltechdata_write, caltechdata_edit
+from md_to_json import parse_readme_to_json
+import json
+import os
+from cryptography.fernet import Fernet
+
+CALTECHDATA_API = "https://data.caltech.edu/api/names?q=identifiers.identifier:{}"
+ORCID_API = "https://orcid.org/"
+HEADERS = {"Accept": "application/json"}
+
+name = ""
+affiliationIdentifierScheme = ""
+affiliation_identifier = ""
+
+awardNumber = ""
+awardTitle = ""
+funderIdentifier = ""
+funderIdentifierType = ""
+funderName = ""
+
+
+home_directory = os.path.expanduser("~")
+caltechdata_directory = os.path.join(home_directory, ".caltechdata")
+
+
+if not os.path.exists(caltechdata_directory):
+ os.makedirs(caltechdata_directory)
+
+
+def generate_key():
+ return Fernet.generate_key()
+
+
+# Load the key from a file or generate a new one if not present
+def load_or_generate_key():
+ key_file = os.path.join(caltechdata_directory, "key.key")
+ if os.path.exists(key_file):
+ with open(key_file, "rb") as f:
+ return f.read()
+ else:
+ key = generate_key()
+ with open(key_file, "wb") as f:
+ f.write(key)
+ return key
+
+
+# Encrypt the token
+def encrypt_token(token, key):
+ f = Fernet(key)
+ return f.encrypt(token.encode())
+
+
+# Decrypt the token
+def decrypt_token(encrypted_token, key):
+ f = Fernet(key)
+ return f.decrypt(encrypted_token).decode()
+
+
+# Function to get or set token with support for test system
+def get_or_set_token(production=True):
+ key = load_or_generate_key()
+
+ # Use different token files for production and test environments
+ token_filename = "token.txt" if production else "token_test.txt"
+ token_file = os.path.join(caltechdata_directory, token_filename)
+
+ try:
+ with open(token_file, "rb") as f:
+ encrypted_token = f.read()
+ token = decrypt_token(encrypted_token, key)
+ print(
+ "Using saved CaltechDATA production token."
+ if production
+ else "Using saved CaltechDATA test token."
+ )
+ return token
+ except FileNotFoundError:
+ while True:
+ token = input(
+ f"Enter your {'Production' if production else 'Test'} CaltechDATA token: "
+ ).strip()
+ confirm_token = input(
+ f"Confirm your {'Production' if production else 'Test'} CaltechDATA token: "
+ ).strip()
+ if token == confirm_token:
+ encrypted_token = encrypt_token(token, key)
+ with open(token_file, "wb") as f:
+ f.write(encrypted_token)
+ return token
else:
- infile = open(f_list[name], "rb")
- result = requests.put(link, headers=f_headers, data=infile)
- if result.status_code != 200:
- raise Exception(result.text)
- result = requests.post(commit, headers=headers)
- if result.status_code != 200:
- raise Exception(result.text)
+ print("Tokens do not match. Please try again.")
+
+
+def welcome_message():
+ print("Welcome to CaltechDATA CLI")
+
+
+def get_user_input(prompt, required=True):
+ while True:
+ user_input = input(prompt)
+ if required and not user_input:
+ print("This field is required. Please provide a value.")
+ else:
+ return user_input
+
+
+def confirm_upload():
+ while True:
+ user_input = input("Do you want to send this record to CaltechDATA? (y/n): ")
+ if user_input.lower() == "y":
+ return True
+ elif user_input.lower() == "n":
+ print("Upload canceled.")
+ return False
else:
- # Delete any files not included in this write command
- if keepfiles == False:
- result = requests.delete(self, headers=f_headers)
- if result.status_code != 204:
- raise Exception(result.text)
-
-
-
-def add_file_links(
- metadata, file_links, file_descriptions=[], additional_descriptions="", s3_link=None
-):
- # Currently configured for S3 links, assuming all are at the same endpoint
- link_string = ""
- endpoint = "https://" + file_links[0].split("/")[2]
- s3 = s3fs.S3FileSystem(anon=True, client_kwargs={"endpoint_url": endpoint})
- index = 0
- for link in file_links:
- file = link.split("/")[-1]
- path = link.split(endpoint)[1]
- size = s3.info(path)["size"]
- size = humanbytes(size)
+ print("Invalid input. Please enter 'y' or 'n'.")
+
+
+def check_award_number(award_number):
+ response = requests.get(
+ f"https://data.caltech.edu/api/awards?q=number:{award_number}"
+ )
+ data = response.json()
+ total_hits = data.get("hits", {}).get("total", 0)
+ return total_hits > 0
+
+
+def get_funding_entries():
+ while True:
try:
- desc = file_descriptions[index] + ","
- except IndexError:
- desc = ""
- if link_string == "":
- if s3_link:
- link_string = f"Files available via S3 at {s3_link}</p>"
+ num_entries = int(
+ input("How many funding entries do you want to provide? ")
+ )
+ if num_entries >= 0:
+ return num_entries
else:
- cleaned = link.strip(file)
- link_string = f"Files available via S3 at {cleaned}</p>"
- link_string += f"""{file}, {desc} {size}
- <a role="button" class="ui compact mini button" href="{link}"
- > <i class="download icon"></i> Download </a>
</p>
- """
- index += 1
- # Tack on any additional descriptions
- if additional_descriptions != "":
- link_string += additional_descriptions
-
- description = {"description": link_string, "descriptionType": "files"}
- metadata["descriptions"].append(description)
- return metadata
-
-
-def send_to_community(review_link, data, headers, publish, community, message=None):
- if not message:
- message = "This record is submitted automatically with the CaltechDATA API"
-
- data = {
- "receiver": {"community": community},
- "type": "community-submission",
- }
- result = requests.put(review_link, json=data, headers=headers)
- if result.status_code != 200:
- raise Exception(result.text)
- submit_link = review_link.replace("/review", "/actions/submit-review")
- data = comment = {
- "payload": {
- "content": message,
- "format": "html",
- }
+ print("Please enter a non-negative integer.")
+ except ValueError:
+ print("Please enter a valid integer.")
+
+
+def validate_funder_identifier(funder_identifier):
+ response = requests.get(f"https://api.ror.org/organizations/{funder_identifier}")
+ if response.status_code == 200:
+ return response.json().get("name")
+ else:
+ return False
+
+
+def get_funding_details():
+ award_number = get_user_input("Enter the award number for funding: ")
+ award_exists = check_award_number(award_number)
+ if not award_exists:
+ print(
+ f"""Error: No award with number '{award_number}' found in
+ CaltechDATA. You will need to provide more details about the
+ funding."""
+ )
+ award_title = get_user_input("Enter the award title for funding: ")
+ while True:
+ funder_identifier = get_user_input("Enter the funder ROR (https://ror.org): ")
+ name = validate_funder_identifier(funder_identifier)
+ if name:
+ break
+ else:
+ print(
+ """This funder identifier is not a ROR. Please enter a valid
+ ROR identifier (without the url). For example the ROR for the
+ NSF is 021nxhr62."""
+ )
+ print("-" * 10)
+ return {
+ "awardNumber": award_number,
+ "awardTitle": award_title,
+ "funderName": name,
+ "funderIdentifier": funder_identifier,
+ "funderIdentifierType": "ROR",
}
- result = requests.post(submit_link, json=data, headers=headers)
- if result.status_code != 202:
- raise Exception(result.text)
- if publish:
- accept_link = result.json()["links"]["actions"]["accept"]
- data = comment = {
- "payload": {
- "content": "This record is accepted automatically with the CaltechDATA API",
- "format": "html",
- }
- }
- result = requests.post(accept_link, json=data, headers=headers)
- if result.status_code != 200:
- raise Exception(result.text)
- return result
-
-
-def caltechdata_write(
- metadata,
- token=None,
- files=[],
- production=False,
- schema="43",
- publish=False,
- file_links=[],
- s3=None,
- community=None,
- authors=False,
- file_descriptions=[],
- s3_link=None,
- default_preview=None,
- review_message=None,
-):
- """
- File links are links to files existing in external systems that will
- be added directly in a CaltechDATA record, instead of uploading the file.
-
- S3 is a s3sf object for directly opening files
- """
- # Make a copy so that none of our changes leak out
- metadata = copy.deepcopy(metadata)
-
- # If no token is provided, get from RDMTOK environment variable
- if not token:
- token = os.environ["RDMTOK"]
-
- # If files is a string - change to single value array
- if isinstance(files, str) == True:
- files = [files]
-
- if file_links:
- metadata = add_file_links(
- metadata, file_links, file_descriptions, s3_link=s3_link
+
+
+def parse_arguments():
+ welcome_message()
+ args = {}
+ args["title"] = get_user_input("Enter the title of the dataset: ")
+ args["description"] = get_user_input(
+ "Enter the abstract or description of the dataset: "
+ )
+ print("License options:")
+ print("1. Creative Commons Zero Waiver (cc-zero)")
+ print("2. Creative Commons Attribution (cc-by)")
+ print("3. Creative Commons Attribution Non Commercial (cc-by-nc)")
+
+ # Prompt user to select a license
+ while True:
+ license_number = input(
+ "Enter the number corresponding to the desired license: "
+ )
+ if license_number.isdigit() and 1 <= int(license_number) <= 8:
+ # Valid license number selected
+ args["license"] = {
+ "1": {
+ "rights": "Creative Commons Zero v1.0 Universal",
+ "rightsIdentifier": "cc0-1.0",
+ },
+ "2": {
+ "rights": "Creative Commons Attribution v4.0 Universal",
+ "rightsIdentifier": "cc-by-4.0",
+ },
+ "3": {
+ "rights": "Creative Commons Attribution Non-Commercial v4.0 Universal",
+ "rightsIdentifier": "cc-by-nc-4.0",
+ },
+ }[license_number]
+ break
+ else:
+ print("Invalid input. Please enter a number between 1 and 8.")
+
+ while True:
+ orcid = get_user_input("Enter your ORCID identifier: ")
+ family_name, given_name = get_names(orcid)
+ if family_name is not None and given_name is not None:
+ args["orcid"] = orcid
+ break # Break out of the loop if names are successfully retrieved
+ retry = input("Do you want to try again? (y/n): ")
+ if retry.lower() != "y":
+ print("Exiting program.")
+ return
+ # Optional arguments
+ num_funding_entries = get_funding_entries()
+ funding_references = []
+ for _ in range(num_funding_entries):
+ funding_references.append(get_funding_details())
+ args["fundingReferences"] = funding_references
+ return args
+
+
+def query_caltechdata_api(orcid):
+ response = requests.get(CALTECHDATA_API.format(orcid), headers=HEADERS)
+ return response.json()
+
+
+def query_orcid_api(orcid):
+ response = requests.get(ORCID_API + orcid, headers=HEADERS)
+ return response.json()
+
+
+def get_names(orcid):
+ caltechdata_response = query_caltechdata_api(orcid)
+ global affiliationIdentifierScheme, affiliation_identifier, name
+ if caltechdata_response.get("hits", {}).get("hits"):
+ hit = caltechdata_response["hits"]["hits"][0]
+ family_name = hit.get("family_name", "")
+ given_name = hit.get("given_name", "")
+ affiliation_identifier = "05dxps055"
+ affiliationIdentifierScheme = "ROR"
+ name = "California Institute of Technology"
+
+ else:
+ orcid_link = "https://orcid.org/"
+ headers = {"Accept": "application/json"}
+ orcid_response = requests.get(orcid_link + orcid, headers=headers)
+ try:
+ orcid_data = orcid_response.json()
+ name_info = orcid_data.get("person", {}).get("name", {})
+ family_name = name_info.get("family-name", {}).get("value", "")
+ given_name = name_info.get("given-names", {}).get("value", "")
+ except json.decoder.JSONDecodeError:
+ print(
+ f"Error: ORCID identifier not found or invalid. Please check the ORCID identifier and try again."
+ )
+ return None, None
+ return family_name, given_name
+
+
+def write_s3cmd_config(endpoint):
+ configf = os.path.join(home_directory, ".s3cfg")
+ if not os.path.exists(configf):
+ access_key = get_user_input("Enter the access key: ")
+ secret_key = get_user_input("Enter the secret key: ")
+ with open(configf, "w") as file:
+ file.write(
+ f"""[default]
+ access_key = {access_key}
+ host_base = {endpoint}
+ host_bucket = %(bucket).{endpoint}
+ secret_key = {secret_key}
+ """
+ )
+
+
+def upload_supporting_file(record_id=None):
+ filepath = ""
+ filepaths = []
+ file_link = ""
+ file_links = []
+ while True:
+ choice = get_user_input(
+ "Do you want to upload or link data files? (upload/link/n): "
+ ).lower()
+ if choice == "link":
+ endpoint = "sdsc.osn.xsede.org"
+ path = "ini230004-bucket01/"
+ if not record_id:
+ write_s3cmd_config(endpoint)
+ print("""S3 connection configured.""")
+ break
+ endpoint = f"https://{endpoint}/"
+ s3 = s3fs.S3FileSystem(anon=True, client_kwargs={"endpoint_url": endpoint})
+ # Find the files
+ files = s3.glob(path + record_id + "/*")
+ for link in files:
+ fname = link.split("/")[-1]
+ if "." not in fname:
+ # If there is a directory, get files
+ folder_files = s3.glob(link + "/*")
+ for file in folder_files:
+ name = file.split("/")[-1]
+ if "." not in name:
+ level_2_files = s3.glob(file + "/*")
+ for f in level_2_files:
+ name = f.split("/")[-1]
+ if "." not in name:
+ level_3_files = s3.glob(f + "/*")
+ for l3 in level_3_files:
+ file_links.append(endpoint + l3)
+ else:
+ file_links.append(endpoint + f)
+ else:
+ file_links.append(endpoint + file)
+ else:
+ file_links.append(endpoint + link)
+ return filepath, file_links
+ elif choice == "upload":
+ print("Current files in the directory:")
+ files = [
+ f for f in os.listdir() if not f.endswith(".json") and os.path.isfile(f)
+ ]
+ print("\n".join(files))
+ while True:
+ filename = get_user_input(
+ "Enter the filename to upload as a supporting file (or 'n' to finish): "
+ )
+ if filename == "n":
+ break
+ if filename in files:
+ file_size = os.path.getsize(filename)
+ if file_size > 1024 * 1024 * 1024:
+ print(
+ """The file is greater than 1 GB. Please upload the
+ metadata to CaltechDATA, and you'll be provided
+ instructions to upload the files to S3 directly."""
+ )
+ else:
+ filepath = os.path.abspath(filename)
+ filepaths.append(filepath)
+ else:
+ print(
+ f"Error: File '{filename}' not found. Please enter a valid filename."
+ )
+ add_more = get_user_input(
+ "Do you want to add more files? (y/n): "
+ ).lower()
+ if add_more != "y":
+ break
+ break
+ elif choice == "n":
+ break
+ else:
+ print("Invalid input. Please enter 'link' or 'upload' or 'n'.")
+ return filepaths, file_links
+
+
+def upload_data_from_file():
+ while True:
+ print("Current JSON files in the directory:")
+ files = [f for f in os.listdir() if f.endswith(".json") and os.path.isfile(f)]
+ print("\n".join(files))
+
+ filename = get_user_input(
+ "Enter a README.md or JSON filename to upload to CaltechDATA (or type 'exit' to go back): "
)
- # Pull out pid information
- if production == True:
- repo_prefix = "10.22002"
+ if filename.lower() == "exit":
+ return None
+
+ if filename == "README.md":
+ data = parse_readme_to_json(filename)
+ return data
+ else:
+ try:
+ with open(filename, "r") as file:
+ data = json.load(file)
+ return data
+
+ except json.JSONDecodeError as e:
+ print(f"Error: Invalid JSON format in the file '{filename}'. {str(e)}")
+
+
+def parse_args():
+ """Parse command-line arguments."""
+ parser = argparse.ArgumentParser(description="CaltechDATA CLI tool.")
+ parser.add_argument(
+ "-test", action="store_true", help="Use test mode, sets production to False"
+ )
+ args = parser.parse_args()
+ return args
+
+
+def main():
+ args = parse_args()
+
+ production = not args.test # Set production to False if -test flag is provided
+
+ choice = get_user_input(
+ "Do you want to create or edit a CaltechDATA record? (create/edit): "
+ ).lower()
+ if choice == "create":
+ create_record(production)
+ elif choice == "edit":
+ edit_record(production)
else:
- repo_prefix = "10.33569"
- pids = {}
- identifiers = []
- if "metadata" in metadata:
- # we have rdm schema
- if "identifiers" in metadata["metadata"]:
- identifiers = metadata["metadata"]["identifiers"]
- elif "identifiers" in metadata:
- identifiers = metadata["identifiers"]
- for identifier in identifiers:
- doi = False
- if "identifierType" in identifier:
- if identifier["identifierType"] == "DOI":
- doi = identifier["identifier"]
- prefix = doi.split("/")[0]
- elif identifier["identifierType"] == "oai":
- pids["oai"] = {
- "identifier": identifier["identifier"],
- "provider": "oai",
- }
- elif "scheme" in identifier:
- # We have RDM internal metadata
- if identifier["scheme"] == "doi":
- doi = identifier["identifier"]
- prefix = doi.split("/")[0]
- if doi != False:
- if prefix == repo_prefix:
- pids["doi"] = {
- "identifier": doi,
- "provider": "datacite",
- "client": "datacite",
- }
+ print("Invalid choice. Please enter 'create' or 'edit'.")
+
+
+def create_record(production):
+ token = get_or_set_token(production)
+ while True:
+ choice = get_user_input(
+ "Do you want to use metadata from an existing file or create new metadata? (existing/create): "
+ ).lower()
+ if choice == "existing":
+ existing_data = upload_data_from_file()
+ filepath, file_link = upload_supporting_file()
+ if existing_data:
+ if filepath != "":
+ response = caltechdata_write(
+ existing_data,
+ token,
+ filepath,
+ production=production,
+ publish=False,
+ )
+ elif file_link != "":
+ response = caltechdata_write(
+ existing_data,
+ token,
+ file_links=[file_link],
+ s3_link=file_link,
+ production=True,
+ publish=False,
+ )
+ else:
+ response = caltechdata_write(
+ existing_data, token, production=production, publish=False
+ )
+ rec_id = response
+ print_upload_message(rec_id, production)
+ break
else:
- pids["doi"] = {
- "identifier": doi,
- "provider": "external",
- }
-
- if "pids" not in metadata:
- metadata["pids"] = pids
-
- if authors == False:
- data = customize_schema.customize_schema(metadata, schema=schema)
- if production == True:
- url = "https://data.caltech.edu/"
+ print("Going back to the main menu.")
+ elif choice == "create":
+ args = parse_arguments()
+ family_name, given_name = get_names(args["orcid"])
+ metadata = {
+ "titles": [{"title": args["title"]}],
+ "descriptions": [
+ {"description": args["description"], "descriptionType": "Abstract"}
+ ],
+ "creators": [
+ {
+ "affiliation": [
+ {
+ "affiliationIdentifier": affiliation_identifier,
+ "affiliationIdentifierScheme": affiliationIdentifierScheme,
+ "name": name,
+ }
+ ],
+ "familyName": family_name,
+ "givenName": given_name,
+ "name": f"{family_name}, {given_name}",
+ "nameIdentifiers": [
+ {
+ "nameIdentifier": args["orcid"],
+ "nameIdentifierScheme": "ORCID",
+ }
+ ],
+ "nameType": "Personal",
+ }
+ ],
+ "types": {"resourceType": "", "resourceTypeGeneral": "Dataset"},
+ "rightsList": [
+ args["license"],
+ ],
+ "fundingReferences": args["fundingReferences"],
+ "schemaVersion": "http://datacite.org/schema/kernel-4",
+ }
+ filepath, file_link = upload_supporting_file()
+ if confirm_upload():
+ if filepath != "":
+ response = caltechdata_write(
+ metadata, token, filepath, production=production, publish=False
+ )
+ elif file_link != "":
+ response = caltechdata_write(
+ metadata,
+ token,
+ file_links=[file_link],
+ production=production,
+ publish=False,
+ )
+ else:
+ response = caltechdata_write(
+ metadata, token, production=production, publish=False
+ )
+ rec_id = response
+
+ print_upload_message(rec_id, production)
+ with open(response + ".json", "w") as file:
+ json.dump(metadata, file, indent=2)
+ break
+ else:
+ break
else:
- url = "https://data.caltechlibrary.dev/"
+ print("Invalid choice. Please enter 'existing' or 'create'.")
+
+
+def print_upload_message(rec_id, production):
+ base_url = (
+ "https://data.caltech.edu/uploads/"
+ if production
+ else "https://data.caltechlibrary.dev/uploads/"
+ )
+ print(
+ f"""You can view and publish this record at
+ {base_url}{rec_id}
+ If you need to upload large files to S3, you can type
+ `s3cmd put DATA_FILE s3://ini230004-bucket01/{rec_id}/`"""
+ )
+
+
+def edit_record(production):
+ record_id = input("Enter the CaltechDATA record ID: ")
+ token = get_or_set_token(production)
+ file_name = download_file_by_id(record_id, token)
+
+ if file_name:
+ try:
+ # Read the edited metadata file
+ with open(file_name, "r") as file:
+ metadata = json.load(file)
+ response = caltechdata_edit(
+ record_id, metadata, token, production=production, publish=False
+ )
+ if response:
+ print("Metadata edited successfully.")
+ else:
+ print("Failed to edit metadata.")
+ except Exception as e:
+ print(f"An error occurred during metadata editing: {e}")
else:
- data = metadata
- if production == True:
- url = "https://authors.library.caltech.edu/"
+ print("No metadata file found.")
+ choice = get_user_input("Do you want to add files? (y/n): ").lower()
+ if choice == "y":
+ if production:
+ API_URL_TEMPLATE = "https://data.caltech.edu/api/records/{record_id}/files"
+ API_URL_TEMPLATE_DRAFT = (
+ "https://data.caltech.edu/api/records/{record_id}/draft/files"
+ )
else:
- url = "https://authors.caltechlibrary.dev/"
+ API_URL_TEMPLATE = (
+ "https://data.caltechlibrary.dev/api/records/{record_id}/files"
+ )
+ API_URL_TEMPLATE_DRAFT = (
+ "https://data.caltechlibrary.dev/api/records/{record_id}/draft/files"
+ )
+
+ url = API_URL_TEMPLATE.format(record_id=record_id)
+ url_draft = API_URL_TEMPLATE_DRAFT.format(record_id=record_id)
+
+ headers = {
+ "accept": "application/json",
+ }
+
+ if token:
+ headers["Authorization"] = "Bearer %s" % token
+
+ response = requests.get(url, headers=headers)
+ response_draft = requests.get(url_draft, headers=headers)
+ data = response.json()
+ data_draft = response_draft.json()
+ # Check if 'entries' exists and its length
+ if (
+ len(data.get("entries", [])) == 0
+ and len(data_draft.get("entries", [])) == 0
+ ):
+ keepfile = False
+ else:
+ keepfile = (
+ input("Do you want to keep existing files? (y/n): ").lower() == "y"
+ )
+
+ filepath, file_link = upload_supporting_file(record_id)
+ if file_link:
+ print(file_link)
+
+ if filepath != "":
+ response = caltechdata_edit(
+ record_id,
+ token=token,
+ files=filepath,
+ production=production,
+ publish=False,
+ keepfiles=keepfile,
+ )
+ elif file_link != "":
+ response = caltechdata_edit(
+ record_id,
+ metadata,
+ token=token,
+ file_links=file_link,
+ production=production,
+ publish=False,
+ keepfiles=keepfile,
+ )
+
+ rec_id = response
+ print_upload_message(rec_id, production)
+
+
+def download_file_by_id(record_id, token=None):
+ url = f"https://data.caltech.edu/api/records/{record_id}"
headers = {
- "Authorization": "Bearer %s" % token,
- "Content-type": "application/json",
- }
- f_headers = {
- "Authorization": "Bearer %s" % token,
- "Content-type": "application/octet-stream",
+ "accept": "application/vnd.datacite.datacite+json",
}
- if not files:
- data["files"] = {"enabled": False}
- elif default_preview:
- data["files"] = {"enabled": True, "default_preview": default_preview}
+ if token:
+ headers["Authorization"] = "Bearer %s" % token
- # Make draft and publish
- result = requests.post(url + "/api/records", headers=headers, json=data)
- if result.status_code != 201:
- if result.status_code == 400 and "Referer checking failed" in result.text:
- raise Exception("Token is incorrect or missing referer.")
- else:
- raise Exception(result.text)
- idv = result.json()["id"]
- publish_link = result.json()["links"]["publish"]
-
- if files:
- file_link = result.json()["links"]["files"]
- write_files_rdm(files, file_link, headers, f_headers, s3)
-
- if community:
- review_link = result.json()["links"]["review"]
- send_to_community(
- review_link, data, headers, publish, community, review_message
- )
+ try:
+ response = requests.get(url, headers=headers)
+ if response.status_code != 200:
+ # Might have a draft
+ response = requests.get(
+ url + "/draft",
+ headers=headers,
+ )
+ if response.status_code != 200:
+ url = f"https://data.caltechlibrary.dev/api/records/{record_id}"
+ response = requests.get(
+ url,
+ headers=headers,
+ )
+ if response.status_code != 200:
+ # Might have a draft
+ response = requests.get(
+ url + "/draft",
+ headers=headers,
+ )
+ if response.status_code != 200:
+ raise Exception(
+ f"Record {record_id} does not exist, cannot edit"
+ )
+ file_content = response.content
+ file_name = f"downloaded_data_{record_id}.json"
+ with open(file_name, "wb") as file:
+ file.write(file_content)
+ print(f"Metadata downloaded successfully: {file_name}")
+ with open(file_name, "r") as file:
+ metadata = json.load(file)
+ while True:
+ print("Fields:")
+ for i, field in enumerate(metadata.keys()):
+ print(f"{i + 1}. {field}")
- else:
- if publish:
- result = requests.post(publish_link, json=data, headers=headers)
- if result.status_code != 202:
- raise Exception(result.text)
- return idv
+ field_choice = int(
+ input(
+ "Enter the number of the field you want to edit (or 0 to skip, 'exit' to exit): "
+ )
+ )
+
+ if field_choice == 0:
+ break
+
+ selected_field = list(metadata.keys())[field_choice - 1]
+
+ if isinstance(metadata[selected_field], list):
+ while True:
+ print(f"Items in {selected_field}:")
+ for i, item in enumerate(metadata[selected_field]):
+ print(f"{i + 1}. {item}")
+
+ item_choice = int(
+ input(
+ "Enter the number of the item you want to edit (or 0 to go back): "
+ )
+ )
+
+ if item_choice == 0:
+ break
+
+ selected_item = metadata[selected_field][item_choice - 1]
+
+ while True:
+ print(f"Subfields for {selected_field}:")
+ for i, subfield in enumerate(selected_item.keys()):
+ print(f"{i + 1}. {subfield}")
+
+ subfield_choice = int(
+ input(
+ "Enter the number of the subfield you want to edit (or 0 to go back): "
+ )
+ )
+
+ if subfield_choice == 0:
+ break
+
+ selected_subfield = list(selected_item.keys())[
+ subfield_choice - 1
+ ]
+
+ new_value = input(
+ f"Enter the new value for {selected_subfield}: "
+ )
+
+ metadata[selected_field][item_choice - 1][
+ selected_subfield
+ ] = new_value
+
+ with open(file_name, "w") as file:
+ json.dump(metadata, file, indent=2)
+
+ print(f"File updated successfully.")
+
+ else:
+ while True:
+ print(f"Subfields for {selected_field}:")
+ for i, subfield in enumerate(metadata[selected_field].keys()):
+ print(f"{i + 1}. {subfield}")
+
+ subfield_choice = int(
+ input(
+ "Enter the number of the subfield you want to edit (or 0 to go back): "
+ )
+ )
+
+ if subfield_choice == 0:
+ break
+
+ selected_subfield = list(metadata[selected_field].keys())[
+ subfield_choice - 1
+ ]
+
+ new_value = input(
+ f"Enter the new value for {selected_subfield}: "
+ )
+
+ metadata[selected_field][selected_subfield] = new_value
+
+ with open(file_name, "w") as file:
+ json.dump(metadata, file, indent=2)
+
+ print(f"File updated successfully.")
+
+ except Exception as e:
+ print(f"An error occurred: {e}")
+ return file_name
+
+
+if __name__ == "__main__":
+ main()
From 7c8a4345a64980eb25d0e5bf3b1616ecb589b6ae Mon Sep 17 00:00:00 2001
From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com>
Date: Fri, 8 Nov 2024 13:09:25 -0800
Subject: [PATCH 35/42] Update tester.py
---
tests/tester.py | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/tests/tester.py b/tests/tester.py
index 72efe49..13e8250 100644
--- a/tests/tester.py
+++ b/tests/tester.py
@@ -6,13 +6,18 @@
# Define the directory containing the test JSON files
VALID_DATACITE43_DIR = "../tests/data/datacite43/" # Directory for valid JSON files
+
# Function to get all JSON files in the directory
def get_all_json_files(directory):
- return [os.path.join(directory, f) for f in os.listdir(directory) if f.endswith('.json')]
+ return [
+ os.path.join(directory, f) for f in os.listdir(directory) if f.endswith(".json")
+ ]
+
# Get list of all valid JSON files in the directory
VALID_DATACITE43_FILES = get_all_json_files(VALID_DATACITE43_DIR)
+
@pytest.mark.parametrize("valid_file", VALID_DATACITE43_FILES)
def test_valid_json(valid_file):
"""Test that valid example files validate successfully."""
@@ -23,16 +28,17 @@ def test_valid_json(valid_file):
validation_errors = validator43(json_data)
except ValueError as e:
pytest.fail(f"Validation failed for: {valid_file}\nErrors: {str(e)}")
-
+
if validation_errors:
pytest.fail(f"Validation failed for: {valid_file}\nErrors: {validation_errors}")
else:
print(f"Validation passed for: {valid_file}")
+
if __name__ == "__main__":
# Track failures for manual testing
failed_files = []
-
+
# Run the tests and print results for each file
for file in VALID_DATACITE43_FILES:
try:
@@ -40,7 +46,7 @@ def test_valid_json(valid_file):
except AssertionError as e:
failed_files.append(file)
print(f"Error occurred in file: {file}\nError details: {e}")
-
+
# Print a summary of all failed files
if failed_files:
print("\nThe following files failed validation:")
From 0b25f187b6e475b61423c265edcdd4128e539607 Mon Sep 17 00:00:00 2001
From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com>
Date: Fri, 8 Nov 2024 13:11:15 -0800
Subject: [PATCH 36/42] Update caltechdata_write.py
---
caltechdata_api/caltechdata_write.py | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/caltechdata_api/caltechdata_write.py b/caltechdata_api/caltechdata_write.py
index e0cb0dd..dfb7040 100644
--- a/caltechdata_api/caltechdata_write.py
+++ b/caltechdata_api/caltechdata_write.py
@@ -63,6 +63,7 @@ def write_files_rdm(files, file_link, headers, f_headers, s3=None, keepfiles=Fal
raise Exception(result.text)
+
def add_file_links(
metadata, file_links, file_descriptions=[], additional_descriptions="", s3_link=None
):
@@ -248,7 +249,10 @@ def caltechdata_write(
# Make draft and publish
result = requests.post(url + "/api/records", headers=headers, json=data)
if result.status_code != 201:
- raise Exception(result.text)
+ if result.status_code == 400 and "Referer checking failed" in result.text:
+ raise Exception("Token is incorrect or missing referer.")
+ else:
+ raise Exception(result.text)
idv = result.json()["id"]
publish_link = result.json()["links"]["publish"]
From 070da79a7b9e24d5a40a7619a26ed71d41cc5422 Mon Sep 17 00:00:00 2001
From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com>
Date: Fri, 8 Nov 2024 13:21:34 -0800
Subject: [PATCH 37/42] Update customize_schema.py
---
caltechdata_api/customize_schema.py | 1 -
1 file changed, 1 deletion(-)
diff --git a/caltechdata_api/customize_schema.py b/caltechdata_api/customize_schema.py
index 7305539..21f7f80 100644
--- a/caltechdata_api/customize_schema.py
+++ b/caltechdata_api/customize_schema.py
@@ -596,7 +596,6 @@ def validate_metadata(json_record):
errors.append("'types' must have 'resourceTypeGeneral'.")
if "resourceType" in json_record["types"] and not isinstance(json_record["types"]["resourceType"], str):
errors.append("'resourceType' should be a string if provided.")
-=======
for location in json_record["geoLocations"]:
if not isinstance(location, dict):
errors.append("Each entry in 'geoLocations' must be a dictionary.")
From 1aed23caf683fc185f7c862fbd12cc83493217c4 Mon Sep 17 00:00:00 2001
From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com>
Date: Fri, 8 Nov 2024 13:22:47 -0800
Subject: [PATCH 38/42] Update customize_schema.py: reformat validate_metadata (no functional change)
---
caltechdata_api/customize_schema.py | 51 +++++++++++++++++++++--------
1 file changed, 38 insertions(+), 13 deletions(-)
diff --git a/caltechdata_api/customize_schema.py b/caltechdata_api/customize_schema.py
index 21f7f80..3199b43 100644
--- a/caltechdata_api/customize_schema.py
+++ b/caltechdata_api/customize_schema.py
@@ -393,7 +393,6 @@ def validate_metadata(json_record):
"""
errors = []
-
if "titles" not in json_record:
errors.append("'titles' field is missing.")
elif not isinstance(json_record["titles"], list) or len(json_record["titles"]) == 0:
@@ -467,7 +466,6 @@ def validate_metadata(json_record):
"Each subject must be a dictionary with a 'subject' key."
)
-
# Check for 'relatedIdentifiers'
if "relatedIdentifiers" in json_record:
if not isinstance(json_record["relatedIdentifiers"], list):
@@ -483,7 +481,6 @@ def validate_metadata(json_record):
"Each relatedIdentifier must be a dictionary with a 'relatedIdentifier' key."
)
-
# Check for 'rightsList'
if "rightsList" in json_record:
if not isinstance(json_record["rightsList"], list):
@@ -512,23 +509,39 @@ def validate_metadata(json_record):
errors.append("'dates' should be a non-empty list.")
else:
for date in json_record["dates"]:
- if not isinstance(date, dict) or "date" not in date or "dateType" not in date:
+ if (
+ not isinstance(date, dict)
+ or "date" not in date
+ or "dateType" not in date
+ ):
errors.append("Each 'date' must have 'date' and 'dateType'.")
# Check for 'identifiers'
if "identifiers" not in json_record:
errors.append("'identifiers' field is missing.")
- elif not isinstance(json_record["identifiers"], list) or len(json_record["identifiers"]) == 0:
+ elif (
+ not isinstance(json_record["identifiers"], list)
+ or len(json_record["identifiers"]) == 0
+ ):
errors.append("'identifiers' should be a non-empty list.")
else:
for identifier in json_record["identifiers"]:
- if not isinstance(identifier, dict) or "identifier" not in identifier or "identifierType" not in identifier:
- errors.append("Each 'identifier' must have 'identifier' and 'identifierType'.")
+ if (
+ not isinstance(identifier, dict)
+ or "identifier" not in identifier
+ or "identifierType" not in identifier
+ ):
+ errors.append(
+ "Each 'identifier' must have 'identifier' and 'identifierType'."
+ )
# Check for 'creators'
if "creators" not in json_record:
errors.append("'creators' field is missing.")
- elif not isinstance(json_record["creators"], list) or len(json_record["creators"]) == 0:
+ elif (
+ not isinstance(json_record["creators"], list)
+ or len(json_record["creators"]) == 0
+ ):
errors.append("'creators' should be a non-empty list.")
else:
for creator in json_record["creators"]:
@@ -539,7 +552,9 @@ def validate_metadata(json_record):
errors.append("'affiliation' in 'creators' should be a list.")
for affiliation in creator["affiliation"]:
if not isinstance(affiliation, dict) or "name" not in affiliation:
- errors.append("Each 'affiliation' in 'creators' must have a 'name'.")
+ errors.append(
+ "Each 'affiliation' in 'creators' must have a 'name'."
+ )
for rights in json_record["rightsList"]:
if not isinstance(rights, dict) or "rights" not in rights:
@@ -558,11 +573,19 @@ def validate_metadata(json_record):
errors.append("Each 'geoLocation' must have 'geoLocationPlace'.")
if "geoLocationPoint" in geo_loc:
point = geo_loc["geoLocationPoint"]
- if not isinstance(point, dict) or "pointLatitude" not in point or "pointLongitude" not in point:
- errors.append("'geoLocationPoint' must have 'pointLatitude' and 'pointLongitude'.")
+ if (
+ not isinstance(point, dict)
+ or "pointLatitude" not in point
+ or "pointLongitude" not in point
+ ):
+ errors.append(
+ "'geoLocationPoint' must have 'pointLatitude' and 'pointLongitude'."
+ )
# Check for 'formats'
- if "formats" in json_record and (not isinstance(json_record["formats"], list) or len(json_record["formats"]) == 0):
+ if "formats" in json_record and (
+ not isinstance(json_record["formats"], list) or len(json_record["formats"]) == 0
+ ):
errors.append("'formats' should be a non-empty list.")
# Check for 'language'
@@ -594,7 +617,9 @@ def validate_metadata(json_record):
else:
if "resourceTypeGeneral" not in json_record["types"]:
errors.append("'types' must have 'resourceTypeGeneral'.")
- if "resourceType" in json_record["types"] and not isinstance(json_record["types"]["resourceType"], str):
+ if "resourceType" in json_record["types"] and not isinstance(
+ json_record["types"]["resourceType"], str
+ ):
errors.append("'resourceType' should be a string if provided.")
for location in json_record["geoLocations"]:
if not isinstance(location, dict):
From 7716bfeb982357da7b183b94f5f2beff5e09e960 Mon Sep 17 00:00:00 2001
From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com>
Date: Fri, 8 Nov 2024 13:24:23 -0800
Subject: [PATCH 39/42] Update caltechdata_write.py: remove extra blank line
---
caltechdata_api/caltechdata_write.py | 1 -
1 file changed, 1 deletion(-)
diff --git a/caltechdata_api/caltechdata_write.py b/caltechdata_api/caltechdata_write.py
index dfb7040..2a46365 100644
--- a/caltechdata_api/caltechdata_write.py
+++ b/caltechdata_api/caltechdata_write.py
@@ -63,7 +63,6 @@ def write_files_rdm(files, file_link, headers, f_headers, s3=None, keepfiles=Fal
raise Exception(result.text)
-
def add_file_links(
metadata, file_links, file_descriptions=[], additional_descriptions="", s3_link=None
):
From 6a696cb699af79d3b5ecfd33de4c18adebc1500c Mon Sep 17 00:00:00 2001
From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com>
Date: Fri, 8 Nov 2024 13:25:12 -0800
Subject: [PATCH 40/42] Update test_unit.py: reformat only (no functional change)
---
tests/test_unit.py | 74 ++++++++++++++++++++++++++++++++--------------
1 file changed, 52 insertions(+), 22 deletions(-)
diff --git a/tests/test_unit.py b/tests/test_unit.py
index 5d1cad6..c9b57d2 100644
--- a/tests/test_unit.py
+++ b/tests/test_unit.py
@@ -9,14 +9,19 @@
VALID_DATACITE43_DIR = "../tests/data/datacite43/"
INVALID_DATACITE43_DIR = "../tests/data/invalid_datacite43/"
+
# Function to get all JSON files in the directory
def get_all_json_files(directory):
- return [os.path.join(directory, f) for f in os.listdir(directory) if f.endswith('.json')]
+ return [
+ os.path.join(directory, f) for f in os.listdir(directory) if f.endswith(".json")
+ ]
+
# Get list of all valid JSON files in the directory
VALID_DATACITE43_FILES = get_all_json_files(VALID_DATACITE43_DIR)
INVALID_DATACITE43_FILES = get_all_json_files(INVALID_DATACITE43_DIR)
+
@pytest.mark.parametrize("valid_file", VALID_DATACITE43_FILES)
def test_valid_json(valid_file):
"""Test that valid example files validate successfully."""
@@ -27,12 +32,13 @@ def test_valid_json(valid_file):
validation_errors = validator43(json_data)
except ValueError as e:
pytest.fail(f"Validation failed for: {valid_file}\nErrors: {str(e)}")
-
+
if validation_errors:
pytest.fail(f"Validation failed for: {valid_file}\nErrors: {validation_errors}")
else:
print(f"Validation passed for: {valid_file}")
+
@pytest.mark.parametrize("invalid_file", INVALID_DATACITE43_FILES)
def test_invalid_json(invalid_file):
"""Test that invalid example files do not validate successfully."""
@@ -44,48 +50,70 @@ def test_invalid_json(invalid_file):
except ValueError:
print(f"Validation failed as expected for: {invalid_file}")
return # Test passes if validation raises a ValueError
-
+
if validation_errors:
print(f"Validation failed as expected for: {invalid_file}")
else:
pytest.fail(f"Validation passed unexpectedly for: {invalid_file}")
-@pytest.mark.parametrize("missing_field_file", [
- {"file": "../tests/data/missing_creators.json", "missing_field": "creators"},
- {"file": "../tests/data/missing_titles.json", "missing_field": "titles"},
-])
+
+@pytest.mark.parametrize(
+ "missing_field_file",
+ [
+ {"file": "../tests/data/missing_creators.json", "missing_field": "creators"},
+ {"file": "../tests/data/missing_titles.json", "missing_field": "titles"},
+ ],
+)
def test_missing_required_fields(missing_field_file):
"""Test that JSON files missing required fields fail validation."""
- print(f"\nTesting missing field: {missing_field_file['missing_field']} in file: {missing_field_file['file']}")
- json_data = load_json_path(missing_field_file['file'])
- with pytest.raises(ValueError, match=f"Missing required metadata field: {missing_field_file['missing_field']}"):
+ print(
+ f"\nTesting missing field: {missing_field_file['missing_field']} in file: {missing_field_file['file']}"
+ )
+ json_data = load_json_path(missing_field_file["file"])
+ with pytest.raises(
+ ValueError,
+ match=f"Missing required metadata field: {missing_field_file['missing_field']}",
+ ):
validator43(json_data)
-@pytest.mark.parametrize("type_error_file", [
- {"file": "../tests/data/type_error_creators.json", "field": "creators"},
- {"file": "../tests/data/type_error_dates.json", "field": "dates"},
-])
+
+@pytest.mark.parametrize(
+ "type_error_file",
+ [
+ {"file": "../tests/data/type_error_creators.json", "field": "creators"},
+ {"file": "../tests/data/type_error_dates.json", "field": "dates"},
+ ],
+)
def test_incorrect_field_types(type_error_file):
"""Test that JSON files with incorrect field types fail validation."""
- print(f"\nTesting incorrect type in field: {type_error_file['field']} for file: {type_error_file['file']}")
- json_data = load_json_path(type_error_file['file'])
- with pytest.raises(ValueError, match=f"Incorrect type for field: {type_error_file['field']}"):
+ print(
+ f"\nTesting incorrect type in field: {type_error_file['field']} for file: {type_error_file['file']}"
+ )
+ json_data = load_json_path(type_error_file["file"])
+ with pytest.raises(
+ ValueError, match=f"Incorrect type for field: {type_error_file['field']}"
+ ):
validator43(json_data)
+
def test_multiple_errors():
"""Test JSON file with multiple issues to check all errors are raised."""
json_data = load_json_path("../tests/data/multiple_errors.json")
with pytest.raises(ValueError, match="Multiple validation errors"):
validator43(json_data)
+
def test_error_logging(caplog):
"""Test that errors are logged correctly during validation."""
- json_data = load_json_path("../tests/data/invalid_datacite43/some_invalid_file.json")
+ json_data = load_json_path(
+ "../tests/data/invalid_datacite43/some_invalid_file.json"
+ )
with caplog.at_level(logging.ERROR):
with pytest.raises(ValueError):
validator43(json_data)
assert "Validation failed" in caplog.text
+
if __name__ == "__main__":
# Manual test runner for valid files
failed_valid_files = []
@@ -96,14 +124,14 @@ def test_error_logging(caplog):
except AssertionError as e:
failed_valid_files.append(file)
print(f"Error occurred in valid file: {file}\nError details: {e}")
-
+
if not failed_valid_files:
print("\n✅ All valid files passed validation. Test complete.")
else:
print("\n❌ The following valid files failed validation:")
for failed_file in failed_valid_files:
print(f"- {failed_file}")
-
+
# Manual test runner for invalid files
passed_invalid_files = []
print("\nRunning validation for invalid files...")
@@ -113,9 +141,11 @@ def test_error_logging(caplog):
except AssertionError as e:
passed_invalid_files.append(file)
print(f"Error occurred in invalid file: {file}\nError details: {e}")
-
+
if not passed_invalid_files:
- print("\n✅ All invalid files failed validation as expected. Test is a success.")
+ print(
+ "\n✅ All invalid files failed validation as expected. Test is a success."
+ )
else:
print("\n❌ The following invalid files unexpectedly passed validation:")
for passed_file in passed_invalid_files:
From 31dcc998a06d08c992d8ff5e736f1f4113ae0ffd Mon Sep 17 00:00:00 2001
From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com>
Date: Fri, 8 Nov 2024 13:25:43 -0800
Subject: [PATCH 41/42] Update bot.py: download metadata from data.caltechlibrary.dev; reformat
---
tests/bot.py | 71 +++++++++++++++++++++++++++++-----------------------
1 file changed, 39 insertions(+), 32 deletions(-)
diff --git a/tests/bot.py b/tests/bot.py
index 936f1d2..b98360e 100644
--- a/tests/bot.py
+++ b/tests/bot.py
@@ -9,20 +9,21 @@
import pytest
from customize_schema import validate_metadata as validator43 # Import validator
+
class CaltechDataTester:
def __init__(self):
self.test_dir = "caltech_test_data"
self.timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
if not os.path.exists(self.test_dir):
os.makedirs(self.test_dir)
-
+
# Create test data directory with timestamp
self.test_run_dir = os.path.join(self.test_dir, f"test_run_{self.timestamp}")
os.makedirs(self.test_run_dir)
-
+
# Initialize logging
self.log_file = os.path.join(self.test_run_dir, "test_log.txt")
-
+
def log(self, message):
"""Log message to both console and file"""
print(message)
@@ -38,7 +39,7 @@ def create_test_files(self):
f.write("2023-01-01,25.5,60\n")
f.write("2023-01-02,26.0,62\n")
f.write("2023-01-03,24.8,65\n")
-
+
self.log(f"Created test CSV file: {csv_path}")
return csv_path
@@ -64,9 +65,9 @@ def generate_test_responses(self):
def extract_record_id(self, output_text):
"""Extract record ID from CLI output"""
try:
- for line in output_text.split('\n'):
- if 'uploads/' in line:
- return line.strip().split('/')[-1]
+ for line in output_text.split("\n"):
+ if "uploads/" in line:
+ return line.strip().split("/")[-1]
except Exception as e:
self.log(f"Error extracting record ID: {e}")
return None
@@ -76,22 +77,22 @@ def download_and_validate_record(self, record_id):
try:
# Wait for record to be available
time.sleep(5)
-
+
# Download metadata
- url = f"https://data.caltech.edu/records/{record_id}/export/datacite-json?preview=1"
+ url = f"https://data.caltechlibrary.dev/records/{record_id}/export/datacite-json"
response = requests.get(url)
response.raise_for_status()
-
+
# Save metadata
json_path = os.path.join(self.test_run_dir, f"{record_id}.json")
- with open(json_path, 'w') as f:
+ with open(json_path, "w") as f:
json.dump(response.json(), f, indent=2)
-
+
self.log(f"Downloaded metadata to: {json_path}")
-
+
# Validate metadata using the imported validator
validation_errors = validator43(response.json())
-
+
if validation_errors:
self.log("❌ Validation errors found:")
for error in validation_errors:
@@ -100,7 +101,7 @@ def download_and_validate_record(self, record_id):
else:
self.log("✅ Validation passed successfully")
return True
-
+
except Exception as e:
self.log(f"Error in download and validation: {e}")
return False
@@ -109,28 +110,31 @@ def run_test_submission(self):
"""Run the complete test submission process"""
try:
self.log("Starting test submission process...")
-
+
# Create test files
test_csv = self.create_test_files()
-
+
# Generate responses
responses = self.generate_test_responses()
-
+
# Setup output capture
class OutputCapture:
def __init__(self):
self.output = []
+
def write(self, text):
self.output.append(text)
sys.__stdout__.write(text)
+
def flush(self):
pass
+
def get_output(self):
- return ''.join(self.output)
-
+ return "".join(self.output)
+
output_capture = OutputCapture()
sys.stdout = output_capture
-
+
# Mock input and run CLI
def mock_input(prompt):
self.log(f"Prompt: {prompt}")
@@ -139,31 +143,32 @@ def mock_input(prompt):
self.log(f"Response: {response}")
return response
return ""
-
- with patch('builtins.input', side_effect=mock_input):
+
+ with patch("builtins.input", side_effect=mock_input):
try:
import cli
+
cli.main()
except Exception as e:
self.log(f"Error during CLI execution: {e}")
return False
-
+
# Restore stdout
sys.stdout = sys.__stdout__
-
+
# Get output and extract record ID
cli_output = output_capture.get_output()
record_id = self.extract_record_id(cli_output)
-
+
if not record_id:
self.log("Failed to extract record ID")
return False
-
+
self.log(f"Successfully created record with ID: {record_id}")
-
+
# Validate the record
return self.download_and_validate_record(record_id)
-
+
except Exception as e:
self.log(f"Error in test submission: {e}")
return False
@@ -173,17 +178,19 @@ def mock_input(prompt):
os.remove(test_csv)
self.log("Test files cleaned up")
+
def main():
tester = CaltechDataTester()
-
+
success = tester.run_test_submission()
-
+
if success:
tester.log("\n🎉 Test submission and validation completed successfully!")
else:
tester.log("\n❌ Test submission or validation failed - check logs for details")
-
+
tester.log(f"\nTest logs available at: {tester.log_file}")
+
if __name__ == "__main__":
main()
From 2b74fffc230f5dab3aaeb716a923ecb82af41fa3 Mon Sep 17 00:00:00 2001
From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com>
Date: Fri, 8 Nov 2024 13:31:07 -0800
Subject: [PATCH 42/42] Update cli.py: add "publisher": "CaltechDATA" to create_record metadata
---
caltechdata_api/cli.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/caltechdata_api/cli.py b/caltechdata_api/cli.py
index 0df3c50..a0b46ae 100644
--- a/caltechdata_api/cli.py
+++ b/caltechdata_api/cli.py
@@ -469,6 +469,7 @@ def create_record(production):
"descriptions": [
{"description": args["description"], "descriptionType": "Abstract"}
],
+ "publisher": "CaltechDATA",
"creators": [
{
"affiliation": [