Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
88366a5
Merge content from rohan
tmorrell Dec 9, 2024
a6aedea
Add updated CITATION.cff from codemeta.json file
tmorrell Dec 9, 2024
c31388d
Update cli.py
tmorrell Dec 13, 2024
3a1559d
Add parse readme to package
tmorrell Dec 13, 2024
327ac9c
use package parse
tmorrell Dec 13, 2024
18c2383
Update bot.yaml
tmorrell Dec 13, 2024
97c3294
relax validation
tmorrell Dec 13, 2024
487a1c8
Update __init__.py
tmorrell Dec 13, 2024
b2a5bf0
Update test_unit.py
tmorrell Dec 13, 2024
ae141b4
Delete tests/test_conversion.py
tmorrell Dec 13, 2024
3b7736d
Delete tests/data/datacite4 directory
tmorrell Dec 13, 2024
53256af
Delete tests/10.22002-D1.1098
tmorrell Dec 13, 2024
05a0355
Delete tests/tester.py
tmorrell Dec 13, 2024
7dc437a
Add example notebook
AbakahAlexander Dec 18, 2024
28aea85
Add updated CITATION.cff from codemeta.json file
tmorrell Dec 18, 2024
7cb361b
Add files via upload
RohanBhattaraiNP Dec 19, 2024
5ae5b6f
Rename bot (2).yaml to bot.yaml
RohanBhattaraiNP Dec 19, 2024
3747a58
Update bot.yaml
RohanBhattaraiNP Dec 20, 2024
04af4eb
Create md_to_json.py
RohanBhattaraiNP Dec 20, 2024
0adcb2e
Update cli.py
RohanBhattaraiNP Dec 20, 2024
cfc8026
Update cli.py
RohanBhattaraiNP Dec 20, 2024
4ed667a
Update cli.py
RohanBhattaraiNP Dec 20, 2024
1f1eefd
Add updated CITATION.cff from codemeta.json file
RohanBhattaraiNP Dec 20, 2024
f79bc81
Delete tests/md_to_json.py
RohanBhattaraiNP Dec 20, 2024
35a3262
Update cli.py
RohanBhattaraiNP Dec 20, 2024
7f04c42
Merge branch 'main' into main
tmorrell Dec 20, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 57 additions & 0 deletions .github/workflows/bot.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# CI workflow: runs the scripted CaltechDATA submission (tests/bot_yaml.py)
# followed by the unit tests.
name: Bot validation

# Run on pushes and pull requests, but only when one of the listed files changes.
on:
  push:
    paths:
      - 'caltechdata_api/cli.py'
      - 'caltechdata_api/customize_schema.py'
      - 'caltechdata_api/caltechdata_write.py'
      - 'caltechdata_api/caltechdata_edit.py'
      - 'README.md'
  pull_request:
    paths:
      - 'caltechdata_api/cli.py'
      - 'caltechdata_api/customize_schema.py'
      - 'caltechdata_api/caltechdata_write.py'
      - 'caltechdata_api/caltechdata_edit.py'
      - 'README.md'

jobs:
  validate-metadata:
    runs-on: ubuntu-latest

    steps:
      # fetch-depth: 0 checks out the full history instead of a shallow clone.
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.x'

      # Fail early with a clear message when the token secret is empty/unset
      # for this run, rather than failing later inside the test script.
      - name: Check for Required Environment Variables
        env:
          CALTECHDATA_TOKEN: ${{ secrets.CALTECHDATA_TOKEN }}
        run: |
          if [ -z "$CALTECHDATA_TOKEN" ]; then
            echo "Error: CALTECHDATA_TOKEN environment variable is not set"
            exit 1
          fi

      # Test-time dependencies are installed explicitly, then the package itself.
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install pytest requests s3fs cryptography
          pip install .

      # The token is exposed to the script through the environment.
      - name: Run CaltechDATA Metadata Validation
        env:
          CALTECHDATA_TOKEN: ${{ secrets.CALTECHDATA_TOKEN }}
        run: |
          python tests/bot_yaml.py
      # NOTE(review): no token is passed to this step; confirm test_rdm.py does not need one.
      - name: Run Unit Tests
        run: |
          cd tests
          pytest test_unit.py
          pytest test_rdm.py
2 changes: 1 addition & 1 deletion CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,4 @@ keywords:
- metadata
- software
- InvenioRDM
date-released: 2024-12-18

3 changes: 2 additions & 1 deletion caltechdata_api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
caltechdata_unembargo,
caltechdata_accept,
)
from .customize_schema import customize_schema
from .customize_schema import customize_schema, validate_metadata
from .get_metadata import get_metadata
from .download_file import download_file, download_url
from .utils import humanbytes
from .md_to_json import parse_readme_to_json
15 changes: 10 additions & 5 deletions caltechdata_api/cli.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import argparse
import requests
import s3fs
from caltechdata_api import caltechdata_write, caltechdata_edit
from md_to_json import parse_readme_to_json
from caltechdata_api import caltechdata_write, caltechdata_edit, parse_readme_to_json
import json
import os
from cryptography.fernet import Fernet
Expand Down Expand Up @@ -34,7 +33,7 @@ def generate_key():
return Fernet.generate_key()


# Load the key from a file or generate a new one if not present
# Load the key from a file or generate a new one if not present.
def load_or_generate_key():
key_file = os.path.join(caltechdata_directory, "key.key")
if os.path.exists(key_file):
Expand All @@ -59,11 +58,17 @@ def decrypt_token(encrypted_token, key):
return f.decrypt(encrypted_token).decode()


# Function to get or set token with support for test system
# Function to get or set token with support for test systems
def get_or_set_token(production=True):
# First check for environment variable
env_token = os.environ.get("CALTECHDATA_TOKEN")
if env_token:
print("Using token from environment variable")
return env_token

key = load_or_generate_key()

# Use different token files for production and test environments
# Use different token files for production and test environments
token_filename = "token.txt" if production else "token_test.txt"
token_file = os.path.join(caltechdata_directory, token_filename)

Expand Down
30 changes: 14 additions & 16 deletions caltechdata_api/customize_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -480,18 +480,17 @@ def validate_metadata(json_record):
errors.append("Each 'subject' must have a 'subject' key.")

# Check for 'dates'
if "dates" not in json_record:
errors.append("'dates' field is missing.")
elif not isinstance(json_record["dates"], list) or len(json_record["dates"]) == 0:
errors.append("'dates' should be a non-empty list.")
else:
for date in json_record["dates"]:
if (
not isinstance(date, dict)
or "date" not in date
or "dateType" not in date
):
errors.append("Each 'date' must have 'date' and 'dateType'.")
if "dates" in json_record:
if not isinstance(json_record["dates"], list) or len(json_record["dates"]) == 0:
errors.append("'dates' should be a non-empty list.")
else:
for date in json_record["dates"]:
if (
not isinstance(date, dict)
or "date" not in date
or "dateType" not in date
):
errors.append("Each 'date' must have 'date' and 'dateType'.")

# Check for 'creators'
if "creators" not in json_record:
Expand Down Expand Up @@ -601,10 +600,9 @@ def validate_metadata(json_record):
errors.append("'publisher' should be a string.")

# Check for 'publicationYear'
if "publicationYear" not in json_record:
errors.append("'publicationYear' field is missing.")
elif not isinstance(json_record["publicationYear"], str):
errors.append("'publicationYear' should be a string.")
if "publicationYear" in json_record:
if not isinstance(json_record["publicationYear"], str):
errors.append("'publicationYear' should be a string.")

# Check for 'types'
if "types" not in json_record:
Expand Down
Binary file removed tests/10.22002-D1.1098
Binary file not shown.
157 changes: 157 additions & 0 deletions tests/bot_yaml.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
import subprocess
import time
from unittest.mock import patch
import sys
import os
import json
import requests
from datetime import datetime
import pytest
import importlib.util
import traceback


class CaltechDataTester:
    """Drive the CaltechDATA CLI end to end using scripted prompt answers.

    Each instance creates a timestamped run directory (under
    ``$GITHUB_WORKSPACE`` when set, otherwise ``./caltech_test_data``) that
    holds the generated test data and a ``test_log.txt`` log file.
    """

    def __init__(self):
        # Use GitHub Actions environment or create a local test directory
        self.test_dir = os.environ.get(
            "GITHUB_WORKSPACE", os.path.join(os.getcwd(), "caltech_test_data")
        )
        self.timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

        # Ensure test directory exists
        os.makedirs(self.test_dir, exist_ok=True)

        # Create test run directory
        self.test_run_dir = os.path.join(self.test_dir, f"test_run_{self.timestamp}")
        os.makedirs(self.test_run_dir, exist_ok=True)

        # Initialize logging
        self.log_file = os.path.join(self.test_run_dir, "test_log.txt")

    def log(self, message):
        """Print *message* and append it, timestamped, to the run's log file."""
        print(message)
        # Explicit UTF-8: messages may contain non-ASCII characters (emoji),
        # which would fail to encode under some platform default encodings.
        with open(self.log_file, "a", encoding="utf-8") as f:
            f.write(f"{datetime.now()}: {message}\n")

    def create_test_files(self):
        """Write a small sample CSV into the run directory and return its path."""
        csv_path = os.path.join(self.test_run_dir, "test_data.csv")
        with open(csv_path, "w") as f:
            f.write("date,temperature,humidity\n")
            f.write("2023-01-01,25.5,60\n")
            f.write("2023-01-02,26.0,62\n")
            f.write("2023-01-03,24.8,65\n")

        self.log(f"Created test CSV file: {csv_path}")
        return csv_path

    def import_cli_module(self):
        """Load ``caltechdata_api/cli.py`` from the workspace by file path.

        The workspace root is ``$GITHUB_WORKSPACE`` when set, otherwise the
        current working directory. Returns the executed module object.
        """
        cli_path = os.path.join(
            os.environ.get("GITHUB_WORKSPACE", os.getcwd()), "caltechdata_api", "cli.py"
        )
        spec = importlib.util.spec_from_file_location("cli", cli_path)
        cli_module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(cli_module)
        return cli_module

    def generate_test_responses(self):
        """Return a mapping of exact CLI prompt text to the scripted answer.

        Keys must match the prompt strings passed to ``input()`` exactly;
        prompts that are not listed are answered with an empty string by
        ``run_test_submission``.
        """
        return {
            "Do you want to create or edit a CaltechDATA record? (create/edit): ": "create",
            "Do you want to use metadata from an existing file or create new metadata? (existing/create): ": "create",
            "Enter the title of the dataset: ": f"Test Dataset {self.timestamp}",
            "Enter the abstract or description of the dataset: ": "This is an automated test dataset containing sample climate data for validation purposes.",
            "Enter the number corresponding to the desired license: ": "1",
            "Enter your ORCID identifier: ": os.environ.get(
                "TEST_ORCID", "0000-0002-1825-0097"
            ),
            "How many funding entries do you want to provide? ": "1",
            "Enter the award number for funding: ": "NSF-1234567",
            "Enter the award title for funding: ": "Automated Testing Grant",
            "Enter the funder ROR (https://ror.org): ": "021nxhr62",
            "Do you want to upload or link data files? (upload/link/n): ": "upload",
            # NOTE(review): the CSV is created inside test_run_dir while this
            # answer is a bare file name; confirm the CLI resolves it from
            # the intended directory.
            "Enter the filename to upload as a supporting file (or 'n' to finish): ": "test_data.csv",
            "Do you want to add more files? (y/n): ": "n",
            "Do you want to send this record to CaltechDATA? (y/n): ": "y",
        }

    def run_test_submission(self):
        """Run the CLI's ``main()`` with mocked ``input`` and captured stdout.

        Returns:
            True when ``main()`` returns without raising an ``Exception``,
            False otherwise (the error is logged and the traceback printed).

        ``sys.stdout`` and ``sys.argv`` are always restored to the values
        they had on entry, and the generated CSV is removed, regardless of
        how the run ends.
        """
        test_csv = None
        # Remember global state we are about to replace so that it can be
        # restored even when the CLI raises.
        original_stdout = sys.stdout
        original_argv = sys.argv

        try:
            self.log("Starting test submission process...")

            # Create test files
            test_csv = self.create_test_files()

            # Dynamically import cli module
            cli_module = self.import_cli_module()

            # Generate responses
            responses = self.generate_test_responses()

            # Setup output capture
            class OutputCapture:
                """Tee: remember everything written and echo it to the real stdout."""

                def __init__(self):
                    self.output = []

                def write(self, text):
                    self.output.append(text)
                    sys.__stdout__.write(text)

                def flush(self):
                    pass

                def get_output(self):
                    return "".join(self.output)

            output_capture = OutputCapture()
            sys.stdout = output_capture

            # Mock input and run CLI
            def mock_input(prompt):
                self.log(f"Prompt: {prompt}")
                if prompt in responses:
                    response = responses[prompt]
                    self.log(f"Response: {response}")
                    return response
                # Unknown prompts get an empty answer.
                return ""

            with patch("builtins.input", side_effect=mock_input):
                # Use -test flag to use test mode
                sys.argv = [sys.argv[0], "-test"]
                cli_module.main()

            return True

        except Exception as e:
            self.log(f"Error in test submission: {e}")
            traceback.print_exc()
            return False
        finally:
            # Restore process-wide state first so later output is not routed
            # through the capture object.
            sys.stdout = original_stdout
            sys.argv = original_argv

            # Cleanup
            if test_csv is not None and os.path.exists(test_csv):
                os.remove(test_csv)
            self.log("Test files cleaned up")


def main():
    """Run one scripted submission and exit with status 0 on success, 1 on failure."""
    tester = CaltechDataTester()

    if tester.run_test_submission():
        tester.log("\n🎉 Test submission completed successfully!")
        exit_code = 0
    else:
        tester.log("\n❌ Test submission failed - check logs for details")
        exit_code = 1

    sys.exit(exit_code)


if __name__ == "__main__":
main()
Loading
Loading