From e5e5dbe00c7ec9fc83bb410f34bb44d3e370d93f Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 10 Mar 2026 12:00:19 +0000 Subject: [PATCH 1/7] Add MATLAB to Python porting guide Documents the core rules for porting MATLAB code to Python, including naming conventions, namespace mapping, Pydantic validation, error handling, and docstring requirements. https://claude.ai/code/session_012qih1bFF69sgSPvBnWzEHo --- PYTHON_PORTING_GUIDE.md | 42 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 PYTHON_PORTING_GUIDE.md diff --git a/PYTHON_PORTING_GUIDE.md b/PYTHON_PORTING_GUIDE.md new file mode 100644 index 0000000..c49bbb4 --- /dev/null +++ b/PYTHON_PORTING_GUIDE.md @@ -0,0 +1,42 @@ +# MATLAB to Python Porting Rules + +## 1. The Core Philosophy: Lead-Follow Architecture + +The MATLAB codebase is the **Source of Truth**. The Python version is a "faithful mirror." When a conflict arises between "Pythonic" style and MATLAB symmetry, **symmetry wins**. + +## 2. Function & Variable Naming (The "Strict Mirror" Rule) + +Do **not** attempt to "translate" MATLAB names into Python PEP 8 (`snake_case`). + +- **Exact Match:** Every function name in Python must be an identical string match to the MATLAB function name. +- **Case Sensitivity:** If the MATLAB function is `ListAllDocuments`, the Python function must be `ListAllDocuments`. If the MATLAB function is `get_dataset_id`, the Python function must be `get_dataset_id`. +- **No Aliasing:** Do not create `snake_case` aliases unless explicitly requested. The user should be able to copy-paste function names between environments. + +## 3. Namespace and Directory Structure + +MATLAB namespaces (`+` packages) must be mapped 1:1 to Python packages and modules to ensure discoverability. + +- **Hierarchy:** Every MATLAB `+namespace` folder must become a Python directory containing an `__init__.py`. 
+- **File Mapping:** If a MATLAB function exists as `+ndi/+fun/+X/Y.m`, the Python equivalent must be located at `ndi/fun/X/Y.py`. +- **Sub-modules:** For functions inside a namespace that aren't in their own file, group them into a `.py` file named after the MATLAB namespace level. + +## 4. Input Validation: Pydantic is Mandatory + +To replicate the robustness of the MATLAB `arguments` block, use Pydantic for all public-facing API functions. + +- **Decorator:** Use the `@pydantic.validate_call` decorator on all functions. +- **Type Mirroring:** + - MATLAB `double` or `numeric` → Python `float` or `int` + - MATLAB `char` or `string` → Python `str` + - MATLAB `{member1, member2}` → Python `Literal["member1", "member2"]` +- **Coercion:** Allow Pydantic's default behavior of casting (e.g., allowing a string `"1"` or integer `1` to satisfy a `bool` type). + +## 5. Error Handling + +- If a MATLAB function throws an error for a specific condition, the Python version must raise a corresponding Exception (`ValueError`, `TypeError`, or a custom `NDIError`). +- The goal is to ensure that a user providing bad input gets a **"Hard Fail"** at the function entry point in both languages. + +## 6. Documentation (Docstring Symmetry) + +- Include the original MATLAB documentation in the Python docstring. +- Note any Python-specific requirements (like specific library dependencies) at the bottom of the docstring. From 85677299f12faccc9cd803b1e3e8ed2ea01c1050 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 10 Mar 2026 18:09:31 +0000 Subject: [PATCH 2/7] Rename all ndi.cloud functions to match MATLAB names (Strict Mirror Rule) Apply the Strict Mirror Rule from PYTHON_PORTING_GUIDE.md: every Python function name must be an identical string match to its MATLAB counterpart. Renamed ~80+ functions across 17 source files, 6 test files, and 2 tutorials from snake_case to camelCase. Updated MATLAB_MAPPING.md to reflect new names. 
https://claude.ai/code/session_01Skj3WbRtzYBdW5uk6nazhE --- MATLAB_MAPPING.md | 195 ++++---- src/ndi/cloud/__init__.py | 42 +- src/ndi/cloud/admin/crossref.py | 24 +- src/ndi/cloud/admin/doi.py | 14 +- src/ndi/cloud/api/compute.py | 12 +- src/ndi/cloud/api/datasets.py | 32 +- src/ndi/cloud/api/documents.py | 44 +- src/ndi/cloud/api/files.py | 18 +- src/ndi/cloud/api/users.py | 6 +- src/ndi/cloud/auth.py | 24 +- src/ndi/cloud/download.py | 28 +- src/ndi/cloud/filehandler.py | 10 +- src/ndi/cloud/internal.py | 26 +- src/ndi/cloud/orchestration.py | 66 +-- src/ndi/cloud/sync/__init__.py | 20 +- src/ndi/cloud/sync/operations.py | 80 ++-- src/ndi/cloud/upload.py | 24 +- tests/matlab_tests/test_cloud_compute.py | 90 ++-- tests/test_cloud_download_live.py | 18 +- tests/test_cloud_filehandler.py | 10 +- tests/test_cloud_live.py | 416 +++++++++--------- tests/test_cloud_sync.py | 38 +- tests/test_phase2_gaps.py | 18 +- .../tutorial_67f723d574f5f79c6062389d.py | 8 +- .../tutorial_682e7772cdf3f24938176fac.py | 8 +- 25 files changed, 634 insertions(+), 637 deletions(-) diff --git a/MATLAB_MAPPING.md b/MATLAB_MAPPING.md index c9c3d36..27994fe 100644 --- a/MATLAB_MAPPING.md +++ b/MATLAB_MAPPING.md @@ -240,10 +240,10 @@ Auth functions are re-exported from `ndi.cloud.__init__` so `from ndi.cloud impo | `ndi.cloud.authenticate` | `ndi.cloud.authenticate()` | | | `ndi.cloud.api.auth.login` | `ndi.cloud.login()` | Also at `ndi.cloud.auth.login()` | | `ndi.cloud.api.auth.logout` | `ndi.cloud.logout()` | Also at `ndi.cloud.auth.logout()` | -| `ndi.cloud.api.auth.changePassword` | `ndi.cloud.change_password()` | | -| `ndi.cloud.api.auth.resetPassword` | `ndi.cloud.reset_password()` | | -| `ndi.cloud.api.auth.verifyUser` | `ndi.cloud.verify_user()` | | -| `ndi.cloud.api.auth.resendConfirmation` | `ndi.cloud.resend_confirmation()` | | +| `ndi.cloud.api.auth.changePassword` | `ndi.cloud.changePassword()` | | +| `ndi.cloud.api.auth.resetPassword` | `ndi.cloud.resetPassword()` | | +| 
`ndi.cloud.api.auth.verifyUser` | `ndi.cloud.verifyUser()` | | +| `ndi.cloud.api.auth.resendConfirmation` | `ndi.cloud.resendConfirmation()` | | | `ndi.cloud.uilogin` | — | GUI-only, no Python equivalent | ### Client & Config @@ -260,100 +260,100 @@ Auth functions are re-exported from `ndi.cloud.__init__` so `from ndi.cloud impo | MATLAB | Python | Notes | |--------|--------|-------| -| `getDataset` | `get_dataset(dataset_id)` | | -| `createDataset` | `create_dataset(org_id, name, ...)` | | -| `updateDataset` | `update_dataset(dataset_id, **fields)` | | -| `deleteDataset` | `delete_dataset(dataset_id, when='7d')` | `when` param for soft-delete | -| `listDatasets` | `list_datasets(org_id, ...)` | | -| — | `list_all_datasets(org_id)` | Auto-paginator (Python-only) | -| `getPublished` | `get_published_datasets(...)` | | -| `getUnpublished` | `get_unpublished(...)` | | -| `publishDataset` | `publish_dataset(dataset_id)` | | -| `unpublishDataset` | `unpublish_dataset(dataset_id)` | | -| `submitDataset` | `submit_dataset(dataset_id)` | | -| `createDatasetBranch` | `create_branch(dataset_id)` | | -| `getBranches` | `get_branches(dataset_id)` | | -| — | `undelete_dataset(dataset_id)` | Soft-delete API | -| — | `list_deleted_datasets(...)` | Soft-delete API | +| `getDataset` | `getDataset(dataset_id)` | | +| `createDataset` | `createDataset(org_id, name, ...)` | | +| `updateDataset` | `updateDataset(dataset_id, **fields)` | | +| `deleteDataset` | `deleteDataset(dataset_id, when='7d')` | `when` param for soft-delete | +| `listDatasets` | `listDatasets(org_id, ...)` | | +| — | `listAllDatasets(org_id)` | Auto-paginator (Python-only) | +| `getPublished` | `getPublished(...)` | | +| `getUnpublished` | `getUnpublished(...)` | | +| `publishDataset` | `publishDataset(dataset_id)` | | +| `unpublishDataset` | `unpublishDataset(dataset_id)` | | +| `submitDataset` | `submitDataset(dataset_id)` | | +| `createDatasetBranch` | `createDatasetBranch(dataset_id)` | | +| `getBranches` | 
`getBranches(dataset_id)` | | +| — | `undeleteDataset(dataset_id)` | Soft-delete API | +| — | `listDeletedDatasets(...)` | Soft-delete API | ### Documents API (`ndi.cloud.api.documents`) | MATLAB | Python | Notes | |--------|--------|-------| -| `getDocument` | `get_document(dataset_id, doc_id)` | | -| `addDocument` | `add_document(dataset_id, doc_json)` | | -| `addDocumentAsFile` | `add_document_as_file(dataset_id, path)` | | -| `updateDocument` | `update_document(dataset_id, doc_id, doc_json)` | | -| `deleteDocument` | `delete_document(dataset_id, doc_id, when='7d')` | `when` param for soft-delete | -| `listDatasetDocuments` | `list_documents(dataset_id, ...)` | | -| `listDatasetDocumentsAll` | `list_all_documents(dataset_id, ...)` | | -| `countDocuments` / `documentCount` | `get_document_count(dataset_id)` | Single function with fallback | -| `getBulkUploadURL` | `get_bulk_upload_url(dataset_id)` | | -| `getBulkDownloadURL` | `get_bulk_download_url(dataset_id, ...)` | | -| `bulkDeleteDocuments` | `bulk_delete(dataset_id, doc_ids, when='7d')` | `when` param for soft-delete | -| `ndiquery` | `ndi_query(scope, search_structure, ...)` | | -| `ndiqueryAll` | `ndi_query_all(scope, search_structure, ...)` | | -| — | `bulk_upload(dataset_id, zip_path)` | Python-only | -| — | `list_deleted_documents(dataset_id, ...)` | Soft-delete API | +| `getDocument` | `getDocument(dataset_id, doc_id)` | | +| `addDocument` | `addDocument(dataset_id, doc_json)` | | +| `addDocumentAsFile` | `addDocumentAsFile(dataset_id, path)` | | +| `updateDocument` | `updateDocument(dataset_id, doc_id, doc_json)` | | +| `deleteDocument` | `deleteDocument(dataset_id, doc_id, when='7d')` | `when` param for soft-delete | +| `listDatasetDocuments` | `listDatasetDocuments(dataset_id, ...)` | | +| `listDatasetDocumentsAll` | `listDatasetDocumentsAll(dataset_id, ...)` | | +| `countDocuments` / `documentCount` | `countDocuments(dataset_id)` | Single function with fallback | +| `getBulkUploadURL` | 
`getBulkUploadURL(dataset_id)` | | +| `getBulkDownloadURL` | `getBulkDownloadURL(dataset_id, ...)` | | +| `bulkDeleteDocuments` | `bulkDeleteDocuments(dataset_id, doc_ids, when='7d')` | `when` param for soft-delete | +| `ndiquery` | `ndiquery(scope, search_structure, ...)` | | +| `ndiqueryAll` | `ndiqueryAll(scope, search_structure, ...)` | | +| — | `bulkUpload(dataset_id, zip_path)` | Python-only | +| — | `listDeletedDocuments(dataset_id, ...)` | Soft-delete API | ### Files API (`ndi.cloud.api.files`) | MATLAB | Python | Notes | |--------|--------|-------| -| `getFile` | `get_file(url, target_path, ...)` | | -| `getFileUploadURL` | `get_upload_url(org_id, dataset_id, uid)` | | -| `getFileCollectionUploadURL` | `get_file_collection_upload_url(...)` | | -| `getFileDetails` | `get_file_details(dataset_id, uid)` | Used by `fetch_cloud_file` for on-demand download | -| `listFiles` | `list_files(dataset_id)` | | -| `putFiles` | `put_file(url, file_path, ...)` | | -| — | `put_file_bytes(url, data, ...)` | Python-only (raw bytes) | -| — | `get_bulk_upload_url(org_id, dataset_id)` | Python-only | +| `getFile` | `getFile(url, target_path, ...)` | | +| `getFileUploadURL` | `getFileUploadURL(org_id, dataset_id, uid)` | | +| `getFileCollectionUploadURL` | `getFileCollectionUploadURL(...)` | | +| `getFileDetails` | `getFileDetails(dataset_id, uid)` | Used by `fetch_cloud_file` for on-demand download | +| `listFiles` | `listFiles(dataset_id)` | | +| `putFiles` | `putFiles(url, file_path, ...)` | | +| — | `putFileBytes(url, data, ...)` | Python-only (raw bytes) | +| — | `getBulkUploadURL(org_id, dataset_id)` | Python-only | ### Users API (`ndi.cloud.api.users`) | MATLAB | Python | Notes | |--------|--------|-------| -| `createUser` | `create_user(email, name, password)` | | -| `GetUser` | `get_user(user_id)` | | -| `me` | `get_current_user()` | Renamed for clarity | +| `createUser` | `createUser(email, name, password)` | | +| `GetUser` | `GetUser(user_id)` | | +| `me` | `me()` | 
| ### Compute API (`ndi.cloud.api.compute`) | MATLAB | Python | Notes | |--------|--------|-------| -| `startSession` | `start_session(pipeline_id, ...)` | | -| `getSessionStatus` | `get_session_status(session_id)` | | -| `triggerStage` | `trigger_stage(session_id, stage_id)` | | -| `finalizeSession` | `finalize_session(session_id)` | | -| `abortSession` | `abort_session(session_id)` | | -| `listSessions` | `list_sessions()` | | +| `startSession` | `startSession(pipeline_id, ...)` | | +| `getSessionStatus` | `getSessionStatus(session_id)` | | +| `triggerStage` | `triggerStage(session_id, stage_id)` | | +| `finalizeSession` | `finalizeSession(session_id)` | | +| `abortSession` | `abortSession(session_id)` | | +| `listSessions` | `listSessions()` | | ### Top-Level Convenience Functions These match MATLAB's `ndi.cloud.*` functions and are importable directly from `ndi.cloud`: ```python -from ndi.cloud import download_dataset, upload_dataset, sync_dataset, upload_single_file +from ndi.cloud import downloadDataset, uploadDataset, syncDataset, uploadSingleFile ``` | MATLAB | Python | Notes | |--------|--------|-------| -| `ndi.cloud.downloadDataset` | `ndi.cloud.download_dataset(...)` | Also at `ndi.cloud.orchestration.download_dataset()` | -| `ndi.cloud.uploadDataset` | `ndi.cloud.upload_dataset(...)` | Also at `ndi.cloud.orchestration.upload_dataset()` | -| `ndi.cloud.syncDataset` | `ndi.cloud.sync_dataset(...)` | Also at `ndi.cloud.orchestration.sync_dataset()` | -| `ndi.cloud.uploadSingleFile` | `ndi.cloud.upload_single_file(...)` | Also at `ndi.cloud.upload.upload_single_file()` | -| `ndi.cloud.upload.newDataset` | `ndi.cloud.orchestration.new_dataset(...)` | | -| `ndi.cloud.upload.scanForUpload` | `ndi.cloud.orchestration.scan_for_upload(...)` | | +| `ndi.cloud.downloadDataset` | `ndi.cloud.downloadDataset(...)` | Also at `ndi.cloud.orchestration.downloadDataset()` | +| `ndi.cloud.uploadDataset` | `ndi.cloud.uploadDataset(...)` | Also at 
`ndi.cloud.orchestration.uploadDataset()` | +| `ndi.cloud.syncDataset` | `ndi.cloud.syncDataset(...)` | Also at `ndi.cloud.orchestration.syncDataset()` | +| `ndi.cloud.uploadSingleFile` | `ndi.cloud.uploadSingleFile(...)` | Also at `ndi.cloud.upload.uploadSingleFile()` | +| `ndi.cloud.upload.newDataset` | `ndi.cloud.orchestration.newDataset(...)` | | +| `ndi.cloud.upload.scanForUpload` | `ndi.cloud.orchestration.scanForUpload(...)` | | | *(customFileHandler in didsqlite.m)* | `ndi.cloud.fetch_cloud_file(ndic_uri, path, ...)` | On-demand binary file download via `ndic://` protocol | ### Download | MATLAB | Python | Notes | |--------|--------|-------| -| `ndi.cloud.download.dataset` | `download.download_full_dataset(...)` | | -| `ndi.cloud.download.downloadDatasetFiles` | `download.download_dataset_files(...)` | | -| `ndi.cloud.download.downloadDocumentCollection` | `download.download_document_collection(...)` | | -| `ndi.cloud.download.jsons2documents` | `download.jsons_to_documents(doc_jsons)` | | +| `ndi.cloud.download.dataset` | `download.downloadFullDataset(...)` | | +| `ndi.cloud.download.downloadDatasetFiles` | `download.downloadDatasetFiles(...)` | | +| `ndi.cloud.download.downloadDocumentCollection` | `download.downloadDocumentCollection(...)` | | +| `ndi.cloud.download.jsons2documents` | `download.jsons2documents(doc_jsons)` | | | `ndi.cloud.download.datasetDocuments` | — | Handled inside orchestration | | `ndi.cloud.download.internal.*` | — | Folded into main functions | | `+sync/+internal/updateFileInfoForRemoteFiles` | `filehandler.rewrite_file_info_for_cloud()` | Rewrites file_info to `ndic://` URIs | @@ -363,9 +363,10 @@ from ndi.cloud import download_dataset, upload_dataset, sync_dataset, upload_sin | MATLAB | Python | Notes | |--------|--------|-------| -| `ndi.cloud.upload.uploadDocumentCollection` | `upload.upload_document_collection(...)` | | -| `ndi.cloud.upload.zipForUpload` | `upload.zip_documents_for_upload(docs, ...)` | | -| 
`ndi.cloud.upload.uploadToNDICloud` | — | Subsumed by `upload_dataset()` | +| `ndi.cloud.upload.uploadDocumentCollection` | `upload.uploadDocumentCollection(...)` | | +| `ndi.cloud.upload.zipForUpload` | `upload.zipForUpload(docs, ...)` | | +| `ndi.cloud.upload.uploadFilesForDatasetDocuments` | `upload.uploadFilesForDatasetDocuments(...)` | | +| `ndi.cloud.upload.uploadToNDICloud` | — | Subsumed by `uploadDataset()` | | `ndi.cloud.upload.internal.*` | — | Promoted or folded inline | ### Sync @@ -374,12 +375,12 @@ from ndi.cloud import download_dataset, upload_dataset, sync_dataset, upload_sin |--------|--------|-------| | `SyncOptions` classdef | `ndi.cloud.sync.SyncOptions` | Dataclass | | `SyncMode` enum | `ndi.cloud.sync.SyncMode` | Python Enum | -| `downloadNew` | `ndi.cloud.sync.download_new(...)` | | -| `uploadNew` | `ndi.cloud.sync.upload_new(...)` | | -| `mirrorFromRemote` | `ndi.cloud.sync.mirror_from_remote(...)` | | -| `mirrorToRemote` | `ndi.cloud.sync.mirror_to_remote(...)` | | -| `twoWaySync` | `ndi.cloud.sync.two_way_sync(...)` | | -| `validate` | `ndi.cloud.sync.validate_sync(...)` | | +| `downloadNew` | `ndi.cloud.sync.downloadNew(...)` | | +| `uploadNew` | `ndi.cloud.sync.uploadNew(...)` | | +| `mirrorFromRemote` | `ndi.cloud.sync.mirrorFromRemote(...)` | | +| `mirrorToRemote` | `ndi.cloud.sync.mirrorToRemote(...)` | | +| `twoWaySync` | `ndi.cloud.sync.twoWaySync(...)` | | +| `validate` | `ndi.cloud.sync.validate(...)` | | | — | `ndi.cloud.sync.sync(..., mode)` | Dispatch by SyncMode (Python-only) | | `+sync/+internal/Constants` | — | Inlined | | `+sync/+internal/index.*` (5 funcs) | `ndi.cloud.sync.SyncIndex` | Collapsed into dataclass | @@ -388,44 +389,44 @@ from ndi.cloud import download_dataset, upload_dataset, sync_dataset, upload_sin | MATLAB | Python | Notes | |--------|--------|-------| -| `+internal/listRemoteDocumentIds` | `internal.list_remote_document_ids()` | | -| `+internal/getCloudDatasetIdForLocalDataset` | 
`internal.get_cloud_dataset_id()` | | -| `+internal/createRemoteDatasetDoc` | `internal.create_remote_dataset_doc()` | | -| `+internal/decodeJwt` | `auth.decode_jwt()` | Moved to auth | -| `+internal/getActiveToken` | `auth.get_active_token()` | Moved to auth | -| `+internal/getTokenExpiration` | `auth.get_token_expiration()` | Moved to auth | +| `+internal/listRemoteDocumentIds` | `internal.listRemoteDocumentIds()` | | +| `+internal/getCloudDatasetIdForLocalDataset` | `internal.getCloudDatasetIdForLocalDataset()` | | +| `+internal/createRemoteDatasetDoc` | `internal.createRemoteDatasetDoc()` | | +| `+internal/decodeJwt` | `auth.decodeJwt()` | Moved to auth | +| `+internal/getActiveToken` | `auth.getActiveToken()` | Moved to auth | +| `+internal/getTokenExpiration` | `auth.getTokenExpiration()` | Moved to auth | | `+internal/getWeboptionsWithAuthHeader` | — | Replaced by `CloudClient` | -| `+internal/getUploadedDocumentIds` | — | Via `list_remote_document_ids()` | -| `+internal/getUploadedFileIds` | — | Via `list_files()` | +| `+internal/getUploadedDocumentIds` | — | Via `listRemoteDocumentIds()` | +| `+internal/getUploadedFileIds` | — | Via `listFiles()` | | `+internal/dropDuplicateDocsFromJsonDecode` | — | Not needed (Python JSON is exact) | | `+internal/duplicateDocuments` | — | Not yet ported | -| `+sync/+internal/listLocalDocuments` | `internal.list_local_documents()` | | -| `+sync/+internal/getFileUidsFromDocuments` | `internal.get_file_uids_from_documents()` | | -| `+sync/+internal/filesNotYetUploaded` | `internal.files_not_yet_uploaded()` | | -| `+sync/+internal/datasetSessionIdFromDocs` | `internal.dataset_session_id_from_docs()` | | +| `+sync/+internal/listLocalDocuments` | `internal.listLocalDocuments()` | | +| `+sync/+internal/getFileUidsFromDocuments` | `internal.getFileUidsFromDocuments()` | | +| `+sync/+internal/filesNotYetUploaded` | `internal.filesNotYetUploaded()` | | +| `+sync/+internal/datasetSessionIdFromDocs` | 
`internal.datasetSessionIdFromDocs()` | | | `+sync/+internal/deleteLocalDocuments` | `sync.operations._delete_local_docs()` | Private | -| `+sync/+internal/deleteRemoteDocuments` | Inline in `mirror_to_remote()` | | +| `+sync/+internal/deleteRemoteDocuments` | Inline in `mirrorToRemote()` | | | `+sync/+internal/downloadNdiDocuments` | `sync.operations._download_docs_by_ids()` | Private | -| `+sync/+internal/uploadFilesForDatasetDocuments` | `upload.upload_files_for_documents()` | | +| `+sync/+internal/uploadFilesForDatasetDocuments` | `upload.uploadFilesForDatasetDocuments()` | | | *(ndic:// URI parsing in didsqlite.m)* | `filehandler.parse_ndic_uri()` | `ndic://dataset_id/file_uid` → tuple | ### Admin (DOI & Crossref) | MATLAB | Python | Notes | |--------|--------|-------| -| `ndi.cloud.admin.createNewDOI` | `admin.doi.create_new_doi()` | | -| `ndi.cloud.admin.registerDatasetDOI` | `admin.doi.register_dataset_doi()` | | -| `ndi.cloud.admin.checkSubmission` | `admin.doi.check_submission()` | | +| `ndi.cloud.admin.createNewDOI` | `admin.doi.createNewDOI()` | | +| `ndi.cloud.admin.registerDatasetDOI` | `admin.doi.registerDatasetDOI()` | | +| `ndi.cloud.admin.checkSubmission` | `admin.doi.checkSubmission()` | | | `+crossref/Constants` | `admin.crossref.CrossrefConstants` | Frozen dataclass | -| `+crossref/createDoiBatchSubmission` | `admin.crossref.create_batch_submission()` | | -| `+crossref/convertCloudDatasetToCrossrefDataset` | `admin.crossref.convert_to_crossref()` | | -| `+crossref/createDatabaseMetadata` | — | Inline in `create_batch_submission()` | -| `+crossref/createDoiBatchHeadElement` | — | Inline in `create_batch_submission()` | -| `+crossref/+conversion/convertContributors` | `admin.crossref.convert_contributors()` | | -| `+crossref/+conversion/convertDatasetDate` | `admin.crossref.convert_dataset_date()` | | -| `+crossref/+conversion/convertFunding` | `admin.crossref.convert_funding()` | | -| `+crossref/+conversion/convertLicense` | 
`admin.crossref.convert_license()` | | -| `+crossref/+conversion/convertRelatedPublications` | `admin.crossref.convert_related_publications()` | | +| `+crossref/createDoiBatchSubmission` | `admin.crossref.createDoiBatchSubmission()` | | +| `+crossref/convertCloudDatasetToCrossrefDataset` | `admin.crossref.convertCloudDatasetToCrossrefDataset()` | | +| `+crossref/createDatabaseMetadata` | — | Inline in `createDoiBatchSubmission()` | +| `+crossref/createDoiBatchHeadElement` | — | Inline in `createDoiBatchSubmission()` | +| `+crossref/+conversion/convertContributors` | `admin.crossref.convertContributors()` | | +| `+crossref/+conversion/convertDatasetDate` | `admin.crossref.convertDatasetDate()` | | +| `+crossref/+conversion/convertFunding` | `admin.crossref.convertFunding()` | | +| `+crossref/+conversion/convertLicense` | `admin.crossref.convertLicense()` | | +| `+crossref/+conversion/convertRelatedPublications` | `admin.crossref.convertRelatedPublications()` | | ### Cloud: Not Ported diff --git a/src/ndi/cloud/__init__.py b/src/ndi/cloud/__init__.py index 8824da7..217b426 100644 --- a/src/ndi/cloud/__init__.py +++ b/src/ndi/cloud/__init__.py @@ -11,11 +11,11 @@ config = login('user@example.com', 'password') client = CloudClient(config) - ndi.cloud.api.datasets.get_dataset(dataset_id, client=client) + ndi.cloud.api.datasets.getDataset(dataset_id, client=client) # Option 2: Auto-client from environment variables (no client needed) # Set NDI_CLOUD_USERNAME, NDI_CLOUD_PASSWORD (or NDI_CLOUD_TOKEN) - ndi.cloud.api.datasets.get_dataset(dataset_id) + ndi.cloud.api.datasets.getDataset(dataset_id) All ``ndi.cloud.api.*`` functions accept an optional ``client`` keyword parameter. 
If omitted, a client is built automatically from environment @@ -28,12 +28,12 @@ from .auth import ( authenticate, - change_password, + changePassword, login, logout, - resend_confirmation, - reset_password, - verify_user, + resendConfirmation, + resetPassword, + verifyUser, ) from .config import CloudConfig from .exceptions import ( @@ -56,15 +56,15 @@ "authenticate", "login", "logout", - "change_password", - "reset_password", - "verify_user", - "resend_confirmation", + "changePassword", + "resetPassword", + "verifyUser", + "resendConfirmation", # Top-level convenience functions (mirror MATLAB ndi.cloud.*) - "download_dataset", - "upload_dataset", - "sync_dataset", - "upload_single_file", + "downloadDataset", + "uploadDataset", + "syncDataset", + "uploadSingleFile", "fetch_cloud_file", ] @@ -74,19 +74,13 @@ # when requests is not installed. _LAZY_IMPORTS = { - # Python-style (primary) "APIResponse": ("client", "APIResponse"), "CloudClient": ("client", "CloudClient"), - "download_dataset": ("orchestration", "download_dataset"), - "upload_dataset": ("orchestration", "upload_dataset"), - "sync_dataset": ("orchestration", "sync_dataset"), - "upload_single_file": ("upload", "upload_single_file"), + "downloadDataset": ("orchestration", "downloadDataset"), + "uploadDataset": ("orchestration", "uploadDataset"), + "syncDataset": ("orchestration", "syncDataset"), + "uploadSingleFile": ("upload", "uploadSingleFile"), "fetch_cloud_file": ("filehandler", "fetch_cloud_file"), - # MATLAB-style aliases (for users migrating from MATLAB) - "downloadDataset": ("orchestration", "download_dataset"), - "uploadDataset": ("orchestration", "upload_dataset"), - "syncDataset": ("orchestration", "sync_dataset"), - "uploadSingleFile": ("upload", "upload_single_file"), } diff --git a/src/ndi/cloud/admin/crossref.py b/src/ndi/cloud/admin/crossref.py index df6fb9a..591201e 100644 --- a/src/ndi/cloud/admin/crossref.py +++ b/src/ndi/cloud/admin/crossref.py @@ -31,7 +31,7 @@ class 
CrossrefConstants: CONSTANTS = CrossrefConstants() -def create_batch_submission( +def createDoiBatchSubmission( dataset_metadata: dict[str, Any], doi: str, ) -> str: @@ -114,7 +114,7 @@ def create_batch_submission( return tostring(root, encoding="unicode", xml_declaration=True) -def convert_to_crossref(dataset_metadata: dict[str, Any]) -> dict[str, Any]: +def convertCloudDatasetToCrossrefDataset(dataset_metadata: dict[str, Any]) -> dict[str, Any]: """Convert NDI dataset metadata to Crossref-compatible format. Args: @@ -126,16 +126,16 @@ def convert_to_crossref(dataset_metadata: dict[str, Any]) -> dict[str, Any]: return { "title": dataset_metadata.get("name", ""), "description": dataset_metadata.get("description", ""), - "contributors": convert_contributors(dataset_metadata), + "contributors": convertContributors(dataset_metadata), "doi_prefix": CONSTANTS.DOI_PREFIX, "database_title": CONSTANTS.DATABASE_TITLE, "resource_url": ( f"{CONSTANTS.DATASET_BASE_URL}" f"{dataset_metadata.get('cloud_dataset_id', '')}" ), - "date": convert_dataset_date(dataset_metadata), - "funding": convert_funding(dataset_metadata), - "license": convert_license(dataset_metadata), - "related_publications": convert_related_publications(dataset_metadata), + "date": convertDatasetDate(dataset_metadata), + "funding": convertFunding(dataset_metadata), + "license": convertLicense(dataset_metadata), + "related_publications": convertRelatedPublications(dataset_metadata), } @@ -144,7 +144,7 @@ def convert_to_crossref(dataset_metadata: dict[str, Any]) -> dict[str, Any]: # --------------------------------------------------------------------------- -def convert_contributors( +def convertContributors( dataset_metadata: dict[str, Any], ) -> list[dict[str, Any]]: """Convert contributor list to Crossref PersonName format. 
@@ -177,7 +177,7 @@ def convert_contributors( return result -def convert_dataset_date( +def convertDatasetDate( dataset_metadata: dict[str, Any], ) -> dict[str, str]: """Convert dataset timestamps to Crossref date format. @@ -205,7 +205,7 @@ def _parse_date(ts: str) -> dict[str, str]: } -def convert_funding( +def convertFunding( dataset_metadata: dict[str, Any], ) -> list[dict[str, str]]: """Convert funding information to Crossref FrProgram format. @@ -222,7 +222,7 @@ def convert_funding( ] -def convert_license( +def convertLicense( dataset_metadata: dict[str, Any], ) -> dict[str, str]: """Convert license information to Crossref AiProgram format. @@ -260,7 +260,7 @@ def convert_license( return {"name": normalized or name, "url": url} if name else {} -def convert_related_publications( +def convertRelatedPublications( dataset_metadata: dict[str, Any], ) -> list[dict[str, Any]]: """Convert associated publications to Crossref RelProgram format. diff --git a/src/ndi/cloud/admin/doi.py b/src/ndi/cloud/admin/doi.py index c507a7c..b6d3d8a 100644 --- a/src/ndi/cloud/admin/doi.py +++ b/src/ndi/cloud/admin/doi.py @@ -12,13 +12,13 @@ from typing import TYPE_CHECKING, Any from ..client import _auto_client -from .crossref import CONSTANTS, create_batch_submission +from .crossref import CONSTANTS, createDoiBatchSubmission if TYPE_CHECKING: from ..client import CloudClient -def create_new_doi(prefix: str = "") -> str: +def createNewDOI(prefix: str = "") -> str: """Generate a new unique DOI string. 
Args: @@ -34,7 +34,7 @@ def create_new_doi(prefix: str = "") -> str: @_auto_client -def register_dataset_doi( +def registerDatasetDOI( cloud_dataset_id: str, use_test: bool = False, *, @@ -62,14 +62,14 @@ def register_dataset_doi( from ..exceptions import CloudError # Fetch metadata - metadata = ds_api.get_dataset(cloud_dataset_id, client=client) + metadata = ds_api.getDataset(cloud_dataset_id, client=client) metadata["cloud_dataset_id"] = cloud_dataset_id # Generate DOI - doi = create_new_doi() + doi = createNewDOI() # Build XML - xml = create_batch_submission(metadata, doi) + xml = createDoiBatchSubmission(metadata, doi) # Submit to Crossref deposit_url = CONSTANTS.TEST_DEPOSIT_URL if use_test else CONSTANTS.DEPOSIT_URL @@ -102,7 +102,7 @@ def register_dataset_doi( raise CloudError(f"Crossref submission failed: {exc}") from exc -def check_submission( +def checkSubmission( filename: str, data_type: str = "result", use_test: bool = False, diff --git a/src/ndi/cloud/api/compute.py b/src/ndi/cloud/api/compute.py index 630c1a7..8201ce2 100644 --- a/src/ndi/cloud/api/compute.py +++ b/src/ndi/cloud/api/compute.py @@ -21,7 +21,7 @@ @_auto_client @validate_call(config=VALIDATE_CONFIG) -def start_session( +def startSession( pipeline_id: NonEmptyStr, input_params: dict[str, Any] | None = None, *, @@ -36,14 +36,14 @@ def start_session( @_auto_client @validate_call(config=VALIDATE_CONFIG) -def get_session_status(session_id: NonEmptyStr, *, client: _Client = None) -> dict[str, Any]: +def getSessionStatus(session_id: NonEmptyStr, *, client: _Client = None) -> dict[str, Any]: """GET /compute/{sessionId} -- Get session status.""" return client.get("/compute/{sessionId}", sessionId=session_id) @_auto_client @validate_call(config=VALIDATE_CONFIG) -def trigger_stage( +def triggerStage( session_id: NonEmptyStr, stage_id: NonEmptyStr, *, @@ -59,7 +59,7 @@ def trigger_stage( @_auto_client @validate_call(config=VALIDATE_CONFIG) -def finalize_session(session_id: NonEmptyStr, *, client: 
_Client = None) -> dict[str, Any]: +def finalizeSession(session_id: NonEmptyStr, *, client: _Client = None) -> dict[str, Any]: """POST /compute/{sessionId}/finalize""" return client.post( "/compute/{sessionId}/finalize", @@ -69,14 +69,14 @@ def finalize_session(session_id: NonEmptyStr, *, client: _Client = None) -> dict @_auto_client @validate_call(config=VALIDATE_CONFIG) -def abort_session(session_id: NonEmptyStr, *, client: _Client = None) -> bool: +def abortSession(session_id: NonEmptyStr, *, client: _Client = None) -> bool: """POST /compute/{sessionId}/abort""" client.post("/compute/{sessionId}/abort", sessionId=session_id) return True @_auto_client -def list_sessions(*, client: _Client = None) -> APIResponse: +def listSessions(*, client: _Client = None) -> APIResponse: """GET /compute -- List all compute sessions.""" result = client.get("/compute") # Handle both APIResponse (has .data) and raw dict/list from mocks diff --git a/src/ndi/cloud/api/datasets.py b/src/ndi/cloud/api/datasets.py index b08d121..491b653 100644 --- a/src/ndi/cloud/api/datasets.py +++ b/src/ndi/cloud/api/datasets.py @@ -23,14 +23,14 @@ @_auto_client @validate_call(config=VALIDATE_CONFIG) -def get_dataset(dataset_id: CloudId, *, client: _Client = None) -> dict[str, Any]: +def getDataset(dataset_id: CloudId, *, client: _Client = None) -> dict[str, Any]: """GET /datasets/{datasetId}""" return client.get("/datasets/{datasetId}", datasetId=dataset_id) @_auto_client @validate_call(config=VALIDATE_CONFIG) -def create_dataset( +def createDataset( org_id: NonEmptyStr, name: NonEmptyStr, description: str = "", @@ -52,7 +52,7 @@ def create_dataset( @_auto_client @validate_call(config=VALIDATE_CONFIG) -def update_dataset( +def updateDataset( dataset_id: CloudId, *, client: _Client = None, @@ -68,7 +68,7 @@ def update_dataset( @_auto_client @validate_call(config=VALIDATE_CONFIG) -def delete_dataset( +def deleteDataset( dataset_id: CloudId, when: str = "7d", *, @@ -93,7 +93,7 @@ def delete_dataset( 
@_auto_client @validate_call(config=VALIDATE_CONFIG) -def list_datasets( +def listDatasets( org_id: NonEmptyStr, page: PageNumber = 1, page_size: PageSize = 1000, @@ -113,12 +113,12 @@ def list_datasets( @_auto_client @validate_call(config=VALIDATE_CONFIG) -def list_all_datasets(org_id: NonEmptyStr, *, client: _Client = None) -> APIResponse: +def listAllDatasets(org_id: NonEmptyStr, *, client: _Client = None) -> APIResponse: """Auto-paginate through all datasets for an organisation.""" all_datasets: list[dict[str, Any]] = [] page = 1 while page <= _MAX_PAGES: - result = list_datasets(org_id, page=page, client=client) + result = listDatasets(org_id, page=page, client=client) datasets = result.get("datasets", []) all_datasets.extend(datasets) total = result.get("totalNumber", 0) @@ -130,7 +130,7 @@ def list_all_datasets(org_id: NonEmptyStr, *, client: _Client = None) -> APIResp @_auto_client @validate_call(config=VALIDATE_CONFIG) -def get_published_datasets( +def getPublished( page: PageNumber = 1, page_size: PageSize = 1000, *, @@ -145,42 +145,42 @@ def get_published_datasets( @_auto_client @validate_call(config=VALIDATE_CONFIG) -def publish_dataset(dataset_id: CloudId, *, client: _Client = None) -> dict[str, Any]: +def publishDataset(dataset_id: CloudId, *, client: _Client = None) -> dict[str, Any]: """POST /datasets/{datasetId}/publish""" return client.post("/datasets/{datasetId}/publish", datasetId=dataset_id) @_auto_client @validate_call(config=VALIDATE_CONFIG) -def unpublish_dataset(dataset_id: CloudId, *, client: _Client = None) -> dict[str, Any]: +def unpublishDataset(dataset_id: CloudId, *, client: _Client = None) -> dict[str, Any]: """POST /datasets/{datasetId}/unpublish""" return client.post("/datasets/{datasetId}/unpublish", datasetId=dataset_id) @_auto_client @validate_call(config=VALIDATE_CONFIG) -def submit_dataset(dataset_id: CloudId, *, client: _Client = None) -> dict[str, Any]: +def submitDataset(dataset_id: CloudId, *, client: _Client = None) -> 
dict[str, Any]: """POST /datasets/{datasetId}/submit""" return client.post("/datasets/{datasetId}/submit", datasetId=dataset_id) @_auto_client @validate_call(config=VALIDATE_CONFIG) -def create_branch(dataset_id: CloudId, *, client: _Client = None) -> dict[str, Any]: +def createDatasetBranch(dataset_id: CloudId, *, client: _Client = None) -> dict[str, Any]: """POST /datasets/{datasetId}/branch""" return client.post("/datasets/{datasetId}/branch", datasetId=dataset_id) @_auto_client @validate_call(config=VALIDATE_CONFIG) -def get_branches(dataset_id: CloudId, *, client: _Client = None) -> list[dict[str, Any]]: +def getBranches(dataset_id: CloudId, *, client: _Client = None) -> list[dict[str, Any]]: """GET /datasets/{datasetId}/branches""" return client.get("/datasets/{datasetId}/branches", datasetId=dataset_id) @_auto_client @validate_call(config=VALIDATE_CONFIG) -def get_unpublished( +def getUnpublished( page: PageNumber = 1, page_size: PageSize = 20, *, @@ -198,7 +198,7 @@ def get_unpublished( @_auto_client @validate_call(config=VALIDATE_CONFIG) -def undelete_dataset(dataset_id: CloudId, *, client: _Client = None) -> dict[str, Any]: +def undeleteDataset(dataset_id: CloudId, *, client: _Client = None) -> dict[str, Any]: """POST /datasets/{datasetId}/undelete Reverse a deferred (soft) delete before the pruner runs. 
@@ -210,7 +210,7 @@ def undelete_dataset(dataset_id: CloudId, *, client: _Client = None) -> dict[str @_auto_client @validate_call(config=VALIDATE_CONFIG) -def list_deleted_datasets( +def listDeletedDatasets( page: PageNumber = 1, page_size: PageSize = 1000, *, diff --git a/src/ndi/cloud/api/documents.py b/src/ndi/cloud/api/documents.py index e390262..0912fd3 100644 --- a/src/ndi/cloud/api/documents.py +++ b/src/ndi/cloud/api/documents.py @@ -41,7 +41,7 @@ def _coerce_search_structure(search_structure: Any) -> Any: @_auto_client @validate_call(config=VALIDATE_CONFIG) -def get_document( +def getDocument( dataset_id: CloudId, document_id: CloudId, *, @@ -57,7 +57,7 @@ def get_document( @_auto_client @validate_call(config=VALIDATE_CONFIG) -def add_document( +def addDocument( dataset_id: CloudId, doc_json: dict[str, Any], *, @@ -73,7 +73,7 @@ def add_document( @_auto_client @validate_call(config=VALIDATE_CONFIG) -def update_document( +def updateDocument( dataset_id: CloudId, document_id: CloudId, doc_json: dict[str, Any], @@ -91,7 +91,7 @@ def update_document( @_auto_client @validate_call(config=VALIDATE_CONFIG) -def delete_document( +def deleteDocument( dataset_id: CloudId, document_id: CloudId, when: str = "7d", @@ -100,7 +100,7 @@ def delete_document( ) -> dict[str, Any]: """DELETE /datasets/{datasetId}/documents/{documentId}?when=... - Soft-delete a document. See :func:`~ndi.cloud.api.datasets.delete_dataset` + Soft-delete a document. See :func:`~ndi.cloud.api.datasets.deleteDataset` for the *when* parameter format. 
""" return client.delete( @@ -113,7 +113,7 @@ def delete_document( @_auto_client @validate_call(config=VALIDATE_CONFIG) -def list_documents( +def listDatasetDocuments( dataset_id: CloudId, page: PageNumber = 1, page_size: PageSize = 1000, @@ -133,7 +133,7 @@ def list_documents( @_auto_client @validate_call(config=VALIDATE_CONFIG) -def list_all_documents( +def listDatasetDocumentsAll( dataset_id: CloudId, page_size: PageSize = 1000, *, @@ -143,7 +143,7 @@ def list_all_documents( all_docs: list[dict[str, Any]] = [] page = 1 while page <= _MAX_PAGES: - result = list_documents(dataset_id, page=page, page_size=page_size, client=client) + result = listDatasetDocuments(dataset_id, page=page, page_size=page_size, client=client) docs = result.get("documents", []) all_docs.extend(docs) # Stop when a page returns fewer docs than requested (last page) @@ -155,7 +155,7 @@ def list_all_documents( @_auto_client @validate_call(config=VALIDATE_CONFIG) -def get_document_count(dataset_id: CloudId, *, client: _Client = None) -> int: +def countDocuments(dataset_id: CloudId, *, client: _Client = None) -> int: """Return the document count for a dataset. 
Tries the dedicated ``GET /datasets/{datasetId}/document-count`` @@ -172,15 +172,15 @@ def get_document_count(dataset_id: CloudId, *, client: _Client = None) -> int: except Exception: pass # Fallback: get from dataset metadata - from .datasets import get_dataset + from .datasets import getDataset - ds = get_dataset(dataset_id, client=client) + ds = getDataset(dataset_id, client=client) return ds.get("documentCount", 0) @_auto_client @validate_call(config=VALIDATE_CONFIG) -def bulk_upload( +def bulkUpload( dataset_id: CloudId, zip_path: str, *, @@ -199,7 +199,7 @@ def bulk_upload( @_auto_client @validate_call(config=VALIDATE_CONFIG) -def get_bulk_upload_url(dataset_id: CloudId, *, client: _Client = None) -> str: +def getBulkUploadURL(dataset_id: CloudId, *, client: _Client = None) -> str: """Get a presigned URL for bulk document upload.""" result = client.post( "/datasets/{datasetId}/documents/bulk-upload", @@ -210,7 +210,7 @@ def get_bulk_upload_url(dataset_id: CloudId, *, client: _Client = None) -> str: @_auto_client @validate_call(config=VALIDATE_CONFIG) -def get_bulk_download_url( +def getBulkDownloadURL( dataset_id: CloudId, doc_ids: list[str] | None = None, *, @@ -233,7 +233,7 @@ def get_bulk_download_url( @_auto_client @validate_call(config=VALIDATE_CONFIG) -def bulk_delete( +def bulkDeleteDocuments( dataset_id: CloudId, doc_ids: list[str], when: str = "7d", @@ -243,7 +243,7 @@ def bulk_delete( """POST /datasets/{datasetId}/documents/bulk-delete Soft-delete multiple documents. See - :func:`~ndi.cloud.api.datasets.delete_dataset` for the *when* + :func:`~ndi.cloud.api.datasets.deleteDataset` for the *when* parameter format. 
""" return client.post( @@ -255,7 +255,7 @@ def bulk_delete( @_auto_client @validate_call(config=VALIDATE_CONFIG) -def ndi_query( +def ndiquery( scope: Scope, search_structure: Any, page: PageNumber = 1, @@ -288,7 +288,7 @@ def ndi_query( @_auto_client @validate_call(config=VALIDATE_CONFIG) -def ndi_query_all( +def ndiqueryAll( scope: Scope, search_structure: Any, page_size: PageSize = 1000, @@ -303,7 +303,7 @@ def ndi_query_all( all_docs: list[dict[str, Any]] = [] page = 1 while page <= _MAX_PAGES: - result = ndi_query(scope, search_structure, page=page, page_size=page_size, client=client) + result = ndiquery(scope, search_structure, page=page, page_size=page_size, client=client) docs = result.get("documents", []) all_docs.extend(docs) total = result.get("number_matches", result.get("totalItems", result.get("totalNumber", 0))) @@ -315,7 +315,7 @@ def ndi_query_all( @_auto_client @validate_call(config=VALIDATE_CONFIG) -def list_deleted_documents( +def listDeletedDocuments( dataset_id: CloudId, page: PageNumber = 1, page_size: PageSize = 1000, @@ -335,7 +335,7 @@ def list_deleted_documents( @_auto_client @validate_call(config=VALIDATE_CONFIG) -def add_document_as_file( +def addDocumentAsFile( dataset_id: CloudId, file_path: FilePath, *, @@ -350,4 +350,4 @@ def add_document_as_file( content = Path(file_path).read_text(encoding="utf-8") doc_json = json.loads(content) - return add_document(dataset_id, doc_json, client=client) + return addDocument(dataset_id, doc_json, client=client) diff --git a/src/ndi/cloud/api/files.py b/src/ndi/cloud/api/files.py index 5ddf0cf..83311c7 100644 --- a/src/ndi/cloud/api/files.py +++ b/src/ndi/cloud/api/files.py @@ -23,7 +23,7 @@ @_auto_client @validate_call(config=VALIDATE_CONFIG) -def get_upload_url( +def getFileUploadURL( org_id: NonEmptyStr, dataset_id: CloudId, file_uid: NonEmptyStr, @@ -45,7 +45,7 @@ def get_upload_url( @_auto_client @validate_call(config=VALIDATE_CONFIG) -def get_bulk_upload_url( +def getBulkUploadURL( org_id: 
NonEmptyStr, dataset_id: CloudId, *, @@ -64,7 +64,7 @@ def get_bulk_upload_url( @validate_call -def put_file( +def putFiles( url: NonEmptyStr, file_path: FilePath, timeout: int = 120, @@ -101,7 +101,7 @@ def put_file( @validate_call -def put_file_bytes( +def putFileBytes( url: NonEmptyStr, data: bytes, timeout: int = 120, @@ -136,7 +136,7 @@ def put_file_bytes( @validate_call -def get_file( +def getFile( url: NonEmptyStr, target_path: str | Path, timeout: int = 120, @@ -178,7 +178,7 @@ def get_file( @_auto_client @validate_call(config=VALIDATE_CONFIG) -def list_files( +def listFiles( dataset_id: CloudId, *, client: _Client = None, @@ -191,14 +191,14 @@ def list_files( """ from . import datasets as ds_api - ds = ds_api.get_dataset(dataset_id, client=client) + ds = ds_api.getDataset(dataset_id, client=client) files = ds.get("files", []) if hasattr(ds, "get") else [] return APIResponse(files, success=True, status_code=200, url="") @_auto_client @validate_call(config=VALIDATE_CONFIG) -def get_file_details( +def getFileDetails( dataset_id: CloudId, file_uid: NonEmptyStr, *, @@ -217,7 +217,7 @@ def get_file_details( @_auto_client @validate_call(config=VALIDATE_CONFIG) -def get_file_collection_upload_url( +def getFileCollectionUploadURL( org_id: NonEmptyStr, dataset_id: CloudId, *, diff --git a/src/ndi/cloud/api/users.py b/src/ndi/cloud/api/users.py index 44c7b2e..570449e 100644 --- a/src/ndi/cloud/api/users.py +++ b/src/ndi/cloud/api/users.py @@ -22,7 +22,7 @@ @_auto_client @validate_call(config=VALIDATE_CONFIG) -def create_user( +def createUser( email: NonEmptyStr, name: NonEmptyStr, password: NonEmptyStr, @@ -37,7 +37,7 @@ def create_user( @_auto_client -def get_current_user(*, client: _Client = None) -> dict[str, Any]: +def me(*, client: _Client = None) -> dict[str, Any]: """GET /users/me -- Get the authenticated user's profile. The response includes the user's organization memberships. 
@@ -47,6 +47,6 @@ def get_current_user(*, client: _Client = None) -> dict[str, Any]: @_auto_client @validate_call(config=VALIDATE_CONFIG) -def get_user(user_id: NonEmptyStr, *, client: _Client = None) -> dict[str, Any]: +def GetUser(user_id: NonEmptyStr, *, client: _Client = None) -> dict[str, Any]: """GET /users/{userId}""" return client.get("/users/{userId}", userId=user_id) diff --git a/src/ndi/cloud/auth.py b/src/ndi/cloud/auth.py index 7de2473..ff539d4 100644 --- a/src/ndi/cloud/auth.py +++ b/src/ndi/cloud/auth.py @@ -23,7 +23,7 @@ # --------------------------------------------------------------------------- -def decode_jwt(token: str) -> dict: +def decodeJwt(token: str) -> dict: """Decode a JWT payload without signature verification. Matches MATLAB ``ndi.cloud.internal.decodeJwt``. @@ -60,7 +60,7 @@ def decode_jwt(token: str) -> dict: raise CloudAuthError(f"Failed to decode JWT: {exc}") from exc -def get_token_expiration(token: str) -> datetime: +def getTokenExpiration(token: str) -> datetime: """Extract the ``exp`` claim from a JWT as a UTC datetime. Args: @@ -72,14 +72,14 @@ def get_token_expiration(token: str) -> datetime: Raises: CloudAuthError: If the token has no ``exp`` claim. """ - payload = decode_jwt(token) + payload = decodeJwt(token) exp = payload.get("exp") if exp is None: raise CloudAuthError("JWT has no exp claim") return datetime.fromtimestamp(exp, tz=timezone.utc) -def verify_token(token: str) -> bool: +def verifyToken(token: str) -> bool: """Check whether *token* is still valid (not expired). Does **not** contact the server — only checks the ``exp`` claim. 
@@ -87,13 +87,13 @@ def verify_token(token: str) -> bool: if not token: return False try: - expiration = get_token_expiration(token) + expiration = getTokenExpiration(token) return datetime.now(timezone.utc) < expiration except CloudAuthError: return False -def get_active_token(config: CloudConfig | None = None) -> tuple[str, str]: +def getActiveToken(config: CloudConfig | None = None) -> tuple[str, str]: """Return ``(token, org_id)`` from *config* or environment. Raises: @@ -105,7 +105,7 @@ def get_active_token(config: CloudConfig | None = None) -> tuple[str, str]: if not config.token: raise CloudAuthError("No token available (NDI_CLOUD_TOKEN not set)") - if not verify_token(config.token): + if not verifyToken(config.token): raise CloudAuthError("Token is expired") return config.token, config.org_id @@ -242,7 +242,7 @@ def authenticate(config: CloudConfig | None = None) -> str: config = CloudConfig.from_env() # 1. Already have a valid token? - if config.token and verify_token(config.token): + if config.token and verifyToken(config.token): return config.token # 2. 
Try env-var credentials @@ -263,7 +263,7 @@ def authenticate(config: CloudConfig | None = None) -> str: # --------------------------------------------------------------------------- -def change_password( +def changePassword( old_password: str, new_password: str, config: CloudConfig | None = None, @@ -296,7 +296,7 @@ def change_password( return True -def reset_password( +def resetPassword( email: str, config: CloudConfig | None = None, ) -> bool: @@ -325,7 +325,7 @@ def reset_password( return True -def verify_user( +def verifyUser( email: str, confirmation_code: str, config: CloudConfig | None = None, @@ -359,7 +359,7 @@ def verify_user( return True -def resend_confirmation( +def resendConfirmation( email: str, config: CloudConfig | None = None, ) -> bool: diff --git a/src/ndi/cloud/download.py b/src/ndi/cloud/download.py index dcbec38..e45610c 100644 --- a/src/ndi/cloud/download.py +++ b/src/ndi/cloud/download.py @@ -18,7 +18,7 @@ logger = logging.getLogger(__name__) -def download_full_dataset( +def downloadFullDataset( dataset_id: str, target_dir: str | Path, *, @@ -82,7 +82,9 @@ def _log(msg: str) -> None: page = 1 page_size = 1000 while page <= 1000: - result = docs_api.list_documents(dataset_id, page=page, page_size=page_size, client=client) + result = docs_api.listDatasetDocuments( + dataset_id, page=page, page_size=page_size, client=client + ) docs = result.get("documents", []) if not docs: break @@ -109,7 +111,7 @@ def _log(msg: str) -> None: if remaining_ids: _log(f"Downloading {len(remaining_ids)} documents via bulk chunks...") try: - full_docs = download_document_collection( + full_docs = downloadDocumentCollection( dataset_id, doc_ids=remaining_ids, progress=progress, @@ -136,7 +138,7 @@ def _log(msg: str) -> None: if include_files: _log("Listing dataset files...") try: - file_list = files_api.list_files(dataset_id, client=client).data + file_list = files_api.listFiles(dataset_id, client=client).data except Exception: file_list = [] _log(f"Found 
{len(file_list)} files") @@ -152,7 +154,7 @@ def _log(msg: str) -> None: report["files_downloaded"] += 1 continue try: - details = files_api.get_file_details(dataset_id, uid, client=client) + details = files_api.getFileDetails(dataset_id, uid, client=client) url = details.get("downloadUrl", "") if hasattr(details, "get") else "" if not url: report["files_failed"] += 1 @@ -237,7 +239,7 @@ def _download_chunk_zip( raise TimeoutError(msg) -def download_document_collection( +def downloadDocumentCollection( dataset_id: str, doc_ids: list[str] | None = None, chunk_size: int = 2000, @@ -282,7 +284,7 @@ def _log(msg: str) -> None: # If no IDs given, discover all via paginated summaries if doc_ids is None: _log("Listing all document IDs...") - summaries = docs_api.list_all_documents(dataset_id, client=client) + summaries = docs_api.listDatasetDocumentsAll(dataset_id, client=client) doc_ids = [ s.get("_id", s.get("id", "")) for s in summaries.data if s.get("_id", s.get("id", "")) ] @@ -304,7 +306,7 @@ def _log(msg: str) -> None: # Get presigned URL for this chunk try: - url = docs_api.get_bulk_download_url(dataset_id, chunk_ids, client=client) + url = docs_api.getBulkDownloadURL(dataset_id, chunk_ids, client=client) except Exception as exc: _log(f" Chunk {i + 1}: failed to get download URL: {exc}") continue @@ -327,7 +329,7 @@ def _log(msg: str) -> None: return all_documents -def download_files_for_document( +def downloadFilesForDocument( dataset_id: str, document: dict[str, Any], target_dir: Path, @@ -359,7 +361,7 @@ def download_files_for_document( from .api import files as files_api try: - details = files_api.get_file_details(dataset_id, file_uid, client=client) + details = files_api.getFileDetails(dataset_id, file_uid, client=client) except Exception: return downloaded @@ -379,7 +381,7 @@ def download_files_for_document( return downloaded -def download_dataset_files( +def downloadDatasetFiles( dataset_id: str, documents: list[dict[str, Any]], target_dir: Path, @@ -400,7 
+402,7 @@ def download_dataset_files( for doc in documents: try: - paths = download_files_for_document(dataset_id, doc, target_dir, client=client) + paths = downloadFilesForDocument(dataset_id, doc, target_dir, client=client) report["downloaded"] += len(paths) except Exception as exc: report["failed"] += 1 @@ -409,7 +411,7 @@ def download_dataset_files( return report -def jsons_to_documents( +def jsons2documents( doc_jsons: list[dict[str, Any]], ) -> list[Any]: """Convert a list of raw JSON dicts into ndi.Document objects. diff --git a/src/ndi/cloud/filehandler.py b/src/ndi/cloud/filehandler.py index c36d87b..865d032 100644 --- a/src/ndi/cloud/filehandler.py +++ b/src/ndi/cloud/filehandler.py @@ -10,7 +10,7 @@ files. When a dataset is downloaded without ``sync_files=True``, document file_info locations are rewritten to ``ndic://{dataset_id}/{file_uid}``. When a binary file is opened, the URI is resolved on demand: a fresh -presigned S3 URL is fetched via ``get_file_details`` and the file is +presigned S3 URL is fetched via ``getFileDetails`` and the file is streamed to local storage. """ @@ -120,7 +120,7 @@ def fetch_cloud_file( ) -> bool: """Download a cloud file on demand. - Parses the ``ndic://`` URI, calls ``get_file_details`` for a fresh + Parses the ``ndic://`` URI, calls ``getFileDetails`` for a fresh presigned S3 URL, then streams the file to *target_path*. Uses an atomic write (download to ``.tmp``, then rename) to avoid partial files. @@ -137,7 +137,7 @@ def fetch_cloud_file( ValueError: If the URI is invalid. CloudError: If the download fails. 
""" - from .api.files import get_file, get_file_details + from .api.files import getFile, getFileDetails dataset_id, file_uid = parse_ndic_uri(ndic_uri) @@ -145,7 +145,7 @@ def fetch_cloud_file( client = get_or_create_cloud_client() # Get fresh presigned URL - details = get_file_details(dataset_id, file_uid, client=client) + details = getFileDetails(dataset_id, file_uid, client=client) download_url = details.get("downloadUrl", "") if not download_url: from .exceptions import CloudError @@ -158,7 +158,7 @@ def fetch_cloud_file( tmp_path = target.with_suffix(target.suffix + ".tmp") logger.debug("Fetching cloud file %s -> %s", ndic_uri, target) - success = get_file(download_url, tmp_path, timeout=300) + success = getFile(download_url, tmp_path, timeout=300) if success: tmp_path.rename(target) diff --git a/src/ndi/cloud/internal.py b/src/ndi/cloud/internal.py index b30bb1e..076e14c 100644 --- a/src/ndi/cloud/internal.py +++ b/src/ndi/cloud/internal.py @@ -13,7 +13,7 @@ from .client import CloudClient -def list_remote_document_ids( +def listRemoteDocumentIds( cloud_dataset_id: str, *, client: CloudClient | None = None, @@ -27,7 +27,7 @@ def list_remote_document_ids( """ from .api import documents as docs_api - all_docs = docs_api.list_all_documents(cloud_dataset_id, client=client) + all_docs = docs_api.listDatasetDocumentsAll(cloud_dataset_id, client=client) mapping: dict[str, str] = {} for doc in all_docs.data: ndi_id = doc.get("ndiId", doc.get("id", "")) @@ -37,7 +37,7 @@ def list_remote_document_ids( return mapping -def get_cloud_dataset_id( +def getCloudDatasetIdForLocalDataset( dataset: Any, *, client: CloudClient | None = None, @@ -74,7 +74,7 @@ def get_cloud_dataset_id( return "", None -def create_remote_dataset_doc( +def createRemoteDatasetDoc( cloud_dataset_id: str, dataset: Any, ) -> Any: @@ -94,7 +94,7 @@ def create_remote_dataset_doc( return doc -def list_local_documents(dataset: Any) -> tuple[list[Any], list[str]]: +def listLocalDocuments(dataset: Any) -> 
tuple[list[Any], list[str]]: """Retrieve all documents and their IDs from a local dataset. MATLAB equivalent: +sync/+internal/listLocalDocuments.m @@ -117,7 +117,7 @@ def list_local_documents(dataset: Any) -> tuple[list[Any], list[str]]: return docs, ids -def get_file_uids_from_documents(documents: list[Any]) -> list[str]: +def getFileUidsFromDocuments(documents: list[Any]) -> list[str]: """Extract unique file UIDs from a list of documents. MATLAB equivalent: +sync/+internal/getFileUidsFromDocuments.m @@ -143,7 +143,7 @@ def get_file_uids_from_documents(documents: list[Any]) -> list[str]: return list(uids) -def files_not_yet_uploaded( +def filesNotYetUploaded( file_manifest: list[dict[str, Any]], cloud_dataset_id: str, *, @@ -153,10 +153,10 @@ def files_not_yet_uploaded( MATLAB equivalent: +sync/+internal/filesNotYetUploaded.m """ - from .api.files import list_files + from .api.files import listFiles try: - remote_files = list_files(cloud_dataset_id, client=client).data + remote_files = listFiles(cloud_dataset_id, client=client).data except Exception: return file_manifest # can't check, assume all need upload @@ -169,7 +169,7 @@ def files_not_yet_uploaded( return [f for f in file_manifest if f.get("uid", "") not in remote_uids] -def validate_sync( +def validateSync( dataset: Any, cloud_dataset_id: str, *, @@ -182,8 +182,8 @@ def validate_sync( Returns: Report dict with local_only, remote_only, common, mismatched IDs. """ - _, local_ids = list_local_documents(dataset) - remote_id_map = list_remote_document_ids(cloud_dataset_id, client=client) + _, local_ids = listLocalDocuments(dataset) + remote_id_map = listRemoteDocumentIds(cloud_dataset_id, client=client) local_set = set(local_ids) remote_set = set(remote_id_map.keys()) @@ -197,7 +197,7 @@ def validate_sync( } -def dataset_session_id_from_docs(documents: list[Any]) -> str: +def datasetSessionIdFromDocs(documents: list[Any]) -> str: """Extract the unique dataset session ID from a list of documents. 
MATLAB equivalent: +sync/+internal/datasetSessionIdFromDocs.m diff --git a/src/ndi/cloud/orchestration.py b/src/ndi/cloud/orchestration.py index b8cdbf8..e4e9a96 100644 --- a/src/ndi/cloud/orchestration.py +++ b/src/ndi/cloud/orchestration.py @@ -20,7 +20,7 @@ @_auto_client -def download_dataset( +def downloadDataset( cloud_dataset_id: str, target_folder: str, sync_files: bool = False, @@ -44,23 +44,23 @@ def download_dataset( """ from .api import datasets as ds_api from .download import ( - download_dataset_files, - download_document_collection, - jsons_to_documents, + downloadDatasetFiles, + downloadDocumentCollection, + jsons2documents, ) - from .internal import create_remote_dataset_doc + from .internal import createRemoteDatasetDoc target = Path(target_folder) target.mkdir(parents=True, exist_ok=True) # Verify dataset exists - ds_info = ds_api.get_dataset(cloud_dataset_id, client=client) + ds_info = ds_api.getDataset(cloud_dataset_id, client=client) if verbose: name = ds_info.get("name", cloud_dataset_id) print(f"Downloading dataset: {name}") # Download all full documents via chunked bulk download - doc_jsons = download_document_collection( + doc_jsons = downloadDocumentCollection( cloud_dataset_id, client=client, progress=print if verbose else None, @@ -80,7 +80,7 @@ def download_dataset( from ndi.dataset import Dataset dataset = Dataset(target) - documents = jsons_to_documents(doc_jsons) + documents = jsons2documents(doc_jsons) for doc in documents: try: dataset._session._database.add(doc) @@ -88,7 +88,7 @@ def download_dataset( pass # Create remote link document - remote_doc = create_remote_dataset_doc(cloud_dataset_id, dataset) + remote_doc = createRemoteDatasetDoc(cloud_dataset_id, dataset) try: dataset._session._database.add(remote_doc) except Exception: @@ -100,7 +100,7 @@ def download_dataset( # Optionally download files if sync_files and doc_jsons: file_dir = target / ".ndi" / "files" - report = download_dataset_files(cloud_dataset_id, doc_jsons, 
file_dir, client=client) + report = downloadDatasetFiles(cloud_dataset_id, doc_jsons, file_dir, client=client) if verbose: print(f' Files downloaded: {report["downloaded"]}, failed: {report["failed"]}') @@ -198,7 +198,7 @@ def load_dataset_from_json_dir( @_auto_client -def upload_dataset( +def uploadDataset( dataset: Any, upload_as_new: bool = False, remote_name: str = "", @@ -223,26 +223,26 @@ def upload_dataset( Tuple of ``(success, cloud_dataset_id, message)``. """ from .api import datasets as ds_api - from .internal import create_remote_dataset_doc, get_cloud_dataset_id - from .upload import upload_document_collection, upload_files_for_documents + from .internal import createRemoteDatasetDoc, getCloudDatasetIdForLocalDataset + from .upload import uploadDocumentCollection, uploadFilesForDatasetDocuments # Resolve or create remote dataset cloud_id = "" if not upload_as_new: - cloud_id, _ = get_cloud_dataset_id(dataset, client=client) + cloud_id, _ = getCloudDatasetIdForLocalDataset(dataset, client=client) if not cloud_id: # Create new remote dataset name = remote_name or getattr(dataset, "name", "Unnamed Dataset") org_id = client.config.org_id try: - result = ds_api.create_dataset(org_id, name, client=client) + result = ds_api.createDataset(org_id, name, client=client) cloud_id = result.get("id", result.get("_id", "")) except Exception as exc: return False, "", f"Failed to create remote dataset: {exc}" # Store link locally - remote_doc = create_remote_dataset_doc(cloud_id, dataset) + remote_doc = createRemoteDatasetDoc(cloud_id, dataset) try: dataset.session.database_add(remote_doc) except Exception: @@ -266,13 +266,13 @@ def upload_dataset( doc_jsons.append(props) # Upload documents - report = upload_document_collection(cloud_id, doc_jsons, client=client) + report = uploadDocumentCollection(cloud_id, doc_jsons, client=client) if verbose: print(f' Documents uploaded: {report["uploaded"]}, skipped: {report["skipped"]}') # Upload files if sync_files: - file_report 
= upload_files_for_documents( + file_report = uploadFilesForDatasetDocuments( client.config.org_id, cloud_id, doc_jsons, @@ -285,7 +285,7 @@ def upload_dataset( @_auto_client -def sync_dataset( +def syncDataset( dataset: Any, sync_mode: str = "download_new", sync_files: bool = False, @@ -311,9 +311,9 @@ def sync_dataset( Returns: Report dict with counts of changes. """ - from .internal import get_cloud_dataset_id + from .internal import getCloudDatasetIdForLocalDataset - cloud_id, _ = get_cloud_dataset_id(dataset, client=client) + cloud_id, _ = getCloudDatasetIdForLocalDataset(dataset, client=client) if not cloud_id: return {"error": "No cloud dataset linked to this dataset"} @@ -347,7 +347,7 @@ def sync_dataset( @_auto_client -def new_dataset( +def newDataset( dataset: Any, name: str = "", *, @@ -360,7 +360,7 @@ def new_dataset( Returns: The cloud dataset ID. """ - success, cloud_id, msg = upload_dataset( + success, cloud_id, msg = uploadDataset( dataset, upload_as_new=True, remote_name=name, @@ -375,7 +375,7 @@ def new_dataset( @_auto_client -def scan_for_upload( +def scanForUpload( dataset: Any, cloud_dataset_id: str, *, @@ -390,7 +390,7 @@ def scan_for_upload( """ from ndi.query import Query - from .internal import list_remote_document_ids + from .internal import listRemoteDocumentIds # Get local documents try: @@ -402,7 +402,7 @@ def scan_for_upload( remote_ids = {} if cloud_dataset_id: try: - remote_ids = list_remote_document_ids(cloud_dataset_id, client=client) + remote_ids = listRemoteDocumentIds(cloud_dataset_id, client=client) except Exception: pass @@ -449,9 +449,9 @@ def _sync_download_new( ) -> dict[str, int]: """Download documents that exist remotely but not locally.""" from .api import documents as docs_api - from .download import jsons_to_documents + from .download import jsons2documents - remote_docs = docs_api.list_all_documents(cloud_id, client=client).data + remote_docs = docs_api.listDatasetDocumentsAll(cloud_id, client=client).data # Find 
local IDs from ndi.query import Query @@ -476,7 +476,7 @@ def _sync_download_new( if dry_run: return {"downloaded": len(new_docs)} - documents = jsons_to_documents(new_docs) + documents = jsons2documents(new_docs) added = 0 for doc in documents: try: @@ -498,10 +498,10 @@ def _sync_upload_new( client: CloudClient | None = None, ) -> dict[str, int]: """Upload documents that exist locally but not remotely.""" - from .internal import list_remote_document_ids - from .upload import upload_document_collection + from .internal import listRemoteDocumentIds + from .upload import uploadDocumentCollection - remote_ids = list_remote_document_ids(cloud_id, client=client) + remote_ids = listRemoteDocumentIds(cloud_id, client=client) from ndi.query import Query @@ -524,5 +524,5 @@ def _sync_upload_new( if dry_run: return {"uploaded": len(new_jsons)} - report = upload_document_collection(cloud_id, new_jsons, only_missing=False, client=client) + report = uploadDocumentCollection(cloud_id, new_jsons, only_missing=False, client=client) return {"uploaded": report.get("uploaded", 0)} diff --git a/src/ndi/cloud/sync/__init__.py b/src/ndi/cloud/sync/__init__.py index 61d3159..890a364 100644 --- a/src/ndi/cloud/sync/__init__.py +++ b/src/ndi/cloud/sync/__init__.py @@ -8,22 +8,22 @@ from .index import SyncIndex from .mode import SyncMode, SyncOptions from .operations import ( - download_new, - mirror_from_remote, - mirror_to_remote, + downloadNew, + mirrorFromRemote, + mirrorToRemote, sync, - two_way_sync, - upload_new, + twoWaySync, + uploadNew, ) __all__ = [ "SyncMode", "SyncOptions", "SyncIndex", - "upload_new", - "download_new", - "mirror_to_remote", - "mirror_from_remote", - "two_way_sync", + "uploadNew", + "downloadNew", + "mirrorToRemote", + "mirrorFromRemote", + "twoWaySync", "sync", ] diff --git a/src/ndi/cloud/sync/operations.py b/src/ndi/cloud/sync/operations.py index a903e51..8cd4ae6 100644 --- a/src/ndi/cloud/sync/operations.py +++ b/src/ndi/cloud/sync/operations.py @@ -71,7 
+71,7 @@ def _download_docs_by_ids( Returns (downloaded_docs, failed_ids). """ - from ..download import download_document_collection + from ..download import downloadDocumentCollection if not ids_to_download: return [], [] @@ -80,7 +80,7 @@ def _download_docs_by_ids( api_ids = [ndi_to_api.get(ndi_id, ndi_id) for ndi_id in ids_to_download] try: - docs = download_document_collection( + docs = downloadDocumentCollection( cloud_dataset_id, doc_ids=api_ids, client=client, @@ -117,7 +117,7 @@ def _download_docs_by_ids( # --------------------------------------------------------------------------- -def upload_new( +def uploadNew( dataset_path: str, cloud_dataset_id: str, options: SyncOptions | None = None, @@ -130,14 +130,14 @@ def upload_new( them, and updates the index. """ from ..api import documents as docs_api - from ..internal import list_remote_document_ids + from ..internal import listRemoteDocumentIds options = options or SyncOptions() ds_path = Path(dataset_path) index = SyncIndex.read(ds_path) # Get remote doc IDs - remote_ids = list_remote_document_ids(cloud_dataset_id, client=client) + remote_ids = listRemoteDocumentIds(cloud_dataset_id, client=client) remote_id_set = set(remote_ids.keys()) # Get local doc IDs (from index — actual local enumeration deferred) @@ -160,7 +160,7 @@ def upload_new( failed: list[str] = [] for doc_id in new_ids: try: - docs_api.add_document(cloud_dataset_id, {"ndiId": doc_id}, client=client) + docs_api.addDocument(cloud_dataset_id, {"ndiId": doc_id}, client=client) report["uploaded"].append(doc_id) except Exception as exc: logger.warning("Failed to upload %s: %s", doc_id, exc) @@ -177,7 +177,7 @@ def upload_new( return report -def download_new( +def downloadNew( dataset_path: str, cloud_dataset_id: str, options: SyncOptions | None = None, @@ -185,13 +185,13 @@ def download_new( client: CloudClient | None = None, ) -> dict[str, Any]: """Download documents that exist in the cloud but not locally.""" - from ..internal import 
list_remote_document_ids + from ..internal import listRemoteDocumentIds options = options or SyncOptions() ds_path = Path(dataset_path) index = SyncIndex.read(ds_path) - remote_ids = list_remote_document_ids(cloud_dataset_id, client=client) + remote_ids = listRemoteDocumentIds(cloud_dataset_id, client=client) remote_id_set = set(remote_ids.keys()) local_ids = set(index.local_doc_ids_last_sync) @@ -216,7 +216,7 @@ def download_new( report["failed"] = failed if options.verbose and saved: - logger.info("download_new: downloaded %d documents", len(saved)) + logger.info("downloadNew: downloaded %d documents", len(saved)) # Update index index.update( @@ -228,7 +228,7 @@ def download_new( return report -def mirror_to_remote( +def mirrorToRemote( dataset_path: str, cloud_dataset_id: str, options: SyncOptions | None = None, @@ -237,13 +237,13 @@ def mirror_to_remote( ) -> dict[str, Any]: """Make the remote match the local state (upload new, delete remote-only).""" from ..api import documents as docs_api - from ..internal import list_remote_document_ids + from ..internal import listRemoteDocumentIds options = options or SyncOptions() ds_path = Path(dataset_path) index = SyncIndex.read(ds_path) - remote_ids = list_remote_document_ids(cloud_dataset_id, client=client) + remote_ids = listRemoteDocumentIds(cloud_dataset_id, client=client) remote_id_set = set(remote_ids.keys()) local_ids = set(index.local_doc_ids_last_sync) @@ -263,24 +263,24 @@ def mirror_to_remote( if not options.dry_run: for doc_id in to_upload: try: - docs_api.add_document(cloud_dataset_id, {"ndiId": doc_id}, client=client) + docs_api.addDocument(cloud_dataset_id, {"ndiId": doc_id}, client=client) report["uploaded"].append(doc_id) except Exception as exc: - logger.warning("mirror_to_remote: failed to upload %s: %s", doc_id, exc) + logger.warning("mirrorToRemote: failed to upload %s: %s", doc_id, exc) failed.append(doc_id) for doc_id in to_delete: api_id = remote_ids.get(doc_id, doc_id) try: - 
docs_api.delete_document(cloud_dataset_id, api_id, client=client) + docs_api.deleteDocument(cloud_dataset_id, api_id, client=client) report["deleted"].append(doc_id) except Exception as exc: - logger.warning("mirror_to_remote: failed to delete %s: %s", doc_id, exc) + logger.warning("mirrorToRemote: failed to delete %s: %s", doc_id, exc) failed.append(doc_id) # Upload associated files if requested if options.sync_files and report["uploaded"]: try: - from ..upload import upload_files_for_documents + from ..upload import uploadFilesForDatasetDocuments doc_dir = ds_path / _DOC_DIR doc_dicts = [] @@ -289,14 +289,14 @@ def mirror_to_remote( if doc_file.exists(): doc_dicts.append(json.loads(doc_file.read_text(encoding="utf-8"))) if doc_dicts: - upload_files_for_documents( + uploadFilesForDatasetDocuments( client.config.org_id, cloud_dataset_id, doc_dicts, client=client, ) except Exception as exc: - logger.warning("mirror_to_remote: file upload failed: %s", exc) + logger.warning("mirrorToRemote: file upload failed: %s", exc) report["failed"] = failed @@ -306,7 +306,7 @@ def mirror_to_remote( return report -def mirror_from_remote( +def mirrorFromRemote( dataset_path: str, cloud_dataset_id: str, options: SyncOptions | None = None, @@ -314,13 +314,13 @@ def mirror_from_remote( client: CloudClient | None = None, ) -> dict[str, Any]: """Make the local state match the remote (download new, delete local-only).""" - from ..internal import list_remote_document_ids + from ..internal import listRemoteDocumentIds options = options or SyncOptions() ds_path = Path(dataset_path) index = SyncIndex.read(ds_path) - remote_ids = list_remote_document_ids(cloud_dataset_id, client=client) + remote_ids = listRemoteDocumentIds(cloud_dataset_id, client=client) remote_id_set = set(remote_ids.keys()) local_ids = set(index.local_doc_ids_last_sync) @@ -354,7 +354,7 @@ def mirror_from_remote( if options.verbose: logger.info( - "mirror_from_remote: downloaded %d, deleted %d local", + "mirrorFromRemote: 
downloaded %d, deleted %d local", len(saved), len(deleted), ) @@ -365,7 +365,7 @@ def mirror_from_remote( return report -def two_way_sync( +def twoWaySync( dataset_path: str, cloud_dataset_id: str, options: SyncOptions | None = None, @@ -380,14 +380,14 @@ def two_way_sync( propagated to the other (unless the deleted doc was re-added). """ from ..api import documents as docs_api - from ..internal import list_remote_document_ids + from ..internal import listRemoteDocumentIds options = options or SyncOptions() ds_path = Path(dataset_path) index = SyncIndex.read(ds_path) # Current state - remote_ids = list_remote_document_ids(cloud_dataset_id, client=client) + remote_ids = listRemoteDocumentIds(cloud_dataset_id, client=client) current_remote = set(remote_ids.keys()) current_local = set(index.local_doc_ids_last_sync) @@ -405,7 +405,7 @@ def two_way_sync( conflicts = added_local & added_remote if conflicts and options.verbose: logger.warning( - "two_way_sync: %d documents added on both sides (skipping): %s", + "twoWaySync: %d documents added on both sides (skipping): %s", len(conflicts), conflicts, ) @@ -455,19 +455,19 @@ def two_way_sync( for doc_id in to_delete_remote: api_id = remote_ids.get(doc_id, doc_id) try: - docs_api.delete_document(cloud_dataset_id, api_id, client=client) + docs_api.deleteDocument(cloud_dataset_id, api_id, client=client) report["deleted_remote"].append(doc_id) except Exception as exc: - logger.warning("two_way_sync: failed to delete remote %s: %s", doc_id, exc) + logger.warning("twoWaySync: failed to delete remote %s: %s", doc_id, exc) failed.append(doc_id) # 3. 
Upload local-only docs for doc_id in to_upload: try: - docs_api.add_document(cloud_dataset_id, {"ndiId": doc_id}, client=client) + docs_api.addDocument(cloud_dataset_id, {"ndiId": doc_id}, client=client) report["uploaded"].append(doc_id) except Exception as exc: - logger.warning("two_way_sync: failed to upload %s: %s", doc_id, exc) + logger.warning("twoWaySync: failed to upload %s: %s", doc_id, exc) failed.append(doc_id) # 4. Download remote-only docs @@ -482,7 +482,7 @@ def two_way_sync( if options.verbose: logger.info( - "two_way_sync: uploaded=%d downloaded=%d " "del_local=%d del_remote=%d conflicts=%d", + "twoWaySync: uploaded=%d downloaded=%d " "del_local=%d del_remote=%d conflicts=%d", len(report["uploaded"]), len(report["downloaded"]), len(report["deleted_local"]), @@ -499,7 +499,7 @@ def two_way_sync( return report -def validate_sync( +def validate( dataset: Any, cloud_dataset_id: str, *, @@ -512,7 +512,7 @@ def validate_sync( Returns: Report with local_only, remote_only, common ID lists. 
""" - from ..internal import validate_sync as _validate + from ..internal import validateSync as _validate return _validate(dataset, cloud_dataset_id, client=client) @@ -527,11 +527,11 @@ def sync( ) -> dict[str, Any]: """Dispatch to the appropriate sync operation based on *mode*.""" dispatch = { - SyncMode.UPLOAD_NEW: upload_new, - SyncMode.DOWNLOAD_NEW: download_new, - SyncMode.MIRROR_TO_REMOTE: mirror_to_remote, - SyncMode.MIRROR_FROM_REMOTE: mirror_from_remote, - SyncMode.TWO_WAY_SYNC: two_way_sync, + SyncMode.UPLOAD_NEW: uploadNew, + SyncMode.DOWNLOAD_NEW: downloadNew, + SyncMode.MIRROR_TO_REMOTE: mirrorToRemote, + SyncMode.MIRROR_FROM_REMOTE: mirrorFromRemote, + SyncMode.TWO_WAY_SYNC: twoWaySync, } handler = dispatch.get(mode) if handler is None: diff --git a/src/ndi/cloud/upload.py b/src/ndi/cloud/upload.py index 3b56e1b..f28af85 100644 --- a/src/ndi/cloud/upload.py +++ b/src/ndi/cloud/upload.py @@ -21,7 +21,7 @@ from .client import CloudClient -def upload_document_collection( +def uploadDocumentCollection( dataset_id: str, documents: list[dict[str, Any]], only_missing: bool = True, @@ -54,7 +54,7 @@ def upload_document_collection( if only_missing: try: - existing = docs_api.list_all_documents(dataset_id, client=client) + existing = docs_api.listDatasetDocumentsAll(dataset_id, client=client) existing_ids = {d.get("ndiId", d.get("id", "")) for d in existing.data} filtered = [d for d in documents if d.get("ndiId", d.get("id", "")) not in existing_ids] report["skipped"] = len(documents) - len(filtered) @@ -73,7 +73,7 @@ def upload_document_collection( for chunk in chunks: for doc in chunk: try: - docs_api.add_document(dataset_id, doc, client=client) + docs_api.addDocument(dataset_id, doc, client=client) report["uploaded"] += 1 doc_id = doc.get("ndiId", doc.get("id", "")) report["manifest"].append(doc_id) @@ -86,7 +86,7 @@ def upload_document_collection( return report -def zip_documents_for_upload( +def zipForUpload( documents: list[dict[str, Any]], dataset_id: 
str, target_dir: Path | None = None, @@ -120,7 +120,7 @@ def zip_documents_for_upload( return zip_path, manifest -def upload_files_for_documents( +def uploadFilesForDatasetDocuments( org_id: str, dataset_id: str, documents: list[dict[str, Any]], @@ -155,8 +155,8 @@ def upload_files_for_documents( if not file_uid or not file_path: continue try: - url = files_api.get_upload_url(org_id, dataset_id, file_uid, client=client) - files_api.put_file(url, file_path) + url = files_api.getFileUploadURL(org_id, dataset_id, file_uid, client=client) + files_api.putFiles(url, file_path) report["uploaded"] += 1 except Exception as exc: report["failed"] += 1 @@ -166,7 +166,7 @@ def upload_files_for_documents( @_auto_client -def upload_single_file( +def uploadSingleFile( dataset_id: str, file_uid: str, file_path: str, @@ -201,23 +201,23 @@ def upload_single_file( try: with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf: zf.write(file_path, os.path.basename(file_path)) - url = files_api.get_file_collection_upload_url( + url = files_api.getFileCollectionUploadURL( client.config.org_id, dataset_id, client=client, ) - files_api.put_file(url, str(zip_path)) + files_api.putFiles(url, str(zip_path)) finally: if zip_path.exists(): zip_path.unlink() else: - url = files_api.get_upload_url( + url = files_api.getFileUploadURL( client.config.org_id, dataset_id, file_uid, client=client, ) - files_api.put_file(url, file_path) + files_api.putFiles(url, file_path) return True, "" except Exception as exc: diff --git a/tests/matlab_tests/test_cloud_compute.py b/tests/matlab_tests/test_cloud_compute.py index 637a9b4..44acb32 100644 --- a/tests/matlab_tests/test_cloud_compute.py +++ b/tests/matlab_tests/test_cloud_compute.py @@ -49,20 +49,20 @@ class TestCompute: # ---- Mocked tests ---- - def test_start_session_mocked(self): + def test_startSession_mocked(self): """startSession returns a session ID (mocked).""" - from ndi.cloud.api.compute import start_session + from ndi.cloud.api.compute 
import startSession client = MagicMock() client.post.return_value = {"sessionId": "session-abc-123"} - result = start_session("hello-world-v1", client=client) + result = startSession("hello-world-v1", client=client) assert result["sessionId"] == "session-abc-123" client.post.assert_called_once() - def test_get_session_status_mocked(self): + def test_getSessionStatus_mocked(self): """getSessionStatus returns status dict (mocked).""" - from ndi.cloud.api.compute import get_session_status + from ndi.cloud.api.compute import getSessionStatus client = MagicMock() client.get.return_value = { @@ -71,13 +71,13 @@ def test_get_session_status_mocked(self): "currentStageId": "stage-1", } - result = get_session_status("session-abc-123", client=client) + result = getSessionStatus("session-abc-123", client=client) assert result["status"] == "RUNNING" assert result["currentStageId"] == "stage-1" - def test_list_sessions_mocked(self): + def test_listSessions_mocked(self): """listSessions returns a list of sessions (mocked).""" - from ndi.cloud.api.compute import list_sessions + from ndi.cloud.api.compute import listSessions client = MagicMock() client.get.return_value = { @@ -87,55 +87,55 @@ def test_list_sessions_mocked(self): ] } - result = list_sessions(client=client) + result = listSessions(client=client) sessions = result.data assert isinstance(sessions, list) assert len(sessions) == 2 assert sessions[0]["sessionId"] == "session-1" - def test_list_sessions_as_list_mocked(self): + def test_listSessions_as_list_mocked(self): """listSessions handles direct list return (mocked).""" - from ndi.cloud.api.compute import list_sessions + from ndi.cloud.api.compute import listSessions client = MagicMock() client.get.return_value = [ {"sessionId": "session-1", "status": "RUNNING"}, ] - result = list_sessions(client=client) + result = listSessions(client=client) sessions = result.data assert isinstance(sessions, list) assert len(sessions) == 1 - def test_abort_session_mocked(self): + def 
test_abortSession_mocked(self): """abortSession returns True (mocked).""" - from ndi.cloud.api.compute import abort_session + from ndi.cloud.api.compute import abortSession client = MagicMock() client.post.return_value = {} - result = abort_session("session-abc-123", client=client) + result = abortSession("session-abc-123", client=client) assert result is True client.post.assert_called_once() - def test_trigger_stage_mocked(self): + def test_triggerStage_mocked(self): """triggerStage calls the correct endpoint (mocked).""" - from ndi.cloud.api.compute import trigger_stage + from ndi.cloud.api.compute import triggerStage client = MagicMock() client.post.return_value = {"status": "triggered"} - result = trigger_stage("session-abc-123", "stage-1", client=client) + result = triggerStage("session-abc-123", "stage-1", client=client) assert result["status"] == "triggered" - def test_finalize_session_mocked(self): + def test_finalizeSession_mocked(self): """finalizeSession calls the correct endpoint (mocked).""" - from ndi.cloud.api.compute import finalize_session + from ndi.cloud.api.compute import finalizeSession client = MagicMock() client.post.return_value = {"status": "finalized"} - result = finalize_session("session-abc-123", client=client) + result = finalizeSession("session-abc-123", client=client) assert result["status"] == "finalized" # ---- Live tests ---- @@ -152,28 +152,28 @@ def test_hello_world_flow_live(self): 6. finalizeSession (expect possible error, just verify no crash) """ from ndi.cloud.api.compute import ( - abort_session, - finalize_session, - get_session_status, - list_sessions, - start_session, - trigger_stage, + abortSession, + finalizeSession, + getSessionStatus, + listSessions, + startSession, + triggerStage, ) _, client = _login() # 1. 
Start session - result = start_session("hello-world-v1", client=client) + result = startSession("hello-world-v1", client=client) session_id = result.get("sessionId") or result.get("id", "") assert session_id, f"No sessionId in response: {result}" try: # 2. Get session status - status_result = get_session_status(session_id, client=client) + status_result = getSessionStatus(session_id, client=client) assert "status" in status_result, f"No status in response: {status_result}" # 3. List sessions — verify our session appears - sessions = list_sessions(client=client).data + sessions = listSessions(client=client).data session_ids = [] for s in sessions: sid = s.get("sessionId") or s.get("id", "") @@ -182,27 +182,27 @@ def test_hello_world_flow_live(self): # 4. Abort session (cleanup) try: - abort_session(session_id, client=client) + abortSession(session_id, client=client) except Exception: # If session already finished, abort may 404 pass # 5. triggerStage — just verify no crash try: - trigger_stage(session_id, "dummy-stage", client=client) + triggerStage(session_id, "dummy-stage", client=client) except Exception: pass # Expected: session may be gone # 6. 
finalizeSession — just verify no crash try: - finalize_session(session_id, client=client) + finalizeSession(session_id, client=client) except Exception: pass # Expected: session may be gone except Exception: # Best-effort cleanup try: - abort_session(session_id, client=client) + abortSession(session_id, client=client) except Exception: pass raise @@ -226,15 +226,15 @@ class TestZombie: def test_zombie_flow_mocked(self): """Zombie flow with mocked responses — verifies logic without waiting.""" from ndi.cloud.api.compute import ( - get_session_status, - start_session, + getSessionStatus, + startSession, ) client = MagicMock() # Start returns session ID client.post.return_value = {"sessionId": "zombie-session-1"} - result = start_session("zombie-test-v1", client=client) + result = startSession("zombie-test-v1", client=client) assert result["sessionId"] == "zombie-session-1" # Status returns RUNNING, then COMPLETED @@ -251,10 +251,10 @@ def test_zombie_flow_mocked(self): }, ] - status1 = get_session_status("zombie-session-1", client=client) + status1 = getSessionStatus("zombie-session-1", client=client) assert status1["status"] == "RUNNING" - status2 = get_session_status("zombie-session-1", client=client) + status2 = getSessionStatus("zombie-session-1", client=client) assert status2["status"] == "COMPLETED" @requires_cloud @@ -266,15 +266,15 @@ def test_zombie_flow_live(self): ABORTED/FAILED/COMPLETED. Times out after 10 minutes. """ from ndi.cloud.api.compute import ( - get_session_status, - list_sessions, - start_session, + getSessionStatus, + listSessions, + startSession, ) _, client = _login() # 1. Start pipeline - result = start_session("zombie-test-v1", client=client) + result = startSession("zombie-test-v1", client=client) session_id = result.get("sessionId") or result.get("id", "") assert session_id, f"No sessionId in response: {result}" @@ -282,7 +282,7 @@ def test_zombie_flow_live(self): time.sleep(10) # 3. 
Verify session in list - sessions = list_sessions(client=client).data + sessions = listSessions(client=client).data session_ids = [s.get("sessionId") or s.get("id", "") for s in sessions] assert session_id in session_ids, f"Session {session_id} not in list: {session_ids}" @@ -292,7 +292,7 @@ def test_zombie_flow_live(self): for _ in range(max_iterations): try: - status_result = get_session_status(session_id, client=client) + status_result = getSessionStatus(session_id, client=client) status = status_result.get("status", "UNKNOWN") if status in ("ABORTED", "FAILED", "COMPLETED"): diff --git a/tests/test_cloud_download_live.py b/tests/test_cloud_download_live.py index 4cd3543..f543923 100644 --- a/tests/test_cloud_download_live.py +++ b/tests/test_cloud_download_live.py @@ -103,12 +103,12 @@ def run_test(username: str, password: str) -> dict: # ================================================================= section("Step 2: Download Dataset (docs only, ndic:// URIs)") - from ndi.cloud.orchestration import download_dataset + from ndi.cloud.orchestration import downloadDataset with tempfile.TemporaryDirectory(prefix="ndi_live_test_") as tmpdir: t0 = time.time() try: - dataset = download_dataset( + dataset = downloadDataset( CARBON_FIBER_ID, target_folder=tmpdir, sync_files=False, @@ -363,8 +363,8 @@ def run_test(username: str, password: str) -> dict: print(f" Test file: {test_filename}") print(f" ndic URI: {test_uri}") - # Step 9a: Test get_file_details API directly - from ndi.cloud.api.files import get_file_details + # Step 9a: Test getFileDetails API directly + from ndi.cloud.api.files import getFileDetails from ndi.cloud.filehandler import parse_ndic_uri ds_id, file_uid = parse_ndic_uri(test_uri) @@ -372,7 +372,7 @@ def run_test(username: str, password: str) -> dict: print(f" File UID: {file_uid}") try: - details = get_file_details(ds_id, file_uid, client=client) + details = getFileDetails(ds_id, file_uid, client=client) download_url = details.get("downloadUrl", 
"") print(f" File details response keys: {list(details.keys())}") if download_url: @@ -383,12 +383,12 @@ def run_test(username: str, password: str) -> dict: print(f" Download URL host: {parsed.hostname}") print(f" Download URL path prefix: {parsed.path[:80]}...") results["file_details_api"] = True - check("get_file_details API", True, "presigned URL obtained") + check("getFileDetails API", True, "presigned URL obtained") else: print(f" WARNING: No downloadUrl in response: {details}") - check("get_file_details API", False, "no downloadUrl") + check("getFileDetails API", False, "no downloadUrl") except Exception as exc: - print(f" get_file_details FAILED: {exc}") + print(f" getFileDetails FAILED: {exc}") traceback.print_exc() # Step 9b: Test raw download with detailed error reporting @@ -472,7 +472,7 @@ def run_test(username: str, password: str) -> dict: ds_id2, file_uid2 = parse_ndic_uri(uri) try: - details2 = get_file_details(ds_id2, file_uid2, client=client) + details2 = getFileDetails(ds_id2, file_uid2, client=client) url2 = details2.get("downloadUrl", "") if url2: resp2 = requests.head(url2, timeout=10) diff --git a/tests/test_cloud_filehandler.py b/tests/test_cloud_filehandler.py index 919fbcd..a0e8db0 100644 --- a/tests/test_cloud_filehandler.py +++ b/tests/test_cloud_filehandler.py @@ -213,8 +213,8 @@ def test_fetch_success(self, tmp_path): mock_client = MagicMock() with ( - patch("ndi.cloud.api.files.get_file_details") as mock_details, - patch("ndi.cloud.api.files.get_file") as mock_get_file, + patch("ndi.cloud.api.files.getFileDetails") as mock_details, + patch("ndi.cloud.api.files.getFile") as mock_get_file, ): mock_details.return_value = {"downloadUrl": "https://s3.example.com/file"} @@ -239,7 +239,7 @@ def test_fetch_no_download_url(self, tmp_path): target = tmp_path / "file.bin" mock_client = MagicMock() - with patch("ndi.cloud.api.files.get_file_details") as mock_details: + with patch("ndi.cloud.api.files.getFileDetails") as mock_details: 
mock_details.return_value = {} with pytest.raises(CloudError, match="No downloadUrl"): @@ -258,8 +258,8 @@ def test_fetch_fallback_to_env_client(self, tmp_path): with ( patch("ndi.cloud.filehandler.get_or_create_cloud_client") as mock_auto, - patch("ndi.cloud.api.files.get_file_details") as mock_details, - patch("ndi.cloud.api.files.get_file") as mock_get_file, + patch("ndi.cloud.api.files.getFileDetails") as mock_details, + patch("ndi.cloud.api.files.getFile") as mock_get_file, ): mock_auto.return_value = MagicMock() mock_details.return_value = {"downloadUrl": "https://s3.example.com/f"} diff --git a/tests/test_cloud_live.py b/tests/test_cloud_live.py index 1ea39b2..5950ec2 100644 --- a/tests/test_cloud_live.py +++ b/tests/test_cloud_live.py @@ -43,7 +43,7 @@ def _retry_on_server_error(fn, retries=3, delay=10, retry_on_404=False): """Call *fn*; retry on HTTP 502/504 server errors. The NDI Cloud API runs on AWS Lambda with a 30-second gateway timeout. - Write-heavy operations (create_dataset, submit, publish) often exceed + Write-heavy operations (createDataset, submit, publish) often exceed this limit, returning 504. We retry with exponential back-off. 
Set *retry_on_404* for operations that follow a create — MongoDB @@ -96,9 +96,9 @@ def client(cloud_config): @pytest.fixture(scope="module") def user_info(client): """Fetch and cache current user info.""" - from ndi.cloud.api.users import get_current_user + from ndi.cloud.api.users import me - return get_current_user(client=client) + return me(client=client) @pytest.fixture(scope="module") @@ -123,35 +123,35 @@ def is_admin(user_info): @pytest.fixture(scope="module") def large_dataset_info(client): """Fetch and cache metadata for the large public dataset.""" - from ndi.cloud.api.datasets import get_dataset + from ndi.cloud.api.datasets import getDataset - return get_dataset(LARGE_DATASET, client=client) + return getDataset(LARGE_DATASET, client=client) @pytest.fixture(scope="module") def small_dataset_info(client): """Fetch and cache metadata for the small public dataset.""" - from ndi.cloud.api.datasets import get_dataset + from ndi.cloud.api.datasets import getDataset - return get_dataset(SMALL_DATASET, client=client) + return getDataset(SMALL_DATASET, client=client) @pytest.fixture(scope="module") def can_write(client, cloud_config): """Test whether this user can create datasets (returns bool). - Regular users get HTTP 400 from create_dataset — skip CRUD tests. + Regular users get HTTP 400 from createDataset — skip CRUD tests. 
""" - from ndi.cloud.api.datasets import create_dataset, delete_dataset + from ndi.cloud.api.datasets import createDataset, deleteDataset try: result = _retry_on_server_error( - lambda: create_dataset(cloud_config.org_id, "NDI_PYTEST_WRITE_CHECK", client=client) + lambda: createDataset(cloud_config.org_id, "NDI_PYTEST_WRITE_CHECK", client=client) ) ds_id = result.get("_id", result.get("id", "")) if ds_id: try: - delete_dataset(ds_id, when="now", client=client) + deleteDataset(ds_id, when="now", client=client) except Exception: pass return True @@ -166,13 +166,13 @@ def fresh_dataset(client, cloud_config, can_write): if not can_write: pytest.skip("User does not have dataset creation privileges") - from ndi.cloud.api.datasets import create_dataset, delete_dataset + from ndi.cloud.api.datasets import createDataset, deleteDataset from ndi.cloud.exceptions import CloudAPIError org_id = cloud_config.org_id try: result = _retry_on_server_error( - lambda: create_dataset(org_id, "NDI_PYTEST_TEMP_DATASET", client=client) + lambda: createDataset(org_id, "NDI_PYTEST_TEMP_DATASET", client=client) ) except CloudAPIError as exc: pytest.skip(f"Could not create dataset (server error): {exc}") @@ -183,7 +183,7 @@ def fresh_dataset(client, cloud_config, can_write): # Teardown: delete the dataset try: - delete_dataset(dataset_id, when="now", client=client) + deleteDataset(dataset_id, when="now", client=client) except Exception: pass @@ -270,15 +270,15 @@ def test_token_is_valid_jwt(self, cloud_config): def test_token_not_expired(self, cloud_config): """Token must not be expired.""" - from ndi.cloud.auth import verify_token + from ndi.cloud.auth import verifyToken - assert verify_token(cloud_config.token) + assert verifyToken(cloud_config.token) def test_jwt_has_expected_claims(self, cloud_config): """JWT payload should contain standard claims.""" - from ndi.cloud.auth import decode_jwt + from ndi.cloud.auth import decodeJwt - payload = decode_jwt(cloud_config.token) + payload = 
decodeJwt(cloud_config.token) assert "exp" in payload assert any(k in payload for k in ("sub", "email", "userId", "id")) @@ -287,11 +287,11 @@ def test_config_has_org_id(self, cloud_config): assert cloud_config.org_id assert len(cloud_config.org_id) > 10 - def test_decode_jwt_structure(self, cloud_config): - """decode_jwt should return a dict with expected keys.""" - from ndi.cloud.auth import decode_jwt + def test_decodeJwt_structure(self, cloud_config): + """decodeJwt should return a dict with expected keys.""" + from ndi.cloud.auth import decodeJwt - payload = decode_jwt(cloud_config.token) + payload = decodeJwt(cloud_config.token) assert isinstance(payload, dict) assert "iat" in payload or "exp" in payload @@ -302,7 +302,7 @@ def test_decode_jwt_structure(self, cloud_config): class TestUser: - def test_get_current_user(self, user_info): + def test_me(self, user_info): """GET /users/me should return authenticated user info.""" assert hasattr(user_info, "get"), f"Expected dict-like response, got {type(user_info)}" assert user_info.get("id") @@ -319,9 +319,9 @@ def test_user_has_organizations(self, user_info): def test_get_user_by_id(self, client, user_info): """GET /users/{userId} should return the same user.""" - from ndi.cloud.api.users import get_user + from ndi.cloud.api.users import GetUser - user = get_user(user_info["id"], client=client) + user = GetUser(user_info["id"], client=client) assert user.get("id") == user_info["id"] def test_user_role_detection(self, user_info, is_admin): @@ -337,42 +337,42 @@ def test_user_role_detection(self, user_info, is_admin): class TestDatasetLifecycle: - def test_create_and_delete_dataset(self, client, cloud_config, can_write): + def test_create_and_deleteDataset(self, client, cloud_config, can_write): """Create a dataset, verify it exists, then delete it.""" if not can_write: pytest.skip("User does not have dataset creation privileges") from ndi.cloud.api.datasets import ( - create_dataset, - delete_dataset, - 
get_dataset, + createDataset, + deleteDataset, + getDataset, ) from ndi.cloud.exceptions import CloudAPIError as _APIError org_id = cloud_config.org_id try: result = _retry_on_server_error( - lambda: create_dataset(org_id, "NDI_PYTEST_CREATE_DELETE", client=client) + lambda: createDataset(org_id, "NDI_PYTEST_CREATE_DELETE", client=client) ) except _APIError as exc: - pytest.skip(f"create_dataset timed out (server 504): {exc}") + pytest.skip(f"createDataset timed out (server 504): {exc}") ds_id = result.get("_id", result.get("id", "")) assert ds_id, f"Create returned no ID: {result}" try: - ds = get_dataset(ds_id, client=client) + ds = getDataset(ds_id, client=client) assert ds.get("_id") == ds_id or ds.get("id") == ds_id finally: try: - _retry_on_server_error(lambda: delete_dataset(ds_id, when="now", client=client)) + _retry_on_server_error(lambda: deleteDataset(ds_id, when="now", client=client)) except Exception: pass # Best-effort cleanup - def test_get_dataset_metadata(self, client, fresh_dataset): + def test_getDataset_metadata(self, client, fresh_dataset): """Created dataset should have _id, name, createdAt.""" - from ndi.cloud.api.datasets import get_dataset + from ndi.cloud.api.datasets import getDataset - ds = get_dataset(fresh_dataset, client=client) + ds = getDataset(fresh_dataset, client=client) ds_id = ds.get("_id", ds.get("id", "")) assert ds_id == fresh_dataset assert ds.get("name") @@ -380,47 +380,47 @@ def test_get_dataset_metadata(self, client, fresh_dataset): def test_update_dataset(self, client, fresh_dataset): """Update dataset name and verify the change persists.""" - from ndi.cloud.api.datasets import get_dataset, update_dataset + from ndi.cloud.api.datasets import getDataset, updateDataset new_name = "NDI_PYTEST_UPDATED_NAME" _retry_on_server_error( - lambda: update_dataset(fresh_dataset, name=new_name, client=client), + lambda: updateDataset(fresh_dataset, name=new_name, client=client), ) - ds = get_dataset(fresh_dataset, client=client) + ds 
= getDataset(fresh_dataset, client=client) assert ds.get("name") == new_name def test_list_datasets(self, client, cloud_config, fresh_dataset): """Created dataset should appear in the org's dataset list.""" - from ndi.cloud.api.datasets import list_datasets + from ndi.cloud.api.datasets import listDatasets - result = list_datasets(cloud_config.org_id, client=client) + result = listDatasets(cloud_config.org_id, client=client) datasets = result.get("datasets", []) ids = {d.get("_id", d.get("id", "")) for d in datasets} assert fresh_dataset in ids - def test_get_branches(self, client, fresh_dataset): + def test_getBranches(self, client, fresh_dataset): """Branches endpoint should return without error.""" - from ndi.cloud.api.datasets import get_branches + from ndi.cloud.api.datasets import getBranches - result = get_branches(fresh_dataset, client=client) + result = getBranches(fresh_dataset, client=client) assert result is not None def test_nonexistent_dataset_raises(self, client): """Fetching a bogus dataset ID should raise CloudAPIError.""" - from ndi.cloud.api.datasets import get_dataset + from ndi.cloud.api.datasets import getDataset from ndi.cloud.exceptions import CloudAPIError with pytest.raises(CloudAPIError): - get_dataset("000000000000000000000000", client=client) + getDataset("000000000000000000000000", client=client) def test_invalid_dataset_id_raises(self, client): """Fetching with invalid ID format should raise.""" - from ndi.cloud.api.datasets import get_dataset + from ndi.cloud.api.datasets import getDataset from ndi.cloud.exceptions import CloudAPIError with pytest.raises(CloudAPIError): - get_dataset("not-a-valid-id", client=client) + getDataset("not-a-valid-id", client=client) # =========================================================================== @@ -431,21 +431,21 @@ def test_invalid_dataset_id_raises(self, client): class TestDocumentLifecycle: def test_empty_dataset_has_zero_documents(self, client, fresh_dataset): """A newly created dataset 
should have 0 documents.""" - from ndi.cloud.api.documents import get_document_count, list_documents + from ndi.cloud.api.documents import countDocuments, listDatasetDocuments - result = list_documents(fresh_dataset, page=1, page_size=10, client=client) + result = listDatasetDocuments(fresh_dataset, page=1, page_size=10, client=client) docs = result.get("documents", []) assert len(docs) == 0 - count = get_document_count(fresh_dataset, client=client) + count = countDocuments(fresh_dataset, client=client) assert count == 0 - def test_add_get_delete_document(self, client, fresh_dataset): + def test_add_get_deleteDocument(self, client, fresh_dataset): """Full document lifecycle: add, get, verify, delete.""" from ndi.cloud.api.documents import ( - add_document, - delete_document, - get_document, + addDocument, + deleteDocument, + getDocument, ) doc_json = { @@ -454,37 +454,37 @@ def test_add_get_delete_document(self, client, fresh_dataset): } # Add - result = add_document(fresh_dataset, doc_json, client=client) + result = addDocument(fresh_dataset, doc_json, client=client) doc_id = result.get("_id", result.get("id", "")) assert doc_id, f"Add returned no ID: {result}" # Get and verify - fetched = get_document(fresh_dataset, doc_id, client=client) + fetched = getDocument(fresh_dataset, doc_id, client=client) assert fetched.get("base", {}).get("name") == "test_document" # Delete - delete_document(fresh_dataset, doc_id, when="now", client=client) + deleteDocument(fresh_dataset, doc_id, when="now", client=client) # Verify gone from ndi.cloud.exceptions import CloudAPIError with pytest.raises(CloudAPIError): - get_document(fresh_dataset, doc_id, client=client) + getDocument(fresh_dataset, doc_id, client=client) - def test_update_document(self, client, fresh_dataset): + def test_updateDocument(self, client, fresh_dataset): """Add a document, update it, verify changes persist.""" from ndi.cloud.api.documents import ( - add_document, - delete_document, - get_document, - 
update_document, + addDocument, + deleteDocument, + getDocument, + updateDocument, ) doc_json = { "document_class": {"class_name": "ndi_pytest_update"}, "base": {"name": "original"}, } - result = add_document(fresh_dataset, doc_json, client=client) + result = addDocument(fresh_dataset, doc_json, client=client) doc_id = result.get("_id", result.get("id", "")) try: @@ -493,24 +493,24 @@ def test_update_document(self, client, fresh_dataset): "base": {"name": "modified"}, } _retry_on_server_error( - lambda: update_document(fresh_dataset, doc_id, updated_json, client=client), + lambda: updateDocument(fresh_dataset, doc_id, updated_json, client=client), ) - fetched = get_document(fresh_dataset, doc_id, client=client) + fetched = getDocument(fresh_dataset, doc_id, client=client) assert fetched.get("base", {}).get("name") == "modified" finally: try: - delete_document(fresh_dataset, doc_id, when="now", client=client) + deleteDocument(fresh_dataset, doc_id, when="now", client=client) except Exception: pass - def test_list_documents_pagination(self, client, fresh_dataset): + def test_listDatasetDocuments_pagination(self, client, fresh_dataset): """Add multiple docs, paginate through them.""" - from ndi.cloud.api.documents import add_document, list_documents + from ndi.cloud.api.documents import addDocument, listDatasetDocuments # Add 5 documents doc_ids = [] for i in range(5): - result = add_document( + result = addDocument( fresh_dataset, { "document_class": {"class_name": "ndi_pytest_pagination"}, @@ -521,8 +521,8 @@ def test_list_documents_pagination(self, client, fresh_dataset): doc_ids.append(result.get("_id", result.get("id", ""))) # Paginate with page_size=2 - p1 = list_documents(fresh_dataset, page=1, page_size=2, client=client) - p2 = list_documents(fresh_dataset, page=2, page_size=2, client=client) + p1 = listDatasetDocuments(fresh_dataset, page=1, page_size=2, client=client) + p2 = listDatasetDocuments(fresh_dataset, page=2, page_size=2, client=client) docs1 = 
p1.get("documents", []) docs2 = p2.get("documents", []) assert len(docs1) == 2 @@ -533,13 +533,13 @@ def test_list_documents_pagination(self, client, fresh_dataset): ids2 = {d.get("_id", d.get("id")) for d in docs2} assert ids1.isdisjoint(ids2) - def test_list_all_documents(self, client, fresh_dataset): - """list_all_documents should return all docs via auto-pagination.""" - from ndi.cloud.api.documents import add_document, list_all_documents + def test_listDatasetDocumentsAll(self, client, fresh_dataset): + """listDatasetDocumentsAll should return all docs via auto-pagination.""" + from ndi.cloud.api.documents import addDocument, listDatasetDocumentsAll # Add 5 docs for i in range(5): - add_document( + addDocument( fresh_dataset, { "document_class": {"class_name": "ndi_pytest_listall"}, @@ -548,18 +548,18 @@ def test_list_all_documents(self, client, fresh_dataset): client=client, ) - docs = list_all_documents(fresh_dataset, page_size=2, client=client).data + docs = listDatasetDocumentsAll(fresh_dataset, page_size=2, client=client).data # Should get all docs in the dataset (at least the 5 we added, # plus any from previous tests in same fixture -- but fresh_dataset # is function-scoped so each test gets its own) assert len(docs) >= 5 def test_document_count(self, client, fresh_dataset): - """get_document_count should match actual document count.""" - from ndi.cloud.api.documents import add_document, get_document_count + """countDocuments should match actual document count.""" + from ndi.cloud.api.documents import addDocument, countDocuments for i in range(3): - add_document( + addDocument( fresh_dataset, { "document_class": {"class_name": "ndi_pytest_count"}, @@ -568,45 +568,45 @@ def test_document_count(self, client, fresh_dataset): client=client, ) - count = get_document_count(fresh_dataset, client=client) + count = countDocuments(fresh_dataset, client=client) assert count == 3 def test_bulk_upload_and_download(self, client, fresh_dataset): """Bulk upload docs 
via ZIP, then bulk download and verify.""" from ndi.cloud.api.documents import ( - get_bulk_download_url, - list_all_documents, + getBulkDownloadURL, + listDatasetDocumentsAll, ) - from ndi.cloud.upload import upload_document_collection + from ndi.cloud.upload import uploadDocumentCollection docs = [ {"document_class": {"class_name": "ndi_pytest_bulk"}, "base": {"name": f"bulk_{i}"}} for i in range(3) ] - report = upload_document_collection(fresh_dataset, docs, client=client) + report = uploadDocumentCollection(fresh_dataset, docs, client=client) assert report.get("uploaded", 0) >= 3 or report.get("added", 0) >= 3 # Verify they exist - all_docs = list_all_documents(fresh_dataset, client=client).data + all_docs = listDatasetDocumentsAll(fresh_dataset, client=client).data assert len(all_docs) >= 3 # Bulk download URL should be generated - url = get_bulk_download_url(fresh_dataset, client=client) + url = getBulkDownloadURL(fresh_dataset, client=client) assert url assert "s3" in url.lower() or "amazonaws" in url.lower() or "http" in url.lower() - def test_bulk_delete(self, client, fresh_dataset): + def test_bulkDeleteDocuments(self, client, fresh_dataset): """Add 5 docs, bulk delete 3, verify 2 remain.""" from ndi.cloud.api.documents import ( - add_document, - bulk_delete, - list_all_documents, + addDocument, + bulkDeleteDocuments, + listDatasetDocumentsAll, ) doc_ids = [] for i in range(5): - result = add_document( + result = addDocument( fresh_dataset, { "document_class": {"class_name": "ndi_pytest_bulkdel"}, @@ -617,21 +617,21 @@ def test_bulk_delete(self, client, fresh_dataset): doc_ids.append(result.get("_id", result.get("id", ""))) # Delete the first 3 - bulk_delete(fresh_dataset, doc_ids[:3], when="now", client=client) + bulkDeleteDocuments(fresh_dataset, doc_ids[:3], when="now", client=client) # Small delay for server processing time.sleep(2) - remaining = list_all_documents(fresh_dataset, client=client).data + remaining = listDatasetDocumentsAll(fresh_dataset, 
client=client).data assert len(remaining) == 2 def test_nonexistent_document_raises(self, client, fresh_dataset): """Fetching a bogus document ID should raise.""" - from ndi.cloud.api.documents import get_document + from ndi.cloud.api.documents import getDocument from ndi.cloud.exceptions import CloudAPIError with pytest.raises(CloudAPIError): - get_document(fresh_dataset, "000000000000000000000000", client=client) + getDocument(fresh_dataset, "000000000000000000000000", client=client) # =========================================================================== @@ -640,11 +640,11 @@ def test_nonexistent_document_raises(self, client, fresh_dataset): class TestFileLifecycle: - def test_get_upload_url(self, client, cloud_config, fresh_dataset): - """get_upload_url should return a presigned URL.""" - from ndi.cloud.api.files import get_upload_url + def test_getFileUploadURL(self, client, cloud_config, fresh_dataset): + """getFileUploadURL should return a presigned URL.""" + from ndi.cloud.api.files import getFileUploadURL - url = get_upload_url( + url = getFileUploadURL( cloud_config.org_id, fresh_dataset, "pytest-test-file-uid", @@ -659,16 +659,16 @@ def test_upload_and_download_file(self, client, cloud_config, fresh_dataset): import requests from ndi.cloud.api.files import ( - get_file_details, - get_upload_url, - list_files, + getFileDetails, + getFileUploadURL, + listFiles, ) file_uid = "pytest-upload-test-file" test_content = b"Hello from NDI pytest! This is test file content." 
# Get upload URL - upload_url = get_upload_url(cloud_config.org_id, fresh_dataset, file_uid, client=client) + upload_url = getFileUploadURL(cloud_config.org_id, fresh_dataset, file_uid, client=client) assert upload_url # Upload @@ -685,11 +685,11 @@ def test_upload_and_download_file(self, client, cloud_config, fresh_dataset): details = {} for wait in (3, 5, 10): time.sleep(wait) - files = list_files(fresh_dataset, client=client).data + files = listFiles(fresh_dataset, client=client).data file_uids = [f.get("uid", "") for f in files] if file_uid not in file_uids: continue - details = get_file_details(fresh_dataset, file_uid, client=client) + details = getFileDetails(fresh_dataset, file_uid, client=client) download_url = details.get("downloadUrl", "") if download_url: break @@ -701,14 +701,14 @@ def test_upload_and_download_file(self, client, cloud_config, fresh_dataset): assert dl_resp.status_code == 200 assert dl_resp.content == test_content - def test_list_files(self, client, fresh_dataset, cloud_config): - """After uploading a file, list_files should return it.""" + def test_listFiles(self, client, fresh_dataset, cloud_config): + """After uploading a file, listFiles should return it.""" import requests - from ndi.cloud.api.files import get_upload_url, list_files + from ndi.cloud.api.files import getFileUploadURL, listFiles file_uid = "pytest-list-test-file" - upload_url = get_upload_url(cloud_config.org_id, fresh_dataset, file_uid, client=client) + upload_url = getFileUploadURL(cloud_config.org_id, fresh_dataset, file_uid, client=client) requests.put( upload_url, data=b"list test data", @@ -717,19 +717,19 @@ def test_list_files(self, client, fresh_dataset, cloud_config): ) time.sleep(3) - files = list_files(fresh_dataset, client=client).data + files = listFiles(fresh_dataset, client=client).data assert isinstance(files, list) uids = [f.get("uid", "") for f in files] assert file_uid in uids def test_file_details_has_download_url(self, client, fresh_dataset, 
cloud_config): - """get_file_details should include downloadUrl.""" + """getFileDetails should include downloadUrl.""" import requests - from ndi.cloud.api.files import get_file_details, get_upload_url + from ndi.cloud.api.files import getFileDetails, getFileUploadURL file_uid = "pytest-details-test-file" - upload_url = get_upload_url(cloud_config.org_id, fresh_dataset, file_uid, client=client) + upload_url = getFileUploadURL(cloud_config.org_id, fresh_dataset, file_uid, client=client) requests.put( upload_url, data=b"details test", @@ -738,7 +738,7 @@ def test_file_details_has_download_url(self, client, fresh_dataset, cloud_config ) time.sleep(3) - details = get_file_details(fresh_dataset, file_uid, client=client) + details = getFileDetails(fresh_dataset, file_uid, client=client) assert hasattr(details, "get"), f"Expected dict-like response, got {type(details)}" assert details.get("downloadUrl") @@ -749,9 +749,9 @@ def test_file_details_has_download_url(self, client, fresh_dataset, cloud_config class TestNDIQuery: - def test_ndi_query_public(self, client): + def test_ndiquery_public(self, client): """ndiquery should return documents matching a search.""" - from ndi.cloud.api.documents import ndi_query + from ndi.cloud.api.documents import ndiquery search = [ { @@ -761,14 +761,14 @@ def test_ndi_query_public(self, client): } ] result = _retry_on_server_error( - lambda: ndi_query("public", search, page=1, page_size=5, client=client) + lambda: ndiquery("public", search, page=1, page_size=5, client=client) ) assert hasattr(result, "get"), f"Expected dict-like response, got {type(result)}" assert "documents" in result - def test_ndi_query_nonexistent_returns_empty(self, client): + def test_ndiquery_nonexistent_returns_empty(self, client): """Searching for a non-existent ID should return empty results.""" - from ndi.cloud.api.documents import ndi_query + from ndi.cloud.api.documents import ndiquery search = [ { @@ -778,14 +778,14 @@ def 
test_ndi_query_nonexistent_returns_empty(self, client): } ] result = _retry_on_server_error( - lambda: ndi_query("public", search, page=1, page_size=5, client=client) + lambda: ndiquery("public", search, page=1, page_size=5, client=client) ) docs = result.get("documents", []) assert len(docs) == 0 - def test_ndi_query_all_paginates(self, client): - """ndi_query_all should auto-paginate results.""" - from ndi.cloud.api.documents import ndi_query_all + def test_ndiqueryAll_paginates(self, client): + """ndiqueryAll should auto-paginate results.""" + from ndi.cloud.api.documents import ndiqueryAll search = [ { @@ -795,7 +795,7 @@ def test_ndi_query_all_paginates(self, client): } ] result = _retry_on_server_error( - lambda: ndi_query_all("public", search, page_size=3, client=client) + lambda: ndiqueryAll("public", search, page_size=3, client=client) ) docs = result.data assert isinstance(docs, list) @@ -813,67 +813,67 @@ def _skip_if_not_admin(self, is_admin): if not is_admin: pytest.skip("Publish tests require admin privileges") - def test_submit_dataset(self, client, fresh_dataset): + def test_submitDataset(self, client, fresh_dataset): """Submit a dataset for review.""" - from ndi.cloud.api.datasets import get_dataset, submit_dataset + from ndi.cloud.api.datasets import getDataset, submitDataset from ndi.cloud.exceptions import CloudAPIError try: - _retry_on_server_error(lambda: submit_dataset(fresh_dataset, client=client)) + _retry_on_server_error(lambda: submitDataset(fresh_dataset, client=client)) except CloudAPIError as exc: - pytest.skip(f"submit_dataset server timeout: {exc}") + pytest.skip(f"submitDataset server timeout: {exc}") - ds = get_dataset(fresh_dataset, client=client) + ds = getDataset(fresh_dataset, client=client) assert ds.get("isSubmitted") is True def test_publish_unpublish_lifecycle(self, client, fresh_dataset): """Full publish lifecycle: submit -> publish -> unpublish.""" from ndi.cloud.api.datasets import ( - get_dataset, - publish_dataset, - 
submit_dataset, - unpublish_dataset, + getDataset, + publishDataset, + submitDataset, + unpublishDataset, ) from ndi.cloud.exceptions import CloudAPIError # Submit try: - _retry_on_server_error(lambda: submit_dataset(fresh_dataset, client=client)) + _retry_on_server_error(lambda: submitDataset(fresh_dataset, client=client)) except CloudAPIError as exc: - pytest.skip(f"submit_dataset server timeout: {exc}") + pytest.skip(f"submitDataset server timeout: {exc}") # Publish try: - _retry_on_server_error(lambda: publish_dataset(fresh_dataset, client=client)) + _retry_on_server_error(lambda: publishDataset(fresh_dataset, client=client)) except CloudAPIError as exc: - pytest.skip(f"publish_dataset server timeout: {exc}") + pytest.skip(f"publishDataset server timeout: {exc}") time.sleep(2) # Allow server processing - ds = get_dataset(fresh_dataset, client=client) + ds = getDataset(fresh_dataset, client=client) assert ds.get("isPublished") is True # Unpublish try: - _retry_on_server_error(lambda: unpublish_dataset(fresh_dataset, client=client)) + _retry_on_server_error(lambda: unpublishDataset(fresh_dataset, client=client)) except CloudAPIError as exc: - pytest.skip(f"unpublish_dataset server timeout: {exc}") + pytest.skip(f"unpublishDataset server timeout: {exc}") time.sleep(2) - ds = get_dataset(fresh_dataset, client=client) + ds = getDataset(fresh_dataset, client=client) assert ds.get("isPublished") is not True def test_published_datasets_list(self, client): """GET /datasets/published should return results.""" - from ndi.cloud.api.datasets import get_published_datasets + from ndi.cloud.api.datasets import getPublished - result = get_published_datasets(page=1, page_size=5, client=client) + result = getPublished(page=1, page_size=5, client=client) assert hasattr(result, "get"), f"Expected dict-like response, got {type(result)}" datasets = result.get("datasets", []) assert len(datasets) > 0 def test_unpublished_datasets_list(self, client): """GET /datasets/unpublished should 
return results.""" - from ndi.cloud.api.datasets import get_unpublished + from ndi.cloud.api.datasets import getUnpublished - result = get_unpublished(page=1, page_size=5, client=client) + result = getUnpublished(page=1, page_size=5, client=client) assert hasattr(result, "get"), f"Expected dict-like response, got {type(result)}" @@ -894,29 +894,29 @@ def test_deferred_delete_and_undelete(self, client, cloud_config, can_write): pytest.skip("User does not have dataset creation privileges") from ndi.cloud.api.datasets import ( - create_dataset, - delete_dataset, - get_dataset, - list_deleted_datasets, - undelete_dataset, + createDataset, + deleteDataset, + getDataset, + listDeletedDatasets, + undeleteDataset, ) - from ndi.cloud.api.documents import add_document, list_all_documents + from ndi.cloud.api.documents import addDocument, listDatasetDocumentsAll from ndi.cloud.exceptions import CloudAPIError as _APIError org_id = cloud_config.org_id try: result = _retry_on_server_error( - lambda: create_dataset(org_id, "NDI_PYTEST_SOFT_DELETE", client=client) + lambda: createDataset(org_id, "NDI_PYTEST_SOFT_DELETE", client=client) ) except _APIError as exc: - pytest.skip(f"create_dataset timed out: {exc}") + pytest.skip(f"createDataset timed out: {exc}") ds_id = result.get("_id", result.get("id", "")) assert ds_id try: # Add documents to make it realistic for i in range(3): - add_document( + addDocument( ds_id, { "document_class": {"class_name": "ndi_pytest_softdel"}, @@ -926,7 +926,7 @@ def test_deferred_delete_and_undelete(self, client, cloud_config, can_write): ) # Deferred delete (7 days) - del_result = delete_dataset(ds_id, when="7d", client=client) + del_result = deleteDataset(ds_id, when="7d", client=client) assert hasattr( del_result, "get" ), f"Expected dict-like response, got {type(del_result)}" @@ -934,12 +934,12 @@ def test_deferred_delete_and_undelete(self, client, cloud_config, can_write): # Should appear in deleted list time.sleep(2) - deleted = 
list_deleted_datasets(client=client) + deleted = listDeletedDatasets(client=client) deleted_ids = {d.get("_id", d.get("id", "")) for d in deleted.get("datasets", [])} assert ds_id in deleted_ids, f"Dataset {ds_id} not found in deleted list" # Undelete - undelete_result = undelete_dataset(ds_id, client=client) + undelete_result = undeleteDataset(ds_id, client=client) assert hasattr( undelete_result, "get" ), f"Expected dict-like response, got {type(undelete_result)}" @@ -947,18 +947,18 @@ def test_deferred_delete_and_undelete(self, client, cloud_config, can_write): # Should be accessible again with documents intact time.sleep(2) ds = _retry_on_server_error( - lambda: get_dataset(ds_id, client=client), retry_on_404=True + lambda: getDataset(ds_id, client=client), retry_on_404=True ) ds_fetched_id = ds.get("_id", ds.get("id", "")) assert ds_fetched_id == ds_id # Verify documents survived the soft-delete round-trip - docs = list_all_documents(ds_id, client=client).data + docs = listDatasetDocumentsAll(ds_id, client=client).data assert len(docs) >= 3, f"Expected >= 3 docs after undelete, got {len(docs)}" finally: # Final cleanup try: - delete_dataset(ds_id, when="now", client=client) + deleteDataset(ds_id, when="now", client=client) except Exception: pass @@ -971,26 +971,26 @@ def test_immediate_delete_cannot_undelete(self, client, cloud_config, can_write) pytest.skip("User does not have dataset creation privileges") from ndi.cloud.api.datasets import ( - create_dataset, - delete_dataset, - undelete_dataset, + createDataset, + deleteDataset, + undeleteDataset, ) - from ndi.cloud.api.documents import add_document + from ndi.cloud.api.documents import addDocument from ndi.cloud.exceptions import CloudAPIError as _APIError org_id = cloud_config.org_id try: result = _retry_on_server_error( - lambda: create_dataset(org_id, "NDI_PYTEST_HARD_DELETE", client=client) + lambda: createDataset(org_id, "NDI_PYTEST_HARD_DELETE", client=client) ) except _APIError as exc: - 
pytest.skip(f"create_dataset timed out: {exc}") + pytest.skip(f"createDataset timed out: {exc}") ds_id = result.get("_id", result.get("id", "")) assert ds_id # Add documents to make it realistic for i in range(3): - add_document( + addDocument( ds_id, { "document_class": {"class_name": "ndi_pytest_harddel"}, @@ -1000,73 +1000,73 @@ def test_immediate_delete_cannot_undelete(self, client, cloud_config, can_write) ) # Immediate delete - delete_dataset(ds_id, when="now", client=client) + deleteDataset(ds_id, when="now", client=client) time.sleep(10) # Undelete should fail — dataset is permanently gone with pytest.raises(_APIError): - undelete_dataset(ds_id, client=client) + undeleteDataset(ds_id, client=client) def test_list_deleted_documents(self, client, fresh_dataset): """Add doc, delete it, verify it appears in deleted-documents list.""" from ndi.cloud.api.documents import ( - add_document, - delete_document, - list_deleted_documents, + addDocument, + deleteDocument, + listDeletedDocuments, ) doc_json = { "document_class": {"class_name": "ndi_pytest_softdel"}, "base": {"name": "soft_delete_test"}, } - result = add_document(fresh_dataset, doc_json, client=client) + result = addDocument(fresh_dataset, doc_json, client=client) doc_id = result.get("_id", result.get("id", "")) assert doc_id - delete_document(fresh_dataset, doc_id, when="now", client=client) + deleteDocument(fresh_dataset, doc_id, when="now", client=client) time.sleep(2) - deleted = list_deleted_documents(fresh_dataset, client=client) + deleted = listDeletedDocuments(fresh_dataset, client=client) assert hasattr(deleted, "get"), f"Expected dict-like response, got {type(deleted)}" # The response should have a documents list deleted_docs = deleted.get("documents", []) assert isinstance(deleted_docs, list) - def test_delete_dataset_returns_message(self, client, cloud_config, can_write): - """delete_dataset should return a response dict with a message.""" + def test_deleteDataset_returns_message(self, client, 
cloud_config, can_write): + """deleteDataset should return a response dict with a message.""" if not can_write: pytest.skip("User does not have dataset creation privileges") - from ndi.cloud.api.datasets import create_dataset, delete_dataset + from ndi.cloud.api.datasets import createDataset, deleteDataset from ndi.cloud.exceptions import CloudAPIError as _APIError org_id = cloud_config.org_id try: result = _retry_on_server_error( - lambda: create_dataset(org_id, "NDI_PYTEST_DEL_MSG", client=client) + lambda: createDataset(org_id, "NDI_PYTEST_DEL_MSG", client=client) ) except _APIError as exc: - pytest.skip(f"create_dataset timed out: {exc}") + pytest.skip(f"createDataset timed out: {exc}") ds_id = result.get("_id", result.get("id", "")) assert ds_id - del_result = delete_dataset(ds_id, when="now", client=client) + del_result = deleteDataset(ds_id, when="now", client=client) assert hasattr(del_result, "get"), f"Expected dict-like response, got {type(del_result)}" assert "message" in del_result - def test_delete_document_returns_message(self, client, fresh_dataset): - """delete_document should return a response dict with a message.""" - from ndi.cloud.api.documents import add_document, delete_document + def test_deleteDocument_returns_message(self, client, fresh_dataset): + """deleteDocument should return a response dict with a message.""" + from ndi.cloud.api.documents import addDocument, deleteDocument doc_json = { "document_class": {"class_name": "ndi_pytest_delmsg"}, "base": {"name": "delete_msg_test"}, } - result = add_document(fresh_dataset, doc_json, client=client) + result = addDocument(fresh_dataset, doc_json, client=client) doc_id = result.get("_id", result.get("id", "")) assert doc_id - del_result = delete_document(fresh_dataset, doc_id, when="now", client=client) + del_result = deleteDocument(fresh_dataset, doc_id, when="now", client=client) assert hasattr(del_result, "get"), f"Expected dict-like response, got {type(del_result)}" assert "message" in 
del_result @@ -1079,11 +1079,11 @@ def test_delete_document_returns_message(self, client, fresh_dataset): class TestErrorHandling: def test_404_raises_not_found(self, client): """Nonexistent resource should raise CloudAPIError.""" - from ndi.cloud.api.datasets import get_dataset + from ndi.cloud.api.datasets import getDataset from ndi.cloud.exceptions import CloudAPIError with pytest.raises(CloudAPIError): - get_dataset("000000000000000000000000", client=client) + getDataset("000000000000000000000000", client=client) def test_bad_auth_raises(self): """Expired/invalid token should raise on API call.""" @@ -1105,9 +1105,9 @@ def test_invalid_dataset_download_helpful_error(self, client): from ndi.cloud.exceptions import CloudAPIError with pytest.raises((CloudAPIError, Exception)): - from ndi.cloud.download import download_document_collection + from ndi.cloud.download import downloadDocumentCollection - download_document_collection("000000000000000000000000", client=client) + downloadDocumentCollection("000000000000000000000000", client=client) # =========================================================================== @@ -1130,10 +1130,10 @@ def test_get_small_dataset(self, small_dataset_info): def test_large_dataset_has_document_count(self, client, large_dataset_info): """Large dataset should have documents (via API count endpoint).""" - from ndi.cloud.api.documents import get_document_count + from ndi.cloud.api.documents import countDocuments # Metadata field may be 0 on dev; use the count API instead - count = get_document_count(LARGE_DATASET, client=client) + count = countDocuments(LARGE_DATASET, client=client) metadata_count = large_dataset_info.get("documentCount", 0) # At least one of the two should be > 0 on prod. # On dev, the dataset may genuinely have 0 docs — skip in that case. 
@@ -1146,11 +1146,11 @@ def test_large_dataset_has_document_count(self, client, large_dataset_info): def test_large_has_more_docs(self, client): """Large dataset should have more documents than small (prod only).""" - from ndi.cloud.api.documents import get_document_count + from ndi.cloud.api.documents import countDocuments env = os.environ.get("CLOUD_API_ENVIRONMENT", "prod") - large_count = get_document_count(LARGE_DATASET, client=client) - small_count = get_document_count(SMALL_DATASET, client=client) + large_count = countDocuments(LARGE_DATASET, client=client) + small_count = countDocuments(SMALL_DATASET, client=client) # On dev, either dataset may have 0 docs — comparison is meaningless if env == "dev" and (large_count == 0 or small_count == 0): pytest.skip("Dataset(s) have no documents on dev environment") @@ -1161,11 +1161,11 @@ def test_both_published(self, large_dataset_info, small_dataset_info): assert large_dataset_info.get("isPublished") is True assert small_dataset_info.get("isPublished") is True - def test_download_document_collection(self, client): + def test_downloadDocumentCollection(self, client): """Download all docs from the small dataset.""" - from ndi.cloud.download import download_document_collection + from ndi.cloud.download import downloadDocumentCollection - docs = download_document_collection(SMALL_DATASET, client=client) + docs = downloadDocumentCollection(SMALL_DATASET, client=client) assert isinstance(docs, list) assert len(docs) > 0 for doc in docs[:3]: @@ -1175,7 +1175,7 @@ def test_download_file_from_dataset(self, client, small_dataset_info): """Download a real file from the small dataset.""" import requests - from ndi.cloud.api.files import get_file_details + from ndi.cloud.api.files import getFileDetails files = small_dataset_info.get("files", []) # Find a file with non-zero size @@ -1187,7 +1187,7 @@ def test_download_file_from_dataset(self, client, small_dataset_info): if target is None: pytest.skip("No non-empty files in 
dataset") - details = get_file_details(SMALL_DATASET, target["uid"], client=client) + details = getFileDetails(SMALL_DATASET, target["uid"], client=client) url = details.get("downloadUrl", "") assert url, "File details should include downloadUrl" @@ -1195,18 +1195,18 @@ def test_download_file_from_dataset(self, client, small_dataset_info): resp = requests.head(url, timeout=30) assert resp.status_code == 200 - def test_list_documents_from_small(self, client): + def test_listDatasetDocuments_from_small(self, client): """List documents from carbon fiber dataset.""" - from ndi.cloud.api.documents import list_documents + from ndi.cloud.api.documents import listDatasetDocuments - result = list_documents(SMALL_DATASET, page=1, page_size=5, client=client) + result = listDatasetDocuments(SMALL_DATASET, page=1, page_size=5, client=client) docs = result.get("documents", []) assert len(docs) == 5 def test_internal_list_remote_ids(self, client): - """list_remote_document_ids should return a non-empty mapping.""" - from ndi.cloud.internal import list_remote_document_ids + """listRemoteDocumentIds should return a non-empty mapping.""" + from ndi.cloud.internal import listRemoteDocumentIds - mapping = list_remote_document_ids(SMALL_DATASET, client=client) + mapping = listRemoteDocumentIds(SMALL_DATASET, client=client) assert isinstance(mapping, dict) assert len(mapping) > 0 diff --git a/tests/test_cloud_sync.py b/tests/test_cloud_sync.py index a017f12..f2c59c1 100644 --- a/tests/test_cloud_sync.py +++ b/tests/test_cloud_sync.py @@ -1,7 +1,7 @@ """ Tests for cloud sync structural components. -Tests SyncMode, SyncOptions, SyncIndex, zip_documents_for_upload, +Tests SyncMode, SyncOptions, SyncIndex, zipForUpload, CloudClient.from_env(), and the _auto_client decorator. These test local data structures and file I/O — no cloud API calls needed. 
""" @@ -14,7 +14,7 @@ from ndi.cloud.sync.index import SyncIndex from ndi.cloud.sync.mode import SyncMode, SyncOptions -from ndi.cloud.upload import zip_documents_for_upload +from ndi.cloud.upload import zipForUpload # =========================================================================== # SyncMode & SyncOptions @@ -106,7 +106,7 @@ def test_json_roundtrip(self, tmp_path): class TestZipDocuments: def test_creates_zip(self, tmp_path): docs = [{"ndiId": "doc-1", "type": "base"}, {"ndiId": "doc-2", "type": "probe"}] - zip_path, manifest = zip_documents_for_upload(docs, "ds-1", tmp_path) + zip_path, manifest = zipForUpload(docs, "ds-1", tmp_path) assert zip_path.exists() assert len(manifest) == 2 assert "doc-1" in manifest @@ -118,7 +118,7 @@ def test_creates_zip(self, tmp_path): def test_zip_content_is_valid_json(self, tmp_path): docs = [{"ndiId": "x", "data": [1, 2, 3]}] - zip_path, _ = zip_documents_for_upload(docs, "ds-1", tmp_path) + zip_path, _ = zipForUpload(docs, "ds-1", tmp_path) with zipfile.ZipFile(zip_path) as zf: content = json.loads(zf.read("x.json")) assert content["data"] == [1, 2, 3] @@ -138,22 +138,22 @@ def test_import_sync_from_cloud(self): assert SyncIndex is not None def test_import_sync_operations(self): - from ndi.cloud.sync import download_new, sync, upload_new + from ndi.cloud.sync import downloadNew, sync, uploadNew - assert callable(upload_new) - assert callable(download_new) + assert callable(uploadNew) + assert callable(downloadNew) assert callable(sync) def test_import_upload(self): - from ndi.cloud.upload import upload_document_collection, zip_documents_for_upload + from ndi.cloud.upload import uploadDocumentCollection, zipForUpload - assert callable(upload_document_collection) - assert callable(zip_documents_for_upload) + assert callable(uploadDocumentCollection) + assert callable(zipForUpload) def test_import_download(self): - from ndi.cloud.download import download_document_collection + from ndi.cloud.download import 
downloadDocumentCollection - assert callable(download_document_collection) + assert callable(downloadDocumentCollection) # =========================================================================== @@ -269,12 +269,12 @@ def test_preserves_function_name(self): from ndi.cloud.client import _auto_client @_auto_client - def get_dataset(dataset_id, *, client=None): + def getDataset(dataset_id, *, client=None): """My docstring.""" pass - assert get_dataset.__name__ == "get_dataset" - assert get_dataset.__doc__ == "My docstring." + assert getDataset.__name__ == "getDataset" + assert getDataset.__doc__ == "My docstring." @patch("ndi.cloud.client.CloudClient.from_env") def test_api_function_without_client(self, mock_from_env): @@ -285,9 +285,9 @@ def test_api_function_without_client(self, mock_from_env): mock_client.get.return_value = {"_id": "abc", "name": "Test"} mock_from_env.return_value = mock_client - from ndi.cloud.api.datasets import get_dataset + from ndi.cloud.api.datasets import getDataset - result = get_dataset("abc-123") + result = getDataset("abc-123") assert result == {"_id": "abc", "name": "Test"} mock_client.get.assert_called_once() @@ -298,8 +298,8 @@ def test_api_function_with_explicit_client(self): mock_client = MagicMock(spec=CloudClient) mock_client.get.return_value = {"_id": "abc", "name": "Test"} - from ndi.cloud.api.datasets import get_dataset + from ndi.cloud.api.datasets import getDataset - result = get_dataset("abc-123", client=mock_client) + result = getDataset("abc-123", client=mock_client) assert result == {"_id": "abc", "name": "Test"} mock_client.get.assert_called_once() diff --git a/tests/test_phase2_gaps.py b/tests/test_phase2_gaps.py index d40cd41..384ddf3 100644 --- a/tests/test_phase2_gaps.py +++ b/tests/test_phase2_gaps.py @@ -5,7 +5,7 @@ - Batch 1: stimulus_tuningcurve_log, t0_t1_to_array, ontology_table_row_vars - Batch 2: database_to_json, copy_doc_file_to_temp, extract_docs_files - Batch 3: get_probe_type_map, init_probe_type_map 
-- Batch 4: upload_single_file +- Batch 4: uploadSingleFile - Batch 5: openminds_convert (4 functions) """ @@ -344,20 +344,20 @@ def test_map_has_expected_types(self): class TestUploadSingleFile: - """Tests for ndi.cloud.upload.upload_single_file.""" + """Tests for ndi.cloud.upload.uploadSingleFile.""" def test_direct_upload_success(self): - from ndi.cloud.upload import upload_single_file + from ndi.cloud.upload import uploadSingleFile client = MagicMock() mock_files = MagicMock() - mock_files.get_upload_url.return_value = "https://s3.example.com/upload" - mock_files.put_file.return_value = None + mock_files.getFileUploadURL.return_value = "https://s3.example.com/upload" + mock_files.putFiles.return_value = None with patch.dict("sys.modules", {"ndi.cloud.api.files": mock_files}): with patch("ndi.cloud.api.files", mock_files): - success, err = upload_single_file( + success, err = uploadSingleFile( "ds-123", "file-uid-1", "/tmp/test.dat", client=client ) @@ -365,16 +365,16 @@ def test_direct_upload_success(self): assert err == "" def test_upload_failure(self): - from ndi.cloud.upload import upload_single_file + from ndi.cloud.upload import uploadSingleFile client = MagicMock() mock_files = MagicMock() - mock_files.get_upload_url.side_effect = Exception("Network error") + mock_files.getFileUploadURL.side_effect = Exception("Network error") with patch.dict("sys.modules", {"ndi.cloud.api.files": mock_files}): with patch("ndi.cloud.api.files", mock_files): - success, err = upload_single_file( + success, err = uploadSingleFile( "ds-123", "file-uid-1", "/tmp/test.dat", client=client ) diff --git a/tutorials/tutorial_67f723d574f5f79c6062389d.py b/tutorials/tutorial_67f723d574f5f79c6062389d.py index 6bf83da..72bb463 100644 --- a/tutorials/tutorial_67f723d574f5f79c6062389d.py +++ b/tutorials/tutorial_67f723d574f5f79c6062389d.py @@ -327,7 +327,7 @@ def section_1_import_and_load(html: HTMLBuilder) -> Any: import pandas as pd import ndi.dataset - from ndi.cloud import 
download_dataset + from ndi.cloud import downloadDataset from ndi.cloud.auth import login from ndi.cloud.client import CloudClient from ndi.fun.doc import get_doc_types @@ -351,7 +351,7 @@ def section_1_import_and_load(html: HTMLBuilder) -> Any: html.add_code("""\ import os import ndi.dataset -from ndi.cloud import download_dataset +from ndi.cloud import downloadDataset from ndi.cloud.auth import login from ndi.cloud.client import CloudClient from ndi.fun.doc import get_doc_types @@ -369,7 +369,7 @@ def section_1_import_and_load(html: HTMLBuilder) -> Any: else: config = login(ndi_cloud_username, ndi_cloud_password) client = CloudClient(config) - dataset = download_dataset(cloud_dataset_id, dataset_path, verbose=True, client=client)""") + dataset = downloadDataset(cloud_dataset_id, dataset_path, verbose=True, client=client)""") t0 = time.time() if DATASET_PATH.exists(): @@ -389,7 +389,7 @@ def section_1_import_and_load(html: HTMLBuilder) -> Any: sys.exit(1) config = login(NDI_CLOUD_USERNAME, NDI_CLOUD_PASSWORD) client = CloudClient(config) - dataset = download_dataset(CLOUD_DATASET_ID, str(DATASET_PATH), verbose=True, client=client) + dataset = downloadDataset(CLOUD_DATASET_ID, str(DATASET_PATH), verbose=True, client=client) elapsed = time.time() - t0 html.add_output_text( f"cloud_dataset_id = '{CLOUD_DATASET_ID}'\n" diff --git a/tutorials/tutorial_682e7772cdf3f24938176fac.py b/tutorials/tutorial_682e7772cdf3f24938176fac.py index 113b8d9..f8f08cb 100644 --- a/tutorials/tutorial_682e7772cdf3f24938176fac.py +++ b/tutorials/tutorial_682e7772cdf3f24938176fac.py @@ -343,7 +343,7 @@ def section_import(html: HTMLBuilder) -> None: html.add_code("""\ import os from pathlib import Path -from ndi.cloud import download_dataset +from ndi.cloud import downloadDataset from ndi.cloud.auth import login from ndi.cloud.client import CloudClient import ndi.dataset @@ -366,7 +366,7 @@ def section_import(html: HTMLBuilder) -> None: def section_load_dataset(html: HTMLBuilder) -> Any: 
"""Section 2: Download or load the NDI dataset.""" import ndi.dataset - from ndi.cloud import download_dataset + from ndi.cloud import downloadDataset from ndi.cloud.auth import login from ndi.cloud.client import CloudClient @@ -386,7 +386,7 @@ def section_load_dataset(html: HTMLBuilder) -> Any: # Download from NDI Cloud (first time only) config = login(ndi_cloud_username, ndi_cloud_password) client = CloudClient(config) - dataset = download_dataset(cloud_dataset_id, str(dataset_path), verbose=True, client=client)""") + dataset = downloadDataset(cloud_dataset_id, str(dataset_path), verbose=True, client=client)""") t0 = time.time() if DATASET_PATH.exists(): @@ -402,7 +402,7 @@ def section_load_dataset(html: HTMLBuilder) -> Any: sys.exit(1) config = login(NDI_CLOUD_USERNAME, NDI_CLOUD_PASSWORD) client = CloudClient(config) - dataset = download_dataset(CLOUD_DATASET_ID, str(DATASET_PATH), verbose=True, client=client) + dataset = downloadDataset(CLOUD_DATASET_ID, str(DATASET_PATH), verbose=True, client=client) elapsed = time.time() - t0 html.add_output_text(f"Dataset downloaded in {elapsed:.1f}s to {DATASET_PATH}") From 33f199f7022a2420e480383e8966cffb96e03a62 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 10 Mar 2026 18:42:04 +0000 Subject: [PATCH 3/7] Rename all ndi.fun functions to match MATLAB names and port ndi.fun.plot MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename functions across ndi.fun.* to use MATLAB-style camelCase naming: - utils: name2variable_name → name2variableName - data: read_image_stack → readImageStack - doc: all_types → allTypes, find_fuid → findFuid, etc. - doc_table: ontology_table_row_doc_to_table → ontologyTableRowDoc2Table, etc. - epoch: t0_t1_to_array → t0_t1cell2array - file: md5 → MD5, date_created → dateCreated, date_updated → dateUpdated - table: identify_matching_rows → identifyMatchingRows, etc. - stimulus: find_mixture_name → findMixtureName, etc. 
- name_utils: name_to_variable_name → name2variableName All old snake_case names preserved as backward-compatible aliases. New ndi.fun.plot module ported from MATLAB with bar3, multichan, stimulusTimeseries. Updated all references in tests and tutorials. https://claude.ai/code/session_01Skj3WbRtzYBdW5uk6nazhE --- MATLAB_MAPPING.md | 93 ++++-- src/ndi/fun/__init__.py | 2 + src/ndi/fun/data.py | 6 +- src/ndi/fun/doc.py | 21 +- src/ndi/fun/doc_table.py | 30 +- src/ndi/fun/epoch.py | 8 +- src/ndi/fun/file.py | 12 +- src/ndi/fun/name_utils.py | 12 +- src/ndi/fun/plot.py | 300 ++++++++++++++++++ src/ndi/fun/stimulus.py | 9 +- src/ndi/fun/table.py | 12 +- src/ndi/fun/utils.py | 6 +- tests/matlab_tests/test_carbon_fiber.py | 12 +- tests/matlab_tests/test_dabrowska.py | 72 ++--- tests/matlab_tests/test_fun.py | 48 +-- tests/matlab_tests/test_jess_haley.py | 70 ++-- tests/matlab_tests/test_utils.py | 52 +-- tests/test_fun.py | 99 +++--- tests/test_phase1_gaps.py | 50 +-- tests/test_phase2_gaps.py | 30 +- .../tutorial_67f723d574f5f79c6062389d.py | 90 +++--- .../tutorial_682e7772cdf3f24938176fac.py | 64 ++-- 22 files changed, 742 insertions(+), 356 deletions(-) create mode 100644 src/ndi/fun/plot.py diff --git a/MATLAB_MAPPING.md b/MATLAB_MAPPING.md index 27994fe..2e7a643 100644 --- a/MATLAB_MAPPING.md +++ b/MATLAB_MAPPING.md @@ -143,25 +143,26 @@ Complete reference mapping every MATLAB NDI function/class to its Python equival | MATLAB | Python | Module | |--------|--------|--------| -| `ndi.fun.doc.allTypes` | `ndi.fun.doc.all_types()` | `ndi.fun.doc` | -| `ndi.fun.doc.getDocTypes` | `ndi.fun.doc.get_doc_types()` | `ndi.fun.doc` | -| `ndi.fun.doc.findFuid` | `ndi.fun.doc.find_fuid()` | `ndi.fun.doc` | -| `ndi.fun.doc.subject.makeSpeciesStrainSex` | `ndi.fun.doc.make_species_strain_sex()` | `ndi.fun.doc` | -| `ndi.fun.doc.probe.probeLocations4probes` | `ndi.fun.doc.probe_locations_for_probes()` | `ndi.fun.doc` | -| `ndi.fun.doc.diff` | `ndi.fun.doc.doc_diff()` | `ndi.fun.doc` 
| -| `ndi.fun.doc.ontologyTableRowVars` | `ndi.fun.doc.ontology_table_row_vars()` | `ndi.fun.doc` | +| `ndi.fun.doc.allTypes` | `ndi.fun.doc.allTypes()` | `ndi.fun.doc` | +| `ndi.fun.doc.getDocTypes` | `ndi.fun.doc.getDocTypes()` | `ndi.fun.doc` | +| `ndi.fun.doc.findFuid` | `ndi.fun.doc.findFuid()` | `ndi.fun.doc` | +| `ndi.fun.doc.subject.makeSpeciesStrainSex` | `ndi.fun.doc.makeSpeciesStrainSex()` | `ndi.fun.doc` | +| `ndi.fun.doc.probe.probeLocations4probes` | `ndi.fun.doc.probeLocations4probes()` | `ndi.fun.doc` | +| `ndi.fun.doc.diff` | `ndi.fun.doc.diff()` | `ndi.fun.doc` | +| `ndi.fun.doc.ontologyTableRowVars` | `ndi.fun.doc.ontologyTableRowVars()` | `ndi.fun.doc` | ### DocTable Utilities | MATLAB | Python | Module | |--------|--------|--------| -| `ndi.fun.docTable.docCellArray2Table` | `ndi.fun.doc_table.doc_cell_array_to_table()` | `ndi.fun.doc_table` | -| `ndi.fun.docTable.element` | `ndi.fun.doc_table.element_table()` | `ndi.fun.doc_table` | -| `ndi.fun.docTable.subject` | `ndi.fun.doc_table.subject_summary()` | `ndi.fun.doc_table` | -| `ndi.fun.docTable.probe` | `ndi.fun.doc_table.probe_table()` | `ndi.fun.doc_table` | -| `ndi.fun.docTable.epoch` | `ndi.fun.doc_table.epoch_table()` | `ndi.fun.doc_table` | -| `ndi.fun.docTable.openminds` | `ndi.fun.doc_table.openminds_table()` | `ndi.fun.doc_table` | -| `ndi.fun.docTable.treatment` | `ndi.fun.doc_table.treatment_table()` | `ndi.fun.doc_table` | +| `ndi.fun.docTable.docCellArray2Table` | `ndi.fun.doc_table.docCellArray2Table()` | `ndi.fun.doc_table` | +| `ndi.fun.doc.ontologyTableRowDoc2Table` | `ndi.fun.doc_table.ontologyTableRowDoc2Table()` | `ndi.fun.doc_table` | +| `ndi.fun.docTable.element` | `ndi.fun.doc_table.element()` | `ndi.fun.doc_table` | +| `ndi.fun.docTable.subject` | `ndi.fun.doc_table.subject()` | `ndi.fun.doc_table` | +| `ndi.fun.docTable.probe` | `ndi.fun.doc_table.probe()` | `ndi.fun.doc_table` | +| `ndi.fun.docTable.epoch` | `ndi.fun.doc_table.epoch()` | `ndi.fun.doc_table` | +| 
`ndi.fun.docTable.openminds` | `ndi.fun.doc_table.openminds()` | `ndi.fun.doc_table` | +| `ndi.fun.docTable.treatment` | `ndi.fun.doc_table.treatment()` | `ndi.fun.doc_table` | ### Epoch Utilities @@ -169,25 +170,25 @@ Complete reference mapping every MATLAB NDI function/class to its Python equival |--------|--------|--------| | `ndi.fun.epoch.epochid2element` | `ndi.fun.epoch.epochid2element()` | `ndi.fun.epoch` | | `ndi.fun.epoch.filename2epochid` | `ndi.fun.epoch.filename2epochid()` | `ndi.fun.epoch` | -| `ndi.fun.doc.t0_t1cell2array` | `ndi.fun.epoch.t0_t1_to_array()` | `ndi.fun.epoch` | +| `ndi.fun.doc.t0_t1cell2array` | `ndi.fun.epoch.t0_t1cell2array()` | `ndi.fun.epoch` | ### File Utilities | MATLAB | Python | Module | |--------|--------|--------| -| `ndi.fun.file.MD5` | `ndi.fun.file.md5()` | `ndi.fun.file` | -| `ndi.fun.file.dateCreated` | `ndi.fun.file.date_created()` | `ndi.fun.file` | -| `ndi.fun.file.dateUpdated` | `ndi.fun.file.date_updated()` | `ndi.fun.file` | +| `ndi.fun.file.MD5` | `ndi.fun.file.MD5()` | `ndi.fun.file` | +| `ndi.fun.file.dateCreated` | `ndi.fun.file.dateCreated()` | `ndi.fun.file` | +| `ndi.fun.file.dateUpdated` | `ndi.fun.file.dateUpdated()` | `ndi.fun.file` | ### Data Utilities | MATLAB | Python | Module | |--------|--------|--------| -| `ndi.fun.data.readngrid` | `ndi.fun.data.read_ngrid()` | `ndi.fun.data` | -| `ndi.fun.data.writengrid` | `ndi.fun.data.write_ngrid()` | `ndi.fun.data` | -| `ndi.fun.data.mat2ngrid` | `ndi.fun.data.mat_to_ngrid()` | `ndi.fun.data` | -| `ndi.fun.data.evaluate_fitcurve` | `ndi.fun.data.evaluate_fitcurve()` | `ndi.fun.data` | -| `ndi.fun.data.readImageStack` | `ndi.fun.data.read_image_stack()` | `ndi.fun.data` | +| `ndi.fun.data.readngrid` | `ndi.fun.data.readngrid()` | `ndi.fun.data` | +| `ndi.fun.data.writengrid` | `ndi.fun.data.writengrid()` | `ndi.fun.data` | +| `ndi.fun.data.mat2ngrid` | `ndi.fun.data.mat2ngrid()` | `ndi.fun.data` | +| — | `ndi.fun.data.evaluate_fitcurve()` | `ndi.fun.data` | 
+| `ndi.fun.data.readImageStack` | `ndi.fun.data.readImageStack()` | `ndi.fun.data` | ### Stimulus Utilities @@ -195,35 +196,51 @@ Complete reference mapping every MATLAB NDI function/class to its Python equival |--------|--------|--------| | `ndi.fun.stimulus.tuning_curve_to_response_type` | `ndi.fun.stimulus.tuning_curve_to_response_type()` | `ndi.fun.stimulus` | | `ndi.fun.stimulus.f0_f1_responses` | `ndi.fun.stimulus.f0_f1_responses()` | `ndi.fun.stimulus` | -| `ndi.fun.stimulus.findMixtureName` | `ndi.fun.stimulus.find_mixture_name()` | `ndi.fun.stimulus` | -| `ndi.fun.stimulustemporalfrequency` | `ndi.fun.stimulus.stimulus_temporal_frequency()` | `ndi.fun.stimulus` | +| `ndi.fun.stimulus.findMixtureName` | `ndi.fun.stimulus.findMixtureName()` | `ndi.fun.stimulus` | +| `ndi.fun.stimulustemporalfrequency` | `ndi.fun.stimulus.stimulustemporalfrequency()` | `ndi.fun.stimulus` | | `ndi.fun.calc.stimulus_tuningcurve_log` | `ndi.fun.stimulus.stimulus_tuningcurve_log()` | `ndi.fun.stimulus` | ### Table Utilities | MATLAB | Python | Module | |--------|--------|--------| -| `ndi.fun.table.identifyMatchingRows` | `ndi.fun.table.identify_matching_rows()` | `ndi.fun.table` | -| `ndi.fun.table.identifyValidRows` | `ndi.fun.table.identify_valid_rows()` | `ndi.fun.table` | -| `ndi.fun.table.join` | `ndi.fun.table.join_tables()` | `ndi.fun.table` | -| `ndi.fun.table.moveColumnsLeft` | `ndi.fun.table.move_columns_left()` | `ndi.fun.table` | +| `ndi.fun.table.identifyMatchingRows` | `ndi.fun.table.identifyMatchingRows()` | `ndi.fun.table` | +| `ndi.fun.table.identifyValidRows` | `ndi.fun.table.identifyValidRows()` | `ndi.fun.table` | +| `ndi.fun.table.join` | `ndi.fun.table.join()` | `ndi.fun.table` | +| `ndi.fun.table.moveColumnsLeft` | `ndi.fun.table.moveColumnsLeft()` | `ndi.fun.table` | | `ndi.fun.table.vstack` | `ndi.fun.table.vstack()` | `ndi.fun.table` | +### Probe Utilities + +| MATLAB | Python | Module | +|--------|--------|--------| +| `ndi.fun.probe.export_binary` | 
`ndi.fun.probe.export_binary()` | `ndi.fun.probe.export_binary` | +| `ndi.fun.probe.export_all_binary` | `ndi.fun.probe.export_all_binary()` | `ndi.fun.probe.export_binary` | +| `ndi.fun.probe.location` | `ndi.fun.probe.location()` | `ndi.fun.probe.location` | + +### Plot Utilities + +| MATLAB | Python | Module | +|--------|--------|--------| +| `ndi.fun.plot.bar3` | `ndi.fun.plot.bar3()` | `ndi.fun.plot` | +| `ndi.fun.plot.multichan` | `ndi.fun.plot.multichan()` | `ndi.fun.plot` | +| `ndi.fun.plot.stimulusTimeseries` | `ndi.fun.plot.stimulusTimeseries()` | `ndi.fun.plot` | + ### General Utilities | MATLAB | Python | Module | |--------|--------|--------| -| `ndi.fun.channelname2prefixnumber` | `ndi.fun.utils.channel_name_to_prefix_number()` | `ndi.fun.utils` | -| `ndi.fun.name2variableName` | `ndi.fun.name_utils.name_to_variable_name()` | `ndi.fun.name_utils` | -| `ndi.fun.pseudorandomint` | `ndi.fun.utils.pseudorandom_int()` | `ndi.fun.utils` | -| `ndi.fun.timestamp` | `ndi.fun.utils.ndi_timestamp()` | `ndi.fun.utils` | +| `ndi.fun.channelname2prefixnumber` | `ndi.fun.channelname2prefixnumber()` | `ndi.fun.utils` | +| `ndi.fun.name2variableName` | `ndi.fun.name2variableName()` | `ndi.fun.utils` / `ndi.fun.name_utils` | +| `ndi.fun.pseudorandomint` | `ndi.fun.pseudorandomint()` | `ndi.fun.utils` | +| `ndi.fun.timestamp` | `ndi.fun.timestamp()` | `ndi.fun.utils` | ### Session & Dataset Diff | MATLAB | Python | Module | |--------|--------|--------| -| `ndi.fun.session.diff` | `ndi.fun.session.session_diff()` | `ndi.fun.session` | -| `ndi.fun.dataset.diff` | `ndi.fun.dataset.dataset_diff()` | `ndi.fun.dataset` | +| `ndi.fun.session.diff` | `ndi.fun.session.diff()` | `ndi.fun.session` | +| `ndi.fun.dataset.diff` | `ndi.fun.dataset.diff()` | `ndi.fun.dataset` | ## Cloud API @@ -477,3 +494,9 @@ The following MATLAB components were intentionally not ported (GUI, MATLAB-speci | `readGenBankNames.m` / `readGenBankNodes.m` | Batch taxonomy scripts | | 
`createGenBankControlledVocabulary.m` | Batch vocabulary builder | | `find_calc_directories.m` | MATLAB path/toolbox discovery | +| `ndi.fun.assertAddonOnPath` | MATLAB addon/path checker | +| `ndi.fun.check_Matlab_toolboxes` | MATLAB toolbox checker | +| `ndi.fun.console` / `debuglog` / `errlog` / `syslog` | MATLAB console/logging | +| `ndi.fun.convertoldnsd2ndi` | Legacy NSD→NDI migration | +| `ndi.fun.run_Linux_checks` | MATLAB Linux environment checks | +| `ndi.fun.plot_extracellular_spikeshapes` | MATLAB GUI plotting | diff --git a/src/ndi/fun/__init__.py b/src/ndi/fun/__init__.py index 24f956d..424b005 100644 --- a/src/ndi/fun/__init__.py +++ b/src/ndi/fun/__init__.py @@ -13,12 +13,14 @@ from .utils import ( channelname2prefixnumber, name2variable_name, + name2variableName, pseudorandomint, timestamp, ) __all__ = [ "channelname2prefixnumber", + "name2variableName", "name2variable_name", "probe", "pseudorandomint", diff --git a/src/ndi/fun/data.py b/src/ndi/fun/data.py index 871aff6..c4511ee 100644 --- a/src/ndi/fun/data.py +++ b/src/ndi/fun/data.py @@ -205,7 +205,7 @@ def evaluate_fitcurve( return np.asarray(result, dtype=float) -def read_image_stack( +def readImageStack( session: Any, doc: Any, fmt: str = "auto", @@ -389,3 +389,7 @@ def read_image_stack( return stack, info raise ValueError(f"Format '{fmt}' is not a recognized image or video format.") + + +# Backward-compatible alias +read_image_stack = readImageStack diff --git a/src/ndi/fun/doc.py b/src/ndi/fun/doc.py index 8d5bf25..7c12073 100644 --- a/src/ndi/fun/doc.py +++ b/src/ndi/fun/doc.py @@ -10,7 +10,7 @@ from typing import Any -def all_types() -> list[str]: +def allTypes() -> list[str]: """Return all known NDI document types by scanning schema JSON files. 
MATLAB equivalent: ndi.fun.doc.allTypes @@ -44,7 +44,7 @@ def all_types() -> list[str]: return sorted(types) -def find_fuid(session: Any, fuid: str) -> tuple[Any | None, str]: +def findFuid(session: Any, fuid: str) -> tuple[Any | None, str]: """Search session for a document containing a file with the given UID. MATLAB equivalent: ndi.fun.doc.findFuid @@ -75,7 +75,7 @@ def find_fuid(session: Any, fuid: str) -> tuple[Any | None, str]: return None, "" -def make_species_strain_sex( +def makeSpeciesStrainSex( session: Any, subject_doc: Any, *, @@ -226,7 +226,7 @@ def make_species_strain_sex( return docs -def probe_locations_for_probes( +def probeLocations4probes( session: Any, probe_docs: list[Any], locations: list[dict[str, str]], @@ -346,7 +346,7 @@ def _compare(a: Any, b: Any, path: str = "") -> None: return {"equal": len(details) == 0, "details": details} -def ontology_table_row_vars( +def ontologyTableRowVars( session: Any, ) -> tuple[list[str], list[str], list[str]]: """Return all unique ontologyTableRow variable names in a session. @@ -409,7 +409,7 @@ def ontology_table_row_vars( return sorted_names, variable_names, ontology_nodes -def get_doc_types( +def getDocTypes( session: Any, ) -> tuple[list[str], list[int]]: """Find all unique document types and their counts in a session. 
@@ -441,3 +441,12 @@ def get_doc_types( counts = [type_counter[t] for t in sorted_types] return sorted_types, counts + + +# Backward-compatible aliases +all_types = allTypes +find_fuid = findFuid +make_species_strain_sex = makeSpeciesStrainSex +probe_locations_for_probes = probeLocations4probes +ontology_table_row_vars = ontologyTableRowVars +get_doc_types = getDocTypes diff --git a/src/ndi/fun/doc_table.py b/src/ndi/fun/doc_table.py index 2d1ea8c..a9b5a27 100644 --- a/src/ndi/fun/doc_table.py +++ b/src/ndi/fun/doc_table.py @@ -24,7 +24,7 @@ def _require_pandas() -> None: ) -def ontology_table_row_doc_to_table( +def ontologyTableRowDoc2Table( documents: list[Any], stack_all: bool = False, ) -> tuple[list[pd.DataFrame], list[list[str]]]: @@ -103,7 +103,7 @@ def ontology_table_row_doc_to_table( return data_tables, doc_ids -def doc_cell_array_to_table( +def docCellArray2Table( documents: list[Any], ) -> pd.DataFrame: """Convert a list of NDI documents to a DataFrame. @@ -140,7 +140,7 @@ def doc_cell_array_to_table( return pd.DataFrame(rows) if rows else pd.DataFrame() -def element_table( +def element( session: Any, ) -> pd.DataFrame: """Create a summary table of element documents. @@ -174,7 +174,7 @@ def element_table( return pd.DataFrame(rows) if rows else pd.DataFrame() -def subject_table( +def subjectBasic( session: Any, ) -> pd.DataFrame: """Create a summary table of subject documents. @@ -207,7 +207,7 @@ def subject_table( return pd.DataFrame(rows) if rows else pd.DataFrame() -def probe_table( +def probe( session: Any, ) -> pd.DataFrame: """Create a summary table of probe documents. @@ -285,7 +285,7 @@ def probe_table( return pd.DataFrame(rows) if rows else pd.DataFrame() -def epoch_table( +def epoch( session: Any, ) -> pd.DataFrame: """Create a summary table of epoch-related documents. 
@@ -424,7 +424,7 @@ def epoch_table( return pd.DataFrame(rows) if rows else pd.DataFrame() -def openminds_table( +def openminds( session: Any, doc_type: str = "openminds", ) -> pd.DataFrame: @@ -457,7 +457,7 @@ def openminds_table( return pd.DataFrame(rows) if rows else pd.DataFrame() -def treatment_table( +def treatment( session: Any, ) -> pd.DataFrame: """Gather treatment document properties into a table. @@ -502,7 +502,7 @@ def _get_depends_on(props: dict, name: str) -> str: return "" -def subject_summary( +def subject( session: Any, ) -> pd.DataFrame: """Create a rich subject summary joining subject, openminds, and treatment data. @@ -755,3 +755,15 @@ def subject_summary( treatment_cols = sorted(c for c in all_cols if c not in fixed_cols) col_order = [c for c in fixed_cols if c in all_cols] + treatment_cols return df[col_order] + + +# Backward-compatible aliases +ontology_table_row_doc_to_table = ontologyTableRowDoc2Table +doc_cell_array_to_table = docCellArray2Table +element_table = element +subject_table = subjectBasic +probe_table = probe +epoch_table = epoch +openminds_table = openminds +treatment_table = treatment +subject_summary = subject diff --git a/src/ndi/fun/epoch.py b/src/ndi/fun/epoch.py index 49de705..fb71903 100644 --- a/src/ndi/fun/epoch.py +++ b/src/ndi/fun/epoch.py @@ -117,7 +117,7 @@ def filename2epochid( return result -def t0_t1_to_array( +def t0_t1cell2array( t0t1_in: list | Any, ) -> np.ndarray: """Convert a list of ``[t0, t1]`` interval pairs to an Nx2 numpy array. @@ -136,7 +136,7 @@ def t0_t1_to_array( Returns an empty ``(0, 2)`` array if input is empty. Example: - >>> t0_t1_to_array([[0.0, 1.5], [2.0, 3.5]]) + >>> t0_t1cell2array([[0.0, 1.5], [2.0, 3.5]]) array([[0. , 1.5], [2. 
, 3.5]]) """ @@ -149,3 +149,7 @@ def t0_t1_to_array( result[k, 1] = pair[1] return result + + +# Backward-compatible alias +t0_t1_to_array = t0_t1cell2array diff --git a/src/ndi/fun/file.py b/src/ndi/fun/file.py index ed8ef91..bf59c1f 100644 --- a/src/ndi/fun/file.py +++ b/src/ndi/fun/file.py @@ -11,7 +11,7 @@ from pathlib import Path -def md5(file_path: str) -> str: +def MD5(file_path: str) -> str: """Compute MD5 checksum of a file. MATLAB equivalent: ndi.fun.file.MD5 @@ -35,7 +35,7 @@ def md5(file_path: str) -> str: return h.hexdigest() -def date_created(file_path: str) -> datetime | None: +def dateCreated(file_path: str) -> datetime | None: """Get the creation date of a file. MATLAB equivalent: ndi.fun.file.dateCreated @@ -62,7 +62,7 @@ def date_created(file_path: str) -> datetime | None: return None -def date_updated(file_path: str) -> datetime | None: +def dateUpdated(file_path: str) -> datetime | None: """Get the last modification date of a file. MATLAB equivalent: ndi.fun.file.dateUpdated @@ -80,3 +80,9 @@ def date_updated(file_path: str) -> datetime | None: return datetime.fromtimestamp(p.stat().st_mtime, tz=timezone.utc) except Exception: return None + + +# Backward-compatible aliases +md5 = MD5 +date_created = dateCreated +date_updated = dateUpdated diff --git a/src/ndi/fun/name_utils.py b/src/ndi/fun/name_utils.py index 32de5be..82fb4ed 100644 --- a/src/ndi/fun/name_utils.py +++ b/src/ndi/fun/name_utils.py @@ -12,7 +12,7 @@ import re -def name_to_variable_name(name: str) -> str: +def name2variableName(name: str) -> str: """Convert a human-readable name to a PascalCase variable name. 
MATLAB equivalent: ndi.fun.name2variableName @@ -27,11 +27,11 @@ def name_to_variable_name(name: str) -> str: Examples:: - >>> name_to_variable_name("treatment: food restriction onset time") + >>> name2variableName("treatment: food restriction onset time") 'Treatment_FoodRestrictionOnsetTime' - >>> name_to_variable_name("Optogenetic Tetanus Stimulation Target Location") + >>> name2variableName("Optogenetic Tetanus Stimulation Target Location") 'OptogeneticTetanusStimulationTargetLocation' - >>> name_to_variable_name("elevated plus maze: test duration") + >>> name2variableName("elevated plus maze: test duration") 'ElevatedPlusMaze_TestDuration' Args: @@ -65,3 +65,7 @@ def name_to_variable_name(name: str) -> str: result = re.sub(r"[^a-zA-Z0-9_]", "", result) return result + + +# Backward-compatible alias +name_to_variable_name = name2variableName diff --git a/src/ndi/fun/plot.py b/src/ndi/fun/plot.py new file mode 100644 index 0000000..1adf826 --- /dev/null +++ b/src/ndi/fun/plot.py @@ -0,0 +1,300 @@ +""" +ndi.fun.plot - Plotting utility functions. + +MATLAB equivalents: +ndi/+fun/+plot/bar3.m, multichan.m, stimulusTimeseries.m + +Provides plotting utilities for NDI data visualization using matplotlib. +""" + +from __future__ import annotations + +from typing import Any + +import numpy as np + + +def bar3( + data_table: Any, + grouping_variables: list[str], + plotting_variable: str, +) -> Any: + """Create a 3-way grouped bar chart from table data. + + MATLAB equivalent: ndi.fun.plot.bar3 + + Takes a pandas DataFrame and visualizes the mean of *plotting_variable* + across three categorical factors specified in *grouping_variables*. + + Args: + data_table: A pandas DataFrame containing the data. + grouping_variables: List of exactly 3 column names to group by. + Variable 1 -> subplots, Variable 2 -> x-axis clusters, + Variable 3 -> individual bars (color-coded). + plotting_variable: Column name of the numeric variable to plot + (the mean is shown). 
+ + Returns: + The matplotlib Figure object. + + Raises: + ImportError: If matplotlib is not installed. + ValueError: If *grouping_variables* does not have exactly 3 elements. + + Example:: + + >>> import pandas as pd + >>> df = pd.DataFrame({ + ... 'Region': ['A','A','B','B'] * 3, + ... 'Quarter': ['Q1','Q2'] * 6, + ... 'Product': ['X','X','X','X','Y','Y','Y','Y','Z','Z','Z','Z'], + ... 'Sales': range(12), + ... }) + >>> fig = bar3(df, ['Region', 'Quarter', 'Product'], 'Sales') + """ + try: + import matplotlib.pyplot as plt + except ImportError as exc: + raise ImportError( + "matplotlib is required for ndi.fun.plot. " + "Install it with: pip install matplotlib" + ) from exc + + import pandas as pd + + if len(grouping_variables) != 3: + raise ValueError("grouping_variables must have exactly 3 elements") + + df = data_table if isinstance(data_table, pd.DataFrame) else pd.DataFrame(data_table) + + # Extract unique groups for each variable + groups = [] + group_indices = [] + for var in grouping_variables: + cats = df[var].unique() + cats_sorted = sorted(cats, key=str) + cat_map = {c: i for i, c in enumerate(cats_sorted)} + groups.append(cats_sorted) + group_indices.append(df[var].map(cat_map).values) + + g1_size = len(groups[0]) + g2_size = len(groups[1]) + g3_size = len(groups[2]) + + # Color map for inner group + cmap = plt.cm.get_cmap("tab10") + colors = [cmap(i % 10) for i in range(g3_size)] + + fig, axes = plt.subplots(1, g1_size, figsize=(5 * g1_size, 4), squeeze=False) + axes = axes.flatten() + + for i in range(g1_size): + ax = axes[i] + for j in range(g2_size): + for k in range(g3_size): + mask = ( + (group_indices[0] == i) + & (group_indices[1] == j) + & (group_indices[2] == k) + ) + vals = df.loc[mask, plotting_variable].values + x = j * (g3_size + 1) + k + 1 + if len(vals) > 0: + ax.bar(x, np.nanmean(vals), color=colors[k]) + + # Format subplot + tick_positions = [ + (g3_size + 1) * j + (g3_size + 1) / 2 + for j in range(g2_size) + ] + 
ax.set_xticks(tick_positions) + ax.set_xticklabels([str(g) for g in groups[1]]) + ax.set_title(str(groups[0][i])) + ax.set_ylabel(plotting_variable) + + if i == g1_size - 1: + # Add legend on last subplot + from matplotlib.patches import Patch + + handles = [Patch(facecolor=colors[k], label=str(groups[2][k])) for k in range(g3_size)] + ax.legend(handles=handles, frameon=False) + + fig.tight_layout() + return fig + + +def multichan( + data: np.ndarray, + t: np.ndarray, + space: float, +) -> list[Any]: + """Plot multiple channels of data with vertical spacing. + + MATLAB equivalent: ndi.fun.plot.multichan + + Plots multiple channels of *data* (assumed to be + ``num_samples x num_channels``) on the current axes, offsetting each + channel vertically by *space*. + + Args: + data: 2-D array of shape ``(num_samples, num_channels)``. + t: 1-D time array of length ``num_samples``. + space: Vertical spacing between channels. + + Returns: + List of matplotlib Line2D objects, one per channel. + + Raises: + ImportError: If matplotlib is not installed. + """ + try: + import matplotlib.pyplot as plt + except ImportError as exc: + raise ImportError( + "matplotlib is required for ndi.fun.plot. 
" + "Install it with: pip install matplotlib" + ) from exc + + data = np.asarray(data) + t = np.asarray(t) + + if data.ndim == 1: + data = data.reshape(-1, 1) + + num_channels = data.shape[1] + handles = [] + + ax = plt.gca() + for i in range(num_channels): + (h,) = ax.plot(t, i * space + data[:, i], color=(0.7, 0.7, 0.7)) + handles.append(h) + + return handles + + +def stimulusTimeseries( + stimulus_probe: Any, + timeref: Any, + y: float, + *, + stimid: list | np.ndarray | None = None, + linewidth: float = 2.0, + linecolor: tuple[float, float, float] = (0.0, 0.0, 0.0), + fontsize: float = 12.0, + fontweight: str = "normal", + fontcolor: tuple[float, float, float] = (0.0, 0.0, 0.0), + textycoord: float | None = None, + horizontal_alignment: str = "center", +) -> tuple[list[Any], list[Any], Any, Any]: + """Plot stimulus occurrence as thick bars on a time series plot. + + MATLAB equivalent: ndi.fun.plot.stimulusTimeseries + + Reads stimulus timing data from a probe and plots each stimulus + presentation as a horizontal bar at the given y-coordinate. + + Args: + stimulus_probe: An NDI probe/element object with a + ``readtimeseries`` method. + timeref: An :class:`ndi.time.TimeReference` specifying the time + reference for the plot. + y: Y-coordinate at which to draw the stimulus bars. + stimid: Optional stimulus ID numbers. If *None*, the function + attempts to read ``stimid`` from the probe data. + linewidth: Width of the stimulus bars. + linecolor: RGB tuple for bar color. + fontsize: Font size for stimulus ID labels. + fontweight: Font weight for labels (``'normal'`` or ``'bold'``). + fontcolor: RGB tuple for label color. + textycoord: Y-coordinate for text labels. Defaults to ``y + 1``. + horizontal_alignment: Horizontal alignment for labels. 
+ + Returns: + Tuple ``(h_lines, h_texts, stimulus_data, stimulus_time_data)`` + where *h_lines* are the line handles, *h_texts* are text handles, + *stimulus_data* and *stimulus_time_data* are the raw data read + from the probe. + + Raises: + ImportError: If matplotlib is not installed. + ValueError: If stimulus time data lacks ``stimon``/``stimoff``. + """ + try: + import matplotlib.pyplot as plt + except ImportError as exc: + raise ImportError( + "matplotlib is required for ndi.fun.plot. " + "Install it with: pip install matplotlib" + ) from exc + + # Read stimulus data from the probe + stimulus_data, stimulus_time_data, _ = stimulus_probe.readtimeseries( + timeref, float("-inf"), float("inf") + ) + + # Resolve stimid + if stimid is None: + if isinstance(stimulus_data, dict) and "stimid" in stimulus_data: + stimid = stimulus_data["stimid"] + elif isinstance(stimulus_data, list): + ids = [] + for entry in stimulus_data: + if isinstance(entry, dict) and "stimid" in entry: + ids.extend( + entry["stimid"] + if isinstance(entry["stimid"], list) + else [entry["stimid"]] + ) + stimid = ids if ids else None + + # Extract stimon / stimoff + if isinstance(stimulus_time_data, dict): + stimon = stimulus_time_data.get("stimon") + stimoff = stimulus_time_data.get("stimoff") + elif hasattr(stimulus_time_data, "stimon") and hasattr(stimulus_time_data, "stimoff"): + stimon = stimulus_time_data.stimon + stimoff = stimulus_time_data.stimoff + else: + raise ValueError( + "stimulus_time_data must contain 'stimon' and 'stimoff' fields" + ) + + if stimon is None or stimoff is None: + raise ValueError( + "stimulus_time_data must contain 'stimon' and 'stimoff' fields" + ) + + stimon = np.asarray(stimon).ravel() + stimoff = np.asarray(stimoff).ravel() + + if textycoord is None: + textycoord = y + 1 + + ax = plt.gca() + h_lines = [] + h_texts = [] + + for idx in range(len(stimon)): + (h,) = ax.plot( + [stimon[idx], stimoff[idx]], + [y, y], + linewidth=linewidth, + color=linecolor, + 
solid_capstyle="butt", + ) + h_lines.append(h) + + if stimid is not None and idx < len(stimid): + mid = (stimon[idx] + stimoff[idx]) / 2 + ht = ax.text( + mid, + textycoord, + str(stimid[idx]), + fontsize=fontsize, + fontweight=fontweight, + color=fontcolor, + ha=horizontal_alignment, + va="bottom", + ) + h_texts.append(ht) + + return h_lines, h_texts, stimulus_data, stimulus_time_data diff --git a/src/ndi/fun/stimulus.py b/src/ndi/fun/stimulus.py index ad946a7..f5f3481 100644 --- a/src/ndi/fun/stimulus.py +++ b/src/ndi/fun/stimulus.py @@ -115,7 +115,7 @@ def f0_f1_responses( } -def find_mixture_name( +def findMixtureName( dictionary_path: str, mixture: list[dict[str, Any]], ) -> list[str]: @@ -169,7 +169,7 @@ def find_mixture_name( return matches -def stimulus_temporal_frequency( +def stimulustemporalfrequency( stimulus_parameters: dict[str, Any], config_path: str | None = None, ) -> tuple[float | None, str]: @@ -287,3 +287,8 @@ def stimulus_tuningcurve_log( return rp.get("tuningcurve_calc", {}).get("log", "") return "" + + +# Backward-compatible aliases +find_mixture_name = findMixtureName +stimulus_temporal_frequency = stimulustemporalfrequency diff --git a/src/ndi/fun/table.py b/src/ndi/fun/table.py index 0ed9afc..bcb9275 100644 --- a/src/ndi/fun/table.py +++ b/src/ndi/fun/table.py @@ -25,7 +25,7 @@ def _require_pandas() -> None: ) -def identify_matching_rows( +def identifyMatchingRows( df: pd.DataFrame, column: str | list[str], value: Any, @@ -103,7 +103,7 @@ def identify_matching_rows( return mask -def identify_valid_rows( +def identifyValidRows( df: pd.DataFrame, columns: list[str] | None = None, invalid_value: Any = None, @@ -231,7 +231,7 @@ def join_tables( return result -def move_columns_left( +def moveColumnsLeft( df: pd.DataFrame, columns: list[str], ) -> pd.DataFrame: @@ -275,3 +275,9 @@ def vstack( return pd.DataFrame() return pd.concat(tables, ignore_index=True, sort=False) + + +# Backward-compatible aliases +identify_matching_rows = 
identifyMatchingRows +identify_valid_rows = identifyValidRows +move_columns_left = moveColumnsLeft diff --git a/src/ndi/fun/utils.py b/src/ndi/fun/utils.py index 13b2ddc..621350b 100644 --- a/src/ndi/fun/utils.py +++ b/src/ndi/fun/utils.py @@ -37,7 +37,7 @@ def channelname2prefixnumber(channelname: str) -> tuple[str, int]: return prefix, number -def name2variable_name(name: str) -> str: +def name2variableName(name: str) -> str: """Convert arbitrary string to a camelCase variable name. MATLAB equivalent: ndi.fun.name2variableName @@ -68,6 +68,10 @@ def name2variable_name(name: str) -> str: return result +# Backward-compatible alias +name2variable_name = name2variableName + + def pseudorandomint() -> int: """Generate a pseudo-random integer from date/time + random component. diff --git a/tests/matlab_tests/test_carbon_fiber.py b/tests/matlab_tests/test_carbon_fiber.py index 8160f77..bd7a0e1 100644 --- a/tests/matlab_tests/test_carbon_fiber.py +++ b/tests/matlab_tests/test_carbon_fiber.py @@ -126,9 +126,9 @@ def test_load_document_count(self, carbon_fiber_dataset): def test_document_type_counts(self, carbon_fiber_dataset): """All 27 document types have expected counts.""" - from ndi.fun.doc import get_doc_types + from ndi.fun.doc import getDocTypes - doc_types, doc_counts = get_doc_types(carbon_fiber_dataset) + doc_types, doc_counts = getDocTypes(carbon_fiber_dataset) actual = dict(zip(doc_types, doc_counts)) for dtype, expected in EXPECTED_TYPE_COUNTS.items(): actual_count = actual.get(dtype, 0) @@ -140,11 +140,11 @@ def test_document_type_counts(self, carbon_fiber_dataset): else: assert actual_count == expected, f"{dtype}: expected {expected}, got {actual_count}" - def test_get_doc_types(self, carbon_fiber_dataset): - """get_doc_types returns sorted types and matching counts.""" - from ndi.fun.doc import get_doc_types + def test_getDocTypes(self, carbon_fiber_dataset): + """getDocTypes returns sorted types and matching counts.""" + from ndi.fun.doc import 
getDocTypes - doc_types, doc_counts = get_doc_types(carbon_fiber_dataset) + doc_types, doc_counts = getDocTypes(carbon_fiber_dataset) assert doc_types == sorted(doc_types) # 27 from JSON + session doc auto-created by Dataset init = 28 assert len(doc_types) >= 27 diff --git a/tests/matlab_tests/test_dabrowska.py b/tests/matlab_tests/test_dabrowska.py index 5a8f9f9..23a7b14 100644 --- a/tests/matlab_tests/test_dabrowska.py +++ b/tests/matlab_tests/test_dabrowska.py @@ -78,7 +78,7 @@ def dabrowska_dataset(): @pytest.fixture(scope="session") def subject_table(dabrowska_dataset): """Build subject summary table.""" - from ndi.fun.doc_table import subject_summary + from ndi.fun.doc_table import subject as subject_summary return subject_summary(dabrowska_dataset) @@ -86,7 +86,7 @@ def subject_table(dabrowska_dataset): @pytest.fixture(scope="session") def probe_summary(dabrowska_dataset): """Build probe summary table.""" - from ndi.fun.doc_table import probe_table + from ndi.fun.doc_table import probe as probe_table return probe_table(dabrowska_dataset) @@ -94,7 +94,7 @@ def probe_summary(dabrowska_dataset): @pytest.fixture(scope="session") def epoch_summary(dabrowska_dataset): """Build epoch summary table.""" - from ndi.fun.doc_table import epoch_table + from ndi.fun.doc_table import epoch as epoch_table return epoch_table(dabrowska_dataset) @@ -102,24 +102,24 @@ def epoch_summary(dabrowska_dataset): @pytest.fixture(scope="session") def epm_table(dabrowska_dataset): """Query and convert EPM OTR docs to table.""" - from ndi.fun.doc_table import ontology_table_row_doc_to_table + from ndi.fun.doc_table import ontologyTableRowDoc2Table from ndi.query import Query query = Query("ontologyTableRow.variableNames").contains("ElevatedPlusMaze") docs = dabrowska_dataset.database_search(query) - tables, _ = ontology_table_row_doc_to_table(docs) + tables, _ = ontologyTableRowDoc2Table(docs) return tables[0] @pytest.fixture(scope="session") def fps_table(dabrowska_dataset): """Query and 
convert FPS OTR docs to table.""" - from ndi.fun.doc_table import ontology_table_row_doc_to_table + from ndi.fun.doc_table import ontologyTableRowDoc2Table from ndi.query import Query query = Query("ontologyTableRow.variableNames").contains("Fear_potentiatedStartle") docs = dabrowska_dataset.database_search(query) - tables, _ = ontology_table_row_doc_to_table(docs) + tables, _ = ontologyTableRowDoc2Table(docs) return tables[0] @@ -137,9 +137,9 @@ def test_dataset_loads(self, dabrowska_dataset): def test_document_type_counts(self, dabrowska_dataset): """All 13+ document types have expected counts.""" - from ndi.fun.doc import get_doc_types + from ndi.fun.doc import getDocTypes - doc_types, doc_counts = get_doc_types(dabrowska_dataset) + doc_types, doc_counts = getDocTypes(dabrowska_dataset) actual = dict(zip(doc_types, doc_counts)) for dtype, expected in EXPECTED_TYPE_COUNTS.items(): actual_count = actual.get(dtype, 0) @@ -214,9 +214,9 @@ def test_strain_distribution(self, subject_table): def test_filter_avp_cre(self, subject_table): """AVP-Cre strain filtering returns expected count.""" - from ndi.fun.table import identify_matching_rows + from ndi.fun.table import identifyMatchingRows - row_ind = identify_matching_rows( + row_ind = identifyMatchingRows( subject_table, "StrainName", "AVP-Cre", string_match="contains" ) filtered = subject_table[row_ind] @@ -224,9 +224,9 @@ def test_filter_avp_cre(self, subject_table): def test_filter_otr_cre(self, subject_table): """OTR-IRES-Cre filtering works.""" - from ndi.fun.table import identify_matching_rows + from ndi.fun.table import identifyMatchingRows - row_ind = identify_matching_rows( + row_ind = identifyMatchingRows( subject_table, "StrainName", "OTR-IRES-Cre", string_match="contains" ) filtered = subject_table[row_ind] @@ -359,12 +359,12 @@ def test_joined_columns(self, subject_table, probe_summary, epoch_summary): assert "ProbeType" in combined.columns assert "EpochNumber" in combined.columns - def 
test_move_columns_left(self, subject_table, probe_summary, epoch_summary): - """move_columns_left reorders columns correctly.""" - from ndi.fun.table import join, move_columns_left + def test_moveColumnsLeft(self, subject_table, probe_summary, epoch_summary): + """moveColumnsLeft reorders columns correctly.""" + from ndi.fun.table import join, moveColumnsLeft combined = join([subject_table, probe_summary, epoch_summary]) - reordered = move_columns_left(combined, ["SubjectLocalIdentifier", "EpochNumber"]) + reordered = moveColumnsLeft(combined, ["SubjectLocalIdentifier", "EpochNumber"]) assert list(reordered.columns[:2]) == [ "SubjectLocalIdentifier", "EpochNumber", @@ -372,10 +372,10 @@ def test_move_columns_left(self, subject_table, probe_summary, epoch_summary): def test_filter_by_approach(self, subject_table, probe_summary, epoch_summary): """Filter by ApproachName containing 'optogenetic' works.""" - from ndi.fun.table import identify_matching_rows, join + from ndi.fun.table import identifyMatchingRows, join combined = join([subject_table, probe_summary, epoch_summary]) - row_ind = identify_matching_rows( + row_ind = identifyMatchingRows( combined, "ApproachName", "optogenetic", string_match="contains" ) opto = combined[row_ind] @@ -548,11 +548,11 @@ def test_fps_fear_percentage_calculation(self, fps_table): class TestOntologyIntegration: """Validate EMPTY ontology integration for the Dabrowska dataset.""" - def test_ontology_table_row_vars(self, dabrowska_dataset): - """ontology_table_row_vars returns names, short names, nodes.""" - from ndi.fun.doc import ontology_table_row_vars + def test_ontologyTableRowVars(self, dabrowska_dataset): + """ontologyTableRowVars returns names, short names, nodes.""" + from ndi.fun.doc import ontologyTableRowVars - names, short_names, nodes = ontology_table_row_vars(dabrowska_dataset) + names, short_names, nodes = ontologyTableRowVars(dabrowska_dataset) assert len(names) > 0 assert len(names) == len(short_names) == len(nodes) 
@@ -565,28 +565,28 @@ def test_empty_ontology_lookup(self): assert result.name is not None assert len(result.name) > 0 - def test_name_to_variable_name(self): - """name_to_variable_name produces correct PascalCase output.""" - from ndi.fun.name_utils import name_to_variable_name + def test_name2variableName(self): + """name2variableName produces correct PascalCase output.""" + from ndi.fun.name_utils import name2variableName assert ( - name_to_variable_name("treatment: food restriction onset time") + name2variableName("treatment: food restriction onset time") == "Treatment_FoodRestrictionOnsetTime" ) assert ( - name_to_variable_name("elevated plus maze: test duration") + name2variableName("elevated plus maze: test duration") == "ElevatedPlusMaze_TestDuration" ) assert ( - name_to_variable_name("Optogenetic Tetanus Stimulation Target Location") + name2variableName("Optogenetic Tetanus Stimulation Target Location") == "OptogeneticTetanusStimulationTargetLocation" ) - def test_name_to_variable_name_edge_cases(self): - """name_to_variable_name handles edge cases.""" - from ndi.fun.name_utils import name_to_variable_name + def test_name2variableName_edge_cases(self): + """name2variableName handles edge cases.""" + from ndi.fun.name_utils import name2variableName - assert name_to_variable_name("") == "" - assert name_to_variable_name(" ") == "" - assert name_to_variable_name("123abc") == "var_123abc" - assert name_to_variable_name("simple") == "Simple" + assert name2variableName("") == "" + assert name2variableName(" ") == "" + assert name2variableName("123abc") == "var_123abc" + assert name2variableName("simple") == "Simple" diff --git a/tests/matlab_tests/test_fun.py b/tests/matlab_tests/test_fun.py index 74753be..4ff1acc 100644 --- a/tests/matlab_tests/test_fun.py +++ b/tests/matlab_tests/test_fun.py @@ -10,7 +10,7 @@ +table/TestVStack.m -> TestVStack Python modules under test: - ndi.fun.doc — all_types, find_fuid, diff + ndi.fun.doc — allTypes, findFuid, diff 
ndi.fun.session — diff ndi.fun.dataset — diff ndi.fun.table — vstack @@ -24,7 +24,7 @@ from ndi.dataset import Dataset from ndi.document import Document from ndi.fun.dataset import diff as dataset_diff -from ndi.fun.doc import all_types, find_fuid +from ndi.fun.doc import allTypes, findFuid from ndi.fun.doc import diff as doc_diff from ndi.fun.session import diff as session_diff from ndi.fun.table import vstack @@ -76,41 +76,41 @@ def _make_session_with_docs(tmp_path, ref, doc_specs): class TestAllTypes: """Port of ndi.unittest.fun.doc.TestAllTypes.""" - def test_all_types_returns_nonempty_list(self): - """all_types() returns a non-empty list. + def test_allTypes_returns_nonempty_list(self): + """allTypes() returns a non-empty list. MATLAB equivalent: TestAllTypes.testNonEmptyList """ - types = all_types() + types = allTypes() assert isinstance(types, list) assert len(types) > 0 - def test_all_types_contains_strings(self): - """all_types() returns list of strings. + def test_allTypes_contains_strings(self): + """allTypes() returns list of strings. MATLAB equivalent: TestAllTypes.testContainsStrings """ - types = all_types() + types = allTypes() for t in types: assert isinstance(t, str) assert len(t) > 0 - def test_all_types_contains_known_types(self): - """all_types() should include well-known document types. + def test_allTypes_contains_known_types(self): + """allTypes() should include well-known document types. MATLAB equivalent: TestAllTypes.testContainsKnownTypes """ - types = all_types() + types = allTypes() # 'base' and 'demoNDI' are always expected to exist - assert "base" in types, "base should be in all_types()" - assert "demoNDI" in types, "demoNDI should be in all_types()" + assert "base" in types, "base should be in allTypes()" + assert "demoNDI" in types, "demoNDI should be in allTypes()" - def test_all_types_sorted(self): - """all_types() returns a sorted list. + def test_allTypes_sorted(self): + """allTypes() returns a sorted list. 
MATLAB equivalent: TestAllTypes.testSorted """ - types = all_types() + types = allTypes() assert types == sorted(types) @@ -124,7 +124,7 @@ class TestFindFuid: """Port of ndi.unittest.fun.doc.TestFindFuid.""" def test_find_known_fuid(self, tmp_path): - """find_fuid locates a document by its file UID. + """findFuid locates a document by its file UID. MATLAB equivalent: TestFindFuid.testFindKnownFuid """ @@ -146,13 +146,13 @@ def test_find_known_fuid(self, tmp_path): session.database_add(doc) # Now search for it - found_doc, found_name = find_fuid(session, fuid) + found_doc, found_name = findFuid(session, fuid) assert found_doc is not None, "Should find the document by FUID" assert found_name == "filename1.ext", f"File name should be filename1.ext, got {found_name}" - def test_find_fuid_not_found(self, tmp_path): - """find_fuid returns (None, '') for nonexistent FUID. + def test_findFuid_not_found(self, tmp_path): + """findFuid returns (None, '') for nonexistent FUID. MATLAB equivalent: TestFindFuid.testFuidNotFound """ @@ -164,13 +164,13 @@ def test_find_fuid_not_found(self, tmp_path): doc = _make_demo_doc("some_doc", 99, session.id()) session.database_add(doc) - found_doc, found_name = find_fuid(session, "nonexistent_uid_12345") + found_doc, found_name = findFuid(session, "nonexistent_uid_12345") assert found_doc is None assert found_name == "" - def test_find_fuid_in_populated_session(self, tmp_path): - """find_fuid searches among multiple documents correctly. + def test_findFuid_in_populated_session(self, tmp_path): + """findFuid searches among multiple documents correctly. 
MATLAB equivalent: TestFindFuid.testFindInSession """ @@ -197,7 +197,7 @@ def test_find_fuid_in_populated_session(self, tmp_path): assert target_fuid is not None - found_doc, found_name = find_fuid(session, target_fuid) + found_doc, found_name = findFuid(session, target_fuid) assert found_doc is not None assert found_name == "filename1.ext" diff --git a/tests/matlab_tests/test_jess_haley.py b/tests/matlab_tests/test_jess_haley.py index 7af36c9..ee7f099 100644 --- a/tests/matlab_tests/test_jess_haley.py +++ b/tests/matlab_tests/test_jess_haley.py @@ -109,10 +109,10 @@ def ontology_table_row_docs(jess_haley_dataset): @pytest.fixture(scope="session") def otr_tables(ontology_table_row_docs): - """Run ontology_table_row_doc_to_table and cache result.""" - from ndi.fun.doc_table import ontology_table_row_doc_to_table + """Run ontologyTableRowDoc2Table and cache result.""" + from ndi.fun.doc_table import ontologyTableRowDoc2Table - return ontology_table_row_doc_to_table(ontology_table_row_docs) + return ontologyTableRowDoc2Table(ontology_table_row_docs) # =========================================================================== @@ -132,9 +132,9 @@ def test_load_document_count(self, jess_haley_dataset): def test_document_type_counts(self, jess_haley_dataset): """All 15 document types have expected counts (session may be +1).""" - from ndi.fun.doc import get_doc_types + from ndi.fun.doc import getDocTypes - doc_types, doc_counts = get_doc_types(jess_haley_dataset) + doc_types, doc_counts = getDocTypes(jess_haley_dataset) actual = dict(zip(doc_types, doc_counts)) for dtype, expected in EXPECTED_TYPE_COUNTS.items(): actual_count = actual.get(dtype, 0) @@ -154,11 +154,11 @@ def test_all_documents_have_base_id(self, all_docs_raw): missing += 1 assert missing == 0, f"{missing} documents missing base.id" - def test_get_doc_types(self, jess_haley_dataset): - """get_doc_types returns sorted types and matching counts.""" - from ndi.fun.doc import get_doc_types + def 
test_getDocTypes(self, jess_haley_dataset): + """getDocTypes returns sorted types and matching counts.""" + from ndi.fun.doc import getDocTypes - doc_types, doc_counts = get_doc_types(jess_haley_dataset) + doc_types, doc_counts = getDocTypes(jess_haley_dataset) assert doc_types == sorted(doc_types) assert len(doc_types) == 15 assert sum(doc_counts) >= TOTAL_JSON_DOCS @@ -242,9 +242,9 @@ def test_doc_ids_match(self, otr_tables): assert len(dt) == len(ids) def test_stack_all_mode(self, ontology_table_row_docs): - from ndi.fun.doc_table import ontology_table_row_doc_to_table + from ndi.fun.doc_table import ontologyTableRowDoc2Table - data_tables, doc_ids = ontology_table_row_doc_to_table( + data_tables, doc_ids = ontologyTableRowDoc2Table( ontology_table_row_docs, stack_all=True ) assert len(data_tables) == 1 @@ -285,23 +285,23 @@ class TestOntologyTableRowVars: """MATLAB: ndi.fun.doc.ontologyTableRowVars(dataset).""" def test_returns_nonempty_tuples(self, jess_haley_dataset): - from ndi.fun.doc import ontology_table_row_vars + from ndi.fun.doc import ontologyTableRowVars - names, var_names, ont_nodes = ontology_table_row_vars(jess_haley_dataset) + names, var_names, ont_nodes = ontologyTableRowVars(jess_haley_dataset) assert len(names) > 0 assert len(var_names) > 0 assert len(ont_nodes) > 0 def test_names_and_variable_names_same_length(self, jess_haley_dataset): - from ndi.fun.doc import ontology_table_row_vars + from ndi.fun.doc import ontologyTableRowVars - names, var_names, ont_nodes = ontology_table_row_vars(jess_haley_dataset) + names, var_names, ont_nodes = ontologyTableRowVars(jess_haley_dataset) assert len(names) == len(var_names) == len(ont_nodes) def test_known_variables_present(self, jess_haley_dataset): - from ndi.fun.doc import ontology_table_row_vars + from ndi.fun.doc import ontologyTableRowVars - names, _, _ = ontology_table_row_vars(jess_haley_dataset) + names, _, _ = ontologyTableRowVars(jess_haley_dataset) expected = "C. 
elegans behavioral assay: deceleration upon encounter" assert expected in names, f"'{expected}' not in ontologyTableRowVars" @@ -321,14 +321,14 @@ def test_subject_count(self, jess_haley_dataset): assert len(docs) == 1656 def test_subject_table(self, jess_haley_dataset): - from ndi.fun.doc_table import subject_table + from ndi.fun.doc_table import subjectBasic as subject_table df = subject_table(jess_haley_dataset) assert len(df) == 1656 assert "local_identifier" in df.columns def test_subject_local_identifier_nonempty(self, jess_haley_dataset): - from ndi.fun.doc_table import subject_table + from ndi.fun.doc_table import subjectBasic as subject_table df = subject_table(jess_haley_dataset) empty_ids = df[df["local_identifier"] == ""] @@ -336,7 +336,7 @@ def test_subject_local_identifier_nonempty(self, jess_haley_dataset): def test_subject_strains(self, jess_haley_dataset): """Subjects include N2 (wildtype) and PR811 (transgenic) strains.""" - from ndi.fun.doc_table import subject_table + from ndi.fun.doc_table import subjectBasic as subject_table df = subject_table(jess_haley_dataset) ids = df["local_identifier"].tolist() @@ -349,11 +349,11 @@ def test_subject_strains(self, jess_haley_dataset): def test_subject_filter_pr811(self, jess_haley_dataset): """Filter for PR811 strain subjects matches MATLAB count.""" - from ndi.fun.doc_table import subject_table - from ndi.fun.table import identify_matching_rows + from ndi.fun.doc_table import subjectBasic as subject_table + from ndi.fun.table import identifyMatchingRows df = subject_table(jess_haley_dataset) - mask = identify_matching_rows(df, "local_identifier", "PR811", string_match="contains") + mask = identifyMatchingRows(df, "local_identifier", "PR811", string_match="contains") filtered = df[mask] # MATLAB tutorial shows 76 PR811 subjects assert len(filtered) == 76, f"Expected 76 PR811 subjects, got {len(filtered)}" @@ -387,24 +387,24 @@ def test_join_behavior_plate_tables(self, otr_tables): break assert joined, "No 
pair of OTR tables could be joined on common columns" - def test_identify_matching_rows_string_contains(self): + def test_identifyMatchingRows_string_contains(self): """String 'contains' matching works on DataFrame.""" import pandas as pd - from ndi.fun.table import identify_matching_rows + from ndi.fun.table import identifyMatchingRows df = pd.DataFrame({"name": ["apple", "banana", "cherry", "APPLE pie"]}) - mask = identify_matching_rows(df, "name", "apple", string_match="contains") + mask = identifyMatchingRows(df, "name", "apple", string_match="contains") assert mask.sum() == 1 # case-sensitive: only 'apple' - def test_identify_matching_rows_numeric(self): + def test_identifyMatchingRows_numeric(self): """Numeric comparison matching.""" import pandas as pd - from ndi.fun.table import identify_matching_rows + from ndi.fun.table import identifyMatchingRows df = pd.DataFrame({"value": [10, 20, 30, 40]}) - mask = identify_matching_rows(df, "value", 25, numeric_match="gt") + mask = identifyMatchingRows(df, "value", 25, numeric_match="gt") assert mask.sum() == 2 # 30 and 40 @@ -658,10 +658,10 @@ def test_plot_document_type_distribution(self, jess_haley_dataset): matplotlib.use("Agg") import matplotlib.pyplot as plt - from ndi.fun.doc import get_doc_types + from ndi.fun.doc import getDocTypes out = self._ensure_output_dir() - doc_types, doc_counts = get_doc_types(jess_haley_dataset) + doc_types, doc_counts = getDocTypes(jess_haley_dataset) fig, ax = plt.subplots(figsize=(12, 6)) ax.barh(doc_types, doc_counts, color="steelblue") @@ -684,7 +684,7 @@ def test_plot_subject_experiment_types(self, jess_haley_dataset): import matplotlib.pyplot as plt - from ndi.fun.doc_table import subject_table + from ndi.fun.doc_table import subjectBasic as subject_table out = self._ensure_output_dir() df = subject_table(jess_haley_dataset) @@ -978,15 +978,15 @@ def test_plot_summary_dashboard(self, jess_haley_dataset, all_docs_raw): import matplotlib.pyplot as plt import numpy as np - from 
ndi.fun.doc import get_doc_types - from ndi.fun.doc_table import subject_table + from ndi.fun.doc import getDocTypes + from ndi.fun.doc_table import subjectBasic as subject_table out = self._ensure_output_dir() fig, axes = plt.subplots(2, 3, figsize=(20, 12)) # (1) Document type distribution ax = axes[0, 0] - doc_types, doc_counts = get_doc_types(jess_haley_dataset) + doc_types, doc_counts = getDocTypes(jess_haley_dataset) ax.barh(doc_types, doc_counts, color="steelblue") ax.set_xlabel("Count") ax.set_title("Document Types") diff --git a/tests/matlab_tests/test_utils.py b/tests/matlab_tests/test_utils.py index df6e540..171e90e 100644 --- a/tests/matlab_tests/test_utils.py +++ b/tests/matlab_tests/test_utils.py @@ -14,20 +14,20 @@ Tests for: - ndi.fun.utils.channelname2prefixnumber() -- ndi.fun.utils.name2variable_name() +- ndi.fun.utils.name2variableName() - ndi.fun.utils.pseudorandomint() - ndi.fun.utils.timestamp() -- ndi.fun.doc.all_types() +- ndi.fun.doc.allTypes() """ import re import pytest -from ndi.fun.doc import all_types +from ndi.fun.doc import allTypes from ndi.fun.utils import ( channelname2prefixnumber, - name2variable_name, + name2variableName, pseudorandomint, timestamp, ) @@ -132,7 +132,7 @@ def test_name2variable_simple(self): MATLAB equivalent: name2variableName('my name') """ - result = name2variable_name("my name") + result = name2variableName("my name") assert result == "myName" def test_name2variable_with_special_chars(self): @@ -140,7 +140,7 @@ def test_name2variable_with_special_chars(self): MATLAB equivalent: name2variableName('hello-world') """ - result = name2variable_name("hello-world") + result = name2variableName("hello-world") assert result == "helloWorld" def test_name2variable_with_dots(self): @@ -148,7 +148,7 @@ def test_name2variable_with_dots(self): MATLAB equivalent: name2variableName('my.variable.name') """ - result = name2variable_name("my.variable.name") + result = name2variableName("my.variable.name") assert result == 
"myVariableName" def test_name2variable_starts_with_digit(self): @@ -156,7 +156,7 @@ def test_name2variable_starts_with_digit(self): MATLAB equivalent: name2variableName('1abc') """ - result = name2variable_name("1abc") + result = name2variableName("1abc") assert result.startswith("x") def test_name2variable_empty(self): @@ -164,7 +164,7 @@ def test_name2variable_empty(self): MATLAB equivalent: name2variableName('') """ - result = name2variable_name("") + result = name2variableName("") assert result == "" def test_name2variable_underscore_preserved(self): @@ -172,7 +172,7 @@ def test_name2variable_underscore_preserved(self): MATLAB equivalent: name2variableName('my_var') """ - result = name2variable_name("my_var") + result = name2variableName("my_var") # Underscores are kept in the cleaned string, split happens on spaces # So 'my_var' -> cleaned 'my_var' -> split ['my_var'] -> 'my_var' assert isinstance(result, str) @@ -263,51 +263,51 @@ def test_pseudorandomint_uniqueness(self): class TestAllTypes: """Port of MATLAB ndi.fun.doc.allTypes tests. - Verifies that all_types() returns a list of known document types. + Verifies that allTypes() returns a list of known document types. """ - def test_all_types_returns_list(self): - """all_types() returns a sorted list of strings. + def test_allTypes_returns_list(self): + """allTypes() returns a sorted list of strings. MATLAB equivalent: ndi.fun.doc.allTypes() """ - types = all_types() + types = allTypes() assert isinstance(types, list) assert len(types) > 0 - def test_all_types_contains_base(self): - """all_types() includes 'base' document type. + def test_allTypes_contains_base(self): + """allTypes() includes 'base' document type. MATLAB equivalent: ndi.fun.doc.allTypes() (contains 'base') """ - types = all_types() + types = allTypes() assert "base" in types, "'base' should be a known document type" - def test_all_types_are_strings(self): + def test_allTypes_are_strings(self): """All returned types are non-empty strings. 
MATLAB equivalent: ndi.fun.doc.allTypes() (type check) """ - types = all_types() + types = allTypes() for t in types: assert isinstance(t, str) assert len(t) > 0 - def test_all_types_sorted(self): - """all_types() returns a sorted list. + def test_allTypes_sorted(self): + """allTypes() returns a sorted list. MATLAB equivalent: ndi.fun.doc.allTypes() (sorted) """ - types = all_types() + types = allTypes() assert types == sorted(types), "Types should be sorted alphabetically" - def test_all_types_contains_known_types(self): - """all_types() includes several known document types. + def test_allTypes_contains_known_types(self): + """allTypes() includes several known document types. MATLAB equivalent: ndi.fun.doc.allTypes() (spot check) """ - types = all_types() + types = allTypes() # These are expected schema files in database_documents/ expected = ["base", "demoNDI"] for doc_type in expected: - assert doc_type in types, f"Expected '{doc_type}' to be in all_types()" + assert doc_type in types, f"Expected '{doc_type}' to be in allTypes()" diff --git a/tests/test_fun.py b/tests/test_fun.py index fef8adb..1e3f3e0 100644 --- a/tests/test_fun.py +++ b/tests/test_fun.py @@ -49,35 +49,35 @@ def test_starts_with_digit_raises(self): class TestName2VariableName: - """Tests for name2variable_name.""" + """Tests for name2variableName.""" def test_basic(self): - from ndi.fun.utils import name2variable_name + from ndi.fun.utils import name2variableName - assert name2variable_name("hello world") == "helloWorld" + assert name2variableName("hello world") == "helloWorld" def test_special_chars(self): - from ndi.fun.utils import name2variable_name + from ndi.fun.utils import name2variableName - assert name2variable_name("my-variable.name") == "myVariableName" + assert name2variableName("my-variable.name") == "myVariableName" def test_starts_with_digit(self): - from ndi.fun.utils import name2variable_name + from ndi.fun.utils import name2variableName - result = 
name2variable_name("123test") + result = name2variableName("123test") assert result.startswith("x") assert not result[0].isdigit() def test_underscore_preserved(self): - from ndi.fun.utils import name2variable_name + from ndi.fun.utils import name2variableName - result = name2variable_name("my_var") + result = name2variableName("my_var") assert "_" in result or "my" in result.lower() def test_empty(self): - from ndi.fun.utils import name2variable_name + from ndi.fun.utils import name2variableName - assert name2variable_name("") == "" + assert name2variableName("") == "" class TestPseudorandomint: @@ -142,30 +142,30 @@ def test_file_not_found(self): class TestFileDates: - """Tests for date_created and date_updated.""" + """Tests for dateCreated and dateUpdated.""" def test_date_updated(self, tmp_path): - from ndi.fun.file import date_updated + from ndi.fun.file import dateUpdated f = tmp_path / "test.txt" f.write_text("hello") - dt = date_updated(str(f)) + dt = dateUpdated(str(f)) assert dt is not None assert dt.tzinfo is not None def test_date_created(self, tmp_path): - from ndi.fun.file import date_created + from ndi.fun.file import dateCreated f = tmp_path / "test.txt" f.write_text("hello") - dt = date_created(str(f)) + dt = dateCreated(str(f)) assert dt is not None def test_nonexistent_returns_none(self): - from ndi.fun.file import date_created, date_updated + from ndi.fun.file import dateCreated, dateUpdated - assert date_updated("/nonexistent") is None - assert date_created("/nonexistent") is None + assert dateUpdated("/nonexistent") is None + assert dateCreated("/nonexistent") is None # =========================================================================== @@ -302,28 +302,28 @@ def test_missing_key(self): class TestDocAllTypes: - """Tests for doc.all_types.""" + """Tests for doc.allTypes.""" def test_returns_list(self): - from ndi.fun.doc import all_types + from ndi.fun.doc import allTypes - types = all_types() + types = allTypes() assert 
isinstance(types, list) # Should find at least base types from schema_documents assert len(types) > 0 def test_sorted(self): - from ndi.fun.doc import all_types + from ndi.fun.doc import allTypes - types = all_types() + types = allTypes() assert types == sorted(types) class TestDocGetDocTypes: - """Tests for doc.get_doc_types.""" + """Tests for doc.getDocTypes.""" def test_with_mock_session(self): - from ndi.fun.doc import get_doc_types + from ndi.fun.doc import getDocTypes doc1 = MagicMock() doc1.document_properties = { @@ -336,7 +336,7 @@ def test_with_mock_session(self): session = MagicMock() session.database_search.return_value = [doc1, doc2] - types, counts = get_doc_types(session) + types, counts = getDocTypes(session) assert "element" in types assert "subject" in types # counts is a list aligned with types @@ -345,10 +345,10 @@ def test_with_mock_session(self): class TestDocFindFuid: - """Tests for doc.find_fuid.""" + """Tests for doc.findFuid.""" def test_found(self): - from ndi.fun.doc import find_fuid + from ndi.fun.doc import findFuid doc = MagicMock() doc.document_properties = { @@ -364,16 +364,16 @@ def test_found(self): session = MagicMock() session.database_search.return_value = [doc] - result_doc, filename = find_fuid(session, "abc123") + result_doc, filename = findFuid(session, "abc123") assert result_doc is doc assert filename == "data.bin" def test_not_found(self): - from ndi.fun.doc import find_fuid + from ndi.fun.doc import findFuid session = MagicMock() session.database_search.return_value = [] - result_doc, filename = find_fuid(session, "nonexistent") + result_doc, filename = findFuid(session, "nonexistent") assert result_doc is None assert filename == "" @@ -495,10 +495,10 @@ def test_no_match(self): class TestFindMixtureName: - """Tests for stimulus.find_mixture_name.""" + """Tests for stimulus.findMixtureName.""" def test_match(self, tmp_path): - from ndi.fun.stimulus import find_mixture_name + from ndi.fun.stimulus import findMixtureName 
dictionary = { "saline": [ @@ -523,11 +523,11 @@ def test_match(self, tmp_path): "unitName": "percent", }, ] - result = find_mixture_name(str(f), mixture) + result = findMixtureName(str(f), mixture) assert "saline" in result def test_no_match(self, tmp_path): - from ndi.fun.stimulus import find_mixture_name + from ndi.fun.stimulus import findMixtureName dictionary = { "saline": [ @@ -542,7 +542,7 @@ def test_no_match(self, tmp_path): } f = tmp_path / "mixtures.json" f.write_text(json.dumps(dictionary)) - result = find_mixture_name( + result = findMixtureName( str(f), [ { @@ -557,16 +557,16 @@ def test_no_match(self, tmp_path): assert result == [] def test_file_not_found(self): - from ndi.fun.stimulus import find_mixture_name + from ndi.fun.stimulus import findMixtureName - assert find_mixture_name("/nonexistent.json", []) == [] + assert findMixtureName("/nonexistent.json", []) == [] class TestStimulusTemporalFrequency: - """Tests for stimulus.stimulus_temporal_frequency.""" + """Tests for stimulus.stimulustemporalfrequency.""" def test_basic_rule(self, tmp_path): - from ndi.fun.stimulus import stimulus_temporal_frequency + from ndi.fun.stimulus import stimulustemporalfrequency rules = [ {"parameterName": "tFrequency", "multiplier": 1.0, "adder": 0.0, "isPeriod": False}, @@ -574,12 +574,12 @@ def test_basic_rule(self, tmp_path): f = tmp_path / "rules.json" f.write_text(json.dumps(rules)) - tf, name = stimulus_temporal_frequency({"tFrequency": 4.0}, config_path=str(f)) + tf, name = stimulustemporalfrequency({"tFrequency": 4.0}, config_path=str(f)) assert tf == 4.0 assert name == "tFrequency" def test_period_inversion(self, tmp_path): - from ndi.fun.stimulus import stimulus_temporal_frequency + from ndi.fun.stimulus import stimulustemporalfrequency rules = [ {"parameterName": "period", "multiplier": 1.0, "adder": 0.0, "isPeriod": True}, @@ -587,17 +587,17 @@ def test_period_inversion(self, tmp_path): f = tmp_path / "rules.json" f.write_text(json.dumps(rules)) - tf, 
name = stimulus_temporal_frequency({"period": 0.5}, config_path=str(f)) + tf, name = stimulustemporalfrequency({"period": 0.5}, config_path=str(f)) assert tf == pytest.approx(2.0) def test_no_match(self, tmp_path): - from ndi.fun.stimulus import stimulus_temporal_frequency + from ndi.fun.stimulus import stimulustemporalfrequency rules = [{"parameterName": "tFrequency", "multiplier": 1.0, "adder": 0.0}] f = tmp_path / "rules.json" f.write_text(json.dumps(rules)) - tf, name = stimulus_temporal_frequency({"other": 5.0}, config_path=str(f)) + tf, name = stimulustemporalfrequency({"other": 5.0}, config_path=str(f)) assert tf is None assert name == "" @@ -685,9 +685,16 @@ def test_import_submodules(self): from ndi.fun import data, dataset, doc, epoch, file, session, stimulus assert hasattr(doc, "diff") + assert hasattr(doc, "allTypes") + assert hasattr(doc, "getDocTypes") + assert hasattr(doc, "findFuid") assert hasattr(epoch, "epochid2element") assert hasattr(file, "md5") + assert hasattr(file, "dateCreated") + assert hasattr(file, "dateUpdated") assert hasattr(data, "readngrid") assert hasattr(stimulus, "tuning_curve_to_response_type") + assert hasattr(stimulus, "findMixtureName") + assert hasattr(stimulus, "stimulustemporalfrequency") assert hasattr(session, "diff") assert hasattr(dataset, "diff") diff --git a/tests/test_phase1_gaps.py b/tests/test_phase1_gaps.py index d91fc2e..59346ab 100644 --- a/tests/test_phase1_gaps.py +++ b/tests/test_phase1_gaps.py @@ -328,7 +328,7 @@ def test_non_dict_props(self): class TestDocTable: def test_doc_cell_array_to_table(self): - from ndi.fun.doc_table import doc_cell_array_to_table + from ndi.fun.doc_table import docCellArray2Table doc1 = MagicMock() doc1.document_properties = { @@ -341,15 +341,15 @@ def test_doc_cell_array_to_table(self): "element": {"name": "e2"}, } - df = doc_cell_array_to_table([doc1, doc2]) + df = docCellArray2Table([doc1, doc2]) assert len(df) == 2 assert "base.id" in df.columns assert 
df["element.name"].tolist() == ["e1", "e2"] def test_empty_list(self): - from ndi.fun.doc_table import doc_cell_array_to_table + from ndi.fun.doc_table import docCellArray2Table - df = doc_cell_array_to_table([]) + df = docCellArray2Table([]) assert len(df) == 0 @@ -363,34 +363,34 @@ class TestTableUtils: def test_identify_matching_rows_identical(self): import pandas as pd - from ndi.fun.table import identify_matching_rows + from ndi.fun.table import identifyMatchingRows df = pd.DataFrame({"name": ["a", "b", "c", "a"]}) - mask = identify_matching_rows(df, "name", "a") + mask = identifyMatchingRows(df, "name", "a") assert mask.tolist() == [True, False, False, True] def test_identify_matching_rows_contains(self): import pandas as pd - from ndi.fun.table import identify_matching_rows + from ndi.fun.table import identifyMatchingRows df = pd.DataFrame({"name": ["alpha", "beta", "gamma"]}) - mask = identify_matching_rows(df, "name", "al", "contains") + mask = identifyMatchingRows(df, "name", "al", "contains") assert mask.tolist() == [True, False, False] def test_identify_matching_rows_numeric(self): import pandas as pd - from ndi.fun.table import identify_matching_rows + from ndi.fun.table import identifyMatchingRows df = pd.DataFrame({"val": [1, 2, 3, 4]}) - mask = identify_matching_rows(df, "val", 2, "gt") + mask = identifyMatchingRows(df, "val", 2, "gt") assert mask.tolist() == [False, False, True, True] def test_identify_valid_rows(self): import pandas as pd - from ndi.fun.table import identify_valid_rows + from ndi.fun.table import identifyValidRows df = pd.DataFrame( { @@ -398,13 +398,13 @@ def test_identify_valid_rows(self): "b": [4, 5, float("nan")], } ) - mask = identify_valid_rows(df) + mask = identifyValidRows(df) assert mask.tolist() == [True, False, False] def test_identify_valid_rows_specific_cols(self): import pandas as pd - from ndi.fun.table import identify_valid_rows + from ndi.fun.table import identifyValidRows df = pd.DataFrame( { @@ -412,7 +412,7 @@ 
def test_identify_valid_rows_specific_cols(self): "b": [4, 5, float("nan")], } ) - mask = identify_valid_rows(df, columns=["a"]) + mask = identifyValidRows(df, columns=["a"]) assert mask.tolist() == [True, False, True] def test_vstack(self): @@ -432,10 +432,10 @@ def test_vstack(self): def test_move_columns_left(self): import pandas as pd - from ndi.fun.table import move_columns_left + from ndi.fun.table import moveColumnsLeft df = pd.DataFrame({"a": [1], "b": [2], "c": [3]}) - result = move_columns_left(df, ["c", "b"]) + result = moveColumnsLeft(df, ["c", "b"]) assert list(result.columns) == ["c", "b", "a"] def test_join_tables(self): @@ -621,10 +621,10 @@ def test_read_subset(self): class TestDocHelpers: def test_make_species_strain_sex(self): - """make_species_strain_sex creates real openMINDS NDI Documents.""" + """makeSpeciesStrainSex creates real openMINDS NDI Documents.""" pytest.importorskip("openminds", reason="openminds package not installed") from ndi.document import Document - from ndi.fun.doc import make_species_strain_sex + from ndi.fun.doc import makeSpeciesStrainSex session = MagicMock() session.id.return_value = "sess_1" @@ -632,7 +632,7 @@ def test_make_species_strain_sex(self): subj_doc = MagicMock() subj_doc.document_properties = {"base": {"id": "subj_123"}} - docs = make_species_strain_sex( + docs = makeSpeciesStrainSex( session, subj_doc, species="Mus musculus", @@ -650,26 +650,26 @@ def test_make_species_strain_sex(self): assert dep_names.get("subject_id") == "subj_123" def test_make_species_only(self): - """make_species_strain_sex with species only creates 1 document.""" + """makeSpeciesStrainSex with species only creates 1 document.""" pytest.importorskip("openminds", reason="openminds package not installed") from ndi.document import Document - from ndi.fun.doc import make_species_strain_sex + from ndi.fun.doc import makeSpeciesStrainSex session = MagicMock() session.id.return_value = "sess_1" subj_doc = MagicMock() 
subj_doc.document_properties = {"base": {"id": "subj_123"}} - docs = make_species_strain_sex(session, subj_doc, species="Rattus") + docs = makeSpeciesStrainSex(session, subj_doc, species="Rattus") assert len(docs) == 1 assert isinstance(docs[0], Document) om = docs[0].document_properties.get("openminds", {}) assert "Species" in om.get("openminds_type", "") def test_probe_locations_for_probes(self): - """probe_locations_for_probes creates real probe_location Documents.""" + """probeLocations4probes creates real probe_location Documents.""" from ndi.document import Document - from ndi.fun.doc import probe_locations_for_probes + from ndi.fun.doc import probeLocations4probes session = MagicMock() session.id.return_value = "sess_1" @@ -684,7 +684,7 @@ def test_probe_locations_for_probes(self): {"name": "LGN"}, ] - docs = probe_locations_for_probes( + docs = probeLocations4probes( session, [probe1, probe2], locations, diff --git a/tests/test_phase2_gaps.py b/tests/test_phase2_gaps.py index 384ddf3..52d40ab 100644 --- a/tests/test_phase2_gaps.py +++ b/tests/test_phase2_gaps.py @@ -2,7 +2,7 @@ Tests for Phase 2 low-priority gap implementations. 
Covers: -- Batch 1: stimulus_tuningcurve_log, t0_t1_to_array, ontology_table_row_vars +- Batch 1: stimulus_tuningcurve_log, t0_t1cell2array, ontologyTableRowVars - Batch 2: database_to_json, copy_doc_file_to_temp, extract_docs_files - Batch 3: get_probe_type_map, init_probe_type_map - Batch 4: uploadSingleFile @@ -96,42 +96,42 @@ def test_returns_empty_if_no_log_field(self): class TestT0T1ToArray: - """Tests for ndi.fun.epoch.t0_t1_to_array.""" + """Tests for ndi.fun.epoch.t0_t1cell2array.""" def test_basic_conversion(self): - from ndi.fun.epoch import t0_t1_to_array + from ndi.fun.epoch import t0_t1cell2array - result = t0_t1_to_array([[0.0, 1.5], [2.0, 3.5]]) + result = t0_t1cell2array([[0.0, 1.5], [2.0, 3.5]]) expected = np.array([[0.0, 1.5], [2.0, 3.5]]) np.testing.assert_array_equal(result, expected) def test_empty_input(self): - from ndi.fun.epoch import t0_t1_to_array + from ndi.fun.epoch import t0_t1cell2array - result = t0_t1_to_array([]) + result = t0_t1cell2array([]) assert result.shape == (0, 2) def test_single_pair(self): - from ndi.fun.epoch import t0_t1_to_array + from ndi.fun.epoch import t0_t1cell2array - result = t0_t1_to_array([[10.0, 20.0]]) + result = t0_t1cell2array([[10.0, 20.0]]) assert result.shape == (1, 2) assert result[0, 0] == 10.0 assert result[0, 1] == 20.0 def test_tuples(self): - from ndi.fun.epoch import t0_t1_to_array + from ndi.fun.epoch import t0_t1cell2array - result = t0_t1_to_array([(0.0, 1.0), (2.0, 3.0)]) + result = t0_t1cell2array([(0.0, 1.0), (2.0, 3.0)]) assert result.shape == (2, 2) assert result[1, 0] == 2.0 class TestOntologyTableRowVars: - """Tests for ndi.fun.doc.ontology_table_row_vars.""" + """Tests for ndi.fun.doc.ontologyTableRowVars.""" def test_extracts_unique_vars(self): - from ndi.fun.doc import ontology_table_row_vars + from ndi.fun.doc import ontologyTableRowVars doc1 = MagicMock() doc1.document_properties = { @@ -154,7 +154,7 @@ def test_extracts_unique_vars(self): session = MagicMock() 
session.database_search.return_value = [doc1, doc2] - names, var_names, ont_nodes = ontology_table_row_vars(session) + names, var_names, ont_nodes = ontologyTableRowVars(session) assert "alpha" in names assert "beta" in names @@ -162,12 +162,12 @@ def test_extracts_unique_vars(self): assert len(names) == 3 def test_empty_session(self): - from ndi.fun.doc import ontology_table_row_vars + from ndi.fun.doc import ontologyTableRowVars session = MagicMock() session.database_search.return_value = [] - names, var_names, ont_nodes = ontology_table_row_vars(session) + names, var_names, ont_nodes = ontologyTableRowVars(session) assert names == [] assert var_names == [] assert ont_nodes == [] diff --git a/tutorials/tutorial_67f723d574f5f79c6062389d.py b/tutorials/tutorial_67f723d574f5f79c6062389d.py index 72bb463..6422fea 100644 --- a/tutorials/tutorial_67f723d574f5f79c6062389d.py +++ b/tutorials/tutorial_67f723d574f5f79c6062389d.py @@ -330,7 +330,7 @@ def section_1_import_and_load(html: HTMLBuilder) -> Any: from ndi.cloud import downloadDataset from ndi.cloud.auth import login from ndi.cloud.client import CloudClient - from ndi.fun.doc import get_doc_types + from ndi.fun.doc import getDocTypes html.add_heading("Import and load NDI dataset") html.add_text( @@ -354,7 +354,7 @@ def section_1_import_and_load(html: HTMLBuilder) -> Any: from ndi.cloud import downloadDataset from ndi.cloud.auth import login from ndi.cloud.client import CloudClient -from ndi.fun.doc import get_doc_types +from ndi.fun.doc import getDocTypes cloud_dataset_id = '67f723d574f5f79c6062389d' data_path = os.path.expanduser('~/Documents/MATLAB/Datasets') @@ -404,11 +404,11 @@ def section_1_import_and_load(html: HTMLBuilder) -> Any: ) html.add_code("""\ -doc_types, doc_counts = get_doc_types(dataset) +doc_types, doc_counts = getDocTypes(dataset) for t, c in zip(doc_types, doc_counts): print(f' {t}: {c}')""") - doc_types, doc_counts = get_doc_types(dataset) + doc_types, doc_counts = getDocTypes(dataset) 
output_lines = [] total_docs = 0 for t, c in zip(doc_types, doc_counts): @@ -428,7 +428,7 @@ def section_1_import_and_load(html: HTMLBuilder) -> Any: def section_2_subject_summary(html: HTMLBuilder, dataset: Any) -> Any: """Section 2: View subject summary table.""" - from ndi.fun.doc_table import subject_summary + from ndi.fun.doc_table import subject as subject_summary html.add_heading("View subject summary table") html.add_text( @@ -443,7 +443,7 @@ def section_2_subject_summary(html: HTMLBuilder, dataset: Any) -> Any: html.add_text("A summary table showing the metadata for each subject can be " "viewed below.") html.add_code("""\ -from ndi.fun.doc_table import subject_summary +from ndi.fun.doc_table import subject as subject_summary subject_table = subject_summary(dataset) print(f'subjectTable: {subject_table.shape[0]} x {subject_table.shape[1]} table') @@ -467,7 +467,7 @@ def section_2_subject_summary(html: HTMLBuilder, dataset: Any) -> Any: @timed def section_3_filter_subjects(html: HTMLBuilder, subject_table: Any) -> Any: """Section 3: Filter subjects by strain.""" - from ndi.fun.table import identify_matching_rows + from ndi.fun.table import identifyMatchingRows html.add_heading("Filter subjects by strain") html.add_text( @@ -478,18 +478,18 @@ def section_3_filter_subjects(html: HTMLBuilder, subject_table: Any) -> Any: ) html.add_code("""\ -from ndi.fun.table import identify_matching_rows +from ndi.fun.table import identifyMatchingRows column_name = 'StrainName' data_value = 'AVP-Cre' -row_ind = identify_matching_rows( +row_ind = identifyMatchingRows( subject_table, column_name, data_value, string_match='contains' ) filtered_subjects = subject_table[row_ind] print(f'filteredSubjects: {len(filtered_subjects)} rows x {len(filtered_subjects.columns)} columns') print(filtered_subjects)""") - row_ind = identify_matching_rows( + row_ind = identifyMatchingRows( subject_table, "StrainName", "AVP-Cre", string_match="contains" ) filtered_subjects = 
subject_table[row_ind] @@ -515,7 +515,7 @@ def section_3_filter_subjects(html: HTMLBuilder, subject_table: Any) -> Any: def section_4_probe_summary(html: HTMLBuilder, dataset: Any) -> Any: """Section 4: View probe summary table.""" - from ndi.fun.doc_table import probe_table + from ndi.fun.doc_table import probe as probe_table html.add_heading("View probe summary table") html.add_text( @@ -526,7 +526,7 @@ def section_4_probe_summary(html: HTMLBuilder, dataset: Any) -> Any: ) html.add_code("""\ -from ndi.fun.doc_table import probe_table +from ndi.fun.doc_table import probe as probe_table probe_summary = probe_table(dataset) print(f'probeTable: {probe_summary.shape[0]} x {probe_summary.shape[1]} table') @@ -584,7 +584,7 @@ def section_4_probe_summary(html: HTMLBuilder, dataset: Any) -> Any: def section_5_epoch_summary(html: HTMLBuilder, dataset: Any) -> Any: """Section 5: View epoch summary table.""" - from ndi.fun.doc_table import epoch_table + from ndi.fun.doc_table import epoch as epoch_table html.add_heading("View epoch summary table") html.add_text( @@ -596,7 +596,7 @@ def section_5_epoch_summary(html: HTMLBuilder, dataset: Any) -> Any: ) html.add_code("""\ -from ndi.fun.doc_table import epoch_table +from ndi.fun.doc_table import epoch as epoch_table epoch_summary = epoch_table(dataset) print(f'epochTable: {epoch_summary.shape[0]} x {epoch_summary.shape[1]} table') @@ -639,7 +639,7 @@ def section_6_combined_table( ) -> Any: """Section 6: Combined summary table and epoch filtering.""" - from ndi.fun.table import identify_matching_rows, join, move_columns_left + from ndi.fun.table import identifyMatchingRows, join, moveColumnsLeft html.add_heading("Combined summary table and epoch filtering") html.add_text( @@ -650,15 +650,15 @@ def section_6_combined_table( ) html.add_code("""\ -from ndi.fun.table import join, move_columns_left, identify_matching_rows +from ndi.fun.table import join, moveColumnsLeft, identifyMatchingRows combined = join([subject_table, 
probe_summary, epoch_summary]) -combined = move_columns_left(combined, ['SubjectLocalIdentifier', 'EpochNumber']) +combined = moveColumnsLeft(combined, ['SubjectLocalIdentifier', 'EpochNumber']) print(f'combined: {combined.shape[0]} x {combined.shape[1]} table') print(f'Columns: {list(combined.columns)}')""") combined = join([subject_table, probe_summary, epoch_summary]) - combined = move_columns_left(combined, ["SubjectLocalIdentifier", "EpochNumber"]) + combined = moveColumnsLeft(combined, ["SubjectLocalIdentifier", "EpochNumber"]) html.add_output_text( f"combined: {combined.shape[0]} x {combined.shape[1]} table\n" @@ -677,13 +677,13 @@ def section_6_combined_table( ) html.add_code("""\ -row_ind = identify_matching_rows( +row_ind = identifyMatchingRows( combined, 'ApproachName', 'optogenetic', string_match='contains' ) opto_epochs = combined[row_ind] print(f'Epochs with optogenetic approach: {len(opto_epochs)} rows')""") - row_ind = identify_matching_rows( + row_ind = identifyMatchingRows( combined, "ApproachName", "optogenetic", string_match="contains" ) opto_epochs = combined[row_ind] @@ -703,13 +703,13 @@ def section_6_combined_table( ) html.add_code("""\ -row_ind = identify_matching_rows( +row_ind = identifyMatchingRows( combined, 'MixtureName', 'aCSF', string_match='contains' ) acsf_epochs = combined[row_ind] print(f'Epochs with aCSF mixture: {len(acsf_epochs)} rows')""") - row_ind = identify_matching_rows(combined, "MixtureName", "aCSF", string_match="contains") + row_ind = identifyMatchingRows(combined, "MixtureName", "aCSF", string_match="contains") acsf_epochs = combined[row_ind] html.add_output_text(f"Epochs with aCSF mixture: {len(acsf_epochs)} rows") @@ -727,13 +727,13 @@ def section_6_combined_table( ) html.add_code("""\ -row_ind = identify_matching_rows( +row_ind = identifyMatchingRows( combined, 'CellTypeName', 'type I BNST neuron', string_match='identical' ) type1_epochs = combined[row_ind] print(f'Epochs with type I BNST neuron: 
{len(type1_epochs)} rows')""") - row_ind = identify_matching_rows( + row_ind = identifyMatchingRows( combined, "CellTypeName", "type I BNST neuron", string_match="identical" ) type1_epochs = combined[row_ind] @@ -762,7 +762,7 @@ def section_7_plot_electrophysiology( ) -> None: """Section 7: Plot electrophysiology data for a selected subject.""" - from ndi.fun.table import identify_matching_rows + from ndi.fun.table import identifyMatchingRows from ndi.query import Query html.add_heading("Plot electrophysiology data") @@ -837,7 +837,7 @@ def section_7_plot_electrophysiology( ) html.add_code("""\ -epoch_mask = identify_matching_rows( +epoch_mask = identifyMatchingRows( combined_summary, 'SubjectDocumentIdentifier', subject_id ) epoch_conditions = combined_summary[epoch_mask] @@ -849,7 +849,7 @@ def section_7_plot_electrophysiology( epoch_nums = sorted(vm_epochs['EpochNumber'].tolist()) print(f'Vm probe has {len(epoch_nums)} epochs: {epoch_nums}')""") - epoch_mask = identify_matching_rows(combined_summary, "SubjectDocumentIdentifier", subject_id) + epoch_mask = identifyMatchingRows(combined_summary, "SubjectDocumentIdentifier", subject_id) epoch_conditions = combined_summary[epoch_mask] vm_epochs = epoch_conditions[epoch_conditions["ProbeDocumentIdentifier"] == vm_probe_id] @@ -1143,8 +1143,8 @@ def section_8_plot_epm(html: HTMLBuilder, dataset: Any) -> Any: """Section 8: Plot Elevated Plus Maze data.""" import pandas as pd - from ndi.fun.doc_table import ontology_table_row_doc_to_table - from ndi.fun.table import move_columns_left + from ndi.fun.doc_table import ontologyTableRowDoc2Table + from ndi.fun.table import moveColumnsLeft from ndi.query import Query html.add_heading("Plot Elevated Plus Maze data") @@ -1158,25 +1158,25 @@ def section_8_plot_epm(html: HTMLBuilder, dataset: Any) -> Any: # --- Step 1: Query and convert EPM data --- html.add_code("""\ from ndi.query import Query -from ndi.fun.doc_table import ontology_table_row_doc_to_table -from ndi.fun.table 
import move_columns_left +from ndi.fun.doc_table import ontologyTableRowDoc2Table +from ndi.fun.table import moveColumnsLeft query = Query('ontologyTableRow.variableNames').contains('ElevatedPlusMaze') docs_epm = dataset.database_search(query) -tables_epm, ids_epm = ontology_table_row_doc_to_table(docs_epm) +tables_epm, ids_epm = ontologyTableRowDoc2Table(docs_epm) table_epm = tables_epm[0] print(f'tableEPM: {table_epm.shape[0]} x {table_epm.shape[1]} table')""") query = Query("ontologyTableRow.variableNames").contains("ElevatedPlusMaze") docs_epm = dataset.database_search(query) - tables_epm, ids_epm = ontology_table_row_doc_to_table(docs_epm) + tables_epm, ids_epm = ontologyTableRowDoc2Table(docs_epm) table_epm = tables_epm[0] html.add_output_text(f"tableEPM: {table_epm.shape[0]} x {table_epm.shape[1]} table") # --- Step 2: Reorganize columns --- html.add_code("""\ -table_epm = move_columns_left(table_epm, [ +table_epm = moveColumnsLeft(table_epm, [ 'SubjectLocalIdentifier', 'Treatment_CNOOrSalineAdministration', 'ExperimentalGroupCode', @@ -1185,7 +1185,7 @@ def section_8_plot_epm(html: HTMLBuilder, dataset: Any) -> Any: ]) print(f'Columns: {list(table_epm.columns[:8])} ...')""") - table_epm = move_columns_left( + table_epm = moveColumnsLeft( table_epm, [ "SubjectLocalIdentifier", @@ -1215,11 +1215,11 @@ def section_8_plot_epm(html: HTMLBuilder, dataset: Any) -> Any: grouping_variable = "Treatment_CNOOrSalineAdministration" html.add_code(f"""\ -from ndi.fun.doc import ontology_table_row_vars +from ndi.fun.doc import ontologyTableRowVars from ndi.ontology import lookup as ontology_lookup # Get ontology metadata for all OTR variables -full_names, short_names, ontology_nodes = ontology_table_row_vars(dataset) +full_names, short_names, ontology_nodes = ontologyTableRowVars(dataset) # Look up the selected variable plotting_variable = 'ElevatedPlusMaze_OpenArmNorth_Entries' @@ -1238,10 +1238,10 @@ def section_8_plot_epm(html: HTMLBuilder, dataset: Any) -> Any: except 
ValueError: print(f'Variable {{plotting_variable}} not found in ontology metadata')""") - from ndi.fun.doc import ontology_table_row_vars + from ndi.fun.doc import ontologyTableRowVars from ndi.ontology import lookup as ontology_lookup - full_names, short_names, ontology_nodes = ontology_table_row_vars(dataset) + full_names, short_names, ontology_nodes = ontologyTableRowVars(dataset) term_info_lines = [] term_full_name = plotting_variable @@ -1398,8 +1398,8 @@ def section_9_plot_fps(html: HTMLBuilder, dataset: Any, table_epm: Any) -> None: """Section 9: Plot Fear-Potentiated Startle data.""" import pandas as pd - from ndi.fun.doc_table import ontology_table_row_doc_to_table - from ndi.fun.table import move_columns_left + from ndi.fun.doc_table import ontologyTableRowDoc2Table + from ndi.fun.table import moveColumnsLeft from ndi.query import Query html.add_heading("Plot Fear-Potentiated Startle data") @@ -1416,26 +1416,26 @@ def section_9_plot_fps(html: HTMLBuilder, dataset: Any, table_epm: Any) -> None: 'Fear_potentiatedStartle' ) docs_fps = dataset.database_search(query) -tables_fps, ids_fps = ontology_table_row_doc_to_table(docs_fps) +tables_fps, ids_fps = ontologyTableRowDoc2Table(docs_fps) table_fps = tables_fps[0] print(f'tableFPS: {table_fps.shape[0]} x {table_fps.shape[1]} table')""") query = Query("ontologyTableRow.variableNames").contains("Fear_potentiatedStartle") docs_fps = dataset.database_search(query) - tables_fps, ids_fps = ontology_table_row_doc_to_table(docs_fps) + tables_fps, ids_fps = ontologyTableRowDoc2Table(docs_fps) table_fps = tables_fps[0] html.add_output_text(f"tableFPS: {table_fps.shape[0]} x {table_fps.shape[1]} table") # --- Step 2: Reorganize columns --- html.add_code("""\ -table_fps = move_columns_left(table_fps, [ +table_fps = moveColumnsLeft(table_fps, [ 'Fear_potentiatedStartle_ExperimentalPhaseOrTestName', 'SubjectLocalIdentifier', ]) print(f'Columns: {list(table_fps.columns)}')""") - table_fps = move_columns_left( + table_fps = 
moveColumnsLeft( table_fps, [ "Fear_potentiatedStartle_ExperimentalPhaseOrTestName", diff --git a/tutorials/tutorial_682e7772cdf3f24938176fac.py b/tutorials/tutorial_682e7772cdf3f24938176fac.py index f8f08cb..00273d5 100644 --- a/tutorials/tutorial_682e7772cdf3f24938176fac.py +++ b/tutorials/tutorial_682e7772cdf3f24938176fac.py @@ -448,7 +448,7 @@ def section_doc_types(html: HTMLBuilder, dataset: Any) -> tuple: """Section 4: View NDI file types.""" import pandas as pd - from ndi.fun.doc import get_doc_types + from ndi.fun.doc import getDocTypes html.add_heading("View NDI file types") html.add_text( @@ -458,13 +458,13 @@ def section_doc_types(html: HTMLBuilder, dataset: Any) -> tuple: ) html.add_code("""\ -from ndi.fun.doc import get_doc_types +from ndi.fun.doc import getDocTypes -doc_types, doc_counts = get_doc_types(dataset) +doc_types, doc_counts = getDocTypes(dataset) documents_ndi = pd.DataFrame({'docTypes': doc_types, 'docCounts': doc_counts}) print(documents_ndi)""") - doc_types, doc_counts = get_doc_types(dataset) + doc_types, doc_counts = getDocTypes(dataset) df = pd.DataFrame({"docTypes": doc_types, "docCounts": doc_counts}) html.add_table_html(df_to_html(df, max_rows=30), f"documentsNDI ({len(df)} x 2 table)") @@ -476,7 +476,7 @@ def section_ontology_terms(html: HTMLBuilder, dataset: Any) -> None: """Section 5: View ontology term definitions.""" import pandas as pd - from ndi.fun.doc import ontology_table_row_vars + from ndi.fun.doc import ontologyTableRowVars html.add_heading("View ontology term definitions") html.add_text( @@ -487,10 +487,10 @@ def section_ontology_terms(html: HTMLBuilder, dataset: Any) -> None: ) html.add_code("""\ -from ndi.fun.doc import ontology_table_row_vars +from ndi.fun.doc import ontologyTableRowVars from ndi.ontology import lookup -full_names, variable_names, ontology_nodes = ontology_table_row_vars(dataset) +full_names, variable_names, ontology_nodes = ontologyTableRowVars(dataset) # Look up a specific ontology term by name 
target_name = 'C. elegans behavioral assay: deceleration upon encounter' @@ -502,7 +502,7 @@ def section_ontology_terms(html: HTMLBuilder, dataset: Any) -> None: print(f'definition: {result.definition}') print(f'shortName: {result.short_name}')""") - full_names, variable_names, ontology_nodes = ontology_table_row_vars(dataset) + full_names, variable_names, ontology_nodes = ontologyTableRowVars(dataset) # Show the full variable list var_df = pd.DataFrame( @@ -541,7 +541,7 @@ def section_ontology_terms(html: HTMLBuilder, dataset: Any) -> None: @timed def section_retrieve_metadata(html: HTMLBuilder, dataset: Any) -> tuple: """Section 6-7: Retrieve experiment metadata (ontologyTableRow tables).""" - from ndi.fun.doc_table import ontology_table_row_doc_to_table + from ndi.fun.doc_table import ontologyTableRowDoc2Table from ndi.query import Query html.add_heading("View C. elegans dataset") @@ -561,17 +561,17 @@ def section_retrieve_metadata(html: HTMLBuilder, dataset: Any) -> tuple: html.add_code("""\ from ndi.query import Query -from ndi.fun.doc_table import ontology_table_row_doc_to_table +from ndi.fun.doc_table import ontologyTableRowDoc2Table query = Query('').isa('ontologyTableRow') docs = dataset.database_search(query) -data_tables, doc_ids = ontology_table_row_doc_to_table(docs) +data_tables, doc_ids = ontologyTableRowDoc2Table(docs) for i, (dt, ids) in enumerate(zip(data_tables, doc_ids)): print(f'Table {i+1}: {len(dt)} rows x {len(dt.columns)} cols — {list(dt.columns)[:3]}...')""") docs = dataset.database_search(Query("").isa("ontologyTableRow")) - data_tables, doc_ids = ontology_table_row_doc_to_table(docs) + data_tables, doc_ids = ontologyTableRowDoc2Table(docs) output_lines = [] for i, (dt, _ids) in enumerate(zip(data_tables, doc_ids)): @@ -608,7 +608,7 @@ def section_retrieve_metadata(html: HTMLBuilder, dataset: Any) -> tuple: def section_subject_summary(html: HTMLBuilder, dataset: Any, data_tables: list) -> Any: """Section 8: View subject summary table.""" 
- from ndi.fun.doc_table import subject_summary + from ndi.fun.doc_table import subject as subject_summary from ndi.fun.table import join html.add_heading("View subject summary table", level=3) @@ -625,7 +625,7 @@ def section_subject_summary(html: HTMLBuilder, dataset: Any, data_tables: list) html.add_text("A summary table showing the metadata for each subject can be viewed below.") html.add_code("""\ -from ndi.fun.doc_table import subject_summary +from ndi.fun.doc_table import subject as subject_summary from ndi.fun.table import join subject_summ = subject_summary(dataset) @@ -669,7 +669,7 @@ def section_subject_summary(html: HTMLBuilder, dataset: Any, data_tables: list) @timed def section_filter_subjects(html: HTMLBuilder, subject_table: Any) -> Any: """Section 9: Filter subjects.""" - from ndi.fun.table import identify_matching_rows + from ndi.fun.table import identifyMatchingRows html.add_heading("Filter subjects", level=3) html.add_text( @@ -678,11 +678,11 @@ def section_filter_subjects(html: HTMLBuilder, subject_table: Any) -> Any: ) html.add_code("""\ -from ndi.fun.table import identify_matching_rows +from ndi.fun.table import identifyMatchingRows column_name = 'StrainName' data_value = 'PR811' -row_ind = identify_matching_rows( +row_ind = identifyMatchingRows( subject_table, column_name, data_value, string_match='contains' ) filtered_subjects = subject_table[row_ind] @@ -690,7 +690,7 @@ def section_filter_subjects(html: HTMLBuilder, subject_table: Any) -> Any: col = "StrainName" if "StrainName" in subject_table.columns else "SubjectLocalIdentifier" value = "PR811" - row_ind = identify_matching_rows(subject_table, col, value, string_match="contains") + row_ind = identifyMatchingRows(subject_table, col, value, string_match="contains") filtered = subject_table[row_ind] html.add_output_text( @@ -1306,7 +1306,7 @@ def section_plot_image_with_position( html.add_code("""\ import numpy as np -from ndi.fun.data import read_image_stack +from ndi.fun.data import 
readImageStack # Choose an image type: the patch identifier map # (where each pixel's value = identifier of the closest bacterial patch) @@ -1315,7 +1315,7 @@ def section_plot_image_with_position( # Read image via the database binary API (auto-detects PNG/TIFF/raw binary) # Binary files are fetched on demand from NDI Cloud via ndic:// protocol -img, info = read_image_stack(dataset, doc_info['doc'], 'auto') +img, info = readImageStack(dataset, doc_info['doc'], 'auto') # Normalize to [0, 1] for display (equivalent to MATLAB mat2gray) img_float = img.astype(np.float64) @@ -1348,9 +1348,9 @@ def section_plot_image_with_position( return try: - from ndi.fun.data import read_image_stack + from ndi.fun.data import readImageStack - img, _info = read_image_stack(dataset, img_info["doc"], "auto") + img, _info = readImageStack(dataset, img_info["doc"], "auto") except FileNotFoundError: html.add_output_text( "Binary file not available. Set NDI_CLOUD_USERNAME/PASSWORD " @@ -1408,14 +1408,14 @@ def section_play_video(html: HTMLBuilder, dataset: Any, image_doc_map: dict) -> html.add_code("""\ import cv2 -from ndi.fun.data import read_image_stack +from ndi.fun.data import readImageStack # Get the video recording from imageStack documents image_name = 'video recording' # keyword to match in imageStack labels doc_info = image_doc_map[image_name] # Read video via the database binary API (fetches on demand via ndic://) -video_data, info = read_image_stack(dataset, doc_info['doc'], 'mp4') +video_data, info = readImageStack(dataset, doc_info['doc'], 'mp4') # Get time scale from imageStack_parameters.dimension_scale is_params = doc_info['props']['imageStack_parameters'] @@ -1684,7 +1684,7 @@ def section_distance_element(html: HTMLBuilder, dataset: Any, ctx: dict) -> None @timed def section_encounter_per_subject(html: HTMLBuilder, data_tables: list, ctx: dict) -> None: """Section 17: Get analysis of patch encounters for the chosen subject.""" - from ndi.fun.table import 
identify_matching_rows + from ndi.fun.table import identifyMatchingRows subject_id = ctx.get("subject_id", "") tables = ctx.get("tables", {}) @@ -1697,14 +1697,14 @@ def section_encounter_per_subject(html: HTMLBuilder, data_tables: list, ctx: dic ) html.add_code("""\ -from ndi.fun.table import identify_matching_rows +from ndi.fun.table import identifyMatchingRows # The encounter table is the largest ontologyTableRow group # (contains C. elegans encounter behavior data with ~20K rows) encounter_table = data_tables[0] # largest group by row count # Filter to encounters for this subject -ind = identify_matching_rows( +ind = identifyMatchingRows( encounter_table, 'SubjectDocumentIdentifier', subject_id) current_encounters = encounter_table[ind] print(f'currentEncounters: {current_encounters.shape}')""") @@ -1721,7 +1721,7 @@ def section_encounter_per_subject(html: HTMLBuilder, data_tables: list, ctx: dic ) return - ind = identify_matching_rows(encounter_table, "SubjectDocumentIdentifier", subject_id) + ind = identifyMatchingRows(encounter_table, "SubjectDocumentIdentifier", subject_id) current_encounters = encounter_table[ind] html.add_output_text( @@ -2038,7 +2038,7 @@ def section_ecoli_plot_image( html.add_code("""\ import numpy as np -from ndi.fun.data import read_image_stack +from ndi.fun.data import readImageStack # Choose a fluorescence image (prefer normalized) for name, info in ecoli_image_map.items(): @@ -2049,7 +2049,7 @@ def section_ecoli_plot_image( # Read image via the database binary API (auto-detects PNG/TIFF/raw binary) # Binary files are fetched on demand from NDI Cloud via ndic:// protocol -img, info = read_image_stack(dataset, doc_info['doc'], 'auto') +img, info = readImageStack(dataset, doc_info['doc'], 'auto') # Plot with colorbar and metadata title fig, ax = plt.subplots(figsize=(8, 8)) @@ -2086,9 +2086,9 @@ def section_ecoli_plot_image( return try: - from ndi.fun.data import read_image_stack + from ndi.fun.data import readImageStack - img, _info 
= read_image_stack(dataset, img_info["doc"], "auto") + img, _info = readImageStack(dataset, img_info["doc"], "auto") except FileNotFoundError: html.add_output_text( "Binary file not available. Set NDI_CLOUD_USERNAME/PASSWORD " From e827f5c8f844e1cec0d6b4f423a32fc63d1efa25 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 10 Mar 2026 18:49:05 +0000 Subject: [PATCH 4/7] Fix black formatting in plot.py, test_jess_haley.py, and test_cloud_live.py Apply black auto-formatting to resolve CI lint failures. https://claude.ai/code/session_014koNTAiN4GCfCArsHWxnDW --- src/ndi/fun/plot.py | 32 +++++++-------------------- tests/matlab_tests/test_jess_haley.py | 4 +--- tests/test_cloud_live.py | 4 +--- 3 files changed, 10 insertions(+), 30 deletions(-) diff --git a/src/ndi/fun/plot.py b/src/ndi/fun/plot.py index 1adf826..da44f8f 100644 --- a/src/ndi/fun/plot.py +++ b/src/ndi/fun/plot.py @@ -55,8 +55,7 @@ def bar3( import matplotlib.pyplot as plt except ImportError as exc: raise ImportError( - "matplotlib is required for ndi.fun.plot. " - "Install it with: pip install matplotlib" + "matplotlib is required for ndi.fun.plot. 
" "Install it with: pip install matplotlib" ) from exc import pandas as pd @@ -91,21 +90,14 @@ def bar3( ax = axes[i] for j in range(g2_size): for k in range(g3_size): - mask = ( - (group_indices[0] == i) - & (group_indices[1] == j) - & (group_indices[2] == k) - ) + mask = (group_indices[0] == i) & (group_indices[1] == j) & (group_indices[2] == k) vals = df.loc[mask, plotting_variable].values x = j * (g3_size + 1) + k + 1 if len(vals) > 0: ax.bar(x, np.nanmean(vals), color=colors[k]) # Format subplot - tick_positions = [ - (g3_size + 1) * j + (g3_size + 1) / 2 - for j in range(g2_size) - ] + tick_positions = [(g3_size + 1) * j + (g3_size + 1) / 2 for j in range(g2_size)] ax.set_xticks(tick_positions) ax.set_xticklabels([str(g) for g in groups[1]]) ax.set_title(str(groups[0][i])) @@ -150,8 +142,7 @@ def multichan( import matplotlib.pyplot as plt except ImportError as exc: raise ImportError( - "matplotlib is required for ndi.fun.plot. " - "Install it with: pip install matplotlib" + "matplotlib is required for ndi.fun.plot. " "Install it with: pip install matplotlib" ) from exc data = np.asarray(data) @@ -222,8 +213,7 @@ def stimulusTimeseries( import matplotlib.pyplot as plt except ImportError as exc: raise ImportError( - "matplotlib is required for ndi.fun.plot. " - "Install it with: pip install matplotlib" + "matplotlib is required for ndi.fun.plot. 
" "Install it with: pip install matplotlib" ) from exc # Read stimulus data from the probe @@ -240,9 +230,7 @@ def stimulusTimeseries( for entry in stimulus_data: if isinstance(entry, dict) and "stimid" in entry: ids.extend( - entry["stimid"] - if isinstance(entry["stimid"], list) - else [entry["stimid"]] + entry["stimid"] if isinstance(entry["stimid"], list) else [entry["stimid"]] ) stimid = ids if ids else None @@ -254,14 +242,10 @@ def stimulusTimeseries( stimon = stimulus_time_data.stimon stimoff = stimulus_time_data.stimoff else: - raise ValueError( - "stimulus_time_data must contain 'stimon' and 'stimoff' fields" - ) + raise ValueError("stimulus_time_data must contain 'stimon' and 'stimoff' fields") if stimon is None or stimoff is None: - raise ValueError( - "stimulus_time_data must contain 'stimon' and 'stimoff' fields" - ) + raise ValueError("stimulus_time_data must contain 'stimon' and 'stimoff' fields") stimon = np.asarray(stimon).ravel() stimoff = np.asarray(stimoff).ravel() diff --git a/tests/matlab_tests/test_jess_haley.py b/tests/matlab_tests/test_jess_haley.py index ee7f099..033e581 100644 --- a/tests/matlab_tests/test_jess_haley.py +++ b/tests/matlab_tests/test_jess_haley.py @@ -244,9 +244,7 @@ def test_doc_ids_match(self, otr_tables): def test_stack_all_mode(self, ontology_table_row_docs): from ndi.fun.doc_table import ontologyTableRowDoc2Table - data_tables, doc_ids = ontologyTableRowDoc2Table( - ontology_table_row_docs, stack_all=True - ) + data_tables, doc_ids = ontologyTableRowDoc2Table(ontology_table_row_docs, stack_all=True) assert len(data_tables) == 1 assert len(data_tables[0]) == sum(EXPECTED_OTR_GROUP_SIZES_SORTED) diff --git a/tests/test_cloud_live.py b/tests/test_cloud_live.py index 5950ec2..448d61d 100644 --- a/tests/test_cloud_live.py +++ b/tests/test_cloud_live.py @@ -946,9 +946,7 @@ def test_deferred_delete_and_undelete(self, client, cloud_config, can_write): # Should be accessible again with documents intact time.sleep(2) - ds = 
_retry_on_server_error( - lambda: getDataset(ds_id, client=client), retry_on_404=True - ) + ds = _retry_on_server_error(lambda: getDataset(ds_id, client=client), retry_on_404=True) ds_fetched_id = ds.get("_id", ds.get("id", "")) assert ds_fetched_id == ds_id From 39a31e6c2eebc0ecac9b400efe2e203dbc37aea1 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 11 Mar 2026 11:26:58 +0000 Subject: [PATCH 5/7] Enrich APIResponse with headers, reason, elapsed, and raw http_response Aligns Python's APIResponse with the richer structure returned by MATLAB's ndi.cloud.api apiResponse output argument. All new fields use safe defaults so existing call-sites (including manually-constructed APIResponse instances in pagination helpers) are fully backward-compatible. https://claude.ai/code/session_01Y9G6ysXeXzrXRsZGe2Pe3G --- src/ndi/cloud/client.py | 37 +++++++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/src/ndi/cloud/client.py b/src/ndi/cloud/client.py index f03931f..9ccf71e 100644 --- a/src/ndi/cloud/client.py +++ b/src/ndi/cloud/client.py @@ -40,9 +40,25 @@ class APIResponse: data: The parsed response payload (dict, list, str, or None). status_code: The HTTP status code. url: The full request URL. + headers: Response headers (dict-like, or empty dict). + reason: HTTP reason phrase (e.g. ``"OK"``, ``"Not Found"``). + elapsed: Time between sending the request and receiving the + response headers (``datetime.timedelta``, or ``None``). + http_response: The raw ``requests.Response`` object, when + available. ``None`` for locally-constructed instances + (e.g. paginated aggregations). 
""" - __slots__ = ("success", "data", "status_code", "url") + __slots__ = ( + "success", + "data", + "status_code", + "url", + "headers", + "reason", + "elapsed", + "http_response", + ) def __init__( self, @@ -51,11 +67,19 @@ def __init__( success: bool = True, status_code: int = 200, url: str = "", + headers: Any = None, + reason: str = "", + elapsed: Any = None, + http_response: Any = None, ): self.success = success self.data = data self.status_code = status_code self.url = url + self.headers = headers if headers is not None else {} + self.reason = reason + self.elapsed = elapsed + self.http_response = http_response # -- Dict proxy (when data is a dict) -- @@ -97,7 +121,12 @@ def __bool__(self) -> bool: def __repr__(self) -> str: status = "OK" if self.success else "FAIL" - return f"APIResponse({status}, status={self.status_code}, url={self.url!r})" + reason = f" {self.reason}" if self.reason else "" + elapsed = f", elapsed={self.elapsed}" if self.elapsed is not None else "" + return ( + f"APIResponse({status}, status={self.status_code}{reason}" + f"{elapsed}, url={self.url!r})" + ) class CloudClient: @@ -230,6 +259,10 @@ def _request( success=True, status_code=resp.status_code, url=url, + headers=resp.headers, + reason=getattr(resp, "reason", ""), + elapsed=getattr(resp, "elapsed", None), + http_response=resp, ) def _handle_response(self, resp: Any) -> Any: From 0ab3376f4d07575709bc668d5cbe37b256482374 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 11 Mar 2026 11:50:42 +0000 Subject: [PATCH 6/7] Port missing MATLAB ndi.cloud.* functions and align naming to MATLAB source of truth MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Renames (with backward-compatible aliases): - download.downloadFullDataset → download.dataset - filehandler.rewrite_file_info_for_cloud → filehandler.updateFileInfoForRemoteFiles - sync._delete_local_docs → sync.deleteLocalDocuments (now public) - sync._download_docs_by_ids → 
sync.downloadNdiDocuments (now public) New functions ported from MATLAB: - download.datasetDocuments — per-document download with mode handling - download.downloadGenericFiles — download generic_file docs with extensions - download.setFileInfo — set file_info for local/hybrid modes - download.structsToNdiDocuments — alias for jsons2documents - upload.uploadToNDICloud — legacy upload entry point - upload.scanForUpload — moved from orchestration to match MATLAB location - filehandler.updateFileInfoForLocalFiles — update file_info for local files - internal.duplicateDocuments — find/remove duplicate cloud documents Test cleanup: - Add module-scoped autouse fixture to sweep up any leftover NDI_PYTEST_* datasets after all tests complete, preventing stale dataset accumulation. https://claude.ai/code/session_01Y9G6ysXeXzrXRsZGe2Pe3G --- src/ndi/cloud/download.py | 352 ++++++++++++++++++++++++++++++- src/ndi/cloud/filehandler.py | 94 ++++++++- src/ndi/cloud/internal.py | 96 +++++++++ src/ndi/cloud/orchestration.py | 67 +----- src/ndi/cloud/sync/__init__.py | 4 + src/ndi/cloud/sync/operations.py | 24 ++- src/ndi/cloud/upload.py | 136 ++++++++++++ tests/test_cloud_live.py | 28 +++ 8 files changed, 727 insertions(+), 74 deletions(-) diff --git a/src/ndi/cloud/download.py b/src/ndi/cloud/download.py index e45610c..c5e7c21 100644 --- a/src/ndi/cloud/download.py +++ b/src/ndi/cloud/download.py @@ -18,7 +18,7 @@ logger = logging.getLogger(__name__) -def downloadFullDataset( +def dataset( dataset_id: str, target_dir: str | Path, *, @@ -28,6 +28,8 @@ def downloadFullDataset( ) -> dict[str, Any]: """Download a complete dataset (full documents + binary files) to disk. + MATLAB equivalent: ``ndi.cloud.download.dataset`` + This is the recommended way to download an entire dataset. It fetches the full JSON for every document (not just summaries) and optionally downloads all associated binary files. 
Already-downloaded items are @@ -427,3 +429,351 @@ def jsons2documents( except Exception: pass return documents + + +def datasetDocuments( + dataset_info: dict[str, Any], + mode: str = "local", + json_dir: str | Path | None = None, + files_dir: str | Path | None = None, + *, + verbose: bool = True, + client: CloudClient | None = None, +) -> tuple[bool, str]: + """Download dataset documents one-by-one from the cloud. + + MATLAB equivalent: ``ndi.cloud.download.datasetDocuments`` + + This fetches each document individually via ``getDocument``, sets + file info according to *mode* (``'local'`` or ``'hybrid'``), and + saves each document as a JSON file in *json_dir*. + + Args: + dataset_info: Dataset dict as returned by ``getDataset``, must + include ``documents`` (list of document IDs) and ``_id``. + mode: ``'local'`` — files are expected on disk, set ingest/delete + flags. ``'hybrid'`` — leave files in cloud, set ndic:// URIs. + json_dir: Directory to save document JSON files. + files_dir: Directory containing locally-downloaded binary files + (used only when *mode* is ``'local'``). + verbose: Print progress messages. + client: Authenticated cloud client (auto-created if omitted). + + Returns: + Tuple of ``(success, error_message)``. + """ + from .api import documents as docs_api + + dataset_id = dataset_info.get("_id", dataset_info.get("id", "")) + doc_ids = dataset_info.get("documents", []) + + if verbose: + print(f"Will download {len(doc_ids)} documents...") + + if json_dir is not None: + json_path = Path(json_dir) + json_path.mkdir(parents=True, exist_ok=True) + else: + json_path = None + + for i, document_id in enumerate(doc_ids): + if verbose: + pct = 100 * (i + 1) / max(len(doc_ids), 1) + print(f"Downloading document {i + 1} of {len(doc_ids)} ({pct:.0f}%)...") + + if json_path is not None: + out_file = json_path / f"{document_id}.json" + if out_file.exists(): + if verbose: + print(f" Document {i + 1} already exists. 
Skipping...") + continue + + try: + doc_struct = docs_api.getDocument(dataset_id, document_id, client=client) + if hasattr(doc_struct, "data"): + doc_struct = doc_struct.data + except Exception as exc: + logger.warning("Failed to get document %s: %s", document_id, exc) + continue + + # Remove cloud-only 'id' field (MATLAB: rmfield(docStruct, 'id')) + doc_struct.pop("id", None) + + # Set file info according to mode + doc_struct = setFileInfo(doc_struct, mode, str(files_dir or "")) + + if json_path is not None: + out_file = json_path / f"{document_id}.json" + out_file.write_text(json.dumps(doc_struct, indent=2), encoding="utf-8") + + return True, "" + + +def downloadGenericFiles( + ndi_dataset: Any, + ndi_document_ids: list[str], + target_folder: str | Path, + *, + verbose: bool = True, + zip_result: bool = False, + naming_strategy: str = "original", + client: CloudClient | None = None, +) -> tuple[bool, str, dict[str, Any]]: + """Download generic_file documents from cloud to a folder with extensions. + + MATLAB equivalent: ``ndi.cloud.download.downloadGenericFiles`` + + Identifies ``generic_file`` documents among the specified NDI document + IDs and their dependencies, downloads their associated data files to + *target_folder* using their original filenames (including extensions). + + Args: + ndi_dataset: The local NDI dataset (or session) object. + ndi_document_ids: NDI document IDs to download. + target_folder: Destination folder for the files. + verbose: Print progress messages. + zip_result: If True, zip the downloaded files. + naming_strategy: One of ``'original'``, ``'id'``, ``'id_original'``. + client: Authenticated cloud client (auto-created if omitted). + + Returns: + Tuple of ``(success, error_message, report)`` where *report* + contains ``downloaded_filenames`` and optionally ``zip_file``. 
+ """ + import requests as _requests + + from .api import files as files_api + from .internal import getCloudDatasetIdForLocalDataset + + target = Path(target_folder) + target.mkdir(parents=True, exist_ok=True) + + report: dict[str, Any] = {"downloaded_filenames": [], "zip_file": ""} + + if not ndi_document_ids: + if verbose: + print("No NDI document IDs provided.") + return True, "", report + + try: + # Resolve cloud dataset ID + cloud_dataset_id, _ = getCloudDatasetIdForLocalDataset(ndi_dataset, client=client) + if not cloud_dataset_id: + return False, "Dataset is not linked to an NDI cloud dataset", report + + # Get documents from the local database + from ndi.query import Query + + all_docs = [] + for doc_id in ndi_document_ids: + q = Query("base.id", "exact_string", doc_id, "") + results = ndi_dataset.database_search(q) if hasattr(ndi_dataset, "database_search") else [] + all_docs.extend(results) + + if not all_docs: + if verbose: + print("No matching documents found in the dataset.") + return True, "", report + + # Filter for generic_file documents + generic_docs = [] + for doc in all_docs: + props = doc.document_properties if hasattr(doc, "document_properties") else doc + if isinstance(props, dict) and "generic_file" in props: + generic_docs.append(doc) + + if not generic_docs: + if verbose: + print("No generic_file documents found.") + return True, "", report + + # Build download list + download_list: list[dict[str, str]] = [] + for doc in generic_docs: + props = doc.document_properties if hasattr(doc, "document_properties") else doc + if not isinstance(props, dict): + continue + files_info = props.get("files", {}).get("file_info", []) + if isinstance(files_info, dict): + files_info = [files_info] + generic_file = props.get("generic_file", {}) + original_filename = generic_file.get("filename", "") + doc_id = props.get("base", {}).get("id", "") + + for fi in files_info: + locations = fi.get("locations", []) + if isinstance(locations, dict): + locations = 
[locations] + if locations: + uid = locations[0].get("uid", "") + if not uid: + continue + + import os + + name_part, ext_part = os.path.splitext(original_filename) + if not name_part: + name_part, ext_part = os.path.splitext(fi.get("name", "")) + + if naming_strategy == "id": + filename = f"{doc_id}{ext_part}" + elif naming_strategy == "id_original": + filename = f"{doc_id}_{name_part}{ext_part}" + else: # "original" + filename = f"{name_part}{ext_part}" + + download_list.append({"uid": uid, "filename": filename}) + + if not download_list: + if verbose: + print("No files associated with these documents.") + return True, "", report + + # Download files + if verbose: + print(f"Downloading {len(download_list)} files to {target}...") + + for i, item in enumerate(download_list): + uid = item["uid"] + filename = item["filename"] + target_path = target / filename + + if verbose: + print(f" [{i + 1}/{len(download_list)}] Downloading {filename} (UID: {uid})...") + + try: + details = files_api.getFileDetails(cloud_dataset_id, uid, client=client) + url = details.get("downloadUrl", "") if hasattr(details, "get") else "" + if not url: + logger.warning("No download URL for file %s (UID: %s)", filename, uid) + continue + + resp = _requests.get(url, timeout=300, stream=True) + if resp.status_code == 200: + with open(target_path, "wb") as fh: + for chunk in resp.iter_content(chunk_size=65536): + fh.write(chunk) + report["downloaded_filenames"].append(filename) + except Exception as exc: + logger.warning("Failed to download file %s: %s", filename, exc) + + # Optional zip + if zip_result and report["downloaded_filenames"]: + import zipfile as _zipfile + + zip_name = target / f"exported_generic_files.zip" + with _zipfile.ZipFile(zip_name, "w", _zipfile.ZIP_DEFLATED) as zf: + for fname in report["downloaded_filenames"]: + zf.write(target / fname, fname) + report["zip_file"] = str(zip_name) + if verbose: + print(f"Zip complete: {zip_name}") + + except Exception as exc: + return False, 
def setFileInfo(
    doc_struct: dict[str, Any],
    mode: str,
    filepath: str,
) -> dict[str, Any]:
    """Set file_info parameters for different download modes.

    MATLAB equivalent: ``ndi.cloud.download.internal.setFileInfo``

    The input is never modified: the function works on a deep copy, so the
    nested ``files`` / ``file_info`` / ``locations`` containers of
    *doc_struct* are left untouched.

    Args:
        doc_struct: Document properties dict.
        mode: ``'local'`` — each ``file_info`` entry keeps only its first
            location, rewritten to ``{filepath}/{uid}`` (or ``""`` when the
            location has no uid) with ``location_type='file'``,
            ``delete_original=1`` and ``ingest=1``; the other keys of that
            location are preserved. Any other value is handled as
            ``'hybrid'`` — ``delete_original`` and ``ingest`` are set to 0
            on every location (leave files in cloud).
        filepath: Directory containing locally-downloaded files.

    Returns:
        Updated copy of the document properties dict. In ``'local'`` mode
        ``files['file_info']`` is always a list, even when the input held a
        single dict. A document without a ``files`` dict, or whose
        ``file_info`` is missing or is neither a dict nor a list, is
        returned as an unmodified copy.
    """
    import copy
    import os

    # Deep copy: the nested containers are edited below, and a shallow copy
    # would leak those edits back into the caller's document.
    new_struct = copy.deepcopy(doc_struct)
    files = new_struct.get("files")
    if not files or not isinstance(files, dict):
        return new_struct

    file_info = files.get("file_info")
    if isinstance(file_info, dict):
        # A single dict is the MATLAB-struct form of a one-element list.
        file_info = [file_info]
    if not isinstance(file_info, list):
        return new_struct

    if mode == "local":
        # Keys that are replaced wholesale when pointing at a local file.
        overridden = ("location", "location_type", "delete_original", "ingest")
        new_file_info = []
        for fi in file_info:
            locations = fi.get("locations") if isinstance(fi, dict) else None
            if isinstance(locations, dict):
                locations = [locations]
            if (
                not locations
                or not isinstance(locations, list)
                or not isinstance(locations[0], dict)
            ):
                # Nothing usable to rewrite; keep the entry as it is.
                new_file_info.append(fi)
                continue

            first = locations[0]
            uid = first.get("uid", "")
            new_fi = dict(fi)
            new_fi["locations"] = [
                {
                    "uid": uid,
                    "location": os.path.join(filepath, uid) if uid else "",
                    "location_type": "file",
                    "delete_original": 1,
                    "ingest": 1,
                    **{k: v for k, v in first.items() if k not in overridden},
                }
            ]
            new_file_info.append(new_fi)
        files["file_info"] = new_file_info
        return new_struct

    # hybrid: files stay in the cloud, so only the flags are cleared.
    for fi in file_info:
        if not isinstance(fi, dict):
            continue
        locations = fi.get("locations") or []
        if isinstance(locations, dict):
            locations = [locations]
        if not isinstance(locations, list):
            continue
        for loc in locations:
            if isinstance(loc, dict):
                loc["delete_original"] = 0
                loc["ingest"] = 0

    return new_struct
def _cloud_doc_id(doc: dict[str, Any]) -> str:
    """Return the cloud id of *doc*: ``id``, else ``_id``, else ``""``."""
    return doc.get("id") or doc.get("_id") or ""


def duplicateDocuments(
    cloud_dataset_id: str,
    *,
    delete_duplicates: bool = True,
    maximum_delete_batch_size: int = 1000,
    verbose: bool = False,
    client: CloudClient | None = None,
) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
    """Find and optionally remove duplicate documents in a cloud dataset.

    MATLAB equivalent: ``ndi.cloud.internal.duplicateDocuments``

    Duplicates are documents sharing the same ``ndiId`` (or ``name``
    as fallback) but with different cloud ``id`` values. The document
    with the alphabetically earliest ``id`` is kept as the original.
    Documents that have neither ``ndiId`` nor ``name`` are ignored.

    Deletion is best-effort: a batch that fails to delete raises a
    warning (independent of *verbose*) and the remaining batches are
    still attempted.

    Args:
        cloud_dataset_id: The cloud dataset ID to scan.
        delete_duplicates: If True, delete identified duplicates.
        maximum_delete_batch_size: Max documents per bulk delete call.
        verbose: Print progress messages.
        client: Authenticated cloud client (auto-created if omitted).

    Returns:
        Tuple of ``(duplicate_docs, original_docs)``.

    Raises:
        ValueError: If deletion is requested with a
            ``maximum_delete_batch_size`` smaller than 1.
    """
    import warnings

    # Validate before any network traffic: a zero step would crash range()
    # later, and a negative one would silently delete nothing.
    if delete_duplicates and maximum_delete_batch_size < 1:
        raise ValueError("maximum_delete_batch_size must be a positive integer")

    from .api import documents as docs_api

    if verbose:
        print("Searching for all documents...")
    all_docs_result = docs_api.listDatasetDocumentsAll(cloud_dataset_id, client=client)
    all_docs = all_docs_result.data if hasattr(all_docs_result, "data") else all_docs_result
    if verbose:
        print("Done.")

    if not all_docs:
        return [], []

    # Group by ndiId (or name as fallback) — keep the one with earliest id
    doc_map: dict[str, dict[str, Any]] = {}
    duplicate_docs: list[dict[str, Any]] = []

    for doc in all_docs:
        group_key = doc.get("ndiId", "") or doc.get("name", "")
        if not group_key:
            continue

        existing = doc_map.get(group_key)
        if existing is None:
            doc_map[group_key] = doc
        elif _cloud_doc_id(doc) < _cloud_doc_id(existing):
            # The newcomer sorts earlier, so it replaces the current original.
            duplicate_docs.append(existing)
            doc_map[group_key] = doc
        else:
            duplicate_docs.append(doc)

    original_docs = list(doc_map.values())

    if not duplicate_docs:
        if verbose:
            print("No duplicate documents found.")
        return duplicate_docs, original_docs

    if not delete_duplicates:
        if verbose:
            print(f"Found {len(duplicate_docs)} duplicates, but deletion was not requested.")
        return duplicate_docs, original_docs

    if verbose:
        print(f"Found {len(duplicate_docs)} duplicates to delete.")

    # Documents without any cloud id cannot be addressed for deletion.
    doc_ids_to_delete = [doc_id for doc_id in map(_cloud_doc_id, duplicate_docs) if doc_id]

    size = maximum_delete_batch_size
    total_batches = (len(doc_ids_to_delete) + size - 1) // size
    failed_batches = 0

    # Delete in batches
    for batch_num, start in enumerate(range(0, len(doc_ids_to_delete), size), start=1):
        batch = doc_ids_to_delete[start : start + size]
        if verbose:
            print(f"Deleting batch {batch_num} of {total_batches}...")
        try:
            docs_api.bulkDeleteDocuments(cloud_dataset_id, batch, client=client)
        except Exception as exc:
            # Best-effort: report the failure and carry on with the rest.
            failed_batches += 1
            warnings.warn(
                f"Batch {batch_num} of {total_batches} failed to delete: {exc}",
                stacklevel=2,
            )
            continue
        if verbose:
            print(f"Batch {batch_num} deleted.")

    if verbose:
        if failed_batches:
            print(f"{failed_batches} of {total_batches} delete batches failed.")
        else:
            print("All duplicate documents deleted.")

    return duplicate_docs, original_docs
if not sync_files: - from .filehandler import rewrite_file_info_for_cloud + from .filehandler import updateFileInfoForRemoteFiles for dj in doc_jsons: - rewrite_file_info_for_cloud(dj, cloud_dataset_id) + updateFileInfoForRemoteFiles(dj, cloud_dataset_id) # Convert to Document objects and add to a local Dataset from ndi.dataset import Dataset @@ -166,10 +166,10 @@ def load_dataset_from_json_dir( # Rewrite file_info to ndic:// URIs for on-demand fetching if cloud_dataset_id: - from .filehandler import rewrite_file_info_for_cloud + from .filehandler import updateFileInfoForRemoteFiles for dj in doc_jsons: - rewrite_file_info_for_cloud(dj, cloud_dataset_id) + updateFileInfoForRemoteFiles(dj, cloud_dataset_id) # Create Dataset from ndi.dataset import Dataset @@ -374,63 +374,8 @@ def newDataset( return cloud_id -@_auto_client -def scanForUpload( - dataset: Any, - cloud_dataset_id: str, - *, - client: CloudClient | None = None, -) -> tuple[list[dict], list[dict], float]: - """Scan local documents/files to determine what needs uploading. - - MATLAB equivalent: +cloud/+upload/scanForUpload.m - - Returns: - Tuple of (doc_structs, file_structs, total_size_kb). 
- """ - from ndi.query import Query - - from .internal import listRemoteDocumentIds - - # Get local documents - try: - all_docs = dataset.session.database_search(Query("")) - except Exception: - all_docs = [] - - # Get remote IDs - remote_ids = {} - if cloud_dataset_id: - try: - remote_ids = listRemoteDocumentIds(cloud_dataset_id, client=client) - except Exception: - pass - - doc_structs: list[dict] = [] - file_structs: list[dict] = [] - total_size = 0.0 - - for doc in all_docs: - props = doc.document_properties if hasattr(doc, "document_properties") else doc - doc_id = "" - if isinstance(props, dict): - doc_id = props.get("base", {}).get("id", "") - - is_uploaded = doc_id in remote_ids - doc_structs.append({"docid": doc_id, "is_uploaded": is_uploaded}) - - # Check for associated files - file_uid = props.get("file_uid", "") if isinstance(props, dict) else "" - if file_uid: - file_structs.append( - { - "uid": file_uid, - "docid": doc_id, - "is_uploaded": is_uploaded, - } - ) - - return doc_structs, file_structs, total_size +# Re-export from upload module (MATLAB: ndi.cloud.upload.scanForUpload) +from .upload import scanForUpload # noqa: F401 # --------------------------------------------------------------------------- diff --git a/src/ndi/cloud/sync/__init__.py b/src/ndi/cloud/sync/__init__.py index 890a364..fdc5062 100644 --- a/src/ndi/cloud/sync/__init__.py +++ b/src/ndi/cloud/sync/__init__.py @@ -8,6 +8,8 @@ from .index import SyncIndex from .mode import SyncMode, SyncOptions from .operations import ( + deleteLocalDocuments, + downloadNdiDocuments, downloadNew, mirrorFromRemote, mirrorToRemote, @@ -26,4 +28,6 @@ "mirrorFromRemote", "twoWaySync", "sync", + "deleteLocalDocuments", + "downloadNdiDocuments", ] diff --git a/src/ndi/cloud/sync/operations.py b/src/ndi/cloud/sync/operations.py index 8cd4ae6..35e746c 100644 --- a/src/ndi/cloud/sync/operations.py +++ b/src/ndi/cloud/sync/operations.py @@ -48,8 +48,11 @@ def _save_downloaded_docs( return saved -def 
_delete_local_docs(ds_path: Path, doc_ids: set[str]) -> list[str]: - """Remove local document JSON files for the given IDs.""" +def deleteLocalDocuments(ds_path: Path, doc_ids: set[str]) -> list[str]: + """Remove local document JSON files for the given IDs. + + MATLAB equivalent: ``ndi.cloud.sync.internal.deleteLocalDocuments`` + """ doc_dir = ds_path / _DOC_DIR deleted: list[str] = [] for doc_id in doc_ids: @@ -60,7 +63,7 @@ def _delete_local_docs(ds_path: Path, doc_ids: set[str]) -> list[str]: return deleted -def _download_docs_by_ids( +def downloadNdiDocuments( cloud_dataset_id: str, ndi_to_api: dict[str, str], ids_to_download: set[str], @@ -69,7 +72,10 @@ def _download_docs_by_ids( ) -> tuple[list[dict[str, Any]], list[str]]: """Fetch documents from the cloud by NDI ID using chunked bulk download. - Returns (downloaded_docs, failed_ids). + MATLAB equivalent: ``ndi.cloud.sync.internal.downloadNdiDocuments`` + + Returns: + Tuple of ``(downloaded_docs, failed_ids)``. """ from ..download import downloadDocumentCollection @@ -210,7 +216,7 @@ def downloadNew( return report # Actually fetch documents from the cloud - docs, failed = _download_docs_by_ids(cloud_dataset_id, remote_ids, new_ids, client=client) + docs, failed = downloadNdiDocuments(cloud_dataset_id, remote_ids, new_ids, client=client) saved = _save_downloaded_docs(ds_path, docs) report["downloaded"] = saved report["failed"] = failed @@ -343,11 +349,11 @@ def mirrorFromRemote( return report # Delete local-only documents - deleted = _delete_local_docs(ds_path, to_delete_local) + deleted = deleteLocalDocuments(ds_path, to_delete_local) report["deleted_local"] = deleted # Download remote-only documents - docs, failed = _download_docs_by_ids(cloud_dataset_id, remote_ids, to_download, client=client) + docs, failed = downloadNdiDocuments(cloud_dataset_id, remote_ids, to_download, client=client) saved = _save_downloaded_docs(ds_path, docs) report["downloaded"] = saved report["failed"] = failed @@ -448,7 +454,7 @@ 
@_auto_client
def uploadToNDICloud(
    dataset: Any,
    dataset_id: str,
    *,
    verbose: bool = True,
    client: CloudClient | None = None,
) -> tuple[bool, str]:
    """Upload an NDI database to NDI Cloud.

    MATLAB equivalent: ``ndi.cloud.upload.uploadToNDICloud``

    Reads all documents from the local dataset, asks ``scanForUpload``
    which of them are already uploaded, uploads the remaining ones one
    at a time with ``addDocumentAsFile`` and finally hands the properties
    of every document to ``zipForUpload``.

    A document that fails to upload does not abort the run: a warning
    carrying the error is issued (independent of *verbose*) and the loop
    moves on. Such failures do not change the return value.

    Args:
        dataset: An ndi.session or ndi.dataset object. Objects without a
            ``database_search`` method are treated as having no documents.
        dataset_id: The cloud dataset ID to upload to.
        verbose: Print progress messages.
        client: Authenticated cloud client (auto-created if omitted).

    Returns:
        Tuple of ``(success, error_message)``. ``(False, message)`` when
        ``zipForUpload`` reports failure or an unexpected exception
        escapes; ``(True, "")`` otherwise.
    """
    import warnings

    from ndi.query import Query

    from .api import documents as docs_api

    try:
        if verbose:
            print("Loading documents...")
        all_docs = dataset.database_search(Query("")) if hasattr(dataset, "database_search") else []

        # Normalise once: Document objects expose ``document_properties``,
        # anything else is taken as the properties themselves.
        all_props = [getattr(doc, "document_properties", doc) for doc in all_docs]
        doc_props = [props for props in all_props if isinstance(props, dict)]

        if verbose:
            print("Getting list of previously uploaded documents...")
        doc_structs, file_structs, _total_size = scanForUpload(dataset, dataset_id, client=client)

        docs_left = sum(1 for ds in doc_structs if not ds["is_uploaded"])
        files_left = sum(1 for fs in file_structs if not fs["is_uploaded"])
        if verbose:
            print(f"Found {docs_left} new documents and {files_left} files. Uploading...")

        # Ids still missing from the cloud; an id leaves the set once its
        # upload succeeds so it is never sent twice.
        pending_ids = {ds["docid"] for ds in doc_structs if not ds["is_uploaded"]}

        cur_doc = 0
        for props in doc_props:
            doc_id = props.get("base", {}).get("id", "")
            if doc_id not in pending_ids:
                continue
            cur_doc += 1
            if verbose:
                print(
                    f"Uploading {cur_doc} JSON portions of {docs_left} "
                    f"({100 * cur_doc / max(docs_left, 1):.0f}%)"
                )
            try:
                docs_api.addDocumentAsFile(dataset_id, props, client=client)
            except Exception as exc:
                # Best-effort per document, but never silent.
                warnings.warn(f"Failed to add document {doc_id}: {exc}", stacklevel=2)
            else:
                pending_ids.discard(doc_id)

        # Upload files via zip
        success, msg = zipForUpload(doc_props, dataset_id)
        if not success:
            return False, msg

        return True, ""
    except Exception as exc:
        return False, str(exc)
@pytest.fixture(scope="module", autouse=True)
def _cleanup_stale_pytest_datasets(client, cloud_config):
    """Safety-net: delete any leftover NDI_PYTEST* datasets after all tests.

    Individual tests and fixtures do their own cleanup, but if the test
    runner crashes or a teardown is skipped, datasets can be left behind.
    This module-scoped autouse fixture yields first, so its body runs as
    teardown once the module's tests are done, and sweeps up every dataset
    in ``cloud_config.org_id`` whose name starts with ``NDI_PYTEST``.

    The sweep is strictly best-effort: a failure to list datasets ends it
    quietly, and a failure on one dataset (malformed entry or failed
    delete) never prevents the remaining datasets from being processed.
    """
    yield  # Let all tests run first

    from ndi.cloud.api.datasets import deleteDataset, listDatasets

    try:
        result = listDatasets(cloud_config.org_id, client=client)
        datasets = list(result.get("datasets") or [])
    except Exception:
        return  # Don't fail the test run over cleanup

    for ds in datasets:
        # Each entry gets its own guard so that one bad record (for example
        # a dataset whose name is None) cannot abort the whole sweep.
        try:
            name = ds.get("name") or ""
            ds_id = ds.get("_id") or ds.get("id") or ""
            if name.startswith("NDI_PYTEST") and ds_id:
                deleteDataset(ds_id, when="now", client=client)
        except Exception:
            pass  # Best-effort cleanup
https://claude.ai/code/session_01Y9G6ysXeXzrXRsZGe2Pe3G --- tests/test_cloud_live.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/tests/test_cloud_live.py b/tests/test_cloud_live.py index 8afdb36..821af91 100644 --- a/tests/test_cloud_live.py +++ b/tests/test_cloud_live.py @@ -199,15 +199,27 @@ def _cleanup_stale_pytest_datasets(client, cloud_config): """ yield # Let all tests run first + import warnings + from ndi.cloud.api.datasets import deleteDataset, listDatasets try: result = listDatasets(cloud_config.org_id, client=client) datasets = result.get("datasets", []) - for ds in datasets: - name = ds.get("name", "") - ds_id = ds.get("_id", ds.get("id", "")) - if name.startswith("NDI_PYTEST") and ds_id: + stale = [ + ds for ds in datasets + if ds.get("name", "").startswith("NDI_PYTEST") and ds.get("_id", ds.get("id", "")) + ] + if stale: + names = [f"{ds.get('name')} (id={ds.get('_id', ds.get('id', '?'))})" for ds in stale] + warnings.warn( + f"Cleaning up {len(stale)} leftover NDI_PYTEST_* dataset(s) — " + f"this indicates a test or teardown failed silently:\n" + + "\n".join(f" - {n}" for n in names), + stacklevel=1, + ) + for ds in stale: + ds_id = ds.get("_id", ds.get("id", "")) try: deleteDataset(ds_id, when="now", client=client) except Exception: