Skip to content

Commit

Permalink
Merge pull request #441 from bento-platform/features/last-data-ingestion-for-public
Browse files Browse the repository at this point in the history

feat: add last ingestion time to dataset data type summaries
  • Loading branch information
noctillion committed Nov 20, 2023
2 parents 8bb8dfb + 8202694 commit 5b1d01d
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 12 deletions.
1 change: 1 addition & 0 deletions chord_metadata_service/chord/tests/test_api_data_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ def test_data_type_detail(self):
**DATA_TYPES[DATA_TYPE_PHENOPACKET],
"queryable": True,
"count": 0,
"last_ingested": None,
})

def test_data_type_detail_non_uuid_project(self):
Expand Down
9 changes: 8 additions & 1 deletion chord_metadata_service/chord/tests/test_api_datasets.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import uuid
import re
from django.urls import reverse
from rest_framework import status
from rest_framework.test import APITestCase
Expand Down Expand Up @@ -68,12 +69,18 @@ def test_get_dataset_datatype(self):
)
self.assertEqual(r.status_code, status.HTTP_200_OK)
c = r.json()
self.assertIn("last_ingested", c)
# Check timestamp format for last_ingested
timestamp_pattern = re.compile(r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{6}Z")
self.assertTrue(timestamp_pattern.match(c["last_ingested"]))
del c["last_ingested"]

self.assertDictEqual(c, {
"id": dt,
"label": "Clinical Data",
**DATA_TYPES[dt],
"queryable": True,
"count": 1,
"count": 1
})

def test_del_dataset_datatype(self):
Expand Down
42 changes: 31 additions & 11 deletions chord_metadata_service/chord/views_data_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,18 +28,13 @@
}


async def get_count_for_data_type(
data_type: str,
project: Optional[str] = None,
dataset: Optional[str] = None,
) -> Optional[int]:
async def _filtered_query(data_type: str, project: Optional[str] = None,
dataset: Optional[str] = None) -> Optional[QuerySet]:
"""
Returns the count for a particular data type. If dataset is provided, project will be ignored. If neither are
provided, the count will be for the whole node.
Returns a filtered query based on the data type, project, and dataset.
"""

if data_type == dt.DATA_TYPE_READSET:
# No counts for readset, it's a fake data type inside Katsu...
# No records for readset, it's a fake data type inside Katsu...
return None

q: Optional[QuerySet] = None
Expand Down Expand Up @@ -71,9 +66,33 @@ async def get_count_for_data_type(
raise ValueError("Project ID must be a UUID")

if q is None:
raise ValueError(f"Unsupported data type for count function: {data_type}")
raise ValueError(f"Unsupported data type: {data_type}")

return q


async def get_count_for_data_type(data_type: str, project: Optional[str] = None,
                                  dataset: Optional[str] = None) -> Optional[int]:
    """
    Return the number of records for the given data type.

    If a dataset is provided, the project argument is ignored; if neither is
    provided, the count covers the whole node. Returns None for data types
    with no backing queryset (e.g. the fake "readset" type).
    """
    queryset = await _filtered_query(data_type, project, dataset)
    if queryset is None:
        return None
    return await queryset.acount()


async def get_last_ingested_for_data_type(data_type: str, project: Optional[str] = None,
                                          dataset: Optional[str] = None) -> Optional["datetime.datetime"]:
    """
    Return the `created` timestamp of the most recently ingested record for a
    data type.

    If dataset is provided, project is ignored; if neither is provided, the
    whole node is considered. Returns None when the data type has no backing
    queryset (e.g. the fake "readset" type) or when there are no records yet.
    """
    q = await _filtered_query(data_type, project, dataset)
    if q is None:
        return None

    # Newest record first by the model's `created` field; afirst() yields None
    # on an empty queryset.
    latest_obj = await q.order_by("-created").afirst()
    if latest_obj is None:
        return None

    # Fixed: a stray leftover `return await q.acount()` preceded this return,
    # which would have returned a record count instead of the timestamp.
    return latest_obj.created


async def make_data_type_response_object(
Expand All @@ -86,6 +105,7 @@ async def make_data_type_response_object(
**data_type_details,
"id": data_type_id,
"count": await get_count_for_data_type(data_type_id, project, dataset),
"last_ingested": await get_last_ingested_for_data_type(data_type_id, project, dataset)
}


Expand Down

0 comments on commit 5b1d01d

Please sign in to comment.