Skip to content

Commit

Permalink
Merge pull request #441 from bento-platform/features/last-data-ingestion-for-public
Browse files Browse the repository at this point in the history

feat: add last ingestion time to dataset data type summaries
  • Loading branch information
noctillion committed Nov 20, 2023
2 parents 8bb8dfb + 8202694 commit 5b1d01d
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 12 deletions.
1 change: 1 addition & 0 deletions chord_metadata_service/chord/tests/test_api_data_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ def test_data_type_detail(self):
**DATA_TYPES[DATA_TYPE_PHENOPACKET],
"queryable": True,
"count": 0,
"last_ingested": None,
})

def test_data_type_detail_non_uuid_project(self):
Expand Down
9 changes: 8 additions & 1 deletion chord_metadata_service/chord/tests/test_api_datasets.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import uuid
import re
from django.urls import reverse
from rest_framework import status
from rest_framework.test import APITestCase
Expand Down Expand Up @@ -68,12 +69,18 @@ def test_get_dataset_datatype(self):
)
self.assertEqual(r.status_code, status.HTTP_200_OK)
c = r.json()
self.assertIn("last_ingested", c)
# Check timestamp format for last_ingested
timestamp_pattern = re.compile(r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{6}Z")
self.assertTrue(timestamp_pattern.match(c["last_ingested"]))
del c["last_ingested"]

self.assertDictEqual(c, {
"id": dt,
"label": "Clinical Data",
**DATA_TYPES[dt],
"queryable": True,
"count": 1,
"count": 1
})

def test_del_dataset_datatype(self):
Expand Down
42 changes: 31 additions & 11 deletions chord_metadata_service/chord/views_data_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,18 +28,13 @@
}


async def get_count_for_data_type(
data_type: str,
project: Optional[str] = None,
dataset: Optional[str] = None,
) -> Optional[int]:
async def _filtered_query(data_type: str, project: Optional[str] = None,
dataset: Optional[str] = None) -> Optional[QuerySet]:
"""
Returns the count for a particular data type. If dataset is provided, project will be ignored. If neither are
provided, the count will be for the whole node.
Returns a filtered query based on the data type, project, and dataset.
"""

if data_type == dt.DATA_TYPE_READSET:
# No counts for readset, it's a fake data type inside Katsu...
# No records for readset, it's a fake data type inside Katsu...
return None

q: Optional[QuerySet] = None
Expand Down Expand Up @@ -71,9 +66,33 @@ async def get_count_for_data_type(
raise ValueError("Project ID must be a UUID")

if q is None:
raise ValueError(f"Unsupported data type for count function: {data_type}")
raise ValueError(f"Unsupported data type: {data_type}")

return q


async def get_count_for_data_type(data_type: str, project: Optional[str] = None,
                                  dataset: Optional[str] = None) -> Optional[int]:
    """
    Return the number of records for the given data type.

    If a dataset is provided, the project argument is ignored; if neither is
    provided, the count covers the whole node. Returns None for data types
    with no backing queryset (e.g. the fake "readset" type).
    """
    queryset = await _filtered_query(data_type, project, dataset)
    if queryset is None:
        return None
    return await queryset.acount()


async def get_last_ingested_for_data_type(data_type: str, project: Optional[str] = None,
                                          dataset: Optional[str] = None) -> Optional["datetime.datetime"]:
    """
    Return the `created` timestamp of the most recently ingested record for a
    data type.

    If dataset is provided, project is ignored; if neither is provided, the
    whole node is considered. Returns None when the data type has no backing
    queryset (e.g. the fake "readset" type) or when there are no records yet.
    """
    q = await _filtered_query(data_type, project, dataset)
    if q is None:
        return None

    # Newest record first by the model's `created` field; afirst() yields None
    # on an empty queryset.
    latest_obj = await q.order_by("-created").afirst()
    if latest_obj is None:
        return None

    # Fixed: a stray leftover `return await q.acount()` preceded this return,
    # which would have returned a record count instead of the timestamp.
    return latest_obj.created


async def make_data_type_response_object(
Expand All @@ -86,6 +105,7 @@ async def make_data_type_response_object(
**data_type_details,
"id": data_type_id,
"count": await get_count_for_data_type(data_type_id, project, dataset),
"last_ingested": await get_last_ingested_for_data_type(data_type_id, project, dataset)
}


Expand Down

0 comments on commit 5b1d01d

Please sign in to comment.