Merge pull request #403 from bento-platform/feat/data-type-counts
Add data type counts & queryable flag to data types
davidlougheed committed May 17, 2023
2 parents 08d8e87 + abb57b7 commit 8eeaa6d
Showing 9 changed files with 274 additions and 102 deletions.
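For orientation, here is a minimal sketch of the entry shape the data-type endpoints return after this change; the values are illustrative and mirror the new tests below, not a live instance.

# Shape of one entry from the data-type list/detail endpoints after this change.
# "count" is scoped by the optional ?project= / ?dataset= query parameters and is
# null for readsets, which store no records of their own.
example_entry = {
    "id": "phenopacket",
    "label": "Clinical Data",   # settings.KATSU_PHENOPACKET_LABEL
    "queryable": True,          # new flag: whether this type supports search
    "schema": {},               # search schema, elided here
    "metadata_schema": {},      # table metadata schema, elided here
    "count": 0,                 # new field: record count in the requested scope
}
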
5 changes: 5 additions & 0 deletions chord_metadata_service/chord/data_types.py
@@ -14,6 +14,7 @@
"DATA_TYPES",
]


DATA_TYPE_EXPERIMENT = "experiment"
DATA_TYPE_EXPERIMENT_RESULT = "experiment_result"
DATA_TYPE_PHENOPACKET = "phenopacket"
@@ -23,13 +24,15 @@
DATA_TYPES = {
DATA_TYPE_EXPERIMENT: {
"label": "Experiments",
"queryable": True,
"schema": EXPERIMENT_SEARCH_SCHEMA,
"metadata_schema": {
"type": "object", # TODO
},
},
DATA_TYPE_PHENOPACKET: {
"label": settings.KATSU_PHENOPACKET_LABEL,
"queryable": True,
"schema": PHENOPACKET_SEARCH_SCHEMA,
"metadata_schema": {
"type": "object", # TODO
@@ -44,6 +47,7 @@
# },
DATA_TYPE_READSET: {
"label": "Readsets",
"queryable": False,
"schema": {
"file_format": EXPERIMENT_RESULT_SCHEMA["properties"]["file_format"]
},
@@ -53,6 +57,7 @@
},
DATA_TYPE_EXPERIMENT_RESULT: {
"label": "Experiment Results",
"queryable": False,
"schema": EXPERIMENT_RESULT_SCHEMA,
"metadata_schema": {
"type": "object"
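The new flag lets consumers distinguish searchable data types from count-only ones. A hedged sketch of how a caller might use it (not part of this commit):

# Only types marked queryable should be offered for search; with this commit,
# readsets and experiment results are counted but not searchable.
from chord_metadata_service.chord.data_types import DATA_TYPES

searchable = [dt_id for dt_id, dt_def in DATA_TYPES.items() if dt_def["queryable"]]
# -> ["experiment", "phenopacket"]
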
106 changes: 106 additions & 0 deletions chord_metadata_service/chord/tests/test_api_data_types.py
@@ -0,0 +1,106 @@
from django.urls import reverse
from rest_framework import status
from rest_framework.test import APITestCase

from ..data_types import (
DATA_TYPE_EXPERIMENT,
DATA_TYPE_EXPERIMENT_RESULT,
DATA_TYPE_PHENOPACKET,
DATA_TYPE_READSET,
DATA_TYPES
)
from ..views_data_types import get_count_for_data_type

POST_GET = ("POST", "GET")

DATA_TYPE_NOT_REAL = "not_a_real_data_type"


class DataTypeTest(APITestCase):
def test_data_type_list(self):
r = self.client.get(reverse("data-type-list"))
self.assertEqual(r.status_code, status.HTTP_200_OK)
c = r.json()
self.assertEqual(len(c), len(DATA_TYPES))
ids = [dt["id"] for dt in c]
self.assertIn(DATA_TYPE_EXPERIMENT, ids)
# self.assertIn(DATA_TYPE_MCODEPACKET, ids)
self.assertIn(DATA_TYPE_PHENOPACKET, ids)
self.assertIn(DATA_TYPE_READSET, ids)
self.assertIn(DATA_TYPE_EXPERIMENT_RESULT, ids)

def test_data_type_list_non_uuid_project(self):
# Non-UUID project
r = self.client.get(reverse("data-type-list"), {"project": "a"})
self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST)

def test_data_type_list_non_uuid_dataset(self):
# Non-UUID dataset
r = self.client.get(reverse("data-type-list"), {"dataset": "a"})
self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST)

def test_data_type_detail(self):
self.maxDiff = None
r = self.client.get(reverse("data-type-detail", kwargs={"data_type": DATA_TYPE_PHENOPACKET}))
self.assertEqual(r.status_code, status.HTTP_200_OK)
c = r.json()
self.assertDictEqual(c, {
"id": DATA_TYPE_PHENOPACKET,
"label": "Clinical Data",
**DATA_TYPES[DATA_TYPE_PHENOPACKET],
"queryable": True,
"count": 0,
})

def test_data_type_detail_non_uuid_project(self):
# Non-UUID project
r = self.client.get(reverse("data-type-detail", kwargs={"data_type": DATA_TYPE_PHENOPACKET}), {"project": "a"})
self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST)

r = self.client.get(
reverse("data-type-detail", kwargs={"data_type": DATA_TYPE_EXPERIMENT_RESULT}), {"project": "a"})
self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST)

def test_data_type_detail_non_uuid_dataset(self):
# Non-UUID dataset
r = self.client.get(reverse("data-type-detail", kwargs={"data_type": DATA_TYPE_PHENOPACKET}), {"dataset": "a"})
self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST)

r = self.client.get(reverse(
"data-type-detail", kwargs={"data_type": DATA_TYPE_EXPERIMENT_RESULT}), {"dataset": "a"})
self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST)

def test_data_type_detail_bad_data_type_for_count(self):
r = self.client.get(reverse("data-type-detail", kwargs={"data_type": DATA_TYPE_READSET}))
self.assertIsNone(r.json()["count"])

async def test_data_type_count_bad_data_type(self):
with self.assertRaises(ValueError):
await get_count_for_data_type(DATA_TYPE_NOT_REAL)

def test_data_type_detail_404(self):
r = self.client.get(reverse("data-type-detail", kwargs={"data_type": DATA_TYPE_NOT_REAL}))
self.assertEqual(r.status_code, status.HTTP_404_NOT_FOUND)
r.json() # assert json response

def test_data_type_schema(self):
r = self.client.get(reverse("data-type-schema", kwargs={"data_type": DATA_TYPE_PHENOPACKET}))
self.assertEqual(r.status_code, status.HTTP_200_OK)
c = r.json()
self.assertDictEqual(c, DATA_TYPES[DATA_TYPE_PHENOPACKET]["schema"])

def test_data_type_schema_404(self):
r = self.client.get(reverse("data-type-schema", kwargs={"data_type": DATA_TYPE_NOT_REAL}))
self.assertEqual(r.status_code, status.HTTP_404_NOT_FOUND)
r.json() # assert json response

def test_data_type_metadata_schema(self):
r = self.client.get(reverse("data-type-metadata-schema", kwargs={"data_type": DATA_TYPE_PHENOPACKET}))
self.assertEqual(r.status_code, status.HTTP_200_OK)
c = r.json()
self.assertDictEqual(c, DATA_TYPES[DATA_TYPE_PHENOPACKET]["metadata_schema"])

def test_data_type_metadata_schema_404(self):
r = self.client.get(reverse("data-type-metadata-schema", kwargs={"data_type": DATA_TYPE_NOT_REAL}))
self.assertEqual(r.status_code, status.HTTP_404_NOT_FOUND)
r.json() # assert json response
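A small sketch of running just this new test module programmatically; it assumes DJANGO_SETTINGS_MODULE already points at a valid Katsu test configuration.

# Run only the new data-type API tests via Django's management API.
import django
from django.core.management import call_command

django.setup()
call_command("test", "chord_metadata_service.chord.tests.test_api_data_types")
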
54 changes: 0 additions & 54 deletions chord_metadata_service/chord/tests/test_api_search.py
@@ -41,66 +41,12 @@
from ..models import Project, Dataset, TableOwnership, Table
from ..data_types import (
DATA_TYPE_EXPERIMENT,
DATA_TYPE_EXPERIMENT_RESULT,
DATA_TYPE_PHENOPACKET,
DATA_TYPE_READSET,
DATA_TYPES
)

POST_GET = ("POST", "GET")

DATA_TYPE_NOT_REAL = "not_a_real_data_type"


class DataTypeTest(APITestCase):
def test_data_type_list(self):
r = self.client.get(reverse("data-type-list"))
self.assertEqual(r.status_code, status.HTTP_200_OK)
c = r.json()
self.assertEqual(len(c), len(DATA_TYPES))
ids = [dt["id"] for dt in c]
self.assertIn(DATA_TYPE_EXPERIMENT, ids)
# self.assertIn(DATA_TYPE_MCODEPACKET, ids)
self.assertIn(DATA_TYPE_PHENOPACKET, ids)
self.assertIn(DATA_TYPE_READSET, ids)
self.assertIn(DATA_TYPE_EXPERIMENT_RESULT, ids)

def test_data_type_detail(self):
r = self.client.get(reverse("data-type-detail", kwargs={"data_type": DATA_TYPE_PHENOPACKET}))
self.assertEqual(r.status_code, status.HTTP_200_OK)
c = r.json()
self.assertDictEqual(c, {
"id": DATA_TYPE_PHENOPACKET,
**DATA_TYPES[DATA_TYPE_PHENOPACKET],
})

def test_data_type_detail_404(self):
r = self.client.get(reverse("data-type-detail", kwargs={"data_type": DATA_TYPE_NOT_REAL}))
self.assertEqual(r.status_code, status.HTTP_404_NOT_FOUND)
r.json() # assert json response

def test_data_type_schema(self):
r = self.client.get(reverse("data-type-schema", kwargs={"data_type": DATA_TYPE_PHENOPACKET}))
self.assertEqual(r.status_code, status.HTTP_200_OK)
c = r.json()
self.assertDictEqual(c, DATA_TYPES[DATA_TYPE_PHENOPACKET]["schema"])

def test_data_type_schema_404(self):
r = self.client.get(reverse("data-type-schema", kwargs={"data_type": DATA_TYPE_NOT_REAL}))
self.assertEqual(r.status_code, status.HTTP_404_NOT_FOUND)
r.json() # assert json response

def test_data_type_metadata_schema(self):
r = self.client.get(reverse("data-type-metadata-schema", kwargs={"data_type": DATA_TYPE_PHENOPACKET}))
self.assertEqual(r.status_code, status.HTTP_200_OK)
c = r.json()
self.assertDictEqual(c, DATA_TYPES[DATA_TYPE_PHENOPACKET]["metadata_schema"])

def test_data_type_metadata_schema_404(self):
r = self.client.get(reverse("data-type-metadata-schema", kwargs={"data_type": DATA_TYPE_NOT_REAL}))
self.assertEqual(r.status_code, status.HTTP_404_NOT_FOUND)
r.json() # assert json response


class TableTest(APITestCase):
@staticmethod
11 changes: 6 additions & 5 deletions chord_metadata_service/chord/urls.py
@@ -1,6 +1,6 @@
from django.urls import path

from . import views_search
from . import views_data_types, views_search
from .export import views as views_export
from .ingest import views as views_ingest
from .workflows import views as views_workflow
@@ -13,12 +13,13 @@
path('private/ingest', views_ingest.ingest, name="ingest"),
path('private/export', views_export.export, name="export"),

path('data-types', views_search.data_type_list, name="data-type-list"),
path('data-types/<str:data_type>', views_search.data_type_detail, name="data-type-detail"),
path('data-types/<str:data_type>/schema', views_search.data_type_schema, name="data-type-schema"),
path('data-types', views_data_types.data_type_list, name="data-type-list"),
path('data-types/<str:data_type>', views_data_types.data_type_detail, name="data-type-detail"),
path('data-types/<str:data_type>/schema', views_data_types.data_type_schema, name="data-type-schema"),
# TODO: Consistent snake or kebab
path('data-types/<str:data_type>/metadata_schema', views_search.data_type_metadata_schema,
path('data-types/<str:data_type>/metadata_schema', views_data_types.data_type_metadata_schema,
name="data-type-metadata-schema"),

path('tables', views_search.table_list, name="chord-table-list"),
path('tables/<str:table_id>', views_search.table_detail, name="chord-table-detail"),
path('tables/<str:table_id>/summary', views_search.chord_table_summary, name="table-summary"),
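The route names registered above are what the test suite resolves with Django's reverse(); a quick sketch (the leading path prefix depends on where chord/urls.py is mounted in the project URLconf):

from django.urls import reverse

detail_url = reverse("data-type-detail", kwargs={"data_type": "phenopacket"})
schema_url = reverse("data-type-schema", kwargs={"data_type": "phenopacket"})
# e.g. ".../data-types/phenopacket" and ".../data-types/phenopacket/schema"
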
132 changes: 132 additions & 0 deletions chord_metadata_service/chord/views_data_types.py
@@ -0,0 +1,132 @@
from bento_lib.responses import errors
from django.core.exceptions import ValidationError
from django.db.models import QuerySet
from django.http import HttpRequest

from adrf.decorators import api_view
from rest_framework import status
from rest_framework.decorators import permission_classes
from rest_framework.permissions import AllowAny
from rest_framework.response import Response

from typing import Optional

from chord_metadata_service.phenopackets.models import Phenopacket
from chord_metadata_service.experiments.models import Experiment, ExperimentResult

from . import data_types as dt


async def get_count_for_data_type(
data_type: str,
project: Optional[str] = None,
dataset: Optional[str] = None,
) -> Optional[int]:
"""
Returns the count for a particular data type. If dataset is provided, project will be ignored. If neither are
provided, the count will be for the whole node.
"""

if data_type == dt.DATA_TYPE_READSET:
# No counts for readset, it's a fake data type inside Katsu...
return None

q: Optional[QuerySet] = None

if data_type in (dt.DATA_TYPE_PHENOPACKET, dt.DATA_TYPE_EXPERIMENT):
q = (Phenopacket if data_type == dt.DATA_TYPE_PHENOPACKET else Experiment).objects.all()
if dataset:
try:
q = q.filter(table__ownership_record__dataset_id=dataset)
except ValidationError:
raise ValueError("Dataset ID must be a UUID")
elif project:
try:
q = q.filter(table__ownership_record__dataset__project_id=project)
except ValidationError:
raise ValueError("Project ID must be a UUID")

elif data_type == dt.DATA_TYPE_EXPERIMENT_RESULT:
q = ExperimentResult.objects.all()
if dataset:
try:
q = q.filter(experiment__table__ownership_record__dataset_id=dataset)
except ValidationError:
raise ValueError("Dataset ID must be a UUID")
elif project:
try:
q = q.filter(experiment__table__ownership_record__dataset__project_id=project)
except ValidationError:
raise ValueError("Project ID must be a UUID")

if q is None:
raise ValueError(f"Unsupported data type for count function: {data_type}")

return await q.acount()


async def make_data_type_response_object(
data_type_id: str,
data_type_details: dict,
project: Optional[str],
dataset: Optional[str],
) -> dict:
return {
**data_type_details,
"id": data_type_id,
"count": await get_count_for_data_type(data_type_id, project, dataset),
}


@api_view(["GET"])
@permission_classes([AllowAny])
async def data_type_list(request: HttpRequest):
# TODO: Permissions: only return counts when we are authenticated/have access to counts or full data.

project = request.GET.get("project", "").strip() or None
dataset = request.GET.get("dataset", "").strip() or None

dt_response = []
for dt_id, dt_d in dt.DATA_TYPES.items():
try:
dt_response.append(await make_data_type_response_object(dt_id, dt_d, project, dataset))
except ValueError as e:
return Response(errors.bad_request_error(str(e)), status=status.HTTP_400_BAD_REQUEST)

dt_response.sort(key=lambda d: d["id"])
return Response(dt_response)


@api_view(["GET"])
@permission_classes([AllowAny])
async def data_type_detail(request: HttpRequest, data_type: str):
# TODO: Permissions: only return counts when we are authenticated/have access to counts or full data.

if data_type not in dt.DATA_TYPES:
return Response(errors.not_found_error(f"Data type {data_type} not found"), status=status.HTTP_404_NOT_FOUND)

project = request.GET.get("project", "").strip() or None
dataset = request.GET.get("dataset", "").strip() or None

try:
return Response(await make_data_type_response_object(data_type, dt.DATA_TYPES[data_type], project, dataset))
except ValueError as e:
return Response(errors.bad_request_error(str(e)), status=status.HTTP_400_BAD_REQUEST)


@api_view(["GET"])
@permission_classes([AllowAny])
async def data_type_schema(_request: HttpRequest, data_type: str):
if data_type not in dt.DATA_TYPES:
return Response(errors.not_found_error(f"Data type {data_type} not found"), status=status.HTTP_404_NOT_FOUND)

return Response(dt.DATA_TYPES[data_type]["schema"])


@api_view(["GET"])
@permission_classes([AllowAny])
async def data_type_metadata_schema(_request: HttpRequest, data_type: str):
if data_type not in dt.DATA_TYPES:
return Response(errors.not_found_error(f"Data type {data_type} not found"), status=status.HTTP_404_NOT_FOUND)

return Response(dt.DATA_TYPES[data_type]["metadata_schema"])
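
get_count_for_data_type() can also be imported directly; a minimal sketch of calling it from synchronous code inside a configured Django environment (the dataset UUID is purely illustrative):

# Node-wide and dataset-scoped phenopacket counts. A ValueError is raised for
# unknown data types or non-UUID project/dataset IDs, which the views above map
# to HTTP 400 responses.
from asgiref.sync import async_to_sync

from chord_metadata_service.chord.data_types import DATA_TYPE_PHENOPACKET
from chord_metadata_service.chord.views_data_types import get_count_for_data_type

node_count = async_to_sync(get_count_for_data_type)(DATA_TYPE_PHENOPACKET)
dataset_count = async_to_sync(get_count_for_data_type)(
    DATA_TYPE_PHENOPACKET,
    dataset="11111111-1111-1111-1111-111111111111",  # illustrative UUID
)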
