Skip to content

Commit

Permalink
Merge pull request #342 from bento-platform/features/fast_text_search
Browse files Browse the repository at this point in the history
Features/fast text search
  • Loading branch information
ppillot committed Oct 5, 2022
2 parents 9aacb9b + 5b96042 commit aa883c6
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 26 deletions.
39 changes: 37 additions & 2 deletions chord_metadata_service/patients/api_views.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from datetime import datetime

from rest_framework import viewsets, filters, mixins
from rest_framework.response import Response
from rest_framework.views import APIView
Expand All @@ -7,12 +9,17 @@
from django.views.decorators.cache import cache_page
from django_filters.rest_framework import DjangoFilterBackend
from django.core.exceptions import ValidationError
from django.db.models import Count, F
from django.db.models.functions import Coalesce
from django.contrib.postgres.aggregates import ArrayAgg
from bento_lib.responses import errors
from bento_lib.search import build_search_response

from .serializers import IndividualSerializer
from .models import Individual
from .filters import IndividualFilter
from chord_metadata_service.phenopackets.api_views import BIOSAMPLE_PREFETCH, PHENOPACKET_PREFETCH
from chord_metadata_service.phenopackets.models import Phenopacket
from chord_metadata_service.restapi.api_renderers import (
FHIRRenderer,
PhenopacketsRenderer,
Expand All @@ -30,6 +37,8 @@
from drf_spectacular.utils import extend_schema, inline_serializer
from rest_framework import serializers

OUTPUT_FORMAT_BENTO_SEARCH_RESULT = "bento_search_result"


class IndividualViewSet(viewsets.ModelViewSet):
"""
Expand Down Expand Up @@ -60,6 +69,32 @@ class IndividualViewSet(viewsets.ModelViewSet):
def dispatch(self, *args, **kwargs):
    """Forward the call, with its arguments unchanged, to the parent class's ``dispatch``."""
    parent = super(IndividualViewSet, self)
    return parent.dispatch(*args, **kwargs)

def list(self, request, *args, **kwargs):
    """
    List individuals.

    When the ``format`` query parameter equals
    ``OUTPUT_FORMAT_BENTO_SEARCH_RESULT``, the regular serializer is bypassed
    and one lightweight row per matching individual is returned, wrapped by
    ``build_search_response``. Each row contains:

    - ``subject_id``: the individual's id
    - ``alternate_ids``: the individual's alternate ids (``[]`` when NULL)
    - ``num_experiments``: count of experiments reached through biosamples
    - ``biosamples``: de-duplicated list of biosample ids (``[]`` when none)

    For any other ``format`` value the parent ``list`` implementation is used.
    """
    # Imported locally so this method does not depend on a change to the
    # module-level import block.
    from django.db.models import Q

    if request.query_params.get("format") != OUTPUT_FORMAT_BENTO_SEARCH_RESULT:
        return super(IndividualViewSet, self).list(request, *args, **kwargs)

    start = datetime.now()

    # filterset applies filtering from the GET parameters
    filterset = self.filterset_class(request.query_params, queryset=self.queryset)

    # Note: it is necessary here to use a second queryset because the
    # filterset queryset uses `distinct()`, which is incompatible with the
    # annotations defined below (in SQL the DISTINCT clause is not compatible
    # with GROUP BY statements, which serve a similar purpose).
    individual_ids = filterset.qs.values_list("id", flat=True)

    # TODO: code duplicated from chord/view_search.py
    qs = Phenopacket.objects.filter(subject__id__in=individual_ids).values(
        "subject_id",
        alternate_ids=Coalesce(F("subject__alternate_ids"), [])
    ).annotate(
        num_experiments=Count("biosamples__experiment"),
        biosamples=Coalesce(
            ArrayAgg(
                "biosamples__id",
                # The join through biosamples -> experiments repeats a
                # biosample once per experiment: keep each id only once.
                distinct=True,
                # A phenopacket without biosamples contributes a NULL row;
                # without this filter the aggregate would be [NULL], which
                # Coalesce cannot turn into [].
                filter=Q(biosamples__id__isnull=False),
            ),
            []
        )
    )
    return Response(build_search_response(list(qs), start))


class BatchViewSet(mixins.ListModelMixin, viewsets.GenericViewSet):
"""
Expand All @@ -80,8 +115,8 @@ class IndividualBatchViewSet(BatchViewSet):
content_negotiation_class = FormatInPostContentNegotiation

def get_queryset(self):
individual_id = self.request.data.get("id", None)
filter_by_id = {"id__in": individual_id} if individual_id else {}
individual_ids = self.request.data.get("id", None)
filter_by_id = {"id__in": individual_ids} if individual_ids else {}
queryset = Individual.objects.filter(**filter_by_id)\
.prefetch_related(
*(f"phenopackets__{p}" for p in PHENOPACKET_PREFETCH if p != "subject"),
Expand Down
6 changes: 6 additions & 0 deletions chord_metadata_service/patients/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
from chord_metadata_service.patients.models import Individual
from chord_metadata_service.restapi.tests.constants import CONFIG_PUBLIC_TEST, CONFIG_PUBLIC_TEST_SEARCH_SEX_ONLY
from chord_metadata_service.restapi.utils import iso_duration_to_years
from chord_metadata_service.phenopackets.tests import constants as ph_c
from chord_metadata_service.phenopackets import models as ph_m

from . import constants as c

Expand Down Expand Up @@ -153,6 +155,10 @@ class IndividualFullTextSearchTest(APITestCase):
def setUp(self):
    """
    Create the fixtures used by the tests: two individuals, one metadata
    record, and one phenopacket whose subject is the first individual.
    """
    self.individual_one = Individual.objects.create(**c.VALID_INDIVIDUAL)
    self.individual_two = Individual.objects.create(**c.VALID_INDIVIDUAL_2)
    self.metadata_1 = ph_m.MetaData.objects.create(**ph_c.VALID_META_DATA_1)

    # Build the phenopacket fields first, then persist them.
    phenopacket_fields = ph_c.valid_phenopacket(
        subject=self.individual_one,
        meta_data=self.metadata_1,
    )
    self.phenopacket_1 = ph_m.Phenopacket.objects.create(**phenopacket_fields)

def test_search(self):
get_resp_1 = self.client.get('/api/individuals?search=P49Y')
Expand Down
27 changes: 5 additions & 22 deletions chord_metadata_service/restapi/api_renderers.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,27 +169,10 @@ def render(self, data, media_type=None, renderer_context=None):


class IndividualBentoSearchRenderer(JSONRenderer):
    """
    This renderer directly maps bento_search_result to the JSON Renderer
    Note: this seems necessary to be able to use the format parameter
    "bento_search_result" in the Individual ViewSet.
    """
    # Media type handed to the parent renderer in render() below.
    media_type = 'application/json'
    # Format name for this renderer; presumably matched against the `format`
    # request parameter (see class docstring) - TODO confirm.
    format = OUTPUT_FORMAT_BENTO_SEARCH_RESULT

    # NOTE(review): the class docstring says results are passed straight to
    # the JSON renderer, but this method rewrites data['results'] first -
    # confirm which of the two reflects the current state of the file.
    def render(self, data, media_type=None, renderer_context=None):
        """
        Replace ``data['results']`` with one summary dict per individual and
        render the result with the parent JSON renderer.

        Each summary has the keys ``subject_id``, ``alternate_ids``,
        ``biosamples`` (list of biosample ids) and ``num_experiments``.

        :param data: dict-like payload; its ``results`` entry (if any) is
            iterated and then overwritten in place.
        :param media_type: ignored; ``self.media_type`` is passed to the parent.
        :param renderer_context: forwarded unchanged to the parent renderer.
        :return: whatever the parent ``render`` returns.
        """
        individuals = []
        for individual in data.get('results', []):
            ind_obj = {
                'subject_id': individual['id'],
                # NOTE(review): the [] default only applies when the key is
                # absent; a key present with value None is passed through as None.
                'alternate_ids': individual.get('alternate_ids', []),  # may be NULL
                'biosamples': [],
                'num_experiments': 0
            }
            if 'phenopackets' in individual:
                # Collect biosample ids across all phenopackets of this individual.
                ids = []
                for p in individual['phenopackets']:
                    if 'biosamples' in p:
                        for biosample in p['biosamples']:
                            ids.append(biosample['id'])
                            # Experiments are counted per biosample entry.
                            if 'experiments' in biosample:
                                ind_obj['num_experiments'] += len(biosample['experiments'])
                ind_obj['biosamples'] = ids
            individuals.append(ind_obj)
        # The input payload is mutated: the original results are replaced.
        data['results'] = individuals
        return super(IndividualBentoSearchRenderer, self).render(data, self.media_type, renderer_context)
4 changes: 2 additions & 2 deletions chord_metadata_service/restapi/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,10 @@ def parse_onset(onset):
if 'age' in onset:
return onset['age']
# age ontology
elif 'id' and 'label' in onset:
elif 'id' in onset and 'label' in onset:
return f"{onset['label']} {onset['id']}"
# age range
elif 'start' and 'end' in onset:
elif 'start' in onset and 'end' in onset:
if 'age' in onset['start'] and 'age' in onset['end']:
return f"{onset['start']['age']} - {onset['end']['age']}"
else:
Expand Down

0 comments on commit aa883c6

Please sign in to comment.