Skip to content

Commit

Permalink
Merge pull request #92 from c3g/table-summary
Browse files Browse the repository at this point in the history
Add table summary endpoint; miscellaneous code cleanup
  • Loading branch information
zxenia committed Mar 4, 2020
2 parents 22ad3ff + 6cd8291 commit ae93f4a
Show file tree
Hide file tree
Showing 15 changed files with 178 additions and 145 deletions.
13 changes: 12 additions & 1 deletion chord_metadata_service/chord/tests/test_api_search.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import json
from unittest.mock import Mock, patch
import uuid

from unittest.mock import patch

from django.test import override_settings
from django.urls import reverse
Expand Down Expand Up @@ -83,6 +85,15 @@ def test_table_list(self):
self.assertEqual(len(c), 1)
self.assertEqual(c[0], self.dataset_rep(self.dataset, c[0]["metadata"]["created"], c[0]["metadata"]["updated"]))

def test_table_summary(self):
    """Exercise the table summary endpoint for an unknown table and for an empty one."""
    # A freshly generated UUID does not identify any table, so the endpoint must answer 404.
    r = self.client.get(reverse("table-summary", kwargs={"table_id": str(uuid.uuid4())}))
    self.assertEqual(r.status_code, 404)

    r = self.client.get(reverse("table-summary", kwargs={"table_id": self.dataset["identifier"]}))
    # Check the status before parsing the body, so that a permission or server error is
    # reported as a status mismatch instead of an obscure KeyError / JSON decoding failure.
    self.assertEqual(r.status_code, 200)
    s = r.json()
    self.assertEqual(s["count"], 0)  # No phenopackets
    self.assertIn("data_type_specific", s)


class SearchTest(APITestCase):
def setUp(self) -> None:
Expand Down
59 changes: 56 additions & 3 deletions chord_metadata_service/chord/views_search.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import itertools
from datetime import datetime

from collections import Counter
from datetime import datetime
from django.db import connection
from django.conf import settings
from psycopg2 import sql
Expand All @@ -11,11 +12,13 @@
from chord_lib.responses.errors import *
from chord_lib.search import build_search_response, postgres
from chord_metadata_service.metadata.settings import DEBUG
from chord_metadata_service.patients.models import Individual
from chord_metadata_service.phenopackets.api_views import PHENOPACKET_PREFETCH
from chord_metadata_service.phenopackets.models import Phenopacket
from chord_metadata_service.phenopackets.models import Phenopacket, Biosample
from chord_metadata_service.phenopackets.schemas import PHENOPACKET_SCHEMA
from chord_metadata_service.phenopackets.serializers import PhenopacketSerializer
from chord_metadata_service.metadata.elastic import es

from .models import Dataset
from .permissions import OverrideOrSuperUserOnly

Expand Down Expand Up @@ -81,7 +84,8 @@ def table_list(request):
@permission_classes([OverrideOrSuperUserOnly])
def table_detail(request, table_id): # pragma: no cover
# TODO: Implement GET, POST
# TODO: Permissions: Check if user has control / more direct access over this dataset? Or just always use owner...
# TODO: Permissions: Check if user has control / more direct access over this table and/or dataset?
# Or just always use owner...
try:
table = Dataset.objects.get(identifier=table_id)
except Dataset.DoesNotExist:
Expand All @@ -92,6 +96,55 @@ def table_detail(request, table_id): # pragma: no cover
return Response(status=204)


@api_view(["GET"])
@permission_classes([OverrideOrSuperUserOnly])
def chord_table_summary(_request, table_id):
    """
    Return summary statistics for the phenopackets stored in one table.

    The table is the Dataset whose ``identifier`` equals ``table_id``. The
    response body contains the number of phenopackets in that table
    (``count``) and, under ``data_type_specific``, counts for the biosamples
    and individuals referenced by those phenopackets, broken down by
    control-sample flag, sex, karyotypic sex and taxonomy ID.

    :param _request: The incoming request (unused).
    :param table_id: Identifier of the table (Dataset) to summarize.
    :return: A 200 response with the summary, or a 404 error response when no
        Dataset has the given identifier.
    """

    # Keep the try body minimal: only the lookup can raise Dataset.DoesNotExist.
    try:
        table = Dataset.objects.get(identifier=table_id)
    except Dataset.DoesNotExist:
        return Response(not_found_error(f"Table with ID {table_id} not found"), status=404)

    phenopackets = Phenopacket.objects.filter(dataset=table)

    # Following the many-to-many relation produces a row with a None ID for every
    # phenopacket that has no biosamples; drop those so they are not counted.
    biosample_ids = frozenset(
        b_id for b_id in phenopackets.values_list("biosamples__id", flat=True) if b_id is not None)

    # Fetch the biosamples once and derive every breakdown from the same rows.
    biosamples = list(Biosample.objects.filter(id__in=biosample_ids))
    biosamples_cs = Counter(b.is_control_sample for b in biosamples)
    biosamples_taxonomy = Counter(b.taxonomy["id"] for b in biosamples if b.taxonomy is not None)

    # An individual is included if it is a phenopacket's subject or is linked to one of
    # its biosamples. Either column may be None (no biosamples / no linked individual).
    individual_ids = frozenset(
        i_id
        for row in phenopackets.values_list("subject", "biosamples__individual_id")
        for i_id in row
        if i_id is not None
    )

    # Same as above: a single query feeds all of the per-individual counters.
    individuals = list(Individual.objects.filter(id__in=individual_ids))
    individuals_sex = Counter(i.sex for i in individuals)
    individuals_k_sex = Counter(i.karyotypic_sex for i in individuals)
    individuals_taxonomy = Counter(i.taxonomy["id"] for i in individuals if i.taxonomy is not None)

    return Response({
        "count": phenopackets.count(),
        "data_type_specific": {
            "biosamples": {
                "count": len(biosample_ids),
                "is_control_sample": dict(biosamples_cs),
                "taxonomy": dict(biosamples_taxonomy),
            },
            "individuals": {
                "count": len(individual_ids),
                # Report every declared choice, including those with a zero count.
                "sex": {k: individuals_sex[k] for k in (s[0] for s in Individual.SEX)},
                "karyotypic_sex": {k: individuals_k_sex[k] for k in (s[0] for s in Individual.KARYOTYPIC_SEX)},
                "diseases": {},
                "taxonomy": dict(individuals_taxonomy),
                # TODO: age histogram
            },
        }
    })


# TODO: CHORD-standardized logging
def debug_log(message): # pragma: no cover
if DEBUG:
Expand Down
3 changes: 2 additions & 1 deletion chord_metadata_service/metadata/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,8 @@
path('data-types/phenopacket/metadata_schema', views_search.data_type_phenopacket_metadata_schema,
name="data-type-metadata-schema"),
path('tables', views_search.table_list, name="table-list"),
path('tables/<str:dataset_id>', views_search.table_detail, name="table-detail"),
path('tables/<str:table_id>', views_search.table_detail, name="table-detail"),
path('tables/<str:table_id>/summary', views_search.chord_table_summary, name="table-summary"),
path('search', views_search.chord_search, name="search"),
path('fhir-search', views_search.fhir_public_search, name="fhir-search"),
path('private/fhir-search', views_search.fhir_private_search, name="fhir-private-search"),
Expand Down
39 changes: 15 additions & 24 deletions chord_metadata_service/patients/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,36 +27,27 @@ class Individual(models.Model, IndexableMixin):
('OTHER_KARYOTYPE', 'OTHER_KARYOTYPE'),
)

id = models.CharField(primary_key=True, max_length=200,
help_text='An arbitrary identifier for the individual.')
id = models.CharField(primary_key=True, max_length=200, help_text='An arbitrary identifier for the individual.')
# TODO check for CURIE
alternate_ids = ArrayField(models.CharField(max_length=200),
blank=True, null=True,
help_text='A list of alternative identifiers for the individual.')
date_of_birth = models.DateField(null=True, blank=True,
help_text='A timestamp either exact or imprecise.')
alternate_ids = ArrayField(models.CharField(max_length=200), blank=True, null=True,
help_text='A list of alternative identifiers for the individual.')
date_of_birth = models.DateField(null=True, blank=True, help_text='A timestamp either exact or imprecise.')
# An ISO8601 string represent age
age = JSONField(blank=True, null=True,
help_text='The age or age range of the individual.')
age = JSONField(blank=True, null=True, help_text='The age or age range of the individual.')
sex = models.CharField(choices=SEX, max_length=200, blank=True, null=True,
help_text='Observed apparent sex of the individual.')
karyotypic_sex = models.CharField(choices=KARYOTYPIC_SEX, max_length=200,
default='UNKNOWN_KARYOTYPE',
help_text='The karyotypic sex of the individual.')
taxonomy = JSONField(blank=True, null=True, help_text='Ontology resource '
'representing the species (e.g., NCBITaxon:9615).')
help_text='Observed apparent sex of the individual.')
karyotypic_sex = models.CharField(choices=KARYOTYPIC_SEX, max_length=200, default='UNKNOWN_KARYOTYPE',
help_text='The karyotypic sex of the individual.')
taxonomy = JSONField(blank=True, null=True,
help_text='Ontology resource representing the species (e.g., NCBITaxon:9615).')
# FHIR specific
active = models.BooleanField(default=False,
help_text='Whether this patient\'s record is in active use.')
deceased = models.BooleanField(default=False,
help_text='Indicates if the individual is deceased or not.')
active = models.BooleanField(default=False, help_text='Whether this patient\'s record is in active use.')
deceased = models.BooleanField(default=False, help_text='Indicates if the individual is deceased or not.')
# mCode specific
race = models.CharField(max_length=200, blank=True,
help_text='A code for the person\'s race.')
ethnicity = models.CharField(max_length=200, blank=True,
help_text='A code for the person\'s ethnicity.')
race = models.CharField(max_length=200, blank=True, help_text='A code for the person\'s race.')
ethnicity = models.CharField(max_length=200, blank=True, help_text='A code for the person\'s ethnicity.')
extra_properties = JSONField(blank=True, null=True,
help_text='Extra properties that are not supported by current schema')
help_text='Extra properties that are not supported by current schema')
created = models.DateTimeField(auto_now_add=True)
updated = models.DateTimeField(auto_now=True)

Expand Down
4 changes: 2 additions & 2 deletions chord_metadata_service/phenopackets/api_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@


class PhenopacketsModelViewSet(viewsets.ModelViewSet):
renderer_classes = tuple(api_settings.DEFAULT_RENDERER_CLASSES) + (PhenopacketsRenderer,)
renderer_classes = (*api_settings.DEFAULT_RENDERER_CLASSES, PhenopacketsRenderer)
pagination_class = LargeResultsSetPagination


class ExtendedPhenopacketsModelViewSet(PhenopacketsModelViewSet):
renderer_classes = tuple(PhenopacketsModelViewSet.renderer_classes) + (FHIRRenderer,)
renderer_classes = (*PhenopacketsModelViewSet.renderer_classes, FHIRRenderer)


class PhenotypicFeatureViewSet(ExtendedPhenopacketsModelViewSet):
Expand Down
3 changes: 1 addition & 2 deletions chord_metadata_service/phenopackets/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from django.utils import timezone
from django.core.exceptions import ValidationError
from django.contrib.postgres.fields import JSONField, ArrayField
from elasticsearch import Elasticsearch
from chord_metadata_service.patients.models import Individual
from chord_metadata_service.restapi.description_utils import rec_help
from chord_metadata_service.restapi.models import IndexableMixin
Expand Down Expand Up @@ -55,7 +54,7 @@ class MetaData(models.Model):
JSONField(null=True, blank=True), blank=True, null=True,
help_text=rec_help(d.META_DATA, "updates"))
phenopacket_schema_version = models.CharField(max_length=200, blank=True,
help_text='Schema version of the current phenopacket.')
help_text='Schema version of the current phenopacket.')
external_references = ArrayField(
JSONField(null=True, blank=True), blank=True, null=True,
help_text=rec_help(d.META_DATA, "external_references"))
Expand Down
54 changes: 16 additions & 38 deletions chord_metadata_service/phenopackets/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,7 @@ def validate_external_references(self, value):
#############################################################

class PhenotypicFeatureSerializer(GenericSerializer):
type = serializers.JSONField(source='pftype',
validators=[JsonSchemaValidator(schema=ONTOLOGY_CLASS)])
type = serializers.JSONField(source='pftype', validators=[JsonSchemaValidator(schema=ONTOLOGY_CLASS)])
severity = serializers.JSONField(
validators=[JsonSchemaValidator(schema=ONTOLOGY_CLASS)],
allow_null=True, required=False)
Expand Down Expand Up @@ -210,8 +209,8 @@ class BiosampleSerializer(GenericSerializer):
tumor_grade = serializers.JSONField(
validators=[JsonSchemaValidator(schema=ONTOLOGY_CLASS)],
allow_null=True, required=False)
phenotypic_features = PhenotypicFeatureSerializer(read_only=True,
many=True, exclude_when_nested=['id', 'biosample'])
phenotypic_features = PhenotypicFeatureSerializer(
read_only=True, many=True, exclude_when_nested=['id', 'biosample'])
procedure = ProcedureSerializer(exclude_when_nested=['id'])

class Meta:
Expand All @@ -238,18 +237,12 @@ def create(self, validated_data):
return biosample

def update(self, instance, validated_data):
instance.sampled_tissue = validated_data.get('sampled_tissue',
instance.sampled_tissue)
instance.taxonomy = validated_data.get('taxonomy',
instance.taxonomy)
instance.histological_diagnosis = validated_data.get('histological_diagnosis',
instance.histological_diagnosis)
instance.tumor_progression = validated_data.get('tumor_progression',
instance.tumor_progression)
instance.tumor_grade = validated_data.get('tumor_grade',
instance.tumor_grade)
instance.diagnostic_markers = validated_data.get('diagnostic_markers',
instance.diagnostic_markers)
instance.sampled_tissue = validated_data.get('sampled_tissue', instance.sampled_tissue)
instance.taxonomy = validated_data.get('taxonomy', instance.taxonomy)
instance.histological_diagnosis = validated_data.get('histological_diagnosis', instance.histological_diagnosis)
instance.tumor_progression = validated_data.get('tumor_progression', instance.tumor_progression)
instance.tumor_grade = validated_data.get('tumor_grade', instance.tumor_grade)
instance.diagnostic_markers = validated_data.get('diagnostic_markers', instance.diagnostic_markers)
instance.save()
procedure_data = validated_data.pop('procedure', None)
if procedure_data:
Expand All @@ -276,25 +269,13 @@ def to_representation(self, instance):
"""
response = super().to_representation(instance)
response['biosamples'] = BiosampleSerializer(
instance.biosamples, many=True, required=False,
exclude_when_nested=["individual"]
).data
response['genes'] = GeneSerializer(
instance.genes, many=True, required=False
).data
response['variants'] = VariantSerializer(
instance.variants, many=True, required=False
).data
response['diseases'] = DiseaseSerializer(
instance.diseases, many=True, required=False
).data
response['hts_files'] = HtsFileSerializer(
instance.hts_files, many=True, required=False
).data
response['meta_data'] = MetaDataSerializer(
instance.meta_data, exclude_when_nested=['id']
).data
response['biosamples'] = BiosampleSerializer(instance.biosamples, many=True, required=False,
exclude_when_nested=["individual"]).data
response['genes'] = GeneSerializer(instance.genes, many=True, required=False).data
response['variants'] = VariantSerializer(instance.variants, many=True, required=False).data
response['diseases'] = DiseaseSerializer(instance.diseases, many=True, required=False).data
response['hts_files'] = HtsFileSerializer(instance.hts_files, many=True, required=False).data
response['meta_data'] = MetaDataSerializer(instance.meta_data, exclude_when_nested=['id']).data
return response


Expand All @@ -319,21 +300,18 @@ def to_representation(self, instance):
#############################################################

class GenomicInterpretationSerializer(GenericSerializer):

class Meta:
model = GenomicInterpretation
fields = '__all__'


class DiagnosisSerializer(GenericSerializer):

class Meta:
model = Diagnosis
fields = '__all__'


class InterpretationSerializer(GenericSerializer):

class Meta:
model = Interpretation
fields = '__all__'

0 comments on commit ae93f4a

Please sign in to comment.