Skip to content

Commit

Permalink
Merge pull request #343 from bento-platform/fix/table_summary
Browse files (browse the repository at this point in the history)
Fix/table summary
  • Loading branch information
brouillette committed Oct 7, 2022
2 parents 31ad089 + 4d883be commit fd60ecf
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 56 deletions.
72 changes: 17 additions & 55 deletions chord_metadata_service/chord/views_search.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -5,7 +5,6 @@

from bento_lib.responses import errors
from bento_lib.search import build_search_response, postgres
from collections import Counter
from datetime import datetime
from django.db import connection
from django.db.models import Count, F
Expand All @@ -14,6 +13,7 @@
from django.conf import settings
from django.views.decorators.cache import cache_page
from psycopg2 import sql
from chord_metadata_service.restapi.utils import get_field_bins, queryset_stats_for_field
from rest_framework.decorators import api_view, permission_classes
from rest_framework.permissions import AllowAny
from rest_framework.response import Response
Expand Down Expand Up @@ -179,68 +179,30 @@ def mcodepacket_table_summary(table):


def phenopacket_table_summary(table):
phenopackets = Phenopacket.objects.filter(table=table) # TODO
phenopacket_qs = Phenopacket.objects.filter(table=table) # TODO

diseases_counter = Counter()
phenotypic_features_counter = Counter()

biosamples_set = set()
individuals_set = set()

biosamples_cs = Counter()
biosamples_taxonomy = Counter()

individuals_sex = Counter()
individuals_k_sex = Counter()
individuals_taxonomy = Counter()

def count_individual(ind):
individuals_set.add(ind.id)
individuals_sex.update((ind.sex,))
individuals_k_sex.update((ind.karyotypic_sex,))
if ind.taxonomy is not None:
individuals_taxonomy.update((ind.taxonomy["id"],))

for p in phenopackets.prefetch_related("biosamples"):
for b in p.biosamples.all():
biosamples_set.add(b.id)
biosamples_cs.update((b.is_control_sample,))

if b.taxonomy is not None:
biosamples_taxonomy.update((b.taxonomy["id"],))

if b.individual is not None:
count_individual(b.individual)

for pf in b.phenotypic_features.all():
phenotypic_features_counter.update((pf.pftype["id"],))

for d in p.diseases.all():
diseases_counter.update((d.term["id"],))

for pf in p.phenotypic_features.all():
phenotypic_features_counter.update((pf.pftype["id"],))

# Currently, phenopacket subject is required so we can assume it's not None
count_individual(p.subject)
# Sex related fields stats are precomputed here and post processed later
# to include missing values inferred from the schema
individuals_sex = queryset_stats_for_field(phenopacket_qs, "subject__sex")
individuals_k_sex = queryset_stats_for_field(phenopacket_qs, "subject__karyotypic_sex")

return Response({
"count": phenopackets.count(),
"count": phenopacket_qs.count(),
"data_type_specific": {
"biosamples": {
"count": len(biosamples_set),
"is_control_sample": dict(biosamples_cs),
"taxonomy": dict(biosamples_taxonomy),
"count": phenopacket_qs.values("biosamples__id").count(),
"is_control_sample": queryset_stats_for_field(phenopacket_qs, "biosamples__is_control_sample"),
"taxonomy": queryset_stats_for_field(phenopacket_qs, "biosamples__taxonomy__label"),
},
"diseases": dict(diseases_counter),
"diseases": queryset_stats_for_field(phenopacket_qs, "diseases__term__label"),
"individuals": {
"count": len(individuals_set),
"sex": {k: individuals_sex[k] for k in (s[0] for s in Individual.SEX)},
"karyotypic_sex": {k: individuals_k_sex[k] for k in (s[0] for s in Individual.KARYOTYPIC_SEX)},
"taxonomy": dict(individuals_taxonomy),
# TODO: age histogram
"count": phenopacket_qs.values("subject__id").count(),
"sex": {k: individuals_sex.get(k, 0) for k in (s[0] for s in Individual.SEX)},
"karyotypic_sex": {k: individuals_k_sex.get(k, 0) for k in (s[0] for s in Individual.KARYOTYPIC_SEX)},
"taxonomy": queryset_stats_for_field(phenopacket_qs, "subject__taxonomy__label"),
"age": get_field_bins(phenopacket_qs, "subject__age_numeric", 10),
},
"phenotypic_features": dict(phenotypic_features_counter),
"phenotypic_features": queryset_stats_for_field(phenopacket_qs, "phenotypic_features__pftype__label"),
}
})

Expand Down
6 changes: 5 additions & 1 deletion chord_metadata_service/restapi/utils.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -215,7 +215,11 @@ def queryset_stats_for_field(queryset, field: str, add_missing=False) -> Mapping
if key is None:
continue

key = key.strip()
if not isinstance(key, str):
key = str(key)
else:
key = key.strip()

if key == "":
continue

Expand Down

0 comments on commit fd60ecf

Please sign in to comment.