Skip to content

Commit

Permalink
Merge pull request #424 from bento-platform/fix/leaked-small-cells
Browse files Browse the repository at this point in the history
  • Loading branch information
davidlougheed committed Aug 7, 2023
2 parents 8899dda + 1383c2d commit b5190c0
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 3 deletions.
22 changes: 20 additions & 2 deletions chord_metadata_service/patients/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import json
import csv
import io
import random
from django.conf import settings
from django.urls import reverse
from django.test import override_settings
Expand Down Expand Up @@ -317,19 +318,22 @@ class PublicFilteringIndividualsTest(APITestCase):
""" Test for api/public GET filtering """

response_threshold = CONFIG_PUBLIC_TEST["rules"]["count_threshold"]
random_range = 137
num_individuals = 137
random_seed = 341 # do not change this please :))

@staticmethod
def response_threshold_check(response):
return response['count'] if 'count' in response else settings.INSUFFICIENT_DATA_AVAILABLE

def setUp(self):
individuals = [c.generate_valid_individual() for _ in range(self.random_range)] # random range
individuals = [c.generate_valid_individual() for _ in range(self.num_individuals)]
for individual in individuals:
Individual.objects.create(**individual)
p = ph_m.Procedure.objects.create(**ph_c.VALID_PROCEDURE_1)
ph_m.Biosample.objects.create(**ph_c.valid_biosample_1(Individual.objects.all()[0], p))

random.seed(self.random_seed)

@override_settings(CONFIG_PUBLIC=CONFIG_PUBLIC_TEST)
def test_public_filtering_sex(self):
# sex field search
Expand Down Expand Up @@ -589,6 +593,20 @@ def test_public_filtering_mapping_for_search_filter(self):
self.assertEqual(1, response_obj["count"])


class PublicFilteringIndividualsTestSmallCellCount(PublicFilteringIndividualsTest):
num_individuals = 3 # below configured test threshold
# rest of the methods are inherited

@override_settings(CONFIG_PUBLIC=CONFIG_PUBLIC_TEST)
def test_public_overview_sex(self):
response = self.client.get('/api/public_search_fields')
self.assertEqual(response.status_code, status.HTTP_200_OK)
response_obj = response.json()

# overview for sex should have 0 entries due to small cell counts
self.assertEqual(len(response_obj["sections"][0]["fields"][0]["options"]), 0) # path to sex field


class PublicAgeRangeFilteringIndividualsTest(APITestCase):
""" Test for api/public GET filtering """

Expand Down
12 changes: 11 additions & 1 deletion chord_metadata_service/restapi/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -579,6 +579,9 @@ def get_field_options(field_props: dict) -> list[Any]:
options = field_props["config"].get("enum")
# Special case: no list of values specified
if options is None:
# We must be careful here not to leak 'small cell' values as options
# - e.g., if there are three individuals with sex=UNKNOWN_SEX, this
# should be treated as if the field isn't in the database at all.
options = get_distinct_field_values(field_props)
elif field_props["datatype"] == "number":
options = [label for floor, ceil, label in labelled_range_generator(field_props)]
Expand All @@ -594,8 +597,15 @@ def get_field_options(field_props: dict) -> list[Any]:


def get_distinct_field_values(field_props: dict) -> list[Any]:
# We must be careful here not to leak 'small cell' values as options
# - e.g., if there are three individuals with sex=UNKNOWN_SEX, this
# should be treated as if the field isn't in the database at all.

model, field = get_model_and_field(field_props["mapping"])
return model.objects.values_list(field, flat=True).order_by(field).distinct()
threshold = get_threshold()

values_with_counts = model.objects.values_list(field).annotate(count=Count(field, distinct=True))
return [val for val, count in values_with_counts if count > threshold]


def filter_queryset_field_value(qs, field_props, value: str):
Expand Down

0 comments on commit b5190c0

Please sign in to comment.