From 54c51aa078e8113aaefd47f0b6f0db8fb3c413e2 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Fri, 20 May 2022 13:22:01 +0200 Subject: [PATCH] include forms with no app ID in get_all_xmlns_app_id_pairs_submitted_to_in_domain --- .../commands/calculate_physical_size.py | 10 ++++- corehq/apps/es/aggregations.py | 8 +++- corehq/ex-submodules/couchforms/analytics.py | 3 +- .../couchforms/tests/test_analytics.py | 39 ++++++++++++------- 4 files changed, 41 insertions(+), 19 deletions(-) diff --git a/corehq/apps/domain/management/commands/calculate_physical_size.py b/corehq/apps/domain/management/commands/calculate_physical_size.py index 7fdbb88d9b02..7dca1dd82c12 100644 --- a/corehq/apps/domain/management/commands/calculate_physical_size.py +++ b/corehq/apps/domain/management/commands/calculate_physical_size.py @@ -3,10 +3,11 @@ from django.core.management.base import BaseCommand -from corehq.apps.es import FormES, CaseSearchES, CaseES +from corehq.apps.es import FormES, CaseSearchES, CaseES, filters from corehq.apps.reports.analytics.esaccessors import ( get_case_types_for_domain_es, ) +from corehq.const import MISSING_APP_ID from couchforms.analytics import ( get_all_xmlns_app_id_pairs_submitted_to_in_domain, ) @@ -118,9 +119,14 @@ def _get_form_size_stats(domain, sample_size): query = (FormES() .domain(domain) .sort('received_on', desc=True) - .app(app_id) .xmlns(xmlns) .size(sample_size)) + + if app_id == MISSING_APP_ID: + query = query.filter(filters.missing("app_id")) + else: + query = query.app(app_id) + num_forms, avg_size = _get_totals_for_query(query) total_bytes += num_forms * avg_size total_forms += num_forms diff --git a/corehq/apps/es/aggregations.py b/corehq/apps/es/aggregations.py index 7e07bb8ca483..0d9fb7f49db0 100644 --- a/corehq/apps/es/aggregations.py +++ b/corehq/apps/es/aggregations.py @@ -204,11 +204,15 @@ class TermsAggregation(Aggregation): :param name: aggregation name :param field: name of the field to bucket on :param size: + :param missing: define how documents that are missing a value should be treated. + By default, they will be ignored. If a value is supplied here it will be used where + the value is missing. + """ type = "terms" result_class = BucketResult - def __init__(self, name, field, size=None): + def __init__(self, name, field, size=None, missing=None): assert re.match(r'\w+$', name), \ "Names must be valid python variable names, was {}".format(name) self.name = name @@ -216,6 +220,8 @@ def __init__(self, name, field, size=None): "field": field, "size": size if size is not None else SIZE_LIMIT, } + if missing: + self.body["missing"] = missing def order(self, field, order="asc", reset=True): query = deepcopy(self) diff --git a/corehq/ex-submodules/couchforms/analytics.py b/corehq/ex-submodules/couchforms/analytics.py index f49545cc5faa..702cb81c64ca 100644 --- a/corehq/ex-submodules/couchforms/analytics.py +++ b/corehq/ex-submodules/couchforms/analytics.py @@ -2,6 +2,7 @@ from corehq.apps.es import FormES from corehq.apps.es.aggregations import TermsAggregation +from corehq.const import MISSING_APP_ID from corehq.util.quickcache import quickcache from corehq.util.couch import stale_ok @@ -74,7 +75,7 @@ def get_all_xmlns_app_id_pairs_submitted_to_in_domain(domain): query = (FormES() .domain(domain) .aggregation( - TermsAggregation("app_id", "app_id").aggregation( + TermsAggregation("app_id", "app_id", missing=MISSING_APP_ID).aggregation( TermsAggregation("xmlns", "xmlns.exact"))) .remove_default_filter("has_xmlns") .remove_default_filter("has_user") diff --git a/corehq/ex-submodules/couchforms/tests/test_analytics.py b/corehq/ex-submodules/couchforms/tests/test_analytics.py index 42cc2043e518..1d87be08bd9c 100644 --- a/corehq/ex-submodules/couchforms/tests/test_analytics.py +++ b/corehq/ex-submodules/couchforms/tests/test_analytics.py @@ -4,6 +4,7 @@ from django.test import TestCase from requests import ConnectionError +from corehq.const import MISSING_APP_ID from couchforms.analytics import ( app_has_been_submitted_to_in_last_30_days, domain_has_submission_in_last_30_days, @@ -130,27 +131,35 @@ class CouchformsESAnalyticsTest(TestCase): def setUpClass(cls): super(CouchformsESAnalyticsTest, cls).setUpClass() - def create_form_and_sync_to_es(received_on): - with process_pillow_changes('xform-pillow', {'skip_ucr': True}): - with process_pillow_changes('DefaultChangeFeedPillow'): - metadata = TestFormMetadata(domain=cls.domain, app_id=cls.app_id, - xmlns=cls.xmlns, received_on=received_on) - form = get_form_ready_to_save(metadata, is_db_test=True) - form_processor = FormProcessorInterface(domain=cls.domain) - form_processor.save_processed_models([form]) - return form - - from casexml.apps.case.tests.util import delete_all_xforms - delete_all_xforms() cls.now = datetime.datetime.utcnow() cls._60_days = datetime.timedelta(days=60) cls.domain = 'my_crazy_analytics_domain' cls.app_id = uuid.uuid4().hex cls.xmlns = 'my://crazy.xmlns/' + + def create_form(received_on, app_id=cls.app_id, xmlns=cls.xmlns): + metadata = TestFormMetadata(domain=cls.domain, app_id=app_id, + xmlns=xmlns, received_on=received_on) + form = get_form_ready_to_save(metadata, is_db_test=True) + form_processor = FormProcessorInterface(domain=cls.domain) + form_processor.save_processed_models([form]) + return form + + def create_forms_and_sync_to_es(): + forms = [] + with process_pillow_changes('xform-pillow', {'skip_ucr': True}): + with process_pillow_changes('DefaultChangeFeedPillow'): + for received_on in [cls.now, cls.now - cls._60_days]: + forms.append(create_form(received_on)) + forms.append(create_form(cls.now, app_id=None, xmlns="system")) + return forms + + from casexml.apps.case.tests.util import delete_all_xforms + delete_all_xforms() with trap_extra_setup(ConnectionError): cls.elasticsearch = get_es_new() initialize_index_and_mapping(cls.elasticsearch, XFORM_INDEX_INFO) - cls.forms = [create_form_and_sync_to_es(cls.now), create_form_and_sync_to_es(cls.now - cls._60_days)] + cls.forms = create_forms_and_sync_to_es() cls.elasticsearch.indices.refresh(XFORM_INDEX_INFO.alias) @@ -160,7 +169,7 @@ def tearDownClass(cls): FormProcessorTestUtils.delete_all_cases_forms_ledgers(cls.domain) super(CouchformsESAnalyticsTest, cls).tearDownClass() - def test_get_number_of_cases_in_domain(self): + def test_get_number_of_forms_in_domain(self): self.assertEqual( get_number_of_forms_in_domain(self.domain), len(self.forms) @@ -187,4 +196,4 @@ def test_app_has_been_submitted_to_in_last_30_days(self): def test_get_all_xmlns_app_id_pairs_submitted_to_in_domain(self): self.assertEqual( get_all_xmlns_app_id_pairs_submitted_to_in_domain(self.domain), - {(self.xmlns, self.app_id)}) + {(self.xmlns, self.app_id), ("system", MISSING_APP_ID)})