Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

improve report filtering related to system forms #31654

Merged
merged 7 commits into from
May 25, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@

from django.core.management.base import BaseCommand

from corehq.apps.es import FormES, CaseSearchES, CaseES
from corehq.apps.es import FormES, CaseSearchES, CaseES, filters
from corehq.apps.reports.analytics.esaccessors import (
get_case_types_for_domain_es,
)
from corehq.const import MISSING_APP_ID
from couchforms.analytics import (
get_all_xmlns_app_id_pairs_submitted_to_in_domain,
)
Expand Down Expand Up @@ -118,9 +119,14 @@ def _get_form_size_stats(domain, sample_size):
query = (FormES()
.domain(domain)
.sort('received_on', desc=True)
.app(app_id)
.xmlns(xmlns)
.size(sample_size))

if app_id == MISSING_APP_ID:
query = query.filter(filters.missing("app_id"))
else:
query = query.app(app_id)

num_forms, avg_size = _get_totals_for_query(query)
total_bytes += num_forms * avg_size
total_forms += num_forms
Expand Down
8 changes: 7 additions & 1 deletion corehq/apps/es/aggregations.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,18 +204,24 @@ class TermsAggregation(Aggregation):
:param name: aggregation name
:param field: name of the field to bucket on
:param size:
:param missing: define how documents that are missing a value should be treated.
By default, they will be ignored. If a value is supplied here it will be used where
the value is missing.

"""
type = "terms"
result_class = BucketResult

def __init__(self, name, field, size=None):
def __init__(self, name, field, size=None, missing=None):
assert re.match(r'\w+$', name), \
"Names must be valid python variable names, was {}".format(name)
self.name = name
self.body = {
"field": field,
"size": size if size is not None else SIZE_LIMIT,
}
if missing:
self.body["missing"] = missing
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


def order(self, field, order="asc", reset=True):
query = deepcopy(self)
Expand Down
14 changes: 6 additions & 8 deletions corehq/apps/hqcase/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,28 +3,26 @@
from xml.etree import cElementTree as ElementTree

from django.template.loader import render_to_string
from django.utils.translation import gettext_lazy

from casexml.apps.case.mock import CaseBlock
from casexml.apps.case.util import property_changed_in_action
from corehq.apps.es.cases import CaseES
from corehq.apps.es import filters
from dimagi.utils.parsing import json_format_datetime

from corehq.apps.es import filters
from corehq.apps.es.cases import CaseES
from corehq.apps.receiverwrapper.util import submit_form_locally
from corehq.apps.users.util import SYSTEM_USER_ID
from corehq.form_processor.exceptions import (
CaseNotFound,
MissingFormXml,
)
from corehq.form_processor.exceptions import CaseNotFound, MissingFormXml
from corehq.form_processor.models import CommCareCase

CASEBLOCK_CHUNKSIZE = 100
SYSTEM_FORM_XMLNS = 'http://commcarehq.org/case'
EDIT_FORM_XMLNS = 'http://commcarehq.org/case/edit'

SYSTEM_FORM_XMLNS_MAP = {
SYSTEM_FORM_XMLNS: 'System Form',
EDIT_FORM_XMLNS: 'Data Cleaning Form',
SYSTEM_FORM_XMLNS: gettext_lazy('System Form'),
EDIT_FORM_XMLNS: gettext_lazy('Data Cleaning Form'),
}

ALLOWED_CASE_IDENTIFIER_TYPES = [
Expand Down
9 changes: 8 additions & 1 deletion corehq/apps/reports/filters/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from couchdbkit.exceptions import ResourceNotFound
from memoized import memoized

from corehq.apps.hqcase.utils import SYSTEM_FORM_XMLNS_MAP
from couchforms.analytics import (
get_all_xmlns_app_id_pairs_submitted_to_in_domain,
)
Expand Down Expand Up @@ -404,7 +405,13 @@ def get_unknown_form_name(self, xmlns, app_id=None, none_if_not_found=False):
return list(form['name'].values())[0]

guessed_name = guess_form_name_from_submissions_using_xmlns(self.domain, xmlns)
return guessed_name or (None if none_if_not_found else _("Name Unknown"))
if guessed_name:
return guessed_name

if xmlns in SYSTEM_FORM_XMLNS_MAP:
return SYSTEM_FORM_XMLNS_MAP[xmlns]

return None if none_if_not_found else _("Name Unknown")

@staticmethod
def get_translated_value(display_lang, app_langs, obj):
Expand Down
20 changes: 13 additions & 7 deletions corehq/apps/reports/standard/inspect.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,12 @@ def default_datespan(self):
return datespan_from_beginning(self.domain_object, self.timezone)

def _get_users_filter(self, mobile_user_and_group_slugs):
if (
EMWF.no_filters_selected(mobile_user_and_group_slugs)
and self.request.couch_user.has_permission(self.domain, 'access_all_locations')
):
return None

user_ids = (EMWF.user_es_query(self.domain,
mobile_user_and_group_slugs,
self.request.couch_user)
Expand Down Expand Up @@ -102,20 +108,20 @@ def es_query(self):
query = (form_es.FormES()
.domain(self.domain)
.filter(time_filter(gte=self.datespan.startdate,
lt=self.datespan.enddate_adjusted))
.filter(self._get_users_filter(mobile_user_and_group_slugs)))
lt=self.datespan.enddate_adjusted)))

users_filter = self._get_users_filter(mobile_user_and_group_slugs)
if users_filter:
query = query.filter(users_filter)

# filter results by app and xmlns if applicable
if FormsByApplicationFilter.has_selections(self.request):
form_values = list(self.all_relevant_forms.values())
if form_values:
query = query.OR(*[self._form_filter(f) for f in form_values])
else:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

aside: FormsByApplicationFilter.has_selections counts "Show Advanced Options" as a selection, which is a little unintuitive in my opinion.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree but if you to select it and not change anything else then it does change the results to only include 'known forms'. I toyed with changing but the added complexity didn't seem worth it in the context of this PR.

query = query.NOT(es_filters.missing("app_id"))

# Exclude system forms unless they selected "Unknown User"
if HQUserType.UNKNOWN not in EMWF.selected_user_types(mobile_user_and_group_slugs):
query = query.NOT(form_es.xmlns(
list(SYSTEM_FORM_XMLNS_MAP.keys())
))
return query

@property
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,11 @@
{% trans 'Known Forms' %}
<span class="hq-help-template"
data-title="{% trans "What are Known Forms?" %}"
data-content="{% trans "Known Forms are forms that have IDs which can be matched to existing or deleted CommCare Applications in your Project." %}"
></span>
data-content="{% blocktrans %}
These are forms that can be matched to an existing or deleted CommCare Application
in your project.
{% endblocktrans %}">
</span>
</label>
</div>
<div class="radio">
Expand All @@ -27,11 +30,15 @@
name="{{ slug }}_{{ unknown.slug }}"
id="{{ css_id }}_{{ unknown.slug }}_show"
value="yes">
{% trans 'Unknown Forms (Possibly Deleted)' %}
{% trans 'Unknown Forms' %}
<span class="hq-help-template"
data-title="{% trans "What are Unknown Forms?" %}"
data-content="{% trans "We tried and tried, but these form IDs did not belong to any CommCare Applications (existing or deleted) in your Project. It might mean that these forms once belonged to an application, were deleted from it, and then replaced by a different form." %}"
></span>
data-content="{% blocktrans %}
These forms do not belong to a CommCare Application (existing or deleted) in your project.
It might mean that these forms once belonged to an application but the application form
has since been removed. They may also be 'system forms' or forms from an integration or API.
{% endblocktrans %}">
</span>
</label>
</div>
</div>
Expand Down
3 changes: 2 additions & 1 deletion corehq/ex-submodules/couchforms/analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from corehq.apps.es import FormES
from corehq.apps.es.aggregations import TermsAggregation
from corehq.const import MISSING_APP_ID
from corehq.util.quickcache import quickcache

from corehq.util.couch import stale_ok
Expand Down Expand Up @@ -74,7 +75,7 @@ def get_all_xmlns_app_id_pairs_submitted_to_in_domain(domain):
query = (FormES()
.domain(domain)
.aggregation(
TermsAggregation("app_id", "app_id").aggregation(
TermsAggregation("app_id", "app_id", missing=MISSING_APP_ID).aggregation(
TermsAggregation("xmlns", "xmlns.exact")))
.remove_default_filter("has_xmlns")
.remove_default_filter("has_user")
Expand Down
39 changes: 24 additions & 15 deletions corehq/ex-submodules/couchforms/tests/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from django.test import TestCase
from requests import ConnectionError

from corehq.const import MISSING_APP_ID
from couchforms.analytics import (
app_has_been_submitted_to_in_last_30_days,
domain_has_submission_in_last_30_days,
Expand Down Expand Up @@ -130,27 +131,35 @@ class CouchformsESAnalyticsTest(TestCase):
def setUpClass(cls):
super(CouchformsESAnalyticsTest, cls).setUpClass()

def create_form_and_sync_to_es(received_on):
with process_pillow_changes('xform-pillow', {'skip_ucr': True}):
with process_pillow_changes('DefaultChangeFeedPillow'):
metadata = TestFormMetadata(domain=cls.domain, app_id=cls.app_id,
xmlns=cls.xmlns, received_on=received_on)
form = get_form_ready_to_save(metadata, is_db_test=True)
form_processor = FormProcessorInterface(domain=cls.domain)
form_processor.save_processed_models([form])
return form

from casexml.apps.case.tests.util import delete_all_xforms
delete_all_xforms()
cls.now = datetime.datetime.utcnow()
cls._60_days = datetime.timedelta(days=60)
cls.domain = 'my_crazy_analytics_domain'
cls.app_id = uuid.uuid4().hex
cls.xmlns = 'my://crazy.xmlns/'

def create_form(received_on, app_id=cls.app_id, xmlns=cls.xmlns):
metadata = TestFormMetadata(domain=cls.domain, app_id=app_id,
xmlns=xmlns, received_on=received_on)
form = get_form_ready_to_save(metadata, is_db_test=True)
form_processor = FormProcessorInterface(domain=cls.domain)
form_processor.save_processed_models([form])
return form

def create_forms_and_sync_to_es():
forms = []
with process_pillow_changes('xform-pillow', {'skip_ucr': True}):
with process_pillow_changes('DefaultChangeFeedPillow'):
for received_on in [cls.now, cls.now - cls._60_days]:
forms.append(create_form(received_on))
forms.append(create_form(cls.now, app_id=None, xmlns="system"))
return forms

from casexml.apps.case.tests.util import delete_all_xforms
delete_all_xforms()
with trap_extra_setup(ConnectionError):
cls.elasticsearch = get_es_new()
initialize_index_and_mapping(cls.elasticsearch, XFORM_INDEX_INFO)
cls.forms = [create_form_and_sync_to_es(cls.now), create_form_and_sync_to_es(cls.now - cls._60_days)]
cls.forms = create_forms_and_sync_to_es()

cls.elasticsearch.indices.refresh(XFORM_INDEX_INFO.alias)

Expand All @@ -160,7 +169,7 @@ def tearDownClass(cls):
FormProcessorTestUtils.delete_all_cases_forms_ledgers(cls.domain)
super(CouchformsESAnalyticsTest, cls).tearDownClass()

def test_get_number_of_cases_in_domain(self):
def test_get_number_of_forms_in_domain(self):
self.assertEqual(
get_number_of_forms_in_domain(self.domain),
len(self.forms)
Expand All @@ -187,4 +196,4 @@ def test_app_has_been_submitted_to_in_last_30_days(self):
def test_get_all_xmlns_app_id_pairs_submitted_to_in_domain(self):
self.assertEqual(
get_all_xmlns_app_id_pairs_submitted_to_in_domain(self.domain),
{(self.xmlns, self.app_id)})
{(self.xmlns, self.app_id), ("system", MISSING_APP_ID)})