Skip to content
This repository has been archived by the owner on Oct 12, 2023. It is now read-only.

Commit

Permalink
Merge branch 'senrabc-feature/keep_all_results' into develop
Browse files Browse the repository at this point in the history
  • Loading branch information
senrabc committed May 10, 2016
2 parents 1309cc3 + 6d179c4 commit 9076657
Show file tree
Hide file tree
Showing 3 changed files with 133 additions and 26 deletions.
11 changes: 11 additions & 0 deletions redi/TODO.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,16 @@
TODO: make tests requirement- msg=sudo easy_install sftpserver
TODO: make all settings files pythonic aka config.yml .. etc using YAML standard
TODO: take NOT_DONE examples for values without units data points out of vagrant/redi_out_reference.csv for hcv examples
<<<<<<< HEAD
# TODO: -K 'keep all results' fails if the settings.ini doesn't have Y-M-D H:M:S



# TODO: Modify dates on ConMeds forms to validate for H:M:S.
=======

** TODO: To be able to make a pull request for the keep-all switch, write unit test(s) for all the functions touched by the keep-all command-line switch.
** TODO: To be able to make a pull request for the keep-all switch, write end-to-end tests that take in fake conmeds data, call the switch, pass in the file, use the redcap api to query out the data and compare it to the input csv. Use N Rejack scripts to create test data.
** TODO: change install redcap setting to enable table based authentication.
** TODO: find out why redcap sometimes can't talk to mysql. Restarting mysql fixes it. May need to add a step to the bootstrap to restart apache and mysql right at the end.
>>>>>>> afd8c4190fafc4092152fcfb7c5dcee57cd5eb47
48 changes: 31 additions & 17 deletions redi/redi.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
Usage:
redi.py -h | --help
redi.py [-v] [-V] [-k] [-e] [-d] [-f=<path>] [-r] [-c=<path>] [-D=<datadir>] [-s] [-b]
redi.py [-v] [-V] [-k] [-e] [-d] [-f=<path>] [-r] [-c=<path>] [-D=<datadir>] [-s] [-b] [-K]
Options:
-h --help Show this help message and exit
Expand Down Expand Up @@ -52,6 +52,7 @@
[default:False]
-b --bulk-send-blanks Send blank events in bulk instead of
individually [default:False]
-K --keep-all Keep all results, do not compress by date
"""
__author__ = "University of Florida CTS-IT Team"
__email__ = "ctsit@ctsi.ufl.edu"
Expand Down Expand Up @@ -147,8 +148,10 @@ def main():
# obtaining command line arguments for path to configuration directory
args = docopt(__doc__, help=True)


# capture any cli args passed in that are needed to pass into other funcs.
data_directory = args['--datadir']
keep_all_results = args['--keep-all']

if data_directory is None:
data_directory = DEFAULT_DATA_DIRECTORY

Expand Down Expand Up @@ -266,7 +269,7 @@ def main():
_run(config_file, configuration_directory, do_keep_gen_files, dry_run,
get_emr_data, settings, output_files, db_path, raw_txt_file, redcap_client,
report_courier, report_creator, args['--resume'],
args['--skip-blanks'], args['--bulk-send-blanks'])
args['--skip-blanks'], args['--bulk-send-blanks'], keep_all_results, input_file_path)

# TODO: post processing will go here

Expand Down Expand Up @@ -355,7 +358,7 @@ def connect_to_redcap(email_settings, redcap_settings, dry_run=False):
def _run(config_file, configuration_directory, do_keep_gen_files, dry_run,
get_emr_data, settings, data_folder, database_path, raw_txt_file, redcap_client,
report_courier, report_creator, resume=False, skip_blanks=False,
bulk_send_blanks=False):
bulk_send_blanks=False, keep_all_results=False, input_file_path=None):
global translational_table_tree

assert _person_form_events_service is not None
Expand Down Expand Up @@ -395,6 +398,15 @@ def _run(config_file, configuration_directory, do_keep_gen_files, dry_run,
# delete rawEscaped.txt
GetEmrData.cleanup(escaped_file)

# TODO: clean this up as well as the get_emr_ stuff above

# if either -K or -f is specified, run the steps to make raw.xml
if (keep_all_results != False or input_file_path != None):
GetEmrData.data_preprocessing(raw_txt_file, escaped_file)
GetEmrData.generate_xml(escaped_file, raw_xml_file)
GetEmrData.cleanup(escaped_file)



raw_xml_file = os.path.join(configuration_directory, settings.raw_xml_file)
email_settings = get_email_settings(settings)
Expand All @@ -420,7 +432,7 @@ def _run(config_file, configuration_directory, do_keep_gen_files, dry_run,
_create_person_form_event_tree_with_data(
config_file, configuration_directory, redcap_client,
form_events_file, raw_xml_file, rules, settings, data_folder,
translation_table_file)
translation_table_file, keep_all_results)

_store_run_data(data_folder, alert_summary,
person_form_event_tree_with_data, rule_errors,
Expand Down Expand Up @@ -499,7 +511,7 @@ def _run(config_file, configuration_directory, do_keep_gen_files, dry_run,

def _create_person_form_event_tree_with_data(
config_file, configuration_directory, redcap_client, form_events_file,
raw_xml_file, rules, settings, data_folder, translation_table_file):
raw_xml_file, rules, settings, data_folder, translation_table_file, keep_all_results):

global translational_table_tree
# parse the raw.xml file and fill the etree rawElementTree
Expand Down Expand Up @@ -606,7 +618,7 @@ def _create_person_form_event_tree_with_data(
data,
os.path.join(data_folder, 'rawDataWithDatumAndUnitsFieldNames.xml'))
# sort the data tree and compress
sort_element_tree(data, data_folder)
sort_element_tree(data, data_folder, keep_all_results)
write_element_tree_to_file(data, os.path.join(data_folder, \
'rawDataSortedAfterCompression.xml'))
# update eventName element
Expand Down Expand Up @@ -862,7 +874,7 @@ def update_redcap_form(data, lookup_data, undefined):
undefined)


def sort_element_tree(data, data_folder):
def sort_element_tree(data, data_folder, keep_all_results):
"""
Sort element tree based on three given indices.
@see #update_time_stamp()
Expand All @@ -882,7 +894,15 @@ def sort_element_tree(data, data_folder):
write_element_tree_to_file(data, os.path.join(data_folder,
"rawDataSortedBeforeCompression.xml"))

compress_data_using_study_form_date(data)
# TODO: look at adding a switch to RED-I, that will need to be caught here, that
# will allow another behavior here that will let us keep all results vs
# the current behavior of sorting the events by timestamp and keeping only
# the first one to occur on a given day. Example: when this feature is
# implemented red-i will be able to keep only 1 data point for each day
# for 50 days or keep 50 data points that may occur on the same day and
# map the 50 into 50 event slots in redcap.
if (keep_all_results == False):
compress_data_using_study_form_date(data)

#batch.printxml(container)

Expand Down Expand Up @@ -955,13 +975,7 @@ def compress_data_using_study_form_date(data):
logger.debug("Remove duplicate result using key: {}".format(key_debug))
subj.getparent().remove(subj)

# TODO: look at adding a switch to RED-I, that will need to be caught here, that
# will allow another behavior here that will let us keep all results vs
# the current behavior of sorting the events by timestamp and keeping only
# the first one to occur on a given day. Example: when this feature is
# implemented red-i will be able to keep only 1 data point for each day
# for 50 days or keep 50 data points that may occur on the same day and
# map the 50 into 50 event slots in redcap.


filt = dict()

Expand Down Expand Up @@ -1009,7 +1023,7 @@ def get_key_timestamp(ele):
return (study_id, form_name, timestamp)


def get_key_date(ele):
def get_key_date(ele,keep_all_results=False):
"""
Helper function for #compress_data_using_study_form_date()
Expand Down
100 changes: 91 additions & 9 deletions test/TestSortElementTree.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def setUp(self):
<study>
<subject>
<NAME>PLATELET COUNT</NAME>
<ORD_VALUE></ORD_VALUE>
<ORD_VALUE>123</ORD_VALUE>
<STUDY_ID>999-0262</STUDY_ID>
<redcapFormName>cbc</redcapFormName>
<loinc_code>component_A</loinc_code>
Expand All @@ -50,7 +50,15 @@ def setUp(self):
</subject>
<subject>
<NAME>PLATELET COUNT</NAME>
<ORD_VALUE> </ORD_VALUE>
<ORD_VALUE>123</ORD_VALUE>
<STUDY_ID>999-0262</STUDY_ID>
<redcapFormName>cbc</redcapFormName>
<loinc_code>component_A</loinc_code>
<DATE_TIME_STAMP>2013-12-01 00:12:01</DATE_TIME_STAMP>
</subject>
<subject>
<NAME>PLATELET COUNT</NAME>
<ORD_VALUE>123</ORD_VALUE>
<STUDY_ID>999-0262</STUDY_ID>
<redcapFormName>cbc</redcapFormName>
<loinc_code>component_A</loinc_code>
Expand All @@ -60,7 +68,7 @@ def setUp(self):
"""

# we expect the following sorted tree
self.sorted_tree = """<?xml version="1.0" encoding="UTF-8"?>
self.sorted_tree_keep_all_false = """<?xml version="1.0" encoding="UTF-8"?>
<study>
<subject>
<NAME>PLATELET COUNT</NAME>
Expand All @@ -72,15 +80,51 @@ def setUp(self):
</subject>
<subject>
<NAME>PLATELET COUNT</NAME>
<ORD_VALUE> </ORD_VALUE>
<ORD_VALUE>123</ORD_VALUE>
<STUDY_ID>999-0262</STUDY_ID>
<redcapFormName>cbc</redcapFormName>
<loinc_code>component_A</loinc_code>
<DATE_TIME_STAMP>2013-12-02 00:00:00</DATE_TIME_STAMP>
</subject>
<subject>
<NAME>PLATELET COUNT</NAME>
<ORD_VALUE></ORD_VALUE>
<ORD_VALUE>123</ORD_VALUE>
<STUDY_ID>999-0262</STUDY_ID>
<redcapFormName>cbc</redcapFormName>
<loinc_code>component_A</loinc_code>
<DATE_TIME_STAMP>2013-12-03 00:00:00</DATE_TIME_STAMP>
</subject>
</study>
"""
self.sorted_tree_keep_all_true = """<?xml version="1.0" encoding="UTF-8"?>
<study>
<subject>
<NAME>PLATELET COUNT</NAME>
<ORD_VALUE>123</ORD_VALUE>
<STUDY_ID>999-0262</STUDY_ID>
<redcapFormName>cbc</redcapFormName>
<loinc_code>component_A</loinc_code>
<DATE_TIME_STAMP>2013-12-01 00:00:00</DATE_TIME_STAMP>
</subject>
<subject>
<NAME>PLATELET COUNT</NAME>
<ORD_VALUE>123</ORD_VALUE>
<STUDY_ID>999-0262</STUDY_ID>
<redcapFormName>cbc</redcapFormName>
<loinc_code>component_A</loinc_code>
<DATE_TIME_STAMP>2013-12-01 00:12:01</DATE_TIME_STAMP>
</subject>
<subject>
<NAME>PLATELET COUNT</NAME>
<ORD_VALUE>123</ORD_VALUE>
<STUDY_ID>999-0262</STUDY_ID>
<redcapFormName>cbc</redcapFormName>
<loinc_code>component_A</loinc_code>
<DATE_TIME_STAMP>2013-12-02 00:00:00</DATE_TIME_STAMP>
</subject>
<subject>
<NAME>PLATELET COUNT</NAME>
<ORD_VALUE>123</ORD_VALUE>
<STUDY_ID>999-0262</STUDY_ID>
<redcapFormName>cbc</redcapFormName>
<loinc_code>component_A</loinc_code>
Expand All @@ -91,12 +135,51 @@ def setUp(self):
self.dirpath = tempfile.mkdtemp()


def test_sort_elementtree(self):
# def test_sort_elementtree(self):
# tree_to_sort = etree.ElementTree(etree.fromstring(self.unsorted))
# # make the original test work
# redi.sort_element_tree(tree_to_sort, self.dirpath)
#
# # TODO: create a way to test if --keep-all is True
# # test the keep all results functionality
# # redi.sort_element_tree(tree_to_sort, self.dirpath, True)
#
# par = etree.XMLParser(remove_blank_text = True)
# clean_expect = etree.XML(self.sorted_tree, parser=par)
# clean_result = etree.XML(etree.tostring(tree_to_sort), parser=par)
# self.assertEqual(etree.tostring(clean_expect), etree.tostring(clean_result))

def test_sort_elementtree_keep_all_true(self):
tree_to_sort = etree.ElementTree(etree.fromstring(self.unsorted))
# make the original test work
redi.sort_element_tree(tree_to_sort, self.dirpath, True)

# TODO: create a way to test if --keep-all is True
# test the keep all results functionality
# redi.sort_element_tree(tree_to_sort, self.dirpath, True)
# then the log should NOT!! have the line "Remove duplicate result using key:"

par = etree.XMLParser(remove_blank_text = True)
clean_expect = etree.XML(self.sorted_tree_keep_all_true, parser=par)
clean_result = etree.XML(etree.tostring(tree_to_sort), parser=par)
self.assertEqual(etree.tostring(clean_expect), etree.tostring(clean_result))




def test_sort_elementtree_keep_all_false(self):
tree_to_sort = etree.ElementTree(etree.fromstring(self.unsorted))
redi.sort_element_tree(tree_to_sort, self.dirpath)
# make the original test work
redi.sort_element_tree(tree_to_sort, self.dirpath, False)

# TODO: create a way to test if --keep-all is false
# test the keep all results functionality
# redi.sort_element_tree(tree_to_sort, self.dirpath, False)

# then the log should have the line "Remove duplicate result using key:"

par = etree.XMLParser(remove_blank_text = True)
clean_expect = etree.XML(self.sorted_tree, parser=par)
clean_expect = etree.XML(self.sorted_tree_keep_all_false, parser=par)
clean_result = etree.XML(etree.tostring(tree_to_sort), parser=par)
self.assertEqual(etree.tostring(clean_expect), etree.tostring(clean_result))

Expand All @@ -115,4 +198,3 @@ def tearDown(self):

if __name__ == '__main__':
unittest.main()

0 comments on commit 9076657

Please sign in to comment.