Skip to content

Commit

Permalink
Merge pull request #133 from solleks/users-and-locations-queries
Browse files Browse the repository at this point in the history
Users and locations queries
  • Loading branch information
snopoke committed Apr 21, 2020
2 parents 7585364 + ec11e08 commit 13a4422
Show file tree
Hide file tree
Showing 10 changed files with 454 additions and 65 deletions.
30 changes: 27 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,13 @@ The basic usage of the command-line tool is with a saved Excel or JSON query (se
$ commcare-export --commcare-hq <URL or alias like "local" or "prod"> \
--username <username> \
--project <project> \
--version <api version, defaults to latest known> \
--api-version <api version, defaults to latest known> \
--version <print current version> \
--query <excel file, json file, or raw json> \
--output-format <csv, xls, xlsx, json, markdown, sql> \
--output <file name or SQL database URL>
--output <file name or SQL database URL> \
--users <export data about project's mobile workers> \
--locations <export data about project's location hierarchy>
```

See `commcare-export --help` for the full list of options.
Expand Down Expand Up @@ -434,7 +437,7 @@ MSSQL_URL=mssql+pyodbc://user:password@host/
Postgresql
==========
```
$ docker pull postgres 9.6
$ docker pull postgres:9.6
$ docker run --name ccexport-postgres -p 5432:5432 -d postgres:9.6
```

Expand Down Expand Up @@ -465,6 +468,27 @@ $ sudo ACCEPT_EULA=Y apt-get install msodbcsql17
$ odbcinst -q -d
```

MSSQL for Mac OS
==========
```
$ docker pull microsoft/mssql-server-linux:2017-latest
$ docker run -e "ACCEPT_EULA=Y" -e "MSSQL_SA_PASSWORD=Password@123" -p 1433:1433 --name mssql1 -d microsoft/mssql-server-linux:2017-latest
# Install driver
$ brew install unixodbc freetds
# Add the following 5 lines to /usr/local/etc/odbcinst.ini
[ODBC Driver 17 for SQL Server]
Description=FreeTDS Driver for Linux & MSSQL
Driver=/usr/local/lib/libtdsodbc.so
Setup=/usr/local/lib/libtdsodbc.so
UsageCount=1
# Create a soft link from /etc/odbcinst.ini to actual file
$ sudo ln -s /usr/local/etc/odbcinst.ini /etc/odbcinst.ini
```

Integration Tests
-----------------
Running the integration tests requires API credentials from CommCare HQ
Expand Down
92 changes: 92 additions & 0 deletions commcare_export/builtin_queries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@

from commcare_export import excel_query
from commcare_export.minilinq import Apply, List, Literal, Reference

USERS_TABLE_NAME = 'commcare_users'
LOCATIONS_TABLE_NAME = 'commcare_locations'

excel_query.blacklist(USERS_TABLE_NAME)
excel_query.blacklist(LOCATIONS_TABLE_NAME)


class Column:
    """Description of one output column of a built-in query.

    ``name`` is stored wrapped in a MiniLinq ``Literal``; ``source`` is kept
    as given and wrapped in a ``Reference`` on demand. When ``map_function``
    is supplied it is the name of an environment function that is applied to
    the source reference, followed by any ``extra_args``.
    """

    def __init__(self, name, source, map_function=None, *extra_args):
        self.name = Literal(name)
        self.source = source
        self.map_function = map_function
        self.extra_args = extra_args

    @property
    def mapped_source_field(self):
        """Return the MiniLinq expression that yields this column's value."""
        if self.map_function:
            # Look the mapping function up by name and call it with the
            # source reference plus the additional arguments.
            function_ref = Reference(self.map_function)
            return Apply(function_ref, Reference(self.source),
                         *self.extra_args)
        return Reference(self.source)


def compile_query(columns, data_source, table_name):
    """Build the MiniLinq query that exports ``data_source`` to ``table_name``.

    ``columns`` is a sequence of ``Column`` objects; their names become the
    sheet headings and their ``mapped_source_field`` expressions become the
    row body. The data is read through ``api_data`` with the
    ``checkpoint_manager`` taken from the evaluation environment.
    """
    api_call = Apply(Reference('api_data'), Literal(data_source),
                     Reference('checkpoint_manager'))
    headings = [column.name for column in columns]
    row_body = List([column.mapped_source_field for column in columns])
    sheet = excel_query.SheetParts(table_name, headings, api_call, row_body,
                                   None)
    # A single sheet goes in, so the first compiled query is the one we want.
    compiled = excel_query.compile_queries([sheet], None, False)
    return compiled[0]


# A MiniLinq query for the internal CommCare user table.
# It reads every field produced by the /user/ API endpoint and
# writes the data to a table named "commcare_users" in a database.

# Each pair is (output column name, source path in the API record).
user_columns = [
    Column(column_name, source_path)
    for column_name, source_path in (
        ('id', 'id'),
        ('default_phone_number', 'default_phone_number'),
        ('email', 'email'),
        ('first_name', 'first_name'),
        ('groups', 'groups'),
        ('last_name', 'last_name'),
        ('phone_numbers', 'phone_numbers'),
        ('resource_uri', 'resource_uri'),
        ('commcare_location_id', 'user_data.commcare_location_id'),
        ('commcare_location_ids', 'user_data.commcare_location_ids'),
        ('commcare_primary_case_sharing_id',
         'user_data.commcare_primary_case_sharing_id'),
        ('commcare_project', 'user_data.commcare_project'),
        ('username', 'username'),
    )
]

users_query = compile_query(user_columns, 'user', USERS_TABLE_NAME)


# A MiniLinq query for the internal CommCare location table.
# It reads every field produced by the /location/ API endpoint and
# appends several fields from stored location_type information before
# writing the data to a table named "commcare_locations" in a database.

location_columns = [
    Column('id', 'id'),
    Column('created_at', 'created_at', 'str2date'),
    Column('domain', 'domain'),
    Column('external_id', 'external_id'),
    Column('last_modified', 'last_modified', 'str2date'),
    Column('latitude', 'latitude'),
    Column('location_data', 'location_data'),
    Column('location_id', 'location_id'),
    Column('location_type', 'location_type'),
    Column('longitude', 'longitude'),
    Column('name', 'name'),
    Column('parent', 'parent'),
    Column('resource_uri', 'resource_uri'),
    Column('site_code', 'site_code'),
] + [
    # One extra column per location_type attribute, each resolved by calling
    # get_location_info with the record's location_type and the attribute name.
    Column('location_type_' + attribute, 'location_type',
           'get_location_info', Literal(attribute))
    for attribute in ('administrative', 'code', 'name', 'parent')
]

locations_query = compile_query(location_columns, 'location',
                                LOCATIONS_TABLE_NAME)
96 changes: 69 additions & 27 deletions commcare_export/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,12 @@
from commcare_export.commcare_hq_client import CommCareHqClient, LATEST_KNOWN_VERSION
from commcare_export.commcare_minilinq import CommCareHqEnv
from commcare_export.env import BuiltInEnv, JsonPathEnv, EmitterEnv
from commcare_export.exceptions import LongFieldsException, DataExportException
from commcare_export.minilinq import MiniLinq
from commcare_export.exceptions import LongFieldsException, DataExportException, MissingQueryFileException
from commcare_export.minilinq import MiniLinq, List
from commcare_export.repeatable_iterator import RepeatableIterator
from commcare_export.version import __version__
from commcare_export import builtin_queries
from commcare_export.location_info_provider import LocationInfoProvider

EXIT_STATUS_ERROR = 1

Expand Down Expand Up @@ -52,7 +54,7 @@ def add_to_parser(self, parser, **additional_kwargs):
CLI_ARGS = [
Argument('version', default=False, action='store_true',
help='Print the current version of the commcare-export tool.'),
Argument('query', required=True, help='JSON or Excel query file'),
Argument('query', required=False, help='JSON or Excel query file'),
Argument('dump-query', default=False, action='store_true'),
Argument('commcare-hq', default='prod',
help='Base url for the CommCare HQ instance e.g. https://www.commcarehq.org'),
Expand All @@ -77,6 +79,12 @@ def add_to_parser(self, parser, **additional_kwargs):
Argument('batch-size', default=100, help="Number of records to process per batch."),
Argument('checkpoint-key', help="Use this key for all checkpoints instead of the query file MD5 hash "
"in order to prevent table rebuilds after a query file has been edited."),
Argument('users', default=False, action='store_true',
help="Export a table containing data about this project's "
"mobile workers"),
Argument('locations', default=False, action='store_true',
help="Export a table containing data about this project's "
"locations"),
]


Expand All @@ -97,7 +105,7 @@ def main(argv):
sys.exit(1)

if args.verbose:
logging.basicConfig(level=logging.DEBUG,
logging.basicConfig(level=logging.DEBUG,
format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s')
else:
logging.basicConfig(level=logging.WARN,
Expand Down Expand Up @@ -156,6 +164,26 @@ def _get_query_from_file(query_arg, missing_value, combine_emits, max_column_len
return MiniLinq.from_jvalue(json.loads(fh.read()))


def get_queries(args, writer):
    """Collect the queries requested on the command line.

    The query from ``--query`` comes first when given, followed by the
    built-in users and locations queries when ``--users`` / ``--locations``
    are set. A single query is returned as is; several are wrapped in a
    MiniLinq ``List``.

    Raises ``MissingQueryFileException`` when ``--query`` was given but no
    query could be loaded from it.
    """
    queries = []

    if args.query is not None:
        file_query = _get_query(args, writer)
        if not file_query:
            raise MissingQueryFileException(args.query)
        queries.append(file_query)

    if args.users:
        queries.append(builtin_queries.users_query)

    if args.locations:
        queries.append(builtin_queries.locations_query)

    if len(queries) > 1:
        return List(queries)
    # NOTE(review): assumes the caller has verified that at least one of
    # --query, --users or --locations was supplied.
    return queries[0]


def _get_writer(output_format, output, strict_types):
if output_format == 'xlsx':
return writers.Excel2007TableWriter(output)
Expand Down Expand Up @@ -197,8 +225,9 @@ def _get_api_client(args, commcarehq_base_url):


def _get_checkpoint_manager(args):
if not os.path.exists(args.query):
logger.warning("Checkpointing disabled for non file-based query")
if not args.users and not args.locations and not os.path.exists(args.query):
logger.warning("Checkpointing disabled for non builtin, "
"non file-based query")
elif args.since or args.until:
logger.warning("Checkpointing disabled when using '--since' or '--until'")
else:
Expand All @@ -207,20 +236,46 @@ def _get_checkpoint_manager(args):
return checkpoint_manager


def force_lazy_result(lazy_result):
    """Walk a possibly nested lazy result and consume every iterator in it.

    ``None`` is ignored. A ``RepeatableIterator`` is exhausted (when truthy);
    any other value is treated as an iterable of nested results and
    processed recursively.
    """
    if lazy_result is None:
        return

    if isinstance(lazy_result, RepeatableIterator):
        if lazy_result:
            # Materialising the iterator is what forces evaluation.
            list(lazy_result)
        return

    for nested_result in lazy_result:
        force_lazy_result(nested_result)


def evaluate_query(env, query):
    """Evaluate ``query`` inside ``env`` and force all lazy results.

    An HTTP 401 response and a keyboard interrupt are reported on stdout and
    end the evaluation quietly; every other request error is re-raised.
    Always returns ``None``.
    """
    with env:
        try:
            lazy_result = query.eval(env)
            force_lazy_result(lazy_result)
        except requests.exceptions.RequestException as e:
            # Errors raised before any response arrived (connection failure,
            # timeout, ...) carry ``response = None``; guard against that so
            # the original error is re-raised instead of an AttributeError.
            response = e.response
            if response is not None and response.status_code == 401:
                print("\nAuthentication failed. Please check your credentials.")
                return None
            raise
        except KeyboardInterrupt:
            print('\nExport aborted')
            return None


def main_with_args(args):
logger.info("CommCare Export Version {}".format(__version__))
writer = _get_writer(args.output_format, args.output, args.strict_types)

if args.query is None and args.users is False and args.locations is False:
print('At least one the following arguments is required: '
'--query, --users, --locations')
return EXIT_STATUS_ERROR

try:
query = _get_query(args, writer)
query = get_queries(args, writer)
except DataExportException as e:
print(e.message)
return EXIT_STATUS_ERROR

if not query:
print('Query file not found: %s' % args.query)
return EXIT_STATUS_ERROR

if args.dump_query:
print(json.dumps(query.to_jvalue(), indent=4))
return
Expand All @@ -247,9 +302,11 @@ def main_with_args(args):
logger.debug('Starting from %s', args.since)

cm = CheckpointManagerProvider(checkpoint_manager, since, args.start_over)
lp = LocationInfoProvider(api_client)
static_env = {
'commcarehq_base_url': commcarehq_base_url,
'get_checkpoint_manager': cm.get_checkpoint_manager,
'get_location_info': lp.get_location_info
}
env = (
BuiltInEnv(static_env)
Expand All @@ -258,22 +315,7 @@ def main_with_args(args):
| EmitterEnv(writer)
)

with env:
try:
lazy_result = query.eval(env)
if lazy_result is not None:
# evaluate lazy results
for r in lazy_result:
list(r) if r else r
except requests.exceptions.RequestException as e:
if e.response.status_code == 401:
print("\nAuthentication failed. Please check your credentials.")
return
else:
raise
except KeyboardInterrupt:
print('\nExport aborted')
return
evaluate_query(env, query)

if args.output_format == 'json':
print(json.dumps(list(writer.tables.values()), indent=4, default=RepeatableIterator.to_jvalue))
Expand Down
11 changes: 11 additions & 0 deletions commcare_export/commcare_hq_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,17 @@ def iterate(self, resource, paginator, params=None, checkpoint_manager=None):
logger.debug('Mock client call to resource "%s" with params "%s"', resource, params)
return self.mock_data[resource][urlencode(OrderedDict(sorted(params.items())))]

def get(self, resource):
    """Return the mocked single-page response for ``resource``.

    The objects are looked up in ``self.mock_data[resource]`` under the
    URL-encoded key for ``{'get': True}``. When there are none, ``None`` is
    returned; otherwise they are wrapped in a dict with ``meta`` and
    ``objects`` entries describing one complete page.
    """
    logger.debug('Mock client call to get resource "%s"', resource)
    lookup_key = urlencode(OrderedDict([('get', True)]))
    objects = self.mock_data[resource][lookup_key]
    if not objects:
        return None

    object_count = len(objects)
    meta = {
        'limit': object_count,
        'next': None,
        'offset': 0,
        'previous': None,
        'total_count': object_count,
    }
    return {'meta': meta, 'objects': objects}


class ApiKeyAuth(AuthBase):
def __init__(self, username, apikey):
Expand Down
8 changes: 7 additions & 1 deletion commcare_export/excel_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from jsonpath_rw import jsonpath
from jsonpath_rw.parser import parse as parse_jsonpath

from commcare_export.exceptions import LongFieldsException, MissingColumnException
from commcare_export.exceptions import LongFieldsException, MissingColumnException, ReservedTableNameException
from commcare_export.map_format import compile_map_format_via
from commcare_export.minilinq import *

Expand Down Expand Up @@ -447,10 +447,16 @@ def check_columns(parsed_sheets, columns):
if errors_by_sheet:
raise MissingColumnException(errors_by_sheet)

# Table names that Excel query sheets are not allowed to use.
blacklisted_tables = []


def blacklist(table_name):
    """Register ``table_name`` as reserved for internal use."""
    blacklisted_tables.append(table_name)

def get_queries_from_excel(workbook, missing_value=None, combine_emits=False,
max_column_length=None, required_columns=None):
parsed_sheets = parse_workbook(workbook)
for sheet in parsed_sheets:
if sheet.name in blacklisted_tables:
raise ReservedTableNameException(sheet.name)
if max_column_length:
check_field_length(parsed_sheets, max_column_length)
if required_columns:
Expand Down
18 changes: 18 additions & 0 deletions commcare_export/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,21 @@ def message(self):
) for sheet, missing_cols in self.errors_by_sheet.items()
]
return '\n'.join(lines)


class MissingQueryFileException(DataExportException):
    """Raised when the query file given on the command line yields no query."""

    def __init__(self, query_file):
        self.query_file = query_file

    @property
    def message(self):
        """Human-readable description naming the missing query file."""
        template = 'Query file not found: {}'
        return template.format(self.query_file)


class ReservedTableNameException(DataExportException):
    """Raised when a query targets a table name reserved for internal tables."""

    def __init__(self, conflicting_name):
        self.conflicting_name = conflicting_name

    @property
    def message(self):
        """Human-readable description naming the conflicting table."""
        template = (
            'Table name "{}" conflicts with an internal table name. '
            'Please export to a different table.'
        )
        return template.format(self.conflicting_name)
Loading

0 comments on commit 13a4422

Please sign in to comment.