Skip to content

Commit

Permalink
add: library API for non-path input, accept markdown and dict input
Browse files Browse the repository at this point in the history
- add: xls2xform.convert for library users to call pyxform without
  needing to use files (accepts bytes/file handles/strings)
  - accepts markdown input since this is widely used by pyxform
  - accepts dict to avoid needing to use internal funcs that may change
- chg: avoid writing to files unless validate=True (for ODK Validate)
  - also avoid assuming any files were written, e.g. missing_ok=True
- chg: move xls/x_sheet_to_csv, sheet_to_csv from utils.py to
  xls2json_backends.py because they are backends for csv input.
- chg: move md_to_dict from test directory into xls2json_backends.py
- chg: refactor pyxform_test_case.py to use xls2xform.convert only,
  instead of internal funcs associated with md_to_dict, so that the
  existing tests check API stability e.g. file types, dict input, etc.
  • Loading branch information
lindsay-stevens committed Jun 25, 2024
1 parent 59c37e0 commit 1a1462a
Show file tree
Hide file tree
Showing 51 changed files with 1,385 additions and 1,414 deletions.
24 changes: 0 additions & 24 deletions clean_for_build.py

This file was deleted.

1 change: 1 addition & 0 deletions pyxform/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
SUBMISSION_URL = "submission_url"
AUTO_SEND = "auto_send"
AUTO_DELETE = "auto_delete"
DEFAULT_FORM_NAME = "data"
DEFAULT_LANGUAGE_KEY = "default_language"
DEFAULT_LANGUAGE_VALUE = "default"
LABEL = "label"
Expand Down
4 changes: 2 additions & 2 deletions pyxform/entities/entities_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def get_validated_dataset_name(entity):

if not is_valid_xml_tag(dataset):
if isinstance(dataset, bytes):
dataset = dataset.encode("utf-8")
dataset = dataset.decode("utf-8")

raise PyXFormError(
f"Invalid entity list name: '{dataset}'. Names must begin with a letter, colon, or underscore. Other characters can include numbers or dashes."
Expand Down Expand Up @@ -117,7 +117,7 @@ def validate_entity_saveto(

if not is_valid_xml_tag(save_to):
if isinstance(save_to, bytes):
save_to = save_to.encode("utf-8")
save_to = save_to.decode("utf-8")

raise PyXFormError(
f"{error_start} '{save_to}'. Entity property names {const.XML_IDENTIFIER_ERROR_MESSAGE}"
Expand Down
4 changes: 4 additions & 0 deletions pyxform/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,7 @@ class PyXFormError(Exception):

class ValidationError(PyXFormError):
"""Common base class for pyxform validation exceptions."""


class PyXFormReadError(PyXFormError):
"""Common base class for pyxform exceptions occurring during reading XLSForm data."""
4 changes: 2 additions & 2 deletions pyxform/instance.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
SurveyInstance class module.
"""

import os.path

from pyxform.errors import PyXFormError
from pyxform.xform_instance_parser import parse_xform_instance

Expand Down Expand Up @@ -76,8 +78,6 @@ def answers(self):
return self._answers

def import_from_xml(self, xml_string_or_filename):
import os.path

if os.path.isfile(xml_string_or_filename):
xml_str = open(xml_string_or_filename, encoding="utf-8").read()
else:
Expand Down
28 changes: 13 additions & 15 deletions pyxform/survey.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from collections.abc import Generator, Iterator
from datetime import datetime
from functools import lru_cache
from pathlib import Path

from pyxform import aliases, constants
from pyxform.constants import EXTERNAL_INSTANCE_EXTENSIONS, NSMAP
Expand Down Expand Up @@ -970,10 +971,10 @@ def date_stamp(self):
"""Returns a date string with the format of %Y_%m_%d."""
return self._created.strftime("%Y_%m_%d")

def _to_ugly_xml(self):
def _to_ugly_xml(self) -> str:
return '<?xml version="1.0"?>' + self.xml().toxml()

def _to_pretty_xml(self):
def _to_pretty_xml(self) -> str:
"""Get the XForm with human readable formatting."""
return '<?xml version="1.0"?>\n' + self.xml().toprettyxml(indent=" ")

Expand Down Expand Up @@ -1171,10 +1172,9 @@ def _var_repl_output_function(matchobj):
else:
return text, False

# pylint: disable=too-many-arguments
def print_xform_to_file(
self, path=None, validate=True, pretty_print=True, warnings=None, enketo=False
):
) -> str:
"""
Print the xForm to a file and optionally validate it as well by
throwing exceptions and adding warnings to the warnings array.
Expand All @@ -1183,12 +1183,13 @@ def print_xform_to_file(
warnings = []
if not path:
path = self._print_name + ".xml"
if pretty_print:
xml = self._to_pretty_xml()
else:
xml = self._to_ugly_xml()
try:
with open(path, mode="w", encoding="utf-8") as file_obj:
if pretty_print:
file_obj.write(self._to_pretty_xml())
else:
file_obj.write(self._to_ugly_xml())
file_obj.write(xml)
except Exception:
if os.path.exists(path):
os.unlink(path)
Expand All @@ -1210,6 +1211,7 @@ def print_xform_to_file(
+ ". "
+ "Learn more: http://xlsform.org#multiple-language-support"
)
return xml

def to_xml(self, validate=True, pretty_print=True, warnings=None, enketo=False):
"""
Expand All @@ -1227,20 +1229,16 @@ def to_xml(self, validate=True, pretty_print=True, warnings=None, enketo=False):
tmp.close()
try:
# this will throw an exception if the xml is not valid
self.print_xform_to_file(
xml = self.print_xform_to_file(
path=tmp.name,
validate=validate,
pretty_print=pretty_print,
warnings=warnings,
enketo=enketo,
)
finally:
if os.path.exists(tmp.name):
os.remove(tmp.name)
if pretty_print:
return self._to_pretty_xml()

return self._to_ugly_xml()
Path(tmp.name).unlink(missing_ok=True)
return xml

def instantiate(self):
"""
Expand Down
95 changes: 32 additions & 63 deletions pyxform/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,16 @@
import json
import os
import re
from io import StringIO
from json.decoder import JSONDecodeError
from typing import NamedTuple
from typing import Any, NamedTuple
from xml.dom import Node
from xml.dom.minidom import Element, Text, _write_data

import openpyxl
import xlrd
from defusedxml.minidom import parseString

from pyxform import constants as const
from pyxform.errors import PyXFormError
from pyxform.xls2json_backends import is_empty, xls_value_to_unicode, xlsx_value_to_str

SEP = "_"

Expand Down Expand Up @@ -167,66 +166,32 @@ def flatten(li):
yield from subli


def sheet_to_csv(workbook_path, csv_path, sheet_name):
if workbook_path.endswith(".xls"):
return xls_sheet_to_csv(workbook_path, csv_path, sheet_name)
else:
return xlsx_sheet_to_csv(workbook_path, csv_path, sheet_name)

def external_choices_to_csv(
workbook_dict: dict[str, Any], warnings: list | None = None
) -> str | None:
"""
Convert the 'external_choices' sheet data to CSV.
def xls_sheet_to_csv(workbook_path, csv_path, sheet_name):
wb = xlrd.open_workbook(workbook_path)
try:
sheet = wb.sheet_by_name(sheet_name)
except xlrd.biffh.XLRDError:
return False
if not sheet or sheet.nrows < 2:
return False
with open(csv_path, mode="w", encoding="utf-8", newline="") as f:
writer = csv.writer(f, quoting=csv.QUOTE_ALL)
mask = [v and len(v.strip()) > 0 for v in sheet.row_values(0)]
for row_idx in range(sheet.nrows):
csv_data = []
try:
for v, m in zip(sheet.row(row_idx), mask, strict=False):
if m:
value = v.value
value_type = v.ctype
data = xls_value_to_unicode(value, value_type, wb.datemode)
# clean the values of leading and trailing whitespaces
data = data.strip()
csv_data.append(data)
except TypeError:
continue
writer.writerow(csv_data)

return True


def xlsx_sheet_to_csv(workbook_path, csv_path, sheet_name):
wb = openpyxl.open(workbook_path, read_only=True, data_only=True)
:param workbook_dict: The result from xls2json.workbook_to_json.
:param warnings: The conversion warnings list.
"""
warnings = coalesce(warnings, [])
if const.EXTERNAL_CHOICES not in workbook_dict:
warnings.append(
f"Could not export itemsets.csv, the '{const.EXTERNAL_CHOICES}' sheet is missing."
)
return None

itemsets = StringIO(newline="")
csv_writer = csv.writer(itemsets, quoting=csv.QUOTE_ALL)
try:
sheet = wb[sheet_name]
except KeyError:
return False

with open(csv_path, mode="w", encoding="utf-8", newline="") as f:
writer = csv.writer(f, quoting=csv.QUOTE_ALL)
mask = [not is_empty(cell.value) for cell in sheet[1]]
for row in sheet.rows:
csv_data = []
try:
for v, m in zip(row, mask, strict=False):
if m:
data = xlsx_value_to_str(v.value)
# clean the values of leading and trailing whitespaces
data = data.strip()
csv_data.append(data)
except TypeError:
continue
writer.writerow(csv_data)
wb.close()
return True
header = workbook_dict["external_choices_header"][0]
except (IndexError, KeyError, TypeError):
header = {k for d in workbook_dict[const.EXTERNAL_CHOICES] for k in d}
csv_writer.writerow(header)
for row in workbook_dict[const.EXTERNAL_CHOICES]:
csv_writer.writerow(row.values())
return itemsets.getvalue()


def has_external_choices(json_struct):
Expand All @@ -235,7 +200,11 @@ def has_external_choices(json_struct):
"""
if isinstance(json_struct, dict):
for k, v in json_struct.items():
if k == "type" and isinstance(v, str) and v.startswith("select one external"):
if (
k == const.TYPE
and isinstance(v, str)
and v.startswith(const.SELECT_ONE_EXTERNAL)
):
return True
elif has_external_choices(v):
return True
Expand Down
19 changes: 10 additions & 9 deletions pyxform/xls2json.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
)
from pyxform.errors import PyXFormError
from pyxform.parsing.expression import is_single_token_expression
from pyxform.utils import PYXFORM_REFERENCE_REGEX, default_is_dynamic
from pyxform.utils import PYXFORM_REFERENCE_REGEX, coalesce, default_is_dynamic
from pyxform.validators.pyxform import parameters_generic, select_from_file
from pyxform.validators.pyxform.android_package_name import validate_android_package_name
from pyxform.validators.pyxform.translations_checks import SheetTranslations
Expand Down Expand Up @@ -395,7 +395,7 @@ def workbook_to_json(
workbook_dict,
form_name: str | None = None,
fallback_form_name: str | None = None,
default_language: str = constants.DEFAULT_LANGUAGE_VALUE,
default_language: str | None = None,
warnings: list[str] | None = None,
) -> dict[str, Any]:
"""
Expand All @@ -416,8 +416,7 @@ def workbook_to_json(
returns a nested dictionary equivalent to the format specified in the
json form spec.
"""
if warnings is None:
warnings = []
warnings = coalesce(warnings, [])
is_valid = False
# Sheet names should be case-insensitive
workbook_dict = {x.lower(): y for x, y in workbook_dict.items()}
Expand All @@ -441,8 +440,8 @@ def workbook_to_json(
)

# Make sure the passed in vars are unicode
form_name = str(form_name)
default_language = str(default_language)
form_name = str(coalesce(form_name, constants.DEFAULT_FORM_NAME))
default_language = str(coalesce(default_language, constants.DEFAULT_LANGUAGE_VALUE))

# We check for double colons to determine whether to use them
# or single colons to delimit grouped headers.
Expand Down Expand Up @@ -500,7 +499,9 @@ def workbook_to_json(
)

# Here we create our json dict root with default settings:
id_string = settings.get(constants.ID_STRING, fallback_form_name)
id_string = settings.get(
constants.ID_STRING, coalesce(fallback_form_name, constants.DEFAULT_FORM_NAME)
)
sms_keyword = settings.get(constants.SMS_KEYWORD, id_string)
json_dict = {
constants.TYPE: constants.SURVEY,
Expand Down Expand Up @@ -970,7 +971,7 @@ def workbook_to_json(
question_name = str(row[constants.NAME])
if not is_valid_xml_tag(question_name):
if isinstance(question_name, bytes):
question_name = question_name.encode("utf-8")
question_name = question_name.decode("utf-8")

raise PyXFormError(
f"{ROW_FORMAT_STRING % row_number} Invalid question name '{question_name}'. Names {XML_IDENTIFIER_ERROR_MESSAGE}"
Expand Down Expand Up @@ -1591,7 +1592,7 @@ def get_filename(path):

def parse_file_to_json(
path: str,
default_name: str = "data",
default_name: str = constants.DEFAULT_FORM_NAME,
default_language: str = constants.DEFAULT_LANGUAGE_VALUE,
warnings: list[str] | None = None,
file_object: IO | None = None,
Expand Down
Loading

0 comments on commit 1a1462a

Please sign in to comment.