-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added read_xml method, moved validate xml to its proper method. Added submission type compatibility and exception on error message.
Signed-off-by: javier.hernandez <javier.hernandez@meaningfuldata.eu>
- Loading branch information
1 parent a1ea23a, commit 0c83742
Showing 13 changed files with 533 additions and 6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
"""Enumeration for the XML message types.""" | ||
|
||
from enum import Enum | ||
|
||
|
||
class MessageType(Enum):
    """Kinds of SDMX-ML message.

    Each member names one type of XML message, so that callers can state
    which kind of message they intend to write or expect to read.
    """

    # Data messages.
    GenericDataSet = 1
    StructureSpecificDataSet = 2
    # Structural metadata message.
    Structure = 3
    # Error message.
    Error = 4
    # Submission (registry interface) message.
    Submission = 5
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,203 @@ | ||
"""Parsing configuration for SDMX-ML 2.1 messages.""" | ||
|
||
# Base URI shared by every SDMX-ML 2.1 schema namespace.
SCHEMA_ROOT = "http://www.sdmx.org/resources/sdmxml/schemas/v2_1/"

# Namespace URI -> prefix mapping handed to xmltodict. A value of None asks
# xmltodict to skip the namespace, so element names appear without a
# prefix; only the XML Schema instance namespace keeps one ("xsi").
NAMESPACES_21 = {
    f"{SCHEMA_ROOT}message": None,
    f"{SCHEMA_ROOT}common": None,
    f"{SCHEMA_ROOT}structure": None,
    "http://www.w3.org/2001/XMLSchema-instance": "xsi",
    "http://www.w3.org/XML/1998/namespace": None,
    f"{SCHEMA_ROOT}data/structurespecific": None,
    f"{SCHEMA_ROOT}data/generic": None,
    f"{SCHEMA_ROOT}registry": None,
    "http://schemas.xmlsoap.org/soap/envelope/": None,
}

# Keyword arguments for xmltodict.parse: resolve namespaces with the mapping
# above, build plain dicts, and store attributes without any key prefix.
XML_OPTIONS = {
    "process_namespaces": True,
    "namespaces": NAMESPACES_21,
    "dict_constructor": dict,
    "attr_prefix": "",
}
|
||
# ---------------------------------------------------------------------------
# Element and attribute names shared by all messages
# ---------------------------------------------------------------------------
HEADER = "Header"
DATASET = "DataSet"
SERIES = "Series"
OBS = "Obs"
AGENCY_ID = "agencyID"
ID = "id"
VERSION = "version"

# ---------------------------------------------------------------------------
# Data messages (structure-specific and generic)
# ---------------------------------------------------------------------------
STRSPE = "StructureSpecificData"
GENERIC = "GenericData"
SERIESKEY = "SeriesKey"
ATTRIBUTES = "Attributes"
VALUE = "Value"
OBS_DIM = "ObsDimension"
OBSVALUE = "ObsValue"
OBSKEY = "ObsKey"

# Message header
STRREF = "structureRef"
STRUCTURE = "Structure"
STR_USAGE = "StructureUsage"
STRID = "structureID"
STRTYPE = "structure_type"
DIM_OBS = "dimensionAtObservation"
ALL_DIM = "AllDimensions"
REF = "Ref"
DATASET_ID = "DataSetID"

# ---------------------------------------------------------------------------
# SDMX error messages
# ---------------------------------------------------------------------------
ERROR = "Error"
ERROR_MESSAGE = "ErrorMessage"
ERROR_CODE = "code"
ERROR_TEXT = "Text"

# ---------------------------------------------------------------------------
# SDMX registry interface messages
# ---------------------------------------------------------------------------
REG_INTERFACE = "RegistryInterface"
SUBMIT_STRUCTURE_RESPONSE = "SubmitStructureResponse"
SUBMISSION_RESULT = "SubmissionResult"
SUBMITTED_STRUCTURE = "SubmittedStructure"
MAINTAINABLE_OBJECT = "MaintainableObject"
ACTION = "action"
STATUS_MSG = "StatusMessage"
STATUS = "status"

# ---------------------------------------------------------------------------
# SOAP API faults
# ---------------------------------------------------------------------------
FAULT = "Fault"
FAULTCODE = "faultcode"
FAULTSTRING = "faultstring"

# ---------------------------------------------------------------------------
# Structure messages
# ---------------------------------------------------------------------------
# Names common to several structures
NAME = "Name"
DESC = "Description"
LANG = "lang"
XML_TEXT = "#text"
STR_URL = "structureURL"
STR_URL_LOW = "structureUrl"
SER_URL = "serviceURL"
SER_URL_LOW = "serviceUrl"

# Container elements
ANNOTATIONS = "Annotations"
STRUCTURES = "Structures"
ORGS = "OrganisationSchemes"
AGENCIES = "AgencyScheme"
CODELISTS = "Codelists"
CONCEPTS = "Concepts"
DSDS = "DataStructures"
DATAFLOWS = "Dataflows"
CONSTRAINTS = "Constraints"

# Individual structures
AGENCY = "Agency"
CL = "Codelist"
CODE = "Code"
CS = "ConceptScheme"
CS_LOW = "concept_scheme"
CON = "Concept"
DSD = "DataStructure"

# Data structure definition components
DSD_COMPS = "DataStructureComponents"
CON_ID = "ConceptIdentity"
CON_ID_LOW = "concept_identity"
CON_ROLE = "ConceptRole"
CON_ROLE_LOW = "concept_role"

# Dimensions
DIM_LIST = "DimensionList"
DIM_LIST_LOW = "dimension_list"
DIM = "Dimension"
TIME_DIM = "TimeDimension"

# Attributes
ATT_LIST = "AttributeList"
ATT_LIST_LOW = "attribute_list"
ATT = "Attribute"
ATT_REL = "AttributeRelationship"
AS_STATUS = "assignmentStatus"

# Measures
ME_LIST = "MeasureList"
ME_LIST_LOW = "measure_list"
MEASURE = "Measure"
PRIM_MEASURE = "PrimaryMeasure"

# Group dimensions
GROUP = "Group"
GROUP_DIM_LOW = "group_dimension_descriptor"
GROUP_DIM = "GroupDimension"
DIM_REF = "DimensionReference"

# Dataflows
DF = "Dataflow"

# Constraints
CON_CONS = "ContentConstraint"
CONS_ATT = "ConstraintAttachment"
CUBE_REGION = "CubeRegion"
CONTENT_REGION = "dataContentRegion"
KEY = "Key"
KEY_VALUE = "KeyValue"
DATA_KEY_SET = "DataKeySet"
DATA_KEY_SET_LOW = "dataKeySet"
INCLUDED = "isIncluded"
INCLUDE = "include"

# Annotations: XML element names
ANNOTATION = "Annotation"
ANNOTATION_TITLE = "AnnotationTitle"
ANNOTATION_TYPE = "AnnotationType"
ANNOTATION_TEXT = "AnnotationText"
ANNOTATION_URL = "AnnotationURL"

# Annotations: lower-case counterparts, plus URN
TITLE = "title"
TYPE_ = "type_"
TYPE = "type"
TEXT = "text"
URL = "url"
URN = "URN"

# Representations
CORE_REP = "CoreRepresentation"
CORE_REP_LOW = "core_representation"
LOCAL_REP = "LocalRepresentation"
LOCAL_REP_LOW = "local_representation"
ENUM = "Enumeration"
ENUM_FORMAT = "EnumerationFormat"
TEXT_FORMAT = "TextFormat"

# Facets
FACETS = "facets"
TEXT_TYPE = "textType"
TEXT_TYPE_LOW = "text_type"

# Contacts
CONTACT = "Contact"
DEPARTMENT = "Department"
ROLE = "Role"
URI = "URI"
EMAIL = "Email"
X400 = "X400"
TELEPHONE = "Telephone"
FAX = "Fax"

# Miscellaneous
MAINTAINER = "maintainer"
XMLNS = "xmlns"
COMPS = "components"
PARENT = "Parent"
PAR_ID = "maintainableParentID"
PAR_VER = "maintainableParentVersion"
REL_TO = "relatedTo"
NO_REL = "NoSpecifiedRelationship"

# Keys that must be left out of attached_attributes.
exc_attributes = [
    STRREF,
    "action",
    "dataScope",
    "xsi:type",
    SERIES,
    OBS,
]

# Content types
DATASTRUCTURES_CM = "DataStructures"
DATAFLOWS_CM = "Dataflows"
CODELISTS_CM = "Codelists"
CONCEPTS_CM = "Concepts"
ORGANISATIONSCHEMES_CM = "OrganisationSchemes"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,99 @@ | ||
"""SDMX 2.1 reader package.""" | ||
|
||
from typing import Any, Dict, Optional | ||
|
||
import xmltodict | ||
|
||
from pysdmx.errors import ClientError | ||
from pysdmx.io.xml.enums import MessageType | ||
from pysdmx.io.xml.sdmx21.__parsing_config import ( | ||
ERROR, | ||
ERROR_CODE, | ||
ERROR_MESSAGE, | ||
ERROR_TEXT, | ||
GENERIC, | ||
REG_INTERFACE, | ||
STRSPE, | ||
STRUCTURE, | ||
XML_OPTIONS, | ||
) | ||
from pysdmx.io.xml.sdmx21.doc_validation import validate_doc | ||
from pysdmx.io.xml.sdmx21.reader.submission_reader import ( | ||
handle_registry_interface, | ||
) | ||
|
||
# Maps each MessageType value to the root element name that a parsed
# document of that type is expected to contain.
MODES = {
    message_type.value: root_element
    for message_type, root_element in (
        (MessageType.GenericDataSet, GENERIC),
        (MessageType.StructureSpecificDataSet, STRSPE),
        (MessageType.Structure, STRUCTURE),
        (MessageType.Submission, REG_INTERFACE),
        (MessageType.Error, ERROR),
    )
}
|
||
|
||
def read_xml(
    infile: str,
    validate: bool = True,
    mode: Optional[MessageType] = None,
    use_dataset_id: bool = False,
) -> Dict[str, Any]:
    """Parse an SDMX-ML document into a dictionary of SDMX objects.

    Args:
        infile: The SDMX-ML input. It is passed unchanged to
            ``validate_doc`` and to ``xmltodict.parse``.
            NOTE(review): earlier documentation described this as
            "Path to file, URL, or string" - confirm which forms are
            actually accepted.
        validate: When True, ``validate_doc`` is run on the input before
            it is parsed.
        mode: Expected message type. When given, the parsed document must
            contain the root element associated with it in ``MODES``.
        use_dataset_id: Forwarded to the object generation step.

    Returns:
        dict: Dictionary with the parsed data.

    Raises:
        ValueError: If the document does not match ``mode`` or cannot be
            turned into SDMX objects.
        ClientError: If the document is an SDMX Error message.
    """
    if validate:
        validate_doc(infile)

    parsed = xmltodict.parse(
        infile, **XML_OPTIONS  # type: ignore[arg-type]
    )

    # The raw document is no longer needed once it has been parsed.
    del infile

    if mode is not None:
        expected_root = MODES[mode.value]
        if expected_root not in parsed:
            raise ValueError(
                f"Unable to parse sdmx file as {expected_root} file"
            )

    return __generate_sdmx_objects_from_xml(parsed, use_dataset_id)
|
||
|
||
def __generate_sdmx_objects_from_xml(
    dict_info: Dict[str, Any], use_dataset_id: bool = False
) -> Dict[str, Any]:
    """Turn an xmltodict dictionary into SDMX objects.

    Args:
        dict_info: Dictionary produced by xmltodict for the whole message.
        use_dataset_id: Use the dataset ID as the key in the resulting
            dictionary. Currently not used by this function.

    Returns:
        dict: Dictionary with the parsed data.

    Raises:
        ClientError: If the message is an SDMX Error message; it carries
            the error code (as an int) and the error text.
        ValueError: If the message type is not supported.
    """
    if ERROR in dict_info:
        error_message = dict_info[ERROR][ERROR_MESSAGE]
        code, text = error_message[ERROR_CODE], error_message[ERROR_TEXT]
        raise ClientError(int(code), text)

    # Leaving this commented for metadata read (#39)
    # if STRUCTURE in dict_info:
    #     return create_structures(dict_info[STRUCTURE][STRUCTURES])

    if REG_INTERFACE not in dict_info:
        raise ValueError("Cannot parse this sdmx data")

    return handle_registry_interface(dict_info)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
"""Read SDMX-ML submission messages.""" | ||
|
||
from typing import Any, Dict | ||
|
||
from pysdmx.io.xml.sdmx21.__parsing_config import ( | ||
ACTION, | ||
MAINTAINABLE_OBJECT, | ||
REG_INTERFACE, | ||
STATUS, | ||
STATUS_MSG, | ||
SUBMISSION_RESULT, | ||
SUBMIT_STRUCTURE_RESPONSE, | ||
SUBMITTED_STRUCTURE, | ||
URN, | ||
) | ||
from pysdmx.model.submission import SubmissionResult | ||
from pysdmx.util import parse_urn | ||
|
||
|
||
def handle_registry_interface(dict_info: Dict[str, Any]) -> Dict[str, Any]:
    """Handle the Registry Interface message.

    Reads every SubmissionResult of the SubmitStructureResponse and builds
    one ``SubmissionResult`` object per submitted structure.

    Args:
        dict_info: Dictionary with the parsed data (xmltodict output for
            the whole message).

    Returns:
        dict: Maps the ``full_id`` parsed from each structure's URN to a
        ``SubmissionResult`` built from the action, that ``full_id`` and
        the status of the submission.
    """
    response = dict_info[REG_INTERFACE][SUBMIT_STRUCTURE_RESPONSE]

    submission_results = response[SUBMISSION_RESULT]
    # xmltodict yields a list only when an element is repeated; a response
    # with a single SubmissionResult comes back as a plain dict. Iterating
    # that dict would walk its string keys, so wrap it in a list first.
    if not isinstance(submission_results, list):
        submission_results = [submission_results]

    result = {}
    for submission_result in submission_results:
        structure = submission_result[SUBMITTED_STRUCTURE]
        action = structure[ACTION]
        urn = structure[MAINTAINABLE_OBJECT][URN]
        full_id = parse_urn(urn).full_id
        status = submission_result[STATUS_MSG][STATUS]
        result[full_id] = SubmissionResult(action, full_id, status)
    return result
Oops, something went wrong.