Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

40 metadata writer on sdmx ml #41

Merged
merged 10 commits into from
May 17, 2024
Merged
1 change: 1 addition & 0 deletions src/pysdmx/io/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""IO module for SDMX data."""
1 change: 1 addition & 0 deletions src/pysdmx/io/xml/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""XML readers and writers."""
1 change: 1 addition & 0 deletions src/pysdmx/io/xml/sdmx_two_one/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""SDMX 2.1 XML reader and writer."""
javihern98 marked this conversation as resolved.
Show resolved Hide resolved
57 changes: 57 additions & 0 deletions src/pysdmx/io/xml/sdmx_two_one/writer/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
"""SDMX 2.1 writer package."""

from typing import Any, Dict, Optional

from pysdmx.io.xml.sdmx_two_one.writer.__write_aux import (
__write_header,
create_namespaces,
get_end_message,
)
from pysdmx.io.xml.sdmx_two_one.writer.metadata_writer import (
generate_structures,
)
from pysdmx.model.message import Header, MessageType


def writer(
    content: Dict[str, Any],
    type_: MessageType,
    path: str = "",
    prettyprint: bool = True,
    header: Optional[Header] = None,
) -> Optional[str]:
    """Serialise the message content as an SDMX-ML document.

    The document is assembled in order from four pieces: the XML
    declaration plus root opening tag, the header, the structures and the
    root closing tag.

    Args:
        content: The content to be written
        type_: The type of message to be written
        path: The path to save the file (empty string returns the XML)
        prettyprint: Prettyprint or not
        header: The header to be used (a default ``Header()`` if None)

    Returns:
        The XML string if path is empty, None otherwise

    Raises:
        NotImplementedError: If the MessageType is not Metadata
    """
    if type_ != MessageType.Metadata:
        raise NotImplementedError("Only Metadata messages are supported")

    opening = create_namespaces(type_, content, prettyprint)

    # Fall back to a default-constructed header when the caller gave none.
    message_header = Header() if header is None else header

    xml = "".join(
        [
            opening,
            __write_header(message_header, prettyprint),
            generate_structures(content, prettyprint),
            get_end_message(type_, prettyprint),
        ]
    )

    if path != "":
        with open(path, "w", encoding="UTF-8", errors="replace") as target:
            target.write(xml)
        return None

    return xml
230 changes: 230 additions & 0 deletions src/pysdmx/io/xml/sdmx_two_one/writer/__write_aux.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,230 @@
"""Writer auxiliary functions."""

from collections import OrderedDict
from typing import Any, Dict

from pysdmx.model.message import Header, MessageType

# Local name of the root XML element for each message type; it is combined
# with the message prefix to build the opening and closing root tags.
MESSAGE_TYPE_MAPPING = {
    MessageType.GenericDataSet: "GenericData",
    MessageType.StructureSpecificDataSet: "StructureSpecificData",
    MessageType.Metadata: "Structure",
}

# Prefixes under which the namespaces below are registered.
ABBR_MSG = "mes"
ABBR_GEN = "gen"
ABBR_COM = "com"
ABBR_STR = "str"
ABBR_SPE = "ss"

# Section names; several of them key the MSG_CONTENT_* lookup tables.
ANNOTATIONS = "Annotations"
STRUCTURES = "Structures"
ORGS = "OrganisationSchemes"
AGENCIES = "AgencyScheme"
CODELISTS = "Codelists"
CONCEPTS = "Concepts"
DSDS = "DataStructures"
DATAFLOWS = "Dataflows"
CONSTRAINTS = "Constraints"

# Common root of every SDMX 2.1 namespace URI listed in NAMESPACES.
BASE_URL = "http://www.sdmx.org/resources/sdmxml/schemas/v2_1"

# Prefix -> namespace URI.
NAMESPACES = {
    "xsi": "http://www.w3.org/2001/XMLSchema-instance",
    ABBR_MSG: BASE_URL + "/message",
    ABBR_GEN: BASE_URL + "/generic",
    ABBR_COM: BASE_URL + "/common",
    ABBR_STR: BASE_URL + "/structure",
    ABBR_SPE: BASE_URL + "/structureSpecific",
}

# URN prefix for data structure references.
URN_DS_BASE = "urn:sdmx:org.sdmx.infomodel.datastructure.DataStructure="


def __namespaces_from_type(type_: MessageType) -> str:
    """Returns the type-specific namespace declaration for the root tag.

    Args:
        type_: MessageType to be used. Currently ignored: the structure
            namespace is returned for every message type.

    Returns:
        The ``xmlns`` declaration of the structure namespace, followed by
        a single trailing space
    """
    # NOTE: selecting the generic (ABBR_GEN) or structure-specific
    # (ABBR_SPE) namespace for data messages is not enabled yet, so the
    # structure namespace is emitted unconditionally.
    return f"xmlns:{ABBR_STR}={NAMESPACES[ABBR_STR]!r} "


# def __namespaces_from_content(content: Dict[str, Any]) -> str:
# """Returns the namespaces for the XML file based on content.
#
# Args:
# content: Datasets or None
#
# Returns:
# A string with the namespaces
#
# Raises:
# Exception: If the dataset has no structure defined
# """
# outfile = ""
# for i, key in enumerate(content):
# if content[key].structure is None:
# raise Exception(f"Dataset {key} has no structure defined")
# ds_urn = URN_DS_BASE
# ds_urn += (
# f"{content[key].structure.unique_id}:"
# f"ObsLevelDim:{content[key].dim_at_obs}"
# )
# outfile += f"xmlns:ns{i}={ds_urn!r}"
# return outfile


def create_namespaces(
    type_: MessageType, content: Dict[str, Any], prettyprint: bool = False
) -> str:
    """Builds the XML declaration and the opening tag of the root element.

    The root tag declares the xsi, message, type-specific and common
    namespaces plus the schema location. No newline is written after it.

    Args:
        type_: MessageType to be used
        content: Datasets or None (not read by this function at present)
        prettyprint: Prettyprint or not

    Returns:
        A string with the XML declaration and the namespaced root tag
    """
    newline = "\n" if prettyprint else ""
    root_name = MESSAGE_TYPE_MAPPING[type_]

    pieces = [
        f'<?xml version="1.0" encoding="UTF-8"?>{newline}',
        f"<{ABBR_MSG}:{root_name} ",
        f"xmlns:xsi={NAMESPACES['xsi']!r} ",
        f"xmlns:{ABBR_MSG}={NAMESPACES[ABBR_MSG]!r} ",
        __namespaces_from_type(type_),
        f"xmlns:{ABBR_COM}={NAMESPACES[ABBR_COM]!r} ",
        f'xsi:schemaLocation="{NAMESPACES[ABBR_MSG]} ',
        'https://registry.sdmx.org/schemas/v2_1/SDMXMessage.xsd">',
    ]
    return "".join(pieces)


# One row per content section: (content key, package tag, item tag).
# Row order fixes the iteration order of MSG_CONTENT_PKG.
_CONTENT_TAGS = (
    (ORGS, "OrganisationSchemes", "AgencyScheme"),
    (DATAFLOWS, "Dataflows", "Dataflow"),
    (CODELISTS, "Codelists", "Codelist"),
    (CONCEPTS, "Concepts", "ConceptScheme"),
    (DSDS, "DataStructures", "DataStructure"),
    (CONSTRAINTS, "ContentConstraints", "ContentConstraint"),
)

# Content key -> package (container) tag name, in insertion order.
MSG_CONTENT_PKG = OrderedDict(
    (section, package) for section, package, _ in _CONTENT_TAGS
)


# Content key -> tag name of a single item inside the package.
MSG_CONTENT_ITEM = {section: item for section, _, item in _CONTENT_TAGS}


def get_end_message(type_: MessageType, prettyprint: bool) -> str:
    """Builds the closing tag of the root element.

    Args:
        type_: MessageType to be used
        prettyprint: Prettyprint or not

    Returns:
        The closing root tag, preceded by a newline when prettyprinting
    """
    closing_tag = f"</{ABBR_MSG}:{MESSAGE_TYPE_MAPPING[type_]}>"
    if prettyprint:
        return "\n" + closing_tag
    return closing_tag


def add_indent(indent: str) -> str:
    """Extends an indentation string by one level.

    Args:
        indent: The current indentation string

    Returns:
        The given string with one tab character appended
    """
    one_level = "\t"
    return indent + one_level


def __value(element: str, value: str, prettyprint: bool) -> str:
    """Generates a value element for the XML file.

    A Value element is an XML tag in the message namespace whose text
    content is the given value. The value is XML-escaped so that ``&``,
    ``<`` and ``>`` cannot produce a malformed document.

    Args:
        element: ID, Test, Prepared, Sender, Receiver, Source
        value: The value to be written
        prettyprint: Prettyprint or not

    Returns:
        A string with the value element
    """
    from xml.sax.saxutils import escape

    nl = "\n" if prettyprint else ""
    child2 = "\t\t" if prettyprint else ""
    # str() keeps the previous implicit stringification of non-str values
    # while allowing them to be escaped.
    text = escape(str(value))
    return (
        f"{nl}{child2}<{ABBR_MSG}:{element}>"
        f"{text}"
        f"</{ABBR_MSG}:{element}>"
    )


def __item(element: str, id_: str, prettyprint: bool) -> str:
    """Generates an item element for the XML file.

    An Item element is an empty XML tag in the message namespace with an
    id attribute. The id is XML-escaped rather than quoted with Python's
    ``repr``, which would emit Python escapes (e.g. doubled backslashes)
    and leave ``&`` and ``<`` unescaped.

    Args:
        element: Sender, Receiver
        id_: The ID to be written
        prettyprint: Prettyprint or not

    Returns:
        A string with the item element
    """
    from xml.sax.saxutils import escape

    nl = "\n" if prettyprint else ""
    child2 = "\t\t" if prettyprint else ""
    # Single quotes stay as the delimiter so that the output for plain
    # identifiers is unchanged; an embedded single quote becomes &apos;.
    safe_id = escape(str(id_), {"'": "&apos;"})
    return f"{nl}{child2}<{ABBR_MSG}:{element} id='{safe_id}'/>"


def __write_header(header: Header, prettyprint: bool) -> str:
    """Writes the Header part of the message.

    The header contains, in order, the ID, Test, Prepared, Sender,
    Receiver and Source elements.

    Args:
        header: The Header to be written
        prettyprint: Prettyprint or not

    Returns:
        The XML string
    """
    nl = "\n" if prettyprint else ""
    child1 = "\t" if prettyprint else ""
    # Second precision; any timezone information is not written.
    prepared = header.prepared.strftime("%Y-%m-%dT%H:%M:%S")
    # xs:boolean only accepts lower-case "true"/"false", whereas Python
    # renders booleans as "True"/"False".
    # NOTE(review): assumes header.test is a bool - confirm against Header.
    test = str(header.test).lower()

    return (
        f"{nl}{child1}<{ABBR_MSG}:Header>"
        f"{__value('ID', header.id, prettyprint)}"
        f"{__value('Test', test, prettyprint)}"
        f"{__value('Prepared', prepared, prettyprint)}"
        f"{__item('Sender', header.sender, prettyprint)}"
        f"{__item('Receiver', header.receiver, prettyprint)}"
        f"{__value('Source', header.source, prettyprint)}"
        f"{nl}{child1}</{ABBR_MSG}:Header>"
    )
Loading
Loading