# Overview

The purpose of this notebook is to understand the logic of `fetch_dsd_schema` (developed by Tony), to improve it and make it more robust, and finally to extend it beyond DSDs to dataflows and provision agreements.

**Note: This notebook is for development purposes only; it is not intended to be used in production or to be executed sequentially.**

# Setup

In [11]:
# import pysdmx as px

In [12]:
from pysdmx.io.format import StructureFormat # Provides the structure formats (e.g. FUSION_JSON)
from pysdmx.api import fmr # Client to connect to FMR
from urllib.parse import urljoin

# parse_artefact_id

## Line by line

In [29]:
artefact_id = "WB.DATA360:DS_DATA360(1.3)"

In [30]:
# Split on the colon: the left part is the agency, the right part is "id(version)".
agency, rest = artefact_id.split(":")
print(agency)
print(rest)

WB.DATA360
DS_DATA360(1.3)


In [31]:
# Split "id(version)" at the opening parenthesis, then drop the trailing ")".
# NOTE: the name `id` shadows the Python builtin `id` for the rest of the kernel session.
id, version = rest.split("(")
version = version.rstrip(")")
print(id)
print(version)

DS_DATA360
1.3


## Function

In [32]:
def parse_artefact_id(artefact_id):
    """Parse an artefact identifier (DSD, Dataflow, Codelist, etc.) into its components.

    The identifier must have the exact shape ``"agency:id(version)"``, e.g.
    ``"WB.DATA360:DS_DATA360(1.3)"``. The agency is everything before the first
    colon, the id is everything between that colon and the first opening
    parenthesis, and the version is the text enclosed by the parentheses.

    Args:
        artefact_id (str): The identifier of the artefact, in the format "agency:id(version)".

    Returns:
        tuple: A tuple ``(agency, id, version)`` of non-empty strings.

    Raises:
        ValueError: If ``artefact_id`` is not a string, if the colon or either
            parenthesis is missing, if anything follows the closing parenthesis,
            or if any of the three components is empty.
    """
    error_message = "Invalid artefact_id format. Expected format: 'agency:id(version)'"

    # Non-string input (None, bytes, ...) can never match the expected format.
    if not isinstance(artefact_id, str):
        raise ValueError(error_message)

    # partition() never raises; an empty separator slot means it was not found.
    agency, colon, rest = artefact_id.partition(":")
    id_part, open_paren, version_part = rest.partition("(")

    # The version must be closed by ")" and that must be the last character.
    if not colon or not open_paren or not version_part.endswith(")"):
        raise ValueError(error_message)

    # Remove exactly one closing parenthesis (rstrip would swallow "))" silently).
    version = version_part[:-1]

    # Reject empty components and stray parentheses left inside the version.
    if not agency or not id_part or not version or "(" in version or ")" in version:
        raise ValueError(error_message)

    return agency, id_part, version

# fetch_schema

## Line by line

In [17]:
dsd_id = "WB.DATA360:DS_DATA360(1.3)"

In [18]:
# # If I import only the package, I cannot access this object. 
# # I need to access the class directly.
# px.io.format.StructureFormat.FUSION_JSON

In [19]:
# Structure format that is later passed to the registry client.
# NOTE: the name `format` shadows the Python builtin `format` in this kernel session.
format = StructureFormat.FUSION_JSON
format

<StructureFormat.FUSION_JSON: 'application/vnd.fusion.json'>

In [20]:
# fmr_url = fmr_params[env]["url"]
fmr_url = 'https://fmrqa.worldbank.org/'
fmr_url

'https://fmrqa.worldbank.org/'

In [21]:
# Build the registry endpoint by pointing the host URL at the /FMR/sdmx/v2/ path.
# NOTE: because the second argument starts with "/", urljoin replaces any path
# already present in fmr_url instead of appending to it; it does not validate the URL.
base_url = urljoin(fmr_url, "/FMR/sdmx/v2/")
base_url

'https://fmrqa.worldbank.org/FMR/sdmx/v2/'

In [22]:
# Create the registry client for the endpoint, passing the structure format chosen earlier.
client = fmr.RegistryClient(
        base_url,
        format=format,
    )
client

<pysdmx.api.fmr.RegistryClient at 0x235cfc9ec50>

In [33]:
# Split the DSD identifier into agency, id and version with the helper function.
# NOTE: `id` shadows the Python builtin of the same name.
agency, id, version = parse_artefact_id(dsd_id)
print(agency)
print(id)
print(version)

WB.DATA360
DS_DATA360
1.3


### DSD

In [None]:
# ## Checking URLS for DSDs and Dataflows
# # DSDs
# https://fmrqa.worldbank.org/FMR/sdmx/v2/structure/datastructure/WB/IFPRI_ASTI/1.0
# # Dataflows
# https://fmrqa.worldbank.org/FMR/sdmx/v2/structure/dataflow/WB/DF_IFPRI_ASTI/1.0

In [28]:
# Fetch the schema using the "datastructure" context, i.e. addressing the artefact as a DSD.
schema = client.get_schema("datastructure", agency, id, version)
print(schema)

context: datastructure, agency: WB.DATA360, id: DS_DATA360, components: 23 components, version: 1.3, artefacts: 23 artefacts


In [26]:
dir(schema)

['__annotations__',
 '__class__',
 '__copy__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__replace__',
 '__repr__',
 '__rich_repr__',
 '__setattr__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__struct_config__',
 '__struct_defaults__',
 '__struct_encode_fields__',
 '__struct_fields__',
 '__subclasshook__',
 'agency',
 'artefacts',
 'components',
 'context',
 'generated',
 'groups',
 'id',
 'name',
 'short_urn',
 'version']

### Dataflow

In [34]:
dataflow_id = "WB.DATA360:DF_D360_WB_WDI(1.0)"

In [None]:
# ## Checking URLS for DSDs and Dataflows
# # DSDs
# https://fmrqa.worldbank.org/FMR/sdmx/v2/structure/datastructure/WB/IFPRI_ASTI/1.0
# # Dataflows
# https://fmrqa.worldbank.org/FMR/sdmx/v2/structure/dataflow/WB/DF_IFPRI_ASTI/1.0

In [35]:
agency, id, version = parse_artefact_id(dataflow_id)

In [36]:
# Same call as for the DSD, but with the "dataflow" context and a dataflow identifier.
schema = client.get_schema("dataflow", agency, id, version)
print(schema)

context: dataflow, agency: WB.DATA360, id: DF_D360_WB_WDI, components: 23 components, version: 1.0, artefacts: 26 artefacts


In [37]:
dir(schema)

['__annotations__',
 '__class__',
 '__copy__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__replace__',
 '__repr__',
 '__rich_repr__',
 '__setattr__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__struct_config__',
 '__struct_defaults__',
 '__struct_encode_fields__',
 '__struct_fields__',
 '__subclasshook__',
 'agency',
 'artefacts',
 'components',
 'context',
 'generated',
 'groups',
 'id',
 'name',
 'short_urn',
 'version']

### provisionagreement

In [38]:
provision_id = "WB.TEST:DF_CSC_EN_FSH_SUST_ZS_WB_TEST_DP_ENV(1.0)"

In [None]:
# ## Checking URLS for DSDs and Dataflows
# # DSDs
# https://fmrqa.worldbank.org/FMR/sdmx/v2/structure/datastructure/WB/IFPRI_ASTI/1.0
# # Dataflows
# https://fmrqa.worldbank.org/FMR/sdmx/v2/structure/dataflow/WB/DF_IFPRI_ASTI/1.0

In [39]:
agency, id, version = parse_artefact_id(provision_id)

In [40]:
# Same call again, this time with the "provisionagreement" context.
schema = client.get_schema("provisionagreement", agency, id, version)
print(schema)

context: provisionagreement, agency: WB.TEST, id: DF_CSC_EN_FSH_SUST_ZS_WB_TEST_DP_ENV, components: 19 components, version: 1.0, artefacts: 23 artefacts


## Function

In [None]:
def fetch_schema(
    base_url: str,
    artefact_id: str,
    context: str = "datastructure",
):
    """Fetch the schema of a specified artefact from an SDMX registry (FMR).

    Args:
        base_url (str): Absolute URL of the FMR host, including the scheme,
            e.g. "https://fmrqa.worldbank.org/". Any path in this URL is
            replaced by "/FMR/sdmx/v2/".
        artefact_id (str): The identifier of the artefact, in the format "agency:id(version)".
        context (str, optional): The type of artefact to fetch. Defaults to
            "datastructure". It can also be "dataflow" and "provisionagreement".

    Returns:
        schema: The schema object returned by the registry client's ``get_schema``.

    Raises:
        ValueError: If ``base_url`` is not an absolute URL (scheme and host are
            required), or if ``artefact_id`` is rejected by ``parse_artefact_id``.
    """
    # Local import keeps the function self-contained; the notebook only imports urljoin.
    from urllib.parse import urlsplit

    # Without a scheme, urljoin would silently return the relative path
    # "/FMR/sdmx/v2/", and the failure would only surface later inside the client.
    if not isinstance(base_url, str):
        raise ValueError(
            f"Invalid base_url {base_url!r}. Expected an absolute URL such as 'https://host/'."
        )
    url_parts = urlsplit(base_url)
    if not url_parts.scheme or not url_parts.netloc:
        raise ValueError(
            f"Invalid base_url {base_url!r}. Expected an absolute URL such as 'https://host/'."
        )

    # Parse the artefact ID first so a malformed identifier fails before the client is built.
    agency, artefact_name, version = parse_artefact_id(artefact_id)

    # Point the host at the registry endpoint. The leading "/" makes urljoin
    # replace whatever path base_url carried.
    endpoint = urljoin(base_url, "/FMR/sdmx/v2/")

    # Initialize the client with the Fusion JSON structure format.
    client = fmr.RegistryClient(
        endpoint,
        format=StructureFormat.FUSION_JSON,
    )

    # Fetch the schema
    return client.get_schema(context, agency, artefact_name, version)