Merge remote-tracking branch 'origin/50-xml-parsers-for-data-reading' into 50-xml-parsers-for-data-reading

# Conflicts:
#   tests/io/xml/sdmx21/reader/test_reader.py
bis-med-it · Jul 3, 2024 · 14de194 · 14de194
2 parents 9869d60 + a069859
commit 14de194
Show file tree

Hide file tree

Showing 3 changed files with 9 additions and 49 deletions.
diff --git a/src/pysdmx/io/xml/sdmx21/reader/data_read.py b/src/pysdmx/io/xml/sdmx21/reader/data_read.py
@@ -103,11 +103,10 @@ def __reading_generic_series(dataset: Dict[str, Any]) -> pd.DataFrame:
         series[OBS] = add_list(series[OBS])
 
         for data in series[OBS]:
-            obs = {OBS_DIM: data[OBS_DIM][VALUE.lower()]}
+            obs = {OBS_DIM: data[OBS_DIM][VALUE.lower()],
+                   OBSVALUE.upper(): None}
             if OBSVALUE in data:
                 obs[OBSVALUE.upper()] = data[OBSVALUE][VALUE.lower()]
-            else:
-                obs[OBSVALUE.upper()] = None
             if ATTRIBUTES in data:
                 obs = {**obs, **__get_element_to_list(data, mode=ATTRIBUTES)}
             test_list.append({**keys, **obs})
@@ -147,8 +146,7 @@ def __reading_str_series(dataset: Dict[str, Any]) -> pd.DataFrame:
     dataset[SERIES] = add_list(dataset[SERIES])
     for data in dataset[SERIES]:
         keys = dict(itertools.islice(data.items(), len(data) - 1))
-        if not isinstance(data[OBS], list):
-            data[OBS] = [data[OBS]]
+        data[OBS] = add_list(data[OBS])
         for j in data[OBS]:
             test_list.append({**keys, **j})
         test_list, df = __process_df(test_list, df)
@@ -185,10 +183,9 @@ def __get_at_att_str(dataset: Dict[str, Any]) -> Dict[str, Any]:
 def __get_at_att_gen(dataset: Dict[str, Any]) -> Dict[str, Any]:
     """Gets all the elements if it is Generic data."""
     attached_attributes = {}
-    if VALUE in dataset[ATTRIBUTES]:
-        dataset[ATTRIBUTES][VALUE] = add_list(dataset[ATTRIBUTES][VALUE])
-        for k in dataset[ATTRIBUTES][VALUE]:
-            attached_attributes[k[ID]] = k[VALUE.lower()]
+    dataset[ATTRIBUTES][VALUE] = add_list(dataset[ATTRIBUTES][VALUE])
+    for k in dataset[ATTRIBUTES][VALUE]:
+        attached_attributes[k[ID]] = k[VALUE.lower()]
     return attached_attributes
 
 
@@ -265,8 +262,6 @@ def __parse_structure_specific_data(
 ) -> Dataset:
     attached_attributes = __get_at_att_str(dataset)
 
-    df = pd.DataFrame()
-
     # Parsing data
     if SERIES in dataset:
         # Structure Specific Series
@@ -277,7 +272,7 @@ def __parse_structure_specific_data(
                 set(df.columns).intersection(set(df_group.columns))
             )
             df = pd.merge(df, df_group, on=common_columns, how="left")
-    elif OBS in dataset:
+    else:
         dataset[OBS] = add_list(dataset[OBS])
         # Structure Specific All dimensions
         df = pd.DataFrame(dataset[OBS]).replace(np.nan, "")
@@ -295,13 +290,11 @@ def __parse_generic_data(
 ) -> Dataset:
     attached_attributes = __get_at_att_gen(dataset)
 
-    df = pd.DataFrame()
-
     # Parsing data
     if SERIES in dataset:
         # Generic Series
         df = __reading_generic_series(dataset)
-    elif OBS in dataset:
+    else:
         # Generic All Dimensions
         df = __reading_generic_all(dataset)
 

diff --git a/src/pysdmx/util/handlers.py b/src/pysdmx/util/handlers.py
@@ -35,9 +35,7 @@ def split_from_urn(obj_: str, split_id: bool = True) -> Any:
         full id.
     """
     full_id = obj_.split("=", 1)[1]
-    if split_id:
-        return split_unique_id(full_id)
-    return full_id
+    return split_unique_id(full_id)
 
 
 def add_list(element: Any) -> Any:
@@ -53,21 +51,6 @@ def add_list(element: Any) -> Any:
         element = [element]
     return element
 
-
-def unique_id(agency_id: str, id_: str, version: str) -> str:
-    """Unique_id.
-
-    Args:
-        agency_id: Name of the agency.
-        id_: The id.
-        version: Version.
-
-    Returns:
-        A string with the info contained in the reference.
-    """
-    return f"{agency_id}:{id_}({version})"
-
-
 # def drop_na_all(df: pd.DataFrame):
 #     """
 #

diff --git a/tests/io/xml/sdmx21/reader/test_reader.py b/tests/io/xml/sdmx21/reader/test_reader.py
@@ -238,19 +238,3 @@ def test_load_big_file(samples_folder):
     assert filetype == "xml"
     result = read_xml(input_str, validate=True)
     assert "BIS:BIS_DER(1.0)" in result
-
-
-def test_gen_all_no_attr(samples_folder):
-    data_path = samples_folder / "gen_all_no_atts.xml"
-    input_str, filetype = process_string_to_read(data_path)
-    assert filetype == "xml"
-    with pytest.raises(KeyError, match="Attributes"):
-        read_xml(input_str, validate=True)
-
-
-def test_gen_ser_no_atts(samples_folder):
-    data_path = samples_folder / "gen_ser_no_atts.xml"
-    input_str, filetype = process_string_to_read(data_path)
-    assert filetype == "xml"
-    with pytest.raises(KeyError, match="Attributes"):
-        read_xml(input_str, validate=True)