From a069859405227625780a332b6e36cf759f69e5e1 Mon Sep 17 00:00:00 2001
From: "javier.hernandez" <javier.hernandez@meaningfuldata.eu>
Date: Tue, 2 Jul 2024 14:11:40 +0200
Subject: [PATCH] Optimised code. Added checks on data in test_load_big_file.

Signed-off-by: javier.hernandez <javier.hernandez@meaningfuldata.eu>
---
 src/pysdmx/io/xml/sdmx21/reader/data_read.py | 23 +++++++-------------
 src/pysdmx/util/handlers.py                  | 19 +---------------
 tests/io/xml/sdmx21/reader/test_reader.py    |  2 ++
 3 files changed, 11 insertions(+), 33 deletions(-)

diff --git a/src/pysdmx/io/xml/sdmx21/reader/data_read.py b/src/pysdmx/io/xml/sdmx21/reader/data_read.py
index 8f53deb..f6aa27b 100644
--- a/src/pysdmx/io/xml/sdmx21/reader/data_read.py
+++ b/src/pysdmx/io/xml/sdmx21/reader/data_read.py
@@ -103,11 +103,10 @@ def __reading_generic_series(dataset: Dict[str, Any]) -> pd.DataFrame:
         series[OBS] = add_list(series[OBS])
 
         for data in series[OBS]:
-            obs = {OBS_DIM: data[OBS_DIM][VALUE.lower()]}
+            obs = {OBS_DIM: data[OBS_DIM][VALUE.lower()],
+                   OBSVALUE.upper(): None}
             if OBSVALUE in data:
                 obs[OBSVALUE.upper()] = data[OBSVALUE][VALUE.lower()]
-            else:
-                obs[OBSVALUE.upper()] = None
             if ATTRIBUTES in data:
                 obs = {**obs, **__get_element_to_list(data, mode=ATTRIBUTES)}
             test_list.append({**keys, **obs})
@@ -147,8 +146,7 @@ def __reading_str_series(dataset: Dict[str, Any]) -> pd.DataFrame:
     dataset[SERIES] = add_list(dataset[SERIES])
     for data in dataset[SERIES]:
         keys = dict(itertools.islice(data.items(), len(data) - 1))
-        if not isinstance(data[OBS], list):
-            data[OBS] = [data[OBS]]
+        data[OBS] = add_list(data[OBS])
         for j in data[OBS]:
             test_list.append({**keys, **j})
         test_list, df = __process_df(test_list, df)
@@ -185,10 +183,9 @@ def __get_at_att_str(dataset: Dict[str, Any]) -> Dict[str, Any]:
 def __get_at_att_gen(dataset: Dict[str, Any]) -> Dict[str, Any]:
     """Gets all the elements if it is Generic data."""
     attached_attributes = {}
-    if VALUE in dataset[ATTRIBUTES]:
-        dataset[ATTRIBUTES][VALUE] = add_list(dataset[ATTRIBUTES][VALUE])
-        for k in dataset[ATTRIBUTES][VALUE]:
-            attached_attributes[k[ID]] = k[VALUE.lower()]
+    dataset[ATTRIBUTES][VALUE] = add_list(dataset[ATTRIBUTES][VALUE])
+    for k in dataset[ATTRIBUTES][VALUE]:
+        attached_attributes[k[ID]] = k[VALUE.lower()]
     return attached_attributes
 
 
@@ -265,8 +262,6 @@ def __parse_structure_specific_data(
 ) -> Dataset:
     attached_attributes = __get_at_att_str(dataset)
 
-    df = pd.DataFrame()
-
     # Parsing data
     if SERIES in dataset:
         # Structure Specific Series
@@ -277,7 +272,7 @@ def __parse_structure_specific_data(
                 set(df.columns).intersection(set(df_group.columns))
             )
             df = pd.merge(df, df_group, on=common_columns, how="left")
-    elif OBS in dataset:
+    else:
         dataset[OBS] = add_list(dataset[OBS])
         # Structure Specific All dimensions
         df = pd.DataFrame(dataset[OBS]).replace(np.nan, "")
@@ -295,13 +290,11 @@ def __parse_generic_data(
 ) -> Dataset:
     attached_attributes = __get_at_att_gen(dataset)
 
-    df = pd.DataFrame()
-
     # Parsing data
     if SERIES in dataset:
         # Generic Series
         df = __reading_generic_series(dataset)
-    elif OBS in dataset:
+    else:
         # Generic All Dimensions
         df = __reading_generic_all(dataset)
 
diff --git a/src/pysdmx/util/handlers.py b/src/pysdmx/util/handlers.py
index fa46fe8..18078df 100644
--- a/src/pysdmx/util/handlers.py
+++ b/src/pysdmx/util/handlers.py
@@ -35,9 +35,7 @@ def split_from_urn(obj_: str, split_id: bool = True) -> Any:
         full id.
     """
     full_id = obj_.split("=", 1)[1]
-    if split_id:
-        return split_unique_id(full_id)
-    return full_id
+    return split_unique_id(full_id)
 
 
 def add_list(element: Any) -> Any:
@@ -53,21 +51,6 @@ def add_list(element: Any) -> Any:
         element = [element]
     return element
 
-
-def unique_id(agency_id: str, id_: str, version: str) -> str:
-    """Unique_id.
-
-    Args:
-        agency_id: Name of the agency.
-        id_: The id.
-        version: Version.
-
-    Returns:
-        A string with the info contained in the reference.
-    """
-    return f"{agency_id}:{id_}({version})"
-
-
 # def drop_na_all(df: pd.DataFrame):
 #     """
 #
diff --git a/tests/io/xml/sdmx21/reader/test_reader.py b/tests/io/xml/sdmx21/reader/test_reader.py
index 820b010..dd8c6d1 100644
--- a/tests/io/xml/sdmx21/reader/test_reader.py
+++ b/tests/io/xml/sdmx21/reader/test_reader.py
@@ -238,3 +238,5 @@ def test_load_big_file(samples_folder):
     assert filetype == "xml"
     result = read_xml(input_str, validate=True)
     assert "BIS:BIS_DER(1.0)" in result
+    dataset = result["BIS:BIS_DER(1.0)"]
+    assert len(dataset.data) == 50500