From a069859405227625780a332b6e36cf759f69e5e1 Mon Sep 17 00:00:00 2001 From: "javier.hernandez" Date: Tue, 2 Jul 2024 14:11:40 +0200 Subject: [PATCH] Optimised code. Added checks on data on test_load_big_file. Signed-off-by: javier.hernandez --- src/pysdmx/io/xml/sdmx21/reader/data_read.py | 23 +++++++------------- src/pysdmx/util/handlers.py | 19 +--------------- tests/io/xml/sdmx21/reader/test_reader.py | 2 ++ 3 files changed, 11 insertions(+), 33 deletions(-) diff --git a/src/pysdmx/io/xml/sdmx21/reader/data_read.py b/src/pysdmx/io/xml/sdmx21/reader/data_read.py index 8f53deb..f6aa27b 100644 --- a/src/pysdmx/io/xml/sdmx21/reader/data_read.py +++ b/src/pysdmx/io/xml/sdmx21/reader/data_read.py @@ -103,11 +103,10 @@ def __reading_generic_series(dataset: Dict[str, Any]) -> pd.DataFrame: series[OBS] = add_list(series[OBS]) for data in series[OBS]: - obs = {OBS_DIM: data[OBS_DIM][VALUE.lower()]} + obs = {OBS_DIM: data[OBS_DIM][VALUE.lower()], + OBSVALUE.upper(): None} if OBSVALUE in data: obs[OBSVALUE.upper()] = data[OBSVALUE][VALUE.lower()] - else: - obs[OBSVALUE.upper()] = None if ATTRIBUTES in data: obs = {**obs, **__get_element_to_list(data, mode=ATTRIBUTES)} test_list.append({**keys, **obs}) @@ -147,8 +146,7 @@ def __reading_str_series(dataset: Dict[str, Any]) -> pd.DataFrame: dataset[SERIES] = add_list(dataset[SERIES]) for data in dataset[SERIES]: keys = dict(itertools.islice(data.items(), len(data) - 1)) - if not isinstance(data[OBS], list): - data[OBS] = [data[OBS]] + data[OBS] = add_list(data[OBS]) for j in data[OBS]: test_list.append({**keys, **j}) test_list, df = __process_df(test_list, df) @@ -185,10 +183,9 @@ def __get_at_att_str(dataset: Dict[str, Any]) -> Dict[str, Any]: def __get_at_att_gen(dataset: Dict[str, Any]) -> Dict[str, Any]: """Gets all the elements if it is Generic data.""" attached_attributes = {} - if VALUE in dataset[ATTRIBUTES]: - dataset[ATTRIBUTES][VALUE] = add_list(dataset[ATTRIBUTES][VALUE]) - for k in dataset[ATTRIBUTES][VALUE]: - attached_attributes[k[ID]] = k[VALUE.lower()] + dataset[ATTRIBUTES][VALUE] = add_list(dataset[ATTRIBUTES][VALUE]) + for k in dataset[ATTRIBUTES][VALUE]: + attached_attributes[k[ID]] = k[VALUE.lower()] return attached_attributes @@ -265,8 +262,6 @@ def __parse_structure_specific_data( ) -> Dataset: attached_attributes = __get_at_att_str(dataset) - df = pd.DataFrame() - # Parsing data if SERIES in dataset: # Structure Specific Series @@ -277,7 +272,7 @@ def __parse_structure_specific_data( set(df.columns).intersection(set(df_group.columns)) ) df = pd.merge(df, df_group, on=common_columns, how="left") - elif OBS in dataset: + else: dataset[OBS] = add_list(dataset[OBS]) # Structure Specific All dimensions df = pd.DataFrame(dataset[OBS]).replace(np.nan, "") @@ -295,13 +290,11 @@ def __parse_generic_data( ) -> Dataset: attached_attributes = __get_at_att_gen(dataset) - df = pd.DataFrame() - # Parsing data if SERIES in dataset: # Generic Series df = __reading_generic_series(dataset) - elif OBS in dataset: + else: # Generic All Dimensions df = __reading_generic_all(dataset) diff --git a/src/pysdmx/util/handlers.py b/src/pysdmx/util/handlers.py index fa46fe8..18078df 100644 --- a/src/pysdmx/util/handlers.py +++ b/src/pysdmx/util/handlers.py @@ -35,9 +35,7 @@ def split_from_urn(obj_: str, split_id: bool = True) -> Any: full id. """ full_id = obj_.split("=", 1)[1] - if split_id: - return split_unique_id(full_id) - return full_id + return split_unique_id(full_id) def add_list(element: Any) -> Any: @@ -53,21 +51,6 @@ def add_list(element: Any) -> Any: element = [element] return element - -def unique_id(agency_id: str, id_: str, version: str) -> str: - """Unique_id. - - Args: - agency_id: Name of the agency. - id_: The id. - version: Version. - - Returns: - A string with the info contained in the reference. - """ - return f"{agency_id}:{id_}({version})" - - # def drop_na_all(df: pd.DataFrame): # """ # diff --git a/tests/io/xml/sdmx21/reader/test_reader.py b/tests/io/xml/sdmx21/reader/test_reader.py index 820b010..dd8c6d1 100644 --- a/tests/io/xml/sdmx21/reader/test_reader.py +++ b/tests/io/xml/sdmx21/reader/test_reader.py @@ -238,3 +238,5 @@ def test_load_big_file(samples_folder): assert filetype == "xml" result = read_xml(input_str, validate=True) assert "BIS:BIS_DER(1.0)" in result + dataset = result["BIS:BIS_DER(1.0)"] + assert len(dataset.data) == 50500