Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/50-xml-parsers-for-data-reading'…
Browse files Browse the repository at this point in the history
… into 50-xml-parsers-for-data-reading

# Conflicts:
#	tests/io/xml/sdmx21/reader/test_reader.py
  • Loading branch information
Alberto committed Jul 3, 2024
2 parents 9869d60 + a069859 commit 14de194
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 49 deletions.
23 changes: 8 additions & 15 deletions src/pysdmx/io/xml/sdmx21/reader/data_read.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,11 +103,10 @@ def __reading_generic_series(dataset: Dict[str, Any]) -> pd.DataFrame:
series[OBS] = add_list(series[OBS])

for data in series[OBS]:
obs = {OBS_DIM: data[OBS_DIM][VALUE.lower()]}
obs = {OBS_DIM: data[OBS_DIM][VALUE.lower()],
OBSVALUE.upper(): None}
if OBSVALUE in data:
obs[OBSVALUE.upper()] = data[OBSVALUE][VALUE.lower()]
else:
obs[OBSVALUE.upper()] = None
if ATTRIBUTES in data:
obs = {**obs, **__get_element_to_list(data, mode=ATTRIBUTES)}
test_list.append({**keys, **obs})
Expand Down Expand Up @@ -147,8 +146,7 @@ def __reading_str_series(dataset: Dict[str, Any]) -> pd.DataFrame:
dataset[SERIES] = add_list(dataset[SERIES])
for data in dataset[SERIES]:
keys = dict(itertools.islice(data.items(), len(data) - 1))
if not isinstance(data[OBS], list):
data[OBS] = [data[OBS]]
data[OBS] = add_list(data[OBS])
for j in data[OBS]:
test_list.append({**keys, **j})
test_list, df = __process_df(test_list, df)
Expand Down Expand Up @@ -185,10 +183,9 @@ def __get_at_att_str(dataset: Dict[str, Any]) -> Dict[str, Any]:
def __get_at_att_gen(dataset: Dict[str, Any]) -> Dict[str, Any]:
"""Gets all the elements if it is Generic data."""
attached_attributes = {}
if VALUE in dataset[ATTRIBUTES]:
dataset[ATTRIBUTES][VALUE] = add_list(dataset[ATTRIBUTES][VALUE])
for k in dataset[ATTRIBUTES][VALUE]:
attached_attributes[k[ID]] = k[VALUE.lower()]
dataset[ATTRIBUTES][VALUE] = add_list(dataset[ATTRIBUTES][VALUE])
for k in dataset[ATTRIBUTES][VALUE]:
attached_attributes[k[ID]] = k[VALUE.lower()]
return attached_attributes


Expand Down Expand Up @@ -265,8 +262,6 @@ def __parse_structure_specific_data(
) -> Dataset:
attached_attributes = __get_at_att_str(dataset)

df = pd.DataFrame()

# Parsing data
if SERIES in dataset:
# Structure Specific Series
Expand All @@ -277,7 +272,7 @@ def __parse_structure_specific_data(
set(df.columns).intersection(set(df_group.columns))
)
df = pd.merge(df, df_group, on=common_columns, how="left")
elif OBS in dataset:
else:
dataset[OBS] = add_list(dataset[OBS])
# Structure Specific All dimensions
df = pd.DataFrame(dataset[OBS]).replace(np.nan, "")
Expand All @@ -295,13 +290,11 @@ def __parse_generic_data(
) -> Dataset:
attached_attributes = __get_at_att_gen(dataset)

df = pd.DataFrame()

# Parsing data
if SERIES in dataset:
# Generic Series
df = __reading_generic_series(dataset)
elif OBS in dataset:
else:
# Generic All Dimensions
df = __reading_generic_all(dataset)

Expand Down
19 changes: 1 addition & 18 deletions src/pysdmx/util/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,7 @@ def split_from_urn(obj_: str, split_id: bool = True) -> Any:
full id.
"""
full_id = obj_.split("=", 1)[1]
if split_id:
return split_unique_id(full_id)
return full_id
return split_unique_id(full_id)


def add_list(element: Any) -> Any:
Expand All @@ -53,21 +51,6 @@ def add_list(element: Any) -> Any:
element = [element]
return element


def unique_id(agency_id: str, id_: str, version: str) -> str:
    """Build the short reference string ``agency:id(version)``.

    Args:
        agency_id: Name of the agency.
        id_: The id.
        version: Version.

    Returns:
        A string with the info contained in the reference, formatted as
        ``"{agency_id}:{id_}({version})"``, e.g. ``"BIS:BIS_DER(1.0)"``.
    """
    return f"{agency_id}:{id_}({version})"


# def drop_na_all(df: pd.DataFrame):
# """
#
Expand Down
16 changes: 0 additions & 16 deletions tests/io/xml/sdmx21/reader/test_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,19 +238,3 @@ def test_load_big_file(samples_folder):
assert filetype == "xml"
result = read_xml(input_str, validate=True)
assert "BIS:BIS_DER(1.0)" in result


def test_gen_all_no_attr(samples_folder):
    """Check that reading ``gen_all_no_atts.xml`` raises a ``KeyError``.

    The sample is loaded from ``samples_folder``, must be detected as
    ``"xml"``, and ``read_xml`` is expected to fail with a ``KeyError``
    whose message matches ``"Attributes"``.
    """
    sample_file = samples_folder / "gen_all_no_atts.xml"
    xml_text, detected_type = process_string_to_read(sample_file)
    assert detected_type == "xml"
    with pytest.raises(KeyError, match="Attributes"):
        read_xml(xml_text, validate=True)


def test_gen_ser_no_atts(samples_folder):
    """Check that reading ``gen_ser_no_atts.xml`` raises a ``KeyError``.

    The sample is loaded from ``samples_folder``, must be detected as
    ``"xml"``, and ``read_xml`` is expected to fail with a ``KeyError``
    whose message matches ``"Attributes"``.
    """
    sample_file = samples_folder / "gen_ser_no_atts.xml"
    xml_text, detected_type = process_string_to_read(sample_file)
    assert detected_type == "xml"
    with pytest.raises(KeyError, match="Attributes"):
        read_xml(xml_text, validate=True)

0 comments on commit 14de194

Please sign in to comment.