From d4d890c2e4cf58928d3306476761bef16f74ec56 Mon Sep 17 00:00:00 2001 From: saurabh sharma Date: Wed, 26 Nov 2025 23:22:02 -0700 Subject: [PATCH 1/2] handled the null values in the dataframe --- awswrangler/neptune/_neptune.py | 5 +++- tests/unit/test_neptune.py | 47 +++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 1 deletion(-) diff --git a/awswrangler/neptune/_neptune.py b/awswrangler/neptune/_neptune.py index e9beb7295..eae19711d 100644 --- a/awswrangler/neptune/_neptune.py +++ b/awswrangler/neptune/_neptune.py @@ -110,7 +110,10 @@ def execute_sparql(client: NeptuneClient, query: str) -> pd.DataFrame: df = None if "results" in data and "bindings" in data["results"]: df = pd.DataFrame(data["results"]["bindings"], columns=data.get("head", {}).get("vars")) - df = df.applymap(lambda d: d["value"] if "value" in d else None) + def _binding_value(d: Any) -> Any: + return d.get("value") if isinstance(d, dict) else None + for col in df.columns: + df[col] = df[col].apply(_binding_value) else: df = pd.DataFrame(data) diff --git a/tests/unit/test_neptune.py b/tests/unit/test_neptune.py index d7236c3fa..d36832934 100644 --- a/tests/unit/test_neptune.py +++ b/tests/unit/test_neptune.py @@ -471,6 +471,53 @@ def test_sparql_write_triples(neptune_endpoint, neptune_port) -> dict[str, Any]: assert len(batch_df.index) == len(final_df.index) + 50 +def test_sparql_bindings_nan_safe() -> None: + class _DummyClient: + def read_sparql(self, _query: str) -> dict[str, Any]: + return { + "head": { + "vars": ["book_id", "title", "checked_out", "last_updated", "category_code"] + }, + "results": { + "bindings": [ + { + "title": { + "xml:lang": "en", + "type": "literal", + "value": "The Art of Space Travel", + }, + "book_id": {"type": "literal", "value": "B10045982"}, + "last_updated": { + "datatype": "http://www.w3.org/2001/XMLSchema#dateTime", + "type": "literal", + "value": "2025-10-01T10:30:00.000Z", + }, + "category_code": { + "datatype": "http://www.w3.org/2001/XMLSchema#integer", + "type": "literal", + "value": "5", + }, + }, + ] + }, + } + + client: Any = _DummyClient() + df = wr.neptune.execute_sparql( + client, + "SELECT ?book_id ?title ?checked_out ?last_updated ?category_code WHERE { ?s ?p ?o }", + ) + assert df.shape == (1, 5) + # Row 1 + v1 = df.iloc[0].tolist() + book_id, title, checked_out, last_updated, category_code = v1 + assert book_id == "B10045982" + assert title == "The Art of Space Travel" + assert checked_out is None # missing binding becomes None + assert last_updated == "2025-10-01T10:30:00.000Z" + assert category_code == "5" + + def test_sparql_write_quads(neptune_endpoint, neptune_port) -> dict[str, Any]: label = f"foo_{uuid.uuid4()}" sparkql_query = f"SELECT ?p ?o FROM WHERE {{ <{label}> ?p ?o .}}" From fa37151a95083b6ee1c85b3bb158a8199e79888a Mon Sep 17 00:00:00 2001 From: saurabh sharma Date: Wed, 26 Nov 2025 23:36:14 -0700 Subject: [PATCH 2/2] format check --- awswrangler/neptune/_neptune.py | 2 ++ tests/unit/test_neptune.py | 4 +--- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/awswrangler/neptune/_neptune.py b/awswrangler/neptune/_neptune.py index eae19711d..4c262ada0 100644 --- a/awswrangler/neptune/_neptune.py +++ b/awswrangler/neptune/_neptune.py @@ -110,8 +110,10 @@ def execute_sparql(client: NeptuneClient, query: str) -> pd.DataFrame: df = None if "results" in data and "bindings" in data["results"]: df = pd.DataFrame(data["results"]["bindings"], columns=data.get("head", {}).get("vars")) + def _binding_value(d: Any) -> Any: return d.get("value") if isinstance(d, dict) else None + for col in df.columns: df[col] = df[col].apply(_binding_value) else: diff --git a/tests/unit/test_neptune.py b/tests/unit/test_neptune.py index d36832934..77509491c 100644 --- a/tests/unit/test_neptune.py +++ b/tests/unit/test_neptune.py @@ -475,9 +475,7 @@ def test_sparql_bindings_nan_safe() -> None: class _DummyClient: def read_sparql(self, _query: str) -> dict[str, Any]: return { - "head": { - "vars": ["book_id", "title", "checked_out", "last_updated", "category_code"] - }, + "head": {"vars": ["book_id", "title", "checked_out", "last_updated", "category_code"]}, "results": { "bindings": [ {