From 88cc1530a166742a1be9ae7287263277bd1ec0f7 Mon Sep 17 00:00:00 2001 From: Haejoon Lee Date: Mon, 24 Jun 2024 14:40:50 +0900 Subject: [PATCH] [SPARK-48650][PYTHON] Display correct call site from IPython Notebook MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? This PR proposes to display the correct call site information from IPython Notebook. ### Why are the changes needed? We added `DataFrameQueryContext` for PySpark error messages in https://github.com/apache/spark/pull/45377, but it does not work very well from IPython Notebook. ### Does this PR introduce _any_ user-facing change? No API changes, but the user-facing error message from IPython Notebook will be improved: **Before** Screenshot 2024-06-18 at 5 15 56 PM **After** Screenshot 2024-06-19 at 8 45 05 AM **NOTE:** This also works when a command is executed across multiple cells: Screenshot 2024-06-19 at 8 42 29 AM ### How was this patch tested? Manually tested with IPython Notebook. ### Was this patch authored or co-authored using generative AI tooling? No Closes #47009 from itholic/error_context_on_notebook. Authored-by: Haejoon Lee Signed-off-by: Hyukjin Kwon --- python/pyspark/errors/utils.py | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/python/pyspark/errors/utils.py b/python/pyspark/errors/utils.py index cd30463802840..9155bfb54abe8 100644 --- a/python/pyspark/errors/utils.py +++ b/python/pyspark/errors/utils.py @@ -21,6 +21,7 @@ import os import threading from typing import Any, Callable, Dict, Match, TypeVar, Type, Optional, TYPE_CHECKING +import pyspark from pyspark.errors.error_classes import ERROR_CLASSES_MAP if TYPE_CHECKING: @@ -164,9 +165,29 @@ def _capture_call_site(spark_session: "SparkSession", depth: int) -> str: The call site information is used to enhance error messages with the exact location in the user code that led to the error. 
""" - stack = list(reversed(inspect.stack())) + # Filtering out PySpark code and keeping user code only + pyspark_root = os.path.dirname(pyspark.__file__) + stack = [ + frame_info for frame_info in inspect.stack() if pyspark_root not in frame_info.filename + ] + selected_frames = stack[:depth] - call_sites = [f"{frame.filename}:{frame.lineno}" for frame in selected_frames] + + # We try import here since IPython is not a required dependency + try: + from IPython import get_ipython + + ipython = get_ipython() + except ImportError: + ipython = None + + # Identifying the cell is useful when the error is generated from IPython Notebook + if ipython: + call_sites = [ + f"line {frame.lineno} in cell [{ipython.execution_count}]" for frame in selected_frames + ] + else: + call_sites = [f"{frame.filename}:{frame.lineno}" for frame in selected_frames] call_sites_str = "\n".join(call_sites) return call_sites_str