# Benchmark workflow.
#
# * On pushes to master, `generate_master_traces` regenerates the reference
#   traces under benchmark/traces/master and opens a PR with them.
# * On pull requests, `compare_traces` generates traces for the PR branch,
#   compares them to the committed master traces, and posts the result as a
#   PR comment.
name: benchmark

on:
  workflow_dispatch:
  pull_request:
    paths:
      - "adbpyg_adapter/*.py"
      - "benchmark/*.py"
  push:
    branches:
      - master
    paths:
      - "adbpyg_adapter/*.py"
      - "benchmark/*.py"

jobs:
  generate_master_traces:
    if: github.event_name == 'push'
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Create new branch
        run: git checkout -b actions/benchmark

      - name: Set branch upstream
        run: git push -u origin actions/benchmark
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          # Pin the interpreter so the master traces are produced with the same
          # Python version as the PR traces they are compared against.
          python-version: "3.10"
          cache: 'pip'
          cache-dependency-path: setup.py

      - name: Start ArangoDB Instance
        run: docker run -d --name arangodb -p 8529:8529 -e ARANGO_ROOT_PASSWORD= arangodb/arangodb

      - name: Start Jaeger Instance
        run: docker run -d --name jaeger --rm -e COLLECTOR_ZIPKIN_HOST_PORT=:9411 -p 16686:16686 -p 4317:4317 -p 4318:4318 -p 9411:9411 jaegertracing/all-in-one:latest

      - name: Install packages
        run: |
          pip install torch==2.1.0
          pip install torch-scatter torch-sparse -f https://data.pyg.org/whl/torch-$(python -c 'import torch; print(torch.__version__.split("+")[0])')+cpu.html
          pip install -e '.[dev, tracing]'

      - name: Run Python Script
        run: python benchmark/write.py --output_dir master

      - name: Echo PyG to ArangoDB
        run: cat benchmark/traces/master/pyg_to_arangodb.json | jq .

      - name: Echo ArangoDB to PyG
        run: cat benchmark/traces/master/arangodb_to_pyg.json | jq .

      - name: Make commit for auto-generated benchmark files
        uses: EndBug/add-and-commit@v9
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          add: "./benchmark/traces/master/*.json"
          new_branch: actions/benchmark
          # Action inputs are not passed through a shell, so "$GITHUB_SHA"
          # would be committed literally; use the expression context instead.
          message: "generate benchmark files for ${{ github.sha }}"

      - name: Create pull request for the auto generated benchmark
        run: |
          echo "PR_URL=$(gh pr create \
            --title "benchmark: $GITHUB_SHA" \
            --body "beep boop, i am a robot ($GITHUB_SHA)" \
            --label documentation)" >> $GITHUB_ENV
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      - name: Alert developer of open PR
        run: echo "Benchmark $PR_URL is ready to be merged by developer."

  compare_traces:
    if: github.event_name == 'pull_request'
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: "3.10"
          cache: 'pip'
          cache-dependency-path: setup.py

      - name: Start ArangoDB Instance
        run: docker run -d --name arangodb -p 8529:8529 -e ARANGO_ROOT_PASSWORD= arangodb/arangodb

      - name: Start Jaeger Instance
        run: docker run -d --name jaeger --rm -e COLLECTOR_ZIPKIN_HOST_PORT=:9411 -p 16686:16686 -p 4317:4317 -p 4318:4318 -p 9411:9411 jaegertracing/all-in-one:latest

      - name: Install packages
        run: |
          pip install torch==2.1.0
          pip install torch-scatter torch-sparse -f https://data.pyg.org/whl/torch-$(python -c 'import torch; print(torch.__version__.split("+")[0])')+cpu.html
          pip install -e '.[dev, tracing]'

      - name: Write PR traces
        run: python benchmark/write.py --output_dir branch

      # Multi-line values are written to $GITHUB_ENV with the
      # `NAME<<DELIMITER ... DELIMITER` heredoc form.
      - name: Compare PR & Master Traces
        run: |
          echo "ROOT_SPAN_COMPARISON<<EOF" >> $GITHUB_ENV
          python benchmark/compare.py >> $GITHUB_ENV
          echo "EOF" >> $GITHUB_ENV

      - name: Prepare PR Comment
        run: |
          echo 'MESSAGE<<EOF' >> $GITHUB_ENV
          echo "Benchmark (${{ github.event.pull_request.head.sha }})" >> $GITHUB_ENV
          echo '' >> $GITHUB_ENV
          echo '```json' >> $GITHUB_ENV
          echo "$ROOT_SPAN_COMPARISON" | jq . >> $GITHUB_ENV
          echo '```' >> $GITHUB_ENV
          echo '' >> $GITHUB_ENV
          echo "See the full diff [here](https://github.com/arangoml/pyg-adapter/actions/runs/$GITHUB_RUN_ID)" >> $GITHUB_ENV
          echo 'EOF' >> $GITHUB_ENV

      - name: Post PR Comment
        uses: actions/github-script@v7
        with:
          github-token: ${{secrets.GITHUB_TOKEN}}
          script: |
            const prNumber = context.payload.pull_request.number;
            const message = process.env.MESSAGE;
            // Await the request so an API failure fails this step instead of
            // surfacing as an unhandled promise rejection.
            await github.rest.issues.createComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: prNumber,
              body: message
            });

      - name: Echo Full PyG to ArangoDB Diff
        run: cat benchmark/traces/diff/pyg_to_arangodb.json | jq .

      - name: Echo Full ArangoDB to PyG Diff
        run: cat benchmark/traces/diff/arangodb_to_pyg.json | jq .
`cd pyg-adapter` 3. (create virtual environment of choice) 4. `pip install torch` -5. `pip install -e .[dev]` +5. `pip install -e '.[dev, tracing]'` 6. (create an ArangoDB instance with method of choice) 7. `pytest --url <> --dbName <> --username <> --password <>` -**Note**: A `pytest` parameter can be omitted if the endpoint is using its default value: +**Note**: `pytest` parameters: ```python def pytest_addoption(parser): parser.addoption("--url", action="store", default="http://localhost:8529") parser.addoption("--dbName", action="store", default="_system") parser.addoption("--username", action="store", default="root") parser.addoption("--password", action="store", default="") + parser.addoption("--otlp_endpoint", action="append", default=[]) # OpenTelemetry Protocol Exporter endpoint ``` diff --git a/adbpyg_adapter/adapter.py b/adbpyg_adapter/adapter.py index b8b9dcb..7b3cc36 100644 --- a/adbpyg_adapter/adapter.py +++ b/adbpyg_adapter/adapter.py @@ -13,7 +13,7 @@ from pandas import DataFrame, Series from rich.console import Group from rich.live import Live -from rich.progress import Progress +from rich.progress import Progress, TaskID from torch import Tensor, cat, tensor from torch_geometric.data import Data, HeteroData from torch_geometric.data.storage import EdgeStorage, NodeStorage @@ -22,6 +22,12 @@ from .abc import Abstract_ADBPyG_Adapter from .controller import ADBPyG_Controller from .exceptions import ADBMetagraphError, InvalidADBEdgesError, PyGMetagraphError +from .tracing import ( + TRACING_ENABLED, + TracingManager, + start_as_current_span, + with_tracing, +) from .typings import ( ADBMap, ADBMetagraph, @@ -40,6 +46,9 @@ validate_pyg_metagraph, ) +if TRACING_ENABLED: + from opentelemetry.trace import Tracer + class ADBPyG_Adapter(Abstract_ADBPyG_Adapter): """ArangoDB-PyG adapter. @@ -53,6 +62,11 @@ class ADBPyG_Adapter(Abstract_ADBPyG_Adapter): :param logging_lvl: Defaults to logging.INFO. 
Other useful options are logging.DEBUG (more verbose), and logging.WARNING (less verbose). :type logging_lvl: str | int + :param tracer: The OpenTelemetry tracer instance. Requires the `tracing` + extra to be installed (i.e `pip install adbpyg-adapter[tracing]`). + See `adbpyg_adapter.tracing.create_tracer` for details on how to + create a tracer instance. + :type tracer: opentelemetry.trace.Tracer :raise TypeError: If invalid parameter types """ @@ -61,8 +75,10 @@ def __init__( db: StandardDatabase, controller: ADBPyG_Controller = ADBPyG_Controller(), logging_lvl: Union[str, int] = logging.INFO, + tracer: Optional["Tracer"] = None, ): self.set_logging(logging_lvl) + self.set_tracer(tracer) if not isinstance(db, StandardDatabase): msg = "**db** parameter must inherit from arango.database.StandardDatabase" @@ -87,12 +103,31 @@ def cntrl(self) -> ADBPyG_Controller: return self.__cntrl # pragma: no cover def set_logging(self, level: Union[int, str]) -> None: + """Set the logging level for the adapter instance. + + :param level: The logging level (i.e logging.INFO, logging.DEBUG, etc.) + :type level: str | int + """ logger.setLevel(level) + def set_tracer(self, tracer: Optional["Tracer"]) -> None: + """Set the OpenTelemetry tracer for the adapter instance. Requires + the `tracing` extra to be installed (i.e `pip install adbpyg-adapter[tracing]`). + + :param tracer: The OpenTelemetry tracer instance. See + `adbpyg_adapter.tracing.create_tracer` for details on how to + create a tracer instance. + :type tracer: opentelemetry.trace.Tracer + :raise ImportError: If OpenTelemetry is not installed. + """ + if TRACING_ENABLED: + TracingManager.set_tracer(tracer) + ########################### # Public: ArangoDB -> PyG # ########################### + @with_tracing() def arangodb_to_pyg( self, name: str, @@ -260,6 +295,7 @@ def udf_v1_x(v1_df): build a PyG-ready Tensor from a DataFrame equivalent to the associated ArangoDB collection. 
""" + TracingManager.set_attributes(name=name) logger.debug(f"--arangodb_to_pyg('{name}')--") validate_adb_metagraph(metagraph) @@ -293,22 +329,13 @@ def udf_v1_x(v1_df): if preserve_key is not None: node_data[preserve_key] = [] - # 1. Fetch ArangoDB vertices - v_col_cursor, v_col_size = self.__fetch_adb_docs( - v_col, meta, **adb_export_kwargs - ) - - # 2. Process ArangoDB vertices - self.__process_adb_cursor( - "#8929C2", - v_col_cursor, - v_col_size, - self.__process_adb_vertex_df, + self.__process_adb_v_col( v_col, - adb_map, meta, + adb_export_kwargs, + adb_map, preserve_key, - node_data=node_data, + node_data, ) #################### @@ -322,30 +349,22 @@ def udf_v1_x(v1_df): for e_col, meta in metagraph.get("edgeCollections", {}).items(): logger.debug(f"Preparing '{e_col}' edges") - # 1. Fetch ArangoDB edges - e_col_cursor, e_col_size = self.__fetch_adb_docs( - e_col, meta, **adb_export_kwargs - ) - - # 2. Process ArangoDB edges - self.__process_adb_cursor( - "#40A6F5", - e_col_cursor, - e_col_size, - self.__process_adb_edge_df, + self.__process_adb_e_col( e_col, - adb_map, meta, + adb_export_kwargs, + adb_map, preserve_key, - data=data, - v_cols=v_cols, - strict=strict, - is_homogeneous=is_homogeneous, + data, + v_cols, + strict, + is_homogeneous, ) logger.info(f"Created PyG '{name}' Graph") return data + @with_tracing() def arangodb_collections_to_pyg( self, name: str, @@ -399,6 +418,7 @@ def arangodb_collections_to_pyg( name, metagraph, preserve_adb_keys, strict, **adb_export_kwargs ) + @with_tracing() def arangodb_graph_to_pyg( self, name: str, @@ -450,6 +470,7 @@ def arangodb_graph_to_pyg( # Public: PyG -> ArangoDB # ########################### + @with_tracing() def pyg_to_arangodb( self, name: str, @@ -549,6 +570,7 @@ def y_tensor_to_2_column_dataframe(pyg_tensor, adb_df): 4) Dissasembles the 2-feature Tensor into two ArangoDB attributes, where each attribute holds one feature value. 
""" + TracingManager.set_attributes(name=name) logger.debug(f"--pyg_to_arangodb('{name}')--") validate_pyg_metagraph(metagraph) @@ -584,6 +606,10 @@ def y_tensor_to_2_column_dataframe(pyg_tensor, adb_df): node_data: NodeStorage edge_data: EdgeStorage + # Define PyG Batch Processing Functions + process_node_batch_fn: Callable[[int, int], DataFrame] + process_edge_batch_fn: Callable[[int, int], DataFrame] + spinner_progress = get_import_spinner_progress(" ") ############# @@ -595,41 +621,35 @@ def y_tensor_to_2_column_dataframe(pyg_tensor, adb_df): meta = n_meta.get(n_type, {}) node_data = pyg_g if is_homogeneous else pyg_g[n_type] - node_data_batch_size = batch_size or node_data.num_nodes - - start_index = 0 - end_index = min(node_data_batch_size, node_data.num_nodes) - batches = ceil(node_data.num_nodes / node_data_batch_size) bar_progress = get_bar_progress(f"(PyG → ADB): '{n_type}'", "#97C423") bar_progress_task = bar_progress.add_task(n_type, total=node_data.num_nodes) - with Live(Group(bar_progress, spinner_progress)): - for _ in range(batches): - # 1. Process the Node batch - df = self.__process_pyg_node_batch( - n_type, - node_data, - meta, - pyg_map, - is_explicit_metagraph, - is_custom_controller, - start_index, - end_index, - ) - - bar_progress.advance(bar_progress_task, advance=len(df)) - - # 2. Insert the ArangoDB Node Documents - self.__insert_adb_docs( - spinner_progress, df, n_type, use_async, **adb_import_kwargs - ) + process_node_batch_fn = ( + lambda start_index, end_index: self.__process_pyg_node_batch( + n_type, + node_data, + meta, + pyg_map, + is_explicit_metagraph, + is_custom_controller, + start_index, + end_index, + ) + ) - # 3. 
Update the batch indices - start_index = end_index - end_index = min( - end_index + node_data_batch_size, node_data.num_nodes - ) + with Live(Group(bar_progress, spinner_progress)): + self.__process_pyg_n_type( + n_type, + node_data.num_nodes, + batch_size or node_data.num_nodes, + process_node_batch_fn, + bar_progress, + bar_progress_task, + spinner_progress, + use_async, + adb_import_kwargs, + ) ############# # PyG Edges # @@ -640,41 +660,35 @@ def y_tensor_to_2_column_dataframe(pyg_tensor, adb_df): meta = e_meta.get(e_type, {}) edge_data = pyg_g if is_homogeneous else pyg_g[e_type] - edge_data_batch_size = batch_size or edge_data.num_edges - - start_index = 0 - end_index = min(edge_data_batch_size, edge_data.num_edges) - batches = ceil(edge_data.num_edges / edge_data_batch_size) bar_progress = get_bar_progress(f"(PyG → ADB): {e_type}", "#994602") bar_progress_task = bar_progress.add_task(e_type, total=edge_data.num_edges) - with Live(Group(bar_progress, spinner_progress)): - for _ in range(batches): - # 1. Process the Edge batch - df = self.__process_pyg_edge_batch( - e_type, - edge_data, - meta, - pyg_map, - is_explicit_metagraph, - is_custom_controller, - start_index, - end_index, - ) - - bar_progress.advance(bar_progress_task, advance=len(df)) - - # 2. Insert the ArangoDB Edge Documents - self.__insert_adb_docs( - spinner_progress, df, e_type[1], use_async, **adb_import_kwargs - ) + process_edge_batch_fn = ( + lambda start_index, end_index: self.__process_pyg_edge_batch( + e_type, + edge_data, + meta, + pyg_map, + is_explicit_metagraph, + is_custom_controller, + start_index, + end_index, + ) + ) - # 3. 
Update the batch indices - start_index = end_index - end_index = min( - end_index + edge_data_batch_size, edge_data.num_edges - ) + with Live(Group(bar_progress, spinner_progress)): + self.__process_pyg_e_type( + e_type, + edge_data.num_edges, + batch_size or edge_data.num_edges, + process_edge_batch_fn, + bar_progress, + bar_progress_task, + spinner_progress, + use_async, + adb_import_kwargs, + ) logger.info(f"Created ArangoDB '{name}' Graph") return adb_graph @@ -683,9 +697,116 @@ def y_tensor_to_2_column_dataframe(pyg_tensor, adb_df): # Private: ArangoDB -> PyG # ############################ + @with_tracing("process_adb_vertex_collection") + def __process_adb_v_col( + self, + v_col: str, + meta: Union[Set[str], Dict[str, ADBMetagraphValues]], + adb_export_kwargs: Dict[str, Any], + adb_map: ADBMap, + preserve_key: Optional[str], + node_data: NodeStorage, + ) -> None: + """ArangoDB -> PyG: Processes the ArangoDB Vertex Collection. + + :param v_col: The ArangoDB vertex collection. + :type v_col: str + :param meta: The metagraph for the current **v_col**. + :type meta: Set[str] | Dict[str, ADBMetagraphValues] + :param adb_export_kwargs: Keyword arguments to specify AQL query options + when fetching documents from the ArangoDB instance. + :type adb_export_kwargs: Dict[str, Any] + :param adb_map: The ArangoDB -> PyG map. + :type adb_map: adbpyg_adapter.typings.ADBMap + :param preserve_key: The PyG key to preserve the ArangoDB _key values. + :type preserve_key: Optional[str] + :param node_data: The PyG NodeStorage object. + :type node_data: torch_geometric.data.storage.NodeStorage + """ + TracingManager.set_attributes(v_col=v_col) + + # 1. Fetch ArangoDB vertices + v_col_cursor, v_col_size = self.__fetch_adb_docs( + v_col, False, meta, **adb_export_kwargs + ) + + # 2. 
Process ArangoDB vertices + self.__process_adb_cursor( + "#8929C2", + v_col_cursor, + v_col_size, + self.__process_adb_vertex_df, + v_col, + adb_map, + meta, + preserve_key, + node_data=node_data, + ) + + @with_tracing("process_adb_edge_collection") + def __process_adb_e_col( + self, + e_col: str, + meta: Union[Set[str], Dict[str, ADBMetagraphValues]], + adb_export_kwargs: Dict[str, Any], + adb_map: ADBMap, + preserve_key: Optional[str], + data: Union[Data, HeteroData], + v_cols: List[str], + strict: bool, + is_homogeneous: bool, + ) -> None: + """ArangoDB -> PyG: Processes the ArangoDB Edge Collection. + + :param e_col: The ArangoDB edge collection. + :type e_col: str + :param meta: The metagraph for the current **e_col**. + :type meta: Set[str] | Dict[str, ADBMetagraphValues] + :param adb_export_kwargs: Keyword arguments to specify AQL query options + when fetching documents from the ArangoDB instance. + :type adb_export_kwargs: Dict[str, Any] + :param adb_map: The ArangoDB -> PyG map. + :type adb_map: adbpyg_adapter.typings.ADBMap + :param preserve_key: The PyG key to preserve the ArangoDB _key values. + :type preserve_key: Optional[str] + :param data: The PyG Data or HeteroData object. + :type data: torch_geometric.data.Data | torch_geometric.data.HeteroData + :param v_cols: The list of ArangoDB vertex collections. + :type v_cols: List[str] + :param strict: Set fault tolerance when loading a graph from ArangoDB. If set + to false, this will ignore invalid edges (e.g. dangling/half edges). + :type strict: bool + :param is_homogeneous: Whether the ArangoDB graph is homogeneous or not. + :type is_homogeneous: bool + """ + TracingManager.set_attributes(e_col=e_col) + + # 1. Fetch ArangoDB edges + e_col_cursor, e_col_size = self.__fetch_adb_docs( + e_col, True, meta, **adb_export_kwargs + ) + + # 2. 
Process ArangoDB edges + self.__process_adb_cursor( + "#40A6F5", + e_col_cursor, + e_col_size, + self.__process_adb_edge_df, + e_col, + adb_map, + meta, + preserve_key, + data=data, + v_cols=v_cols, + strict=strict, + is_homogeneous=is_homogeneous, + ) + + @with_tracing("fetch_adb_documents") def __fetch_adb_docs( self, col: str, + is_edge: bool, meta: Union[Set[str], Dict[str, ADBMetagraphValues]], **adb_export_kwargs: Any, ) -> Tuple[Cursor, int]: @@ -694,6 +815,8 @@ def __fetch_adb_docs( :param col: The ArangoDB collection. :type col: str + :param is_edge: True if **col** is an edge collection. + :type is_edge: bool :param meta: The MetaGraph associated to **col** :type meta: Set[str] | Dict[str, adbpyg_adapter.typings.ADBMetagraphValues] :param adb_export_kwargs: Keyword arguments to specify AQL query options @@ -703,48 +826,44 @@ def __fetch_adb_docs( :rtype: pandas.DataFrame """ - def get_aql_return_value( - meta: Union[Set[str], Dict[str, ADBMetagraphValues]] - ) -> str: + def get_aql_return_value() -> str: """Helper method to formulate the AQL `RETURN` value based on the document attributes specified in **meta** """ - attributes = [] + attributes = ["_key"] + attributes += ["_from", "_to"] if is_edge else [] if type(meta) is set: - attributes = list(meta) + attributes += list(meta) elif type(meta) is dict: for value in meta.values(): if type(value) is str: attributes.append(value) elif type(value) is dict: - attributes.extend(list(value.keys())) + attributes += list(value.keys()) elif callable(value): # Cannot determine which attributes to extract if UDFs are used # Therefore we just return the entire document return "doc" - return f""" - MERGE( - {{ _key: doc._key, _from: doc._from, _to: doc._to }}, - KEEP(doc, {list(attributes)}) - ) - """ + return f"KEEP(doc, {attributes})" col_size: int = self.__db.collection(col).count() + TracingManager.set_attributes(col=col, col_size=col_size, meta=meta) with get_export_spinner_progress(f"ADB Export: '{col}' 
({col_size})") as p: p.add_task(col) cursor: Cursor = self.__db.aql.execute( - f"FOR doc IN @@col RETURN {get_aql_return_value(meta)}", + f"FOR doc IN @@col RETURN {get_aql_return_value()}", bind_vars={"@col": col}, - **{**adb_export_kwargs, **{"stream": True}}, + **{**adb_export_kwargs, "stream": True}, ) return cursor, col_size + @with_tracing("process_adb_cursor") def __process_adb_cursor( self, progress_color: str, @@ -783,20 +902,19 @@ def __process_adb_cursor( progress = get_bar_progress(f"(ADB → PyG): '{col}'", progress_color) progress_task_id = progress.add_task(col, total=col_size) + i = 0 with Live(Group(progress)): - i = 0 while not cursor.empty(): - cursor_batch = len(cursor.batch()) - df = DataFrame([cursor.pop() for _ in range(cursor_batch)]) + df = DataFrame(cursor.batch()) + cursor.batch().clear() i = process_adb_df(i, df, col, adb_map, meta, preserve_key, **kwargs) progress.advance(progress_task_id, advance=len(df)) - df.drop(df.index, inplace=True) - if cursor.has_more(): cursor.fetch() + @with_tracing("process_adb_vertex_dataframe") def __process_adb_vertex_df( self, i: int, @@ -827,6 +945,8 @@ def __process_adb_vertex_df( :return: The last PyG Node id value. :rtype: int """ + TracingManager.set_attributes(i=i, vertex_df_size=len(df)) + # 1. Map each ArangoDB _key to a PyG node id for adb_key in df["_key"]: adb_map[v_col][adb_key] = i @@ -841,6 +961,7 @@ def __process_adb_vertex_df( return i + @with_tracing("process_adb_edge_dataframe") def __process_adb_edge_df( self, _: int, @@ -881,6 +1002,8 @@ def __process_adb_edge_df( but is needed for type hinting. :rtype: int """ + TracingManager.set_attributes(edge_df_size=len(df)) + # 1. 
Split the ArangoDB _from & _to IDs into two columns df[["from_col", "from_key"]] = self.__split_adb_ids(df["_from"]) df[["to_col", "to_key"]] = self.__split_adb_ids(df["_to"]) @@ -890,60 +1013,67 @@ def __process_adb_edge_df( df[["from_col", "to_col"]].value_counts().items() ): edge_type = (from_col, e_col, to_col) - edge_data: EdgeStorage = data if is_homogeneous else data[edge_type] - # 3. Check for partial Edge Collection import - if from_col not in v_cols or to_col not in v_cols: - logger.debug(f"Skipping {edge_type}") - continue + with start_as_current_span("process_adb_edge_type_df"): + TracingManager.set_attributes( + edge_type=edge_type, edge_type_df_size=count + ) - logger.debug(f"Preparing {count} {edge_type} edges") + # 3. Check for partial Edge Collection import + if from_col not in v_cols or to_col not in v_cols: + logger.debug(f"Skipping {edge_type}") + TracingManager.set_attributes(skipped=True) + continue + + logger.debug(f"Preparing {count} {edge_type} edges") + + # 4. Get the edge data corresponding to the current edge type + et_df: DataFrame = df[ + (df["from_col"] == from_col) & (df["to_col"] == to_col) + ] + + # 5. Map each ArangoDB from/to _key to the corresponding PyG node id + # NOTE: map() is somehow converting int values to float... + # So we rely on astype(int) to convert the float back to int, + # but we also fill NaN values with -1 so that we can convert + # the entire column to int without any issues. Need to revisit... + from_n = et_df["from_key"].map(adb_map[from_col]).fillna(-1).astype(int) + to_n = et_df["to_key"].map(adb_map[to_col]).fillna(-1).astype(int) + + # 6. Set/Update the PyG Edge Index + edge_data: EdgeStorage = data if is_homogeneous else data[edge_type] + empty_tensor = tensor([], dtype=torch.int64) + existing_edge_index = edge_data.get("edge_index", empty_tensor) + new_edge_index = tensor( + np.array([from_n.to_numpy(), to_n.to_numpy()]), dtype=torch.int64 + ) - # 4. 
Get the edge data corresponding to the current edge type - et_df: DataFrame = df[ - (df["from_col"] == from_col) & (df["to_col"] == to_col) - ] + edge_data.edge_index = cat((existing_edge_index, new_edge_index), dim=1) - # 5. Map each ArangoDB from/to _key to the corresponding PyG node id - # NOTE: map() is somehow converting int values to float... - # So we rely on astype(int) to convert the float back to int, - # but we also fill NaN values with -1 so that we can convert - # the entire column to int without any issues. Need to revisit... - from_n = et_df["from_key"].map(adb_map[from_col]).fillna(-1).astype(int) - to_n = et_df["to_key"].map(adb_map[to_col]).fillna(-1).astype(int) + # 7. Deal with invalid edges + if torch.any(edge_data.edge_index == -1): + if strict: + m = f"Invalid edges found in Edge Collection {e_col}, {from_col} -> {to_col}." # noqa: E501 + raise InvalidADBEdgesError(m) + else: + # Remove the invalid edges + edge_data.edge_index = edge_data.edge_index[ + :, ~torch.any(edge_data.edge_index == -1, dim=0) + ] - # 6. Set/Update the PyG Edge Index - edge_index = tensor( - np.array([from_n.to_numpy(), to_n.to_numpy()]), dtype=torch.int64 - ) + # 8. Set the PyG Edge Data + self.__set_pyg_data(meta, edge_data, et_df) - empty_tensor = tensor([], dtype=torch.int64) - existing_edge_index = edge_data.get("edge_index", empty_tensor) - edge_data.edge_index = cat((existing_edge_index, edge_index), dim=1) - - # 7. Deal with invalid edges - if torch.any(edge_data.edge_index == -1): - if strict: - m = f"Invalid edges found in Edge Collection {e_col}, {from_col} -> {to_col}." # noqa: E501 - raise InvalidADBEdgesError(m) - else: - # Remove the invalid edges - edge_data.edge_index = edge_data.edge_index[ - :, ~torch.any(edge_data.edge_index == -1, dim=0) - ] - - # 8. Set the PyG Edge Data - self.__set_pyg_data(meta, edge_data, et_df) - - # 9. 
Maintain the ArangoDB _key values - if preserve_key is not None: - if preserve_key not in edge_data: - edge_data[preserve_key] = [] + # 9. Maintain the ArangoDB _key values + if preserve_key is not None: + if preserve_key not in edge_data: + edge_data[preserve_key] = [] - edge_data[preserve_key].extend(list(et_df["_key"])) + edge_data[preserve_key].extend(list(et_df["_key"])) return 1 # Useless return value, but needed for type hinting + @with_tracing("split_adb_ids") def __split_adb_ids(self, s: Series) -> Series: """AranogDB -> PyG: Helper method to split the ArangoDB IDs within a Series into two columns @@ -956,6 +1086,7 @@ def __split_adb_ids(self, s: Series) -> Series: """ return s.str.split(pat="/", n=1, expand=True) + @with_tracing("set_pyg_data") def __set_pyg_data( self, meta: Union[Set[str], Dict[str, ADBMetagraphValues]], @@ -978,6 +1109,7 @@ def __set_pyg_data( """ valid_meta: Dict[str, ADBMetagraphValues] valid_meta = meta if type(meta) is dict else {m: m for m in meta} + TracingManager.set_attributes(meta=valid_meta) for k, v in valid_meta.items(): t = self.__build_tensor_from_dataframe(df, k, v) @@ -990,6 +1122,7 @@ def __set_pyg_data( m = f"'{k}' key in PyG Data must point to a Tensor" raise TypeError(m) + @with_tracing("build_tensor_from_dataframe") def __build_tensor_from_dataframe( self, adb_df: DataFrame, @@ -1011,8 +1144,8 @@ def __build_tensor_from_dataframe( :rtype: torch.Tensor :raise adbpyg_adapter.exceptions.ADBMetagraphError: If invalid **meta_val**. 
""" - m = f"__build_tensor_from_dataframe(df, '{meta_key}', {type(meta_val)})" - logger.debug(m) + TracingManager.set_attributes(meta_key=meta_key, meta_val=meta_val) + logger.debug(f"__build_tensor_from_dataframe(df, {meta_key}, {str(meta_val)})") if type(meta_val) is str: return tensor(adb_df[meta_val].to_list()) @@ -1046,6 +1179,7 @@ def __build_tensor_from_dataframe( # Private: PyG -> ArangoDB # ############################ + @with_tracing("get_node_and_edge_types") def __get_node_and_edge_types( self, name: str, @@ -1089,6 +1223,7 @@ def __get_node_and_edge_types( return node_types, edge_types + @with_tracing("edge_types_to_edge_definitions") def __etypes_to_edefinitions(self, edge_types: List[EdgeType]) -> List[Json]: """PyG -> ArangoDB: Converts PyG edge_types to ArangoDB edge_definitions @@ -1133,6 +1268,7 @@ def __etypes_to_edefinitions(self, edge_types: List[EdgeType]) -> List[Json]: return edge_definitions + @with_tracing("node_types_to_orphan_collections") def __ntypes_to_ocollections( self, node_types: List[str], edge_types: List[EdgeType] ) -> List[str]: @@ -1156,6 +1292,7 @@ def __ntypes_to_ocollections( orphan_collections = set(node_types) ^ non_orphan_collections return list(orphan_collections) + @with_tracing("create_adb_graph") def __create_adb_graph( self, name: str, @@ -1194,6 +1331,7 @@ def __create_adb_graph( orphan_collections, ) + @with_tracing("process_pyg_node_batch") def __process_pyg_node_batch( self, n_type: str, @@ -1227,6 +1365,8 @@ def __process_pyg_node_batch( :return: The ArangoDB DataFrame representing the PyG Node batch. :rtype: pandas.DataFrame """ + TracingManager.set_attributes(start_index=start_index, end_index=end_index) + # 1. 
Set the ArangoDB Node Data df = self.__set_adb_data( DataFrame(index=range(start_index, end_index)), @@ -1251,6 +1391,7 @@ def __process_pyg_node_batch( return df + @with_tracing("process_pyg_edge_batch") def __process_pyg_edge_batch( self, e_type: EdgeType, @@ -1284,6 +1425,8 @@ def __process_pyg_edge_batch( :return: The ArangoDB DataFrame representing the PyG Edge batch. :rtype: pandas.DataFrame """ + TracingManager.set_attributes(start_index=start_index, end_index=end_index) + src_n_type, _, dst_n_type = e_type # 1. Fetch the Edge Index of the current batch @@ -1324,6 +1467,165 @@ def __process_pyg_edge_batch( return df + @with_tracing("process_pyg_node_type") + def __process_pyg_n_type( + self, + n_type: str, + node_data_total_size: int, + node_data_batch_size: int, + process_node_batch_fn: Callable[..., DataFrame], + bar_progress: Progress, + bar_progress_task: TaskID, + spinner_progress: Progress, + use_async: bool, + adb_import_kwargs: Dict[str, Any], + ) -> None: + """PyG -> ArangoDB: Processes a PyG Node type. A simple wrapper + around the __process_batches method in order to set the tracer + attributes accordingly, and to provide a more descriptive + tracer span name. + + :param n_type: The PyG node type. + :type n_type: str + :param node_data_total_size: The total size of the PyG NodeStorage object. + :type node_data_total_size: int + :param node_data_batch_size: The batch size of the PyG NodeStorage object. + :type node_data_batch_size: int + :param process_node_batch_fn: The function to process the PyG Node batch. + :type process_node_batch_fn: Callable + :param bar_progress: The progress bar. + :type bar_progress: Progress + :param bar_progress_task: The progress bar task ID. + :type bar_progress_task: TaskID + :param spinner_progress: The spinner progress bar. + :type spinner_progress: Progress + :param use_async: Whether to use asynchronous insertion. 
+ :type use_async: bool + :param adb_import_kwargs: Keyword arguments to specify import options + when inserting documents into the ArangoDB instance. + :type adb_import_kwargs: Dict[str, Any] + """ + TracingManager.set_attributes(n_type=n_type, n_type_size=node_data_total_size) + + self.__process_batches( + n_type, + node_data_total_size, + node_data_batch_size, + process_node_batch_fn, + bar_progress, + bar_progress_task, + spinner_progress, + use_async, + adb_import_kwargs, + ) + + @with_tracing("process_pyg_edge_type") + def __process_pyg_e_type( + self, + e_type: EdgeType, + edge_data_total_size: int, + edge_data_batch_size: int, + process_edge_batch_fn: Callable[..., DataFrame], + bar_progress: Progress, + bar_progress_task: TaskID, + spinner_progress: Progress, + use_async: bool, + adb_import_kwargs: Dict[str, Any], + ) -> None: + """PyG -> ArangoDB: Processes a PyG Edge type. A simple wrapper + around the __process_batches method in order to set the tracer + attributes accordingly, and to provide a more descriptive + tracer span name. + + :param e_type: The PyG edge type. + :type e_type: torch_geometric.typing.EdgeType + :param edge_data_total_size: The total size of the PyG EdgeStorage object. + :type edge_data_total_size: int + :param edge_data_batch_size: The batch size of the PyG EdgeStorage object. + :type edge_data_batch_size: int + :param process_edge_batch_fn: The function to process the PyG Edge batch. + :type process_edge_batch_fn: Callable + :param bar_progress: The progress bar. + :type bar_progress: Progress + :param bar_progress_task: The progress bar task ID. + :type bar_progress_task: TaskID + :param spinner_progress: The spinner progress bar. + :type spinner_progress: Progress + :param use_async: Whether to use asynchronous insertion. + :type use_async: bool + :param adb_import_kwargs: Keyword arguments to specify import options + when inserting documents into the ArangoDB instance. 
+ :type adb_import_kwargs: Dict[str, Any] + """ + TracingManager.set_attributes(e_type=e_type, e_type_size=edge_data_total_size) + + self.__process_batches( + e_type[1], + edge_data_total_size, + edge_data_batch_size, + process_edge_batch_fn, + bar_progress, + bar_progress_task, + spinner_progress, + use_async, + adb_import_kwargs, + ) + + def __process_batches( + self, + col: str, + total_size: int, + batch_size: int, + process_batch_fn: Callable[..., DataFrame], + bar_progress: Progress, + bar_progress_task: TaskID, + spinner_progress: Progress, + use_async: bool, + adb_import_kwargs: Dict[str, Any], + ) -> None: + """PyG -> ArangoDB: Processes the PyG Node or Edge batches. No tracing + decorator required here. + + :param col: The ArangoDB collection name. + :type col: str + :param total_size: The total size of the PyG NodeStorage or EdgeStorage object. + :type total_size: int + :param batch_size: The batch size of the PyG NodeStorage or EdgeStorage object. + :type batch_size: int + :param process_batch_fn: The function to process the PyG Node or Edge batch. + :type process_batch_fn: Callable + :param bar_progress: The progress bar. + :type bar_progress: Progress + :param bar_progress_task: The progress bar task ID. + :type bar_progress_task: TaskID + :param spinner_progress: The spinner progress bar. + :type spinner_progress: Progress + :param use_async: Whether to use asynchronous insertion. + :type use_async: bool + :param adb_import_kwargs: Keyword arguments to specify import options + when inserting documents into the ArangoDB instance. + :type adb_import_kwargs: Dict[str, Any] + """ + start_index = 0 + end_index = min(batch_size, total_size) + batches = ceil(total_size / batch_size) + + for _ in range(batches): + # 1. Process the batch + df = process_batch_fn(start_index, end_index) + + bar_progress.advance(bar_progress_task, advance=len(df)) + + # 2. 
Insert the ArangoDB Documents + self.__insert_adb_docs( + spinner_progress, df, col, use_async, **adb_import_kwargs + ) + + # 3. Update the batch indices + start_index = end_index + end_index = min(end_index + batch_size, total_size) + + @with_tracing("set_adb_data") def __set_adb_data( self, df: DataFrame, @@ -1369,6 +1671,7 @@ def __set_adb_data( valid_meta: Dict[Any, PyGMetagraphValues] valid_meta = meta if type(meta) is dict else {m: m for m in meta} + TracingManager.set_attributes(meta=valid_meta) pyg_keys = ( set(valid_meta.keys()) @@ -1381,15 +1684,21 @@ def __set_adb_data( data = pyg_data[meta_key] meta_val = valid_meta.get(meta_key, str(meta_key)) - if ( - type(meta_val) is str - and type(data) is list - and len(data) == pyg_data_size - ): + if not isinstance(data, (list, Tensor)): + m = f"Skipping {meta_key} due to invalid type ({type(data)})" + logger.debug(m) + continue + + if len(data) != pyg_data_size: + m = f"Skipping {meta_key} due to invalid length ({len(data)} != {pyg_data_size})" # noqa: E501 + logger.debug(m) + continue + + if isinstance(data, list): meta_val = "_key" if meta_val in ["_v_key", "_e_key"] else meta_val df = df.join(DataFrame(data[start_index:end_index], columns=[meta_val])) - if type(data) is Tensor and len(data) == pyg_data_size: + elif isinstance(data, Tensor): df = df.join( self.__build_dataframe_from_tensor( data[start_index:end_index], @@ -1402,6 +1711,7 @@ def __set_adb_data( return df + @with_tracing("build_dataframe_from_tensor") def __build_dataframe_from_tensor( self, pyg_tensor: Tensor, @@ -1429,9 +1739,8 @@ def __build_dataframe_from_tensor( :rtype: pandas.DataFrame :raise adbpyg_adapter.exceptions.PyGMetagraphError: If invalid **meta_val**. 
from contextlib import contextmanager
from functools import wraps
from typing import Any, Callable, Iterator, List, Optional, TypeVar, cast

try:
    from opentelemetry import trace
    from opentelemetry.sdk.resources import SERVICE_NAME, Resource
    from opentelemetry.sdk.trace import TracerProvider
    from opentelemetry.sdk.trace.export import (
        BatchSpanProcessor,
        ConsoleSpanExporter,
        SpanExporter,
    )
    from opentelemetry.trace import Tracer

    TRACING_ENABLED = True
except ImportError:  # pragma: no cover
    TRACING_ENABLED = False


class TracingManager:
    """Class-level holder of the tracer shared by every traced function.

    No instance is needed: the tracer is stored on the class itself and is
    read/written through the classmethods below.
    """

    # The active tracer, or None while tracing has not been configured.
    __tracer: Optional["Tracer"] = None

    @classmethod
    def get_tracer(cls) -> Optional["Tracer"]:
        """Return the registered tracer, or None if none has been set."""
        return cls.__tracer

    @classmethod
    def set_tracer(cls, tracer: "Tracer") -> None:
        """Register **tracer** as the tracer used by the tracing helpers."""
        cls.__tracer = tracer

    @classmethod
    def set_attributes(cls, **attributes: Any) -> None:  # pragma: no cover
        """Attach **attributes** to the current span.

        Does nothing when OpenTelemetry is not installed or no tracer has
        been registered. Sets, dicts and nested lists are converted to
        (sorted, where applicable) strings before being set on the span.

        :param attributes: The attribute names and values to set.
        :type attributes: Any
        """
        if not TRACING_ENABLED or cls.__tracer is None:
            return

        current_span = trace.get_current_span()
        for k, v in attributes.items():
            if isinstance(v, set):
                v = str(sorted(v))

            elif isinstance(v, dict):
                v = str(dict(sorted(v.items())))

            # 2D+ List
            elif isinstance(v, list) and any(isinstance(item, list) for item in v):
                v = str(v)

            current_span.set_attribute(k, v)


T = TypeVar("T", bound=Callable[..., Any])


def with_tracing(span_name: Optional[str] = None) -> Callable[[T], T]:
    """Build a decorator that runs the decorated callable inside a span.

    When OpenTelemetry is not installed the callable is returned untouched.
    Otherwise the tracer is looked up on every call, so a tracer registered
    after decoration is still honoured; without a tracer the callable runs
    with no span.

    :param span_name: The span name. Defaults to the callable's ``__name__``.
    :type span_name: Optional[str]
    :return: The decorator.
    :rtype: Callable
    """

    def decorator(method: T) -> T:
        if not TRACING_ENABLED:
            return method  # pragma: no cover

        @wraps(method)
        def wrapper(*args: Any, **kwargs: Any) -> Any:
            if tracer := TracingManager.get_tracer():
                with tracer.start_as_current_span(span_name or method.__name__):
                    return method(*args, **kwargs)

            return method(*args, **kwargs)

        return cast(T, wrapper)

    return decorator


@contextmanager
def start_as_current_span(*args: Any, **kwargs: Any) -> Iterator[None]:
    """Context manager that opens a span on the registered tracer, if any.

    All arguments are forwarded to the tracer's ``start_as_current_span``.
    Without a registered tracer the body simply runs with no span.
    """
    if tracer := TracingManager.get_tracer():
        with tracer.start_as_current_span(*args, **kwargs):
            yield
    else:
        yield


def create_tracer(
    name: str,
    enable_console_tracing: bool = False,
    span_exporters: Optional[List["SpanExporter"]] = None,
) -> "Tracer":
    """
    Create a tracer instance.

    :param name: The name of the tracer.
    :type name: str
    :param enable_console_tracing: Whether to enable console tracing. Default is False.
    :type enable_console_tracing: bool
    :param span_exporters: A list of SpanExporter instances to use for tracing.
        For example, to export to a local Jaeger instance running via docker, you
        could use `[OTLPSpanExporter(endpoint="http://localhost:4317", insecure=True)]`.
        Defaults to None, which is treated as an empty list.
    :type span_exporters: Optional[List[opentelemetry.sdk.trace.export.SpanExporter]]
    :return: A configured tracer instance.
    :rtype: opentelemetry.trace.Tracer
    :raise RuntimeError: If OpenTelemetry is not installed.
    """
    if not TRACING_ENABLED:  # pragma: no cover
        m = "OpenTelemetry is not installed. Cannot create tracer. Use `pip install adbpyg-adapter[tracing]`"  # noqa: E501
        raise RuntimeError(m)

    resource = Resource(attributes={SERVICE_NAME: name})
    provider = TracerProvider(resource=resource)

    if enable_console_tracing:  # pragma: no cover
        console_processor = BatchSpanProcessor(ConsoleSpanExporter())
        provider.add_span_processor(console_processor)

    # A None default (instead of a shared mutable list) is normalised here.
    for span_exporter in span_exporters or []:  # pragma: no cover
        provider.add_span_processor(BatchSpanProcessor(span_exporter))

    trace.set_tracer_provider(provider)

    return trace.get_tracer(__name__)
import json
import pathlib
from typing import Optional


def sort_children_by_start_time(children):
    """Return a new list of the span dicts ordered by ascending ``startTime``."""
    return sorted(children, key=lambda span: span["startTime"])


def _same_span(master_span: dict, branch_span: dict) -> bool:
    """Return True when both spans share the same operation name and tags."""
    return (
        master_span["operationName"] == branch_span["operationName"]
        and master_span["tags"] == branch_span["tags"]
    )


def compare_span(master_child: Optional[dict], branch_child: Optional[dict]):
    """Build the comparison entry for one master/branch span pair.

    Either side may be missing (None), but not both. When both are present
    their operation names and tags must be equal, and their children are
    compared recursively.

    :param master_child: The span from the master trace, if any.
    :param branch_child: The span from the branch trace, if any.
    :return: A dict with the operation name, both durations, the relative
        improvement (None unless both durations are non-zero), the tags and
        the compared children.
    :raise AssertionError: If both spans are present but differ in
        operation name or tags.
    """
    if master_child and branch_child:
        assert master_child.get("operationName") == branch_child.get("operationName"), (
            f"Master Child Operation Name: {master_child.get('operationName')}\n"
            f"Branch Child Operation Name: {branch_child.get('operationName')}"
        )

        assert master_child.get("tags") == branch_child.get("tags"), (
            f"Master Child Tags: {master_child.get('tags')}\n"
            f"Branch Child Tags: {branch_child.get('tags')}"
        )

    operation_name = (
        master_child.get("operationName")
        if master_child
        else branch_child.get("operationName")
    )

    master_duration = master_child.get("duration") if master_child else None
    branch_duration = branch_child.get("duration") if branch_child else None
    improvement = (
        f"{round((1 - branch_duration / master_duration) * 100)}%"
        if master_duration and branch_duration
        else None
    )

    comparison = {
        "operationName": operation_name,
        "master_duration": master_duration,
        "branch_duration": branch_duration,
        "improvement": improvement,
        "tags": master_child.get("tags") if master_child else branch_child.get("tags"),
        "children": [],
    }

    if master_child and branch_child:
        comparison["children"] = compare_children(
            master_child["children"], branch_child["children"]
        )

    return comparison


def match_children(
    master_child: dict,
    branch_child: dict,
    master_children: list[dict],
    branch_children: list[dict],
):
    """Re-pair two spans that were popped together but do not match.

    The remaining branch children are searched for a partner of
    **master_child** first, then the remaining master children for a
    partner of **branch_child**. The partner is removed from its list and
    the displaced span is put back at the front of that same list, so it is
    paired on a later iteration instead of being lost.

    :param master_child: The span popped from the master children.
    :param branch_child: The span popped from the branch children.
    :param master_children: The remaining master children (mutated in place).
    :param branch_children: The remaining branch children (mutated in place).
    :return: The (master, branch) pair to compare. If no partner is found
        the original, mismatched pair is returned unchanged.
    """
    # Attempt to find a matching child in Branch Children for the current Master Child
    for i, branch_candidate in enumerate(branch_children):
        if _same_span(master_child, branch_candidate):
            branch_children.pop(i)
            # Re-queue the displaced branch span; it started earliest, so
            # the front of the list keeps the start-time ordering.
            branch_children.insert(0, branch_child)
            return master_child, branch_candidate

    # Attempt to find a matching child in Master Children for the current Branch Child
    for i, master_candidate in enumerate(master_children):
        if _same_span(master_candidate, branch_child):
            master_children.pop(i)
            # Same as above, for the displaced master span.
            master_children.insert(0, master_child)
            return master_candidate, branch_child

    return master_child, branch_child


def compare_children(master_children: list[dict], branch_children: list[dict]):
    """Compare two lists of sibling spans pairwise.

    Both lists are sorted by start time and consumed front to back. Pairs
    that do not match by name and tags are re-paired via match_children.
    Once one list is exhausted, the leftover spans of the other are
    reported with the missing side set to None.

    :param master_children: The child spans of the master span.
    :param branch_children: The child spans of the branch span.
    :return: The list of comparison entries, one per compared pair.
    """
    result = []
    master_children_sorted = sort_children_by_start_time(master_children)
    branch_children_sorted = sort_children_by_start_time(branch_children)

    while master_children_sorted or branch_children_sorted:
        master_child = master_children_sorted.pop(0) if master_children_sorted else None
        branch_child = branch_children_sorted.pop(0) if branch_children_sorted else None

        if master_child and branch_child and not _same_span(master_child, branch_child):
            # Find the matching pair if they are out of order
            master_child, branch_child = match_children(
                master_child,
                branch_child,
                master_children_sorted,
                branch_children_sorted,
            )

        if not (master_child or branch_child):
            # Nothing usable on either side: stop rather than append an
            # entry built from two empty values.
            break

        result.append(compare_span(master_child, branch_child))

    return result


def compare_traces(master_trace: dict, branch_trace: dict):
    """Compare the root spans of a master and a branch trace.

    :param master_trace: The root span of the master trace.
    :param branch_trace: The root span of the branch trace.
    :return: The comparison entry of the root span, including the compared
        children. ``improvement`` is None when the master duration is zero.
    :raise AssertionError: If the root operation names or tags differ.
    """
    assert master_trace.get("operationName") == branch_trace.get("operationName"), (
        f"Master Operation Name: {master_trace.get('operationName')}\n"
        f"Branch Operation Name: {branch_trace.get('operationName')}"
    )

    assert master_trace.get("tags") == branch_trace.get("tags"), (
        f"Master Tags: {master_trace.get('tags')}\n"
        f"Branch Tags: {branch_trace.get('tags')}"
    )

    master_duration = master_trace["duration"]
    branch_duration = branch_trace["duration"]
    # A zero master duration has no meaningful ratio; avoid dividing by it.
    improvement = (
        f"{round((1 - branch_duration / master_duration) * 100)}%"
        if master_duration
        else None
    )

    result = {
        "operationName": master_trace["operationName"],
        "master_duration": master_duration,
        "branch_duration": branch_duration,
        "improvement": improvement,
        "tags": master_trace["tags"],
        "children": compare_children(
            master_trace["children"], branch_trace["children"]
        ),
    }

    return result


def main():
    """Diff the master and branch traces of every benchmarked operation.

    Reads ``traces/master/<operation>.json`` and
    ``traces/branch/<operation>.json`` next to this file, writes the full
    comparison to ``traces/diff/<operation>.json`` and prints the root span
    summary of every operation as JSON.
    """
    traces_dir = pathlib.Path(__file__).parent.absolute() / "traces"

    root_span_diffs = {}
    for operation in ["pyg_to_arangodb", "arangodb_to_pyg"]:
        # Context managers make sure the trace files are closed again.
        with open(traces_dir / "master" / f"{operation}.json") as file:
            master_trace = json.load(file)

        with open(traces_dir / "branch" / f"{operation}.json") as file:
            branch_trace = json.load(file)

        diff_trace = compare_traces(master_trace, branch_trace)

        with open(traces_dir / "diff" / f"{operation}.json", "w") as file:
            file.write(json.dumps(diff_trace, indent=4))

        root_span_diffs[operation] = {
            "master_duration": diff_trace["master_duration"],
            "branch_duration": diff_trace["branch_duration"],
            "improvement": diff_trace["improvement"],
        }

    print(json.dumps(root_span_diffs, indent=4))


if __name__ == "__main__":
    main()
+ +See [benchmark.yml](https://github.com/arangoml/pyg-adapter/blob/master/.github/workflows/benchmark.yml) and [compare.py](https://github.com/arangoml/pyg-adapter/blob/master/benchmark/compare.py) for more details. \ No newline at end of file diff --git a/benchmark/traces/master/README.md b/benchmark/traces/master/README.md new file mode 100644 index 0000000..84a9327 --- /dev/null +++ b/benchmark/traces/master/README.md @@ -0,0 +1,3 @@ +Stores the traces for the current state of `pyg-adapter@master`. + +See [benchmark.yml](https://github.com/arangoml/pyg-adapter/blob/master/.github/workflows/benchmark.yml) and [write.py](https://github.com/arangoml/pyg-adapter/blob/master/benchmark/write.py) for more details. \ No newline at end of file diff --git a/benchmark/traces/master/arangodb_to_pyg.json b/benchmark/traces/master/arangodb_to_pyg.json new file mode 100644 index 0000000..0d638fc --- /dev/null +++ b/benchmark/traces/master/arangodb_to_pyg.json @@ -0,0 +1,3761 @@ +{ + "spanID": "935ddd725129fb7c", + "operationName": "arangodb_to_pyg", + "startTime": 1705677351837714, + "duration": 836965, + "tags": { + "name": "FakeHeteroGraphBenchmark" + }, + "children": [ + { + "spanID": "4a5308cc3dfabc08", + "operationName": "process_adb_vertex_collection", + "startTime": 1705677351837783, + "duration": 85153, + "tags": { + "v_col": "v0" + }, + "children": [ + { + "spanID": "307bf3262f120554", + "operationName": "fetch_adb_documents", + "startTime": 1705677351837805, + "duration": 32511, + "tags": { + "col": "v0", + "col_size": 1008, + "meta": "['x', 'y']" + }, + "children": [] + }, + { + "spanID": "2fcd81b5d24bace4", + "operationName": "process_adb_cursor", + "startTime": 1705677351870494, + "duration": 52422, + "tags": {}, + "children": [ + { + "spanID": "9cdeb3e60870e15c", + "operationName": "process_adb_vertex_dataframe", + "startTime": 1705677351872646, + "duration": 8512, + "tags": { + "i": 0, + "vertex_df_size": 1000 + }, + "children": [ + { + "spanID": 
"a81ad477fb3675b8", + "operationName": "set_pyg_data", + "startTime": 1705677351873189, + "duration": 7962, + "tags": { + "meta": "{'x': 'x', 'y': 'y'}" + }, + "children": [ + { + "spanID": "79fdef7c42930b33", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677351873226, + "duration": 241, + "tags": { + "meta_key": "y", + "meta_val": "y" + }, + "children": [] + }, + { + "spanID": "16febaa011af923d", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677351873577, + "duration": 7532, + "tags": { + "meta_key": "x", + "meta_val": "x" + }, + "children": [] + } + ] + } + ] + }, + { + "spanID": "c1f254b8adc0da7a", + "operationName": "process_adb_vertex_dataframe", + "startTime": 1705677351917792, + "duration": 2997, + "tags": { + "i": 1000, + "vertex_df_size": 8 + }, + "children": [ + { + "spanID": "e07405eb215663ab", + "operationName": "set_pyg_data", + "startTime": 1705677351918119, + "duration": 2433, + "tags": { + "meta": "{'x': 'x', 'y': 'y'}" + }, + "children": [ + { + "spanID": "ec62b2c82648ee38", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677351918197, + "duration": 135, + "tags": { + "meta_key": "y", + "meta_val": "y" + }, + "children": [] + }, + { + "spanID": "d7ab792809e469e6", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677351919245, + "duration": 152, + "tags": { + "meta_key": "x", + "meta_val": "x" + }, + "children": [] + } + ] + } + ] + } + ] + } + ] + }, + { + "spanID": "e5eeac76148b2758", + "operationName": "process_adb_vertex_collection", + "startTime": 1705677351923008, + "duration": 39611, + "tags": { + "v_col": "v1" + }, + "children": [ + { + "spanID": "ec4f217bb306d1a8", + "operationName": "fetch_adb_documents", + "startTime": 1705677351923037, + "duration": 32213, + "tags": { + "col": "v1", + "col_size": 821, + "meta": "['x']" + }, + "children": [] + }, + { + "spanID": "8a64c1b9d450fe4a", + "operationName": "process_adb_cursor", + "startTime": 
1705677351955321, + "duration": 7278, + "tags": {}, + "children": [ + { + "spanID": "642bfa42aef9c00b", + "operationName": "process_adb_vertex_dataframe", + "startTime": 1705677351956719, + "duration": 4841, + "tags": { + "i": 0, + "vertex_df_size": 821 + }, + "children": [ + { + "spanID": "b48d73f1d67e55fd", + "operationName": "set_pyg_data", + "startTime": 1705677351956925, + "duration": 4629, + "tags": { + "meta": "{'x': 'x'}" + }, + "children": [ + { + "spanID": "468ff53d864a7a50", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677351956953, + "duration": 4573, + "tags": { + "meta_key": "x", + "meta_val": "x" + }, + "children": [] + } + ] + } + ] + } + ] + } + ] + }, + { + "spanID": "cfc6e62585940927", + "operationName": "process_adb_vertex_collection", + "startTime": 1705677351962686, + "duration": 29280, + "tags": { + "v_col": "v2" + }, + "children": [ + { + "spanID": "d977e9933c49d76f", + "operationName": "fetch_adb_documents", + "startTime": 1705677351962711, + "duration": 21677, + "tags": { + "col": "v2", + "col_size": 894, + "meta": "['x']" + }, + "children": [] + }, + { + "spanID": "e521460637176e84", + "operationName": "process_adb_cursor", + "startTime": 1705677351984451, + "duration": 7495, + "tags": {}, + "children": [ + { + "spanID": "96fd35d0adf20806", + "operationName": "process_adb_vertex_dataframe", + "startTime": 1705677351985641, + "duration": 4967, + "tags": { + "i": 0, + "vertex_df_size": 894 + }, + "children": [ + { + "spanID": "f323ca74d3447490", + "operationName": "set_pyg_data", + "startTime": 1705677351985865, + "duration": 4735, + "tags": { + "meta": "{'x': 'x'}" + }, + "children": [ + { + "spanID": "9466e4726b5f5241", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677351985890, + "duration": 4662, + "tags": { + "meta_key": "x", + "meta_val": "x" + }, + "children": [] + } + ] + } + ] + } + ] + } + ] + }, + { + "spanID": "73581a8146743741", + "operationName": "process_adb_edge_collection", + 
"startTime": 1705677351992022, + "duration": 682567, + "tags": { + "e_col": "e0" + }, + "children": [ + { + "spanID": "a905d7507e1ea9c5", + "operationName": "fetch_adb_documents", + "startTime": 1705677351992050, + "duration": 7506, + "tags": { + "col": "e0", + "col_size": 53450, + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [] + }, + { + "spanID": "ff0ac0f1a425799a", + "operationName": "process_adb_cursor", + "startTime": 1705677351999603, + "duration": 674974, + "tags": {}, + "children": [ + { + "spanID": "eabca8d0b341facd", + "operationName": "process_adb_edge_dataframe", + "startTime": 1705677352000767, + "duration": 9597, + "tags": { + "edge_df_size": 1000 + }, + "children": [ + { + "spanID": "cb175a5afb82860d", + "operationName": "split_adb_ids", + "startTime": 1705677352000859, + "duration": 673, + "tags": {}, + "children": [] + }, + { + "spanID": "151665705b7c709a", + "operationName": "split_adb_ids", + "startTime": 1705677352001982, + "duration": 608, + "tags": {}, + "children": [] + }, + { + "spanID": "9cdf5a865306f3f5", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352005840, + "duration": 4481, + "tags": { + "edge_type": "[\"v2\",\"e0\",\"v1\"]", + "edge_type_df_size": 1000 + }, + "children": [ + { + "spanID": "7c879b741d878f9f", + "operationName": "set_pyg_data", + "startTime": 1705677352009886, + "duration": 430, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "a1515607964a870c", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352009924, + "duration": 378, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + } + ] + }, + { + "spanID": "d857010255d44936", + "operationName": "process_adb_edge_dataframe", + "startTime": 1705677352015125, + "duration": 3723, + "tags": { + "edge_df_size": 1000 + }, + "children": [ + { + "spanID": "3e37952d30bcab0e", + "operationName": "split_adb_ids", + "startTime": 
1705677352015195, + "duration": 544, + "tags": {}, + "children": [] + }, + { + "spanID": "bb42e0b20426465e", + "operationName": "split_adb_ids", + "startTime": 1705677352015980, + "duration": 491, + "tags": {}, + "children": [] + }, + { + "spanID": "1dfc83524562be7f", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352017258, + "duration": 1567, + "tags": { + "edge_type": "[\"v2\",\"e0\",\"v1\"]", + "edge_type_df_size": 1000 + }, + "children": [ + { + "spanID": "38701a14b490b608", + "operationName": "set_pyg_data", + "startTime": 1705677352018455, + "duration": 365, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "cb69ca385f3f5638", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352018481, + "duration": 323, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + } + ] + }, + { + "spanID": "552116dd2ba4b180", + "operationName": "process_adb_edge_dataframe", + "startTime": 1705677352023643, + "duration": 3913, + "tags": { + "edge_df_size": 1000 + }, + "children": [ + { + "spanID": "d0dfae436d16ee18", + "operationName": "split_adb_ids", + "startTime": 1705677352023739, + "duration": 573, + "tags": {}, + "children": [] + }, + { + "spanID": "19c16a0d0febd845", + "operationName": "split_adb_ids", + "startTime": 1705677352024589, + "duration": 507, + "tags": {}, + "children": [] + }, + { + "spanID": "2577bffac87a7463", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352025914, + "duration": 1610, + "tags": { + "edge_type": "[\"v2\",\"e0\",\"v1\"]", + "edge_type_df_size": 1000 + }, + "children": [ + { + "spanID": "b29a8b06daf66c5f", + "operationName": "set_pyg_data", + "startTime": 1705677352027168, + "duration": 351, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "0b9475b138018b47", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352027193, + "duration": 
309, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + } + ] + }, + { + "spanID": "92e8e269d12ecbc4", + "operationName": "process_adb_edge_dataframe", + "startTime": 1705677352032151, + "duration": 4199, + "tags": { + "edge_df_size": 1000 + }, + "children": [ + { + "spanID": "e8f6cf32a25b59fd", + "operationName": "split_adb_ids", + "startTime": 1705677352032248, + "duration": 584, + "tags": {}, + "children": [] + }, + { + "spanID": "88c132adefbfc19e", + "operationName": "split_adb_ids", + "startTime": 1705677352033111, + "duration": 517, + "tags": {}, + "children": [] + }, + { + "spanID": "ae3b16ec9a27d858", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352034500, + "duration": 1812, + "tags": { + "edge_type": "[\"v2\",\"e0\",\"v1\"]", + "edge_type_df_size": 1000 + }, + "children": [ + { + "spanID": "06d599e812f175ff", + "operationName": "set_pyg_data", + "startTime": 1705677352035929, + "duration": 377, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "a28f5ab01fdb8b32", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352035959, + "duration": 327, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + } + ] + }, + { + "spanID": "9b38fe803042e325", + "operationName": "process_adb_edge_dataframe", + "startTime": 1705677352042270, + "duration": 3887, + "tags": { + "edge_df_size": 1000 + }, + "children": [ + { + "spanID": "9371a71fd480865f", + "operationName": "split_adb_ids", + "startTime": 1705677352042365, + "duration": 575, + "tags": {}, + "children": [] + }, + { + "spanID": "64264cd51ea45cd6", + "operationName": "split_adb_ids", + "startTime": 1705677352043200, + "duration": 493, + "tags": {}, + "children": [] + }, + { + "spanID": "5ec17dbe176ea1b1", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352044530, + "duration": 1601, + "tags": { + "edge_type": 
"[\"v2\",\"e0\",\"v1\"]", + "edge_type_df_size": 1000 + }, + "children": [ + { + "spanID": "fb0323a1d576d415", + "operationName": "set_pyg_data", + "startTime": 1705677352045790, + "duration": 336, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "0950fd131db53334", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352045815, + "duration": 297, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + } + ] + }, + { + "spanID": "0589f8779b025244", + "operationName": "process_adb_edge_dataframe", + "startTime": 1705677352050495, + "duration": 4576, + "tags": { + "edge_df_size": 1000 + }, + "children": [ + { + "spanID": "f606254131d0b664", + "operationName": "split_adb_ids", + "startTime": 1705677352050558, + "duration": 542, + "tags": {}, + "children": [] + }, + { + "spanID": "2f5a522af87f43fd", + "operationName": "split_adb_ids", + "startTime": 1705677352051326, + "duration": 501, + "tags": {}, + "children": [] + }, + { + "spanID": "1fb797fab7d6467b", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352052573, + "duration": 2449, + "tags": { + "edge_type": "[\"v2\",\"e0\",\"v1\"]", + "edge_type_df_size": 1000 + }, + "children": [ + { + "spanID": "35e8579a7aaf0e89", + "operationName": "set_pyg_data", + "startTime": 1705677352054433, + "duration": 581, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "ccfdba9bba26d851", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352054472, + "duration": 483, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + } + ] + }, + { + "spanID": "efdd35f80fa34266", + "operationName": "process_adb_edge_dataframe", + "startTime": 1705677352060201, + "duration": 3840, + "tags": { + "edge_df_size": 1000 + }, + "children": [ + { + "spanID": "05d51433ade9b2b4", + "operationName": "split_adb_ids", + 
"startTime": 1705677352060282, + "duration": 568, + "tags": {}, + "children": [] + }, + { + "spanID": "6cf55b158b53031d", + "operationName": "split_adb_ids", + "startTime": 1705677352061100, + "duration": 501, + "tags": {}, + "children": [] + }, + { + "spanID": "19fbeb1d9edfa3da", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352062419, + "duration": 1591, + "tags": { + "edge_type": "[\"v2\",\"e0\",\"v1\"]", + "edge_type_df_size": 1000 + }, + "children": [ + { + "spanID": "428a1c22d5fdb76a", + "operationName": "set_pyg_data", + "startTime": 1705677352063665, + "duration": 340, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "3888447911ebcd49", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352063689, + "duration": 300, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + } + ] + }, + { + "spanID": "a59cec98126cbc8f", + "operationName": "process_adb_edge_dataframe", + "startTime": 1705677352068725, + "duration": 4854, + "tags": { + "edge_df_size": 1000 + }, + "children": [ + { + "spanID": "59acdd984d125e7f", + "operationName": "split_adb_ids", + "startTime": 1705677352068820, + "duration": 706, + "tags": {}, + "children": [] + }, + { + "spanID": "2e2950656fa231e9", + "operationName": "split_adb_ids", + "startTime": 1705677352069854, + "duration": 590, + "tags": {}, + "children": [] + }, + { + "spanID": "80ee526e0fa07a3f", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352071338, + "duration": 2128, + "tags": { + "edge_type": "[\"v2\",\"e0\",\"v1\"]", + "edge_type_df_size": 1000 + }, + "children": [ + { + "spanID": "0a14b90a7795e986", + "operationName": "set_pyg_data", + "startTime": 1705677352073049, + "duration": 410, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "19d5f97098b33c6e", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352073084, 
+ "duration": 346, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + } + ] + }, + { + "spanID": "fcfcfa81b306d700", + "operationName": "process_adb_edge_dataframe", + "startTime": 1705677352078172, + "duration": 6040, + "tags": { + "edge_df_size": 1000 + }, + "children": [ + { + "spanID": "3308fb2e642aad48", + "operationName": "split_adb_ids", + "startTime": 1705677352078245, + "duration": 586, + "tags": {}, + "children": [] + }, + { + "spanID": "5bca47be429817c5", + "operationName": "split_adb_ids", + "startTime": 1705677352079088, + "duration": 496, + "tags": {}, + "children": [] + }, + { + "spanID": "bb4a06cbe786ab37", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352081353, + "duration": 1826, + "tags": { + "edge_type": "[\"v2\",\"e0\",\"v1\"]", + "edge_type_df_size": 895 + }, + "children": [ + { + "spanID": "d69c91c278601602", + "operationName": "set_pyg_data", + "startTime": 1705677352082840, + "duration": 333, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "eb21a3f6e6fd68e8", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352082867, + "duration": 288, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + }, + { + "spanID": "2b5f693291dc59ef", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352083198, + "duration": 1000, + "tags": { + "edge_type": "[\"v1\",\"e0\",\"v2\"]", + "edge_type_df_size": 105 + }, + "children": [ + { + "spanID": "ac322c12b29c467d", + "operationName": "set_pyg_data", + "startTime": 1705677352084091, + "duration": 98, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "f76fbfb83412fc12", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352084118, + "duration": 62, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + 
} + ] + }, + { + "spanID": "0edc6d2bc470f0e7", + "operationName": "process_adb_edge_dataframe", + "startTime": 1705677352089259, + "duration": 3947, + "tags": { + "edge_df_size": 1000 + }, + "children": [ + { + "spanID": "ad1b8f60c9e4dab2", + "operationName": "split_adb_ids", + "startTime": 1705677352089351, + "duration": 576, + "tags": {}, + "children": [] + }, + { + "spanID": "d86dbf1128805c5d", + "operationName": "split_adb_ids", + "startTime": 1705677352090195, + "duration": 514, + "tags": {}, + "children": [] + }, + { + "spanID": "57a1cb712975d279", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352091558, + "duration": 1623, + "tags": { + "edge_type": "[\"v1\",\"e0\",\"v2\"]", + "edge_type_df_size": 1000 + }, + "children": [ + { + "spanID": "402d0baf878b9f6b", + "operationName": "set_pyg_data", + "startTime": 1705677352092819, + "duration": 358, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "98c752051e01a934", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352092844, + "duration": 309, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + } + ] + }, + { + "spanID": "713b7e05ebe21368", + "operationName": "process_adb_edge_dataframe", + "startTime": 1705677352106540, + "duration": 4018, + "tags": { + "edge_df_size": 1000 + }, + "children": [ + { + "spanID": "2cc0f859aa6524ab", + "operationName": "split_adb_ids", + "startTime": 1705677352106642, + "duration": 600, + "tags": {}, + "children": [] + }, + { + "spanID": "78bc71750361524c", + "operationName": "split_adb_ids", + "startTime": 1705677352107513, + "duration": 512, + "tags": {}, + "children": [] + }, + { + "spanID": "68ef8f5fae68690a", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352108876, + "duration": 1657, + "tags": { + "edge_type": "[\"v1\",\"e0\",\"v2\"]", + "edge_type_df_size": 1000 + }, + "children": [ + { + "spanID": "91b15f5de66cd36e", 
+ "operationName": "set_pyg_data", + "startTime": 1705677352110156, + "duration": 372, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "82339e23dff3334b", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352110181, + "duration": 332, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + } + ] + }, + { + "spanID": "4fbaecc0eae2025e", + "operationName": "process_adb_edge_dataframe", + "startTime": 1705677352115393, + "duration": 4068, + "tags": { + "edge_df_size": 1000 + }, + "children": [ + { + "spanID": "5b6e4ae7a6208143", + "operationName": "split_adb_ids", + "startTime": 1705677352115474, + "duration": 540, + "tags": {}, + "children": [] + }, + { + "spanID": "d670f668637e0edc", + "operationName": "split_adb_ids", + "startTime": 1705677352116248, + "duration": 496, + "tags": {}, + "children": [] + }, + { + "spanID": "403d1f83a859890c", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352117507, + "duration": 1913, + "tags": { + "edge_type": "[\"v1\",\"e0\",\"v2\"]", + "edge_type_df_size": 1000 + }, + "children": [ + { + "spanID": "8f837ef727460f22", + "operationName": "set_pyg_data", + "startTime": 1705677352118943, + "duration": 471, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "032f06cab0d9c2aa", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352118980, + "duration": 399, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + } + ] + }, + { + "spanID": "bdd7d19b753c7c99", + "operationName": "process_adb_edge_dataframe", + "startTime": 1705677352124262, + "duration": 3866, + "tags": { + "edge_df_size": 1000 + }, + "children": [ + { + "spanID": "55fea08e143e2e04", + "operationName": "split_adb_ids", + "startTime": 1705677352124349, + "duration": 563, + "tags": {}, + "children": [] + }, + { + "spanID": 
"0bb2c3f0bd30291a", + "operationName": "split_adb_ids", + "startTime": 1705677352125154, + "duration": 521, + "tags": {}, + "children": [] + }, + { + "spanID": "47e7f5938b5885ca", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352126533, + "duration": 1570, + "tags": { + "edge_type": "[\"v1\",\"e0\",\"v2\"]", + "edge_type_df_size": 1000 + }, + "children": [ + { + "spanID": "3d792fa12284b7a4", + "operationName": "set_pyg_data", + "startTime": 1705677352127766, + "duration": 333, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "f40048d7c31d5a97", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352127789, + "duration": 296, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + } + ] + }, + { + "spanID": "5a2b745b7b59051b", + "operationName": "process_adb_edge_dataframe", + "startTime": 1705677352132788, + "duration": 4019, + "tags": { + "edge_df_size": 1000 + }, + "children": [ + { + "spanID": "49b25ded9c31d9b2", + "operationName": "split_adb_ids", + "startTime": 1705677352132849, + "duration": 525, + "tags": {}, + "children": [] + }, + { + "spanID": "5bf49c04ac642b4c", + "operationName": "split_adb_ids", + "startTime": 1705677352133593, + "duration": 488, + "tags": {}, + "children": [] + }, + { + "spanID": "f2686baa971c702d", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352134800, + "duration": 1970, + "tags": { + "edge_type": "[\"v1\",\"e0\",\"v2\"]", + "edge_type_df_size": 1000 + }, + "children": [ + { + "spanID": "a23d4c9de456697c", + "operationName": "set_pyg_data", + "startTime": 1705677352136380, + "duration": 384, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "9efee464da90f534", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352136403, + "duration": 343, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": 
[] + } + ] + } + ] + } + ] + }, + { + "spanID": "b732d46f21e15094", + "operationName": "process_adb_edge_dataframe", + "startTime": 1705677352141620, + "duration": 3973, + "tags": { + "edge_df_size": 1000 + }, + "children": [ + { + "spanID": "635518f74f6fa985", + "operationName": "split_adb_ids", + "startTime": 1705677352141711, + "duration": 569, + "tags": {}, + "children": [] + }, + { + "spanID": "6a174c1cbf9cc545", + "operationName": "split_adb_ids", + "startTime": 1705677352142549, + "duration": 519, + "tags": {}, + "children": [] + }, + { + "spanID": "a69cfb85d432f8db", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352143909, + "duration": 1588, + "tags": { + "edge_type": "[\"v1\",\"e0\",\"v2\"]", + "edge_type_df_size": 1000 + }, + "children": [ + { + "spanID": "0063e42f14aa451c", + "operationName": "set_pyg_data", + "startTime": 1705677352145147, + "duration": 346, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "313b32b798363189", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352145172, + "duration": 306, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + } + ] + }, + { + "spanID": "559b5975b2d650af", + "operationName": "process_adb_edge_dataframe", + "startTime": 1705677352150347, + "duration": 4009, + "tags": { + "edge_df_size": 1000 + }, + "children": [ + { + "spanID": "3d4a5d5128fafd04", + "operationName": "split_adb_ids", + "startTime": 1705677352150412, + "duration": 554, + "tags": {}, + "children": [] + }, + { + "spanID": "a32c9b6f391cf046", + "operationName": "split_adb_ids", + "startTime": 1705677352151201, + "duration": 523, + "tags": {}, + "children": [] + }, + { + "spanID": "60ef147172b8ff39", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352152452, + "duration": 1866, + "tags": { + "edge_type": "[\"v1\",\"e0\",\"v2\"]", + "edge_type_df_size": 1000 + }, + "children": [ + { + 
"spanID": "e01bbf50b5d97ef7", + "operationName": "set_pyg_data", + "startTime": 1705677352153794, + "duration": 517, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "91725f0aac7c8803", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352153845, + "duration": 423, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + } + ] + }, + { + "spanID": "6a1689addfe1b307", + "operationName": "process_adb_edge_dataframe", + "startTime": 1705677352204336, + "duration": 4730, + "tags": { + "edge_df_size": 1000 + }, + "children": [ + { + "spanID": "66faf98908135d58", + "operationName": "split_adb_ids", + "startTime": 1705677352204448, + "duration": 821, + "tags": {}, + "children": [] + }, + { + "spanID": "b3ab1b2cdf26f517", + "operationName": "split_adb_ids", + "startTime": 1705677352205622, + "duration": 595, + "tags": {}, + "children": [] + }, + { + "spanID": "6b10e53a9145de05", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352207070, + "duration": 1961, + "tags": { + "edge_type": "[\"v1\",\"e0\",\"v2\"]", + "edge_type_df_size": 1000 + }, + "children": [ + { + "spanID": "a985ab61c5adf681", + "operationName": "set_pyg_data", + "startTime": 1705677352208648, + "duration": 378, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "0bf9c0efb5816b74", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352208679, + "duration": 327, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + } + ] + }, + { + "spanID": "720299e32a69acc7", + "operationName": "process_adb_edge_dataframe", + "startTime": 1705677352216318, + "duration": 5237, + "tags": { + "edge_df_size": 1000 + }, + "children": [ + { + "spanID": "425cb200105ada6b", + "operationName": "split_adb_ids", + "startTime": 1705677352216388, + "duration": 569, + "tags": {}, + "children": [] + }, + 
{ + "spanID": "285e25b4b3969057", + "operationName": "split_adb_ids", + "startTime": 1705677352217213, + "duration": 550, + "tags": {}, + "children": [] + }, + { + "spanID": "870f084c7244f536", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352218698, + "duration": 1671, + "tags": { + "edge_type": "[\"v0\",\"e0\",\"v1\"]", + "edge_type_df_size": 944 + }, + "children": [ + { + "spanID": "7cbd7025e28bc9ff", + "operationName": "set_pyg_data", + "startTime": 1705677352220031, + "duration": 333, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "8fb83babe8754cd3", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352220058, + "duration": 295, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + }, + { + "spanID": "c167733f9a9e4310", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352220388, + "duration": 1153, + "tags": { + "edge_type": "[\"v1\",\"e0\",\"v2\"]", + "edge_type_df_size": 56 + }, + "children": [ + { + "spanID": "e245a4600004884c", + "operationName": "set_pyg_data", + "startTime": 1705677352221394, + "duration": 142, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "7e9cf84f09f6048f", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352221423, + "duration": 50, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + } + ] + }, + { + "spanID": "4fe30c9a53710f57", + "operationName": "process_adb_edge_dataframe", + "startTime": 1705677352229521, + "duration": 3988, + "tags": { + "edge_df_size": 1000 + }, + "children": [ + { + "spanID": "77863fe5d675ebf7", + "operationName": "split_adb_ids", + "startTime": 1705677352229617, + "duration": 618, + "tags": {}, + "children": [] + }, + { + "spanID": "cf1da1100cc36d8c", + "operationName": "split_adb_ids", + "startTime": 1705677352230517, + "duration": 530, + 
"tags": {}, + "children": [] + }, + { + "spanID": "e00111e5d29dc5df", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352231887, + "duration": 1594, + "tags": { + "edge_type": "[\"v0\",\"e0\",\"v1\"]", + "edge_type_df_size": 1000 + }, + "children": [ + { + "spanID": "cffa6cddf963a7ef", + "operationName": "set_pyg_data", + "startTime": 1705677352233115, + "duration": 361, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "3020da5c6a46721a", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352233140, + "duration": 322, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + } + ] + }, + { + "spanID": "ffda03368c6e9037", + "operationName": "process_adb_edge_dataframe", + "startTime": 1705677352242130, + "duration": 4076, + "tags": { + "edge_df_size": 1000 + }, + "children": [ + { + "spanID": "a2121ac5f689a4a5", + "operationName": "split_adb_ids", + "startTime": 1705677352242223, + "duration": 594, + "tags": {}, + "children": [] + }, + { + "spanID": "155e18b1fa83ada4", + "operationName": "split_adb_ids", + "startTime": 1705677352243097, + "duration": 520, + "tags": {}, + "children": [] + }, + { + "spanID": "b9bdee2dd663049d", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352244513, + "duration": 1666, + "tags": { + "edge_type": "[\"v0\",\"e0\",\"v1\"]", + "edge_type_df_size": 1000 + }, + "children": [ + { + "spanID": "fca055362169df82", + "operationName": "set_pyg_data", + "startTime": 1705677352245814, + "duration": 360, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "66dd779403c54c71", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352245845, + "duration": 314, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + } + ] + }, + { + "spanID": "adb328cbf3158c0c", + "operationName": 
"process_adb_edge_dataframe", + "startTime": 1705677352254543, + "duration": 5216, + "tags": { + "edge_df_size": 1000 + }, + "children": [ + { + "spanID": "50f0fc2b6ae04d52", + "operationName": "split_adb_ids", + "startTime": 1705677352254623, + "duration": 618, + "tags": {}, + "children": [] + }, + { + "spanID": "36a98d7400de59f5", + "operationName": "split_adb_ids", + "startTime": 1705677352255530, + "duration": 548, + "tags": {}, + "children": [] + }, + { + "spanID": "b7a28e0a03a89879", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352257895, + "duration": 1837, + "tags": { + "edge_type": "[\"v0\",\"e0\",\"v1\"]", + "edge_type_df_size": 1000 + }, + "children": [ + { + "spanID": "009a815bc1378be5", + "operationName": "set_pyg_data", + "startTime": 1705677352259348, + "duration": 379, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "d29e8693faf1501b", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352259374, + "duration": 334, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + } + ] + }, + { + "spanID": "8741ae91acfebb4b", + "operationName": "process_adb_edge_dataframe", + "startTime": 1705677352267169, + "duration": 4103, + "tags": { + "edge_df_size": 1000 + }, + "children": [ + { + "spanID": "190865159cb017c1", + "operationName": "split_adb_ids", + "startTime": 1705677352267241, + "duration": 569, + "tags": {}, + "children": [] + }, + { + "spanID": "1e707c5230c1fb6a", + "operationName": "split_adb_ids", + "startTime": 1705677352268065, + "duration": 520, + "tags": {}, + "children": [] + }, + { + "spanID": "a636425c9bbd750d", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352269400, + "duration": 1826, + "tags": { + "edge_type": "[\"v0\",\"e0\",\"v1\"]", + "edge_type_df_size": 1000 + }, + "children": [ + { + "spanID": "dfa7c6ed32d1f81b", + "operationName": "set_pyg_data", + "startTime": 
1705677352270846, + "duration": 375, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "47acf2f64d6b234f", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352270877, + "duration": 324, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + } + ] + }, + { + "spanID": "fa7ff8bfb044284a", + "operationName": "process_adb_edge_dataframe", + "startTime": 1705677352280935, + "duration": 3999, + "tags": { + "edge_df_size": 1000 + }, + "children": [ + { + "spanID": "19a5711b2ea60b99", + "operationName": "split_adb_ids", + "startTime": 1705677352281025, + "duration": 580, + "tags": {}, + "children": [] + }, + { + "spanID": "da9bb01779c147c7", + "operationName": "split_adb_ids", + "startTime": 1705677352281864, + "duration": 494, + "tags": {}, + "children": [] + }, + { + "spanID": "658de17eec3aa314", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352283206, + "duration": 1699, + "tags": { + "edge_type": "[\"v0\",\"e0\",\"v1\"]", + "edge_type_df_size": 1000 + }, + "children": [ + { + "spanID": "14d30dbca0acf4c9", + "operationName": "set_pyg_data", + "startTime": 1705677352284474, + "duration": 425, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "4653a5600597aab6", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352284503, + "duration": 312, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + } + ] + }, + { + "spanID": "73f660d8e9f41cc0", + "operationName": "process_adb_edge_dataframe", + "startTime": 1705677352289870, + "duration": 3917, + "tags": { + "edge_df_size": 1000 + }, + "children": [ + { + "spanID": "cad6e514ccc14d51", + "operationName": "split_adb_ids", + "startTime": 1705677352289970, + "duration": 571, + "tags": {}, + "children": [] + }, + { + "spanID": "dc8215271da3b7e2", + "operationName": "split_adb_ids", + 
"startTime": 1705677352290793, + "duration": 499, + "tags": {}, + "children": [] + }, + { + "spanID": "2227d96d41a93f90", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352292098, + "duration": 1663, + "tags": { + "edge_type": "[\"v0\",\"e0\",\"v1\"]", + "edge_type_df_size": 1000 + }, + "children": [ + { + "spanID": "8557716aa7502a81", + "operationName": "set_pyg_data", + "startTime": 1705677352293394, + "duration": 363, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "a699bae0d138d150", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352293420, + "duration": 321, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + } + ] + }, + { + "spanID": "58d87776a51ad4f3", + "operationName": "process_adb_edge_dataframe", + "startTime": 1705677352301141, + "duration": 3911, + "tags": { + "edge_df_size": 1000 + }, + "children": [ + { + "spanID": "df3277fd1d77ce40", + "operationName": "split_adb_ids", + "startTime": 1705677352301213, + "duration": 574, + "tags": {}, + "children": [] + }, + { + "spanID": "4745dd9e27896389", + "operationName": "split_adb_ids", + "startTime": 1705677352302031, + "duration": 511, + "tags": {}, + "children": [] + }, + { + "spanID": "04c14982d9ead926", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352303318, + "duration": 1696, + "tags": { + "edge_type": "[\"v0\",\"e0\",\"v1\"]", + "edge_type_df_size": 1000 + }, + "children": [ + { + "spanID": "0a68e88e0ad40415", + "operationName": "set_pyg_data", + "startTime": 1705677352304616, + "duration": 393, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "ae55cdff34ab18fd", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352304646, + "duration": 338, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + } + ] + }, + { + "spanID": 
"8ef066d44279b14d", + "operationName": "process_adb_edge_dataframe", + "startTime": 1705677352313686, + "duration": 3843, + "tags": { + "edge_df_size": 1000 + }, + "children": [ + { + "spanID": "f24dfdd850910bdc", + "operationName": "split_adb_ids", + "startTime": 1705677352313761, + "duration": 573, + "tags": {}, + "children": [] + }, + { + "spanID": "f03d866a5decc06a", + "operationName": "split_adb_ids", + "startTime": 1705677352314581, + "duration": 506, + "tags": {}, + "children": [] + }, + { + "spanID": "e8ec01b3914591ae", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352315906, + "duration": 1594, + "tags": { + "edge_type": "[\"v0\",\"e0\",\"v1\"]", + "edge_type_df_size": 1000 + }, + "children": [ + { + "spanID": "0ac0cf0dd974c146", + "operationName": "set_pyg_data", + "startTime": 1705677352317149, + "duration": 346, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "bfc74ca9d8ab0b30", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352317174, + "duration": 306, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + } + ] + }, + { + "spanID": "b38a05fbf61164ce", + "operationName": "process_adb_edge_dataframe", + "startTime": 1705677352321601, + "duration": 4422, + "tags": { + "edge_df_size": 1000 + }, + "children": [ + { + "spanID": "a7c5cb879b8b71a1", + "operationName": "split_adb_ids", + "startTime": 1705677352321687, + "duration": 594, + "tags": {}, + "children": [] + }, + { + "spanID": "b65d12267e969cf3", + "operationName": "split_adb_ids", + "startTime": 1705677352322531, + "duration": 726, + "tags": {}, + "children": [] + }, + { + "spanID": "e7180322a4e695c9", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352324226, + "duration": 1770, + "tags": { + "edge_type": "[\"v0\",\"e0\",\"v1\"]", + "edge_type_df_size": 1000 + }, + "children": [ + { + "spanID": "a3e04b3b756b0715", + "operationName": 
"set_pyg_data", + "startTime": 1705677352325615, + "duration": 376, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "5f58d5b56f790959", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352325642, + "duration": 326, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + } + ] + }, + { + "spanID": "89b5b368df14c612", + "operationName": "process_adb_edge_dataframe", + "startTime": 1705677352334157, + "duration": 5120, + "tags": { + "edge_df_size": 1000 + }, + "children": [ + { + "spanID": "353545792da44da1", + "operationName": "split_adb_ids", + "startTime": 1705677352334246, + "duration": 599, + "tags": {}, + "children": [] + }, + { + "spanID": "964ddb776025f0ae", + "operationName": "split_adb_ids", + "startTime": 1705677352335135, + "duration": 521, + "tags": {}, + "children": [] + }, + { + "spanID": "0247145f4a814d53", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352336540, + "duration": 1622, + "tags": { + "edge_type": "[\"v1\",\"e0\",\"v0\"]", + "edge_type_df_size": 922 + }, + "children": [ + { + "spanID": "26a974652371ea2c", + "operationName": "set_pyg_data", + "startTime": 1705677352337807, + "duration": 350, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "555a40854578bab3", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352337837, + "duration": 308, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + }, + { + "spanID": "ca24be4d56672017", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352338181, + "duration": 1012, + "tags": { + "edge_type": "[\"v0\",\"e0\",\"v1\"]", + "edge_type_df_size": 78 + }, + "children": [ + { + "spanID": "b7ef941c5e00ea6d", + "operationName": "set_pyg_data", + "startTime": 1705677352339098, + "duration": 90, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, 
+ "children": [ + { + "spanID": "5697f17c17fd3736", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352339120, + "duration": 49, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + } + ] + }, + { + "spanID": "9edb95f2c787ddfb", + "operationName": "process_adb_edge_dataframe", + "startTime": 1705677352345635, + "duration": 3705, + "tags": { + "edge_df_size": 1000 + }, + "children": [ + { + "spanID": "0a8c46c709215f4f", + "operationName": "split_adb_ids", + "startTime": 1705677352345702, + "duration": 551, + "tags": {}, + "children": [] + }, + { + "spanID": "29f2c3c74505f4f6", + "operationName": "split_adb_ids", + "startTime": 1705677352346494, + "duration": 502, + "tags": {}, + "children": [] + }, + { + "spanID": "fb5eb8662640211e", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352347788, + "duration": 1528, + "tags": { + "edge_type": "[\"v1\",\"e0\",\"v0\"]", + "edge_type_df_size": 1000 + }, + "children": [ + { + "spanID": "4a1eb1b7955d0e77", + "operationName": "set_pyg_data", + "startTime": 1705677352348971, + "duration": 340, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "651116565c646036", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352348995, + "duration": 304, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + } + ] + }, + { + "spanID": "8c69778ffd42f697", + "operationName": "process_adb_edge_dataframe", + "startTime": 1705677352360895, + "duration": 3960, + "tags": { + "edge_df_size": 1000 + }, + "children": [ + { + "spanID": "4b1cb8bd2130260c", + "operationName": "split_adb_ids", + "startTime": 1705677352360991, + "duration": 575, + "tags": {}, + "children": [] + }, + { + "spanID": "7a62722e1d69d9fc", + "operationName": "split_adb_ids", + "startTime": 1705677352361816, + "duration": 521, + "tags": {}, + "children": [] + }, + { + "spanID": 
"3d5d60bcbb0378eb", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352363161, + "duration": 1665, + "tags": { + "edge_type": "[\"v1\",\"e0\",\"v0\"]", + "edge_type_df_size": 1000 + }, + "children": [ + { + "spanID": "0c5a876fef0a81ed", + "operationName": "set_pyg_data", + "startTime": 1705677352364418, + "duration": 402, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "2df967474ed13553", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352364443, + "duration": 359, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + } + ] + }, + { + "spanID": "85e69ea9db66bfda", + "operationName": "process_adb_edge_dataframe", + "startTime": 1705677352371659, + "duration": 3966, + "tags": { + "edge_df_size": 1000 + }, + "children": [ + { + "spanID": "122411e6ba8982dd", + "operationName": "split_adb_ids", + "startTime": 1705677352371755, + "duration": 631, + "tags": {}, + "children": [] + }, + { + "spanID": "673617d94d7bd307", + "operationName": "split_adb_ids", + "startTime": 1705677352372647, + "duration": 522, + "tags": {}, + "children": [] + }, + { + "spanID": "5419eefcd5e73e3f", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352374009, + "duration": 1591, + "tags": { + "edge_type": "[\"v1\",\"e0\",\"v0\"]", + "edge_type_df_size": 1000 + }, + "children": [ + { + "spanID": "6a2b32004c9a0ae1", + "operationName": "set_pyg_data", + "startTime": 1705677352375256, + "duration": 339, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "19724ce31bd09448", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352375281, + "duration": 298, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + } + ] + }, + { + "spanID": "e89dc8158f928dc5", + "operationName": "process_adb_edge_dataframe", + "startTime": 1705677352380950, + 
"duration": 3950, + "tags": { + "edge_df_size": 1000 + }, + "children": [ + { + "spanID": "79585e697b2e1b82", + "operationName": "split_adb_ids", + "startTime": 1705677352381046, + "duration": 574, + "tags": {}, + "children": [] + }, + { + "spanID": "d741d609564ae909", + "operationName": "split_adb_ids", + "startTime": 1705677352381886, + "duration": 505, + "tags": {}, + "children": [] + }, + { + "spanID": "f9ea2c64cc417e7c", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352383235, + "duration": 1637, + "tags": { + "edge_type": "[\"v1\",\"e0\",\"v0\"]", + "edge_type_df_size": 1000 + }, + "children": [ + { + "spanID": "57f98d1ecff4c56b", + "operationName": "set_pyg_data", + "startTime": 1705677352384517, + "duration": 350, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "7aa56a181fd3c017", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352384545, + "duration": 304, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + } + ] + }, + { + "spanID": "b318ad4c1db2b452", + "operationName": "process_adb_edge_dataframe", + "startTime": 1705677352395526, + "duration": 4029, + "tags": { + "edge_df_size": 1000 + }, + "children": [ + { + "spanID": "6d316b4a7f6b8793", + "operationName": "split_adb_ids", + "startTime": 1705677352395619, + "duration": 604, + "tags": {}, + "children": [] + }, + { + "spanID": "4d4985dc09aedbd0", + "operationName": "split_adb_ids", + "startTime": 1705677352396487, + "duration": 509, + "tags": {}, + "children": [] + }, + { + "spanID": "bc18a40b55c7ed9d", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352397879, + "duration": 1650, + "tags": { + "edge_type": "[\"v1\",\"e0\",\"v0\"]", + "edge_type_df_size": 1000 + }, + "children": [ + { + "spanID": "e4f7625eafe6790a", + "operationName": "set_pyg_data", + "startTime": 1705677352399183, + "duration": 341, + "tags": { + "meta": "{'edge_attr': 
'edge_attr'}" + }, + "children": [ + { + "spanID": "eb70ba6527d99a23", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352399208, + "duration": 301, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + } + ] + }, + { + "spanID": "a0722aa02aa36cf7", + "operationName": "process_adb_edge_dataframe", + "startTime": 1705677352408419, + "duration": 4230, + "tags": { + "edge_df_size": 1000 + }, + "children": [ + { + "spanID": "6025719990823eda", + "operationName": "split_adb_ids", + "startTime": 1705677352408502, + "duration": 607, + "tags": {}, + "children": [] + }, + { + "spanID": "f97ccc57ce5dc807", + "operationName": "split_adb_ids", + "startTime": 1705677352409511, + "duration": 572, + "tags": {}, + "children": [] + }, + { + "spanID": "a38d8afcfdd2ed7a", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352410965, + "duration": 1655, + "tags": { + "edge_type": "[\"v1\",\"e0\",\"v0\"]", + "edge_type_df_size": 1000 + }, + "children": [ + { + "spanID": "10da8a9516408169", + "operationName": "set_pyg_data", + "startTime": 1705677352412257, + "duration": 358, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "15ace7a1ceca2ee3", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352412281, + "duration": 318, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + } + ] + }, + { + "spanID": "bff773ce32b2c492", + "operationName": "process_adb_edge_dataframe", + "startTime": 1705677352424216, + "duration": 4110, + "tags": { + "edge_df_size": 1000 + }, + "children": [ + { + "spanID": "0fa7ee0538974df5", + "operationName": "split_adb_ids", + "startTime": 1705677352424313, + "duration": 592, + "tags": {}, + "children": [] + }, + { + "spanID": "0202861c62830869", + "operationName": "split_adb_ids", + "startTime": 1705677352425183, + "duration": 509, + "tags": {}, + "children": [] 
+ }, + { + "spanID": "64d09913191b8adf", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352426569, + "duration": 1636, + "tags": { + "edge_type": "[\"v1\",\"e0\",\"v0\"]", + "edge_type_df_size": 1000 + }, + "children": [ + { + "spanID": "84dd6da68e751eb7", + "operationName": "set_pyg_data", + "startTime": 1705677352427849, + "duration": 351, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "72d3cc5d4a31b243", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352427882, + "duration": 299, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + } + ] + }, + { + "spanID": "7d161f29eb8f2056", + "operationName": "process_adb_edge_dataframe", + "startTime": 1705677352443165, + "duration": 4754, + "tags": { + "edge_df_size": 1000 + }, + "children": [ + { + "spanID": "95bb440dc9cd4af9", + "operationName": "split_adb_ids", + "startTime": 1705677352443256, + "duration": 580, + "tags": {}, + "children": [] + }, + { + "spanID": "ade6c5e9b6e355f6", + "operationName": "split_adb_ids", + "startTime": 1705677352444103, + "duration": 515, + "tags": {}, + "children": [] + }, + { + "spanID": "6c4c3935379deda1", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352445463, + "duration": 1394, + "tags": { + "edge_type": "[\"v1\",\"e0\",\"v1\"]", + "edge_type_df_size": 743 + }, + "children": [ + { + "spanID": "5e4af862156af458", + "operationName": "set_pyg_data", + "startTime": 1705677352446583, + "duration": 270, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "fd0ba70e385af463", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352446609, + "duration": 235, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + }, + { + "spanID": "42cb6d1dffc573d5", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352446875, 
+ "duration": 1028, + "tags": { + "edge_type": "[\"v1\",\"e0\",\"v0\"]", + "edge_type_df_size": 257 + }, + "children": [ + { + "spanID": "c6f0093395d18051", + "operationName": "set_pyg_data", + "startTime": 1705677352447763, + "duration": 135, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "6e6480432aa50f4e", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352447785, + "duration": 99, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + } + ] + }, + { + "spanID": "5bc7fdeb31234efe", + "operationName": "process_adb_edge_dataframe", + "startTime": 1705677352460663, + "duration": 4055, + "tags": { + "edge_df_size": 1000 + }, + "children": [ + { + "spanID": "1058fe8c1d7173e5", + "operationName": "split_adb_ids", + "startTime": 1705677352460743, + "duration": 565, + "tags": {}, + "children": [] + }, + { + "spanID": "dd138266d26d5396", + "operationName": "split_adb_ids", + "startTime": 1705677352461560, + "duration": 768, + "tags": {}, + "children": [] + }, + { + "spanID": "b3b68b57da54f267", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352463146, + "duration": 1548, + "tags": { + "edge_type": "[\"v1\",\"e0\",\"v1\"]", + "edge_type_df_size": 1000 + }, + "children": [ + { + "spanID": "e72bb5b707120911", + "operationName": "set_pyg_data", + "startTime": 1705677352464352, + "duration": 337, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "739cd488869bdbd2", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352464376, + "duration": 300, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + } + ] + }, + { + "spanID": "ad4ab155c09fcd8f", + "operationName": "process_adb_edge_dataframe", + "startTime": 1705677352476609, + "duration": 3862, + "tags": { + "edge_df_size": 1000 + }, + "children": [ + { + "spanID": "1e70e79933a1d1c2", 
+ "operationName": "split_adb_ids", + "startTime": 1705677352476691, + "duration": 576, + "tags": {}, + "children": [] + }, + { + "spanID": "65e049937f411fed", + "operationName": "split_adb_ids", + "startTime": 1705677352477521, + "duration": 513, + "tags": {}, + "children": [] + }, + { + "spanID": "350d278d41a8a6e1", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352478872, + "duration": 1574, + "tags": { + "edge_type": "[\"v1\",\"e0\",\"v1\"]", + "edge_type_df_size": 1000 + }, + "children": [ + { + "spanID": "0ac728b4a41865bf", + "operationName": "set_pyg_data", + "startTime": 1705677352480103, + "duration": 339, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "f2ad985fff3e0ba1", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352480127, + "duration": 301, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + } + ] + }, + { + "spanID": "3744da64cc249558", + "operationName": "process_adb_edge_dataframe", + "startTime": 1705677352492391, + "duration": 3881, + "tags": { + "edge_df_size": 1000 + }, + "children": [ + { + "spanID": "25777cf09f982188", + "operationName": "split_adb_ids", + "startTime": 1705677352492475, + "duration": 595, + "tags": {}, + "children": [] + }, + { + "spanID": "32ae2a201ac902ee", + "operationName": "split_adb_ids", + "startTime": 1705677352493320, + "duration": 516, + "tags": {}, + "children": [] + }, + { + "spanID": "60c6b3ed755a3ac1", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352494665, + "duration": 1581, + "tags": { + "edge_type": "[\"v1\",\"e0\",\"v1\"]", + "edge_type_df_size": 1000 + }, + "children": [ + { + "spanID": "8be04c3e5c949381", + "operationName": "set_pyg_data", + "startTime": 1705677352495898, + "duration": 343, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "26bdd974d3b564b0", + "operationName": 
"build_tensor_from_dataframe", + "startTime": 1705677352495922, + "duration": 302, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + } + ] + }, + { + "spanID": "fd1ac7ce1ad0a6f2", + "operationName": "process_adb_edge_dataframe", + "startTime": 1705677352507429, + "duration": 4215, + "tags": { + "edge_df_size": 1000 + }, + "children": [ + { + "spanID": "fba52e5998a33736", + "operationName": "split_adb_ids", + "startTime": 1705677352507523, + "duration": 586, + "tags": {}, + "children": [] + }, + { + "spanID": "25fdacbe7ce71b48", + "operationName": "split_adb_ids", + "startTime": 1705677352508373, + "duration": 802, + "tags": {}, + "children": [] + }, + { + "spanID": "67e98363905c053b", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352510019, + "duration": 1599, + "tags": { + "edge_type": "[\"v1\",\"e0\",\"v1\"]", + "edge_type_df_size": 1000 + }, + "children": [ + { + "spanID": "ae0fdbc8a36bcb01", + "operationName": "set_pyg_data", + "startTime": 1705677352511266, + "duration": 347, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "e0ae1a1b6c596216", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352511294, + "duration": 305, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + } + ] + }, + { + "spanID": "7ed2ec2f856f3d95", + "operationName": "process_adb_edge_dataframe", + "startTime": 1705677352518701, + "duration": 3804, + "tags": { + "edge_df_size": 1000 + }, + "children": [ + { + "spanID": "eac39204ade7cef3", + "operationName": "split_adb_ids", + "startTime": 1705677352518758, + "duration": 520, + "tags": {}, + "children": [] + }, + { + "spanID": "528cc241e345ac72", + "operationName": "split_adb_ids", + "startTime": 1705677352519492, + "duration": 544, + "tags": {}, + "children": [] + }, + { + "spanID": "7f99d273d5627386", + "operationName": "process_adb_edge_type_df", + 
"startTime": 1705677352520788, + "duration": 1680, + "tags": { + "edge_type": "[\"v1\",\"e0\",\"v1\"]", + "edge_type_df_size": 1000 + }, + "children": [ + { + "spanID": "7fa74d8aff88ec82", + "operationName": "set_pyg_data", + "startTime": 1705677352522075, + "duration": 388, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "ab899605a2939b3b", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352522103, + "duration": 340, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + } + ] + }, + { + "spanID": "33b5b3cedfec4623", + "operationName": "process_adb_edge_dataframe", + "startTime": 1705677352531826, + "duration": 3944, + "tags": { + "edge_df_size": 1000 + }, + "children": [ + { + "spanID": "9c19ed348af58903", + "operationName": "split_adb_ids", + "startTime": 1705677352531899, + "duration": 563, + "tags": {}, + "children": [] + }, + { + "spanID": "38018399ee6a8e2f", + "operationName": "split_adb_ids", + "startTime": 1705677352532703, + "duration": 504, + "tags": {}, + "children": [] + }, + { + "spanID": "5718ada2027c013f", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352534000, + "duration": 1737, + "tags": { + "edge_type": "[\"v1\",\"e0\",\"v1\"]", + "edge_type_df_size": 1000 + }, + "children": [ + { + "spanID": "f66ac168b4a1ca79", + "operationName": "set_pyg_data", + "startTime": 1705677352535366, + "duration": 366, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "e6256403bf3df0bb", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352535395, + "duration": 319, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + } + ] + }, + { + "spanID": "d17034ce51797350", + "operationName": "process_adb_edge_dataframe", + "startTime": 1705677352546765, + "duration": 4152, + "tags": { + "edge_df_size": 1000 + }, + "children": [ + 
{ + "spanID": "091472ad52631db9", + "operationName": "split_adb_ids", + "startTime": 1705677352546829, + "duration": 561, + "tags": {}, + "children": [] + }, + { + "spanID": "25fb5f3d866d7002", + "operationName": "split_adb_ids", + "startTime": 1705677352547634, + "duration": 816, + "tags": {}, + "children": [] + }, + { + "spanID": "41c30359dfde2281", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352549261, + "duration": 1630, + "tags": { + "edge_type": "[\"v1\",\"e0\",\"v1\"]", + "edge_type_df_size": 1000 + }, + "children": [ + { + "spanID": "c8bf23fb9a431f7a", + "operationName": "set_pyg_data", + "startTime": 1705677352550527, + "duration": 359, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "d7a3283c27e969e2", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352550551, + "duration": 317, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + } + ] + }, + { + "spanID": "953c178e61067a8c", + "operationName": "process_adb_edge_dataframe", + "startTime": 1705677352559267, + "duration": 4470, + "tags": { + "edge_df_size": 1000 + }, + "children": [ + { + "spanID": "b7d779cc4b5ca436", + "operationName": "split_adb_ids", + "startTime": 1705677352559333, + "duration": 547, + "tags": {}, + "children": [] + }, + { + "spanID": "ce9b2e70b4d4dfcc", + "operationName": "split_adb_ids", + "startTime": 1705677352560110, + "duration": 504, + "tags": {}, + "children": [] + }, + { + "spanID": "10fce97d786e30ef", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352561382, + "duration": 1280, + "tags": { + "edge_type": "[\"v0\",\"e0\",\"v2\"]", + "edge_type_df_size": 584 + }, + "children": [ + { + "spanID": "15ab2c21ccc93ff7", + "operationName": "set_pyg_data", + "startTime": 1705677352562437, + "duration": 220, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "de6fec4b843b2a7d", + 
"operationName": "build_tensor_from_dataframe", + "startTime": 1705677352562461, + "duration": 188, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + }, + { + "spanID": "0a1727f7ea5f24b6", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352562679, + "duration": 1041, + "tags": { + "edge_type": "[\"v1\",\"e0\",\"v1\"]", + "edge_type_df_size": 416 + }, + "children": [ + { + "spanID": "399f8a8f10fc9eee", + "operationName": "set_pyg_data", + "startTime": 1705677352563543, + "duration": 172, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "0a66dc4e21681081", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352563564, + "duration": 138, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + } + ] + }, + { + "spanID": "03e9ba024cea2df0", + "operationName": "process_adb_edge_dataframe", + "startTime": 1705677352577850, + "duration": 4405, + "tags": { + "edge_df_size": 1000 + }, + "children": [ + { + "spanID": "d80d6a1cc2472fd6", + "operationName": "split_adb_ids", + "startTime": 1705677352577921, + "duration": 552, + "tags": {}, + "children": [] + }, + { + "spanID": "54a1d50572d6bc20", + "operationName": "split_adb_ids", + "startTime": 1705677352578867, + "duration": 630, + "tags": {}, + "children": [] + }, + { + "spanID": "2922fbd8dca5b353", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352580495, + "duration": 1731, + "tags": { + "edge_type": "[\"v0\",\"e0\",\"v2\"]", + "edge_type_df_size": 1000 + }, + "children": [ + { + "spanID": "261908b9ccf719ab", + "operationName": "set_pyg_data", + "startTime": 1705677352581858, + "duration": 363, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "a7f5195cde62d43f", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352581886, + "duration": 319, + "tags": { + 
"meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + } + ] + }, + { + "spanID": "f7f60e7f75f2bc20", + "operationName": "process_adb_edge_dataframe", + "startTime": 1705677352591809, + "duration": 3983, + "tags": { + "edge_df_size": 1000 + }, + "children": [ + { + "spanID": "8147a8f45f0ef320", + "operationName": "split_adb_ids", + "startTime": 1705677352591877, + "duration": 825, + "tags": {}, + "children": [] + }, + { + "spanID": "e6addd9e61d9fe39", + "operationName": "split_adb_ids", + "startTime": 1705677352592946, + "duration": 512, + "tags": {}, + "children": [] + }, + { + "spanID": "809f292387a1798f", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352594216, + "duration": 1552, + "tags": { + "edge_type": "[\"v0\",\"e0\",\"v2\"]", + "edge_type_df_size": 1000 + }, + "children": [ + { + "spanID": "92e94e89089b30a0", + "operationName": "set_pyg_data", + "startTime": 1705677352595424, + "duration": 339, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "adb6da351734a26c", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352595447, + "duration": 303, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + } + ] + }, + { + "spanID": "ce1bb02acb4d18d6", + "operationName": "process_adb_edge_dataframe", + "startTime": 1705677352604876, + "duration": 3663, + "tags": { + "edge_df_size": 1000 + }, + "children": [ + { + "spanID": "c202387b849b8a44", + "operationName": "split_adb_ids", + "startTime": 1705677352604935, + "duration": 546, + "tags": {}, + "children": [] + }, + { + "spanID": "fd938adc99a2ecb1", + "operationName": "split_adb_ids", + "startTime": 1705677352605708, + "duration": 510, + "tags": {}, + "children": [] + }, + { + "spanID": "bf391fbb138c3460", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352606959, + "duration": 1554, + "tags": { + "edge_type": 
"[\"v0\",\"e0\",\"v2\"]", + "edge_type_df_size": 1000 + }, + "children": [ + { + "spanID": "e7e13ed86d265dd8", + "operationName": "set_pyg_data", + "startTime": 1705677352608147, + "duration": 352, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "34c3494ac12ea9b8", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352608173, + "duration": 303, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + } + ] + }, + { + "spanID": "89110af04a276dda", + "operationName": "process_adb_edge_dataframe", + "startTime": 1705677352617033, + "duration": 3799, + "tags": { + "edge_df_size": 1000 + }, + "children": [ + { + "spanID": "993ec8c6e6b106e2", + "operationName": "split_adb_ids", + "startTime": 1705677352617092, + "duration": 555, + "tags": {}, + "children": [] + }, + { + "spanID": "d360da696af79ad2", + "operationName": "split_adb_ids", + "startTime": 1705677352617871, + "duration": 532, + "tags": {}, + "children": [] + }, + { + "spanID": "7b72590bf8f8f071", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352619174, + "duration": 1628, + "tags": { + "edge_type": "[\"v0\",\"e0\",\"v2\"]", + "edge_type_df_size": 1000 + }, + "children": [ + { + "spanID": "ca819c6fd872298c", + "operationName": "set_pyg_data", + "startTime": 1705677352620436, + "duration": 361, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "63794035f8e45086", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352620460, + "duration": 320, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + } + ] + }, + { + "spanID": "961d8dcf9b8086da", + "operationName": "process_adb_edge_dataframe", + "startTime": 1705677352628313, + "duration": 4236, + "tags": { + "edge_df_size": 1000 + }, + "children": [ + { + "spanID": "d9efe28b3bcb50b3", + "operationName": "split_adb_ids", + 
"startTime": 1705677352628387, + "duration": 906, + "tags": {}, + "children": [] + }, + { + "spanID": "cc4da021dd620222", + "operationName": "split_adb_ids", + "startTime": 1705677352629553, + "duration": 545, + "tags": {}, + "children": [] + }, + { + "spanID": "a83023ab053e4b42", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352630897, + "duration": 1626, + "tags": { + "edge_type": "[\"v0\",\"e0\",\"v2\"]", + "edge_type_df_size": 1000 + }, + "children": [ + { + "spanID": "000fc63de2a01335", + "operationName": "set_pyg_data", + "startTime": 1705677352632165, + "duration": 353, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "2e9583eabda17da2", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352632190, + "duration": 313, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + } + ] + }, + { + "spanID": "81c16e984d6cd782", + "operationName": "process_adb_edge_dataframe", + "startTime": 1705677352636622, + "duration": 4015, + "tags": { + "edge_df_size": 1000 + }, + "children": [ + { + "spanID": "4124405b91fcfe88", + "operationName": "split_adb_ids", + "startTime": 1705677352636695, + "duration": 571, + "tags": {}, + "children": [] + }, + { + "spanID": "10cc8711552ae5ca", + "operationName": "split_adb_ids", + "startTime": 1705677352637510, + "duration": 559, + "tags": {}, + "children": [] + }, + { + "spanID": "dc2151e17e56ac3d", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352638874, + "duration": 1735, + "tags": { + "edge_type": "[\"v0\",\"e0\",\"v2\"]", + "edge_type_df_size": 1000 + }, + "children": [ + { + "spanID": "f164f9d84312ece2", + "operationName": "set_pyg_data", + "startTime": 1705677352640231, + "duration": 372, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "4d849ec5d334886f", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352640257, 
+ "duration": 329, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + } + ] + }, + { + "spanID": "68777babc5c14262", + "operationName": "process_adb_edge_dataframe", + "startTime": 1705677352644807, + "duration": 4070, + "tags": { + "edge_df_size": 1000 + }, + "children": [ + { + "spanID": "cf5e9ea362584ab3", + "operationName": "split_adb_ids", + "startTime": 1705677352644882, + "duration": 611, + "tags": {}, + "children": [] + }, + { + "spanID": "0ff030b86238d0a0", + "operationName": "split_adb_ids", + "startTime": 1705677352645750, + "duration": 557, + "tags": {}, + "children": [] + }, + { + "spanID": "a417956f29ee7f3d", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352647135, + "duration": 1714, + "tags": { + "edge_type": "[\"v0\",\"e0\",\"v2\"]", + "edge_type_df_size": 1000 + }, + "children": [ + { + "spanID": "209818d1ef7e85ec", + "operationName": "set_pyg_data", + "startTime": 1705677352648463, + "duration": 380, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "497e9f1a3d2bf042", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352648491, + "duration": 336, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + } + ] + }, + { + "spanID": "d476fe38babd4745", + "operationName": "process_adb_edge_dataframe", + "startTime": 1705677352653078, + "duration": 4483, + "tags": { + "edge_df_size": 1000 + }, + "children": [ + { + "spanID": "0e3705265582a3bd", + "operationName": "split_adb_ids", + "startTime": 1705677352653164, + "duration": 924, + "tags": {}, + "children": [] + }, + { + "spanID": "0932f5b6f11ddff7", + "operationName": "split_adb_ids", + "startTime": 1705677352654356, + "duration": 536, + "tags": {}, + "children": [] + }, + { + "spanID": "6af944e07b38785b", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352655760, + "duration": 1772, + "tags": { + 
"edge_type": "[\"v0\",\"e0\",\"v2\"]", + "edge_type_df_size": 1000 + }, + "children": [ + { + "spanID": "7de8a2342412579d", + "operationName": "set_pyg_data", + "startTime": 1705677352657158, + "duration": 369, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "dd02e100e3d48408", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352657186, + "duration": 319, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + } + ] + }, + { + "spanID": "b799ae8e9a1a7d6f", + "operationName": "process_adb_edge_dataframe", + "startTime": 1705677352663511, + "duration": 4087, + "tags": { + "edge_df_size": 1000 + }, + "children": [ + { + "spanID": "ac6d5df814e5064c", + "operationName": "split_adb_ids", + "startTime": 1705677352663588, + "duration": 585, + "tags": {}, + "children": [] + }, + { + "spanID": "26c06e67b2ddc481", + "operationName": "split_adb_ids", + "startTime": 1705677352664435, + "duration": 537, + "tags": {}, + "children": [] + }, + { + "spanID": "fc98c279cf6f111c", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352665837, + "duration": 1735, + "tags": { + "edge_type": "[\"v0\",\"e0\",\"v2\"]", + "edge_type_df_size": 1000 + }, + "children": [ + { + "spanID": "69407be75a4f4145", + "operationName": "set_pyg_data", + "startTime": 1705677352667209, + "duration": 357, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "9c9d03f309018aee", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352667238, + "duration": 306, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + } + ] + }, + { + "spanID": "62fda854775e0ec3", + "operationName": "process_adb_edge_dataframe", + "startTime": 1705677352671044, + "duration": 2790, + "tags": { + "edge_df_size": 450 + }, + "children": [ + { + "spanID": "0c0a59677579501a", + "operationName": 
"split_adb_ids", + "startTime": 1705677352671106, + "duration": 300, + "tags": {}, + "children": [] + }, + { + "spanID": "788c31f619faa06e", + "operationName": "split_adb_ids", + "startTime": 1705677352671621, + "duration": 267, + "tags": {}, + "children": [] + }, + { + "spanID": "26c00984c734bb05", + "operationName": "process_adb_edge_type_df", + "startTime": 1705677352672594, + "duration": 1221, + "tags": { + "edge_type": "[\"v0\",\"e0\",\"v2\"]", + "edge_type_df_size": 450 + }, + "children": [ + { + "spanID": "084fa819052daad3", + "operationName": "set_pyg_data", + "startTime": 1705677352673607, + "duration": 203, + "tags": { + "meta": "{'edge_attr': 'edge_attr'}" + }, + "children": [ + { + "spanID": "9e0df45b992a34a1", + "operationName": "build_tensor_from_dataframe", + "startTime": 1705677352673630, + "duration": 153, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] +} \ No newline at end of file diff --git a/benchmark/traces/master/pyg_to_arangodb.json b/benchmark/traces/master/pyg_to_arangodb.json new file mode 100644 index 0000000..6fb416f --- /dev/null +++ b/benchmark/traces/master/pyg_to_arangodb.json @@ -0,0 +1,568 @@ +{ + "spanID": "40212ef7cca5a5a1", + "operationName": "pyg_to_arangodb", + "startTime": 1705677350412614, + "duration": 1424964, + "tags": { + "name": "FakeHeteroGraphBenchmark" + }, + "children": [ + { + "spanID": "e8e5216afcbd04c3", + "operationName": "get_node_and_edge_types", + "startTime": 1705677350412677, + "duration": 12, + "tags": {}, + "children": [] + }, + { + "spanID": "fb97d43588561712", + "operationName": "create_adb_graph", + "startTime": 1705677350412716, + "duration": 5920, + "tags": {}, + "children": [ + { + "spanID": "cf6a659eb4862b21", + "operationName": "edge_types_to_edge_definitions", + "startTime": 1705677350415265, + "duration": 16, + "tags": {}, + "children": [] + }, + { + "spanID": "e6f4590b9a164106", + "operationName": 
"node_types_to_orphan_collections", + "startTime": 1705677350415302, + "duration": 7, + "tags": {}, + "children": [] + } + ] + }, + { + "spanID": "4f65d4d9259f4329", + "operationName": "process_pyg_node_type", + "startTime": 1705677350419876, + "duration": 137646, + "tags": { + "n_type": "v0", + "n_type_size": 1008 + }, + "children": [ + { + "spanID": "bad640fb19488dec", + "operationName": "process_pyg_node_batch", + "startTime": 1705677350419914, + "duration": 13309, + "tags": { + "start_index": 0, + "end_index": 1008 + }, + "children": [ + { + "spanID": "e61a441c12e0c8b2", + "operationName": "set_adb_data", + "startTime": 1705677350421257, + "duration": 10150, + "tags": { + "meta": "{}" + }, + "children": [ + { + "spanID": "af19922ad9b8a714", + "operationName": "build_dataframe_from_tensor", + "startTime": 1705677350421620, + "duration": 2935, + "tags": { + "meta_key": "y", + "meta_val": "y" + }, + "children": [] + }, + { + "spanID": "78de58575487ce1e", + "operationName": "build_dataframe_from_tensor", + "startTime": 1705677350426245, + "duration": 1702, + "tags": { + "meta_key": "x", + "meta_val": "x" + }, + "children": [] + } + ] + } + ] + }, + { + "spanID": "19c78df48f4ff31e", + "operationName": "insert_adb_documents", + "startTime": 1705677350433306, + "duration": 124191, + "tags": { + "col": "v0", + "size": 1008 + }, + "children": [] + } + ] + }, + { + "spanID": "6f25e2a25a921187", + "operationName": "process_pyg_node_type", + "startTime": 1705677350559298, + "duration": 82233, + "tags": { + "n_type": "v1", + "n_type_size": 821 + }, + "children": [ + { + "spanID": "9c6316b950f24455", + "operationName": "process_pyg_node_batch", + "startTime": 1705677350559338, + "duration": 2455, + "tags": { + "start_index": 0, + "end_index": 821 + }, + "children": [ + { + "spanID": "e9bb17bca3f2c9bf", + "operationName": "set_adb_data", + "startTime": 1705677350559603, + "duration": 1472, + "tags": { + "meta": "{}" + }, + "children": [ + { + "spanID": "f77383c13458a748", + 
"operationName": "build_dataframe_from_tensor", + "startTime": 1705677350559671, + "duration": 1114, + "tags": { + "meta_key": "x", + "meta_val": "x" + }, + "children": [] + } + ] + } + ] + }, + { + "spanID": "7a1d50068d723104", + "operationName": "insert_adb_documents", + "startTime": 1705677350561831, + "duration": 79670, + "tags": { + "col": "v1", + "size": 821 + }, + "children": [] + } + ] + }, + { + "spanID": "dd84f39e71545a13", + "operationName": "process_pyg_node_type", + "startTime": 1705677350643251, + "duration": 77198, + "tags": { + "n_type": "v2", + "n_type_size": 894 + }, + "children": [ + { + "spanID": "42af9fc385776e9a", + "operationName": "process_pyg_node_batch", + "startTime": 1705677350643284, + "duration": 2235, + "tags": { + "start_index": 0, + "end_index": 894 + }, + "children": [ + { + "spanID": "ce164dba0ff18e02", + "operationName": "set_adb_data", + "startTime": 1705677350643505, + "duration": 1306, + "tags": { + "meta": "{}" + }, + "children": [ + { + "spanID": "8c778ea6eb2083e6", + "operationName": "build_dataframe_from_tensor", + "startTime": 1705677350643563, + "duration": 963, + "tags": { + "meta_key": "x", + "meta_val": "x" + }, + "children": [] + } + ] + } + ] + }, + { + "spanID": "03983ca8ea7e9d49", + "operationName": "insert_adb_documents", + "startTime": 1705677350645557, + "duration": 74861, + "tags": { + "col": "v2", + "size": 894 + }, + "children": [] + } + ] + }, + { + "spanID": "b83e90ec17e0aa3c", + "operationName": "process_pyg_edge_type", + "startTime": 1705677350721993, + "duration": 199098, + "tags": { + "e_type": "[\"v2\",\"e0\",\"v1\"]", + "e_type_size": 8895 + }, + "children": [ + { + "spanID": "66194cb1d71037d1", + "operationName": "process_pyg_edge_batch", + "startTime": 1705677350722032, + "duration": 6419, + "tags": { + "start_index": 0, + "end_index": 8895 + }, + "children": [ + { + "spanID": "d3290a4cb5d32b16", + "operationName": "set_adb_data", + "startTime": 1705677350724898, + "duration": 1762, + "tags": { + 
"meta": "{}" + }, + "children": [ + { + "spanID": "ab0c1681c8f8e3d0", + "operationName": "build_dataframe_from_tensor", + "startTime": 1705677350724946, + "duration": 1372, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + }, + { + "spanID": "004ae545a0116be5", + "operationName": "insert_adb_documents", + "startTime": 1705677350728509, + "duration": 192552, + "tags": { + "col": "e0", + "size": 8895 + }, + "children": [] + } + ] + }, + { + "spanID": "7e5b1e7f9ca5499d", + "operationName": "process_pyg_edge_type", + "startTime": 1705677350922998, + "duration": 147111, + "tags": { + "e_type": "[\"v1\",\"e0\",\"v2\"]", + "e_type_size": 8161 + }, + "children": [ + { + "spanID": "de1b372ad3fbf47a", + "operationName": "process_pyg_edge_batch", + "startTime": 1705677350923056, + "duration": 5320, + "tags": { + "start_index": 0, + "end_index": 8161 + }, + "children": [ + { + "spanID": "3e70f16a55485822", + "operationName": "set_adb_data", + "startTime": 1705677350925687, + "duration": 1779, + "tags": { + "meta": "{}" + }, + "children": [ + { + "spanID": "534097cabaf3897a", + "operationName": "build_dataframe_from_tensor", + "startTime": 1705677350925754, + "duration": 1309, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + }, + { + "spanID": "ded733e8b421eaeb", + "operationName": "insert_adb_documents", + "startTime": 1705677350928429, + "duration": 141653, + "tags": { + "col": "e0", + "size": 8161 + }, + "children": [] + } + ] + }, + { + "spanID": "30e9c5cc101fbccc", + "operationName": "process_pyg_edge_type", + "startTime": 1705677351071917, + "duration": 205157, + "tags": { + "e_type": "[\"v0\",\"e0\",\"v1\"]", + "e_type_size": 10022 + }, + "children": [ + { + "spanID": "9148624feac1c14f", + "operationName": "process_pyg_edge_batch", + "startTime": 1705677351071962, + "duration": 5514, + "tags": { + "start_index": 0, + "end_index": 10022 + }, + "children": [ + { + 
"spanID": "3d15eef738c1962e", + "operationName": "set_adb_data", + "startTime": 1705677351074725, + "duration": 1806, + "tags": { + "meta": "{}" + }, + "children": [ + { + "spanID": "f7b0b7d2cda8056c", + "operationName": "build_dataframe_from_tensor", + "startTime": 1705677351074773, + "duration": 1414, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + }, + { + "spanID": "cd9d2b7d247a8333", + "operationName": "insert_adb_documents", + "startTime": 1705677351077519, + "duration": 199528, + "tags": { + "col": "e0", + "size": 10022 + }, + "children": [] + } + ] + }, + { + "spanID": "72ae22448b0163c1", + "operationName": "process_pyg_edge_type", + "startTime": 1705677351279085, + "duration": 125564, + "tags": { + "e_type": "[\"v1\",\"e0\",\"v0\"]", + "e_type_size": 8179 + }, + "children": [ + { + "spanID": "149818d11759edc3", + "operationName": "process_pyg_edge_batch", + "startTime": 1705677351279128, + "duration": 4637, + "tags": { + "start_index": 0, + "end_index": 8179 + }, + "children": [ + { + "spanID": "51ef1922fe43c49e", + "operationName": "set_adb_data", + "startTime": 1705677351281404, + "duration": 1560, + "tags": { + "meta": "{}" + }, + "children": [ + { + "spanID": "820865d6e005b860", + "operationName": "build_dataframe_from_tensor", + "startTime": 1705677351281450, + "duration": 1175, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + }, + { + "spanID": "eece328bff7b118e", + "operationName": "insert_adb_documents", + "startTime": 1705677351283800, + "duration": 120819, + "tags": { + "col": "e0", + "size": 8179 + }, + "children": [] + } + ] + }, + { + "spanID": "1beb37117d41e602", + "operationName": "process_pyg_edge_type", + "startTime": 1705677351406630, + "duration": 127494, + "tags": { + "e_type": "[\"v1\",\"e0\",\"v1\"]", + "e_type_size": 8159 + }, + "children": [ + { + "spanID": "8d1fd9b74d2b9deb", + "operationName": "process_pyg_edge_batch", 
+ "startTime": 1705677351406674, + "duration": 4985, + "tags": { + "start_index": 0, + "end_index": 8159 + }, + "children": [ + { + "spanID": "b4e1357d4a84eb03", + "operationName": "set_adb_data", + "startTime": 1705677351409196, + "duration": 1679, + "tags": { + "meta": "{}" + }, + "children": [ + { + "spanID": "8c25166a1ff39849", + "operationName": "build_dataframe_from_tensor", + "startTime": 1705677351409259, + "duration": 1257, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + }, + { + "spanID": "d080e66e552f233a", + "operationName": "insert_adb_documents", + "startTime": 1705677351411703, + "duration": 122399, + "tags": { + "col": "e0", + "size": 8159 + }, + "children": [] + } + ] + }, + { + "spanID": "8a5006c1ec188efb", + "operationName": "process_pyg_edge_type", + "startTime": 1705677351535677, + "duration": 300614, + "tags": { + "e_type": "[\"v0\",\"e0\",\"v2\"]", + "e_type_size": 10034 + }, + "children": [ + { + "spanID": "f6be1f723405095c", + "operationName": "process_pyg_edge_batch", + "startTime": 1705677351535729, + "duration": 6490, + "tags": { + "start_index": 0, + "end_index": 10034 + }, + "children": [ + { + "spanID": "9a6a5f92cca74147", + "operationName": "set_adb_data", + "startTime": 1705677351538806, + "duration": 2319, + "tags": { + "meta": "{}" + }, + "children": [ + { + "spanID": "966e12778c1745a7", + "operationName": "build_dataframe_from_tensor", + "startTime": 1705677351538873, + "duration": 1794, + "tags": { + "meta_key": "edge_attr", + "meta_val": "edge_attr" + }, + "children": [] + } + ] + } + ] + }, + { + "spanID": "71eacd0549a3e80e", + "operationName": "insert_adb_documents", + "startTime": 1705677351542275, + "duration": 293944, + "tags": { + "col": "e0", + "size": 10034 + }, + "children": [] + } + ] + } + ] +} \ No newline at end of file diff --git a/benchmark/write.py b/benchmark/write.py new file mode 100644 index 0000000..32a5ab8 --- /dev/null +++ b/benchmark/write.py @@ 
"""Benchmark script: run the adapter round-trip and dump its Jaeger span trees.

The script runs ``pyg_to_arangodb`` followed by ``arangodb_to_pyg`` with a
tracer that exports spans over OTLP, fetches the resulting traces from the
Jaeger query endpoint, rebuilds each trace as a tree of spans and writes the
trees as JSON files under ``traces/<output_dir>/`` next to this file.
"""

import argparse
import json
import pathlib
import random
import time
from collections import defaultdict
from typing import Any, Dict, List, Optional

import numpy as np
import requests
import torch
from arango import ArangoClient
from retry import retry
from torch_geometric.datasets import FakeHeteroDataset

try:
    from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
except ImportError as err:
    m = """
    OpenTelemetry is not installed.
    Please install it with `pip install adbpyg-adapter[tracing]`
    """

    raise ImportError(m) from err

from adbpyg_adapter import ADBPyG_Adapter
from adbpyg_adapter.tracing import create_tracer

# Seed every RNG in use so the generated fake dataset is reproducible.
seed = 0
torch.manual_seed(seed)
random.seed(seed)
np.random.seed(seed)

# Tag keys that are dropped when a raw Jaeger span is converted to a JaegerSpan.
_IGNORED_TAG_KEYS = ("span.kind", "internal.span.format")

# Seconds to wait for a single Jaeger HTTP request before giving up on it.
_REQUEST_TIMEOUT_SECONDS = 30


class JaegerSpan:
    """One span of a Jaeger trace, linked to its parent and child spans."""

    def __init__(
        self,
        span_id: str,
        operation_name: str,
        start_time: int,
        duration: int,
        tags: List[Dict[str, Any]],
    ):
        """Create a span from the fields of a raw Jaeger span.

        :param span_id: The ``spanID`` of the span.
        :param operation_name: The ``operationName`` of the span.
        :param start_time: The ``startTime`` of the span.
        :param duration: The ``duration`` of the span.
        :param tags: A list of ``{"key": ..., "value": ...}`` entries. It is
            flattened into a ``key -> value`` dict, skipping the keys listed
            in ``_IGNORED_TAG_KEYS``.
        """
        self.span_id = span_id
        self.operation_name = operation_name
        self.start_time = start_time
        self.duration = duration
        self.tags = {
            tag["key"]: tag["value"]
            for tag in tags
            if tag["key"] not in _IGNORED_TAG_KEYS
        }

        # Children keyed by span ID, in insertion order.
        self.children: Dict[str, "JaegerSpan"] = {}
        self.parent: Optional["JaegerSpan"] = None

    def add_child(self, span_id: str, child: "JaegerSpan"):
        """Register **child** under **span_id** (replaces an existing entry)."""
        self.children[span_id] = child

    def set_parent(self, parent: "JaegerSpan"):
        """Set the parent of this span."""
        self.parent = parent

    def to_dict(self) -> Dict[str, Any]:
        """Return this span and, recursively, its children as a plain dict."""
        return {
            "spanID": self.span_id,
            "operationName": self.operation_name,
            "startTime": self.start_time,
            "duration": self.duration,
            "tags": self.tags,
            "children": [child.to_dict() for child in self.children.values()],
        }


class JaegerSpanTree:
    """The span tree of the single Jaeger trace matching a query.

    The tree is fetched and built when the object is constructed.
    """

    def __init__(
        self,
        jaeger_endpoint: str,
        service_name: str,
        operation_name: str,
        start_time: str,
        tags: Optional[Dict[str, Any]] = None,
    ) -> None:
        """Fetch the matching trace from Jaeger and build its span tree.

        :param jaeger_endpoint: Base URL of the Jaeger query service.
        :param service_name: Sent as the ``service`` query parameter.
        :param operation_name: Sent as the ``operation`` query parameter.
        :param start_time: Sent as the ``start`` query parameter.
        :param tags: Sent as ``key:value`` entries of the ``tag`` query
            parameter. Defaults to no tags.
        :raise Exception: If the traces cannot be fetched, if more than one
            trace matches, or if the trace has more than one root span.
        """
        self.jaeger_endpoint = jaeger_endpoint
        self.service_name = service_name
        self.operation_name = operation_name
        self.start_time = start_time
        # Copy so that neither a shared default nor the caller's dict is aliased.
        self.tags: Dict[str, Any] = dict(tags) if tags else {}

        self.root_span: Optional[JaegerSpan] = None
        self.span_id_to_span: Dict[str, JaegerSpan] = {}
        self.operation_name_to_span: Dict[str, List[JaegerSpan]] = defaultdict(list)

        self.__build_span_tree()
        print(f"Built span tree for {self.service_name}-{self.operation_name}")

    def get_spans_by_operation_name(self, operation_name: str) -> List[JaegerSpan]:
        """Return the spans with **operation_name** (empty list if none)."""
        # .get() avoids inserting an empty entry into the defaultdict on a miss.
        return self.operation_name_to_span.get(operation_name, [])

    def get_span_by_span_id(self, span_id: str) -> JaegerSpan:
        """Return the span with **span_id**; raises KeyError if unknown."""
        return self.span_id_to_span[span_id]

    def get_span_tag_value(self, span_id: str, tag_key: str) -> str:
        """Return the value of **tag_key** on the span with **span_id**."""
        return self.span_id_to_span[span_id].tags[tag_key]

    def __build_span_tree(self) -> None:
        """Populate the lookup tables and link every span to its parent."""
        raw_spans = self.__fetch_sorted_spans()

        # Pass 1: register every span, so that linking below does not depend
        # on a parent being sorted ahead of its children (spans sharing the
        # same startTime may come back in either order).
        for span in raw_spans:
            span_id: str = span["spanID"]
            operation_name: str = span["operationName"]

            span_object = JaegerSpan(
                span_id,
                operation_name,
                span["startTime"],
                span["duration"],
                span["tags"],
            )

            self.span_id_to_span[span_id] = span_object
            self.operation_name_to_span[operation_name].append(span_object)

        # Pass 2: pick the root (the span without references) and link the rest.
        for span in raw_spans:
            span_id = span["spanID"]
            child_span = self.span_id_to_span[span_id]

            references = span.get("references", [])
            if not references:
                if self.root_span is not None:
                    m = f"Found multiple root spans: {self.root_span.span_id} and {span_id}"
                    print(m)
                    raise Exception(m)

                self.root_span = child_span
                continue

            for ref in references:
                if ref["refType"] == "CHILD_OF":
                    # KeyError here means the parent is not part of the trace.
                    parent_span = self.span_id_to_span[ref["spanID"]]

                    parent_span.add_child(span_id, child_span)
                    child_span.set_parent(parent_span)

    def __fetch_sorted_spans(self) -> List[Dict[str, Any]]:
        """Return the spans of the single matching trace, sorted by startTime.

        :raise Exception: If more than one trace matches the query.
        """
        params = {
            "service": self.service_name,
            "operation": self.operation_name,
            "tag": [f"{k}:{v}" for k, v in self.tags.items()],
            "start": self.start_time,
        }

        traces = self.__get_jaeger_traces(f"{self.jaeger_endpoint}/api/traces", params)

        if len(traces) > 1:
            m = f"Found multiple traces for {params}"
            print(m)
            raise Exception(m)

        spans = traces[0]["spans"]
        return sorted(spans, key=lambda span: span["startTime"])

    @retry(tries=6, delay=2, backoff=2)
    def __get_jaeger_traces(
        self, url: str, params: Dict[str, Any]
    ) -> List[Dict[str, Any]]:
        """GET **url** and return the non-empty ``data`` list of the response.

        Any failure raises, which lets the ``retry`` decorator try again.

        :raise Exception: On a non-200 status code or an empty trace list.
        """
        # A timeout turns a stalled connection into a retryable failure.
        response = requests.get(url, params=params, timeout=_REQUEST_TIMEOUT_SECONDS)

        if response.status_code != 200:
            m = f"Failed to fetch traces for {params}: {response.status_code}"
            print(m)
            raise Exception(m)

        traces = response.json()["data"]
        if len(traces) == 0:
            m = f"No traces found for {params}"
            print(m)
            raise Exception(m)

        return traces

    def to_dict(self) -> Dict[str, Any]:
        """Return the whole tree as nested dicts, starting at the root span.

        :raise Exception: If no root span was found while building the tree.
        """
        if self.root_span is None:
            m = f"No root span found for {self.service_name}-{self.operation_name}"
            print(m)
            raise Exception(m)

        return self.root_span.to_dict()

    def to_json_file(self, output: str) -> None:
        """Write the tree as JSON to ``traces/<output>`` next to this file.

        :param output: Path relative to the ``traces`` directory.
        """
        # Serialise first so a failure cannot leave a truncated file behind.
        payload = json.dumps(self.to_dict(), indent=4)

        output_path = pathlib.Path(__file__).parent.absolute() / "traces" / output
        output_path.parent.mkdir(parents=True, exist_ok=True)
        with open(output_path, "w", encoding="utf-8") as file:
            file.write(payload)


def parse_args() -> argparse.Namespace:
    """Parse the command-line arguments of the benchmark script."""
    parser = argparse.ArgumentParser()

    parser.add_argument("--url", type=str, default="http://localhost:8529")
    parser.add_argument("--dbName", type=str, default="_system")
    parser.add_argument("--username", type=str, default="root")
    parser.add_argument("--password", type=str, default="")
    parser.add_argument("--jaeger_endpoint", type=str, default="http://localhost:16686")
    parser.add_argument("--otlp_endpoint", type=str, default="http://localhost:4317")
    parser.add_argument(
        "--output_dir", type=str, choices=["branch", "master"], required=True
    )

    return parser.parse_args()


def get_adapter(args: argparse.Namespace, service_name: str) -> ADBPyG_Adapter:
    """Build an adapter whose tracer exports spans to ``args.otlp_endpoint``.

    :param args: Parsed command-line arguments (see ``parse_args``).
    :param service_name: Service name passed to ``create_tracer``.
    """
    db = ArangoClient(hosts=args.url).db(
        args.dbName, username=args.username, password=args.password, verify=True
    )

    tracer = create_tracer(
        service_name,
        enable_console_tracing=False,
        span_exporters=[OTLPSpanExporter(endpoint=args.otlp_endpoint, insecure=True)],
    )

    return ADBPyG_Adapter(db, tracer=tracer)


def run_pyg_to_arangodb(adapter: ADBPyG_Adapter, name: str) -> None:
    """Recreate graph **name** in ArangoDB from a fake heterogeneous dataset."""
    data = FakeHeteroDataset(edge_dim=2)[0]
    adapter.db.delete_graph(name, drop_collections=True, ignore_missing=True)
    adapter.pyg_to_arangodb(name, data)


def run_arangodb_to_pyg(adapter: ADBPyG_Adapter, name: str) -> None:
    """Read graph **name** back from ArangoDB through the adapter."""
    adapter.arangodb_to_pyg(
        name,
        {
            "vertexCollections": {
                "v0": {"x", "y"},
                "v1": {"x"},
                "v2": {"x"},
            },
            "edgeCollections": {
                "e0": {"edge_attr": "edge_attr"},
            },
        },
    )


def main() -> None:
    """Run both benchmark directions and write their span trees to disk."""
    service_name = "adbpyg-adapter-benchmark"

    # 1. Parse the arguments
    args = parse_args()

    # 2. Get the adapter
    adbpyg_adapter = get_adapter(args, service_name)

    # 3. Run the benchmark
    # TODO: Figure out why Jaeger is reporting the traces
    # in the **same** operation... (only a problem for benchmarking)
    name = "FakeHeteroGraphBenchmark"
    # Integer microseconds since the epoch, the unit the spans' startTime uses.
    # Stripping the "." from str(time.time()) gave a digit count that varied
    # with the float's repr, so it was not a reliable timestamp.
    start_time = str(int(time.time() * 1_000_000))
    run_pyg_to_arangodb(adbpyg_adapter, name)
    run_arangodb_to_pyg(adbpyg_adapter, name)

    # Wait for OTLP Export
    time.sleep(5)

    # 4. Get the span trees
    pyg_to_arangodb_span_tree = JaegerSpanTree(
        args.jaeger_endpoint,
        service_name,
        "pyg_to_arangodb",
        start_time,
        {"name": name},
    )

    arangodb_to_pyg_span_tree = JaegerSpanTree(
        args.jaeger_endpoint,
        service_name,
        "arangodb_to_pyg",
        start_time,
        {"name": name},
    )

    # 5. Write the span trees to disk
    pyg_to_arangodb_span_tree.to_json_file(f"{args.output_dir}/pyg_to_arangodb.json")
    arangodb_to_pyg_span_tree.to_json_file(f"{args.output_dir}/arangodb_to_pyg.json")


if __name__ == "__main__":
    main()
action="append", default=[]) def pytest_configure(config: Any) -> None: @@ -38,10 +57,11 @@ def pytest_configure(config: Any) -> None: } print("----------------------------------------") - print("URL: " + con["url"]) - print("Username: " + con["username"]) - print("Password: " + con["password"]) - print("Database: " + con["dbName"]) + print(f"URL: {con['url']}") + print(f"Username: {con['username']}") + print(f"Password: {con['password']}") + print(f"Database: {con['dbName']}") + print(f"TRACING_ENABLED: {TRACING_ENABLED}") print("----------------------------------------") global db @@ -49,8 +69,20 @@ def pytest_configure(config: Any) -> None: con["dbName"], con["username"], con["password"], verify=True ) + global tracer + tracer = None + if TRACING_ENABLED: + tracer = create_tracer( + "adbpyg-adapter-test", + enable_console_tracing=False, + span_exporters=[ + OTLPSpanExporter(endpoint=endpoint) + for endpoint in config.getoption("otlp_endpoint") + ], + ) + global adbpyg_adapter - adbpyg_adapter = ADBPyG_Adapter(db, logging_lvl=logging.INFO) + adbpyg_adapter = ADBPyG_Adapter(db, logging_lvl=logging.INFO, tracer=tracer) def pytest_exception_interact(node: Any, call: Any, report: Any) -> None: diff --git a/tests/test_adapter.py b/tests/test_adapter.py index c1e4cfc..827b2ca 100644 --- a/tests/test_adapter.py +++ b/tests/test_adapter.py @@ -37,6 +37,7 @@ get_fake_homo_graph, get_karate_graph, get_social_graph, + tracer, udf_key_df_to_tensor, udf_users_x_tensor_to_df, udf_v2_x_tensor_to_df, @@ -389,7 +390,7 @@ def test_pyg_to_adb_ambiguity_error() -> None: adbpyg_adapter.pyg_to_arangodb("graph", d) -def test_pyg_to_arangodb_with_controller() -> None: +def test_pyg_to_adb_with_controller() -> None: name = "Karate_3" data = get_karate_graph() db.delete_graph(name, drop_collections=True, ignore_missing=True) @@ -682,7 +683,7 @@ def test_adb_graph_to_pyg_to_arangodb_with_missing_document_and_strict( data = get_karate_graph() db.delete_graph(name, drop_collections=True, 
ignore_missing=True) - ADBPyG_Adapter(db).pyg_to_arangodb(name, data) + ADBPyG_Adapter(db, tracer=tracer).pyg_to_arangodb(name, data) graph = db.graph(name) v_cols: Set[str] = graph.vertex_collections() @@ -712,7 +713,7 @@ def test_adb_graph_to_pyg_to_arangodb_with_missing_document_and_permissive( data = get_karate_graph() db.delete_graph(name, drop_collections=True, ignore_missing=True) - ADBPyG_Adapter(db).pyg_to_arangodb(name, data) + ADBPyG_Adapter(db, tracer=tracer).pyg_to_arangodb(name, data) graph = db.graph(name) v_cols: Set[str] = graph.vertex_collections()