Skip to content

Commit

Permalink
[docs][dagit-screenshot] Add CLI command to generate asset SVGs from …
Browse files Browse the repository at this point in the history
…snippets (#9764)

## Summary

First pass at a CLI script that reads an MDX file and generates asset
graph SVGs for marked snippets.

It does this by:
- Loading the snippet file in Dagit
- Triggering an SVG download from Dagit using Selenium
- Adding the necessary font info to the SVG
- Adding a link to the SVG to the MDX file

## Test Plan

Tested locally. See the Vercel preview for
#9765 for an example in the
asset tutorials:
https://dagster-git-benpankow-update-tutorials-with-ass-ce5c5e-elementl.vercel.app/tutorial/assets/asset-graph
  • Loading branch information
benpankow committed Jan 13, 2023
1 parent f26f766 commit 66c4e7d
Show file tree
Hide file tree
Showing 4 changed files with 220 additions and 7 deletions.
29 changes: 25 additions & 4 deletions docs/dagit-screenshot/dagit_screenshot/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import click

from dagit_screenshot.commands.asset_svg import generate_svg as _svg
from dagit_screenshot.commands.audit import audit as _audit
from dagit_screenshot.commands.capture import capture as _capture
from dagit_screenshot.commands.show import show as _show
Expand All @@ -23,7 +24,7 @@
"--spec-db",
type=click.Path(exists=True),
default=DEFAULT_SPEC_DB,
help="Path to directory containing a tree of YAML files with screenshot specs."
help="Path to directory containing a tree of YAML files with screenshot specs.",
)
@click.option(
"--workspace-root",
Expand All @@ -44,7 +45,14 @@ def dagit_screenshot(ctx, output_root: str, spec_db: str, workspace_root: str) -
workspace files exist. Optionally verify that corresponding output files exist.
"""
)
@click.option("--verify-outputs/--no-verify-outputs", type=click.BOOL, default=False, help="If set, then the existence of output screenshots in the output root will also be checked.")
@click.option(
"--verify-outputs/--no-verify-outputs",
type=click.BOOL,
default=False,
help=(
"If set, then the existence of output screenshots in the output root will also be checked."
),
)
@click.pass_context
def audit(ctx, verify_outputs) -> None:
output_root = ctx.obj["output_root"]
Expand Down Expand Up @@ -72,11 +80,24 @@ def capture(ctx, spec_id: str, output_path: str) -> None:
spec = load_spec(spec_id, ctx.obj["spec_db"])
_capture(spec, output_path)


@dagit_screenshot.command(help="Dump the contents of a screenshot DB to the terminal as YAML.")
@click.option(
    "--prefix",
    help="If provided, only specs with ids starting with the passed value will be dumped.",
)
@click.pass_context
def show(ctx, prefix: Optional[str]) -> None:
    # The spec DB path is read from the shared click context object; prefix is None when
    # the option is not passed.
    _show(ctx.obj["spec_db"], prefix)


@dagit_screenshot.command(
    help="Reads a given markdown file and generates asset graph SVGs for code snippets."
)
@click.option(
    "--target",
    "-t",
    # Without required=True an omitted --target reaches _svg as None and only fails once
    # the file is opened; let click report the missing/invalid path instead.
    required=True,
    type=click.Path(exists=True, dir_okay=False),
    help="Path to markdown file to process.",
)
def svg(target: str) -> None:
    _svg(target)


def main():
    """Entry point: invokes the ``dagit_screenshot`` click group with an empty context object."""
    # Subcommands look up shared settings (e.g. "spec_db", "output_root") on this object.
    context_obj = {}
    dagit_screenshot(obj=context_obj)
192 changes: 192 additions & 0 deletions docs/dagit-screenshot/dagit_screenshot/commands/asset_svg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
# pylint: disable=print-call

import glob
import os
import pathlib
import re
import shutil
import signal
import subprocess
from contextlib import contextmanager
from tempfile import TemporaryDirectory
from time import sleep
from typing import Dict, Optional

from selenium import webdriver # pylint: disable=import-error
from selenium.webdriver.common.by import By

from dagit_screenshot.defaults import DEFAULT_OUTPUT_ROOT

# Seconds to sleep while a dagit route finishes loading in the browser
DAGIT_ROUTE_LOAD_TIME = 2

# Seconds to sleep while the dagit process starts up
DAGIT_STARTUP_TIME = 6

# Seconds to sleep while the SVG download triggered from Dagit completes
DOWNLOAD_SVG_TIME = 2

# Directory containing this module; the paths below are built relative to it
_THIS_DIR = os.path.dirname(__file__)

# Root directory under which generated asset graph SVGs are stored
SVG_ROOT = os.path.join(DEFAULT_OUTPUT_ROOT, "asset-screenshots")

# Root of the docs snippets package, four directory levels above this module
CODE_SAMPLES_ROOT = os.path.join(
    _THIS_DIR, "..", "..", "..", "..", "examples", "docs_snippets", "docs_snippets"
)

# SVG fragment with the font information that gets spliced into downloaded SVGs
SVG_FONT_DATA_FILE = os.path.join(_THIS_DIR, "..", "static", "font_info.svg")

# Loaded once at import time so every generated SVG reuses the same font payload
with open(SVG_FONT_DATA_FILE, encoding="utf-8") as font_file:
    SVG_FONT_DATA = font_file.read()


def _add_font_info_to_svg(svg_filepath: str):
    """
    Embeds the Dagster font data into an SVG file downloaded from Dagit.

    The file is rewritten in place: each occurrence of the empty XHTML ``<style>`` element is
    replaced with the contents of ``SVG_FONT_DATA``.
    """
    svg_file = pathlib.Path(svg_filepath)
    placeholder = '<style xmlns="http://www.w3.org/1999/xhtml"></style>'
    original_markup = svg_file.read_text(encoding="utf-8")
    svg_file.write_text(original_markup.replace(placeholder, SVG_FONT_DATA), encoding="utf-8")


def _get_latest_download(file_extension: str) -> str:
"""
Returns the path to the most recently downloaded file with the given extension.
"""
# https://stackoverflow.com/a/60004701
downloads_folder = os.path.join(os.path.expanduser("~"), "Downloads")
list_of_downloads = glob.glob(downloads_folder + f"/*.{file_extension}")
return max(list_of_downloads, key=os.path.getctime)


@contextmanager
def _setup_snippet_file(code_path: str, snippet_fn: Optional[str]):
"""
Creates a temporary file that contains the contents of the given code file,
setting up the given snippet function as a repository if specified.
"""
with TemporaryDirectory() as temp_dir:
with open(code_path, "r", encoding="utf-8") as f:
code = f.read()

if snippet_fn:
code = f"""{code}
from dagster import repository
@repository
def demo_repo():
return {snippet_fn}()
"""

temp_code_file = os.path.join(temp_dir, "code.py")
with open(temp_code_file, "w", encoding="utf-8") as f:
f.write(code)
yield temp_code_file


def generate_svg_for_file(code_path: str, destination_path: str, snippet_fn: Optional[str]):
    """
    Generates an SVG for the given code file & entry function, saving it to the given destination path.

    Starts a ``dagit -f`` process on a temporary copy of the code file, opens it in Chrome via
    Selenium, clicks the SVG download button, then moves the newest ``.svg`` from the user's
    downloads folder to ``destination_path`` and embeds the font information into it.

    Args:
        code_path: Path to the Python file to load in Dagit.
        destination_path: Where the generated SVG is written; parent directories are created.
        snippet_fn: Optional function in the code file to expose as a repository.
    """
    driver = None
    dagit_process = None

    try:
        with _setup_snippet_file(code_path, snippet_fn) as temp_code_file:
            dagit_process = subprocess.Popen(["dagit", "-f", temp_code_file])
            sleep(DAGIT_STARTUP_TIME)  # Wait for the dagit server to start up

            driver = webdriver.Chrome()
            driver.set_window_size(1024, 768)
            # dagit is launched without a port argument, so this relies on it serving on 3000
            driver.get("http://localhost:3000")
            # NOTE(review): presumably suppresses the community onboarding dialog so it
            # cannot cover the download button - confirm against Dagit.
            driver.execute_script("window.localStorage.setItem('communityNux','1')")
            driver.refresh()

            sleep(DAGIT_ROUTE_LOAD_TIME)  # wait for page to load

            download_button = driver.find_element(
                By.XPATH, '//div[@aria-label="download_for_offline"]'
            )
            download_button.click()

            sleep(DOWNLOAD_SVG_TIME)  # wait for download to complete

            downloaded_file = _get_latest_download("svg")
            pathlib.Path(destination_path).parent.mkdir(parents=True, exist_ok=True)
            # shutil.move falls back to copy + delete when the downloads folder and the
            # destination are on different filesystems, where os.rename would raise.
            shutil.move(downloaded_file, destination_path)

            _add_font_info_to_svg(destination_path)

    finally:
        try:
            if driver:  # quit chrome
                driver.quit()
        finally:
            # Nested so that a failure while quitting chrome cannot leave dagit running
            if dagit_process:  # quit dagit
                dagit_process.send_signal(signal.SIGINT)
                dagit_process.wait()


def parse_params(param_str: str) -> Dict[str, str]:
    """
    Parses a set of params for a markdown code block, e.g. returns {"foo": "bar", "baz": "qux"} for
    ```python foo=bar baz=qux

    Tokens are separated by whitespace and each one is split at its first "=" only, so a value
    may itself contain "=". A token without "=" maps to an empty string. If a key appears more
    than once, the last occurrence wins.
    """
    parsed: Dict[str, str] = {}
    for token in param_str.split():
        key, _, value = token.partition("=")
        parsed[key] = value
    return parsed


def generate_svg(target_mdx_file: str):
    """
    Generates asset graph SVGs for the marked code blocks of an MDX file and links them in place.

    A code block is processed when the line opening it with ```python also contains ``dagimage``
    among its params. For each such block the snippet named by its ``file`` param (and optional
    ``function`` param) is rendered to an SVG under ``SVG_ROOT``, and the block's ``dagimage``
    param is set to the path of that SVG. All SVGs are generated before the MDX file is rewritten.

    Args:
        target_mdx_file: Path to the MDX file to read and update.

    Raises:
        KeyError: If a marked code block has no ``file`` param.
    """
    # Group 1 is the opening fence, group 2 the raw params of a marked code block
    fence_pattern = re.compile(r"(```python)([^\n]+dagimage[^\n]+)")

    with open(target_mdx_file, "r", encoding="utf-8") as f:
        mdx_contents = f.read()

    updated_snippet_params = []
    for fence in fence_pattern.finditer(mdx_contents):
        snippet_params = parse_params(fence.group(2))
        filepath = snippet_params["file"]
        snippet_fn = snippet_params.get("function")

        # Drop the source extension (normally ".py") and nest per-function SVGs under the file
        source_stem = os.path.splitext(filepath)[0]
        function_suffix = f"/{snippet_fn}" if snippet_fn else ""
        destination_file_path = f".{source_stem}{function_suffix}.svg"

        generate_svg_for_file(
            os.path.join(CODE_SAMPLES_ROOT, f".{filepath}"),
            os.path.join(SVG_ROOT, destination_file_path),
            snippet_fn,
        )

        image_path = os.path.normpath(
            os.path.join("images", "asset-screenshots", destination_file_path)
        )
        # Add pointer to the generated screenshot to the params for the code block. The value
        # ends up in the MDX as an image path, so keep forward slashes on every platform.
        updated_snippet_params.append(
            {**snippet_params, "dagimage": image_path.replace(os.sep, "/")}
        )

    # Matches are substituted in document order, the same order the params were collected in
    remaining_params = iter(updated_snippet_params)

    def _replace(match):
        params_text = " ".join(f"{k}={v}" for k, v in next(remaining_params).items())
        return f"{match.group(1)} {params_text}"

    updated_mdx_contents = fence_pattern.sub(_replace, mdx_contents)

    with open(target_mdx_file, "w", encoding="utf-8") as f:
        f.write(updated_mdx_contents)
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion docs/next/components/mdx/MDXComponents.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -471,7 +471,7 @@ const Code: React.FC<CodeProps> = ({children, dagimage, ...props}) => {
</div>
{dagimage && (
<RenderedDAG
svgSrc="/images/asset-screenshots/my_assets.svg"
svgSrc={'/' + dagimage}
mobileImgSrc="/images-2022-july/screenshots/python-assets2.png"
/>
)}
Expand Down

1 comment on commit 66c4e7d

@vercel
Copy link

@vercel vercel bot commented on 66c4e7d Jan 13, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.