Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace special symbol characters with other characters to allow them to be drawn as mermaid #85

Merged
merged 5 commits into from
Feb 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 56 additions & 3 deletions dbterd/adapters/targets/mermaid/mermaid_test_relationship.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import re
from typing import Optional

from dbterd.adapters.algos import test_relationship


Expand All @@ -14,6 +17,56 @@ def run(manifest, catalog, **kwargs):
return ("output.md", parse(manifest, catalog, **kwargs))


def replace_column_name(column_name: str) -> str:
    """Make a column name safe to draw in a mermaid diagram.

    Every space is turned into a hyphen and every dot into a double
    underscore; all other characters are kept unchanged.

    Args:
        column_name (str): column name

    Returns:
        str: Column name with spaces and dots substituted
    """
    # One translation table covers both substitutions in a single pass.
    substitutions = str.maketrans({" ": "-", ".": "__"})
    return column_name.translate(substitutions)


# A root identifier followed by an angle-bracketed body, e.g. `Struct<...>`.
# The body is matched loosely on purpose: it may hold nested complex types
# (`Struct<a Struct<b string>>`) or unnamed element types (`array<string>`),
# and DOTALL lets the body span several lines.
_COMPLEX_COLUMN_TYPE = re.compile(r"(\w+)\s*<.*>", re.DOTALL)


def match_complex_column_type(column_type: str) -> Optional[str]:
    """Return the root type of a complex (angle-bracketed) column type.

    Examples:
        `Struct<field1 string, field2 string>` -> `Struct`
        `Struct<a Struct<b string>, c int>`    -> `Struct`
        `array<string>`                        -> `array`
        `string`                               -> `None`

    Args:
        column_type (str): column type

    Returns:
        Optional[str]: Root type if the input starts with `root<...>`,
            otherwise `None` (e.g. for primitive types)
    """
    # `match` anchors at the start of the string, so the root type must be
    # the leading word of the type.
    match = _COMPLEX_COLUMN_TYPE.match(column_type)
    return match.group(1) if match else None


def replace_column_type(column_type: str) -> str:
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are many possible ways to abbreviate or omit these characters, and the behavior could later be made configurable through an environment variable or a command-line argument. For now, however, we want to start with a simple form.

"""If type of column contains special characters that cannot be drawn by mermaid, replace them with strings that can be drawn.
If the type string contains a nested complex type, omit it to make it easier to read.

Args:
column_type (str): column type

Returns:
str: Type of column with special characters are substituted or omitted
"""
# Some specific DWHs may have types that cannot be drawn in mermaid, such as `Struct<first_name string, last_name string>`.
# These types may be nested and can be very long, so omit them
datnguye marked this conversation as resolved.
Show resolved Hide resolved
complex_column_type = match_complex_column_type(column_type)
if complex_column_type:
return f"{complex_column_type}[OMITTED]"
else:
return column_type.replace(" ", "-")


def parse(manifest, catalog, **kwargs):
"""Get the Mermaid content from dbt artifacts

Expand All @@ -35,7 +88,7 @@ def parse(manifest, catalog, **kwargs):
table_name = table.name.upper()
columns = "\n".join(
[
f' {x.data_type.replace(" ","-")} {x.name.replace(" ","-")}'
f" {replace_column_type(x.data_type)} {replace_column_name(x.name)}"
for x in table.columns
]
)
Expand All @@ -49,9 +102,9 @@ def parse(manifest, catalog, **kwargs):
for rel in relationships:
key_from = f'"{rel.table_map[1]}"'
key_to = f'"{rel.table_map[0]}"'
reference_text = rel.column_map[0].replace(" ", "-")
reference_text = replace_column_name(rel.column_map[0])
if rel.column_map[0] != rel.column_map[1]:
reference_text += f"--{ rel.column_map[1].replace(' ','-')}"
reference_text += f"--{ replace_column_name(rel.column_map[1])}"
mermaid += f" {key_from.upper()} {get_rel_symbol(rel.type)} {key_to.upper()}: {reference_text}\n"

return mermaid
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,55 @@ class TestMermaidTestRelationship:
}
""",
),
(
[
Table(
name="model.dbt_resto.table1",
node_name="model.dbt_resto.table1",
database="--database--",
schema="--schema--",
columns=[
Column(name="name1.first_name", data_type="name1-type")
],
raw_sql="--irrelevant--",
),
Table(
name="model.dbt_resto.table2",
node_name="model.dbt_resto.table2",
database="--database2--",
schema="--schema2--",
columns=[
Column(name="name2.first_name", data_type="name2-type2"),
Column(
name="complex_struct",
data_type="Struct<field1 string, field2 string>",
),
],
raw_sql="--irrelevant--",
),
],
[
Ref(
name="test.dbt_resto.relationships_table1",
table_map=["model.dbt_resto.table2", "model.dbt_resto.table1"],
column_map=["name2.first_name", "name1.first_name"],
),
],
[],
[],
["model", "source"],
False,
"""erDiagram
"MODEL.DBT_RESTO.TABLE1" {
name1-type name1__first_name
}
"MODEL.DBT_RESTO.TABLE2" {
name2-type2 name2__first_name
Struct[OMITTED] complex_struct
}
"MODEL.DBT_RESTO.TABLE1" }|--|| "MODEL.DBT_RESTO.TABLE2": name2__first_name--name1__first_name
""",
),
],
)
def test_parse(
Expand Down
Loading