Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore(ingest): cleanup various methods #9221

Merged
merged 1 commit into from
Nov 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion metadata-events/mxe-schemas/rename-namespace.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/bin/sh
#!/bin/bash

SCRIPT_ROOT="$( cd "$( dirname "${BASH_SOURCE[0]:-$0}" )" >/dev/null && pwd )"

Expand Down
11 changes: 3 additions & 8 deletions metadata-ingestion/scripts/docgen.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,10 @@
import sys
import textwrap
from importlib.metadata import metadata, requires
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
from typing import Any, Dict, Iterable, List, Optional

import click
from pydantic import BaseModel, Field
from pydantic.dataclasses import dataclass

from datahub.configuration.common import ConfigModel
from datahub.ingestion.api.decorators import (
Expand Down Expand Up @@ -94,7 +93,6 @@ class Component(BaseModel):

@staticmethod
def map_field_path_to_components(field_path: str) -> List[Component]:

m = re.match(FieldRow._V2_FIELD_PATH_TOKEN_MATCHER_PREFIX, field_path)
v = re.match(FieldRow._V2_FIELD_PATH_FIELD_NAME_MATCHER, field_path)
components: List[FieldRow.Component] = []
Expand Down Expand Up @@ -197,7 +195,7 @@ def get_checkbox(self) -> str:
# Using a non-breaking space to prevent the checkbox from being
# broken into a new line.
if not self.parent: # None and empty string both count
return f'&nbsp;<abbr title="Required">✅</abbr>'
return '&nbsp;<abbr title="Required">✅</abbr>'
else:
return f'&nbsp;<abbr title="Required if {self.parent} is set">❓</abbr>'
else:
Expand Down Expand Up @@ -356,7 +354,6 @@ def priority_value(path: str) -> str:


def gen_md_table_from_struct(schema_dict: Dict[str, Any]) -> List[str]:

from datahub.ingestion.extractor.json_schema_util import JsonSchemaTranslator

# we don't want default field values to be injected into the description of the field
Expand Down Expand Up @@ -460,7 +457,6 @@ def get_additional_deps_for_extra(extra_name: str) -> List[str]:


def relocate_path(orig_path: str, relative_path: str, relocated_path: str) -> str:

newPath = os.path.join(os.path.dirname(orig_path), relative_path)
assert os.path.exists(newPath)

Expand Down Expand Up @@ -515,7 +511,6 @@ def generate(

if extra_docs:
for path in glob.glob(f"{extra_docs}/**/*[.md|.yaml|.yml]", recursive=True):

m = re.search("/docs/sources/(.*)/(.*).md", path)
if m:
platform_name = m.group(1).lower()
Expand Down Expand Up @@ -741,7 +736,7 @@ def generate(
i += 1
f.write(f"---\nsidebar_position: {i}\n---\n\n")
f.write(
f"import Tabs from '@theme/Tabs';\nimport TabItem from '@theme/TabItem';\n\n"
"import Tabs from '@theme/Tabs';\nimport TabItem from '@theme/TabItem';\n\n"
)
f.write(f"# {platform_docs['name']}\n")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -828,7 +828,7 @@ def from_api( # noqa: C901
)
else:
logger.warning(
f"Failed to extract explore {explore_name} from model {model}.", e
f"Failed to extract explore {explore_name} from model {model}: {e}"
)

except AssertionError:
Expand Down
2 changes: 2 additions & 0 deletions metadata-ingestion/src/datahub/ingestion/source/mode.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,8 @@ def construct_dashboard(
if creator is not None:
modified_actor = builder.make_user_urn(creator)
if report_info.get("last_saved_at") is None:
# Sometimes Mode returns null for last_saved_at.
# In that case, we use the created_at timestamp instead.
report_info["last_saved_at"] = report_info.get("created_at")

modified_ts = int(
Expand Down