Skip to content

Commit

Permalink
feat(cli): Make consistent use of DataHubGraphClientConfig
Browse files Browse the repository at this point in the history
  • Loading branch information
pedro93 committed May 9, 2024
1 parent 41fa259 commit ff71460
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 23 deletions.
26 changes: 24 additions & 2 deletions metadata-ingestion/src/datahub/cli/cli_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from datahub.emitter.serialization_helper import post_json_transform
from datahub.metadata.schema_classes import _Aspect
from datahub.utilities.urns.urn import Urn, guess_entity_type
from datahub.ingestion.graph.client import DatahubClientConfig

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -90,14 +91,35 @@ def get_url_and_token():
return gms_host, gms_token


def load_graph_config() -> DatahubClientConfig:
    """Build the client config used to connect to DataHub.

    The ``gms`` section of the DataHub config file is the baseline; a default
    ``DatahubClientConfig`` is used when no config could be loaded. The
    ``server`` and ``token`` values are then replaced by, in order of
    precedence:

    1. the module-level ``config_override`` mapping, when it is non-empty;
    2. the environment (``get_details_from_env``), when
       ``config_utils.should_skip_config()`` is true.

    Returns:
        The resolved ``DatahubClientConfig``. ``server`` and ``token`` may be
        ``None`` when the selected override source does not provide them.

    NOTE(review): ``datahub.ingestion.graph.client`` imports this module and
    this module imports ``DatahubClientConfig`` from it - confirm that the
    import cycle resolves at load time.
    """
    config_utils.ensure_datahub_config()
    config = config_utils.get_details_from_config()

    # If config does not exist, create a default one.
    if config is None:
        config = DatahubClientConfig()

    # Override server & token if specified. The host lives in the ``server``
    # field of DatahubClientConfig; writing to ``gms_host`` would never reach
    # the attribute that callers read.
    if config_override:
        config.server = config_override.get(ENV_METADATA_HOST_URL)
        config.token = config_override.get(ENV_METADATA_TOKEN)
    elif config_utils.should_skip_config():
        config.server, config.token = get_details_from_env()

    return config


def get_token():
    """Return the access token from the resolved graph client config."""
    # Single source of truth: the token comes from load_graph_config(), the
    # same config object the rest of the CLI uses.
    return load_graph_config().token


def get_session_and_host():
session = requests.Session()
config = load_graph_config()

# Read host and token from the resolved client config. The token must come
# from ``config.token``; a chained ``a = x, b = y`` would not assign two names.
gms_host, gms_token = config.server, config.token

if gms_host is None or gms_host.strip() == "":
log.error(
Expand Down
21 changes: 7 additions & 14 deletions metadata-ingestion/src/datahub/cli/config_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from pydantic import BaseModel, ValidationError

from datahub.cli.env_utils import get_boolean_env_variable
from datahub.ingestion.graph.client import DatahubClientConfig

log = logging.getLogger(__name__)

Expand All @@ -22,13 +23,8 @@
ENV_SKIP_CONFIG = "DATAHUB_SKIP_CONFIG"


class GmsConfig(BaseModel):
    """Pydantic model with a required ``server`` and an optional ``token``.

    NOTE(review): ``DatahubConfig.gms`` and ``write_gms_config`` use
    ``DatahubClientConfig`` in this change, so this model looks superseded -
    confirm whether anything still references it.
    """

    server: str
    token: Optional[str] = None


class DatahubConfig(BaseModel):
    """Top-level model of the DataHub config that ``write_gms_config`` persists."""

    # Client settings stored under the ``gms`` key. Declared once, with the
    # same type that write_gms_config constructs.
    gms: DatahubClientConfig


def persist_datahub_config(config: dict) -> None:
Expand All @@ -40,7 +36,7 @@ def persist_datahub_config(config: dict) -> None:
def write_gms_config(
host: str, token: Optional[str], merge_with_previous: bool = True
) -> None:
config = DatahubConfig(gms=GmsConfig(server=host, token=token))
config = DatahubConfig(gms=DatahubClientConfig(server=host, token=token))
if merge_with_previous:
try:
previous_config = get_client_config(as_dict=True)
Expand All @@ -57,17 +53,14 @@ def write_gms_config(
persist_datahub_config(config_dict)


def get_details_from_config() -> Optional[DatahubClientConfig]:
    """Return the ``gms`` client config loaded via ``get_client_config``.

    Returns:
        The ``DatahubClientConfig`` stored under ``gms``, or ``None`` when
        ``get_client_config`` yields no config.
    """
    datahub_config = get_client_config(as_dict=False)
    # Handle the missing-config case before narrowing the type: asserting
    # first would make the ``None`` result unreachable.
    if datahub_config is None:
        return None
    assert isinstance(datahub_config, DatahubConfig)
    return datahub_config.gms


def should_skip_config() -> bool:
Expand All @@ -83,7 +76,7 @@ def ensure_datahub_config() -> None:
write_gms_config(DEFAULT_GMS_HOST, None)


def get_client_config(as_dict: bool = False) -> Union[Optional[DatahubConfig], dict]:
def get_client_config(as_dict: bool = False) -> Union[Optional[DatahubClientConfig], dict]:
with open(DATAHUB_CONFIG_PATH, "r") as stream:
try:
config_json = yaml.safe_load(stream)
Expand Down
6 changes: 3 additions & 3 deletions metadata-ingestion/src/datahub/ingestion/graph/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from deprecated import deprecated
from requests.models import HTTPError

from datahub.cli.cli_utils import get_url_and_token
from datahub.cli.cli_utils import load_graph_config
from datahub.configuration.common import ConfigModel, GraphError, OperationalError
from datahub.emitter.aspect import TIMESERIES_ASPECT_MAP
from datahub.emitter.mce_builder import DEFAULT_ENV, Aspect
Expand Down Expand Up @@ -1181,7 +1181,7 @@ def close(self) -> None:


def get_default_graph() -> DataHubGraph:
    """Create a ``DataHubGraph`` from the resolved client config.

    The config comes from ``load_graph_config()`` and is passed through
    unchanged. ``test_connection()`` is called on the graph before it is
    returned.
    """
    graph = DataHubGraph(load_graph_config())
    graph.test_connection()
    return graph
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from pydantic import Field, root_validator, validator

from datahub.cli.cli_utils import get_url_and_token
from datahub.cli.cli_utils import load_graph_config
from datahub.configuration import config_loader
from datahub.configuration.common import ConfigModel, DynamicTypedConfig
from datahub.ingestion.graph.client import DatahubClientConfig
Expand Down Expand Up @@ -103,12 +103,13 @@ def run_id_should_be_semantic(
@root_validator(pre=True)
def default_sink_is_datahub_rest(cls, values: Dict[str, Any]) -> Any:
if "sink" not in values:
gms_host, gms_token = get_url_and_token()
config = load_graph_config()
# update this
default_sink_config = {
"type": "datahub-rest",
"config": {
"server": gms_host,
"token": gms_token,
"server": config.server,
"token": config.token,
},
}
# resolve env variables if present
Expand Down

0 comments on commit ff71460

Please sign in to comment.