Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add --ddl-file option and support offload to an existing empty table #149

Merged
merged 36 commits into from
Jul 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
59560a6
feat: Add --ddl-file option
nj1973 Apr 8, 2024
16d3175
feat: Add --ddl-file option
nj1973 Apr 9, 2024
9edbc7b
Merge remote-tracking branch 'origin/main' into 139-decouple-table-cr…
nj1973 Apr 9, 2024
1106434
feat: Prep for --ddl-file, move execute from config to operation
nj1973 Apr 9, 2024
a7c88af
Merge remote-tracking branch 'origin/main' into 139-decouple-table-cr…
nj1973 Apr 9, 2024
f75ca9c
feat: Add --ddl-file option for local filesystem
nj1973 Apr 10, 2024
4f0d519
feat: Add --ddl-file option
nj1973 Apr 11, 2024
0fd6666
feat: Add --ddl-file option
nj1973 Apr 11, 2024
92d44c6
feat: Add --ddl-file option
nj1973 Apr 12, 2024
0947495
Merge remote-tracking branch 'origin/main' into 139-decouple-table-cr…
nj1973 Apr 12, 2024
0470bda
feat: Decouple table creation and data loading
nj1973 Apr 15, 2024
cc73276
feat: Decouple table creation and data loading
nj1973 Apr 17, 2024
3abf5b3
feat: Decouple table creation and data loading
nj1973 Apr 17, 2024
2f9163a
Merge remote-tracking branch 'origin/main' into 139-decouple-table-cr…
nj1973 Apr 17, 2024
ae7b2f1
feat: Decouple table creation and data loading
nj1973 Apr 17, 2024
51f791a
chore: repo upgrade for 1.0.4
abb9979 Apr 17, 2024
ab44783
chore: repo upgrade for 1.0.4
abb9979 Apr 17, 2024
477a031
feat: Decouple table creation and data loading
nj1973 Apr 18, 2024
0dd77eb
feat: Decouple table creation and data loading
nj1973 Apr 25, 2024
2ff3429
Merge remote-tracking branch 'origin/main' into 139-decouple-table-cr…
nj1973 Apr 29, 2024
d32131e
feat: Decouple table creation and data loading
nj1973 May 10, 2024
0674398
feat: Decouple table creation and data loading
nj1973 May 10, 2024
3591321
feat: Decouple table creation and data loading
nj1973 May 10, 2024
2fe928e
feat: Add backend schema DDL to DDL file
nj1973 May 16, 2024
b806a76
feat: Better message when heap table already has data
nj1973 May 17, 2024
06a8c59
feat: Better message when heap table already has data
nj1973 May 17, 2024
593dcfb
feat: Add backend load database DDL to DDL file
nj1973 May 21, 2024
c260ce3
feat: Remove trailing spaces from backend table DDL
nj1973 May 22, 2024
127d0d8
Merge remote-tracking branch 'origin/main' into 139-decouple-table-cr…
nj1973 May 23, 2024
a7f895e
feat: Decouple table creation and data loading
nj1973 May 23, 2024
9f4b04d
feat: Change BigQUery external table creation to be with SQL and not …
nj1973 May 23, 2024
61cb152
Merge remote-tracking branch 'origin/main' into 139-decouple-table-cr…
nj1973 Jun 14, 2024
b56eac6
fix: Fix bug where load db dataset location is excluded from DDL file
nj1973 Jul 5, 2024
c308100
chore: Typo
nj1973 Jul 5, 2024
d5e9d64
Merge remote-tracking branch 'origin/main' into 139-decouple-table-cr…
nj1973 Jul 8, 2024
fa4f305
fix: Fix merge issues
nj1973 Jul 8, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ build
eggs
.eggs
parts
bin
var
sdist
develop-eggs
Expand Down
3 changes: 2 additions & 1 deletion bin/offload
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,12 @@
import sys

from goe.config import config_file
from goe.exceptions import OffloadOptionError
from goe.goe import (
get_options,
OFFLOAD_OP_NAME,
)
from goe.offload.offload import OffloadOptionError, get_offload_options
from goe.offload.offload import get_offload_options
from goe.orchestration.cli_entry_points import offload_by_cli


Expand Down
45 changes: 45 additions & 0 deletions sql/oracle/source/sql/create_offload_repo_104.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/*
# Copyright 2016 The GOE Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/

-- SQL*Plus substitution variables: the repository version installed by this script and a
-- comment string built from it.
-- NOTE(review): both are presumably consumed by upgrade_offload_repo_version.sql, which is
-- invoked near the end of this script (confirm against that script)
define goe_offload_repo_version = '1.0.4'
define goe_offload_repo_comments = "GOE repo upgrades for &goe_offload_repo_version."

PROMPT Installing GOE repository &goe_offload_repo_version....

-- New seed data
-- -----------------------------------------------------------------------------------------------

-- Anonymous block that seeds COMMAND_STEP with the DDL_FILE step.
-- NOTE(review): no COMMIT is issued in this block, presumably the invoked version script or
-- the installer commits (confirm)
DECLARE
-- Local helper: inserts one COMMAND_STEP row for the supplied code and title, taking the id
-- from COMMAND_STEP_SEQ and setting create_time to SYSTIMESTAMP.
PROCEDURE add_command_step ( p_code IN command_step.code%TYPE,
p_title IN command_step.title%TYPE ) IS
BEGIN
INSERT INTO command_step
(id, code, title, create_time)
VALUES
(command_step_seq.NEXTVAL, p_code, p_title, SYSTIMESTAMP);
END add_command_step;
BEGIN
-- The only seed row added by this script.
add_command_step('DDL_FILE', 'Create DDL file');
END;
/

--------------------------------------------------------------------------------------------------
@@upgrade_offload_repo_version.sql

PROMPT GOE repository &goe_offload_repo_version. installed.

-- Remove the substitution variables defined at the top of this script.
undefine goe_offload_repo_version
undefine goe_offload_repo_comments
1 change: 1 addition & 0 deletions sql/oracle/source/sql/install_offload_repo.sql
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,6 @@ prompt Installing GOE repository...
alter session set current_schema = &goe_db_repo_user;
-- Start offload repo version files...
@@create_offload_repo_100.sql
@@create_offload_repo_104.sql
-- End offload repo version files.
@@install_offload_repo_code.sql
1 change: 1 addition & 0 deletions sql/oracle/source/sql/upgrade_offload_repo_deltas.sql
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ begin

-- Follow this pattern for each repo version file in sequence...
check_version(v_current_version, '1.0.0');
check_version(v_current_version, '1.0.4');

end;
/
Expand Down
17 changes: 17 additions & 0 deletions src/goe/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Copyright 2024 The GOE Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import importlib.metadata

# Version string of the installed "goe" distribution, read from its package metadata.
# Raises importlib.metadata.PackageNotFoundError when no "goe" distribution is installed.
__version__ = importlib.metadata.distribution("goe").version
2 changes: 2 additions & 0 deletions src/goe/conductor/hybrid_view_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,7 @@ def _get_backend_table(self):
self._connection_options,
self._messages,
hybrid_metadata=self._offload_metadata,
dry_run=self._dry_run,
)

def _get_backend_detail(self, attribute_name=None):
Expand Down Expand Up @@ -315,6 +316,7 @@ def validate_by_aggregation(self, lower_hv=None, upper_hv=None, as_json=True):
messages=self._messages,
backend_db=self._backend_table_owner,
backend_table=self._backend_table_name,
execute=(not self._dry_run),
)
status, agg_msg = validator.validate(
safe=False,
Expand Down
24 changes: 0 additions & 24 deletions src/goe/config/config_descriptions.py

This file was deleted.

38 changes: 38 additions & 0 deletions src/goe/config/option_descriptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#! /usr/bin/env python3

# Copyright 2016 The GOE Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

""" option_descriptions: Library of constants defining descriptions for options.
In the future we expect to refactor all option processing, including descriptions, and this module
may become redundant at that time.
"""

# Description text for the data sampling parallelism option.
DATA_SAMPLE_PARALLELISM = (
    "Degree of parallelism to use when sampling RDBMS data "
    "for columns with no precision/scale properties. "
    "Values of 0 or 1 will execute the query without parallelism"
)

# Description text for the reset backend table option.
RESET_BACKEND_TABLE = (
    "Remove backend data table. "
    "Use with caution - this will delete previously offloaded data for this table!"
)

# Description text for the reuse backend table option.
REUSE_BACKEND_TABLE = (
    "Allow Offload to re-use an empty backend table "
    "when there is already Offload metadata. "
    "This may be useful if a backend table had data removed by an administrator "
    "and a re-offload is required"
)

# Description text for the verification parallelism option.
VERIFY_PARALLELISM = (
    "Degree of parallelism to use for the RDBMS query executed "
    "when validating an offload. "
    "Values of 0 or 1 will execute the query without parallelism. "
    "Values > 1 will force a parallel query of the given degree. "
    "If unset, the RDBMS query will fall back to using the behavior "
    "specified by RDBMS defaults"
)
5 changes: 0 additions & 5 deletions src/goe/config/orchestration_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,6 @@
"data_governance_auto_tags_csv",
"data_governance_auto_properties_csv",
"dev_log_level",
"execute",
"error_on_token",
"frontend_odbc_driver_name",
"google_dataproc_batches_subnet",
Expand Down Expand Up @@ -252,7 +251,6 @@ class OrchestrationConfig:
db_type: str
dev_log_level: str
error_on_token: Optional[str]
execute: bool
frontend_odbc_driver_name: Optional[str]
google_dataproc_batches_subnet: Optional[str]
google_dataproc_batches_ttl: Optional[str]
Expand Down Expand Up @@ -429,9 +427,6 @@ def from_dict(config_dict: dict, do_not_connect=False):
"dev_log_level", orchestration_defaults.dev_log_level_default()
),
error_on_token=config_dict.get("error_on_token"),
execute=config_dict.get(
"execute", orchestration_defaults.execute_default()
),
frontend_odbc_driver_name=config_dict.get(
"frontend_odbc_driver_name",
orchestration_defaults.frontend_odbc_driver_name_default(),
Expand Down
3 changes: 1 addition & 2 deletions src/goe/connect/connect.py
Original file line number Diff line number Diff line change
Expand Up @@ -447,7 +447,7 @@ def check_environment(options, orchestration_config):
def test_offload_fs_container(orchestration_config, messages):
test_name = "Offload filesystem container"
test_header(test_name)
dfs_client = get_dfs_from_options(orchestration_config, messages)
dfs_client = get_dfs_from_options(orchestration_config, messages, dry_run=False)
display_uri = dfs_client.gen_uri(
orchestration_config.offload_fs_scheme,
orchestration_config.offload_fs_container,
Expand All @@ -466,7 +466,6 @@ def test_offload_fs_container(orchestration_config, messages):

def get_config_with_connect_overrides(connect_options):
override_dict = {
"execute": True,
"verbose": connect_options.verbose,
"hive_timeout_s": CONNECT_HIVE_TIMEOUT_S,
}
Expand Down
7 changes: 4 additions & 3 deletions src/goe/connect/connect_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,10 +121,11 @@ def test_raw_conn(hadoop_host, hadoop_port):


def get_cli_hdfs(orchestration_config, host, messages):
# dry_run always = False in connect.
return CliHdfs(
host,
orchestration_config.hadoop_ssh_user,
dry_run=(not orchestration_config.execute),
dry_run=False,
messages=messages,
db_path_suffix=orchestration_config.hdfs_db_path_suffix,
hdfs_data=orchestration_config.hdfs_data,
Expand Down Expand Up @@ -241,7 +242,7 @@ def test_webhdfs_config(orchestration_config, messages):
orchestration_config.hadoop_ssh_user,
True if orchestration_config.kerberos_service else False,
orchestration_config.webhdfs_verify_ssl,
dry_run=not orchestration_config.execute,
dry_run=False,
messages=messages,
db_path_suffix=orchestration_config.hdfs_db_path_suffix,
hdfs_data=orchestration_config.hdfs_data,
Expand All @@ -264,7 +265,7 @@ def test_sentry_privs(orchestration_config, backend_api, messages):
log("Skipping Sentry steps due to backend system", detail=VVERBOSE)
return

dfs_client = get_dfs_from_options(orchestration_config, messages)
dfs_client = get_dfs_from_options(orchestration_config, messages, dry_run=False)
uris_left_to_check = get_hdfs_dirs(
orchestration_config, dfs_client, include_hdfs_home=False
)
Expand Down
26 changes: 13 additions & 13 deletions src/goe/data_governance/hadoop_data_governance.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,12 +208,12 @@ def data_governance_auto_property_defaults(
"""
property_defaults = {}
now = datetime.datetime.now().replace(microsecond=0)
property_defaults[
DATA_GOVERNANCE_DYNAMIC_PROPERTY_INITIAL_OPERATION_DATETIME
] = now.isoformat()
property_defaults[
DATA_GOVERNANCE_DYNAMIC_PROPERTY_LATEST_OPERATION_DATETIME
] = now.isoformat()
property_defaults[DATA_GOVERNANCE_DYNAMIC_PROPERTY_INITIAL_OPERATION_DATETIME] = (
now.isoformat()
)
property_defaults[DATA_GOVERNANCE_DYNAMIC_PROPERTY_LATEST_OPERATION_DATETIME] = (
now.isoformat()
)
if source_rdbms_object:
property_defaults[DATA_GOVERNANCE_DYNAMIC_PROPERTY_SOURCE_RDBMS_OBJECT] = (
"%s.%s.%s" % (rdbms_name, rdbms_schema, source_rdbms_object)
Expand All @@ -223,9 +223,9 @@ def data_governance_auto_property_defaults(
"%s.%s.%s" % (rdbms_name, rdbms_schema, target_rdbms_object)
).upper()
if goe_object_type:
property_defaults[
DATA_GOVERNANCE_DYNAMIC_PROPERTY_GOE_OBJECT_TYPE
] = goe_object_type
property_defaults[DATA_GOVERNANCE_DYNAMIC_PROPERTY_GOE_OBJECT_TYPE] = (
goe_object_type
)
property_defaults = filter_properties_by_goe_object_type(
property_defaults, goe_object_type
)
Expand Down Expand Up @@ -436,8 +436,8 @@ def data_governance_register_new_db(
def data_governance_register_new_db_step(
hadoop_db, data_gov_client, messages, goe_object_type, options=None
):
opts_execute = options.execute if options else True
if data_gov_client:
opts_execute = options.execute if options else True

def step_fn():
data_governance_register_new_db(
Expand All @@ -463,8 +463,8 @@ def data_governance_register_new_object_step(
renaming_from_object_name=None,
dg_object_type=None,
):
opts_execute = options.execute if options else True
if data_gov_client:
opts_execute = options.execute if options else True

def step_fn():
data_governance_register_new_object(
Expand Down Expand Up @@ -526,8 +526,8 @@ def data_governance_register_new_view_step(
def data_governance_update_metadata_step(
hadoop_db, hadoop_object_name, data_gov_client, messages, options=None
):
opts_execute = options.execute if options else True
if data_gov_client:
opts_execute = options.execute if options else True

def step_fn():
data_governance_update_metadata(
Expand All @@ -550,8 +550,8 @@ def data_governance_register_new_multi_db_step(
"""
assert hadoop_db_list
assert type(hadoop_db_list) is list
opts_execute = options.execute if options else True
if data_gov_client:
opts_execute = options.execute if options else True

def step_fn():
for hadoop_db, goe_object_type in hadoop_db_list:
Expand Down
25 changes: 25 additions & 0 deletions src/goe/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Copyright 2024 The GOE Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


class OffloadException(Exception):
    """Exception type for Offload errors; adds no behaviour beyond ``Exception``."""


class OffloadOptionError(Exception):
    """Exception for Offload option problems, carrying a ``detail`` payload.

    Args:
        detail: Description of the problem. It is stored on ``self.detail`` and
            its ``repr()`` is what ``str(exception)`` returns.
    """

    def __init__(self, detail):
        # Chain to Exception explicitly rather than relying on the implicit
        # handling of constructor arguments by the base class.
        super().__init__(detail)
        self.detail = detail

    def __str__(self):
        # repr() keeps the existing output format, e.g. string details appear quoted.
        return repr(self.detail)
Loading
Loading