Merge pull request #151 from icanbwell/print_execution_plan_if_loglevel_is_debug

print execution plan if loglevel is DEBUG
imranq2 committed Nov 30, 2022
2 parents d5372a3 + 2786129 commit e177d4d
Showing 3 changed files with 35 additions and 42 deletions.
22 changes: 2 additions & 20 deletions .github/workflows/build_and_test.yml
@@ -14,29 +14,11 @@ jobs:
     runs-on: ubuntu-latest

     steps:
-      - uses: actions/checkout@v2
-      - uses: actions/setup-python@v2
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v4
         with:
           python-version: '3.7'

-      - name: Set cache permissions
-        run: |
-          mkdir ~/.local && mkdir ~/.local/share && mkdir ~/.local/share/virtualenvs
-          chown -R $(whoami) ~/.local/share/virtualenvs/
-      - name: Cache pip dependencies
-        uses: actions/cache@v2
-        env:
-          cache-name: cache-pip-deps-warehouse-p
-        with:
-          path: ~/.local/share/virtualenvs/
-          key: ${{ runner.os }}-${{ env.CACHE_SEED }}-${{ github.job }}-${{ env.cache-name }}-${{ hashFiles('Pipfile.lock') }}
-          restore-keys: |
-            ${{ runner.os }}-${{ env.CACHE_SEED }}-${{ github.job }}-${{ env.cache-name }}-
-            ${{ runner.os }}-${{ env.CACHE_SEED }}-${{ github.job }}-
-            ${{ runner.os }}-${{ env.CACHE_SEED }}-
-            ${{ runner.os }}-
       - name: pre-commit
         run: make run-pre-commit

53 changes: 32 additions & 21 deletions spark_auto_mapper/automappers/automapper.py
@@ -215,19 +215,22 @@ def _transform_with_data_frame_single_select(
         if not self.drop_key_columns:
             column_specs = [col(f"b.{c}") for c in keys] + column_specs

-        self.logger.debug(f"-------- automapper ({self.view}) column specs ------")
-        self.logger.debug(self.to_debug_string(source_df=source_df))
-        self.logger.debug(
-            f"-------- end automapper ({self.view}) column specs ------"
-        )
-        self.logger.debug(
-            f"-------- automapper ({self.source_view}) source_df schema ------"
-        )
-        # noinspection PyProtectedMember
-        self.logger.debug(source_df._jdf.schema().treeString())
-        self.logger.debug(
-            f"-------- end automapper ({self.source_view}) source_df schema ------"
-        )
+        if self.log_level and self.log_level == "DEBUG":
+            self.logger.debug(
+                f"-------- automapper ({self.view}) column specs ------"
+            )
+            self.logger.debug(self.to_debug_string(source_df=source_df))
+            self.logger.debug(
+                f"-------- end automapper ({self.view}) column specs ------"
+            )
+            self.logger.debug(
+                f"-------- automapper ({self.source_view}) source_df schema ------"
+            )
+            # noinspection PyProtectedMember
+            self.logger.debug(source_df._jdf.schema().treeString())
+            self.logger.debug(
+                f"-------- end automapper ({self.source_view}) source_df schema ------"
+            )

         if self.check_schema_for_all_columns:
             for column_name, mapper in self.mappers.items():
@@ -247,6 +250,13 @@ def _transform_with_data_frame_single_select(

         # run all the selects
         df = source_df.alias("b").select(*column_specs)
+
+        if self.log_level and self.log_level == "DEBUG":
+            print(
+                f"------------ Start Execution Plan for view {self.view} -----------"
+            )
+            df.explain(extended="cost")
+            print("------------ End Execution Plan -----------")
         # write out final checkpoint for this automapper
         if self.checkpoint_path:
             checkpoint_path = (
@@ -264,14 +274,15 @@
             self.logger.warning(
                 f"-------- end automapper ({self.view}) column specs ------"
             )
-            self.logger.debug(
-                f"-------- automapper ({self.source_view}) source_df schema ------"
-            )
-            # noinspection PyProtectedMember
-            self.logger.debug(source_df._jdf.schema().treeString())
-            self.logger.debug(
-                f"-------- end automapper ({self.source_view}) source_df schema ------"
-            )
+            if self.log_level and self.log_level == "DEBUG":
+                self.logger.debug(
+                    f"-------- automapper ({self.source_view}) source_df schema ------"
+                )
+                # noinspection PyProtectedMember
+                self.logger.debug(source_df._jdf.schema().treeString())
+                self.logger.debug(
+                    f"-------- end automapper ({self.source_view}) source_df schema ------"
+                )
             # iterate through each column to find the problem child
             for column_name, mapper in self.mappers.items():
                 try:
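
For context, a minimal standalone sketch of the pattern these hunks introduce: printing a Spark execution plan only when the log level is DEBUG. The SparkSession setup and sample data below are illustrative assumptions, not code from this repository; explain(extended="cost") requires Spark 3.0+, where a string passed as extended is interpreted as the explain mode.

from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[1]").appName("explain-demo").getOrCreate()
df = spark.createDataFrame([(1, 54), (2, 35)], ["member_id", "my_age"])

log_level = "DEBUG"  # stand-in for AutoMapper's log_level setting
if log_level == "DEBUG":
    print("------------ Start Execution Plan -----------")
    # On Spark 3.0+, a string passed as `extended` is treated as the explain
    # mode; "cost" prints the logical plan with statistics when available.
    df.explain(extended="cost")
    print("------------ End Execution Plan -----------")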
2 changes: 1 addition & 1 deletion tests/amount/test_automapper_amount_typed.py
@@ -29,7 +29,7 @@ def test_auto_mapper_amount_typed(spark_session: SparkSession) -> None:

     # Act
     mapper = AutoMapper(
-        view="members", source_view="patients", keys=["member_id"]
+        view="members", source_view="patients", keys=["member_id"], log_level="DEBUG"
     ).columns(age=A.amount(A.column("my_age")))

     assert isinstance(mapper, AutoMapper)
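
A hedged end-to-end sketch of how the new parameter surfaces the plan; the input frame, view registration, transform() call, and import paths are assumptions modeled on the rest of this test suite rather than part of this diff.

from pyspark.sql import SparkSession

from spark_auto_mapper.automappers.automapper import AutoMapper
from spark_auto_mapper.helpers.automapper_helpers import AutoMapperHelpers as A

spark_session = SparkSession.builder.master("local[1]").getOrCreate()
df = spark_session.createDataFrame([(1, 54)], ["member_id", "my_age"])
df.createOrReplaceTempView("patients")  # assumed source view, matching source_view below

mapper = AutoMapper(
    view="members", source_view="patients", keys=["member_id"], log_level="DEBUG"
).columns(age=A.amount(A.column("my_age")))

# With log_level="DEBUG", transform() should now also print the execution plan
# via df.explain(extended="cost"), alongside the schema/column-spec debug logs.
result_df = mapper.transform(df)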
