Skip to content

Commit

Permalink
Refactor run_ids and get rendering working a little bit
Browse files Browse the repository at this point in the history
  • Loading branch information
abegong committed Jun 13, 2019
1 parent 9a0374f commit 63166df
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 17 deletions.
4 changes: 2 additions & 2 deletions great_expectations/cli/datasource.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,12 +114,12 @@ def add_datasource(context):

cli_message("")

context.profile_datasource(
run_id = context.profile_datasource(
data_source_name,
max_data_assets=20
)

context.render_datasource(data_source_name)
context.render_datasource(data_source_name, run_id)

else:
cli_message(
Expand Down
3 changes: 2 additions & 1 deletion great_expectations/cli/init.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def scaffold_directories_and_notebooks(base_dir):
for directory in [notebook_dir_name, "expectations", "datasources", "uncommitted", "plugins", "fixtures"]:
safe_mmkdir(os.path.join(base_dir, directory), exist_ok=True)

for uncommitted_directory in ["validations", "credentials", "samples"]:
for uncommitted_directory in ["validations", "credentials", "documents", "samples"]:
safe_mmkdir(os.path.join(base_dir, "uncommitted",
uncommitted_directory), exist_ok=True)

Expand Down Expand Up @@ -71,6 +71,7 @@ def scaffold_directories_and_notebooks(base_dir):
├── uncommitted
│  ├── validations
│  ├── credentials
│  ├── documents
│  └── samples
└── .gitignore
Expand Down
42 changes: 32 additions & 10 deletions great_expectations/data_context/data_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@
from great_expectations.datasource import SparkDFDatasource
from great_expectations.profile.basic_dataset_profiler import BasicDatasetProfiler

from great_expectations.render.renderer import DescriptivePageRenderer
from great_expectations.render.view import DescriptivePageView

from .expectation_explorer import ExpectationExplorer

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -672,8 +675,9 @@ def update_return_obj(self, data_asset, return_obj):
else:
return return_obj

def profile_datasource(self, datasource_name, profiler_name="BasicDatasetProfiler", max_data_assets=10):
def profile_datasource(self, datasource_name, run_id=None, profiler_name="BasicDatasetProfiler", max_data_assets=10):
#!!! FIXME: We're not actually using profiler_name to fetch the right profiler.
total_start_time = datetime.datetime.now()

# logger.info("Profiling %s with %s" % (datasource_name, profiler_name))
print("Profiling %s with %s" % (datasource_name, profiler_name))
Expand All @@ -686,7 +690,10 @@ def profile_datasource(self, datasource_name, profiler_name="BasicDatasetProfile
total_data_assets = len(data_asset_name_list)
# logger.info("Found %d named data assets" % (total_data_assets))
print("Found %d named data assets" % (total_data_assets))


if run_id == None:
run_id = profiler_name+"_"+total_start_time.strftime("%Y%m%d_%H%M%S")

if max_data_assets == None or max_data_assets >= len(data_asset_name_list):
# logger.info("Profiling all %d." % (len(data_asset_name_list)))
print("Profiling all %d." % (len(data_asset_name_list)))
Expand All @@ -697,7 +704,6 @@ def profile_datasource(self, datasource_name, profiler_name="BasicDatasetProfile
data_asset_name_list = data_asset_name_list[:max_data_assets]

total_columns, total_expectations, total_rows, skipped_data_assets = 0, 0, 0, 0
total_start_time = datetime.datetime.now()
for name in data_asset_name_list:
try:
start_time = datetime.datetime.now()
Expand All @@ -707,7 +713,8 @@ def profile_datasource(self, datasource_name, profiler_name="BasicDatasetProfile

#Note: This logic is specific to DatasetProfilers, which profile a single batch. Multi-batch profilers will have more to unpack.
expectations_config, validation_result = BasicDatasetProfiler.profile(
batch, run_id=profiler_name+"_"+start_time.strftime("%Y%m%d_%H%M%S"))
batch, run_id=run_id
)

row_count = batch.shape[0]
total_rows += row_count
Expand Down Expand Up @@ -746,7 +753,10 @@ def profile_datasource(self, datasource_name, profiler_name="BasicDatasetProfile
total_expectations,
))

def render_datasource(self, datasource_name, renderer_name="DescriptiveDataSourceRenderer"):
return run_id

#FIXME: This method is implemented as demo-ware on a demo branch. Significant work needed to make it production-ready.
def render_datasource(self, datasource_name, run_id, renderer_name="DescriptiveDataSourceRenderer"):
#!!! FIXME: This seems to imply a whole new category of renderer: one that can take a datasource as input.
#!!! FIXME: Not using renderer_name at all

Expand All @@ -758,11 +768,23 @@ def render_datasource(self, datasource_name, renderer_name="DescriptiveDataSourc

for name in data_asset_name_list:
print(name)

# config_file_path = os.path.join(self.expectations_directory, data_asset_name + '.json')
# safe_mmkdir(os.path.split(config_file_path)[0], exist_ok=True)
# with open(config_file_path, 'w') as outfile:
# json.dump(expectations, outfile)

print(glob(self.context_root_directory+"/great_expectations/uncommitted/validations/*/*"))

evr_file_path = os.path.join(
self.context_root_directory, "great_expectations", "uncommitted", "validations", run_id, name+'.json'
)
validation_results = json.load(open(evr_file_path))

rendered_json = DescriptivePageRenderer.render(validation_results)
rendered_page = DescriptivePageView.render(rendered_json)

doc_file_path = os.path.join(
self.context_root_directory, "great_expectations", "uncommitted", "documents", name+'.html'
)
safe_mmkdir(os.path.split(doc_file_path)[0], exist_ok=True)
with open(doc_file_path, 'w') as outfile:
outfile.write(rendered_page)



Expand Down
21 changes: 17 additions & 4 deletions tests/test_render_new.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import pytest

import json
import glob
import os

import great_expectations as ge
from great_expectations.render.renderer import DescriptivePageRenderer, DescriptiveColumnSectionRenderer, PrescriptiveColumnSectionRenderer
Expand Down Expand Up @@ -91,15 +93,26 @@ def test_full_oobe_flow():
rendered_page = DescriptivePageView.render(rendered_json)
assert rendered_page != None

with open('./test.html', 'w') as f:
f.write(rendered_page)
# with open('./test.html', 'w') as f:
# f.write(rendered_page)


def test_context_render_data_source(empty_data_context, filesystem_csv_2):
    """End-to-end check of the profile -> render flow.

    Adds a pandas datasource backed by the ``filesystem_csv_2`` fixture,
    profiles it under a fixed run_id, renders descriptive documents, and
    verifies that a non-empty HTML document was written for the profiled
    asset.
    """
    empty_data_context.add_datasource(
        "my_datasource", "pandas", base_directory=str(filesystem_csv_2))
    # Profile under a known run_id so render_datasource can locate the
    # validation results it needs.
    empty_data_context.profile_datasource("my_datasource", "my_run_id")
    not_so_empty_data_context = empty_data_context

    not_so_empty_data_context.render_datasource("my_datasource", "my_run_id")

    doc_path = os.path.join(
        not_so_empty_data_context.context_root_directory,
        "great_expectations/uncommitted/documents"
    )

    # The rendered document for the "f1" asset must exist and be non-empty.
    # (Previously this test ended with a bare `assert False`, which made it
    # fail unconditionally — replaced with real assertions on the output.)
    doc_file_path = os.path.join(doc_path, "f1.html")
    assert os.path.isfile(doc_file_path)
    with open(doc_file_path, "r") as doc_file:
        doc = doc_file.read()
    assert doc

0 comments on commit 63166df

Please sign in to comment.