From 63166df7d9172ac3e084832832ca239ccf6bf92f Mon Sep 17 00:00:00 2001
From: Abe
Date: Thu, 13 Jun 2019 01:20:30 -0700
Subject: [PATCH] Refactor run_ids and get rendering working a little bit

---
 great_expectations/cli/datasource.py |  4 +-
 great_expectations/cli/init.py       |  3 +-
 .../data_context/data_context.py     | 42 ++++++++++++++-----
 tests/test_render_new.py             | 21 ++++++++--
 4 files changed, 53 insertions(+), 17 deletions(-)

diff --git a/great_expectations/cli/datasource.py b/great_expectations/cli/datasource.py
index 1ee28803f95a..95f44953de96 100644
--- a/great_expectations/cli/datasource.py
+++ b/great_expectations/cli/datasource.py
@@ -114,12 +114,12 @@ def add_datasource(context):
 
         cli_message("")
 
-        context.profile_datasource(
+        run_id = context.profile_datasource(
             data_source_name,
             max_data_assets=20
         )
 
-        context.render_datasource(data_source_name)
+        context.render_datasource(data_source_name, run_id)
 
     else:
         cli_message(
diff --git a/great_expectations/cli/init.py b/great_expectations/cli/init.py
index 639819e4e013..4a79208d6d40 100644
--- a/great_expectations/cli/init.py
+++ b/great_expectations/cli/init.py
@@ -39,7 +39,7 @@ def scaffold_directories_and_notebooks(base_dir):
     for directory in [notebook_dir_name, "expectations", "datasources", "uncommitted", "plugins", "fixtures"]:
         safe_mmkdir(os.path.join(base_dir, directory), exist_ok=True)
 
-    for uncommitted_directory in ["validations", "credentials", "samples"]:
+    for uncommitted_directory in ["validations", "credentials", "documents", "samples"]:
         safe_mmkdir(os.path.join(base_dir, "uncommitted", uncommitted_directory), exist_ok=True)
 
 
@@ -71,6 +71,7 @@ def scaffold_directories_and_notebooks(base_dir):
     ├── uncommitted
     │  ├── validations
     │  ├── credentials
+    │  ├── documents
     │  └── samples
     └── .gitignore
 
diff --git a/great_expectations/data_context/data_context.py b/great_expectations/data_context/data_context.py
index 31fc8765dac2..9acef5fa25ea 100644
--- a/great_expectations/data_context/data_context.py
+++ b/great_expectations/data_context/data_context.py
@@ -28,6 +28,9 @@
 from great_expectations.datasource import SparkDFDatasource
 from great_expectations.profile.basic_dataset_profiler import BasicDatasetProfiler
 
+from great_expectations.render.renderer import DescriptivePageRenderer
+from great_expectations.render.view import DescriptivePageView
+
 from .expectation_explorer import ExpectationExplorer
 
 logger = logging.getLogger(__name__)
@@ -672,8 +675,9 @@ def update_return_obj(self, data_asset, return_obj):
         else:
             return return_obj
 
-    def profile_datasource(self, datasource_name, profiler_name="BasicDatasetProfiler", max_data_assets=10):
+    def profile_datasource(self, datasource_name, run_id=None, profiler_name="BasicDatasetProfiler", max_data_assets=10):
         #!!! FIXME: We're not actually using profiler_name to fetch the right profiler.
+        total_start_time = datetime.datetime.now()
         # logger.info("Profiling %s with %s" % (datasource_name, profiler_name))
         print("Profiling %s with %s" % (datasource_name, profiler_name))
 
@@ -686,7 +690,10 @@ def profile_datasource(self, datasource_name, profiler_name="BasicDatasetProfile
         total_data_assets = len(data_asset_name_list)
         # logger.info("Found %d named data assets" % (total_data_assets))
         print("Found %d named data assets" % (total_data_assets))
-
+
+        if run_id == None:
+            run_id = profiler_name+"_"+total_start_time.strftime("%Y%m%d_%H%M%S")
+
         if max_data_assets == None or max_data_assets >= len(data_asset_name_list):
             # logger.info("Profiling all %d." % (len(data_asset_name_list)))
             print("Profiling all %d." % (len(data_asset_name_list)))
@@ -697,7 +704,6 @@ def profile_datasource(self, datasource_name, profiler_name="BasicDatasetProfile
             data_asset_name_list = data_asset_name_list[:max_data_assets]
 
         total_columns, total_expectations, total_rows, skipped_data_assets = 0, 0, 0, 0
-        total_start_time = datetime.datetime.now()
         for name in data_asset_name_list:
             try:
                 start_time = datetime.datetime.now()
@@ -707,7 +713,8 @@ def profile_datasource(self, datasource_name, profiler_name="BasicDatasetProfile
                 #Note: This logic is specific to DatasetProfilers, which profile a single batch. Multi-batch profilers will have more to unpack.
                 expectations_config, validation_result = BasicDatasetProfiler.profile(
-                    batch, run_id=profiler_name+"_"+start_time.strftime("%Y%m%d_%H%M%S"))
+                    batch, run_id=run_id
+                )
 
                 row_count = batch.shape[0]
                 total_rows += row_count
 
@@ -746,7 +753,10 @@ def profile_datasource(self, datasource_name, profiler_name="BasicDatasetProfile
             total_expectations,
         ))
 
-    def render_datasource(self, datasource_name, renderer_name="DescriptiveDataSourceRenderer"):
+        return run_id
+
+    #FIXME: This method is implemented as demo-ware on a demo branch. Significant work needed to make it production-ready.
+    def render_datasource(self, datasource_name, run_id, renderer_name="DescriptiveDataSourceRenderer"):
         #!!! FIXME: This seems to imply a whole new category of renderer: one that can take a datasource as input.
         #!!! FIXME: Not using renderer_name at all
 
@@ -758,11 +768,23 @@ def render_datasource(self, datasource_name, renderer_name="DescriptiveDataSourc
 
         for name in data_asset_name_list:
             print(name)
-
-            # config_file_path = os.path.join(self.expectations_directory, data_asset_name + '.json')
-            # safe_mmkdir(os.path.split(config_file_path)[0], exist_ok=True)
-            # with open(config_file_path, 'w') as outfile:
-            #     json.dump(expectations, outfile)
+
+            print(glob(self.context_root_directory+"/great_expectations/uncommitted/validations/*/*"))
+
+            evr_file_path = os.path.join(
+                self.context_root_directory, "great_expectations", "uncommitted", "validations", run_id, name+'.json'
+            )
+            validation_results = json.load(open(evr_file_path))
+
+            rendered_json = DescriptivePageRenderer.render(validation_results)
+            rendered_page = DescriptivePageView.render(rendered_json)
+
+            doc_file_path = os.path.join(
+                self.context_root_directory, "great_expectations", "uncommitted", "documents", name+'.html'
+            )
+            safe_mmkdir(os.path.split(doc_file_path)[0], exist_ok=True)
+            with open(doc_file_path, 'w') as outfile:
+                outfile.write(rendered_page)
 
 
 
diff --git a/tests/test_render_new.py b/tests/test_render_new.py
index fb60ba99e298..8b95c93d0e76 100644
--- a/tests/test_render_new.py
+++ b/tests/test_render_new.py
@@ -1,6 +1,8 @@
 import pytest
 import json
+import glob
+import os
 
 import great_expectations as ge
 from great_expectations.render.renderer import DescriptivePageRenderer, DescriptiveColumnSectionRenderer, PrescriptiveColumnSectionRenderer
 
@@ -91,15 +93,26 @@ def test_full_oobe_flow():
     rendered_page = DescriptivePageView.render(rendered_json)
     assert rendered_page != None
 
-    with open('./test.html', 'w') as f:
-        f.write(rendered_page)
+    # with open('./test.html', 'w') as f:
+    #     f.write(rendered_page)
 
 
 def test_context_render_data_source(empty_data_context, filesystem_csv_2):
     empty_data_context.add_datasource(
         "my_datasource", "pandas", base_directory=str(filesystem_csv_2))
-    empty_data_context.profile_datasource("my_datasource")
empty_data_context.profile_datasource("my_datasource", "my_run_id") not_so_empty_data_context = empty_data_context - not_so_empty_data_context.render_datasource("my_datasource") + not_so_empty_data_context.render_datasource("my_datasource", "my_run_id") # assert False + + doc_path = os.path.join( + not_so_empty_data_context.context_root_directory, "great_expectations/uncommitted/documents" + ) + print(glob.glob(doc_path+"/*")) + + with open(doc_path+"/f1.html", "r") as doc_file: + doc = doc_file.read() + print(doc) + + assert False
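
Reviewer note: a minimal usage sketch of the flow this patch wires together. It assumes an existing DataContext instance named `context` for a project that already has a datasource named "my_datasource"; both names are illustrative and not taken from the patch.

    # profile_datasource() now returns the run_id it used; when no run_id is
    # passed in, one is generated from the profiler name plus a timestamp.
    run_id = context.profile_datasource("my_datasource", max_data_assets=20)

    # The returned run_id is handed to render_datasource(), which reads the
    # matching validation results from uncommitted/validations/<run_id>/ and
    # writes one HTML document per data asset to uncommitted/documents/.
    context.render_datasource("my_datasource", run_id)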
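
For reference, the body of the new render_datasource() loop reduces to the renderer/view pipeline below, sketched for a single validation result. DescriptivePageRenderer and DescriptivePageView are the classes the patch imports; the file paths are illustrative.

    import json

    from great_expectations.render.renderer import DescriptivePageRenderer
    from great_expectations.render.view import DescriptivePageView

    # Load one validation result produced by the profiling run.
    with open("uncommitted/validations/my_run_id/f1.json") as evr_file:
        validation_results = json.load(evr_file)

    # The renderer builds an intermediate JSON document from the validation
    # results, and the view turns that document into an HTML page.
    rendered_json = DescriptivePageRenderer.render(validation_results)
    rendered_page = DescriptivePageView.render(rendered_json)

    with open("uncommitted/documents/f1.html", "w") as outfile:
        outfile.write(rendered_page)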