Skip to content

Commit

Permalink
Refactor run_ids and get rendering working a little bit
Browse files Browse the repository at this point in the history
  • Loading branch information
abegong committed Jun 13, 2019
1 parent 9a0374f commit 63166df
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 17 deletions.
4 changes: 2 additions & 2 deletions great_expectations/cli/datasource.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,12 +114,12 @@ def add_datasource(context):

cli_message("")

context.profile_datasource(
run_id = context.profile_datasource(
data_source_name,
max_data_assets=20
)

context.render_datasource(data_source_name)
context.render_datasource(data_source_name, run_id)

else:
cli_message(
Expand Down
3 changes: 2 additions & 1 deletion great_expectations/cli/init.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def scaffold_directories_and_notebooks(base_dir):
for directory in [notebook_dir_name, "expectations", "datasources", "uncommitted", "plugins", "fixtures"]:
safe_mmkdir(os.path.join(base_dir, directory), exist_ok=True)

for uncommitted_directory in ["validations", "credentials", "samples"]:
for uncommitted_directory in ["validations", "credentials", "documents", "samples"]:
safe_mmkdir(os.path.join(base_dir, "uncommitted",
uncommitted_directory), exist_ok=True)

Expand Down Expand Up @@ -71,6 +71,7 @@ def scaffold_directories_and_notebooks(base_dir):
├── uncommitted
│  ├── validations
│  ├── credentials
│  ├── documents
│  └── samples
└── .gitignore
Expand Down
42 changes: 32 additions & 10 deletions great_expectations/data_context/data_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@
from great_expectations.datasource import SparkDFDatasource
from great_expectations.profile.basic_dataset_profiler import BasicDatasetProfiler

from great_expectations.render.renderer import DescriptivePageRenderer
from great_expectations.render.view import DescriptivePageView

from .expectation_explorer import ExpectationExplorer

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -672,8 +675,9 @@ def update_return_obj(self, data_asset, return_obj):
else:
return return_obj

def profile_datasource(self, datasource_name, profiler_name="BasicDatasetProfiler", max_data_assets=10):
def profile_datasource(self, datasource_name, run_id=None, profiler_name="BasicDatasetProfiler", max_data_assets=10):
#!!! FIXME: We're not actually using profiler_name to fetch the right profiler.
total_start_time = datetime.datetime.now()

# logger.info("Profiling %s with %s" % (datasource_name, profiler_name))
print("Profiling %s with %s" % (datasource_name, profiler_name))
Expand All @@ -686,7 +690,10 @@ def profile_datasource(self, datasource_name, profiler_name="BasicDatasetProfile
total_data_assets = len(data_asset_name_list)
# logger.info("Found %d named data assets" % (total_data_assets))
print("Found %d named data assets" % (total_data_assets))


if run_id == None:
run_id = profiler_name+"_"+total_start_time.strftime("%Y%m%d_%H%M%S")

if max_data_assets == None or max_data_assets >= len(data_asset_name_list):
# logger.info("Profiling all %d." % (len(data_asset_name_list)))
print("Profiling all %d." % (len(data_asset_name_list)))
Expand All @@ -697,7 +704,6 @@ def profile_datasource(self, datasource_name, profiler_name="BasicDatasetProfile
data_asset_name_list = data_asset_name_list[:max_data_assets]

total_columns, total_expectations, total_rows, skipped_data_assets = 0, 0, 0, 0
total_start_time = datetime.datetime.now()
for name in data_asset_name_list:
try:
start_time = datetime.datetime.now()
Expand All @@ -707,7 +713,8 @@ def profile_datasource(self, datasource_name, profiler_name="BasicDatasetProfile

#Note: This logic is specific to DatasetProfilers, which profile a single batch. Multi-batch profilers will have more to unpack.
expectations_config, validation_result = BasicDatasetProfiler.profile(
batch, run_id=profiler_name+"_"+start_time.strftime("%Y%m%d_%H%M%S"))
batch, run_id=run_id
)

row_count = batch.shape[0]
total_rows += row_count
Expand Down Expand Up @@ -746,7 +753,10 @@ def profile_datasource(self, datasource_name, profiler_name="BasicDatasetProfile
total_expectations,
))

def render_datasource(self, datasource_name, renderer_name="DescriptiveDataSourceRenderer"):
return run_id

#FIXME: This method is implemented as demo-ware on a demo branch. Significant work needed to make it production-ready.
def render_datasource(self, datasource_name, run_id, renderer_name="DescriptiveDataSourceRenderer"):
#!!! FIXME: This seems to imply a whole new category of renderer: one that can take a datasource as input.
#!!! FIXME: Not using renderer_name at all

Expand All @@ -758,11 +768,23 @@ def render_datasource(self, datasource_name, renderer_name="DescriptiveDataSourc

for name in data_asset_name_list:
print(name)

# config_file_path = os.path.join(self.expectations_directory, data_asset_name + '.json')
# safe_mmkdir(os.path.split(config_file_path)[0], exist_ok=True)
# with open(config_file_path, 'w') as outfile:
# json.dump(expectations, outfile)

print(glob(self.context_root_directory+"/great_expectations/uncommitted/validations/*/*"))

evr_file_path = os.path.join(
self.context_root_directory, "great_expectations", "uncommitted", "validations", run_id, name+'.json'
)
validation_results = json.load(open(evr_file_path))

rendered_json = DescriptivePageRenderer.render(validation_results)
rendered_page = DescriptivePageView.render(rendered_json)

doc_file_path = os.path.join(
self.context_root_directory, "great_expectations", "uncommitted", "documents", name+'.html'
)
safe_mmkdir(os.path.split(doc_file_path)[0], exist_ok=True)
with open(doc_file_path, 'w') as outfile:
outfile.write(rendered_page)



Expand Down
21 changes: 17 additions & 4 deletions tests/test_render_new.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import pytest

import json
import glob
import os

import great_expectations as ge
from great_expectations.render.renderer import DescriptivePageRenderer, DescriptiveColumnSectionRenderer, PrescriptiveColumnSectionRenderer
Expand Down Expand Up @@ -91,15 +93,26 @@ def test_full_oobe_flow():
rendered_page = DescriptivePageView.render(rendered_json)
assert rendered_page != None

with open('./test.html', 'w') as f:
f.write(rendered_page)
# with open('./test.html', 'w') as f:
# f.write(rendered_page)


def test_context_render_data_source(empty_data_context, filesystem_csv_2):
    """End-to-end check of the profile -> render flow.

    Adds a pandas datasource backed by the ``filesystem_csv_2`` fixture,
    profiles it under a fixed run_id, renders descriptive documents, and
    verifies that a non-empty HTML document was written for the profiled
    asset.
    """
    empty_data_context.add_datasource(
        "my_datasource", "pandas", base_directory=str(filesystem_csv_2))
    # Profile under a known run_id so render_datasource can locate the
    # validation results it needs.
    empty_data_context.profile_datasource("my_datasource", "my_run_id")
    not_so_empty_data_context = empty_data_context

    not_so_empty_data_context.render_datasource("my_datasource", "my_run_id")

    doc_path = os.path.join(
        not_so_empty_data_context.context_root_directory,
        "great_expectations/uncommitted/documents"
    )

    # The rendered document for the "f1" asset must exist and be non-empty.
    # (Previously this test ended with a bare `assert False`, which made it
    # fail unconditionally — replaced with real assertions on the output.)
    doc_file_path = os.path.join(doc_path, "f1.html")
    assert os.path.isfile(doc_file_path)
    with open(doc_file_path, "r") as doc_file:
        doc = doc_file.read()
    assert doc

0 comments on commit 63166df

Please sign in to comment.