Commit
Merge pull request #503 from great-expectations/feature/cli_refactor_rebase

Add test_cli_profile; Various auto-pepping
jcampbell committed Jun 17, 2019
2 parents a5ea721 + f799675 commit 17d8fa9
Showing 8 changed files with 364 additions and 256 deletions.
261 changes: 39 additions & 222 deletions great_expectations/cli/cli.py
@@ -8,13 +8,11 @@
import sys

from pyfiglet import figlet_format

try:
from termcolor import colored
except ImportError:
colored = None

from .supporting_methods import _scaffold_directories_and_notebooks
from great_expectations import __version__, read_csv
from great_expectations.dataset import Dataset, PandasDataset
from great_expectations.data_asset import FileDataAsset
@@ -23,19 +21,20 @@
from great_expectations.render.renderer import DescriptivePageRenderer, PrescriptivePageRenderer
from great_expectations.render.view import DescriptivePageView


from .util import cli_message
from .init import (
scaffold_directories_and_notebooks,
greeting_1,
msg_prompt_lets_begin,
)
from .datasource import (
add_datasource
)

# Take over the entire GE module logging namespace when running CLI
logger = logging.getLogger("great_expectations")

def cli_message(string, color, font="big", figlet=False):
if colored:
if not figlet:
six.print_(colored(string, color))
else:
six.print_(colored(figlet_format(
string, font=font), color))
else:
six.print_(string)
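# (In the new version this helper moves to the .util module, which is why
# cli_message is now imported from .util above.)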


@click.group()
@click.version_option(version=__version__)
@@ -124,236 +123,50 @@ def validate(dataset, expectations_config_file, evaluation_parameters, result_fo
only_return_failures=only_return_failures,
)

# Note: Should this be rendered through cli_message?
# Probably not, on the off chance that the JSON object contains <color> tags
print(json.dumps(result, indent=2))
sys.exit(result['statistics']['unsuccessful_expectations'])
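# The exit status is the number of failed expectations, so a shell or CI
# script can treat any nonzero status as a validation failure.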


@cli.command()
@click.option('--target_directory', '-d', default="./",
help='The root of the project directory where you want to initialize Great Expectations.')
@click.option(
'--target_directory',
'-d',
default="./",
help='The root of the project directory where you want to initialize Great Expectations.'
)
def init(target_directory):
"""Initialze a new Great Expectations project.
"""Initialize a new Great Expectations project.
This guided input walks the user through setting up a project.
It scaffolds directories, sets up notebooks, creates a project file, and
appends to a `.gitignore` file.
"""

#!!! This injects a version tag into the docs. We should test that those versioned docs exist in RTD.
greeting_1 = """
Always know what to expect from your data.
If you're new to Great Expectations, this tutorial is a good place to start:
https://great-expectations.readthedocs.io/en/v%s/intro.html#how-do-i-get-started
""" % __version__

msg_prompt_lets_begin = """
Let's add Great Expectations to your project by scaffolding a new great_expectations directory:
great_expectations
├── great_expectations.yml
├── datasources
├── expectations
├── fixtures
├── notebooks
├── plugins
├── uncommitted
│  ├── validations
│  ├── credentials
│  └── samples
└── .gitignore
OK to proceed?
"""

msg_prompt_choose_data_source = """
Configure a data source
1. Pandas data frames from local filesystem (CSV files)
2. Relational database (SQL)
3. Spark DataFrames from local filesystem (CSV files)
4. None of the above
"""

# msg_prompt_choose_data_source = """
# Time to create expectations for your data. This is done in Jupyter Notebook/Jupyter Lab.
#
# Before we point you to the right notebook, what data does your project work with?
# 1. Directory on local filesystem
# 2. Relational database (SQL)
# 3. DBT (data build tool) models
# 4. None of the above
# """


# msg_prompt_dbt_choose_profile = """
# Please specify the name of the dbt profile (from your ~/.dbt/profiles.yml file Great Expectations \
# should use to connect to the database
# """

# msg_dbt_go_to_notebook = """
# To create expectations for your dbt models start Jupyter and open notebook
# great_expectations/notebooks/using_great_expectations_with_dbt.ipynb -
# it will walk you through next steps.
# """

msg_prompt_filesys_enter_base_path = """
Enter the path of the root directory where the data files are stored
(the path may be either absolute or relative to the current directory)
"""

msg_filesys_go_to_notebook = """
To create expectations for your CSV files, start Jupyter and open the notebook
great_expectations/notebooks/using_great_expectations_with_pandas.ipynb -
it will walk you through the next steps.
To launch with jupyter notebook:
jupyter notebook great_expectations/notebooks/create_expectations_for_csv_files.ipynb
To launch with jupyter lab:
jupyter lab great_expectations/notebooks/create_expectations_for_csv_files.ipynb
"""

msg_prompt_datasource_name = """
Give your new data source a short name
"""

msg_sqlalchemy_config_connection = """
Great Expectations relies on sqlalchemy to connect to relational databases.
Please make sure that you have it installed.
Next, we will configure database credentials and store them in the "{0:s}" section
of this config file: great_expectations/uncommitted/credentials/profiles.yml:
"""

msg_sqlalchemy_go_to_notebook = """
To create expectations for your SQL queries, start Jupyter and open the notebook
great_expectations/notebooks/using_great_expectations_with_sql.ipynb -
it will walk you through configuring the database connection and next steps.
"""

msg_unknown_data_source = """
We are looking for more types of data to support.
Please create a GitHub issue here:
https://github.com/great-expectations/great_expectations/issues/new
In the meantime you can see what Great Expectations can do on CSV files.
To create expectations for your CSV files, start Jupyter and open the notebook
great_expectations/notebooks/using_great_expectations_with_pandas.ipynb -
it will walk you through the next steps.
"""
msg_spark_go_to_notebook = """
To create expectations for your CSV files, start Jupyter and open the notebook
great_expectations/notebooks/using_great_expectations_with_pandas.ipynb -
it will walk you through the next steps.
To launch with jupyter notebook:
jupyter notebook great_expectations/notebooks/create_expectations_for_spark_dataframes.ipynb
To launch with jupyter lab:
jupyter lab great_expectations/notebooks/create_expectations_for_spark_dataframes.ipynb
"""
context = DataContext.create('.')

context = DataContext.create(target_directory)
base_dir = os.path.join(target_directory, "great_expectations")

cli_message("Great Expectations", color="cyan", figlet=True)
six.print_(colored(
figlet_format("Great Expectations", font="big"),
color="cyan"
))

cli_message(greeting_1, color="blue")
cli_message(greeting_1)

if not click.confirm(msg_prompt_lets_begin, default=True):
cli_message(
"OK - run great_expectations init again when ready. Exiting...", color="blue")
"OK - run great_expectations init again when ready. Exiting..."
)
exit(0)

_scaffold_directories_and_notebooks(base_dir)
scaffold_directories_and_notebooks(base_dir)
cli_message(
"\nDone.",
color="blue")


# Shows a list of options to select from

data_source_selection = click.prompt(msg_prompt_choose_data_source, type=click.Choice(["1", "2", "3", "4"]),
show_choices=False)

print(data_source_selection)

# if data_source_selection == "5": # dbt
# dbt_profile = click.prompt(msg_prompt_dbt_choose_profile)
# log_message(msg_dbt_go_to_notebook, color="blue")
# context.add_datasource("dbt", "dbt", profile=dbt_profile)
if data_source_selection == "3": # Spark
path = click.prompt(msg_prompt_filesys_enter_base_path, default='/data/', type=click.Path(exists=True,
file_okay=False,
dir_okay=True,
readable=True),
show_default=True)
if path.startswith("./"):
path = path[2:]

if path.endswith("/"):
basenamepath = path[:-1]
default_data_source_name = os.path.basename(basenamepath)
data_source_name = click.prompt(
msg_prompt_datasource_name, default=default_data_source_name, show_default=True)

cli_message(msg_spark_go_to_notebook, color="blue")
context.add_datasource(data_source_name, "spark", base_directory=path)

elif data_source_selection == "2": # sqlalchemy
data_source_name = click.prompt(
msg_prompt_datasource_name, default="mydb", show_default=True)

cli_message(msg_sqlalchemy_config_connection.format(
data_source_name), color="blue")

drivername = click.prompt("What is the driver for the sqlalchemy connection?", default="postgres",
show_default=True)
host = click.prompt("What is the host for the sqlalchemy connection?", default="localhost",
show_default=True)
port = click.prompt("What is the port for the sqlalchemy connection?", default="5432",
show_default=True)
username = click.prompt("What is the username for the sqlalchemy connection?", default="postgres",
show_default=True)
password = click.prompt("What is the password for the sqlalchemy connection?", default="",
show_default=False, hide_input=True)
database = click.prompt("What is the database name for the sqlalchemy connection?", default="postgres",
show_default=True)

credentials = {
"drivername": drivername,
"host": host,
"port": port,
"username": username,
"password": password,
"database": database
}
context.add_profile_credentials(data_source_name, **credentials)
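# For reference: these keys line up one-to-one with the fields of a SQLAlchemy
# URL, so a profile stored this way can be rendered into a connection string
# (assuming SQLAlchemy 1.3-style URLs):
#
#   from sqlalchemy.engine.url import URL
#   str(URL(**credentials))  # -> "postgres://postgres:...@localhost:5432/postgres"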

cli_message(msg_sqlalchemy_go_to_notebook, color="blue")

context.add_datasource(
data_source_name, "sqlalchemy", profile=data_source_name)

elif data_source_selection == "1": # csv
path = click.prompt(msg_prompt_filesys_enter_base_path, default='/data/', type=click.Path(exists=False,
file_okay=False,
dir_okay=True,
readable=True),
show_default=True)
if path.startswith("./"):
path = path[2:]

default_data_source_name = os.path.basename(path)
data_source_name = click.prompt(
msg_prompt_datasource_name, default=default_data_source_name, show_default=True)

cli_message(msg_filesys_go_to_notebook, color="blue")
context.add_datasource(data_source_name, "pandas", base_directory=path)
)

else:
cli_message(msg_unknown_data_source, color="blue")
add_datasource(context)
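# (All of the interactive prompting above is evidently what moved into
# .datasource.add_datasource, imported at the top of the file.)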


@cli.command()
@@ -377,7 +190,9 @@ def render(render_object):
help='Maximum number of named data assets to profile.')
@click.option('--profile_all_data_assets', '-A', is_flag=True, default=False,
help='Profile ALL data assets within the target data source. If True, this will override --max_data_assets.')
def profile(datasource_name, max_data_assets, profile_all_data_assets):
@click.option('--target_directory', '-d', default="./",
help='The root of a project directory containing a great_expectations/ config.')
def profile(datasource_name, max_data_assets, profile_all_data_assets, target_directory):
"""Profile a great expectations object.
datasource_name: A datasource within this GE context to profile.
Expand All @@ -387,17 +202,19 @@ def profile(datasource_name, max_data_assets, profile_all_data_assets):
max_data_assets = None

# FIXME: By default, this should iterate over all datasources
context = DataContext('.')
context = DataContext(target_directory)
context.profile_datasource(
datasource_name, max_data_assets=max_data_assets)
datasource_name,
max_data_assets=max_data_assets
)


def main():
handler = logging.StreamHandler()
# Just levelname and message. Could re-add other info if we want.
formatter = logging.Formatter(
'%(levelname)s %(message)s')
# '%(asctime)s %(name)-12s %(levelname)-8s %(message)s')
# '%(asctime)s %(name)-12s %(levelname)-8s %(message)s')
handler.setFormatter(formatter)
logger.addHandler(handler)
logger.setLevel(logging.INFO)
(Diffs for the remaining changed files are not shown.)
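The commit message mentions a new test_cli_profile, which lives in one of the
files not shown here. As an illustration only, a minimal test of the profile
command using click's CliRunner might look like the sketch below; the fixture,
paths, and assertion are assumptions, not the actual test:

    from click.testing import CliRunner

    from great_expectations.cli.cli import cli


    def test_cli_profile(tmpdir):
        # Invoke `great_expectations profile <datasource> -d <project dir>`
        # against a hypothetical datasource in an already-initialized project.
        runner = CliRunner()
        result = runner.invoke(cli, ["profile", "my_datasource", "-d", str(tmpdir)])
        assert result.exit_code == 0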
