Permalink
Browse files

add command line support

  • Loading branch information...
1 parent 9231db4 commit 9ecd1b6b62b54aae522fbc02fdd62b1238c83161 @sb2nov sb2nov committed Nov 13, 2014
Showing with 74 additions and 8 deletions.
  1. +3 −0 .gitignore
  2. +0 −1 MANIFEST
  3. +3 −1 bin/README.md
  4. +57 −0 bin/dataduct
  5. +1 −1 dataduct/__init__.py
  6. +5 −0 dataduct/config/config.py
  7. +0 −1 resources/README.md
  8. +0 −1 scripts/README.md
  9. +5 −3 setup.py
View
@@ -7,6 +7,9 @@
# Docs build folder
/docs/_build
+# Build directory
+/build/
+
# Python egg metadata, regenerated from source files by setuptools.
/*.egg-info
/*.egg
View
@@ -6,7 +6,6 @@ README.rst
setup.py
bin/README.md
dataduct/__init__.py
-dataduct/constants.py
dataduct/definition_parser.py
dataduct/etl_pipeline.py
dataduct/pipeline/__init__.py
View
@@ -1 +1,3 @@
-#### THIS IS THE BIN FOLDER
+#### Bin
+
+This folder contains scripts that are added to the user's PATH variable to provide command-line access.
View
@@ -0,0 +1,57 @@
+#!/usr/bin/env python
+
+"""
+Script that helps create and validate pipelines from command line
+"""
+
+import argparse
+from dataduct.definition_parser import read_pipeline_definition
+from dataduct.definition_parser import create_pipeline
+from dataduct.definition_parser import validate_pipeline
+from dataduct.definition_parser import activate_pipeline
+
+
+CREATE_STR = 'create'
+VALIDATE_STR = 'validate'
+ACTIVATE_STR = 'activate'
+
def main():
    """Parse command-line arguments and run the requested Dataduct action.

    For each load-definition path supplied on the command line the pipeline
    is read and created locally; depending on ``--action`` it is additionally
    validated and/or activated on AWS.
    """
    parser = argparse.ArgumentParser(description='Run Dataduct commands')
    parser.add_argument(
        '-a',
        '--action',
        type=str,
        # A plain sequence of choices (not a dict) so argparse renders them
        # correctly in usage/error messages; descriptions go in `help`.
        choices=(CREATE_STR, VALIDATE_STR, ACTIVATE_STR),
        default=CREATE_STR,
        help='create: create a pipeline locally; '
             'validate: validate a pipeline with AWS without activating; '
             'activate: create a pipeline and activate it on AWS',
    )
    parser.add_argument(
        'load_definitions',
        nargs='*',
        help='Enter the paths of the load definitions.',
    )
    parser.add_argument(
        '-f',
        '--force_overwrite',
        action='store_true',
        default=False,
        help='Indicates that if this pipeline exists, it will be destroyed'
             ' first.',
    )
    args = parser.parse_args()

    for load_definition in args.load_definitions:
        definition = read_pipeline_definition(load_definition)
        etl = create_pipeline(definition)
        # BUG FIX: the original used `elif` for activation, which was
        # unreachable because ACTIVATE_STR already matched the first branch.
        # Activation implies validation, so both steps run for it.
        if args.action in (VALIDATE_STR, ACTIVATE_STR):
            validate_pipeline(etl, args.force_overwrite)
        if args.action == ACTIVATE_STR:
            activate_pipeline(etl)


if __name__ == '__main__':
    main()
@@ -1,4 +1,4 @@
"""Welcome to DataDuct
"""
__version__ = '0.1.0'
-
+__import__('pkg_resources').declare_namespace(__name__)
@@ -15,14 +15,19 @@
def load_yaml(configFiles):
    """Load the config from the first readable file in *configFiles*.

    Args:
        configFiles: Iterable of candidate config-file paths, tried in order.

    Returns:
        The parsed YAML contents of the first file that can be opened.

    Raises:
        Exception: If none of the candidate files can be read.
    """
    for configFile in configFiles:
        try:
            # `with` closes the handle deterministically; the original left
            # the file object to the garbage collector.
            with open(configFile, 'r') as config_fp:
                # NOTE(review): yaml.load without an explicit Loader can
                # execute arbitrary constructors on untrusted input; consider
                # yaml.safe_load -- confirm config files are always trusted.
                return yaml.load(config_fp)
        except (OSError, IOError):
            # This candidate path is missing/unreadable; try the next one.
            continue
    raise Exception('Dataduct config file is missing')
class Config(object):
+ """Config singleton to manage changes config variables across the package
+ """
_shared_config = load_yaml(DataductConfigFiles)
def __init__(self):
View
@@ -1 +0,0 @@
-#### THIS IS THE RESOURCES FOLDER
View
@@ -1 +0,0 @@
-#### THIS IS THE SCRIPTS FOLDER
View
@@ -1,30 +1,32 @@
"""
Setup file for installation of the etllib code
"""
-from distutils.core import setup
+from setuptools import setup
setup(
name='dataduct',
version='0.1.0',
author='Coursera Inc.',
packages=[
'dataduct',
+ 'dataduct.config',
'dataduct.pipeline',
'dataduct.s3',
'dataduct.steps',
'dataduct.utils',
],
namespace_packages=['dataduct'],
include_package_data=True,
- url='https://github.com/coursera/dataduct', # TODO: Update URL
+ url='https://github.com/coursera/dataduct',
long_description=open('README.rst').read(),
author_email='data-infra@coursera.org',
license='Apache License 2.0',
description='DataPipeline for Humans.',
install_requires=[
'boto>=2.32',
- 'yaml'
+ 'pyyaml'
],
+ scripts=['bin/dataduct'],
classifiers=[
'Development Status :: 5 - Production/Stable',
'Intended Audience :: Developers',

0 comments on commit 9ecd1b6

Please sign in to comment.